Skip to content

Commit a97503a

Browse files
committed
Added AVX512 support for SkRasterPipeline_opts.h
Hi, this patch adds AVX512 support for certain functions in both the highp and lowp code paths. It was tested and verified within the Pdfium repository, where it passed pdfium_embeddertests.exe. Performance-wise, the AVX512 code path is significantly faster than the standard SSE and AVX2 paths; this improvement was confirmed by testing with PDF files taken from the resources folder of the Pdfium library.

This is an imported pull request from google#149
GitOrigin-RevId: 3dfeb3b
Change-Id: I91f95a69d914ed57707239b7d2257a6c8f0c3ffa

This is an imported pull request from google#151
GitOrigin-RevId: 354e9e0
Change-Id: Ia674977e3c1a083938bbfda1e9d785595896cb88
1 parent c9c9ff7 commit a97503a

7 files changed

+139
-74
lines changed

BUILD.gn

+37-25
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ config("skia_public") {
3131
defines = [
3232
"SK_DEFAULT_TYPEFACE_IS_EMPTY",
3333
"SK_DISABLE_LEGACY_DEFAULT_TYPEFACE",
34+
35+
# TODO(305780908) re-enable after fixing Android and G3
36+
#"SK_DISABLE_LEGACY_FONTMGR_FACTORY",
37+
#"SK_DISABLE_LEGACY_FONTMGR_REFDEFAULT",
3438
]
3539
cflags_objcc = []
3640
if (is_component_build) {
@@ -182,6 +186,19 @@ opts("hsw") {
182186
}
183187
}
184188

189+
opts("skx") {
190+
enabled = is_x86
191+
sources = skia_opts.skx_sources
192+
if (is_win) {
193+
cflags = [ "/arch:AVX512" ]
194+
} else {
195+
cflags = [ "-march=skylake-avx512" ]
196+
if (is_mac && is_debug) {
197+
cflags += [ "-fno-stack-check" ] # Work around skia:9709
198+
}
199+
}
200+
}
201+
185202
# Any feature of Skia that requires third-party code should be optional and use this template.
186203
template("optional") {
187204
if (invoker.enabled) {
@@ -426,7 +443,7 @@ optional("fontmgr_FontConfigInterface_factory") {
426443

427444
optional("fontmgr_fuchsia") {
428445
enabled = skia_enable_fontmgr_fuchsia
429-
446+
public_defines = [ "SK_FONTMGR_FUCHSIA_AVAILABLE" ]
430447
deps = []
431448

432449
if (is_fuchsia && using_fuchsia_sdk) {
@@ -910,7 +927,6 @@ optional("gpu_shared") {
910927

911928
if (skia_use_dawn) {
912929
public_defines += [ "SK_DAWN" ]
913-
sources += skia_shared_dawn_sources
914930

915931
# When building for WASM, the WebGPU headers are provided by Emscripten. For native builds we
916932
# have to depend on Dawn directly.
@@ -921,8 +937,8 @@ optional("gpu_shared") {
921937
"//third_party/externals/dawn/src/dawn:proc",
922938
]
923939

924-
if (dawn_enable_d3d12 || dawn_enable_desktop_gl || dawn_enable_metal ||
925-
dawn_enable_opengles || dawn_enable_vulkan) {
940+
if (dawn_enable_d3d12 || dawn_enable_d3d11 || dawn_enable_desktop_gl ||
941+
dawn_enable_metal || dawn_enable_opengles || dawn_enable_vulkan) {
926942
public_deps += [ "//third_party/externals/dawn/src/dawn/native" ]
927943
}
928944
if (dawn_enable_d3d12) {
@@ -959,6 +975,10 @@ optional("gpu_shared") {
959975
public_defines += [ "SK_METAL" ]
960976
sources += skia_shared_mtl_sources
961977
}
978+
979+
if (is_android) {
980+
sources += skia_shared_android_sources
981+
}
962982
}
963983

964984
optional("gpu") {
@@ -1230,7 +1250,8 @@ optional("graphite") {
12301250
}
12311251

12321252
optional("pdf") {
1233-
enabled = skia_use_zlib && skia_enable_pdf
1253+
enabled = skia_use_zlib && skia_enable_pdf && skia_use_libjpeg_turbo_decode &&
1254+
skia_use_libjpeg_turbo_encode
12341255
public_defines = [ "SK_SUPPORT_PDF" ]
12351256

12361257
deps = [ "//third_party/zlib" ]
@@ -1245,21 +1266,10 @@ optional("pdf") {
12451266
defines = [ "SK_PDF_USE_SFNTLY" ]
12461267
}
12471268

1248-
if (skia_use_libjpeg_turbo_decode) {
1249-
deps += [
1250-
":jpeg_decode",
1251-
1252-
# This is not a public_dep so we need to directly depend on it
1253-
# to use jpeg headers
1254-
"//third_party/libjpeg-turbo:libjpeg",
1255-
]
1256-
sources += [ "src/pdf/SkJpegInfo_libjpegturbo.cpp" ]
1257-
} else {
1258-
sources += [ "src/pdf/SkJpegInfo_none.cpp" ]
1259-
}
1260-
if (skia_use_libjpeg_turbo_encode) {
1261-
deps += [ ":jpeg_encode" ]
1262-
}
1269+
deps += [
1270+
":jpeg_decode",
1271+
":jpeg_encode",
1272+
]
12631273
}
12641274

12651275
optional("xps") {
@@ -1494,6 +1504,7 @@ skia_component("skia") {
14941504
":fontmgr_factory",
14951505
":heif",
14961506
":hsw",
1507+
":skx",
14971508
":jpeg_decode",
14981509
":jpegxl_decode",
14991510
":minify_sksl",
@@ -2023,7 +2034,11 @@ if (skia_enable_tools) {
20232034
"X11",
20242035
]
20252036
} else if (is_win) {
2026-
sources += [ "tools/gpu/gl/win/CreatePlatformGLTestContext_win.cpp" ]
2037+
sources += [
2038+
"tools/gpu/gl/win/CreatePlatformGLTestContext_win.cpp",
2039+
"tools/gpu/gl/win/SkWGL.h",
2040+
"tools/gpu/gl/win/SkWGL_win.cpp",
2041+
]
20272042
libs += [ "Gdi32.lib" ]
20282043
if (target_cpu != "arm64") {
20292044
libs += [ "OpenGL32.lib" ]
@@ -2680,10 +2695,7 @@ if (skia_enable_tools) {
26802695
"tools/skdiff/skdiff_main.cpp",
26812696
"tools/skdiff/skdiff_utils.cpp",
26822697
]
2683-
deps = [
2684-
":skia",
2685-
":tool_utils",
2686-
]
2698+
deps = [ ":skia" ]
26872699
}
26882700

26892701
test_app("skp_parser") {

gn/opts.gni

+1
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
_src = get_path_info("../src", "abspath")
88

99
hsw = [ "$_src/opts/SkOpts_hsw.cpp" ]
10+
skx = [ "$_src/opts/SkOpts_skx.cpp" ]

gn/shared_sources.gni

+1
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,5 @@ import("xml.gni")
1919
import("xps.gni")
2020
skia_opts = {
2121
hsw_sources = hsw
22+
skx_sources = skx
2223
}

src/core/SkOpts.cpp

+10-2
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,23 @@ namespace SkOpts {
2929
StageFn ops_highp[] = { SK_RASTER_PIPELINE_OPS_ALL(M) };
3030
StageFn just_return_highp = (StageFn)SK_OPTS_NS::just_return;
3131
void (*start_pipeline_highp)(size_t, size_t, size_t, size_t, SkRasterPipelineStage*,
32-
SkSpan<SkRasterPipeline_MemoryCtxPatch>) =
32+
SkSpan<SkRasterPipeline_MemoryCtxPatch>,
33+
uint8_t*) =
3334
SK_OPTS_NS::start_pipeline;
3435
#undef M
3536

3637
#define M(st) (StageFn)SK_OPTS_NS::lowp::st,
3738
StageFn ops_lowp[] = { SK_RASTER_PIPELINE_OPS_LOWP(M) };
3839
StageFn just_return_lowp = (StageFn)SK_OPTS_NS::lowp::just_return;
3940
void (*start_pipeline_lowp)(size_t, size_t, size_t, size_t, SkRasterPipelineStage*,
40-
SkSpan<SkRasterPipeline_MemoryCtxPatch>) =
41+
SkSpan<SkRasterPipeline_MemoryCtxPatch>,
42+
uint8_t*) =
4143
SK_OPTS_NS::lowp::start_pipeline;
4244
#undef M
4345

4446
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
4547
void Init_hsw();
48+
void Init_skx();
4649

4750
static bool init() {
4851
#if defined(SK_ENABLE_OPTIMIZE_SIZE)
@@ -51,6 +54,11 @@ namespace SkOpts {
5154
#if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_AVX2
5255
if (SkCpu::Supports(SkCpu::HSW)) { Init_hsw(); }
5356
#endif
57+
58+
#if SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SKX
59+
if (SkCpu::Supports(SkCpu::SKX)) { Init_skx(); }
60+
#endif
61+
5462
#endif
5563
return true;
5664
}

src/core/SkRasterPipelineOpContexts.h

+41-32
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,12 @@ namespace SkSL { class TraceHook; }
1919
// by stages that have no lowp implementation. They can therefore use the (smaller) highp value to
2020
// save memory in the arena.
2121
inline static constexpr int SkRasterPipeline_kMaxStride = 16;
22-
inline static constexpr int SkRasterPipeline_kMaxStride_highp = 16;
22+
inline static constexpr int SkRasterPipeline_kMaxStride_highp = 8;
23+
inline static constexpr int SkRasterPipeline_kMaxStride_highp_skx = 16;
2324

2425
// How much space to allocate for each MemoryCtx scratch buffer, as part of tail-pixel handling.
2526
inline static constexpr size_t SkRasterPipeline_MaxScratchPerPatch =
26-
std::max(SkRasterPipeline_kMaxStride_highp * 16, // 16 == largest highp bpp (RGBA_F32)
27+
std::max(SkRasterPipeline_kMaxStride_highp_skx * 16, // 16 == largest highp bpp (RGBA_F32)
2728
SkRasterPipeline_kMaxStride * 4); // 4 == largest lowp bpp (RGBA_8888)
2829

2930
// These structs hold the context data for many of the Raster Pipeline ops.
@@ -74,17 +75,17 @@ struct SkRasterPipeline_GatherCtx {
7475

7576
// State shared by save_xy, accumulate, and bilinear_* / bicubic_*.
7677
struct SkRasterPipeline_SamplerCtx {
77-
float x[SkRasterPipeline_kMaxStride_highp];
78-
float y[SkRasterPipeline_kMaxStride_highp];
79-
float fx[SkRasterPipeline_kMaxStride_highp];
80-
float fy[SkRasterPipeline_kMaxStride_highp];
81-
float scalex[SkRasterPipeline_kMaxStride_highp];
82-
float scaley[SkRasterPipeline_kMaxStride_highp];
78+
float x[SkRasterPipeline_kMaxStride_highp_skx];
79+
float y[SkRasterPipeline_kMaxStride_highp_skx];
80+
float fx[SkRasterPipeline_kMaxStride_highp_skx];
81+
float fy[SkRasterPipeline_kMaxStride_highp_skx];
82+
float scalex[SkRasterPipeline_kMaxStride_highp_skx];
83+
float scaley[SkRasterPipeline_kMaxStride_highp_skx];
8384

8485
// for bicubic_[np][13][xy]
8586
float weights[16];
86-
float wx[4][SkRasterPipeline_kMaxStride_highp];
87-
float wy[4][SkRasterPipeline_kMaxStride_highp];
87+
float wx[4][SkRasterPipeline_kMaxStride_highp_skx];
88+
float wy[4][SkRasterPipeline_kMaxStride_highp_skx];
8889
};
8990

9091
struct SkRasterPipeline_TileCtx {
@@ -111,14 +112,14 @@ struct SkRasterPipeline_DecalTileCtx {
111112
// State used by mipmap_linear_*
112113
struct SkRasterPipeline_MipmapCtx {
113114
// Original coords, saved before the base level logic
114-
float x[SkRasterPipeline_kMaxStride_highp];
115-
float y[SkRasterPipeline_kMaxStride_highp];
115+
float x[SkRasterPipeline_kMaxStride_highp_skx];
116+
float y[SkRasterPipeline_kMaxStride_highp_skx];
116117

117118
// Base level color
118-
float r[SkRasterPipeline_kMaxStride_highp];
119-
float g[SkRasterPipeline_kMaxStride_highp];
120-
float b[SkRasterPipeline_kMaxStride_highp];
121-
float a[SkRasterPipeline_kMaxStride_highp];
119+
float r[SkRasterPipeline_kMaxStride_highp_skx];
120+
float g[SkRasterPipeline_kMaxStride_highp_skx];
121+
float b[SkRasterPipeline_kMaxStride_highp_skx];
122+
float a[SkRasterPipeline_kMaxStride_highp_skx];
122123

123124
// Scale factors to transform base level coords to lower level coords
124125
float scaleX;
@@ -138,22 +139,22 @@ struct SkRasterPipeline_CallbackCtx {
138139

139140
// When called, fn() will have our active pixels available in rgba.
140141
// When fn() returns, the pipeline will read back those active pixels from read_from.
141-
float rgba[4*SkRasterPipeline_kMaxStride_highp];
142+
float rgba[4*SkRasterPipeline_kMaxStride_highp_skx];
142143
float* read_from = rgba;
143144
};
144145

145146
// state shared by stack_checkpoint and stack_rewind
146147
struct SkRasterPipelineStage;
147148

148149
struct SkRasterPipeline_RewindCtx {
149-
float r[SkRasterPipeline_kMaxStride_highp];
150-
float g[SkRasterPipeline_kMaxStride_highp];
151-
float b[SkRasterPipeline_kMaxStride_highp];
152-
float a[SkRasterPipeline_kMaxStride_highp];
153-
float dr[SkRasterPipeline_kMaxStride_highp];
154-
float dg[SkRasterPipeline_kMaxStride_highp];
155-
float db[SkRasterPipeline_kMaxStride_highp];
156-
float da[SkRasterPipeline_kMaxStride_highp];
150+
float r[SkRasterPipeline_kMaxStride_highp_skx];
151+
float g[SkRasterPipeline_kMaxStride_highp_skx];
152+
float b[SkRasterPipeline_kMaxStride_highp_skx];
153+
float a[SkRasterPipeline_kMaxStride_highp_skx];
154+
float dr[SkRasterPipeline_kMaxStride_highp_skx];
155+
float dg[SkRasterPipeline_kMaxStride_highp_skx];
156+
float db[SkRasterPipeline_kMaxStride_highp_skx];
157+
float da[SkRasterPipeline_kMaxStride_highp_skx];
157158
std::byte* base;
158159
SkRasterPipelineStage* stage;
159160
};
@@ -192,14 +193,18 @@ struct SkRasterPipeline_TablesCtx {
192193

193194
using SkRPOffset = uint32_t;
194195

196+
struct SkRasterPipeline_InitLaneMasksCtx {
197+
uint8_t* tail;
198+
};
199+
195200
struct SkRasterPipeline_ConstantCtx {
196201
float value;
197202
SkRPOffset dst;
198203
};
199204

200205
struct SkRasterPipeline_UniformCtx {
201-
float *dst;
202-
const float *src;
206+
float* dst;
207+
const float* src;
203208
};
204209

205210
struct SkRasterPipeline_BinaryOpCtx {
@@ -227,20 +232,20 @@ struct SkRasterPipeline_SwizzleCtx {
227232
};
228233

229234
struct SkRasterPipeline_ShuffleCtx {
230-
float *ptr;
235+
float* ptr;
231236
int count;
232237
uint16_t offsets[16]; // values must be byte offsets (4 * highp-stride * component-index)
233238
};
234239

235240
struct SkRasterPipeline_SwizzleCopyCtx {
236-
float *dst;
237-
float *src; // src values must _not_ overlap dst values
241+
float* dst;
242+
float* src; // src values must _not_ overlap dst values
238243
uint16_t offsets[4]; // values must be byte offsets (4 * highp-stride * component-index)
239244
};
240245

241246
struct SkRasterPipeline_CopyIndirectCtx {
242-
float *dst;
243-
const float *src;
247+
float* dst;
248+
const float* src;
244249
const uint32_t *indirectOffset; // this applies to `src` or `dst` based on the op
245250
uint32_t indirectLimit; // the indirect offset is clamped to this upper bound
246251
uint32_t slots; // the number of slots to copy
@@ -254,6 +259,10 @@ struct SkRasterPipeline_BranchCtx {
254259
int offset; // contains the label ID during compilation, and the program offset when compiled
255260
};
256261

262+
struct SkRasterPipeline_BranchIfAllLanesActiveCtx : public SkRasterPipeline_BranchCtx {
263+
uint8_t* tail = nullptr; // lanes past the tail are _never_ active, so we need to exclude them
264+
};
265+
257266
struct SkRasterPipeline_BranchIfEqualCtx : public SkRasterPipeline_BranchCtx {
258267
int value;
259268
const int* ptr;

src/opts/SkOpts_skx.cpp

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright 2018 Google Inc.
3+
*
4+
* Use of this source code is governed by a BSD-style license that can be
5+
* found in the LICENSE file.
6+
*/
7+
8+
#include "src/core/SkOpts.h"
9+
10+
#if !defined(SK_ENABLE_OPTIMIZE_SIZE)
11+
12+
#define SK_OPTS_NS skx
13+
#include "src/opts/SkRasterPipeline_opts.h"
14+
15+
namespace SkOpts {
16+
void Init_skx() {
17+
raster_pipeline_lowp_stride = SK_OPTS_NS::raster_pipeline_lowp_stride();
18+
raster_pipeline_highp_stride = SK_OPTS_NS::raster_pipeline_highp_stride();
19+
20+
#define M(st) ops_highp[(int)SkRasterPipelineOp::st] = (StageFn)SK_OPTS_NS::st;
21+
SK_RASTER_PIPELINE_OPS_ALL(M)
22+
just_return_highp = (StageFn)SK_OPTS_NS::just_return;
23+
start_pipeline_highp = SK_OPTS_NS::start_pipeline;
24+
#undef M
25+
26+
#define M(st) ops_lowp[(int)SkRasterPipelineOp::st] = (StageFn)SK_OPTS_NS::lowp::st;
27+
SK_RASTER_PIPELINE_OPS_LOWP(M)
28+
just_return_lowp = (StageFn)SK_OPTS_NS::lowp::just_return;
29+
start_pipeline_lowp = SK_OPTS_NS::lowp::start_pipeline;
30+
#undef M
31+
}
32+
} // namespace SkOpts
33+
34+
#endif // SK_ENABLE_OPTIMIZE_SIZE

0 commit comments

Comments
 (0)