Skip to content

Commit

Permalink
xe: jit: gemm: add separate f4_e2m1 strategies
Browse files Browse the repository at this point in the history
  • Loading branch information
kealan-barbieri committed Jan 30, 2025
1 parent 55c338d commit 91cfc9b
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/gpu/intel/jit/gemm/gen_gemm_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ status_t gen_gemm_nocopy_kernel_desc_t::select_kernel(compute::gpu_arch_t arch,
}
}
add_mode_matches(true, [](Type dt) -> const char * {
- if (dt.isFP4()) return "H";
+ if (dt.isFP4()) return "[EH]";
return nullptr;
});

Expand Down
8 changes: 8 additions & 0 deletions src/gpu/intel/jit/gemm/selector/db/kernel.db
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,14 @@ auto _CATALOG_ = kcatalog::toFlatCatalog({
{{'F', "gemm", {"[FO]", "O", "S"}, {"N", "T", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "#I"}, "aB32 aS32 aB wg 1x4x8 kr cab4 ks32 af hi pt bk0 grf256 sys l4 dm sr br", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 131072, 16777216}, {524288, 131072, 16777216}, {32, 8, 32}, {1, 4, 8}, 1, (WGType) 1, 261, 8192, 8192, {1, 1, 4}, {true, true, true}}, {'E', 17, {1.0714e+06, 814336, -806.186, 79098.3, 0, 0, 1.57352, 2.83206, 2.69271, 7.17928, 0.0520116, 0.0520116, 0, 0.665122, 1.00198, 1.00049, 9.78834e-15}}},
{{'F', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 8, 1}, "IAB"}, "at32x2+m64@96 am64+m32@128 aB wg 4x2x4 kr xaf st hi pt sr br sb128 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {262144, 262144, 16777216}, {262144, 262144, 64}, {16, 16, 64}, {4, 2, 4}, 1, (WGType) 1, 445, 0, 8192, {4, 8, 4}, {true, true, true}}, {'E', 17, {1.12606e+06, -137008, -24369.3, 228077, 2.14139e+06, 1.80224e+06, 0.232306, 0.352856, 0.366483, 1.01866, 0.0096663, 0.00801051, 0.00318484, 0.842293, 1.37613, 0.921541, 4.97764e-12}}},
{{'F', "gemm", {"[FO]", "O", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {4, 8, 1}, "ABIps"}, "at32+m128@96 am64+m64@112 aB wg 4x8 xaf st hi pt sr br sb128 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 655360, 16777216}, {1048576, 655360, 64}, {16, 40, 32}, {4, 8, 1}, 1, (WGType) 1, 441, 0, 0, {4, 8, 4}, {true, true, true}}, {'E', 17, {884649, 705009, 0, 0, 6.08092e+06, 1.03629e+07, 0.496687, 0.364134, 0.840897, 1.28383, 0.00200956, 0.00200956, 0, 1, 2.31371, 1.15477, 1.64496e-12}}},
{{'F', "gemm", {"[EH]", "[EH]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 16, -1}, {1, 1, 1}, "#I"}, "aB16+m32@32 aB32 aB wg 2x8 af vav li nmk pt sr br ca3 bk0 sys kv dm afb l4", {16, (LoopType) 255, 128, {(LoopType) 225, (LoopType) 255, (LoopType) 255}, {1048576, 32768, 16777216}, {1048576, 32768, 32}, {64, 2, 32}, {2, 8, 1}, 1, (WGType) 1, 441, 24576, 0, {4, 2, 4}, {true, true, true}}, {'E', 17, {1.32162e+06, 161954, 0, 0, 2.32817e+06, 0, 0.71806, 4.15517, 0.786689, 1.40778, 0.0341164, 0.0131941, 0.0256486, 0.947188, 1.39057, 0.987284, 5.0128e-12}}},
{{'F', "gemm", {"[EH]", "[EH]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av64+m64@64 am32x2+m64@32 aB wg 4x8 xaf vav hi pt sr br sb64 bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {262144, 131072, 16777216}, {262144, 131072, 16777216}, {16, 8, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {888453, 388680, 0, 0, 0, 0, 1.36981, 1.36706, 1.57389, 2.50026, 0.0230892, 0.0230892, 0, 0.779666, 1.13216, 0.970824, 1.11098e-12}}},
{{'F', "gemm", {"[EH]", "[EH]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m64@64 am32+m64@64 aB wg 4x8 af vav hi pt sr br sb256 bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {524288, 262144, 16777216}, {32, 16, 32}, {4, 8, 1}, 1, (WGType) 1, 257, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {872440, 433979, 0, 0, 0, 0, 0.692755, 0.929392, 0.682568, 1.28977, 0.00829318, 0.00829318, 0, 0.933146, 1.44966, 1.06633, 2.17433e-12}}},
{{'F', "gemm", {"[EH]", "[EH]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m64@64 am16 aB wg 8x4 cb4x2 ks32 xaf vav hi pt sr br bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 32}, {8, 4, 1}, 1, (WGType) 1, 257, 32768, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {1.00706e+06, 522382, 0, 0, 0, 0, 0.725659, 1.44632, 0.970408, 1.74134, 0.0067111, 0.0067111, 0, 0.90349, 1.42986, 1.13348, 2.91269e-12}}},
{{'F', "gemm", {"[EH]", "[EH]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABIps"}, "av32+m16@64 am32+m32@72 aB wg 8x4 xaf vav hi pt sr br sb64 bk0 sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 786432, 16777216}, {524288, 786432, 64}, {32, 16, 32}, {8, 4, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {876646, 564122, 0, 0, 6.5151e+06, 7.83974e+06, 0.629669, 0.87362, 0.885543, 1.48097, 0.00440774, 0.00440774, 0, 1, 1.66234, 1.24996, 2.85794e-12}}},
{{'F', "gemm", {"[EH]", "[EH]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m16@32 am16x2 aB wg 4x4x2 kr cb4 ks16 xaf st vav hi pt sr br bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {1048576, 524288, 16777216}, {1048576, 524288, 16777216}, {64, 32, 16}, {4, 4, 2}, 1, (WGType) 1, 261, 16384, 65536, {8, 8, 4}, {true, true, true}}, {'E', 17, {1.02495e+06, 13797.9, 15430.1, 758509, 0, 0, 0.535333, 1.20812, 0.912657, 1.84068, 0.00529983, 0.00529983, 0, 1, 1.60581, 1.15873, 3.51036e-12}}},
{{'F', "gemm", {"[EH]", "[EH]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABIqps"}, "av16+m32@72 am32+m32@64 aB wg 4x8 xaf vav hi pt sr br sb64 bk0 sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 655360, 16777216}, {1048576, 655360, 32}, {64, 24, 32}, {4, 8, 1}, 1, (WGType) 1, 441, 0, 0, {8, 8, 4}, {true, true, true}}, {'E', 17, {903365, 697556, 0, 0, 8.2903e+06, 1.21651e+07, 0.724506, 0.722081, 0.92287, 1.55416, 0.00402055, 0.00402055, 0, 0.997691, 1.6726, 1.18622, 5.18793e-12}}},
{{'F', "gemm", {"[EH]", "[EH]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {8, 8, 1}, "ABI"}, "av16+m32@40 am32+m32@32 aB wg 2x8x2 kr xaf st vav hi pt sr br sb64 bk0 sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {1048576, 131072, 16777216}, {1048576, 131072, 16777216}, {64, 8, 32}, {2, 8, 2}, 1, (WGType) 1, 261, 0, 32768, {8, 8, 4}, {true, true, true}}, {'E', 17, {1.06324e+06, 220443, 364.907, 276934, 0, 0, 0.524524, 1.25881, 0.793843, 2.21167, 0.00974309, 0.00974309, 0, 0.984682, 1.55809, 1.03396, 4.08729e-12}}},
{{'F', "gemm", {"[SB]", "[SB]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aB16 aB16 aB wg 4x8 cab4x2 ks16 af vav hi pt bk0 sn grf256 sys sr br", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 524288, 16777216}, {8192, 8192, 16777216}, {64, 32, 16}, {4, 8, 1}, 1, (WGType) 1, 257, 65536, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.11006e+06, 935686, 0, 0, 0, 0, 1.58314, 3.00527, 1.01282, 1.59913, 0.00625344, 0.00625344, 0, 1, 1.56406, 1.11642, 3.07212e-12}}},
{{'F', "gemm", {"[SB]", "[SB]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "Ip"}, "aB16x2 aB16x2 aB wg 4x8 cab4 ks32 af vav hi pt bk0 sn grf256 sys dm sr br", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 262144, 16777216}, {8192, 8192, 16777216}, {64, 16, 32}, {4, 8, 1}, 1, (WGType) 1, 257, 98304, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.07316e+06, 789496, 0, 0, 0, 0, 1.617, 1.63049, 0.937992, 1.68308, 0.0104723, 0.0104723, 0, 0.85096, 1.32269, 1.03329, 2.07642e-12}}},
{{'F', "gemm", {"[SB]", "[SB]", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {1, 1, 1}, "I"}, "aB64 aB64 aB wg 4x8 cab4 ks64 af vav hi pt bk0 sn grf256 sys dm sr br", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 262144, 16777216}, {8192, 8192, 16777216}, {32, 16, 64}, {4, 8, 1}, 1, (WGType) 1, 257, 131072, 0, {4, 4, 4}, {true, true, true}}, {'E', 17, {1.07471e+06, 779135, 0, 0, 0, 0, 1.55845, 1.44307, 0.897078, 1.82423, 0.0150498, 0.0150498, 0, 0.56959, 1.2821, 0.858742, 5.47604e-12}}},
Expand Down

0 comments on commit 91cfc9b

Please sign in to comment.