Skip to content

Commit

Permalink
Merge pull request #16 from lukel97/fractional-lmul
Browse files Browse the repository at this point in the history
add support for fractional LMUL in instruction benchmarks
  • Loading branch information
camel-cdr authored Nov 2, 2024
2 parents 7f6dcbf + 8ab1568 commit c9e4b6a
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 26 deletions.
32 changes: 16 additions & 16 deletions instructions/rvv/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,22 @@
#define RUNS 500

// processor specific configs
// m8 m4 m2 m1
// SEW: 6310 6310
// 4268... 4268...
#define T_A 0b1111111111111111 // all
#define T_W 0b0000011101110111 // widen
#define T_WR 0b0111011101110111 // widen reduction
#define T_N 0b0000011101110111 // narrow
#define T_F 0b1100110011001100 // float
#define T_FW 0b0000010001000100 // float widen
#define T_FWR 0b0100010001000100 // float widen reduction
#define T_FN 0b0000010001000100 // float narrow
// m8 m4 m2 m1 mf2 mf4 mf8
// SEW: 6310 6310 6310 6310
// 4268... 4268... 4268... 4268...
#define T_A 0b1111111111111111111111111111 // all
#define T_W 0b0000011101110111011101110111 // widen
#define T_WR 0b0111011101110111011101110111 // widen reduction
#define T_N 0b0000011101110111011101110111 // narrow
#define T_F 0b1110111011101110111011101110 // float
#define T_FW 0b0000011001100110011001100110 // float widen
#define T_FWR 0b0110011001100110011001100110 // float widen reduction
#define T_FN 0b0000011001100110011001100110 // float narrow

#define T_E2 0b1110111011101110 // extend 2
#define T_E4 0b1100110011001100 // extend 4
#define T_E8 0b1000100010001000 // extend 8
#define T_ei16 0b1110111111111111 // no m8
#define T_E2 0b1110111011101110111011101110 // extend 2
#define T_E4 0b1100110011001100110011001100 // extend 4
#define T_E8 0b1000100010001000100010001000 // extend 8
#define T_ei16 0b1110111111111111111111111111 // no m8

// special:
#define T_m1 ((1 << 16) | T_A) // emul<=1
#define T_m1 ((1 << 28) | T_A) // emul<=1
30 changes: 27 additions & 3 deletions instructions/rvv/gen.S
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ define(`m_bench_wxi',

define(`m_ifmx_t', `m_$3(shift(shift(shift($*))))')
define(`m_ifmx_f', `m_$3($4, 0, m_nop, m_unimpl)')
define(`m_ifmx',`ifelse(eval($1 <= m_LMUL && $2 >= m_LMUL),1,`m_ifmx_t($*)',`m_ifmx_f($*)')')

define(`m_ifmx_lmul_to_i', `ifelse(regexp($1,f), -1, $1, 0)')
define(`m_ifmx',`ifelse(eval($1 <= m_ifmx_lmul_to_i(m_LMUL) && $2 >= m_ifmx_lmul_to_i(m_LMUL)),1,`m_ifmx_t($*)',`m_ifmx_f($*)')')

# calls $1 with: name type setup code:vararg
define(`m_bench_all',`
Expand Down Expand Up @@ -344,32 +344,56 @@ define(`m_bench_all',`
#define defptr .dword
#endif

define(`m_gen_function_pointers', `defptr $1_m`'m_LMUL')
define(`m_gen_function_pointers', `defptr $1_m`'m_benchLMUL')
define(`m_gen_types', `defptr $2')
define(`m_format',`$2 shift(shift($*))')
define(`m_gen_strings', `.string "m_format(,shift(shift(shift($*))))"')

.balign 8
define(`m_LMUL', f8)
define(`m_benchLMUL', 1)
.global bench_m`'m_LMUL
bench_m`'m_LMUL:
m_bench_all(`gen_function_pointers')

.balign 8
define(`m_LMUL', f4)
define(`m_benchLMUL', 1)
.global bench_m`'m_LMUL
bench_m`'m_LMUL:
m_bench_all(`gen_function_pointers')

.balign 8
define(`m_LMUL', f2)
define(`m_benchLMUL', 1)
.global bench_m`'m_LMUL
bench_m`'m_LMUL:
m_bench_all(`gen_function_pointers')

.balign 8
define(`m_LMUL', 1)
define(`m_benchLMUL', 1)
.global bench_m`'m_LMUL
bench_m`'m_LMUL:
m_bench_all(`gen_function_pointers')

.balign 8
define(`m_LMUL', 2)
define(`m_benchLMUL', 2)
.global bench_m`'m_LMUL
bench_m`'m_LMUL:
m_bench_all(`gen_function_pointers')

.balign 8
define(`m_LMUL', 4)
define(`m_benchLMUL', 4)
.global bench_m`'m_LMUL
bench_m`'m_LMUL:
m_bench_all(`gen_function_pointers')

.balign 8
define(`m_LMUL', 8)
define(`m_benchLMUL', 8)
.global bench_m`'m_LMUL
bench_m`'m_LMUL:
m_bench_all(`gen_function_pointers')
Expand Down
27 changes: 20 additions & 7 deletions instructions/rvv/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ typedef ux (*BenchFunc)(void);
extern size_t bench_count;
extern char bench_names;
extern ux bench_types;
extern BenchFunc bench_m1, bench_m2, bench_m4, bench_m8;
static BenchFunc *benches[] = { &bench_m1, &bench_m2, &bench_m4, &bench_m8 };
extern BenchFunc bench_mf8, bench_mf4, bench_mf2, bench_m1, bench_m2, bench_m4, bench_m8;
static BenchFunc *benches[] = { &bench_mf8, &bench_mf4, &bench_mf2, &bench_m1, &bench_m2, &bench_m4, &bench_m8 };

extern ux run_bench(ux (*bench)(void), ux type, ux vl, ux seed);

Expand All @@ -29,22 +29,35 @@ run_all_types(char const *name, ux bIdx, ux vl, int ta, int ma)
print("<tr><td>")(s,name)("</td>");
ux mask = bIdx[&bench_types];

ux lmuls[] = { 5, 6, 7, 0, 1, 2, 3 };

for (ux sew = 0; sew < 4; ++sew)
for (ux lmul = 0; lmul < 4; ++lmul) {
for (ux lmul_idx = 0; lmul_idx < 7; ++lmul_idx) {
ux lmul = lmuls[lmul_idx];
ux vtype = lmul | (sew<<3) | (!!ta << 6) | (!!ma << 7);

if (!(mask >> (lmul*4 + sew) & 1)) {
if (!(mask >> (lmul_idx*4 + sew) & 1)) {
print("<td></td>");
continue;
}

ux lmul_val = 1 << lmul_idx; // LMUL in fixed-point with denominator 8 (mf8 = 1, m1 = 8, m8 = 64)
ux sew_val = 1 << (sew + 3);
// > For a given supported fractional LMUL setting,
// > implementations must support SEW settings between SEWMIN
// > and LMUL * ELEN, inclusive.
if (sew_val * 8 > lmul_val * __riscv_v_elen) {
print("<td></td>");
continue;
}

ux emul = lmul;
ux emul = lmul_idx;
if (mask == T_W || mask == T_FW || mask == T_N || mask == T_FN)
emul += 1;
if (mask == T_ei16 && sew == 0)
emul = emul < 3 ? emul+1 : 3;
emul = emul < 7 ? emul+1 : 7;
if (mask == T_m1)
emul = 1;
emul = 4; // m2
BenchFunc bench = benches[emul][bIdx];

for (ux i = 0; i < RUNS; ++i) {
Expand Down

0 comments on commit c9e4b6a

Please sign in to comment.