diff --git a/microbm/cm.csv b/microbm/cm.csv index 1b058cb..6f359dd 100644 --- a/microbm/cm.csv +++ b/microbm/cm.csv @@ -1,42 +1,96 @@ -fneg,float,7 -fadd,float,7 -fsub,float,7 -fmul,float,7 -fdiv,float,7 -fcmp,float,6 -fpext_float_to_double,float,7 -fmuladd,float,7 -sin,float,42 -cos,float,40 -tan,float,75 -exp,float,44 -log,float,29 -sqrt,float,12 -expm1,float,33 -log1p,float,32 -cbrt,float,175 -pow,float,53 -fabs,float,7 -hypot,float,251 -fma,float,30 -fneg,double,7 -fadd,double,7 -fsub,double,7 -fmul,double,7 -fdiv,double,11 -fcmp,double,6 -fptrunc_double_to_float,double,7 -fmuladd,double,7 -sin,double,351 -cos,double,353 -tan,double,406 -exp,double,70 -log,double,33 -sqrt,double,21 -expm1,double,30 -log1p,double,42 -cbrt,double,98 -pow,double,104 -fabs,double,7 -hypot,double,30 -fma,double,29 \ No newline at end of file +fneg,float,76 +fadd,float,75 +fsub,float,75 +fmul,float,75 +fdiv,float,75 +fcmp,float,62 +fpext_float_to_double,float,75 +fmuladd,float,75 +sin,float,416 +cos,float,415 +tan,float,971 +exp,float,459 +log,float,308 +sqrt,float,122 +expm1,float,502 +log1p,float,484 +cbrt,float,3042 +pow,float,546 +fabs,float,75 +fma,float,75 +maxnum,float,75 +minnum,float,75 +ceil,float,75 +floor,float,75 +exp2,float,468 +log10,float,517 +log2,float,309 +rint,float,299 +round,float,304 +trunc,float,75 +copysign,float,75 +fdim,float,306 +fmod,float,312 +asin,float,351 +acos,float,309 +atan,float,797 +atan2,float,2598 +sinh,float,451 +cosh,float,323 +tanh,float,511 +asinh,float,479 +acosh,float,338 +atanh,float,358 +hypot,float,1831 +erf,float,318 +lgamma,float,1647 +tgamma,float,3278 +remainder,float,2812 +fneg,double,77 +fadd,double,76 +fsub,double,77 +fmul,double,77 +fdiv,double,116 +fcmp,double,63 +fptrunc_double_to_float,double,76 +fmuladd,double,75 +sin,double,3505 +cos,double,3524 +tan,double,4144 +exp,double,711 +log,double,340 +sqrt,double,214 +expm1,double,305 +log1p,double,428 +cbrt,double,991 +pow,double,1182 +fabs,double,75 +fma,double,75 
+maxnum,double,75 +minnum,double,75 +ceil,double,75 +floor,double,75 +exp2,double,397 +log10,double,586 +log2,double,315 +rint,double,295 +round,double,297 +trunc,double,74 +copysign,double,74 +fdim,double,299 +fmod,double,303 +asin,double,550 +acos,double,540 +atan,double,767 +atan2,double,1129 +sinh,double,626 +cosh,double,535 +tanh,double,309 +asinh,double,479 +acosh,double,358 +atanh,double,326 +hypot,double,496 +erf,double,310 +lgamma,double,1867 +tgamma,double,1859 +remainder,double,1006 diff --git a/microbm/microbm.py b/microbm/microbm.py index 6ece918..7adf679 100644 --- a/microbm/microbm.py +++ b/microbm/microbm.py @@ -1,9 +1,8 @@ import time import csv -import os import struct -import numpy as np import random +import numpy as np import llvmlite.binding as llvm import ctypes @@ -15,77 +14,9 @@ FAST_MATH_FLAG = "reassoc nsz arcp contract afn" -instructions = ["fneg", "fadd", "fsub", "fmul", "fdiv", "fcmp", "fptrunc", "fpext"] -functions = [ - "fmuladd", - "sin", - "cos", - "tan", - "exp", - "log", - "sqrt", - "expm1", - "log1p", - "cbrt", - "pow", - "fabs", - "fma", - "maxnum", - "minnum", - "ceil", - "floor", - "exp2", - "log10", - "log2", - "rint", - "round", - "trunc", - "copysign", - "fdim", - "fmod", - "asin", - "acos", - "atan", - "atan2", - "sinh", - "cosh", - "tanh", - "asinh", - "acosh", - "atanh", - "hypot", - "erf", - "lgamma", - "tgamma", - "remainder", -] -functions_with_intrinsics = { - "sin", - "cos", - "exp", - "log", - "sqrt", - "pow", - "fabs", - "fma", - "maxnum", - "minnum", - "ceil", - "floor", - "exp2", - "log10", - "log2", - "rint", - "round", - "trunc", - "copysign", - "fdim", - "fmod", -} - -precisions = ["float", "double"] -iterations = 1 -unrolled = 1 +unrolled = 128 +iterations = 100000000 +AMPLIFIER = 10 precision_to_llvm_type = { "double": "double", @@ -107,14 +38,11 @@ precision_ranks = {"bf16": 0, "half": 1, "float": 2, "double": 3, "fp80": 4, "fp128": 5} precisions_ordered = ["bf16", "half", "float", "double", "fp80", 
# NOTE(review): this block follows the `precisions_ordered = [..., "fp80", "fp128"]`
# table; `float64_to_fp80_bytes`, `float_to_llvm_hex` and `generate_random_fp` are
# defined elsewhere in the file and are only referenced here.

# Precisions that the benchmark driver actually measures.
precisions = ["float", "double"]


def get_zero_literal(precision):
    """Return the LLVM IR spelling of positive zero for a precision name.

    Args:
        precision: One of the precision keys used throughout this file
            ("half", "float", "double", "bf16", "fp80", "fp128").

    Returns:
        The decimal literal "0.0" for half/float/double and for any
        unrecognised name; the type-specific hexadecimal literal otherwise.
    """
    # These types cannot be written as a plain decimal constant in the IR
    # emitted here, so they use LLVM's prefixed hexadecimal forms.
    hex_zero = {
        "bf16": "0xR0000",
        "fp80": "0xK00000000000000000000",
        "fp128": "0xL00000000000000000000000000000000",
    }
    return hex_zero.get(precision, "0.0")


# Native floating-point instructions. "kind" selects the generator that the
# driver uses: "arithmetic", "compare" or "cast".
OP_INFO = {
    "fneg": {"llvm_instr": "fneg", "num_operands": 1, "kind": "arithmetic"},
    "fadd": {"llvm_instr": "fadd", "num_operands": 2, "kind": "arithmetic"},
    "fsub": {"llvm_instr": "fsub", "num_operands": 2, "kind": "arithmetic"},
    "fmul": {"llvm_instr": "fmul", "num_operands": 2, "kind": "arithmetic"},
    "fdiv": {"llvm_instr": "fdiv", "num_operands": 2, "kind": "arithmetic"},
    "fcmp": {"llvm_instr": "fcmp", "num_operands": 2, "kind": "compare"},
    "fptrunc": {"llvm_instr": "fptrunc", "num_operands": 1, "kind": "cast"},
    "fpext": {"llvm_instr": "fpext", "num_operands": 1, "kind": "cast"},
}

# Math functions. "intrinsic" is the LLVM intrinsic base name (the precision
# suffix is appended at code-generation time), or None when the function is
# called as an external library symbol instead.
FUNC_INFO = {
    "fmuladd": {"intrinsic": "llvm.fmuladd", "num_operands": 3},
    "sin": {"intrinsic": "llvm.sin", "num_operands": 1},
    "cos": {"intrinsic": "llvm.cos", "num_operands": 1},
    "tan": {"intrinsic": None, "num_operands": 1},
    "exp": {"intrinsic": "llvm.exp", "num_operands": 1},
    "log": {"intrinsic": "llvm.log", "num_operands": 1},
    "sqrt": {"intrinsic": "llvm.sqrt", "num_operands": 1},
    "expm1": {"intrinsic": None, "num_operands": 1},
    "log1p": {"intrinsic": None, "num_operands": 1},
    "cbrt": {"intrinsic": None, "num_operands": 1},
    "pow": {"intrinsic": "llvm.pow", "num_operands": 2},
    "fabs": {"intrinsic": "llvm.fabs", "num_operands": 1},
    "fma": {"intrinsic": "llvm.fma", "num_operands": 3},
    "maxnum": {"intrinsic": "llvm.maxnum", "num_operands": 2},
    "minnum": {"intrinsic": "llvm.minnum", "num_operands": 2},
    "ceil": {"intrinsic": "llvm.ceil", "num_operands": 1},
    "floor": {"intrinsic": "llvm.floor", "num_operands": 1},
    "exp2": {"intrinsic": "llvm.exp2", "num_operands": 1},
    "log10": {"intrinsic": "llvm.log10", "num_operands": 1},
    "log2": {"intrinsic": "llvm.log2", "num_operands": 1},
    "rint": {"intrinsic": "llvm.rint", "num_operands": 1},
    "round": {"intrinsic": "llvm.round", "num_operands": 1},
    "trunc": {"intrinsic": "llvm.trunc", "num_operands": 1},
    "copysign": {"intrinsic": "llvm.copysign", "num_operands": 2},
    "fdim": {"intrinsic": None, "num_operands": 2},
    "fmod": {"intrinsic": None, "num_operands": 2},
    "asin": {"intrinsic": None, "num_operands": 1},
    "acos": {"intrinsic": None, "num_operands": 1},
    "atan": {"intrinsic": None, "num_operands": 1},
    "atan2": {"intrinsic": None, "num_operands": 2},
    "sinh": {"intrinsic": None, "num_operands": 1},
    "cosh": {"intrinsic": None, "num_operands": 1},
    "tanh": {"intrinsic": None, "num_operands": 1},
    "asinh": {"intrinsic": None, "num_operands": 1},
    "acosh": {"intrinsic": None, "num_operands": 1},
    "atanh": {"intrinsic": None, "num_operands": 1},
    "hypot": {"intrinsic": None, "num_operands": 2},
    "erf": {"intrinsic": None, "num_operands": 1},
    "lgamma": {"intrinsic": None, "num_operands": 1},
    "tgamma": {"intrinsic": None, "num_operands": 1},
    "remainder": {"intrinsic": None, "num_operands": 2},
}

# Suffix the C math library appends to a function name for each precision
# (tan -> tanf for float). Precisions not listed keep the bare name, which is
# what the code did before.
# NOTE(review): fp80 presumably maps to the `l` variants on x86 -- confirm
# before enabling that precision.
_LIBM_SUFFIX = {"float": "f", "double": ""}


def resolve_callee_name(func_name, precision):
    """Return the symbol to call for a FUNC_INFO entry at a precision.

    Args:
        func_name: Key into FUNC_INFO.
        precision: Precision name, e.g. "float" or "double".

    Returns:
        The suffixed intrinsic name for functions that have one, the library
        symbol otherwise, or None when an intrinsic is required but no
        intrinsic suffix is registered for the precision.

    Raises:
        KeyError: If func_name is not in FUNC_INFO.
    """
    intrinsic = FUNC_INFO[func_name]["intrinsic"]
    if intrinsic is None:
        # Declaring e.g. `float @tan(float)` would presumably bind the
        # double-precision routine with a mismatched signature, so pick the
        # per-precision library name instead.
        return func_name + _LIBM_SUFFIX.get(precision, "")
    suffix = precision_to_intrinsic_suffix.get(precision)
    if not suffix:
        return None
    return f"{intrinsic}.{suffix}"


def generate_loop_code(llvm_type, iterations, body_instructions, final_acc_reg, zero_literal=None):
    """Wrap unrolled body instructions in the shared benchmark loop.

    The emitted module defines `@main`, which runs the body `iterations`
    times while threading a floating-point accumulator through memory, and
    an empty `@use` that receives the final accumulator value.

    Args:
        llvm_type: LLVM type name of the accumulator.
        iterations: Loop trip count, interpolated into the `icmp`.
        body_instructions: IR text placed after the `%acc_val0` load; it
            must define the register named by final_acc_reg.
        final_acc_reg: Register stored back into the accumulator.
        zero_literal: Initial accumulator literal. When omitted it is looked
            up from the precision whose LLVM type is llvm_type.

    Returns:
        The LLVM IR module as a string.
    """
    if zero_literal is None:
        # get_zero_literal is keyed by precision name, not by LLVM type name,
        # so translate back before asking for the literal.
        precision = next(
            (name for name, ty in precision_to_llvm_type.items() if ty == llvm_type),
            llvm_type,
        )
        zero_literal = get_zero_literal(precision)
    return f"""
define i32 @main() optnone noinline {{
entry:
  %i = alloca i32
  %acc = alloca {llvm_type}
  store i32 0, i32* %i
  store {llvm_type} {zero_literal}, {llvm_type}* %acc
  br label %loop

loop:
  %i_val = load i32, i32* %i
  %cond = icmp slt i32 %i_val, {iterations}
  br i1 %cond, label %body, label %exit

body:
  %acc_val0 = load {llvm_type}, {llvm_type}* %acc
{body_instructions}
  store {llvm_type} {final_acc_reg}, {llvm_type}* %acc
  %i_next = add i32 %i_val, 1
  store i32 %i_next, i32* %i
  br label %loop

exit:
  %final_acc = load {llvm_type}, {llvm_type}* %acc
  call void @use({llvm_type} %final_acc)
  ret i32 0
}}

define void @use({llvm_type} %val) {{
  ret void
}}
"""


def _random_hex_operand(precision):
    """Return one random constant of the precision as an LLVM hex literal."""
    return float_to_llvm_hex(generate_random_fp(precision), precision)


def generate_arithmetic_op_code(op_key, precision, iterations):
    """Generate LLVM IR benchmarking a unary or binary arithmetic instruction.

    Each of the `unrolled` copies applies the instruction to fresh random
    constants and adds the result to the accumulator chain.

    Args:
        op_key: Key into OP_INFO.
        precision: Precision name of operands and result.
        iterations: Loop trip count.

    Returns:
        The LLVM IR module as a string.

    Raises:
        KeyError: If op_key or precision is unknown.
        ValueError: If the instruction does not take one or two operands.
    """
    op_info = OP_INFO[op_key]
    operand_count = op_info["num_operands"]
    if operand_count not in (1, 2):
        raise ValueError(f"unsupported operand count {operand_count} for '{op_key}'")
    llvm_type = precision_to_llvm_type[precision]
    mnemonic = op_info["llvm_instr"]
    emitted = []
    for idx in range(unrolled):
        operand_list = ", ".join(_random_hex_operand(precision) for _ in range(operand_count))
        emitted.append(f"  %result{idx} = {mnemonic} {FAST_MATH_FLAG} {llvm_type} {operand_list}\n")
        emitted.append(f"  %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {llvm_type} %acc_val{idx}, %result{idx}\n")
    return generate_loop_code(
        llvm_type,
        iterations,
        "".join(emitted),
        f"%acc_val{unrolled}",
        zero_literal=get_zero_literal(precision),
    )


def generate_compare_op_code(precision, iterations):
    """Generate LLVM IR benchmarking `fcmp olt` at the given precision.

    The i1 comparison results are zero-extended and summed into an i32
    accumulator, so this generator carries its own loop template instead of
    using generate_loop_code.

    Args:
        precision: Precision name of the compared operands.
        iterations: Loop trip count.

    Returns:
        The LLVM IR module as a string.
    """
    llvm_type = precision_to_llvm_type[precision]
    emitted = []
    for idx in range(unrolled):
        lhs = _random_hex_operand(precision)
        rhs = _random_hex_operand(precision)
        emitted.append(f"  %cmp{idx} = fcmp {FAST_MATH_FLAG} olt {llvm_type} {lhs}, {rhs}\n")
        emitted.append(f"  %cmp_int{idx} = zext i1 %cmp{idx} to i32\n")
    # The accumulator is loaded after all comparisons, then the add chain runs.
    emitted.append("  %acc_val0 = load i32, i32* %acc\n")
    emitted.extend(
        f"  %acc_val{idx+1} = add i32 %acc_val{idx}, %cmp_int{idx}\n" for idx in range(unrolled)
    )
    body_lines = "".join(emitted)
    final_acc = f"%acc_val{unrolled}"
    return f"""
define i32 @main() optnone noinline {{
entry:
  %i = alloca i32
  %acc = alloca i32
  store i32 0, i32* %i
  store i32 0, i32* %acc
  br label %loop

loop:
  %i_val = load i32, i32* %i
  %cond = icmp slt i32 %i_val, {iterations}
  br i1 %cond, label %body, label %exit

body:
{body_lines}
  store i32 {final_acc}, i32* %acc
  %i_next = add i32 %i_val, 1
  store i32 %i_next, i32* %i
  br label %loop

exit:
  %final_acc = load i32, i32* %acc
  call void @use_i32(i32 %final_acc)
  ret i32 0
}}

define void @use_i32(i32 %val) {{
  ret void
}}
"""


def generate_cast_op_code(op_key, src_precision, dst_precision, iterations):
    """Generate LLVM IR benchmarking a cast instruction (fptrunc or fpext).

    Random constants of the source precision are converted and accumulated
    in the destination precision.

    Args:
        op_key: Key into OP_INFO naming the cast instruction.
        src_precision: Precision name of the value being converted.
        dst_precision: Precision name of the conversion result.
        iterations: Loop trip count.

    Returns:
        The LLVM IR module as a string.
    """
    mnemonic = OP_INFO[op_key]["llvm_instr"]
    src_type = precision_to_llvm_type[src_precision]
    dst_type = precision_to_llvm_type[dst_precision]
    emitted = []
    for idx in range(unrolled):
        source_hex = _random_hex_operand(src_precision)
        emitted.append(f"  %result{idx} = {mnemonic} {src_type} {source_hex} to {dst_type}\n")
        emitted.append(f"  %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {dst_type} %acc_val{idx}, %result{idx}\n")
    return generate_loop_code(
        dst_type,
        iterations,
        "".join(emitted),
        f"%acc_val{unrolled}",
        zero_literal=get_zero_literal(dst_precision),
    )


def generate_function_call_code(func_name, precision, iterations):
    """Generate LLVM IR benchmarking a math function listed in FUNC_INFO.

    Args:
        func_name: Key into FUNC_INFO.
        precision: Precision name of arguments and result.
        iterations: Loop trip count.

    Returns:
        The LLVM IR module (declaration plus benchmark loop) as a string, or
        "" when the function needs an intrinsic that has no suffix for this
        precision; the driver skips empty code.
    """
    llvm_type = precision_to_llvm_type[precision]
    fn = resolve_callee_name(func_name, precision)
    if fn is None:
        return ""
    num_operands = FUNC_INFO[func_name]["num_operands"]
    emitted = []
    for idx in range(unrolled):
        # One "<type> <constant>" pair per parameter, whatever the arity.
        arguments = ", ".join(f"{llvm_type} {_random_hex_operand(precision)}" for _ in range(num_operands))
        emitted.append(f"  %result{idx} = call {FAST_MATH_FLAG} {llvm_type} @{fn}({arguments})\n")
        emitted.append(f"  %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {llvm_type} %acc_val{idx}, %result{idx}\n")
    decl = f"declare {llvm_type} @{fn}({', '.join([llvm_type] * num_operands)})"
    loop_module = generate_loop_code(
        llvm_type,
        iterations,
        "".join(emitted),
        f"%acc_val{unrolled}",
        zero_literal=get_zero_literal(precision),
    )
    return f"\n{decl}" + loop_module
code += f" %cmp{idx} = fcmp {FAST_MATH_FLAG} olt {llvm_type} {hex_a}, {hex_b}\n" - code += f" %cmp_int{idx} = zext i1 %cmp{idx} to i32\n" - code += " %acc_val0 = load i32, i32* %acc\n" - for idx in range(unrolled): - code += f" %acc_val{idx+1} = add i32 %acc_val{idx}, %cmp_int{idx}\n" - code += f""" - store i32 %acc_val{unrolled}, i32* %acc - %i_next = add i32 %i_val, 1 - store i32 %i_next, i32* %i - br label %loop - -exit: - %final_acc = load i32, i32* %acc - call void @use_i32(i32 %final_acc) - ret i32 0 -}} - -define void @use_i32(i32 %val) {{ - ret void -}} -""" - return code - return "" + return code -def generate_llvm_function_call(function_name, precision, iterations): - print(f"DEBUG: Generating LLVM code for {function_name} @ {precision}") - llvm_type = precision_to_llvm_type[precision] - intrinsic_suffix = precision_to_intrinsic_suffix.get(precision) - if not intrinsic_suffix: - return "" - zero_literal = get_zero_literal(precision) - if function_name in ["pow", "maxnum", "minnum", "atan2", "copysign", "fdim", "fmod"]: - fn = f"llvm.pow.{intrinsic_suffix}" - decl = f"declare {llvm_type} @{fn}({llvm_type}, {llvm_type})" - tmpl = f"call {FAST_MATH_FLAG} {llvm_type} @{fn}({llvm_type} {{arg1}}, {llvm_type} {{arg2}})" - elif function_name in ["fma", "fmuladd"]: - fn = f"llvm.{function_name}.{intrinsic_suffix}" - decl = f"declare {llvm_type} @{fn}({llvm_type}, {llvm_type}, {llvm_type})" - tmpl = ( - f"call {FAST_MATH_FLAG} {llvm_type} @{fn}({llvm_type} {{arg1}}, {llvm_type} {{arg2}}, {llvm_type} {{arg3}})" - ) - elif function_name in functions_with_intrinsics: - fn = f"llvm.{function_name}.{intrinsic_suffix}" - decl = f"declare {llvm_type} @{fn}({llvm_type})" - tmpl = f"call {FAST_MATH_FLAG} {llvm_type} @{fn}({llvm_type} {{arg1}})" - else: - fn = function_name - decl = f"declare {llvm_type} @{fn}({llvm_type})" - tmpl = f"call {FAST_MATH_FLAG} {llvm_type} @{fn}({llvm_type} {{arg1}})" - code = ( - decl - + f""" +def generate_baseline_code(iterations): + return 
f""" define i32 @main() optnone noinline {{ entry: %i = alloca i32 - %acc = alloca {llvm_type} store i32 0, i32* %i - store {llvm_type} {zero_literal}, {llvm_type}* %acc br label %loop loop: @@ -529,47 +458,14 @@ def generate_llvm_function_call(function_name, precision, iterations): br i1 %cond, label %body, label %exit body: - %acc_val0 = load {llvm_type}, {llvm_type}* %acc -""" - ) - for idx in range(unrolled): - if function_name in ["pow", "hypot", "atan2", "maxnum", "minnum", "copysign", "fdim", "fmod"]: - a = generate_random_fp(precision) - b = generate_random_fp(precision) - hex_a = float_to_llvm_hex(a, precision) - hex_b = float_to_llvm_hex(b, precision) - call_ = tmpl.format(arg1=hex_a, arg2=hex_b) - elif function_name in ["fma", "fmuladd"]: - a = generate_random_fp(precision) - b = generate_random_fp(precision) - c = generate_random_fp(precision) - hex_a = float_to_llvm_hex(a, precision) - hex_b = float_to_llvm_hex(b, precision) - hex_c = float_to_llvm_hex(c, precision) - call_ = tmpl.format(arg1=hex_a, arg2=hex_b, arg3=hex_c) - else: - a = generate_random_fp(precision) - hex_a = float_to_llvm_hex(a, precision) - call_ = tmpl.format(arg1=hex_a) - code += f" %result{idx} = {call_}\n" - code += f" %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {llvm_type} %acc_val{idx}, %result{idx}\n" - code += f""" - store {llvm_type} %acc_val{unrolled}, {llvm_type}* %acc %i_next = add i32 %i_val, 1 store i32 %i_next, i32* %i br label %loop exit: - %final_acc = load {llvm_type}, {llvm_type}* %acc - call void @use({llvm_type} %final_acc) ret i32 0 }} - -define void @use({llvm_type} %val) {{ - ret void -}} """ - return code def create_execution_engine(): @@ -596,19 +492,18 @@ def run_llvm_ir_jit(llvm_ir): csv_file = "results.csv" - with open(csv_file, "w", newline="") as csvfile: fieldnames = ["instruction", "precision", "cost"] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() llvm_code = generate_baseline_code(iterations) print("DEBUG: Running 
baseline") baseline_time, _ = run_llvm_ir_jit(llvm_code) for precision in precisions: - for instr in instructions: - if instr in ["fptrunc", "fpext"]: + for instr in OP_INFO: + op_kind = OP_INFO[instr]["kind"] + if op_kind == "cast": src_precision = precision src_rank = precision_ranks.get(src_precision) if src_rank is None: @@ -622,39 +517,33 @@ def run_llvm_ir_jit(llvm_ir): p for p in precisions_ordered if p in precisions and precision_ranks[p] > src_rank ] for dst_precision in dst_precisions: - if (src_precision == "half" and dst_precision == "bf16") or ( - src_precision == "bf16" and dst_precision == "half" - ): - continue - code = generate_llvm_code(instr, src_precision, dst_precision, iterations) - if not code.strip(): + if (src_precision, dst_precision) in [("half", "bf16"), ("bf16", "half")]: continue - print(f"DEBUG: Running '{instr}_{src_precision}_to_{dst_precision}'") + code = generate_cast_op_code(instr, src_precision, dst_precision, iterations) + name = f"{instr}_{src_precision}_to_{dst_precision}" + print(f"DEBUG: Running '{name}'") elapsed, _ = run_llvm_ir_jit(code) - adjusted = elapsed - baseline_time - writer.writerow( - { - "instruction": f"{instr}_{src_precision}_to_{dst_precision}", - "precision": src_precision, - "cost": int(adjusted), - } - ) + adjusted = (elapsed - baseline_time) * AMPLIFIER + writer.writerow({"instruction": name, "precision": src_precision, "cost": int(adjusted)}) else: - code = generate_llvm_code_other(instr, precision, iterations) - if not code.strip(): - continue - print(f"DEBUG: Running '{instr}'") + if op_kind == "arithmetic": + code = generate_arithmetic_op_code(instr, precision, iterations) + elif op_kind == "compare": + code = generate_compare_op_code(precision, iterations) + else: + code = "" + if code.strip(): + print(f"DEBUG: Running '{instr}' at precision {precision}") + elapsed, _ = run_llvm_ir_jit(code) + adjusted = (elapsed - baseline_time) * AMPLIFIER + writer.writerow({"instruction": instr, "precision": 
precision, "cost": int(adjusted)}) + + for func in FUNC_INFO: + code = generate_function_call_code(func, precision, iterations) + if code.strip(): + print(f"DEBUG: Running function '{func}' at precision {precision}") elapsed, _ = run_llvm_ir_jit(code) - adjusted = elapsed - baseline_time - writer.writerow({"instruction": instr, "precision": precision, "cost": int(adjusted)}) - - for func in functions: - code = generate_llvm_function_call(func, precision, iterations) - if not code.strip(): - continue - print(f"DEBUG: Running '{func}'") - elapsed, _ = run_llvm_ir_jit(code) - adjusted = elapsed - baseline_time - writer.writerow({"instruction": func, "precision": precision, "cost": int(adjusted)}) + adjusted = (elapsed - baseline_time) * AMPLIFIER + writer.writerow({"instruction": func, "precision": precision, "cost": int(adjusted)}) print(f"Results in '{csv_file}'. Baseline: {baseline_time:.6f}s")