diff --git a/microbm/cm.csv b/microbm/cm.csv
index 1b058cb..6f359dd 100644
--- a/microbm/cm.csv
+++ b/microbm/cm.csv
@@ -1,42 +1,96 @@
-fneg,float,7
-fadd,float,7
-fsub,float,7
-fmul,float,7
-fdiv,float,7
-fcmp,float,6
-fpext_float_to_double,float,7
-fmuladd,float,7
-sin,float,42
-cos,float,40
-tan,float,75
-exp,float,44
-log,float,29
-sqrt,float,12
-expm1,float,33
-log1p,float,32
-cbrt,float,175
-pow,float,53
-fabs,float,7
-hypot,float,251
-fma,float,30
-fneg,double,7
-fadd,double,7
-fsub,double,7
-fmul,double,7
-fdiv,double,11
-fcmp,double,6
-fptrunc_double_to_float,double,7
-fmuladd,double,7
-sin,double,351
-cos,double,353
-tan,double,406
-exp,double,70
-log,double,33
-sqrt,double,21
-expm1,double,30
-log1p,double,42
-cbrt,double,98
-pow,double,104
-fabs,double,7
-hypot,double,30
-fma,double,29
\ No newline at end of file
+fneg,float,76
+fadd,float,75
+fsub,float,75
+fmul,float,75
+fdiv,float,75
+fcmp,float,62
+fpext_float_to_double,float,75
+fmuladd,float,75
+sin,float,416
+cos,float,415
+tan,float,971
+exp,float,459
+log,float,308
+sqrt,float,122
+expm1,float,502
+log1p,float,484
+cbrt,float,3042
+pow,float,546
+fabs,float,75
+fma,float,75
+maxnum,float,75
+minnum,float,75
+ceil,float,75
+floor,float,75
+exp2,float,468
+log10,float,517
+log2,float,309
+rint,float,299
+round,float,304
+trunc,float,75
+copysign,float,75
+fdim,float,306
+fmod,float,312
+asin,float,351
+acos,float,309
+atan,float,797
+atan2,float,2598
+sinh,float,451
+cosh,float,323
+tanh,float,511
+asinh,float,479
+acosh,float,338
+atanh,float,358
+hypot,float,1831
+erf,float,318
+lgamma,float,1647
+tgamma,float,3278
+remainder,float,2812
+fneg,double,77
+fadd,double,76
+fsub,double,77
+fmul,double,77
+fdiv,double,116
+fcmp,double,63
+fptrunc_double_to_float,double,76
+fmuladd,double,75
+sin,double,3505
+cos,double,3524
+tan,double,4144
+exp,double,711
+log,double,340
+sqrt,double,214
+expm1,double,305
+log1p,double,428
+cbrt,double,991
+pow,double,1182
+fabs,double,75
+fma,double,75
+maxnum,double,75
+minnum,double,75
+ceil,double,75
+floor,double,75
+exp2,double,397
+log10,double,586
+log2,double,315
+rint,double,295
+round,double,297
+trunc,double,74
+copysign,double,74
+fdim,double,299
+fmod,double,303
+asin,double,550
+acos,double,540
+atan,double,767
+atan2,double,1129
+sinh,double,626
+cosh,double,535
+tanh,double,309
+asinh,double,479
+acosh,double,358
+atanh,double,326
+hypot,double,496
+erf,double,310
+lgamma,double,1867
+tgamma,double,1859
+remainder,double,1006
diff --git a/microbm/microbm.py b/microbm/microbm.py
index 6ece918..7adf679 100644
--- a/microbm/microbm.py
+++ b/microbm/microbm.py
@@ -1,9 +1,8 @@
 import time
 import csv
-import os
 import struct
-import numpy as np
 import random
+import numpy as np
 import llvmlite.binding as llvm
 import ctypes
 
@@ -15,77 +14,9 @@
 
 FAST_MATH_FLAG = "reassoc nsz arcp contract afn"
 
-instructions = ["fneg", "fadd", "fsub", "fmul", "fdiv", "fcmp", "fptrunc", "fpext"]
-functions = [
-    "fmuladd",
-    "sin",
-    "cos",
-    "tan",
-    "exp",
-    "log",
-    "sqrt",
-    "expm1",
-    "log1p",
-    "cbrt",
-    "pow",
-    "fabs",
-    "fma",
-    "maxnum",
-    "minnum",
-    "ceil",
-    "floor",
-    "exp2",
-    "log10",
-    "log2",
-    "rint",
-    "round",
-    "trunc",
-    "copysign",
-    "fdim",
-    "fmod",
-    "asin",
-    "acos",
-    "atan",
-    "atan2",
-    "sinh",
-    "cosh",
-    "tanh",
-    "asinh",
-    "acosh",
-    "atanh",
-    "hypot",
-    "erf",
-    "lgamma",
-    "tgamma",
-    "remainder",
-]
-functions_with_intrinsics = {
-    "sin",
-    "cos",
-    "exp",
-    "log",
-    "sqrt",
-    "pow",
-    "fabs",
-    "fma",
-    "maxnum",
-    "minnum",
-    "ceil",
-    "floor",
-    "exp2",
-    "log10",
-    "log2",
-    "rint",
-    "round",
-    "trunc",
-    "copysign",
-    "fdim",
-    "fmod",
-}
-
-precisions = ["float", "double"]
-iterations = 1
-unrolled = 1
+unrolled = 128  # copies of the measured operation emitted per loop iteration of the generated IR
+iterations = 100000000  # loop trip count; substituted into the generated `icmp slt i32 %i_val, ...` bound
+AMPLIFIER = 10  # factor applied to (elapsed - baseline_time) before the cost is truncated to int
 
 precision_to_llvm_type = {
     "double": "double",
@@ -107,14 +38,11 @@
 
 precision_ranks = {"bf16": 0, "half": 1, "float": 2, "double": 3, "fp80": 4, "fp128": 5}
 precisions_ordered = ["bf16", "half", "float", "double", "fp80", "fp128"]
+precisions = ["float", "double"]
 
 
 def get_zero_literal(precision):
-    if precision == "double":
-        return "0.0"
-    elif precision == "float":
-        return "0.0"
-    elif precision == "half":
+    if precision in ("double", "float", "half"):
         return "0.0"
     elif precision == "bf16":
         return "0xR0000"
@@ -122,8 +50,7 @@ def get_zero_literal(precision):
         return "0xK00000000000000000000"
     elif precision == "fp128":
         return "0xL00000000000000000000000000000000"
-    else:
-        return "0.0"
+    return "0.0"
 
 
 def float64_to_fp80_bytes(value: np.float64) -> bytes:
@@ -242,12 +169,71 @@ def generate_random_fp(precision):
     return dtype(f).item()
 
 
-def generate_baseline_code(iterations):
-    return f"""
+OP_INFO = {  # op key -> LLVM instruction name, operand count, and "kind" (selects which IR generator the driver calls)
+    "fneg": {"llvm_instr": "fneg", "num_operands": 1, "kind": "arithmetic"},  # only unary arithmetic op
+    "fadd": {"llvm_instr": "fadd", "num_operands": 2, "kind": "arithmetic"},
+    "fsub": {"llvm_instr": "fsub", "num_operands": 2, "kind": "arithmetic"},
+    "fmul": {"llvm_instr": "fmul", "num_operands": 2, "kind": "arithmetic"},
+    "fdiv": {"llvm_instr": "fdiv", "num_operands": 2, "kind": "arithmetic"},
+    "fcmp": {"llvm_instr": "fcmp", "num_operands": 2, "kind": "compare"},  # results are summed in an i32 accumulator
+    "fptrunc": {"llvm_instr": "fptrunc", "num_operands": 1, "kind": "cast"},  # destination precisions chosen by the driver loop
+    "fpext": {"llvm_instr": "fpext", "num_operands": 1, "kind": "cast"},  # destination precisions chosen by the driver loop
+}
+
+FUNC_INFO = {  # function name -> LLVM intrinsic base name (None: call the bare function name) and operand count
+    "fmuladd": {"intrinsic": "llvm.fmuladd", "num_operands": 3},  # callee becomes "<intrinsic>.<per-precision suffix>"
+    "sin": {"intrinsic": "llvm.sin", "num_operands": 1},
+    "cos": {"intrinsic": "llvm.cos", "num_operands": 1},
+    "tan": {"intrinsic": None, "num_operands": 1},  # no intrinsic: declared and called as @tan for every precision
+    "exp": {"intrinsic": "llvm.exp", "num_operands": 1},
+    "log": {"intrinsic": "llvm.log", "num_operands": 1},
+    "sqrt": {"intrinsic": "llvm.sqrt", "num_operands": 1},
+    "expm1": {"intrinsic": None, "num_operands": 1},  # NOTE(review): bare names are reused for "float" too; confirm the symbol resolves to a float routine
+    "log1p": {"intrinsic": None, "num_operands": 1},
+    "cbrt": {"intrinsic": None, "num_operands": 1},
+    "pow": {"intrinsic": "llvm.pow", "num_operands": 2},
+    "fabs": {"intrinsic": "llvm.fabs", "num_operands": 1},
+    "fma": {"intrinsic": "llvm.fma", "num_operands": 3},
+    "maxnum": {"intrinsic": "llvm.maxnum", "num_operands": 2},
+    "minnum": {"intrinsic": "llvm.minnum", "num_operands": 2},
+    "ceil": {"intrinsic": "llvm.ceil", "num_operands": 1},
+    "floor": {"intrinsic": "llvm.floor", "num_operands": 1},
+    "exp2": {"intrinsic": "llvm.exp2", "num_operands": 1},
+    "log10": {"intrinsic": "llvm.log10", "num_operands": 1},
+    "log2": {"intrinsic": "llvm.log2", "num_operands": 1},
+    "rint": {"intrinsic": "llvm.rint", "num_operands": 1},
+    "round": {"intrinsic": "llvm.round", "num_operands": 1},
+    "trunc": {"intrinsic": "llvm.trunc", "num_operands": 1},
+    "copysign": {"intrinsic": "llvm.copysign", "num_operands": 2},
+    "fdim": {"intrinsic": None, "num_operands": 2},
+    "fmod": {"intrinsic": None, "num_operands": 2},
+    "asin": {"intrinsic": None, "num_operands": 1},
+    "acos": {"intrinsic": None, "num_operands": 1},
+    "atan": {"intrinsic": None, "num_operands": 1},
+    "atan2": {"intrinsic": None, "num_operands": 2},
+    "sinh": {"intrinsic": None, "num_operands": 1},
+    "cosh": {"intrinsic": None, "num_operands": 1},
+    "tanh": {"intrinsic": None, "num_operands": 1},
+    "asinh": {"intrinsic": None, "num_operands": 1},
+    "acosh": {"intrinsic": None, "num_operands": 1},
+    "atanh": {"intrinsic": None, "num_operands": 1},
+    "hypot": {"intrinsic": None, "num_operands": 2},
+    "erf": {"intrinsic": None, "num_operands": 1},
+    "lgamma": {"intrinsic": None, "num_operands": 1},
+    "tgamma": {"intrinsic": None, "num_operands": 1},
+    "remainder": {"intrinsic": None, "num_operands": 2},
+}
+
+
+def generate_loop_code(llvm_type, iterations, body_instructions, final_acc_reg):
+    zero_literal = get_zero_literal(llvm_type)
+    code = f"""
 define i32 @main() optnone noinline {{
 entry:
   %i = alloca i32
+  %acc = alloca {llvm_type}
   store i32 0, i32* %i
+  store {llvm_type} {zero_literal}, {llvm_type}* %acc
   br label %loop
 
 loop:
@@ -256,32 +242,69 @@ def generate_baseline_code(iterations):
   br i1 %cond, label %body, label %exit
 
 body:
+  %acc_val0 = load {llvm_type}, {llvm_type}* %acc
+{body_instructions}
+  store {llvm_type} {final_acc_reg}, {llvm_type}* %acc
   %i_next = add i32 %i_val, 1
   store i32 %i_next, i32* %i
   br label %loop
 
 exit:
+  %final_acc = load {llvm_type}, {llvm_type}* %acc
+  call void @use({llvm_type} %final_acc)
   ret i32 0
 }}
+
+define void @use({llvm_type} %val) {{
+  ret void
+}}
 """
+    return code
 
 
-def generate_llvm_code(instruction, src_precision, dst_precision, iterations):
-    src_type = precision_to_llvm_type.get(src_precision)
-    dst_type = precision_to_llvm_type.get(dst_precision)
-    if not src_type or not dst_type:
-        return ""
-    zero_literal_dst = get_zero_literal(dst_precision)
-    if instruction == "fptrunc":
-        random_fps = [generate_random_fp(src_precision) for _ in range(unrolled)]
-        hex_fps = [float_to_llvm_hex(f, src_precision) for f in random_fps]
-        code = f"""
+def generate_arithmetic_op_code(op_key, precision, iterations):
+    """Generate LLVM IR that times one OP_INFO instruction with one or two operands.
+
+    Emits `unrolled` copies of the instruction on random constant operands and chains
+    each result into the %acc_val accumulator of generate_loop_code. Raises ValueError
+    for any other operand count instead of failing later on an unbound local.
+    """
+    instr = OP_INFO[op_key]["llvm_instr"]
+    num_operands = OP_INFO[op_key]["num_operands"]
+    if num_operands not in (1, 2):
+        raise ValueError(f"unsupported operand count for {op_key}: {num_operands}")
+    llvm_type = precision_to_llvm_type[precision]
+    body = []
+    for idx in range(unrolled):
+        args = ", ".join(float_to_llvm_hex(generate_random_fp(precision), precision) for _ in range(num_operands))
+        body.append(f"  %result{idx} = {instr} {FAST_MATH_FLAG} {llvm_type} {args}\n")
+        body.append(f"  %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {llvm_type} %acc_val{idx}, %result{idx}\n")
+    return generate_loop_code(llvm_type, iterations, "".join(body), f"%acc_val{unrolled}")
+
+
+def generate_compare_op_code(precision, iterations):
+    """Generate LLVM IR for an fcmp (comparison) operation."""
+    llvm_type = precision_to_llvm_type[precision]
+    body_lines = ""
+    for idx in range(unrolled):
+        f_a = generate_random_fp(precision)
+        f_b = generate_random_fp(precision)
+        a_hex = float_to_llvm_hex(f_a, precision)
+        b_hex = float_to_llvm_hex(f_b, precision)
+        line = f"  %cmp{idx} = fcmp {FAST_MATH_FLAG} olt {llvm_type} {a_hex}, {b_hex}"
+        body_lines += line + "\n"
+        body_lines += f"  %cmp_int{idx} = zext i1 %cmp{idx} to i32\n"
+    body_lines += "  %acc_val0 = load i32, i32* %acc\n"
+    for idx in range(unrolled):
+        body_lines += f"  %acc_val{idx+1} = add i32 %acc_val{idx}, %cmp_int{idx}\n"
+    final_acc = f"%acc_val{unrolled}"
+    code = f"""
 define i32 @main() optnone noinline {{
 entry:
   %i = alloca i32
-  %acc = alloca {dst_type}
+  %acc = alloca i32
   store i32 0, i32* %i
-  store {dst_type} {zero_literal_dst}, {dst_type}* %acc
+  store i32 0, i32* %acc
   br label %loop
 
 loop:
@@ -290,38 +313,46 @@ def generate_llvm_code(instruction, src_precision, dst_precision, iterations):
   br i1 %cond, label %body, label %exit
 
 body:
-  %acc_val0 = load {dst_type}, {dst_type}* %acc
-"""
-        for idx, hex_a in enumerate(hex_fps):
-            code += f"  %result{idx} = fptrunc {src_type} {hex_a} to {dst_type}\n"
-            code += f"  %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {dst_type} %acc_val{idx}, %result{idx}\n"
-        code += f"""
-  store {dst_type} %acc_val{len(hex_fps)}, {dst_type}* %acc
+{body_lines}
+  store i32 {final_acc}, i32* %acc
   %i_next = add i32 %i_val, 1
   store i32 %i_next, i32* %i
   br label %loop
 
 exit:
-  %final_acc = load {dst_type}, {dst_type}* %acc
-  call void @use({dst_type} %final_acc)
+  %final_acc = load i32, i32* %acc
+  call void @use_i32(i32 %final_acc)
   ret i32 0
 }}
 
-define void @use({dst_type} %val) {{
+define void @use_i32(i32 %val) {{
   ret void
 }}
 """
-        return code
-    elif instruction == "fpext":
-        random_fps = [generate_random_fp(src_precision) for _ in range(unrolled)]
-        hex_fps = [float_to_llvm_hex(f, src_precision) for f in random_fps]
-        code = f"""
+    return code
+
+
+def generate_cast_op_code(op_key, src_precision, dst_precision, iterations):
+    """Generate LLVM IR for a cast operation (fptrunc or fpext)."""
+    op_info = OP_INFO[op_key]
+    src_type = precision_to_llvm_type[src_precision]
+    dst_type = precision_to_llvm_type[dst_precision]
+    zero_literal = get_zero_literal(dst_precision)
+    body_lines = ""
+    for idx in range(unrolled):
+        f_val = generate_random_fp(src_precision)
+        hex_val = float_to_llvm_hex(f_val, src_precision)
+        line = f"  %result{idx} = {op_info['llvm_instr']} {src_type} {hex_val} to {dst_type}"
+        body_lines += line + "\n"
+        body_lines += f"  %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {dst_type} %acc_val{idx}, %result{idx}\n"
+    final_acc = f"%acc_val{unrolled}"
+    code = f"""
 define i32 @main() optnone noinline {{
 entry:
   %i = alloca i32
   %acc = alloca {dst_type}
   store i32 0, i32* %i
-  store {dst_type} {zero_literal_dst}, {dst_type}* %acc
+  store {dst_type} {zero_literal}, {dst_type}* %acc
   br label %loop
 
 loop:
@@ -331,12 +362,8 @@ def generate_llvm_code(instruction, src_precision, dst_precision, iterations):
 
 body:
   %acc_val0 = load {dst_type}, {dst_type}* %acc
-"""
-        for idx, hex_a in enumerate(hex_fps):
-            code += f"  %result{idx} = fpext {src_type} {hex_a} to {dst_type}\n"
-            code += f"  %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {dst_type} %acc_val{idx}, %result{idx}\n"
-        code += f"""
-  store {dst_type} %acc_val{len(hex_fps)}, {dst_type}* %acc
+{body_lines}
+  store {dst_type} {final_acc}, {dst_type}* %acc
   %i_next = add i32 %i_val, 1
   store i32 %i_next, i32* %i
   br label %loop
@@ -351,65 +378,44 @@ def generate_llvm_code(instruction, src_precision, dst_precision, iterations):
   ret void
 }}
 """
-        return code
-    return ""
+    return code
 
 
-def generate_llvm_code_other(instruction, precision, iterations):
+def generate_function_call_code(func_name, precision, iterations):
+    """Generate LLVM IR for a function call based on FUNC_INFO."""
+    func_info = FUNC_INFO[func_name]
     llvm_type = precision_to_llvm_type[precision]
-    zero_literal = get_zero_literal(precision)
-    if instruction in ["fadd", "fsub", "fmul", "fdiv"]:
-        op_map = {"fadd": "fadd", "fsub": "fsub", "fmul": "fmul", "fdiv": "fdiv"}
-        op = op_map[instruction]
-        random_pairs = [(generate_random_fp(precision), generate_random_fp(precision)) for _ in range(unrolled)]
-        hex_pairs = [(float_to_llvm_hex(a, precision), float_to_llvm_hex(b, precision)) for a, b in random_pairs]
-        code = f"""
-define i32 @main() optnone noinline {{
-entry:
-  %i = alloca i32
-  %acc = alloca {llvm_type}
-  store i32 0, i32* %i
-  store {llvm_type} {zero_literal}, {llvm_type}* %acc
-  br label %loop
-
-loop:
-  %i_val = load i32, i32* %i
-  %cond = icmp slt i32 %i_val, {iterations}
-  br i1 %cond, label %body, label %exit
-
-body:
-  %acc_val0 = load {llvm_type}, {llvm_type}* %acc
-"""
-        for idx, (hex_a, hex_b) in enumerate(hex_pairs):
-            code += f"  %result{idx} = {op} {FAST_MATH_FLAG} {llvm_type} {hex_a}, {hex_b}\n"
-            code += f"  %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {llvm_type} %acc_val{idx}, %result{idx}\n"
-        code += f"""
-  store {llvm_type} %acc_val{len(hex_pairs)}, {llvm_type}* %acc
-  %i_next = add i32 %i_val, 1
-  store i32 %i_next, i32* %i
-  br label %loop
-
-exit:
-  %final_acc = load {llvm_type}, {llvm_type}* %acc
-  call void @use({llvm_type} %final_acc)
-  ret i32 0
-}}
-
-define void @use({llvm_type} %val) {{
-  ret void
-}}
-"""
-        return code
-    elif instruction == "fneg":
-        random_fps = [generate_random_fp(precision) for _ in range(unrolled)]
-        hex_fps = [float_to_llvm_hex(f, precision) for f in random_fps]
-        code = f"""
+    intrinsic_suffix = precision_to_intrinsic_suffix.get(precision, "")
+    if func_info["intrinsic"]:
+        fn = f"{func_info['intrinsic']}.{intrinsic_suffix}"
+    else:
+        fn = func_name
+    num_operands = func_info["num_operands"]
+    body_lines = ""
+    for idx in range(unrolled):
+        operands = []
+        for _ in range(num_operands):
+            f_val = generate_random_fp(precision)
+            operands.append(float_to_llvm_hex(f_val, precision))
+        if num_operands == 1:
+            call_str = f"call {FAST_MATH_FLAG} {llvm_type} @{fn}({llvm_type} {operands[0]})"
+        elif num_operands == 2:
+            call_str = f"call {FAST_MATH_FLAG} {llvm_type} @{fn}({llvm_type} {operands[0]}, {llvm_type} {operands[1]})"
+        elif num_operands == 3:
+            call_str = f"call {FAST_MATH_FLAG} {llvm_type} @{fn}({llvm_type} {operands[0]}, {llvm_type} {operands[1]}, {llvm_type} {operands[2]})"
+        else:
+            call_str = ""
+        body_lines += f"  %result{idx} = {call_str}\n"
+        body_lines += f"  %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {llvm_type} %acc_val{idx}, %result{idx}\n"
+    decl = f"declare {llvm_type} @{fn}({', '.join([llvm_type]*num_operands)})"
+    code = f"""
+{decl}
 define i32 @main() optnone noinline {{
 entry:
   %i = alloca i32
   %acc = alloca {llvm_type}
   store i32 0, i32* %i
-  store {llvm_type} {zero_literal}, {llvm_type}* %acc
+  store {llvm_type} {get_zero_literal(precision)}, {llvm_type}* %acc
   br label %loop
 
 loop:
@@ -419,12 +425,8 @@ def generate_llvm_code_other(instruction, precision, iterations):
 
 body:
   %acc_val0 = load {llvm_type}, {llvm_type}* %acc
-"""
-        for idx, hex_a in enumerate(hex_fps):
-            code += f"  %result{idx} = fneg {FAST_MATH_FLAG} {llvm_type} {hex_a}\n"
-            code += f"  %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {llvm_type} %acc_val{idx}, %result{idx}\n"
-        code += f"""
-  store {llvm_type} %acc_val{len(hex_fps)}, {llvm_type}* %acc
+{body_lines}
+  store {llvm_type} %acc_val{unrolled}, {llvm_type}* %acc
   %i_next = add i32 %i_val, 1
   store i32 %i_next, i32* %i
   br label %loop
@@ -439,88 +441,15 @@ def generate_llvm_code_other(instruction, precision, iterations):
   ret void
 }}
 """
-        return code
-    elif instruction == "fcmp":
-        code = f"""
-define i32 @main() optnone noinline {{
-entry:
-  %i = alloca i32
-  %acc = alloca i32
-  store i32 0, i32* %i
-  store i32 0, i32* %acc
-  br label %loop
-
-loop:
-  %i_val = load i32, i32* %i
-  %cond = icmp slt i32 %i_val, {iterations}
-  br i1 %cond, label %body, label %exit
-
-body:
-"""
-        for idx in range(unrolled):
-            a = generate_random_fp(precision)
-            b = generate_random_fp(precision)
-            hex_a = float_to_llvm_hex(a, precision)
-            hex_b = float_to_llvm_hex(b, precision)
-            code += f"  %cmp{idx} = fcmp {FAST_MATH_FLAG} olt {llvm_type} {hex_a}, {hex_b}\n"
-            code += f"  %cmp_int{idx} = zext i1 %cmp{idx} to i32\n"
-        code += "  %acc_val0 = load i32, i32* %acc\n"
-        for idx in range(unrolled):
-            code += f"  %acc_val{idx+1} = add i32 %acc_val{idx}, %cmp_int{idx}\n"
-        code += f"""
-  store i32 %acc_val{unrolled}, i32* %acc
-  %i_next = add i32 %i_val, 1
-  store i32 %i_next, i32* %i
-  br label %loop
-
-exit:
-  %final_acc = load i32, i32* %acc
-  call void @use_i32(i32 %final_acc)
-  ret i32 0
-}}
-
-define void @use_i32(i32 %val) {{
-  ret void
-}}
-"""
-        return code
-    return ""
+    return code
 
 
-def generate_llvm_function_call(function_name, precision, iterations):
-    print(f"DEBUG: Generating LLVM code for {function_name} @ {precision}")
-    llvm_type = precision_to_llvm_type[precision]
-    intrinsic_suffix = precision_to_intrinsic_suffix.get(precision)
-    if not intrinsic_suffix:
-        return ""
-    zero_literal = get_zero_literal(precision)
-    if function_name in ["pow", "maxnum", "minnum", "atan2", "copysign", "fdim", "fmod"]:
-        fn = f"llvm.pow.{intrinsic_suffix}"
-        decl = f"declare {llvm_type} @{fn}({llvm_type}, {llvm_type})"
-        tmpl = f"call {FAST_MATH_FLAG} {llvm_type} @{fn}({llvm_type} {{arg1}}, {llvm_type} {{arg2}})"
-    elif function_name in ["fma", "fmuladd"]:
-        fn = f"llvm.{function_name}.{intrinsic_suffix}"
-        decl = f"declare {llvm_type} @{fn}({llvm_type}, {llvm_type}, {llvm_type})"
-        tmpl = (
-            f"call {FAST_MATH_FLAG} {llvm_type} @{fn}({llvm_type} {{arg1}}, {llvm_type} {{arg2}}, {llvm_type} {{arg3}})"
-        )
-    elif function_name in functions_with_intrinsics:
-        fn = f"llvm.{function_name}.{intrinsic_suffix}"
-        decl = f"declare {llvm_type} @{fn}({llvm_type})"
-        tmpl = f"call {FAST_MATH_FLAG} {llvm_type} @{fn}({llvm_type} {{arg1}})"
-    else:
-        fn = function_name
-        decl = f"declare {llvm_type} @{fn}({llvm_type})"
-        tmpl = f"call {FAST_MATH_FLAG} {llvm_type} @{fn}({llvm_type} {{arg1}})"
-    code = (
-        decl
-        + f"""
+def generate_baseline_code(iterations):
+    return f"""
 define i32 @main() optnone noinline {{
 entry:
   %i = alloca i32
-  %acc = alloca {llvm_type}
   store i32 0, i32* %i
-  store {llvm_type} {zero_literal}, {llvm_type}* %acc
   br label %loop
 
 loop:
@@ -529,47 +458,14 @@ def generate_llvm_function_call(function_name, precision, iterations):
   br i1 %cond, label %body, label %exit
 
 body:
-  %acc_val0 = load {llvm_type}, {llvm_type}* %acc
-"""
-    )
-    for idx in range(unrolled):
-        if function_name in ["pow", "hypot", "atan2", "maxnum", "minnum", "copysign", "fdim", "fmod"]:
-            a = generate_random_fp(precision)
-            b = generate_random_fp(precision)
-            hex_a = float_to_llvm_hex(a, precision)
-            hex_b = float_to_llvm_hex(b, precision)
-            call_ = tmpl.format(arg1=hex_a, arg2=hex_b)
-        elif function_name in ["fma", "fmuladd"]:
-            a = generate_random_fp(precision)
-            b = generate_random_fp(precision)
-            c = generate_random_fp(precision)
-            hex_a = float_to_llvm_hex(a, precision)
-            hex_b = float_to_llvm_hex(b, precision)
-            hex_c = float_to_llvm_hex(c, precision)
-            call_ = tmpl.format(arg1=hex_a, arg2=hex_b, arg3=hex_c)
-        else:
-            a = generate_random_fp(precision)
-            hex_a = float_to_llvm_hex(a, precision)
-            call_ = tmpl.format(arg1=hex_a)
-        code += f"  %result{idx} = {call_}\n"
-        code += f"  %acc_val{idx+1} = fadd {FAST_MATH_FLAG} {llvm_type} %acc_val{idx}, %result{idx}\n"
-    code += f"""
-  store {llvm_type} %acc_val{unrolled}, {llvm_type}* %acc
   %i_next = add i32 %i_val, 1
   store i32 %i_next, i32* %i
   br label %loop
 
 exit:
-  %final_acc = load {llvm_type}, {llvm_type}* %acc
-  call void @use({llvm_type} %final_acc)
   ret i32 0
 }}
-
-define void @use({llvm_type} %val) {{
-  ret void
-}}
 """
-    return code
 
 
 def create_execution_engine():
@@ -596,19 +492,18 @@ def run_llvm_ir_jit(llvm_ir):
 
 
 csv_file = "results.csv"
-
 with open(csv_file, "w", newline="") as csvfile:
     fieldnames = ["instruction", "precision", "cost"]
     writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-    writer.writeheader()
 
     llvm_code = generate_baseline_code(iterations)
     print("DEBUG: Running baseline")
     baseline_time, _ = run_llvm_ir_jit(llvm_code)
 
     for precision in precisions:
-        for instr in instructions:
-            if instr in ["fptrunc", "fpext"]:
+        for instr in OP_INFO:
+            op_kind = OP_INFO[instr]["kind"]
+            if op_kind == "cast":
                 src_precision = precision
                 src_rank = precision_ranks.get(src_precision)
                 if src_rank is None:
@@ -622,39 +517,33 @@ def run_llvm_ir_jit(llvm_ir):
                         p for p in precisions_ordered if p in precisions and precision_ranks[p] > src_rank
                     ]
                 for dst_precision in dst_precisions:
-                    if (src_precision == "half" and dst_precision == "bf16") or (
-                        src_precision == "bf16" and dst_precision == "half"
-                    ):
-                        continue
-                    code = generate_llvm_code(instr, src_precision, dst_precision, iterations)
-                    if not code.strip():
+                    if (src_precision, dst_precision) in [("half", "bf16"), ("bf16", "half")]:
                         continue
-                    print(f"DEBUG: Running '{instr}_{src_precision}_to_{dst_precision}'")
+                    code = generate_cast_op_code(instr, src_precision, dst_precision, iterations)
+                    name = f"{instr}_{src_precision}_to_{dst_precision}"
+                    print(f"DEBUG: Running '{name}'")
                     elapsed, _ = run_llvm_ir_jit(code)
-                    adjusted = elapsed - baseline_time
-                    writer.writerow(
-                        {
-                            "instruction": f"{instr}_{src_precision}_to_{dst_precision}",
-                            "precision": src_precision,
-                            "cost": int(adjusted),
-                        }
-                    )
+                    adjusted = (elapsed - baseline_time) * AMPLIFIER
+                    writer.writerow({"instruction": name, "precision": src_precision, "cost": int(adjusted)})
             else:
-                code = generate_llvm_code_other(instr, precision, iterations)
-                if not code.strip():
-                    continue
-                print(f"DEBUG: Running '{instr}'")
+                if op_kind == "arithmetic":
+                    code = generate_arithmetic_op_code(instr, precision, iterations)
+                elif op_kind == "compare":
+                    code = generate_compare_op_code(precision, iterations)
+                else:
+                    code = ""
+                if code.strip():
+                    print(f"DEBUG: Running '{instr}' at precision {precision}")
+                    elapsed, _ = run_llvm_ir_jit(code)
+                    adjusted = (elapsed - baseline_time) * AMPLIFIER
+                    writer.writerow({"instruction": instr, "precision": precision, "cost": int(adjusted)})
+
+        for func in FUNC_INFO:
+            code = generate_function_call_code(func, precision, iterations)
+            if code.strip():
+                print(f"DEBUG: Running function '{func}' at precision {precision}")
                 elapsed, _ = run_llvm_ir_jit(code)
-                adjusted = elapsed - baseline_time
-                writer.writerow({"instruction": instr, "precision": precision, "cost": int(adjusted)})
-
-        for func in functions:
-            code = generate_llvm_function_call(func, precision, iterations)
-            if not code.strip():
-                continue
-            print(f"DEBUG: Running '{func}'")
-            elapsed, _ = run_llvm_ir_jit(code)
-            adjusted = elapsed - baseline_time
-            writer.writerow({"instruction": func, "precision": precision, "cost": int(adjusted)})
+                adjusted = (elapsed - baseline_time) * AMPLIFIER
+                writer.writerow({"instruction": func, "precision": precision, "cost": int(adjusted)})
 
 print(f"Results in '{csv_file}'. Baseline: {baseline_time:.6f}s")