From b70afae882ba068eb5ec76bc1990ded1399e2c06 Mon Sep 17 00:00:00 2001
From: sbrantq <scharfrichterq@gmail.com>
Date: Thu, 13 Feb 2025 05:20:13 -0600
Subject: [PATCH] fix up geomean & misc

---
 experiments/ablation.py   | 934 --------------------------------------
 experiments/example.c     |  22 -
 experiments/fp-logger.cpp | 105 +++--
 experiments/run.py        | 140 ++++--
 4 files changed, 176 insertions(+), 1025 deletions(-)
 delete mode 100644 experiments/ablation.py
 delete mode 100644 experiments/example.c

diff --git a/experiments/ablation.py b/experiments/ablation.py
deleted file mode 100644
index 78608ba..0000000
--- a/experiments/ablation.py
+++ /dev/null
@@ -1,934 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import subprocess
-import sys
-import shutil
-import re
-import argparse
-import matplotlib.pyplot as plt
-import math
-import random
-import numpy as np
-import json
-from statistics import mean
-import pickle
-from tqdm import trange
-from matplotlib import rcParams
-from concurrent.futures import ProcessPoolExecutor, as_completed
-
-HOME = "/home/sbrantq"
-ENZYME_PATH = os.path.join(HOME, "sync/Enzyme/build/Enzyme/ClangEnzyme-15.so")
-LLVM_PATH = os.path.join(HOME, "llvms/llvm15/build/bin")
-CXX = os.path.join(LLVM_PATH, "clang++")
-
-CXXFLAGS = [
-    "-O3",
-    "-I" + os.path.join(HOME, "include"),
-    "-L" + os.path.join(HOME, "lib"),
-    "-I/usr/include/c++/11",
-    "-I/usr/include/x86_64-linux-gnu/c++/11",
-    "-L/usr/lib/gcc/x86_64-linux-gnu/11",
-    "-fno-exceptions",
-    f"-fpass-plugin={ENZYME_PATH}",
-    "-Xclang",
-    "-load",
-    "-Xclang",
-    ENZYME_PATH,
-    "-lmpfr",
-    "-ffast-math",
-    "-fno-finite-math-only",
-    "-fuse-ld=lld",
-]
-
-FPOPTFLAGS_BASE_TEMPLATE = [
-    "-mllvm",
-    "--enzyme-enable-fpopt",
-    "-mllvm",
-    "--enzyme-print-herbie",
-    "-mllvm",
-    "--enzyme-print-fpopt",
-    "-mllvm",
-    "--fpopt-log-path=example.txt",
-    "-mllvm",
-    "--fpopt-target-func-regex=example",
-    "-mllvm",
-    "--fpopt-enable-solver",
-    "-mllvm",
-    "--fpopt-enable-pt",
-    "-mllvm",
-    "--fpopt-cost-dom-thres=0",
-    "-mllvm",
-    "--fpopt-acc-dom-thres=0",
-    "-mllvm",
-    "--fpopt-comp-cost-budget=0",
-    "-mllvm",
-    "--herbie-num-threads=8",
-    "-mllvm",
-    "--herbie-timeout=1000",
-    "-mllvm",
-    "--fpopt-num-samples=1024",
-    "-mllvm",
-    "--fpopt-cost-model-path=/home/sbrantq/sync/FPBench/microbm/cm.csv",
-    "-mllvm",
-    "-fpopt-cache-path=cache",
-]
-
-SRC = "example.c"
-LOGGER = "fp-logger.cpp"
-NUM_RUNS = 10
-DRIVER_NUM_SAMPLES = 10000000
-LOG_NUM_SAMPLES = 10000
-MAX_TESTED_COSTS = 999
-
-
-def run_command(command, description, capture_output=False, output_file=None, verbose=True, timeout=None):
-    print(f"=== {description} ===")
-    print("Running:", " ".join(command))
-    try:
-        if capture_output and output_file:
-            with open(output_file, "w") as f:
-                subprocess.check_call(command, stdout=f, stderr=subprocess.STDOUT, timeout=timeout)
-        elif capture_output:
-            result = subprocess.run(command, capture_output=True, text=True, check=True, timeout=timeout)
-            return result.stdout
-        else:
-            if verbose:
-                subprocess.check_call(command, timeout=timeout)
-            else:
-                subprocess.check_call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout)
-    except subprocess.TimeoutExpired:
-        print(f"Command '{' '.join(command)}' timed out after {timeout} seconds.")
-        return
-    except subprocess.CalledProcessError as e:
-        print(f"Error during: {description}")
-        if capture_output and output_file:
-            print(f"Check the output file: {output_file} for details.")
-        else:
-            print(e)
-        sys.exit(e.returncode)
-
-
-def clean(tmp_dir, logs_dir, plots_dir):
-    print("=== Cleaning up generated files ===")
-    directories = [tmp_dir, logs_dir, plots_dir]
-    for directory in directories:
-        if os.path.exists(directory):
-            shutil.rmtree(directory)
-            print(f"Removed directory: {directory}")
-
-
-def generate_example_cpp(tmp_dir, original_prefix, prefix):
-    script = "fpopt-original-driver-generator.py"
-    print(f"=== Running {script} ===")
-    src_prefixed = os.path.join(tmp_dir, f"{original_prefix}{SRC}")
-    dest_prefixed = os.path.join(tmp_dir, f"{prefix}example.cpp")
-    run_command(
-        ["python3", script, src_prefixed, dest_prefixed, "example", str(DRIVER_NUM_SAMPLES)],
-        f"Generating example.cpp from {SRC}",
-    )
-    if not os.path.exists(dest_prefixed):
-        print(f"Failed to generate {dest_prefixed}.")
-        sys.exit(1)
-    print(f"Generated {dest_prefixed} successfully.")
-
-
-def generate_example_logged_cpp(tmp_dir, original_prefix, prefix):
-    script = "fpopt-logged-driver-generator.py"
-    print(f"=== Running {script} ===")
-    src_prefixed = os.path.join(tmp_dir, f"{original_prefix}{SRC}")
-    dest_prefixed = os.path.join(tmp_dir, f"{prefix}example-logged.cpp")
-    run_command(
-        ["python3", script, src_prefixed, dest_prefixed, "example", str(LOG_NUM_SAMPLES)],
-        f"Generating example-logged.cpp from {SRC}",
-    )
-    if not os.path.exists(dest_prefixed):
-        print(f"Failed to generate {dest_prefixed}.")
-        sys.exit(1)
-    print(f"Generated {dest_prefixed} successfully.")
-
-
-def compile_example_exe(tmp_dir, prefix):
-    source = os.path.join(tmp_dir, f"{prefix}example.cpp")
-    output = os.path.join(tmp_dir, f"{prefix}example.exe")
-    cmd = [CXX, source] + CXXFLAGS + ["-o", output]
-    run_command(cmd, f"Compiling {output}")
-
-
-def compile_example_logged_exe(tmp_dir, prefix):
-    source = os.path.join(tmp_dir, f"{prefix}example-logged.cpp")
-    output = os.path.join(tmp_dir, f"{prefix}example-logged.exe")
-    cmd = [CXX, source, LOGGER] + CXXFLAGS + ["-o", output]
-    run_command(cmd, f"Compiling {output}")
-
-
-def generate_example_txt(tmp_dir, prefix):
-    exe = os.path.join(tmp_dir, f"{prefix}example-logged.exe")
-    output = os.path.join(tmp_dir, f"{prefix}example.txt")
-    if not os.path.exists(exe):
-        print(f"Executable {exe} not found. Cannot generate {output}.")
-        sys.exit(1)
-    with open(output, "w") as f:
-        print(f"=== Running {exe} to generate {output} ===")
-        try:
-            subprocess.check_call([exe], stdout=f)
-        except subprocess.TimeoutExpired:
-            print(f"Execution of {exe} timed out.")
-            if os.path.exists(exe):
-                os.remove(exe)
-                print(f"Removed executable {exe} due to timeout.")
-            return
-        except subprocess.CalledProcessError as e:
-            print(f"Error running {exe}")
-            sys.exit(e.returncode)
-
-
-def compile_example_fpopt_exe(tmp_dir, prefix, fpoptflags, output="example-fpopt.exe", verbose=True):
-    source = os.path.join(tmp_dir, f"{prefix}example.cpp")
-    output_path = os.path.join(tmp_dir, f"{prefix}{output}")
-    cmd = [CXX, source] + CXXFLAGS + fpoptflags + ["-o", output_path]
-    log_path = os.path.join("logs", f"{prefix}compile_fpopt.log")
-    if output == "example-fpopt.exe":
-        run_command(
-            cmd,
-            f"Compiling {output_path} with FPOPTFLAGS",
-            capture_output=True,
-            output_file=log_path,
-            verbose=verbose,
-        )
-    else:
-        run_command(
-            cmd,
-            f"Compiling {output_path} with FPOPTFLAGS",
-            verbose=verbose,
-        )
-
-
-def parse_critical_comp_costs(tmp_dir, prefix, log_path="compile_fpopt.log"):
-    print(f"=== Parsing critical computation costs from {log_path} ===")
-    full_log_path = os.path.join("logs", f"{prefix}{log_path}")
-    if not os.path.exists(full_log_path):
-        print(f"Log file {full_log_path} does not exist.")
-        sys.exit(1)
-    with open(full_log_path, "r") as f:
-        content = f.read()
-
-    pattern = r"\*\*\* Critical Computation Costs \*\*\*(.*?)\*\*\* End of Critical Computation Costs \*\*\*"
-    match = re.search(pattern, content, re.DOTALL)
-    if not match:
-        print("Critical Computation Costs block not found in the log.")
-        sys.exit(1)
-
-    costs_str = match.group(1).strip()
-    costs = [int(cost) for cost in costs_str.split(",") if re.fullmatch(r"-?\d+", cost.strip())]
-    print(f"Parsed computation costs: {costs}")
-
-    if not costs:
-        print("No valid computation costs found to sample.")
-        sys.exit(1)
-
-    num_to_sample = min(MAX_TESTED_COSTS, len(costs))
-
-    sampled_costs = random.sample(costs, num_to_sample)
-
-    sampled_costs_sorted = sorted(sampled_costs)
-
-    print(f"Sampled computation costs (sorted): {sampled_costs_sorted}")
-
-    return sampled_costs_sorted
-
-
-def measure_runtime(tmp_dir, prefix, executable, num_runs=NUM_RUNS):
-    print(f"=== Measuring runtime for {executable} ===")
-    runtimes = []
-    exe_path = os.path.join(tmp_dir, f"{prefix}{executable}")
-    for i in trange(1, num_runs + 1):
-        try:
-            result = subprocess.run([exe_path], capture_output=True, text=True, check=True, timeout=300)
-            output = result.stdout
-            match = re.search(r"Total runtime: ([\d\.]+) seconds", output)
-            if match:
-                runtime = float(match.group(1))
-                runtimes.append(runtime)
-            else:
-                print(f"Could not parse runtime from output on run {i}")
-                sys.exit(1)
-        except subprocess.TimeoutExpired:
-            print(f"Execution of {exe_path} timed out on run {i}")
-            if os.path.exists(exe_path):
-                os.remove(exe_path)
-                print(f"Removed executable {exe_path} due to timeout.")
-            return None
-        except subprocess.CalledProcessError as e:
-            print(f"Error running {exe_path} on run {i}")
-            sys.exit(e.returncode)
-    if runtimes:
-        average_runtime = mean(runtimes)
-        print(f"Average runtime for {prefix}{executable}: {average_runtime:.6f} seconds")
-        return average_runtime
-    else:
-        print(f"No successful runs for {prefix}{executable}")
-        return None
-
-
-def get_values_file_path(tmp_dir, prefix, binary_name):
-    return os.path.join(tmp_dir, f"{prefix}{binary_name}-values.txt")
-
-
-def generate_example_values(tmp_dir, prefix):
-    binary_name = "example.exe"
-    exe = os.path.join(tmp_dir, f"{prefix}{binary_name}")
-    output_values_file = get_values_file_path(tmp_dir, prefix, binary_name)
-    cmd = [exe, "--output-path", output_values_file]
-    run_command(cmd, f"Generating function values from {binary_name}", verbose=False, timeout=300)
-
-
-def generate_values(tmp_dir, prefix, binary_name):
-    exe = os.path.join(tmp_dir, f"{prefix}{binary_name}")
-    values_file = get_values_file_path(tmp_dir, prefix, binary_name)
-    cmd = [exe, "--output-path", values_file]
-    run_command(cmd, f"Generating function values from {binary_name}", verbose=False, timeout=300)
-
-
-def compile_golden_exe(tmp_dir, prefix):
-    source = os.path.join(tmp_dir, f"{prefix}golden.cpp")
-    output = os.path.join(tmp_dir, f"{prefix}golden.exe")
-    cmd = [CXX, source] + CXXFLAGS + ["-o", output]
-    run_command(cmd, f"Compiling {output}")
-
-
-def generate_golden_values(tmp_dir, original_prefix, prefix):
-    script = "fpopt-golden-driver-generator.py"
-    src_prefixed = os.path.join(tmp_dir, f"{original_prefix}{SRC}")
-    dest_prefixed = os.path.join(tmp_dir, f"{prefix}golden.cpp")
-    cur_prec = 128
-    max_prec = 4096
-    PREC_step = 128
-    prev_output = None
-    output_values_file = get_values_file_path(tmp_dir, prefix, "golden.exe")
-    while cur_prec <= max_prec:
-        run_command(
-            ["python3", script, src_prefixed, dest_prefixed, str(cur_prec), "example", str(DRIVER_NUM_SAMPLES)],
-            f"Generating golden.cpp with PREC={cur_prec}",
-        )
-        if not os.path.exists(dest_prefixed):
-            print(f"Failed to generate {dest_prefixed}.")
-            sys.exit(1)
-        print(f"Generated {dest_prefixed} successfully.")
-
-        compile_golden_exe(tmp_dir, prefix)
-
-        exe = os.path.join(tmp_dir, f"{prefix}golden.exe")
-        cmd = [exe, "--output-path", output_values_file]
-        run_command(cmd, f"Generating golden values with PREC={cur_prec}", verbose=False)
-
-        if not os.path.exists(output_values_file):
-            print(f"Failed to generate golden values at PREC={cur_prec} due to timeout.")
-            return
-
-        with open(output_values_file, "r") as f:
-            output = f.read()
-
-        if output == prev_output:
-            print(f"Golden values converged at PREC={cur_prec}")
-            break
-        else:
-            prev_output = output
-            cur_prec += PREC_step
-    else:
-        print(f"Failed to converge golden values up to PREC={max_prec}")
-        sys.exit(1)
-
-
-def get_avg_rel_error(tmp_dir, prefix, golden_values_file, binaries):
-    with open(golden_values_file, "r") as f:
-        golden_values = [float(line.strip()) for line in f]
-
-    errors = {}
-    for binary in binaries:
-        values_file = get_values_file_path(tmp_dir, prefix, binary)
-        if not os.path.exists(values_file):
-            print(f"Values file {values_file} does not exist. Skipping error calculation for {binary}.")
-            errors[binary] = None
-            continue
-        with open(values_file, "r") as f:
-            values = [float(line.strip()) for line in f]
-        if len(values) != len(golden_values):
-            print(f"Number of values in {values_file} does not match golden values")
-            sys.exit(1)
-
-        valid_errors = []
-        for v, g in zip(values, golden_values):
-            if math.isnan(v) or math.isnan(g):
-                continue
-            if g == 0:
-                continue
-            error = abs((v - g) / g) * 100
-            valid_errors.append(error)
-
-        if not valid_errors:
-            print(f"No valid data to compute rel error for binary {binary}. Setting rel error to None.")
-            errors[binary] = None
-            continue
-
-        try:
-            log_sum = sum(math.log1p(e) for e in valid_errors)
-            geo_mean = math.expm1(log_sum / len(valid_errors))
-            errors[binary] = geo_mean
-        except OverflowError:
-            print(
-                f"Overflow error encountered while computing geometric mean for binary {binary}. Setting rel error to None."
-            )
-            errors[binary] = None
-        except ZeroDivisionError:
-            print(f"No valid errors to compute geometric mean for binary {binary}. Setting rel error to None.")
-            errors[binary] = None
-
-    return errors
-
-
-def build_all(tmp_dir, logs_dir, original_prefix, prefix, fpoptflags, example_txt_path):
-    generate_example_cpp(tmp_dir, original_prefix, prefix)
-    generate_example_logged_cpp(tmp_dir, original_prefix, prefix)
-    compile_example_exe(tmp_dir, prefix)
-    compile_example_logged_exe(tmp_dir, prefix)
-    if not os.path.exists(example_txt_path):
-        generate_example_txt(tmp_dir, original_prefix)
-    compile_example_fpopt_exe(tmp_dir, prefix, fpoptflags, output="example-fpopt.exe")
-    print("=== Initial build process completed successfully ===")
-
-
-def process_cost(args):
-    cost, tmp_dir, prefix, fpoptflags = args
-
-    print(f"\n=== Processing computation cost budget: {cost} ===")
-    fpoptflags_cost = []
-    for flag in fpoptflags:
-        if flag.startswith("--fpopt-comp-cost-budget="):
-            fpoptflags_cost.append(f"--fpopt-comp-cost-budget={cost}")
-        else:
-            fpoptflags_cost.append(flag)
-
-    output_binary = f"example-fpopt-{cost}.exe"
-
-    compile_example_fpopt_exe(tmp_dir, prefix, fpoptflags_cost, output=output_binary, verbose=False)
-    generate_values(tmp_dir, prefix, output_binary)
-
-    return cost, output_binary
-
-
-def benchmark(tmp_dir, logs_dir, original_prefix, prefix, plots_dir, fpoptflags, num_parallel=1):
-    costs = parse_critical_comp_costs(tmp_dir, prefix)
-
-    original_avg_runtime = measure_runtime(tmp_dir, prefix, "example.exe", NUM_RUNS)
-    original_runtime = original_avg_runtime
-
-    if original_runtime is None:
-        print("Original binary timed out. Proceeding as if it doesn't exist.")
-        return
-
-    generate_example_values(tmp_dir, prefix)
-
-    generate_golden_values(tmp_dir, original_prefix, prefix)
-
-    golden_values_file = get_values_file_path(tmp_dir, prefix, "golden.exe")
-    example_binary = "example.exe"
-    rel_errs_example = get_avg_rel_error(tmp_dir, prefix, golden_values_file, [example_binary])
-    rel_err_example = rel_errs_example[example_binary]
-    print(f"Average Rel Error for {prefix}example.exe: {rel_err_example}")
-
-    data_tuples = []
-
-    args_list = [(cost, tmp_dir, prefix, fpoptflags) for cost in costs]
-
-    if num_parallel == 1:
-        for args in args_list:
-            cost, output_binary = process_cost(args)
-            data_tuples.append((cost, output_binary))
-    else:
-        with ProcessPoolExecutor(max_workers=num_parallel) as executor:
-            future_to_cost = {executor.submit(process_cost, args): args[0] for args in args_list}
-            for future in as_completed(future_to_cost):
-                cost = future_to_cost[future]
-                try:
-                    cost_result, output_binary = future.result()
-                    data_tuples.append((cost_result, output_binary))
-                except Exception as exc:
-                    print(f"Cost {cost} generated an exception: {exc}")
-
-    data_tuples_sorted = sorted(data_tuples, key=lambda x: x[0])
-    sorted_budgets, sorted_optimized_binaries = zip(*data_tuples_sorted) if data_tuples_sorted else ([], [])
-
-    sorted_runtimes = []
-    for cost, output_binary in zip(sorted_budgets, sorted_optimized_binaries):
-        avg_runtime = measure_runtime(tmp_dir, prefix, output_binary, NUM_RUNS)
-        if avg_runtime is not None:
-            sorted_runtimes.append(avg_runtime)
-        else:
-            print(f"Skipping cost {cost} due to runtime measurement failure.")
-            sorted_runtimes.append(None)
-
-    errors_dict = get_avg_rel_error(tmp_dir, prefix, golden_values_file, sorted_optimized_binaries)
-    sorted_errors = []
-    for binary in sorted_optimized_binaries:
-        sorted_errors.append(errors_dict.get(binary))
-        print(f"Average rel error for {binary}: {errors_dict.get(binary)}")
-
-    sorted_budgets = list(sorted_budgets)
-    sorted_runtimes = list(sorted_runtimes)
-    sorted_errors = list(sorted_errors)
-
-    data = {
-        "budgets": sorted_budgets,
-        "runtimes": sorted_runtimes,
-        "errors": sorted_errors,
-        "original_runtime": original_runtime,
-        "original_error": rel_err_example,
-    }
-
-    table_json_path = os.path.join("cache", "table.json")
-    if os.path.exists(table_json_path):
-        with open(table_json_path, "r") as f:
-            table_data = json.load(f)
-        if "costToAccuracyMap" in table_data:
-            predicted_costs = []
-            predicted_errors = []
-            for k, v in table_data["costToAccuracyMap"].items():
-                try:
-                    cost_val = float(k)
-                    err_val = float(v)
-                    predicted_costs.append(cost_val)
-                    predicted_errors.append(err_val)
-                except ValueError:
-                    pass
-            pred_sorted_indices = np.argsort(predicted_costs)
-            predicted_costs = np.array(predicted_costs)[pred_sorted_indices].tolist()
-            predicted_errors = np.array(predicted_errors)[pred_sorted_indices].tolist()
-            data["predicted_data"] = {"costs": predicted_costs, "errors": predicted_errors}
-
-    data_file = os.path.join(tmp_dir, f"{prefix}benchmark_data.pkl")
-    with open(data_file, "wb") as f:
-        pickle.dump(data, f)
-    print(f"Benchmark data saved to {data_file}")
-
-    return data
-
-
-def build_with_benchmark(
-    tmp_dir, logs_dir, plots_dir, original_prefix, prefix, fpoptflags, example_txt_path, num_parallel=1
-):
-    build_all(tmp_dir, logs_dir, original_prefix, prefix, fpoptflags, example_txt_path)
-    data = benchmark(tmp_dir, logs_dir, original_prefix, prefix, plots_dir, fpoptflags, num_parallel)
-    return data
-
-
-def remove_cache_dir():
-    cache_dir = "cache"
-    if os.path.exists(cache_dir):
-        shutil.rmtree(cache_dir)
-        print("=== Removed existing cache directory ===")
-
-
-def plot_ablation_results(tmp_dir, plots_dir, original_prefix, prefix, output_format="png", show_prediction=False):
-    ablation_data_file = os.path.join(tmp_dir, f"{prefix}ablation-widen-range.pkl")
-    if not os.path.exists(ablation_data_file):
-        print(f"Ablation data file {ablation_data_file} does not exist. Cannot plot.")
-        sys.exit(1)
-    with open(ablation_data_file, "rb") as f:
-        all_data = pickle.load(f)
-
-    if not all_data:
-        print("No data to plot.")
-        sys.exit(1)
-
-    rcParams["font.size"] = 20
-    rcParams["axes.titlesize"] = 24
-    rcParams["axes.labelsize"] = 20
-    rcParams["xtick.labelsize"] = 18
-    rcParams["ytick.labelsize"] = 18
-    rcParams["legend.fontsize"] = 18
-
-    if show_prediction and any("predicted_data" in d for d in all_data.values()):
-        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))
-    else:
-        fig, ax1 = plt.subplots(1, 1, figsize=(10, 8))
-        ax2 = None
-
-    # Extract original runtime/error from the first scenario
-    first_key = next(iter(all_data))
-    original_runtime = all_data[first_key]["original_runtime"]
-    original_error = all_data[first_key]["original_error"]
-
-    # Plot original program once
-    ax1.scatter(original_runtime, original_error, marker="x", color="black", s=100, label="Original Program")
-
-    colors = ["blue", "green", "red", "purple", "orange", "brown", "pink", "gray", "olive", "cyan"]
-    color_iter = iter(colors)
-
-    for X, data in sorted(all_data.items()):
-        budgets = data["budgets"]
-        runtimes = data["runtimes"]
-        errors = data["errors"]
-
-        data_points = list(zip(runtimes, errors))
-        filtered_data = [(r, e) for r, e in data_points if r is not None and e is not None]
-        if not filtered_data:
-            print(f"No valid data to plot for widen-range={X}.")
-            continue
-        runtimes_filtered, errors_filtered = zip(*filtered_data)
-        color = next(color_iter)
-        ax1.scatter(runtimes_filtered, errors_filtered, label=f"widen-range={X}", color=color)
-        points = np.array(filtered_data)
-        sorted_indices = np.argsort(points[:, 0])
-        sorted_points = points[sorted_indices]
-
-        # Compute pareto front
-        pareto_front = [sorted_points[0]]
-        for point in sorted_points[1:]:
-            if point[1] < pareto_front[-1][1]:
-                pareto_front.append(point)
-
-        pareto_front = np.array(pareto_front)
-        ax1.step(
-            pareto_front[:, 0],
-            pareto_front[:, 1],
-            where="post",
-            linestyle="-",
-            color=color,
-        )
-
-        if show_prediction and "predicted_data" in data and ax2 is not None:
-            p_costs = data["predicted_data"]["costs"]
-            p_errors = data["predicted_data"]["errors"]
-            if p_costs and p_errors:
-                ax2.scatter(p_costs, p_errors, label=f"Prediction (widen-range={X})", color=color)
-                pred_points = np.column_stack((p_costs, p_errors))
-                pred_sorted_indices = np.argsort(pred_points[:, 0])
-                pred_sorted_points = pred_points[pred_sorted_indices]
-
-                pred_pareto = [pred_sorted_points[0]]
-                for pt in pred_sorted_points[1:]:
-                    if pt[1] < pred_pareto[-1][1]:
-                        pred_pareto.append(pt)
-                pred_pareto = np.array(pred_pareto)
-
-                ax2.step(
-                    pred_pareto[:, 0],
-                    pred_pareto[:, 1],
-                    where="post",
-                    linestyle="-",
-                    color=color,
-                )
-
-    ax1.set_xlabel("Runtimes (seconds)")
-    ax1.set_ylabel("Relative Errors (%)")
-    ax1.set_title("Pareto Fronts for Different widen-range Values")
-    ax1.set_yscale("symlog", linthresh=1e-15)
-    ax1.set_ylim(bottom=0)
-    ax1.legend()
-    ax1.grid(True)
-
-    if ax2 is not None:
-        ax2.set_xlabel("Cost Budget")
-        ax2.set_ylabel("Predicted Error")
-        ax2.set_title("Predicted Pareto Fronts")
-        ax2.set_yscale("symlog", linthresh=1e-15)
-        ax2.set_ylim(bottom=-1e-13)
-        ax2.grid(True)
-
-    plot_filename = os.path.join(plots_dir, f"{prefix}ablation_widen_range_pareto_front.{output_format}")
-    plt.savefig(plot_filename, bbox_inches="tight", dpi=300)
-    plt.close()
-    print(f"Ablation plot saved to {plot_filename}")
-
-
-def plot_ablation_results_cost_model(
-    tmp_dir, plots_dir, original_prefix, prefix, output_format="png", show_prediction=False
-):
-    ablation_data_file = os.path.join(tmp_dir, f"{prefix}ablation-cost-model.pkl")
-    if not os.path.exists(ablation_data_file):
-        print(f"Ablation data file {ablation_data_file} does not exist. Cannot plot.")
-        sys.exit(1)
-    with open(ablation_data_file, "rb") as f:
-        all_data = pickle.load(f)
-
-    if not all_data:
-        print("No data to plot.")
-        sys.exit(1)
-
-    rcParams["font.size"] = 20
-    rcParams["axes.titlesize"] = 24
-    rcParams["axes.labelsize"] = 20
-    rcParams["xtick.labelsize"] = 18
-    rcParams["ytick.labelsize"] = 18
-    rcParams["legend.fontsize"] = 18
-
-    if show_prediction and any("predicted_data" in d for d in all_data.values()):
-        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))
-    else:
-        fig, ax1 = plt.subplots(1, 1, figsize=(10, 8))
-        ax2 = None
-
-    # Extract original runtime/error from the first scenario
-    first_key = next(iter(all_data))
-    original_runtime = all_data[first_key]["original_runtime"]
-    original_error = all_data[first_key]["original_error"]
-
-    # Plot original program once
-    ax1.scatter(original_runtime, original_error, marker="x", color="black", s=100, label="Original Program")
-
-    colors = ["blue", "green"]
-    labels = ["Custom Cost Model", "TTI Cost Model"]
-
-    for idx, key in enumerate(["with_cost_model", "without_cost_model"]):
-        data = all_data[key]
-        budgets = data["budgets"]
-        runtimes = data["runtimes"]
-        errors = data["errors"]
-
-        data_points = list(zip(runtimes, errors))
-        filtered_data = [(r, e) for r, e in data_points if r is not None and e is not None]
-        if not filtered_data:
-            print(f"No valid data to plot for {labels[idx]}.")
-            continue
-        runtimes_filtered, errors_filtered = zip(*filtered_data)
-        color = colors[idx]
-        ax1.scatter(runtimes_filtered, errors_filtered, label=labels[idx], color=color)
-        points = np.array(filtered_data)
-        sorted_indices = np.argsort(points[:, 0])
-        sorted_points = points[sorted_indices]
-
-        pareto_front = [sorted_points[0]]
-        for point in sorted_points[1:]:
-            if point[1] < pareto_front[-1][1]:
-                pareto_front.append(point)
-
-        pareto_front = np.array(pareto_front)
-
-        ax1.step(
-            pareto_front[:, 0],
-            pareto_front[:, 1],
-            where="post",
-            linestyle="-",
-            color=color,
-        )
-
-        if show_prediction and "predicted_data" in data and ax2 is not None:
-            p_costs = data["predicted_data"]["costs"]
-            p_errors = data["predicted_data"]["errors"]
-            if p_costs and p_errors:
-                ax2.scatter(p_costs, p_errors, label=f"{labels[idx]}", color=color)
-                pred_points = np.column_stack((p_costs, p_errors))
-                pred_sorted_indices = np.argsort(pred_points[:, 0])
-                pred_sorted_points = pred_points[pred_sorted_indices]
-
-                pred_pareto = [pred_sorted_points[0]]
-                for pt in pred_sorted_points[1:]:
-                    if pt[1] < pred_pareto[-1][1]:
-                        pred_pareto.append(pt)
-                pred_pareto = np.array(pred_pareto)
-
-                ax2.step(
-                    pred_pareto[:, 0],
-                    pred_pareto[:, 1],
-                    where="post",
-                    linestyle="-",
-                    color=color,
-                )
-
-    ax1.set_xlabel("Runtimes (seconds)")
-    ax1.set_ylabel("Relative Errors (%)")
-    ax1.set_title("Pareto Fronts for Cost Model Ablation")
-    ax1.set_yscale("symlog", linthresh=1e-14)
-    ax1.set_ylim(bottom=-1e-14)
-    ax1.legend()
-    ax1.grid(True)
-
-    if ax2 is not None:
-        ax2.set_xlabel("Cost Budget")
-        ax2.set_ylabel("Predicted Error")
-        ax2.set_title("Predicted Pareto Front")
-        ax2.set_yscale("symlog", linthresh=1e-14)
-        ax2.set_ylim(bottom=-1e-14)
-        ax2.legend()
-        ax2.grid(True)
-
-    plot_filename = os.path.join(plots_dir, f"{prefix}ablation_cost_model_pareto_front.{output_format}")
-    plt.savefig(plot_filename, bbox_inches="tight", dpi=300)
-    plt.close()
-    print(f"Ablation plot saved to {plot_filename}")
-
-
-def remove_mllvm_flag(flags_list, flag_prefix):
-    new_flags = []
-    i = 0
-    while i < len(flags_list):
-        if flags_list[i] == "-mllvm" and i + 1 < len(flags_list) and flags_list[i + 1].startswith(flag_prefix):
-            i += 2
-        else:
-            new_flags.append(flags_list[i])
-            i += 1
-    return new_flags
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Run the ablation study with widen-range parameter.")
-    parser.add_argument("--prefix", type=str, required=True, help="Prefix for intermediate files (e.g., rosa-ex23-)")
-    parser.add_argument("--clean", action="store_true", help="Clean up generated files")
-    parser.add_argument("--plot-only", action="store_true", help="Plot results from existing data")
-    parser.add_argument("--output-format", type=str, default="png", help="Output format for plots (e.g., png, pdf)")
-    parser.add_argument(
-        "--num-parallel", type=int, default=16, help="Number of parallel processes to use (default: 16)"
-    )
-    parser.add_argument(
-        "--ablation-type",
-        type=str,
-        choices=["widen-range", "cost-model"],
-        default="widen-range",
-        help="Type of ablation study to perform (default: widen-range)",
-    )
-    parser.add_argument("--show-prediction", action="store_true", help="Show predicted results in a second subplot")
-
-    args = parser.parse_args()
-
-    original_prefix = args.prefix
-    if not original_prefix.endswith("-"):
-        original_prefix += "-"
-
-    tmp_dir = "tmp"
-    logs_dir = "logs"
-    plots_dir = "plots"
-
-    os.makedirs(tmp_dir, exist_ok=True)
-    os.makedirs(logs_dir, exist_ok=True)
-    os.makedirs(plots_dir, exist_ok=True)
-
-    example_txt_path = os.path.join(tmp_dir, f"{original_prefix}example.txt")
-
-    if args.clean:
-        clean(tmp_dir, logs_dir, plots_dir)
-        sys.exit(0)
-    elif args.plot_only:
-        if args.ablation_type == "widen-range":
-            plot_ablation_results(
-                tmp_dir,
-                plots_dir,
-                original_prefix,
-                original_prefix,
-                args.output_format,
-                show_prediction=args.show_prediction,
-            )
-        elif args.ablation_type == "cost-model":
-            plot_ablation_results_cost_model(
-                tmp_dir,
-                plots_dir,
-                original_prefix,
-                original_prefix,
-                args.output_format,
-                show_prediction=args.show_prediction,
-            )
-        sys.exit(0)
-    else:
-        if not os.path.exists(example_txt_path):
-            generate_example_logged_cpp(tmp_dir, original_prefix, original_prefix)
-            compile_example_logged_exe(tmp_dir, original_prefix)
-            generate_example_txt(tmp_dir, original_prefix)
-
-        if args.ablation_type == "widen-range":
-            widen_ranges = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
-            all_data = {}
-            for X in widen_ranges:
-                print(f"=== Running ablation study with widen-range={X} ===")
-                remove_cache_dir()
-                FPOPTFLAGS_BASE = FPOPTFLAGS_BASE_TEMPLATE.copy()
-                for idx, flag in enumerate(FPOPTFLAGS_BASE):
-                    if flag.startswith("--fpopt-log-path="):
-                        FPOPTFLAGS_BASE[idx] = f"--fpopt-log-path={example_txt_path}"
-                FPOPTFLAGS_BASE.extend(["-mllvm", f"--fpopt-widen-range={X}"])
-
-                prefix_with_x = f"{original_prefix}abl-widen-range-{X}-"
-
-                data = build_with_benchmark(
-                    tmp_dir,
-                    logs_dir,
-                    plots_dir,
-                    original_prefix,
-                    prefix_with_x,
-                    FPOPTFLAGS_BASE,
-                    example_txt_path,
-                    num_parallel=args.num_parallel,
-                )
-
-                all_data[X] = data
-
-            ablation_data_file = os.path.join(tmp_dir, f"{original_prefix}ablation-widen-range.pkl")
-            with open(ablation_data_file, "wb") as f:
-                pickle.dump(all_data, f)
-            print(f"Ablation data saved to {ablation_data_file}")
-
-            plot_ablation_results(
-                tmp_dir,
-                plots_dir,
-                original_prefix,
-                original_prefix,
-                args.output_format,
-                show_prediction=args.show_prediction,
-            )
-
-        if args.ablation_type == "cost-model":
-            print("=== Running cost-model ablation study ===")
-            remove_cache_dir()
-            FPOPTFLAGS_WITH_CM = FPOPTFLAGS_BASE_TEMPLATE.copy()
-            for idx, flag in enumerate(FPOPTFLAGS_WITH_CM):
-                if flag.startswith("--fpopt-log-path="):
-                    FPOPTFLAGS_WITH_CM[idx] = f"--fpopt-log-path={example_txt_path}"
-            prefix_with_cm = f"{original_prefix}abl-with-cost-model-"
-
-            data_with_cm = build_with_benchmark(
-                tmp_dir,
-                logs_dir,
-                plots_dir,
-                original_prefix,
-                prefix_with_cm,
-                FPOPTFLAGS_WITH_CM,
-                example_txt_path,
-                num_parallel=args.num_parallel,
-            )
-
-            remove_cache_dir()
-            FPOPTFLAGS_NO_CM = remove_mllvm_flag(FPOPTFLAGS_WITH_CM, "--fpopt-cost-model-path=")
-            prefix_without_cm = f"{original_prefix}abl-without-cost-model-"
-
-            data_without_cm = build_with_benchmark(
-                tmp_dir,
-                logs_dir,
-                plots_dir,
-                original_prefix,
-                prefix_without_cm,
-                FPOPTFLAGS_NO_CM,
-                example_txt_path,
-                num_parallel=args.num_parallel,
-            )
-
-            all_data = {
-                "with_cost_model": data_with_cm,
-                "without_cost_model": data_without_cm,
-            }
-
-            ablation_data_file = os.path.join(tmp_dir, f"{original_prefix}ablation-cost-model.pkl")
-            with open(ablation_data_file, "wb") as f:
-                pickle.dump(all_data, f)
-            print(f"Ablation data saved to {ablation_data_file}")
-
-            plot_ablation_results_cost_model(
-                tmp_dir,
-                plots_dir,
-                original_prefix,
-                original_prefix,
-                args.output_format,
-                show_prediction=args.show_prediction,
-            )
-
-
-if __name__ == "__main__":
-    main()
diff --git a/experiments/example.c b/experiments/example.c
deleted file mode 100644
index 9dfe018..0000000
--- a/experiments/example.c
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <math.h>
-#include <stdint.h>
-#define TRUE 1
-#define FALSE 0
-
-// ## PRE c: 1, 9
-// ## PRE a: 1, 9
-// ## PRE b: 1, 9
-__attribute__((noinline))
-double example(double a, double b, double c) {
-  double tmp;
-  if (a < b) {
-    tmp = sqrt(((((c + (b + a)) * (a - (c - b))) * (a + (c - b))) *
-                (c + (b - a)))) /
-          4.0;
-  } else {
-    tmp = sqrt(((((c + (a + b)) * (b - (c - a))) * (b + (c - a))) *
-                (c + (a - b)))) /
-          4.0;
-  }
-  return tmp;
-}
diff --git a/experiments/fp-logger.cpp b/experiments/fp-logger.cpp
index 545e31d..ba7f129 100644
--- a/experiments/fp-logger.cpp
+++ b/experiments/fp-logger.cpp
@@ -1,3 +1,4 @@
+#include <algorithm>
 #include <cassert>
 #include <cmath>
 #include <fstream>
@@ -6,9 +7,62 @@
 #include <limits>
 #include <string>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 
-#include "fp-logger.hpp"
+// https://arxiv.org/pdf/1806.06403
+double geomean(const std::vector<double> &dataset, double epsilon = 1e-5) {
+  std::vector<double> dataset_nozeros;
+  for (double x : dataset) {
+    if (x > 0.0)
+      dataset_nozeros.push_back(x);
+  }
+
+  if (dataset_nozeros.empty()) {
+    return 0.0;
+  }
+
+  double sum_log = 0.0;
+  for (double x : dataset_nozeros) {
+    sum_log += std::log(x);
+  }
+  double geomeanNozeros = std::exp(sum_log / dataset_nozeros.size());
+
+  double min_val =
+      *std::min_element(dataset_nozeros.begin(), dataset_nozeros.end());
+  double deltamin = 0.0;
+  double deltamax = std::max(geomeanNozeros - min_val, 0.0);
+  double delta = (deltamin + deltamax) / 2.0;
+  double epsilon_threshold = epsilon * geomeanNozeros;
+
+  auto compute_auxExp = [&](double d) -> double {
+    double sum = 0.0;
+    for (double x : dataset_nozeros) {
+      sum += std::log(x + d);
+    }
+    return std::exp(sum / dataset_nozeros.size()) - d;
+  };
+
+  double auxExp = compute_auxExp(delta);
+
+  while ((auxExp - geomeanNozeros) > epsilon_threshold) {
+    if (auxExp < geomeanNozeros)
+      deltamin = delta;
+    else
+      deltamax = delta;
+    delta = (deltamin + deltamax) / 2.0;
+    auxExp = compute_auxExp(delta);
+  }
+
+  double sum_log_all = 0.0;
+  for (double x : dataset) {
+    sum_log_all += std::log(x + delta);
+  }
+  double gmeanE = std::exp(sum_log_all / dataset.size()) - delta;
+
+  assert(!std::isnan(gmeanE) && !std::isinf(gmeanE));
+  return gmeanE;
+}
 
 class ValueInfo {
 public:
@@ -17,8 +71,7 @@ class ValueInfo {
   std::vector<double> minOperands;
   std::vector<double> maxOperands;
   unsigned executions = 0;
-  double logSum = 0.0;
-  unsigned logCount = 0;
+  std::vector<double> loggedValues;
 
   void update(double res, const double *operands, unsigned numOperands) {
     minRes = std::min(minRes, res);
@@ -33,17 +86,17 @@ class ValueInfo {
     }
     ++executions;
 
-    if (!std::isnan(res)) {
-      logSum += std::log1p(std::fabs(res));
-      ++logCount;
+    if (!std::isnan(res) && !std::isinf(res)) {
+      loggedValues.push_back(std::fabs(res));
     }
   }
 
-  double getGeometricAverage() const {
-    if (logCount == 0) {
-      return 0.;
+  double getGeomean() const {
+    if (loggedValues.empty()) {
+      return 0.0;
     }
-    return std::expm1(logSum / logCount);
+
+    return geomean(loggedValues);
   }
 };
 
@@ -60,21 +113,20 @@ class ErrorInfo {
 
 class GradInfo {
 public:
-  double logSum = 0.0;
-  unsigned count = 0;
+  std::vector<double> loggedValues;
 
   void update(double grad) {
-    if (!std::isnan(grad)) {
-      logSum += std::log1p(std::fabs(grad));
-      ++count;
+    if (!std::isnan(grad) && !std::isinf(grad)) {
+      loggedValues.push_back(std::fabs(grad));
     }
   }
 
-  double getGeometricAverage() const {
-    if (count == 0) {
-      return 0.;
+  double getGeomean() const {
+    if (loggedValues.empty()) {
+      return 0.0;
     }
-    return std::expm1(logSum / count);
+
+    return geomean(loggedValues);
   }
 };
 
@@ -112,27 +164,18 @@ class Logger {
       std::cout << "\tMinRes = " << info.minRes << "\n";
       std::cout << "\tMaxRes = " << info.maxRes << "\n";
       std::cout << "\tExecutions = " << info.executions << "\n";
-      std::cout << "\tGeometric Average = " << info.getGeometricAverage()
-                << "\n";
+      std::cout << "\tGeometric Average = " << info.getGeomean() << "\n";
       for (unsigned i = 0; i < info.minOperands.size(); ++i) {
         std::cout << "\tOperand[" << i << "] = [" << info.minOperands[i] << ", "
                   << info.maxOperands[i] << "]\n";
       }
     }
 
-    for (const auto &pair : errorInfo) {
-      const auto &id = pair.first;
-      const auto &info = pair.second;
-      std::cout << "Error:" << id << "\n";
-      std::cout << "\tMinErr = " << info.minErr << "\n";
-      std::cout << "\tMaxErr = " << info.maxErr << "\n";
-    }
-
     for (const auto &pair : gradInfo) {
       const auto &id = pair.first;
       const auto &info = pair.second;
       std::cout << "Grad:" << id << "\n";
-      std::cout << "\tGrad = " << info.getGeometricAverage() << "\n";
+      std::cout << "\tGrad = " << info.getGeomean() << "\n";
     }
   }
 };
@@ -162,4 +205,4 @@ void enzymeLogValue(const char *id, double res, unsigned numOperands,
                     double *operands) {
   assert(logger && "Logger is not initialized");
   logger->updateValue(id, res, numOperands, operands);
-}
\ No newline at end of file
+}
diff --git a/experiments/run.py b/experiments/run.py
index 8cd2461..032d983 100755
--- a/experiments/run.py
+++ b/experiments/run.py
@@ -8,6 +8,7 @@
 import re
 import argparse
 import matplotlib.pyplot as plt
+import scipy.stats.mstats as ssm
 import math
 import random
 import numpy as np
@@ -19,7 +20,7 @@
 from concurrent.futures import ProcessPoolExecutor, as_completed
 
 HOME = "/home/sbrantq"
-ENZYME_PATH = os.path.join(HOME, "sync/Enzyme/build/Enzyme/ClangEnzyme-16.so")
+ENZYME_PATH = os.path.join(HOME, "sync/Enzyme/build-debug/Enzyme/ClangEnzyme-16.so")
 LLVM_PATH = os.path.join(HOME, "llvms/llvm16/build/bin")
 CXX = os.path.join(LLVM_PATH, "clang++")
 
@@ -80,6 +81,36 @@
 MAX_TESTED_COSTS = 999
 
 
+# https://arxiv.org/pdf/1806.06403
+def geomean(dataset):
+    dataset = np.array(dataset)
+    epsilon = 1e-5
+
+    dataset_nozeros = dataset[dataset > 0]
+
+    if len(dataset_nozeros) == 0:
+        return 0.0
+
+    geomeanNozeros = ssm.gmean(dataset_nozeros)
+
+    deltamin = 0
+    deltamax = geomeanNozeros - min(dataset_nozeros)
+    delta = (deltamin + deltamax) / 2
+
+    epsilon = epsilon * geomeanNozeros
+    auxExp = math.exp(np.mean(np.log(dataset_nozeros + delta))) - delta
+    while (auxExp - geomeanNozeros) > epsilon:
+        if auxExp < geomeanNozeros:
+            deltamin = delta
+        else:
+            deltamax = delta
+        delta = (deltamin + deltamax) / 2
+        auxExp = math.exp(np.mean(np.log(dataset_nozeros + delta))) - delta
+
+    gmeanE = math.exp(np.mean(np.log(dataset + delta))) - delta
+    return gmeanE
+
+
 def run_command(command, description, capture_output=False, output_file=None, verbose=True, timeout=None):
     print(f"=== {description} ===")
     print("Running:", " ".join(command))
@@ -383,7 +414,7 @@ def get_avg_rel_error(tmp_dir, prefix, golden_values_file, binaries):
                 continue
             if g == 0:
                 continue
-            error = abs((v - g) / g) * 100
+            error = abs((v - g) / g)
             valid_errors.append(error)
 
         if not valid_errors:
@@ -392,9 +423,7 @@ def get_avg_rel_error(tmp_dir, prefix, golden_values_file, binaries):
             continue
 
         try:
-            log_sum = sum(math.log1p(e) for e in valid_errors)
-            geo_mean = math.expm1(log_sum / len(valid_errors))
-            errors[binary] = geo_mean
+            errors[binary] = geomean(valid_errors)
         except OverflowError:
             print(
                 f"Overflow error encountered while computing geometric mean for binary {binary}. Setting rel error to None."
@@ -444,7 +473,7 @@ def plot_results(
         ax1.set_xlabel("Computation Cost Budget")
         ax1.set_ylabel("Runtimes (seconds)", color=color_runtime)
         (line1,) = ax1.step(
-            budgets, runtimes, marker="o", linestyle="-", label="Optimized Runtimes", color=color_runtime
+            budgets, runtimes, marker="o", linestyle="-", label="Optimized Runtimes", color=color_runtime, where="post"
         )
         if original_runtime is not None:
             line2 = ax1.axhline(y=original_runtime, color=color_runtime, linestyle="--", label="Original Runtime")
@@ -452,9 +481,15 @@ def plot_results(
 
         ax2 = ax1.twinx()
         color_error = "tab:green"
-        ax2.set_ylabel("Relative Errors (%)", color=color_error)
+        ax2.set_ylabel("Relative Errors", color=color_error)
         (line3,) = ax2.step(
-            budgets, errors, marker="s", linestyle="-", label="Optimized Relative Errors", color=color_error
+            budgets,
+            errors,
+            marker="s",
+            linestyle="-",
+            label="Optimized Relative Errors",
+            color=color_error,
+            where="post",
         )
         if original_error is not None:
             line4 = ax2.axhline(y=original_error, color=color_error, linestyle="--", label="Original Relative Error")
@@ -495,7 +530,7 @@ def plot_results(
         fig2, ax3 = plt.subplots(figsize=(10, 8))  # Adjust size as needed
 
         ax3.set_xlabel("Runtimes (seconds)")
-        ax3.set_ylabel("Relative Errors (%)")
+        ax3.set_ylabel("Relative Errors")
         ax3.set_title(f"Pareto Front of Optimized Programs ({prefix[:-1]})")
 
         scatter1 = ax3.scatter(runtimes, errors, label="Optimized Programs", color="blue")
@@ -523,7 +558,7 @@ def plot_results(
         pareto_front = np.array(pareto_front)
 
         (line_pareto,) = ax3.step(
-            pareto_front[:, 0], pareto_front[:, 1], linestyle="-", color="purple", label="Pareto Front"
+            pareto_front[:, 0], pareto_front[:, 1], linestyle="-", color="purple", label="Pareto Front", where="post"
         )
         ax3.set_yscale("log")
 
@@ -563,7 +598,7 @@ def plot_results(
         ax1.set_xlabel("Computation Cost Budget")
         ax1.set_ylabel("Runtimes (seconds)", color=color_runtime)
         (line1,) = ax1.step(
-            budgets, runtimes, marker="o", linestyle="-", label="Optimized Runtimes", color=color_runtime
+            budgets, runtimes, marker="o", linestyle="-", label="Optimized Runtimes", color=color_runtime, where="post"
         )
         if original_runtime is not None:
             line2 = ax1.axhline(y=original_runtime, color=color_runtime, linestyle="--", label="Original Runtime")
@@ -571,9 +606,15 @@ def plot_results(
 
         ax2 = ax1.twinx()
         color_error = "tab:green"
-        ax2.set_ylabel("Relative Errors (%)", color=color_error)
+        ax2.set_ylabel("Relative Errors", color=color_error)
         (line3,) = ax2.step(
-            budgets, errors, marker="s", linestyle="-", label="Optimized Relative Errors", color=color_error
+            budgets,
+            errors,
+            marker="s",
+            linestyle="-",
+            label="Optimized Relative Errors",
+            color=color_error,
+            where="post",
         )
         if original_error is not None:
             line4 = ax2.axhline(y=original_error, color=color_error, linestyle="--", label="Original Relative Error")
@@ -632,7 +673,7 @@ def plot_results(
         pareto_front = np.array(pareto_front)
 
         (line_pareto,) = ax3.step(
-            pareto_front[:, 0], pareto_front[:, 1], linestyle="-", color="purple", label="Pareto Front"
+            pareto_front[:, 0], pareto_front[:, 1], linestyle="-", color="purple", label="Pareto Front", where="post"
         )
         ax3.set_yscale("log")
 
@@ -848,10 +889,8 @@ def analyze_all_data(tmp_dir, thresholds=None):
 
         if original_error == 0:
             example_digits = 17
-            # print(f"Original program of {prefix} has zero relative error! Using 17 digits of accuracy.")
         else:
-            example_digits = min(-math.log10(original_error / 100), 17)
-            # print(f"Original program of {prefix} has {example_digits:.2f} digits of accuracy.")
+            example_digits = min(-math.log10(original_error), 17)
 
         original_digits.append(example_digits)
 
@@ -859,11 +898,9 @@ def analyze_all_data(tmp_dir, thresholds=None):
         for err in errors:
             if err == 0:
                 digits_list.append(17)
-                # print(f"Optimized program of {prefix} has zero relative error! Using 17 digits of accuracy.")
             elif err is not None:
-                digits = min(-math.log10(err / 100), 17)
+                digits = min(-math.log10(err), 17)
                 digits_list.append(digits)
-                # print(f"Optimized program of {prefix} has {digits:.2f} digits of accuracy.")
             else:
                 digits_list.append(None)
 
@@ -888,17 +925,14 @@ def analyze_all_data(tmp_dir, thresholds=None):
         for threshold in thresholds:
             min_ratio = None
             for err, runtime in zip(errors, runtimes):
-                if err is not None and runtime is not None and err <= threshold * 100:
-                    # print(f"Threshold: {threshold}, Error: {err}, Runtime: {runtime}")
+                if err is not None and runtime is not None and err <= threshold:
                     runtime_ratio = runtime / original_runtime
                     if min_ratio is None or runtime_ratio < min_ratio:
                         min_ratio = runtime_ratio
             if min_ratio is not None:
                 min_runtime_ratios[threshold][prefix] = min_ratio
-                # print(f"Threshold: {threshold}, maximum runtime improvement for {prefix}: {(1 - min_ratio) * 100:.2f}%")
 
-    log_sum = sum(math.log1p(digits) for digits in original_digits)
-    geo_mean = math.expm1(log_sum / len(original_digits))
+    geo_mean = geomean(original_digits)
     print(f"Original programs have {geo_mean:.2f} decimal digits of accuracy on average.")
     print(f"Original programs have {max(original_digits):.2f} decimal digits of accuracy at most.")
 
@@ -906,24 +940,16 @@ def analyze_all_data(tmp_dir, thresholds=None):
     overall_runtime_improvements = {}
     for threshold in thresholds:
         ratios = min_runtime_ratios[threshold].values()
-        # print(f"\nThreshold: {threshold}, Number of valid runtime ratios: {len(ratios)}")
         if ratios:
-            log_sum = sum(math.log1p(min(1, ratio)) for ratio in ratios)
-            geo_mean_ratio = math.expm1(log_sum / len(ratios))
+            log_sum = sum(math.log(min(1, ratio)) for ratio in ratios)
+            geo_mean_ratio = math.exp(log_sum / len(ratios))
             percentage_improvement = (1 - geo_mean_ratio) * 100
             overall_runtime_improvements[threshold] = percentage_improvement
         else:
             overall_runtime_improvements[threshold] = None
 
-    # ratios1 = min_runtime_ratios[1e-06].keys()
-    # ratios2 = min_runtime_ratios[1e-04].keys()
-
-    # for i in ratios2:
-    #     if i not in ratios1:
-    #         print(i)
-
     # Print maximum accuracy improvements per benchmark
-    print("Maximum accuracy improvements (in number of bits) per benchmark:")
+    print("Maximum accuracy improvements (in number of digits) per benchmark:")
     for prefix in prefixes:
         improvement = max_accuracy_improvements.get(prefix)
         if improvement:
@@ -941,8 +967,8 @@ def analyze_all_data(tmp_dir, thresholds=None):
     else:
         try:
             log_sum = sum(math.log(impr) for impr in positive_improvements)
-            geo_mean = math.exp(log_sum / len(positive_improvements))
-            print(f"\nGeometric mean of maximum accuracy improvements: {geo_mean:.2f} decimal digits")
+            geo_mean_improvement = math.exp(log_sum / len(positive_improvements))
+            print(f"\nGeometric mean of maximum accuracy improvements: {geo_mean_improvement:.2f} decimal digits")
         except ValueError as e:
             print(f"\nError in computing geometric mean: {e}")
 
@@ -955,6 +981,44 @@ def analyze_all_data(tmp_dir, thresholds=None):
         else:
             print(f"Allowed relative error ≤ {threshold}: No data")
 
+    # --- New Section: Geometric Mean of Relative Errors ---
+    # We'll compute:
+    # 1. The geometric mean of the original relative errors (from data["original_error"])
+    # 2. The geometric mean of the minimum achievable optimized relative errors (the smallest non-None, positive error from data["errors"])
+
+    original_errors = []
+    min_optimized_errors = []
+
+    for prefix, data in data_list:
+        orig_err = data["original_error"]
+        if orig_err is not None and orig_err > 0:
+            original_errors.append(orig_err)
+        # For optimized errors, consider all non-None and positive errors
+        valid_optimized_errors = [err for err in data["errors"] if err is not None and err > 0]
+        if valid_optimized_errors:
+            min_optimized_errors.append(min(valid_optimized_errors))
+
+    def compute_geomean(values):
+        if not values:
+            return None
+        # If any value is zero, the geometric mean is zero.
+        if any(v == 0 for v in values):
+            return 0.0
+        return math.exp(sum(math.log(v) for v in values) / len(values))
+
+    geo_mean_original_error = compute_geomean(original_errors)
+    geo_mean_min_optimized_error = compute_geomean(min_optimized_errors)
+
+    if geo_mean_original_error is not None:
+        print(f"\nGeometric mean of original relative errors: {geo_mean_original_error:.2e}")
+    else:
+        print("\nNo original relative errors available to compute geometric mean.")
+
+    if geo_mean_min_optimized_error is not None:
+        print(f"Geometric mean of minimum achievable optimized relative errors: {geo_mean_min_optimized_error:.2e}")
+    else:
+        print("No optimized relative errors available to compute geometric mean.")
+
 
 def build_with_benchmark(tmp_dir, logs_dir, plots_dir, prefix, num_parallel=1):
     build_all(tmp_dir, logs_dir, prefix)