From b70afae882ba068eb5ec76bc1990ded1399e2c06 Mon Sep 17 00:00:00 2001 From: sbrantq Date: Thu, 13 Feb 2025 05:20:13 -0600 Subject: [PATCH] fix up geomean & misc --- experiments/ablation.py | 934 -------------------------------------- experiments/example.c | 22 - experiments/fp-logger.cpp | 105 +++-- experiments/run.py | 140 ++++-- 4 files changed, 176 insertions(+), 1025 deletions(-) delete mode 100644 experiments/ablation.py delete mode 100644 experiments/example.c diff --git a/experiments/ablation.py b/experiments/ablation.py deleted file mode 100644 index 78608ba..0000000 --- a/experiments/ablation.py +++ /dev/null @@ -1,934 +0,0 @@ -#!/usr/bin/env python3 - -import os -import subprocess -import sys -import shutil -import re -import argparse -import matplotlib.pyplot as plt -import math -import random -import numpy as np -import json -from statistics import mean -import pickle -from tqdm import trange -from matplotlib import rcParams -from concurrent.futures import ProcessPoolExecutor, as_completed - -HOME = "/home/sbrantq" -ENZYME_PATH = os.path.join(HOME, "sync/Enzyme/build/Enzyme/ClangEnzyme-15.so") -LLVM_PATH = os.path.join(HOME, "llvms/llvm15/build/bin") -CXX = os.path.join(LLVM_PATH, "clang++") - -CXXFLAGS = [ - "-O3", - "-I" + os.path.join(HOME, "include"), - "-L" + os.path.join(HOME, "lib"), - "-I/usr/include/c++/11", - "-I/usr/include/x86_64-linux-gnu/c++/11", - "-L/usr/lib/gcc/x86_64-linux-gnu/11", - "-fno-exceptions", - f"-fpass-plugin={ENZYME_PATH}", - "-Xclang", - "-load", - "-Xclang", - ENZYME_PATH, - "-lmpfr", - "-ffast-math", - "-fno-finite-math-only", - "-fuse-ld=lld", -] - -FPOPTFLAGS_BASE_TEMPLATE = [ - "-mllvm", - "--enzyme-enable-fpopt", - "-mllvm", - "--enzyme-print-herbie", - "-mllvm", - "--enzyme-print-fpopt", - "-mllvm", - "--fpopt-log-path=example.txt", - "-mllvm", - "--fpopt-target-func-regex=example", - "-mllvm", - "--fpopt-enable-solver", - "-mllvm", - "--fpopt-enable-pt", - "-mllvm", - "--fpopt-cost-dom-thres=0", - "-mllvm", - "--fpopt-acc-dom-thres=0", - "-mllvm", - "--fpopt-comp-cost-budget=0", - "-mllvm", - "--herbie-num-threads=8", - "-mllvm", - "--herbie-timeout=1000", - "-mllvm", - "--fpopt-num-samples=1024", - "-mllvm", - "--fpopt-cost-model-path=/home/sbrantq/sync/FPBench/microbm/cm.csv", - "-mllvm", - "-fpopt-cache-path=cache", -] - -SRC = "example.c" -LOGGER = "fp-logger.cpp" -NUM_RUNS = 10 -DRIVER_NUM_SAMPLES = 10000000 -LOG_NUM_SAMPLES = 10000 -MAX_TESTED_COSTS = 999 - - -def run_command(command, description, capture_output=False, output_file=None, verbose=True, timeout=None): - print(f"=== {description} ===") - print("Running:", " ".join(command)) - try: - if capture_output and output_file: - with open(output_file, "w") as f: - subprocess.check_call(command, stdout=f, stderr=subprocess.STDOUT, timeout=timeout) - elif capture_output: - result = subprocess.run(command, capture_output=True, text=True, check=True, timeout=timeout) - return result.stdout - else: - if verbose: - subprocess.check_call(command, timeout=timeout) - else: - subprocess.check_call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout) - except subprocess.TimeoutExpired: - print(f"Command '{' '.join(command)}' timed out after {timeout} seconds.") - return - except subprocess.CalledProcessError as e: - print(f"Error during: {description}") - if capture_output and output_file: - print(f"Check the output file: {output_file} for details.") - else: - print(e) - sys.exit(e.returncode) - - -def clean(tmp_dir, logs_dir, plots_dir): - print("=== Cleaning up generated files ===") - directories = [tmp_dir, logs_dir, plots_dir] - for directory in directories: - if os.path.exists(directory): - shutil.rmtree(directory) - print(f"Removed directory: {directory}") - - -def generate_example_cpp(tmp_dir, original_prefix, prefix): - script = "fpopt-original-driver-generator.py" - print(f"=== Running {script} ===") - src_prefixed = os.path.join(tmp_dir, f"{original_prefix}{SRC}") - dest_prefixed = os.path.join(tmp_dir, f"{prefix}example.cpp") - run_command( - ["python3", script, src_prefixed, dest_prefixed, "example", str(DRIVER_NUM_SAMPLES)], - f"Generating example.cpp from {SRC}", - ) - if not os.path.exists(dest_prefixed): - print(f"Failed to generate {dest_prefixed}.") - sys.exit(1) - print(f"Generated {dest_prefixed} successfully.") - - -def generate_example_logged_cpp(tmp_dir, original_prefix, prefix): - script = "fpopt-logged-driver-generator.py" - print(f"=== Running {script} ===") - src_prefixed = os.path.join(tmp_dir, f"{original_prefix}{SRC}") - dest_prefixed = os.path.join(tmp_dir, f"{prefix}example-logged.cpp") - run_command( - ["python3", script, src_prefixed, dest_prefixed, "example", str(LOG_NUM_SAMPLES)], - f"Generating example-logged.cpp from {SRC}", - ) - if not os.path.exists(dest_prefixed): - print(f"Failed to generate {dest_prefixed}.") - sys.exit(1) - print(f"Generated {dest_prefixed} successfully.") - - -def compile_example_exe(tmp_dir, prefix): - source = os.path.join(tmp_dir, f"{prefix}example.cpp") - output = os.path.join(tmp_dir, f"{prefix}example.exe") - cmd = [CXX, source] + CXXFLAGS + ["-o", output] - run_command(cmd, f"Compiling {output}") - - -def compile_example_logged_exe(tmp_dir, prefix): - source = os.path.join(tmp_dir, f"{prefix}example-logged.cpp") - output = os.path.join(tmp_dir, f"{prefix}example-logged.exe") - cmd = [CXX, source, LOGGER] + CXXFLAGS + ["-o", output] - run_command(cmd, f"Compiling {output}") - - -def generate_example_txt(tmp_dir, prefix): - exe = os.path.join(tmp_dir, f"{prefix}example-logged.exe") - output = os.path.join(tmp_dir, f"{prefix}example.txt") - if not os.path.exists(exe): - print(f"Executable {exe} not found. Cannot generate {output}.") - sys.exit(1) - with open(output, "w") as f: - print(f"=== Running {exe} to generate {output} ===") - try: - subprocess.check_call([exe], stdout=f) - except subprocess.TimeoutExpired: - print(f"Execution of {exe} timed out.") - if os.path.exists(exe): - os.remove(exe) - print(f"Removed executable {exe} due to timeout.") - return - except subprocess.CalledProcessError as e: - print(f"Error running {exe}") - sys.exit(e.returncode) - - -def compile_example_fpopt_exe(tmp_dir, prefix, fpoptflags, output="example-fpopt.exe", verbose=True): - source = os.path.join(tmp_dir, f"{prefix}example.cpp") - output_path = os.path.join(tmp_dir, f"{prefix}{output}") - cmd = [CXX, source] + CXXFLAGS + fpoptflags + ["-o", output_path] - log_path = os.path.join("logs", f"{prefix}compile_fpopt.log") - if output == "example-fpopt.exe": - run_command( - cmd, - f"Compiling {output_path} with FPOPTFLAGS", - capture_output=True, - output_file=log_path, - verbose=verbose, - ) - else: - run_command( - cmd, - f"Compiling {output_path} with FPOPTFLAGS", - verbose=verbose, - ) - - -def parse_critical_comp_costs(tmp_dir, prefix, log_path="compile_fpopt.log"): - print(f"=== Parsing critical computation costs from {log_path} ===") - full_log_path = os.path.join("logs", f"{prefix}{log_path}") - if not os.path.exists(full_log_path): - print(f"Log file {full_log_path} does not exist.") - sys.exit(1) - with open(full_log_path, "r") as f: - content = f.read() - - pattern = r"\*\*\* Critical Computation Costs \*\*\*(.*?)\*\*\* End of Critical Computation Costs \*\*\*" - match = re.search(pattern, content, re.DOTALL) - if not match: - print("Critical Computation Costs block not found in the log.") - sys.exit(1) - - costs_str = match.group(1).strip() - costs = [int(cost) for cost in costs_str.split(",") if re.fullmatch(r"-?\d+", cost.strip())] - print(f"Parsed computation costs: {costs}") - - if not costs: - print("No valid computation costs found to sample.") - sys.exit(1) - - num_to_sample = min(MAX_TESTED_COSTS, len(costs)) - - sampled_costs = random.sample(costs, num_to_sample) - - sampled_costs_sorted = sorted(sampled_costs) - - print(f"Sampled computation costs (sorted): {sampled_costs_sorted}") - - return sampled_costs_sorted - - -def measure_runtime(tmp_dir, prefix, executable, num_runs=NUM_RUNS): - print(f"=== Measuring runtime for {executable} ===") - runtimes = [] - exe_path = os.path.join(tmp_dir, f"{prefix}{executable}") - for i in trange(1, num_runs + 1): - try: - result = subprocess.run([exe_path], capture_output=True, text=True, check=True, timeout=300) - output = result.stdout - match = re.search(r"Total runtime: ([\d\.]+) seconds", output) - if match: - runtime = float(match.group(1)) - runtimes.append(runtime) - else: - print(f"Could not parse runtime from output on run {i}") - sys.exit(1) - except subprocess.TimeoutExpired: - print(f"Execution of {exe_path} timed out on run {i}") - if os.path.exists(exe_path): - os.remove(exe_path) - print(f"Removed executable {exe_path} due to timeout.") - return None - except subprocess.CalledProcessError as e: - print(f"Error running {exe_path} on run {i}") - sys.exit(e.returncode) - if runtimes: - average_runtime = mean(runtimes) - print(f"Average runtime for {prefix}{executable}: {average_runtime:.6f} seconds") - return average_runtime - else: - print(f"No successful runs for {prefix}{executable}") - return None - - -def get_values_file_path(tmp_dir, prefix, binary_name): - return os.path.join(tmp_dir, f"{prefix}{binary_name}-values.txt") - - -def generate_example_values(tmp_dir, prefix): - binary_name = "example.exe" - exe = os.path.join(tmp_dir, f"{prefix}{binary_name}") - output_values_file = get_values_file_path(tmp_dir, prefix, binary_name) - cmd = [exe, "--output-path", output_values_file] - run_command(cmd, f"Generating function values from {binary_name}", verbose=False, timeout=300) - - -def generate_values(tmp_dir, prefix, binary_name): - exe = os.path.join(tmp_dir, f"{prefix}{binary_name}") - values_file = get_values_file_path(tmp_dir, prefix, binary_name) - cmd = [exe, "--output-path", values_file] - run_command(cmd, f"Generating function values from {binary_name}", verbose=False, timeout=300) - - -def compile_golden_exe(tmp_dir, prefix): - source = os.path.join(tmp_dir, f"{prefix}golden.cpp") - output = os.path.join(tmp_dir, f"{prefix}golden.exe") - cmd = [CXX, source] + CXXFLAGS + ["-o", output] - run_command(cmd, f"Compiling {output}") - - -def generate_golden_values(tmp_dir, original_prefix, prefix): - script = "fpopt-golden-driver-generator.py" - src_prefixed = os.path.join(tmp_dir, f"{original_prefix}{SRC}") - dest_prefixed = os.path.join(tmp_dir, f"{prefix}golden.cpp") - cur_prec = 128 - max_prec = 4096 - PREC_step = 128 - prev_output = None - output_values_file = get_values_file_path(tmp_dir, prefix, "golden.exe") - while cur_prec <= max_prec: - run_command( - ["python3", script, src_prefixed, dest_prefixed, str(cur_prec), "example", str(DRIVER_NUM_SAMPLES)], - f"Generating golden.cpp with PREC={cur_prec}", - ) - if not os.path.exists(dest_prefixed): - print(f"Failed to generate {dest_prefixed}.") - sys.exit(1) - print(f"Generated {dest_prefixed} successfully.") - - compile_golden_exe(tmp_dir, prefix) - - exe = os.path.join(tmp_dir, f"{prefix}golden.exe") - cmd = [exe, "--output-path", output_values_file] - run_command(cmd, f"Generating golden values with PREC={cur_prec}", verbose=False) - - if not os.path.exists(output_values_file): - print(f"Failed to generate golden values at PREC={cur_prec} due to timeout.") - return - - with open(output_values_file, "r") as f: - output = f.read() - - if output == prev_output: - print(f"Golden values converged at PREC={cur_prec}") - break - else: - prev_output = output - cur_prec += PREC_step - else: - print(f"Failed to converge golden values up to PREC={max_prec}") - sys.exit(1) - - -def get_avg_rel_error(tmp_dir, prefix, golden_values_file, binaries): - with open(golden_values_file, "r") as f: - golden_values = [float(line.strip()) for line in f] - - errors = {} - for binary in binaries: - values_file = get_values_file_path(tmp_dir, prefix, binary) - if not os.path.exists(values_file): - print(f"Values file {values_file} does not exist. Skipping error calculation for {binary}.") - errors[binary] = None - continue - with open(values_file, "r") as f: - values = [float(line.strip()) for line in f] - if len(values) != len(golden_values): - print(f"Number of values in {values_file} does not match golden values") - sys.exit(1) - - valid_errors = [] - for v, g in zip(values, golden_values): - if math.isnan(v) or math.isnan(g): - continue - if g == 0: - continue - error = abs((v - g) / g) * 100 - valid_errors.append(error) - - if not valid_errors: - print(f"No valid data to compute rel error for binary {binary}. Setting rel error to None.") - errors[binary] = None - continue - - try: - log_sum = sum(math.log1p(e) for e in valid_errors) - geo_mean = math.expm1(log_sum / len(valid_errors)) - errors[binary] = geo_mean - except OverflowError: - print( - f"Overflow error encountered while computing geometric mean for binary {binary}. Setting rel error to None." - ) - errors[binary] = None - except ZeroDivisionError: - print(f"No valid errors to compute geometric mean for binary {binary}. Setting rel error to None.") - errors[binary] = None - - return errors - - -def build_all(tmp_dir, logs_dir, original_prefix, prefix, fpoptflags, example_txt_path): - generate_example_cpp(tmp_dir, original_prefix, prefix) - generate_example_logged_cpp(tmp_dir, original_prefix, prefix) - compile_example_exe(tmp_dir, prefix) - compile_example_logged_exe(tmp_dir, prefix) - if not os.path.exists(example_txt_path): - generate_example_txt(tmp_dir, original_prefix) - compile_example_fpopt_exe(tmp_dir, prefix, fpoptflags, output="example-fpopt.exe") - print("=== Initial build process completed successfully ===") - - -def process_cost(args): - cost, tmp_dir, prefix, fpoptflags = args - - print(f"\n=== Processing computation cost budget: {cost} ===") - fpoptflags_cost = [] - for flag in fpoptflags: - if flag.startswith("--fpopt-comp-cost-budget="): - fpoptflags_cost.append(f"--fpopt-comp-cost-budget={cost}") - else: - fpoptflags_cost.append(flag) - - output_binary = f"example-fpopt-{cost}.exe" - - compile_example_fpopt_exe(tmp_dir, prefix, fpoptflags_cost, output=output_binary, verbose=False) - generate_values(tmp_dir, prefix, output_binary) - - return cost, output_binary - - -def benchmark(tmp_dir, logs_dir, original_prefix, prefix, plots_dir, fpoptflags, num_parallel=1): - costs = parse_critical_comp_costs(tmp_dir, prefix) - - original_avg_runtime = measure_runtime(tmp_dir, prefix, "example.exe", NUM_RUNS) - original_runtime = original_avg_runtime - - if original_runtime is None: - print("Original binary timed out. Proceeding as if it doesn't exist.") - return - - generate_example_values(tmp_dir, prefix) - - generate_golden_values(tmp_dir, original_prefix, prefix) - - golden_values_file = get_values_file_path(tmp_dir, prefix, "golden.exe") - example_binary = "example.exe" - rel_errs_example = get_avg_rel_error(tmp_dir, prefix, golden_values_file, [example_binary]) - rel_err_example = rel_errs_example[example_binary] - print(f"Average Rel Error for {prefix}example.exe: {rel_err_example}") - - data_tuples = [] - - args_list = [(cost, tmp_dir, prefix, fpoptflags) for cost in costs] - - if num_parallel == 1: - for args in args_list: - cost, output_binary = process_cost(args) - data_tuples.append((cost, output_binary)) - else: - with ProcessPoolExecutor(max_workers=num_parallel) as executor: - future_to_cost = {executor.submit(process_cost, args): args[0] for args in args_list} - for future in as_completed(future_to_cost): - cost = future_to_cost[future] - try: - cost_result, output_binary = future.result() - data_tuples.append((cost_result, output_binary)) - except Exception as exc: - print(f"Cost {cost} generated an exception: {exc}") - - data_tuples_sorted = sorted(data_tuples, key=lambda x: x[0]) - sorted_budgets, sorted_optimized_binaries = zip(*data_tuples_sorted) if data_tuples_sorted else ([], []) - - sorted_runtimes = [] - for cost, output_binary in zip(sorted_budgets, sorted_optimized_binaries): - avg_runtime = measure_runtime(tmp_dir, prefix, output_binary, NUM_RUNS) - if avg_runtime is not None: - sorted_runtimes.append(avg_runtime) - else: - print(f"Skipping cost {cost} due to runtime measurement failure.") - sorted_runtimes.append(None) - - errors_dict = get_avg_rel_error(tmp_dir, prefix, golden_values_file, sorted_optimized_binaries) - sorted_errors = [] - for binary in sorted_optimized_binaries: - sorted_errors.append(errors_dict.get(binary)) - print(f"Average rel error for {binary}: {errors_dict.get(binary)}") - - sorted_budgets = list(sorted_budgets) - sorted_runtimes = list(sorted_runtimes) - sorted_errors = list(sorted_errors) - - data = { - "budgets": sorted_budgets, - "runtimes": sorted_runtimes, - "errors": sorted_errors, - "original_runtime": original_runtime, - "original_error": rel_err_example, - } - - table_json_path = os.path.join("cache", "table.json") - if os.path.exists(table_json_path): - with open(table_json_path, "r") as f: - table_data = json.load(f) - if "costToAccuracyMap" in table_data: - predicted_costs = [] - predicted_errors = [] - for k, v in table_data["costToAccuracyMap"].items(): - try: - cost_val = float(k) - err_val = float(v) - predicted_costs.append(cost_val) - predicted_errors.append(err_val) - except ValueError: - pass - pred_sorted_indices = np.argsort(predicted_costs) - predicted_costs = np.array(predicted_costs)[pred_sorted_indices].tolist() - predicted_errors = np.array(predicted_errors)[pred_sorted_indices].tolist() - data["predicted_data"] = {"costs": predicted_costs, "errors": predicted_errors} - - data_file = os.path.join(tmp_dir, f"{prefix}benchmark_data.pkl") - with open(data_file, "wb") as f: - pickle.dump(data, f) - print(f"Benchmark data saved to {data_file}") - - return data - - -def build_with_benchmark( - tmp_dir, logs_dir, plots_dir, original_prefix, prefix, fpoptflags, example_txt_path, num_parallel=1 -): - build_all(tmp_dir, logs_dir, original_prefix, prefix, fpoptflags, example_txt_path) - data = benchmark(tmp_dir, logs_dir, original_prefix, prefix, plots_dir, fpoptflags, num_parallel) - return data - - -def remove_cache_dir(): - cache_dir = "cache" - if os.path.exists(cache_dir): - shutil.rmtree(cache_dir) - print("=== Removed existing cache directory ===") - - -def plot_ablation_results(tmp_dir, plots_dir, original_prefix, prefix, output_format="png", show_prediction=False): - ablation_data_file = os.path.join(tmp_dir, f"{prefix}ablation-widen-range.pkl") - if not os.path.exists(ablation_data_file): - print(f"Ablation data file {ablation_data_file} does not exist. Cannot plot.") - sys.exit(1) - with open(ablation_data_file, "rb") as f: - all_data = pickle.load(f) - - if not all_data: - print("No data to plot.") - sys.exit(1) - - rcParams["font.size"] = 20 - rcParams["axes.titlesize"] = 24 - rcParams["axes.labelsize"] = 20 - rcParams["xtick.labelsize"] = 18 - rcParams["ytick.labelsize"] = 18 - rcParams["legend.fontsize"] = 18 - - if show_prediction and any("predicted_data" in d for d in all_data.values()): - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8)) - else: - fig, ax1 = plt.subplots(1, 1, figsize=(10, 8)) - ax2 = None - - # Extract original runtime/error from the first scenario - first_key = next(iter(all_data)) - original_runtime = all_data[first_key]["original_runtime"] - original_error = all_data[first_key]["original_error"] - - # Plot original program once - ax1.scatter(original_runtime, original_error, marker="x", color="black", s=100, label="Original Program") - - colors = ["blue", "green", "red", "purple", "orange", "brown", "pink", "gray", "olive", "cyan"] - color_iter = iter(colors) - - for X, data in sorted(all_data.items()): - budgets = data["budgets"] - runtimes = data["runtimes"] - errors = data["errors"] - - data_points = list(zip(runtimes, errors)) - filtered_data = [(r, e) for r, e in data_points if r is not None and e is not None] - if not filtered_data: - print(f"No valid data to plot for widen-range={X}.") - continue - runtimes_filtered, errors_filtered = zip(*filtered_data) - color = next(color_iter) - ax1.scatter(runtimes_filtered, errors_filtered, label=f"widen-range={X}", color=color) - points = np.array(filtered_data) - sorted_indices = np.argsort(points[:, 0]) - sorted_points = points[sorted_indices] - - # Compute pareto front - pareto_front = [sorted_points[0]] - for point in sorted_points[1:]: - if point[1] < pareto_front[-1][1]: - pareto_front.append(point) - - pareto_front = np.array(pareto_front) - ax1.step( - pareto_front[:, 0], - pareto_front[:, 1], - where="post", - linestyle="-", - color=color, - ) - - if show_prediction and "predicted_data" in data and ax2 is not None: - p_costs = data["predicted_data"]["costs"] - p_errors = data["predicted_data"]["errors"] - if p_costs and p_errors: - ax2.scatter(p_costs, p_errors, label=f"Prediction (widen-range={X})", color=color) - pred_points = np.column_stack((p_costs, p_errors)) - pred_sorted_indices = np.argsort(pred_points[:, 0]) - pred_sorted_points = pred_points[pred_sorted_indices] - - pred_pareto = [pred_sorted_points[0]] - for pt in pred_sorted_points[1:]: - if pt[1] < pred_pareto[-1][1]: - pred_pareto.append(pt) - pred_pareto = np.array(pred_pareto) - - ax2.step( - pred_pareto[:, 0], - pred_pareto[:, 1], - where="post", - linestyle="-", - color=color, - ) - - ax1.set_xlabel("Runtimes (seconds)") - ax1.set_ylabel("Relative Errors (%)") - ax1.set_title("Pareto Fronts for Different widen-range Values") - ax1.set_yscale("symlog", linthresh=1e-15) - ax1.set_ylim(bottom=0) - ax1.legend() - ax1.grid(True) - - if ax2 is not None: - ax2.set_xlabel("Cost Budget") - ax2.set_ylabel("Predicted Error") - ax2.set_title("Predicted Pareto Fronts") - ax2.set_yscale("symlog", linthresh=1e-15) - ax2.set_ylim(bottom=-1e-13) - ax2.grid(True) - - plot_filename = os.path.join(plots_dir, f"{prefix}ablation_widen_range_pareto_front.{output_format}") - plt.savefig(plot_filename, bbox_inches="tight", dpi=300) - plt.close() - print(f"Ablation plot saved to {plot_filename}") - - -def plot_ablation_results_cost_model( - tmp_dir, plots_dir, original_prefix, prefix, output_format="png", show_prediction=False -): - ablation_data_file = os.path.join(tmp_dir, f"{prefix}ablation-cost-model.pkl") - if not os.path.exists(ablation_data_file): - print(f"Ablation data file {ablation_data_file} does not exist. Cannot plot.") - sys.exit(1) - with open(ablation_data_file, "rb") as f: - all_data = pickle.load(f) - - if not all_data: - print("No data to plot.") - sys.exit(1) - - rcParams["font.size"] = 20 - rcParams["axes.titlesize"] = 24 - rcParams["axes.labelsize"] = 20 - rcParams["xtick.labelsize"] = 18 - rcParams["ytick.labelsize"] = 18 - rcParams["legend.fontsize"] = 18 - - if show_prediction and any("predicted_data" in d for d in all_data.values()): - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8)) - else: - fig, ax1 = plt.subplots(1, 1, figsize=(10, 8)) - ax2 = None - - # Extract original runtime/error from the first scenario - first_key = next(iter(all_data)) - original_runtime = all_data[first_key]["original_runtime"] - original_error = all_data[first_key]["original_error"] - - # Plot original program once - ax1.scatter(original_runtime, original_error, marker="x", color="black", s=100, label="Original Program") - - colors = ["blue", "green"] - labels = ["Custom Cost Model", "TTI Cost Model"] - - for idx, key in enumerate(["with_cost_model", "without_cost_model"]): - data = all_data[key] - budgets = data["budgets"] - runtimes = data["runtimes"] - errors = data["errors"] - - data_points = list(zip(runtimes, errors)) - filtered_data = [(r, e) for r, e in data_points if r is not None and e is not None] - if not filtered_data: - print(f"No valid data to plot for {labels[idx]}.") - continue - runtimes_filtered, errors_filtered = zip(*filtered_data) - color = colors[idx] - ax1.scatter(runtimes_filtered, errors_filtered, label=labels[idx], color=color) - points = np.array(filtered_data) - sorted_indices = np.argsort(points[:, 0]) - sorted_points = points[sorted_indices] - - pareto_front = [sorted_points[0]] - for point in sorted_points[1:]: - if point[1] < pareto_front[-1][1]: - pareto_front.append(point) - - pareto_front = np.array(pareto_front) - - ax1.step( - pareto_front[:, 0], - pareto_front[:, 1], - where="post", - linestyle="-", - color=color, - ) - - if show_prediction and "predicted_data" in data and ax2 is not None: - p_costs = data["predicted_data"]["costs"] - p_errors = data["predicted_data"]["errors"] - if p_costs and p_errors: - ax2.scatter(p_costs, p_errors, label=f"{labels[idx]}", color=color) - pred_points = np.column_stack((p_costs, p_errors)) - pred_sorted_indices = np.argsort(pred_points[:, 0]) - pred_sorted_points = pred_points[pred_sorted_indices] - - pred_pareto = [pred_sorted_points[0]] - for pt in pred_sorted_points[1:]: - if pt[1] < pred_pareto[-1][1]: - pred_pareto.append(pt) - pred_pareto = np.array(pred_pareto) - - ax2.step( - pred_pareto[:, 0], - pred_pareto[:, 1], - where="post", - linestyle="-", - color=color, - ) - - ax1.set_xlabel("Runtimes (seconds)") - ax1.set_ylabel("Relative Errors (%)") - ax1.set_title("Pareto Fronts for Cost Model Ablation") - ax1.set_yscale("symlog", linthresh=1e-14) - ax1.set_ylim(bottom=-1e-14) - ax1.legend() - ax1.grid(True) - - if ax2 is not None: - ax2.set_xlabel("Cost Budget") - ax2.set_ylabel("Predicted Error") - ax2.set_title("Predicted Pareto Front") - ax2.set_yscale("symlog", linthresh=1e-14) - ax2.set_ylim(bottom=-1e-14) - ax2.legend() - ax2.grid(True) - - plot_filename = os.path.join(plots_dir, f"{prefix}ablation_cost_model_pareto_front.{output_format}") - plt.savefig(plot_filename, bbox_inches="tight", dpi=300) - plt.close() - print(f"Ablation plot saved to {plot_filename}") - - -def remove_mllvm_flag(flags_list, flag_prefix): - new_flags = [] - i = 0 - while i < len(flags_list): - if flags_list[i] == "-mllvm" and i + 1 < len(flags_list) and flags_list[i + 1].startswith(flag_prefix): - i += 2 - else: - new_flags.append(flags_list[i]) - i += 1 - return new_flags - - -def main(): - parser = argparse.ArgumentParser(description="Run the ablation study with widen-range parameter.") - parser.add_argument("--prefix", type=str, required=True, help="Prefix for intermediate files (e.g., rosa-ex23-)") - parser.add_argument("--clean", action="store_true", help="Clean up generated files") - parser.add_argument("--plot-only", action="store_true", help="Plot results from existing data") - parser.add_argument("--output-format", type=str, default="png", help="Output format for plots (e.g., png, pdf)") - parser.add_argument( - "--num-parallel", type=int, default=16, help="Number of parallel processes to use (default: 16)" - ) - parser.add_argument( - "--ablation-type", - type=str, - choices=["widen-range", "cost-model"], - default="widen-range", - help="Type of ablation study to perform (default: widen-range)", - ) - parser.add_argument("--show-prediction", action="store_true", help="Show predicted results in a second subplot") - - args = parser.parse_args() - - original_prefix = args.prefix - if not original_prefix.endswith("-"): - original_prefix += "-" - - tmp_dir = "tmp" - logs_dir = "logs" - plots_dir = "plots" - - os.makedirs(tmp_dir, exist_ok=True) - os.makedirs(logs_dir, exist_ok=True) - os.makedirs(plots_dir, exist_ok=True) - - example_txt_path = os.path.join(tmp_dir, f"{original_prefix}example.txt") - - if args.clean: - clean(tmp_dir, logs_dir, plots_dir) - sys.exit(0) - elif args.plot_only: - if args.ablation_type == "widen-range": - plot_ablation_results( - tmp_dir, - plots_dir, - original_prefix, - original_prefix, - args.output_format, - show_prediction=args.show_prediction, - ) - elif args.ablation_type == "cost-model": - plot_ablation_results_cost_model( - tmp_dir, - plots_dir, - original_prefix, - original_prefix, - args.output_format, - show_prediction=args.show_prediction, - ) - sys.exit(0) - else: - if not os.path.exists(example_txt_path): - generate_example_logged_cpp(tmp_dir, original_prefix, original_prefix) - compile_example_logged_exe(tmp_dir, original_prefix) - generate_example_txt(tmp_dir, original_prefix) - - if args.ablation_type == "widen-range": - widen_ranges = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0] - all_data = {} - for X in widen_ranges: - print(f"=== Running ablation study with widen-range={X} ===") - remove_cache_dir() - FPOPTFLAGS_BASE = FPOPTFLAGS_BASE_TEMPLATE.copy() - for idx, flag in enumerate(FPOPTFLAGS_BASE): - if flag.startswith("--fpopt-log-path="): - FPOPTFLAGS_BASE[idx] = f"--fpopt-log-path={example_txt_path}" - FPOPTFLAGS_BASE.extend(["-mllvm", f"--fpopt-widen-range={X}"]) - - prefix_with_x = f"{original_prefix}abl-widen-range-{X}-" - - data = build_with_benchmark( - tmp_dir, - logs_dir, - plots_dir, - original_prefix, - prefix_with_x, - FPOPTFLAGS_BASE, - example_txt_path, - num_parallel=args.num_parallel, - ) - - all_data[X] = data - - ablation_data_file = os.path.join(tmp_dir, f"{original_prefix}ablation-widen-range.pkl") - with open(ablation_data_file, "wb") as f: - pickle.dump(all_data, f) - print(f"Ablation data saved to {ablation_data_file}") - - plot_ablation_results( - tmp_dir, - plots_dir, - original_prefix, - original_prefix, - args.output_format, - show_prediction=args.show_prediction, - ) - - if args.ablation_type == "cost-model": - print("=== Running cost-model ablation study ===") - remove_cache_dir() - FPOPTFLAGS_WITH_CM = FPOPTFLAGS_BASE_TEMPLATE.copy() - for idx, flag in enumerate(FPOPTFLAGS_WITH_CM): - if flag.startswith("--fpopt-log-path="): - FPOPTFLAGS_WITH_CM[idx] = f"--fpopt-log-path={example_txt_path}" - prefix_with_cm = f"{original_prefix}abl-with-cost-model-" - - data_with_cm = build_with_benchmark( - tmp_dir, - logs_dir, - plots_dir, - original_prefix, - prefix_with_cm, - FPOPTFLAGS_WITH_CM, - example_txt_path, - num_parallel=args.num_parallel, - ) - - remove_cache_dir() - FPOPTFLAGS_NO_CM = remove_mllvm_flag(FPOPTFLAGS_WITH_CM, "--fpopt-cost-model-path=") - prefix_without_cm = f"{original_prefix}abl-without-cost-model-" - - data_without_cm = build_with_benchmark( - tmp_dir, - logs_dir, - plots_dir, - original_prefix, - prefix_without_cm, - FPOPTFLAGS_NO_CM, - example_txt_path, - num_parallel=args.num_parallel, - ) - - all_data = { - "with_cost_model": data_with_cm, - "without_cost_model": data_without_cm, - } - - ablation_data_file = os.path.join(tmp_dir, f"{original_prefix}ablation-cost-model.pkl") - with open(ablation_data_file, "wb") as f: - pickle.dump(all_data, f) - print(f"Ablation data saved to {ablation_data_file}") - - plot_ablation_results_cost_model( - tmp_dir, - plots_dir, - original_prefix, - original_prefix, - args.output_format, - show_prediction=args.show_prediction, - ) - - -if __name__ == "__main__": - main() diff --git a/experiments/example.c b/experiments/example.c deleted file mode 100644 index 9dfe018..0000000 --- a/experiments/example.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include -#define TRUE 1 -#define FALSE 0 - -// ## PRE c: 1, 9 -// ## PRE a: 1, 9 -// ## PRE b: 1, 9 -__attribute__((noinline)) -double example(double a, double b, double c) { - double tmp; - if (a < b) { - tmp = sqrt(((((c + (b + a)) * (a - (c - b))) * (a + (c - b))) * - (c + (b - a)))) / - 4.0; - } else { - tmp = sqrt(((((c + (a + b)) * (b - (c - a))) * (b + (c - a))) * - (c + (a - b)))) / - 4.0; - } - return tmp; -} diff --git a/experiments/fp-logger.cpp b/experiments/fp-logger.cpp index 545e31d..ba7f129 100644 --- a/experiments/fp-logger.cpp +++ b/experiments/fp-logger.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -6,9 +7,62 @@ #include #include #include +#include #include -#include "fp-logger.hpp" +// https://arxiv.org/pdf/1806.06403 +double geomean(const std::vector &dataset, double epsilon = 1e-5) { + std::vector dataset_nozeros; + for (double x : dataset) { + if (x > 0.0) + dataset_nozeros.push_back(x); + } + + if (dataset_nozeros.empty()) { + return 0.0; + } + + double sum_log = 0.0; + for (double x : dataset_nozeros) { + sum_log += std::log(x); + } + double geomeanNozeros = std::exp(sum_log / dataset_nozeros.size()); + + double min_val = + *std::min_element(dataset_nozeros.begin(), dataset_nozeros.end()); + double deltamin = 0.0; + double deltamax = std::max(geomeanNozeros - min_val, 0.0); + double delta = (deltamin + deltamax) / 2.0; + double epsilon_threshold = epsilon * geomeanNozeros; + + auto compute_auxExp = [&](double d) -> double { + double sum = 0.0; + for (double x : dataset_nozeros) { + sum += std::log(x + d); + } + return std::exp(sum / dataset_nozeros.size()) - d; + }; + + double auxExp = compute_auxExp(delta); + + while ((auxExp - geomeanNozeros) > epsilon_threshold) { + if (auxExp < geomeanNozeros) + deltamin = delta; + else + deltamax = delta; + delta = (deltamin + deltamax) / 2.0; + auxExp = compute_auxExp(delta); + } + + double sum_log_all = 0.0; + for (double x : dataset) { + sum_log_all += std::log(x + delta); + } + double gmeanE = std::exp(sum_log_all / dataset.size()) - delta; + + assert(!std::isnan(gmeanE) && !std::isinf(gmeanE)); + return gmeanE; +} class ValueInfo { public: @@ -17,8 +71,7 @@ class ValueInfo { std::vector minOperands; std::vector maxOperands; unsigned executions = 0; - double logSum = 0.0; - unsigned logCount = 0; + std::vector loggedValues; void update(double res, const double *operands, unsigned numOperands) { minRes = std::min(minRes, res); @@ -33,17 +86,17 @@ class ValueInfo { } ++executions; - if (!std::isnan(res)) { - logSum += std::log1p(std::fabs(res)); - ++logCount; + if (!std::isnan(res) && !std::isinf(res)) { + loggedValues.push_back(std::fabs(res)); } } - double getGeometricAverage() const { - if (logCount == 0) { - return 0.; + double getGeomean() const { + if (loggedValues.empty()) { + return 0.0; } - return std::expm1(logSum / logCount); + + return geomean(loggedValues); } }; @@ -60,21 +113,20 @@ class ErrorInfo { class GradInfo { public: - double logSum = 0.0; - unsigned count = 0; + std::vector loggedValues; void update(double grad) { - if (!std::isnan(grad)) { - logSum += std::log1p(std::fabs(grad)); - ++count; + if (!std::isnan(grad) && !std::isinf(grad)) { + loggedValues.push_back(std::fabs(grad)); } } - double getGeometricAverage() const { - if (count == 0) { - return 0.; + double getGeomean() const { + if (loggedValues.empty()) { + return 0.0; } - return std::expm1(logSum / count); + + return geomean(loggedValues); } }; @@ -112,27 +164,18 @@ class Logger { std::cout << "\tMinRes = " << info.minRes << "\n"; std::cout << "\tMaxRes = " << info.maxRes << "\n"; std::cout << "\tExecutions = " << info.executions << "\n"; - std::cout << "\tGeometric Average = " << info.getGeometricAverage() - << "\n"; + std::cout << "\tGeometric Average = " << info.getGeomean() << "\n"; for (unsigned i = 0; i < info.minOperands.size(); ++i) { std::cout << "\tOperand[" << i << "] = [" << info.minOperands[i] << ", " << info.maxOperands[i] << "]\n"; } } - for (const auto &pair : errorInfo) { - const auto &id = pair.first; - const auto &info = pair.second; - std::cout << "Error:" << id << "\n"; - std::cout << "\tMinErr = " << info.minErr << "\n"; - std::cout << "\tMaxErr = " << info.maxErr << "\n"; - } - for (const auto &pair : gradInfo) { const auto &id = pair.first; const auto &info = pair.second; std::cout << "Grad:" << id << "\n"; - std::cout << "\tGrad = " << info.getGeometricAverage() << "\n"; + std::cout << "\tGrad = " << info.getGeomean() << "\n"; } } }; @@ -162,4 +205,4 @@ void enzymeLogValue(const char *id, double res, unsigned numOperands, double *operands) { assert(logger && "Logger is not initialized"); logger->updateValue(id, res, numOperands, operands); -} \ No newline at end of file +} diff --git a/experiments/run.py b/experiments/run.py index 8cd2461..032d983 100755 --- a/experiments/run.py +++ b/experiments/run.py @@ -8,6 +8,7 @@ import re import argparse import matplotlib.pyplot as plt +import scipy.stats.mstats as ssm import math import random import numpy as np @@ -19,7 +20,7 @@ from concurrent.futures import ProcessPoolExecutor, as_completed HOME = "/home/sbrantq" -ENZYME_PATH = os.path.join(HOME, "sync/Enzyme/build/Enzyme/ClangEnzyme-16.so") +ENZYME_PATH = os.path.join(HOME, "sync/Enzyme/build-debug/Enzyme/ClangEnzyme-16.so") LLVM_PATH = os.path.join(HOME, "llvms/llvm16/build/bin") CXX = os.path.join(LLVM_PATH, "clang++") @@ -80,6 +81,36 @@ MAX_TESTED_COSTS = 999 +# https://arxiv.org/pdf/1806.06403 +def geomean(dataset): + dataset = np.array(dataset) + epsilon = 1e-5 + + dataset_nozeros = dataset[dataset > 0] + + if len(dataset_nozeros) == 0: + return 0.0 + + geomeanNozeros = ssm.gmean(dataset_nozeros) + + deltamin = 0 + deltamax = geomeanNozeros - min(dataset_nozeros) + delta = (deltamin + deltamax) / 2 + + epsilon = epsilon * geomeanNozeros + auxExp = math.exp(np.mean(np.log(dataset_nozeros + delta))) - delta + while (auxExp - geomeanNozeros) > epsilon: + if auxExp < geomeanNozeros: + deltamin = delta + else: + deltamax = delta + delta = (deltamin + deltamax) / 2 + auxExp = math.exp(np.mean(np.log(dataset_nozeros + delta))) - delta + + gmeanE = math.exp(np.mean(np.log(dataset + delta))) - delta + return gmeanE + + def run_command(command, description, capture_output=False, output_file=None, verbose=True, timeout=None): print(f"=== {description} ===") print("Running:", " ".join(command)) @@ -383,7 +414,7 @@ def get_avg_rel_error(tmp_dir, prefix, golden_values_file, binaries): continue if g == 0: continue - error = abs((v - g) / g) * 100 + error = abs((v - g) / g) valid_errors.append(error) if not valid_errors: @@ -392,9 +423,7 @@ def get_avg_rel_error(tmp_dir, prefix, golden_values_file, binaries): continue try: - log_sum = sum(math.log1p(e) for e in valid_errors) - geo_mean = math.expm1(log_sum / len(valid_errors)) - errors[binary] = geo_mean + errors[binary] = geomean(valid_errors) except OverflowError: print( f"Overflow error encountered while computing geometric mean for binary {binary}. Setting rel error to None." @@ -444,7 +473,7 @@ def plot_results( ax1.set_xlabel("Computation Cost Budget") ax1.set_ylabel("Runtimes (seconds)", color=color_runtime) (line1,) = ax1.step( - budgets, runtimes, marker="o", linestyle="-", label="Optimized Runtimes", color=color_runtime + budgets, runtimes, marker="o", linestyle="-", label="Optimized Runtimes", color=color_runtime, where="post" ) if original_runtime is not None: line2 = ax1.axhline(y=original_runtime, color=color_runtime, linestyle="--", label="Original Runtime") @@ -452,9 +481,15 @@ def plot_results( ax2 = ax1.twinx() color_error = "tab:green" - ax2.set_ylabel("Relative Errors (%)", color=color_error) + ax2.set_ylabel("Relative Errors", color=color_error) (line3,) = ax2.step( - budgets, errors, marker="s", linestyle="-", label="Optimized Relative Errors", color=color_error + budgets, + errors, + marker="s", + linestyle="-", + label="Optimized Relative Errors", + color=color_error, + where="post", ) if original_error is not None: line4 = ax2.axhline(y=original_error, color=color_error, linestyle="--", label="Original Relative Error") @@ -495,7 +530,7 @@ def plot_results( fig2, ax3 = plt.subplots(figsize=(10, 8)) # Adjust size as needed ax3.set_xlabel("Runtimes (seconds)") - ax3.set_ylabel("Relative Errors (%)") + ax3.set_ylabel("Relative Errors") ax3.set_title(f"Pareto Front of Optimized Programs ({prefix[:-1]})") scatter1 = ax3.scatter(runtimes, errors, label="Optimized Programs", color="blue") @@ -523,7 +558,7 @@ def plot_results( pareto_front = np.array(pareto_front) (line_pareto,) = ax3.step( - pareto_front[:, 0], pareto_front[:, 1], linestyle="-", color="purple", label="Pareto Front" + pareto_front[:, 0], pareto_front[:, 1], linestyle="-", color="purple", label="Pareto Front", where="post" ) ax3.set_yscale("log") @@ -563,7 +598,7 @@ def plot_results( ax1.set_xlabel("Computation Cost Budget") ax1.set_ylabel("Runtimes (seconds)", color=color_runtime) (line1,) = ax1.step( - budgets, runtimes, marker="o", linestyle="-", label="Optimized Runtimes", color=color_runtime + budgets, runtimes, marker="o", linestyle="-", label="Optimized Runtimes", color=color_runtime, where="post" ) if original_runtime is not None: line2 = ax1.axhline(y=original_runtime, color=color_runtime, linestyle="--", label="Original Runtime") @@ -571,9 +606,15 @@ def plot_results( ax2 = ax1.twinx() color_error = "tab:green" - ax2.set_ylabel("Relative Errors (%)", color=color_error) + ax2.set_ylabel("Relative Errors", color=color_error) (line3,) = ax2.step( - budgets, errors, marker="s", linestyle="-", label="Optimized Relative Errors", color=color_error + budgets, + errors, + marker="s", + linestyle="-", + label="Optimized Relative Errors", + color=color_error, + where="post", ) if original_error is not None: line4 = ax2.axhline(y=original_error, color=color_error, linestyle="--", label="Original Relative Error") @@ -632,7 +673,7 @@ def plot_results( pareto_front = np.array(pareto_front) (line_pareto,) = ax3.step( - pareto_front[:, 0], pareto_front[:, 1], linestyle="-", color="purple", label="Pareto Front" + pareto_front[:, 0], pareto_front[:, 1], linestyle="-", color="purple", label="Pareto Front", where="post" ) ax3.set_yscale("log") @@ -848,10 +889,8 @@ def analyze_all_data(tmp_dir, thresholds=None): if original_error == 0: example_digits = 17 - # print(f"Original program of {prefix} has zero relative error! Using 17 digits of accuracy.") else: - example_digits = min(-math.log10(original_error / 100), 17) - # print(f"Original program of {prefix} has {example_digits:.2f} digits of accuracy.") + example_digits = min(-math.log10(original_error), 17) original_digits.append(example_digits) @@ -859,11 +898,9 @@ def analyze_all_data(tmp_dir, thresholds=None): for err in errors: if err == 0: digits_list.append(17) - # print(f"Optimized program of {prefix} has zero relative error! Using 17 digits of accuracy.") elif err is not None: - digits = min(-math.log10(err / 100), 17) + digits = min(-math.log10(err), 17) digits_list.append(digits) - # print(f"Optimized program of {prefix} has {digits:.2f} digits of accuracy.") else: digits_list.append(None) @@ -888,17 +925,14 @@ def analyze_all_data(tmp_dir, thresholds=None): for threshold in thresholds: min_ratio = None for err, runtime in zip(errors, runtimes): - if err is not None and runtime is not None and err <= threshold * 100: - # print(f"Threshold: {threshold}, Error: {err}, Runtime: {runtime}") + if err is not None and runtime is not None and err <= threshold: runtime_ratio = runtime / original_runtime if min_ratio is None or runtime_ratio < min_ratio: min_ratio = runtime_ratio if min_ratio is not None: min_runtime_ratios[threshold][prefix] = min_ratio - # print(f"Threshold: {threshold}, maximum runtime improvement for {prefix}: {(1 - min_ratio) * 100:.2f}%") - log_sum = sum(math.log1p(digits) for digits in original_digits) - geo_mean = math.expm1(log_sum / len(original_digits)) + geo_mean = geomean(original_digits) print(f"Original programs have {geo_mean:.2f} decimal digits of accuracy on average.") print(f"Original programs have {max(original_digits):.2f} decimal digits of accuracy at most.") @@ -906,24 +940,16 @@ def analyze_all_data(tmp_dir, thresholds=None): overall_runtime_improvements = {} for threshold in thresholds: ratios = min_runtime_ratios[threshold].values() - # print(f"\nThreshold: {threshold}, Number of valid runtime ratios: {len(ratios)}") if ratios: - log_sum = sum(math.log1p(min(1, ratio)) for ratio in ratios) - geo_mean_ratio = math.expm1(log_sum / len(ratios)) + log_sum = sum(math.log(min(1, ratio)) for ratio in ratios) + geo_mean_ratio = math.exp(log_sum / len(ratios)) percentage_improvement = (1 - geo_mean_ratio) * 100 overall_runtime_improvements[threshold] = percentage_improvement else: overall_runtime_improvements[threshold] = None - # ratios1 = min_runtime_ratios[1e-06].keys() - # ratios2 = min_runtime_ratios[1e-04].keys() - - # for i in ratios2: - # if i not in ratios1: - # print(i) - # Print maximum accuracy improvements per benchmark - print("Maximum accuracy improvements (in number of bits) per benchmark:") + print("Maximum accuracy improvements (in number of digits) per benchmark:") for prefix in prefixes: improvement = max_accuracy_improvements.get(prefix) if improvement: @@ -941,8 +967,8 @@ def analyze_all_data(tmp_dir, thresholds=None): else: try: log_sum = sum(math.log(impr) for impr in positive_improvements) - geo_mean = math.exp(log_sum / len(positive_improvements)) - print(f"\nGeometric mean of maximum accuracy improvements: {geo_mean:.2f} decimal digits") + geo_mean_improvement = math.exp(log_sum / len(positive_improvements)) + print(f"\nGeometric mean of maximum accuracy improvements: {geo_mean_improvement:.2f} decimal digits") except ValueError as e: print(f"\nError in computing geometric mean: {e}") @@ -955,6 +981,44 @@ def analyze_all_data(tmp_dir, thresholds=None): else: print(f"Allowed relative error ≤ {threshold}: No data") + # --- New Section: Geometric Mean of Relative Errors --- + # We'll compute: + # 1. The geometric mean of the original relative errors (from data["original_error"]) + # 2. The geometric mean of the minimum achievable optimized relative errors (the smallest non-None, positive error from data["errors"]) + + original_errors = [] + min_optimized_errors = [] + + for prefix, data in data_list: + orig_err = data["original_error"] + if orig_err is not None and orig_err > 0: + original_errors.append(orig_err) + # For optimized errors, consider all non-None and positive errors + valid_optimized_errors = [err for err in data["errors"] if err is not None and err > 0] + if valid_optimized_errors: + min_optimized_errors.append(min(valid_optimized_errors)) + + def compute_geomean(values): + if not values: + return None + # If any value is zero, the geometric mean is zero. + if any(v == 0 for v in values): + return 0.0 + return math.exp(sum(math.log(v) for v in values) / len(values)) + + geo_mean_original_error = compute_geomean(original_errors) + geo_mean_min_optimized_error = compute_geomean(min_optimized_errors) + + if geo_mean_original_error is not None: + print(f"\nGeometric mean of original relative errors: {geo_mean_original_error:.2e}") + else: + print("\nNo original relative errors available to compute geometric mean.") + + if geo_mean_min_optimized_error is not None: + print(f"Geometric mean of minimum achievable optimized relative errors: {geo_mean_min_optimized_error:.2e}") + else: + print("No optimized relative errors available to compute geometric mean.") + def build_with_benchmark(tmp_dir, logs_dir, plots_dir, prefix, num_parallel=1): build_all(tmp_dir, logs_dir, prefix)