Skip to content

Check test cases with measurements #2161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions collector/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ anyhow = { workspace = true }
chrono = { workspace = true, features = ["serde"] }
clap = { workspace = true, features = ["derive"] }
env_logger = { workspace = true }
hashbrown = { workspace = true }
log = { workspace = true }
reqwest = { workspace = true, features = ["blocking", "json"] }
serde = { workspace = true, features = ["derive"] }
Expand Down
12 changes: 6 additions & 6 deletions collector/src/bin/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ fn profile_compile(
toolchain,
Some(1),
targets,
// We always want to profile everything
&hashbrown::HashSet::new(),
));
eprintln!("Finished benchmark {benchmark_id}");

Expand Down Expand Up @@ -1804,11 +1806,8 @@ async fn bench_compile(
print_intro: &dyn Fn(),
measure: F,
) {
let is_fresh = collector.start_compile_step(conn, benchmark_name).await;
if !is_fresh {
eprintln!("skipping {} -- already benchmarked", benchmark_name);
return;
}
collector.start_compile_step(conn, benchmark_name).await;

let mut tx = conn.transaction().await;
let (supports_stable, category) = category.db_representation();
tx.conn()
Expand All @@ -1819,7 +1818,7 @@ async fn bench_compile(
tx.conn(),
benchmark_name,
&shared.artifact_id,
collector.artifact_row_id,
collector,
config.is_self_profile,
);
let result = measure(&mut processor).await;
Expand Down Expand Up @@ -1866,6 +1865,7 @@ async fn bench_compile(
&shared.toolchain,
config.iterations,
&config.targets,
&collector.measured_compile_test_cases,
))
.await
.with_context(|| anyhow::anyhow!("Cannot compile {}", benchmark.name))
Expand Down
119 changes: 102 additions & 17 deletions collector/src/compile/benchmark/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::compile::execute::{CargoProcess, Processor};
use crate::toolchain::Toolchain;
use crate::utils::wait_for_future;
use anyhow::{bail, Context};
use database::selector::CompileTestCase;
use log::debug;
use std::collections::{HashMap, HashSet};
use std::fmt::{Display, Formatter};
Expand Down Expand Up @@ -243,6 +244,7 @@ impl Benchmark {
toolchain: &Toolchain,
iterations: Option<usize>,
targets: &[Target],
already_computed: &hashbrown::HashSet<CompileTestCase>,
) -> anyhow::Result<()> {
if self.config.disabled {
eprintln!("Skipping {}: disabled", self.name);
Expand Down Expand Up @@ -273,19 +275,65 @@ impl Benchmark {
return Ok(());
}

eprintln!("Preparing {}", self.name);
let mut target_dirs: Vec<((CodegenBackend, Profile, Target), TempDir)> = vec![];
// One unit of benchmarking work: a single (profile, backend, target)
// combination together with the scenarios that still need to be measured.
// Scenarios are grouped here because they share one target directory.
struct BenchmarkDir {
// Temporary target directory shared by all scenarios of this combination.
dir: TempDir,
// Scenarios that were not filtered out as already computed (sorted by the
// code that constructs this struct).
scenarios: Vec<Scenario>,
profile: Profile,
backend: CodegenBackend,
target: Target,
}

// Materialize the test cases that we want to benchmark
// We need to handle scenarios a bit specially, because they share the target directory
let mut benchmark_dirs: Vec<BenchmarkDir> = vec![];

for backend in backends {
for profile in &profiles {
for target in targets {
target_dirs.push((
(*backend, *profile, *target),
self.make_temp_dir(&self.path)?,
));
// Do we have any scenarios left to compute?
let mut remaining_scenarios = scenarios
.iter()
.flat_map(|scenario| {
self.create_test_cases(scenario, profile, backend, target)
.into_iter()
.map(|test_case| (*scenario, test_case))
})
.filter(|(_, test_case)| !already_computed.contains(test_case))
.map(|(scenario, _)| scenario)
.collect::<HashSet<Scenario>>()
.into_iter()
.collect::<Vec<Scenario>>();
if remaining_scenarios.is_empty() {
continue;
}
remaining_scenarios.sort();

let temp_dir = self.make_temp_dir(&self.path)?;
benchmark_dirs.push(BenchmarkDir {
dir: temp_dir,
scenarios: remaining_scenarios,
profile: *profile,
backend: *backend,
target: *target,
});
}
}
}

if benchmark_dirs.is_empty() {
eprintln!(
"Skipping {}: all test cases were previously computed",
self.name
);
return Ok(());
}

eprintln!(
"Preparing {} (test cases: {})",
self.name,
benchmark_dirs.len()
);

// In parallel (but with a limit to the number of CPUs), prepare all
// profiles. This is done in parallel vs. sequentially because:
// * We don't record any measurements during this phase, so the
Expand Down Expand Up @@ -319,18 +367,18 @@ impl Benchmark {
.get(),
)
.context("jobserver::new")?;
let mut threads = Vec::with_capacity(target_dirs.len());
for ((backend, profile, target), prep_dir) in &target_dirs {
let mut threads = Vec::with_capacity(benchmark_dirs.len());
for benchmark_dir in &benchmark_dirs {
let server = server.clone();
let thread = s.spawn::<_, anyhow::Result<()>>(move || {
wait_for_future(async move {
let server = server.clone();
self.mk_cargo_process(
toolchain,
prep_dir.path(),
*profile,
*backend,
*target,
benchmark_dir.dir.path(),
benchmark_dir.profile,
benchmark_dir.backend,
benchmark_dir.target,
)
.jobserver(server)
.run_rustc(false)
Expand Down Expand Up @@ -365,10 +413,11 @@ impl Benchmark {
let mut timing_dirs: Vec<ManuallyDrop<TempDir>> = vec![];

let benchmark_start = std::time::Instant::now();
for ((backend, profile, target), prep_dir) in &target_dirs {
let backend = *backend;
let profile = *profile;
let target = *target;
for benchmark_dir in &benchmark_dirs {
let backend = benchmark_dir.backend;
let profile = benchmark_dir.profile;
let target = benchmark_dir.target;
let scenarios = &benchmark_dir.scenarios;
eprintln!(
"Running {}: {:?} + {:?} + {:?} + {:?}",
self.name, profile, scenarios, backend, target,
Expand All @@ -388,7 +437,7 @@ impl Benchmark {
}
log::debug!("Benchmark iteration {}/{}", i + 1, iterations);
// Don't delete the directory on error.
let timing_dir = ManuallyDrop::new(self.make_temp_dir(prep_dir.path())?);
let timing_dir = ManuallyDrop::new(self.make_temp_dir(benchmark_dir.dir.path())?);
let cwd = timing_dir.path();

// A full non-incremental build.
Expand Down Expand Up @@ -458,6 +507,42 @@ impl Benchmark {

Ok(())
}

/// Materializes the database [`CompileTestCase`]s that running `scenario`
/// with the given `profile`, `backend` and `target` would produce for this
/// benchmark.
///
/// `Scenario::IncrPatched` yields one test case per patch of the benchmark;
/// every other scenario corresponds to exactly one test case.
fn create_test_cases(
    &self,
    scenario: &Scenario,
    profile: &Profile,
    backend: &CodegenBackend,
    target: &Target,
) -> Vec<CompileTestCase> {
    // Build a test case for the given DB scenario, translating the
    // collector-side enums into their database representations.
    let make = |scenario: database::Scenario| CompileTestCase {
        benchmark: database::Benchmark::from(self.name.0.as_str()),
        profile: match profile {
            Profile::Check => database::Profile::Check,
            Profile::Debug => database::Profile::Debug,
            Profile::Doc => database::Profile::Doc,
            Profile::DocJson => database::Profile::DocJson,
            Profile::Opt => database::Profile::Opt,
            Profile::Clippy => database::Profile::Clippy,
        },
        scenario,
        backend: match backend {
            CodegenBackend::Llvm => database::CodegenBackend::Llvm,
            CodegenBackend::Cranelift => database::CodegenBackend::Cranelift,
        },
        target: match target {
            Target::X86_64UnknownLinuxGnu => database::Target::X86_64UnknownLinuxGnu,
        },
    };

    match scenario {
        // Only incremental-patched runs are per-patch.
        Scenario::IncrPatched => self
            .patches
            .iter()
            .map(|patch| make(database::Scenario::IncrementalPatch(patch.name)))
            .collect(),
        // The remaining scenarios map to a single test case. Note that the
        // previous implementation iterated over `self.patches` for these
        // scenarios too, which produced duplicate test cases and — for a
        // benchmark with no patches — produced *no* test cases at all,
        // causing such benchmarks to be wrongly skipped as already computed.
        Scenario::Full => vec![make(database::Scenario::Empty)],
        Scenario::IncrFull => vec![make(database::Scenario::IncrementalEmpty)],
        Scenario::IncrUnchanged => vec![make(database::Scenario::IncrementalFresh)],
    }
}
}

/// Directory containing compile-time benchmarks.
Expand Down
4 changes: 3 additions & 1 deletion collector/src/compile/benchmark/scenario.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, clap::ValueEnum, serde::Deserialize)]
#[derive(
Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, clap::ValueEnum, serde::Deserialize,
)]
#[value(rename_all = "PascalCase")]
pub enum Scenario {
Full,
Expand Down
21 changes: 14 additions & 7 deletions collector/src/compile/execute/bencher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::compile::execute::{
};
use crate::toolchain::Toolchain;
use crate::utils::git::get_rustc_perf_commit;
use crate::CollectorCtx;
use anyhow::Context;
use database::CollectionId;
use futures::stream::FuturesUnordered;
Expand Down Expand Up @@ -42,7 +43,7 @@ pub struct BenchProcessor<'a> {
benchmark: &'a BenchmarkName,
conn: &'a mut dyn database::Connection,
artifact: &'a database::ArtifactId,
artifact_row_id: database::ArtifactIdNumber,
collector_ctx: &'a CollectorCtx,
is_first_collection: bool,
is_self_profile: bool,
tries: u8,
Expand All @@ -54,7 +55,7 @@ impl<'a> BenchProcessor<'a> {
conn: &'a mut dyn database::Connection,
benchmark: &'a BenchmarkName,
artifact: &'a database::ArtifactId,
artifact_row_id: database::ArtifactIdNumber,
collector_ctx: &'a CollectorCtx,
is_self_profile: bool,
) -> Self {
// Check we have `perf` or (`xperf.exe` and `tracelog.exe`) available.
Expand All @@ -78,7 +79,7 @@ impl<'a> BenchProcessor<'a> {
conn,
benchmark,
artifact,
artifact_row_id,
collector_ctx,
is_first_collection: true,
is_self_profile,
tries: 0,
Expand Down Expand Up @@ -108,7 +109,7 @@ impl<'a> BenchProcessor<'a> {
for (stat, value) in stats.iter() {
buf.push(self.conn.record_statistic(
collection,
self.artifact_row_id,
self.collector_ctx.artifact_row_id,
self.benchmark.0.as_str(),
profile,
scenario,
Expand All @@ -123,7 +124,13 @@ impl<'a> BenchProcessor<'a> {
}

/// Delegates to `rustc::measure` to benchmark `toolchain`'s rustc itself,
/// recording results under this collector's artifact row.
pub async fn measure_rustc(&mut self, toolchain: &Toolchain) -> anyhow::Result<()> {
    let artifact_row_id = self.collector_ctx.artifact_row_id;
    rustc::measure(self.conn, toolchain, self.artifact, artifact_row_id).await
}
}

Expand Down Expand Up @@ -252,7 +259,7 @@ impl Processor for BenchProcessor<'_> {
.map(|profile| {
self.conn.record_raw_self_profile(
profile.collection,
self.artifact_row_id,
self.collector_ctx.artifact_row_id,
self.benchmark.0.as_str(),
profile.profile,
profile.scenario,
Expand All @@ -270,7 +277,7 @@ impl Processor for BenchProcessor<'_> {

// FIXME: Record codegen backend in the self profile name
let prefix = PathBuf::from("self-profile")
.join(self.artifact_row_id.0.to_string())
.join(self.collector_ctx.artifact_row_id.0.to_string())
.join(self.benchmark.0.as_str())
.join(profile.profile.to_string())
.join(profile.scenario.to_id());
Expand Down
23 changes: 16 additions & 7 deletions collector/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ pub mod utils;

use crate::compile::benchmark::{Benchmark, BenchmarkName};
use crate::runtime::{BenchmarkGroup, BenchmarkSuite};
use database::selector::CompileTestCase;
use database::{ArtifactId, ArtifactIdNumber, Connection};
use hashbrown::HashSet;
use process::Stdio;
use std::time::{Duration, Instant};

Expand Down Expand Up @@ -330,23 +332,30 @@ impl CollectorStepBuilder {
tx.commit().await.unwrap();
artifact_row_id
};
CollectorCtx { artifact_row_id }
// Find out which test cases were already computed
let measured_compile_test_cases = conn
.get_compile_test_cases_with_measurements(&artifact_row_id)
.await
.expect("cannot fetch measured compile test cases from DB");

CollectorCtx {
artifact_row_id,
measured_compile_test_cases,
}
}
}

/// Represents an in-progress run for a given artifact.
pub struct CollectorCtx {
// Database row id of the artifact this collection run is recording against.
pub artifact_row_id: ArtifactIdNumber,
/// Which test cases were already computed **before** this collection began?
pub measured_compile_test_cases: HashSet<CompileTestCase>,
}

impl CollectorCtx {
pub async fn start_compile_step(
&self,
conn: &dyn Connection,
benchmark_name: &BenchmarkName,
) -> bool {
pub async fn start_compile_step(&self, conn: &dyn Connection, benchmark_name: &BenchmarkName) {
conn.collector_start_step(self.artifact_row_id, &benchmark_name.0)
.await
.await;
}

pub async fn end_compile_step(&self, conn: &dyn Connection, benchmark_name: &BenchmarkName) {
Expand Down
Loading
Loading