Skip to content

Commit e11e01c

Browse files
committed
Add a function, get_ranked_cpus, that returns a pre-ranked list of CPU IDs and caches its contents for 10s; also add is_higher_ranked to directly compare two CPU IDs in the rankings.
Remove the pref_rank from cpu struct in favour of the new mechanism.
1 parent 7a2acb1 commit e11e01c

File tree

1 file changed

+69
-18
lines changed

1 file changed

+69
-18
lines changed

rust/scx_utils/src/topology.rs

Lines changed: 69 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ use sscanf::sscanf;
7878
use std::collections::BTreeMap;
7979
use std::path::Path;
8080
use std::path::PathBuf;
81-
use std::sync::Arc;
81+
use std::sync::{Arc, Mutex};
82+
use std::time::{Duration, Instant};
8283

8384
#[cfg(feature = "gpu-topology")]
8485
use crate::gpu::{create_gpus, Gpu, GpuIndex};
@@ -112,10 +113,6 @@ pub struct Cpu {
112113
pub id: usize,
113114
pub min_freq: usize,
114115
pub max_freq: usize,
115-
/// Currently used by AMD CPUs to show which cores to use vs others.
116-
/// The higher the number then the higher the priority the core.
117-
/// This is set at boot time but can change at runtime via tunables.
118-
pub pref_rank: usize,
119116
/// Base operational frequency. Only available on Intel Turbo Boost
120117
/// CPUs. If not available, this will simply return maximum frequency.
121118
pub base_freq: usize,
@@ -189,6 +186,33 @@ pub struct Topology {
189186
pub all_llcs: BTreeMap<usize, Arc<Llc>>,
190187
pub all_cores: BTreeMap<usize, Arc<Core>>,
191188
pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
189+
190+
/// Cached list of ranked CPUs
191+
ranked_cpus: Mutex<RankedCpuCache>,
192+
}
193+
194+
const RANKED_CPU_CACHE_DURATION: Duration = Duration::from_secs(10);
195+
196+
/// Cached list of ranked CPUs
197+
#[derive(Debug)]
198+
struct RankedCpuCache {
199+
/// List of CPU IDs sorted by their ranking (highest to lowest)
200+
cpu_ids: Vec<usize>,
201+
/// When this cache was last updated
202+
last_updated: Instant,
203+
}
204+
205+
impl RankedCpuCache {
206+
fn new() -> Self {
207+
Self {
208+
cpu_ids: Vec::new(),
209+
last_updated: Instant::now() - RANKED_CPU_CACHE_DURATION,
210+
}
211+
}
212+
213+
fn is_valid(&self) -> bool {
214+
self.last_updated.elapsed() < RANKED_CPU_CACHE_DURATION
215+
}
192216
}
193217

194218
impl Topology {
@@ -244,6 +268,7 @@ impl Topology {
244268
all_llcs: topo_llcs,
245269
all_cores: topo_cores,
246270
all_cpus: topo_cpus,
271+
ranked_cpus: Mutex::new(RankedCpuCache::new()),
247272
})
248273
}
249274

@@ -323,21 +348,49 @@ impl Topology {
323348
}
324349
}
325350

326-
/// Update the pref_rank values for all CPUs in the topology by reading from sysfs
327-
pub fn update_pref_ranks(&mut self) -> Result<()> {
351+
/// Returns a sorted list of CPU IDs from highest to lowest rank.
352+
/// The list is cached internally and refreshed every 10 seconds.
353+
/// If preferred core ranking is not enabled, returns an empty vector.
354+
pub fn get_ranked_cpus(&self) -> Vec<usize> {
328355
if !self.has_pref_rank() {
329-
return Ok(());
356+
return Vec::new();
330357
}
331-
332-
for cpu in self.all_cpus.values_mut() {
333-
if let Some(cpu_mut) = Arc::get_mut(cpu) {
334-
let cpu_path = Path::new("/sys/devices/system/cpu").join(format!("cpu{}", cpu_mut.id));
335-
let freq_path = cpu_path.join("cpufreq");
336-
cpu_mut.pref_rank = read_file_usize(&freq_path.join("amd_pstate_prefcore_ranking")).unwrap_or(0);
358+
359+
let mut cache = self.ranked_cpus.lock().unwrap();
360+
if !cache.is_valid() {
361+
let mut cpu_ranks: Vec<(usize, usize)> = Vec::new();
362+
363+
for &cpu_id in self.all_cpus.keys() {
364+
let cpu_path = Path::new("/sys/devices/system/cpu")
365+
.join(format!("cpu{}", cpu_id))
366+
.join("cpufreq");
367+
368+
if let Ok(rank) = read_file_usize(&cpu_path.join("amd_pstate_prefcore_ranking")) {
369+
cpu_ranks.push((cpu_id, rank));
370+
}
337371
}
372+
373+
cpu_ranks.sort_by(|a, b| b.1.cmp(&a.1));
374+
375+
cache.cpu_ids = cpu_ranks.into_iter().map(|(id, _)| id).collect();
376+
cache.last_updated = Instant::now();
377+
}
378+
379+
cache.cpu_ids.clone()
380+
}
381+
382+
/// Returns true if cpu_a has a higher rank than cpu_b.
383+
/// If ranking is not enabled or either CPU is invalid, returns false.
384+
pub fn is_higher_ranked(&self, cpu_a: usize, cpu_b: usize) -> bool {
385+
let ranked_cpus = self.get_ranked_cpus();
386+
if let (Some(pos_a), Some(pos_b)) = (
387+
ranked_cpus.iter().position(|&id| id == cpu_a),
388+
ranked_cpus.iter().position(|&id| id == cpu_b),
389+
) {
390+
pos_a < pos_b
391+
} else {
392+
false
338393
}
339-
340-
Ok(())
341394
}
342395
}
343396

@@ -474,7 +527,6 @@ fn create_insert_cpu(
474527
let max_freq = read_file_usize(&freq_path.join("scaling_max_freq")).unwrap_or(0);
475528
let base_freq = read_file_usize(&freq_path.join("base_frequency")).unwrap_or(max_freq);
476529
let trans_lat_ns = read_file_usize(&freq_path.join("cpuinfo_transition_latency")).unwrap_or(0);
477-
let pref_rank = read_file_usize(&freq_path.join("amd_pstate_prefcore_ranking")).unwrap_or(0);
478530

479531
let num_llcs = topo_ctx.node_llc_kernel_ids.len();
480532
let llc_id = topo_ctx
@@ -531,7 +583,6 @@ fn create_insert_cpu(
531583
min_freq,
532584
max_freq,
533585
base_freq,
534-
pref_rank,
535586
trans_lat_ns,
536587
l2_id,
537588
l3_id,

0 commit comments

Comments
 (0)