Skip to content

Commit 74b28d4

Browse files
committed
scx_utils: Add support for AMD Pref Core
1 parent 43d2eee commit 74b28d4

File tree

1 file changed

+210
-2
lines changed

1 file changed

+210
-2
lines changed

rust/scx_utils/src/topology.rs

Lines changed: 210 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -78,7 +78,9 @@ use sscanf::sscanf;
7878
use std::collections::BTreeMap;
7979
use std::path::Path;
8080
use std::path::PathBuf;
81-
use std::sync::Arc;
81+
use std::sync::{Arc, Mutex};
82+
use std::sync::atomic::{AtomicU32, Ordering};
83+
use std::time::{Duration, Instant};
8284

8385
#[cfg(feature = "gpu-topology")]
8486
use crate::gpu::{create_gpus, Gpu, GpuIndex};
@@ -99,6 +101,9 @@ lazy_static::lazy_static! {
99101
/// disabled CPUs that may not be onlined, whose IDs are lower than the
100102
/// IDs of other CPUs that may be onlined.
101103
pub static ref NR_CPUS_POSSIBLE: usize = libbpf_rs::num_possible_cpus().unwrap();
104+
105+
/// Whether AMD preferred core ranking is enabled on this system
106+
pub static ref HAS_PREF_RANK: bool = has_pref_rank();
102107
}
103108

104109
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
@@ -107,7 +112,7 @@ pub enum CoreType {
107112
Little,
108113
}
109114

110-
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
115+
#[derive(Debug)]
111116
pub struct Cpu {
112117
pub id: usize,
113118
pub min_freq: usize,
@@ -126,6 +131,105 @@ pub struct Cpu {
126131
pub node_id: usize,
127132
pub package_id: usize,
128133
pub cluster_id: usize,
134+
rank: AtomicU32,
135+
}
136+
137+
impl Clone for Cpu {
138+
fn clone(&self) -> Self {
139+
Cpu {
140+
id: self.id,
141+
min_freq: self.min_freq,
142+
max_freq: self.max_freq,
143+
base_freq: self.base_freq,
144+
trans_lat_ns: self.trans_lat_ns,
145+
l2_id: self.l2_id,
146+
l3_id: self.l3_id,
147+
core_type: self.core_type.clone(),
148+
core_id: self.core_id,
149+
llc_id: self.llc_id,
150+
node_id: self.node_id,
151+
package_id: self.package_id,
152+
cluster_id: self.cluster_id,
153+
rank: AtomicU32::new(self.rank.load(Ordering::Relaxed)),
154+
}
155+
}
156+
}
157+
158+
impl PartialEq for Cpu {
159+
fn eq(&self, other: &Self) -> bool {
160+
self.id == other.id
161+
&& self.min_freq == other.min_freq
162+
&& self.max_freq == other.max_freq
163+
&& self.base_freq == other.base_freq
164+
&& self.trans_lat_ns == other.trans_lat_ns
165+
&& self.l2_id == other.l2_id
166+
&& self.l3_id == other.l3_id
167+
&& self.core_type == other.core_type
168+
&& self.core_id == other.core_id
169+
&& self.llc_id == other.llc_id
170+
&& self.node_id == other.node_id
171+
&& self.package_id == other.package_id
172+
&& self.cluster_id == other.cluster_id
173+
&& self.rank() == other.rank()
174+
}
175+
}
176+
177+
impl Eq for Cpu {}
178+
179+
impl PartialOrd for Cpu {
180+
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
181+
Some(self.cmp(other))
182+
}
183+
}
184+
185+
impl Ord for Cpu {
186+
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
187+
self.id.cmp(&other.id)
188+
.then_with(|| self.min_freq.cmp(&other.min_freq))
189+
.then_with(|| self.max_freq.cmp(&other.max_freq))
190+
.then_with(|| self.base_freq.cmp(&other.base_freq))
191+
.then_with(|| self.trans_lat_ns.cmp(&other.trans_lat_ns))
192+
.then_with(|| self.l2_id.cmp(&other.l2_id))
193+
.then_with(|| self.l3_id.cmp(&other.l3_id))
194+
.then_with(|| self.core_type.cmp(&other.core_type))
195+
.then_with(|| self.core_id.cmp(&other.core_id))
196+
.then_with(|| self.llc_id.cmp(&other.llc_id))
197+
.then_with(|| self.node_id.cmp(&other.node_id))
198+
.then_with(|| self.package_id.cmp(&other.package_id))
199+
.then_with(|| self.cluster_id.cmp(&other.cluster_id))
200+
.then_with(|| self.rank().cmp(&other.rank()))
201+
}
202+
}
203+
204+
impl std::hash::Hash for Cpu {
205+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
206+
self.id.hash(state);
207+
self.min_freq.hash(state);
208+
self.max_freq.hash(state);
209+
self.base_freq.hash(state);
210+
self.trans_lat_ns.hash(state);
211+
self.l2_id.hash(state);
212+
self.l3_id.hash(state);
213+
self.core_type.hash(state);
214+
self.core_id.hash(state);
215+
self.llc_id.hash(state);
216+
self.node_id.hash(state);
217+
self.package_id.hash(state);
218+
self.cluster_id.hash(state);
219+
self.rank().hash(state);
220+
}
221+
}
222+
223+
impl Cpu {
224+
/// Get the current rank value
225+
pub fn rank(&self) -> usize {
226+
self.rank.load(Ordering::Relaxed) as usize
227+
}
228+
229+
/// Set the rank value
230+
pub fn set_rank(&self, rank: usize) {
231+
self.rank.store(rank as u32, Ordering::Relaxed);
232+
}
129233
}
130234

131235
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
@@ -187,6 +291,36 @@ pub struct Topology {
187291
pub all_llcs: BTreeMap<usize, Arc<Llc>>,
188292
pub all_cores: BTreeMap<usize, Arc<Core>>,
189293
pub all_cpus: BTreeMap<usize, Arc<Cpu>>,
294+
295+
/// Cached list of ranked CPUs
296+
ranked_cpus: Mutex<Arc<RankedCpuCache>>,
297+
}
298+
299+
/// How long a cached CPU ranking remains valid before it is refreshed.
const RANKED_CPU_CACHE_DURATION: Duration = Duration::from_secs(10);

/// Cached list of ranked CPUs
#[derive(Debug, Clone)]
pub struct RankedCpuCache {
    /// List of CPU IDs sorted by their ranking (highest to lowest)
    pub cpu_ids: Vec<usize>,
    /// When this cache was last updated
    pub last_updated: Instant,
    /// Generation number that increments each time the order changes
    pub generation: u64,
}

impl RankedCpuCache {
    /// Create an empty cache that reports itself as stale, so the first
    /// consumer triggers a refresh.
    pub fn new() -> Self {
        Self {
            cpu_ids: Vec::new(),
            // `Instant::now() - RANKED_CPU_CACHE_DURATION` panics when the
            // monotonic clock is younger than the cache duration (e.g. very
            // early after boot, exactly when a scheduler may start). Use
            // checked_sub and fall back to now(); the generation check in
            // is_valid() keeps the never-populated cache stale either way.
            last_updated: Instant::now()
                .checked_sub(RANKED_CPU_CACHE_DURATION)
                .unwrap_or_else(Instant::now),
            generation: 0,
        }
    }

    /// A cache is valid once it has been populated at least once
    /// (generation > 0) and its contents are younger than
    /// RANKED_CPU_CACHE_DURATION.
    pub fn is_valid(&self) -> bool {
        self.generation > 0 && self.last_updated.elapsed() < RANKED_CPU_CACHE_DURATION
    }
}

impl Default for RankedCpuCache {
    fn default() -> Self {
        Self::new()
    }
}
191325

192326
impl Topology {
@@ -242,6 +376,7 @@ impl Topology {
242376
all_llcs: topo_llcs,
243377
all_cores: topo_cores,
244378
all_cpus: topo_cpus,
379+
ranked_cpus: Mutex::new(Arc::new(RankedCpuCache::new())),
245380
})
246381
}
247382

@@ -309,6 +444,68 @@ impl Topology {
309444
}
310445
sibling_cpu
311446
}
447+
448+
/// Returns true if cpu_a has a higher rank than cpu_b.
449+
/// If ranking is not enabled or either CPU is invalid, returns false.
450+
pub fn is_higher_ranked(&self, cpu_a: usize, cpu_b: usize) -> bool {
451+
if !*HAS_PREF_RANK {
452+
return false;
453+
}
454+
455+
let cpu_a_rank = self.all_cpus.get(&cpu_a).map(|cpu| cpu.rank());
456+
let cpu_b_rank = self.all_cpus.get(&cpu_b).map(|cpu| cpu.rank());
457+
458+
match (cpu_a_rank, cpu_b_rank) {
459+
(Some(rank_a), Some(rank_b)) => rank_a > rank_b,
460+
_ => false,
461+
}
462+
}
463+
464+
/// Returns the cached ranked CPU list.
465+
/// The list is cached internally and refreshed every 10 seconds.
466+
/// If preferred core ranking is not enabled, returns an empty cache.
467+
pub fn get_ranked_cpus(&self) -> Arc<RankedCpuCache> {
468+
if !*HAS_PREF_RANK {
469+
return Arc::new(RankedCpuCache {
470+
cpu_ids: Vec::new(),
471+
last_updated: Instant::now(),
472+
generation: 0,
473+
});
474+
}
475+
476+
let mut cache = self.ranked_cpus.lock().unwrap();
477+
if !cache.is_valid() {
478+
let mut cpu_ranks: Vec<(usize, usize)> = Vec::new();
479+
480+
for &cpu_id in self.all_cpus.keys() {
481+
let cpu_path = Path::new("/sys/devices/system/cpu")
482+
.join(format!("cpu{}", cpu_id))
483+
.join("cpufreq");
484+
485+
if let Ok(rank) = read_file_usize(&cpu_path.join("amd_pstate_prefcore_ranking")) {
486+
// Update the rank directly in the CPU object
487+
if let Some(cpu) = self.all_cpus.get(&cpu_id) {
488+
cpu.set_rank(rank);
489+
}
490+
cpu_ranks.push((cpu_id, rank));
491+
}
492+
}
493+
494+
cpu_ranks.sort_by(|a, b| {
495+
let a_val = a.1;
496+
let b_val = b.1;
497+
b_val.cmp(&a_val).then_with(|| a.0.cmp(&b.0))
498+
});
499+
500+
let inner = Arc::make_mut(&mut *cache);
501+
inner.cpu_ids.clear();
502+
inner.cpu_ids.extend(cpu_ranks.iter().map(|(id, _)| *id));
503+
inner.last_updated = Instant::now();
504+
inner.generation += 1;
505+
}
506+
507+
Arc::clone(&cache)
508+
}
312509
}
313510

314511
/******************************************************
@@ -517,6 +714,7 @@ fn create_insert_cpu(
517714
node_id: node.id,
518715
package_id,
519716
cluster_id,
717+
rank: AtomicU32::new(0),
520718
}),
521719
);
522720

@@ -704,3 +902,13 @@ fn create_numa_nodes(
704902
}
705903
Ok(nodes)
706904
}
905+
906+
/// Returns true when AMD preferred-core ranking is enabled, i.e. the
/// amd_pstate prefcore sysfs knob exists and currently reads "enabled".
fn has_pref_rank() -> bool {
    // A single read covers both the "file missing" and "read failed" cases,
    // avoiding the check-then-read (TOCTOU) race of a separate exists() test.
    std::fs::read_to_string("/sys/devices/system/cpu/amd_pstate/prefcore")
        .map(|contents| contents.trim() == "enabled")
        .unwrap_or(false)
}

0 commit comments

Comments
 (0)