@@ -78,7 +78,8 @@ use sscanf::sscanf;
78
78
use std:: collections:: BTreeMap ;
79
79
use std:: path:: Path ;
80
80
use std:: path:: PathBuf ;
81
- use std:: sync:: Arc ;
81
+ use std:: sync:: { Arc , Mutex } ;
82
+ use std:: time:: { Duration , Instant } ;
82
83
83
84
#[ cfg( feature = "gpu-topology" ) ]
84
85
use crate :: gpu:: { create_gpus, Gpu , GpuIndex } ;
@@ -112,10 +113,6 @@ pub struct Cpu {
112
113
pub id : usize ,
113
114
pub min_freq : usize ,
114
115
pub max_freq : usize ,
115
- /// Currently used by AMD CPUs to show which cores to use vs others.
116
- /// The higher the number then the higher the priority the core.
117
- /// This is set at boot time but can change at runtime via tunables.
118
- pub pref_rank : usize ,
119
116
/// Base operational frequency. Only available on Intel Turbo Boost
120
117
/// CPUs. If not available, this will simply return maximum frequency.
121
118
pub base_freq : usize ,
@@ -189,6 +186,33 @@ pub struct Topology {
189
186
pub all_llcs : BTreeMap < usize , Arc < Llc > > ,
190
187
pub all_cores : BTreeMap < usize , Arc < Core > > ,
191
188
pub all_cpus : BTreeMap < usize , Arc < Cpu > > ,
189
+
190
+ /// Cached list of ranked CPUs
191
+ ranked_cpus : Mutex < RankedCpuCache > ,
192
+ }
193
+
194
/// How long a computed CPU ranking stays valid before it is rebuilt.
const RANKED_CPU_CACHE_DURATION: Duration = Duration::from_secs(10);

/// Cached list of ranked CPUs together with the time it was computed.
#[derive(Debug)]
struct RankedCpuCache {
    /// List of CPU IDs sorted by their ranking (highest to lowest).
    cpu_ids: Vec<usize>,
    /// When this cache was last updated.
    last_updated: Instant,
}

impl RankedCpuCache {
    /// Creates an empty cache that is considered stale, so the first lookup
    /// always triggers a refresh.
    fn new() -> Self {
        let now = Instant::now();
        Self {
            cpu_ids: Vec::new(),
            // `Instant - Duration` panics when the result is not representable
            // (e.g. the monotonic clock is younger than the cache duration).
            // Back-date when possible and otherwise fall back to `now`; the
            // emptiness check in `is_valid` keeps the cache stale either way.
            last_updated: now.checked_sub(RANKED_CPU_CACHE_DURATION).unwrap_or(now),
        }
    }

    /// Returns `true` when the cache holds a ranking that is younger than
    /// `RANKED_CPU_CACHE_DURATION`.
    ///
    /// An empty list is never treated as valid: it either means the cache was
    /// never filled or that the last refresh found no ranked CPUs, and in both
    /// cases the next lookup should try again.
    fn is_valid(&self) -> bool {
        !self.cpu_ids.is_empty() && self.last_updated.elapsed() < RANKED_CPU_CACHE_DURATION
    }
}
193
217
194
218
impl Topology {
@@ -244,6 +268,7 @@ impl Topology {
244
268
all_llcs : topo_llcs,
245
269
all_cores : topo_cores,
246
270
all_cpus : topo_cpus,
271
+ ranked_cpus : Mutex :: new ( RankedCpuCache :: new ( ) ) ,
247
272
} )
248
273
}
249
274
@@ -323,21 +348,49 @@ impl Topology {
323
348
}
324
349
}
325
350
326
- /// Update the pref_rank values for all CPUs in the topology by reading from sysfs
327
- pub fn update_pref_ranks ( & mut self ) -> Result < ( ) > {
351
+ /// Returns a sorted list of CPU IDs from highest to lowest rank.
352
+ /// The list is cached internally and refreshed every 10 seconds.
353
+ /// If preferred core ranking is not enabled, returns an empty slice.
354
+ pub fn get_ranked_cpus ( & self ) -> Vec < usize > {
328
355
if !self . has_pref_rank ( ) {
329
- return Ok ( ( ) ) ;
356
+ return Vec :: new ( ) ;
330
357
}
331
-
332
- for cpu in self . all_cpus . values_mut ( ) {
333
- if let Some ( cpu_mut) = Arc :: get_mut ( cpu) {
334
- let cpu_path = Path :: new ( "/sys/devices/system/cpu" ) . join ( format ! ( "cpu{}" , cpu_mut. id) ) ;
335
- let freq_path = cpu_path. join ( "cpufreq" ) ;
336
- cpu_mut. pref_rank = read_file_usize ( & freq_path. join ( "amd_pstate_prefcore_ranking" ) ) . unwrap_or ( 0 ) ;
358
+
359
+ let mut cache = self . ranked_cpus . lock ( ) . unwrap ( ) ;
360
+ if !cache. is_valid ( ) {
361
+ let mut cpu_ranks: Vec < ( usize , usize ) > = Vec :: new ( ) ;
362
+
363
+ for & cpu_id in self . all_cpus . keys ( ) {
364
+ let cpu_path = Path :: new ( "/sys/devices/system/cpu" )
365
+ . join ( format ! ( "cpu{}" , cpu_id) )
366
+ . join ( "cpufreq" ) ;
367
+
368
+ if let Ok ( rank) = read_file_usize ( & cpu_path. join ( "amd_pstate_prefcore_ranking" ) ) {
369
+ cpu_ranks. push ( ( cpu_id, rank) ) ;
370
+ }
337
371
}
372
+
373
+ cpu_ranks. sort_by ( |a, b| b. 1 . cmp ( & a. 1 ) ) ;
374
+
375
+ cache. cpu_ids = cpu_ranks. into_iter ( ) . map ( |( id, _) | id) . collect ( ) ;
376
+ cache. last_updated = Instant :: now ( ) ;
377
+ }
378
+
379
+ cache. cpu_ids . clone ( )
380
+ }
381
+
382
+ /// Returns true if cpu_a has a higher rank than cpu_b.
383
+ /// If ranking is not enabled or either CPU is invalid, returns false.
384
+ pub fn is_higher_ranked ( & self , cpu_a : usize , cpu_b : usize ) -> bool {
385
+ let ranked_cpus = self . get_ranked_cpus ( ) ;
386
+ if let ( Some ( pos_a) , Some ( pos_b) ) = (
387
+ ranked_cpus. iter ( ) . position ( |& id| id == cpu_a) ,
388
+ ranked_cpus. iter ( ) . position ( |& id| id == cpu_b) ,
389
+ ) {
390
+ pos_a < pos_b
391
+ } else {
392
+ false
338
393
}
339
-
340
- Ok ( ( ) )
341
394
}
342
395
}
343
396
@@ -474,7 +527,6 @@ fn create_insert_cpu(
474
527
let max_freq = read_file_usize ( & freq_path. join ( "scaling_max_freq" ) ) . unwrap_or ( 0 ) ;
475
528
let base_freq = read_file_usize ( & freq_path. join ( "base_frequency" ) ) . unwrap_or ( max_freq) ;
476
529
let trans_lat_ns = read_file_usize ( & freq_path. join ( "cpuinfo_transition_latency" ) ) . unwrap_or ( 0 ) ;
477
- let pref_rank = read_file_usize ( & freq_path. join ( "amd_pstate_prefcore_ranking" ) ) . unwrap_or ( 0 ) ;
478
530
479
531
let num_llcs = topo_ctx. node_llc_kernel_ids . len ( ) ;
480
532
let llc_id = topo_ctx
@@ -531,7 +583,6 @@ fn create_insert_cpu(
531
583
min_freq,
532
584
max_freq,
533
585
base_freq,
534
- pref_rank,
535
586
trans_lat_ns,
536
587
l2_id,
537
588
l3_id,
0 commit comments