|
| 1 | +#[allow(unused_imports)] |
| 2 | +use crate::support::{Flags, get_or_init_flags_cache}; |
| 3 | + |
| 4 | +/// CPU features that get cached (doesn't correlate to anything on the CPU). |
| 5 | +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
| 6 | +pub mod cpu_flags { |
| 7 | + use crate::support::unique_masks; |
| 8 | + |
| 9 | + unique_masks! { |
| 10 | + u32, |
| 11 | + SSE3, |
| 12 | + F16C, |
| 13 | + SSE, |
| 14 | + SSE2, |
| 15 | + ERMSB, |
| 16 | + MOVRS, |
| 17 | + FMA, |
| 18 | + FMA4, |
| 19 | + AVX512FP16, |
| 20 | + AVX512BF16, |
| 21 | + } |
| 22 | +} |
| 23 | + |
| 24 | +/// Get CPU features, loading from a cache if available. |
| 25 | +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
| 26 | +pub fn get_cpu_features() -> Flags { |
| 27 | + use core::sync::atomic::AtomicU32; |
| 28 | + static CACHE: AtomicU32 = AtomicU32::new(0); |
| 29 | + get_or_init_flags_cache(&CACHE, load_x86_features) |
| 30 | +} |
| 31 | + |
| 32 | +/// Read from cpuid and translate to a `Flags` instance, using `cpu_flags`. |
| 33 | +/// |
| 34 | +/// Implementation is taken from [std-detect][std-detect]. |
| 35 | +/// |
| 36 | +/// [std-detect]: https://github.com/rust-lang/stdarch/blob/690b3a6334d482874163bd6fcef408e0518febe9/crates/std_detect/src/detect/os/x86.rs#L142 |
| 37 | +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
| 38 | +fn load_x86_features() -> Flags { |
| 39 | + #[cfg(target_arch = "x86")] |
| 40 | + use core::arch::x86::{__cpuid, __cpuid_count, _xgetbv, CpuidResult}; |
| 41 | + #[cfg(target_arch = "x86_64")] |
| 42 | + use core::arch::x86_64::{__cpuid, __cpuid_count, _xgetbv, CpuidResult}; |
| 43 | + |
| 44 | + let mut value = Flags::empty(); |
| 45 | + |
| 46 | + if cfg!(target_env = "sgx") { |
| 47 | + // doesn't support this because it is untrusted data |
| 48 | + return Flags::empty(); |
| 49 | + } |
| 50 | + |
| 51 | + // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU |
| 52 | + // has `cpuid` support. |
| 53 | + |
| 54 | + // 0. EAX = 0: Basic Information: |
| 55 | + // - EAX returns the "Highest Function Parameter", that is, the maximum leaf |
| 56 | + // value for subsequent calls of `cpuinfo` in range [0, 0x8000_0000]. |
| 57 | + // - The vendor ID is stored in 12 u8 ascii chars, returned in EBX, EDX, and ECX |
| 58 | + // (in that order) |
| 59 | + let mut vendor_id = [0u8; 12]; |
| 60 | + let max_basic_leaf; |
| 61 | + unsafe { |
| 62 | + let CpuidResult { eax, ebx, ecx, edx } = __cpuid(0); |
| 63 | + max_basic_leaf = eax; |
| 64 | + vendor_id[0..4].copy_from_slice(&ebx.to_ne_bytes()); |
| 65 | + vendor_id[4..8].copy_from_slice(&edx.to_ne_bytes()); |
| 66 | + vendor_id[8..12].copy_from_slice(&ecx.to_ne_bytes()); |
| 67 | + } |
| 68 | + |
| 69 | + if max_basic_leaf < 1 { |
| 70 | + // Earlier Intel 486, CPUID not implemented |
| 71 | + return value; |
| 72 | + } |
| 73 | + |
| 74 | + // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; |
| 75 | + // Contains information about most x86 features. |
| 76 | + let CpuidResult { ecx, edx, .. } = unsafe { __cpuid(0x0000_0001_u32) }; |
| 77 | + let proc_info_ecx = Flags::from_bits(ecx); |
| 78 | + let proc_info_edx = Flags::from_bits(edx); |
| 79 | + |
| 80 | + // EAX = 7: Queries "Extended Features"; |
| 81 | + // Contains information about bmi,bmi2, and avx2 support. |
| 82 | + let mut extended_features_ebx = Flags::empty(); |
| 83 | + let mut extended_features_edx = Flags::empty(); |
| 84 | + let mut extended_features_eax_leaf_1 = Flags::empty(); |
| 85 | + if max_basic_leaf >= 7 { |
| 86 | + let CpuidResult { ebx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; |
| 87 | + extended_features_ebx = Flags::from_bits(ebx); |
| 88 | + extended_features_edx = Flags::from_bits(edx); |
| 89 | + |
| 90 | + let CpuidResult { eax, .. } = unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) }; |
| 91 | + extended_features_eax_leaf_1 = Flags::from_bits(eax) |
| 92 | + } |
| 93 | + |
| 94 | + // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported |
| 95 | + // - EAX returns the max leaf value for extended information, that is, |
| 96 | + // `cpuid` calls in range [0x8000_0000; u32::MAX]: |
| 97 | + let extended_max_basic_leaf = unsafe { __cpuid(0x8000_0000_u32) }.eax; |
| 98 | + |
| 99 | + // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature Bits" |
| 100 | + let mut extended_proc_info_ecx = Flags::empty(); |
| 101 | + if extended_max_basic_leaf >= 1 { |
| 102 | + let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; |
| 103 | + extended_proc_info_ecx = Flags::from_bits(ecx); |
| 104 | + } |
| 105 | + |
| 106 | + let mut enable = |regflags: Flags, regbit, flag| { |
| 107 | + if regflags.test_nth(regbit) { |
| 108 | + value.insert(flag); |
| 109 | + } |
| 110 | + }; |
| 111 | + |
| 112 | + enable(proc_info_ecx, 0, cpu_flags::SSE3); |
| 113 | + enable(proc_info_ecx, 29, cpu_flags::F16C); |
| 114 | + enable(proc_info_edx, 25, cpu_flags::SSE); |
| 115 | + enable(proc_info_edx, 26, cpu_flags::SSE2); |
| 116 | + enable(extended_features_ebx, 9, cpu_flags::ERMSB); |
| 117 | + enable(extended_features_eax_leaf_1, 31, cpu_flags::MOVRS); |
| 118 | + |
| 119 | + // `XSAVE` and `AVX` support: |
| 120 | + let cpu_xsave = proc_info_ecx.test_nth(26); |
| 121 | + if cpu_xsave { |
| 122 | + // 0. Here the CPU supports `XSAVE`. |
| 123 | + |
| 124 | + // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and |
| 125 | + // supports saving the state of the AVX/AVX2 vector registers on |
| 126 | + // context-switches, see: |
| 127 | + // |
| 128 | + // - [intel: is avx enabled?][is_avx_enabled], |
| 129 | + // - [mozilla: sse.cpp][mozilla_sse_cpp]. |
| 130 | + // |
| 131 | + // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled |
| 132 | + // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 |
| 133 | + let cpu_osxsave = proc_info_ecx.test_nth(27); |
| 134 | + |
| 135 | + if cpu_osxsave { |
| 136 | + // 2. The OS must have signaled the CPU that it supports saving and |
| 137 | + // restoring the: |
| 138 | + // |
| 139 | + // * SSE -> `XCR0.SSE[1]` |
| 140 | + // * AVX -> `XCR0.AVX[2]` |
| 141 | + // * AVX-512 -> `XCR0.AVX-512[7:5]`. |
| 142 | + // * AMX -> `XCR0.AMX[18:17]` |
| 143 | + // |
| 144 | + // by setting the corresponding bits of `XCR0` to `1`. |
| 145 | + // |
| 146 | + // This is safe because the CPU supports `xsave` and the OS has set `osxsave`. |
| 147 | + let xcr0 = unsafe { _xgetbv(0) }; |
| 148 | + // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`: |
| 149 | + let os_avx_support = xcr0 & 6 == 6; |
| 150 | + // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 0xe0`: |
| 151 | + let os_avx512_support = xcr0 & 0xe0 == 0xe0; |
| 152 | + |
| 153 | + // Only if the OS and the CPU support saving/restoring the AVX |
| 154 | + // registers we enable `xsave` support: |
| 155 | + if os_avx_support { |
| 156 | + // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED |
| 157 | + // FEATURES" in the "Intel® 64 and IA-32 Architectures Software |
| 158 | + // Developer’s Manual, Volume 1: Basic Architecture": |
| 159 | + // |
| 160 | + // "Software enables the XSAVE feature set by setting |
| 161 | + // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4 |
| 162 | + // instruction). If this bit is 0, execution of any of XGETBV, |
| 163 | + // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV |
| 164 | + // causes an invalid-opcode exception (#UD)" |
| 165 | + |
| 166 | + // FMA (uses 256-bit wide registers): |
| 167 | + enable(proc_info_ecx, 12, cpu_flags::FMA); |
| 168 | + |
| 169 | + // For AVX-512 the OS also needs to support saving/restoring |
| 170 | + // the extended state, only then we enable AVX-512 support: |
| 171 | + if os_avx512_support { |
| 172 | + enable(extended_features_edx, 23, cpu_flags::AVX512FP16); |
| 173 | + enable(extended_features_eax_leaf_1, 5, cpu_flags::AVX512BF16); |
| 174 | + } |
| 175 | + } |
| 176 | + } |
| 177 | + } |
| 178 | + |
| 179 | + // As Hygon Dhyana originates from AMD technology and shares most of the architecture with |
| 180 | + // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series number |
| 181 | + // (Family 18h). |
| 182 | + // |
| 183 | + // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD |
| 184 | + // family 17h. |
| 185 | + // |
| 186 | + // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf |
| 187 | + // (AMD64 Architecture Programmer's Manual, Appendix E). |
| 188 | + // Related Hygon kernel patch can be found on |
| 189 | + // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn |
| 190 | + if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" { |
| 191 | + // These features are available on AMD arch CPUs: |
| 192 | + enable(extended_proc_info_ecx, 16, cpu_flags::FMA4); |
| 193 | + } |
| 194 | + |
| 195 | + value |
| 196 | +} |
| 197 | + |
#[cfg(test)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod tests {
    extern crate std;
    use std::is_x86_feature_detected;

    use super::*;

    /// Cross-check every flag in `cpu_flags::ALL` against the standard library's runtime
    /// detection, skipping the flags that `std` cannot be asked about.
    #[test]
    fn check_matches_std() {
        let features = get_cpu_features();

        for (idx, &flag) in cpu_flags::ALL.iter().enumerate() {
            // `NAMES` is indexed in parallel with `ALL`.
            let name = cpu_flags::NAMES[idx];

            let expected = match flag {
                cpu_flags::SSE3 => is_x86_feature_detected!("sse3"),
                cpu_flags::F16C => is_x86_feature_detected!("f16c"),
                cpu_flags::SSE => is_x86_feature_detected!("sse"),
                cpu_flags::SSE2 => is_x86_feature_detected!("sse2"),
                cpu_flags::ERMSB => is_x86_feature_detected!("ermsb"),
                cpu_flags::FMA => is_x86_feature_detected!("fma"),
                cpu_flags::AVX512FP16 => is_x86_feature_detected!("avx512fp16"),
                cpu_flags::AVX512BF16 => is_x86_feature_detected!("avx512bf16"),
                // only very recent support in std
                cpu_flags::MOVRS => continue,
                // not yet supported in std
                cpu_flags::FMA4 => continue,
                // Force whoever adds a new flag to extend this test as well.
                _ => panic!("untested CPU flag {name}"),
            };

            assert_eq!(
                expected,
                features.contains(flag),
                "different flag {name}. flags: {features:?}"
            );
        }
    }
}
0 commit comments