Skip to main content

laminar_core/detect/
cpu.rs

1//! # CPU Feature Detection
2//!
3//! Detects CPU capabilities including SIMD instructions and cache configuration.
4//!
5//! ## Usage
6//!
7//! ```rust,ignore
8//! use laminar_core::detect::{CpuFeatures, cache_line_size};
9//!
10//! let features = CpuFeatures::detect();
11//! if features.avx2 {
12//!     println!("AVX2 is available!");
13//! }
14//!
15//! let cache_line = cache_line_size();
16//! println!("Cache line size: {} bytes", cache_line);
17//! ```
18
/// CPU feature flags.
///
/// Detected using CPUID on x86/x86_64 or equivalent on other architectures.
///
/// Every flag is a plain `bool` and the derived `Default` leaves all of them
/// `false`. [`CpuFeatures::detect`] only fills in the flags that belong to the
/// architecture the crate was compiled for, so the x86 flags stay `false` on
/// AArch64 and the ARM flags stay `false` on x86.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
// One bool per hardware capability is intentional here; silence the pedantic lint.
#[allow(clippy::struct_excessive_bools)]
pub struct CpuFeatures {
    // --- x86 / x86_64 flags ---
    /// SSE4.2 support (CRC32, string compare)
    pub sse4_2: bool,
    /// AVX2 support (256-bit SIMD)
    pub avx2: bool,
    /// AVX-512 Foundation support (512-bit SIMD)
    pub avx512f: bool,
    /// AVX-512 VBMI2 support (byte/word manipulation)
    pub avx512vbmi2: bool,
    /// POPCNT support (population count)
    pub popcnt: bool,
    /// AES-NI support (hardware AES)
    pub aes_ni: bool,
    /// CLMUL support (carryless multiply, used for CRC)
    pub clmul: bool,
    /// BMI1 support (bit manipulation)
    pub bmi1: bool,
    /// BMI2 support (bit manipulation)
    pub bmi2: bool,
    /// LZCNT support (leading zero count)
    pub lzcnt: bool,
    // --- AArch64 flags ---
    /// NEON support (ARM SIMD)
    pub neon: bool,
    /// CRC32 hardware support (ARM)
    pub arm_crc32: bool,
}
50
51impl CpuFeatures {
52    /// Detect CPU features for the current processor.
53    #[must_use]
54    pub fn detect() -> Self {
55        let mut features = Self::default();
56
57        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
58        {
59            features.detect_x86();
60        }
61
62        #[cfg(target_arch = "aarch64")]
63        {
64            features.detect_aarch64();
65        }
66
67        features
68    }
69
70    /// Detect features on x86/x86\_64 using CPUID.
71    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
72    fn detect_x86(&mut self) {
73        // Use std::arch::is_x86_feature_detected! macro for reliable detection
74        self.sse4_2 = std::arch::is_x86_feature_detected!("sse4.2");
75        self.avx2 = std::arch::is_x86_feature_detected!("avx2");
76        self.avx512f = std::arch::is_x86_feature_detected!("avx512f");
77        self.avx512vbmi2 = std::arch::is_x86_feature_detected!("avx512vbmi2");
78        self.popcnt = std::arch::is_x86_feature_detected!("popcnt");
79        self.aes_ni = std::arch::is_x86_feature_detected!("aes");
80        self.clmul = std::arch::is_x86_feature_detected!("pclmulqdq");
81        self.bmi1 = std::arch::is_x86_feature_detected!("bmi1");
82        self.bmi2 = std::arch::is_x86_feature_detected!("bmi2");
83        self.lzcnt = std::arch::is_x86_feature_detected!("lzcnt");
84    }
85
86    /// Detect features on AArch64 (ARM64).
87    #[cfg(target_arch = "aarch64")]
88    fn detect_aarch64(&mut self) {
89        // NEON is mandatory on AArch64
90        self.neon = true;
91        // CRC32 is common but not universal
92        self.arm_crc32 = std::arch::is_aarch64_feature_detected!("crc");
93    }
94
95    /// Check if SIMD acceleration is available.
96    #[must_use]
97    pub fn has_simd(&self) -> bool {
98        self.avx2 || self.avx512f || self.neon
99    }
100
101    /// Check if hardware CRC32 is available.
102    #[must_use]
103    pub fn has_hw_crc32(&self) -> bool {
104        self.sse4_2 || self.arm_crc32
105    }
106
107    /// Check if hardware AES is available.
108    #[must_use]
109    pub fn has_hw_aes(&self) -> bool {
110        self.aes_ni
111    }
112
113    /// Get a summary of SIMD capabilities.
114    #[must_use]
115    pub fn simd_level(&self) -> SimdLevel {
116        if self.avx512f {
117            SimdLevel::Avx512
118        } else if self.avx2 {
119            SimdLevel::Avx2
120        } else if self.sse4_2 {
121            SimdLevel::Sse42
122        } else if self.neon {
123            SimdLevel::Neon
124        } else {
125            SimdLevel::None
126        }
127    }
128
129    /// Get a summary string.
130    #[must_use]
131    pub fn summary(&self) -> String {
132        let mut features = Vec::new();
133
134        if self.avx512f {
135            features.push("AVX-512");
136        } else if self.avx2 {
137            features.push("AVX2");
138        } else if self.sse4_2 {
139            features.push("SSE4.2");
140        }
141
142        if self.neon {
143            features.push("NEON");
144        }
145
146        if self.aes_ni {
147            features.push("AES-NI");
148        }
149
150        if self.popcnt {
151            features.push("POPCNT");
152        }
153
154        if features.is_empty() {
155            "None".to_string()
156        } else {
157            features.join(", ")
158        }
159    }
160}
161
/// SIMD capability level.
///
/// Variants are declared from least to most capable, so the derived
/// `PartialOrd`/`Ord` let callers compare levels directly
/// (`SimdLevel::None < SimdLevel::Neon < ... < SimdLevel::Avx512`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SimdLevel {
    /// No SIMD support
    None,
    /// ARM NEON (128-bit)
    Neon,
    /// x86 SSE4.2 (128-bit)
    Sse42,
    /// x86 AVX2 (256-bit)
    Avx2,
    /// x86 AVX-512 (512-bit)
    Avx512,
}

/// Human-readable name of the level (`"None"`, `"NEON"`, `"SSE4.2"`, `"AVX2"`,
/// `"AVX-512"`).
impl std::fmt::Display for SimdLevel {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::None => "None",
            Self::Neon => "NEON",
            Self::Sse42 => "SSE4.2",
            Self::Avx2 => "AVX2",
            Self::Avx512 => "AVX-512",
        };
        // `pad` honors width/alignment/fill flags (e.g. `{:>8}`), which a bare
        // `write!` silently ignores; plain `{}` output is unchanged.
        f.pad(label)
    }
}
188
189/// Detect the cache line size.
190///
191/// Returns 64 bytes as the default, which is correct for most modern processors.
192#[must_use]
193pub fn cache_line_size() -> usize {
194    detect_cache_line_size().unwrap_or(64)
195}
196
197/// Attempt to detect the cache line size from system information.
198fn detect_cache_line_size() -> Option<usize> {
199    // Try Linux sysfs first
200    #[cfg(target_os = "linux")]
201    {
202        if let Some(size) = detect_cache_line_sysfs() {
203            return Some(size);
204        }
205    }
206
207    // Try CPUID on x86
208    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
209    {
210        if let Some(size) = detect_cache_line_cpuid() {
211            return Some(size);
212        }
213    }
214
215    None
216}
217
/// Detect cache line size from Linux sysfs.
///
/// Reads `coherency_line_size` for cache indices 0, 1 and 2 of `cpu0`, in that
/// order, and returns the first value that can be read, parses as an integer
/// and is non-zero. Unreadable, malformed or zero entries are skipped. Returns
/// `None` when no entry qualifies.
#[cfg(target_os = "linux")]
fn detect_cache_line_sysfs() -> Option<usize> {
    // Try L1 data cache first
    const CANDIDATES: [&str; 3] = [
        "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size",
        "/sys/devices/system/cpu/cpu0/cache/index1/coherency_line_size",
        "/sys/devices/system/cpu/cpu0/cache/index2/coherency_line_size",
    ];

    CANDIDATES.iter().find_map(|path| {
        std::fs::read_to_string(path)
            .ok()?
            .trim()
            .parse::<usize>()
            .ok()
            // A zero line size is meaningless; reject it like the CPUID probe
            // does and keep looking at the next cache index.
            .filter(|&size| size > 0)
    })
}
238
/// Detect cache line size using CPUID.
///
/// Queries extended leaf `0x8000_0006` (AMD-style cache info, which typically
/// also works on Intel) after confirming via leaf `0x8000_0000` that the CPU
/// implements it. Returns `None` when the leaf is unsupported or reports a
/// line size of zero.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn detect_cache_line_cpuid() -> Option<usize> {
    #[cfg(target_arch = "x86")]
    use std::arch::x86::__cpuid;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::__cpuid;

    /// Leaf whose EAX reports the highest supported extended leaf.
    const EXT_MAX_LEAF: u32 = 0x8000_0000;
    /// Extended leaf carrying the L2 cache description.
    const EXT_L2_CACHE_LEAF: u32 = 0x8000_0006;

    // SAFETY: CPUID is a read-only instruction on x86.
    // allow(unused_unsafe): __cpuid is safe on newer Rust toolchains.
    #[allow(unused_unsafe)]
    let highest_ext_leaf = unsafe { __cpuid(EXT_MAX_LEAF) }.eax;

    // Bail out early if the cache-info leaf is not implemented.
    if highest_ext_leaf < EXT_L2_CACHE_LEAF {
        return None;
    }

    // SAFETY: CPUID is a read-only instruction on x86.
    #[allow(unused_unsafe)]
    let regs = unsafe { __cpuid(EXT_L2_CACHE_LEAF) };

    // ECX bits 0-7 contain L2 cache line size
    let line_bytes = (regs.ecx & 0xFF) as usize;
    (line_bytes > 0).then_some(line_bytes)
}
269
/// Get the number of logical CPUs.
///
/// Reports the value of `std::thread::available_parallelism()`, falling back
/// to `1` when that query returns an error.
#[must_use]
pub fn logical_cpu_count() -> usize {
    match std::thread::available_parallelism() {
        Ok(threads) => threads.get(),
        Err(_) => 1,
    }
}
275
/// Get the number of physical CPU cores.
///
/// Uses `num_cpus::get_physical()` as there is no std equivalent.
///
/// NOTE(review): this value comes from a different source than
/// `logical_cpu_count` (which uses `std::thread::available_parallelism`).
/// Presumably the two can disagree when the process is restricted to a subset
/// of CPUs (affinity mask, container quota) — confirm against the `num_cpus`
/// documentation before relying on `physical <= logical`.
#[must_use]
pub fn physical_cpu_count() -> usize {
    num_cpus::get_physical()
}
283
284/// Check if SMT (Hyper-Threading) is enabled.
285#[must_use]
286pub fn is_smt_enabled() -> bool {
287    logical_cpu_count() > physical_cpu_count()
288}
289
// Unit tests for CPU feature detection, cache line probing and CPU counting.
// Tests that call the real detection routines only assert properties that
// should hold on any reasonable host; the rest exercise the pure helpers on
// hand-built `CpuFeatures` values.
#[cfg(test)]
mod tests {
    use super::*;

    // Detection on the host must find at least one baseline feature.
    #[test]
    fn test_cpu_features_detect() {
        let features = CpuFeatures::detect();

        // At minimum, we should have some features on x86
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // SSE4.2 and POPCNT are available on any CPU from the last 15+ years
            // This test should pass on any reasonably modern system
            assert!(
                features.sse4_2 || features.popcnt || features.avx2,
                "Expected at least one common x86 feature to be available"
            );
        }

        // On ARM64, NEON is mandatory
        #[cfg(target_arch = "aarch64")]
        {
            assert!(features.neon);
        }
    }

    // `simd_level` yields a known variant and its `Display` output is non-empty.
    #[test]
    fn test_cpu_features_simd_level() {
        let features = CpuFeatures::detect();
        let level = features.simd_level();

        // simd_level should return a valid level
        // (the exhaustive match fails to compile if a variant is added
        // without updating this test)
        match level {
            SimdLevel::None
            | SimdLevel::Neon
            | SimdLevel::Sse42
            | SimdLevel::Avx2
            | SimdLevel::Avx512 => {}
        }

        // Display should work
        let level_str = format!("{level}");
        assert!(!level_str.is_empty());
    }

    // `summary` never returns an empty string.
    #[test]
    fn test_cpu_features_summary() {
        let features = CpuFeatures::detect();
        let summary = features.summary();

        // Summary should be non-empty (at least "None")
        assert!(!summary.is_empty());
    }

    // The derived `Default` leaves the SIMD flags cleared.
    #[test]
    fn test_cpu_features_default() {
        let features = CpuFeatures::default();
        assert!(!features.sse4_2);
        assert!(!features.avx2);
        assert!(!features.avx512f);
        assert!(!features.neon);
    }

    // The reported cache line size is a power of two between 32 and 256 bytes.
    #[test]
    fn test_cache_line_size() {
        let size = cache_line_size();
        // Cache line size should be a reasonable power of 2
        assert!(size >= 32);
        assert!(size <= 256);
        assert!(size.is_power_of_two());
    }

    // At least one logical CPU is always reported.
    #[test]
    fn test_logical_cpu_count() {
        let count = logical_cpu_count();
        assert!(count >= 1);
    }

    // At least one physical core is reported, and no more than logical CPUs.
    #[test]
    fn test_physical_cpu_count() {
        let count = physical_cpu_count();
        assert!(count >= 1);
        // Physical count should not exceed logical
        // NOTE(review): the two counts come from different sources
        // (`num_cpus` vs `std::thread::available_parallelism`); this may not
        // hold when the test process is pinned to fewer CPUs than the machine
        // has cores — confirm on containerized CI.
        assert!(count <= logical_cpu_count());
    }

    // Smoke test: the SMT query completes without panicking.
    #[test]
    fn test_is_smt_enabled() {
        // Just ensure this doesn't panic
        let _ = is_smt_enabled();
    }

    // The derived ordering follows declaration order, weakest to strongest.
    #[test]
    fn test_simd_level_ordering() {
        assert!(SimdLevel::None < SimdLevel::Neon);
        assert!(SimdLevel::Neon < SimdLevel::Sse42);
        assert!(SimdLevel::Sse42 < SimdLevel::Avx2);
        assert!(SimdLevel::Avx2 < SimdLevel::Avx512);
    }

    // `has_simd` is false by default and true once AVX2 or NEON is set.
    #[test]
    fn test_has_simd() {
        let mut features = CpuFeatures::default();
        assert!(!features.has_simd());

        features.avx2 = true;
        assert!(features.has_simd());

        features = CpuFeatures::default();
        features.neon = true;
        assert!(features.has_simd());
    }

    // `has_hw_crc32` is false by default and true for SSE4.2 or ARM CRC32.
    #[test]
    fn test_has_hw_crc32() {
        let mut features = CpuFeatures::default();
        assert!(!features.has_hw_crc32());

        features.sse4_2 = true;
        assert!(features.has_hw_crc32());

        features = CpuFeatures::default();
        features.arm_crc32 = true;
        assert!(features.has_hw_crc32());
    }
}
411        features = CpuFeatures::default();
412        features.arm_crc32 = true;
413        assert!(features.has_hw_crc32());
414    }
415}