319 CPU_BMI1 = (1 << 22),
320 CPU_BMI2 = (1 << 23),
321 CPU_RTM = (1 << 24), // Restricted Transactional Memory instructions
322 CPU_ADX = (1 << 25),
323 CPU_AVX512F = (1 << 26), // AVX 512bit foundation instructions
324 CPU_AVX512DQ = (1 << 27),
325 CPU_AVX512PF = (1 << 28),
326 CPU_AVX512ER = (1 << 29),
327 CPU_AVX512CD = (1 << 30)
328 // Keeping sign bit 31 unassigned.
329 };
330
// Feature bits 32..39. These are uint64_t macro constants rather than
// enumerators: the enum above stops at bit 30 and leaves sign bit 31 unused.
331 #define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // bit 32; enums are limited to 31 bit
332 #define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // bit 33; EVEX instructions with smaller vector length
333 #define CPU_SHA ((uint64_t)UCONST64(0x400000000)) // bit 34; SHA instructions
334 #define CPU_FMA ((uint64_t)UCONST64(0x800000000)) // bit 35; FMA instructions
335 #define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000)) // bit 36; Vzeroupper instruction
336 #define CPU_AVX512_VPOPCNTDQ ((uint64_t)UCONST64(0x2000000000)) // bit 37; Vector popcount
337 #define CPU_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) // bit 38; Vector carryless multiplication
338 #define CPU_VAES ((uint64_t)UCONST64(0x8000000000)) // bit 39; Vector AES instructions
339
340 enum Extended_Family {
341 // AMD
342 CPU_FAMILY_AMD_11H = 0x11,
343 // ZX
344 CPU_FAMILY_ZX_CORE_F6 = 6,
345 CPU_FAMILY_ZX_CORE_F7 = 7,
346 // Intel
347 CPU_FAMILY_INTEL_CORE = 6,
348 CPU_MODEL_NEHALEM = 0x1e,
349 CPU_MODEL_NEHALEM_EP = 0x1a,
350 CPU_MODEL_NEHALEM_EX = 0x2e,
351 CPU_MODEL_WESTMERE = 0x25,
352 CPU_MODEL_WESTMERE_EP = 0x2c,
353 CPU_MODEL_WESTMERE_EX = 0x2f,
354 CPU_MODEL_SANDYBRIDGE = 0x2a,
355 CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
356 CPU_MODEL_IVYBRIDGE_EP = 0x3a,
357 CPU_MODEL_HASWELL_E3 = 0x3c,
358 CPU_MODEL_HASWELL_E7 = 0x3f,
531 _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
532 result |= CPU_AVX512F;
533 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
534 result |= CPU_AVX512CD;
535 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
536 result |= CPU_AVX512DQ;
537 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
538 result |= CPU_AVX512PF;
539 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
540 result |= CPU_AVX512ER;
541 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
542 result |= CPU_AVX512BW;
543 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
544 result |= CPU_AVX512VL;
545 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
546 result |= CPU_AVX512_VPOPCNTDQ;
547 if (_cpuid_info.sef_cpuid7_ecx.bits.vpclmulqdq != 0)
548 result |= CPU_VPCLMULQDQ;
549 if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
550 result |= CPU_VAES;
551 }
552 }
553 if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
554 result |= CPU_BMI1;
555 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
556 result |= CPU_TSC;
557 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
558 result |= CPU_TSCINV;
559 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
560 result |= CPU_AES;
561 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
562 result |= CPU_ERMS;
563 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
564 result |= CPU_CLMUL;
565 if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
566 result |= CPU_RTM;
567 if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
568 result |= CPU_ADX;
569 if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
570 result |= CPU_BMI2;
// Single-bit feature queries: each returns true when the named CPU_* bit is
// set in _features, false otherwise.
799 static bool supports_sse4_1() { return (_features & CPU_SSE4_1) != 0; }
800 static bool supports_sse4_2() { return (_features & CPU_SSE4_2) != 0; }
801 static bool supports_popcnt() { return (_features & CPU_POPCNT) != 0; }
802 static bool supports_avx() { return (_features & CPU_AVX) != 0; }
803 static bool supports_avx2() { return (_features & CPU_AVX2) != 0; }
804 static bool supports_tsc() { return (_features & CPU_TSC) != 0; }
805 static bool supports_aes() { return (_features & CPU_AES) != 0; }
806 static bool supports_erms() { return (_features & CPU_ERMS) != 0; }
807 static bool supports_clmul() { return (_features & CPU_CLMUL) != 0; }
808 static bool supports_rtm() { return (_features & CPU_RTM) != 0; }
809 static bool supports_bmi1() { return (_features & CPU_BMI1) != 0; }
810 static bool supports_bmi2() { return (_features & CPU_BMI2) != 0; }
811 static bool supports_adx() { return (_features & CPU_ADX) != 0; }
// AVX-512 queries. supports_evex() is the test for the AVX-512 foundation bit
// (CPU_AVX512F); the other single-bit queries test their own CPU_AVX512* bit.
812 static bool supports_evex() { return (_features & CPU_AVX512F) != 0; }
813 static bool supports_avx512dq() { return (_features & CPU_AVX512DQ) != 0; }
814 static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
815 static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
816 static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
817 static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
818 static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
// Combined queries built from the predicates above.
// vlbw: both the BW and VL bits are set.
819 static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
// novl / nobw: AVX-512F is present but the VL (resp. BW) bit is not.
820 static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
821 static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
// avx256only: AVX2 is present and AVX-512F is not.
822 static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
// avxonly: AVX or AVX2 is present and AVX-512F is not.
823 static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
// Queries for the feature bits that live above bit 31 (the CPU_* macros).
824 static bool supports_sha() { return (_features & CPU_SHA) != 0; }
// FMA is reported only when the FMA bit is set AND AVX is also supported.
825 static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); }
826 static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
// NOTE(review): in the visible part of the flag computation the VPOPCNTDQ,
// VPCLMULQDQ and VAES bits are only set inside the AVX-512F branch, so these
// three would be false whenever that branch is not taken -- confirm against
// the full function.
827 static bool supports_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
828 static bool supports_vpclmulqdq() { return (_features & CPU_VPCLMULQDQ) != 0; }
829 static bool supports_vaes() { return (_features & CPU_VAES) != 0; }
830
831 // Intel features
// Returns true only when is_intel() holds and extended_cpu_family() equals
// CPU_FAMILY_INTEL_CORE; the family is not consulted if is_intel() is false.
832 static bool is_intel_family_core() { return is_intel() &&
833 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
834
835 static bool is_intel_tsc_synched_at_init() {
836 if (is_intel_family_core()) {
837 uint32_t ext_model = extended_cpu_model();
838 if (ext_model == CPU_MODEL_NEHALEM_EP ||
|
319 CPU_BMI1 = (1 << 22),
320 CPU_BMI2 = (1 << 23),
321 CPU_RTM = (1 << 24), // Restricted Transactional Memory instructions
322 CPU_ADX = (1 << 25),
323 CPU_AVX512F = (1 << 26), // AVX 512bit foundation instructions
324 CPU_AVX512DQ = (1 << 27),
325 CPU_AVX512PF = (1 << 28),
326 CPU_AVX512ER = (1 << 29),
327 CPU_AVX512CD = (1 << 30)
328 // Keeping sign bit 31 unassigned.
329 };
330
// Feature bits 32..40. These are uint64_t macro constants rather than
// enumerators: the enum above stops at bit 30 and leaves sign bit 31 unused.
331 #define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // bit 32; enums are limited to 31 bit
332 #define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // bit 33; EVEX instructions with smaller vector length
333 #define CPU_SHA ((uint64_t)UCONST64(0x400000000)) // bit 34; SHA instructions
334 #define CPU_FMA ((uint64_t)UCONST64(0x800000000)) // bit 35; FMA instructions
335 #define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000)) // bit 36; Vzeroupper instruction
336 #define CPU_AVX512_VPOPCNTDQ ((uint64_t)UCONST64(0x2000000000)) // bit 37; Vector popcount
337 #define CPU_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) // bit 38; Vector carryless multiplication
338 #define CPU_VAES ((uint64_t)UCONST64(0x8000000000)) // bit 39; Vector AES instructions
339 #define CPU_AVX512VBMI ((uint64_t)UCONST64(0x10000000000)) // bit 40; Vector BMI instructions
340
341 enum Extended_Family {
342 // AMD
343 CPU_FAMILY_AMD_11H = 0x11,
344 // ZX
345 CPU_FAMILY_ZX_CORE_F6 = 6,
346 CPU_FAMILY_ZX_CORE_F7 = 7,
347 // Intel
348 CPU_FAMILY_INTEL_CORE = 6,
349 CPU_MODEL_NEHALEM = 0x1e,
350 CPU_MODEL_NEHALEM_EP = 0x1a,
351 CPU_MODEL_NEHALEM_EX = 0x2e,
352 CPU_MODEL_WESTMERE = 0x25,
353 CPU_MODEL_WESTMERE_EP = 0x2c,
354 CPU_MODEL_WESTMERE_EX = 0x2f,
355 CPU_MODEL_SANDYBRIDGE = 0x2a,
356 CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
357 CPU_MODEL_IVYBRIDGE_EP = 0x3a,
358 CPU_MODEL_HASWELL_E3 = 0x3c,
359 CPU_MODEL_HASWELL_E7 = 0x3f,
532 _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
533 result |= CPU_AVX512F;
534 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
535 result |= CPU_AVX512CD;
536 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
537 result |= CPU_AVX512DQ;
538 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
539 result |= CPU_AVX512PF;
540 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
541 result |= CPU_AVX512ER;
542 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
543 result |= CPU_AVX512BW;
544 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
545 result |= CPU_AVX512VL;
546 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
547 result |= CPU_AVX512_VPOPCNTDQ;
548 if (_cpuid_info.sef_cpuid7_ecx.bits.vpclmulqdq != 0)
549 result |= CPU_VPCLMULQDQ;
550 if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
551 result |= CPU_VAES;
552 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
553 result |= CPU_AVX512VBMI;
554 }
555 }
556 if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
557 result |= CPU_BMI1;
558 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
559 result |= CPU_TSC;
560 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
561 result |= CPU_TSCINV;
562 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
563 result |= CPU_AES;
564 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
565 result |= CPU_ERMS;
566 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
567 result |= CPU_CLMUL;
568 if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
569 result |= CPU_RTM;
570 if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
571 result |= CPU_ADX;
572 if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
573 result |= CPU_BMI2;
// Single-bit feature queries: each returns true when the named CPU_* bit is
// set in _features, false otherwise.
802 static bool supports_sse4_1() { return (_features & CPU_SSE4_1) != 0; }
803 static bool supports_sse4_2() { return (_features & CPU_SSE4_2) != 0; }
804 static bool supports_popcnt() { return (_features & CPU_POPCNT) != 0; }
805 static bool supports_avx() { return (_features & CPU_AVX) != 0; }
806 static bool supports_avx2() { return (_features & CPU_AVX2) != 0; }
807 static bool supports_tsc() { return (_features & CPU_TSC) != 0; }
808 static bool supports_aes() { return (_features & CPU_AES) != 0; }
809 static bool supports_erms() { return (_features & CPU_ERMS) != 0; }
810 static bool supports_clmul() { return (_features & CPU_CLMUL) != 0; }
811 static bool supports_rtm() { return (_features & CPU_RTM) != 0; }
812 static bool supports_bmi1() { return (_features & CPU_BMI1) != 0; }
813 static bool supports_bmi2() { return (_features & CPU_BMI2) != 0; }
814 static bool supports_adx() { return (_features & CPU_ADX) != 0; }
// AVX-512 queries. supports_evex() is the test for the AVX-512 foundation bit
// (CPU_AVX512F); the other single-bit queries test their own CPU_AVX512* bit.
815 static bool supports_evex() { return (_features & CPU_AVX512F) != 0; }
816 static bool supports_avx512dq() { return (_features & CPU_AVX512DQ) != 0; }
817 static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
818 static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
819 static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
820 static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
821 static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
822 static bool supports_avx512vbmi() { return (_features & CPU_AVX512VBMI) != 0; }
// Combined queries built from the predicates above.
// vlbw: both the BW and VL bits are set.
823 static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
// novl / nobw: AVX-512F is present but the VL (resp. BW) bit is not.
824 static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
825 static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
// avx256only: AVX2 is present and AVX-512F is not.
826 static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
// avxonly: AVX or AVX2 is present and AVX-512F is not.
827 static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
// Queries for the feature bits that live above bit 31 (the CPU_* macros).
828 static bool supports_sha() { return (_features & CPU_SHA) != 0; }
// FMA is reported only when the FMA bit is set AND AVX is also supported.
829 static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); }
830 static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
// NOTE(review): in the visible part of the flag computation the VPOPCNTDQ,
// VPCLMULQDQ and VAES bits are only set inside the AVX-512F branch, so these
// three would be false whenever that branch is not taken -- confirm against
// the full function.
831 static bool supports_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
832 static bool supports_vpclmulqdq() { return (_features & CPU_VPCLMULQDQ) != 0; }
833 static bool supports_vaes() { return (_features & CPU_VAES) != 0; }
834
835 // Intel features
// Returns true only when is_intel() holds and extended_cpu_family() equals
// CPU_FAMILY_INTEL_CORE; the family is not consulted if is_intel() is false.
836 static bool is_intel_family_core() { return is_intel() &&
837 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
838
839 static bool is_intel_tsc_synched_at_init() {
840 if (is_intel_family_core()) {
841 uint32_t ext_model = extended_cpu_model();
842 if (ext_model == CPU_MODEL_NEHALEM_EP ||
|