324 CPU_BMI2 = (1 << 23),
325 CPU_RTM = (1 << 24), // Restricted Transactional Memory instructions
326 CPU_ADX = (1 << 25),
327 CPU_AVX512F = (1 << 26), // AVX 512bit foundation instructions
328 CPU_AVX512DQ = (1 << 27),
329 CPU_AVX512PF = (1 << 28),
330 CPU_AVX512ER = (1 << 29),
331 CPU_AVX512CD = (1 << 30)
332 // Keeping sign bit 31 unassigned.
333 };
334
335 #define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // bit 32; a macro because enums are limited to 31 bits
336 #define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // bit 33; EVEX instructions with smaller vector length
337 #define CPU_SHA ((uint64_t)UCONST64(0x400000000)) // bit 34; SHA instructions
338 #define CPU_FMA ((uint64_t)UCONST64(0x800000000)) // bit 35; FMA instructions
339 #define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000)) // bit 36; Vzeroupper instruction
340 #define CPU_AVX512_VPOPCNTDQ ((uint64_t)UCONST64(0x2000000000)) // bit 37; Vector popcount
341 #define CPU_AVX512_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) // bit 38; Vector carryless multiplication
342 #define CPU_VAES ((uint64_t)UCONST64(0x8000000000)) // bit 39; Vector AES instructions
343 #define CPU_VNNI ((uint64_t)UCONST64(0x10000000000)) // bit 40; Vector Neural Network Instructions
344
345 #define CPU_FLUSH ((uint64_t)UCONST64(0x20000000000)) // bit 41; flush instruction
346 #define CPU_FLUSHOPT ((uint64_t)UCONST64(0x40000000000)) // bit 42; flushopt instruction
347 #define CPU_CLWB ((uint64_t)UCONST64(0x80000000000)) // bit 43; clwb instruction
348
349 enum Extended_Family {
350 // AMD
351 CPU_FAMILY_AMD_11H = 0x11,
352 // ZX
353 CPU_FAMILY_ZX_CORE_F6 = 6,
354 CPU_FAMILY_ZX_CORE_F7 = 7,
355 // Intel
356 CPU_FAMILY_INTEL_CORE = 6,
357 CPU_MODEL_NEHALEM = 0x1e,
358 CPU_MODEL_NEHALEM_EP = 0x1a,
359 CPU_MODEL_NEHALEM_EX = 0x2e,
360 CPU_MODEL_WESTMERE = 0x25,
361 CPU_MODEL_WESTMERE_EP = 0x2c,
362 CPU_MODEL_WESTMERE_EX = 0x2f,
363 CPU_MODEL_SANDYBRIDGE = 0x2a,
550 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
551 result |= CPU_AVX512CD;
552 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
553 result |= CPU_AVX512DQ;
554 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
555 result |= CPU_AVX512PF;
556 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
557 result |= CPU_AVX512ER;
558 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
559 result |= CPU_AVX512BW;
560 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
561 result |= CPU_AVX512VL;
562 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
563 result |= CPU_AVX512_VPOPCNTDQ;
564 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
565 result |= CPU_AVX512_VPCLMULQDQ;
566 if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
567 result |= CPU_VAES;
568 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
569 result |= CPU_VNNI;
570 }
571 }
572 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
573 result |= CPU_BMI1;
574 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
575 result |= CPU_TSC;
576 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
577 result |= CPU_TSCINV;
578 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
579 result |= CPU_AES;
580 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
581 result |= CPU_ERMS;
582 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
583 result |= CPU_CLMUL;
584 if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
585 result |= CPU_RTM;
586 if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
587 result |= CPU_ADX;
588 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
589 result |= CPU_BMI2;
841 static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
842 static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
843 static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
844 static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
845 static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
846 static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); } // EVEX with both BW and VL
847 static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); } // EVEX with both DQ and VL
848 static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() &&
849 supports_avx512bw() && supports_avx512dq()); } // EVEX with VL, BW and DQ all present
850 static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); } // EVEX present, VL absent
851 static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); } // EVEX present, BW absent
852 static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); } // AVX2 present, EVEX absent
853 static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); } // AVX or AVX2 present, EVEX absent
854 static bool supports_sha() { return (_features & CPU_SHA) != 0; }
855 static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); } // FMA bit counts only when supports_avx() also holds
856 static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
857 static bool supports_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
858 static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
859 static bool supports_vaes() { return (_features & CPU_VAES) != 0; }
860 static bool supports_vnni() { return (_features & CPU_VNNI) != 0; }
861
862 // Intel features
863 static bool is_intel_family_core() { return is_intel() &&
864 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } // true only when is_intel() holds and the extended family equals CPU_FAMILY_INTEL_CORE
865
866 static bool is_intel_tsc_synched_at_init() {
867 if (is_intel_family_core()) {
868 uint32_t ext_model = extended_cpu_model();
869 if (ext_model == CPU_MODEL_NEHALEM_EP ||
870 ext_model == CPU_MODEL_WESTMERE_EP ||
871 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
872 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
873 // <= 2-socket invariant tsc support. EX versions are usually used
874 // in > 2-socket systems and likely don't synchronize tscs at
875 // initialization.
876 // Code that uses tsc values must be prepared for them to arbitrarily
877 // jump forward or backward.
878 return true;
879 }
880 }
|
324 CPU_BMI2 = (1 << 23),
325 CPU_RTM = (1 << 24), // Restricted Transactional Memory instructions
326 CPU_ADX = (1 << 25),
327 CPU_AVX512F = (1 << 26), // AVX 512bit foundation instructions
328 CPU_AVX512DQ = (1 << 27),
329 CPU_AVX512PF = (1 << 28),
330 CPU_AVX512ER = (1 << 29),
331 CPU_AVX512CD = (1 << 30)
332 // Keeping sign bit 31 unassigned.
333 };
334
335 #define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // bit 32; a macro because enums are limited to 31 bits
336 #define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // bit 33; EVEX instructions with smaller vector length
337 #define CPU_SHA ((uint64_t)UCONST64(0x400000000)) // bit 34; SHA instructions
338 #define CPU_FMA ((uint64_t)UCONST64(0x800000000)) // bit 35; FMA instructions
339 #define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000)) // bit 36; Vzeroupper instruction
340 #define CPU_AVX512_VPOPCNTDQ ((uint64_t)UCONST64(0x2000000000)) // bit 37; Vector popcount
341 #define CPU_AVX512_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) // bit 38; Vector carryless multiplication
342 #define CPU_VAES ((uint64_t)UCONST64(0x8000000000)) // bit 39; Vector AES instructions
343 #define CPU_VNNI ((uint64_t)UCONST64(0x10000000000)) // bit 40; Vector Neural Network Instructions
344 #define CPU_VBMI2 ((uint64_t)UCONST64(0x100000000000)) // bit 44; VBMI2 shift left double instructions (must not reuse 0x20000000000, which is CPU_FLUSH)
345
346 #define CPU_FLUSH ((uint64_t)UCONST64(0x20000000000)) // bit 41; flush instruction
347 #define CPU_FLUSHOPT ((uint64_t)UCONST64(0x40000000000)) // bit 42; flushopt instruction
348 #define CPU_CLWB ((uint64_t)UCONST64(0x80000000000)) // bit 43; clwb instruction
349
350 enum Extended_Family {
351 // AMD
352 CPU_FAMILY_AMD_11H = 0x11,
353 // ZX
354 CPU_FAMILY_ZX_CORE_F6 = 6,
355 CPU_FAMILY_ZX_CORE_F7 = 7,
356 // Intel
357 CPU_FAMILY_INTEL_CORE = 6,
358 CPU_MODEL_NEHALEM = 0x1e,
359 CPU_MODEL_NEHALEM_EP = 0x1a,
360 CPU_MODEL_NEHALEM_EX = 0x2e,
361 CPU_MODEL_WESTMERE = 0x25,
362 CPU_MODEL_WESTMERE_EP = 0x2c,
363 CPU_MODEL_WESTMERE_EX = 0x2f,
364 CPU_MODEL_SANDYBRIDGE = 0x2a,
551 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
552 result |= CPU_AVX512CD;
553 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
554 result |= CPU_AVX512DQ;
555 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
556 result |= CPU_AVX512PF;
557 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
558 result |= CPU_AVX512ER;
559 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
560 result |= CPU_AVX512BW;
561 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
562 result |= CPU_AVX512VL;
563 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
564 result |= CPU_AVX512_VPOPCNTDQ;
565 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
566 result |= CPU_AVX512_VPCLMULQDQ;
567 if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
568 result |= CPU_VAES;
569 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
570 result |= CPU_VNNI;
571 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
572 result |= CPU_VBMI2;
573 }
574 }
575 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
576 result |= CPU_BMI1;
577 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
578 result |= CPU_TSC;
579 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
580 result |= CPU_TSCINV;
581 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
582 result |= CPU_AES;
583 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
584 result |= CPU_ERMS;
585 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
586 result |= CPU_CLMUL;
587 if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
588 result |= CPU_RTM;
589 if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
590 result |= CPU_ADX;
591 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
592 result |= CPU_BMI2;
844 static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
845 static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
846 static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
847 static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
848 static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
849 static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); } // EVEX with both BW and VL
850 static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); } // EVEX with both DQ and VL
851 static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() &&
852 supports_avx512bw() && supports_avx512dq()); } // EVEX with VL, BW and DQ all present
853 static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); } // EVEX present, VL absent
854 static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); } // EVEX present, BW absent
855 static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); } // AVX2 present, EVEX absent
856 static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); } // AVX or AVX2 present, EVEX absent
857 static bool supports_sha() { return (_features & CPU_SHA) != 0; }
858 static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); } // FMA bit counts only when supports_avx() also holds
859 static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
860 static bool supports_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
861 static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
862 static bool supports_vaes() { return (_features & CPU_VAES) != 0; }
863 static bool supports_vnni() { return (_features & CPU_VNNI) != 0; }
864 static bool supports_vbmi2() { return (_features & CPU_VBMI2) != 0; } // tests the CPU_VBMI2 bit of _features
865
866 // Intel features
867 static bool is_intel_family_core() { return is_intel() &&
868 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } // true only when is_intel() holds and the extended family equals CPU_FAMILY_INTEL_CORE
869
870 static bool is_intel_tsc_synched_at_init() {
871 if (is_intel_family_core()) {
872 uint32_t ext_model = extended_cpu_model();
873 if (ext_model == CPU_MODEL_NEHALEM_EP ||
874 ext_model == CPU_MODEL_WESTMERE_EP ||
875 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
876 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
877 // <= 2-socket invariant tsc support. EX versions are usually used
878 // in > 2-socket systems and likely don't synchronize tscs at
879 // initialization.
880 // Code that uses tsc values must be prepared for them to arbitrarily
881 // jump forward or backward.
882 return true;
883 }
884 }
|