// [AsmJit] // Complete x86/x64 JIT and Remote Assembler for C++. // // [License] // Zlib - See LICENSE.md file in the package. // [Export] #define ASMJIT_EXPORTS // [Dependencies] #include "../base/cpuinfo.h" #include "../base/utils.h" #if ASMJIT_OS_POSIX # include # include # include # include #endif // ASMJIT_OS_POSIX #if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64 # if ASMJIT_CC_MSC_GE(14, 0, 0) # include // Required by `__cpuid()` and `_xgetbv()`. # endif // _MSC_VER >= 1400 #endif #if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64 # if ASMJIT_OS_LINUX # include // Required by `getauxval()`. # endif #endif // [Api-Begin] #include "../apibegin.h" namespace asmjit { // ============================================================================ // [asmjit::CpuInfo - Detect ARM & ARM64] // ============================================================================ // ARM information has to be retrieved by the OS (this is how ARM was designed). #if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64 #if ASMJIT_ARCH_ARM64 static void armPopulateBaseline64Features(CpuInfo* cpuInfo) noexcept { // Thumb (including all variations) is only supported on ARM32. // ARM64 is based on ARMv8 and newer. cpuInfo->addFeature(CpuInfo::kArmFeatureV6); cpuInfo->addFeature(CpuInfo::kArmFeatureV7); cpuInfo->addFeature(CpuInfo::kArmFeatureV8); // ARM64 comes with these features by default. cpuInfo->addFeature(CpuInfo::kArmFeatureDSP); cpuInfo->addFeature(CpuInfo::kArmFeatureIDIV); cpuInfo->addFeature(CpuInfo::kArmFeatureVFP2); cpuInfo->addFeature(CpuInfo::kArmFeatureVFP3); cpuInfo->addFeature(CpuInfo::kArmFeatureVFP4); } #endif // ASMJIT_ARCH_ARM64 #if ASMJIT_OS_WINDOWS //! \internal //! //! Detect ARM CPU features on Windows. //! //! The detection is based on `IsProcessorFeaturePresent()` API call. static void armDetectCpuInfoOnWindows(CpuInfo* cpuInfo) noexcept { #if ASMJIT_ARCH_ARM32 cpuInfo->setArch(kArchArm32); // Windows for ARM requires at least ARMv7 with DSP extensions. cpuInfo->addFeature(CpuInfo::kArmFeatureV6); cpuInfo->addFeature(CpuInfo::kArmFeatureV7); cpuInfo->addFeature(CpuInfo::kArmFeatureDSP); // Windows for ARM requires VFP3. cpuInfo->addFeature(CpuInfo::kArmFeatureVFP2); cpuInfo->addFeature(CpuInfo::kArmFeatureVFP3); // Windows for ARM requires and uses THUMB2. cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB); cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB2); #else cpuInfo->setArch(kArchArm64); armPopulateBaseline64Features(cpuInfo); #endif // Windows for ARM requires NEON. cpuInfo->addFeature(CpuInfo::kArmFeatureNEON); // Detect additional CPU features by calling `IsProcessorFeaturePresent()`. struct WinPFPMapping { uint32_t pfpId, featureId; }; static const WinPFPMapping mapping[] = { { PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE , CpuInfo::kArmFeatureVFP4 }, { PF_ARM_VFP_32_REGISTERS_AVAILABLE , CpuInfo::kArmFeatureVFP_D32 }, { PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE, CpuInfo::kArmFeatureIDIV }, { PF_ARM_64BIT_LOADSTORE_ATOMIC , CpuInfo::kArmFeatureAtomics64 } }; for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(mapping); i++) if (::IsProcessorFeaturePresent(mapping[i].pfpId)) cpuInfo->addFeature(mapping[i].featureId); } #endif // ASMJIT_OS_WINDOWS #if ASMJIT_OS_LINUX struct LinuxHWCapMapping { uint32_t hwcapMask, featureId; }; static void armDetectHWCaps(CpuInfo* cpuInfo, unsigned long type, const LinuxHWCapMapping* mapping, size_t length) noexcept { unsigned long mask = getauxval(type); for (size_t i = 0; i < length; i++) if ((mask & mapping[i].hwcapMask) == mapping[i].hwcapMask) cpuInfo->addFeature(mapping[i].featureId); } //! \internal //! //! Detect ARM CPU features on Linux. //! //! The detection is based on `getauxval()`. static void armDetectCpuInfoOnLinux(CpuInfo* cpuInfo) noexcept { #if ASMJIT_ARCH_ARM32 cpuInfo->setArch(kArchArm32); // `AT_HWCAP` provides ARMv7 (and less) related flags. static const LinuxHWCapMapping hwCapMapping[] = { { /* HWCAP_VFPv3 */ (1 << 13), CpuInfo::kArmFeatureVFP3 }, { /* HWCAP_VFPv4 */ (1 << 16), CpuInfo::kArmFeatureVFP4 }, { /* HWCAP_IDIVA */ (3 << 17), CpuInfo::kArmFeatureIDIV }, { /* HWCAP_VFPD32 */ (1 << 19), CpuInfo::kArmFeatureVFP_D32 }, { /* HWCAP_NEON */ (1 << 12), CpuInfo::kArmFeatureNEON }, { /* HWCAP_EDSP */ (1 << 7), CpuInfo::kArmFeatureDSP } }; armDetectHWCaps(cpuInfo, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping)); // VFP3 implies VFP2. if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFP3)) cpuInfo->addFeature(CpuInfo::kArmFeatureVFP2); // VFP2 implies ARMv6. if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFP2)) cpuInfo->addFeature(CpuInfo::kArmFeatureV6); // VFP3 or NEON implies ARMv7. if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFP3) || cpuInfo->hasFeature(CpuInfo::kArmFeatureNEON)) cpuInfo->addFeature(CpuInfo::kArmFeatureV7); // `AT_HWCAP2` provides ARMv8 related flags. static const LinuxHWCapMapping hwCap2Mapping[] = { { /* HWCAP2_AES */ (1 << 0), CpuInfo::kArmFeatureAES }, { /* HWCAP2_CRC32 */ (1 << 4), CpuInfo::kArmFeatureCRC32 }, { /* HWCAP2_PMULL */ (1 << 1), CpuInfo::kArmFeaturePMULL }, { /* HWCAP2_SHA1 */ (1 << 2), CpuInfo::kArmFeatureSHA1 }, { /* HWCAP2_SHA2 */ (1 << 3), CpuInfo::kArmFeatureSHA256 } }; armDetectHWCaps(cpuInfo, AT_HWCAP2, hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping)); if (cpuInfo->hasFeature(CpuInfo::kArmFeatureAES ) || cpuInfo->hasFeature(CpuInfo::kArmFeatureCRC32 ) || cpuInfo->hasFeature(CpuInfo::kArmFeaturePMULL ) || cpuInfo->hasFeature(CpuInfo::kArmFeatureSHA1 ) || cpuInfo->hasFeature(CpuInfo::kArmFeatureSHA256)) { cpuInfo->addFeature(CpuInfo::kArmFeatureV8); } #else cpuInfo->setArch(kArchArm64); armPopulateBaseline64Features(cpuInfo); // `AT_HWCAP` provides ARMv8 related flags. static const LinuxHWCapMapping hwCapMapping[] = { { /* HWCAP_ASIMD */ (1 << 1), CpuInfo::kArmFeatureNEON }, { /* HWCAP_AES */ (1 << 3), CpuInfo::kArmFeatureAES }, { /* HWCAP_CRC32 */ (1 << 7), CpuInfo::kArmFeatureCRC32 }, { /* HWCAP_PMULL */ (1 << 4), CpuInfo::kArmFeaturePMULL }, { /* HWCAP_SHA1 */ (1 << 5), CpuInfo::kArmFeatureSHA1 }, { /* HWCAP_SHA2 */ (1 << 6), CpuInfo::kArmFeatureSHA256 } { /* HWCAP_ATOMICS */ (1 << 8), CpuInfo::kArmFeatureAtomics64 } }; armDetectHWCaps(cpuInfo, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping)); // `AT_HWCAP2` is not used at the moment. #endif } #endif // ASMJIT_OS_LINUX static void armDetectCpuInfo(CpuInfo* cpuInfo) noexcept { #if ASMJIT_OS_WINDOWS armDetectCpuInfoOnWindows(cpuInfo); #elif ASMJIT_OS_LINUX armDetectCpuInfoOnLinux(cpuInfo); #else # error "[asmjit] armDetectCpuInfo() - Unsupported OS." #endif } #endif // ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64 // ============================================================================ // [asmjit::CpuInfo - Detect X86 & X64] // ============================================================================ #if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64 //! \internal //! //! X86 CPUID result. struct CpuIdResult { uint32_t eax, ebx, ecx, edx; }; //! \internal //! //! Content of XCR register, result of XGETBV instruction. struct XGetBVResult { uint32_t eax, edx; }; #if ASMJIT_CC_MSC && !ASMJIT_CC_MSC_GE(15, 0, 30729) && ASMJIT_ARCH_X64 //! \internal //! //! HACK: VS2008 or less, 64-bit mode - `__cpuidex` doesn't exist! However, //! 64-bit calling convention specifies the first parameter to be passed in //! ECX, so we may be lucky if compiler doesn't move the register, otherwise //! the result would be wrong. static void ASMJIT_NOINLINE void x86CallCpuIdWorkaround(uint32_t inEcx, uint32_t inEax, CpuIdResult* result) noexcept { __cpuid(reinterpret_cast(result), inEax); } #endif //! \internal //! //! Wrapper to call `cpuid` instruction. static void ASMJIT_INLINE x86CallCpuId(CpuIdResult* result, uint32_t inEax, uint32_t inEcx = 0) noexcept { #if ASMJIT_CC_MSC && ASMJIT_CC_MSC_GE(15, 0, 30729) __cpuidex(reinterpret_cast(result), inEax, inEcx); #elif ASMJIT_CC_MSC && ASMJIT_ARCH_X64 x86CallCpuIdWorkaround(inEcx, inEax, result); #elif ASMJIT_CC_MSC && ASMJIT_ARCH_X86 uint32_t paramEax = inEax; uint32_t paramEcx = inEcx; uint32_t* out = reinterpret_cast(result); __asm { mov eax, paramEax mov ecx, paramEcx mov edi, out cpuid mov dword ptr[edi + 0], eax mov dword ptr[edi + 4], ebx mov dword ptr[edi + 8], ecx mov dword ptr[edi + 12], edx } #elif (ASMJIT_CC_GCC || ASMJIT_CC_CLANG) && ASMJIT_ARCH_X86 __asm__ __volatile__( "mov %%ebx, %%edi\n" "cpuid\n" "xchg %%edi, %%ebx\n" : "=a"(result->eax), "=D"(result->ebx), "=c"(result->ecx), "=d"(result->edx) : "a"(inEax), "c"(inEcx) ); #elif (ASMJIT_CC_GCC || ASMJIT_CC_CLANG) && ASMJIT_ARCH_X64 __asm__ __volatile__( \ "mov %%rbx, %%rdi\n" "cpuid\n" "xchg %%rdi, %%rbx\n" : "=a"(result->eax), "=D"(result->ebx), "=c"(result->ecx), "=d"(result->edx) : "a"(inEax), "c"(inEcx) ); #else # error "[asmjit] x86CallCpuid() - Unsupported compiler." #endif } //! \internal //! //! Wrapper to call `xgetbv` instruction. static void x86CallXGetBV(XGetBVResult* result, uint32_t inEcx) noexcept { #if ASMJIT_CC_MSC_GE(16, 0, 40219) // 2010SP1+ uint64_t value = _xgetbv(inEcx); result->eax = static_cast(value & 0xFFFFFFFFU); result->edx = static_cast(value >> 32); #elif ASMJIT_CC_GCC || ASMJIT_CC_CLANG uint32_t outEax; uint32_t outEdx; // Replaced, because the world is not perfect: // __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx)); __asm__ __volatile__(".byte 0x0F, 0x01, 0xd0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx)); result->eax = outEax; result->edx = outEdx; #else result->eax = 0; result->edx = 0; #endif } //! \internal //! //! Map a 12-byte vendor string returned by `cpuid` into a `CpuInfo::Vendor` ID. static uint32_t x86GetCpuVendorID(const char* vendorString) noexcept { struct VendorData { uint32_t id; char text[12]; }; static const VendorData vendorList[] = { { CpuInfo::kVendorIntel , { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' } }, { CpuInfo::kVendorAMD , { 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' } }, { CpuInfo::kVendorVIA , { 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 } }, { CpuInfo::kVendorVIA , { 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' } } }; uint32_t dw0 = reinterpret_cast(vendorString)[0]; uint32_t dw1 = reinterpret_cast(vendorString)[1]; uint32_t dw2 = reinterpret_cast(vendorString)[2]; for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(vendorList); i++) { if (dw0 == reinterpret_cast(vendorList[i].text)[0] && dw1 == reinterpret_cast(vendorList[i].text)[1] && dw2 == reinterpret_cast(vendorList[i].text)[2]) return vendorList[i].id; } return CpuInfo::kVendorNone; } static ASMJIT_INLINE void x86SimplifyBrandString(char* s) noexcept { // Used to always clear the current character to ensure that the result // doesn't contain garbage after the new zero terminator. char* d = s; char prev = 0; char curr = s[0]; s[0] = '\0'; for (;;) { if (curr == 0) break; if (curr == ' ') { if (prev == '@' || s[1] == ' ' || s[1] == '@') goto L_Skip; } d[0] = curr; d++; prev = curr; L_Skip: curr = *++s; s[0] = '\0'; } d[0] = '\0'; } static void x86DetectCpuInfo(CpuInfo* cpuInfo) noexcept { uint32_t i, maxId; CpuIdResult regs; XGetBVResult xcr0 = { 0, 0 }; // Architecture is known at compile-time. cpuInfo->setArch(ASMJIT_ARCH_X86 ? kArchX86 : kArchX64); // -------------------------------------------------------------------------- // [CPUID EAX=0x0] // -------------------------------------------------------------------------- // Get vendor string/id. x86CallCpuId(®s, 0x0); maxId = regs.eax; ::memcpy(cpuInfo->_vendorString + 0, ®s.ebx, 4); ::memcpy(cpuInfo->_vendorString + 4, ®s.edx, 4); ::memcpy(cpuInfo->_vendorString + 8, ®s.ecx, 4); cpuInfo->_vendorId = x86GetCpuVendorID(cpuInfo->_vendorString); // -------------------------------------------------------------------------- // [CPUID EAX=0x1] // -------------------------------------------------------------------------- if (maxId >= 0x1) { // Get feature flags in ECX/EDX and family/model in EAX. x86CallCpuId(®s, 0x1); // Fill family and model fields. cpuInfo->_family = (regs.eax >> 8) & 0x0F; cpuInfo->_model = (regs.eax >> 4) & 0x0F; cpuInfo->_stepping = (regs.eax ) & 0x0F; // Use extended family and model fields. if (cpuInfo->_family == 0x0F) { cpuInfo->_family += ((regs.eax >> 20) & 0xFF); cpuInfo->_model += ((regs.eax >> 16) & 0x0F) << 4; } cpuInfo->_x86Data._processorType = ((regs.eax >> 12) & 0x03); cpuInfo->_x86Data._brandIndex = ((regs.ebx ) & 0xFF); cpuInfo->_x86Data._flushCacheLineSize = ((regs.ebx >> 8) & 0xFF) * 8; cpuInfo->_x86Data._maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF); if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE3); if (regs.ecx & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeaturePCLMULQDQ); if (regs.ecx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureMONITOR); if (regs.ecx & 0x00000200U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSSE3); if (regs.ecx & 0x00002000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMPXCHG16B); if (regs.ecx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4_1); if (regs.ecx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4_2); if (regs.ecx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMOVBE); if (regs.ecx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeaturePOPCNT); if (regs.ecx & 0x02000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAESNI); if (regs.ecx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVE); if (regs.ecx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVE_OS); if (regs.ecx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDRAND); if (regs.edx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDTSC); if (regs.edx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMPXCHG8B); if (regs.edx & 0x00008000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMOV); if (regs.edx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLFLUSH); if (regs.edx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMMX); if (regs.edx & 0x01000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFXSR); if (regs.edx & 0x02000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE) .addFeature(CpuInfo::kX86FeatureMMX2); if (regs.edx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE) .addFeature(CpuInfo::kX86FeatureSSE2); if (regs.edx & 0x10000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMT); // AMD sets multi-threading ON if it has two or more cores. if (cpuInfo->_hwThreadsCount == 1 && cpuInfo->_vendorId == CpuInfo::kVendorAMD && (regs.edx & 0x10000000U)) cpuInfo->_hwThreadsCount = 2; // Get the content of XCR0 if supported by CPU and enabled by OS. if ((regs.ecx & 0x0C000000U) == 0x0C000000U) x86CallXGetBV(&xcr0, 0); // Detect AVX+. if (regs.ecx & 0x10000000U) { // - XCR0[2:1] == 11b // XMM & YMM states need to be enabled by OS. if ((xcr0.eax & 0x00000006U) == 0x00000006U) { cpuInfo->addFeature(CpuInfo::kX86FeatureAVX); if (regs.ecx & 0x00004000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFMA3); if (regs.ecx & 0x20000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureF16C); } } } // -------------------------------------------------------------------------- // [CPUID EAX=0x7 ECX=0x0] // -------------------------------------------------------------------------- // Detect new features if the processor supports CPUID-07. bool maybeMPX = false; if (maxId >= 0x7) { x86CallCpuId(®s, 0x7); if (regs.ebx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureFSGSBASE); if (regs.ebx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureBMI); if (regs.ebx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureHLE); if (regs.ebx & 0x00000080U) cpuInfo->addFeature(CpuInfo::kX86FeatureSMEP); if (regs.ebx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeatureBMI2); if (regs.ebx & 0x00000200U) cpuInfo->addFeature(CpuInfo::kX86FeatureERMS); if (regs.ebx & 0x00000800U) cpuInfo->addFeature(CpuInfo::kX86FeatureRTM); if (regs.ebx & 0x00004000U) maybeMPX = true; if (regs.ebx & 0x00040000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDSEED); if (regs.ebx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureADX); if (regs.ebx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSMAP); if (regs.ebx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeaturePCOMMIT); if (regs.ebx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLFLUSH_OPT); if (regs.ebx & 0x01000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLWB); if (regs.ebx & 0x20000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSHA); if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeaturePREFETCHWT1); // Detect AVX2. if (cpuInfo->hasFeature(CpuInfo::kX86FeatureAVX)) if (regs.ebx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX2); // Detect AVX-512+. if (regs.ebx & 0x00010000U) { // - XCR0[2:1] == 11b // XMM/YMM states need to be enabled by OS. // - XCR0[7:5] == 111b // Upper 256-bit of ZMM0-XMM15 and ZMM16-ZMM31 need to be enabled by the OS. if ((xcr0.eax & 0x000000E6U) == 0x000000E6U) { cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512F); if (regs.ebx & 0x00020000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512DQ); if (regs.ebx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512IFMA); if (regs.ebx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512PF); if (regs.ebx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512ER); if (regs.ebx & 0x10000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512CD); if (regs.ebx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512BW); if (regs.ebx & 0x80000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512VL); if (regs.ecx & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512VBMI); } } } // -------------------------------------------------------------------------- // [CPUID EAX=0xD, ECX=0x0] // -------------------------------------------------------------------------- if (maxId >= 0xD && maybeMPX) { x86CallCpuId(®s, 0xD); // Both CPUID result and XCR0 has to be enabled to have support for MPX. if (((regs.eax & xcr0.eax) & 0x00000018U) == 0x00000018U) { cpuInfo->addFeature(CpuInfo::kX86FeatureMPX); } } // -------------------------------------------------------------------------- // [CPUID EAX=0x80000000...maxId] // -------------------------------------------------------------------------- // Several CPUID calls are required to get the whole branc string. It's easy // to copy one DWORD at a time instead of performing a byte copy. uint32_t* brand = reinterpret_cast(cpuInfo->_brandString); i = maxId = 0x80000000U; do { x86CallCpuId(®s, i); switch (i) { case 0x80000000U: maxId = Utils::iMin(regs.eax, 0x80000004); break; case 0x80000001U: if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureLAHF_SAHF); if (regs.ecx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureLZCNT); if (regs.ecx & 0x00000040U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4A); if (regs.ecx & 0x00000080U) cpuInfo->addFeature(CpuInfo::kX86FeatureMSSE); if (regs.ecx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeaturePREFETCH); if (regs.ecx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureTBM); if (regs.edx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureNX); if (regs.edx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFXSR_OPT); if (regs.edx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMMX2); if (regs.edx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDTSCP); if (regs.edx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86Feature3DNOW2) .addFeature(CpuInfo::kX86FeatureMMX2); if (regs.edx & 0x80000000U) cpuInfo->addFeature(CpuInfo::kX86Feature3DNOW); if (cpuInfo->hasFeature(CpuInfo::kX86FeatureAVX)) { if (regs.ecx & 0x00000800U) cpuInfo->addFeature(CpuInfo::kX86FeatureXOP); if (regs.ecx & 0x00010000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFMA4); } break; case 0x80000002U: case 0x80000003U: case 0x80000004U: *brand++ = regs.eax; *brand++ = regs.ebx; *brand++ = regs.ecx; *brand++ = regs.edx; break; default: // Stop the loop, additional features can be detected in the future. i = maxId; break; } } while (i++ < maxId); // Simplify CPU brand string by removing unnecessary spaces. x86SimplifyBrandString(cpuInfo->_brandString); } #endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64 // ============================================================================ // [asmjit::CpuInfo - Detect - HWThreadsCount] // ============================================================================ static uint32_t cpuDetectHWThreadsCount() noexcept { #if ASMJIT_OS_WINDOWS SYSTEM_INFO info; ::GetSystemInfo(&info); return info.dwNumberOfProcessors; #elif ASMJIT_OS_POSIX && defined(_SC_NPROCESSORS_ONLN) long res = ::sysconf(_SC_NPROCESSORS_ONLN); if (res <= 0) return 1; return static_cast(res); #else return 1; #endif } // ============================================================================ // [asmjit::CpuInfo - Detect] // ============================================================================ void CpuInfo::detect() noexcept { reset(); // Detect the number of hardware threads available. _hwThreadsCount = cpuDetectHWThreadsCount(); #if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64 armDetectCpuInfo(this); #endif // ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64 #if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64 x86DetectCpuInfo(this); #endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64 } // ============================================================================ // [asmjit::CpuInfo - GetHost] // ============================================================================ struct HostCpuInfo : public CpuInfo { ASMJIT_INLINE HostCpuInfo() noexcept : CpuInfo() { detect(); } }; const CpuInfo& CpuInfo::getHost() noexcept { static HostCpuInfo host; return host; } } // asmjit namespace // [Api-End] #include "../apiend.h"