sm-ext-dhooks2/DynamicHooks/thirdparty/AsmJit/base/cpuinfo.cpp

644 lines
23 KiB
C++

// [AsmJit]
// Complete x86/x64 JIT and Remote Assembler for C++.
//
// [License]
// Zlib - See LICENSE.md file in the package.
// [Export]
#define ASMJIT_EXPORTS
// [Dependencies]
#include "../base/cpuinfo.h"
#include "../base/utils.h"
#if ASMJIT_OS_POSIX
# include <errno.h>
# include <sys/statvfs.h>
# include <sys/utsname.h>
# include <unistd.h>
#endif // ASMJIT_OS_POSIX
#if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
# if ASMJIT_CC_MSC_GE(14, 0, 0)
# include <intrin.h> // Required by `__cpuid()` and `_xgetbv()`.
# endif // _MSC_VER >= 1400
#endif
#if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
# if ASMJIT_OS_LINUX
# include <sys/auxv.h> // Required by `getauxval()`.
# endif
#endif
// [Api-Begin]
#include "../apibegin.h"
namespace asmjit {
// ============================================================================
// [asmjit::CpuInfo - Detect ARM & ARM64]
// ============================================================================
// ARM information has to be retrieved by the OS (this is how ARM was designed).
#if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
#if ASMJIT_ARCH_ARM64
static void armPopulateBaseline64Features(CpuInfo* cpuInfo) noexcept {
// Thumb (including all variations) is only supported on ARM32.
// ARM64 is based on ARMv8 and newer.
cpuInfo->addFeature(CpuInfo::kArmFeatureV6);
cpuInfo->addFeature(CpuInfo::kArmFeatureV7);
cpuInfo->addFeature(CpuInfo::kArmFeatureV8);
// ARM64 comes with these features by default.
cpuInfo->addFeature(CpuInfo::kArmFeatureDSP);
cpuInfo->addFeature(CpuInfo::kArmFeatureIDIV);
cpuInfo->addFeature(CpuInfo::kArmFeatureVFP2);
cpuInfo->addFeature(CpuInfo::kArmFeatureVFP3);
cpuInfo->addFeature(CpuInfo::kArmFeatureVFP4);
}
#endif // ASMJIT_ARCH_ARM64
#if ASMJIT_OS_WINDOWS
//! \internal
//!
//! Detect ARM CPU features on Windows.
//!
//! The detection is based on `IsProcessorFeaturePresent()` API call.
static void armDetectCpuInfoOnWindows(CpuInfo* cpuInfo) noexcept {
#if ASMJIT_ARCH_ARM32
cpuInfo->setArch(kArchArm32);
// Windows for ARM requires at least ARMv7 with DSP extensions.
cpuInfo->addFeature(CpuInfo::kArmFeatureV6);
cpuInfo->addFeature(CpuInfo::kArmFeatureV7);
cpuInfo->addFeature(CpuInfo::kArmFeatureDSP);
// Windows for ARM requires VFP3.
cpuInfo->addFeature(CpuInfo::kArmFeatureVFP2);
cpuInfo->addFeature(CpuInfo::kArmFeatureVFP3);
// Windows for ARM requires and uses THUMB2.
cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB);
cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB2);
#else
cpuInfo->setArch(kArchArm64);
armPopulateBaseline64Features(cpuInfo);
#endif
// Windows for ARM requires NEON.
cpuInfo->addFeature(CpuInfo::kArmFeatureNEON);
// Detect additional CPU features by calling `IsProcessorFeaturePresent()`.
struct WinPFPMapping {
uint32_t pfpId, featureId;
};
static const WinPFPMapping mapping[] = {
{ PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE , CpuInfo::kArmFeatureVFP4 },
{ PF_ARM_VFP_32_REGISTERS_AVAILABLE , CpuInfo::kArmFeatureVFP_D32 },
{ PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE, CpuInfo::kArmFeatureIDIV },
{ PF_ARM_64BIT_LOADSTORE_ATOMIC , CpuInfo::kArmFeatureAtomics64 }
};
for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(mapping); i++)
if (::IsProcessorFeaturePresent(mapping[i].pfpId))
cpuInfo->addFeature(mapping[i].featureId);
}
#endif // ASMJIT_OS_WINDOWS
#if ASMJIT_OS_LINUX
struct LinuxHWCapMapping {
uint32_t hwcapMask, featureId;
};
static void armDetectHWCaps(CpuInfo* cpuInfo,
unsigned long type, const LinuxHWCapMapping* mapping, size_t length) noexcept {
unsigned long mask = getauxval(type);
for (size_t i = 0; i < length; i++)
if ((mask & mapping[i].hwcapMask) == mapping[i].hwcapMask)
cpuInfo->addFeature(mapping[i].featureId);
}
//! \internal
//!
//! Detect ARM CPU features on Linux.
//!
//! The detection is based on `getauxval()`.
static void armDetectCpuInfoOnLinux(CpuInfo* cpuInfo) noexcept {
#if ASMJIT_ARCH_ARM32
cpuInfo->setArch(kArchArm32);
// `AT_HWCAP` provides ARMv7 (and less) related flags.
static const LinuxHWCapMapping hwCapMapping[] = {
{ /* HWCAP_VFPv3 */ (1 << 13), CpuInfo::kArmFeatureVFP3 },
{ /* HWCAP_VFPv4 */ (1 << 16), CpuInfo::kArmFeatureVFP4 },
{ /* HWCAP_IDIVA */ (3 << 17), CpuInfo::kArmFeatureIDIV },
{ /* HWCAP_VFPD32 */ (1 << 19), CpuInfo::kArmFeatureVFP_D32 },
{ /* HWCAP_NEON */ (1 << 12), CpuInfo::kArmFeatureNEON },
{ /* HWCAP_EDSP */ (1 << 7), CpuInfo::kArmFeatureDSP }
};
armDetectHWCaps(cpuInfo, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
// VFP3 implies VFP2.
if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFP3))
cpuInfo->addFeature(CpuInfo::kArmFeatureVFP2);
// VFP2 implies ARMv6.
if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFP2))
cpuInfo->addFeature(CpuInfo::kArmFeatureV6);
// VFP3 or NEON implies ARMv7.
if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFP3) ||
cpuInfo->hasFeature(CpuInfo::kArmFeatureNEON))
cpuInfo->addFeature(CpuInfo::kArmFeatureV7);
// `AT_HWCAP2` provides ARMv8 related flags.
static const LinuxHWCapMapping hwCap2Mapping[] = {
{ /* HWCAP2_AES */ (1 << 0), CpuInfo::kArmFeatureAES },
{ /* HWCAP2_CRC32 */ (1 << 4), CpuInfo::kArmFeatureCRC32 },
{ /* HWCAP2_PMULL */ (1 << 1), CpuInfo::kArmFeaturePMULL },
{ /* HWCAP2_SHA1 */ (1 << 2), CpuInfo::kArmFeatureSHA1 },
{ /* HWCAP2_SHA2 */ (1 << 3), CpuInfo::kArmFeatureSHA256 }
};
armDetectHWCaps(cpuInfo, AT_HWCAP2, hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping));
if (cpuInfo->hasFeature(CpuInfo::kArmFeatureAES ) ||
cpuInfo->hasFeature(CpuInfo::kArmFeatureCRC32 ) ||
cpuInfo->hasFeature(CpuInfo::kArmFeaturePMULL ) ||
cpuInfo->hasFeature(CpuInfo::kArmFeatureSHA1 ) ||
cpuInfo->hasFeature(CpuInfo::kArmFeatureSHA256)) {
cpuInfo->addFeature(CpuInfo::kArmFeatureV8);
}
#else
cpuInfo->setArch(kArchArm64);
armPopulateBaseline64Features(cpuInfo);
// `AT_HWCAP` provides ARMv8 related flags.
static const LinuxHWCapMapping hwCapMapping[] = {
{ /* HWCAP_ASIMD */ (1 << 1), CpuInfo::kArmFeatureNEON },
{ /* HWCAP_AES */ (1 << 3), CpuInfo::kArmFeatureAES },
{ /* HWCAP_CRC32 */ (1 << 7), CpuInfo::kArmFeatureCRC32 },
{ /* HWCAP_PMULL */ (1 << 4), CpuInfo::kArmFeaturePMULL },
{ /* HWCAP_SHA1 */ (1 << 5), CpuInfo::kArmFeatureSHA1 },
{ /* HWCAP_SHA2 */ (1 << 6), CpuInfo::kArmFeatureSHA256 }
{ /* HWCAP_ATOMICS */ (1 << 8), CpuInfo::kArmFeatureAtomics64 }
};
armDetectHWCaps(cpuInfo, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
// `AT_HWCAP2` is not used at the moment.
#endif
}
#endif // ASMJIT_OS_LINUX
static void armDetectCpuInfo(CpuInfo* cpuInfo) noexcept {
#if ASMJIT_OS_WINDOWS
armDetectCpuInfoOnWindows(cpuInfo);
#elif ASMJIT_OS_LINUX
armDetectCpuInfoOnLinux(cpuInfo);
#else
# error "[asmjit] armDetectCpuInfo() - Unsupported OS."
#endif
}
#endif // ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
// ============================================================================
// [asmjit::CpuInfo - Detect X86 & X64]
// ============================================================================
#if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
//! \internal
//!
//! X86 CPUID result.
struct CpuIdResult {
uint32_t eax, ebx, ecx, edx;
};
//! \internal
//!
//! Content of XCR register, result of XGETBV instruction.
struct XGetBVResult {
uint32_t eax, edx;
};
#if ASMJIT_CC_MSC && !ASMJIT_CC_MSC_GE(15, 0, 30729) && ASMJIT_ARCH_X64
//! \internal
//!
//! HACK: VS2008 or less, 64-bit mode - `__cpuidex` doesn't exist! However,
//! 64-bit calling convention specifies the first parameter to be passed in
//! ECX, so we may be lucky if compiler doesn't move the register, otherwise
//! the result would be wrong.
static void ASMJIT_NOINLINE void x86CallCpuIdWorkaround(uint32_t inEcx, uint32_t inEax, CpuIdResult* result) noexcept {
__cpuid(reinterpret_cast<int*>(result), inEax);
}
#endif
//! \internal
//!
//! Wrapper to call `cpuid` instruction.
static void ASMJIT_INLINE x86CallCpuId(CpuIdResult* result, uint32_t inEax, uint32_t inEcx = 0) noexcept {
#if ASMJIT_CC_MSC && ASMJIT_CC_MSC_GE(15, 0, 30729)
__cpuidex(reinterpret_cast<int*>(result), inEax, inEcx);
#elif ASMJIT_CC_MSC && ASMJIT_ARCH_X64
x86CallCpuIdWorkaround(inEcx, inEax, result);
#elif ASMJIT_CC_MSC && ASMJIT_ARCH_X86
uint32_t paramEax = inEax;
uint32_t paramEcx = inEcx;
uint32_t* out = reinterpret_cast<uint32_t*>(result);
__asm {
mov eax, paramEax
mov ecx, paramEcx
mov edi, out
cpuid
mov dword ptr[edi + 0], eax
mov dword ptr[edi + 4], ebx
mov dword ptr[edi + 8], ecx
mov dword ptr[edi + 12], edx
}
#elif (ASMJIT_CC_GCC || ASMJIT_CC_CLANG) && ASMJIT_ARCH_X86
__asm__ __volatile__(
"mov %%ebx, %%edi\n"
"cpuid\n"
"xchg %%edi, %%ebx\n"
: "=a"(result->eax),
"=D"(result->ebx),
"=c"(result->ecx),
"=d"(result->edx)
: "a"(inEax),
"c"(inEcx)
);
#elif (ASMJIT_CC_GCC || ASMJIT_CC_CLANG) && ASMJIT_ARCH_X64
__asm__ __volatile__( \
"mov %%rbx, %%rdi\n"
"cpuid\n"
"xchg %%rdi, %%rbx\n"
: "=a"(result->eax),
"=D"(result->ebx),
"=c"(result->ecx),
"=d"(result->edx)
: "a"(inEax),
"c"(inEcx)
);
#else
# error "[asmjit] x86CallCpuid() - Unsupported compiler."
#endif
}
//! \internal
//!
//! Wrapper to call `xgetbv` instruction.
static void x86CallXGetBV(XGetBVResult* result, uint32_t inEcx) noexcept {
#if ASMJIT_CC_MSC_GE(16, 0, 40219) // 2010SP1+
uint64_t value = _xgetbv(inEcx);
result->eax = static_cast<uint32_t>(value & 0xFFFFFFFFU);
result->edx = static_cast<uint32_t>(value >> 32);
#elif ASMJIT_CC_GCC || ASMJIT_CC_CLANG
uint32_t outEax;
uint32_t outEdx;
// Replaced, because the world is not perfect:
// __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
__asm__ __volatile__(".byte 0x0F, 0x01, 0xd0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
result->eax = outEax;
result->edx = outEdx;
#else
result->eax = 0;
result->edx = 0;
#endif
}
//! \internal
//!
//! Map a 12-byte vendor string returned by `cpuid` into a `CpuInfo::Vendor` ID.
static uint32_t x86GetCpuVendorID(const char* vendorString) noexcept {
struct VendorData {
uint32_t id;
char text[12];
};
static const VendorData vendorList[] = {
{ CpuInfo::kVendorIntel , { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' } },
{ CpuInfo::kVendorAMD , { 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' } },
{ CpuInfo::kVendorVIA , { 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 } },
{ CpuInfo::kVendorVIA , { 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' } }
};
uint32_t dw0 = reinterpret_cast<const uint32_t*>(vendorString)[0];
uint32_t dw1 = reinterpret_cast<const uint32_t*>(vendorString)[1];
uint32_t dw2 = reinterpret_cast<const uint32_t*>(vendorString)[2];
for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(vendorList); i++) {
if (dw0 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[0] &&
dw1 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[1] &&
dw2 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[2])
return vendorList[i].id;
}
return CpuInfo::kVendorNone;
}
static ASMJIT_INLINE void x86SimplifyBrandString(char* s) noexcept {
// Used to always clear the current character to ensure that the result
// doesn't contain garbage after the new zero terminator.
char* d = s;
char prev = 0;
char curr = s[0];
s[0] = '\0';
for (;;) {
if (curr == 0)
break;
if (curr == ' ') {
if (prev == '@' || s[1] == ' ' || s[1] == '@')
goto L_Skip;
}
d[0] = curr;
d++;
prev = curr;
L_Skip:
curr = *++s;
s[0] = '\0';
}
d[0] = '\0';
}
static void x86DetectCpuInfo(CpuInfo* cpuInfo) noexcept {
uint32_t i, maxId;
CpuIdResult regs;
XGetBVResult xcr0 = { 0, 0 };
// Architecture is known at compile-time.
cpuInfo->setArch(ASMJIT_ARCH_X86 ? kArchX86 : kArchX64);
// --------------------------------------------------------------------------
// [CPUID EAX=0x0]
// --------------------------------------------------------------------------
// Get vendor string/id.
x86CallCpuId(&regs, 0x0);
maxId = regs.eax;
::memcpy(cpuInfo->_vendorString + 0, &regs.ebx, 4);
::memcpy(cpuInfo->_vendorString + 4, &regs.edx, 4);
::memcpy(cpuInfo->_vendorString + 8, &regs.ecx, 4);
cpuInfo->_vendorId = x86GetCpuVendorID(cpuInfo->_vendorString);
// --------------------------------------------------------------------------
// [CPUID EAX=0x1]
// --------------------------------------------------------------------------
if (maxId >= 0x1) {
// Get feature flags in ECX/EDX and family/model in EAX.
x86CallCpuId(&regs, 0x1);
// Fill family and model fields.
cpuInfo->_family = (regs.eax >> 8) & 0x0F;
cpuInfo->_model = (regs.eax >> 4) & 0x0F;
cpuInfo->_stepping = (regs.eax ) & 0x0F;
// Use extended family and model fields.
if (cpuInfo->_family == 0x0F) {
cpuInfo->_family += ((regs.eax >> 20) & 0xFF);
cpuInfo->_model += ((regs.eax >> 16) & 0x0F) << 4;
}
cpuInfo->_x86Data._processorType = ((regs.eax >> 12) & 0x03);
cpuInfo->_x86Data._brandIndex = ((regs.ebx ) & 0xFF);
cpuInfo->_x86Data._flushCacheLineSize = ((regs.ebx >> 8) & 0xFF) * 8;
cpuInfo->_x86Data._maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE3);
if (regs.ecx & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeaturePCLMULQDQ);
if (regs.ecx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureMONITOR);
if (regs.ecx & 0x00000200U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSSE3);
if (regs.ecx & 0x00002000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMPXCHG16B);
if (regs.ecx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4_1);
if (regs.ecx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4_2);
if (regs.ecx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMOVBE);
if (regs.ecx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeaturePOPCNT);
if (regs.ecx & 0x02000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAESNI);
if (regs.ecx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVE);
if (regs.ecx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVE_OS);
if (regs.ecx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDRAND);
if (regs.edx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDTSC);
if (regs.edx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMPXCHG8B);
if (regs.edx & 0x00008000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMOV);
if (regs.edx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLFLUSH);
if (regs.edx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMMX);
if (regs.edx & 0x01000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFXSR);
if (regs.edx & 0x02000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE)
.addFeature(CpuInfo::kX86FeatureMMX2);
if (regs.edx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE)
.addFeature(CpuInfo::kX86FeatureSSE2);
if (regs.edx & 0x10000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMT);
// AMD sets multi-threading ON if it has two or more cores.
if (cpuInfo->_hwThreadsCount == 1 && cpuInfo->_vendorId == CpuInfo::kVendorAMD && (regs.edx & 0x10000000U))
cpuInfo->_hwThreadsCount = 2;
// Get the content of XCR0 if supported by CPU and enabled by OS.
if ((regs.ecx & 0x0C000000U) == 0x0C000000U)
x86CallXGetBV(&xcr0, 0);
// Detect AVX+.
if (regs.ecx & 0x10000000U) {
// - XCR0[2:1] == 11b
// XMM & YMM states need to be enabled by OS.
if ((xcr0.eax & 0x00000006U) == 0x00000006U) {
cpuInfo->addFeature(CpuInfo::kX86FeatureAVX);
if (regs.ecx & 0x00004000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFMA3);
if (regs.ecx & 0x20000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureF16C);
}
}
}
// --------------------------------------------------------------------------
// [CPUID EAX=0x7 ECX=0x0]
// --------------------------------------------------------------------------
// Detect new features if the processor supports CPUID-07.
bool maybeMPX = false;
if (maxId >= 0x7) {
x86CallCpuId(&regs, 0x7);
if (regs.ebx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureFSGSBASE);
if (regs.ebx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureBMI);
if (regs.ebx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureHLE);
if (regs.ebx & 0x00000080U) cpuInfo->addFeature(CpuInfo::kX86FeatureSMEP);
if (regs.ebx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeatureBMI2);
if (regs.ebx & 0x00000200U) cpuInfo->addFeature(CpuInfo::kX86FeatureERMS);
if (regs.ebx & 0x00000800U) cpuInfo->addFeature(CpuInfo::kX86FeatureRTM);
if (regs.ebx & 0x00004000U) maybeMPX = true;
if (regs.ebx & 0x00040000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDSEED);
if (regs.ebx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureADX);
if (regs.ebx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSMAP);
if (regs.ebx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeaturePCOMMIT);
if (regs.ebx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLFLUSH_OPT);
if (regs.ebx & 0x01000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLWB);
if (regs.ebx & 0x20000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSHA);
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeaturePREFETCHWT1);
// Detect AVX2.
if (cpuInfo->hasFeature(CpuInfo::kX86FeatureAVX))
if (regs.ebx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX2);
// Detect AVX-512+.
if (regs.ebx & 0x00010000U) {
// - XCR0[2:1] == 11b
// XMM/YMM states need to be enabled by OS.
// - XCR0[7:5] == 111b
// Upper 256-bit of ZMM0-XMM15 and ZMM16-ZMM31 need to be enabled by the OS.
if ((xcr0.eax & 0x000000E6U) == 0x000000E6U) {
cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512F);
if (regs.ebx & 0x00020000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512DQ);
if (regs.ebx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512IFMA);
if (regs.ebx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512PF);
if (regs.ebx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512ER);
if (regs.ebx & 0x10000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512CD);
if (regs.ebx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512BW);
if (regs.ebx & 0x80000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512VL);
if (regs.ecx & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512VBMI);
}
}
}
// --------------------------------------------------------------------------
// [CPUID EAX=0xD, ECX=0x0]
// --------------------------------------------------------------------------
if (maxId >= 0xD && maybeMPX) {
x86CallCpuId(&regs, 0xD);
// Both CPUID result and XCR0 has to be enabled to have support for MPX.
if (((regs.eax & xcr0.eax) & 0x00000018U) == 0x00000018U) {
cpuInfo->addFeature(CpuInfo::kX86FeatureMPX);
}
}
// --------------------------------------------------------------------------
// [CPUID EAX=0x80000000...maxId]
// --------------------------------------------------------------------------
// Several CPUID calls are required to get the whole branc string. It's easy
// to copy one DWORD at a time instead of performing a byte copy.
uint32_t* brand = reinterpret_cast<uint32_t*>(cpuInfo->_brandString);
i = maxId = 0x80000000U;
do {
x86CallCpuId(&regs, i);
switch (i) {
case 0x80000000U:
maxId = Utils::iMin<uint32_t>(regs.eax, 0x80000004);
break;
case 0x80000001U:
if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureLAHF_SAHF);
if (regs.ecx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureLZCNT);
if (regs.ecx & 0x00000040U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4A);
if (regs.ecx & 0x00000080U) cpuInfo->addFeature(CpuInfo::kX86FeatureMSSE);
if (regs.ecx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeaturePREFETCH);
if (regs.ecx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureTBM);
if (regs.edx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureNX);
if (regs.edx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFXSR_OPT);
if (regs.edx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMMX2);
if (regs.edx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDTSCP);
if (regs.edx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86Feature3DNOW2)
.addFeature(CpuInfo::kX86FeatureMMX2);
if (regs.edx & 0x80000000U) cpuInfo->addFeature(CpuInfo::kX86Feature3DNOW);
if (cpuInfo->hasFeature(CpuInfo::kX86FeatureAVX)) {
if (regs.ecx & 0x00000800U) cpuInfo->addFeature(CpuInfo::kX86FeatureXOP);
if (regs.ecx & 0x00010000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFMA4);
}
break;
case 0x80000002U:
case 0x80000003U:
case 0x80000004U:
*brand++ = regs.eax;
*brand++ = regs.ebx;
*brand++ = regs.ecx;
*brand++ = regs.edx;
break;
default:
// Stop the loop, additional features can be detected in the future.
i = maxId;
break;
}
} while (i++ < maxId);
// Simplify CPU brand string by removing unnecessary spaces.
x86SimplifyBrandString(cpuInfo->_brandString);
}
#endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
// ============================================================================
// [asmjit::CpuInfo - Detect - HWThreadsCount]
// ============================================================================
static uint32_t cpuDetectHWThreadsCount() noexcept {
#if ASMJIT_OS_WINDOWS
SYSTEM_INFO info;
::GetSystemInfo(&info);
return info.dwNumberOfProcessors;
#elif ASMJIT_OS_POSIX && defined(_SC_NPROCESSORS_ONLN)
long res = ::sysconf(_SC_NPROCESSORS_ONLN);
if (res <= 0) return 1;
return static_cast<uint32_t>(res);
#else
return 1;
#endif
}
// ============================================================================
// [asmjit::CpuInfo - Detect]
// ============================================================================
void CpuInfo::detect() noexcept {
reset();
// Detect the number of hardware threads available.
_hwThreadsCount = cpuDetectHWThreadsCount();
#if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
armDetectCpuInfo(this);
#endif // ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
#if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
x86DetectCpuInfo(this);
#endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
}
// ============================================================================
// [asmjit::CpuInfo - GetHost]
// ============================================================================
struct HostCpuInfo : public CpuInfo {
ASMJIT_INLINE HostCpuInfo() noexcept : CpuInfo() { detect(); }
};
const CpuInfo& CpuInfo::getHost() noexcept {
static HostCpuInfo host;
return host;
}
} // asmjit namespace
// [Api-End]
#include "../apiend.h"