// This file is part of AsmJit project // // See asmjit.h or LICENSE.md for license and copyright information // SPDX-License-Identifier: Zlib #include "../core/api-build_p.h" #include "../core/cpuinfo.h" #include "../core/support.h" #if !defined(_WIN32) #include #include #include #endif // Required by `getauxval()` on Linux. #if defined(__linux__) #include #endif //! Required to detect CPU and features on Apple platforms. #if defined(__APPLE__) #include #include #include #endif // Required by `__cpuidex()` and `_xgetbv()`. #if defined(_MSC_VER) #include #endif ASMJIT_BEGIN_NAMESPACE // CpuInfo - Detect - HW-Thread Count // ================================== #if defined(_WIN32) static inline uint32_t detectHWThreadCount() noexcept { SYSTEM_INFO info; ::GetSystemInfo(&info); return info.dwNumberOfProcessors; } #elif defined(_SC_NPROCESSORS_ONLN) static inline uint32_t detectHWThreadCount() noexcept { long res = ::sysconf(_SC_NPROCESSORS_ONLN); return res <= 0 ? uint32_t(1) : uint32_t(res); } #else static inline uint32_t detectHWThreadCount() noexcept { return 1; } #endif // CpuInfo - Detect - X86 // ====================== #if ASMJIT_ARCH_X86 struct cpuid_t { uint32_t eax, ebx, ecx, edx; }; struct xgetbv_t { uint32_t eax, edx; }; // Executes `cpuid` instruction. static inline void cpuidQuery(cpuid_t* out, uint32_t inEax, uint32_t inEcx = 0) noexcept { #if defined(_MSC_VER) __cpuidex(reinterpret_cast(out), inEax, inEcx); #elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 32 __asm__ __volatile__( "mov %%ebx, %%edi\n" "cpuid\n" "xchg %%edi, %%ebx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx)); #elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 64 __asm__ __volatile__( "mov %%rbx, %%rdi\n" "cpuid\n" "xchg %%rdi, %%rbx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx)); #else #error "[asmjit] x86::cpuidQuery() - Unsupported compiler." #endif } // Executes 'xgetbv' instruction. 
static inline void xgetbvQuery(xgetbv_t* out, uint32_t inEcx) noexcept { #if defined(_MSC_VER) uint64_t value = _xgetbv(inEcx); out->eax = uint32_t(value & 0xFFFFFFFFu); out->edx = uint32_t(value >> 32); #elif defined(__GNUC__) uint32_t outEax; uint32_t outEdx; // Replaced, because the world is not perfect: // __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx)); __asm__ __volatile__(".byte 0x0F, 0x01, 0xD0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx)); out->eax = outEax; out->edx = outEdx; #else out->eax = 0; out->edx = 0; #endif } // Map a 12-byte vendor string returned by `cpuid` into a `CpuInfo::Vendor` ID. static inline void simplifyCpuVendor(CpuInfo& cpu, uint32_t d0, uint32_t d1, uint32_t d2) noexcept { struct Vendor { char normalized[8]; union { char text[12]; uint32_t d[3]; }; }; static const Vendor table[] = { { { 'A', 'M', 'D' }, {{ 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' }} }, { { 'I', 'N', 'T', 'E', 'L' }, {{ 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' }} }, { { 'V', 'I', 'A' }, {{ 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' }} }, { { 'V', 'I', 'A' }, {{ 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 }} }, { { 'U', 'N', 'K', 'N', 'O', 'W', 'N' }, {{ 0 }} } }; uint32_t i; for (i = 0; i < ASMJIT_ARRAY_SIZE(table) - 1; i++) if (table[i].d[0] == d0 && table[i].d[1] == d1 && table[i].d[2] == d2) break; memcpy(cpu._vendor.str, table[i].normalized, 8); } static ASMJIT_FAVOR_SIZE void simplifyCpuBrand(char* s) noexcept { char* d = s; char c = s[0]; char prev = 0; // Used to always clear the current character to ensure that the result // doesn't contain garbage after a new null terminator is placed at the end. 
s[0] = '\0'; for (;;) { if (!c) break; if (!(c == ' ' && (prev == '@' || s[1] == ' ' || s[1] == '@'))) { *d++ = c; prev = c; } c = *++s; s[0] = '\0'; } d[0] = '\0'; } static ASMJIT_FAVOR_SIZE void detectX86Cpu(CpuInfo& cpu) noexcept { using Support::bitTest; cpuid_t regs; xgetbv_t xcr0 { 0, 0 }; CpuFeatures::X86& features = cpu.features().x86(); cpu._wasDetected = true; cpu._maxLogicalProcessors = 1; // We are gonna execute CPUID, which was introduced by I486, so it's the requirement. features.add(CpuFeatures::X86::kI486); // CPUID EAX=0 // ----------- // Get vendor string/id. cpuidQuery(®s, 0x0); uint32_t maxId = regs.eax; uint32_t maxSubLeafId_0x7 = 0; simplifyCpuVendor(cpu, regs.ebx, regs.edx, regs.ecx); // CPUID EAX=1 // ----------- if (maxId >= 0x1) { // Get feature flags in ECX/EDX and family/model in EAX. cpuidQuery(®s, 0x1); // Fill family and model fields. uint32_t modelId = (regs.eax >> 4) & 0x0F; uint32_t familyId = (regs.eax >> 8) & 0x0F; // Use extended family and model fields. 
if (familyId == 0x06u || familyId == 0x0Fu) modelId += (((regs.eax >> 16) & 0x0Fu) << 4); if (familyId == 0x0Fu) familyId += ((regs.eax >> 20) & 0xFFu); cpu._modelId = modelId; cpu._familyId = familyId; cpu._brandId = ((regs.ebx ) & 0xFF); cpu._processorType = ((regs.eax >> 12) & 0x03); cpu._maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF); cpu._stepping = ((regs.eax ) & 0x0F); cpu._cacheLineSize = ((regs.ebx >> 8) & 0xFF) * 8; features.addIf(bitTest(regs.ecx, 0), CpuFeatures::X86::kSSE3); features.addIf(bitTest(regs.ecx, 1), CpuFeatures::X86::kPCLMULQDQ); features.addIf(bitTest(regs.ecx, 3), CpuFeatures::X86::kMONITOR); features.addIf(bitTest(regs.ecx, 5), CpuFeatures::X86::kVMX); features.addIf(bitTest(regs.ecx, 6), CpuFeatures::X86::kSMX); features.addIf(bitTest(regs.ecx, 9), CpuFeatures::X86::kSSSE3); features.addIf(bitTest(regs.ecx, 13), CpuFeatures::X86::kCMPXCHG16B); features.addIf(bitTest(regs.ecx, 19), CpuFeatures::X86::kSSE4_1); features.addIf(bitTest(regs.ecx, 20), CpuFeatures::X86::kSSE4_2); features.addIf(bitTest(regs.ecx, 22), CpuFeatures::X86::kMOVBE); features.addIf(bitTest(regs.ecx, 23), CpuFeatures::X86::kPOPCNT); features.addIf(bitTest(regs.ecx, 25), CpuFeatures::X86::kAESNI); features.addIf(bitTest(regs.ecx, 26), CpuFeatures::X86::kXSAVE); features.addIf(bitTest(regs.ecx, 27), CpuFeatures::X86::kOSXSAVE); features.addIf(bitTest(regs.ecx, 30), CpuFeatures::X86::kRDRAND); features.addIf(bitTest(regs.edx, 0), CpuFeatures::X86::kFPU); features.addIf(bitTest(regs.edx, 4), CpuFeatures::X86::kRDTSC); features.addIf(bitTest(regs.edx, 5), CpuFeatures::X86::kMSR); features.addIf(bitTest(regs.edx, 8), CpuFeatures::X86::kCMPXCHG8B); features.addIf(bitTest(regs.edx, 15), CpuFeatures::X86::kCMOV); features.addIf(bitTest(regs.edx, 19), CpuFeatures::X86::kCLFLUSH); features.addIf(bitTest(regs.edx, 23), CpuFeatures::X86::kMMX); features.addIf(bitTest(regs.edx, 24), CpuFeatures::X86::kFXSR); features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kSSE); 
features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kSSE, CpuFeatures::X86::kSSE2); features.addIf(bitTest(regs.edx, 28), CpuFeatures::X86::kMT); // Get the content of XCR0 if supported by the CPU and enabled by the OS. if (features.hasXSAVE() && features.hasOSXSAVE()) { xgetbvQuery(&xcr0, 0); } // Detect AVX+. if (bitTest(regs.ecx, 28)) { // - XCR0[2:1] == 11b // XMM & YMM states need to be enabled by OS. if ((xcr0.eax & 0x00000006u) == 0x00000006u) { features.add(CpuFeatures::X86::kAVX); features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kFMA); features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kF16C); } } } constexpr uint32_t kXCR0_AMX_Bits = 0x3u << 17; bool amxEnabledByOS = (xcr0.eax & kXCR0_AMX_Bits) == kXCR0_AMX_Bits; #if defined(__APPLE__) // Apple platform provides on-demand AVX512 support. When an AVX512 instruction is used the first time it results // in #UD, which would cause the thread being promoted to use AVX512 support by the OS in addition to enabling the // necessary bits in XCR0 register. bool avx512EnabledByOS = true; #else // - XCR0[2:1] == 11b - XMM/YMM states need to be enabled by OS. // - XCR0[7:5] == 111b - Upper 256-bit of ZMM0-XMM15 and ZMM16-ZMM31 need to be enabled by OS. constexpr uint32_t kXCR0_AVX512_Bits = (0x3u << 1) | (0x7u << 5); bool avx512EnabledByOS = (xcr0.eax & kXCR0_AVX512_Bits) == kXCR0_AVX512_Bits; #endif // CPUID EAX=7 ECX=0 // ----------------- // Detect new features if the processor supports CPUID-07. 
bool maybeMPX = false; if (maxId >= 0x7) { cpuidQuery(®s, 0x7); maybeMPX = bitTest(regs.ebx, 14); maxSubLeafId_0x7 = regs.eax; features.addIf(bitTest(regs.ebx, 0), CpuFeatures::X86::kFSGSBASE); features.addIf(bitTest(regs.ebx, 3), CpuFeatures::X86::kBMI); features.addIf(bitTest(regs.ebx, 4), CpuFeatures::X86::kHLE); features.addIf(bitTest(regs.ebx, 7), CpuFeatures::X86::kSMEP); features.addIf(bitTest(regs.ebx, 8), CpuFeatures::X86::kBMI2); features.addIf(bitTest(regs.ebx, 9), CpuFeatures::X86::kERMS); features.addIf(bitTest(regs.ebx, 11), CpuFeatures::X86::kRTM); features.addIf(bitTest(regs.ebx, 18), CpuFeatures::X86::kRDSEED); features.addIf(bitTest(regs.ebx, 19), CpuFeatures::X86::kADX); features.addIf(bitTest(regs.ebx, 20), CpuFeatures::X86::kSMAP); features.addIf(bitTest(regs.ebx, 23), CpuFeatures::X86::kCLFLUSHOPT); features.addIf(bitTest(regs.ebx, 24), CpuFeatures::X86::kCLWB); features.addIf(bitTest(regs.ebx, 29), CpuFeatures::X86::kSHA); features.addIf(bitTest(regs.ecx, 0), CpuFeatures::X86::kPREFETCHWT1); features.addIf(bitTest(regs.ecx, 4), CpuFeatures::X86::kOSPKE); features.addIf(bitTest(regs.ecx, 5), CpuFeatures::X86::kWAITPKG); features.addIf(bitTest(regs.ecx, 7), CpuFeatures::X86::kCET_SS); features.addIf(bitTest(regs.ecx, 8), CpuFeatures::X86::kGFNI); features.addIf(bitTest(regs.ecx, 9), CpuFeatures::X86::kVAES); features.addIf(bitTest(regs.ecx, 10), CpuFeatures::X86::kVPCLMULQDQ); features.addIf(bitTest(regs.ecx, 22), CpuFeatures::X86::kRDPID); features.addIf(bitTest(regs.ecx, 25), CpuFeatures::X86::kCLDEMOTE); features.addIf(bitTest(regs.ecx, 27), CpuFeatures::X86::kMOVDIRI); features.addIf(bitTest(regs.ecx, 28), CpuFeatures::X86::kMOVDIR64B); features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kENQCMD); features.addIf(bitTest(regs.edx, 4), CpuFeatures::X86::kFSRM); features.addIf(bitTest(regs.edx, 5), CpuFeatures::X86::kUINTR); features.addIf(bitTest(regs.edx, 14), CpuFeatures::X86::kSERIALIZE); features.addIf(bitTest(regs.edx, 16), 
CpuFeatures::X86::kTSXLDTRK); features.addIf(bitTest(regs.edx, 18), CpuFeatures::X86::kPCONFIG); features.addIf(bitTest(regs.edx, 20), CpuFeatures::X86::kCET_IBT); // Detect 'TSX' - Requires at least one of `HLE` and `RTM` features. if (features.hasHLE() || features.hasRTM()) { features.add(CpuFeatures::X86::kTSX); } if (bitTest(regs.ebx, 5) && features.hasAVX()) { features.add(CpuFeatures::X86::kAVX2); } if (avx512EnabledByOS && bitTest(regs.ebx, 16)) { features.add(CpuFeatures::X86::kAVX512_F); features.addIf(bitTest(regs.ebx, 17), CpuFeatures::X86::kAVX512_DQ); features.addIf(bitTest(regs.ebx, 21), CpuFeatures::X86::kAVX512_IFMA); features.addIf(bitTest(regs.ebx, 26), CpuFeatures::X86::kAVX512_PFI); features.addIf(bitTest(regs.ebx, 27), CpuFeatures::X86::kAVX512_ERI); features.addIf(bitTest(regs.ebx, 28), CpuFeatures::X86::kAVX512_CDI); features.addIf(bitTest(regs.ebx, 30), CpuFeatures::X86::kAVX512_BW); features.addIf(bitTest(regs.ebx, 31), CpuFeatures::X86::kAVX512_VL); features.addIf(bitTest(regs.ecx, 1), CpuFeatures::X86::kAVX512_VBMI); features.addIf(bitTest(regs.ecx, 6), CpuFeatures::X86::kAVX512_VBMI2); features.addIf(bitTest(regs.ecx, 11), CpuFeatures::X86::kAVX512_VNNI); features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kAVX512_BITALG); features.addIf(bitTest(regs.ecx, 14), CpuFeatures::X86::kAVX512_VPOPCNTDQ); features.addIf(bitTest(regs.edx, 2), CpuFeatures::X86::kAVX512_4VNNIW); features.addIf(bitTest(regs.edx, 3), CpuFeatures::X86::kAVX512_4FMAPS); features.addIf(bitTest(regs.edx, 8), CpuFeatures::X86::kAVX512_VP2INTERSECT); features.addIf(bitTest(regs.edx, 23), CpuFeatures::X86::kAVX512_FP16); } if (amxEnabledByOS) { features.addIf(bitTest(regs.edx, 22), CpuFeatures::X86::kAMX_BF16); features.addIf(bitTest(regs.edx, 24), CpuFeatures::X86::kAMX_TILE); features.addIf(bitTest(regs.edx, 25), CpuFeatures::X86::kAMX_INT8); } } // CPUID EAX=7 ECX=1 // ----------------- if (maxSubLeafId_0x7 >= 1) { cpuidQuery(®s, 0x7, 1); 
features.addIf(bitTest(regs.eax, 3), CpuFeatures::X86::kRAO_INT); features.addIf(bitTest(regs.eax, 7), CpuFeatures::X86::kCMPCCXADD); features.addIf(bitTest(regs.eax, 10), CpuFeatures::X86::kFZRM); features.addIf(bitTest(regs.eax, 11), CpuFeatures::X86::kFSRS); features.addIf(bitTest(regs.eax, 12), CpuFeatures::X86::kFSRC); features.addIf(bitTest(regs.eax, 19), CpuFeatures::X86::kWRMSRNS); features.addIf(bitTest(regs.eax, 22), CpuFeatures::X86::kHRESET); features.addIf(bitTest(regs.eax, 26), CpuFeatures::X86::kLAM); features.addIf(bitTest(regs.eax, 27), CpuFeatures::X86::kMSRLIST); features.addIf(bitTest(regs.edx, 14), CpuFeatures::X86::kPREFETCHI); features.addIf(bitTest(regs.edx, 18), CpuFeatures::X86::kCET_SSS); if (features.hasAVX2()) { features.addIf(bitTest(regs.eax, 4), CpuFeatures::X86::kAVX_VNNI); features.addIf(bitTest(regs.eax, 23), CpuFeatures::X86::kAVX_IFMA); features.addIf(bitTest(regs.edx, 4), CpuFeatures::X86::kAVX_VNNI_INT8); features.addIf(bitTest(regs.edx, 5), CpuFeatures::X86::kAVX_NE_CONVERT); } if (features.hasAVX512_F()) { features.addIf(bitTest(regs.eax, 5), CpuFeatures::X86::kAVX512_BF16); } if (amxEnabledByOS) { features.addIf(bitTest(regs.eax, 21), CpuFeatures::X86::kAMX_FP16); } } // CPUID EAX=13 ECX=0 // ------------------ if (maxId >= 0xD) { cpuidQuery(®s, 0xD, 0); // Both CPUID result and XCR0 has to be enabled to have support for MPX. 
if (((regs.eax & xcr0.eax) & 0x00000018u) == 0x00000018u && maybeMPX) features.add(CpuFeatures::X86::kMPX); cpuidQuery(®s, 0xD, 1); features.addIf(bitTest(regs.eax, 0), CpuFeatures::X86::kXSAVEOPT); features.addIf(bitTest(regs.eax, 1), CpuFeatures::X86::kXSAVEC); features.addIf(bitTest(regs.eax, 3), CpuFeatures::X86::kXSAVES); } // CPUID EAX=14 ECX=0 // ------------------ if (maxId >= 0xE) { cpuidQuery(®s, 0xE, 0); features.addIf(bitTest(regs.ebx, 4), CpuFeatures::X86::kPTWRITE); } // CPUID EAX=0x80000000...maxId // ---------------------------- maxId = 0x80000000u; uint32_t i = maxId; // The highest EAX that we understand. constexpr uint32_t kHighestProcessedEAX = 0x8000001Fu; // Several CPUID calls are required to get the whole branc string. It's easier // to copy one DWORD at a time instead of copying the string a byte by byte. uint32_t* brand = cpu._brand.u32; do { cpuidQuery(®s, i); switch (i) { case 0x80000000u: maxId = Support::min(regs.eax, kHighestProcessedEAX); break; case 0x80000001u: features.addIf(bitTest(regs.ecx, 0), CpuFeatures::X86::kLAHFSAHF); features.addIf(bitTest(regs.ecx, 2), CpuFeatures::X86::kSVM); features.addIf(bitTest(regs.ecx, 5), CpuFeatures::X86::kLZCNT); features.addIf(bitTest(regs.ecx, 6), CpuFeatures::X86::kSSE4A); features.addIf(bitTest(regs.ecx, 7), CpuFeatures::X86::kMSSE); features.addIf(bitTest(regs.ecx, 8), CpuFeatures::X86::kPREFETCHW); features.addIf(bitTest(regs.ecx, 12), CpuFeatures::X86::kSKINIT); features.addIf(bitTest(regs.ecx, 15), CpuFeatures::X86::kLWP); features.addIf(bitTest(regs.ecx, 21), CpuFeatures::X86::kTBM); features.addIf(bitTest(regs.ecx, 29), CpuFeatures::X86::kMONITORX); features.addIf(bitTest(regs.edx, 20), CpuFeatures::X86::kNX); features.addIf(bitTest(regs.edx, 21), CpuFeatures::X86::kFXSROPT); features.addIf(bitTest(regs.edx, 22), CpuFeatures::X86::kMMX2); features.addIf(bitTest(regs.edx, 27), CpuFeatures::X86::kRDTSCP); features.addIf(bitTest(regs.edx, 29), CpuFeatures::X86::kPREFETCHW); 
features.addIf(bitTest(regs.edx, 30), CpuFeatures::X86::k3DNOW2, CpuFeatures::X86::kMMX2); features.addIf(bitTest(regs.edx, 31), CpuFeatures::X86::kPREFETCHW); if (features.hasAVX()) { features.addIf(bitTest(regs.ecx, 11), CpuFeatures::X86::kXOP); features.addIf(bitTest(regs.ecx, 16), CpuFeatures::X86::kFMA4); } // This feature seems to be only supported by AMD. if (cpu.isVendor("AMD")) { features.addIf(bitTest(regs.ecx, 4), CpuFeatures::X86::kALTMOVCR8); } break; case 0x80000002u: case 0x80000003u: case 0x80000004u: *brand++ = regs.eax; *brand++ = regs.ebx; *brand++ = regs.ecx; *brand++ = regs.edx; // Go directly to the next one we are interested in. if (i == 0x80000004u) i = 0x80000008u - 1; break; case 0x80000008u: features.addIf(bitTest(regs.ebx, 0), CpuFeatures::X86::kCLZERO); features.addIf(bitTest(regs.ebx, 0), CpuFeatures::X86::kRDPRU); features.addIf(bitTest(regs.ebx, 8), CpuFeatures::X86::kMCOMMIT); features.addIf(bitTest(regs.ebx, 9), CpuFeatures::X86::kWBNOINVD); // Go directly to the next one we are interested in. i = 0x8000001Fu - 1; break; case 0x8000001Fu: features.addIf(bitTest(regs.eax, 4), CpuFeatures::X86::kSNP); break; } } while (++i <= maxId); // Simplify CPU brand string a bit by removing some unnecessary spaces. 
simplifyCpuBrand(cpu._brand.str); } #endif // ASMJIT_ARCH_X86 // CpuInfo - Detect - ARM // ====================== // The most relevant and accurate information can be found here: // https://github.com/llvm-project/llvm/blob/master/lib/Target/AArch64/AArch64.td // https://github.com/apple/llvm-project/blob/apple/main/llvm/lib/Target/AArch64/AArch64.td (Apple fork) // // Other resources: // https://en.wikipedia.org/wiki/AArch64 // https://en.wikipedia.org/wiki/Apple_silicon#List_of_Apple_processors // https://developer.arm.com/architectures/learn-the-architecture/understanding-the-armv8-x-extensions/single-page #if ASMJIT_ARCH_ARM static inline void populateBaseARMFeatures(CpuInfo& cpu) noexcept { #if ASMJIT_ARCH_ARM == 32 // No baseline flags at the moment. DebugUtils::unused(cpu); #else // AArch64 is based on ARMv8-A and later. cpu.addFeature(CpuFeatures::ARM::kARMv6); cpu.addFeature(CpuFeatures::ARM::kARMv7); cpu.addFeature(CpuFeatures::ARM::kARMv8a); // AArch64 comes with these features by default. cpu.addFeature(CpuFeatures::ARM::kVFPv2); cpu.addFeature(CpuFeatures::ARM::kVFPv3); cpu.addFeature(CpuFeatures::ARM::kVFPv4); cpu.addFeature(CpuFeatures::ARM::kASIMD); cpu.addFeature(CpuFeatures::ARM::kIDIVA); #endif } // Detects ARM version by macros defined at compile time. This means that AsmJit will report features forced at // compile time that should always be provided by the target CPU. This also means that if we don't provide any // means to detect CPU features the features reported by AsmJit will at least not report less features than the // target it was compiled to. ASMJIT_MAYBE_UNUSED static ASMJIT_FAVOR_SIZE void detectARMFeaturesViaCompilerFlags(CpuInfo& cpu) noexcept { DebugUtils::unused(cpu); #if ASMJIT_ARCH_ARM == 32 // ARM targets have no baseline at the moment. 
# if defined(__ARM_ARCH_7A__) cpu.addFeature(CpuFeatures::ARM::kARMv7); # endif # if defined(__ARM_ARCH_8A__) cpu.addFeature(CpuFeatures::ARM::kARMv8a); # endif # if defined(__TARGET_ARCH_THUMB) cpu.addFeature(CpuFeatures::ARM::kTHUMB); # if __TARGET_ARCH_THUMB >= 4 cpu.addFeature(CpuFeatures::ARM::kTHUMBv2); # endif # endif # if defined(__ARM_FEATURE_FMA) cpu.addFeature(CpuFeatures::ARM::kVFPv3); cpu.addFeature(CpuFeatures::ARM::kVFPv4); # endif # if defined(__ARM_NEON) cpu.addFeature(CpuFeatures::ARM::kASIMD); # endif # if defined(__ARM_FEATURE_IDIV) && defined(__TARGET_ARCH_THUMB) cpu.addFeature(CpuFeatures::ARM::kIDIVT); #endif # if defined(__ARM_FEATURE_IDIV) && !defined(__TARGET_ARCH_THUMB) cpu.addFeature(CpuFeatures::ARM::kIDIVA); # endif #endif #if defined(__ARM_ARCH_8_1A__) cpu.addFeature(CpuFeatures::ARM::kARMv8_1a); #endif #if defined(__ARM_ARCH_8_2A__) cpu.addFeature(CpuFeatures::ARM::kARMv8_2a); #endif #if defined(__ARM_ARCH_8_3A__) cpu.addFeature(CpuFeatures::ARM::kARMv8_3a); #endif #if defined(__ARM_ARCH_8_4A__) cpu.addFeature(CpuFeatures::ARM::kARMv8_4a); #endif #if defined(__ARM_ARCH_8_5A__) cpu.addFeature(CpuFeatures::ARM::kARMv8_5a); #endif #if defined(__ARM_ARCH_8_6A__) cpu.addFeature(CpuFeatures::ARM::kARMv8_6a); #endif #if defined(__ARM_ARCH_8_7A__) cpu.addFeature(CpuFeatures::ARM::kARMv8_7a); #endif #if defined(__ARM_FEATURE_AES) cpu.addFeature(CpuFeatures::ARM::kAES); #endif #if defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC) && defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) cpu.addFeature(CpuFeatures::ARM::kBF16); #endif #if defined(__ARM_FEATURE_CRC32) cpu.addFeature(CpuFeatures::ARM::kCRC32); #endif #if defined(__ARM_FEATURE_CRYPTO) cpu.addFeature(CpuFeatures::ARM::kAES, CpuFeatures::ARM::kSHA1, CpuFeatures::ARM::kSHA2); #endif #if defined(__ARM_FEATURE_DOTPROD) cpu.addFeature(CpuFeatures::ARM::kDOTPROD); #endif #if defined(__ARM_FEATURE_FP16FML) || defined(__ARM_FEATURE_FP16_FML) cpu.addFeature(CpuFeatures::ARM::kFP16FML); #endif #if 
defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) cpu.addFeature(CpuFeatures::ARM::kFP16FULL); #endif #if defined(__ARM_FEATURE_FRINT) cpu.addFeature(CpuFeatures::ARM::kFRINT); #endif #if defined(__ARM_FEATURE_JCVT) cpu.addFeature(CpuFeatures::ARM::kFJCVTZS); #endif #if defined(__ARM_FEATURE_MATMUL_INT8) cpu.addFeature(CpuFeatures::ARM::kI8MM); #endif #if defined(__ARM_FEATURE_ATOMICS) cpu.addFeature(CpuFeatures::ARM::kLSE); #endif #if defined(__ARM_FEATURE_MEMORY_TAGGING) cpu.addFeature(CpuFeatures::ARM::kMTE); #endif #if defined(__ARM_FEATURE_QRDMX) cpu.addFeature(CpuFeatures::ARM::kRDM); #endif #if defined(__ARM_FEATURE_RNG) cpu.addFeature(CpuFeatures::ARM::kRNG); #endif #if defined(__ARM_FEATURE_SHA2) cpu.addFeature(CpuFeatures::ARM::kSHA2); #endif #if defined(__ARM_FEATURE_SHA3) cpu.addFeature(CpuFeatures::ARM::kSHA3); #endif #if defined(__ARM_FEATURE_SHA512) cpu.addFeature(CpuFeatures::ARM::kSHA512); #endif #if defined(__ARM_FEATURE_SM3) cpu.addFeature(CpuFeatures::ARM::kSM3); #endif #if defined(__ARM_FEATURE_SM4) cpu.addFeature(CpuFeatures::ARM::kSM4); #endif #if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_SVE_VECTOR_OPERATORS) cpu.addFeature(CpuFeatures::ARM::kSVE); #endif #if defined(__ARM_FEATURE_SVE_MATMUL_INT8) cpu.addFeature(CpuFeatures::ARM::kSVE_I8MM); #endif #if defined(__ARM_FEATURE_SVE_MATMUL_FP32) cpu.addFeature(CpuFeatures::ARM::kSVE_F32MM); #endif #if defined(__ARM_FEATURE_SVE_MATMUL_FP64) cpu.addFeature(CpuFeatures::ARM::kSVE_F64MM); #endif #if defined(__ARM_FEATURE_SVE2) cpu.addFeature(CpuFeatures::ARM::kSVE2); #endif #if defined(__ARM_FEATURE_SVE2_AES) cpu.addFeature(CpuFeatures::ARM::kSVE2_AES); #endif #if defined(__ARM_FEATURE_SVE2_BITPERM) cpu.addFeature(CpuFeatures::ARM::kSVE2_BITPERM); #endif #if defined(__ARM_FEATURE_SVE2_SHA3) cpu.addFeature(CpuFeatures::ARM::kSVE2_SHA3); #endif #if defined(__ARM_FEATURE_SVE2_SM4) cpu.addFeature(CpuFeatures::ARM::kSVE2_SM4); #endif #if defined(__ARM_FEATURE_TME) 
cpu.addFeature(CpuFeatures::ARM::kTME); #endif } ASMJIT_MAYBE_UNUSED static ASMJIT_FAVOR_SIZE void expandARMFeaturesByVersion(CpuInfo& cpu) noexcept { CpuFeatures::ARM& features = cpu.features().arm(); if (features.hasARMv8_7a()) { features.add(CpuFeatures::ARM::kARMv8_6a); } if (features.hasARMv8_6a()) { features.add(CpuFeatures::ARM::kARMv8_5a, CpuFeatures::ARM::kBF16); if (features.hasSVE()) features.add(CpuFeatures::ARM::kSVE_I8MM); } if (features.hasARMv8_5a()) { features.add(CpuFeatures::ARM::kARMv8_4a, CpuFeatures::ARM::kALTNZCV, CpuFeatures::ARM::kBTI, CpuFeatures::ARM::kFRINT, CpuFeatures::ARM::kSB, CpuFeatures::ARM::kSSBS); } if (features.hasARMv8_4a()) { features.add(CpuFeatures::ARM::kARMv8_3a, CpuFeatures::ARM::kDIT, CpuFeatures::ARM::kDOTPROD, CpuFeatures::ARM::kFLAGM, CpuFeatures::ARM::kPMU, CpuFeatures::ARM::kRCPC_IMMO); } if (features.hasARMv8_3a()) { features.add(CpuFeatures::ARM::kARMv8_2a, CpuFeatures::ARM::kFCMA, CpuFeatures::ARM::kFJCVTZS); } if (features.hasARMv8_2a()) { features.add(CpuFeatures::ARM::kARMv8_1a); } if (features.hasARMv8_1a()) { features.add(CpuFeatures::ARM::kARMv8a, CpuFeatures::ARM::kCRC32, CpuFeatures::ARM::kLSE, CpuFeatures::ARM::kRDM); } if (features.hasARMv8a()) { features.add(CpuFeatures::ARM::kARMv7, CpuFeatures::ARM::kVFPv2, CpuFeatures::ARM::kVFPv3, CpuFeatures::ARM::kVFPv4, CpuFeatures::ARM::kVFP_D32, CpuFeatures::ARM::kASIMD, CpuFeatures::ARM::kIDIVA); } } // CpuInfo - Detect - ARM [Windows] // ================================ #if defined(_WIN32) struct WinPFPMapping { uint8_t featureId; uint8_t pfpFeatureId; }; static ASMJIT_FAVOR_SIZE void detectPFPFeatures(CpuInfo& cpu, const WinPFPMapping* mapping, size_t size) noexcept { for (size_t i = 0; i < size; i++) if (::IsProcessorFeaturePresent(mapping[i].pfpFeatureId)) cpu.addFeature(mapping[i].featureId); } //! Detect ARM CPU features on Windows. //! //! The detection is based on `IsProcessorFeaturePresent()` API call. 
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  cpu._wasDetected = true;
  populateBaseARMFeatures(cpu);

  CpuFeatures::ARM& features = cpu.features().arm();

  // Win32 for ARM requires ARMv7 with DSP extensions, VFPv3, and uses THUMBv2 by default.
#if ASMJIT_ARCH_ARM == 32
  features.add(CpuFeatures::ARM::kTHUMB);
  features.add(CpuFeatures::ARM::kTHUMBv2);
  features.add(CpuFeatures::ARM::kARMv6);
  features.add(CpuFeatures::ARM::kARMv7);
  features.add(CpuFeatures::ARM::kEDSP);
  features.add(CpuFeatures::ARM::kVFPv2);
  features.add(CpuFeatures::ARM::kVFPv3);
#endif

  // Windows for ARM requires ASIMD.
  features.add(CpuFeatures::ARM::kASIMD);

  // Detect additional CPU features by calling `IsProcessorFeaturePresent()`.
  //
  // Each entry pairs a feature id with the numeric processor-feature id named in the trailing comment; the
  // first four entries are only compiled for 32-bit ARM.
  static const WinPFPMapping mapping[] = {
#if ASMJIT_ARCH_ARM == 32
    { uint8_t(CpuFeatures::ARM::kVFP_D32)  , 18 }, // PF_ARM_VFP_32_REGISTERS_AVAILABLE
    { uint8_t(CpuFeatures::ARM::kIDIVT)    , 24 }, // PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE
    { uint8_t(CpuFeatures::ARM::kVFPv4)    , 27 }, // PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE
    { uint8_t(CpuFeatures::ARM::kARMv8a)   , 29 }, // PF_ARM_V8_INSTRUCTIONS_AVAILABLE
#endif
    { uint8_t(CpuFeatures::ARM::kAES)      , 30 }, // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
    { uint8_t(CpuFeatures::ARM::kCRC32)    , 31 }, // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE
    { uint8_t(CpuFeatures::ARM::kLSE)      , 34 }  // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
  };

  detectPFPFeatures(cpu, mapping, ASMJIT_ARRAY_SIZE(mapping));

  // Windows provides several instructions under a single flag:
  if (features.hasAES()) {
    features.add(CpuFeatures::ARM::kSHA1,
                 CpuFeatures::ARM::kSHA2);
  }

  // Add everything implied by the detected architecture version(s).
  expandARMFeaturesByVersion(cpu);
}

// CpuInfo - Detect - ARM [Linux]
// ==============================

#elif defined(__linux__)

// Pairs a CPU feature id with the index of the bit tested in a mask returned by `getauxval()`.
struct LinuxHWCapMapping {
  uint8_t featureId;
  uint8_t hwCapBit;
};

// Reads the auxiliary vector entry `type` via `getauxval()` and, for each of the `size` entries in `mapping`,
// adds `featureId` to `cpu` when bit `hwCapBit` is set in the returned mask.
static ASMJIT_FAVOR_SIZE void detectHWCaps(CpuInfo& cpu, unsigned long type, const LinuxHWCapMapping* mapping, size_t size) noexcept {
  unsigned long mask = getauxval(type);
  for (size_t i = 0; i < size; i++)
    cpu.features().addIf(Support::bitTest(mask, mapping[i].hwCapBit), mapping[i].featureId);
}

#if ASMJIT_ARCH_ARM == 32

// `AT_HWCAP` provides ARMv7 (and less) related flags.
static const LinuxHWCapMapping hwCapMapping[] = {
  { uint8_t(CpuFeatures::ARM::kVFPv2)       ,  6 }, // HWCAP_VFP
  { uint8_t(CpuFeatures::ARM::kEDSP)        ,  7 }, // HWCAP_EDSP
  { uint8_t(CpuFeatures::ARM::kASIMD)       , 12 }, // HWCAP_NEON
  { uint8_t(CpuFeatures::ARM::kVFPv3)       , 13 }, // HWCAP_VFPv3
  { uint8_t(CpuFeatures::ARM::kVFPv4)       , 16 }, // HWCAP_VFPv4
  { uint8_t(CpuFeatures::ARM::kIDIVA)       , 17 }, // HWCAP_IDIVA
  { uint8_t(CpuFeatures::ARM::kIDIVT)       , 18 }, // HWCAP_IDIVT
  { uint8_t(CpuFeatures::ARM::kVFP_D32)     , 19 }  // HWCAP_VFPD32
};

// `AT_HWCAP2` provides ARMv8+ related flags.
static const LinuxHWCapMapping hwCap2Mapping[] = {
  { uint8_t(CpuFeatures::ARM::kAES)         ,  0 }, // HWCAP2_AES
  { uint8_t(CpuFeatures::ARM::kPMULL)       ,  1 }, // HWCAP2_PMULL
  { uint8_t(CpuFeatures::ARM::kSHA1)        ,  2 }, // HWCAP2_SHA1
  { uint8_t(CpuFeatures::ARM::kSHA2)        ,  3 }, // HWCAP2_SHA2
  { uint8_t(CpuFeatures::ARM::kCRC32)       ,  4 }  // HWCAP2_CRC32
};

// Detects ARM CPU features on 32-bit Linux from the `AT_HWCAP` and `AT_HWCAP2` masks and then derives the
// architecture version features (ARMv6, ARMv7, ARMv8-A) from the individual features that were found.
static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept {
  cpu._wasDetected = true;

  populateBaseARMFeatures(cpu);

  CpuFeatures::ARM& features = cpu.features().arm();

  detectHWCaps(cpu, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
  detectHWCaps(cpu, AT_HWCAP2, hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping));

  // VFPv3 implies VFPv2.
  if (features.hasVFPv3())
    features.add(CpuFeatures::ARM::kVFPv2);

  // VFPv2 implies ARMv6.
  if (features.hasVFPv2())
    features.add(CpuFeatures::ARM::kARMv6);

  // ARMv7 provides VFPv3|ASIMD.
  if (features.hasVFPv3() || features.hasASIMD())
    features.add(CpuFeatures::ARM::kARMv7);

  // ARMv8 provides AES, CRC32, PMULL, SHA1, and SHA2.
  if (features.hasAES() || features.hasCRC32() || features.hasPMULL() || features.hasSHA1() || features.hasSHA2())
    features.add(CpuFeatures::ARM::kARMv8a);
}

#else

// `AT_HWCAP` provides ARMv8+ related flags.
// Each entry pairs a feature id with the bit index of the capability named in the trailing comment. Entries
// that are commented out are capability bits that have no feature id assigned in this table.
static const LinuxHWCapMapping hwCapMapping[] = {
  /*
  { uint8_t(CpuFeatures::ARM::k)            ,  0 }, // HWCAP_FP
  */
  { uint8_t(CpuFeatures::ARM::kASIMD)       ,  1 }, // HWCAP_ASIMD
  /*
  { uint8_t(CpuFeatures::ARM::k)            ,  2 }, // HWCAP_EVTSTRM
  */
  { uint8_t(CpuFeatures::ARM::kAES)         ,  3 }, // HWCAP_AES
  { uint8_t(CpuFeatures::ARM::kPMULL)       ,  4 }, // HWCAP_PMULL
  { uint8_t(CpuFeatures::ARM::kSHA1)        ,  5 }, // HWCAP_SHA1
  { uint8_t(CpuFeatures::ARM::kSHA2)        ,  6 }, // HWCAP_SHA2
  { uint8_t(CpuFeatures::ARM::kCRC32)       ,  7 }, // HWCAP_CRC32
  { uint8_t(CpuFeatures::ARM::kLSE)         ,  8 }, // HWCAP_ATOMICS
  { uint8_t(CpuFeatures::ARM::kFP16CONV)    ,  9 }, // HWCAP_FPHP
  { uint8_t(CpuFeatures::ARM::kFP16FULL)    , 10 }, // HWCAP_ASIMDHP
  { uint8_t(CpuFeatures::ARM::kCPUID)       , 11 }, // HWCAP_CPUID
  { uint8_t(CpuFeatures::ARM::kRDM)         , 12 }, // HWCAP_ASIMDRDM
  { uint8_t(CpuFeatures::ARM::kFJCVTZS)     , 13 }, // HWCAP_JSCVT
  { uint8_t(CpuFeatures::ARM::kFCMA)        , 14 }, // HWCAP_FCMA
  /*
  { uint8_t(CpuFeatures::ARM::k)            , 15 }, // HWCAP_LRCPC
  { uint8_t(CpuFeatures::ARM::k)            , 16 }, // HWCAP_DCPOP
  */
  { uint8_t(CpuFeatures::ARM::kSHA3)        , 17 }, // HWCAP_SHA3
  { uint8_t(CpuFeatures::ARM::kSM3)         , 18 }, // HWCAP_SM3
  { uint8_t(CpuFeatures::ARM::kSM4)         , 19 }, // HWCAP_SM4
  { uint8_t(CpuFeatures::ARM::kDOTPROD)     , 20 }, // HWCAP_ASIMDDP
  { uint8_t(CpuFeatures::ARM::kSHA512)      , 21 }, // HWCAP_SHA512
  { uint8_t(CpuFeatures::ARM::kSVE)         , 22 }, // HWCAP_SVE
  { uint8_t(CpuFeatures::ARM::kFP16FML)     , 23 }, // HWCAP_ASIMDFHM
  { uint8_t(CpuFeatures::ARM::kDIT)         , 24 }, // HWCAP_DIT
  /*
  { uint8_t(CpuFeatures::ARM::k)            , 25 }, // HWCAP_USCAT
  { uint8_t(CpuFeatures::ARM::k)            , 26 }, // HWCAP_ILRCPC
  */
  { uint8_t(CpuFeatures::ARM::kFLAGM)       , 27 }, // HWCAP_FLAGM
  { uint8_t(CpuFeatures::ARM::kSSBS)        , 28 }, // HWCAP_SSBS
  { uint8_t(CpuFeatures::ARM::kSB)          , 29 }  // HWCAP_SB
  /*
  { uint8_t(CpuFeatures::ARM::k)            , 30 }, // HWCAP_PACA
  { uint8_t(CpuFeatures::ARM::k)            , 31 }  // HWCAP_PACG
  */
};

// `AT_HWCAP2` provides ARMv8+ related flags.
static const LinuxHWCapMapping hwCapMapping2[] = { /* { uint8_t(CpuFeatures::ARM::k) , 0 }, // HWCAP2_DCPODP */ { uint8_t(CpuFeatures::ARM::kSVE2) , 1 }, // HWCAP2_SVE2 { uint8_t(CpuFeatures::ARM::kSVE2_AES) , 2 }, // HWCAP2_SVEAES { uint8_t(CpuFeatures::ARM::kSVE_PMULL) , 3 }, // HWCAP2_SVEPMULL { uint8_t(CpuFeatures::ARM::kSVE2_BITPERM), 4 }, // HWCAP2_SVEBITPERM { uint8_t(CpuFeatures::ARM::kSVE2_SHA3) , 5 }, // HWCAP2_SVESHA3 { uint8_t(CpuFeatures::ARM::kSVE2_SM4) , 6 }, // HWCAP2_SVESM4 { uint8_t(CpuFeatures::ARM::kALTNZCV) , 7 }, // HWCAP2_FLAGM2 { uint8_t(CpuFeatures::ARM::kFRINT) , 8 }, // HWCAP2_FRINT { uint8_t(CpuFeatures::ARM::kSVE_I8MM) , 9 }, // HWCAP2_SVEI8MM { uint8_t(CpuFeatures::ARM::kSVE_F32MM) , 10 }, // HWCAP2_SVEF32MM { uint8_t(CpuFeatures::ARM::kSVE_F64MM) , 11 }, // HWCAP2_SVEF64MM { uint8_t(CpuFeatures::ARM::kSVE_BF16) , 12 }, // HWCAP2_SVEBF16 { uint8_t(CpuFeatures::ARM::kI8MM) , 13 }, // HWCAP2_I8MM { uint8_t(CpuFeatures::ARM::kBF16) , 14 }, // HWCAP2_BF16 { uint8_t(CpuFeatures::ARM::kDGH) , 15 }, // HWCAP2_DGH { uint8_t(CpuFeatures::ARM::kRNG) , 16 }, // HWCAP2_RNG { uint8_t(CpuFeatures::ARM::kBTI) , 17 }, // HWCAP2_BTI { uint8_t(CpuFeatures::ARM::kMTE) , 18 } // HWCAP2_MTE }; static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept { cpu._wasDetected = true; populateBaseARMFeatures(cpu); detectHWCaps(cpu, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping)); detectHWCaps(cpu, AT_HWCAP2, hwCapMapping2, ASMJIT_ARRAY_SIZE(hwCapMapping2)); } #endif // CpuInfo - Detect - ARM [Apple] // ============================== #elif defined(__APPLE__) namespace AppleHWId { enum CpuFamily : uint32_t { // Generic ARM. kCpuFamily_ARM_9 = 0xE73283AEu, kCpuFamily_ARM_11 = 0x8FF620D8u, kCpuFamily_ARM_12 = 0xBD1B0AE9u, kCpuFamily_ARM_13 = 0x0CC90E64u, kCpuFamily_ARM_14 = 0x96077EF1u, kCpuFamily_ARM_15 = 0xA8511BCAu, // Apple design. 
kCpuFamily_SWIFT = 0x1E2D6381u, kCpuFamily_CYCLONE = 0x37A09642u, kCpuFamily_TYPHOON = 0x2C91A47Eu, kCpuFamily_TWISTER = 0x92FB37C8u, kCpuFamily_HURRICANE = 0x67CEEE93u, kCpuFamily_MONSOON_MISTRAL = 0xE81E7EF6u, kCpuFamily_VORTEX_TEMPEST = 0x07D34B9Fu, kCpuFamily_LIGHTNING_THUNDER = 0x462504D2u, kCpuFamily_FIRESTORM_ICESTORM = 0x1B588BB3u }; }; static ASMJIT_FAVOR_SIZE uint32_t queryARMCpuFamilyId() noexcept { uint32_t result = 0; size_t size = sizeof(result); int res = sysctlbyname("hw.cpufamily", &result, &size, nullptr, 0); if (res != 0) return 0; else return result; } static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept { cpu._wasDetected = true; populateBaseARMFeatures(cpu); uint32_t cpuFamilyId = queryARMCpuFamilyId(); CpuFeatures::ARM& features = cpu.features().arm(); switch (cpuFamilyId) { case AppleHWId::kCpuFamily_ARM_9: case AppleHWId::kCpuFamily_ARM_11: case AppleHWId::kCpuFamily_ARM_12: break; // ARM Cortex A8. case AppleHWId::kCpuFamily_ARM_13: break; // ARM Cortex A9. case AppleHWId::kCpuFamily_ARM_14: break; // ARM Cortex A7 - ARMv7k. case AppleHWId::kCpuFamily_ARM_15: features.add(CpuFeatures::ARM::kARMv7); break; // Apple A6/A6X - ARMv7s. case AppleHWId::kCpuFamily_SWIFT: features.add(CpuFeatures::ARM::kARMv7); break; // Apple A7 - ARMv8.0-A. case AppleHWId::kCpuFamily_CYCLONE: features.add(CpuFeatures::ARM::kARMv8a, CpuFeatures::ARM::kAES, CpuFeatures::ARM::kSHA1, CpuFeatures::ARM::kSHA2); break; // Apple A8 - ARMv8.0-A. case AppleHWId::kCpuFamily_TYPHOON: features.add(CpuFeatures::ARM::kARMv8a, CpuFeatures::ARM::kAES, CpuFeatures::ARM::kSHA1, CpuFeatures::ARM::kSHA2); break; // Apple A9 - ARMv8.0-A. case AppleHWId::kCpuFamily_TWISTER: features.add(CpuFeatures::ARM::kARMv8a, CpuFeatures::ARM::kAES, CpuFeatures::ARM::kSHA1, CpuFeatures::ARM::kSHA2); break; // Apple A10 - ARMv8.1-A. 
case AppleHWId::kCpuFamily_HURRICANE: features.add(CpuFeatures::ARM::kARMv8_1a, CpuFeatures::ARM::kAES, CpuFeatures::ARM::kRDM, CpuFeatures::ARM::kSHA1, CpuFeatures::ARM::kSHA2); break; // Apple A11 - ARMv8.2-A. case AppleHWId::kCpuFamily_MONSOON_MISTRAL: features.add(CpuFeatures::ARM::kARMv8_2a, CpuFeatures::ARM::kAES, CpuFeatures::ARM::kFP16FULL, CpuFeatures::ARM::kSHA1, CpuFeatures::ARM::kSHA2); break; // Apple A12 - ARMv8.3-A. case AppleHWId::kCpuFamily_VORTEX_TEMPEST: features.add(CpuFeatures::ARM::kARMv8_3a, CpuFeatures::ARM::kAES, CpuFeatures::ARM::kFP16FULL, CpuFeatures::ARM::kSHA1, CpuFeatures::ARM::kSHA2); break; // Apple A13 - ARMv8.4-A. case AppleHWId::kCpuFamily_LIGHTNING_THUNDER: features.add(CpuFeatures::ARM::kARMv8_4a, CpuFeatures::ARM::kAES, CpuFeatures::ARM::kFP16FML, CpuFeatures::ARM::kFP16FULL, CpuFeatures::ARM::kSHA1, CpuFeatures::ARM::kSHA2, CpuFeatures::ARM::kSHA3, CpuFeatures::ARM::kSHA512); break; // Apple A14/M1 - ARMv8.5-A. case AppleHWId::kCpuFamily_FIRESTORM_ICESTORM: features.add(CpuFeatures::ARM::kARMv8_4a, CpuFeatures::ARM::kAES, CpuFeatures::ARM::kALTNZCV, CpuFeatures::ARM::kFP16FML, CpuFeatures::ARM::kFP16FULL, CpuFeatures::ARM::kFRINT, CpuFeatures::ARM::kSB, CpuFeatures::ARM::kSHA1, CpuFeatures::ARM::kSHA2, CpuFeatures::ARM::kSHA3, CpuFeatures::ARM::kSHA512, CpuFeatures::ARM::kSSBS); break; default: cpu._wasDetected = false; break; } expandARMFeaturesByVersion(cpu); } // CpuInfo - Detect - ARM [Unknown] // ================================ #else #if ASMJIT_ARCH_ARM == 64 #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with AArch64 CPU)") #else #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown OS with ARM CPU)") #endif static ASMJIT_FAVOR_SIZE void detectARMCpu(CpuInfo& cpu) noexcept { populateBaseARMFeatures(cpu); detectARMFeaturesViaCompilerFlags(cpu); expandARMFeaturesByVersion(cpu); } #endif #endif // CpuInfo - Detect - Host 
// ======================= static uint32_t cpuInfoInitialized; static CpuInfo cpuInfoGlobal(Globals::NoInit); const CpuInfo& CpuInfo::host() noexcept { // This should never cause a problem as the resulting information should always be the same. In the worst case we // would just overwrite it non-atomically. if (!cpuInfoInitialized) { CpuInfo cpuInfoLocal; cpuInfoLocal._arch = Arch::kHost; cpuInfoLocal._subArch = SubArch::kHost; #if ASMJIT_ARCH_X86 detectX86Cpu(cpuInfoLocal); #elif ASMJIT_ARCH_ARM detectARMCpu(cpuInfoLocal); #else #pragma message("[asmjit] Disabling runtime CPU detection - unsupported OS/CPU combination (Unknown CPU)") #endif cpuInfoLocal._hwThreadCount = detectHWThreadCount(); cpuInfoGlobal = cpuInfoLocal; cpuInfoInitialized = 1; } return cpuInfoGlobal; } ASMJIT_END_NAMESPACE