From 9c82bf6e0cce0f50fd5ebdd59d5ce7df868efd5c Mon Sep 17 00:00:00 2001 From: opcm Date: Wed, 13 Dec 2023 13:00:22 +0000 Subject: [PATCH] factor out cpuid 0xb topology code and use it in OSX --- src/MacMSRDriver/PcmMsr/PcmMsr.cpp | 18 ++--- src/cpucounters.cpp | 92 +++---------------------- src/topologyentry.h | 82 ++++++++++++++++++++++ src/types.h | 107 +++++++++++++++++++++++++++++ src/utils.h | 94 ------------------------- 5 files changed, 208 insertions(+), 185 deletions(-) diff --git a/src/MacMSRDriver/PcmMsr/PcmMsr.cpp b/src/MacMSRDriver/PcmMsr/PcmMsr.cpp index dd008cd8..04dbd019 100644 --- a/src/MacMSRDriver/PcmMsr/PcmMsr.cpp +++ b/src/MacMSRDriver/PcmMsr/PcmMsr.cpp @@ -12,8 +12,6 @@ PcmMsrDriverClassName *g_pci_driver = NULL; asm volatile ("wrmsr" : : "c" (msr), "a" (lo), "d" (hi)) #define rdmsr(msr,lo,hi) \ asm volatile ("\trdmsr\n" : "=a" (lo), "=d" (hi) : "c" (msr)) -#define cpuid(func1, func2, a, b, c, d) \ -asm volatile ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (func1), "c" (func2)); extern "C" { extern void mp_rendezvous_no_intrs(void (*func)(void *), @@ -59,13 +57,17 @@ void cpuWriteMSR(void* pIDatas){ void cpuGetTopoData(void* pTopos){ TopologyEntry* entries = (TopologyEntry*)pTopos; int cpu = cpu_number(); - int info[4]; - entries[cpu].os_id = cpu; - cpuid(0xB, 1, info[0], info[1], info[2], info[3]); - entries[cpu].socket = info[3] >> info[0] & 0xF; - cpuid(0xB, 0, info[0], info[1], info[2], info[3]); - entries[cpu].core_id = info[3] >> info[0] & 0xF; + TopologyEntry & entry = entries[cpu]; + entry.os_id = cpu; + + uint32 smtMaskWidth = 0; + uint32 coreMaskWidth = 0; + uint32 l2CacheMaskShift = 0; + initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift); + PCM_CPUID_INFO cpuid_args; + pcm_cpuid(0xb, 0x0, cpuid_args); + fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, cpuid_args.array[3]); } OSDefineMetaClassAndStructors(com_intel_driver_PcmMsr, IOService) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index bdd8417c..e2b4609e 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -339,17 +339,6 @@ void pcm_cpuid_bsd(int leaf, PCM_CPUID_INFO& info, int core) } #endif -/* Adding the new version of cpuid with leaf and subleaf as an input */ -void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info) -{ - #ifdef _MSC_VER - __cpuidex(info.array, leaf, subleaf); - #else - __asm__ __volatile__ ("cpuid" : \ - "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf)); - #endif -} - #ifdef __linux__ bool isNMIWatchdogEnabled(const bool silent); bool keepNMIWatchdogEnabled(); @@ -1121,16 +1110,9 @@ bool PCM::discoverSystemTopology() socketIdMap_type socketIdMap; PCM_CPUID_INFO cpuid_args; - // init constants for CPU topology leaf 0xB - // adapted from Topology Enumeration Reference code for Intel 64 Architecture - // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration - int wasCoreReported = 0, wasThreadReported = 0; - int subleaf = 0, levelType, levelShift; - //uint32 coreSelectMask = 0, smtSelectMask = 0; uint32 smtMaskWidth = 0; - //uint32 pkgSelectMask = (-1), pkgSelectMaskShift = 0; - uint32 corePlusSMTMaskWidth = 0; uint32 coreMaskWidth = 0; + uint32 l2CacheMaskShift = 0; struct domain { @@ -1140,30 +1122,14 @@ bool PCM::discoverSystemTopology() std::unordered_map topologyDomainMap; { TemporalThreadAffinity aff0(0); - do + + if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift) == false) { - pcm_cpuid(0xb, subleaf, cpuid_args); - if (cpuid_args.array[1] == 0) - { // if EBX ==0 then this subleaf is not valid, we can exit the loop - break; - } - levelType = extract_bits_ui(cpuid_args.array[2], 8, 15); - levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4); - switch (levelType) - { - case 1: //level type is SMT, so levelShift is the SMT_Mask_Width - smtMaskWidth = levelShift; - wasThreadReported = 1; - break; - case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width - corePlusSMTMaskWidth = levelShift; - wasCoreReported = 1; - break; - default: - break; - } - subleaf++; - } while (1); + std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n"; + return false; + } + + int subleaf = 0; std::vector topologyDomains; if (max_cpuid >= 0x1F) @@ -1209,42 +1175,6 @@ bool PCM::discoverSystemTopology() } } - if (wasThreadReported && wasCoreReported) - { - coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth; - } - else if (!wasCoreReported && wasThreadReported) - { - coreMaskWidth = smtMaskWidth; - } - else - { - std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n"; - return false; - } - - (void) coreMaskWidth; // to suppress warnings on MacOS (unused vars) - - uint32 l2CacheMaskShift = 0; -#ifdef PCM_DEBUG_TOPOLOGY - uint32 threadsSharingL2; -#endif - uint32 l2CacheMaskWidth; - - pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache - l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache -#ifdef PCM_DEBUG_TOPOLOGY - threadsSharingL2 = l2CacheMaskWidth; -#endif - for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1) - { - l2CacheMaskShift++; - } -#ifdef PCM_DEBUG_TOPOLOGY - std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2 - << " [the most significant bit = " << l2CacheMaskShift << "]\n"; -#endif - #ifndef __APPLE__ auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry) { @@ -1285,11 +1215,7 @@ bool PCM::discoverSystemTopology() } else { - const int apic_id = getAPICID(0xb); - entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0; - entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0; - entry.socket = extract_bits_ui(apic_id, smtMaskWidth + coreMaskWidth, 31); - entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31); + fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, getAPICID(0xb)); } }; #endif diff --git a/src/topologyentry.h b/src/topologyentry.h index 55647c3e..c5922fc6 100644 --- a/src/topologyentry.h +++ b/src/topologyentry.h @@ -69,5 +69,87 @@ struct PCM_API TopologyEntry // describes a core } }; +inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const uint32 & coreMaskWidth, const uint32 & l2CacheMaskShift, const int apic_id) +{ + entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0; + entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0; + entry.socket = extract_bits_ui(apic_id, smtMaskWidth + coreMaskWidth, 31); + entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31); +} + +inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift) +{ + // init constants for CPU topology leaf 0xB + // adapted from Topology Enumeration Reference code for Intel 64 Architecture + // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration + int wasCoreReported = 0, wasThreadReported = 0; + PCM_CPUID_INFO cpuid_args; + if (true) + { + uint32 corePlusSMTMaskWidth = 0; + int subleaf = 0, levelType, levelShift; + do + { + pcm_cpuid(0xb, subleaf, cpuid_args); + if (cpuid_args.array[1] == 0) + { // if EBX ==0 then this subleaf is not valid, we can exit the loop + break; + } + levelType = extract_bits_ui(cpuid_args.array[2], 8, 15); + levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4); + switch (levelType) + { + case 1: //level type is SMT, so levelShift is the SMT_Mask_Width + smtMaskWidth = levelShift; + wasThreadReported = 1; + break; + case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width + corePlusSMTMaskWidth = levelShift; + wasCoreReported = 1; + break; + default: + break; + } + subleaf++; + } while (1); + + if (wasThreadReported && wasCoreReported) + { + coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth; + } + else if (!wasCoreReported && wasThreadReported) + { + coreMaskWidth = smtMaskWidth; + } + else + { + std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n"; + return false; + } + + (void) coreMaskWidth; // to suppress warnings on MacOS (unused vars) + + #ifdef PCM_DEBUG_TOPOLOGY + uint32 threadsSharingL2; + #endif + uint32 l2CacheMaskWidth; + + pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache + l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache + #ifdef PCM_DEBUG_TOPOLOGY + threadsSharingL2 = l2CacheMaskWidth; + #endif + for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1) + { + l2CacheMaskShift++; + } + #ifdef PCM_DEBUG_TOPOLOGY + std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2 + << " [the most significant bit = " << l2CacheMaskShift << "]\n"; + #endif + } + return true; +} + } diff --git a/src/types.h b/src/types.h index ba70c223..8cb607f6 100644 --- a/src/types.h +++ b/src/types.h @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef _MSC_VER #include @@ -1434,6 +1435,112 @@ struct MCFGHeader #endif // #ifndef KERNEL + +inline uint32 build_bit_ui(uint32 beg, uint32 end) +{ + assert(end <= 31); + uint32 myll = 0; + if (end == 31) + { + myll = (uint32)(-1); + } + else + { + myll = (1 << (end + 1)) - 1; + } + myll = myll >> beg; + return myll; +} + +inline uint32 extract_bits_ui(uint32 myin, uint32 beg, uint32 end) +{ + uint32 myll = 0; + uint32 beg1, end1; + + // Let the user reverse the order of beg & end. + if (beg <= end) + { + beg1 = beg; + end1 = end; + } + else + { + beg1 = end; + end1 = beg; + } + myll = myin >> beg1; + myll = myll & build_bit_ui(beg1, end1); + return myll; +} + +inline uint64 build_bit(uint32 beg, uint32 end) +{ + uint64 myll = 0; + if (end > 63) + { + end = 63; + } + if (end == 63) + { + myll = static_cast(-1); + } + else + { + myll = (1LL << (end + 1)) - 1; + } + myll = myll >> beg; + return myll; +} + +inline uint64 extract_bits(uint64 myin, uint32 beg, uint32 end) +{ + uint64 myll = 0; + uint32 beg1, end1; + + // Let the user reverse the order of beg & end. + if (beg <= end) + { + beg1 = beg; + end1 = end; + } + else + { + beg1 = end; + end1 = beg; + } + myll = myin >> beg1; + myll = myll & build_bit(beg1, end1); + return myll; +} + +union PCM_CPUID_INFO +{ + int array[4]; + struct { unsigned int eax, ebx, ecx, edx; } reg; +}; + +inline void pcm_cpuid(int leaf, PCM_CPUID_INFO& info) +{ +#ifdef _MSC_VER + // version for Windows + __cpuid(info.array, leaf); +#else + __asm__ __volatile__("cpuid" : \ + "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf)); +#endif +} + +/* Adding the new version of cpuid with leaf and subleaf as an input */ +inline void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info) +{ + #ifdef _MSC_VER + __cpuidex(info.array, leaf, subleaf); + #else + __asm__ __volatile__ ("cpuid" : \ + "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf)); + #endif +} + //IDX accel device/func number(PCIe). //The device/function number from SPR register guide. #define SPR_IDX_IAA_REGISTER_DEV_ADDR (2) diff --git a/src/utils.h b/src/utils.h index f80478df..06ebd823 100644 --- a/src/utils.h +++ b/src/utils.h @@ -436,23 +436,6 @@ bool match(const std::string& subtoken, const std::string& sname, uint64* result uint64 read_number(const char* str); -union PCM_CPUID_INFO -{ - int array[4]; - struct { unsigned int eax, ebx, ecx, edx; } reg; -}; - -inline void pcm_cpuid(int leaf, PCM_CPUID_INFO& info) -{ -#ifdef _MSC_VER - // version for Windows - __cpuid(info.array, leaf); -#else - __asm__ __volatile__("cpuid" : \ - "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf)); -#endif -} - inline void clear_screen() { #ifdef _MSC_VER system("cls"); @@ -461,83 +444,6 @@ inline void clear_screen() { #endif } -inline uint32 build_bit_ui(uint32 beg, uint32 end) -{ - assert(end <= 31); - uint32 myll = 0; - if (end == 31) - { - myll = (uint32)(-1); - } - else - { - myll = (1 << (end + 1)) - 1; - } - myll = myll >> beg; - return myll; -} - -inline uint32 extract_bits_ui(uint32 myin, uint32 beg, uint32 end) -{ - uint32 myll = 0; - uint32 beg1, end1; - - // Let the user reverse the order of beg & end. - if (beg <= end) - { - beg1 = beg; - end1 = end; - } - else - { - beg1 = end; - end1 = beg; - } - myll = myin >> beg1; - myll = myll & build_bit_ui(beg1, end1); - return myll; -} - -inline uint64 build_bit(uint32 beg, uint32 end) -{ - uint64 myll = 0; - if (end > 63) - { - end = 63; - } - if (end == 63) - { - myll = static_cast(-1); - } - else - { - myll = (1LL << (end + 1)) - 1; - } - myll = myll >> beg; - return myll; -} - -inline uint64 extract_bits(uint64 myin, uint32 beg, uint32 end) -{ - uint64 myll = 0; - uint32 beg1, end1; - - // Let the user reverse the order of beg & end. - if (beg <= end) - { - beg1 = beg; - end1 = end; - } - else - { - beg1 = end; - end1 = beg; - } - myll = myin >> beg1; - myll = myll & build_bit(beg1, end1); - return myll; -} - #ifdef _MSC_VER #define PCM_MSR_DRV_NAME TEXT("\\\\.\\RDMSR")