Skip to content

Commit

Permalink
factor out cpuid 0xb topology code and use it in OSX
Browse files Browse the repository at this point in the history
  • Loading branch information
opcm committed Dec 13, 2023
1 parent 480abf3 commit 9c82bf6
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 185 deletions.
18 changes: 10 additions & 8 deletions src/MacMSRDriver/PcmMsr/PcmMsr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ PcmMsrDriverClassName *g_pci_driver = NULL;
asm volatile ("wrmsr" : : "c" (msr), "a" (lo), "d" (hi))
#define rdmsr(msr,lo,hi) \
asm volatile ("\trdmsr\n" : "=a" (lo), "=d" (hi) : "c" (msr))
#define cpuid(func1, func2, a, b, c, d) \
asm volatile ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (func1), "c" (func2));

extern "C" {
extern void mp_rendezvous_no_intrs(void (*func)(void *),
Expand Down Expand Up @@ -59,13 +57,17 @@ void cpuWriteMSR(void* pIDatas){
/**
 * Fills in the TopologyEntry slot belonging to the CPU this code is running on.
 *
 * @param pTopos  untyped pointer that is treated as an array of TopologyEntry,
 *                indexed by the value returned from cpu_number().
 *                NOTE(review): presumably invoked once per CPU through
 *                mp_rendezvous_no_intrs and the array is assumed to be large
 *                enough for every CPU number - confirm against the caller.
 *
 * The topology fields are derived from CPUID leaf 0xB by the shared helpers
 * initCoreMasks() and fillEntry(). The earlier hand-rolled decoding
 * (two extra CPUID calls storing into .socket and .core_id) was removed:
 * fillEntry() overwrites both fields, so those stores were dead.
 */
void cpuGetTopoData(void* pTopos){
    TopologyEntry* entries = (TopologyEntry*)pTopos;
    const int cpu = cpu_number();
    TopologyEntry & entry = entries[cpu];
    entry.os_id = cpu;

    uint32 smtMaskWidth = 0;
    uint32 coreMaskWidth = 0;
    uint32 l2CacheMaskShift = 0;
    // The return value is intentionally not checked here (as before): on
    // failure the widths stay 0 and fillEntry() still produces an entry.
    initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift);

    // EDX (array[3]) of leaf 0xB, sub-leaf 0 is what fillEntry() decodes as the APIC ID.
    PCM_CPUID_INFO cpuid_args;
    pcm_cpuid(0xb, 0x0, cpuid_args);
    fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, cpuid_args.array[3]);
}

OSDefineMetaClassAndStructors(com_intel_driver_PcmMsr, IOService)
Expand Down
92 changes: 9 additions & 83 deletions src/cpucounters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,17 +339,6 @@ void pcm_cpuid_bsd(int leaf, PCM_CPUID_INFO& info, int core)
}
#endif

/* Executes the CPUID instruction for the given leaf and sub-leaf.
 *
 * leaf    - value placed in EAX before CPUID is executed
 * subleaf - value placed in ECX before CPUID is executed
 * info    - receives the resulting EAX, EBX, ECX and EDX values
 *
 * With MSVC the __cpuidex intrinsic is used; otherwise inline assembly is used.
 */
void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info)
{
#ifdef _MSC_VER
__cpuidex(info.array, leaf, subleaf);
#else
// inputs: EAX = leaf, ECX = subleaf; outputs: all four registers
__asm__ __volatile__ ("cpuid" : \
"=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf));
#endif
}

#ifdef __linux__
bool isNMIWatchdogEnabled(const bool silent);
bool keepNMIWatchdogEnabled();
Expand Down Expand Up @@ -1121,16 +1110,9 @@ bool PCM::discoverSystemTopology()
socketIdMap_type socketIdMap;

PCM_CPUID_INFO cpuid_args;
// init constants for CPU topology leaf 0xB
// adapted from Topology Enumeration Reference code for Intel 64 Architecture
// https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
int wasCoreReported = 0, wasThreadReported = 0;
int subleaf = 0, levelType, levelShift;
//uint32 coreSelectMask = 0, smtSelectMask = 0;
uint32 smtMaskWidth = 0;
//uint32 pkgSelectMask = (-1), pkgSelectMaskShift = 0;
uint32 corePlusSMTMaskWidth = 0;
uint32 coreMaskWidth = 0;
uint32 l2CacheMaskShift = 0;

struct domain
{
Expand All @@ -1140,30 +1122,14 @@ bool PCM::discoverSystemTopology()
std::unordered_map<int, domain> topologyDomainMap;
{
TemporalThreadAffinity aff0(0);
do

if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift) == false)
{
pcm_cpuid(0xb, subleaf, cpuid_args);
if (cpuid_args.array[1] == 0)
{ // if EBX ==0 then this subleaf is not valid, we can exit the loop
break;
}
levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
switch (levelType)
{
case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
smtMaskWidth = levelShift;
wasThreadReported = 1;
break;
case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
corePlusSMTMaskWidth = levelShift;
wasCoreReported = 1;
break;
default:
break;
}
subleaf++;
} while (1);
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
return false;
}

int subleaf = 0;

std::vector<domain> topologyDomains;
if (max_cpuid >= 0x1F)
Expand Down Expand Up @@ -1209,42 +1175,6 @@ bool PCM::discoverSystemTopology()
}
}

if (wasThreadReported && wasCoreReported)
{
coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth;
}
else if (!wasCoreReported && wasThreadReported)
{
coreMaskWidth = smtMaskWidth;
}
else
{
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
return false;
}

(void) coreMaskWidth; // to suppress warnings on MacOS (unused vars)

uint32 l2CacheMaskShift = 0;
#ifdef PCM_DEBUG_TOPOLOGY
uint32 threadsSharingL2;
#endif
uint32 l2CacheMaskWidth;

pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
#ifdef PCM_DEBUG_TOPOLOGY
threadsSharingL2 = l2CacheMaskWidth;
#endif
for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
{
l2CacheMaskShift++;
}
#ifdef PCM_DEBUG_TOPOLOGY
std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
<< " [the most significant bit = " << l2CacheMaskShift << "]\n";
#endif

#ifndef __APPLE__
auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry)
{
Expand Down Expand Up @@ -1285,11 +1215,7 @@ bool PCM::discoverSystemTopology()
}
else
{
const int apic_id = getAPICID(0xb);
entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0;
entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0;
entry.socket = extract_bits_ui(apic_id, smtMaskWidth + coreMaskWidth, 31);
entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, getAPICID(0xb));
}
};
#endif
Expand Down
82 changes: 82 additions & 0 deletions src/topologyentry.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,87 @@ struct PCM_API TopologyEntry // describes a core
}
};

/**
 * Decodes an APIC ID into the thread/core/socket/tile fields of a TopologyEntry.
 *
 * @param entry             entry whose thread_id, core_id, socket and tile_id are written
 * @param smtMaskWidth      number of low APIC ID bits that select the SMT thread
 * @param coreMaskWidth     number of APIC ID bits (above the SMT bits) that select the core
 * @param l2CacheMaskShift  lowest APIC ID bit that belongs to the tile id
 * @param apic_id           APIC ID to decode
 *
 * Bit layout used:
 *   thread_id = bits [0, smtMaskWidth)
 *   core_id   = bits [smtMaskWidth, smtMaskWidth + coreMaskWidth)
 *   socket    = bits [smtMaskWidth + coreMaskWidth, 31]
 *   tile_id   = bits [l2CacheMaskShift, 31]
 * A field whose width is 0 is set to 0.
 */
inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const uint32 & coreMaskWidth, const uint32 & l2CacheMaskShift, const int apic_id)
{
    const uint32 corePlusSMTMaskWidth = smtMaskWidth + coreMaskWidth;

    entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0;
    // Test coreMaskWidth itself: with coreMaskWidth == 0 and smtMaskWidth > 0 the
    // range (smtMaskWidth, smtMaskWidth - 1) is reversed, and extract_bits_ui would
    // swap the bounds and return two bits (including the top thread bit) instead of 0.
    entry.core_id = coreMaskWidth ? extract_bits_ui(apic_id, smtMaskWidth, corePlusSMTMaskWidth - 1) : 0;
    entry.socket = extract_bits_ui(apic_id, corePlusSMTMaskWidth, 31);
    entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
}

/**
 * Derives the APIC ID field widths from CPUID on the CPU that executes this call.
 *
 * @param smtMaskWidth      out: shift reported by the SMT level (type 1) of CPUID leaf 0xB
 * @param coreMaskWidth     out: core-level shift minus the SMT shift; equals the SMT
 *                          shift when leaf 0xB reports no core level (type 2)
 * @param l2CacheMaskShift  out: floor(log2(N)), where N = 1 + EAX[25:14] of CPUID
 *                          leaf 4, sub-leaf 2. It is reset to 0 before being
 *                          computed, so the caller does not need to pre-zero it.
 * @return false (after printing an error to std::cerr) when leaf 0xB reports no SMT
 *         level; true otherwise. On failure coreMaskWidth and l2CacheMaskShift are
 *         left untouched.
 */
inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift)
{
    // init constants for CPU topology leaf 0xB
    // adapted from Topology Enumeration Reference code for Intel 64 Architecture
    // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
    bool wasCoreReported = false;
    bool wasThreadReported = false;
    uint32 corePlusSMTMaskWidth = 0;
    PCM_CPUID_INFO cpuid_args;

    for (int subleaf = 0; ; ++subleaf)
    {
        pcm_cpuid(0xb, subleaf, cpuid_args);
        if (cpuid_args.array[1] == 0)
        {   // if EBX == 0 then this subleaf is not valid, we can exit the loop
            break;
        }
        const int levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
        const int levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
        switch (levelType)
        {
        case 1: // level type is SMT, so levelShift is the SMT_Mask_Width
            smtMaskWidth = levelShift;
            wasThreadReported = true;
            break;
        case 2: // level type is Core, so levelShift is the CorePlusSMT_Mask_Width
            corePlusSMTMaskWidth = levelShift;
            wasCoreReported = true;
            break;
        default:
            break;
        }
    }

    if (!wasThreadReported)
    {
        std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
        return false;
    }
    // Without a core level the SMT width is reused as the core width.
    coreMaskWidth = wasCoreReported ? (corePlusSMTMaskWidth - smtMaskWidth) : smtMaskWidth;

    pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
    const uint32 threadsSharingL2 = 1 + extract_bits_ui(cpuid_args.array[0], 14, 25); // number of APIC IDs sharing L2 cache

    // Start from 0 so the result does not depend on what the caller passed in.
    l2CacheMaskShift = 0;
    for (uint32 remaining = threadsSharingL2; remaining > 1; remaining >>= 1)
    {
        l2CacheMaskShift++;
    }
#ifdef PCM_DEBUG_TOPOLOGY
    std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
              << " [the most significant bit = " << l2CacheMaskShift << "]\n";
#endif
    return true;
}

}

107 changes: 107 additions & 0 deletions src/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <sstream>
#include <iomanip>
#include <string.h>
#include <assert.h>

#ifdef _MSC_VER
#include <windows.h>
Expand Down Expand Up @@ -1434,6 +1435,112 @@ struct MCFGHeader

#endif // #ifndef KERNEL


/**
 * Builds a 32-bit mask with (end - beg + 1) low bits set, i.e. the mask that
 * selects bits [beg, end] of a value after that value was shifted right by beg.
 *
 * @param beg  index of the lowest bit of the range
 * @param end  index of the highest bit of the range; must be <= 31 (asserted)
 * @return     ((2^(end+1)) - 1) >> beg
 */
inline uint32 build_bit_ui(uint32 beg, uint32 end)
{
    assert(end <= 31);
    // The shift is done in unsigned arithmetic: with a signed 1, end == 30 would
    // evaluate (1 << 31) - 1, which overflows int. end == 31 is special-cased
    // because shifting a 32-bit value by 32 is undefined.
    const uint32 lowOnes = (end == 31)
        ? static_cast<uint32>(-1)
        : ((static_cast<uint32>(1) << (end + 1)) - 1);
    return lowOnes >> beg;
}

/**
 * Returns bits [beg, end] of a 32-bit value, shifted down so that the lowest
 * bit of the range ends up at bit 0.
 *
 * @param myin  value to extract from
 * @param beg   one bound of the bit range (inclusive)
 * @param end   the other bound of the bit range (inclusive)
 *
 * The two bounds may be given in either order.
 */
inline uint32 extract_bits_ui(uint32 myin, uint32 beg, uint32 end)
{
    // Normalise the bounds: the smaller one is the low bit of the range.
    const bool inOrder = (beg <= end);
    const uint32 lowBit = inOrder ? beg : end;
    const uint32 highBit = inOrder ? end : beg;

    const uint32 shifted = myin >> lowBit;
    return shifted & build_bit_ui(lowBit, highBit);
}

/**
 * Builds a 64-bit mask with (end - beg + 1) low bits set, i.e. the mask that
 * selects bits [beg, end] of a value after that value was shifted right by beg.
 *
 * @param beg  index of the lowest bit of the range
 * @param end  index of the highest bit of the range; values above 63 are clamped to 63
 * @return     ((2^(end+1)) - 1) >> beg
 */
inline uint64 build_bit(uint32 beg, uint32 end)
{
    if (end > 63)
    {
        end = 63;
    }
    // The shift is done in unsigned arithmetic: with a signed 1LL, end == 62 would
    // evaluate (1LL << 63) - 1, which overflows long long. end == 63 is
    // special-cased because shifting a 64-bit value by 64 is undefined.
    const uint64 lowOnes = (end == 63)
        ? static_cast<uint64>(-1)
        : ((static_cast<uint64>(1) << (end + 1)) - 1);
    return lowOnes >> beg;
}

/**
 * Returns bits [beg, end] of a 64-bit value, shifted down so that the lowest
 * bit of the range ends up at bit 0.
 *
 * @param myin  value to extract from
 * @param beg   one bound of the bit range (inclusive)
 * @param end   the other bound of the bit range (inclusive)
 *
 * The two bounds may be given in either order.
 */
inline uint64 extract_bits(uint64 myin, uint32 beg, uint32 end)
{
    // Normalise the bounds: the smaller one is the low bit of the range.
    const bool inOrder = (beg <= end);
    const uint32 lowBit = inOrder ? beg : end;
    const uint32 highBit = inOrder ? end : beg;

    const uint64 shifted = myin >> lowBit;
    return shifted & build_bit(lowBit, highBit);
}

/**
 * Result of one CPUID invocation.
 *
 * The same four 32-bit values can be accessed either by index through `array`
 * (0 = EAX, 1 = EBX, 2 = ECX, 3 = EDX) or by name through `reg`.
 */
union PCM_CPUID_INFO
{
    int array[4];
    struct { unsigned int eax, ebx, ecx, edx; } reg;
};

/**
 * Executes CPUID for the given leaf with the sub-leaf (ECX) set to 0.
 *
 * @param leaf  value placed in EAX before CPUID is executed
 * @param info  receives the resulting EAX, EBX, ECX and EDX values
 */
inline void pcm_cpuid(int leaf, PCM_CPUID_INFO& info)
{
#ifdef _MSC_VER
    // version for Windows
    __cpuid(info.array, leaf);
#else
    // ECX is set to 0 explicitly: CPUID reads it as the sub-leaf for some leaves,
    // and without an input operand its value would be whatever the register held.
    __asm__ __volatile__("cpuid" : \
        "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (0));
#endif
}

/**
 * Executes CPUID for the given leaf and sub-leaf.
 *
 * @param leaf     value placed in EAX before CPUID is executed
 * @param subleaf  value placed in ECX before CPUID is executed
 * @param info     receives the resulting EAX, EBX, ECX and EDX values
 */
inline void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info)
{
#ifdef _MSC_VER
    __cpuidex(info.array, leaf, subleaf);
#else
    __asm__ __volatile__ ("cpuid" : \
        "=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf));
#endif
}

//IDX accel device/func number(PCIe).
//The device/function number from SPR register guide.
#define SPR_IDX_IAA_REGISTER_DEV_ADDR (2)
Expand Down
Loading

0 comments on commit 9c82bf6

Please sign in to comment.