From 9cd6183c5ecf823517ef6640831b635bfd3f888e Mon Sep 17 00:00:00 2001
From: Alex Budovski <alexbud@meta.com>
Date: Wed, 22 Mar 2023 19:55:11 -0500
Subject: [PATCH 01/12] Add comments to various files

This is just a preparatory change to improve the documentation in the
code. No functional changes.
---
 src/PEImage.cpp   |  6 ++++++
 src/dwarf2pdb.cpp | 14 ++++++++++++++
 src/readDwarf.cpp |  5 +++++
 src/readDwarf.h   | 13 +++++++++++++
 4 files changed, 38 insertions(+)

diff --git a/src/PEImage.cpp b/src/PEImage.cpp
index 247d514..4b39a3f 100644
--- a/src/PEImage.cpp
+++ b/src/PEImage.cpp
@@ -375,6 +375,7 @@ bool PEImage::_initFromCVDebugDir(IMAGE_DEBUG_DIRECTORY* ddir)
 }
 
 ///////////////////////////////////////////////////////////////////////
+// Used for PE (EXE/DLL) files.
 bool PEImage::initDWARFPtr(bool initDbgDir)
 {
 	dos = DPV<IMAGE_DOS_HEADER> (0);
@@ -410,6 +411,7 @@ bool PEImage::initDWARFPtr(bool initDbgDir)
 	return true;
 }
 
+// Used for COFF objects.
 bool PEImage::initDWARFObject()
 {
 	IMAGE_FILE_HEADER* hdr = DPV<IMAGE_FILE_HEADER> (0);
@@ -466,8 +468,11 @@ void PEImage::initSec(PESection& peSec, int secNo) const
 	peSec.secNo = secNo;
 }
 
+// Initialize all the DWARF sections present in this PE or COFF file.
+// Common to both object and image modules.
 void PEImage::initDWARFSegments()
 {
+	// Scan all the PE sections in this image.
 	for(int s = 0; s < nsec; s++)
 	{
 		const char* name = (const char*) sec[s].Name;
@@ -477,6 +482,7 @@ void PEImage::initDWARFSegments()
 			name = strtable + off;
 		}
 
+		// Is 'name' one of the DWARF sections?
 		for (const SectionDescriptor *sec_desc : sec_descriptors) {
 			if (!strcmp(name, sec_desc->name)) {
 				PESection& peSec = this->*(sec_desc->pSec);
diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp
index 3019856..f759142 100644
--- a/src/dwarf2pdb.cpp
+++ b/src/dwarf2pdb.cpp
@@ -953,6 +953,7 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseo
 	return nfields;
 }
 
+// Add a class/struct/union to the database.
 int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DIECursor cursor)
 {
 	//printf("Adding struct %s, entryoff %d, abbrev %d\n", structid.name, structid.entryOff, structid.abbrev);
@@ -1359,6 +1360,8 @@ int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
 	return 0;
 }
 
+// Scan the .debug_info section and allocate type IDs for each unique type and
+// create a mapping to look them up by their address.
 bool CV2PDB::mapTypes()
 {
 	int typeID = nextUserType;
@@ -1367,13 +1370,18 @@ bool CV2PDB::mapTypes()
 	if (debug & DbgBasic)
 		fprintf(stderr, "%s:%d: mapTypes()\n", __FUNCTION__, __LINE__);
 
+	// Scan each compilation unit in '.debug_info'.
 	while (off < img.debug_info.length)
 	{
 		DWARF_CompilationUnitInfo cu{};
+
+		// Read the next compilation unit from 'off' and update it to the next
+		// CU.
 		byte* ptr = cu.read(debug, img, &off);
 		if (!ptr)
 			continue;
 
+		// We only support regular full 'DW_UT_compile' compilation units.
 		if (cu.unit_type != DW_UT_compile) {
 			if (debug & DbgDwarfCompilationUnit)
 				fprintf(stderr, "%s:%d: skipping compilation unit offs=%x, unit_type=%d\n", __FUNCTION__, __LINE__,
@@ -1418,6 +1426,7 @@ bool CV2PDB::mapTypes()
 				case DW_TAG_mutable_type: // withdrawn
 				case DW_TAG_shared_type:
 				case DW_TAG_rvalue_reference_type:
+					// Reserve a typeID and store it in the map for quick lookup.
 					mapOffsetToType.insert(std::make_pair(id.entryPtr, typeID));
 					typeID++;
 			}
@@ -1444,9 +1453,14 @@ bool CV2PDB::createTypes()
 		fprintf(stderr, "%s:%d: createTypes()\n", __FUNCTION__, __LINE__);
 
 	unsigned long off = 0;
+
+	// Scan each compilation unit in '.debug_info'.
 	while (off < img.debug_info.length)
 	{
 		DWARF_CompilationUnitInfo cu{};
+
+		// Read the next compilation unit from 'off' and update it to the next
+		// CU, returning the pointer just beyond the header to the first DIE.
 		byte* ptr = cu.read(debug, img, &off);
 		if (!ptr)
 			continue;
diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp
index b77a1d0..1058c60 100644
--- a/src/readDwarf.cpp
+++ b/src/readDwarf.cpp
@@ -34,6 +34,11 @@ void DIECursor::setContext(PEImage* img_, DebugLevel debug_)
 	debug = debug_;
 }
 
+// Read one compilation unit from `img`'s .debug_info section, starting at
+// offset `*off`, updating it in the process to the start of the next one in the
+// section.
+// Returns a pointer to the first DIE, skipping past the CU header, or NULL
+// on failure.
 byte* DWARF_CompilationUnitInfo::read(DebugLevel debug, const PEImage& img, unsigned long *off)
 {
 	byte* ptr = img.debug_info.byteAt(*off);
diff --git a/src/readDwarf.h b/src/readDwarf.h
index 5e1db99..56e89a3 100644
--- a/src/readDwarf.h
+++ b/src/readDwarf.h
@@ -180,24 +180,37 @@ struct DWARF_FileName
 	}
 };
 
+// In-memory representation of a DIE (Debugging Info Entry).
 struct DWARF_InfoData
 {
+	// Pointer into the mapped image section where this DIE is located.
 	byte* entryPtr;
+
+	// Code to find the abbrev entry for this DIE, or 0 if it a sentinel marking
+	// the end of a sibling chain.
 	int code;
+
+	// Pointer to the abbreviation table entry that corresponds to this DIE.
 	byte* abbrev;
 	int tag;
+
+	// Does this DIE have children?
 	int hasChild;
 
 	const char* name;
 	const char* linkage_name;
 	const char* dir;
 	unsigned long byte_size;
+
+	// Pointer to the sibling DIE in the mapped image.
 	byte* sibling;
 	unsigned long encoding;
 	unsigned long pclo;
 	unsigned long pchi;
 	unsigned long ranges; // -1u when attribute is not present
 	unsigned long pcentry;
+
+	// Pointer to the DW_AT_type DIE describing the type of this DIE.
 	byte* type;
 	byte* containing_type;
 	byte* specification;

From ba89a5ff9de1d128a3af8cb965345501160a11a6 Mon Sep 17 00:00:00 2001
From: Alex Budovski <alexbud@meta.com>
Date: Wed, 22 Mar 2023 19:56:00 -0500
Subject: [PATCH 02/12] Upgrade VCXProj to VS 2022

---
 src/dviewhelper/dviewhelper.vcxproj | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/dviewhelper/dviewhelper.vcxproj b/src/dviewhelper/dviewhelper.vcxproj
index 8f9cb2a..db8fdf4 100644
--- a/src/dviewhelper/dviewhelper.vcxproj
+++ b/src/dviewhelper/dviewhelper.vcxproj
@@ -13,24 +13,24 @@
   <PropertyGroup Label="Globals">
     <ProjectGuid>{E4424774-A7A0-4502-8626-2723904D70EA}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
-	<!-- guess the installed Windows SDK -->
+    <!-- guess the installed Windows SDK -->
     <WindowsSdkInstallFolder_10 Condition="'$(WindowsSdkInstallFolder_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0@InstallationFolder)</WindowsSdkInstallFolder_10>
     <WindowsSdkInstallFolder_10 Condition="'$(WindowsSdkInstallFolder_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0@InstallationFolder)</WindowsSdkInstallFolder_10>
     <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0@ProductVersion)</WindowsTargetPlatformVersion_10>
     <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0@ProductVersion)</WindowsTargetPlatformVersion_10>
     <!-- Sometimes the version in the registry has to .0 suffix, and sometimes it doesn't. Check and add it -->
     <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' != '' and !$(WindowsTargetPlatformVersion_10.EndsWith('.0'))">$(WindowsTargetPlatformVersion_10).0</WindowsTargetPlatformVersion_10>
-	<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion_10)' != ''">$(WindowsTargetPlatformVersion_10)</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion_10)' != ''">10.0</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
-    <PlatformToolset>v120</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
-    <PlatformToolset>v120</PlatformToolset>
+    <PlatformToolset>v143</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />

From 2e4c1bf97b1491385c37432aef58b15943eb118a Mon Sep 17 00:00:00 2001
From: Alex Budovski <alexbud@meta.com>
Date: Wed, 22 Mar 2023 20:36:35 -0500
Subject: [PATCH 03/12] Add constants for BASE_{USER/DWARF}_TYPE

We had a lot of magic numbers throughout the code. Introduce two named
constants to make it clearer what they refer to. No functional changes.
---
 src/cv2pdb.cpp    | 61 +++++++++++++++++++++++++----------------------
 src/cv2pdb.h      |  8 +++++--
 src/dwarf2pdb.cpp |  2 +-
 3 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/src/cv2pdb.cpp b/src/cv2pdb.cpp
index 696cb74..1ba7a94 100644
--- a/src/cv2pdb.cpp
+++ b/src/cv2pdb.cpp
@@ -36,8 +36,6 @@ CV2PDB::CV2PDB(PEImage& image, DebugLevel debug_)
 	memset(typedefs, 0, sizeof(typedefs));
 	memset(translatedTypedefs, 0, sizeof(translatedTypedefs));
 	cntTypedefs = 0;
-	nextUserType = 0x1000;
-	nextDwarfType = 0x1000;
 
 	addClassTypeEnum = true;
 	addObjectViewHelper = true;
@@ -899,25 +897,26 @@ void CV2PDB::checkGlobalTypeAlloc(int size, int add)
 	}
 }
 
+// CV-only. Returns NULL for DWARF-based images.
 const codeview_type* CV2PDB::getTypeData(int type)
 {
-	if (!globalTypeHeader)
+	if (!globalTypeHeader) // NULL for DWARF.
 		return 0;
-	if (type < 0x1000 || type >= (int) (0x1000 + globalTypeHeader->cTypes + nextUserType))
+	if (type < BASE_USER_TYPE || type >= (int) (BASE_USER_TYPE + globalTypeHeader->cTypes + nextUserType))
 		return 0;
-	if (type >= (int) (0x1000 + globalTypeHeader->cTypes))
+	if (type >= (int) (BASE_USER_TYPE + globalTypeHeader->cTypes))
 		return getUserTypeData(type);
 
 	DWORD* offset = (DWORD*)(globalTypeHeader + 1);
 	BYTE* typeData = (BYTE*)(offset + globalTypeHeader->cTypes);
 
-	return (codeview_type*)(typeData + offset[type - 0x1000]);
+	return (codeview_type*)(typeData + offset[type - BASE_USER_TYPE]);
 }
 
 const codeview_type* CV2PDB::getUserTypeData(int type)
 {
-	type -= 0x1000 + globalTypeHeader->cTypes;
-	if (type < 0 || type >= nextUserType - 0x1000)
+	type -= BASE_USER_TYPE + globalTypeHeader->cTypes;
+	if (type < 0 || type >= nextUserType - BASE_USER_TYPE)
 		return 0;
 
 	int pos = 0;
@@ -933,8 +932,8 @@ const codeview_type* CV2PDB::getUserTypeData(int type)
 
 const codeview_type* CV2PDB::getConvertedTypeData(int type)
 {
-	type -= 0x1000;
-	if (type < 0 || type >= nextUserType - 0x1000)
+	type -= BASE_USER_TYPE;
+	if (type < 0 || type >= nextUserType - BASE_USER_TYPE)
 		return 0;
 
 	int pos = typePrefix;
@@ -1013,7 +1012,7 @@ int CV2PDB::findMemberFunctionType(codeview_symbol* lastGProcSym, int thisPtrTyp
 				type->mfunction_v1.call == proctype->procedure_v1.call &&
 				type->mfunction_v1.rvtype == proctype->procedure_v1.rvtype)
 			{
-				return t + 0x1000;
+				return t + BASE_USER_TYPE;
 			}
 		}
 	}
@@ -1123,7 +1122,7 @@ int CV2PDB::sizeofBasicType(int type)
 
 int CV2PDB::sizeofType(int type)
 {
-	if (type < 0x1000)
+	if (type < BASE_USER_TYPE)
 		return sizeofBasicType(type);
 
 	const codeview_type* cvtype = getTypeData(type);
@@ -1144,11 +1143,14 @@ int CV2PDB::sizeofType(int type)
 // to be used when writing new type only to avoid double translation
 int CV2PDB::translateType(int type)
 {
-	if (type < 0x1000)
+	if (type < BASE_USER_TYPE)
 	{
+		// Check D lang typedefs.
 		for(int i = 0; i < cntTypedefs; i++)
 			if(type == typedefs[i])
 				return translatedTypedefs[i];
+
+		// Return original type.
 		return type;
 	}
 
@@ -1279,7 +1281,7 @@ bool CV2PDB::nameOfModifierType(int type, int mod, char* name, int maxlen)
 
 bool CV2PDB::nameOfType(int type, char* name, int maxlen)
 {
-	if(type < 0x1000)
+	if(type < BASE_USER_TYPE)
 		return nameOfBasicType(type, name, maxlen);
 
 	const codeview_type* ptype = getTypeData(type);
@@ -2032,7 +2034,7 @@ void CV2PDB::ensureUDT(int type, const codeview_type* cvtype)
 	if (getStructProperty(cvtype) & kPropIncomplete)
 		cvtype = findCompleteClassType(cvtype, &type);
 
-	if(findUdtSymbol(type + 0x1000))
+	if(findUdtSymbol(type + BASE_USER_TYPE))
 		return;
 
 	char name[kMaxNameLen];
@@ -2054,9 +2056,9 @@ void CV2PDB::ensureUDT(int type, const codeview_type* cvtype)
 		int viewHelperType = nextUserType++;
 		// addUdtSymbol(viewHelperType, "object_viewhelper");
 		addUdtSymbol(viewHelperType, name);
+	} else {
+		addUdtSymbol(type + BASE_USER_TYPE, name);
 	}
-	else
-		addUdtSymbol(type + 0x1000, name);
 }
 
 int CV2PDB::createEmptyFieldListType()
@@ -2135,6 +2137,7 @@ void CV2PDB::appendTypedefs()
 	appendComplex(0x52, 0x42, 10, "creal");
 }
 
+// CV-only.
 bool CV2PDB::initGlobalTypes()
 {
 	int object_derived_type = 0;
@@ -2160,7 +2163,7 @@ bool CV2PDB::initGlobalTypes()
 			*(DWORD*) globalTypes = 4;
 			cbGlobalTypes = typePrefix;
 
-			nextUserType = globalTypeHeader->cTypes + 0x1000;
+			nextUserType = globalTypeHeader->cTypes + BASE_USER_TYPE;
 
 			appendTypedefs();
 			if(Dversion > 0)
@@ -2277,7 +2280,7 @@ bool CV2PDB::initGlobalTypes()
 						if(const codeview_type* td = getTypeData(type->struct_v1.fieldlist))
 							if(td->generic.id == LF_FIELDLIST_V1 || td->generic.id == LF_FIELDLIST_V2)
 								dtype->struct_v2.n_element = countFields((const codeview_reftype*)td);
-					dtype->struct_v2.property = fixProperty(t + 0x1000, type->struct_v1.property,
+					dtype->struct_v2.property = fixProperty(t + BASE_USER_TYPE, type->struct_v1.property,
 					                                        type->struct_v1.fieldlist);
 #if REMOVE_LF_DERIVED
 					dtype->struct_v2.derived = 0;
@@ -2308,7 +2311,7 @@ bool CV2PDB::initGlobalTypes()
 					dtype->union_v2.id = v3 ? LF_UNION_V3 : LF_UNION_V2;
 					dtype->union_v2.count = type->union_v1.count;
 					dtype->union_v2.fieldlist = type->struct_v1.fieldlist;
-					dtype->union_v2.property = fixProperty(t + 0x1000, type->struct_v1.property, type->struct_v1.fieldlist);
+					dtype->union_v2.property = fixProperty(t + BASE_USER_TYPE, type->struct_v1.property, type->struct_v1.fieldlist);
 					leaf_len = numeric_leaf(&value, &type->union_v1.un_len);
 					memcpy (&dtype->union_v2.un_len, &type->union_v1.un_len, leaf_len);
 					len = pstrcpy_v(v3, (BYTE*)      &dtype->union_v2.un_len + leaf_len,
@@ -2349,10 +2352,10 @@ bool CV2PDB::initGlobalTypes()
 					dtype->mfunction_v2.rvtype = translateType(type->mfunction_v1.rvtype);
 					clsstype = type->mfunction_v1.class_type;
 					dtype->mfunction_v2.class_type = translateType(clsstype);
-					if (clsstype >= 0x1000 && clsstype < 0x1000 + globalTypeHeader->cTypes)
+					if (clsstype >= BASE_USER_TYPE && clsstype < BASE_USER_TYPE + globalTypeHeader->cTypes)
 					{
 						// fix class_type to point to class, not pointer to class
-						codeview_type* ctype = (codeview_type*)(typeData + offset[clsstype - 0x1000]);
+						codeview_type* ctype = (codeview_type*)(typeData + offset[clsstype - BASE_USER_TYPE]);
 						if (ctype->generic.id == LF_POINTER_V1)
 							dtype->mfunction_v2.class_type = translateType(ctype->pointer_v1.datatype);
 					}
@@ -2370,12 +2373,12 @@ bool CV2PDB::initGlobalTypes()
 					dtype->enumeration_v2.count = type->enumeration_v1.count;
 					dtype->enumeration_v2.type = translateType(type->enumeration_v1.type);
 					dtype->enumeration_v2.fieldlist = type->enumeration_v1.fieldlist;
-					dtype->enumeration_v2.property = fixProperty(t + 0x1000, type->enumeration_v1.property, type->enumeration_v1.fieldlist);
+					dtype->enumeration_v2.property = fixProperty(t + BASE_USER_TYPE, type->enumeration_v1.property, type->enumeration_v1.fieldlist);
 					len = pstrcpy_v (v3, (BYTE*) &dtype->enumeration_v2.p_name, (BYTE*) &type->enumeration_v1.p_name);
 					len += sizeof(dtype->enumeration_v2) - sizeof(dtype->enumeration_v2.p_name);
 					if(dtype->enumeration_v2.fieldlist && v3)
-						if(!findUdtSymbol(t + 0x1000))
-							addUdtSymbol(t + 0x1000, (char*) &dtype->enumeration_v2.p_name);
+						if(!findUdtSymbol(t + BASE_USER_TYPE))
+							addUdtSymbol(t + BASE_USER_TYPE, (char*) &dtype->enumeration_v2.p_name);
 					break;
 
 				case LF_FIELDLIST_V1:
@@ -2392,7 +2395,7 @@ bool CV2PDB::initGlobalTypes()
 					rdtype->derived_v2.id = LF_DERIVED_V2;
 					rdtype->derived_v2.num = rtype->derived_v1.num;
 					for (int i = 0; i < rtype->derived_v1.num; i++)
-						if (rtype->derived_v1.drvdcls[i] < 0x1000) // + globalTypeHeader->cTypes)
+						if (rtype->derived_v1.drvdcls[i] < BASE_USER_TYPE) // + globalTypeHeader->cTypes)
 							rdtype->derived_v2.drvdcls[i] = translateType(rtype->derived_v1.drvdcls[i] + 0xfff);
 						else
 							rdtype->derived_v2.drvdcls[i] = translateType(rtype->derived_v1.drvdcls[i]);
@@ -3166,8 +3169,8 @@ int CV2PDB::copySymbols(BYTE* srcSymbols, int srcSize, BYTE* destSymbols, int de
 					codeview_symbol* dsym = (codeview_symbol*)(destSymbols + destSize);
 					memcpy(dsym, sym, length);
 #endif
-					if (type >= 0x1000 && pointerTypes[type - 0x1000])
-						type = pointerTypes[type - 0x1000];
+					if (type >= BASE_USER_TYPE && pointerTypes[type - BASE_USER_TYPE])
+						type = pointerTypes[type - BASE_USER_TYPE];
 				}
 			}
 			dsym->stack_v2.id = v3 ? S_BPREL_V3 : S_BPREL_V1;
@@ -3279,6 +3282,8 @@ bool isUDTid(int id)
 	return id == S_UDT_V1 || id == S_UDT_V2 || id == S_UDT_V3;
 }
 
+// Find a user-defined type CV symbol.
+// CV-only.
 codeview_symbol* CV2PDB::findUdtSymbol(int type)
 {
 	type = translateType(type);
diff --git a/src/cv2pdb.h b/src/cv2pdb.h
index 52f3455..654470b 100644
--- a/src/cv2pdb.h
+++ b/src/cv2pdb.h
@@ -211,6 +211,7 @@ class CV2PDB : public LastError
 	OMFSegMapDesc* segMapDesc;
 	int* segFrame2Index;
 
+	// CV-only
 	OMFGlobalTypes* globalTypeHeader;
 
 	unsigned char* globalTypes;
@@ -236,8 +237,11 @@ class CV2PDB : public LastError
 	int cbDwarfTypes;
 	int allocDwarfTypes;
 
-	int nextUserType;
-	int nextDwarfType;
+	static constexpr int BASE_USER_TYPE = 0x1000;
+	static constexpr int BASE_DWARF_TYPE = 0x1000;
+
+	int nextUserType = BASE_USER_TYPE;
+	int nextDwarfType = BASE_DWARF_TYPE;
 	int objectType;
 
 	int emptyFieldListType;
diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp
index f759142..33c8af6 100644
--- a/src/dwarf2pdb.cpp
+++ b/src/dwarf2pdb.cpp
@@ -1772,7 +1772,7 @@ bool CV2PDB::addDWARFPublics()
 	mspdb::Mod* mod = globalMod();
 
 	int type = 0;
-	int rc = mod->AddPublic2("public_all", img.text.secNo + 1, 0, 0x1000);
+	int rc = mod->AddPublic2("public_all", img.text.secNo + 1, 0, BASE_DWARF_TYPE);
 	if (rc <= 0)
 		return setError("cannot add public");
 	return true;

From 62f975d2b4030d10a50e140f44f39ede418bcec4 Mon Sep 17 00:00:00 2001
From: Alex Budovski <alexbud@meta.com>
Date: Wed, 22 Mar 2023 20:37:01 -0500
Subject: [PATCH 04/12] DWARF tree for fully-qualified name construction

The Windows debuggers expect PDB symbol names to be fully qualified.
I.e., if a class Foo has a constructor, its name should be emitted as
`Foo::Foo`, not simply `Foo` as is the case today. Linux debuggers like
GDB dynamically reconstruct the symbol tree at runtime each time a
program is debugged. Windows debuggers on the other hand do not, and
expect the name to be fully qualified from the outset. Failing this, the
constructor function `Foo` would have the same name as the class `Foo`
in the PDB, and WinDbg will get confused about what to dump (e.g. using
`dt Foo`) and arbitrarily pick the largest item, which might be the
constructor. Therefore you end up dumping the wrong thing and being
completely unable to inspect the contents of a `Foo` object.

This commit aims to fix that by introducing a DWARF tree during the
conversion process which allows us to efficiently reconstruct such fully
qualified names during the conversion.

A note about DWARF: the DWARF format does not explicitly record the
parent of any given DIE record. It is instead implicit in how the
records are laid out. Any record may have a "has children" flag, and if
it does, then the records following it are its children, terminated by a
special NULL record, popping back up one level of the tree.

The DIECursor already recognized this structure but did not capture it
in memory for later use.

In order to construct fully-qualified names for functions, enums,
classes, etc. (i.e. taking into account namespaces, nesting, etc), we
need a way to efficiently look up a node's parent. Thus the DWARF tree
was born.

At a high level, we take advantage of the fact that the DWARF sections
were already scanned in two passes. We hook into the first pass (where
the typeIDs were being reserved) and build the DWARF tree.

Then, in the second pass (where the CV symbols get emitted), we look up
the tree to figure out the correct fully-qualified symbol names.

NOTE: The first phase of this work focuses on subroutines only. Later
work will enable support for structs/classes/enums.

On the subroutine front, I also added a flag to capture whether a DIE is
a "declaration" or definition (based on the DW_AT_declaration
attribute). This is needed to consolidate function decl+defn into one
PDB symbol, as otherwise WinDbg will get confused. This also matches
what the MSVC toolset produces.

A few other related additions:

- Added helper to format a fully qualified function name by looking up
  the tree added in this commit.
- Added helper to print the DWARF tree for debugging purposes and a flag
  to control it.
---
 src/PEImage.h     |  10 +-
 src/cv2pdb.cpp    |   3 +
 src/cv2pdb.h      |  21 +++-
 src/dwarf2pdb.cpp | 252 +++++++++++++++++++++++++++++++++++++++-------
 src/readDwarf.cpp | 202 +++++++++++++++++++++++++++----------
 src/readDwarf.h   |  65 ++++++++----
 6 files changed, 443 insertions(+), 110 deletions(-)

diff --git a/src/PEImage.h b/src/PEImage.h
index 3ae00bd..a809523 100644
--- a/src/PEImage.h
+++ b/src/PEImage.h
@@ -178,11 +178,16 @@ class PEImage : public LastError
 
     template<typename SYM> const char* t_findSectionSymbolName(int s) const;
 
+	// File handle to PE image.
 	int fd;
+
+	// Pointer to in-memory buffer containing loaded PE image.
 	void* dump_base;
+
+	// Size of `dump_base` in bytes.
 	int dump_total_len;
 
-	// codeview
+	// codeview fields
 	IMAGE_DOS_HEADER *dos;
 	IMAGE_NT_HEADERS32* hdr32;
 	IMAGE_NT_HEADERS64* hdr64;
@@ -200,7 +205,8 @@ class PEImage : public LastError
 	std::unordered_map<std::string, SymbolInfo> symbolCache;
 
 public:
-	//dwarf
+	// dwarf fields
+	// List of DWARF section descriptors.
 #define EXPANDSEC(name) PESection name;
 	SECTION_LIST()
 #undef EXPANDSEC
diff --git a/src/cv2pdb.cpp b/src/cv2pdb.cpp
index 1ba7a94..704da4c 100644
--- a/src/cv2pdb.cpp
+++ b/src/cv2pdb.cpp
@@ -897,6 +897,7 @@ void CV2PDB::checkGlobalTypeAlloc(int size, int add)
 	}
 }
 
+// Get the CodeView type descriptor for the given type ID.
 // CV-only. Returns NULL for DWARF-based images.
 const codeview_type* CV2PDB::getTypeData(int type)
 {
@@ -913,6 +914,7 @@ const codeview_type* CV2PDB::getTypeData(int type)
 	return (codeview_type*)(typeData + offset[type - BASE_USER_TYPE]);
 }
 
+// CV-only. Never called for DWARF.
 const codeview_type* CV2PDB::getUserTypeData(int type)
 {
 	type -= BASE_USER_TYPE + globalTypeHeader->cTypes;
@@ -2116,6 +2118,7 @@ int CV2PDB::appendTypedef(int type, const char* name, bool saveTranslation)
 	return typedefType;
 }
 
+// CV-only.
 void CV2PDB::appendTypedefs()
 {
 	if(Dversion == 0)
diff --git a/src/cv2pdb.h b/src/cv2pdb.h
index 654470b..e5e8144 100644
--- a/src/cv2pdb.h
+++ b/src/cv2pdb.h
@@ -169,17 +169,23 @@ class CV2PDB : public LastError
 	bool addDWARFLines();
 	bool addDWARFPublics();
 	bool writeDWARFImage(const TCHAR* opath);
+	DWARF_InfoData* findEntryByPtr(byte* entryPtr) const;
+
+	// Helper to just print the DWARF tree we've built for debugging purposes.
+	void dumpDwarfTree() const;
 
 	bool addDWARFSectionContrib(mspdb::Mod* mod, unsigned long pclo, unsigned long pchi);
 	bool addDWARFProc(DWARF_InfoData& id, const std::vector<RangeEntry> &ranges, DIECursor cursor);
+	void formatFullyQualifiedProcName(const DWARF_InfoData* proc, char* buf, size_t cbBuf) const;
+
 	int  addDWARFStructure(DWARF_InfoData& id, DIECursor cursor);
-	int  addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int off, int flStart);
-	int  addDWARFArray(DWARF_InfoData& arrayid, DIECursor cursor);
+	int  addDWARFFields(DWARF_InfoData& structid, DIECursor& cursor, int off, int flStart);
+	int  addDWARFArray(DWARF_InfoData& arrayid, const DIECursor& cursor);
 	int  addDWARFBasicType(const char*name, int encoding, int byte_size);
 	int  addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor);
 	int  getTypeByDWARFPtr(byte* ptr);
 	int  getDWARFTypeSize(const DIECursor& parent, byte* ptr);
-	void getDWARFArrayBounds(DWARF_InfoData& arrayid, DIECursor cursor,
+	void getDWARFArrayBounds(DIECursor cursor,
 		int& basetype, int& lowerBound, int& upperBound);
 	void getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, const DIECursor& parent,
 		int& basetype, int& lowerBound, int& upperBound);
@@ -278,7 +284,14 @@ class CV2PDB : public LastError
 
 	// DWARF
 	int codeSegOff;
-	std::unordered_map<byte*, int> mapOffsetToType;
+
+	// Lookup table for type IDs based on the DWARF_InfoData::entryPtr
+	std::unordered_map<byte*, int> mapEntryPtrToTypeID;
+	// Lookup table for entries based on the DWARF_InfoData::entryPtr
+	std::unordered_map<byte*, DWARF_InfoData*> mapEntryPtrToEntry;
+
+	// Head of list of DWARF DIE nodes.
+	DWARF_InfoData* dwarfHead = nullptr;
 
 	// Default lower bound for the current compilation unit. This depends on
 	// the language of the current unit.
diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp
index 33c8af6..763be85 100644
--- a/src/dwarf2pdb.cpp
+++ b/src/dwarf2pdb.cpp
@@ -695,6 +695,74 @@ void CV2PDB::appendLexicalBlock(DWARF_InfoData& id, unsigned int proclo)
 	cbUdtSymbols += len;
 }
 
+// Helper to format a fully qualified proc name like 'some_ns::Foo::Foo' since
+// for a Foo constructor in a Foo class in a namespace called "some_ns".
+// PDBs require fully qualified names in their symbols.
+// TODO: better error handling for out of space.
+void CV2PDB::formatFullyQualifiedProcName(const DWARF_InfoData* proc, char* buf, size_t cbBuf) const {
+	if (proc->specification) {
+		// If the proc has a "specification", i.e. a declaration, use it instead
+		// of the definition, as it has a proper hierarchy connected to it
+		// which will give us a proper fully-qualified name like Foo::Foo
+		// instead of just Foo.
+		const DWARF_InfoData* entry = findEntryByPtr(proc->specification);
+		if (entry) {
+			proc = entry;
+		}
+	}
+	DWARF_InfoData* parent = proc->parent;
+	std::vector<const DWARF_InfoData*> segments;
+	segments.push_back(proc);
+
+	// Accumulate all the valid parent scopes so that we can reverse them for
+	// formatting.
+	while (parent) {
+		switch (parent->tag) {
+		// TODO: are there any other kinds of valid parents?
+		case DW_TAG_class_type:
+		case DW_TAG_structure_type:
+		case DW_TAG_namespace:			
+			segments.push_back(parent);
+			break;
+		default:
+			break;
+		}
+		parent = parent->parent;
+	}
+
+	int remain = cbBuf;
+	char* p = buf;
+
+	// Format the parents in reverse order with :: operator in between.
+	for (int i = segments.size() - 1; i >= 0; --i) {
+		const int nameLen = strlen(segments[i]->name);
+		if (remain < nameLen) {
+			fprintf(stderr, "unable to fit full proc name: %s\n", proc->name);
+			return;
+		}
+
+		memcpy(p, segments[i]->name, nameLen);
+
+		p += nameLen;
+		remain -= nameLen;
+
+		if (i > 0) {
+			// Append :: separator
+			if (remain < 2) {
+				fprintf(stderr, "unable to fit full proc name (:: separator): %s\n", proc->name);
+				return;
+			}
+			*p++ = ':';
+			*p++ = ':';
+			remain -= 2;
+		}
+	}
+	
+	if (remain > 0) {
+		*p = 0;  // NUL terminate.
+	}
+}
+
 bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> &ranges, DIECursor cursor)
 {
 	unsigned int pclo = ranges.front().pclo - codeSegOff;
@@ -723,8 +791,9 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 	cvs->proc_v2.flags    = 0;
 
 //    printf("GlobalPROC %s\n", procid.name);
-
-	len = cstrcpy_v (v3, (BYTE*) &cvs->proc_v2.p_name, procid.name);
+	char namebuf[kMaxNameLen] = {};
+	formatFullyQualifiedProcName(&procid, namebuf, sizeof namebuf);
+	len = cstrcpy_v (v3, (BYTE*) &cvs->proc_v2.p_name, namebuf);
 	len += (BYTE*) &cvs->proc_v2.p_name - (BYTE*) cvs;
 	for (; len & (align-1); len++)
 		udtSymbols[cbUdtSymbols + len] = 0xf4 - (len & 3);
@@ -762,8 +831,13 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 		DWARF_InfoData id;
 		int off = 8;
 
+		// Save off the cursor to the start of the proc.
 		DIECursor prev = cursor;
-		while (cursor.readNext(id, true))
+
+		// First, collect all the formal parameters of the proc.
+		// Don't worry about storing these in the tree as we're not going to need
+		// to generate fully-qualified names like we would for functions/classes.
+		while (cursor.readNext(&id, true /* stopAtNull */))
 		{
 			if (id.tag == DW_TAG_formal_parameter && id.name)
 			{
@@ -778,7 +852,11 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 		}
 		appendEndArg();
 
+		//  Now, collect all the lexical blocks and their stack variables.
 		std::vector<DIECursor> lexicalBlocks;
+
+		// Start from the proc base, and push all nested lexical blocks as you
+		// encounter them.
 		lexicalBlocks.push_back(prev);
 
 		while (!lexicalBlocks.empty())
@@ -786,7 +864,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 			cursor = lexicalBlocks.back();
 			lexicalBlocks.pop_back();
 
-			while (cursor.readNext(id))
+			while (cursor.readNext(&id))
 			{
 				if (id.tag == DW_TAG_lexical_block)
 				{
@@ -813,15 +891,23 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 					{
 						appendLexicalBlock(id, pclo + codeSegOff);
 						DIECursor next = cursor;
+
+						// Compute the sibling node of this lexical block.
 						next.gotoSibling();
 						assert(lexicalBlocks.empty() || next.ptr <= lexicalBlocks.back().ptr);
+
+						// Append the next lexical block to the list of blocks
+						// to scan later.
 						lexicalBlocks.push_back(next);
+
+						// But for now, scan down the current lexical block.
 						cursor = cursor.getSubtreeCursor();
 						continue;
 					}
 				}
 				else if (id.tag == DW_TAG_variable)
 				{
+					// Found a local variable.
 					if (id.name && (id.location.type == ExprLoc || id.location.type == Block))
 					{
 						Location loc = id.location.type == SecOffset ? findBestFBLoc(cursor, id.location.sec_offset)
@@ -864,14 +950,15 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 	return true;
 }
 
-int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseoff, int flStart)
+// Only looks at DW_TAG_member and DW_TAG_inheritance
+int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor& cursor, int baseoff, int flStart)
 {
 	bool isunion = structid.tag == DW_TAG_union_type;
 	int nfields = 0;
 
-	// cursor points to the first member
+	// cursor points to the first member of the class/struct/union.
 	DWARF_InfoData id;
-	while (cursor.readNext(id, true))
+	while (cursor.readNext(&id, true))
 	{
 		if (cbDwarfTypes - flStart > 0x10000 - kMaxNameLen - 100)
 			break; // no more space in field list, TODO: add continuation record, see addDWARFEnum
@@ -905,12 +992,12 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor cursor, int baseo
 					// if it doesn't have a name, and it's a struct or union, embed it directly
 					DIECursor membercursor(cursor, id.type);
 					DWARF_InfoData memberid;
-					if (membercursor.readNext(memberid))
+					if (membercursor.readNext(&memberid))
 					{
 						if (memberid.abstract_origin)
-							mergeAbstractOrigin(memberid, cursor);
+							mergeAbstractOrigin(memberid, *this);
 						if (memberid.specification)
-							mergeSpecification(memberid, cursor);
+							mergeSpecification(memberid, *this);
 
 						int cvtype = -1;
 						switch (memberid.tag)
@@ -1002,14 +1089,17 @@ int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DIECursor cursor)
 	return cvtype;
 }
 
-void CV2PDB::getDWARFArrayBounds(DWARF_InfoData& arrayid, DIECursor cursor, int& basetype, int& lowerBound, int& upperBound)
+// Compute the array bounds of the DIE at the given 'cursor'.
+void CV2PDB::getDWARFArrayBounds(DIECursor cursor, int& basetype, int& lowerBound, int& upperBound)
 {
 	DWARF_InfoData id;
 
 	// TODO: handle multi-dimensional arrays
 	if (cursor.cu)
 	{
-		while (cursor.readNext(id, true))
+		// Don't insert these elements into the DB. We're just using it for
+		// array bounds calculation.
+		while (cursor.readNext(&id, true /* stopAtNull */))
 		{
 			if (id.tag == DW_TAG_subrange_type)
 			{
@@ -1042,6 +1132,7 @@ void CV2PDB::getDWARFSubrangeInfo(DWARF_InfoData& subrangeid, const DIECursor& p
 	upperBound = subrangeid.upper_bound;
 }
 
+// Compute a type ID for a basic DWARF type.
 int CV2PDB::getDWARFBasicType(int encoding, int byte_size)
 {
 	int type = 0, mode = 0, size = 0;
@@ -1104,10 +1195,13 @@ int CV2PDB::getDWARFBasicType(int encoding, int byte_size)
 	return translateType(t);
 }
 
-int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, DIECursor cursor)
+// TODO: Array wanted to be scanned twice due to DW_TAG_subrange_type being looked at
+// in the caller. See if it can be handled in a single place for clarity, simplicity & efficiency.
+// Goal: don't rescan the same DIE twice.
+int CV2PDB::addDWARFArray(DWARF_InfoData& arrayid, const DIECursor& cursor)
 {
 	int basetype, upperBound, lowerBound;
-	getDWARFArrayBounds(arrayid, cursor, basetype, lowerBound, upperBound);
+	getDWARFArrayBounds(cursor, basetype, lowerBound, upperBound);
 
 	checkUserTypeAlloc(kMaxNameLen + 100);
 	codeview_type* cvt = (codeview_type*) (userTypes + cbUserTypes);
@@ -1243,7 +1337,7 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor)
 
 	/* Now fill this field list with the enumerators we find in DWARF. */
 	DWARF_InfoData id;
-	while (cursor.readNext(id, true))
+	while (cursor.readNext(&id, true /* stopAtNull */))
 	{
 		if (id.tag == DW_TAG_enumerator && id.has_const_value)
 		{
@@ -1323,18 +1417,22 @@ int CV2PDB::getTypeByDWARFPtr(byte* ptr)
 {
 	if (ptr == nullptr)
 		return 0x03; // void
-	std::unordered_map<byte*, int>::iterator it = mapOffsetToType.find(ptr);
-	if (it == mapOffsetToType.end())
+	std::unordered_map<byte*, int>::iterator it = mapEntryPtrToTypeID.find(ptr);
+	if (it == mapEntryPtrToTypeID.end())
 		return 0x03; // void
 	return it->second;
 }
 
+// Get the logical size of a DWARF type, starting from 'typePtr' and recursing
+// if necessary. E.g. for arrays.
 int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
 {
 	DWARF_InfoData id;
 	DIECursor cursor(parent, typePtr);
 
-	if (!cursor.readNext(id))
+	// Don't allocate this into the tree since we're just interested
+	// in computing a type.
+	if (!cursor.readNext(&id))
 		return 0;
 
 	if(id.byte_size > 0)
@@ -1349,7 +1447,7 @@ int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
 		case DW_TAG_array_type:
 		{
 			int basetype, upperBound, lowerBound;
-			getDWARFArrayBounds(id, cursor, basetype, lowerBound, upperBound);
+			getDWARFArrayBounds(cursor, basetype, lowerBound, upperBound);
 			return (upperBound - lowerBound + 1) * getDWARFTypeSize(cursor, id.type);
 		}
 		default:
@@ -1362,6 +1460,8 @@ int CV2PDB::getDWARFTypeSize(const DIECursor& parent, byte* typePtr)
 
 // Scan the .debug_info section and allocate type IDs for each unique type and
 // create a mapping to look them up by their address.
+// This is the first pass scan that builds up the DWARF tree. The second pass (createTypes)
+// emits the actual PDB symbols.
 bool CV2PDB::mapTypes()
 {
 	int typeID = nextUserType;
@@ -1370,6 +1470,9 @@ bool CV2PDB::mapTypes()
 	if (debug & DbgBasic)
 		fprintf(stderr, "%s:%d: mapTypes()\n", __FUNCTION__, __LINE__);
 
+	// Maintain the first node of each CU to ensure all of them get linked.
+	DWARF_InfoData* firstNode = nullptr;
+
 	// Scan each compilation unit in '.debug_info'.
 	while (off < img.debug_info.length)
 	{
@@ -1391,13 +1494,34 @@ bool CV2PDB::mapTypes()
 		}
 
 		DIECursor cursor(&cu, ptr);
-		DWARF_InfoData id;
-		while (cursor.readNext(id))
+
+		// Set up link to ensure this CU links to the prior one.
+		cursor.prevNode = firstNode;
+
+		DWARF_InfoData* node = nullptr;
+		bool setFirstNode = false;
+		// Start scanning this CU from the beginning and *build a tree of DIE nodes*.
+		while ((node = cursor.readNext(nullptr)) != nullptr)
 		{
+			DWARF_InfoData& id = *node;
+
+			// Initialize the head of the DWARF DIE list the first time.
+			if (!dwarfHead) {
+				dwarfHead = node;
+			}
+
+			if (!setFirstNode) {
+				firstNode = node;
+				setFirstNode = true;
+			}
+
 			if (debug & DbgDwarfTagRead)
 				fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__,
 						cursor.entryOff, cursor.level, id.code, id.tag);
 
+			// Insert it into the map.
+			mapEntryPtrToEntry[node->entryPtr] = node;
+
 			switch (id.tag)
 			{
 				case DW_TAG_base_type:
@@ -1427,20 +1551,21 @@ bool CV2PDB::mapTypes()
 				case DW_TAG_shared_type:
 				case DW_TAG_rvalue_reference_type:
 					// Reserve a typeID and store it in the map for quick lookup.
-					mapOffsetToType.insert(std::make_pair(id.entryPtr, typeID));
+					mapEntryPtrToTypeID.insert(std::make_pair(id.entryPtr, typeID));
 					typeID++;
 			}
 		}
 	}
 
 	if (debug & DbgBasic)
-		fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapOffsetToType.size());
+		fprintf(stderr, "%s:%d: mapped %zd types\n", __FUNCTION__, __LINE__, mapEntryPtrToTypeID.size());
 
 	nextDwarfType = typeID;
-	assert(nextDwarfType == nextUserType + mapOffsetToType.size());
+	assert(nextDwarfType == nextUserType + mapEntryPtrToTypeID.size());
 	return true;
 }
 
+// Second pass: walks .debug_info again and emits the PDB types/symbols, using the DIE tree built by mapTypes().
 bool CV2PDB::createTypes()
 {
 	img.createSymbolCache();
@@ -1474,17 +1599,24 @@ bool CV2PDB::createTypes()
 		}
 
 		DIECursor cursor(&cu, ptr);
+
+		DWARF_InfoData* node = nullptr;
+		bool setFirstNode = false;
 		DWARF_InfoData id;
-		while (cursor.readNext(id))
+
+		// Scan the DIEs in this CU, reusing the elements.
+		while (cursor.readNext(&id))
 		{
 			if (debug & DbgDwarfTagRead)
 				fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__,
 						cursor.entryOff, cursor.level, id.code, id.tag);
 
+			// Merge in related entries. This relies on the DWARF tree having been built
+			// in the first pass (mapTypes).
 			if (id.abstract_origin)
-				mergeAbstractOrigin(id, cursor);
+				mergeAbstractOrigin(id, *this);
 			if (id.specification)
-				mergeSpecification(id, cursor);
+				mergeSpecification(id, *this);
 
 			int cvtype = -1;
 			switch (id.tag)
@@ -1515,14 +1647,14 @@ bool CV2PDB::createTypes()
 			case DW_TAG_class_type:
 			case DW_TAG_structure_type:
 			case DW_TAG_union_type:
-				cvtype = addDWARFStructure(id, cursor.getSubtreeCursor());
+				cvtype = addDWARFStructure(id, cursor);
 				break;
 			case DW_TAG_array_type:
-				cvtype = addDWARFArray(id, cursor.getSubtreeCursor());
+				cvtype = addDWARFArray(id, cursor);
 				break;
 
 			case DW_TAG_enumeration_type:
-				cvtype = addDWARFEnum(id, cursor.getSubtreeCursor());
+				cvtype = addDWARFEnum(id, cursor);
 				break;
 
 			case DW_TAG_subroutine_type:
@@ -1556,7 +1688,17 @@ bool CV2PDB::createTypes()
 
 							mod->AddPublic2(id.name, img.text.secNo + 1, entry_point - codeSegOff, 0);
 						}
-						addDWARFProc(id, ranges, cursor.getSubtreeCursor());
+
+						// Only add the definition, not declaration, because
+						// MSVC toolset only produces a single symbol for
+						// each function and will get confused if there are
+						// 2 PDB symbols for the same routine.
+						//
+						// TODO: Add more type info to the routine. Today we
+						// expose it as "T_NOTYPE" when we could do better.
+						if (!id.isDecl) {
+							addDWARFProc(id, ranges, cursor);
+						}
 					}
 				}
 				break;
@@ -1663,18 +1805,42 @@ bool CV2PDB::createTypes()
 
 			if (cvtype >= 0)
 			{
-				assert(cvtype == typeID); typeID++;
-				assert(mapOffsetToType[id.entryPtr] == cvtype);
+				assert(cvtype == typeID); 
+				typeID++;
+
+				assert(mapEntryPtrToTypeID[id.entryPtr] == cvtype);
 				assert(typeID == nextUserType);
 			}
 		}
 	}
 
 	assert(typeID == nextUserType);
-	assert(typeID == firstUserType + mapOffsetToType.size());
+	assert(typeID == firstUserType + mapEntryPtrToTypeID.size());
 	return true;
 }
 
+void printIndent(int level) {
+	for (int i = 0; i < level; ++i) {
+		printf("  ");
+	}
+}
+
+void dumpTreeHelper(DWARF_InfoData* node, int level) {
+	for (DWARF_InfoData* n = node; n; n = n->next) {
+		const unsigned dieOffset = n->img->debug_info.sectOff(n->entryPtr);
+
+		printIndent(level);
+		printf("offset: %#x, name: \"%s\", tag: %#x, abbrev: %d\n", dieOffset, n->name, n->tag, n->code);
+
+		// Visit the children.
+		dumpTreeHelper(n->children, level + 1);
+	}
+}
+
+void CV2PDB::dumpDwarfTree() const {
+	dumpTreeHelper(dwarfHead, 0);	
+}
+
 bool CV2PDB::createDWARFModules()
 {
 	if(!img.debug_info.isPresent())
@@ -1723,6 +1889,10 @@ bool CV2PDB::createDWARFModules()
 	if (!createTypes())
 		return false;
 
+	if (debug & DbgPrintDwarfTree) {
+		dumpDwarfTree();
+	}
+
 	/*
 	if(!iterateDWARFDebugInfo(kOpMapTypes))
 		return false;
@@ -1778,6 +1948,20 @@ bool CV2PDB::addDWARFPublics()
 	return true;
 }
 
+// Try to lookup a DWARF_InfoData in the constructed DWARF tree given its
+// "entryPtr". I.e. its memory-mapped location in the loaded PE image buffer.
+DWARF_InfoData* CV2PDB::findEntryByPtr(byte* entryPtr) const
+{
+	auto it = mapEntryPtrToEntry.find(entryPtr);
+	if (it == mapEntryPtrToEntry.end()) {
+		// Could not find decl for this definition.
+		return nullptr;
+	}
+	else {
+		return it->second;
+	}
+}
+
 bool CV2PDB::writeDWARFImage(const TCHAR* opath)
 {
 	int len = sizeof(*rsds) + strlen((char*)(rsds + 1)) + 1;
diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp
index 1058c60..9218f41 100644
--- a/src/readDwarf.cpp
+++ b/src/readDwarf.cpp
@@ -1,14 +1,12 @@
 #include "readDwarf.h"
 #include <assert.h>
 #include <array>
+#include <memory> // unique_ptr
 
 #include "PEImage.h"
+#include "cv2pdb.h"
 #include "dwarf.h"
 #include "mspdb.h"
-extern "C" {
-	#include "mscvpdb.h"
-}
-
 
 // declare hasher for pair<T1,T2>
 namespace std
@@ -365,32 +363,52 @@ Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase,
 	return stack[0];
 }
 
-void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent)
+// Find the source of an inlined function by following its 'abstract_origin' 
+// attribute references and recursively merge it into 'id'.
+// TODO: this description isn't quite right. See section 3.3.8.1 in DWARF 4 spec.
+void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context)
 {
-	DIECursor specCursor(parent, id.abstract_origin);
-	DWARF_InfoData idspec;
-	specCursor.readNext(idspec);
-	// assert seems invalid, combination DW_TAG_member and DW_TAG_variable found in the wild
+	DWARF_InfoData* abstractOrigin = context.findEntryByPtr(id.abstract_origin);
+	if (!abstractOrigin) {
+		// Could not find abstract origin. Why not?
+		assert(false);
+		return;
+	}
+
+	// assert seems invalid, combination DW_TAG_member and DW_TAG_variable found
+	// in the wild.
+	//
 	// assert(id.tag == idspec.tag);
-	if (idspec.abstract_origin)
-		mergeAbstractOrigin(idspec, parent);
-	if (idspec.specification)
-		mergeSpecification(idspec, parent);
-	id.merge(idspec);
+
+	if (abstractOrigin->abstract_origin)
+		mergeAbstractOrigin(*abstractOrigin, context);
+	if (abstractOrigin->specification)
+		mergeSpecification(*abstractOrigin, context);
+	id.merge(*abstractOrigin);
 }
 
-void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent)
+// Find the declaration entry for a definition by following its 'specification'
+// attribute references and merge it into 'id'.
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context)
 {
-	DIECursor specCursor(parent, id.specification);
-	DWARF_InfoData idspec;
-	specCursor.readNext(idspec);
-	//assert seems invalid, combination DW_TAG_member and DW_TAG_variable found in the wild
-	//assert(id.tag == idspec.tag);
-	if (idspec.abstract_origin)
-		mergeAbstractOrigin(idspec, parent);
-	if (idspec.specification)
-		mergeSpecification(idspec, parent);
-	id.merge(idspec);
+	DWARF_InfoData* idspec = context.findEntryByPtr(id.specification);
+	if (!idspec) {
+		// Could not find decl for this definition. Why not?
+		assert(false);
+		return;
+	}
+
+	// assert seems invalid, combination DW_TAG_member and DW_TAG_variable found
+	// in the wild.
+	//
+	// assert(id.tag == idspec.tag);
+
+	if (idspec->abstract_origin)
+		mergeAbstractOrigin(*idspec, context);
+	if (idspec->specification) {
+		mergeSpecification(*idspec, context);
+	}
+	id.merge(*idspec);
 }
 
 LOCCursor::LOCCursor(const DIECursor& parent, unsigned long off)
@@ -584,7 +602,7 @@ DIECursor::DIECursor(DWARF_CompilationUnitInfo* cu_, byte* ptr_)
 	cu = cu_;
 	ptr = ptr_;
 	level = 0;
-	hasChild = false;
+	prevHasChild = false;
 	sibling = 0;
 }
 
@@ -594,40 +612,41 @@ DIECursor::DIECursor(const DIECursor& parent, byte* ptr_)
 	ptr = ptr_;
 }
 
+// Advance the cursor to the next sibling of the current node, using the fast
+// path when possible.
 void DIECursor::gotoSibling()
 {
 	if (sibling)
 	{
-		// use sibling pointer, if available
+		// Fast path: use sibling pointer, if available.
 		ptr = sibling;
-		hasChild = false;
+		prevHasChild = false;
 	}
-	else if (hasChild)
+	else if (prevHasChild)
 	{
-		int currLevel = level;
+		// Slow path. Skip over child nodes until we get back to the current
+		// level.
+		const int currLevel = level;
 		level = currLevel + 1;
-		hasChild = false;
+		prevHasChild = false;
 
+		// Don't store these in the tree since this is just used for skipping over
+		// large swaths of nodes.
 		DWARF_InfoData dummy;
-		// read untill we pop back to the level we were at
+
+		// read until we pop back to the level we were at
 		while (level > currLevel)
-			readNext(dummy, true);
+			readNext(&dummy, true /* stopAtNull */);
 	}
 }
 
-bool DIECursor::readSibling(DWARF_InfoData& id)
-{
-    gotoSibling();
-	return readNext(id, true);
-}
-
 DIECursor DIECursor::getSubtreeCursor()
 {
-	if (hasChild)
+	if (prevHasChild)
 	{
 		DIECursor subtree = *this;
 		subtree.level = 0;
-		subtree.hasChild = false;
+		subtree.prevHasChild = false;
 		return subtree;
 	}
 	else // Return invalid cursor
@@ -696,31 +715,80 @@ static byte* getPointerInSection(const PEImage &img, const SectionDescriptor &se
 	return peSec.byteAt(offset);
 }
 
-bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
+// Scan the next DIE from the current CU.
+// TODO: Allocate a new element each time.
+DWARF_InfoData* DIECursor::readNext(DWARF_InfoData* entry, bool stopAtNull)
 {
-	id.clear();
+	std::unique_ptr<DWARF_InfoData> node;
+
+	// Controls whether we should bother establishing links between nodes.
+	// If 'entry' is provided, we are just going to be using it instead
+	// of allocating our own nodes. The callers typically reuse the same
+	// node over and over in this case, so don't bother tracking the links.
+	// Furthermore, since we clear the input node in this case, we can't rely
+	// on it from call to call.
+	// TODO: Rethink how to more cleanly express the alloc vs reuse modes of
+	// operation.
+	bool establishLinks = false;
+
+	// If an entry was passed in, use it. Else allocate one.
+	if (!entry) {
+		establishLinks = true;
+		node = std::make_unique<DWARF_InfoData>();
+		entry = node.get();
+	} else {
+		// If an entry was provided, make sure we clear it.
+		entry->clear();
+	}
 
-	if (hasChild)
+	entry->img = img;
+	
+	if (prevHasChild) {
+		// Prior element had a child, thus this element is its first child.
 		++level;
 
+		if (establishLinks) {
+			// Establish the first child.
+			prevParent->children = entry;
+		}
+	}
+
+	// Set up a convenience alias.
+	DWARF_InfoData& id = *entry;
+
+	// Find the first valid DIE.
 	for (;;)
 	{
 		if (level == -1)
-			return false; // we were already at the end of the subtree
+			return nullptr; // we were already at the end of the subtree
 
 		if (ptr >= cu->end_ptr)
-			return false; // root of the tree does not have a null terminator, but we know the length
+			return nullptr; // root of the tree does not have a null terminator, but we know the length
 
 		id.entryPtr = ptr;
 		entryOff = img->debug_info.sectOff(ptr);
 		id.code = LEB128(ptr);
+
+		// If the previously scanned node claimed to have a child, this must be a valid DIE.
+		assert(!prevHasChild || id.code);
+
+		// Check if we need to terminate the sibling chain.
 		if (id.code == 0)
 		{
-			--level; // pop up one level
+			// Done with this level.
+			if (establishLinks) {
+				// Continue linking siblings from the parent node.
+				prevNode = prevParent;
+
+				// Unwind the parent one level up.
+				prevParent = prevParent->parent;
+			}
+
+			--level;
 			if (stopAtNull)
 			{
-				hasChild = false;
-				return false;
+				prevHasChild = false;
+				return nullptr;
 			}
 			continue; // read the next DIE
 		}
@@ -733,17 +801,42 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
 		fprintf(stderr, "ERROR: %s:%d: unknown abbrev: num=%d off=%x\n", __FUNCTION__, __LINE__,
 				id.code, entryOff);
 		assert(abbrev);
-		return false;
+		return nullptr;
 	}
 
 	id.abbrev = abbrev;
 	id.tag = LEB128(abbrev);
 	id.hasChild = *abbrev++;
 
+	if (establishLinks) {
+		// If there was a previous node, link it to this one, thus continuing the chain.
+		if (prevNode) {
+			prevNode->next = entry;
+		}
+
+		// Establish parent of current node. If 'prevParent' is NULL, that is fine.
+		// It just means this node is a top-level node.
+		entry->parent = prevParent;
+
+		if (id.hasChild) {
+			// This node has children! Establish it as the new parent for future nodes.		
+			prevParent = entry;
+
+			// Clear the last DIE because the next scanned node will form the *start*
+			// of a new linked list comprising the children of the current node.
+			prevNode = nullptr;
+		}
+		else {
+			// Ensure the next node appends itself to this one.
+			prevNode = entry;
+		}
+	}
+
 	if (debug & DbgDwarfAttrRead)
 		fprintf(stderr, "%s:%d: offs=%x level=%d tag=%d abbrev=%d\n", __FUNCTION__, __LINE__,
 				entryOff, level, id.tag, id.code);
 
+	// Read all the attribute data for this DIE.
 	int attr, form;
 	for (;;)
 	{
@@ -809,7 +902,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
 			case DW_FORM_sec_offset:     a.type = SecOffset; a.sec_offset = RDref(ptr); break;
 			case DW_FORM_loclistx:       a.type = SecOffset; a.sec_offset = resolveIndirectSecPtr(LEB128(ptr), sec_desc_debug_loclists, cu->loclist_base); break;
 			case DW_FORM_rnglistx:       a.type = SecOffset; a.sec_offset = resolveIndirectSecPtr(LEB128(ptr), sec_desc_debug_rnglists, cu->rnglist_base); break;
-			default: assert(false && "Unsupported DWARF attribute form"); return false;
+			default: assert(false && "Unsupported DWARF attribute form"); return nullptr;
 		}
 
 		switch (attr)
@@ -852,6 +945,7 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
 			case DW_AT_type:      assert(a.type == Ref); id.type = a.ref; break;
 			case DW_AT_inline:    assert(a.type == Const); id.inlined = a.cons; break;
 			case DW_AT_external:  assert(a.type == Flag); id.external = a.flag; break;
+			case DW_AT_declaration: assert(a.type == Flag); id.isDecl = a.flag; break;
 			case DW_AT_upper_bound:
 				assert(a.type == Const || a.type == Ref || a.type == ExprLoc || a.type == Block);
 				if (a.type == Const) // TODO: other types not supported yet
@@ -912,10 +1006,12 @@ bool DIECursor::readNext(DWARF_InfoData& id, bool stopAtNull)
 		}
 	}
 
-	hasChild = id.hasChild != 0;
+	prevHasChild = id.hasChild != 0;
 	sibling = id.sibling;
 
-	return true;
+	// Transfer ownership of 'node' to caller, if we allocated one.
+	node.release();
+	return entry;
 }
 
 byte* DIECursor::getDWARFAbbrev(unsigned off, unsigned findcode)
diff --git a/src/readDwarf.h b/src/readDwarf.h
index 56e89a3..06779c8 100644
--- a/src/readDwarf.h
+++ b/src/readDwarf.h
@@ -11,6 +11,7 @@
 typedef unsigned char byte;
 class PEImage;
 class DIECursor;
+class CV2PDB;
 struct SectionDescriptor;
 
 enum DebugLevel : unsigned {
@@ -24,7 +25,8 @@ enum DebugLevel : unsigned {
 	DbgDwarfAttrRead = 0x400,
 	DbgDwarfLocLists = 0x800,
 	DbgDwarfRangeLists = 0x1000,
-	DbgDwarfLines = 0x2000
+	DbgDwarfLines = 0x2000,
+	DbgPrintDwarfTree = 0x4000,
 };
 
 DEFINE_ENUM_FLAG_OPERATORS(DebugLevel);
@@ -183,7 +185,10 @@ struct DWARF_FileName
 // In-memory representation of a DIE (Debugging Info Entry).
 struct DWARF_InfoData
 {
-	// Pointer into the mapped image section where this DIE is located.
+	// The PEImage for this entry.
+	PEImage* img = nullptr;
+
+	// Pointer into the memory-mapped image section where this DIE is located.
 	byte* entryPtr;
 
 	// Code to find the abbrev entry for this DIE, or 0 if it a sentinel marking
@@ -197,6 +202,18 @@ struct DWARF_InfoData
 	// Does this DIE have children?
 	int hasChild;
 
+	// Parent of this DIE, or NULL if top-level element.
+	DWARF_InfoData* parent = nullptr;
+
+	// Pointer to sibling in the tree. Not to be confused with 'sibling' below,
+	// which is a raw pointer to the DIE in the mapped/loaded image section.
+	// NULL if no more elements.
+	DWARF_InfoData* next = nullptr;
+
+	// Pointer to first child. This forms a linked list with the 'next' pointer.
+	// NULL if no children.
+	DWARF_InfoData* children = nullptr;
+
 	const char* name;
 	const char* linkage_name;
 	const char* dir;
@@ -213,10 +230,14 @@ struct DWARF_InfoData
 	// Pointer to the DW_AT_type DIE describing the type of this DIE.
 	byte* type;
 	byte* containing_type;
+
+	// Pointer to the DIE representing the declaration for this element if it
+	// is a definition. E.g. function decl for its definition/body.
 	byte* specification;
 	byte* abstract_origin;
 	unsigned long inlined;
-	bool external;
+	bool external = false; // is this subroutine visible outside its compilation unit?
+	bool isDecl = false; // is this a declaration?
 	DWARF_Attribute location;
 	DWARF_Attribute member_location;
 	DWARF_Attribute frame_base;
@@ -236,6 +257,7 @@ struct DWARF_InfoData
 		abbrev = 0;
 		tag = 0;
 		hasChild = 0;
+		parent = nullptr;
 
 		name = 0;
 		linkage_name = 0;
@@ -252,7 +274,8 @@ struct DWARF_InfoData
 		specification = 0;
 		abstract_origin = 0;
 		inlined = 0;
-		external = 0;
+		external = false;
+		isDecl = false;
 		member_location.type = Invalid;
 		location.type = Invalid;
 		frame_base.type = Invalid;
@@ -508,19 +531,31 @@ typedef std::unordered_map<std::pair<unsigned, unsigned>, byte*> abbrevMap_t;
 // as either an absolute value, a register, or a register-relative address.
 Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase = 0, int at = 0);
 
-void mergeAbstractOrigin(DWARF_InfoData& id, const DIECursor& parent);
-void mergeSpecification(DWARF_InfoData& id, const DIECursor& parent);
+void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context);
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context);
 
 // Debug Information Entry Cursor
 class DIECursor
 {
+	// TODO: make these private.
 public:
-	DWARF_CompilationUnitInfo* cu;
-	byte* ptr;
+	DWARF_CompilationUnitInfo* cu = nullptr; // the CU we are reading from.
+	byte* ptr = nullptr; // the current mapped location we are reading from.
 	unsigned int entryOff;
-	int level;
-	bool hasChild; // indicates whether the last read DIE has children
-	byte* sibling;
+	int level; // the current level of the tree in the scan.
+	bool prevHasChild = false; // indicates whether the last read DIE has children
+
+	// last DIE scanned. Used to link subsequent nodes in a list.
+	DWARF_InfoData* prevNode = nullptr;
+	
+	// The last parent node to which all subsequent nodes should be assigned.
+	// Initially, NULL, but as we encounter a node with children, we establish
+	// it as the new "parent" for future nodes, and reset it once we reach
+	// a top level node.
+	DWARF_InfoData* prevParent = nullptr;
+
+	// The mapped address of the sibling of the last scanned node, if any.
+	byte* sibling = nullptr;
 
 	static PEImage *img;
 	static abbrevMap_t abbrevMap;
@@ -541,17 +576,13 @@ class DIECursor
 	// Goto next sibling DIE.  If the last read DIE had any children, they will be skipped over.
 	void gotoSibling();
 
-	// Reads next sibling DIE.  If the last read DIE had any children, they will be skipped over.
-	// Returns 'false' upon reaching the last sibling on the current level.
-	bool readSibling(DWARF_InfoData& id);
-
 	// Returns cursor that will enumerate children of the last read DIE.
 	DIECursor getSubtreeCursor();
 
+	// Reads the next DIE in physical order; returns non-NULL on success.
+	// Reads the next DIE in physical order, returns non-NULL if succeeds.
 	// If stopAtNull is true, readNext() will stop upon reaching a null DIE (end of the current tree level).
 	// Otherwise, it will skip null DIEs and stop only at the end of the subtree for which this DIECursor was created.
-	bool readNext(DWARF_InfoData& id, bool stopAtNull = false);
+	DWARF_InfoData* readNext(DWARF_InfoData* entry, bool stopAtNull = false);
 
 	// Read an address from p according to the ambient pointer size.
 	uint64_t RDAddr(byte* &p) const

From a32d9207c46fbe02ef60473598325e3b50aec006 Mon Sep 17 00:00:00 2001
From: Alex Budovski <alexbud@meta.com>
Date: Tue, 21 Mar 2023 08:09:41 -0500
Subject: [PATCH 05/12] Add natvis for DWARF_InfoData

This commit adds natvis support for the core DIE in-memory
representation type, which is especially helpful now that it effectively
models two "lists": children and siblings. This natvis visualizes both
as lists in the locals view.
---
 src/NatvisFile.natvis      | 26 ++++++++++++++++++++++++++
 src/cv2pdb.vcxproj         |  9 ++++++---
 src/cv2pdb.vcxproj.filters |  6 ++++++
 3 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100644 src/NatvisFile.natvis

diff --git a/src/NatvisFile.natvis b/src/NatvisFile.natvis
new file mode 100644
index 0000000..21baadc
--- /dev/null
+++ b/src/NatvisFile.natvis
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="utf-8"?> 
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+	<Type Name="DWARF_InfoData">
+		<DisplayString>tag={tag} code={code} {name,s}</DisplayString>
+		<Expand>
+			<Synthetic Name="children" Condition="children">
+				<Expand>
+					<LinkedListItems>
+						<HeadPointer>children</HeadPointer>
+						<NextPointer>next</NextPointer>
+						<ValueNode>this</ValueNode>
+					</LinkedListItems>
+				</Expand>
+			</Synthetic>
+			<Synthetic Name="siblings" Condition="next">
+				<Expand>
+					<LinkedListItems>
+						<HeadPointer>next</HeadPointer>
+						<NextPointer>next</NextPointer>
+						<ValueNode>this</ValueNode>
+					</LinkedListItems>
+				</Expand>
+			</Synthetic>
+		</Expand>
+	</Type>
+</AutoVisualizer>
\ No newline at end of file
diff --git a/src/cv2pdb.vcxproj b/src/cv2pdb.vcxproj
index 6b96851..bc07d66 100644
--- a/src/cv2pdb.vcxproj
+++ b/src/cv2pdb.vcxproj
@@ -30,14 +30,14 @@
     <ProjectGuid>{5E2BD27D-446A-4C99-9829-135F7C000D90}</ProjectGuid>
     <RootNamespace>cv2pdb</RootNamespace>
     <Keyword>Win32Proj</Keyword>
-	<!-- guess the installed Windows SDK -->
+    <!-- guess the installed Windows SDK -->
     <WindowsSdkInstallFolder_10 Condition="'$(WindowsSdkInstallFolder_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0@InstallationFolder)</WindowsSdkInstallFolder_10>
     <WindowsSdkInstallFolder_10 Condition="'$(WindowsSdkInstallFolder_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0@InstallationFolder)</WindowsSdkInstallFolder_10>
     <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0@ProductVersion)</WindowsTargetPlatformVersion_10>
     <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0@ProductVersion)</WindowsTargetPlatformVersion_10>
     <!-- Sometimes the version in the registry has to .0 suffix, and sometimes it doesn't. Check and add it -->
     <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' != '' and !$(WindowsTargetPlatformVersion_10.EndsWith('.0'))">$(WindowsTargetPlatformVersion_10).0</WindowsTargetPlatformVersion_10>
-	<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion_10)' != ''">$(WindowsTargetPlatformVersion_10)</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion_10)' != ''">$(WindowsTargetPlatformVersion_10)</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
@@ -299,6 +299,9 @@
   <ItemGroup>
     <None Include="packages.config" />
   </ItemGroup>
+  <ItemGroup>
+    <Natvis Include="NatvisFile.natvis" />
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
     <Import Project="$(VCTargetsPath)\BuildCustomizations\masm.targets" />
@@ -310,4 +313,4 @@
     </PropertyGroup>
     <Error Condition="!Exists('packages\Microsoft.VisualStudio.Setup.Configuration.Native.1.16.30\build\native\Microsoft.VisualStudio.Setup.Configuration.Native.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.VisualStudio.Setup.Configuration.Native.1.16.30\build\native\Microsoft.VisualStudio.Setup.Configuration.Native.targets'))" />
   </Target>
-</Project>
+</Project>
\ No newline at end of file
diff --git a/src/cv2pdb.vcxproj.filters b/src/cv2pdb.vcxproj.filters
index 87f8a84..adf6a1b 100644
--- a/src/cv2pdb.vcxproj.filters
+++ b/src/cv2pdb.vcxproj.filters
@@ -78,4 +78,10 @@
       <Filter>Source Files</Filter>
     </MASM>
   </ItemGroup>
+  <ItemGroup>
+    <None Include="packages.config" />
+  </ItemGroup>
+  <ItemGroup>
+    <Natvis Include="NatvisFile.natvis" />
+  </ItemGroup>
 </Project>
\ No newline at end of file

From 6fd46e795333430d28657fbeba72b4a0232a28ce Mon Sep 17 00:00:00 2001
From: Alex Budovski <alexbud@meta.com>
Date: Wed, 22 Mar 2023 14:06:27 -0500
Subject: [PATCH 06/12] Add support for anon types in FQN generation

This fixes a bug in the fully qualified name generation when we encounter
a segment that has no name, such as when a proc is really a C++ lambda
expression, which internally generates an anonymous class to house the
function. In this case, we generate a unique name based on the DIE's
offset to serve as the segment's name.
---
 src/dwarf2pdb.cpp | 20 ++++++++++++++++++--
 src/readDwarf.cpp |  2 +-
 src/readDwarf.h   |  2 ++
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp
index 763be85..afd062e 100644
--- a/src/dwarf2pdb.cpp
+++ b/src/dwarf2pdb.cpp
@@ -735,13 +735,29 @@ void CV2PDB::formatFullyQualifiedProcName(const DWARF_InfoData* proc, char* buf,
 
 	// Format the parents in reverse order with :: operator in between.
 	for (int i = segments.size() - 1; i >= 0; --i) {
-		const int nameLen = strlen(segments[i]->name);
+		const char* name = segments[i]->name;
+		char nameBuf[64] = {};
+		int nameLen = 0;
+		if (!name) {
+			// This segment has no name. This could be because it is part of
+			// an anonymous class, which often happens for lambda expressions.
+			// Generate a unique anonymous name for it from the DIE's offset.
+			name = nameBuf;
+			nameLen = sprintf_s(nameBuf, "[anon_%x]", segments[i]->entryOff);
+			if (nameLen < 0) {
+				assert(false);  // formatting failed; crash in debug builds.
+				name = "[anon]";  // release builds fall back to a fixed name.
+				nameLen = strlen(name);  // keep the length in sync with the fallback.
+			}
+		} else {
+			nameLen = strlen(name);
+		}
 		if (remain < nameLen) {
 			fprintf(stderr, "unable to fit full proc name: %s\n", proc->name);
 			return;
 		}
 
-		memcpy(p, segments[i]->name, nameLen);
+		memcpy(p, name, nameLen);
 
 		p += nameLen;
 		remain -= nameLen;
diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp
index 9218f41..8e4ca4d 100644
--- a/src/readDwarf.cpp
+++ b/src/readDwarf.cpp
@@ -766,7 +766,7 @@ DWARF_InfoData* DIECursor::readNext(DWARF_InfoData* entry, bool stopAtNull)
 			return nullptr; // root of the tree does not have a null terminator, but we know the length
 
 		id.entryPtr = ptr;
-		entryOff = img->debug_info.sectOff(ptr);
+		entryOff = id.entryOff = img->debug_info.sectOff(ptr);
 		id.code = LEB128(ptr);
 
 		// If the previously scanned node claimed to have a child, this must be a valid DIE.
diff --git a/src/readDwarf.h b/src/readDwarf.h
index 06779c8..04afa38 100644
--- a/src/readDwarf.h
+++ b/src/readDwarf.h
@@ -191,6 +191,8 @@ struct DWARF_InfoData
 	// Pointer into the memory-mapped image section where this DIE is located.
 	byte* entryPtr;
 
+	unsigned int entryOff = 0;  // the entry offset in the section it is in.
+
 	// Code to find the abbrev entry for this DIE, or 0 if it a sentinel marking
 	// the end of a sibling chain.
 	int code;

From d48181528456501562f4b7551878eefe19381cbe Mon Sep 17 00:00:00 2001
From: Alex Budovski <alexbud@meta.com>
Date: Wed, 22 Mar 2023 19:19:13 -0500
Subject: [PATCH 07/12] Fix bug in DWARF v4 location decoding

This fixes a long-standing bug in the DWARF v4 location decoding logic.
Previously, the logic would not recognize "base address selection"
entries at all and would assume an entry was either an end-of-list or a
location list entry, thus resulting in incorrect decoding in release
builds, and an assert in debug builds.

See DWARF v4 spec section 2.6.2 for details.
---
 src/readDwarf.cpp | 27 ++++++++++++++++++++++++++-
 src/readDwarf.h   | 20 +++++++++++++++++---
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp
index 8e4ca4d..6562980 100644
--- a/src/readDwarf.cpp
+++ b/src/readDwarf.cpp
@@ -414,9 +414,12 @@ void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context)
 LOCCursor::LOCCursor(const DIECursor& parent, unsigned long off)
 	: parent(parent)
 {
+	// Default the base address to the compilation unit (DWARF v4 2.6.2)
 	base = parent.cu->base_address;
 	isLocLists = (parent.cu->version >= 5);
 
+	// DWARF v4 uses .debug_loc, DWARF v5 uses .debug_loclists with a different
+	// schema.
 	const PESection& sec = isLocLists ? parent.img->debug_loclists : parent.img->debug_loc;
 	ptr = sec.byteAt(off);
 	end = sec.endByte();
@@ -426,6 +429,8 @@ bool LOCCursor::readNext(LOCEntry& entry)
 {
 	if (isLocLists)
 	{
+		// DWARF v5 location list parsing.
+
 		if (parent.debug & DbgDwarfLocLists)
 			fprintf(stderr, "%s:%d: loclists off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__,
 					parent.img->debug_loclists.sectOff(ptr), parent.entryOff);
@@ -488,6 +493,8 @@ bool LOCCursor::readNext(LOCEntry& entry)
 	}
 	else
 	{
+		// The logic here is governed by DWARF v4 section 2.6.2.
+
 		if (ptr >= end)
 			return false;
 
@@ -495,10 +502,28 @@ bool LOCCursor::readNext(LOCEntry& entry)
 			fprintf(stderr, "%s:%d: loclist off=%x DIEoff=%x:\n", __FUNCTION__, __LINE__,
 					parent.img->debug_loc.sectOff(ptr), parent.entryOff);
 
+		// Extract the begin and end offset
+		// TODO: Why is this truncating to 32 bit?
 		entry.beg_offset = (unsigned long) parent.RDAddr(ptr);
 		entry.end_offset = (unsigned long) parent.RDAddr(ptr);
-		if (!entry.beg_offset && !entry.end_offset)
+
+		// Check for a base-address-selection entry.
+		if (entry.beg_offset == -1U) {
+			// This is a base address selection entry and thus has no location
+			// description.
+			// Update the base address with this entry's value.
+			base = entry.end_offset;
+
+			// Continue the scan, but don't try to decode further since there
+			// are no location description records following this type of entry.
+			return true;
+		}
+
+		// Check for end-of-list entry. (Both offsets 0)
+		if (!entry.beg_offset && !entry.end_offset) {
+			// Terminate the scan.
 			return false;
+		}
 
 		DWARF_Attribute attr;
 		attr.type = Block;
diff --git a/src/readDwarf.h b/src/readDwarf.h
index 04afa38..4b0ac98 100644
--- a/src/readDwarf.h
+++ b/src/readDwarf.h
@@ -470,9 +470,15 @@ struct Location
 class LOCEntry
 {
 public:
-	unsigned long beg_offset;
-	unsigned long end_offset;
+	// TODO: investigate making these 64bit (or vary). Also consider renaming
+	// to Value0 and Value1 since their meaning varies depending on entry type.
+	unsigned long beg_offset; // or -1U for base address selection entries
+	unsigned long end_offset; // or the base address in base address selection entries
+
+	// DWARF v5 only. See DW_LLE_default_location.
 	bool isDefault;
+
+	// Location description.
 	Location loc;
 
 	void addBase(uint32_t base)
@@ -482,16 +488,24 @@ class LOCEntry
 	}
 };
 
-// Location list cursor
+// Location list cursor (see DWARF v4 and v5 Section 2.6).
 class LOCCursor
 {
 public:
 	LOCCursor(const DIECursor& parent, unsigned long off);
 
 	const DIECursor& parent;
+
+	// The base address for subsequent loc list entries read in a given list.
+	// Default to the CU base in the absence of any base address selection entries.
+	//
+	// TODO: So far we only assign to this but never actually use it.
 	uint32_t base;
+
 	byte* end;
 	byte* ptr;
+
+	// Is this image using the new debug_loclists section in DWARF v5?
 	bool isLocLists;
 
 	bool readNext(LOCEntry& entry);

From 186bfe29adc6875fd26107a18ab1bb257cbd9dec Mon Sep 17 00:00:00 2001
From: Alex Budovski <alexbud@meta.com>
Date: Wed, 22 Mar 2023 21:31:32 -0500
Subject: [PATCH 08/12] Support namespaces/nesting for structs

This commit extends the FQN generation for procs to
structs/classes/unions using a similar pattern of looking up the parent
chain during construction of the name.

Like procs, we need to perform a similar "deduplication" and
only emit a single entry for each struct. As with procs, DWARF has
multiple "flavors" of a single struct as multiple DIE records. All but
one are marked as a "declaration" and lack member information.  We want
the one with member information in the PDB as that will allow the
debugger to inspect the contents of objects of that type.

What makes structs trickier than procs is some formal parameters, local
(stack) variables, etc. reference the "declaration" flavors of the DIE,
which lack the member information. This makes this complicated for two
reasons:

1) We don't want to emit the declaration versions at all due to the
   consolidation reasons explained earlier.
2) If we don't emit the declaration versions, we need to "patch up" the
   type references of those params/locals to point to the definition
   entry we emit.

But (2) in itself is tricky, because, alas, DWARF has no direct link
between a "declaration" entry of a struct and its corresponding definition,
like it does with procs (procs have a "DW_AT_specification" attribute
which links the def to its decl. See 2.13.2 for details).

Alas, what I resorted to is a (hopefully precise) heuristic. I do the
matching manually by:

1) Introducing an index that lets me look up a DWARF entry by its
   (local, unqualified) name.
2) Using that index, look at all the matches (ignoring the decls, of
   course) and walking up their parent chains and comparing the tags and
   names of their ancestors.
3) If all of those things check out, we can be reasonably confident that
   this candidate entry is the logically equivalent "definition" entry
   for the "declaration" struct entry we're trying to resolve the typeID
   for.

So far it's worked well on the binaries I've tested it on. With this, we
can now dump structures (including nested and namespaced ones) in
WinDbg.
---
 src/cv2pdb.h      |  12 +++-
 src/dwarf2pdb.cpp | 172 ++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 152 insertions(+), 32 deletions(-)

diff --git a/src/cv2pdb.h b/src/cv2pdb.h
index e5e8144..76d6340 100644
--- a/src/cv2pdb.h
+++ b/src/cv2pdb.h
@@ -176,14 +176,15 @@ class CV2PDB : public LastError
 
 	bool addDWARFSectionContrib(mspdb::Mod* mod, unsigned long pclo, unsigned long pchi);
 	bool addDWARFProc(DWARF_InfoData& id, const std::vector<RangeEntry> &ranges, DIECursor cursor);
-	void formatFullyQualifiedProcName(const DWARF_InfoData* proc, char* buf, size_t cbBuf) const;
+	void formatFullyQualifiedName(const DWARF_InfoData* node, char* buf, size_t cbBuf) const;
 
 	int  addDWARFStructure(DWARF_InfoData& id, DIECursor cursor);
 	int  addDWARFFields(DWARF_InfoData& structid, DIECursor& cursor, int off, int flStart);
 	int  addDWARFArray(DWARF_InfoData& arrayid, const DIECursor& cursor);
 	int  addDWARFBasicType(const char*name, int encoding, int byte_size);
 	int  addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor);
-	int  getTypeByDWARFPtr(byte* ptr);
+	int  getTypeByDWARFPtr(byte* typePtr);
+	int  findTypeIdByPtr(byte* typePtr) const;
 	int  getDWARFTypeSize(const DIECursor& parent, byte* ptr);
 	void getDWARFArrayBounds(DIECursor cursor,
 		int& basetype, int& lowerBound, int& upperBound);
@@ -282,14 +283,19 @@ class CV2PDB : public LastError
 
 	double Dversion;
 
-	// DWARF
+	// DWARF fields.
+
 	int codeSegOff;
 
 	// Lookup table for type IDs based on the DWARF_InfoData::entryPtr
 	std::unordered_map<byte*, int> mapEntryPtrToTypeID;
+	
 	// Lookup table for entries based on the DWARF_InfoData::entryPtr
 	std::unordered_map<byte*, DWARF_InfoData*> mapEntryPtrToEntry;
 
+	// A multimap keyed on entry name. Since this is not unique, we use a multimap.
+	std::multimap<std::string, DWARF_InfoData*> mapEntryNameToEntries;
+
 	// Head of list of DWARF DIE nodes.
 	DWARF_InfoData* dwarfHead = nullptr;
 
diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp
index afd062e..c88768e 100644
--- a/src/dwarf2pdb.cpp
+++ b/src/dwarf2pdb.cpp
@@ -699,20 +699,27 @@ void CV2PDB::appendLexicalBlock(DWARF_InfoData& id, unsigned int proclo)
 // for a Foo constructor in a Foo class in a namespace called "some_ns".
 // PDBs require fully qualified names in their symbols.
 // TODO: better error handling for out of space.
-void CV2PDB::formatFullyQualifiedProcName(const DWARF_InfoData* proc, char* buf, size_t cbBuf) const {
-	if (proc->specification) {
+void CV2PDB::formatFullyQualifiedName(const DWARF_InfoData* node, char* buf, size_t cbBuf) const {
+	if (node->specification) {
 		// If the proc has a "specification", i.e. a declaration, use it instead
 		// of the definition, as it has a proper hierarchy connected to it
 		// which will give us a proper fully-qualified name like Foo::Foo
 		// instead of just Foo.
-		const DWARF_InfoData* entry = findEntryByPtr(proc->specification);
+		const DWARF_InfoData* entry = findEntryByPtr(node->specification);
 		if (entry) {
-			proc = entry;
+			node = entry;
 		}
+	} else {
+		// Find the node's entry in the DWARF tree. We can't use 'node' as is because
+		// it is a local copy without linkage into the tree, as it comes from
+		// the 2nd pass scan after the tree is already built.
+		const DWARF_InfoData* entry = findEntryByPtr(node->entryPtr);
+		assert(entry);  // how can it not exist? Bug in tree construction.
+		node = entry;
 	}
-	DWARF_InfoData* parent = proc->parent;
+	DWARF_InfoData* parent = node->parent;
 	std::vector<const DWARF_InfoData*> segments;
-	segments.push_back(proc);
+	segments.push_back(node);
 
 	// Accumulate all the valid parent scopes so that we can reverse them for
 	// formatting.
@@ -753,7 +760,7 @@ void CV2PDB::formatFullyQualifiedProcName(const DWARF_InfoData* proc, char* buf,
 			nameLen = strlen(name);
 		}
 		if (remain < nameLen) {
-			fprintf(stderr, "unable to fit full proc name: %s\n", proc->name);
+			fprintf(stderr, "unable to fit full symbol name: %s\n", node->name);
 			return;
 		}
 
@@ -765,7 +772,7 @@ void CV2PDB::formatFullyQualifiedProcName(const DWARF_InfoData* proc, char* buf,
 		if (i > 0) {
 			// Append :: separator
 			if (remain < 2) {
-				fprintf(stderr, "unable to fit full proc name (:: separator): %s\n", proc->name);
+				fprintf(stderr, "unable to fit full symbol name (:: separator): %s\n", node->name);
 				return;
 			}
 			*p++ = ':';
@@ -808,7 +815,7 @@ bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry>
 
 //    printf("GlobalPROC %s\n", procid.name);
 	char namebuf[kMaxNameLen] = {};
-	formatFullyQualifiedProcName(&procid, namebuf, sizeof namebuf);
+	formatFullyQualifiedName(&procid, namebuf, sizeof namebuf);
 	len = cstrcpy_v (v3, (BYTE*) &cvs->proc_v2.p_name, namebuf);
 	len += (BYTE*) &cvs->proc_v2.p_name - (BYTE*) cvs;
 	for (; len & (align-1); len++)
@@ -974,7 +981,7 @@ int CV2PDB::addDWARFFields(DWARF_InfoData& structid, DIECursor& cursor, int base
 
 	// cursor points to the first member of the class/struct/union.
 	DWARF_InfoData id;
-	while (cursor.readNext(&id, true))
+	while (cursor.readNext(&id, true /* stopAtNull */))
 	{
 		if (cbDwarfTypes - flStart > 0x10000 - kMaxNameLen - 100)
 			break; // no more space in field list, TODO: add continuation record, see addDWARFEnum
@@ -1094,14 +1101,15 @@ int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DIECursor cursor)
 	checkUserTypeAlloc(kMaxNameLen + 100);
 	codeview_type* cvt = (codeview_type*) (userTypes + cbUserTypes);
 
-	const char* name = (structid.name ? structid.name : "__noname");
+	char namebuf[kMaxNameLen] = {};
+	formatFullyQualifiedName(&structid, namebuf, sizeof namebuf);
 	int attr = fieldlistType ? 0 : kPropIncomplete;
-	int len = addAggregate(cvt, false, nfields, fieldlistType, attr, 0, 0, structid.byte_size, name, nullptr);
+	int len = addAggregate(cvt, false, nfields, fieldlistType, attr, 0, 0, structid.byte_size, namebuf, nullptr);
 	cbUserTypes += len;
 
 	//ensureUDT()?
 	int cvtype = nextUserType++;
-	addUdtSymbol(cvtype, name);
+	addUdtSymbol(cvtype, namebuf);
 	return cvtype;
 }
 
@@ -1429,14 +1437,93 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor)
 	return enumType;
 }
 
-int CV2PDB::getTypeByDWARFPtr(byte* ptr)
+// Find the CV TypeID for the DIE at 'typePtr' (e.g. the target of a DW_AT_type
+// attribute), falling back to an equivalent definition DIE; T_NOTYPE if none.
+int CV2PDB::getTypeByDWARFPtr(byte* typePtr)
 {
-	if (ptr == nullptr)
-		return 0x03; // void
-	std::unordered_map<byte*, int>::iterator it = mapEntryPtrToTypeID.find(ptr);
-	if (it == mapEntryPtrToTypeID.end())
-		return 0x03; // void
-	return it->second;
+	if (typePtr == nullptr)
+		return T_NOTYPE;
+
+	// First just attempt to find the type entry directly.
+	int ret = findTypeIdByPtr(typePtr);
+	if (!ret) {
+		// TypeID was not found in the map. This may be due to struct
+		// decl / definition consolidation. I.e. we don't emit the struct decl
+		// because they show up as "empty" structs (devoid of members).
+		// Try to match this against the logically equivalent "definition"
+		// type.
+		DWARF_InfoData* entry = findEntryByPtr(typePtr);
+		assert(entry); // how can the entry not exist in the map?
+
+		// Skip anonymous structures or similar.
+		if (!entry || !entry->name) {
+			return T_NOTYPE;
+		}
+
+		// See if there exists another "logically equivalent" entry in the tree.
+		// First, find all entries with the same (local) name as this type.
+		auto range = mapEntryNameToEntries.equal_range(entry->name);
+		for (auto it = range.first; it != range.second; ++it) {
+			DWARF_InfoData* candidate = it->second;
+
+			// Skip self.
+			if (candidate == entry) {
+				continue;
+			}
+
+			// Skip declarations (as when they are of structs, they don't help.
+			// We want definitions only as they define the fields in DWARF.)
+			if (candidate->isDecl) {
+				continue;
+			}
+
+			// Filter nodes based on the matching tag.
+			if (candidate->tag != entry->tag) {
+				continue;
+			}
+
+			// Found a matching tag for this element. Walk up the tree and check
+			// if all parent tags and names match also. If they do, we found a
+			// node "equivalent" to the 'typePtr' one, which wasn't added to the
+			// typeID registry (because it was likely a decl that we filtered out).
+			DWARF_InfoData* candidateParent = candidate->parent;
+			DWARF_InfoData* entryParent = entry->parent;
+
+			bool equivalentHierarchy = true;
+			while (candidateParent && entryParent) {
+				if (candidateParent->tag != entryParent->tag) {
+					// Tag mismatch.
+					equivalentHierarchy = false;
+					break;
+				}
+
+				// Skip CUs as of course they have different names. We only care
+				// about namespaces, containing structs, classes, etc. The tags are
+				// equal here, so checking one suffices. Names may be null (unnamed).
+				if (entryParent->tag != DW_TAG_compile_unit) {
+					const char* candidateName = candidateParent->name;
+					const char* entryName = entryParent->name;
+					if ((!candidateName || !entryName) ? candidateName != entryName : strcmp(candidateName, entryName) != 0) {
+						equivalentHierarchy = false;  // Name mismatch.
+						break;
+					}
+				}
+
+				candidateParent = candidateParent->parent;
+				entryParent = entryParent->parent;
+			}
+
+			if (equivalentHierarchy) {
+				// Try another lookup with this new candidate.
+				ret = findTypeIdByPtr(candidate->entryPtr);
+				assert(ret);  // how can it not be in the map?
+				if (ret) break;  // resolved; don't warn about the remaining candidates.
+			} else {
+				fprintf(stderr, "warn: could not find equivalent entry for typePtr %p\n", typePtr);
+			}
+		}
+	}
+	return ret;
 }
 
 // Get the logical size of a DWARF type, starting from 'typePtr' and recursing
@@ -1535,24 +1622,32 @@ bool CV2PDB::mapTypes()
 				fprintf(stderr, "%s:%d: 0x%08x, level = %d, id.code = %d, id.tag = %d\n", __FUNCTION__, __LINE__,
 						cursor.entryOff, cursor.level, id.code, id.tag);
 
-			// Insert it into the map.
+			// Insert the node into the entryPtr-based index.
 			mapEntryPtrToEntry[node->entryPtr] = node;
 
+			// Insert named nodes into the name-based index.
+			if (node->name) {
+				mapEntryNameToEntries.insert({ node->name, node });
+			}
+
 			switch (id.tag)
 			{
+				case DW_TAG_structure_type:
+				case DW_TAG_class_type:
+				case DW_TAG_union_type:
+					// skip generating a typeID for declaration flavor of
+					// class/struct/union since we don't emit the PDB symbol
+					// for them. See related code in CV2PDB::createTypes().
+					if (id.isDecl) continue; 
 				case DW_TAG_base_type:
 				case DW_TAG_typedef:
 				case DW_TAG_pointer_type:
 				case DW_TAG_subroutine_type:
 				case DW_TAG_array_type:
 				case DW_TAG_const_type:
-				case DW_TAG_structure_type:
 				case DW_TAG_reference_type:
-
-				case DW_TAG_class_type:
 				case DW_TAG_enumeration_type:
 				case DW_TAG_string_type:
-				case DW_TAG_union_type:
 				case DW_TAG_ptr_to_member_type:
 				case DW_TAG_set_type:
 				case DW_TAG_subrange_type:
@@ -1663,7 +1758,16 @@ bool CV2PDB::createTypes()
 			case DW_TAG_class_type:
 			case DW_TAG_structure_type:
 			case DW_TAG_union_type:
-				cvtype = addDWARFStructure(id, cursor);
+				if (!id.isDecl) {
+					// Only export the non-declaration version of structs/classes.
+					// DWARF emits multiple copies of the same class, some of
+					// which are marked as declarations and lack members, resulting
+					// in an empty struct UDT in the PDB. Then when we encounter
+					// the non-declaration copy we emit it again, but now we
+					// end up with multiple copies of the same UDT in the PDB
+					// and the debugger gets confused.
+					cvtype = addDWARFStructure(id, cursor);
+				}
 				break;
 			case DW_TAG_array_type:
 				cvtype = addDWARFArray(id, cursor);
@@ -1972,10 +2076,20 @@ DWARF_InfoData* CV2PDB::findEntryByPtr(byte* entryPtr) const
 	if (it == mapEntryPtrToEntry.end()) {
 		// Could not find decl for this definition.
 		return nullptr;
+	} 
+	return it->second;
+}
+
+// Look up the TypeID registered in mapEntryPtrToTypeID for 'typePtr', i.e. the
+// DIE's memory-mapped location in the loaded PE image buffer; T_NOTYPE if absent.
+int CV2PDB::findTypeIdByPtr(byte* typePtr) const
+{
+	auto it = mapEntryPtrToTypeID.find(typePtr);
+	if (it == mapEntryPtrToTypeID.end()) {
+		// No TypeID has been registered for this pointer.
+		return T_NOTYPE;
 	}
-	else {
-		return it->second;
-	}
+	return it->second;
 }
 
 bool CV2PDB::writeDWARFImage(const TCHAR* opath)

From b36951d9b0c9a9b69784cde6342926ee4a7d0381 Mon Sep 17 00:00:00 2001
From: Alex Budovski <alexbud@meta.com>
Date: Thu, 23 Mar 2023 18:36:52 -0500
Subject: [PATCH 09/12] Support fully qualified names for enums

This commit enables full support for enums in the following ways:

1) Enums now are also produced with fully-qualified names (such as those
   nested in classes, or namespaces). Thus two enums `Foo` in two
   classes can be uniquely examined in a debugger.
2) Fix the way in which we emit the underlying enum type in the PDB.
   Previously, we were emitting a "UDT reference" for almost all enum
   types, even "unsigned long", which it turns out Windows debuggers do
   not like. WinDbg crashes when encountering such a symbol in some
   cases, and Visual Studio Debugger fails to resolve the value.

   It turns out that the root cause here is that enums in a PDB must
   always use a "base type," never a UDT. To do this, I unwind the DIE
   type reference by walking its "type" chain until the last element,
   and produce a CV base type from the encoding and byte size of said
   record.

   E.g. for cases like enum Foo : size_t { ... }, we walk the size_t UDT
   reference and find, for example, an `unsigned long` UDT reference,
   and follow that, and ultimately find a base type with encoding
   `DW_ATE_unsigned` and byte size = 4, and generate a T_ULONG CV type
   for the enum.

Also, a minor improvement:

We now correctly indicate whether a structure is a class or struct,
instead of always emitting "struct" in the CV record.
---
 src/dwarf2pdb.cpp | 51 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp
index c88768e..b65d73d 100644
--- a/src/dwarf2pdb.cpp
+++ b/src/dwarf2pdb.cpp
@@ -1104,7 +1104,7 @@ int CV2PDB::addDWARFStructure(DWARF_InfoData& structid, DIECursor cursor)
 	char namebuf[kMaxNameLen] = {};
 	formatFullyQualifiedName(&structid, namebuf, sizeof namebuf);
 	int attr = fieldlistType ? 0 : kPropIncomplete;
-	int len = addAggregate(cvt, false, nfields, fieldlistType, attr, 0, 0, structid.byte_size, namebuf, nullptr);
+	int len = addAggregate(cvt, structid.tag == DW_TAG_class_type, nfields, fieldlistType, attr, 0, 0, structid.byte_size, namebuf, nullptr);
 	cbUserTypes += len;
 
 	//ensureUDT()?
@@ -1323,10 +1323,10 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor)
 	/* Enumerated types are described in CodeView with two components:
 
 	   1. A LF_ENUM leaf, representing the type itself. We put this one in the
-	      userTypes buffer.
+		  userTypes buffer.
 
 	   2. One or several LF_FIELDLIST records, to contain the list of
-	      enumerators (name and value) associated to the enum type
+		  enumerators (name and value) associated to the enum type
 		  (LF_ENUMERATE leaves). As type records cannot be larger 2**16 bytes,
 		  we need to create multiple records when there are too many
 		  enumerators. The first record contains the first LF_ENUMERATE leaves,
@@ -1372,7 +1372,7 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor)
 			int len = addFieldEnumerate(dfieldtype, id.name, id.const_value);
 
 			/* If adding this enumerate leaves no room for a LF_INDEX leaf,
-		       create a new LF_FIELDLIST record now. */
+			   create a new LF_FIELDLIST record now. */
 			if (fieldlistLength + len + sizeof(dfieldtype->index_v2) > 0xffff)
 			{
 				/* Append the LF_INDEX leaf. */
@@ -1425,15 +1425,46 @@ int CV2PDB::addDWARFEnum(DWARF_InfoData& enumid, DIECursor cursor)
 
 	/* Now the LF_FIELDLIST is ready, create the LF_ENUM type record itself. */
 	checkUserTypeAlloc();
-	int basetype = (enumid.type != 0)
-				   ? getTypeByDWARFPtr(enumid.type)
-				   : getDWARFBasicType(enumid.encoding, enumid.byte_size);
+	const DWARF_InfoData* entry = findEntryByPtr(enumid.entryPtr);
+	int prop = 0;
+	if (entry && entry->parent) {
+		int tag = entry->parent->tag;
+		if (tag == DW_TAG_class_type ||
+			tag == DW_TAG_structure_type ||
+			tag == DW_TAG_union_type)
+		{
+			prop |= kPropIsNested;
+		}
+	}
+
+	// NOTE: WinDbg/VS Dbg expects enum types to be base types, not indirect
+	// refs/UDTs.
+	// 
+	// Compute the best base/underlying type to use.
+	int encoding = DW_ATE_signed;  // default to int
+	const DWARF_InfoData* typeEntry = findEntryByPtr(enumid.type);
+	const DWARF_InfoData* t = typeEntry;
+
+	// Follow all the parent types to get to the base UDT.
+	while (t) {
+		t = findEntryByPtr(t->type);
+		if (t) typeEntry = t;
+	}
+
+	if (typeEntry) {
+		encoding = typeEntry->encoding;
+		assert(typeEntry->byte_size == enumid.byte_size);
+	}
+
+	const int basetype = getDWARFBasicType(encoding, enumid.byte_size);
+
 	dtype = (codeview_type*)(userTypes + cbUserTypes);
-	const char* name = (enumid.name ? enumid.name : "__noname");
-	cbUserTypes += addEnum(dtype, count, firstFieldlistType, 0, basetype, name);
+	char namebuf[kMaxNameLen] = {};
+	formatFullyQualifiedName(&enumid, namebuf, sizeof namebuf);
+	cbUserTypes += addEnum(dtype, count, firstFieldlistType, prop, basetype, namebuf);
 	int enumType = nextUserType++;
 
-	addUdtSymbol(enumType, name);
+	addUdtSymbol(enumType, namebuf);
 	return enumType;
 }
 

From 4d9596ce98097f817700948a90daa3e8b150ed9b Mon Sep 17 00:00:00 2001
From: Rainer Schuetze <r.sagitario@gmx.de>
Date: Sun, 2 Apr 2023 11:39:56 +0200
Subject: [PATCH 10/12] fix vcxproj files so they don't require a specific VS
 version

---
 src/dumplines.vcxproj               | 4 ++--
 src/dviewhelper/dviewhelper.vcxproj | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/dumplines.vcxproj b/src/dumplines.vcxproj
index 3460678..4bf62fd 100644
--- a/src/dumplines.vcxproj
+++ b/src/dumplines.vcxproj
@@ -26,13 +26,13 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
-    <PlatformToolset>v120_xp</PlatformToolset>
+    <PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
     <WholeProgramOptimization>true</WholeProgramOptimization>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
-    <PlatformToolset>v120_xp</PlatformToolset>
+    <PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
diff --git a/src/dviewhelper/dviewhelper.vcxproj b/src/dviewhelper/dviewhelper.vcxproj
index db8fdf4..e373574 100644
--- a/src/dviewhelper/dviewhelper.vcxproj
+++ b/src/dviewhelper/dviewhelper.vcxproj
@@ -20,17 +20,17 @@
     <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0@ProductVersion)</WindowsTargetPlatformVersion_10>
     <!-- Sometimes the version in the registry has to .0 suffix, and sometimes it doesn't. Check and add it -->
     <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' != '' and !$(WindowsTargetPlatformVersion_10.EndsWith('.0'))">$(WindowsTargetPlatformVersion_10).0</WindowsTargetPlatformVersion_10>
-    <WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion_10)' != ''">10.0</WindowsTargetPlatformVersion>
+    <WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion_10)' != ''">$(WindowsTargetPlatformVersion_10)</WindowsTargetPlatformVersion>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
-    <PlatformToolset>v143</PlatformToolset>
+    <PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />

From da6546c45efdbf53f796192d1b65c5557d1f1882 Mon Sep 17 00:00:00 2001
From: Rainer Schuetze <r.sagitario@gmx.de>
Date: Sun, 2 Apr 2023 11:41:41 +0200
Subject: [PATCH 11/12] move mergeAbstractOrigin() and mergeSpecification()
 from readDwarf.cpp to dwarf2pdb.cpp, so they don't create a dependency for
 dumplines.exe

---
 src/dwarf2pdb.cpp | 50 +++++++++++++++++++++++++++++++++++++++++++++++
 src/readDwarf.cpp | 48 ---------------------------------------------
 src/readDwarf.h   |  3 ---
 3 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp
index b65d73d..c46ab32 100644
--- a/src/dwarf2pdb.cpp
+++ b/src/dwarf2pdb.cpp
@@ -786,6 +786,56 @@ void CV2PDB::formatFullyQualifiedName(const DWARF_InfoData* node, char* buf, siz
 	}
 }
 
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context); // forward declaration: called by mergeAbstractOrigin() below
+
+// Find the source of an inlined function by following its 'abstract_origin'
+// attribute references and recursively merge it into 'id'.
+// TODO: this description isn't quite right. See section 3.3.8.1 in DWARF 4 spec.
+void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context)
+{
+	DWARF_InfoData* abstractOrigin = context.findEntryByPtr(id.abstract_origin);
+	if (!abstractOrigin) {
+		// Could not find abstract origin (cause unknown). If asserts are compiled out, 'id' is left unmerged.
+		assert(false);
+		return;
+	}
+
+	// A tag-equality assert seems invalid here: the combination DW_TAG_member
+	// and DW_TAG_variable has been found in the wild.
+	//
+	// assert(id.tag == abstractOrigin->tag);
+
+	if (abstractOrigin->abstract_origin)
+		mergeAbstractOrigin(*abstractOrigin, context);
+	if (abstractOrigin->specification)
+		mergeSpecification(*abstractOrigin, context);
+	id.merge(*abstractOrigin); // merge only after the origin's own references were resolved
+}
+
+// Find the declaration entry for a definition by following its 'specification'
+// attribute references and recursively merge it into 'id'.
+void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context)
+{
+	DWARF_InfoData* idspec = context.findEntryByPtr(id.specification);
+	if (!idspec) {
+		// Could not find decl for this definition (cause unknown). If asserts are compiled out, 'id' is left unmerged.
+		assert(false);
+		return;
+	}
+
+	// A tag-equality assert seems invalid here: the combination DW_TAG_member
+	// and DW_TAG_variable has been found in the wild.
+	//
+	// assert(id.tag == idspec->tag);
+
+	if (idspec->abstract_origin)
+		mergeAbstractOrigin(*idspec, context);
+	if (idspec->specification) {
+		mergeSpecification(*idspec, context);
+	}
+	id.merge(*idspec); // merge only after idspec's own references were resolved
+}
+
 bool CV2PDB::addDWARFProc(DWARF_InfoData& procid, const std::vector<RangeEntry> &ranges, DIECursor cursor)
 {
 	unsigned int pclo = ranges.front().pclo - codeSegOff;
diff --git a/src/readDwarf.cpp b/src/readDwarf.cpp
index 6562980..e6f187c 100644
--- a/src/readDwarf.cpp
+++ b/src/readDwarf.cpp
@@ -363,54 +363,6 @@ Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase,
 	return stack[0];
 }
 
-// Find the source of an inlined function by following its 'abstract_origin' 
-// attribute references and recursively merge it into 'id'.
-// TODO: this description isn't quite right. See section 3.3.8.1 in DWARF 4 spec.
-void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context)
-{
-	DWARF_InfoData* abstractOrigin = context.findEntryByPtr(id.abstract_origin);
-	if (!abstractOrigin) {
-		// Could not find abstract origin. Why not?
-		assert(false);
-		return;
-	}
-
-	// assert seems invalid, combination DW_TAG_member and DW_TAG_variable found
-	// in the wild.
-	//
-	// assert(id.tag == idspec.tag);
-
-	if (abstractOrigin->abstract_origin)
-		mergeAbstractOrigin(*abstractOrigin, context);
-	if (abstractOrigin->specification)
-		mergeSpecification(*abstractOrigin, context);
-	id.merge(*abstractOrigin);
-}
-
-// Find the declaration entry for a definition by following its 'specification'
-// attribute references and merge it into 'id'.
-void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context)
-{
-	DWARF_InfoData* idspec = context.findEntryByPtr(id.specification);
-	if (!idspec) {
-		// Could not find decl for this definition. Why not?
-		assert(false);
-		return;
-	}
-
-	// assert seems invalid, combination DW_TAG_member and DW_TAG_variable found
-	// in the wild.
-	//
-	// assert(id.tag == idspec.tag);
-
-	if (idspec->abstract_origin)
-		mergeAbstractOrigin(*idspec, context);
-	if (idspec->specification) {
-		mergeSpecification(*idspec, context);
-	}
-	id.merge(*idspec);
-}
-
 LOCCursor::LOCCursor(const DIECursor& parent, unsigned long off)
 	: parent(parent)
 {
diff --git a/src/readDwarf.h b/src/readDwarf.h
index 4b0ac98..b4f2f85 100644
--- a/src/readDwarf.h
+++ b/src/readDwarf.h
@@ -547,9 +547,6 @@ typedef std::unordered_map<std::pair<unsigned, unsigned>, byte*> abbrevMap_t;
 // as either an absolute value, a register, or a register-relative address.
 Location decodeLocation(const DWARF_Attribute& attr, const Location* frameBase = 0, int at = 0);
 
-void mergeAbstractOrigin(DWARF_InfoData& id, const CV2PDB& context);
-void mergeSpecification(DWARF_InfoData& id, const CV2PDB& context);
-
 // Debug Information Entry Cursor
 class DIECursor
 {

From a21b4159ff2e72f8ad356c602db47bf2644498b1 Mon Sep 17 00:00:00 2001
From: Rainer Schuetze <r.sagitario@gmx.de>
Date: Sun, 2 Apr 2023 11:46:49 +0200
Subject: [PATCH 12/12] remove unnecessary BASE_DWARF_TYPE, BASE_USER_TYPE is
 enough to represent the CodeView base index

---
 src/cv2pdb.h      | 3 +--
 src/dwarf2pdb.cpp | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/cv2pdb.h b/src/cv2pdb.h
index 76d6340..c61a74b 100644
--- a/src/cv2pdb.h
+++ b/src/cv2pdb.h
@@ -245,10 +245,9 @@ class CV2PDB : public LastError
 	int allocDwarfTypes;
 
 	static constexpr int BASE_USER_TYPE = 0x1000;
-	static constexpr int BASE_DWARF_TYPE = 0x1000;
 
 	int nextUserType = BASE_USER_TYPE;
-	int nextDwarfType = BASE_DWARF_TYPE;
+	int nextDwarfType = BASE_USER_TYPE;
 	int objectType;
 
 	int emptyFieldListType;
diff --git a/src/dwarf2pdb.cpp b/src/dwarf2pdb.cpp
index c46ab32..b97dcb9 100644
--- a/src/dwarf2pdb.cpp
+++ b/src/dwarf2pdb.cpp
@@ -2143,7 +2143,7 @@ bool CV2PDB::addDWARFPublics()
 	mspdb::Mod* mod = globalMod();
 
 	int type = 0;
-	int rc = mod->AddPublic2("public_all", img.text.secNo + 1, 0, BASE_DWARF_TYPE);
+	int rc = mod->AddPublic2("public_all", img.text.secNo + 1, 0, BASE_USER_TYPE);
 	if (rc <= 0)
 		return setError("cannot add public");
 	return true;