From 701e472a1310a2c32f311ce3c970fc46b82add4f Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Thu, 2 Mar 2023 22:00:23 +0100 Subject: [PATCH 01/21] pt: Add logging macros --- pintool/addrtrace.cpp | 6 +++++- pintool/utils.H | 28 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 pintool/utils.H diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index 44394931..8ee749f1 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -27,6 +27,7 @@ #include "pin-macros.H" #include "pin.H" +#include "utils.H" #include #include #include @@ -45,6 +46,8 @@ using namespace std; +int DEBUG_LEVEL; + /***********************************************************************/ VOID RecordFunctionEntry(THREADID threadid, ADDRINT bbl, ADDRINT bp, @@ -112,7 +115,6 @@ KNOB KnobDebug(KNOB_MODE_WRITEONCE, "pintool", "debug", "0", #define REALLOC "realloc" #define CALLOC "calloc" #define FREE "free" -#define DEBUG(x) if (KnobDebug.Value() >= x) int alloc_instrumented = 0; @@ -1960,6 +1962,8 @@ int main(int argc, char *argv[]) { PIN_InitSymbols(); + DEBUG_LEVEL = KnobDebug.Value(); + if (KnobLeaks.Value() && KnobCallstack.Value()) { leaks = new CallStack(); use_callstack = true; diff --git a/pintool/utils.H b/pintool/utils.H new file mode 100644 index 00000000..014ba0bd --- /dev/null +++ b/pintool/utils.H @@ -0,0 +1,28 @@ +#if !defined(UTILS_H_) +#define UTILS_H_ + +#include "pin.H" +#include "proc-map.H" + +extern int DEBUG_LEVEL; + +#define DEBUG(x) if (DEBUG_LEVEL >= x) +#define MESSAGE(x, y) std::cout << x << y << std::endl +#define PT_DEBUG(x, msg) DEBUG(x) MESSAGE("[pt-dbg" << x << "] ", msg) +#define PT_INFO(msg) MESSAGE("[pt-info] ", msg) +#define PT_WARN(msg) MESSAGE("[pt-warn] ", msg) +#define PT_ASSERT(x, msg) \ + { \ + if (!(x)) { \ + printProcMap(); \ + MESSAGE("[pt-error] ", msg); \ + ASSERT(false, "pintool failed."); \ + } \ + } +#define PT_ERROR(msg) PT_ASSERT(false, msg) + +// TODO PT_ASSERT +// printheap(); +// print_allocmap(); + +#endif From 36007a143c541a3ff3dbabb59e9194a94af394e3 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Fri, 3 Mar 2023 11:17:43 +0100 Subject: [PATCH 02/21] pt/proc-map: Add ProcMap commands --- pintool/makefile.rules | 3 +++ pintool/proc-map.H | 9 +++++++ pintool/proc-map.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 pintool/proc-map.H create mode 100644 pintool/proc-map.cpp diff --git a/pintool/makefile.rules b/pintool/makefile.rules index 2292fb5f..b01db1d7 100644 --- a/pintool/makefile.rules +++ b/pintool/makefile.rules @@ -83,6 +83,9 @@ $(OBJDIR)call-stack$(OBJ_SUFFIX): call-stack.cpp call-stack.H $(OBJDIR)sha1$(OBJ_SUFFIX): sha1.cpp sha1.H $(CXX) $(TOOL_CXXFLAGS) $(COMP_OBJ)$@ $< +$(OBJDIR)proc-map$(OBJ_SUFFIX): proc-map.cpp proc-map.H + $(CXX) $(TOOL_CXXFLAGS) $(COMP_OBJ)$@ $< + # Build the tool as a dll (shared object). $(OBJDIR)addrtrace$(PINTOOL_SUFFIX): $(OBJDIR)call-stack$(OBJ_SUFFIX) $(OBJDIR)sha1$(OBJ_SUFFIX) $(OBJDIR)addrtrace$(OBJ_SUFFIX) $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $(^:%.h=) $(TOOL_LPATHS) $(TOOL_LIBS) diff --git a/pintool/proc-map.H b/pintool/proc-map.H new file mode 100644 index 00000000..474c0949 --- /dev/null +++ b/pintool/proc-map.H @@ -0,0 +1,9 @@ +#if !defined(PROC_MAP_H_) +#define PROC_MAP_H_ + +#include "pin.H" + +void printProcMap(void); +ADDRINT getAddrFromProcMap(const std::string command, short pos); + +#endif diff --git a/pintool/proc-map.cpp b/pintool/proc-map.cpp new file mode 100644 index 00000000..1f1b4de0 --- /dev/null +++ b/pintool/proc-map.cpp @@ -0,0 +1,55 @@ +#include "proc-map.H" +#include "pin.H" +#include "utils.H" +#include + +using namespace std; + +int pid = PIN_GetPid(); + +FILE *readProcMap(const std::string command, short pos) { + FILE *fp; + std::stringstream command_string; + command_string << "cat /proc/" << pid << "/maps"; + if (!command.empty()) { + command_string << " | grep '" << command + << "' | awk '{print $1}' | cut -f" << pos << " -d-"; + } + const std::string to_pass(command_string.str()); + PT_DEBUG(1, "readProcMap command: " << to_pass.c_str()); + + const char *arg = to_pass.c_str(); + fp = popen(arg, "r"); + if (!fp) { + PT_ERROR("readProcMap failed: " << to_pass.c_str()); + } + return fp; +} + +void printProcMap(void) { + char buffer[64]; + FILE *fp = readProcMap("", 0); + if (fp != NULL) { + while (fgets(buffer, 64, fp) != NULL) { + std::cout << buffer; + } + pclose(fp); + } +} + +ADDRINT getAddrFromProcMap(const std::string command, short pos) { + char buffer[64]; + FILE *fp = readProcMap(command, pos); + if (fp != NULL) { + while (fgets(buffer, 64, fp) != NULL) { + pclose(fp); + } + } + + std::string tmp = "0x" + (std::string)buffer; + ADDRINT addr = strtol(tmp.c_str(), NULL, 0); + + PT_DEBUG(3, "getAddrFromProcMap returns " << hex << addr); + + return addr; +} From 65c45228314a780d24b52fb4c9e2344b17bd18de Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Fri, 3 Mar 2023 11:18:54 +0100 Subject: [PATCH 03/21] pt/addrtrace: Add type & vec for imgobj --- pintool/addrtrace.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index 8ee749f1..e383552b 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -173,6 +173,17 @@ std::vector trace; /* Contains all traced instructions */ ofstream imgfile; /* Holds memory layout with function symbols */ ofstream vdsofile; /* Holds vdso shared library */ +/***********************************************************************/ +/* Image tracking*/ +typedef struct { + string name; + uint64_t baseaddr; + uint64_t endaddr; +} imgobj_t; + +typedef std::vector IMGVEC; +IMGVEC imgvec; + /***********************************************************************/ /* Heap tracking */ From 65f13000e099e544b152fdfcf7df371454fbd6aa Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Fri, 3 Mar 2023 11:20:05 +0100 Subject: [PATCH 04/21] pt/addrtrace: Trace stack and vvar region --- pintool/addrtrace.cpp | 19 +++++++++++++++++++ pintool/makefile.rules | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index e383552b..7809f33f 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -199,6 +199,10 @@ memobj_t *heapcache; typedef std::vector HEAPVEC; HEAPVEC heap; +/***********************************************************************/ +/* Stack tracking*/ +imgobj_t stack; + /***********************************************************************/ /* Multithreading */ @@ -2007,6 +2011,21 @@ int main(int argc, char *argv[]) { INS_AddInstrumentFunction(instrumentLeakingInstructions, 0); } + /* Getting the stack and vvar address range for this process */ + stack.baseaddr = getAddrFromProcMap("stack", 1); + stack.endaddr = getAddrFromProcMap("stack", 2); + PT_DEBUG(1, "stack.baseaddr is " << hex << stack.baseaddr); + PT_DEBUG(1, "stack.endaddr is " << hex << stack.endaddr); + + imgobj_t imgdata = { + .name = "vvar", + .baseaddr = getAddrFromProcMap("vvar", 1), + .endaddr = getAddrFromProcMap("vvar", 2), + }; + imgvec.push_back(imgdata); + PT_DEBUG(1, "vvar.baseaddr is " << hex << imgdata.baseaddr); + PT_DEBUG(1, "vvar.endaddr is " << hex << imgdata.endaddr); + PIN_AddThreadStartFunction(ThreadStart, 0); PIN_AddThreadFiniFunction(ThreadFini, 0); PIN_AddFiniFunction(Fini, 0); diff --git a/pintool/makefile.rules b/pintool/makefile.rules index b01db1d7..44cb33b2 100644 --- a/pintool/makefile.rules +++ b/pintool/makefile.rules @@ -87,5 +87,5 @@ $(OBJDIR)proc-map$(OBJ_SUFFIX): proc-map.cpp proc-map.H $(CXX) $(TOOL_CXXFLAGS) $(COMP_OBJ)$@ $< # Build the tool as a dll (shared object). -$(OBJDIR)addrtrace$(PINTOOL_SUFFIX): $(OBJDIR)call-stack$(OBJ_SUFFIX) $(OBJDIR)sha1$(OBJ_SUFFIX) $(OBJDIR)addrtrace$(OBJ_SUFFIX) +$(OBJDIR)addrtrace$(PINTOOL_SUFFIX): $(OBJDIR)call-stack$(OBJ_SUFFIX) $(OBJDIR)sha1$(OBJ_SUFFIX) $(OBJDIR)proc-map$(OBJ_SUFFIX) $(OBJDIR)addrtrace$(OBJ_SUFFIX) $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $(^:%.h=) $(TOOL_LPATHS) $(TOOL_LIBS) From dd28a8af029cc3b4993e6e925b70aeec976a86ca Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Fri, 3 Mar 2023 13:31:00 +0100 Subject: [PATCH 05/21] pt/addrtrace: Activate CallStackManager --- pintool/addrtrace.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index 7809f33f..b13dcb2c 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -25,6 +25,7 @@ /***********************************************************************/ +#include "call-stack.H" #include "pin-macros.H" #include "pin.H" #include "utils.H" @@ -2026,6 +2027,9 @@ int main(int argc, char *argv[]) { PT_DEBUG(1, "vvar.baseaddr is " << hex << imgdata.baseaddr); PT_DEBUG(1, "vvar.endaddr is " << hex << imgdata.endaddr); + auto mngr = CALLSTACK::CallStackManager::get_instance(); + mngr->activate(); + PIN_AddThreadStartFunction(ThreadStart, 0); PIN_AddThreadFiniFunction(ThreadFini, 0); PIN_AddFiniFunction(Fini, 0); From 1563971b3f481068f360bc87540e7cd7c1157ec7 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Fri, 3 Mar 2023 14:22:50 +0100 Subject: [PATCH 06/21] pt/addrtrace: Add print- & get- fns for CallStack --- pintool/addrtrace.cpp | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index b13dcb2c..80473de6 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -1003,6 +1003,44 @@ void record_heap_op(memobj_t *obj, ADDRINT addr) { record_entry(entry); } +/** + * Fetch callstack for debugging purpose and to diversify the logical base + * addresses. + */ + +void fetchCallStack(THREADID threadid, vector &out, + CALLSTACK::IPVEC &ipvec) { + auto mngr = CALLSTACK::CallStackManager::get_instance(); + auto cs = mngr->get_stack(threadid); + cs.emit_stack(cs.depth(), out, ipvec); +} + +void printCallStack(THREADID threadid) { + vector out; + CALLSTACK::IPVEC ipvec; + fetchCallStack(threadid, out, ipvec); + + for (uint32_t i = 0; i < out.size(); i++) { + cout << out[i]; + } +} + +string getCallStack(THREADID threadid) { + vector out; + CALLSTACK::IPVEC ipvec; + fetchCallStack(threadid, out, ipvec); + + DEBUG(2) for (uint32_t i = 0; i < out.size(); i++) { cout << out[i]; } + + stringstream unique_cs(ios_base::app | ios_base::out); + for (auto i : ipvec) { + unique_cs << " 0x" << hex << i.ipaddr; + } + PT_DEBUG(2, "callstack " << unique_cs.str()); + + return unique_cs.str(); +} + /** * Handle calls to [m|re|c]alloc by keeping a list of all heap objects * This function is not thread-safe. Lock first. From 1b4c89db23ab3631276ac72edc8af221ca1ee70a Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Fri, 3 Mar 2023 14:50:51 +0100 Subject: [PATCH 07/21] pt/addrtrace: Restructure instrumentMainAndAlloc fn --- pintool/addrtrace.cpp | 235 ++++++++++++++++++++---------------------- 1 file changed, 113 insertions(+), 122 deletions(-) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index 80473de6..705eaa7f 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -1510,8 +1510,13 @@ VOID RecordFunctionExit(THREADID threadid, ADDRINT bbl, ADDRINT ins, * @param v UNUSED */ VOID instrumentMainAndAlloc(IMG img, VOID *v) { + if (!IMG_Valid(img)) { + PT_ERROR("loaded image is invalid"); + } + string name = IMG_Name(img); - DEBUG(1) std::cout << "[pintool] Instrumenting " << name << std::endl; + PT_DEBUG(1, "instrumenting " << name); + if (imgfile.is_open()) { uint64_t high = IMG_HighAddress(img); uint64_t low = IMG_LowAddress(img); @@ -1522,138 +1527,124 @@ VOID instrumentMainAndAlloc(IMG img, VOID *v) { * with IMG_SizeMapped instead. */ high = low + IMG_SizeMapped(img); - DEBUG(1) - std::cout << "[pintool] VDSO low: 0x" << std::hex << low - << std::endl; - DEBUG(1) - std::cout << "[pintool] VDSO high: 0x" << std::hex << high - << std::endl; - DEBUG(1) - std::cout << "[pintool] VDSO size mapped: 0x" << std::hex - << IMG_SizeMapped(img) << std::endl; + PT_DEBUG(1, "vdso low: 0x" << hex << low); + PT_DEBUG(1, "vdso high: 0x" << hex << high); + PT_DEBUG(1, "vdso size mapped: 0x" << hex << IMG_SizeMapped(img)); vdsofile.write((const char *)low, IMG_SizeMapped(img)); vdsofile.close(); name = KnobVDSO.Value(); } - imgfile << "Image:" << std::endl; - imgfile << name << std::endl; - imgfile << std::hex << low << ":" << high << std::endl; + PT_DEBUG(1, "image name: " << name); + PT_DEBUG(1, "image low: 0x " << hex << low); + PT_DEBUG(1, "image high: 0x " << hex << high); + imgfile << "Image:" << endl; + imgfile << name << endl; + imgfile << hex << low << ":" << hex << high << endl; + + for (SYM sym = IMG_RegsymHead(img); SYM_Valid(sym); + sym = SYM_Next(sym)) { + imgfile << hex << SYM_Address(sym) + << ":" + PIN_UndecorateSymbolName(SYM_Name(sym), + UNDECORATION_NAME_ONLY) + << endl; + } } - if (IMG_Valid(img)) { - if (imgfile.is_open()) { - for (SYM sym = IMG_RegsymHead(img); SYM_Valid(sym); - sym = SYM_Next(sym)) { - imgfile << std::hex << SYM_Address(sym) << ":" + SYM_Name(sym) - << std::endl; - } + PT_DEBUG(1, "KnobMain: " << KnobMain.Value()); + if (KnobMain.Value().compare("ALL") != 0) { + RTN mainRtn = RTN_FindByName(img, KnobMain.Value().c_str()); + if (mainRtn.is_valid()) { + PT_DEBUG(1, "KnobMain is valid"); + RTN_Open(mainRtn); + RTN_InsertCall(mainRtn, IPOINT_BEFORE, (AFUNPTR)RecordMainBegin, + IARG_THREAD_ID, IARG_ADDRINT, RTN_Address(mainRtn), + IARG_END); + RTN_InsertCall(mainRtn, IPOINT_AFTER, (AFUNPTR)RecordMainEnd, + IARG_THREAD_ID, IARG_ADDRINT, RTN_Address(mainRtn), + IARG_END); + RTN_Close(mainRtn); } - DEBUG(1) - std::cout << "[pintool] KnobMain: " << KnobMain.Value() << std::endl; - if (KnobMain.Value().compare("ALL") != 0) { - RTN mainRtn = RTN_FindByName(img, KnobMain.Value().c_str()); - if (mainRtn.is_valid()) { - RTN_Open(mainRtn); - RTN_InsertCall(mainRtn, IPOINT_BEFORE, (AFUNPTR)RecordMainBegin, - IARG_THREAD_ID, IARG_ADDRINT, - RTN_Address(mainRtn), IARG_END); - RTN_InsertCall(mainRtn, IPOINT_AFTER, (AFUNPTR)RecordMainEnd, - IARG_THREAD_ID, IARG_ADDRINT, - RTN_Address(mainRtn), IARG_END); - RTN_Close(mainRtn); - } - } else { - DEBUG(1) std::cout << "[pintool] Recording all" << std::endl; - if (!Record) { - WaitForFirstFunction = true; - } + } else { + PT_DEBUG(1, "recording all"); + if (!Record) { + WaitForFirstFunction = true; } + } - if (name.find("alloc.so") != std::string::npos || - name.find("libc.so") != std::string::npos) { - /* If alloc.so is pre-loaded, it will always be before libc - * We only instrument once - */ - if (alloc_instrumented) { - DEBUG(1) - std::cout << "[pintool] Allocation already instrumented" - << std::endl; - } else { - DEBUG(1) - std::cout << "[pintool] Instrumenting allocation" << std::endl; - if (KnobTrackHeap.Value()) { - RTN mallocRtn = RTN_FindByName(img, MALLOC); - if (mallocRtn.is_valid()) { - DEBUG(1) - std::cout << "[pintool] Malloc found in " - << IMG_Name(img) << std::endl; - RTN_Open(mallocRtn); - RTN_InsertCall(mallocRtn, IPOINT_BEFORE, - (AFUNPTR)RecordMallocBefore, - IARG_THREAD_ID, IARG_INST_PTR, - IARG_FUNCARG_ENTRYPOINT_VALUE, 0, - IARG_END); - RTN_InsertCall(mallocRtn, IPOINT_AFTER, - (AFUNPTR)RecordMallocAfter, - IARG_THREAD_ID, IARG_INST_PTR, - IARG_FUNCRET_EXITPOINT_VALUE, IARG_END); - RTN_Close(mallocRtn); - } - - RTN reallocRtn = RTN_FindByName(img, REALLOC); - if (reallocRtn.is_valid()) { - DEBUG(1) - std::cout << "[pintool] Realloc found in " - << IMG_Name(img) << std::endl; - RTN_Open(reallocRtn); - RTN_InsertCall( - reallocRtn, IPOINT_BEFORE, - (AFUNPTR)RecordReallocBefore, IARG_THREAD_ID, - IARG_INST_PTR, IARG_FUNCARG_ENTRYPOINT_VALUE, 0, - IARG_FUNCARG_ENTRYPOINT_VALUE, 1, IARG_END); - RTN_InsertCall(reallocRtn, IPOINT_AFTER, - (AFUNPTR)RecordReallocAfter, - IARG_THREAD_ID, IARG_INST_PTR, - IARG_FUNCRET_EXITPOINT_VALUE, IARG_END); - RTN_Close(reallocRtn); - } - - RTN callocRtn = RTN_FindByName(img, CALLOC); - if (callocRtn.is_valid()) { - DEBUG(1) - std::cout << "[pintool] Calloc found in " - << IMG_Name(img) << std::endl; - RTN_Open(callocRtn); - RTN_InsertCall( - callocRtn, IPOINT_BEFORE, - (AFUNPTR)RecordCallocBefore, IARG_THREAD_ID, - IARG_INST_PTR, IARG_FUNCARG_ENTRYPOINT_VALUE, 0, - IARG_FUNCARG_ENTRYPOINT_VALUE, 1, IARG_END); - RTN_InsertCall(callocRtn, IPOINT_AFTER, - (AFUNPTR)RecordCallocAfter, - IARG_THREAD_ID, IARG_INST_PTR, - IARG_FUNCRET_EXITPOINT_VALUE, IARG_END); - RTN_Close(callocRtn); - } - - RTN freeRtn = RTN_FindByName(img, FREE); - if (freeRtn.is_valid()) { - DEBUG(1) - std::cout << "[pintool] Free found in " << IMG_Name(img) - << std::endl; - RTN_Open(freeRtn); - RTN_InsertCall( - freeRtn, IPOINT_BEFORE, (AFUNPTR)RecordFreeBefore, - IARG_THREAD_ID, IARG_INST_PTR, - IARG_FUNCARG_ENTRYPOINT_VALUE, 0, IARG_END); - RTN_Close(freeRtn); - } - } - alloc_instrumented = 1; - } - } /* alloc.so or libc */ + if (!KnobTrackHeap.Value()) { + PT_INFO("heap tracking inactive"); + return; } + + if (alloc_instrumented) { + PT_DEBUG(1, "allocation already instrumented"); + return; + } + + if (name.find("alloc.so") == std::string::npos && + name.find("libc.so") == std::string::npos) { + PT_DEBUG(3, "image (" << name << ") is not named alloc.so or libc.so"); + return; + } + /* If alloc.so is pre-loaded, it will always be before libc + * We only instrument once + */ + PT_DEBUG(1, "instrumenting allocation in " << name); + alloc_instrumented = 1; + + RTN mallocRtn = RTN_FindByName(img, MALLOC); + if (!mallocRtn.is_valid()) { + PT_ERROR("malloc not found"); + } + PT_DEBUG(1, "malloc found in " << IMG_Name(img)); + RTN_Open(mallocRtn); + RTN_InsertCall(mallocRtn, IPOINT_BEFORE, (AFUNPTR)RecordMallocBefore, + IARG_THREAD_ID, IARG_INST_PTR, IARG_FUNCARG_ENTRYPOINT_VALUE, + 0, IARG_END); + RTN_InsertCall(mallocRtn, IPOINT_AFTER, (AFUNPTR)RecordMallocAfter, + IARG_THREAD_ID, IARG_INST_PTR, IARG_FUNCRET_EXITPOINT_VALUE, + IARG_END); + RTN_Close(mallocRtn); + + RTN reallocRtn = RTN_FindByName(img, REALLOC); + if (!reallocRtn.is_valid()) { + PT_ERROR("realloc not found"); + } + PT_DEBUG(1, "realloc found in " << IMG_Name(img)); + RTN_Open(reallocRtn); + RTN_InsertCall(reallocRtn, IPOINT_BEFORE, (AFUNPTR)RecordReallocBefore, + IARG_THREAD_ID, IARG_INST_PTR, IARG_FUNCARG_ENTRYPOINT_VALUE, + 0, IARG_FUNCARG_ENTRYPOINT_VALUE, 1, IARG_END); + RTN_InsertCall(reallocRtn, IPOINT_AFTER, (AFUNPTR)RecordReallocAfter, + IARG_THREAD_ID, IARG_INST_PTR, IARG_FUNCRET_EXITPOINT_VALUE, + IARG_END); + RTN_Close(reallocRtn); + + RTN callocRtn = RTN_FindByName(img, CALLOC); + if (!callocRtn.is_valid()) { + PT_ERROR("calloc not found"); + } + PT_DEBUG(1, "calloc found in " << IMG_Name(img)); + RTN_Open(callocRtn); + RTN_InsertCall(callocRtn, IPOINT_BEFORE, (AFUNPTR)RecordCallocBefore, + IARG_THREAD_ID, IARG_INST_PTR, IARG_FUNCARG_ENTRYPOINT_VALUE, + 0, IARG_FUNCARG_ENTRYPOINT_VALUE, 1, IARG_END); + RTN_InsertCall(callocRtn, IPOINT_AFTER, (AFUNPTR)RecordCallocAfter, + IARG_THREAD_ID, IARG_INST_PTR, IARG_FUNCRET_EXITPOINT_VALUE, + IARG_END); + RTN_Close(callocRtn); + + RTN freeRtn = RTN_FindByName(img, FREE); + if (!freeRtn.is_valid()) { + PT_ERROR("free not found"); + } + PT_DEBUG(1, "free found in " << IMG_Name(img)); + RTN_Open(freeRtn); + RTN_InsertCall(freeRtn, IPOINT_BEFORE, (AFUNPTR)RecordFreeBefore, + IARG_THREAD_ID, IARG_INST_PTR, IARG_FUNCARG_ENTRYPOINT_VALUE, + 0, IARG_END); + RTN_Close(freeRtn); } /** From fde079ab8ae28b2665b53b170829231d2d7e7d2a Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Fri, 3 Mar 2023 14:51:40 +0100 Subject: [PATCH 08/21] pt/addrtrace: Add image and sections to imgvec --- pintool/addrtrace.cpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index 705eaa7f..ef2851e1 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -28,6 +28,7 @@ #include "call-stack.H" #include "pin-macros.H" #include "pin.H" +#include "sha1.H" #include "utils.H" #include #include @@ -1542,6 +1543,32 @@ VOID instrumentMainAndAlloc(IMG img, VOID *v) { imgfile << name << endl; imgfile << hex << low << ":" << hex << high << endl; + imgobj_t imgdata; + imgdata.name = name; + imgdata.baseaddr = low; + imgdata.endaddr = high; + + for (SEC sec = IMG_SecHead(img); SEC_Valid(sec); sec = SEC_Next(sec)) { + string sec_name = SEC_Name(sec); + low = SEC_Address(sec); + high = SEC_Address(sec) + SEC_Size(sec); + + PT_DEBUG(1, "sec name: " << sec_name); + PT_DEBUG(1, "sec low: 0x " << hex << low); + PT_DEBUG(1, "sec high: 0x " << hex << high); + if (!SEC_Mapped(sec)) { + PT_INFO("unmapped sec dropped: " << sec_name); + continue; + } + imgdata.baseaddr = + (imgdata.baseaddr > low) ? low : imgdata.baseaddr; + imgdata.endaddr = (imgdata.endaddr < high) ? high : imgdata.endaddr; + } + + PT_DEBUG(1, "image low: 0x " << hex << imgdata.baseaddr); + PT_DEBUG(1, "image high: 0x " << hex << imgdata.endaddr); + imgvec.push_back(imgdata); + for (SYM sym = IMG_RegsymHead(img); SYM_Valid(sym); sym = SYM_Next(sym)) { imgfile << hex << SYM_Address(sym) From d72f4d0613ba1d2ef5618a5f02ff63e3688baa31 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Fri, 3 Mar 2023 17:24:17 +0100 Subject: [PATCH 09/21] pt/addrtrace: Add heap tracing with logical addrs --- pintool/addrtrace.cpp | 499 +++++++++++++++++++++++++++--------------- 1 file changed, 324 insertions(+), 175 deletions(-) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index ef2851e1..78958fec 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -44,6 +44,7 @@ #include #include #include +#include #include using namespace std; @@ -190,17 +191,33 @@ IMGVEC imgvec; /* Heap tracking */ typedef struct { - uint32_t id; + char const *type; size_t size; uint64_t base; - bool used; + string callstack; + string hash; } memobj_t; -uint32_t nextheapid = 1; -memobj_t *heapcache; typedef std::vector HEAPVEC; HEAPVEC heap; +std::unordered_map hashmap; +std::unordered_map allocmap; + +imgobj_t heaprange; + +/***********************************************************************/ +/* Brk tracking*/ +typedef struct { + imgobj_t image; + ADDRINT low; + ADDRINT high; +} program_break_obj_t; + +typedef std::vector BRKVEC; +BRKVEC brk_vec; +imgobj_t brk_range; + /***********************************************************************/ /* Stack tracking*/ imgobj_t stack; @@ -211,13 +228,20 @@ imgobj_t stack; /* Global lock to protect trace buffer */ // PIN_MUTEX lock; +/***********************************************************************/ +/* Allocation tracking */ + typedef struct { + char const *type; ADDRINT size; + std::string callstack; } alloc_state_t; typedef struct { + char const *type; ADDRINT old; ADDRINT size; + std::string callstack; } realloc_state_t; typedef struct { @@ -942,66 +966,152 @@ VOID ThreadFini(THREADID threadid, const CONTEXT *ctxt, INT32 code, VOID *v) { } /***********************************************************************/ -/** Heap recording */ +/**Calculating the Logical Address from the Virtual Address + * Every Logical Address is 64 bit = 32 bit MemoryIndex + 32 bit Offset*/ /***********************************************************************/ -void printheap() { - std::cout << "[pintool] Heap:" << std::endl; +void printAllocmap() { + if (allocmap.size() == 0) { + return; + } + PT_INFO("allocmap:"); + for (auto &it : allocmap) { + cout << it.first << " - " << it.second << endl; + } +} + +void printHeap() { + if (heap.size() == 0) { + return; + } + PT_INFO("heap:"); for (HEAPVEC::iterator it = heap.begin(); it != heap.end(); ++it) { - std::cout << std::hex << it->id << ":" << it->base << "-" << it->size - << " used:" << it->used << std::endl; + std::cout << it->base << "-" << it->size << std::endl; } } -memobj_t *lookup_heap(uint64_t addr) { - uint64_t paddr = addr; - if (heapcache) { - ASSERT(heapcache->used, "[pintool] Error: Heapcache corrupt"); - if (paddr >= heapcache->base && - paddr < heapcache->base + heapcache->size) { - return heapcache; +uint64_t getIndex(string hash) { + uint64_t to_shift; + sscanf(hash.c_str(), "%llx", (long long unsigned int *)&to_shift); + return (to_shift << 32); +} + +void *getLogicalAddress(void *virt_addr, void *ip) { + PT_DEBUG(3, "get log_addr for virt_addr of " << virt_addr); + + if (virt_addr == nullptr) { + PT_WARN("dereferenced a nullptr"); + return virt_addr; + } + // Is the Virtual Address in the Heap address space? + /* Set heap start and end markers */ + if (heap.size() && + (heaprange.baseaddr != heap.front().base || + heaprange.endaddr != heap.back().base + heap.back().size)) { + heaprange.baseaddr = heap.front().base; + heaprange.endaddr = heap.back().base + heap.back().size; + PT_DEBUG(3, "heap.baseaddr: " << heaprange.baseaddr); + PT_DEBUG(3, "heap.endaddr: " << heaprange.endaddr); + } + // Does the Virtual Address belong to any heap object? + if ((uint64_t)virt_addr >= heaprange.baseaddr && + (uint64_t)virt_addr <= heaprange.endaddr) { + uint64_t *log_addr = static_cast(virt_addr); + for (auto i : heap) { + if ((uint64_t)virt_addr < i.base || + (uint64_t)virt_addr >= (i.base + i.size)) { + continue; + } + auto offset = (uint64_t)virt_addr - i.base; + log_addr = (uint64_t *)(allocmap[i.base] | offset); + PT_DEBUG(4, "found addr in heap vector, log_addr: " + << std::hex << (uint64_t)log_addr); + return log_addr; } } - - for (HEAPVEC::reverse_iterator it = heap.rbegin(); it != heap.rend(); - ++it) { - if (!it->used) { + // Is the Virtual Address in the Stack address space? + if ((uint64_t)virt_addr >= stack.baseaddr && + (uint64_t)virt_addr < stack.endaddr) { + PT_DEBUG(4, "found addr in stack " << std::hex << (uint64_t)virt_addr); + return virt_addr; + } + // Is the Virtual Address in the IMG/Code address space? + for (auto i : imgvec) { + if ((uint64_t)virt_addr < i.baseaddr || + (uint64_t)virt_addr >= i.endaddr) { continue; } - if (paddr >= it->base) { - if (paddr < it->base + it->size) { - return heapcache = &(*it); - } else { - break; + PT_DEBUG(4, "found addr in image " << std::hex << (uint64_t)virt_addr); + return virt_addr; + } + // Is the Virtual Address in the Program Break address space? + if ((uint64_t)virt_addr >= brk_range.baseaddr && + (uint64_t)virt_addr < brk_range.endaddr) { + PT_DEBUG(2, "found addr in brk " << std::hex << (uint64_t)virt_addr + << " called from " << std::hex + << (uint64_t)ip); + for (auto brk : brk_vec) { + if ((uint64_t)virt_addr < brk.low || + (uint64_t)virt_addr >= brk.high) { + continue; } + PT_ASSERT(((uint64_t)ip >= brk.image.baseaddr && + (uint64_t)ip < brk.image.endaddr), + "brk access within different image than brk " + "syscall originated."); + return virt_addr; + } + PT_WARN("found addr in brk " << std::hex << (uint64_t)virt_addr + << " called from " << std::hex + << (uint64_t)ip); + for (auto brk : brk_vec) { + PT_WARN("brk from " << brk.low << " to " << brk.high); } + PT_ERROR("brk access cannot be matched to any brk section"); } - return NULL; -} -VOID test_mem_heap(entry_t *pentry) { - memobj_t *obj = lookup_heap(pentry->data); - if (obj) { - uint64_t pdata = pentry->data; - pdata -= obj->base; - ASSERT((pdata & 0xFFFFFFFF00000000ULL) == 0, - "[pintool] Error: Heap object too big"); - pdata |= (uint64_t)obj->id << 32ULL; - pentry->data = pdata; - pentry->type |= MASK_HEAP; - } + PT_WARN("not found addr " << std::hex << (uint64_t)virt_addr); + // TODO + // PT_ASSERT(fast_recording == false, + // "virt_addr was not found despite being in fast_recording + // mode"); + DEBUG(3) printHeap(); + DEBUG(4) printProcMap(); + return virt_addr; } +/***********************************************************************/ +/** Heap recording */ +/***********************************************************************/ + /** - * Add alloc/free to the trace - * This function is not thread-safe. Lock first. + * Calculate sha1-hash and use the 4 bytes of the hash as the memory Index */ -void record_heap_op(memobj_t *obj, ADDRINT addr) { - entry_t entry; - entry.type = obj->used ? HALLOC : HFREE; - entry.ip = (((uint64_t)obj->id << 32ULL) | obj->size); - entry.data = addr; - record_entry(entry); +void calculateSha1Hash(memobj_t *obj) { + PT_DEBUG(2, "HashMap callstack " << obj->callstack); + + /* Hash shall be unique wrt. calling location */ + std::stringstream to_hash(obj->type, ios_base::app | ios_base::out); + to_hash << obj->callstack; + + /** + * A hash, i.e. logical base address, shall only occur once. + * For variation the occurence of a hash is counted within hashmap. + * This count is used together with the calling location to create an + * unique hash. + */ + std::stringstream count; + count << hex << hashmap[to_hash.str()]; + hashmap[to_hash.str()] += 1; + + SHA1 hash; + to_hash << count.str(); + hash.update(to_hash.str()); + obj->hash = hash.final(); + + PT_DEBUG(1, "HashMap for " << to_hash.str()); + PT_DEBUG(1, "HashMap count 0x" << count.str()); + PT_DEBUG(1, "Object hash " << hex << obj->hash); } /** @@ -1043,93 +1153,132 @@ string getCallStack(THREADID threadid) { } /** - * Handle calls to [m|re|c]alloc by keeping a list of all heap objects + * Handle calls to free by maintaining a list of all heap objects * This function is not thread-safe. Lock first. */ -void domalloc(ADDRINT addr, ADDRINT size, uint32_t objid) { - heapcache = NULL; - memobj_t obj; - if (objid) { - obj.id = objid; - } else { - obj.id = nextheapid++; +void dofree(ADDRINT addr) { + PT_DEBUG(1, "dofree 0x" << std::hex << addr); + + if (!addr) { + PT_DEBUG(3, "dofree called with NULL"); + return; } - obj.base = addr; - obj.size = size; - obj.used = true; - record_heap_op(&obj, addr); + if (allocmap.find(addr) == allocmap.end()) { + PT_ERROR("dofree didnot found an element in allocmap"); + } + allocmap.erase(addr); - DEBUG(2) - std::cout << "[pintool] Domalloc " << std::hex << addr << " " << size - << std::endl; - /* Keep heap vector sorted */ - HEAPVEC::iterator prev = heap.end(); - HEAPVEC::iterator found = heap.end(); for (HEAPVEC::iterator it = heap.begin(); it != heap.end(); ++it) { - if (it->used) { - if (it->base >= obj.base) { - /* insert before*/ - if (obj.base + obj.size > it->base) { - DEBUG(2) printheap(); - DEBUG(2) - std::cout << "[pintool] Inserting new object" << std::hex - << obj.base << "-" << obj.size << std::endl; - ASSERT(false, "[pintool] Error: Corrupted heap A?!"); - } - found = it; - break; - } + if (it->base != addr) { + continue; } - prev = it; + heap.erase(it); + return; } - if (found == heap.end()) { - /* no match found, append to the end */ - heap.push_back(obj); - } else { - if (prev == heap.end()) { - heap.insert(found, obj); - } else if (prev->used) { - /* We cannot reuse prev, insert at 'found' */ - if (prev->used && prev->base + prev->size > obj.base) { - DEBUG(2) printheap(); - DEBUG(2) - std::cout << "[pintool] Inserting new object" << std::hex - << obj.base << "-" << obj.size << std::endl; - ASSERT(false, "[pintool] Error: Corrupted heap B?!"); - } - heap.insert(found, obj); - } else { - /* prev is unused, reuse it */ - *prev = obj; - } - } + PT_ERROR("dofree didnot found an element in heap"); } /** - * Handle calls to free by maintaining a list of all heap objects + * Handle calls to [m|re|c]alloc by keeping a list of all heap objects * This function is not thread-safe. Lock first. */ -uint32_t dofree(ADDRINT addr) { - heapcache = NULL; - DEBUG(2) std::cout << "[pintool] Dofree " << std::hex << addr << std::endl; - if (!addr) { - return 0; +void doalloc(ADDRINT addr, alloc_state_t *alloc_state, + realloc_state_t *realloc_state) { + if (alloc_state == nullptr && realloc_state == nullptr) { + PT_ERROR("doalloc failed as only NULL states were passed"); } - for (HEAPVEC::iterator it = heap.begin(); it != heap.end(); ++it) { - if (!it->used) { - continue; + if (alloc_state != nullptr && realloc_state != nullptr) { + PT_ERROR("doalloc failed as multiple states were passed"); + } + + /* Convert (re)alloc_state to memobj */ + memobj_t obj; + obj.base = addr; + obj.size = (alloc_state) ? alloc_state->size : realloc_state->size; + obj.type = (alloc_state) ? alloc_state->type : realloc_state->type; + obj.callstack = + (alloc_state) ? alloc_state->callstack : realloc_state->callstack; + + PT_DEBUG(1, "doalloc " << hex << addr << " " << hex << obj.size << " type " + << obj.type); + + /* Edit object in heap vector, if in-place reallocation */ + /* allocmap does not require any update, as base address is not changed */ + if (realloc_state && addr == realloc_state->old) { + for (HEAPVEC::iterator it = heap.begin(); it != heap.end(); it++) { + if (obj.base != it->base) { + continue; + } + it->size = obj.size; + PT_DEBUG(2, "in-place reallocation addr " << std::hex << addr); + return; } - if (it->base == addr) { - it->used = false; - record_heap_op(it, addr); - return it->id; + PT_ERROR("in-place reallocation failed"); + } + + /* Write allocmap */ + ADDRINT log_addr = 0; + if (alloc_state) { + /* Create log_addr, if allocation */ + calculateSha1Hash(&obj); + log_addr = getIndex(obj.hash.substr(32, 8)); + } else { + /* Read log_addr, if not in-place reallocation */ + if (allocmap.find(realloc_state->old) == allocmap.end()) { + PT_ERROR("doalloc used invalid allocmap addr " + << std::hex << realloc_state->old); } + log_addr = allocmap[realloc_state->old]; + dofree(realloc_state->old); } - std::cout << "[pintool] Warning: Invalid free!" << std::endl; - DEBUG(2) printheap(); - return 0; + allocmap[addr] = log_addr; + + /* Insert object into heap vector for allocation or not-inplace reallocation + */ + /* Keep heap vector sorted */ + HEAPVEC::iterator below = heap.begin(); + HEAPVEC::iterator above = heap.end(); + for (HEAPVEC::iterator it = heap.begin(); it != heap.end(); it++) { + if (obj.base < it->base) { + above = it; + break; + } + below = it; + } + + if (!heap.size() || (below == &heap.back() && + obj.base >= heap.back().base + heap.back().size)) { + /* No inbetween slot found, thus append to the end */ + PT_INFO("Push to heap end"); + heap.push_back(obj); + } else if ( + /* Insert in front, if obj does not overlap first element */ + (above == heap.begin() && obj.base + obj.size <= above->base) + /* Valid inbetween slot found, thus insert before 'above' */ + || (obj.base >= below->base + below->size && + obj.base + obj.size <= above->base)) { + heap.insert(above, obj); + } else if ( + /* Insert in front, if below is of type MMAP/MREMAP and spans over obj + */ + (below->type == std::string(MMAP) || + below->type == std::string(MREMAP)) && + (obj.base >= below->base) && + (obj.base + obj.size <= below->base + below->size)) { + heap.insert(below, obj); + } else { + /* Invalid inbetween slot found, thus quit */ + printHeap(); + PT_INFO("below.base " << below->base); + PT_INFO("above.base " << above->base); + PT_INFO("obj.base " << obj.base); + PT_INFO("obj.size " << obj.size); + PT_ASSERT(false, "Corrupted heap?!"); + } + DEBUG(3) printHeap(); + DEBUG(3) printAllocmap(); } /** @@ -1138,19 +1287,21 @@ uint32_t dofree(ADDRINT addr) { * @param size The size parameter passed to malloc */ VOID RecordMallocBefore(THREADID threadid, VOID *ip, ADDRINT size) { - if (!Record) - return; + PT_DEBUG(1, "malloc called with 0x" << std::hex << size << " at " << ip); // PIN_MutexLock(&lock); if (thread_state[threadid].realloc_state.size() == 0) { - DEBUG(1) - std::cout << "[pintool] Malloc called with " << std::hex << size - << " at " << ip << std::endl; - alloc_state_t state = {.size = size}; + SHA1 hash; + hash.update(getCallStack(threadid)); /* calculate the hash of the set of + IPs in the Callstack */ + alloc_state_t state = { + .type = MALLOC, + .size = size, + .callstack = hash.final().substr(28, 12), /* 6 byte SHA1 hash */ + }; thread_state[threadid].malloc_state.push_back(state); } else { - DEBUG(1) - std::cout << "[pintool] Malloc ignored due to realloc_pending (size= " - << std::hex << size << ") at " << ip << std::endl; + PT_DEBUG(1, "malloc ignored due to realloc_pending (size= " + << std::hex << size << ") at " << ip); } // PIN_MutexUnlock(&lock); } @@ -1161,16 +1312,13 @@ VOID RecordMallocBefore(THREADID threadid, VOID *ip, ADDRINT size) { * @param addr The allocated heap pointer */ VOID RecordMallocAfter(THREADID threadid, VOID *ip, ADDRINT addr) { - if (!Record) - return; + PT_DEBUG(1, "malloc returned " << std::hex << addr); // PIN_MutexLock(&lock); - DEBUG(1) - std::cout << "[pintool] Malloc returned " << std::hex << addr << std::endl; - ASSERT(thread_state[threadid].malloc_state.size() > 0, - "[pintool] Error: Malloc returned but not called"); + PT_ASSERT(thread_state[threadid].malloc_state.size() > 0, + "malloc returned but not called"); alloc_state_t state = thread_state[threadid].malloc_state.back(); thread_state[threadid].malloc_state.pop_back(); - domalloc(addr, state.size, 0); + doalloc(addr, &state, nullptr); // PIN_MutexUnlock(&lock); } @@ -1182,15 +1330,18 @@ VOID RecordMallocAfter(THREADID threadid, VOID *ip, ADDRINT addr) { */ VOID RecordReallocBefore(THREADID threadid, VOID *ip, ADDRINT addr, ADDRINT size) { - if (!Record) - return; + PT_DEBUG(1, "realloc called with " << std::hex << addr << " " << size + << " at " << ip); // PIN_MutexLock(&lock); - DEBUG(1) - std::cout << "[pintool] Realloc called with " << std::hex << addr << " " - << size << " at " << ip << std::endl; - realloc_state_t state; - state.size = size; - state.old = addr; + SHA1 hash; + hash.update(getCallStack( + threadid)); /* calculate the hash of the set of IPs in the Callstack */ + realloc_state_t state = { + .type = REALLOC, + .old = addr, + .size = size, + .callstack = hash.final().substr(28, 12), /* 6 byte SHA1 hash */ + }; thread_state[threadid].realloc_state.push_back(state); // PIN_MutexUnlock(&lock); } @@ -1201,22 +1352,14 @@ VOID RecordReallocBefore(THREADID threadid, VOID *ip, ADDRINT addr, * @param addr The allocated heap pointer */ VOID RecordReallocAfter(THREADID threadid, VOID *ip, ADDRINT addr) { - if (!Record) - return; + PT_DEBUG(1, "realloc returned " << std::hex << addr << " at " << ip); // PIN_MutexLock(&lock); - DEBUG(1) - std::cout << "[pintool] Realloc returned " << std::hex << addr << " at " - << ip << std::endl; - ASSERT(thread_state[threadid].realloc_state.size() > 0, - "[pintool] Error: Realloc returned but not called"); + PT_ASSERT(thread_state[threadid].realloc_state.size() > 0, + "realloc returned but not called"); realloc_state_t state = thread_state[threadid].realloc_state.back(); thread_state[threadid].realloc_state.pop_back(); - uint32_t objid = 0; - if (state.old) { - objid = dofree(state.old); - } - domalloc(addr, state.size, objid); + doalloc(addr, nullptr, &state); // PIN_MutexUnlock(&lock); } @@ -1228,14 +1371,21 @@ VOID RecordReallocAfter(THREADID threadid, VOID *ip, ADDRINT addr) { */ VOID RecordCallocBefore(THREADID threadid, VOID *ip, ADDRINT nelem, ADDRINT size) { - if (!Record) - return; + PT_DEBUG(1, "calloc called with " << std::hex << nelem << "*" << std::hex + << size); // PIN_MutexLock(&lock); - DEBUG(1) - std::cout << "[pintool] Calloc called with " << std::hex << nelem << " " - << size << " at " << ip << std::endl; - alloc_state_t state = {.size = size}; - thread_state[threadid].calloc_state.push_back(state); + if (thread_state[threadid].calloc_state.size() == 0) { + SHA1 hash; + hash.update(getCallStack(threadid)); /* calculate the hash of the set of + IPs in the Callstack */ + alloc_state_t state = { + .type = CALLOC, + .size = nelem * size, + .callstack = hash.final().substr(28, 12), /* 6 byte SHA1 hash */ + }; + + thread_state[threadid].calloc_state.push_back(state); + } // PIN_MutexUnlock(&lock); } @@ -1245,17 +1395,13 @@ VOID RecordCallocBefore(THREADID threadid, VOID *ip, ADDRINT nelem, * @param addr The allocated heap pointer */ VOID RecordCallocAfter(THREADID threadid, VOID *ip, ADDRINT addr) { - if (!Record) - return; + PT_DEBUG(1, "calloc returned " << std::hex << addr); // PIN_MutexLock(&lock); - DEBUG(1) - std::cout << "[pintool] Calloc returned " << std::hex << addr << " at " - << ip << std::endl; - ASSERT(thread_state[threadid].calloc_state.size() > 0, - "[pintool] Error: Calloc returned but not called"); + PT_ASSERT(thread_state[threadid].calloc_state.size() != 0, + "calloc returned but not called"); alloc_state_t state = thread_state[threadid].calloc_state.back(); thread_state[threadid].calloc_state.pop_back(); - domalloc(addr, state.size, 0); + doalloc(addr, &state, nullptr); // PIN_MutexUnlock(&lock); } @@ -1265,16 +1411,20 @@ VOID RecordCallocAfter(THREADID threadid, VOID *ip, ADDRINT addr) { * @param addr The heap pointer which is freed */ VOID RecordFreeBefore(THREADID threadid, VOID *ip, ADDRINT addr) { - if (!Record) - return; + PT_DEBUG(1, "free called with " << std::hex << addr << " at " << ip); + DEBUG(2) printCallStack(threadid); // PIN_MutexLock(&lock); - DEBUG(1) - std::cout << "[pintool] Free called with " << std::hex << addr << " at " - << ip << std::endl; dofree(addr); // PIN_MutexUnlock(&lock); } +/** + * Record free + * @param threadid The thread + * @param addr The heap pointer which is freed + */ +VOID RecordFreeAfter(VOID) { PT_DEBUG(1, "free returned"); } + /***********************************************************************/ /** Instruction recording */ /***********************************************************************/ @@ -1294,8 +1444,7 @@ VOID RecordMemRead(THREADID threadid, VOID *ip, VOID *addr, entry_t entry; entry.type = READ; entry.ip = (uint64_t)((uintptr_t)ip); - entry.data = (uint64_t)((uintptr_t)addr); - test_mem_heap(&entry); + entry.data = (uint64_t)((uintptr_t)getLogicalAddress(addr, ip)); DEBUG(3) printf("[pintool] Read %" PRIx64 " to %" PRIx64 "\n", (uint64_t)entry.ip, (uint64_t)entry.data); @@ -1322,8 +1471,7 @@ VOID RecordMemWrite(THREADID threadid, VOID *ip, VOID *addr, entry_t entry; entry.type = WRITE; entry.ip = (uint64_t)((uintptr_t)ip); - entry.data = (uint64_t)((uintptr_t)addr); - test_mem_heap(&entry); + entry.data = (uint64_t)((uintptr_t)getLogicalAddress(addr, ip)); DEBUG(3) printf("[pintool] Write %" PRIx64 " to %" PRIx64 "\n", (uint64_t)entry.ip, (uint64_t)entry.data); @@ -1671,6 +1819,7 @@ VOID instrumentMainAndAlloc(IMG img, VOID *v) { RTN_InsertCall(freeRtn, IPOINT_BEFORE, (AFUNPTR)RecordFreeBefore, IARG_THREAD_ID, IARG_INST_PTR, IARG_FUNCARG_ENTRYPOINT_VALUE, 0, IARG_END); + RTN_InsertCall(freeRtn, IPOINT_AFTER, (AFUNPTR)RecordFreeAfter, IARG_END); RTN_Close(freeRtn); } From 917c2554fcb51bb40d553298e087997efbd3e6c8 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Wed, 23 Mar 2022 09:50:06 +0100 Subject: [PATCH 10/21] analysis/analyze: Delete apply of masks for heap --- analysis/analyze.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/analysis/analyze.py b/analysis/analyze.py index b637bca6..8fd507a2 100755 --- a/analysis/analyze.py +++ b/analysis/analyze.py @@ -67,7 +67,6 @@ SPLeak, TraceQueue, Type, - MaskType, Leak, ) import multiprocessing @@ -320,9 +319,6 @@ def iterate_queue(files, fast=True): assert e1.data != 0 assert e2.data != 0 assert queues[0].callstack == queues[1].callstack - if Type(e1.type) in (Type.HREAD, Type.HWRITE): - e1.data &= 0x00000000FFFFFFFF - e2.data &= 0x00000000FFFFFFFF if e1.data != e2.data: report_dataleak(queues[0].callstack, e1, e2) else: @@ -333,11 +329,6 @@ def iterate_queue(files, fast=True): # Mixture of heap and non-heap read/write. Maybe, heap tracking is imprecise # We require that both elements are either (h)read or (h)write debug(0, "Imprecise heap tracking @ %08x", (e1.ip)) - # assert((e1.type | MaskType.HEAP.value) == (e2.type | MaskType.HEAP.value)) - if (e1.type | MaskType.HEAP.value) > 0: - e1.data &= 0x00000000FFFFFFFF - if (e2.type | MaskType.HEAP.value) > 0: - e2.data &= 0x00000000FFFFFFFF report_dataleak(queues[0].callstack, e1, e2) else: # This should never happen. We miss some conditional branches in the code From 347b4df12ac9c9482c72014a58a1bcf0ef8e735c Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Sat, 4 Mar 2023 07:53:20 +0100 Subject: [PATCH 11/21] pt/addrtrace: Rm unused masks for heap operations --- pintool/addrtrace.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index 78958fec..85d31939 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -158,14 +158,6 @@ enum entry_type_t { FUNC_EXIT = MASK_BRANCH | C, FUNC_BBL = MASK_BRANCH | D, - MASK_HEAP = 8, - /* Instructions doing memory reads/writes on heap objects */ - HREAD = MASK_HEAP | READ, - HWRITE = MASK_HEAP | WRITE, - /* Heap alloc/free calls */ - HALLOC = MASK_HEAP | C, - HFREE = MASK_HEAP | D, - MASK_LEAK = 16, /* Dataleaks and Controlflow leaks, used for fast recording */ DLEAK = MASK_LEAK | A, From f033d4c4488b465c16295da3ff21ecb648f29de3 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Fri, 3 Mar 2023 17:30:17 +0100 Subject: [PATCH 12/21] pt/addrtrace: Add option to stop tracing within memory allocations --- pintool/addrtrace.cpp | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index 85d31939..e76799df 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -105,6 +105,9 @@ KNOB KnobMain(KNOB_MODE_WRITEONCE, "pintool", "main", "main", "Main method to start tracing. Defaults to 'main'. " "Provide ALL to trace from the beginning."); +KNOB KnobStopTrace(KNOB_MODE_WRITEONCE, "pintool", "stop_trace", "1", + "Stop tracing within memory allocations"); + KNOB KnobDebug(KNOB_MODE_WRITEONCE, "pintool", "debug", "0", "Enable debugging output."); @@ -126,6 +129,10 @@ bool WaitForFirstFunction = false; bool Record = false; bool use_callstack = false; +/* Stop tracing within memory allocations */ +bool StopTrace = true; +bool Trace = true; + /** * Traces are stored in a binary format, containing a sequence of * entry_t entries. @@ -1295,6 +1302,8 @@ VOID RecordMallocBefore(THREADID threadid, VOID *ip, ADDRINT size) { PT_DEBUG(1, "malloc ignored due to realloc_pending (size= " << std::hex << size << ") at " << ip); } + if (StopTrace) + Trace = false; // PIN_MutexUnlock(&lock); } @@ -1311,6 +1320,7 @@ VOID RecordMallocAfter(THREADID threadid, VOID *ip, ADDRINT addr) { alloc_state_t state = thread_state[threadid].malloc_state.back(); thread_state[threadid].malloc_state.pop_back(); doalloc(addr, &state, nullptr); + Trace = true; // PIN_MutexUnlock(&lock); } @@ -1335,6 +1345,8 @@ VOID RecordReallocBefore(THREADID threadid, VOID *ip, ADDRINT addr, .callstack = hash.final().substr(28, 12), /* 6 byte SHA1 hash */ }; thread_state[threadid].realloc_state.push_back(state); + if (StopTrace) + Trace = false; // PIN_MutexUnlock(&lock); } @@ -1352,6 +1364,7 @@ VOID RecordReallocAfter(THREADID threadid, VOID *ip, ADDRINT addr) { thread_state[threadid].realloc_state.pop_back(); doalloc(addr, nullptr, &state); + Trace = true; // PIN_MutexUnlock(&lock); } @@ -1378,6 +1391,8 @@ VOID RecordCallocBefore(THREADID threadid, VOID *ip, ADDRINT nelem, thread_state[threadid].calloc_state.push_back(state); } + if (StopTrace) + Trace = false; // PIN_MutexUnlock(&lock); } @@ -1394,6 +1409,7 @@ VOID RecordCallocAfter(THREADID threadid, VOID *ip, ADDRINT addr) { alloc_state_t state = thread_state[threadid].calloc_state.back(); thread_state[threadid].calloc_state.pop_back(); doalloc(addr, &state, nullptr); + Trace = true; // PIN_MutexUnlock(&lock); } @@ -1407,6 +1423,8 @@ VOID RecordFreeBefore(THREADID threadid, VOID *ip, ADDRINT addr) { DEBUG(2) printCallStack(threadid); // PIN_MutexLock(&lock); dofree(addr); + if (StopTrace) + Trace = false; // PIN_MutexUnlock(&lock); } @@ -1415,7 +1433,10 @@ VOID RecordFreeBefore(THREADID threadid, VOID *ip, ADDRINT addr) { * @param threadid The thread * @param addr The heap pointer which is freed */ -VOID RecordFreeAfter(VOID) { PT_DEBUG(1, "free returned"); } +VOID RecordFreeAfter(VOID) { + PT_DEBUG(1, "free returned"); + Trace = true; +} /***********************************************************************/ /** Instruction recording */ @@ -1430,7 +1451,7 @@ VOID RecordFreeAfter(VOID) { PT_DEBUG(1, "free returned"); } */ VOID RecordMemRead(THREADID threadid, VOID *ip, VOID *addr, bool fast_recording) { - if (!Record) + if (!Record || !Trace) return; // PIN_MutexLock(&lock); entry_t entry; @@ -1457,7 +1478,7 @@ VOID RecordMemRead(THREADID threadid, VOID *ip, VOID *addr, */ VOID RecordMemWrite(THREADID threadid, VOID *ip, VOID *addr, bool fast_recording) { - if (!Record) + if (!Record || !Trace) return; // PIN_MutexLock(&lock); entry_t entry; @@ -1484,7 +1505,7 @@ VOID RecordMemWrite(THREADID threadid, VOID *ip, VOID *addr, * @param target The next instruction (e.g. branch target) */ VOID RecordBranch_unlocked(THREADID threadid, ADDRINT ins, ADDRINT target) { - if (!Record) + if (!Record || !Trace) return; entry_t entry; entry.type = BRANCH; @@ -1548,7 +1569,7 @@ VOID RecordRep(THREADID threadid, ADDRINT bbl, ADDRINT bp, const CONTEXT *ctxt, */ VOID RecordFunctionEntry_unlocked(THREADID threadid, ADDRINT ins, BOOL indirect, ADDRINT target) { - if (!Record) + if (!Record || !Trace) return; entry_t entry; entry.type = FUNC_ENTRY; @@ -1577,7 +1598,7 @@ VOID RecordFunctionEntry(THREADID threadid, ADDRINT bbl, ADDRINT ins, Record = true; WaitForFirstFunction = false; } - if (!Record) + if (!Record || !Trace) return; // PIN_MutexLock(&lock); if (indirect) { @@ -1603,7 +1624,7 @@ VOID RecordFunctionEntry(THREADID threadid, ADDRINT bbl, ADDRINT ins, */ VOID RecordFunctionExit_unlocked(THREADID threadid, ADDRINT ins, ADDRINT target) { - if (!Record) + if (!Record || !Trace) return; entry_t entry; entry.type = FUNC_EXIT; @@ -1627,7 +1648,7 @@ VOID RecordFunctionExit_unlocked(THREADID threadid, ADDRINT ins, */ VOID RecordFunctionExit(THREADID threadid, ADDRINT bbl, ADDRINT ins, const CONTEXT *ctxt, bool fast_recording) { - if (!Record) + if (!Record || !Trace) return; ADDRINT target = ctxt != NULL ? (ADDRINT)PIN_GetContextReg(ctxt, REG_INST_PTR) : 0; @@ -2176,6 +2197,7 @@ int main(int argc, char *argv[]) { PIN_InitSymbols(); DEBUG_LEVEL = KnobDebug.Value(); + StopTrace = KnobStopTrace.Value(); if (KnobLeaks.Value() && KnobCallstack.Value()) { leaks = new CallStack(); From 312762d6795c547d63af1a60c5c2edd8fee2450b Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Thu, 2 Mar 2023 22:01:25 +0100 Subject: [PATCH 13/21] pt/addrtrace: Add syscall alloc id macros --- pintool/addrtrace.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index e76799df..af884a15 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -120,7 +120,11 @@ KNOB KnobDebug(KNOB_MODE_WRITEONCE, "pintool", "debug", "0", #define MALLOC "malloc" #define REALLOC "realloc" #define CALLOC "calloc" +#define MMAP "mmap" +#define MREMAP "mremap" +#define MUNMAP "munmap" #define FREE "free" +#define BRK "brk" int alloc_instrumented = 0; From 27cca574d8b0784d362143c5be741334a9371488 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Fri, 3 Mar 2023 14:00:03 +0100 Subject: [PATCH 14/21] pt/addrtrace: Add Syscall Entry & Exit tracing --- pintool/addrtrace.cpp | 76 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index af884a15..7fd21b00 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -50,6 +50,7 @@ using namespace std; int DEBUG_LEVEL; +int SYSCALL_NUMBER = -1; /***********************************************************************/ @@ -1840,6 +1841,77 @@ VOID instrumentMainAndAlloc(IMG img, VOID *v) { RTN_Close(freeRtn); } +/** + * Handle syscall entry + * We only trace allocation-related syscalls. + * If syscall is not traced SYSCALL_NUMBER is set to -1. + */ +VOID SyscallEntry(THREADID threadid, CONTEXT *ctxt, SYSCALL_STANDARD std, + VOID *v) { + SYSCALL_NUMBER = PIN_GetSyscallNumber(ctxt, std); + + PT_DEBUG(1, "syscall " << hex << PIN_GetContextReg(ctxt, REG_INST_PTR) + << " " << hex << SYSCALL_NUMBER << " " << hex + << PIN_GetSyscallArgument(ctxt, std, 0) << " " << hex + << PIN_GetSyscallArgument(ctxt, std, 1) << " " << hex + << PIN_GetSyscallArgument(ctxt, std, 2) << " " << hex + << PIN_GetSyscallArgument(ctxt, std, 3) << " " << hex + << PIN_GetSyscallArgument(ctxt, std, 4) << " " << hex + << PIN_GetSyscallArgument(ctxt, std, 5)); + + // https://filippo.io/linux-syscall-table/ + switch (SYSCALL_NUMBER) { + case 9: + // MMAP + break; + case 11: + // MUNMAP + break; + case 12: + // BRK + break; + case 25: + // MREMAP + break; + default: + SYSCALL_NUMBER = -1; + PT_INFO("Syscall not catched. syscall number: " + << std::hex << PIN_GetSyscallNumber(ctxt, std)); + break; + } +} + +/** + * Handle syscall exit + */ +VOID SyscallExit(THREADID threadid, CONTEXT *ctxt, SYSCALL_STANDARD std, + VOID *v) { + PT_DEBUG(1, "returns: " << hex << PIN_GetSyscallReturn(ctxt, std)); + + // https://filippo.io/linux-syscall-table/ + switch (SYSCALL_NUMBER) { + case -1: + // Syscall will be dropped, as its number is set to -1 in SyscallEntry + break; + case 9: + // MMAP + break; + case 11: + // MUNMAP + break; + case 12: + // BRK + break; + case 25: + // MREMAP + break; + default: + PT_ERROR("syscall unknown. syscall number: " << SYSCALL_NUMBER); + break; + } + SYSCALL_NUMBER = -1; +} + /** * Instruments instructions operating on memory * @param ins The instruction @@ -2235,6 +2307,10 @@ int main(int argc, char *argv[]) { INS_AddInstrumentFunction(instrumentLeakingInstructions, 0); } + /* Syscall tracing */ + PIN_AddSyscallEntryFunction(SyscallEntry, 0); + PIN_AddSyscallExitFunction(SyscallExit, 0); + /* Getting the stack and vvar address range for this process */ stack.baseaddr = getAddrFromProcMap("stack", 1); stack.endaddr = getAddrFromProcMap("stack", 2); From 610625592329b9585f13b9ad3e74a86526083b29 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Sat, 4 Mar 2023 08:10:25 +0100 Subject: [PATCH 15/21] pt/addrtrace: Add [m, mre]map to thread_state --- pintool/addrtrace.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index 7fd21b00..254c2ba8 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -254,6 +254,8 @@ typedef struct { std::vector malloc_state; std::vector calloc_state; std::vector realloc_state; + std::vector mmap_state; + std::vector mremap_state; ADDRINT RetIP; int newbbl; } thread_state_t; From 224395abbce31614e897ebf108dc3e313d59fbd8 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Sat, 4 Mar 2023 08:22:51 +0100 Subject: [PATCH 16/21] pt/addrtrace: Add handlers for [m,mre,mun]map & brk --- pintool/addrtrace.cpp | 186 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index 254c2ba8..bf25b1e4 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -1445,6 +1445,192 @@ VOID RecordFreeAfter(VOID) { Trace = true; } +/** + * Record mmap + * @param threadid thread + * @param size size parameter passed to mmap + * @param ret TODO + * @param force + */ +VOID RecordMmapBefore(THREADID threadid, ADDRINT size) { + PT_DEBUG(1, "mmap called with " << std::hex << size); + if (thread_state[threadid].mremap_state.size() != 0) { + PT_DEBUG(1, "mmap ignored due to mremap_pending (size= " + << std::hex << size << ")"); + return; + } + if (thread_state[threadid].malloc_state.size() != 0) { + PT_DEBUG(1, "nested mmap stemming from pending malloc" + << " (size= " << std::hex << size << ")"); + } + if (thread_state[threadid].realloc_state.size() != 0) { + PT_DEBUG(1, "nested mmap stemming from pending realloc" + << " (size= " << std::hex << size << ")"); + } + // PIN_MutexLock(&lock); + SHA1 hash; + hash.update(getCallStack(threadid)); /* calculate the hash of the set of + IPs in the Callstack */ + alloc_state_t state = { + .type = MMAP, + .size = size, + .callstack = hash.final().substr(28, 12), /* 6 byte SHA1 hash */ + }; + + thread_state[threadid].mmap_state.push_back(state); + // PIN_MutexUnlock(&lock); +} + +/** + * Record mmap's result + *@param threadid The thread + * @param addr The allocated heap pointer + */ +VOID RecordMmapAfter(THREADID threadid, ADDRINT addr) { + PT_DEBUG(1, "mmap returned " << std::hex << addr); + if (thread_state[threadid].mremap_state.size() != 0) { + PT_DEBUG(1, "mmap ignored due to mremap_pending"); + return; + } + if (thread_state[threadid].malloc_state.size() != 0 || + thread_state[threadid].realloc_state.size() != 0) { + PT_DEBUG(1, "nested mmap due to [m,re]alloc pending"); + } + // PIN_MutexLock(&lock); + + PT_ASSERT(thread_state[threadid].mmap_state.size() != 0, + "mmap returned but not called"); + + alloc_state_t state = thread_state[threadid].mmap_state.back(); + thread_state[threadid].mmap_state.pop_back(); + + doalloc(addr, &state, nullptr); + + // PIN_MutexUnlock(&lock); +} + +/** + * Record mremap + * @param threadid The thread + * @param addr The heap pointer param of mremap + * @param size The size parameter passed to mremap + */ +VOID RecordMremapBefore(THREADID threadid, ADDRINT addr, ADDRINT old_size, + ADDRINT new_size) { + PT_DEBUG(1, "mremap called with " << std::hex << addr << " " << new_size); + // PIN_MutexLock(&lock); + + SHA1 hash; + hash.update(getCallStack( + threadid)); /* calculte the hash of the set of IPs in the Callstack */ + realloc_state_t state = { + .type = MREMAP, + .old = addr, + .size = new_size, + .callstack = hash.final().substr(28, 12), /* 6 byte SHA1 hash */ + }; + thread_state[threadid].mremap_state.push_back(state); + + // PIN_MutexUnlock(&lock); +} + +/** + * Record mremap's result + * @param threadid The thread + * @param addr The allocated heap pointer + */ +VOID RecordMremapAfter(THREADID threadid, ADDRINT addr) { + PT_DEBUG(1, "mremap returned " << std::hex << addr); + // PIN_MutexLock(&lock); + PT_ASSERT(thread_state[threadid].mremap_state.size() != 0, + "mremap returned but not called"); + + realloc_state_t state = thread_state[threadid].mremap_state.back(); + thread_state[threadid].mremap_state.pop_back(); + + doalloc(addr, nullptr, &state); + // PIN_MutexUnlock(&lock); +} + +/** + * Record munmap + * @param threadid The thread + * @param addr The heap pointer which is munmapped + */ +VOID RecordMunmapBefore(THREADID threadid, ADDRINT addr) { + PT_DEBUG(1, "munmap called with " << std::hex << addr); + DEBUG(2) printCallStack(threadid); + // PIN_MutexLock(&lock); + dofree(addr); + // PIN_MutexUnlock(&lock); +} + +/** + * Record brk's call + *@param threadid The thread + * @param addr The returned program break end address + */ +VOID RecordBrkBefore(THREADID threadid, ADDRINT addr) { + PT_DEBUG(1, "brk called with " << std::hex << addr); + DEBUG(3) printCallStack(threadid); + + // In case addr == 0 a new image "owns" brk + if (addr != 0) { + return; + } + // PIN_MutexLock(&lock); + + program_break_obj_t program_break; + brk_vec.push_back(program_break); + + // PIN_MutexUnlock(&lock); +} + +/** + * Record brk's result + *@param threadid The thread + * @param addr The returned program break end address + */ +VOID RecordBrkAfter(THREADID threadid, ADDRINT addr, ADDRINT ret) { + PT_DEBUG(1, "brk returned from " << std::hex << ret << " with " << std::hex + << addr); + // PIN_MutexLock(&lock); + + imgobj_t img; + for (auto i : imgvec) { + if ((uint64_t)ret < i.baseaddr || (uint64_t)ret >= i.endaddr) { + continue; + } + img = i; + break; + } + + program_break_obj_t program_break = brk_vec.back(); + brk_vec.pop_back(); + + program_break.high = addr; + brk_range.endaddr = addr; + if (program_break.image.name.empty()) { + program_break.image = img; + program_break.low = addr; + PT_INFO("new brk owned by image: " << img.name); + PT_DEBUG(1, "ranging from " << program_break.low << " to " + << program_break.high); + } else if (program_break.image.name.compare(img.name) != 0) { + PT_INFO("brk called before from image: " << program_break.image.name); + PT_INFO("brk called now from image: " << img.name); + PT_ASSERT(false, "brk syscalls called within different images"); + } + + if (brk_range.baseaddr == 0) { + brk_range.baseaddr = addr; + } + + brk_vec.push_back(program_break); + + // PIN_MutexUnlock(&lock); +} + /***********************************************************************/ /** Instruction recording */ /***********************************************************************/ From 3ea855af4897681bc37080ac5808a5a9a96d8d18 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Sat, 4 Mar 2023 08:31:12 +0100 Subject: [PATCH 17/21] pt/addrtrace: Call [m,mre,mun]map & brk within syscall handler --- pintool/addrtrace.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index bf25b1e4..01889db5 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -2050,16 +2050,23 @@ VOID SyscallEntry(THREADID threadid, CONTEXT *ctxt, SYSCALL_STANDARD std, // https://filippo.io/linux-syscall-table/ switch (SYSCALL_NUMBER) { case 9: - // MMAP + if (PIN_GetSyscallArgument(ctxt, std, 0)) { + PT_INFO("mmap syscall dropped."); + SYSCALL_NUMBER = -1; + break; + } + RecordMmapBefore(threadid, PIN_GetSyscallArgument(ctxt, std, 1)); break; case 11: - // MUNMAP + RecordMunmapBefore(threadid, PIN_GetSyscallArgument(ctxt, std, 0)); break; case 12: - // BRK + RecordBrkBefore(threadid, PIN_GetSyscallArgument(ctxt, std, 0)); break; case 25: - // MREMAP + RecordMremapBefore(threadid, PIN_GetSyscallArgument(ctxt, std, 0), + PIN_GetSyscallArgument(ctxt, std, 1), + PIN_GetSyscallArgument(ctxt, std, 2)); break; default: SYSCALL_NUMBER = -1; @@ -2082,16 +2089,17 @@ VOID SyscallExit(THREADID threadid, CONTEXT *ctxt, SYSCALL_STANDARD std, // Syscall will be dropped, as its number is set to -1 in SyscallEntry break; case 9: - // MMAP + RecordMmapAfter(threadid, PIN_GetSyscallReturn(ctxt, std)); break; case 11: - // MUNMAP + // Handling of munmap exit is not needed. break; case 12: - // BRK + RecordBrkAfter(threadid, PIN_GetSyscallReturn(ctxt, std), + PIN_GetContextReg(ctxt, REG_INST_PTR)); break; case 25: - // MREMAP + RecordMremapAfter(threadid, PIN_GetSyscallReturn(ctxt, std)); break; default: PT_ERROR("syscall unknown. syscall number: " << SYSCALL_NUMBER); From 1c45883fc633ccc900e353446e4cacddeb9d8a50 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Mon, 6 Mar 2023 10:49:40 +0100 Subject: [PATCH 18/21] pt/addrtrace: Exit if no log_addr, except phase 1 For any accessed virtual address, DATA tries to resolve the matching logical address. There seems to be a general memory range that is not mapped to any image, but used generally. DATA allows such an event to occur in phase 1, but will fail if it occurs in any other phase. Experiments have shown that these accessed addresses do not differ between different runs. --- pintool/addrtrace.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index 01889db5..40347d42 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -51,6 +51,7 @@ using namespace std; int DEBUG_LEVEL; int SYSCALL_NUMBER = -1; +bool PHASE_1 = true; /***********************************************************************/ @@ -1077,10 +1078,7 @@ void *getLogicalAddress(void *virt_addr, void *ip) { } PT_WARN("not found addr " << std::hex << (uint64_t)virt_addr); - // TODO - // PT_ASSERT(fast_recording == false, - // "virt_addr was not found despite being in fast_recording - // mode"); + PT_ASSERT(PHASE_1, "virt_addr was not found despite being not in phase 1"); DEBUG(3) printHeap(); DEBUG(4) printProcMap(); return virt_addr; @@ -2469,6 +2467,7 @@ int main(int argc, char *argv[]) { PIN_InitSymbols(); DEBUG_LEVEL = KnobDebug.Value(); + PHASE_1 = KnobLeaks.Value() == false; StopTrace = KnobStopTrace.Value(); if (KnobLeaks.Value() && KnobCallstack.Value()) { From 29756340d9775b5a945f974f85035b1601a62e4d Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Mon, 6 Mar 2023 10:59:49 +0100 Subject: [PATCH 19/21] pt/utils: Print Heap and Allocmap at exit --- pintool/utils.H | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pintool/utils.H b/pintool/utils.H index 014ba0bd..40711b93 100644 --- a/pintool/utils.H +++ b/pintool/utils.H @@ -15,14 +15,16 @@ extern int DEBUG_LEVEL; { \ if (!(x)) { \ printProcMap(); \ + printHeap(); \ + printAllocmap(); \ MESSAGE("[pt-error] ", msg); \ ASSERT(false, "pintool failed."); \ } \ } #define PT_ERROR(msg) PT_ASSERT(false, msg) -// TODO PT_ASSERT -// printheap(); -// print_allocmap(); +void printHeap(); + +void printAllocmap(); #endif From e934450736aafc7fb89c71f499db6ee9b5a7c8c1 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Tue, 14 Feb 2023 13:52:26 +0100 Subject: [PATCH 20/21] pt/call-stack: Log demangled function name --- pintool/call-stack.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pintool/call-stack.cpp b/pintool/call-stack.cpp index ccbfb32b..876e3dd5 100644 --- a/pintool/call-stack.cpp +++ b/pintool/call-stack.cpp @@ -290,7 +290,8 @@ void CallStack::emit_stack(UINT32 depth, vector &out, o << right << dec << setw(2) << id << "# "; o << "0x" << hex << setw(width) << setfill('0') << iter->target() << " "; - o << setw(20) << setfill(' ') << left << info.func_name; + o << setw(20) << setfill(' ') << left + << PIN_UndecorateSymbolName(info.func_name, UNDECORATION_COMPLETE); o << setw(20) << info.image_name; if (_source_location && info.file_name) { o << " at " << info.file_name << ":" << dec << info.line; From 858ee06a602e01938054eab55a0983138f2e9d31 Mon Sep 17 00:00:00 2001 From: Alexander Wagner Date: Thu, 16 Mar 2023 21:25:00 +0100 Subject: [PATCH 21/21] pt/addrtrace: Reuse logical addrs for heap tracing --- pintool/addrtrace.cpp | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/pintool/addrtrace.cpp b/pintool/addrtrace.cpp index 40347d42..a5d4355f 100644 --- a/pintool/addrtrace.cpp +++ b/pintool/addrtrace.cpp @@ -200,6 +200,7 @@ typedef struct { size_t size; uint64_t base; string callstack; + uint64_t count; string hash; } memobj_t; @@ -1090,13 +1091,10 @@ void *getLogicalAddress(void *virt_addr, void *ip) { /** * Calculate sha1-hash and use the 4 bytes of the hash as the memory Index + * Hash shall be unique wrt. calling location */ void calculateSha1Hash(memobj_t *obj) { - PT_DEBUG(2, "HashMap callstack " << obj->callstack); - - /* Hash shall be unique wrt. calling location */ std::stringstream to_hash(obj->type, ios_base::app | ios_base::out); - to_hash << obj->callstack; /** * A hash, i.e. logical base address, shall only occur once. @@ -1104,18 +1102,17 @@ void calculateSha1Hash(memobj_t *obj) { * This count is used together with the calling location to create an * unique hash. */ - std::stringstream count; - count << hex << hashmap[to_hash.str()]; - hashmap[to_hash.str()] += 1; + obj->count = hashmap[obj->callstack]; + hashmap[obj->callstack] += 1; SHA1 hash; - to_hash << count.str(); + to_hash << obj->callstack << hex << obj->count; hash.update(to_hash.str()); obj->hash = hash.final(); - PT_DEBUG(1, "HashMap for " << to_hash.str()); - PT_DEBUG(1, "HashMap count 0x" << count.str()); - PT_DEBUG(1, "Object hash " << hex << obj->hash); + PT_DEBUG(1, "HashMap callstack 0x" << hex << obj->callstack); + PT_DEBUG(1, "HashMap count 0x" << hex << obj->count); + PT_DEBUG(1, "Object hash 0x" << hex << obj->hash); } /** @@ -1177,6 +1174,12 @@ void dofree(ADDRINT addr) { if (it->base != addr) { continue; } + + if ((it->count + 1) == hashmap[it->callstack]) { + PT_DEBUG(2, + "stack-like heap tracing for 0x" << hex << it->callstack); + hashmap[it->callstack] -= 1; + } heap.erase(it); return; }