Skip to content

Commit

Permalink
Try to respect RPATHS of calling dlopen modules with dlinfo
Browse files Browse the repository at this point in the history
This commit is just for backup on our main strategy of using dlinfo
instead of dlopen.

Signed-off-by: Pablo Galindo <[email protected]>
  • Loading branch information
pablogsal committed Sep 3, 2024
1 parent 91d7016 commit a81955c
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 66 deletions.
1 change: 1 addition & 0 deletions news/549.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a deadlock in libc between memray locks and dlopen internal locks.
134 changes: 70 additions & 64 deletions src/memray/_memray/hooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,75 +306,82 @@ posix_memalign(void** memptr, size_t alignment, size_t size) noexcept
return ret;
}

// We need to override dlopen/dlclose to account for new shared libraries being
// loaded in the process memory space. This is needed so we can correctly track
// allocations in those libraries by overriding their PLT entries and also so we
// can properly map the addresses of the symbols in those libraries when we
// resolve later native traces. Unfortunately, we can't just override dlopen
// directly because of the following edge case: when a shared library dlopen's
// another by name (e.g. dlopen("libfoo.so")), the dlopen call will honor the
// RPATH/RUNPATH of the calling library if it's set. Some libraries set an
// RPATH/RUNPATH based on $ORIGIN (the path of the calling library) to load
// dependencies from a relative directory based on the location of the calling
// library. This means that if we override dlopen, we'll end up loading the
// library from the wrong path or more likely, not loading it at all because the
// dynamic loader will think the memray extenion it's the calling library and
// the RPATH of the real calling library will not be honoured.
//
// To work around this, we override dlsym instead and override the symbols in
// the loaded libraries only the first time we have seen a handle passed to
// dlsym. This works because for a symbol from a given dlopen-ed library to
// appear in a call stack, *something* from that library has to be dlsym-ed
// first. The only exception to this are static initializers, but we cannot
// track those anyway by overriding dlopen as they run within the dlopen call
// itself.
// There's another set of cases we would miss: if library A has a static initializer
// that passes a pointer to one of its functions to library B, and library B stores
// that function pointer, then we could see calls into library A via the function pointer
// held by library B, even though dlsym was never called on library A. This should be
// very rare and will be corrected the next time library B calls dlsym so this should
// not be a problem in practice.

class DlsymCache
{
public:
auto insert(const void* handle)
{
std::unique_lock lock(mutex_);
return d_handles.insert(handle);
}

void erase(const void* handle)
{
std::unique_lock lock(mutex_);
d_handles.erase(handle);
}

private:
mutable std::mutex mutex_;
std::unordered_set<const void*> d_handles;
};

static DlsymCache dlsym_cache;

void*
dlsym(void* handle, const char* symbol) noexcept
dlopen(const char* filename, int flag) noexcept
{
assert(MEMRAY_ORIG(dlsym));
void* ret;
assert(MEMRAY_ORIG(dlopen));
void* ret = nullptr;
{
tracking_api::RecursionGuard guard;
ret = MEMRAY_ORIG(dlsym)(handle, symbol);
#if defined(__GLIBC__)
if (filename != nullptr && filename[0] != '\0' && std::strchr(filename, '/') == nullptr) {
void* const callerAddr = __builtin_extract_return_addr(__builtin_return_address(0));

Dl_info info;
if (dladdr(callerAddr, &info)) {
const char* dlname = info.dli_fname;
{
struct stat dlstat, exestat;
// Check fi we are being called from the main executable
Dl_info main_info;
void* main_sym = NULL;
void* self_handle = dlopen(nullptr, RTLD_LAZY | RTLD_NOLOAD);
if (self_handle) {
main_sym = dlsym(self_handle, "main");
dlclose(self_handle);
}
if (main_sym && dladdr(main_sym, &main_info)
&& strcmp(main_info.dli_fname, info.dli_fname) == 0)
{
dlname = nullptr;
}
}

void* caller = MEMRAY_ORIG(dlopen)(dlname, RTLD_LAZY | RTLD_NOLOAD);
if (caller != nullptr) {
Dl_serinfo size;
if (dlinfo(caller, RTLD_DI_SERINFOSIZE, &size) == 0) {
auto* paths = reinterpret_cast<Dl_serinfo*>(new char[size.dls_size]);
*paths = size;
if (dlinfo(caller, RTLD_DI_SERINFO, paths) == 0) {
for (unsigned i = 0; i != paths->dls_cnt; ++i) {
const char* name = paths->dls_serpath[i].dls_name;
std::string path;
if (name == nullptr || name[0] == '\0') {
// In the dynamic linking search path, an
// empty entry typically represents the
// current working directory ($PWD).
path = filename;
}
path = name;
if (path.back() != '/') {
path += '/';
}
path += filename;
ret = MEMRAY_ORIG(dlopen)(path.c_str(), flag);
if (ret) {
break;
}
}
}
delete[] reinterpret_cast<char*>(paths);
}
dlclose(caller);
}
}
}
#endif
// Fallback if we found nothing
if (ret == nullptr) {
ret = MEMRAY_ORIG(dlopen)(filename, flag);
}
}
if (ret) {
auto [_, inserted] = dlsym_cache.insert(handle);
if (inserted) {
tracking_api::Tracker::invalidate_module_cache();
if (symbol
&& (0 == strcmp(symbol, "PyInit_greenlet") || 0 == strcmp(symbol, "PyInit__greenlet")))
{
tracking_api::Tracker::beginTrackingGreenlets();
}
tracking_api::Tracker::invalidate_module_cache();
if (filename
&& (nullptr != strstr(filename, "/_greenlet.") || nullptr != strstr(filename, "/greenlet.")))
{
tracking_api::Tracker::beginTrackingGreenlets();
}
}
return ret;
Expand All @@ -390,7 +397,6 @@ dlclose(void* handle) noexcept
tracking_api::RecursionGuard guard;
ret = MEMRAY_ORIG(dlclose)(handle);
}
dlsym_cache.erase(handle);
tracking_api::NativeTrace::flushCache();
if (!ret) tracking_api::Tracker::invalidate_module_cache();
return ret;
Expand Down
4 changes: 2 additions & 2 deletions src/memray/_memray/hooks.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
FOR_EACH_HOOKED_FUNCTION(aligned_alloc) \
FOR_EACH_HOOKED_FUNCTION(mmap) \
FOR_EACH_HOOKED_FUNCTION(munmap) \
FOR_EACH_HOOKED_FUNCTION(dlsym) \
FOR_EACH_HOOKED_FUNCTION(dlopen) \
FOR_EACH_HOOKED_FUNCTION(dlclose) \
FOR_EACH_HOOKED_FUNCTION(PyGILState_Ensure) \
MEMRAY_PLATFORM_HOOKED_FUNCTIONS
Expand Down Expand Up @@ -179,7 +179,7 @@ void*
pvalloc(size_t size) noexcept;

void*
dlsym(void* handle, const char* symbol) noexcept;
dlopen(const char* filename, int flag) noexcept;

int
dlclose(void* handle) noexcept;
Expand Down

0 comments on commit a81955c

Please sign in to comment.