Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid creating more backtrace_state* than needed #473

Merged
merged 2 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/473.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a memory leak in Memray itself when many different capture files are opened by a single Memray process and native stacks are being reported. This issue primarily affected ``pytest-memray``.
149 changes: 72 additions & 77 deletions src/memray/_memray/native_resolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,53 +14,54 @@ static const logLevel RESOLVE_LIB_LOG_LEVEL = DEBUG;
static const logLevel RESOLVE_LIB_LOG_LEVEL = WARNING;
#endif

StringStorage::StringStorage()
{
d_interned_data.reserve(4096);
d_interned_data_storage.reserve(4096);
}
std::unordered_set<std::string> InternedString::s_interned_data = []() {
std::unordered_set<std::string> ret;
ret.reserve(4096);
return ret;
}();

size_t
StringStorage::internString(const std::string& str, const char** interned_string)
{
if (str.empty()) {
return 0;
}
std::mutex InternedString::s_mutex;

const size_t id = d_interned_data.size() + 1;
auto inserted = d_interned_data.insert({str, id});
if (interned_string) {
*interned_string = inserted.first->first.c_str();
}
SymbolResolver::BacktraceStateCache SymbolResolver::s_backtrace_states = []() {
SymbolResolver::BacktraceStateCache ret;
ret.reserve(PREALLOCATED_BACKTRACE_STATES);
return ret;
}();

if (!inserted.second) {
return inserted.first->second;
}
// C++11 standard § 23.2.5/8: Rehashing the elements of an unordered associative container
// invalidates iterators, changes ordering between elements, and changes which buckets elements
// appear in, but does not invalidate pointers or references to elements.
d_interned_data_storage.push_back(&inserted.first->first);
std::mutex SymbolResolver::s_backtrace_states_mutex;

return id;
InternedString::InternedString(const std::string& orig)
: d_ref(internString(orig))
{
}

const std::string&
StringStorage::resolveString(size_t index) const
InternedString::get() const
{
assert(index != 0);
return *d_interned_data_storage.at(index - 1);
return d_ref.get();
}

InternedString::operator const std::string&() const
{
return d_ref.get();
}

std::reference_wrapper<const std::string>
InternedString::internString(const std::string& orig)
{
std::lock_guard<std::mutex> lock(s_mutex);
auto inserted = s_interned_data.insert(orig);
return *inserted.first;
}

MemorySegment::MemorySegment(
std::string filename,
InternedString filename,
uintptr_t start,
uintptr_t end,
backtrace_state* state,
size_t filename_index)
: d_filename(std::move(filename))
backtrace_state* state)
: d_filename(filename)
, d_start(start)
, d_end(end)
, d_index(filename_index)
, d_state(state)
{
}
Expand Down Expand Up @@ -107,7 +108,7 @@ MemorySegment::resolveFromSymbolTable(uintptr_t address, MemorySegment::Expanded
auto error_callback = [](void* _data, const char* msg, int errnum) {
auto* data = reinterpret_cast<const CallbackData*>(_data);
LOG(ERROR) << "Error getting backtrace for address " << std::hex << data->address << std::dec
<< " in segment " << data->segment->d_filename << " (errno " << errnum
<< " in segment " << data->segment->d_filename.get() << " (errno " << errnum
<< "): " << msg;
};
backtrace_syminfo(d_state, address, callback, error_callback, &data);
Expand Down Expand Up @@ -157,14 +158,15 @@ MemorySegment::resolveIp(uintptr_t address) const
bool
MemorySegment::operator<(const MemorySegment& segment) const
{
return std::tie(d_start, d_end, d_index) < std::tie(segment.d_start, segment.d_end, segment.d_index);
return std::tie(d_start, d_end, d_filename.get())
< std::tie(segment.d_start, segment.d_end, segment.d_filename.get());
}

bool
MemorySegment::operator!=(const MemorySegment& segment) const
{
return std::tie(d_start, d_end, d_index)
!= std::tie(segment.d_start, segment.d_end, segment.d_index);
return std::tie(d_start, d_end, d_filename.get())
!= std::tie(segment.d_start, segment.d_end, segment.d_filename.get());
}

bool
Expand All @@ -185,45 +187,37 @@ MemorySegment::end() const
return d_end;
}

uintptr_t
MemorySegment::filenameIndex() const
{
return d_index;
}

const std::string&
InternedString
MemorySegment::filename() const
{
return d_filename;
}

ResolvedFrame::ResolvedFrame(
const MemorySegment::Frame& frame,
const std::shared_ptr<StringStorage>& d_string_storage)
: d_string_storage(d_string_storage)
, d_symbol_index(d_string_storage->internString(frame.symbol))
, d_file_index(d_string_storage->internString(frame.filename))
, d_line(frame.lineno)
ResolvedFrame::ResolvedFrame(InternedString symbol, InternedString filename, int lineno)
: d_symbol(symbol)
, d_filename(filename)
, d_line(lineno)
{
}

const std::string&
ResolvedFrame::Symbol() const
{
return d_string_storage->resolveString(d_symbol_index);
return d_symbol;
}

const std::string&
ResolvedFrame::File() const
{
return d_string_storage->resolveString(d_file_index);
return d_filename;
}

int
ResolvedFrame::Line() const
{
return d_line;
}

PyObject*
ResolvedFrame::toPythonObject(python_helpers::PyUnicode_Cache& pystring_cache) const
{
Expand Down Expand Up @@ -255,7 +249,7 @@ ResolvedFrame::toPythonObject(python_helpers::PyUnicode_Cache& pystring_cache) c
const std::string&
ResolvedFrames::memoryMap() const
{
return d_string_storage->resolveString(d_memory_map_index);
return d_interned_memory_map_name;
}

const std::vector<ResolvedFrame>&
Expand All @@ -266,7 +260,6 @@ ResolvedFrames::frames() const

SymbolResolver::SymbolResolver()
{
d_backtrace_states.reserve(PREALLOCATED_BACKTRACE_STATES);
d_resolved_ips_cache.reserve(PREALLOCATED_IPS_CACHE_ITEMS);
}

Expand Down Expand Up @@ -315,27 +308,28 @@ SymbolResolver::resolveFromSegments(uintptr_t ip, size_t generation)
if (expanded_frame.empty()) {
return nullptr;
}
auto segment_index = segment->filenameIndex();
std::transform(
expanded_frame.begin(),
expanded_frame.end(),
std::back_inserter(frames),
[this](const auto& frame) {
return ResolvedFrame{frame, d_string_storage};
[](const auto& frame) {
return ResolvedFrame{
InternedString(frame.symbol),
InternedString(frame.filename),
frame.lineno,
};
});
return std::make_shared<ResolvedFrames>(segment_index, std::move(frames), d_string_storage);
return std::make_shared<ResolvedFrames>(segment->filename(), std::move(frames));
}

void
SymbolResolver::addSegment(
const std::string& filename,
InternedString filename,
backtrace_state* backtrace_state,
const size_t filename_index,
const uintptr_t address_start,
const uintptr_t address_end)
{
currentSegments()
.emplace_back(filename, address_start, address_end, backtrace_state, filename_index);
currentSegments().emplace_back(filename, address_start, address_end, backtrace_state);
d_are_segments_dirty = true;
}

Expand All @@ -345,12 +339,8 @@ SymbolResolver::addSegments(
uintptr_t addr,
const std::vector<tracking_api::Segment>& segments)
{
// We use a char* for the filename to reduce the memory footprint and
// because the libbacktrace callback in findBacktraceState operates on char*
const char* interned_filename = nullptr;
auto filename_index = d_string_storage->internString(filename, &interned_filename);

auto state = findBacktraceState(interned_filename, addr);
InternedString interned_filename(filename);
auto state = getBacktraceState(interned_filename, addr);
if (state == nullptr) {
LOG(RESOLVE_LIB_LOG_LEVEL) << "Failed to prepare a backtrace state for " << filename;
return;
Expand All @@ -359,7 +349,7 @@ SymbolResolver::addSegments(
for (const auto& segment : segments) {
const uintptr_t segment_start = addr + segment.vaddr;
const uintptr_t segment_end = addr + segment.vaddr + segment.memsz;
addSegment(filename, state, filename_index, segment_start, segment_end);
addSegment(interned_filename, state, segment_start, segment_end);
}
}

Expand All @@ -378,13 +368,17 @@ SymbolResolver::clearSegments()
}

backtrace_state*
SymbolResolver::findBacktraceState(const char* filename, uintptr_t address_start)
SymbolResolver::getBacktraceState(InternedString interned_filename, uintptr_t address_start)
{
// We hash into "d_backtrace_states" using a char* as it's safe on the condition that every
// const char* used as a key in the map is one that was returned by "d_string_storage",
// and it's safe because no pointer that's returned by "d_string_storage" is ever invalidated.
auto it = d_backtrace_states.find(filename);
if (it != d_backtrace_states.end()) {
// We hash into "s_backtrace_states" using a `const char*`. This is safe
// because every `const char*` we save is owned by an interned string.
const char* filename = interned_filename.get().c_str();
auto key = std::make_pair(filename, address_start);

std::lock_guard<std::mutex> lock(s_backtrace_states_mutex);

auto it = s_backtrace_states.find(key);
if (it != s_backtrace_states.end()) {
return it->second;
}

Expand All @@ -400,7 +394,7 @@ SymbolResolver::findBacktraceState(const char* filename, uintptr_t address_start
<< "(errno " << errnum << "): " << msg;
};

auto state = backtrace_create_state(data.fileName, false, errorHandler, &data);
auto state = backtrace_create_state(data.fileName, true, errorHandler, &data);

if (!state) {
return nullptr;
Expand Down Expand Up @@ -447,7 +441,8 @@ SymbolResolver::findBacktraceState(const char* filename, uintptr_t address_start
return nullptr;
#endif
}
d_backtrace_states.insert(it, {filename, state});

s_backtrace_states.insert(it, {key, state});
return state;
}

Expand Down
Loading
Loading