Limit the number of memory records reported

For very big files, the generated reporters will hang when trying to
process all of the memory records produced. This happens quite a lot with
flamegraphs produced from very big files, where the browser cannot
display the plot with millions of points.

To help here, add a new parameter to the FileReader class that limits
the number of memory records (and therefore temporal snapshots) stored
and reported. This should not affect most regular capture files but will
help with the very big ones.
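
A minimal usage sketch of the new parameter (the parameter name comes from
this commit; the file name and the cap value are hypothetical, and the
default for max_memory_records is not shown in this diff):

    from memray import FileReader

    # Cap how many memory records (and therefore temporal snapshots) are
    # kept while reading a very large capture file; 1024 is an arbitrary
    # example value.
    reader = FileReader("big_capture.bin", max_memory_records=1024)

    # Downstream consumers such as the flamegraph reporter now see a
    # bounded number of points instead of millions.
    snapshots = list(reader.get_memory_snapshots())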
pablogsal committed Nov 3, 2023
1 parent 9cc9eb1 commit bc88a1d
Showing 3 changed files with 22 additions and 15 deletions.
5 changes: 4 additions & 1 deletion src/memray/_memray.pyi
@@ -133,7 +133,10 @@ class FileReader:
     @property
     def metadata(self) -> Metadata: ...
     def __init__(
-        self, file_name: Union[str, Path], *, report_progress: bool = False
+        self,
+        file_name: Union[str, Path],
+        *,
+        report_progress: bool = False,
     ) -> None: ...
     def get_allocation_records(self) -> Iterable[AllocationRecord]: ...
     def get_temporal_allocation_records(
4 changes: 2 additions & 2 deletions src/memray/_memray.pyx
@@ -880,7 +880,7 @@ cdef class FileReader:
        n_memory_snapshots_approx = 2048
        if 0 < stats["start_time"] < stats["end_time"]:
            n_memory_snapshots_approx = (stats["end_time"] - stats["start_time"]) / 10

        if n_memory_snapshots_approx > max_memory_records:
            n_memory_snapshots_approx = max_memory_records
        self._memory_snapshots.reserve(n_memory_snapshots_approx)
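
In effect, the pre-reservation estimates one snapshot per 10 time units of
capture duration and is now clamped to the cap. A standalone sketch with
hypothetical numbers (the real time units and the default cap are not shown
in this diff):

    # Hypothetical: a capture spanning 300,000 time units.
    start_time, end_time = 0, 300_000
    max_memory_records = 1_024  # hypothetical cap

    n_memory_snapshots_approx = (end_time - start_time) / 10  # 30,000.0
    if n_memory_snapshots_approx > max_memory_records:
        n_memory_snapshots_approx = max_memory_records  # reserve only 1,024 slots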
@@ -920,7 +920,7 @@ cdef class FileReader:
                self._memory_snapshots.push_back(reader.getLatestMemorySnapshot())
            else:
                break

        if len(self._memory_snapshots) > max_memory_records:
            self._memory_snapshot_bucket = len(self._memory_snapshots) // max_memory_records
            self._memory_snapshots = self._memory_snapshots[::self._memory_snapshot_bucket]
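The second hunk thins an oversized snapshot list by keeping every N-th
entry, which preserves the overall temporal shape while bounding the point
count. A minimal standalone sketch of that stride-based downsampling
(hypothetical plain-Python data, not the internal vector type):

    # Hypothetical: 10,000 recorded snapshots, capped at 1,000.
    snapshots = list(range(10_000))
    max_memory_records = 1_000

    if len(snapshots) > max_memory_records:
        # Keep every bucket-th record: evenly spaced samples, leaving
        # roughly max_memory_records entries.
        bucket = len(snapshots) // max_memory_records
        snapshots = snapshots[::bucket]

    print(len(snapshots))  # 1000 for these numbers
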
28 changes: 16 additions & 12 deletions tests/integration/test_tracking.py
@@ -1679,22 +1679,26 @@ def test_memory_snapshots_limit_when_reading(self, tmp_path):

         # WHEN
         with Tracker(output):
-            allocator.valloc(ALLOC_SIZE)
-            time.sleep(0.11)
-            allocator.free()
+            for _ in range(2):
+                allocator.valloc(ALLOC_SIZE)
+                time.sleep(0.11)
+                allocator.free()

-        memory_snapshots = list(FileReader(output).get_memory_snapshots())
+        reader = FileReader(output)
+        memory_snapshots = list(reader.get_memory_snapshots())
+        temporal_records = list(reader.get_temporal_allocation_records())

         assert memory_snapshots
-        assert all(record.rss > 0 for record in memory_snapshots)
-        assert any(record.heap >= ALLOC_SIZE for record in memory_snapshots)
-        assert sorted(memory_snapshots, key=lambda r: r.time) == memory_snapshots
-        assert all(
-            _next.time - prev.time >= 10
-            for prev, _next in zip(memory_snapshots, memory_snapshots[1:])
-        )
+        n_snapshots = len(memory_snapshots)
+        n_temporal_records = len(temporal_records)

-        memory_snapshots = list(FileReader(output).get_memory_snapshots())
+        reader = FileReader(output, max_memory_records=n_snapshots // 2)
+        memory_snapshots = list(reader.get_memory_snapshots())
+        temporal_records = list(reader.get_temporal_allocation_records())

         assert memory_snapshots
+        assert len(memory_snapshots) <= n_snapshots // 2 + 1
+        assert len(temporal_records) <= n_temporal_records // 2 + 1

     def test_temporary_allocations_when_filling_vector_without_preallocating(
         self, tmp_path
