From bc88a1d71877b08a5da3e762dc595399fbdd2038 Mon Sep 17 00:00:00 2001
From: Pablo Galindo
Date: Fri, 3 Nov 2023 21:54:36 +0000
Subject: [PATCH] Limit the number of memory records reported

For very big files, the generated reporters will hang when trying to
process all of the memory records produced. This happens quite a lot
in flamegraphs produced from very big files, where the browser cannot
display the plot with millions of points.

To help here, add a new parameter to the FileReader class that limits
the number of memory records (and therefore temporal snapshots) stored
and reported. This should not affect most regular capture files but
will help with the very big ones.
---
 src/memray/_memray.pyi             |  6 +++++-
 src/memray/_memray.pyx             |  7 +++++--
 tests/integration/test_tracking.py | 28 ++++++++++++------------
 3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/src/memray/_memray.pyi b/src/memray/_memray.pyi
index 09c818ab4b..e54736d63b 100644
--- a/src/memray/_memray.pyi
+++ b/src/memray/_memray.pyi
@@ -133,7 +133,11 @@ class FileReader:
     @property
     def metadata(self) -> Metadata: ...
     def __init__(
-        self, file_name: Union[str, Path], *, report_progress: bool = False
+        self,
+        file_name: Union[str, Path],
+        *,
+        report_progress: bool = False,
+        max_memory_records: int = 10240,
     ) -> None: ...
     def get_allocation_records(self) -> Iterable[AllocationRecord]: ...
     def get_temporal_allocation_records(
diff --git a/src/memray/_memray.pyx b/src/memray/_memray.pyx
index eb7c372fe4..b15cd40752 100644
--- a/src/memray/_memray.pyx
+++ b/src/memray/_memray.pyx
@@ -880,5 +880,6 @@ cdef class FileReader:
         n_memory_snapshots_approx = 2048
         if 0 < stats["start_time"] < stats["end_time"]:
             n_memory_snapshots_approx = (stats["end_time"] - stats["start_time"]) / 10
-
+        if n_memory_snapshots_approx > max_memory_records:
+            n_memory_snapshots_approx = max_memory_records
         self._memory_snapshots.reserve(n_memory_snapshots_approx)
@@ -920,4 +920,6 @@ cdef class FileReader:
                 self._memory_snapshots.push_back(reader.getLatestMemorySnapshot())
             else:
                 break
-
+        if len(self._memory_snapshots) > max_memory_records:
+            self._memory_snapshot_bucket = len(self._memory_snapshots) // max_memory_records
+            self._memory_snapshots = self._memory_snapshots[::self._memory_snapshot_bucket]
diff --git a/tests/integration/test_tracking.py b/tests/integration/test_tracking.py
index b6f685ba0d..4bcb7920c4 100644
--- a/tests/integration/test_tracking.py
+++ b/tests/integration/test_tracking.py
@@ -1679,22 +1679,26 @@ def test_memory_snapshots_limit_when_reading(self, tmp_path):
 
         # WHEN
         with Tracker(output):
-            allocator.valloc(ALLOC_SIZE)
-            time.sleep(0.11)
-            allocator.free()
+            for _ in range(2):
+                allocator.valloc(ALLOC_SIZE)
+                time.sleep(0.11)
+                allocator.free()
 
-        memory_snapshots = list(FileReader(output).get_memory_snapshots())
+        reader = FileReader(output)
+        memory_snapshots = list(reader.get_memory_snapshots())
+        temporal_records = list(reader.get_temporal_allocation_records())
 
         assert memory_snapshots
-        assert all(record.rss > 0 for record in memory_snapshots)
-        assert any(record.heap >= ALLOC_SIZE for record in memory_snapshots)
-        assert sorted(memory_snapshots, key=lambda r: r.time) == memory_snapshots
-        assert all(
-            _next.time - prev.time >= 10
-            for prev, _next in zip(memory_snapshots, memory_snapshots[1:])
-        )
+        n_snapshots = len(memory_snapshots)
+        n_temporal_records = len(temporal_records)
 
-        memory_snapshots = list(FileReader(output).get_memory_snapshots())
+        reader = FileReader(output, max_memory_records=n_snapshots // 2)
+        memory_snapshots = list(reader.get_memory_snapshots())
+        temporal_records = list(reader.get_temporal_allocation_records())
+
+        assert memory_snapshots
+        assert len(memory_snapshots) <= n_snapshots // 2 + 1
+        assert len(temporal_records) <= n_temporal_records // 2 + 1
 
     def test_temporary_allocations_when_filling_vector_without_preallocating(
         self, tmp_path
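
Note for reviewers: the capping in _memray.pyx above is stride-based
downsampling. Once more snapshots exist than max_memory_records allows,
every bucket-th snapshot is kept, and because an integer stride only
approximates the cap, slightly more records than the limit can survive;
that is why the test asserts "<= n_snapshots // 2 + 1" rather than an
exact count. A minimal pure-Python sketch of the idea follows; the
standalone downsample helper is illustrative only, not memray API:

    from typing import List, Sequence, TypeVar

    T = TypeVar("T")

    def downsample(records: Sequence[T], max_records: int) -> List[T]:
        # Mirrors the [::self._memory_snapshot_bucket] slice in the diff:
        # once the cap is exceeded, keep every bucket-th record. Assumes
        # max_records >= 1; stride slicing only approximates the cap.
        if len(records) <= max_records:
            return list(records)
        bucket = len(records) // max_records
        return list(records[::bucket])

    # Example: 10 records capped at 4 gives a bucket of 2, so 5 records
    # survive. With max_records = n // 2, as in the test above, at most
    # n // 2 + 1 records remain, which is the slack the test asserts.
    assert downsample(list(range(10)), 4) == [0, 2, 4, 6, 8]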