From 03ddecf2ac230e37b3060d25d81557f63e787e33 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Fri, 3 Nov 2023 21:54:36 +0000 Subject: [PATCH] Limit the number of memory records reported For very big files, the generated reporters will hang when trying to process all of the memory records produced. This happens quite a lot in flamegraphs produced from very big files, where the browser cannot display the plot with millions of points. To help here, add a new parameter to the FileReader class that limits the number of memory records (and therefore temporal snapshots) stored and reported. This should not affect most regular capture files but will help with the very big ones. Signed-off-by: Pablo Galindo --- news/491.bugfix.rst | 1 + src/memray/_memray.pyi | 5 ++++- src/memray/_memray.pyx | 10 ++++++++++ tests/integration/test_tracking.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 news/491.bugfix.rst diff --git a/news/491.bugfix.rst b/news/491.bugfix.rst new file mode 100644 index 0000000000..85a8b24ce1 --- /dev/null +++ b/news/491.bugfix.rst @@ -0,0 +1 @@ +Limit the number of memory records displayed in reporters by default. This will help displaying flamegraphs for very big files. diff --git a/src/memray/_memray.pyi b/src/memray/_memray.pyi index 09c818ab4b..e54736d63b 100644 --- a/src/memray/_memray.pyi +++ b/src/memray/_memray.pyi @@ -133,7 +133,10 @@ class FileReader: @property def metadata(self) -> Metadata: ... def __init__( - self, file_name: Union[str, Path], *, report_progress: bool = False + self, + file_name: Union[str, Path], + *, + report_progress: bool = False, ) -> None: ... def get_allocation_records(self) -> Iterable[AllocationRecord]: ... 
def get_temporal_allocation_records( diff --git a/src/memray/_memray.pyx b/src/memray/_memray.pyx index 4dafb2ae4a..337342509c 100644 --- a/src/memray/_memray.pyx +++ b/src/memray/_memray.pyx @@ -879,6 +879,9 @@ cdef class FileReader: n_memory_snapshots_approx = 2048 if 0 < stats["start_time"] < stats["end_time"]: n_memory_snapshots_approx = (stats["end_time"] - stats["start_time"]) / 10 + + if n_memory_snapshots_approx > max_memory_records: + n_memory_snapshots_approx = max_memory_records self._memory_snapshots.reserve(n_memory_snapshots_approx) cdef object total = stats['n_allocations'] or None @@ -915,6 +918,13 @@ cdef class FileReader: self._memory_snapshots.push_back(reader.getLatestMemorySnapshot()) else: break +<<<<<<< HEAD +======= + + if len(self._memory_snapshots) > max_memory_records: + self._memory_snapshot_bucket = len(self._memory_snapshots) // max_memory_records + self._memory_snapshots = self._memory_snapshots[::self._memory_snapshot_bucket] +>>>>>>> bc88a1d (Limit the number of memory records reported) self._high_watermark = finder.getHighWatermark() stats["n_allocations"] = progress_indicator.num_processed diff --git a/tests/integration/test_tracking.py b/tests/integration/test_tracking.py index 5e17500154..4bcb7920c4 100644 --- a/tests/integration/test_tracking.py +++ b/tests/integration/test_tracking.py @@ -1672,6 +1672,34 @@ def test_memory_snapshots_tick_interval(self, tmp_path): for prev, _next in zip(memory_snapshots, memory_snapshots[1:]) ) + def test_memory_snapshots_limit_when_reading(self, tmp_path): + # GIVEN + allocator = MemoryAllocator() + output = tmp_path / "test.bin" + + # WHEN + with Tracker(output): + for _ in range(2): + allocator.valloc(ALLOC_SIZE) + time.sleep(0.11) + allocator.free() + + reader = FileReader(output) + memory_snapshots = list(reader.get_memory_snapshots()) + temporal_records = list(reader.get_temporal_allocation_records()) + + assert memory_snapshots + n_snapshots = len(memory_snapshots) + n_temporal_records = 
len(temporal_records) + + reader = FileReader(output, max_memory_records=n_snapshots // 2) + memory_snapshots = list(reader.get_memory_snapshots()) + temporal_records = list(reader.get_temporal_allocation_records()) + + assert memory_snapshots + assert len(memory_snapshots) <= n_snapshots // 2 + 1 + assert len(temporal_records) <= n_temporal_records // 2 + 1 + def test_temporary_allocations_when_filling_vector_without_preallocating( self, tmp_path ):