From 86b75db82ef12f7c2999f75245189853a6a4467e Mon Sep 17 00:00:00 2001
From: Zach Liu
Date: Thu, 1 Aug 2024 12:17:54 -0400
Subject: [PATCH] get data size in memory for better logs (#7090)

---
Review notes (text between the three-dash line and the diff is not part of
the commit message):

 * This copy of the patch had lost its line breaks and indentation. The line
   structure below was rebuilt from the hunk headers. Indentation of the
   added lines follows the Python block structure; indentation of the
   unchanged context lines is assumed, not recovered. TODO: verify against
   redash/tasks/queries/execution.py, or apply with whitespace ignored.
 * The From: header appears to carry no e-mail address in this copy.
   TODO: restore it before feeding the file to `git am`.
 * _get_size_iterative() visits every object reachable from its argument
   and stores one id() per visited object in `seen`, so the cost of
   producing this log field grows with the size of the query result.
 * Any object exposing __iter__ (other than str, bytes, bytearray) is passed
   to objects.extend(), which would exhaust a one-shot iterator stored
   inside `data`. NOTE(review): presumably `data` only holds plain
   containers and scalars; confirm against the query runners.
 * The log field is still named data_length, although after this change it
   reports a sum of sys.getsizeof() values (bytes) instead of len(data).

 redash/tasks/queries/execution.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py
index cba2aded07..a863903cdb 100644
--- a/redash/tasks/queries/execution.py
+++ b/redash/tasks/queries/execution.py
@@ -1,5 +1,7 @@
 import signal
+import sys
 import time
+from collections import deque
 
 import redis
 from rq import get_current_job
@@ -145,6 +147,30 @@ def _resolve_user(user_id, is_api_key, query_id):
         return None
 
 
+def _get_size_iterative(dict_obj):
+    """Iteratively finds size of objects in bytes"""
+    seen = set()
+    size = 0
+    objects = deque([dict_obj])
+
+    while objects:
+        current = objects.popleft()
+        if id(current) in seen:
+            continue
+        seen.add(id(current))
+        size += sys.getsizeof(current)
+
+        if isinstance(current, dict):
+            objects.extend(current.keys())
+            objects.extend(current.values())
+        elif hasattr(current, "__dict__"):
+            objects.append(current.__dict__)
+        elif hasattr(current, "__iter__") and not isinstance(current, (str, bytes, bytearray)):
+            objects.extend(current)
+
+    return size
+
+
 class QueryExecutor:
     def __init__(self, query, data_source_id, user_id, is_api_key, metadata, is_scheduled_query):
         self.job = get_current_job()
@@ -195,7 +221,7 @@ def run(self):
             "job=execute_query query_hash=%s ds_id=%d data_length=%s error=[%s]",
             self.query_hash,
             self.data_source_id,
-            data and len(data),
+            data and _get_size_iterative(data),
             error,
         )
 