From a1a223dcc6dda1f87df35e48078be058e16ea4c2 Mon Sep 17 00:00:00 2001
From: Peter Webb
Date: Thu, 14 Mar 2024 19:02:14 -0400
Subject: [PATCH] Stream instead of buffer JSON in read_json/write_json (#96)

* Stream instead of buffer JSON in read_json/write_json

* Add changelog entry

* Mollify mypy

* Fix potential encoding issue.
---
 .../Under the Hood-20240314-161737.yaml       |  6 ++++++
 dbt_common/clients/system.py                  | 21 +++++++++++++++++--
 dbt_common/utils/executor.py                  |  4 ++--
 3 files changed, 27 insertions(+), 4 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20240314-161737.yaml

diff --git a/.changes/unreleased/Under the Hood-20240314-161737.yaml b/.changes/unreleased/Under the Hood-20240314-161737.yaml
new file mode 100644
index 00000000..d5d58059
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20240314-161737.yaml
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Stream JSON on read/write instead of holding it in memory
+time: 2024-03-14T16:17:37.570328-04:00
+custom:
+  Author: peterallenwebb
+  Issue: "96"
diff --git a/dbt_common/clients/system.py b/dbt_common/clients/system.py
index e22305fb..bcf798d2 100644
--- a/dbt_common/clients/system.py
+++ b/dbt_common/clients/system.py
@@ -292,11 +292,28 @@ def write_file(path: str, contents: str = "") -> bool:
 
 
 def read_json(path: str) -> Dict[str, Any]:
-    return json.loads(load_file_contents(path))
+    path = convert_path(path)
+    with open(path, "r") as f:
+        return json.load(f)
 
 
 def write_json(path: str, data: Dict[str, Any]) -> bool:
-    return write_file(path, json.dumps(data, cls=dbt_common.utils.encoding.JSONEncoder))
+    path = convert_path(path)
+    try:
+        make_directory(os.path.dirname(path))
+        with open(path, "w", encoding="utf-8") as f:
+            json.dump(data, f, cls=dbt_common.utils.encoding.JSONEncoder)
+    except Exception as exc:
+        # See write_file() for an explanation of this error handling.
+        if os.name == "nt":
+            if getattr(exc, "winerror", 0) == 3:
+                reason = "Path was too long"
+            else:
+                reason = "Path was possibly too long"
+            fire_event(SystemCouldNotWrite(path=path, reason=reason, exc=str(exc)))
+        else:
+            raise
+    return True
 
 
 def _windows_rmdir_readonly(func: Callable[[str], Any], path: str, exc: Tuple[Any, OSError, Any]):
diff --git a/dbt_common/utils/executor.py b/dbt_common/utils/executor.py
index 0be40fcd..0dd8490c 100644
--- a/dbt_common/utils/executor.py
+++ b/dbt_common/utils/executor.py
@@ -74,6 +74,6 @@ def executor(config: HasThreadingConfig) -> ConnectingExecutor:
     else:
         return MultiThreadedExecutor(
             max_workers=config.threads,
-            initializer=_thread_initializer,
-            initargs=(get_invocation_context(),),
+            initializer=_thread_initializer,  # type: ignore
+            initargs=(get_invocation_context(),),  # type: ignore
         )