From d1699a46ebdeaed149d6695a5f17d6ebef1ad422 Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Thu, 7 Sep 2023 16:55:15 +0100 Subject: [PATCH 1/2] Automatically delete temporary file cache --- fsspec/implementations/cached.py | 12 +++++- fsspec/implementations/tests/test_cached.py | 46 +++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index d16e22707..b679cce51 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -5,6 +5,7 @@ import os import tempfile import time +import weakref from shutil import rmtree from typing import TYPE_CHECKING, Any, Callable, ClassVar @@ -111,7 +112,9 @@ def __init__( "Both filesystems (fs) and target_protocol may not be both given." ) if cache_storage == "TMP": - storage = [tempfile.mkdtemp()] + tempdir = tempfile.mkdtemp() + storage = [tempdir] + weakref.finalize(self, self._remove_tempdir, tempdir) else: if isinstance(cache_storage, str): storage = [cache_storage] @@ -152,6 +155,13 @@ def _strip_protocol(path): self._strip_protocol: Callable = _strip_protocol + @staticmethod + def _remove_tempdir(tempdir): + try: + rmtree(tempdir) + except Exception: + pass + def _mkcache(self): os.makedirs(self.storage[-1], exist_ok=True) diff --git a/fsspec/implementations/tests/test_cached.py b/fsspec/implementations/tests/test_cached.py index 3307495b1..e26c6251b 100644 --- a/fsspec/implementations/tests/test_cached.py +++ b/fsspec/implementations/tests/test_cached.py @@ -1115,3 +1115,49 @@ def test_getitems_errors(tmpdir): assert m.getitems(["afile", "bfile"], on_error="omit") == {"afile": b"test"} with pytest.raises(FileNotFoundError): m.getitems(["afile", "bfile"]) + + +def task_cache_dir_auto_deleted(tmpdir, cache_storage, queue): + source = os.path.join(tmpdir, "source") + afile = os.path.join(source, "afile") + os.mkdir(source) + open(afile, "w").write("test") + + fs = fsspec.filesystem( + "filecache", + target_protocol="file", + cache_storage=cache_storage, + ) + + cache_dir = fs.storage[-1] + + # Force cache to be created + with fs.open(afile, "rb") as f: + assert f.read(5) == b"test" + + # Confirm cache exists + local = fsspec.filesystem("file") + assert local.exists(cache_dir) + + queue.put(cache_dir) + + +@pytest.mark.parametrize("temp_cache", [False, True]) +def test_cache_dir_auto_deleted(temp_cache, tmpdir): + # Run cache creation in separate process to ensure it is deleted + import multiprocessing as mp + + queue = mp.SimpleQueue() + cache_storage = "TMP" if temp_cache else os.path.join(tmpdir, "cache") + process = mp.Process( + target=task_cache_dir_auto_deleted, args=(tmpdir, cache_storage, queue) + ) + process.start() + process.join() + + cache_dir = queue.get() + local = fsspec.filesystem("file") + if temp_cache: + assert not local.exists(cache_dir) + else: + assert local.exists(cache_dir) From 74529d85788b4473d91d3d31cd20af36fa656c4d Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Fri, 8 Sep 2023 10:31:20 +0100 Subject: [PATCH 2/2] Simpler test --- fsspec/implementations/tests/test_cached.py | 29 +++++++-------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/fsspec/implementations/tests/test_cached.py b/fsspec/implementations/tests/test_cached.py index e26c6251b..ee69045c3 100644 --- a/fsspec/implementations/tests/test_cached.py +++ b/fsspec/implementations/tests/test_cached.py @@ -1117,7 +1117,10 @@ def test_getitems_errors(tmpdir): m.getitems(["afile", "bfile"]) -def task_cache_dir_auto_deleted(tmpdir, cache_storage, queue): +@pytest.mark.parametrize("temp_cache", [False, True]) +def test_cache_dir_auto_deleted(temp_cache, tmpdir): + import gc + source = os.path.join(tmpdir, "source") afile = os.path.join(source, "afile") os.mkdir(source) @@ -1126,7 +1129,8 @@ def task_cache_dir_auto_deleted(tmpdir, cache_storage, queue): fs = fsspec.filesystem( "filecache", target_protocol="file", - cache_storage=cache_storage, + cache_storage="TMP" if temp_cache else os.path.join(tmpdir, "cache"), + skip_instance_cache=True, # Important to avoid fs itself being cached ) cache_dir = fs.storage[-1] @@ -1139,24 +1143,11 @@ def task_cache_dir_auto_deleted(tmpdir, cache_storage, queue): local = fsspec.filesystem("file") assert local.exists(cache_dir) - queue.put(cache_dir) - - -@pytest.mark.parametrize("temp_cache", [False, True]) -def test_cache_dir_auto_deleted(temp_cache, tmpdir): - # Run cache creation in separate process to ensure it is deleted - import multiprocessing as mp - - queue = mp.SimpleQueue() - cache_storage = "TMP" if temp_cache else os.path.join(tmpdir, "cache") - process = mp.Process( - target=task_cache_dir_auto_deleted, args=(tmpdir, cache_storage, queue) - ) - process.start() - process.join() + # Delete file system + del fs + gc.collect() - cache_dir = queue.get() - local = fsspec.filesystem("file") + # Ensure cache has been deleted, if it is temporary if temp_cache: assert not local.exists(cache_dir) else: