From c94f2e77055ac2ba1b67de7670310f529d800df8 Mon Sep 17 00:00:00 2001 From: Sebastian Galkin Date: Thu, 10 Oct 2024 11:44:27 -0300 Subject: [PATCH 1/2] python: handle open modes explicitly Also better error message when trying to open an existing store and it doesn't exist --- icechunk-python/python/icechunk/__init__.py | 60 ++++++++++++--------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/icechunk-python/python/icechunk/__init__.py b/icechunk-python/python/icechunk/__init__.py index f4457c3d..c94851fe 100644 --- a/icechunk-python/python/icechunk/__init__.py +++ b/icechunk-python/python/icechunk/__init__.py @@ -2,7 +2,7 @@ from collections.abc import AsyncGenerator, Iterable from typing import Any, Self -from zarr.abc.store import AccessMode, ByteRangeRequest, Store +from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.common import AccessModeLiteral, BytesLike from zarr.core.sync import SyncMixin @@ -37,16 +37,11 @@ class IcechunkStore(Store, SyncMixin): @classmethod async def open(cls, *args: Any, **kwargs: Any) -> Self: - """FIXME: Better handle the open method based on the access mode the user passed in along with the kwargs - https://github.com/zarr-developers/zarr-python/blob/c878da2a900fc621ff23cc6d84d45cd3cb26cbed/src/zarr/abc/store.py#L24-L30 - """ if "mode" in kwargs: mode = kwargs.pop("mode") else: mode = "r" - access_mode = AccessMode.from_literal(mode) - if "storage" in kwargs: storage = kwargs.pop("storage") else: @@ -54,22 +49,28 @@ async def open(cls, *args: Any, **kwargs: Any) -> Self: "Storage configuration is required. Pass a Storage object to construct an IcechunkStore" ) - store_exists = await pyicechunk_store_exists(storage) - - if access_mode.overwrite: - if store_exists: - raise ValueError( - "Store already exists and overwrite is not allowed for IcechunkStore" - ) - store = await cls.create(storage, mode, *args, **kwargs) - elif access_mode.create or access_mode.update: - if store_exists: + store = None + match mode: + case "r" | "r+": store = await cls.open_existing(storage, mode, *args, **kwargs) - else: - store = await cls.create(storage, mode, *args, **kwargs) - else: - store = await cls.open_existing(storage, mode, *args, **kwargs) - + case "a": + if await pyicechunk_store_exists(storage): + store = await cls.open_existing(storage, mode, *args, **kwargs) + else: + store = await cls.create(storage, mode, *args, **kwargs) + case "w": + if await pyicechunk_store_exists(storage): + store = await cls.open_existing(storage, mode, *args, **kwargs) + await store.clear() + else: + store = await cls.create(storage, mode, *args, **kwargs) + case "w-": + if await pyicechunk_store_exists(storage): + raise ValueError("""Zarr store already exists, open using mode "w" or "r+""""") + else: + store = await cls.create(storage, mode, *args, **kwargs) + + assert(store) # We dont want to call _open() becuase icechunk handles the opening, etc. # if we have gotten this far we can mark it as open store._is_open = True @@ -112,9 +113,20 @@ async def open_existing( """ config = config or StoreConfig() read_only = mode == "r" - store = await pyicechunk_store_open_existing( - storage, read_only=read_only, config=config - ) + # We have delayed checking if the repository exists, to avoid the delay in the happy case + # So we need to check now if open fails, to provide a nice error message + try: + store = await pyicechunk_store_open_existing( + storage, read_only=read_only, config=config + ) + # TODO: we should have an exception type to catch here, for the case of non-existing repo + except Exception as e: + if await pyicechunk_store_exists(storage): + # if the repo exists, this is an actual error we need to raise + raise e + else: + # if the repo doesn't exists, we want to point users to that issue instead + raise ValueError("No Icechunk repository at the provided location, try opening in create mode or changing the location") from None return cls(store=store, mode=mode, args=args, kwargs=kwargs) @classmethod From cb06e2995b640f09267ca381286a8a0e916c42f9 Mon Sep 17 00:00:00 2001 From: Sebastian Galkin Date: Thu, 10 Oct 2024 12:09:34 -0300 Subject: [PATCH 2/2] fix tests --- icechunk-python/tests/test_virtual_ref.py | 2 +- .../tests/test_zarr/test_store/test_icechunk_store.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/icechunk-python/tests/test_virtual_ref.py b/icechunk-python/tests/test_virtual_ref.py index 543f01c8..59611878 100644 --- a/icechunk-python/tests/test_virtual_ref.py +++ b/icechunk-python/tests/test_virtual_ref.py @@ -51,7 +51,7 @@ async def test_write_minino_virtual_refs(): allow_http=True, region="us-east-1", ), - mode="r+", + mode="w", config=StoreConfig( virtual_ref_config=VirtualRefConfig.s3_from_config( credentials=S3Credentials( diff --git a/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py b/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py index 1103655e..c837d08f 100644 --- a/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py +++ b/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py @@ -52,7 +52,7 @@ def store_kwargs( ) -> dict[str, str | None | dict[str, Buffer]]: kwargs = { "storage": StorageConfig.memory(""), - "mode": "r+", + "mode": "w", } return kwargs @@ -68,6 +68,10 @@ def test_store_repr(self, store: IcechunkStore) -> None: def test_serializable_store(self, store: IcechunkStore) -> None: super().test_serializable_store(store) + def test_store_mode(self, store, store_kwargs: dict[str, Any]) -> None: + assert store.mode == AccessMode.from_literal("w") + assert not store.mode.readonly + async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> None: create_kwargs = {**store_kwargs, "mode": "r"} with pytest.raises(ValueError):