From 88cffa3f9be42507f6f577c30070d094f70c3374 Mon Sep 17 00:00:00 2001 From: Sebastian Galkin Date: Tue, 8 Oct 2024 09:18:02 -0300 Subject: [PATCH 1/4] Fix many issues reported by repo-review In preparation for launch --- .github/dependabot.yml | 4 ++ .github/workflows/python-ci.yaml | 4 ++ .github/workflows/rust-ci.yaml | 4 ++ icechunk-python/examples/dask_write.py | 39 ++++++++++--------- icechunk-python/examples/smoke-test.py | 18 ++++----- .../notebooks/demo-dummy-data.ipynb | 3 +- icechunk-python/notebooks/demo-s3.ipynb | 6 +-- .../notebooks/version-control.ipynb | 1 - icechunk-python/pyproject.toml | 22 +++++++++++ icechunk-python/python/icechunk/__init__.py | 18 ++++----- .../python/icechunk/_icechunk_python.pyi | 9 ++--- icechunk-python/tests/conftest.py | 2 +- icechunk-python/tests/test_concurrency.py | 4 +- .../tests/test_distributed_writers.py | 15 +++---- icechunk-python/tests/test_pickle.py | 5 +-- icechunk-python/tests/test_timetravel.py | 3 +- icechunk-python/tests/test_virtual_ref.py | 10 ++--- icechunk-python/tests/test_zarr/test_api.py | 5 +-- icechunk-python/tests/test_zarr/test_array.py | 5 +-- icechunk-python/tests/test_zarr/test_group.py | 20 ++++------ .../tests/test_zarr/test_store/test_core.py | 1 - .../test_store/test_icechunk_store.py | 24 ++++-------- 22 files changed, 117 insertions(+), 105 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index cc0cdfd8..f226efb9 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -18,3 +18,7 @@ updates: day: "monday" time: "05:00" timezone: "US/Pacific" + groups: + actions: + patterns: + - "*" diff --git a/.github/workflows/python-ci.yaml b/.github/workflows/python-ci.yaml index cc6d5e11..78dd4851 100644 --- a/.github/workflows/python-ci.yaml +++ b/.github/workflows/python-ci.yaml @@ -15,6 +15,10 @@ on: pull_request: workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + permissions: contents: read diff --git a/.github/workflows/rust-ci.yaml b/.github/workflows/rust-ci.yaml index 48843b67..93dc5c98 100644 --- a/.github/workflows/rust-ci.yaml +++ b/.github/workflows/rust-ci.yaml @@ -9,6 +9,10 @@ on: branches: - main +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + env: CARGO_INCREMENTAL: 0 CARGO_NET_RETRY: 10 diff --git a/icechunk-python/examples/dask_write.py b/icechunk-python/examples/dask_write.py index 59ad60f8..ca129620 100644 --- a/icechunk-python/examples/dask_write.py +++ b/icechunk-python/examples/dask_write.py @@ -11,14 +11,15 @@ """ import argparse -from dataclasses import dataclass -import time import asyncio -import zarr -from dask.distributed import Client -import numpy as np +import time +from dataclasses import dataclass +from typing import cast import icechunk +import numpy as np +import zarr +from dask.distributed import Client @dataclass @@ -57,7 +58,7 @@ async def execute_write_task(task: Task): store = await mk_store("w", task) group = zarr.group(store=store, overwrite=False) - array = group["array"] + array = cast(zarr.Array, group["array"]) print(f"Writing at t={task.time}") data = generate_task_array(task, array.shape[0:2]) array[:, :, task.time] = data @@ -72,7 +73,7 @@ async def execute_read_task(task: Task): print(f"Reading t={task.time}") store = await mk_store("r", task) group = zarr.group(store=store, overwrite=False) - array = group["array"] + array = cast(zarr.Array, group["array"]) actual = array[:, :, task.time] expected = generate_task_array(task, array.shape[0:2]) @@ -235,9 +236,7 @@ async def distributed_write(): help="size of chunks in the y dimension", default=112, ) - create_parser.add_argument( - "--name", type=str, help="repository name", required=True - ) + create_parser.add_argument("--name", type=str, help="repository name", required=True) create_parser.set_defaults(command="create") update_parser = subparsers.add_parser("update", help="add chunks to the array") @@ -256,9 +255,7 @@ async def distributed_write(): update_parser.add_argument( "--workers", type=int, help="number of workers to use", required=True ) - update_parser.add_argument( - "--name", type=str, help="repository name", required=True - ) + update_parser.add_argument("--name", type=str, help="repository name", required=True) update_parser.add_argument( "--max-sleep", type=float, @@ -275,7 +272,11 @@ async def distributed_write(): "--sleep-tasks", type=int, help="this many tasks sleep", default=0 ) update_parser.add_argument( - "--distributed-cluster", type=bool, help="use multiple machines", action=argparse.BooleanOptionalAction, default=False + "--distributed-cluster", + type=bool, + help="use multiple machines", + action=argparse.BooleanOptionalAction, + default=False, ) update_parser.set_defaults(command="update") @@ -295,11 +296,13 @@ async def distributed_write(): verify_parser.add_argument( "--workers", type=int, help="number of workers to use", required=True ) + verify_parser.add_argument("--name", type=str, help="repository name", required=True) verify_parser.add_argument( - "--name", type=str, help="repository name", required=True - ) - verify_parser.add_argument( - "--distributed-cluster", type=bool, help="use multiple machines", action=argparse.BooleanOptionalAction, default=False + "--distributed-cluster", + type=bool, + help="use multiple machines", + action=argparse.BooleanOptionalAction, + default=False, ) verify_parser.set_defaults(command="verify") diff --git a/icechunk-python/examples/smoke-test.py b/icechunk-python/examples/smoke-test.py index 3247f1e0..14d3905e 100644 --- a/icechunk-python/examples/smoke-test.py +++ b/icechunk-python/examples/smoke-test.py @@ -1,17 +1,15 @@ import asyncio -from typing import Literal -from zarr.storage import LocalStore, MemoryStore, RemoteStore import math +import random +import string +import time +from typing import Literal import numpy as np import zarr -import time - +from icechunk import IcechunkStore, S3Credentials, StorageConfig, StoreConfig from zarr.abc.store import Store - -from icechunk import IcechunkStore, StorageConfig, S3Credentials, StoreConfig -import random -import string +from zarr.storage import LocalStore, MemoryStore, RemoteStore def rdms(n): @@ -149,7 +147,7 @@ async def run(store: Store) -> None: assert isinstance(array, zarr.Array) print( - f"numchunks: {math.prod(s // c for s, c in zip(array.shape, array.chunks))}" + f"numchunks: {math.prod(s // c for s, c in zip(array.shape, array.chunks, strict=False))}" ) np.testing.assert_array_equal(array[:], value) @@ -176,7 +174,7 @@ async def create_zarr_store(*, store: Literal["memory", "local", "s3"]) -> Store "key": "minio123", "secret": "minio123", "region": "us-east-1", - "endpoint_url": "http://localhost:9000" + "endpoint_url": "http://localhost:9000", }, ) diff --git a/icechunk-python/notebooks/demo-dummy-data.ipynb b/icechunk-python/notebooks/demo-dummy-data.ipynb index 5b98d361..b6319158 100644 --- a/icechunk-python/notebooks/demo-dummy-data.ipynb +++ b/icechunk-python/notebooks/demo-dummy-data.ipynb @@ -21,7 +21,6 @@ "\n", "import numpy as np\n", "import zarr\n", - "\n", "from icechunk import IcechunkStore, StorageConfig" ] }, @@ -1381,7 +1380,7 @@ " tic = time.time()\n", " array = root_group[key]\n", " assert array.dtype == value.dtype, (array.dtype, value.dtype)\n", - " print(f\"numchunks: {math.prod(s // c for s, c in zip(array.shape, array.chunks))}\")\n", + " print(f\"numchunks: {math.prod(s // c for s, c in zip(array.shape, array.chunks, strict=False))}\")\n", " np.testing.assert_array_equal(array[:], value)\n", " print(time.time() - tic)" ] diff --git a/icechunk-python/notebooks/demo-s3.ipynb b/icechunk-python/notebooks/demo-s3.ipynb index c43df6d4..93ad64af 100644 --- a/icechunk-python/notebooks/demo-s3.ipynb +++ b/icechunk-python/notebooks/demo-s3.ipynb @@ -18,7 +18,6 @@ "outputs": [], "source": [ "import zarr\n", - "\n", "from icechunk import IcechunkStore, StorageConfig" ] }, @@ -1148,7 +1147,6 @@ "outputs": [], "source": [ "import zarr\n", - "\n", "from icechunk import IcechunkStore, StorageConfig\n", "\n", "# TODO: catalog will handle this\n", @@ -1298,8 +1296,8 @@ "metadata": {}, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", - "import matplotlib as mpl" + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt" ] }, { diff --git a/icechunk-python/notebooks/version-control.ipynb b/icechunk-python/notebooks/version-control.ipynb index 6b94072c..4749ada1 100644 --- a/icechunk-python/notebooks/version-control.ipynb +++ b/icechunk-python/notebooks/version-control.ipynb @@ -16,7 +16,6 @@ "outputs": [], "source": [ "import zarr\n", - "\n", "from icechunk import IcechunkStore, StorageConfig" ] }, diff --git a/icechunk-python/pyproject.toml b/icechunk-python/pyproject.toml index f0ee68d9..a302a709 100644 --- a/icechunk-python/pyproject.toml +++ b/icechunk-python/pyproject.toml @@ -34,7 +34,29 @@ python-source = "python" [tool.pytest.ini_options] asyncio_mode = "auto" +minversion = "7" +testpaths = ["tests"] +log_cli_level = "INFO" +xfail_strict = true +addopts = ["-ra", "--strict-config", "--strict-markers"] +filterwarnings = ["error"] [tool.pyright] venvPath = "." venv = ".venv" + +[tool.mypy] +python_version = "3.10" +strict = true +warn_unreachable = true +enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] + +[tool.ruff] +line-length = 90 + +[tool.ruff.lint] +extend-select = [ + "B", # flake8-bugbear + "I", # isort + "UP", # pypupgrade +] diff --git a/icechunk-python/python/icechunk/__init__.py b/icechunk-python/python/icechunk/__init__.py index b7558c51..6c0b52eb 100644 --- a/icechunk-python/python/icechunk/__init__.py +++ b/icechunk-python/python/icechunk/__init__.py @@ -8,17 +8,17 @@ from zarr.core.sync import SyncMixin from ._icechunk_python import ( - __version__, PyIcechunkStore, S3Credentials, SnapshotMetadata, StorageConfig, StoreConfig, VirtualRefConfig, + __version__, pyicechunk_store_create, pyicechunk_store_exists, - pyicechunk_store_open_existing, pyicechunk_store_from_bytes, + pyicechunk_store_open_existing, ) __all__ = [ @@ -96,7 +96,7 @@ async def open_existing( cls, storage: StorageConfig, mode: AccessModeLiteral = "r", - config: StoreConfig = StoreConfig(), + config: StoreConfig | None = None, *args: Any, **kwargs: Any, ) -> Self: @@ -110,6 +110,7 @@ async def open_existing( If opened with AccessModeLiteral "r", the store will be read-only. Otherwise the store will be writable. """ + config = config or StoreConfig() read_only = mode == "r" store = await pyicechunk_store_open_existing( storage, read_only=read_only, config=config @@ -121,7 +122,7 @@ async def create( cls, storage: StorageConfig, mode: AccessModeLiteral = "w", - config: StoreConfig = StoreConfig(), + config: StoreConfig | None = None, *args: Any, **kwargs: Any, ) -> Self: @@ -133,6 +134,7 @@ async def create( this will be configured automatically with the provided storage_config as the underlying storage backend. """ + config = config or StoreConfig() store = await pyicechunk_store_create(storage, config=config) return cls(store=store, mode=mode, args=args, kwargs=kwargs) @@ -169,8 +171,8 @@ def __getstate__(self) -> object: def __setstate__(self, state: Any) -> None: store_repr = state["store"] - mode = state['mode'] - is_read_only = (mode == "r") + mode = state["mode"] + is_read_only = mode == "r" self._store = pyicechunk_store_from_bytes(store_repr, is_read_only) self._is_open = True @@ -305,9 +307,7 @@ async def get_partial_values( # NOTE: pyo3 has not implicit conversion from an Iterable to a rust iterable. So we convert it # to a list here first. Possible opportunity for optimization. result = await self._store.get_partial_values(list(key_ranges)) - return [ - prototype.buffer.from_bytes(r) if r is not None else None for r in result - ] + return [prototype.buffer.from_bytes(r) if r is not None else None for r in result] async def exists(self, key: str) -> bool: """Check if a key exists in the store. diff --git a/icechunk-python/python/icechunk/_icechunk_python.pyi b/icechunk-python/python/icechunk/_icechunk_python.pyi index 18c3e7d5..b6acc932 100644 --- a/icechunk-python/python/icechunk/_icechunk_python.pyi +++ b/icechunk-python/python/icechunk/_icechunk_python.pyi @@ -224,16 +224,15 @@ class StoreConfig: async def pyicechunk_store_exists(storage: StorageConfig) -> bool: ... async def pyicechunk_store_create( - storage: StorageConfig, config: StoreConfig + storage: StorageConfig, config: StoreConfig | None ) -> PyIcechunkStore: ... async def pyicechunk_store_open_existing( - storage: StorageConfig, read_only: bool, config: StoreConfig + storage: StorageConfig, read_only: bool, config: StoreConfig | None ) -> PyIcechunkStore: ... + # async def pyicechunk_store_from_json_config( # config: str, read_only: bool # ) -> PyIcechunkStore: ... -def pyicechunk_store_from_bytes( - bytes: bytes, read_only: bool -) -> PyIcechunkStore: ... +def pyicechunk_store_from_bytes(bytes: bytes, read_only: bool) -> PyIcechunkStore: ... __version__: str diff --git a/icechunk-python/tests/conftest.py b/icechunk-python/tests/conftest.py index bf670b19..02046273 100644 --- a/icechunk-python/tests/conftest.py +++ b/icechunk-python/tests/conftest.py @@ -1,7 +1,7 @@ from typing import Literal -from icechunk import IcechunkStore, StorageConfig import pytest +from icechunk import IcechunkStore, StorageConfig async def parse_store(store: Literal["local", "memory"], path: str) -> IcechunkStore: diff --git a/icechunk-python/tests/test_concurrency.py b/icechunk-python/tests/test_concurrency.py index 767ff9fd..907d7f0c 100644 --- a/icechunk-python/tests/test_concurrency.py +++ b/icechunk-python/tests/test_concurrency.py @@ -1,8 +1,8 @@ import asyncio import random -import zarr import icechunk +import zarr N = 15 @@ -41,7 +41,7 @@ async def list_store(store, barrier): async def test_concurrency(): store = await icechunk.IcechunkStore.open( mode="w", - storage=icechunk.StorageConfig.memory(prefix='concurrency'), + storage=icechunk.StorageConfig.memory(prefix="concurrency"), ) group = zarr.group(store=store, overwrite=True) diff --git a/icechunk-python/tests/test_distributed_writers.py b/icechunk-python/tests/test_distributed_writers.py index fb464e41..7c2249f5 100644 --- a/icechunk-python/tests/test_distributed_writers.py +++ b/icechunk-python/tests/test_distributed_writers.py @@ -1,11 +1,12 @@ -from dataclasses import dataclass -import time import asyncio -import zarr -from dask.distributed import Client -import numpy as np +import time +from dataclasses import dataclass +from typing import cast import icechunk +import numpy as np +import zarr +from dask.distributed import Client @dataclass @@ -13,7 +14,7 @@ class Task: # fixme: useee StorageConfig and StoreConfig once those are pickable storage_config: dict store_config: dict - area: slice + area: tuple[slice, slice] seed: int @@ -57,7 +58,7 @@ async def execute_task(task: Task): store = await mk_store("w", task) group = zarr.group(store=store, overwrite=False) - array = group["array"] + array = cast(zarr.Array, group["array"]) array[task.area] = generate_task_array(task) return store.change_set_bytes() diff --git a/icechunk-python/tests/test_pickle.py b/icechunk-python/tests/test_pickle.py index ee02fdff..50b0b529 100644 --- a/icechunk-python/tests/test_pickle.py +++ b/icechunk-python/tests/test_pickle.py @@ -1,11 +1,10 @@ import pickle +import icechunk import pytest import zarr from zarr.storage import LocalStore -import icechunk - @pytest.fixture(scope="function") async def tmp_store(tmpdir): @@ -32,7 +31,7 @@ async def test_pickle(tmp_store): assert store_loaded == tmp_store root_loaded = zarr.open_group(store_loaded) - array_loaded = root_loaded['ones'] + array_loaded = root_loaded["ones"] assert type(array_loaded) is zarr.Array assert array_loaded == array diff --git a/icechunk-python/tests/test_timetravel.py b/icechunk-python/tests/test_timetravel.py index 84e49bba..d992e508 100644 --- a/icechunk-python/tests/test_timetravel.py +++ b/icechunk-python/tests/test_timetravel.py @@ -1,6 +1,5 @@ -import zarr - import icechunk +import zarr async def test_timetravel(): diff --git a/icechunk-python/tests/test_virtual_ref.py b/icechunk-python/tests/test_virtual_ref.py index dbcb63cb..543f01c8 100644 --- a/icechunk-python/tests/test_virtual_ref.py +++ b/icechunk-python/tests/test_virtual_ref.py @@ -1,14 +1,14 @@ -from object_store import ClientOptions, ObjectStore +import zarr +import zarr.core +import zarr.core.buffer from icechunk import ( IcechunkStore, + S3Credentials, StorageConfig, StoreConfig, - S3Credentials, VirtualRefConfig, ) -import zarr -import zarr.core -import zarr.core.buffer +from object_store import ClientOptions, ObjectStore def write_chunks_to_minio(chunks: list[tuple[str, bytes]]): diff --git a/icechunk-python/tests/test_zarr/test_api.py b/icechunk-python/tests/test_zarr/test_api.py index 7a1fafb3..3d629f46 100644 --- a/icechunk-python/tests/test_zarr/test_api.py +++ b/icechunk-python/tests/test_zarr/test_api.py @@ -1,11 +1,10 @@ import pathlib -from icechunk import IcechunkStore import numpy as np import pytest -from numpy.testing import assert_array_equal - import zarr +from icechunk import IcechunkStore +from numpy.testing import assert_array_equal from zarr import Array, Group from zarr.abc.store import Store from zarr.api.synchronous import ( diff --git a/icechunk-python/tests/test_zarr/test_array.py b/icechunk-python/tests/test_zarr/test_array.py index 31e5fe4e..43044509 100644 --- a/icechunk-python/tests/test_zarr/test_array.py +++ b/icechunk-python/tests/test_zarr/test_array.py @@ -1,13 +1,12 @@ from typing import Literal -from icechunk import IcechunkStore import numpy as np import pytest - -from zarr import Array, AsyncGroup, Group import zarr import zarr.api import zarr.api.asynchronous +from icechunk import IcechunkStore +from zarr import Array, AsyncGroup, Group from zarr.core.common import ZarrFormat from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.storage import StorePath diff --git a/icechunk-python/tests/test_zarr/test_group.py b/icechunk-python/tests/test_zarr/test_group.py index 2fad26b0..1d51813a 100644 --- a/icechunk-python/tests/test_zarr/test_group.py +++ b/icechunk-python/tests/test_zarr/test_group.py @@ -2,14 +2,13 @@ from typing import TYPE_CHECKING, Any, Literal, cast -from icechunk import IcechunkStore import numpy as np import pytest - -from zarr import Array, AsyncArray, AsyncGroup, Group import zarr import zarr.api import zarr.api.asynchronous +from icechunk import IcechunkStore +from zarr import Array, AsyncArray, AsyncGroup, Group from zarr.core.buffer import default_buffer_prototype from zarr.core.common import JSON, ZarrFormat from zarr.core.group import GroupMetadata @@ -152,7 +151,7 @@ def test_group_members(store: IcechunkStore, zarr_format: ZarrFormat) -> None: members_expected["subgroup"] = group.create_group("subgroup") # make a sub-sub-subgroup, to ensure that the children calculation doesn't go # too deep in the hierarchy - subsubgroup = members_expected["subgroup"].create_group("subsubgroup") + subsubgroup = cast(Group, members_expected["subgroup"]).create_group("subsubgroup") subsubsubgroup = subsubgroup.create_group("subsubsubgroup") members_expected["subarray"] = group.create_array( @@ -600,9 +599,7 @@ async def test_asyncgroup_open_wrong_format( store: IcechunkStore, zarr_format: ZarrFormat, ) -> None: - _ = await AsyncGroup.from_store( - store=store, exists_ok=False, zarr_format=zarr_format - ) + _ = await AsyncGroup.from_store(store=store, exists_ok=False, zarr_format=zarr_format) zarr_format_wrong: ZarrFormat # try opening with the wrong zarr format if zarr_format == 3: @@ -639,9 +636,7 @@ def test_asyncgroup_from_dict(store: IcechunkStore, data: dict[str, Any]) -> Non # todo: replace this with a declarative API where we model a full hierarchy -async def test_asyncgroup_getitem( - store: IcechunkStore, zarr_format: ZarrFormat -) -> None: +async def test_asyncgroup_getitem(store: IcechunkStore, zarr_format: ZarrFormat) -> None: """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those members via the `AsyncGroup.getitem` method. @@ -663,9 +658,7 @@ async def test_asyncgroup_getitem( await agroup.getitem("foo") -async def test_asyncgroup_delitem( - store: IcechunkStore, zarr_format: ZarrFormat -) -> None: +async def test_asyncgroup_delitem(store: IcechunkStore, zarr_format: ZarrFormat) -> None: agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) array_name = "sub_array" _ = await agroup.create_array( @@ -915,6 +908,7 @@ async def test_require_array(store: IcechunkStore, zarr_format: ZarrFormat) -> N with pytest.raises(TypeError, match="Incompatible object"): await root.require_array("bar", shape=(10,), dtype="int8") + class TestGroupMetadata: def test_from_dict_extra_fields(self): data = { diff --git a/icechunk-python/tests/test_zarr/test_store/test_core.py b/icechunk-python/tests/test_zarr/test_store/test_core.py index 4a334ffc..c0ba4501 100644 --- a/icechunk-python/tests/test_zarr/test_store/test_core.py +++ b/icechunk-python/tests/test_zarr/test_store/test_core.py @@ -1,5 +1,4 @@ from icechunk import IcechunkStore - from zarr.storage import make_store_path from ...conftest import parse_store diff --git a/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py b/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py index c3981a8e..1ef112b9 100644 --- a/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py +++ b/icechunk-python/tests/test_zarr/test_store/test_icechunk_store.py @@ -1,17 +1,15 @@ from __future__ import annotations + from typing import Any, cast import pytest - +from icechunk import IcechunkStore, StorageConfig from zarr.abc.store import AccessMode from zarr.core.buffer import Buffer, cpu, default_buffer_prototype from zarr.core.common import AccessModeLiteral from zarr.core.sync import collect_aiterator from zarr.testing.store import StoreTests -from icechunk import IcechunkStore, StorageConfig - - DEFAULT_GROUP_METADATA = b'{"zarr_format":3,"node_type":"group","attributes":null}' ARRAY_METADATA = ( b'{"zarr_format":3,"node_type":"array","attributes":{"foo":42},' @@ -25,11 +23,11 @@ class TestIcechunkStore(StoreTests[IcechunkStore, cpu.Buffer]): store_cls = IcechunkStore buffer_cls = cpu.Buffer - @pytest.mark.xfail(reason="not implemented") - async def test_store_eq(self) -> None: + @pytest.mark.xfail(reason="not implemented", strict=False) + def test_store_eq(self, store: IcechunkStore, store_kwargs: dict[str, Any]) -> None: pass - @pytest.mark.xfail(reason="not implemented") + @pytest.mark.xfail(reason="not implemented", strict=False) async def test_serizalizable_store(self, store) -> None: pass @@ -59,9 +57,7 @@ def store_kwargs( return kwargs @pytest.fixture(scope="function") - async def store( - self, store_kwargs: str | None | dict[str, Buffer] - ) -> IcechunkStore: + async def store(self, store_kwargs: str | None | dict[str, Buffer]) -> IcechunkStore: return await IcechunkStore.open(**store_kwargs) @pytest.mark.xfail(reason="Not implemented") @@ -72,9 +68,7 @@ def test_store_repr(self, store: IcechunkStore) -> None: def test_serializable_store(self, store: IcechunkStore) -> None: super().test_serializable_store(store) - async def test_not_writable_store_raises( - self, store_kwargs: dict[str, Any] - ) -> None: + async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> None: create_kwargs = {**store_kwargs, "mode": "r"} with pytest.raises(ValueError): _store = await self.store_cls.open(**create_kwargs) @@ -105,9 +99,7 @@ async def test_set_many(self, store: IcechunkStore) -> None: ] # icechunk strictly checks metadata? data_buf = [ - self.buffer_cls.from_bytes( - k.encode() if k != "zarr.json" else ARRAY_METADATA - ) + self.buffer_cls.from_bytes(k.encode() if k != "zarr.json" else ARRAY_METADATA) for k in keys ] store_dict = dict(zip(keys, data_buf, strict=True)) From edf87bb7f20524a14363c3f1f6fde0c06ca28219 Mon Sep 17 00:00:00 2001 From: Sebastian Galkin Date: Tue, 8 Oct 2024 10:15:28 -0300 Subject: [PATCH 2/4] mypy --- icechunk-python/pyproject.toml | 2 +- icechunk-python/python/icechunk/__init__.py | 2 -- icechunk-python/python/icechunk/_icechunk_python.pyi | 3 ++- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/icechunk-python/pyproject.toml b/icechunk-python/pyproject.toml index a302a709..a35756e7 100644 --- a/icechunk-python/pyproject.toml +++ b/icechunk-python/pyproject.toml @@ -46,7 +46,7 @@ venvPath = "." venv = ".venv" [tool.mypy] -python_version = "3.10" +python_version = "3.11" strict = true warn_unreachable = true enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] diff --git a/icechunk-python/python/icechunk/__init__.py b/icechunk-python/python/icechunk/__init__.py index 6c0b52eb..8965091f 100644 --- a/icechunk-python/python/icechunk/__init__.py +++ b/icechunk-python/python/icechunk/__init__.py @@ -279,8 +279,6 @@ async def get( """ try: result = await self._store.get(key, byte_range) - if result is None: - return None except ValueError as _e: # Zarr python expects None to be returned if the key does not exist # but an IcechunkStore returns an error if the key does not exist diff --git a/icechunk-python/python/icechunk/_icechunk_python.pyi b/icechunk-python/python/icechunk/_icechunk_python.pyi index b6acc932..f7451515 100644 --- a/icechunk-python/python/icechunk/_icechunk_python.pyi +++ b/icechunk-python/python/icechunk/_icechunk_python.pyi @@ -1,3 +1,4 @@ +from typing import Any import abc import datetime from collections.abc import AsyncGenerator @@ -52,7 +53,7 @@ class PyIcechunkStore: def list(self) -> PyAsyncStringGenerator: ... def list_prefix(self, prefix: str) -> PyAsyncStringGenerator: ... def list_dir(self, prefix: str) -> PyAsyncStringGenerator: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... class PyAsyncStringGenerator(AsyncGenerator[str, None], metaclass=abc.ABCMeta): def __aiter__(self) -> PyAsyncStringGenerator: ... From fe2fd6f6ac46e78b2d951bfeab26857951662888 Mon Sep 17 00:00:00 2001 From: Sebastian Galkin Date: Tue, 8 Oct 2024 10:24:23 -0300 Subject: [PATCH 3/4] mypy --- icechunk-python/python/icechunk/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icechunk-python/python/icechunk/__init__.py b/icechunk-python/python/icechunk/__init__.py index 8965091f..f4457c3d 100644 --- a/icechunk-python/python/icechunk/__init__.py +++ b/icechunk-python/python/icechunk/__init__.py @@ -305,7 +305,7 @@ async def get_partial_values( # NOTE: pyo3 has not implicit conversion from an Iterable to a rust iterable. So we convert it # to a list here first. Possible opportunity for optimization. result = await self._store.get_partial_values(list(key_ranges)) - return [prototype.buffer.from_bytes(r) if r is not None else None for r in result] + return [prototype.buffer.from_bytes(r) for r in result] async def exists(self, key: str) -> bool: """Check if a key exists in the store. @@ -392,7 +392,7 @@ async def set_partial_values( """ # NOTE: pyo3 does not implicit conversion from an Iterable to a rust iterable. So we convert it # to a list here first. Possible opportunity for optimization. - return await self._store.set_partial_values(list(key_start_values)) # type: ignore + return await self._store.set_partial_values(list(key_start_values)) @property def supports_listing(self) -> bool: From 1aec1eed0dc6d2b6558e21fb3cfdcad1f897d01c Mon Sep 17 00:00:00 2001 From: Sebastian Galkin Date: Tue, 8 Oct 2024 10:32:28 -0300 Subject: [PATCH 4/4] ruff --- icechunk-python/python/icechunk/_icechunk_python.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icechunk-python/python/icechunk/_icechunk_python.pyi b/icechunk-python/python/icechunk/_icechunk_python.pyi index f7451515..68fe74b4 100644 --- a/icechunk-python/python/icechunk/_icechunk_python.pyi +++ b/icechunk-python/python/icechunk/_icechunk_python.pyi @@ -1,7 +1,7 @@ -from typing import Any import abc import datetime from collections.abc import AsyncGenerator +from typing import Any class PyIcechunkStore: def as_bytes(self) -> bytes: ...