Skip to content

Commit

Permalink
Merge branch 'main' into seba/release-tasks
Browse files Browse the repository at this point in the history
  • Loading branch information
paraseba authored Oct 8, 2024
2 parents 98b05a7 + e2d5b3b commit 921b221
Show file tree
Hide file tree
Showing 22 changed files with 120 additions and 109 deletions.
4 changes: 4 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,7 @@ updates:
day: "monday"
time: "05:00"
timezone: "US/Pacific"
groups:
actions:
patterns:
- "*"
4 changes: 4 additions & 0 deletions .github/workflows/python-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ on:
pull_request:
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

permissions:
contents: read

Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/rust-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ on:
branches:
- main

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

env:
CARGO_INCREMENTAL: 0
CARGO_NET_RETRY: 10
Expand Down
39 changes: 21 additions & 18 deletions icechunk-python/examples/dask_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
"""

import argparse
from dataclasses import dataclass
import time
import asyncio
import zarr
from dask.distributed import Client
import numpy as np
import time
from dataclasses import dataclass
from typing import cast

import icechunk
import numpy as np
import zarr
from dask.distributed import Client


@dataclass
Expand Down Expand Up @@ -57,7 +58,7 @@ async def execute_write_task(task: Task):
store = await mk_store("w", task)

group = zarr.group(store=store, overwrite=False)
array = group["array"]
array = cast(zarr.Array, group["array"])
print(f"Writing at t={task.time}")
data = generate_task_array(task, array.shape[0:2])
array[:, :, task.time] = data
Expand All @@ -72,7 +73,7 @@ async def execute_read_task(task: Task):
print(f"Reading t={task.time}")
store = await mk_store("r", task)
group = zarr.group(store=store, overwrite=False)
array = group["array"]
array = cast(zarr.Array, group["array"])

actual = array[:, :, task.time]
expected = generate_task_array(task, array.shape[0:2])
Expand Down Expand Up @@ -235,9 +236,7 @@ async def distributed_write():
help="size of chunks in the y dimension",
default=112,
)
create_parser.add_argument(
"--name", type=str, help="repository name", required=True
)
create_parser.add_argument("--name", type=str, help="repository name", required=True)
create_parser.set_defaults(command="create")

update_parser = subparsers.add_parser("update", help="add chunks to the array")
Expand All @@ -256,9 +255,7 @@ async def distributed_write():
update_parser.add_argument(
"--workers", type=int, help="number of workers to use", required=True
)
update_parser.add_argument(
"--name", type=str, help="repository name", required=True
)
update_parser.add_argument("--name", type=str, help="repository name", required=True)
update_parser.add_argument(
"--max-sleep",
type=float,
Expand All @@ -275,7 +272,11 @@ async def distributed_write():
"--sleep-tasks", type=int, help="this many tasks sleep", default=0
)
update_parser.add_argument(
"--distributed-cluster", type=bool, help="use multiple machines", action=argparse.BooleanOptionalAction, default=False
"--distributed-cluster",
type=bool,
help="use multiple machines",
action=argparse.BooleanOptionalAction,
default=False,
)
update_parser.set_defaults(command="update")

Expand All @@ -295,11 +296,13 @@ async def distributed_write():
verify_parser.add_argument(
"--workers", type=int, help="number of workers to use", required=True
)
verify_parser.add_argument("--name", type=str, help="repository name", required=True)
verify_parser.add_argument(
"--name", type=str, help="repository name", required=True
)
verify_parser.add_argument(
"--distributed-cluster", type=bool, help="use multiple machines", action=argparse.BooleanOptionalAction, default=False
"--distributed-cluster",
type=bool,
help="use multiple machines",
action=argparse.BooleanOptionalAction,
default=False,
)
verify_parser.set_defaults(command="verify")

Expand Down
18 changes: 8 additions & 10 deletions icechunk-python/examples/smoke-test.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
import asyncio
from typing import Literal
from zarr.storage import LocalStore, MemoryStore, RemoteStore
import math
import random
import string
import time
from typing import Literal

import numpy as np
import zarr
import time

from icechunk import IcechunkStore, S3Credentials, StorageConfig, StoreConfig
from zarr.abc.store import Store

from icechunk import IcechunkStore, StorageConfig, S3Credentials, StoreConfig
import random
import string
from zarr.storage import LocalStore, MemoryStore, RemoteStore


def rdms(n):
Expand Down Expand Up @@ -149,7 +147,7 @@ async def run(store: Store) -> None:
assert isinstance(array, zarr.Array)

print(
f"numchunks: {math.prod(s // c for s, c in zip(array.shape, array.chunks))}"
f"numchunks: {math.prod(s // c for s, c in zip(array.shape, array.chunks, strict=False))}"
)
np.testing.assert_array_equal(array[:], value)

Expand All @@ -176,7 +174,7 @@ async def create_zarr_store(*, store: Literal["memory", "local", "s3"]) -> Store
"key": "minio123",
"secret": "minio123",
"region": "us-east-1",
"endpoint_url": "http://localhost:9000"
"endpoint_url": "http://localhost:9000",
},
)

Expand Down
3 changes: 1 addition & 2 deletions icechunk-python/notebooks/demo-dummy-data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
"\n",
"import numpy as np\n",
"import zarr\n",
"\n",
"from icechunk import IcechunkStore, StorageConfig"
]
},
Expand Down Expand Up @@ -1381,7 +1380,7 @@
" tic = time.time()\n",
" array = root_group[key]\n",
" assert array.dtype == value.dtype, (array.dtype, value.dtype)\n",
" print(f\"numchunks: {math.prod(s // c for s, c in zip(array.shape, array.chunks))}\")\n",
" print(f\"numchunks: {math.prod(s // c for s, c in zip(array.shape, array.chunks, strict=False))}\")\n",
" np.testing.assert_array_equal(array[:], value)\n",
" print(time.time() - tic)"
]
Expand Down
6 changes: 2 additions & 4 deletions icechunk-python/notebooks/demo-s3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
"outputs": [],
"source": [
"import zarr\n",
"\n",
"from icechunk import IcechunkStore, StorageConfig"
]
},
Expand Down Expand Up @@ -1148,7 +1147,6 @@
"outputs": [],
"source": [
"import zarr\n",
"\n",
"from icechunk import IcechunkStore, StorageConfig\n",
"\n",
"# TODO: catalog will handle this\n",
Expand Down Expand Up @@ -1298,8 +1296,8 @@
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import matplotlib as mpl"
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt"
]
},
{
Expand Down
1 change: 0 additions & 1 deletion icechunk-python/notebooks/version-control.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
"outputs": [],
"source": [
"import zarr\n",
"\n",
"from icechunk import IcechunkStore, StorageConfig"
]
},
Expand Down
22 changes: 22 additions & 0 deletions icechunk-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,29 @@ python-source = "python"

[tool.pytest.ini_options]
asyncio_mode = "auto"
minversion = "7"
testpaths = ["tests"]
log_cli_level = "INFO"
xfail_strict = true
addopts = ["-ra", "--strict-config", "--strict-markers"]
filterwarnings = ["error"]

[tool.pyright]
venvPath = "."
venv = ".venv"

[tool.mypy]
python_version = "3.11"
strict = true
warn_unreachable = true
enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"]

[tool.ruff]
line-length = 90

[tool.ruff.lint]
extend-select = [
"B", # flake8-bugbear
"I", # isort
"UP", # pypupgrade
]
22 changes: 10 additions & 12 deletions icechunk-python/python/icechunk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@
from zarr.core.sync import SyncMixin

from ._icechunk_python import (
__version__,
PyIcechunkStore,
S3Credentials,
SnapshotMetadata,
StorageConfig,
StoreConfig,
VirtualRefConfig,
__version__,
pyicechunk_store_create,
pyicechunk_store_exists,
pyicechunk_store_open_existing,
pyicechunk_store_from_bytes,
pyicechunk_store_open_existing,
)

__all__ = [
Expand Down Expand Up @@ -96,7 +96,7 @@ async def open_existing(
cls,
storage: StorageConfig,
mode: AccessModeLiteral = "r",
config: StoreConfig = StoreConfig(),
config: StoreConfig | None = None,
*args: Any,
**kwargs: Any,
) -> Self:
Expand All @@ -110,6 +110,7 @@ async def open_existing(
If opened with AccessModeLiteral "r", the store will be read-only. Otherwise the store will be writable.
"""
config = config or StoreConfig()
read_only = mode == "r"
store = await pyicechunk_store_open_existing(
storage, read_only=read_only, config=config
Expand All @@ -121,7 +122,7 @@ async def create(
cls,
storage: StorageConfig,
mode: AccessModeLiteral = "w",
config: StoreConfig = StoreConfig(),
config: StoreConfig | None = None,
*args: Any,
**kwargs: Any,
) -> Self:
Expand All @@ -133,6 +134,7 @@ async def create(
this will be configured automatically with the provided storage_config as the underlying
storage backend.
"""
config = config or StoreConfig()
store = await pyicechunk_store_create(storage, config=config)
return cls(store=store, mode=mode, args=args, kwargs=kwargs)

Expand Down Expand Up @@ -169,8 +171,8 @@ def __getstate__(self) -> object:

def __setstate__(self, state: Any) -> None:
store_repr = state["store"]
mode = state['mode']
is_read_only = (mode == "r")
mode = state["mode"]
is_read_only = mode == "r"
self._store = pyicechunk_store_from_bytes(store_repr, is_read_only)
self._is_open = True

Expand Down Expand Up @@ -277,8 +279,6 @@ async def get(
"""
try:
result = await self._store.get(key, byte_range)
if result is None:
return None
except ValueError as _e:
# Zarr python expects None to be returned if the key does not exist
# but an IcechunkStore returns an error if the key does not exist
Expand All @@ -305,9 +305,7 @@ async def get_partial_values(
# NOTE: pyo3 has not implicit conversion from an Iterable to a rust iterable. So we convert it
# to a list here first. Possible opportunity for optimization.
result = await self._store.get_partial_values(list(key_ranges))
return [
prototype.buffer.from_bytes(r) if r is not None else None for r in result
]
return [prototype.buffer.from_bytes(r) for r in result]

async def exists(self, key: str) -> bool:
"""Check if a key exists in the store.
Expand Down Expand Up @@ -394,7 +392,7 @@ async def set_partial_values(
"""
# NOTE: pyo3 does not implicit conversion from an Iterable to a rust iterable. So we convert it
# to a list here first. Possible opportunity for optimization.
return await self._store.set_partial_values(list(key_start_values)) # type: ignore
return await self._store.set_partial_values(list(key_start_values))

@property
def supports_listing(self) -> bool:
Expand Down
12 changes: 6 additions & 6 deletions icechunk-python/python/icechunk/_icechunk_python.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import abc
import datetime
from collections.abc import AsyncGenerator
from typing import Any

class PyIcechunkStore:
def as_bytes(self) -> bytes: ...
Expand Down Expand Up @@ -52,7 +53,7 @@ class PyIcechunkStore:
def list(self) -> PyAsyncStringGenerator: ...
def list_prefix(self, prefix: str) -> PyAsyncStringGenerator: ...
def list_dir(self, prefix: str) -> PyAsyncStringGenerator: ...
def __eq__(self, other) -> bool: ...
def __eq__(self, other: Any) -> bool: ...

class PyAsyncStringGenerator(AsyncGenerator[str, None], metaclass=abc.ABCMeta):
def __aiter__(self) -> PyAsyncStringGenerator: ...
Expand Down Expand Up @@ -224,16 +225,15 @@ class StoreConfig:

async def pyicechunk_store_exists(storage: StorageConfig) -> bool: ...
async def pyicechunk_store_create(
storage: StorageConfig, config: StoreConfig
storage: StorageConfig, config: StoreConfig | None
) -> PyIcechunkStore: ...
async def pyicechunk_store_open_existing(
storage: StorageConfig, read_only: bool, config: StoreConfig
storage: StorageConfig, read_only: bool, config: StoreConfig | None
) -> PyIcechunkStore: ...

# async def pyicechunk_store_from_json_config(
# config: str, read_only: bool
# ) -> PyIcechunkStore: ...
def pyicechunk_store_from_bytes(
bytes: bytes, read_only: bool
) -> PyIcechunkStore: ...
def pyicechunk_store_from_bytes(bytes: bytes, read_only: bool) -> PyIcechunkStore: ...

__version__: str
2 changes: 1 addition & 1 deletion icechunk-python/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Literal

from icechunk import IcechunkStore, StorageConfig
import pytest
from icechunk import IcechunkStore, StorageConfig


async def parse_store(store: Literal["local", "memory"], path: str) -> IcechunkStore:
Expand Down
Loading

0 comments on commit 921b221

Please sign in to comment.