Merge branch 'main' into reader_options
norlandrhagen authored May 13, 2024
2 parents 3a29b41 + 5d8d198 commit b9c056a
Showing 12 changed files with 69 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -22,7 +22,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4

6 changes: 2 additions & 4 deletions .pre-commit-config.yaml
@@ -37,10 +37,8 @@ repos:
]
# run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194
# - repo: https://github.com/asottile/pyupgrade
-# rev: v1.22.1
+# rev: v3.15.2
# hooks:
# - id: pyupgrade
# args:
# - "--py3-only"
# # remove on f-strings in Py3.7
# - "--keep-percent-format"
# - "--py310-plus"
2 changes: 1 addition & 1 deletion ci/doc.yml
@@ -3,7 +3,7 @@ channels:
- conda-forge
- nodefaults
dependencies:
-- python>=3.9
+- python>=3.10
- "sphinx"
- pip
- pip:
1 change: 0 additions & 1 deletion pyproject.toml
@@ -14,7 +14,6 @@ classifiers = [
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
9 changes: 5 additions & 4 deletions virtualizarr/kerchunk.py
@@ -1,5 +1,5 @@
from pathlib import Path
-from typing import List, NewType, Optional, Tuple, Union, cast
+from typing import NewType, cast

import ujson # type: ignore
import xarray as xr
@@ -40,10 +40,11 @@ class FileType(AutoName):

def read_kerchunk_references_from_file(
    filepath: str,
-   filetype: Optional[FileType],
+   filetype: FileType | None,
    reader_options: Optional[dict] = {
        "storage_options": {"key": "", "secret": "", "anon": True}
    },
) -> KerchunkStoreRefs:
"""
Read a single legacy file and return kerchunk references to its contents.
@@ -166,7 +167,7 @@ def extract_array_refs(

def parse_array_refs(
arr_refs: KerchunkArrRefs,
-) -> Tuple[dict, ZArray, ZAttrs]:
+) -> tuple[dict, ZArray, ZAttrs]:
zarray = ZArray.from_kerchunk_refs(arr_refs.pop(".zarray"))
zattrs = arr_refs.pop(".zattrs", {})
chunk_dict = arr_refs
@@ -228,7 +229,7 @@ def variable_to_kerchunk_arr_refs(var: xr.Variable) -> KerchunkArrRefs:
f"Can only serialize wrapped arrays of type ManifestArray, but got type {type(marr)}"
)

-arr_refs: dict[str, Union[str, List[Union[str, int]]]] = {
+arr_refs: dict[str, str | list[str | int]] = {
str(chunk_key): chunk_entry.to_kerchunk()
for chunk_key, chunk_entry in marr.manifest.entries.items()
}
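For context, the reader_options parameter visible in this file's signature is what the reader_options branch introduces. A hedged usage sketch based only on the signature shown above; the S3 path is a placeholder and FileType.netcdf4 is assumed to be a member of the FileType enum, not something confirmed by this diff:

```python
from virtualizarr.kerchunk import FileType, read_kerchunk_references_from_file

# Illustrative call only: placeholder path, assumed FileType member, and the
# fsspec-style storage options matching the default shown in the diff.
refs = read_kerchunk_references_from_file(
    filepath="s3://some-bucket/air_temperature.nc",
    filetype=FileType.netcdf4,
    reader_options={"storage_options": {"key": "", "secret": "", "anon": True}},
)
```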
8 changes: 4 additions & 4 deletions virtualizarr/manifests/array.py
@@ -1,5 +1,5 @@
import warnings
-from typing import Any, Tuple, Union
+from typing import Any, Union

import numpy as np

@@ -26,8 +26,8 @@ class ManifestArray:

def __init__(
self,
-zarray: Union[ZArray, dict],
-chunkmanifest: Union[dict, ChunkManifest],
+zarray: ZArray | dict,
+chunkmanifest: dict | ChunkManifest,
) -> None:
"""
Create a ManifestArray directly from the .zarray information of a zarr array and the manifest of chunks.
@@ -80,7 +80,7 @@ def zarray(self) -> ZArray:
return self._zarray

@property
-def chunks(self) -> Tuple[int, ...]:
+def chunks(self) -> tuple[int, ...]:
return tuple(self.zarray.chunks)

@property
23 changes: 12 additions & 11 deletions virtualizarr/manifests/array_api.py
@@ -1,5 +1,6 @@
import itertools
-from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Tuple, Union, cast
+from collections.abc import Callable, Iterable
+from typing import TYPE_CHECKING, cast

import numpy as np

@@ -10,7 +11,7 @@
from .array import ManifestArray


-MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS: Dict[
+MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS: dict[
str, Callable
] = {} # populated by the @implements decorators below

@@ -51,7 +52,7 @@ def _check_same_dtypes(dtypes: list[np.dtype]) -> None:
)


-def _check_same_codecs(codecs: List[Codec]) -> None:
+def _check_same_codecs(codecs: list[Codec]) -> None:
first_codec, *other_codecs = codecs
for codec in other_codecs:
if codec != first_codec:
@@ -62,7 +63,7 @@ def _check_same_codecs(codecs: List[Codec]) -> None:
)


-def _check_same_chunk_shapes(chunks_list: List[Tuple[int, ...]]) -> None:
+def _check_same_chunk_shapes(chunks_list: list[tuple[int, ...]]) -> None:
"""Check all the chunk shapes are the same"""

first_chunks, *other_chunks_list = chunks_list
@@ -77,7 +78,7 @@ def _check_same_chunk_shapes(chunks_list: List[Tuple[int, ...]]) -> None:
@implements(np.result_type)
def result_type(*arrays_and_dtypes) -> np.dtype:
"""Called by xarray to ensure all arguments to concat have the same dtype."""
-first_dtype, *other_dtypes = [np.dtype(obj) for obj in arrays_and_dtypes]
+first_dtype, *other_dtypes = (np.dtype(obj) for obj in arrays_and_dtypes)
for other_dtype in other_dtypes:
if other_dtype != first_dtype:
raise ValueError("dtypes not all consistent")
@@ -86,10 +87,10 @@ def result_type(*arrays_and_dtypes) -> np.dtype:

@implements(np.concatenate)
def concatenate(
arrays: Union[tuple["ManifestArray", ...], list["ManifestArray"]],
arrays: tuple["ManifestArray", ...] | list["ManifestArray"],
/,
*,
-axis: Union[int, None] = 0,
+axis: int | None = 0,
) -> "ManifestArray":
"""
Concatenate ManifestArrays by merging their chunk manifests.
@@ -176,7 +177,7 @@ def _remove_element_at_position(t: tuple[int, ...], pos: int) -> tuple[int, ...]

@implements(np.stack)
def stack(
arrays: Union[tuple["ManifestArray", ...], list["ManifestArray"]],
arrays: tuple["ManifestArray", ...] | list["ManifestArray"],
/,
*,
axis: int = 0,
@@ -234,7 +235,7 @@ def stack(
return ManifestArray(chunkmanifest=stacked_manifest, zarray=new_zarray)


-def _check_same_shapes(shapes: List[Tuple[int, ...]]) -> None:
+def _check_same_shapes(shapes: list[tuple[int, ...]]) -> None:
first_shape, *other_shapes = shapes
for other_shape in other_shapes:
if other_shape != first_shape:
@@ -251,7 +252,7 @@ def expand_dims(x: "ManifestArray", /, *, axis: int = 0) -> "ManifestArray":


@implements(np.broadcast_to)
def broadcast_to(x: "ManifestArray", /, shape: Tuple[int, ...]) -> "ManifestArray":
def broadcast_to(x: "ManifestArray", /, shape: tuple[int, ...]) -> "ManifestArray":
"""
Broadcasts an array to a specified shape, by either manipulating chunk keys or copying chunk manifest entries.
"""
@@ -328,7 +329,7 @@ def _broadcast_scalar(x: "ManifestArray", new_axis_length: int) -> "ManifestArra

@implements(np.full_like)
def full_like(
x: "ManifestArray", /, fill_value: bool, *, dtype: Union[np.dtype, None]
x: "ManifestArray", /, fill_value: bool, *, dtype: np.dtype | None
) -> np.ndarray:
"""
Returns a new array filled with fill_value and having the same shape as an input array x.
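As background for the MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS registry and the @implements decorators touched throughout this file, here is a generic, simplified sketch of that NumPy override pattern (NEP 18); the names and bodies below are illustrative, not copied from the module:

```python
from collections.abc import Callable

import numpy as np

# Registry of NumPy functions overridden for manifest-backed arrays, keyed by
# function name to mirror the dict[str, Callable] annotation in the diff above.
HANDLED_ARRAY_FUNCTIONS: dict[str, Callable] = {}


def implements(numpy_function: Callable) -> Callable:
    """Register a replacement implementation for the given NumPy function."""

    def decorator(func: Callable) -> Callable:
        HANDLED_ARRAY_FUNCTIONS[numpy_function.__name__] = func
        return func

    return decorator


@implements(np.concatenate)
def concatenate(arrays, /, *, axis: int | None = 0):
    # A real implementation merges chunk manifests instead of copying array data.
    raise NotImplementedError
```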
31 changes: 15 additions & 16 deletions virtualizarr/manifests/manifest.py
@@ -1,7 +1,8 @@
import itertools
import json
import re
-from typing import Any, Iterable, Iterator, List, Mapping, Tuple, Union, cast
+from collections.abc import Iterable, Iterator, Mapping
+from typing import Any, cast

import numpy as np
from pydantic import BaseModel, ConfigDict, field_validator
@@ -32,13 +33,11 @@ def __repr__(self) -> str:
return f"ChunkEntry(path='{self.path}', offset={self.offset}, length={self.length})"

@classmethod
-def from_kerchunk(
-cls, path_and_byte_range_info: List[Union[str, int]]
-) -> "ChunkEntry":
+def from_kerchunk(cls, path_and_byte_range_info: list[str | int]) -> "ChunkEntry":
path, offset, length = path_and_byte_range_info
return ChunkEntry(path=path, offset=offset, length=length)

-def to_kerchunk(self) -> List[Union[str, int]]:
+def to_kerchunk(self) -> list[str | int]:
"""Write out in the format that kerchunk uses for chunk entries."""
return [self.path, self.offset, self.length]

@@ -87,7 +86,7 @@ def ndim_chunk_grid(self) -> int:
return get_ndim_from_key(list(self.entries.keys())[0])

@property
-def shape_chunk_grid(self) -> Tuple[int, ...]:
+def shape_chunk_grid(self) -> tuple[int, ...]:
"""
Number of separate chunks along each dimension.
@@ -107,14 +106,14 @@ def __iter__(self) -> Iterator[ChunkKey]:
def __len__(self) -> int:
return len(self.entries)

-def dict(self) -> dict[str, dict[str, Union[str, int]]]:
+def dict(self) -> dict[str, dict[str, str | int]]:
"""Converts the entire manifest to a nested dictionary."""
return {k: dict(entry) for k, entry in self.entries.items()}

@classmethod
def from_zarr_json(cls, filepath: str) -> "ChunkManifest":
"""Create a ChunkManifest from a Zarr manifest.json file."""
with open(filepath, "r") as manifest_file:
with open(filepath) as manifest_file:
entries_dict = json.load(manifest_file)

entries = {
@@ -135,7 +134,7 @@ def _from_kerchunk_chunk_dict(cls, kerchunk_chunk_dict) -> "ChunkManifest":
return ChunkManifest(entries=chunkentries)


-def split(key: ChunkKey) -> List[int]:
+def split(key: ChunkKey) -> list[int]:
return list(int(i) for i in key.split("."))


@@ -168,7 +167,7 @@ def validate_chunk_keys(chunk_keys: Iterable[ChunkKey]):
check_keys_form_grid(chunk_keys)


-def get_chunk_grid_shape(chunk_keys: Iterable[ChunkKey]) -> Tuple[int, ...]:
+def get_chunk_grid_shape(chunk_keys: Iterable[ChunkKey]) -> tuple[int, ...]:
# find max chunk index along each dimension
zipped_indices = zip(*[split(key) for key in chunk_keys])
chunk_grid_shape = tuple(
@@ -186,16 +185,16 @@ def check_keys_form_grid(chunk_keys: Iterable[ChunkKey]):
all_possible_combos = itertools.product(
*[range(length) for length in chunk_grid_shape]
)
-all_required_chunk_keys: set[ChunkKey] = set(
+all_required_chunk_keys: set[ChunkKey] = {
join(inds) for inds in all_possible_combos
-)
+}

# check that every possible combination is represented once in the list of chunk keys
if set(chunk_keys) != all_required_chunk_keys:
raise ValueError("Chunk keys do not form a complete grid")


def concat_manifests(manifests: List["ChunkManifest"], axis: int) -> "ChunkManifest":
def concat_manifests(manifests: list["ChunkManifest"], axis: int) -> "ChunkManifest":
"""
Concatenate manifests along an existing dimension.
@@ -216,7 +215,7 @@ def concat_manifests(manifests: List["ChunkManif
for manifest, offset in zip(manifests[1:], chunk_index_offsets)
]
all_entries = [manifests[0].entries] + new_entries
-merged_entries = dict((k, v) for d in all_entries for k, v in d.items())
+merged_entries = {k: v for d in all_entries for k, v in d.items()}

# Arguably don't need to re-perform validation checks on a manifest we created out of already-validated manifests
# Could use pydantic's model_construct classmethod to skip these checks
@@ -237,7 +236,7 @@ def offset_key(key: ChunkKey, axis: int, offset: int) -> ChunkKey:
return {offset_key(k, axis, offset): v for k, v in entries.items()}


def stack_manifests(manifests: List[ChunkManifest], axis: int) -> "ChunkManifest":
def stack_manifests(manifests: list[ChunkManifest], axis: int) -> "ChunkManifest":
"""
Stack manifests along a new dimension.
@@ -252,7 +251,7 @@ def stack_manifests(manifests: List[ChunkManifest
insert_new_axis_into_chunk_keys(manifest.entries, axis, new_index_value)
for manifest, new_index_value in zip(manifests, chunk_indexes_along_new_dim)
]
-merged_entries = dict((k, v) for d in new_entries for k, v in d.items())
+merged_entries = {k: v for d in new_entries for k, v in d.items()}

# Arguably don't need to re-perform validation checks on a manifest we created out of already-validated manifests
# Could use pydantic's model_construct classmethod to skip these checks
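As a side note on the from_kerchunk/to_kerchunk pair touched above, a minimal round-trip sketch based only on the signatures shown in this diff; the path and byte-range values are placeholders, and the import assumes ChunkEntry is accessible from this module:

```python
from virtualizarr.manifests.manifest import ChunkEntry

# Kerchunk stores a chunk reference as [path, offset, length]; ChunkEntry
# round-trips that form. The values below are placeholders, not real data.
entry = ChunkEntry.from_kerchunk(["s3://some-bucket/data.nc", 100, 200])
assert entry.to_kerchunk() == ["s3://some-bucket/data.nc", 100, 200]
```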
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_kerchunk.py
@@ -113,7 +113,7 @@ def test_accessor_to_kerchunk_json(self, tmp_path):

ds.virtualize.to_kerchunk(filepath, format="json")

with open(filepath, "r") as json_file:
with open(filepath) as json_file:
loaded_refs = ujson.load(json_file)

expected_ds_refs = {
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_xarray.py
@@ -1,4 +1,4 @@
-from typing import Mapping
+from collections.abc import Mapping

import numpy as np
import pytest