Skip to content

Commit

Permalink
Merge pull request #488 from martindurant/append_deep
Browse files: browse the repository at this point in the history
More thorough store handling during combine/append
  • Loading branch information
martindurant authored Aug 2, 2024
2 parents bc1275c + 429d1df commit 3ae8939
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
7 changes: 5 additions & 2 deletions kerchunk/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def __init__(
raise ValueError("Values being mapped cannot also be identical")
self.preprocess = preprocess
self.postprocess = postprocess
self.out = out or {}
self.out = out if out is not None else {}
self.coos = None
self.done = set()

Expand Down Expand Up @@ -383,7 +383,9 @@ def store_coords(self):
"""
Write coordinate arrays into the output
"""
group = zarr.open(self.out)
kv = {}
store = zarr.storage.KVStore(kv)
group = zarr.open(store)
m = self.fss[0].get_mapper("")
z = zarr.open(m)
for k, v in self.coos.items():
Expand Down Expand Up @@ -435,6 +437,7 @@ def store_coords(self):
else:
arr.attrs.update(self.cf_units[k])
# TODO: rewrite .zarray/.zattrs with ujson to save space. Maybe make them by hand anyway.
self.out.update(kv)
logger.debug("Written coordinates")
for fn in [".zgroup", ".zattrs"]:
# top-level group attributes from first input
Expand Down
8 changes: 4 additions & 4 deletions kerchunk/tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,29 +475,29 @@ def test_lazy_filler(tmpdir, refs):
mzz.second_pass()

# actual references don't show
assert set(out) == {
assert set(out.zmetadata) == {
".zattrs",
".zgroup",
".zmetadata",
"data/.zarray",
"data/.zattrs",
"static/.zarray",
"static/.zattrs",
"time/.zarray",
"time/.zattrs",
}
assert out._items
out.flush()
assert set(out) == {
assert set(out.zmetadata) == {
".zattrs",
".zgroup",
".zmetadata",
"data/.zarray",
"data/.zattrs",
"static/.zarray",
"static/.zattrs",
"time/.zarray",
"time/.zattrs",
}
assert set(out._items) == {".zmetadata"}
allfiles = fs.find(tmpdir)
assert [
f"{tmpdir}/{a}" in allfiles for a in ["static/refs.0.parq", "data/refs.0.parq"]
Expand Down

0 comments on commit 3ae8939

Please sign in to comment.