-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix group kwarg #338
Fix group kwarg #338
Changes from 18 commits
48c74b9
97ea1ce
66165fe
aefbb4f
2bd6547
18ce7a5
5681a89
10c04e4
e6ae664
bffd921
10c7153
8fdcefe
71aa18d
97cddf0
28d56ab
dcbf9ff
1991c81
9470dcd
57b65cb
6846606
bcafaf1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -43,42 +43,40 @@ def virtual_vars_and_metadata_from_kerchunk_refs( | |||
return virtual_vars, ds_attrs, coord_names | ||||
|
||||
|
||||
def extract_group(vds_refs: KerchunkStoreRefs, group: str | None) -> KerchunkStoreRefs: | ||||
"""Extract only the part of the kerchunk reference dict that is relevant to a single HDF group""" | ||||
def extract_group(vds_refs: KerchunkStoreRefs, group: str) -> KerchunkStoreRefs: | ||||
""" | ||||
Extract only the part of the kerchunk reference dict that is relevant to a single HDF group. | ||||
|
||||
group : str | ||||
Should be a non-empty string | ||||
""" | ||||
hdf_groups = [ | ||||
k.removesuffix(".zgroup") for k in vds_refs["refs"].keys() if ".zgroup" in k | ||||
] | ||||
if len(hdf_groups) == 1: | ||||
return vds_refs | ||||
else: | ||||
if group is None: | ||||
raise ValueError( | ||||
f"Multiple HDF Groups found. Must specify group= keyword to select one of {hdf_groups}" | ||||
) | ||||
else: | ||||
# Ensure supplied group kwarg is consistent with kerchunk keys | ||||
if not group.endswith("/"): | ||||
group += "/" | ||||
if group.startswith("/"): | ||||
group = group.removeprefix("/") | ||||
|
||||
if group not in hdf_groups: | ||||
raise ValueError(f'Group "{group}" not found in {hdf_groups}') | ||||
|
||||
# Filter by group prefix and remove prefix from all keys | ||||
groupdict = { | ||||
k.removeprefix(group): v | ||||
for k, v in vds_refs["refs"].items() | ||||
if k.startswith(group) | ||||
} | ||||
# Also remove group prefix from _ARRAY_DIMENSIONS | ||||
for k, v in groupdict.items(): | ||||
if isinstance(v, str): | ||||
groupdict[k] = v.replace("\\/", "/").replace(group, "") | ||||
|
||||
vds_refs["refs"] = groupdict | ||||
# Ensure supplied group kwarg is consistent with kerchunk keys | ||||
if not group.endswith("/"): | ||||
group += "/" | ||||
if group.startswith("/"): | ||||
group = group.removeprefix("/") | ||||
|
||||
return KerchunkStoreRefs(vds_refs) | ||||
if group not in hdf_groups: | ||||
raise ValueError(f'Group "{group}" not found in {hdf_groups}') | ||||
|
||||
# Filter by group prefix and remove prefix from all keys | ||||
groupdict = { | ||||
k.removeprefix(group): v | ||||
for k, v in vds_refs["refs"].items() | ||||
if k.startswith(group) | ||||
} | ||||
# Also remove group prefix from _ARRAY_DIMENSIONS | ||||
for k, v in groupdict.items(): | ||||
if isinstance(v, str): | ||||
groupdict[k] = v.replace("\\/", "/").replace(group, "") | ||||
|
||||
vds_refs["refs"] = groupdict | ||||
|
||||
return KerchunkStoreRefs(vds_refs) | ||||
|
||||
|
||||
def virtual_vars_from_kerchunk_refs( | ||||
|
@@ -222,9 +220,17 @@ def find_var_names(ds_reference_dict: KerchunkStoreRefs) -> list[str]: | |||
"""Find the names of zarr variables in this store/group.""" | ||||
|
||||
refs = ds_reference_dict["refs"] | ||||
found_var_names = {key.split("/")[0] for key in refs.keys() if "/" in key} | ||||
|
||||
return list(found_var_names) | ||||
found_var_names = [] | ||||
for key in refs.keys(): | ||||
# has to capture "foo/.zarray", but ignore ".zgroup", ".zattrs", and "subgroup/bar/.zarray" | ||||
# TODO this might be a sign that we should introduce a KerchunkGroupRefs type and cut down the references before getting to this point... | ||||
if key not in (".zgroup", ".zattrs", ".zmetadata"): | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @norlandrhagen the only reason I needed the
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, I think we were relying on the parquet directory to have a There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Okay let's merge then. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh I have another bug to fix first... |
||||
first_part, second_part, *_ = key.split("/") | ||||
if second_part == ".zarray": | ||||
found_var_names.append(first_part) | ||||
|
||||
return found_var_names | ||||
|
||||
|
||||
def extract_array_refs( | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This would be cleaner, but is a refactor that can be done afterwards.