Skip to content

Commit

Permalink
Merge pull request #43 from TomNicholas/netcdf3_netcdf4
Browse files Browse the repository at this point in the history
Adds netCDF3 vs netCDF4 distinction to _automatically_determine_filetype.
  • Loading branch information
TomNicholas authored Mar 22, 2024
2 parents d028fe3 + a2e2cd5 commit b80059b
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:

- name: Install virtualizarr
run: |
python -m pip install -e . --no-deps --force-reinstall
python -m pip install -e ".[test]"
- name: Conda list
run: conda list
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@ dependencies = [

[project.optional-dependencies]
test = [
"netCDF4",
"pre-commit",
"pytest-mypy",
"pytest",
"scipy"
]


Expand Down
25 changes: 21 additions & 4 deletions virtualizarr/kerchunk.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import importlib.util
from pathlib import Path
from typing import List, NewType, Optional, Tuple, Union, cast

Expand Down Expand Up @@ -67,10 +68,26 @@ def _automatically_determine_filetype(filepath: str) -> str:
file_extension = Path(filepath).suffix

if file_extension == ".nc":
# TODO how can we automatically distinguish netCDF3 and 4?
raise NotImplementedError(
"Cannot unambiguously automatically determine which kerchunk file format reader to use"
)
# Check that the netCDF4 library is installed.
# It is currently not a required dependency in pyproject.toml.

if importlib.util.find_spec("netCDF4") is None:
raise ImportError(
"netCDF4 library is required to determine NetCDF file type."
)

import netCDF4

with netCDF4.Dataset(filepath, "r") as dataset:
if dataset.data_model == "NETCDF4":
filetype = "netCDF4"
elif dataset.data_model == "NETCDF3_CLASSIC":
filetype = "netCDF3"
else:
raise NotImplementedError(
".nc file does not appear to be NETCDF3 OR NETCDF4"
)

elif file_extension == ".zarr":
# TODO we could imagine opening an existing zarr store, concatenating it, and writing a new virtual one...
raise NotImplementedError()
Expand Down
15 changes: 15 additions & 0 deletions virtualizarr/tests/test_kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import xarray as xr
import xarray.testing as xrt

from virtualizarr.kerchunk import _automatically_determine_filetype
from virtualizarr.manifests import ChunkEntry, ChunkManifest, ManifestArray
from virtualizarr.xarray import dataset_from_kerchunk_refs

Expand Down Expand Up @@ -130,3 +131,17 @@ def test_kerchunk_roundtrip_in_memory_no_concat():

# Assert equal to original dataset
xrt.assert_equal(roundtrip, ds)


def test_automatically_determine_filetype_netcdf3_netcdf4():
    """Check that ``.nc`` files are classified as netCDF3 or netCDF4.

    The same small dataset is written once as NETCDF3_CLASSIC and once as
    NETCDF4, and ``_automatically_determine_filetype`` is expected to return
    ``"netCDF3"`` and ``"netCDF4"`` respectively.
    """
    # Function-scope imports keep this test self-contained regardless of
    # what the module already imports at the top.
    import tempfile
    from pathlib import Path

    ds = xr.Dataset({"a": (["x"], [0, 1])})

    # Write into a fresh temporary directory instead of fixed paths under
    # /tmp: this works on platforms without /tmp, avoids collisions between
    # concurrent test runs, and removes the files when the test finishes.
    with tempfile.TemporaryDirectory() as tmpdir:
        netcdf3_file_path = str(Path(tmpdir) / "netcdf3.nc")
        netcdf4_file_path = str(Path(tmpdir) / "netcdf4.nc")

        # Write the dataset in both netCDF flavours. The format is stated
        # explicitly for both files so the test does not depend on which
        # default xarray picks for the installed backends.
        ds.to_netcdf(netcdf3_file_path, engine="scipy", format="NETCDF3_CLASSIC")
        ds.to_netcdf(netcdf4_file_path, format="NETCDF4")

        assert "netCDF3" == _automatically_determine_filetype(netcdf3_file_path)
        assert "netCDF4" == _automatically_determine_filetype(netcdf4_file_path)

0 comments on commit b80059b

Please sign in to comment.