Skip to content

Commit

Permalink
_automatically_determine_filetype netCDF3 vs netCDF4
Browse files Browse the repository at this point in the history
  • Loading branch information
norlandrhagen committed Mar 20, 2024
1 parent 5eb15bb commit 54483c5
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 4 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ test = [
"pre-commit",
"pytest-mypy",
"pytest",
"netCDF4"
]


Expand Down
25 changes: 21 additions & 4 deletions virtualizarr/kerchunk.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import importlib.util
from pathlib import Path
from typing import List, NewType, Optional, Tuple, Union, cast

Expand Down Expand Up @@ -61,10 +62,26 @@ def _automatically_determine_filetype(filepath: str) -> str:
file_extension = Path(filepath).suffix

if file_extension == ".nc":
# TODO how can we automatically distinguish netCDF3 and 4?
raise NotImplementedError(
"Cannot unambiguously automatically determine which kerchunk file format reader to use"
)
# Check whether the netCDF4 library is installed.
# It is currently not a requirement in pyproject.toml.

if importlib.util.find_spec("netCDF4") is None:
raise ImportError(
"netCDF4 library is required to determine NetCDF file type."
)

import netCDF4

with netCDF4.Dataset(filepath, "r") as dataset:
if dataset.data_model == "NETCDF4":
filetype = "netCDF4"
elif dataset.data_model == "NETCDF3_CLASSIC":
filetype = "netCDF3"
else:
raise NotImplementedError(
".nc file does not appear to be NETCDF3 OR NETCDF4"
)

elif file_extension == ".zarr":
# TODO we could imagine opening an existing zarr store, concatenating it, and writing a new virtual one...
raise NotImplementedError()
Expand Down
15 changes: 15 additions & 0 deletions virtualizarr/tests/test_kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import xarray as xr
import xarray.testing as xrt

from virtualizarr.kerchunk import _automatically_determine_filetype
from virtualizarr.manifests import ChunkEntry, ChunkManifest, ManifestArray
from virtualizarr.xarray import dataset_from_kerchunk_refs

Expand Down Expand Up @@ -130,3 +131,17 @@ def test_kerchunk_roundtrip_in_memory_no_concat():

# Assert equal to original dataset
xrt.assert_equal(roundtrip, ds)


def test_automatically_determine_filetype_netcdf3_netcdf4():
    """Check that ``.nc`` files are classified as netCDF3 or netCDF4.

    The same small dataset is written once in each format, and
    ``_automatically_determine_filetype`` is expected to report the
    matching file type string for each file.
    """
    # Function-scope imports keep this test self-contained regardless of
    # what the test module imports at the top.
    import tempfile
    from pathlib import Path

    ds = xr.Dataset({"a": (["x"], [0, 1])})

    # Write into a throwaway directory instead of hard-coded "/tmp/..."
    # paths: those do not exist on every platform, can collide between
    # concurrent test runs, and were never cleaned up.
    with tempfile.TemporaryDirectory() as tmp_dir:
        netcdf3_file_path = str(Path(tmp_dir) / "netcdf3.nc")
        netcdf4_file_path = str(Path(tmp_dir) / "netcdf4.nc")

        # Write the two versions of NetCDF. The NetCDF4 format is requested
        # explicitly so the fixture does not depend on xarray's default
        # engine/format selection.
        ds.to_netcdf(netcdf3_file_path, engine="scipy", format="NETCDF3_CLASSIC")
        ds.to_netcdf(netcdf4_file_path, format="NETCDF4")

        assert "netCDF3" == _automatically_determine_filetype(netcdf3_file_path)
        assert "netCDF4" == _automatically_determine_filetype(netcdf4_file_path)

0 comments on commit 54483c5

Please sign in to comment.