Commit b3a1b56 (branch: dev)
davidhassell committed Jun 20, 2024
1 parent 3703495
Showing 10 changed files with 98 additions and 65 deletions.
4 changes: 2 additions & 2 deletions Changelog.rst
@@ -19,8 +19,8 @@ Version NEXTVERSION
* New class `cfdm.NetCDFIndexer`
* New dependency: ``h5netcdf>=1.3.0``
* New dependency: ``h5py>=3.10.0``
- * New dependency: ``s3fs>=2024.3.0``
- * New dependency: ``dask>=2024.4.1``
+ * New dependency: ``s3fs>=2024.6.0``
+ * New dependency: ``dask>=2024.6.0``
* Removed dependency: ``netcdf_flattener``

----
4 changes: 2 additions & 2 deletions cfdm/__init__.py
@@ -117,7 +117,7 @@
except ImportError as error1:
raise ImportError(_error0 + str(error1))

_minimum_vn = "2024.3.0"
_minimum_vn = "2024.6.0"
if Version(s3fs.__version__) < Version(_minimum_vn):
raise ValueError(
f"Bad s3fs version: cfdm requires s3fs>={_minimum_vn}. "
@@ -143,7 +143,7 @@
except ImportError as error1:
raise ImportError(_error0 + str(error1))

_minimum_vn = "2024.4.0"
_minimum_vn = "2024.6.0"
if Version(dask.__version__) < Version(_minimum_vn):
raise ValueError(
f"Bad scipy version: cfdm requires dask>={_minimum_vn}. "
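The version checks above follow a small, self-contained pattern; a minimal sketch outside of cfdm (only the pattern and the 2024.6.0 minimum come from this diff; the exact error wording below is illustrative):

    # Minimal sketch of the dependency version check pattern shown above,
    # using the new minimum pin from this commit.
    import dask
    from packaging.version import Version

    _minimum_vn = "2024.6.0"
    if Version(dask.__version__) < Version(_minimum_vn):
        raise ValueError(
            f"Bad dask version: cfdm requires dask>={_minimum_vn}. "
            f"Got {dask.__version__}"
        )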
6 changes: 3 additions & 3 deletions cfdm/data/mixin/filearraymixin.py
@@ -314,9 +314,9 @@ def get_storage_options(

if parsed_filename is not None and parsed_filename.scheme == "s3":
# Derive endpoint_url from filename
- storage_options[
- "endpoint_url"
- ] = f"https://{parsed_filename.netloc}"
+ storage_options["endpoint_url"] = (
+ f"https://{parsed_filename.netloc}"
+ )

return storage_options

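The reformatted assignment above derives an S3 endpoint URL from the network location of the file name. A minimal sketch of that derivation, assuming the name is parsed with urllib.parse.urlparse (the URI itself is hypothetical):

    # Sketch of deriving endpoint_url from an "s3://" file name, as in
    # get_storage_options() above.
    from urllib.parse import urlparse

    filename = "s3://object-store.example.org/my-bucket/file.nc"
    parsed_filename = urlparse(filename)

    storage_options = {}
    if parsed_filename is not None and parsed_filename.scheme == "s3":
        # Derive endpoint_url from the URI's network location
        storage_options["endpoint_url"] = (
            f"https://{parsed_filename.netloc}"
        )

    print(storage_options)
    # {'endpoint_url': 'https://object-store.example.org'}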
99 changes: 52 additions & 47 deletions cfdm/data/netcdfindexer.py
@@ -1,7 +1,7 @@
"""A data indexer that applies netCDF masking and unpacking.
- Portions of this code were adapted from the `netCDF4` library, which
- carries the following MIT License:
+ Portions of this code were adapted from the `netCDF4` Python library,
+ which carries the following MIT License:
Copyright 2008 Jeffrey Whitaker
@@ -19,6 +19,7 @@
included in all copies or substantial portions of the Software.
"""

import logging
from math import prod
from numbers import Integral
@@ -38,22 +39,18 @@ class netcdf_indexer:
means that the index for each dimension is applied independently,
regardless of how that index was defined. For instance, the
indices ``[[0, 1], [1, 3], 0]`` and ``[:2, 1:4:2, 0]`` will give
- identical results. This behaviour is different to that of
- `numpy`. Non-orthogonal indexing means that normal `numpy`
- indexing rules are applied.
- During indexing, masking and unpacking is applied according to the
- netCDF conventions, either or both of which may be disabled via
- initialisation options.
+ identical results. Orthogonal indexing is different to the
+ indexing behaviour of `numpy`. Non-orthogonal indexing means that
+ normal `numpy` indexing rules are applied.
In addition, string and character variables are always converted
to unicode arrays, the latter with the last dimension
concatenated.
- Masking and unpacking operations are defined by the conventions
- for netCDF attributes, which are either provided as part of the
- input *data* object, or given with the input *attributes*
- parameter.
+ Masking and unpacking operations, either or both may be disabled
+ via initialisation options, are defined by the conventions for
+ netCDF attributes, which are either provided as part of the input
+ *variable* object, or given with the input *attributes* parameter.
The relevant netCDF attributes that are considered are:
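A minimal sketch of the orthogonal indexing behaviour described in this docstring, wrapping a bare `numpy` array exactly as the new unit tests below do (masking and unpacking are not exercised here):

    # Sketch: orthogonal indexing applies each dimension's index
    # independently, so a list index and the equivalent slice select the
    # same subspace. Plain numpy would broadcast the two list indices
    # together ("fancy" indexing) instead.
    import numpy as np
    import cfdm

    a = np.arange(120).reshape(4, 6, 5)
    n = cfdm.netcdf_indexer(a)

    x = n[[0, 1], [1, 3], 0]
    y = n[:2, 1:4:2, 0]
    print(np.array_equal(x, y))  # True, per the docstring above

    print(a[[0, 1], [1, 3], 0].shape)  # (2,) with numpy's fancy indexing
    print(x.shape)                     # (2, 2) with orthogonal indexing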
@@ -129,7 +126,7 @@ def __init__(
variable:
The variable to be indexed. May be any variable that
has the same API as one of `numpy.ndarray`,
`netCDF4.Variable` or `h5py.Variable` (which includes
`netCDF4.Variable`, or `h5py.Variable` (which includes
`h5netcdf.Variable`). Any masking and unpacking that
could be applied by *variable* itself (e.g. by a
`netCDF4.Variable` instance) is disabled, ensuring
Expand Down Expand Up @@ -174,15 +171,20 @@ def __init__(
relevant to masking and unpacking are considered, with
all other attributes being ignored. If *attributes* is
`None`, the default, then the netCDF attributes stored
- by *variable* itself (if any) are used. If
- *attributes* is not `None`, then any netCDF attributes
- stored by *variable* itself are ignored.
+ by *variable* (if any) are used. If *attributes* is
+ not `None`, then any netCDF attributes stored by
+ *variable* are ignored.
copy: `bool`, optional
- If True then return a copy of the subspace that is not
- a view of part of the the original data. If False, the
- default, then the returned subspace could be either a
- copy or a view.
+ If True then return a `numpy` array that is not a view
+ of part of the original data, i.e. in-place
+ changes to the returned subspace will not affect the
+ original *variable*. This is done by returning an
+ in-memory copy of the subspace. If False, the default, no
+ in-memory copy is done, and then whether or not
+ in-place changes to the returned subspace affect
+ *variable* will depend on how subspacing is
+ implemented by *variable*.
"""
self.variable = variable
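A short sketch of the *copy* behaviour documented above; the array is illustrative, and only the `copy` keyword comes from the parameter list being documented:

    # Sketch: with copy=True the returned subspace is an independent
    # in-memory copy, so writing to it never affects the wrapped variable.
    import numpy as np
    import cfdm

    a = np.arange(10.0)
    independent = cfdm.netcdf_indexer(a, copy=True)[2:5]
    independent[...] = -999.0  # does not change 'a'

    maybe_view = cfdm.netcdf_indexer(a)[2:5]  # copy or view, per the default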
@@ -196,7 +198,7 @@ def __init__(
def __getitem__(self, index):
"""Return a subspace of the variable as a `numpy` array.
- v.__getitem__(index) <==> v[index]
+ n.__getitem__(index) <==> v[index]
If `__orthogonal_indexing__` is True then indexing is
orthogonal. If `__orthogonal_indexing__` is False then normal
@@ -414,9 +416,9 @@ def _index(self, index):
# Still here? Then do orthogonal indexing.
# ------------------------------------------------------------

- # Create an index that replaces integer indices with size 1
- # slices, so that their axes are not dropped yet (they will be
- # dropeed later).
+ # Create an index that replaces integers with size 1 slices,
+ # so that their axes are not dropped yet (they will be dropped
+ # later).
index0 = [
slice(i, i + 1) if isinstance(i, Integral) else i for i in index
]
@@ -426,17 +428,18 @@
# variable natively supports orthogonal indexing.
#
# Note: `netCDF4.Variable` natively supports orthogonal
- # indexing; but `numpy.ndarray`, `h5netcdf.File` and
- # `h5py.File` do not.
+ # indexing; but `h5netcdf.File`, `h5py.File`, and
+ # `numpy.ndarray`, do not.
data = data[tuple(index0)]
else:
# There are two or more list/1-d array indices, and the
# variable does not natively support orthogonal indexing
# => emulate orthogonal indexing with a sequence of
- # subspaces, one for each list/1-d array index.
+ # independent subspaces, one for each list/1-d array
+ # index.

- # 1) Apply the slice indices at the time as the list/1-d
- # array index that gives the smallest result.
+ # 1) Apply the slice indices at the same time as the
+ # list/1-d array index that gives the smallest result.

# Create an index that replaces each list/1-d array with
# slice(None)
@@ -447,7 +450,7 @@
# Find the position of the list/1-d array index that gives
# the smallest result, and apply the subspace of slices
# and the chosen list/1-d array index. This will give the
- # samllest memory footprint of the whole operation.
+ # smallest high-water memory mark of the whole operation.
shape1 = self.index_shape(index1, data.shape)
size1 = prod(shape1)
sizes = [
@@ -503,7 +506,7 @@ def _mask(self, data, dtype, attributes, dtype_unsigned_int):
:Returns:
- `nump.ndarray`
+ `numpy.ndarray`
The masked data.
"""
@@ -802,7 +805,7 @@ def attributes(self):
**Examples**
- >>> v.attributes()
+ >>> n.attributes()
{'standard_name': 'air_temperature',
'missing_value': -999.0}
@@ -833,46 +836,48 @@ def attributes(self):
def index_shape(cls, index, shape):
"""Return the shape of the array subspace implied by indices.
- .. versionadded:: (cfdm) NEXTRELEASE
+ .. versionadded:: (cfdm) NEXTVERSION
:Parameters:
- indices: `tuple`
+ index: `tuple`
The indices to be applied to an array with shape
*shape*.
- shape: sequence of `ints`
+ shape: sequence of `int`
The shape of the array to be subspaced.
:Returns:
`list`
- The shape of the subspace defined by the *indices*.
+ The shape of the subspace defined by the *index*.
**Examples**
>>> import numpy as np
- >>> n.indices_shape((slice(2, 5), 4), (10, 20))
+ >>> n.index_shape((slice(2, 5), [4]), (10, 20))
[3, 1]
- >>> n.indices_shape(([2, 3, 4], np.arange(1, 6)), (10, 20))
+ >>> n.index_shape((slice(2, 5), 4), (10, 20))
+ [3]
+ >>> n.index_shape(([2, 3, 4], np.arange(1, 6)), (10, 20))
[3, 5]
- >>> n.indices_shape((slice(None), [True, False, True]), (10, 3))
+ >>> n.index_shape((slice(None), [True, False, True]), (10, 3))
[10, 2]
>>> index0 = np.arange(5)
>>> index0 = index0[index0 < 3]
- >>> n.indices_shape((index0, []), (10, 20))
+ >>> n.index_shape((index0, []), (10, 20))
[3, 0]
- >>> n.indices_shape((slice(1, 5, 3), 3), (10, 20))
- [2, 1]
- >>> n.indices_shape((slice(5, 1, -2), 3), (10, 20))
+ >>> n.index_shape((slice(1, 5, 3), [3]), (10, 20))
[2, 1]
- >>> n.indices_shape((slice(5, 1, 3), 3), (10, 20))
- [0, 1]
- >>> n.indices_shape((slice(1, 5, -3), 3), (10, 20))
+ >>> n.index_shape((slice(5, 1, -2), 3), (10, 20))
+ [2]
+ >>> n.index_shape((slice(5, 1, 3), [3]), (10, 20))
[0, 1]
+ >>> n.index_shape((slice(1, 5, -3), 3), (10, 20))
+ [0]
"""
implied_shape = []
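The `_index` comments earlier in this file's diff describe how orthogonal indexing is emulated when the wrapped variable does not support it natively: each list/1-d array index is applied as its own independent subspace, starting with the one that gives the smallest result so that the intermediate arrays stay small. A standalone sketch of that idea (not cfdm's implementation):

    # Standalone sketch of emulating orthogonal indexing with a sequence
    # of independent 1-d subspaces, smallest result first.
    import numpy as np

    def orthogonal_take(data, axis_indices):
        """Apply one 1-d integer index per axis, independently."""
        # Applying the most selective index first keeps the intermediate
        # arrays, and hence the peak memory use, as small as possible.
        for axis, ind in sorted(axis_indices, key=lambda ai: len(ai[1])):
            data = np.take(data, ind, axis=axis)
        return data

    a = np.arange(200).reshape(10, 20)
    b = orthogonal_take(a, [(0, [2, 3, 4]), (1, list(range(1, 6)))])
    print(b.shape)  # (3, 5), as in the index_shape examples above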
1 change: 1 addition & 0 deletions cfdm/read_write/netcdf/flatten/__init__.py
@@ -12,4 +12,5 @@
of the License at http://www.apache.org/licenses/LICENSE-2.0.
"""

from .flatten import netcdf_flatten
1 change: 1 addition & 0 deletions cfdm/read_write/netcdf/flatten/config.py
@@ -3,6 +3,7 @@
.. versionadded:: (cfdm) NEXTVERSION
"""

from dataclasses import dataclass

# Maximum length of name after which it is replaced with its hash
12 changes: 6 additions & 6 deletions cfdm/read_write/netcdf/flatten/flatten.py
@@ -741,9 +741,9 @@ def flatten_dimension(self, dim):
)

# Store new name in dict for resolving references later
- self._dim_map[
- self.pathname(self.group(dim), self.name(dim))
- ] = new_name
+ self._dim_map[self.pathname(self.group(dim), self.name(dim))] = (
+ new_name
+ )

# Add to name mapping attribute
self._dim_map_value.append(
@@ -822,9 +822,9 @@ def flatten_variable(self, var):
new_var.setncatts(attributes)

# Store new name in dict for resolving references later
- self._var_map[
- self.pathname(self.group(var), self.name(var))
- ] = new_name
+ self._var_map[self.pathname(self.group(var), self.name(var))] = (
+ new_name
+ )

# Add to name mapping attribute
self._var_map_value.append(
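The two reformatted blocks in this file's diff only change layout, but the mapping they populate is worth a small illustration: the full group path of each dimension or variable is recorded against its new flattened name so that references can be resolved later. A hypothetical sketch (the path separator and flattened name are illustrative, not taken from the flattener):

    # Hypothetical illustration of the name mapping stored above: the
    # original group path of a dimension maps to its flattened name.
    _dim_map = {}

    def pathname(group_path, name):
        # e.g. ("/forecast/model", "time") -> "/forecast/model/time"
        return f"{group_path}/{name}"

    new_name = "forecast__model__time"  # illustrative flattened name
    _dim_map[pathname("/forecast/model", "time")] = new_name

    print(_dim_map)
    # {'/forecast/model/time': 'forecast__model__time'}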
6 changes: 3 additions & 3 deletions cfdm/read_write/netcdf/netcdfread.py
@@ -10307,9 +10307,9 @@ def _get_storage_options(self, filename, parsed_filename):
"endpoint_url" not in storage_options
and "endpoint_url" not in client_kwargs
):
- storage_options[
- "endpoint_url"
- ] = f"https://{parsed_filename.netloc}"
+ storage_options["endpoint_url"] = (
+ f"https://{parsed_filename.netloc}"
+ )

g["file_system_storage_options"].setdefault(filename, storage_options)

26 changes: 26 additions & 0 deletions cfdm/test/test_netcdf_indexer.py
@@ -194,6 +194,32 @@ def test_netcdf_indexer_Ellipsis(self):
x = cfdm.netcdf_indexer(n)
self.assertTrue((x[...] == n).all())

def test_netcdf_indexer_index_shape(self):
"""Test netcdf_indexer shape."""
x = cfdm.netcdf_indexer
self.assertEqual(x.index_shape((slice(2, 5), [4]), (10, 20)), [3, 1])
self.assertEqual(x.index_shape((slice(2, 5), 4), (10, 20)), [3])
self.assertEqual(
x.index_shape(([2, 3, 4], np.arange(1, 6)), (10, 20)), [3, 5]
)

self.assertEqual(
x.index_shape((slice(None), [True, False, True]), (10, 3)), [10, 2]
)

index0 = np.arange(5)
index0 = index0[index0 < 3]
self.assertEqual(x.index_shape((index0, []), (10, 20)), [3, 0])

self.assertEqual(
x.index_shape((slice(1, 5, 3), [3]), (10, 20)), [2, 1]
)
self.assertEqual(x.index_shape((slice(5, 1, -2), 3), (10, 20)), [2])
self.assertEqual(
x.index_shape((slice(5, 1, 3), [3]), (10, 20)), [0, 1]
)
self.assertEqual(x.index_shape((slice(1, 5, -3), 3), (10, 20)), [0])


if __name__ == "__main__":
print("Run date:", datetime.datetime.now())
4 changes: 2 additions & 2 deletions requirements.txt
@@ -5,6 +5,6 @@ packaging>=20.0
scipy>=1.10.0
h5netcdf>=1.3.0
h5py>=3.10.0
- s3fs>=2024.3.0
- dask>=2024.4.0
+ s3fs>=2024.6.0
+ dask>=2024.6.0
