Commit b3a1b56 (branch: dev)
davidhassell committed Jun 20, 2024
1 parent 3703495
Showing 10 changed files with 98 additions and 65 deletions.
4 changes: 2 additions & 2 deletions Changelog.rst
@@ -19,8 +19,8 @@ Version NEXTVERSION
* New class `cfdm.NetCDFIndexer`
* New dependency: ``h5netcdf>=1.3.0``
* New dependency: ``h5py>=3.10.0``
- * New dependency: ``s3fs>=2024.3.0``
- * New dependency: ``dask>=2024.4.1``
+ * New dependency: ``s3fs>=2024.6.0``
+ * New dependency: ``dask>=2024.6.0``
* Removed dependency: ``netcdf_flattener``

----
4 changes: 2 additions & 2 deletions cfdm/__init__.py
@@ -117,7 +117,7 @@
except ImportError as error1:
raise ImportError(_error0 + str(error1))

_minimum_vn = "2024.3.0"
_minimum_vn = "2024.6.0"
if Version(s3fs.__version__) < Version(_minimum_vn):
raise ValueError(
f"Bad s3fs version: cfdm requires s3fs>={_minimum_vn}. "
@@ -143,7 +143,7 @@
except ImportError as error1:
raise ImportError(_error0 + str(error1))

_minimum_vn = "2024.4.0"
_minimum_vn = "2024.6.0"
if Version(dask.__version__) < Version(_minimum_vn):
raise ValueError(
f"Bad scipy version: cfdm requires dask>={_minimum_vn}. "
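The version checks above follow a small, self-contained pattern; a minimal sketch outside of cfdm (only the pattern and the 2024.6.0 minimum come from this diff; the exact error wording below is illustrative):

    # Minimal sketch of the dependency version check pattern shown above,
    # using the new minimum pin from this commit.
    import dask
    from packaging.version import Version

    _minimum_vn = "2024.6.0"
    if Version(dask.__version__) < Version(_minimum_vn):
        raise ValueError(
            f"Bad dask version: cfdm requires dask>={_minimum_vn}. "
            f"Got {dask.__version__}"
        )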
6 changes: 3 additions & 3 deletions cfdm/data/mixin/filearraymixin.py
@@ -314,9 +314,9 @@ def get_storage_options(

if parsed_filename is not None and parsed_filename.scheme == "s3":
# Derive endpoint_url from filename
- storage_options[
- "endpoint_url"
- ] = f"https://{parsed_filename.netloc}"
+ storage_options["endpoint_url"] = (
+ f"https://{parsed_filename.netloc}"
+ )

return storage_options

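The reformatted assignment above derives an S3 endpoint URL from the network location of the file name. A minimal sketch of that derivation, assuming the name is parsed with urllib.parse.urlparse (the URI itself is hypothetical):

    # Sketch of deriving endpoint_url from an "s3://" file name, as in
    # get_storage_options() above.
    from urllib.parse import urlparse

    filename = "s3://object-store.example.org/my-bucket/file.nc"
    parsed_filename = urlparse(filename)

    storage_options = {}
    if parsed_filename is not None and parsed_filename.scheme == "s3":
        # Derive endpoint_url from the URI's network location
        storage_options["endpoint_url"] = (
            f"https://{parsed_filename.netloc}"
        )

    print(storage_options)
    # {'endpoint_url': 'https://object-store.example.org'}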
99 changes: 52 additions & 47 deletions cfdm/data/netcdfindexer.py
@@ -1,7 +1,7 @@
"""A data indexer that applies netCDF masking and unpacking.
- Portions of this code were adapted from the `netCDF4` library, which
- carries the following MIT License:
+ Portions of this code were adapted from the `netCDF4` Python library,
+ which carries the following MIT License:
Copyright 2008 Jeffrey Whitaker
@@ -19,6 +19,7 @@
included in all copies or substantial portions of the Software.
"""

import logging
from math import prod
from numbers import Integral
@@ -38,22 +39,18 @@ class netcdf_indexer:
means that the index for each dimension is applied independently,
regardless of how that index was defined. For instance, the
indices ``[[0, 1], [1, 3], 0]`` and ``[:2, 1:4:2, 0]`` will give
- identical results. This behaviour is different to that of
- `numpy`. Non-orthogonal indexing means that normal `numpy`
- indexing rules are applied.
- During indexing, masking and unpacking is applied according to the
- netCDF conventions, either or both of which may be disabled via
- initialisation options.
+ identical results. Orthogonal indexing is different to the
+ indexing behaviour of `numpy`. Non-orthogonal indexing means that
+ normal `numpy` indexing rules are applied.
In addition, string and character variables are always converted
to unicode arrays, the latter with the last dimension
concatenated.
- Masking and unpacking operations are defined by the conventions
- for netCDF attributes, which are either provided as part of the
- input *data* object, or given with the input *attributes*
- parameter.
+ Masking and unpacking operations, either or both may be disabled
+ via initialisation options, are defined by the conventions for
+ netCDF attributes, which are either provided as part of the input
+ *variable* object, or given with the input *attributes* parameter.
The relevant netCDF attributes that are considered are:
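A minimal sketch of the orthogonal indexing behaviour described in this docstring, wrapping a bare `numpy` array exactly as the new unit tests below do (masking and unpacking are not exercised here):

    # Sketch: orthogonal indexing applies each dimension's index
    # independently, so a list index and the equivalent slice select the
    # same subspace. Plain numpy would broadcast the two list indices
    # together ("fancy" indexing) instead.
    import numpy as np
    import cfdm

    a = np.arange(120).reshape(4, 6, 5)
    n = cfdm.netcdf_indexer(a)

    x = n[[0, 1], [1, 3], 0]
    y = n[:2, 1:4:2, 0]
    print(np.array_equal(x, y))  # True, per the docstring above

    print(a[[0, 1], [1, 3], 0].shape)  # (2,) with numpy's fancy indexing
    print(x.shape)                     # (2, 2) with orthogonal indexing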
@@ -129,7 +126,7 @@ def __init__(
variable:
The variable to be indexed. May be any variable that
has the same API as one of `numpy.ndarray`,
`netCDF4.Variable` or `h5py.Variable` (which includes
`netCDF4.Variable`, or `h5py.Variable` (which includes
`h5netcdf.Variable`). Any masking and unpacking that
could be applied by *variable* itself (e.g. by a
`netCDF4.Variable` instance) is disabled, ensuring
Expand Down Expand Up @@ -174,15 +171,20 @@ def __init__(
relevant to masking and unpacking are considered, with
all other attributes being ignored. If *attributes* is
`None`, the default, then the netCDF attributes stored
- by *variable* itself (if any) are used. If
- *attributes* is not `None`, then any netCDF attributes
- stored by *variable* itself are ignored.
+ by *variable* (if any) are used. If *attributes* is
+ not `None`, then any netCDF attributes stored by
+ *variable* are ignored.
copy: `bool`, optional
- If True then return a copy of the subspace that is not
- a view of part of the the original data. If False, the
- default, then the returned subspace could be either a
- copy or a view.
+ If True then return a `numpy` array that is not a view
+ of part of the original data, i.e. in-place
+ changes to the returned subspace will not affect the
+ original *variable*. This is done by returning an
+ in-memory copy of the subspace. If False, the default, no
+ in-memory copy is done, and then whether or not
+ in-place changes to the returned subspace affect
+ *variable* will depend on how subspacing is
+ implemented by *variable*.
"""
self.variable = variable
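A short sketch of the *copy* behaviour documented above; the array is illustrative, and only the `copy` keyword comes from the parameter list being documented:

    # Sketch: with copy=True the returned subspace is an independent
    # in-memory copy, so writing to it never affects the wrapped variable.
    import numpy as np
    import cfdm

    a = np.arange(10.0)
    independent = cfdm.netcdf_indexer(a, copy=True)[2:5]
    independent[...] = -999.0  # does not change 'a'

    maybe_view = cfdm.netcdf_indexer(a)[2:5]  # copy or view, per the default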
@@ -196,7 +198,7 @@ def __init__(
def __getitem__(self, index):
"""Return a subspace of the variable as a `numpy` array.
- v.__getitem__(index) <==> v[index]
+ n.__getitem__(index) <==> v[index]
If `__orthogonal_indexing__` is True then indexing is
orthogonal. If `__orthogonal_indexing__` is False then normal
@@ -414,9 +416,9 @@ def _index(self, index):
# Still here? Then do orthogonal indexing.
# ------------------------------------------------------------

- # Create an index that replaces integer indices with size 1
- # slices, so that their axes are not dropped yet (they will be
- # dropeed later).
+ # Create an index that replaces integers with size 1 slices,
+ # so that their axes are not dropped yet (they will be dropped
+ # later).
index0 = [
slice(i, i + 1) if isinstance(i, Integral) else i for i in index
]
@@ -426,17 +428,18 @@
# variable natively supports orthogonal indexing.
#
# Note: `netCDF4.Variable` natively supports orthogonal
- # indexing; but `numpy.ndarray`, `h5netcdf.File` and
- # `h5py.File` do not.
+ # indexing; but `h5netcdf.File`, `h5py.File`, and
+ # `numpy.ndarray`, do not.
data = data[tuple(index0)]
else:
# There are two or more list/1-d array indices, and the
# variable does not natively support orthogonal indexing
# => emulate orthogonal indexing with a sequence of
- # subspaces, one for each list/1-d array index.
+ # independent subspaces, one for each list/1-d array
+ # index.

- # 1) Apply the slice indices at the time as the list/1-d
- # array index that gives the smallest result.
+ # 1) Apply the slice indices at the same time as the
+ # list/1-d array index that gives the smallest result.

# Create an index that replaces each list/1-d array with
# slice(None)
@@ -447,7 +450,7 @@
# Find the position of the list/1-d array index that gives
# the smallest result, and apply the subspace of slices
# and the chosen list/1-d array index. This will give the
- # samllest memory footprint of the whole operation.
+ # smallest high-water memory mark of the whole operation.
shape1 = self.index_shape(index1, data.shape)
size1 = prod(shape1)
sizes = [
@@ -503,7 +506,7 @@ def _mask(self, data, dtype, attributes, dtype_unsigned_int):
:Returns:
- `nump.ndarray`
+ `numpy.ndarray`
The masked data.
"""
@@ -802,7 +805,7 @@ def attributes(self):
**Examples**
- >>> v.attributes()
+ >>> n.attributes()
{'standard_name': 'air_temperature',
'missing_value': -999.0}
@@ -833,46 +836,48 @@ def attributes(self):
def index_shape(cls, index, shape):
"""Return the shape of the array subspace implied by indices.
- .. versionadded:: (cfdm) NEXTRELEASE
+ .. versionadded:: (cfdm) NEXTVERSION
:Parameters:
- indices: `tuple`
+ index: `tuple`
The indices to be applied to an array with shape
*shape*.
- shape: sequence of `ints`
+ shape: sequence of `int`
The shape of the array to be subspaced.
:Returns:
`list`
- The shape of the subspace defined by the *indices*.
+ The shape of the subspace defined by the *index*.
**Examples**
>>> import numpy as np
- >>> n.indices_shape((slice(2, 5), 4), (10, 20))
+ >>> n.index_shape((slice(2, 5), [4]), (10, 20))
[3, 1]
- >>> n.indices_shape(([2, 3, 4], np.arange(1, 6)), (10, 20))
+ >>> n.index_shape((slice(2, 5), 4), (10, 20))
+ [3]
+ >>> n.index_shape(([2, 3, 4], np.arange(1, 6)), (10, 20))
[3, 5]
- >>> n.indices_shape((slice(None), [True, False, True]), (10, 3))
+ >>> n.index_shape((slice(None), [True, False, True]), (10, 3))
[10, 2]
>>> index0 = np.arange(5)
>>> index0 = index0[index0 < 3]
- >>> n.indices_shape((index0, []), (10, 20))
+ >>> n.index_shape((index0, []), (10, 20))
[3, 0]
- >>> n.indices_shape((slice(1, 5, 3), 3), (10, 20))
- [2, 1]
- >>> n.indices_shape((slice(5, 1, -2), 3), (10, 20))
+ >>> n.index_shape((slice(1, 5, 3), [3]), (10, 20))
[2, 1]
- >>> n.indices_shape((slice(5, 1, 3), 3), (10, 20))
- [0, 1]
- >>> n.indices_shape((slice(1, 5, -3), 3), (10, 20))
+ >>> n.index_shape((slice(5, 1, -2), 3), (10, 20))
+ [2]
+ >>> n.index_shape((slice(5, 1, 3), [3]), (10, 20))
[0, 1]
+ >>> n.index_shape((slice(1, 5, -3), 3), (10, 20))
+ [0]
"""
implied_shape = []
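The `_index` comments earlier in this file's diff describe how orthogonal indexing is emulated when the wrapped variable does not support it natively: each list/1-d array index is applied as its own independent subspace, starting with the one that gives the smallest result so that the intermediate arrays stay small. A standalone sketch of that idea (not cfdm's implementation):

    # Standalone sketch of emulating orthogonal indexing with a sequence
    # of independent 1-d subspaces, smallest result first.
    import numpy as np

    def orthogonal_take(data, axis_indices):
        """Apply one 1-d integer index per axis, independently."""
        # Applying the most selective index first keeps the intermediate
        # arrays, and hence the peak memory use, as small as possible.
        for axis, ind in sorted(axis_indices, key=lambda ai: len(ai[1])):
            data = np.take(data, ind, axis=axis)
        return data

    a = np.arange(200).reshape(10, 20)
    b = orthogonal_take(a, [(0, [2, 3, 4]), (1, list(range(1, 6)))])
    print(b.shape)  # (3, 5), as in the index_shape examples above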
1 change: 1 addition & 0 deletions cfdm/read_write/netcdf/flatten/__init__.py
@@ -12,4 +12,5 @@
of the License at http://www.apache.org/licenses/LICENSE-2.0.
"""

from .flatten import netcdf_flatten
1 change: 1 addition & 0 deletions cfdm/read_write/netcdf/flatten/config.py
@@ -3,6 +3,7 @@
.. versionadded:: (cfdm) NEXTVERSION
"""

from dataclasses import dataclass

# Maximum length of name after which it is replaced with its hash
12 changes: 6 additions & 6 deletions cfdm/read_write/netcdf/flatten/flatten.py
@@ -741,9 +741,9 @@ def flatten_dimension(self, dim):
)

# Store new name in dict for resolving references later
- self._dim_map[
- self.pathname(self.group(dim), self.name(dim))
- ] = new_name
+ self._dim_map[self.pathname(self.group(dim), self.name(dim))] = (
+ new_name
+ )

# Add to name mapping attribute
self._dim_map_value.append(
@@ -822,9 +822,9 @@ def flatten_variable(self, var):
new_var.setncatts(attributes)

# Store new name in dict for resolving references later
- self._var_map[
- self.pathname(self.group(var), self.name(var))
- ] = new_name
+ self._var_map[self.pathname(self.group(var), self.name(var))] = (
+ new_name
+ )

# Add to name mapping attribute
self._var_map_value.append(
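The two reformatted blocks in this file's diff only change layout, but the mapping they populate is worth a small illustration: the full group path of each dimension or variable is recorded against its new flattened name so that references can be resolved later. A hypothetical sketch (the path separator and flattened name are illustrative, not taken from the flattener):

    # Hypothetical illustration of the name mapping stored above: the
    # original group path of a dimension maps to its flattened name.
    _dim_map = {}

    def pathname(group_path, name):
        # e.g. ("/forecast/model", "time") -> "/forecast/model/time"
        return f"{group_path}/{name}"

    new_name = "forecast__model__time"  # illustrative flattened name
    _dim_map[pathname("/forecast/model", "time")] = new_name

    print(_dim_map)
    # {'/forecast/model/time': 'forecast__model__time'}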
6 changes: 3 additions & 3 deletions cfdm/read_write/netcdf/netcdfread.py
@@ -10307,9 +10307,9 @@ def _get_storage_options(self, filename, parsed_filename):
"endpoint_url" not in storage_options
and "endpoint_url" not in client_kwargs
):
- storage_options[
- "endpoint_url"
- ] = f"https://{parsed_filename.netloc}"
+ storage_options["endpoint_url"] = (
+ f"https://{parsed_filename.netloc}"
+ )

g["file_system_storage_options"].setdefault(filename, storage_options)

26 changes: 26 additions & 0 deletions cfdm/test/test_netcdf_indexer.py
@@ -194,6 +194,32 @@ def test_netcdf_indexer_Ellipsis(self):
x = cfdm.netcdf_indexer(n)
self.assertTrue((x[...] == n).all())

def test_netcdf_indexer_index_shape(self):
"""Test netcdf_indexer shape."""
x = cfdm.netcdf_indexer
self.assertEqual(x.index_shape((slice(2, 5), [4]), (10, 20)), [3, 1])
self.assertEqual(x.index_shape((slice(2, 5), 4), (10, 20)), [3])
self.assertEqual(
x.index_shape(([2, 3, 4], np.arange(1, 6)), (10, 20)), [3, 5]
)

self.assertEqual(
x.index_shape((slice(None), [True, False, True]), (10, 3)), [10, 2]
)

index0 = np.arange(5)
index0 = index0[index0 < 3]
self.assertEqual(x.index_shape((index0, []), (10, 20)), [3, 0])

self.assertEqual(
x.index_shape((slice(1, 5, 3), [3]), (10, 20)), [2, 1]
)
self.assertEqual(x.index_shape((slice(5, 1, -2), 3), (10, 20)), [2])
self.assertEqual(
x.index_shape((slice(5, 1, 3), [3]), (10, 20)), [0, 1]
)
self.assertEqual(x.index_shape((slice(1, 5, -3), 3), (10, 20)), [0])


if __name__ == "__main__":
print("Run date:", datetime.datetime.now())
4 changes: 2 additions & 2 deletions requirements.txt
@@ -5,6 +5,6 @@ packaging>=20.0
scipy>=1.10.0
h5netcdf>=1.3.0
h5py>=3.10.0
- s3fs>=2024.3.0
- dask>=2024.4.0
+ s3fs>=2024.6.0
+ dask>=2024.6.0
