From a1e8bc88ec591942ed6ef18bf09e9364ff2c1063 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 20 Jun 2024 14:45:44 +0100 Subject: [PATCH] dev --- cfdm/data/h5netcdfarray.py | 9 ++-- cfdm/data/mixin/filearraymixin.py | 51 ++++-------------- cfdm/data/mixin/netcdffilemixin.py | 40 -------------- cfdm/read_write/netcdf/netcdfread.py | 45 +++++++--------- cfdm/read_write/netcdf/netcdfwrite.py | 4 +- cfdm/read_write/read.py | 69 ++++++++++++------------ docs/source/class/cfdm.H5netcdfArray.rst | 53 +++++++++--------- docs/source/class/cfdm.NetCDF4Array.rst | 61 +++++++++++---------- docs/source/class/cfdm.NetCDFIndexer.rst | 6 +-- 9 files changed, 127 insertions(+), 211 deletions(-) diff --git a/cfdm/data/h5netcdfarray.py b/cfdm/data/h5netcdfarray.py index ba5beae82..825693065 100644 --- a/cfdm/data/h5netcdfarray.py +++ b/cfdm/data/h5netcdfarray.py @@ -204,16 +204,13 @@ def _set_attributes(self, var): :Returns: - `dict` - The attributes. + `None` """ - attributes = self._get_component("attributes", None) - if attributes is not None: + if self._get_component("attributes", None) is not None: return - attributes = dict(var.attrs) - self._set_component("attributes", attributes, copy=False) + self._set_component("attributes", dict(var.attrs), copy=False) def close(self, dataset): """Close the dataset containing the data. diff --git a/cfdm/data/mixin/filearraymixin.py b/cfdm/data/mixin/filearraymixin.py index b5b40d7ab..be440e5de 100644 --- a/cfdm/data/mixin/filearraymixin.py +++ b/cfdm/data/mixin/filearraymixin.py @@ -111,35 +111,6 @@ def get_addresses(self): """ return self._get_component("address", ()) - # def get_attributes(self, default=ValueError()): - # """The attributes of the array. - # - # .. versionadded:: (cfdm) NEXTVERSION - # - # :Parameters: - # - # default: optional - # Return the value of the *default* parameter if the - # attributes have not been set. If set to an `Exception` - # instance then it will be raised instead. 
- # - # :Returns: - # - # The attributes. - # - # """ - # attributes = self._get_component("attributes", None) - # if attributes is None: - # if default is None: - # return - # - # return self._default( - # default, - # f"{self.__class__.__name__} attributes have not yet been set", - # ) - # - # return deepcopy(attributes) - def get_filename(self, default=AttributeError()): """The name of the file containing the array. @@ -248,20 +219,20 @@ def get_storage_options( create_endpoint_url: `bool`, optional If True, the default, then create an - ``'endpoint_url'`` if and only if one has not already - been provided. See *filename* and *parsed_filename* - for details. + ``'endpoint_url'`` option if and only if one has not + already been provided. See *filename* and + *parsed_filename* for details. filename: `str`, optional - Used to set the ``'endpoint_url'`` key if it has not - been previously defined. Ignored if *parse_filename* - has been set. + Used to set the ``'endpoint_url'`` option if it has + not been previously defined. Ignored if + *parsed_filename* has been set. parsed_filename: `urllib.parse.ParseResult`, optional - Used to set the ``'endpoint_url'`` key if it has not - been previously defined. By default the - ``'endpoint_url'`` key, if required, is set from the - file name returned by `get_filename`. + Used to set the ``'endpoint_url'`` option if it has + not been previously defined. By default the + ``'endpoint_url'`` option, if required, is set from + the file name returned by `get_filename`. :Returns: @@ -339,7 +310,7 @@ def open(self, func, *args, **kwargs): :Returns: - `tuple` + 2-`tuple` The file object for the dataset, and the address of the data within the file.
diff --git a/cfdm/data/mixin/netcdffilemixin.py b/cfdm/data/mixin/netcdffilemixin.py index 0cd75886a..8a53be200 100644 --- a/cfdm/data/mixin/netcdffilemixin.py +++ b/cfdm/data/mixin/netcdffilemixin.py @@ -66,46 +66,6 @@ def _set_attributes(self, var): f"Must implement {self.__class__.__name__}._set_attributes" ) # pragma: no cover - # def _set_units(self, var): - # """The units and calendar properties. - # - # These are set from the netCDF variable attributes, but only if - # they have already not been defined, either during {{class}} - # instantiation or by a previous call to `_set_units`. - # - # .. versionadded:: (cfdm) 1.10.0.1 - # - # :Parameters: - # - # var: `netCDF4.Variable` or `h5netcdf.Variable` - # The variable containing the units and calendar - # definitions. - # - # :Returns: - # - # `tuple` - # The units and calendar values, either of which may be - # `None`. - # - # """ - # # We assume that an attributes dictionary exists - # attributes = self._get_component("attributes") - # - # # Note: Can't use None as the default since it is a valid - # # `units` or 'calendar' value that indicates that the - # # attribute has not been set in the dataset. - # units = self._get_component("units", False) - # if units is False: - # self._set_component("units", attributes.get("units"), copy=False) - # - # calendar = self._get_component("calendar", False) - # if calendar is False: - # self._set_component( - # "calendar", attributes.get("calendar"), copy=False - # ) - # - # return units, calendar - @property def array(self): """Return an independent numpy array containing the data. diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 3a4e32a3d..77e2fc792 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -629,6 +629,12 @@ def _open_netCDF4(self, filename): def _open_h5netcdf(self, filename): """Return an open `h5netcdf.File`. 
+ Uses values of the ``rdcc_nbytes``, ``rdcc_w0``, and + ``rdcc_nslots`` parameters to `h5netcdf.File` that correspond + to the default values of the `netCDF4.set_chunk_cache` + parameters ``size``, ``preemption``, and ``nelems``, + respectively. + .. versionadded:: (cfdm) NEXTVERSION :Parameters: @@ -641,7 +647,14 @@ def _open_h5netcdf(self, filename): `h5netcdf.File` """ - return h5netcdf.File(filename, "r", decode_vlen_strings=True) + return h5netcdf.File( + filename, + "r", + decode_vlen_strings=True, + rdcc_nbytes=16777216, + rdcc_w0=0.75, + rdcc_nslots=4133, + ) @classmethod def cdl_to_netcdf(cls, filename): @@ -650,7 +663,7 @@ def cdl_to_netcdf(cls, filename): :Parameters: filename: `str` - The name sdef _netof the CDL file. + The name of the CDL file. :Returns: @@ -1076,13 +1089,13 @@ def read( # -------------------------------------------------------- # S3 # -------------------------------------------------------- - # + # Input file system storage options "storage_options": storage_options, - # - "file_systems": {}, - # + # File system storage options for each file "file_system_storage_options": {}, - # + # Cached s3fs.S3FileSystem objects + "file_systems": {}, + # Cache of open s3fs.File objects "s3fs_File_objects": [], } @@ -1166,26 +1179,12 @@ def read( # 'global_attributes' dictionary # ---------------------------------------------------------------- global_attributes = {} - # for attr in map(str,nc.ncattrs()): for attr, value in self._file_global_attributes(nc).items(): attr = str(attr) if isinstance(value, bytes): value = value.decode(errors="ignore") global_attributes[attr] = value - # print (attr, value, type(value)) - - # var - # try: - # if isinstance(value, str): - # try: - # global_attributes[attr] = str(value) - # except UnicodeEncodeError: - # global_attributes[attr] = value.encode(errors="ignore") - # else: - # global_attributes[attr] = value.decode('utf-8') - # except UnicodeDecodeError: - # pass g["global_attributes"] = global_attributes
if debug: @@ -1397,7 +1396,6 @@ def read( variable_grouped_dataset[ncvar] = g["nc_grouped"] variable_attributes[ncvar] = {} - # for attr in map(str, variable.ncattrs()): for attr, value in self._file_variable_attributes( variable ).items(): @@ -1495,7 +1493,6 @@ def read( # The netCDF dimensions of the parent file internal_dimension_sizes = {} - # for name, dimension in nc.dimensions.items(): for name, dimension in self._file_dimensions(nc).items(): if ( has_groups @@ -2309,8 +2306,6 @@ def _get_variables_from_external_files(self, netcdf_external_variables): # Remove this ncvar from the set of external variables external_variables.remove(ncvar) - # TODO h5netcdf S3: include s3 vars here? - def _parse_compression_gathered(self, ncvar, compress): """Parse a list variable for compressing arrays by gathering.""" g = self.read_vars diff --git a/cfdm/read_write/netcdf/netcdfwrite.py b/cfdm/read_write/netcdf/netcdfwrite.py index deed75606..02519a309 100644 --- a/cfdm/read_write/netcdf/netcdfwrite.py +++ b/cfdm/read_write/netcdf/netcdfwrite.py @@ -2661,8 +2661,7 @@ def _write_netcdf_variable( if g["dry_run"]: return - # print (ncvar, repr(cfvar.properties())) - # logger.info(f" Writing {cfvar!r}") # pragma: no cover + logger.info(f" Writing {cfvar!r}") # pragma: no cover # Set 'construct_type' if not construct_type: @@ -4460,7 +4459,6 @@ def file_open(self, filename, mode, fmt, fields): os.remove(filename) try: - # nc.set_chunk_cache(16*1024*1024) # 16MiB chunkcache nc = netCDF4.Dataset(filename, mode, format=fmt) except RuntimeError as error: raise RuntimeError(f"{error}: {filename}") diff --git a/cfdm/read_write/read.py b/cfdm/read_write/read.py index 714a9e6c9..e700c91c3 100644 --- a/cfdm/read_write/read.py +++ b/cfdm/read_write/read.py @@ -292,42 +292,39 @@ def read( options are interpreted depends on the location of the file: - **Local File System** - - Storage options are ignored for local files. 
- - **HTTP(S)** - - Storage options are ignored for files available across the - network via OPeNDAP. - - **S3-compatible services** - - The backend used is `s3fs`, and the storage options are - used to initialise an `s3fs.S3FileSystem` file system - object. By default, or if `None`, then *storage_options* - is taken as ``{}``. - - If the ``'endpoint_url'`` key is not in *storage_options*, - nor in a dictionary defined by the ``'client_kwargs'`` key - (both of which are the case when *storage_options* is - `None`), then one will be automatically inserted for - accessing an S3 file. For example, for a file name of - ``'s3://store/data/file.nc'``, an ``'endpoint_url'`` key - with value ``'https://store'`` would be created. To - disable this, set ``'endpoint_url'`` to `None`. - - *Parameter example:* - For a file name of ``'s3://store/data/file.nc'``, the - following are equivalent: ``None``, ``{}``, - ``{'endpoint_url': 'https://store'}``, and - ``{'client_kwargs': {'endpoint_url': 'https://store'}}`` - - *Parameter example:* - ``{'key: 'scaleway-api-key...', 'secret': - 'scaleway-secretkey...', 'endpoint_url': - 'https://s3.fr-par.scw.cloud', 'client_kwargs': - {'region_name': 'fr-par'}}`` + * **Local File System**: Storage options are ignored for + local files. + + * **HTTP(S)**: Storage options are ignored for files + available across the network via OPeNDAP. + + * **S3-compatible services**: The backend used is `s3fs`, + and the storage options are used to initialise an + `s3fs.S3FileSystem` file system object. By default, or + if `None`, then *storage_options* is taken as ``{}``. + + If the ``'endpoint_url'`` key is not in + *storage_options*, nor in a dictionary defined by the + ``'client_kwargs'`` key (both of which are the case when + *storage_options* is `None`), then one will be + automatically inserted for accessing an S3 file. 
For + instance, with a file name of + ``'s3://store/data/file.nc'``, an ``'endpoint_url'`` key + with value ``'https://store'`` would be created. To + disable this, set the ``'endpoint_url'`` key to `None`. + + *Parameter example:* + For a file name of ``'s3://store/data/file.nc'``, the + following are equivalent: ``None``, ``{}``, + ``{'endpoint_url': 'https://store'}``, and + ``{'client_kwargs': {'endpoint_url': + 'https://store'}}`` + + *Parameter example:* + ``{'key': 'scaleway-api-key...', 'secret': + 'scaleway-secretkey...', 'endpoint_url': + 'https://s3.fr-par.scw.cloud', 'client_kwargs': + {'region_name': 'fr-par'}}`` .. versionadded:: (cfdm) NEXTVERSION diff --git a/docs/source/class/cfdm.H5netcdfArray.rst b/docs/source/class/cfdm.H5netcdfArray.rst index 506fc3c3b..1576d7594 100644 --- a/docs/source/class/cfdm.H5netcdfArray.rst +++ b/docs/source/class/cfdm.H5netcdfArray.rst @@ -24,14 +24,13 @@ Inspection ~cfdm.H5netcdfArray.get_subspace ~cfdm.H5netcdfArray.get_attributes - .. rubric:: Attributes .. autosummary:: :nosignatures: :toctree: ../attribute/ :template: attribute.rst - + ~cfdm.H5netcdfArray.array ~cfdm.H5netcdfArray.dtype ~cfdm.H5netcdfArray.ndim @@ -47,20 +46,20 @@ Units :nosignatures: :toctree: ../method/ :template: method.rst - + ~cfdm.H5netcdfArray.get_calendar ~cfdm.H5netcdfArray.get_units - + File ---- - + .. rubric:: Methods ..
autosummary:: :nosignatures: :toctree: ../method/ :template: method.rst - + ~cfdm.H5netcdfArray.get_address ~cfdm.H5netcdfArray.get_addresses ~cfdm.H5netcdfArray.close @@ -73,7 +72,7 @@ File ~cfdm.H5netcdfArray.get_mask ~cfdm.H5netcdfArray.get_unpack ~cfdm.H5netcdfArray.get_storage_options - + Miscellaneous ------------- @@ -81,10 +80,10 @@ Miscellaneous :nosignatures: :toctree: ../method/ :template: method.rst - + ~cfdm.H5netcdfArray.copy ~cfdm.H5netcdfArray.to_memory - + Special ------- @@ -92,32 +91,32 @@ Special :nosignatures: :toctree: ../method/ :template: method.rst - + ~cfdm.H5netcdfArray.__getitem__ Docstring substitutions ----------------------- - + .. rubric:: Methods - + .. autosummary:: - :nosignatures: - :toctree: ../method/ - :template: method.rst - + :nosignatures: + :toctree: ../method/ + :template: method.rst + ~cfdm.H5netcdfArray._docstring_special_substitutions - ~cfdm.H5netcdfArray._docstring_substitutions - ~cfdm.H5netcdfArray._docstring_package_depth - ~cfdm.H5netcdfArray._docstring_method_exclusions + ~cfdm.H5netcdfArray._docstring_substitutions + ~cfdm.H5netcdfArray._docstring_package_depth + ~cfdm.H5netcdfArray._docstring_method_exclusions Deprecated ---------- - -.. rubric:: Methods - -.. autosummary:: - :nosignatures: - :toctree: ../method/ - :template: method.rst - + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + ~cfdm.H5netcdfArray.get_missing_values diff --git a/docs/source/class/cfdm.NetCDF4Array.rst b/docs/source/class/cfdm.NetCDF4Array.rst index 0b2e22668..d087ab192 100644 --- a/docs/source/class/cfdm.NetCDF4Array.rst +++ b/docs/source/class/cfdm.NetCDF4Array.rst @@ -19,19 +19,18 @@ Inspection :nosignatures: :toctree: ../method/ :template: method.rst - + ~cfdm.NetCDF4Array.get_compression_type ~cfdm.NetCDF4Array.get_subspace ~cfdm.NetCDF4Array.get_attributes - .. rubric:: Attributes .. 
autosummary:: :nosignatures: :toctree: ../attribute/ :template: attribute.rst - + ~cfdm.NetCDF4Array.array ~cfdm.NetCDF4Array.dtype ~cfdm.NetCDF4Array.ndim @@ -47,20 +46,20 @@ Units :nosignatures: :toctree: ../method/ :template: method.rst - + ~cfdm.NetCDF4Array.get_calendar ~cfdm.NetCDF4Array.get_units - + File ---- - + .. rubric:: Methods .. autosummary:: :nosignatures: :toctree: ../method/ :template: method.rst - + ~cfdm.NetCDF4Array.get_address ~cfdm.NetCDF4Array.get_addresses ~cfdm.NetCDF4Array.close @@ -73,7 +72,7 @@ File ~cfdm.NetCDF4Array.get_mask ~cfdm.NetCDF4Array.get_unpack ~cfdm.NetCDF4Array.get_storage_options - + Miscellaneous ------------- @@ -81,10 +80,10 @@ Miscellaneous :nosignatures: :toctree: ../method/ :template: method.rst - + ~cfdm.NetCDF4Array.copy ~cfdm.NetCDF4Array.to_memory - + Special ------- @@ -92,32 +91,32 @@ Special :nosignatures: :toctree: ../method/ :template: method.rst - + ~cfdm.NetCDF4Array.__getitem__ Docstring substitutions ------------------------ - -.. rubric:: Methods - -.. autosummary:: - :nosignatures: - :toctree: ../method/ - :template: method.rst - +----------------------- + +.. rubric:: Methods + +.. autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + ~cfdm.NetCDF4Array._docstring_special_substitutions - ~cfdm.NetCDF4Array._docstring_substitutions - ~cfdm.NetCDF4Array._docstring_package_depth - ~cfdm.NetCDF4Array._docstring_method_exclusions + ~cfdm.NetCDF4Array._docstring_substitutions + ~cfdm.NetCDF4Array._docstring_package_depth + ~cfdm.NetCDF4Array._docstring_method_exclusions Deprecated ---------- - -.. rubric:: Methods - -.. autosummary:: - :nosignatures: - :toctree: ../method/ - :template: method.rst - + +.. rubric:: Methods + +.. 
autosummary:: + :nosignatures: + :toctree: ../method/ + :template: method.rst + ~cfdm.NetCDF4Array.get_missing_values diff --git a/docs/source/class/cfdm.NetCDFIndexer.rst b/docs/source/class/cfdm.NetCDFIndexer.rst index 5e236b0f8..6dd64d263 100644 --- a/docs/source/class/cfdm.NetCDFIndexer.rst +++ b/docs/source/class/cfdm.NetCDFIndexer.rst @@ -19,11 +19,11 @@ Inspection :nosignatures: :toctree: ../method/ :template: method.rst - + ~cfdm.NetCDFIndexer.attributes .. rubric:: Attributes - + .. autosummary:: :nosignatures: :toctree: ../attribute/ @@ -38,5 +38,5 @@ Special :nosignatures: :toctree: ../method/ :template: method.rst - + ~cfdm.NetCDFIndexer.__getitem__