From cb012dd8d542047ac17197db41eaa61cd11a20e0 Mon Sep 17 00:00:00 2001 From: fraserwg Date: Tue, 23 Nov 2021 14:59:00 +0000 Subject: [PATCH] some bug fixes --- xmitgcm/mds_store.py | 18 +++++++++++++---- xmitgcm/utils.py | 47 ++++++++++++++++++++++++++------------------ 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/xmitgcm/mds_store.py b/xmitgcm/mds_store.py index c01bd5d..8b2e1b2 100644 --- a/xmitgcm/mds_store.py +++ b/xmitgcm/mds_store.py @@ -198,7 +198,7 @@ def open_mdsdataset(data_dir, grid_dir=None, # We either have a single iter, in which case we create a fresh store, # or a list of iters, in which case we combine. if iters == 'all': - iters = _get_all_iternums(data_dir, file_prefixes=prefix) + iters = _get_all_iternums(data_dir, file_prefixes=prefix, tiled=tiled) if iters is None: iternum = None else: @@ -996,13 +996,23 @@ def _concat_dicts(list_of_dicts): return result -def _get_all_iternums(data_dir, file_prefixes=None, - file_format='*.??????????.data'): +def _get_all_iternums(data_dir, file_prefixes=None, tiled=False, file_format=None): """Scan a directory for all iteration number suffixes.""" + if not tiled and file_format == None: + file_format = '*.??????????.data' + + elif tiled and file_format == None: + file_format = '*.??????????.???.???.data' + iternums = set() all_datafiles = listdir_fnmatch(data_dir, file_format) + istart = file_format.find('?')-len(file_format) - iend = file_format.rfind('?')-len(file_format)+1 + if not tiled: + iend = file_format.rfind('?')-len(file_format) + 1 + else: + iend = file_format.rfind('?')-len(file_format) - 7 + for f in all_datafiles: iternum = int(f[istart:iend]) prefix = os.path.split(f[:istart-1])[-1] diff --git a/xmitgcm/utils.py b/xmitgcm/utils.py index 5764884..38fe4a4 100644 --- a/xmitgcm/utils.py +++ b/xmitgcm/utils.py @@ -510,26 +510,28 @@ def read_tiled_mds(fname, iternum=None, use_mmap=None, endian='>', shape=None, _get_useful_info_from_meta_file(metafile, tiled=True) dtype = 
dtype.newbyteorder(endian) except IOError: + raise IOError("Cannot find the shape associated to %s in the \ + metadata." % fname) # we can recover from not having a .meta file if dtype and shape have # been specified already - if tile_shape is None: - raise IOError("Cannot find the shape associated to %s in the \ - metadata." % fname) - elif dtype is None: - raise IOError("Cannot find the dtype associated to %s in the \ - metadata, please specify the default dtype to \ - avoid this error." % fname) - else: - # add time dimensions - domain_shape = (1,) + domain_shape - domain_shape = list(domain_shape) - - tile_shape = (1,) + tile_shape - tile_shape = list(tile_shape) - name = os.path.basename(fname) - - metadata = {'basename': name, 'domain_shape': domain_shape, - 'tile_shape': tile_shape} + #if tile_shape is None: + # raise IOError("Cannot find the shape associated to %s in the \ + # metadata." % fname) + #elif dtype is None: + # raise IOError("Cannot find the dtype associated to %s in the \ + # metadata, please specify the default dtype to \ + # avoid this error." 
% fname) + #else: + # # add time dimensions + # domain_shape = (1,) + domain_shape + # domain_shape = list(domain_shape) + + # tile_shape = (1,) + tile_shape + # tile_shape = list(tile_shape) + # name = os.path.basename(fname) + + # metadata = {'basename': name, 'domain_shape': domain_shape, + # 'tile_shape': tile_shape} # figure out dimensions ndims = len(domain_shape)-1 @@ -1035,6 +1037,9 @@ def read_all_variables(variable_list, file_metadata, use_mmap=False, described by file_metadata """ + if tiled and chunks == "2D": + raise NotImplementedError("2D chunking is not supported for tiled datasets") + out = [] for variable in variable_list: @@ -1215,7 +1220,9 @@ def read_3D_chunks(variable, file_metadata, use_mmap=False, use_dask=False, tile or numpy.ndarray or memmap, depending on input args """ - + if tiled and use_mmap: + raise NotImplementedError( + "tiled data cannot be read using numpy.memmap") def load_chunk(rec): return _read_xyz_chunk(variable, file_metadata, rec=rec, @@ -1272,6 +1279,8 @@ def _read_xyz_chunk(variable, file_metadata, rec=0, use_mmap=False, tiled=False) ------- numpy array or memmap """ + if tiled and use_mmap: + raise NotImplementedError("tiled data cannot be read using numpy.memmap") if file_metadata['has_faces'] and ((file_metadata['nx'] > 1) or (file_metadata['ny'] > 1)):