From 34547de6dd086ea254981e8fe6fb777c8d011dd2 Mon Sep 17 00:00:00 2001 From: Brioch Hemmings Date: Fri, 20 Dec 2024 07:41:56 +1300 Subject: [PATCH] Propagating headers in array files to model input files (#566) * Trying to provide better support for propagating headers in array like files through to model input files + bit of work around tpl files including header for direct array style pars + added too test to include direct add_pars and pp add_pars * docstring mods --- autotest/pst_from_tests.py | 60 ++++++++++++++++++++++++--------- pyemu/utils/helpers.py | 15 +++++++-- pyemu/utils/pst_from.py | 69 ++++++++++++++++++++++---------------- 3 files changed, 96 insertions(+), 48 deletions(-) diff --git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py index e0002374..cd35e9bc 100644 --- a/autotest/pst_from_tests.py +++ b/autotest/pst_from_tests.py @@ -5223,48 +5223,48 @@ def test_array_fmt(tmp_path): fp.write(" 3.000 3.0000 03.000\n" " 3.0 3.0000 03.000") # will be converted to Exp format -- only safe option - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat")) + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat")) assert fmt == ''.join([" %11.4F"] * 3) assert arr.sum(axis=1).sum() == 18 # actually space delim but could be fixed (first col is 1 wider) with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write("3.000 3.00 03.0\n" " 3.0 3.0 03.") - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat")) + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat")) assert fmt == ''.join([" %4.1F"] * 3) # actually space delim but could be fixed (first col is 1 wider) with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write(" 3.000000000 3.00 03.0\n" " 3.0 3.0 03.") - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat")) + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat")) assert fmt == ''.join([" %11.8F"] * 3) assert arr.sum(axis=1).sum() == 18 # true space delim option -- sep passed with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write("3.000 3.00000 03.000\n" "3.0 3.0000 03.000") - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=' ') + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=' ') assert fmt == "%7.5F" assert arr.sum(axis=1).sum() == 18 # true space delim option with sep None with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write("3.000 3.00000 03.000\n" "3.0 3.0000 03.000") - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat")) + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat")) assert fmt == "%7.5F" assert arr.sum(axis=1).sum() == 18 # comma delim option with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write("3.000, 3.00000, 03.000\n" " 3.0, 3.0000,03.000") - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=',') + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=',') assert fmt == "%8.5F" assert arr.sum(axis=1).sum() == 18 # partial sci note option (fixed format) but short with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write(" 00.3E01 30.0E-1 03.00\n" " 3.0 3.00 03.000") - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat")) + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat")) assert fmt == ''.join([" %7.0E"] * 3) assert arr.sum(axis=1).sum() == 18 try: @@ -5272,7 +5272,7 @@ def test_array_fmt(tmp_path): with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write(" 0.3E01 3.0E-1 03.00\n" " 3.0 3.00 03.000") - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat")) + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat")) except ValueError: # should fail pass @@ -5280,14 +5280,14 @@ def test_array_fmt(tmp_path): with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write(" 3.0E00 30.0000E-1 03.00\n" " 3.0 3.00 03.000") - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat")) + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat")) assert fmt == ''.join([" %11.4E"] * 3) assert arr.sum(axis=1).sum() == 18 # free but not passing delim with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write(" 0.3E01 30.0E-1 03.00\n" "3.0 3.00 03.000") - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"), + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"), fullfile=True) assert fmt == "%9.3G" assert arr.sum(axis=1).sum() == 18 @@ -5295,22 +5295,22 @@ def test_array_fmt(tmp_path): with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write(" 00.3E01,30.0E-1, 03.00\n" "3.0, 3.00,03.000") - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"), + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"), fullfile=True, sep=',') assert fmt == "%8.3G" assert arr.sum(axis=1).sum() == 18 # 1 col option with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write("3.0000000000\n30.000000E-1\n03.00000\n3.0\n3.00\n03.000") - arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat")) + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat")) assert arr.shape == (6,1) assert fmt == "%12.10G" assert arr.sum(axis=1).sum() == 18 shutil.copy(Path('utils','arrayskip', "AWC_subset.txt"), tmp_path) - arr, fmt = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), skip=6) + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), skip=6) - arr, fmt = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), fullfile=True, skip=6) + arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), fullfile=True, skip=6) @@ -6105,9 +6105,37 @@ def mf6_freyberg_ppu_hyperpars_thresh_invest(tmp_path): def arrayskip_test(tmp_path): from pathlib import Path - pf = pyemu.utils.PstFrom(Path('utils','arrayskip'), Path(tmp_path, "template")) - pf.add_parameters("AWC_subset.txt", 'grid', mfile_skip=6) + sr = pyemu.SpatialReference(delr=[1000]*81, delc=[1000]*57, xll=841955, yll=2208285) + pf = pyemu.utils.PstFrom(Path('utils','arrayskip'), Path(tmp_path, "template"), + spatial_reference=sr) + shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "AWC_subset_1.txt")) + pf.add_parameters(["AWC_subset.txt", "AWC_subset_1.txt"], 'grid', mfile_skip=6) + shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "d_AWC_subset.txt")) + pf.add_parameters("d_AWC_subset.txt", 'grid', par_style='d', mfile_skip=6) assert pf.par_dfs[0].shape[0] == 81*57 + assert pf.par_dfs[1].shape[0] == 81 * 57 + + shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "pp0_AWC_subset.txt")) + # shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "ppd_AWC_subset.txt")) + pf.add_parameters("pp0_AWC_subset.txt", 'pp', mfile_skip=6, + pp_options={'pp_space':4, 'try_use_ppu': True}) + # pf.add_parameters("ppd_AWC_subset.txt", 'pp', mfile_skip=6, par_style='d', + # pp_options={'pp_space':4, 'try_use_ppu': True}) + + pst = pf.build_pst() + + pars = pst.parameter_data + pars.loc[pars.pargp=='p_inst:0', 'parval1'] = 10 + pars.loc[pars.pargp == 'p_inst:1', 'parval1'] *= 10 + pars.loc[pars.pargp == 'p_inst:2', 'parval1'] = 10 + check_apply(pf) + a0 = np.loadtxt(Path(pf.new_d, "AWC_subset.txt"), skiprows=6) + a1 = np.loadtxt(Path(pf.new_d, "AWC_subset_1.txt"), skiprows=6) + assert (a0 == a1).all() + a2 = np.loadtxt(Path(pf.new_d, "d_AWC_subset.txt"), skiprows=6) + assert (a1 == a2).all() + a3 = np.loadtxt(Path(pf.new_d, "pp0_AWC_subset.txt"), skiprows=6) + assert (a2 == a3).all() pass diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py index d60886fc..59d5b776 100644 --- a/pyemu/utils/helpers.py +++ b/pyemu/utils/helpers.py @@ -1819,14 +1819,21 @@ def _process_chunk_array_files(chunk, i, df): def _process_array_file(model_file, df): if "operator" not in df.columns: - df.loc[:, "operator"] = "m" + df["operator"] = "m" # find all mults that need to be applied to this array df_mf = df.loc[df.model_file == model_file, :] results = [] org_file = df_mf.org_file.unique() if org_file.shape[0] != 1: raise Exception("wrong number of org_files for {0}".format(model_file)) - org_arr = np.loadtxt(org_file[0], ndmin=2) + if "head_rows" not in df.columns: + skip = 0 + else: + skip = df_mf.head_rows.values[0] + with open(org_file[0], 'r') as fp: + header = [fp.readline() for _ in range(skip)] + org_arr = np.loadtxt(org_file[0], ndmin=2, skiprows=skip) + if "mlt_file" in df_mf.columns: for mlt, operator in zip(df_mf.mlt_file, df_mf.operator): @@ -1881,7 +1888,9 @@ def _process_array_file(model_file, df): sep = df_mf.sep.fillna(' ').iloc[0] except AttributeError: sep = ' ' - np.savetxt(model_file, np.atleast_2d(org_arr), fmt=fmt, delimiter=sep) + with open(model_file, 'w') as fp: + fp.writelines(header) + np.savetxt(fp, np.atleast_2d(org_arr), fmt=fmt, delimiter=sep) def apply_array_pars(arr_par="arr_pars.csv", arr_par_file=None, chunk_len=50): diff --git a/pyemu/utils/pst_from.py b/pyemu/utils/pst_from.py index a1baac5a..852e8d7f 100644 --- a/pyemu/utils/pst_from.py +++ b/pyemu/utils/pst_from.py @@ -54,7 +54,7 @@ def _load_array_get_fmt(fname, sep=None, fullfile=False, skip=0, logger=None): if sep is None: # need to split line with space and count multiple splitsep = ' ' with open(fname, 'r') as fp: # load file or line - _ = [fp.readline() for _ in range(skip)] + header = [fp.readline() for _ in range(skip)] if fullfile: lines = [line for line in fp.readlines()] arr = np.genfromtxt(lines, delimiter=sep, ndmin=2) @@ -170,7 +170,7 @@ def _load_array_get_fmt(fname, sep=None, fullfile=False, skip=0, logger=None): else: typ = "F" fmt = f"%{width}.{prec}{typ}" - return arr, fmt + return arr, fmt, header class PstFrom(object): @@ -1155,8 +1155,8 @@ def _par_prep( if not dest_filepath.exists(): self.logger.lraise(f"par filename '{dest_filepath}' not found ") # read array type input file - arr, infmt = _load_array_get_fmt(dest_filepath, sep=sep, skip=skip, - logger=self.logger) + arr, infmt, storehead = _load_array_get_fmt(dest_filepath, sep=sep, skip=skip, + logger=self.logger) # arr = np.loadtxt(dest_filepath, delimiter=sep, ndmin=2) self.logger.log(f"loading array {dest_filepath}") self.logger.statement( @@ -1165,7 +1165,9 @@ def _par_prep( # save copy of input file to `org` dir # make any subfolders if they don't exist # this will be python auto precision - np.savetxt(self.original_file_d / rel_filepath.name, arr) + with open(self.original_file_d / rel_filepath.name, 'w') as fp: + fp.writelines(storehead) + np.savetxt(fp, arr) file_dict[rel_filepath] = arr fmt_dict[rel_filepath] = infmt sep_dict[rel_filepath] = sep @@ -2363,13 +2365,14 @@ def add_parameters( tpl_filename=tpl_filename, suffix="", par_type=par_type, + data_array=file_dict[filenames[0]], zone_array=zone_array, - shape=shp, get_xy=self.get_xy, fill_value=initial_value if initial_value is not None else 1.0, gpname=pargp, input_filename=in_fileabs, par_style=par_style, + headerlines=headerlines ) self.logger.log( "writing template file" @@ -3956,17 +3959,18 @@ def _get_tpl_or_ins_df( def write_array_tpl( - name, - tpl_filename, - suffix, - par_type, - zone_array=None, - gpname=None, - shape=None, - fill_value=1.0, - get_xy=None, - input_filename=None, - par_style="m", + name, + tpl_filename, + suffix, + par_type, + data_array=None, # todo reintroduce shape tuple flexibility + zone_array=None, + gpname=None, + fill_value=1.0, + get_xy=None, + input_filename=None, + par_style="m", + headerlines=None ): """ write a template file for a 2D array. @@ -3976,10 +3980,10 @@ def write_array_tpl( tpl_filename (`str`): the template file to write - include path suffix (`str`): suffix to append to par names par_type (`str`): type of parameter + data_array (`numpy.ndarray`): original data array zone_array (`numpy.ndarray`): an array used to skip inactive cells. Values less than 1 are not parameterized and are assigned a value of fill_value. Default is None. gpname (`str`): pargp filed in dataframe - shape (`tuple`): dimensions of array to write fill_value: get_xy: input_filename: @@ -3992,19 +3996,23 @@ def write_array_tpl( This function is called by `PstFrom` programmatically """ - - if shape is None and zone_array is None: + if headerlines is None: + headerlines = [] + if data_array is None and zone_array is None: raise Exception( - "write_array_tpl() error: must pass either zone_array " "or shape" + "write_array_tpl() error: must pass either zone_array " "or data_array" ) - elif shape is not None and zone_array is not None: - if shape != zone_array.shape: - raise Exception( - "write_array_tpl() error: passed " - "shape {0} != zone_array.shape {1}".format(shape, zone_array.shape) - ) - elif shape is None: + elif data_array is not None: + shape = data_array.shape + if zone_array is not None: + if data_array.shape != zone_array.shape: + raise Exception( + "write_array_tpl() error: passed " + "shape {0} != zone_array.shape {1}".format(data_array.shape, zone_array.shape) + ) + else: shape = zone_array.shape + if len(shape) != 2: raise Exception( "write_array_tpl() error: shape '{0}' not 2D" "".format(str(shape)) @@ -4012,6 +4020,7 @@ def write_array_tpl( par_style = par_style.lower() if par_style == "d": + assert data_array is not None if not os.path.exists(input_filename): raise Exception( "write_array_tpl() error: couldn't find input file " @@ -4019,7 +4028,7 @@ def write_array_tpl( input_filename ) ) - org_arr = np.loadtxt(input_filename, ndmin=2) + org_arr = data_array if par_type == "grid": pass elif par_type == "constant": @@ -4089,6 +4098,8 @@ def grid_namer(i, j): xx, yy, ii, jj = [], [], [], [] with open(tpl_filename, "w") as f: f.write("ptf ~\n") + if par_style == 'd': + f.writelines(headerlines) for i in range(shape[0]): for j in range(shape[1]): if zone_array is not None and zone_array[i, j] < 1: