Skip to content

Commit

Permalink
Propagating headers in array files to model input files (#566)
Browse files Browse the repository at this point in the history
* Trying to provide better support for propagating headers in array-like files through to model input files

+ a bit of work around tpl files including the header for direct array-style pars
+ added to tests to include direct add_pars and pp add_pars

* docstring mods
  • Loading branch information
briochh authored Dec 19, 2024
1 parent 0f69c6b commit 34547de
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 48 deletions.
60 changes: 44 additions & 16 deletions autotest/pst_from_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5223,94 +5223,94 @@ def test_array_fmt(tmp_path):
fp.write(" 3.000 3.0000 03.000\n"
" 3.0 3.0000 03.000")
# will be converted to Exp format -- only safe option
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == ''.join([" %11.4F"] * 3)
assert arr.sum(axis=1).sum() == 18
# actually space delim but could be fixed (first col is 1 wider)
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write("3.000 3.00 03.0\n"
" 3.0 3.0 03.")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == ''.join([" %4.1F"] * 3)
# actually space delim but could be fixed (first col is 1 wider)
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 3.000000000 3.00 03.0\n"
" 3.0 3.0 03.")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == ''.join([" %11.8F"] * 3)
assert arr.sum(axis=1).sum() == 18
# true space delim option -- sep passed
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write("3.000 3.00000 03.000\n"
"3.0 3.0000 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=' ')
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=' ')
assert fmt == "%7.5F"
assert arr.sum(axis=1).sum() == 18
# true space delim option with sep None
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write("3.000 3.00000 03.000\n"
"3.0 3.0000 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == "%7.5F"
assert arr.sum(axis=1).sum() == 18
# comma delim option
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write("3.000, 3.00000, 03.000\n"
" 3.0, 3.0000,03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=',')
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=',')
assert fmt == "%8.5F"
assert arr.sum(axis=1).sum() == 18
# partial sci note option (fixed format) but short
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 00.3E01 30.0E-1 03.00\n"
" 3.0 3.00 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == ''.join([" %7.0E"] * 3)
assert arr.sum(axis=1).sum() == 18
try:
# partial sci note option (fixed format) but short
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 0.3E01 3.0E-1 03.00\n"
" 3.0 3.00 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
except ValueError:
# should fail
pass
# sci note option fixed
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 3.0E00 30.0000E-1 03.00\n"
" 3.0 3.00 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == ''.join([" %11.4E"] * 3)
assert arr.sum(axis=1).sum() == 18
# free but not passing delim
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 0.3E01 30.0E-1 03.00\n"
"3.0 3.00 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"),
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"),
fullfile=True)
assert fmt == "%9.3G"
assert arr.sum(axis=1).sum() == 18

with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 00.3E01,30.0E-1, 03.00\n"
"3.0, 3.00,03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"),
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"),
fullfile=True, sep=',')
assert fmt == "%8.3G"
assert arr.sum(axis=1).sum() == 18
# 1 col option
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write("3.0000000000\n30.000000E-1\n03.00000\n3.0\n3.00\n03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert arr.shape == (6,1)
assert fmt == "%12.10G"
assert arr.sum(axis=1).sum() == 18

shutil.copy(Path('utils','arrayskip', "AWC_subset.txt"), tmp_path)
arr, fmt = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), skip=6)
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), skip=6)

arr, fmt = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), fullfile=True, skip=6)
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), fullfile=True, skip=6)



Expand Down Expand Up @@ -6105,9 +6105,37 @@ def mf6_freyberg_ppu_hyperpars_thresh_invest(tmp_path):

def arrayskip_test(tmp_path):
from pathlib import Path
pf = pyemu.utils.PstFrom(Path('utils','arrayskip'), Path(tmp_path, "template"))
pf.add_parameters("AWC_subset.txt", 'grid', mfile_skip=6)
sr = pyemu.SpatialReference(delr=[1000]*81, delc=[1000]*57, xll=841955, yll=2208285)
pf = pyemu.utils.PstFrom(Path('utils','arrayskip'), Path(tmp_path, "template"),
spatial_reference=sr)
shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "AWC_subset_1.txt"))
pf.add_parameters(["AWC_subset.txt", "AWC_subset_1.txt"], 'grid', mfile_skip=6)
shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "d_AWC_subset.txt"))
pf.add_parameters("d_AWC_subset.txt", 'grid', par_style='d', mfile_skip=6)
assert pf.par_dfs[0].shape[0] == 81*57
assert pf.par_dfs[1].shape[0] == 81 * 57

shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "pp0_AWC_subset.txt"))
# shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "ppd_AWC_subset.txt"))
pf.add_parameters("pp0_AWC_subset.txt", 'pp', mfile_skip=6,
pp_options={'pp_space':4, 'try_use_ppu': True})
# pf.add_parameters("ppd_AWC_subset.txt", 'pp', mfile_skip=6, par_style='d',
# pp_options={'pp_space':4, 'try_use_ppu': True})

pst = pf.build_pst()

pars = pst.parameter_data
pars.loc[pars.pargp=='p_inst:0', 'parval1'] = 10
pars.loc[pars.pargp == 'p_inst:1', 'parval1'] *= 10
pars.loc[pars.pargp == 'p_inst:2', 'parval1'] = 10
check_apply(pf)
a0 = np.loadtxt(Path(pf.new_d, "AWC_subset.txt"), skiprows=6)
a1 = np.loadtxt(Path(pf.new_d, "AWC_subset_1.txt"), skiprows=6)
assert (a0 == a1).all()
a2 = np.loadtxt(Path(pf.new_d, "d_AWC_subset.txt"), skiprows=6)
assert (a1 == a2).all()
a3 = np.loadtxt(Path(pf.new_d, "pp0_AWC_subset.txt"), skiprows=6)
assert (a2 == a3).all()
pass


Expand Down
15 changes: 12 additions & 3 deletions pyemu/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1819,14 +1819,21 @@ def _process_chunk_array_files(chunk, i, df):

def _process_array_file(model_file, df):
if "operator" not in df.columns:
df.loc[:, "operator"] = "m"
df["operator"] = "m"
# find all mults that need to be applied to this array
df_mf = df.loc[df.model_file == model_file, :]
results = []
org_file = df_mf.org_file.unique()
if org_file.shape[0] != 1:
raise Exception("wrong number of org_files for {0}".format(model_file))
org_arr = np.loadtxt(org_file[0], ndmin=2)
if "head_rows" not in df.columns:
skip = 0
else:
skip = df_mf.head_rows.values[0]
with open(org_file[0], 'r') as fp:
header = [fp.readline() for _ in range(skip)]
org_arr = np.loadtxt(org_file[0], ndmin=2, skiprows=skip)


if "mlt_file" in df_mf.columns:
for mlt, operator in zip(df_mf.mlt_file, df_mf.operator):
Expand Down Expand Up @@ -1881,7 +1888,9 @@ def _process_array_file(model_file, df):
sep = df_mf.sep.fillna(' ').iloc[0]
except AttributeError:
sep = ' '
np.savetxt(model_file, np.atleast_2d(org_arr), fmt=fmt, delimiter=sep)
with open(model_file, 'w') as fp:
fp.writelines(header)
np.savetxt(fp, np.atleast_2d(org_arr), fmt=fmt, delimiter=sep)


def apply_array_pars(arr_par="arr_pars.csv", arr_par_file=None, chunk_len=50):
Expand Down
69 changes: 40 additions & 29 deletions pyemu/utils/pst_from.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def _load_array_get_fmt(fname, sep=None, fullfile=False, skip=0, logger=None):
if sep is None: # need to split line with space and count multiple
splitsep = ' '
with open(fname, 'r') as fp: # load file or line
_ = [fp.readline() for _ in range(skip)]
header = [fp.readline() for _ in range(skip)]
if fullfile:
lines = [line for line in fp.readlines()]
arr = np.genfromtxt(lines, delimiter=sep, ndmin=2)
Expand Down Expand Up @@ -170,7 +170,7 @@ def _load_array_get_fmt(fname, sep=None, fullfile=False, skip=0, logger=None):
else:
typ = "F"
fmt = f"%{width}.{prec}{typ}"
return arr, fmt
return arr, fmt, header


class PstFrom(object):
Expand Down Expand Up @@ -1155,8 +1155,8 @@ def _par_prep(
if not dest_filepath.exists():
self.logger.lraise(f"par filename '{dest_filepath}' not found ")
# read array type input file
arr, infmt = _load_array_get_fmt(dest_filepath, sep=sep, skip=skip,
logger=self.logger)
arr, infmt, storehead = _load_array_get_fmt(dest_filepath, sep=sep, skip=skip,
logger=self.logger)
# arr = np.loadtxt(dest_filepath, delimiter=sep, ndmin=2)
self.logger.log(f"loading array {dest_filepath}")
self.logger.statement(
Expand All @@ -1165,7 +1165,9 @@ def _par_prep(
# save copy of input file to `org` dir
# make any subfolders if they don't exist
# this will be python auto precision
np.savetxt(self.original_file_d / rel_filepath.name, arr)
with open(self.original_file_d / rel_filepath.name, 'w') as fp:
fp.writelines(storehead)
np.savetxt(fp, arr)
file_dict[rel_filepath] = arr
fmt_dict[rel_filepath] = infmt
sep_dict[rel_filepath] = sep
Expand Down Expand Up @@ -2363,13 +2365,14 @@ def add_parameters(
tpl_filename=tpl_filename,
suffix="",
par_type=par_type,
data_array=file_dict[filenames[0]],
zone_array=zone_array,
shape=shp,
get_xy=self.get_xy,
fill_value=initial_value if initial_value is not None else 1.0,
gpname=pargp,
input_filename=in_fileabs,
par_style=par_style,
headerlines=headerlines
)
self.logger.log(
"writing template file"
Expand Down Expand Up @@ -3956,17 +3959,18 @@ def _get_tpl_or_ins_df(


def write_array_tpl(
name,
tpl_filename,
suffix,
par_type,
zone_array=None,
gpname=None,
shape=None,
fill_value=1.0,
get_xy=None,
input_filename=None,
par_style="m",
name,
tpl_filename,
suffix,
par_type,
data_array=None, # todo reintroduce shape tuple flexibility
zone_array=None,
gpname=None,
fill_value=1.0,
get_xy=None,
input_filename=None,
par_style="m",
headerlines=None
):
"""
write a template file for a 2D array.
Expand All @@ -3976,10 +3980,10 @@ def write_array_tpl(
tpl_filename (`str`): the template file to write - include path
suffix (`str`): suffix to append to par names
par_type (`str`): type of parameter
data_array (`numpy.ndarray`): original data array
zone_array (`numpy.ndarray`): an array used to skip inactive cells. Values less than 1 are
not parameterized and are assigned a value of fill_value. Default is None.
gpname (`str`): pargp filed in dataframe
shape (`tuple`): dimensions of array to write
fill_value:
get_xy:
input_filename:
Expand All @@ -3992,34 +3996,39 @@ def write_array_tpl(
This function is called by `PstFrom` programmatically
"""

if shape is None and zone_array is None:
if headerlines is None:
headerlines = []
if data_array is None and zone_array is None:
raise Exception(
"write_array_tpl() error: must pass either zone_array " "or shape"
"write_array_tpl() error: must pass either zone_array " "or data_array"
)
elif shape is not None and zone_array is not None:
if shape != zone_array.shape:
raise Exception(
"write_array_tpl() error: passed "
"shape {0} != zone_array.shape {1}".format(shape, zone_array.shape)
)
elif shape is None:
elif data_array is not None:
shape = data_array.shape
if zone_array is not None:
if data_array.shape != zone_array.shape:
raise Exception(
"write_array_tpl() error: passed "
"shape {0} != zone_array.shape {1}".format(data_array.shape, zone_array.shape)
)
else:
shape = zone_array.shape

if len(shape) != 2:
raise Exception(
"write_array_tpl() error: shape '{0}' not 2D" "".format(str(shape))
)

par_style = par_style.lower()
if par_style == "d":
assert data_array is not None
if not os.path.exists(input_filename):
raise Exception(
"write_array_tpl() error: couldn't find input file "
+ " {0}, which is required for 'direct' par_style".format(
input_filename
)
)
org_arr = np.loadtxt(input_filename, ndmin=2)
org_arr = data_array
if par_type == "grid":
pass
elif par_type == "constant":
Expand Down Expand Up @@ -4089,6 +4098,8 @@ def grid_namer(i, j):
xx, yy, ii, jj = [], [], [], []
with open(tpl_filename, "w") as f:
f.write("ptf ~\n")
if par_style == 'd':
f.writelines(headerlines)
for i in range(shape[0]):
for j in range(shape[1]):
if zone_array is not None and zone_array[i, j] < 1:
Expand Down

0 comments on commit 34547de

Please sign in to comment.