Skip to content

Commit

Permalink
Propagating headers in array files to model input files (#566)
Browse files Browse the repository at this point in the history
* Trying to provide better support for propagating headers in array-like files through to model input files

+ a bit of work around tpl files including the header for direct array-style pars
+ added to tests to include direct add_pars and pp add_pars

* docstring mods
  • Loading branch information
briochh authored Dec 19, 2024
1 parent 0f69c6b commit 34547de
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 48 deletions.
60 changes: 44 additions & 16 deletions autotest/pst_from_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5223,94 +5223,94 @@ def test_array_fmt(tmp_path):
fp.write(" 3.000 3.0000 03.000\n"
" 3.0 3.0000 03.000")
# will be converted to Exp format -- only safe option
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == ''.join([" %11.4F"] * 3)
assert arr.sum(axis=1).sum() == 18
# actually space delim but could be fixed (first col is 1 wider)
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write("3.000 3.00 03.0\n"
" 3.0 3.0 03.")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == ''.join([" %4.1F"] * 3)
# actually space delim but could be fixed (first col is 1 wider)
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 3.000000000 3.00 03.0\n"
" 3.0 3.0 03.")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == ''.join([" %11.8F"] * 3)
assert arr.sum(axis=1).sum() == 18
# true space delim option -- sep passed
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write("3.000 3.00000 03.000\n"
"3.0 3.0000 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=' ')
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=' ')
assert fmt == "%7.5F"
assert arr.sum(axis=1).sum() == 18
# true space delim option with sep None
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write("3.000 3.00000 03.000\n"
"3.0 3.0000 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == "%7.5F"
assert arr.sum(axis=1).sum() == 18
# comma delim option
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write("3.000, 3.00000, 03.000\n"
" 3.0, 3.0000,03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=',')
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=',')
assert fmt == "%8.5F"
assert arr.sum(axis=1).sum() == 18
# partial sci note option (fixed format) but short
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 00.3E01 30.0E-1 03.00\n"
" 3.0 3.00 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == ''.join([" %7.0E"] * 3)
assert arr.sum(axis=1).sum() == 18
try:
# partial sci note option (fixed format) but short
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 0.3E01 3.0E-1 03.00\n"
" 3.0 3.00 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
except ValueError:
# should fail
pass
# sci note option fixed
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 3.0E00 30.0000E-1 03.00\n"
" 3.0 3.00 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert fmt == ''.join([" %11.4E"] * 3)
assert arr.sum(axis=1).sum() == 18
# free but not passing delim
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 0.3E01 30.0E-1 03.00\n"
"3.0 3.00 03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"),
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"),
fullfile=True)
assert fmt == "%9.3G"
assert arr.sum(axis=1).sum() == 18

with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write(" 00.3E01,30.0E-1, 03.00\n"
"3.0, 3.00,03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"),
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"),
fullfile=True, sep=',')
assert fmt == "%8.3G"
assert arr.sum(axis=1).sum() == 18
# 1 col option
with open(Path(tmp_path, "test.dat"), 'w') as fp:
fp.write("3.0000000000\n30.000000E-1\n03.00000\n3.0\n3.00\n03.000")
arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"))
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "test.dat"))
assert arr.shape == (6,1)
assert fmt == "%12.10G"
assert arr.sum(axis=1).sum() == 18

shutil.copy(Path('utils','arrayskip', "AWC_subset.txt"), tmp_path)
arr, fmt = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), skip=6)
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), skip=6)

arr, fmt = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), fullfile=True, skip=6)
arr, fmt, h = _load_array_get_fmt(Path(tmp_path, "AWC_subset.txt"), fullfile=True, skip=6)



Expand Down Expand Up @@ -6105,9 +6105,37 @@ def mf6_freyberg_ppu_hyperpars_thresh_invest(tmp_path):

def arrayskip_test(tmp_path):
from pathlib import Path
pf = pyemu.utils.PstFrom(Path('utils','arrayskip'), Path(tmp_path, "template"))
pf.add_parameters("AWC_subset.txt", 'grid', mfile_skip=6)
sr = pyemu.SpatialReference(delr=[1000]*81, delc=[1000]*57, xll=841955, yll=2208285)
pf = pyemu.utils.PstFrom(Path('utils','arrayskip'), Path(tmp_path, "template"),
spatial_reference=sr)
shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "AWC_subset_1.txt"))
pf.add_parameters(["AWC_subset.txt", "AWC_subset_1.txt"], 'grid', mfile_skip=6)
shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "d_AWC_subset.txt"))
pf.add_parameters("d_AWC_subset.txt", 'grid', par_style='d', mfile_skip=6)
assert pf.par_dfs[0].shape[0] == 81*57
assert pf.par_dfs[1].shape[0] == 81 * 57

shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "pp0_AWC_subset.txt"))
# shutil.copy(Path(pf.new_d, "AWC_subset.txt"), Path(pf.new_d, "ppd_AWC_subset.txt"))
pf.add_parameters("pp0_AWC_subset.txt", 'pp', mfile_skip=6,
pp_options={'pp_space':4, 'try_use_ppu': True})
# pf.add_parameters("ppd_AWC_subset.txt", 'pp', mfile_skip=6, par_style='d',
# pp_options={'pp_space':4, 'try_use_ppu': True})

pst = pf.build_pst()

pars = pst.parameter_data
pars.loc[pars.pargp=='p_inst:0', 'parval1'] = 10
pars.loc[pars.pargp == 'p_inst:1', 'parval1'] *= 10
pars.loc[pars.pargp == 'p_inst:2', 'parval1'] = 10
check_apply(pf)
a0 = np.loadtxt(Path(pf.new_d, "AWC_subset.txt"), skiprows=6)
a1 = np.loadtxt(Path(pf.new_d, "AWC_subset_1.txt"), skiprows=6)
assert (a0 == a1).all()
a2 = np.loadtxt(Path(pf.new_d, "d_AWC_subset.txt"), skiprows=6)
assert (a1 == a2).all()
a3 = np.loadtxt(Path(pf.new_d, "pp0_AWC_subset.txt"), skiprows=6)
assert (a2 == a3).all()
pass


Expand Down
15 changes: 12 additions & 3 deletions pyemu/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1819,14 +1819,21 @@ def _process_chunk_array_files(chunk, i, df):

def _process_array_file(model_file, df):
if "operator" not in df.columns:
df.loc[:, "operator"] = "m"
df["operator"] = "m"
# find all mults that need to be applied to this array
df_mf = df.loc[df.model_file == model_file, :]
results = []
org_file = df_mf.org_file.unique()
if org_file.shape[0] != 1:
raise Exception("wrong number of org_files for {0}".format(model_file))
org_arr = np.loadtxt(org_file[0], ndmin=2)
if "head_rows" not in df.columns:
skip = 0
else:
skip = df_mf.head_rows.values[0]
with open(org_file[0], 'r') as fp:
header = [fp.readline() for _ in range(skip)]
org_arr = np.loadtxt(org_file[0], ndmin=2, skiprows=skip)


if "mlt_file" in df_mf.columns:
for mlt, operator in zip(df_mf.mlt_file, df_mf.operator):
Expand Down Expand Up @@ -1881,7 +1888,9 @@ def _process_array_file(model_file, df):
sep = df_mf.sep.fillna(' ').iloc[0]
except AttributeError:
sep = ' '
np.savetxt(model_file, np.atleast_2d(org_arr), fmt=fmt, delimiter=sep)
with open(model_file, 'w') as fp:
fp.writelines(header)
np.savetxt(fp, np.atleast_2d(org_arr), fmt=fmt, delimiter=sep)


def apply_array_pars(arr_par="arr_pars.csv", arr_par_file=None, chunk_len=50):
Expand Down
69 changes: 40 additions & 29 deletions pyemu/utils/pst_from.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def _load_array_get_fmt(fname, sep=None, fullfile=False, skip=0, logger=None):
if sep is None: # need to split line with space and count multiple
splitsep = ' '
with open(fname, 'r') as fp: # load file or line
_ = [fp.readline() for _ in range(skip)]
header = [fp.readline() for _ in range(skip)]
if fullfile:
lines = [line for line in fp.readlines()]
arr = np.genfromtxt(lines, delimiter=sep, ndmin=2)
Expand Down Expand Up @@ -170,7 +170,7 @@ def _load_array_get_fmt(fname, sep=None, fullfile=False, skip=0, logger=None):
else:
typ = "F"
fmt = f"%{width}.{prec}{typ}"
return arr, fmt
return arr, fmt, header


class PstFrom(object):
Expand Down Expand Up @@ -1155,8 +1155,8 @@ def _par_prep(
if not dest_filepath.exists():
self.logger.lraise(f"par filename '{dest_filepath}' not found ")
# read array type input file
arr, infmt = _load_array_get_fmt(dest_filepath, sep=sep, skip=skip,
logger=self.logger)
arr, infmt, storehead = _load_array_get_fmt(dest_filepath, sep=sep, skip=skip,
logger=self.logger)
# arr = np.loadtxt(dest_filepath, delimiter=sep, ndmin=2)
self.logger.log(f"loading array {dest_filepath}")
self.logger.statement(
Expand All @@ -1165,7 +1165,9 @@ def _par_prep(
# save copy of input file to `org` dir
# make any subfolders if they don't exist
# this will be python auto precision
np.savetxt(self.original_file_d / rel_filepath.name, arr)
with open(self.original_file_d / rel_filepath.name, 'w') as fp:
fp.writelines(storehead)
np.savetxt(fp, arr)
file_dict[rel_filepath] = arr
fmt_dict[rel_filepath] = infmt
sep_dict[rel_filepath] = sep
Expand Down Expand Up @@ -2363,13 +2365,14 @@ def add_parameters(
tpl_filename=tpl_filename,
suffix="",
par_type=par_type,
data_array=file_dict[filenames[0]],
zone_array=zone_array,
shape=shp,
get_xy=self.get_xy,
fill_value=initial_value if initial_value is not None else 1.0,
gpname=pargp,
input_filename=in_fileabs,
par_style=par_style,
headerlines=headerlines
)
self.logger.log(
"writing template file"
Expand Down Expand Up @@ -3956,17 +3959,18 @@ def _get_tpl_or_ins_df(


def write_array_tpl(
name,
tpl_filename,
suffix,
par_type,
zone_array=None,
gpname=None,
shape=None,
fill_value=1.0,
get_xy=None,
input_filename=None,
par_style="m",
name,
tpl_filename,
suffix,
par_type,
data_array=None, # todo reintroduce shape tuple flexibility
zone_array=None,
gpname=None,
fill_value=1.0,
get_xy=None,
input_filename=None,
par_style="m",
headerlines=None
):
"""
write a template file for a 2D array.
Expand All @@ -3976,10 +3980,10 @@ def write_array_tpl(
tpl_filename (`str`): the template file to write - include path
suffix (`str`): suffix to append to par names
par_type (`str`): type of parameter
data_array (`numpy.ndarray`): original data array
zone_array (`numpy.ndarray`): an array used to skip inactive cells. Values less than 1 are
not parameterized and are assigned a value of fill_value. Default is None.
gpname (`str`): pargp filed in dataframe
shape (`tuple`): dimensions of array to write
fill_value:
get_xy:
input_filename:
Expand All @@ -3992,34 +3996,39 @@ def write_array_tpl(
This function is called by `PstFrom` programmatically
"""

if shape is None and zone_array is None:
if headerlines is None:
headerlines = []
if data_array is None and zone_array is None:
raise Exception(
"write_array_tpl() error: must pass either zone_array " "or shape"
"write_array_tpl() error: must pass either zone_array " "or data_array"
)
elif shape is not None and zone_array is not None:
if shape != zone_array.shape:
raise Exception(
"write_array_tpl() error: passed "
"shape {0} != zone_array.shape {1}".format(shape, zone_array.shape)
)
elif shape is None:
elif data_array is not None:
shape = data_array.shape
if zone_array is not None:
if data_array.shape != zone_array.shape:
raise Exception(
"write_array_tpl() error: passed "
"shape {0} != zone_array.shape {1}".format(data_array.shape, zone_array.shape)
)
else:
shape = zone_array.shape

if len(shape) != 2:
raise Exception(
"write_array_tpl() error: shape '{0}' not 2D" "".format(str(shape))
)

par_style = par_style.lower()
if par_style == "d":
assert data_array is not None
if not os.path.exists(input_filename):
raise Exception(
"write_array_tpl() error: couldn't find input file "
+ " {0}, which is required for 'direct' par_style".format(
input_filename
)
)
org_arr = np.loadtxt(input_filename, ndmin=2)
org_arr = data_array
if par_type == "grid":
pass
elif par_type == "constant":
Expand Down Expand Up @@ -4089,6 +4098,8 @@ def grid_namer(i, j):
xx, yy, ii, jj = [], [], [], []
with open(tpl_filename, "w") as f:
f.write("ptf ~\n")
if par_style == 'd':
f.writelines(headerlines)
for i in range(shape[0]):
for j in range(shape[1]):
if zone_array is not None and zone_array[i, j] < 1:
Expand Down

0 comments on commit 34547de

Please sign in to comment.