Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a Python parser for diag.out files and fix header formatting #2666

Merged
merged 2 commits into from
Dec 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 26 additions & 35 deletions Source/driver/sum_integrated_quantities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ Castro::sum_integrated_quantities ()
header << std::endl;

data_log1 << std::setw(intwidth) << "# COLUMN 1";
data_log1 << std::setw(fixwidth) << " 2";
data_log1 << std::setw(fixwidth) << 2;

for (int icol = 3; icol <= n; ++icol) {
data_log1 << std::setw(datwidth) << icol;
Expand Down Expand Up @@ -482,28 +482,29 @@ Castro::sum_integrated_quantities ()

if (time == 0.0) {

log << std::setw(intwidth) << "# COLUMN 1";
log << std::setw(fixwidth) << " 2";
log << std::setw(fixwidth) << " 3";
log << std::setw(fixwidth) << " 4";
log << std::setw(fixwidth) << " 5";
log << std::setw(fixwidth) << " 6";
log << std::setw(fixwidth) << " 7";
int n = 0;

std::ostringstream header;

header << std::setw(intwidth) << "# TIMESTEP";
header << std::setw(fixwidth) << " TIME";
header << std::setw(intwidth) << "# TIMESTEP"; ++n;
header << std::setw(fixwidth) << " TIME"; ++n;

header << std::setw(datwidth) << " h_+ (x)";
header << std::setw(datwidth) << " h_x (x)";
header << std::setw(datwidth) << " h_+ (y)";
header << std::setw(datwidth) << " h_x (y)";
header << std::setw(datwidth) << " h_+ (z)";
header << std::setw(datwidth) << " h_x (z)";
header << std::setw(datwidth) << " h_+ (x)"; ++n;
header << std::setw(datwidth) << " h_x (x)"; ++n;
header << std::setw(datwidth) << " h_+ (y)"; ++n;
header << std::setw(datwidth) << " h_x (y)"; ++n;
header << std::setw(datwidth) << " h_+ (z)"; ++n;
header << std::setw(datwidth) << " h_x (z)"; ++n;

header << std::endl;

log << std::setw(intwidth) << "# COLUMN 1";
log << std::setw(fixwidth) << 2;

for (int i = 3; i <= n; ++i) {
log << std::setw(datwidth) << i;
}

log << std::endl;

log << header.str();
Expand Down Expand Up @@ -589,27 +590,17 @@ Castro::sum_integrated_quantities ()

std::ostringstream header;

header << std::setw(intwidth) << "# TIMESTEP"; ++n;
header << std::setw(fixwidth) << " TIME"; ++n;

// We need to be careful here since the species names have differing numbers of characters
header << std::setw(intwidth) << "# TIMESTEP"; ++n;
header << std::setw(fixwidth) << " TIME"; ++n;

for (int i = 0; i < NumSpec; i++) {
std::string outString{};
std::string massString{"Mass "};
std::string specString{species_names[i]};
while (static_cast<int>(outString.length() + specString.length() + massString.length()) < datwidth) {
outString += " ";
}
outString += massString;
outString += specString;
header << std::setw(datwidth) << outString; ++n;
header << std::setw(datwidth) << ("Mass " + species_names[i]); ++n;
}

header << std::endl;

log << std::setw(intwidth) << "# COLUMN 1";
log << std::setw(fixwidth) << " 2";
log << std::setw(fixwidth) << 2;

for (int i = 3; i <= n; ++i) {
log << std::setw(datwidth) << i;
Expand Down Expand Up @@ -694,19 +685,19 @@ Castro::sum_integrated_quantities ()
header << std::setw(fixwidth) << " DT"; ++n;
header << std::setw(intwidth) << " FINEST LEV"; ++n;
header << std::setw(fixwidth) << " MAX NUMBER OF SUBCYCLES"; ++n;
header << std::setw(fixwidth) << " COARSE TIMESTEP WALLTIME"; ++n;
header << std::setw(datwidth) << " COARSE TIMESTEP WALLTIME"; ++n;
#ifdef AMREX_USE_GPU
header << std::setw(fixwidth) << " MAXIMUM GPU MEMORY USED"; ++n;
header << std::setw(fixwidth) << " MINIMUM GPU MEMORY FREE"; ++n;
header << std::setw(datwidth) << " MAXIMUM GPU MEMORY USED"; ++n;
header << std::setw(datwidth) << " MINIMUM GPU MEMORY FREE"; ++n;
#endif

header << std::endl;

log << std::setw(intwidth) << "# COLUMN 1";
log << std::setw(fixwidth) << " 2";
log << std::setw(fixwidth) << 2;

for (int i = 3; i < 4; ++i) {
log << std::setw(datwidth) << i;
log << std::setw(fixwidth) << i;
}

log << std::setw(intwidth) << 4; // Handle the finest lev column
Expand Down
99 changes: 99 additions & 0 deletions Util/scripts/diag_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Helper functions for working with Castro diagnostic files (*_diag.out)

To use these in a standalone script, you can do one of the following:

* append $CASTRO_HOME/Util/scripts to sys.path at the top of your script:
sys.path.append("<path to Castro>/Util/scripts")

* add a symlink to this file in the same directory as your script:
$ ln -s "$CASTRO_HOME/Util/scripts/diag_parser.py" .

* copy this file into the same directory as your script

Then you can do `from diag_parser import deduplicate, read_diag_file`.
"""

from pathlib import Path

import numpy as np

""" Format notes
files are opened in Castro.cpp, data is written in sum_integrated_quantities.cpp

data_logs[0]: grid_diag.out
intwidth, fixwidth, datwidth*

data_logs[1]: gravity_diag.out
- this was previously missing the last column number (8), which we handle for
backwards compatibility
intwidth, fixwidth, datwidth, datwidth, datwidth, datwidth, datwidth, datwidth

data_logs[2]: species_diag.out
intwidth, fixwidth, datwidth*

data_logs[3]: amr_diag.out
- if compiled with GPU support, this will have two additional integer fields at
the end with size `datwidth` for the GPU memory usage
- column 5 (max number of subcycles) is an integer
intwidth, fixwidth, fixwidth, intwidth, fixwidth, datwidth
"""

datwidth = 25  # Floating point data in scientific notation
fixwidth = 25  # Floating point data not in scientific notation
intwidth = 12  # Integer data

# Widths of the leading columns of each supported file, keyed by file name.
# Any additional columns after these are assumed to be floating point values in
# scientific notation (amr_diag.out gets special handling)
FIELD_WIDTHS = {
    "grid_diag.out": [intwidth, fixwidth],
    "gravity_diag.out": [intwidth, fixwidth] + [datwidth] * 6,
    "species_diag.out": [intwidth, fixwidth],
    "amr_diag.out": [intwidth, fixwidth, fixwidth, intwidth, fixwidth, datwidth],
}


def read_diag_file(file_path):
    """Reads a Castro diagnostic file into a numpy structured array.

    Currently only supports the default files that Castro generates, i.e.
    files whose name is one of the keys of ``FIELD_WIDTHS``.

    Parameters
    ----------
    file_path : str or pathlib.Path
        Location of the diagnostic file. Only the final path component is
        used to decide which column layout applies.

    Returns
    -------
    numpy.ndarray
        The result of ``np.genfromtxt`` with fixed-width columns and field
        names taken from the second header line of the file.

    Raises
    ------
    ValueError
        If the file name is not one of the keys of ``FIELD_WIDTHS``.
    """
    if not isinstance(file_path, Path):
        file_path = Path(file_path)
    filetype = file_path.name
    if filetype not in FIELD_WIDTHS:
        raise ValueError("Unsupported file name")
    # Work on a copy: the list is padded below, and padding the shared
    # module-level entry in place would leak one file's column count into
    # every later call for the same file type.
    widths = list(FIELD_WIDTHS[filetype])
    with open(file_path, "r") as f:
        # try getting the number of columns from the first line
        first_line = f.readline().rstrip("\n")
        if filetype == "gravity_diag.out":
            # older gravity_diag.out files are missing the last column number,
            # but the file fortunately has a fixed number of columns
            num_columns = 8
        else:
            num_columns = int(first_line.split()[-1])
        # pad out the widths list on the right if necessary (a non-positive
        # difference yields an empty list, so nothing is appended)
        widths.extend([datwidth] * (num_columns - len(widths)))
        # infer datatypes from the widths
        dtypes = [int if w == intwidth else float for w in widths]
        # amr_diag.out has several integer columns with long names
        if filetype == "amr_diag.out":
            dtypes[4] = int  # max number of subcycles
            if num_columns >= 8:
                dtypes[6] = int  # maximum gpu memory used
                dtypes[7] = int  # minimum gpu memory free
        # already read the first header line, so we don't need to skip any rows
        data = np.genfromtxt(
            f, delimiter=widths, comments="#", dtype=dtypes, names=True
        )
    return data


def deduplicate(data):
    """Drop repeated timesteps, keeping only the last occurrence of each.

    `data` is indexed by the "TIMESTEP" field, and the rows of the result are
    ordered by increasing timestep value.
    """
    # Searching the reversed column means the "first" match reported by
    # np.unique() is really the final occurrence in the original order.
    reversed_steps = data["TIMESTEP"][::-1]
    _, first_in_reversed = np.unique(reversed_steps, return_index=True)
    # Map positions in the reversed column back to positions in `data`.
    # np.unique() already sorts by value, so no re-ordering is needed.
    last_row = data.shape[0] - 1
    keep = last_row - first_in_reversed
    return data[keep]