from eva.data.eva_dataset_base import EvaDatasetBase
from eva.utilities.config import get
from datetime import datetime
import xarray as xr
import numpy as np
import csv


class CsvSpace(EvaDatasetBase):

    """
    A class for handling CSV dataset configuration and processing.

    Each configured group names the CSV columns (by zero-based field position) to load.
    Every requested column becomes a variable called '<group name>::<variable name>', and
    an optional date entry becomes '<group name>::datetime'.
    """

    def execute(self, dataset_config, data_collections, timing):

        """
        Executes the processing of CSV dataset.

        Reads every file listed under 'filenames', builds one xarray dataset per requested
        field of every group, merges them and adds the result to the named collection.

        Args:
            dataset_config (dict): Configuration dictionary for the dataset. Must provide
                                   'name', 'filenames' and 'groups'.
            data_collections (DataCollections): Object for managing data collections.
            timing (Timing): Timing object for tracking execution time (not used here).
        """

        # Set the collection name
        # -----------------------
        collection_name = get(dataset_config, self.logger, 'name')

        # Filenames to be read into this collection
        filenames = get(dataset_config, self.logger, 'filenames')

        # get 'groups'
        groups = get(dataset_config, self.logger, 'groups')

        ds_list = []

        # Read in the CSV files
        for filename in filenames:

            # The context manager closes the file even if parsing raises
            with open(filename, "r", newline="") as csv_file:
                file_data = [[element.strip() for element in row]
                             for row in csv.reader(csv_file, delimiter=",")]

            for group in groups:
                ds_list.extend(self._load_group(group, file_data))

        # Merge the single-variable datasets from ds_list into one dataset.
        # NOTE(review): datasets from all files are merged together; files that provide the
        # same group/variable names with different values will presumably make xr.merge
        # fail -- confirm the intended multi-file behaviour.
        ds = xr.merge(ds_list)

        # Assert that the collection contains at least one variable
        if not ds.keys():
            self.logger.abort(f"Collection '{collection_name}' built from file(s) " +
                              f"{filenames} does not have any variables.")

        # add the dataset to the collections
        data_collections.create_or_add_to_collection(collection_name, ds)
        ds.close()

        # Display the contents of the collections for helping the user with making plots
        data_collections.display_collections()

    # ----------------------------------------------------------------------------------------------

    def _load_group(self, group, file_data):

        """
        Build the datasets requested by a single group.

        Args:
            group (dict): Group configuration. 'name' is required; 'header', 'variables',
                          'date' and 'coordinate' are optional.
            file_data (list): Rows of the CSV file, each a list of stripped strings. It is
                              not modified.

        Returns:
            list: One xarray dataset per loaded field (datetime first, then variables).
        """

        group_name = get(group, self.logger, 'name')
        header_info = get(group, self.logger, 'header', None, False)
        group_vars = get(group, self.logger, 'variables', None, False)
        date_config = get(group, self.logger, 'date', None, False)
        coord_config = get(group, self.logger, 'coordinate', None, False)

        # Skip the header if present. Slicing drops all of the leading header rows and
        # leaves file_data untouched, so other groups still see the complete file.
        rows = file_data
        if header_info is not None:
            rows = file_data[int(header_info.get('rows')):]

        # Dates always use the 'Cycle' coordinate; otherwise use the configured
        # coordinate name and fall back to 'Unit' when none is given.
        coord = coord_config.get('name') if coord_config else None
        if date_config is not None:
            coord = 'Cycle'
        elif coord is None:
            coord = 'Unit'

        datasets = []

        # load datetime if available
        if date_config is not None:
            dt_arr = self.get_datetime_array(rows, date_config)
            datasets.append(self._make_dataset(group_name + "::datetime", coord, dt_arr))

        # load requested data; each variable maps a name to a field position
        if group_vars is not None:
            for key, field in group_vars.items():
                var_arr = np.array([row[field] for row in rows], dtype=np.float32)
                datasets.append(self._make_dataset(group_name + "::" + key, coord, var_arr))

        return datasets

    # ----------------------------------------------------------------------------------------------

    @staticmethod
    def _make_dataset(var_name, coord, values):

        """
        Wrap a 1-d array in a dataset indexed by a 0..n-1 coordinate named coord.
        """

        return xr.Dataset({var_name: (coord, values),
                           coord: range(0, len(values))})

    # ----------------------------------------------------------------------------------------------

    def generate_default_config(self, filenames, collection_name):

        """
        Generates a default configuration for CSV dataset.

        Args:
            filenames (list): List of file names (not used to build the default).
            collection_name (str): Name of the data collection.

        Returns:
            dict: Default configuration dictionary with empty 'channels', 'regions',
                  'levels' and 'groups' lists and the collection name.
        """

        eva_dict = {'channels': [],
                    'regions': [],
                    'levels': [],
                    'groups': [],
                    'name': collection_name}
        return eva_dict

    # ----------------------------------------------------------------------------------------------

    def get_datetime_array(self, file_data, date_config):

        """
        Load date components and return a datetime array. Date/time information
        may be in a single field of file_data or in 4 fields (y,m,d,h).

        Args:
            file_data (list): List of data rows, each a list of strings
            date_config (dict): date configuration information; either a 'datetime' field
                                position or 'year', 'month', 'day' and 'hour' positions

        Returns:
            np.array: datetime array
        """

        datetime_key = 'datetime'
        # (key, digits) pairs in the order required by the '%Y%m%d%H' format
        date_keys = (('year', 4), ('month', 2), ('day', 2), ('hour', 2))

        if datetime_key in date_config:
            field = date_config[datetime_key]
            date_str_list = [row[field] for row in file_data]

        # All four components must be present in the configuration
        elif all(key in date_config for key, _ in date_keys):
            fields = [(date_config[key], width) for key, width in date_keys]
            # Zero-pad each component so values such as '1' still parse as '01'
            date_str_list = [''.join(row[field].zfill(width) for field, width in fields)
                             for row in file_data]

        else:
            self.logger.abort("The date configuration in yaml file does not contain required " +
                              "date information. A \'date\': entry must specify either " +
                              "\'datetime\': (int) file field position or entries specifying " +
                              " \'year\': int, \'month\': int, \'day\': int, and \'hour\': int. " +
                              f" Date information found was {date_config}")

        return np.array([np.datetime64(datetime.strptime(ds, '%Y%m%d%H')) for ds in date_str_list])
tight layout: + title: "Valid: 2023062006" + output name: line_plots/minimization/gfs_gdas.summary.gnorms.png + plot logo: + which: 'noaa/nws' + loc: 'upper left' + subplot_orientation: 'first' + + plots: + - add_xlabel: 'Cycle Time' + add_ylabel: 'Initial Gradient' + add_grid: + axis: 'both' + linestyle: 'dotted' + linewidth: 0.5 + color: 'black' + add_legend: + loc: 'upper right' + layers: + - type: LinePlot + x: + variable: gnorm::GsiIeee::datetime + y: + variable: gnorm::GsiIeee::igrad + color: 'blue' + label: 'Initial Gradient' + + - add_xlabel: 'Cycle Time' + add_ylabel: 'Final Gnorm' + set_yscale: 'log' + add_grid: + axis: 'both' + linestyle: 'dotted' + linewidth: 0.5 + color: 'black' + add_legend: + loc: 'upper right' + layers: + - type: LinePlot + x: + variable: gnorm::GsiIeee::datetime + y: + variable: gnorm::GsiIeee::final_gnorm + color: 'blue' + label: 'Final Gnorm' + + - add_xlabel: 'Cycle Time' + add_ylabel: 'Min/Max Gnorm, final 10 iterations' + set_yscale: 'log' + add_grid: + axis: 'both' + linestyle: 'dotted' + linewidth: 0.5 + color: 'black' + add_legend: + loc: 'upper right' + layers: + - type: LinePlot + x: + variable: gnorm::GsiIeee::datetime + y: + variable: gnorm::GsiIeee::min_gnorm + color: 'blue' + label: 'Minimum Gnorm' + - type: LinePlot + x: + variable: gnorm::GsiIeee::datetime + y: + variable: gnorm::GsiIeee::max_gnorm + color: 'red' + label: 'Maximum Gnorm' diff --git a/src/eva/tests/data/csv_space.txt b/src/eva/tests/data/csv_space.txt new file mode 100644 index 00000000..6f52c637 --- /dev/null +++ b/src/eva/tests/data/csv_space.txt @@ -0,0 +1,121 @@ + 2023,10,12,00,4.894399e+03,4.233295e-06,4.157784e-06,2.527239e-06,6.636099e-06 + 2023,10,12,06,4.023608e+03,1.595213e-06,2.024559e-06,9.756433e-07,3.485845e-06 + 2023,10,12,12,4.587318e+03,4.376192e-07,1.053710e-06,4.376192e-07,1.980284e-06 + 2023,10,12,18,3.759925e+03,6.325531e-06,3.782423e-06,2.003764e-06,7.383456e-06 + 
2023,10,13,00,4.807758e+03,7.411554e-07,7.764517e-07,5.335968e-07,1.110592e-06 + 2023,10,13,06,3.994376e+03,1.281396e-07,2.524666e-07,1.281396e-07,4.380333e-07 + 2023,10,13,12,4.354049e+03,1.432959e-07,1.677295e-07,1.421184e-07,2.242394e-07 + 2023,10,13,18,3.886603e+03,9.616619e-08,1.476399e-07,9.616619e-08,2.135922e-07 + 2023,10,14,00,4.998903e+03,4.483393e-07,1.247429e-06,4.483393e-07,3.631838e-06 + 2023,10,14,06,4.150016e+03,1.359891e-06,2.485284e-06,1.225489e-06,4.234755e-06 + 2023,10,14,12,4.551380e+03,1.164685e-06,2.642705e-06,1.164685e-06,6.044827e-06 + 2023,10,14,18,4.439727e+03,6.554656e-08,1.200949e-07,6.554656e-08,1.874206e-07 + 2023,10,15,00,4.834534e+03,7.517315e-07,6.839872e-07,3.739223e-07,9.623215e-07 + 2023,10,15,06,4.311401e+03,1.656324e-06,5.468610e-06,1.656324e-06,1.870500e-05 + 2023,10,15,12,3.861327e+03,3.416310e-06,4.341716e-06,2.636429e-06,6.656894e-06 + 2023,10,15,18,4.411814e+03,9.274206e-07,2.773167e-06,9.274206e-07,8.967605e-06 + 2023,10,16,00,4.512454e+03,1.482961e-05,1.219268e-05,5.437765e-06,2.832911e-05 + 2023,10,16,06,4.170745e+03,1.379255e-06,2.116023e-06,9.853267e-07,3.151101e-06 + 2023,10,16,12,4.385015e+03,3.006092e-07,3.151679e-07,1.932619e-07,6.034793e-07 + 2023,10,16,18,4.186223e+03,1.726534e-07,2.164490e-07,1.314246e-07,3.751648e-07 + 2023,10,17,00,3.986592e+03,3.613061e-06,5.130025e-06,1.824303e-06,9.656198e-06 + 2023,10,17,06,3.781130e+03,1.029327e-07,2.161222e-07,1.029327e-07,3.914104e-07 + 2023,10,17,12,4.458915e+03,1.013451e-05,1.018016e-05,2.431550e-06,2.683090e-05 + 2023,10,17,18,3.866923e+03,2.013560e-07,2.681136e-07,1.533570e-07,5.121388e-07 + 2023,10,18,00,3.925362e+03,1.105134e-05,8.106728e-06,3.291588e-06,1.462514e-05 + 2023,10,18,06,3.702350e+03,1.158046e-06,1.901894e-06,1.158046e-06,4.617507e-06 + 2023,10,18,12,4.304557e+03,2.464535e-07,4.742450e-07,2.464535e-07,7.465408e-07 + 2023,10,18,18,3.439888e+03,6.525397e-07,7.366882e-07,4.942131e-07,1.502528e-06 + 
2023,10,19,00,3.747911e+03,7.891661e-08,1.197340e-07,7.891661e-08,1.788678e-07 + 2023,10,19,06,3.681204e+03,7.785630e-07,2.752787e-06,7.785630e-07,6.537970e-06 + 2023,10,19,12,4.302109e+03,2.140281e-06,2.190056e-06,1.337329e-06,3.398679e-06 + 2023,10,19,18,3.740212e+03,1.798385e-06,5.895170e-06,1.798385e-06,1.457979e-05 + 2023,10,20,00,3.669374e+03,7.896003e-07,1.735931e-06,7.896003e-07,3.007289e-06 + 2023,10,20,06,4.165050e+03,1.029685e-07,1.910961e-07,1.029685e-07,3.863140e-07 + 2023,10,20,12,4.197527e+03,4.117225e-06,1.419651e-05,4.117225e-06,3.021296e-05 + 2023,10,20,18,3.756638e+03,2.629942e-05,4.331542e-05,1.731776e-05,8.298646e-05 + 2023,10,21,00,3.705694e+03,5.757426e-06,1.386155e-05,5.757426e-06,3.375409e-05 + 2023,10,21,06,4.299770e+03,9.859885e-07,2.735550e-06,9.859885e-07,6.309701e-06 + 2023,10,21,12,3.922871e+03,2.523554e-07,4.726534e-07,2.523554e-07,1.073013e-06 + 2023,10,21,18,3.568892e+03,5.160198e-07,9.384017e-07,5.020585e-07,1.964750e-06 + 2023,10,22,00,3.757538e+03,5.421490e-07,5.947036e-07,3.515274e-07,1.112250e-06 + 2023,10,22,06,3.960955e+03,1.584701e-07,3.048424e-07,1.584701e-07,4.854683e-07 + 2023,10,22,12,4.236783e+03,8.098347e-07,1.657607e-06,6.407775e-07,4.931053e-06 + 2023,10,22,18,3.641734e+03,5.469786e-07,8.142593e-07,5.030705e-07,1.943070e-06 + 2023,10,23,00,3.845650e+03,5.336470e-06,5.918934e-06,3.358255e-06,1.059133e-05 + 2023,10,23,06,3.860824e+03,2.288709e-07,4.683739e-07,2.288709e-07,6.931459e-07 + 2023,10,23,12,6.033292e+03,1.995044e-05,4.074108e-05,1.447194e-05,1.048507e-04 + 2023,10,23,18,3.607429e+03,9.313987e-07,1.443698e-06,9.313987e-07,1.860455e-06 + 2023,10,24,00,4.359522e+03,6.175247e-07,6.793850e-07,5.444400e-07,8.250995e-07 + 2023,10,24,06,4.011668e+03,3.141413e-05,4.889750e-05,1.579539e-05,1.466346e-04 + 2023,10,24,12,4.217701e+03,7.183782e-07,1.574340e-06,7.183782e-07,3.506986e-06 + 2023,10,24,18,3.649975e+03,5.005049e-07,8.665958e-07,4.695667e-07,2.368658e-06 + 
2023,10,25,00,4.387455e+03,1.520607e-07,3.539789e-07,1.520607e-07,9.081467e-07 + 2023,10,25,06,3.993071e+03,4.913066e-07,9.143959e-07,3.815433e-07,2.156376e-06 + 2023,10,25,12,4.142251e+03,1.150976e-05,1.092985e-05,4.590735e-06,1.961206e-05 + 2023,10,25,18,3.781343e+03,5.007572e-07,9.316928e-07,4.927323e-07,1.619994e-06 + 2023,10,26,00,4.299237e+03,2.901853e-06,3.394160e-06,1.360418e-06,6.572427e-06 + 2023,10,26,06,4.188449e+03,1.105491e-07,1.140972e-07,8.587739e-08,1.828696e-07 + 2023,10,26,12,4.128633e+03,2.304131e-06,4.847823e-06,2.304131e-06,9.336155e-06 + 2023,10,26,18,4.109869e+03,1.777035e-07,2.820936e-07,1.643252e-07,4.495320e-07 + 2023,10,27,00,4.086383e+03,1.142632e-05,2.256458e-05,8.743291e-06,5.633250e-05 + 2023,10,27,06,4.127988e+03,2.728146e-07,4.236991e-07,2.728146e-07,5.748265e-07 + 2023,10,27,12,3.898810e+03,1.667946e-07,3.292033e-07,1.667946e-07,5.000171e-07 + 2023,10,27,18,4.112868e+03,6.873891e-08,1.114629e-07,6.873891e-08,1.526627e-07 + 2023,10,28,00,4.373810e+03,1.749626e-07,2.804269e-07,1.749626e-07,4.531691e-07 + 2023,10,28,06,4.103993e+03,6.499782e-08,9.796438e-08,6.499782e-08,1.345742e-07 + 2023,10,28,12,4.198430e+03,9.041911e-08,1.334047e-07,9.041911e-08,1.810777e-07 + 2023,10,28,18,4.141742e+03,1.086710e-06,2.391335e-06,1.086710e-06,4.492986e-06 + 2023,10,29,00,4.919303e+03,1.375622e-07,1.817139e-07,1.158485e-07,3.139976e-07 + 2023,10,29,06,3.929326e+03,7.309165e-07,1.321968e-06,7.309165e-07,2.145131e-06 + 2023,10,29,12,4.415701e+03,1.348325e-05,3.722396e-05,1.348325e-05,1.274615e-04 + 2023,10,29,18,4.187227e+03,6.318270e-06,8.961316e-06,5.268552e-06,1.791225e-05 + 2023,10,30,00,4.637597e+03,4.098962e-06,1.035667e-05,4.098962e-06,1.427245e-05 + 2023,10,30,06,3.512628e+03,1.197586e-06,1.194033e-06,6.320990e-07,2.139734e-06 + 2023,10,30,12,4.450129e+03,3.277841e-06,6.277239e-06,3.269006e-06,1.227810e-05 + 2023,10,30,18,3.984785e+03,6.925378e-07,1.873816e-06,6.925378e-07,5.470688e-06 + 
2023,10,31,00,4.642559e+03,8.665066e-07,1.429904e-06,7.208292e-07,3.727239e-06 + 2023,10,31,06,3.500513e+03,3.602451e-07,5.432714e-07,3.602451e-07,7.164891e-07 + 2023,10,31,12,4.338586e+03,7.705307e-05,4.248455e-05,2.223191e-05,7.705307e-05 + 2023,10,31,18,4.059734e+03,3.656467e-06,6.640640e-06,3.656467e-06,1.414518e-05 + 2023,11,01,00,4.566295e+03,7.405862e-06,1.340018e-05,7.337858e-06,3.905492e-05 + 2023,11,01,06,3.615823e+03,3.548041e-07,4.481598e-07,2.298334e-07,8.173221e-07 + 2023,11,01,12,4.673239e+03,4.532234e-07,6.380456e-07,4.342136e-07,9.719158e-07 + 2023,11,01,18,3.936104e+03,3.188326e-07,5.007364e-07,2.929082e-07,1.086602e-06 + 2023,11,02,00,4.453143e+03,9.961180e-08,1.419371e-07,9.961180e-08,1.898886e-07 + 2023,11,02,06,3.896473e+03,5.855159e-07,9.633835e-07,5.040025e-07,1.924336e-06 + 2023,11,02,12,4.727724e+03,1.426480e-07,2.220382e-07,1.426480e-07,2.897083e-07 + 2023,11,02,18,4.340278e+03,3.664511e-07,5.994720e-07,3.059239e-07,1.158600e-06 + 2023,11,03,00,4.375919e+03,3.756896e-06,4.074607e-06,2.137921e-06,8.375806e-06 + 2023,11,03,06,3.983594e+03,7.495471e-08,1.104044e-07,7.495471e-08,1.504620e-07 + 2023,11,03,12,4.582426e+03,2.672754e-06,5.281630e-06,2.672754e-06,8.361873e-06 + 2023,11,03,18,4.149566e+03,9.980210e-07,2.439937e-06,9.551378e-07,4.989904e-06 + 2023,11,04,00,4.825637e+03,4.537011e-06,3.790334e-06,2.224490e-06,6.762975e-06 + 2023,11,04,06,4.123875e+03,2.446826e-07,4.220858e-07,2.411497e-07,8.584981e-07 + 2023,11,04,12,4.849220e+03,1.659103e-05,1.308526e-05,4.293994e-06,3.015394e-05 + 2023,11,04,18,4.318360e+03,7.864584e-08,1.102521e-07,7.864584e-08,1.672115e-07 + 2023,11,05,00,5.131622e+03,1.795588e-06,4.475847e-06,1.795588e-06,9.969248e-06 + 2023,11,05,06,4.220459e+03,6.782641e-07,9.013981e-07,5.426709e-07,2.136158e-06 + 2023,11,05,12,4.937044e+03,1.373654e-06,2.679311e-06,1.373654e-06,4.339270e-06 + 2023,11,05,18,4.180756e+03,1.038402e-07,1.069615e-07,8.978271e-08,1.396452e-07 + 
2023,11,06,00,5.012665e+03,7.537220e-07,1.037975e-06,5.026764e-07,1.918519e-06 + 2023,11,06,06,3.971010e+03,1.749852e-06,1.373613e-06,6.669512e-07,2.258544e-06 + 2023,11,06,12,4.674841e+03,9.238037e-08,1.594412e-07,9.238037e-08,2.143240e-07 + 2023,11,06,18,4.095972e+03,2.156211e-07,2.746697e-07,2.000031e-07,4.983165e-07 + 2023,11,07,00,4.697821e+03,8.968946e-08,1.788461e-07,8.968946e-08,2.964696e-07 + 2023,11,07,06,3.874461e+03,6.416993e-08,1.005481e-07,6.416993e-08,1.527012e-07 + 2023,11,07,12,4.528810e+03,7.433471e-08,1.149741e-07,7.433471e-08,1.535487e-07 + 2023,11,07,18,3.877052e+03,4.248042e-07,8.160000e-07,4.248042e-07,2.341471e-06 + 2023,11,08,00,4.836079e+03,5.914077e-08,9.016615e-08,5.914077e-08,1.310474e-07 + 2023,11,08,06,3.776411e+03,3.573270e-07,7.093658e-07,3.573270e-07,1.844329e-06 + 2023,11,08,12,4.526606e+03,6.338166e-08,9.338263e-08,6.338166e-08,1.463379e-07 + 2023,11,08,18,4.011873e+03,6.201867e-06,7.811689e-06,3.035285e-06,1.454809e-05 + 2023,11,09,00,4.884227e+03,2.343375e-06,2.809569e-06,1.059671e-06,7.830898e-06 + 2023,11,09,06,3.736895e+03,2.167837e-07,4.186776e-07,2.116924e-07,8.032723e-07 + 2023,11,09,12,4.534107e+03,1.754398e-07,3.346542e-07,1.754398e-07,4.611875e-07 + 2023,11,09,18,4.204355e+03,1.072311e-07,2.018517e-07,1.072311e-07,3.081991e-07 + 2023,11,10,00,5.011764e+03,1.257736e-07,2.373325e-07,1.257736e-07,4.447160e-07 + 2023,11,10,06,3.905730e+03,8.772388e-07,1.958966e-06,8.434032e-07,5.071218e-06 + 2023,11,10,12,4.851985e+03,8.896214e-08,1.163560e-07,8.530586e-08,1.637857e-07 + 2023,11,10,18,4.320364e+03,5.015246e-07,9.909834e-07,4.062608e-07,1.952985e-06 + 2023,11,11,00,5.060165e+03,2.278430e-06,2.356386e-06,8.997228e-07,5.231458e-06