Multiple Timeseries #209

Merged (6 commits, Oct 9, 2024)
4 changes: 2 additions & 2 deletions .github/workflows/yaml_coding_norms.yml

@@ -18,7 +18,7 @@ jobs:
     name: Check YAML Coding Norms
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - id: yaml-lint
         name: yaml-lint
        uses: ibiqlik/action-yamllint@v3
@@ -27,7 +27,7 @@
           format: colored
           config_file: .yamllint.yml

-      - uses: actions/upload-artifact@v2
+      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: yamllint-logfile
2 changes: 1 addition & 1 deletion requirements-github.txt

@@ -8,7 +8,7 @@ xarray>=2022.6.0
 seaborn>=0.12.2
 hvplot>=0.8.2
 nbconvert>=6.5.4
-bokeh<3.5.0,>=3.4.0
+bokeh<3.6.0,>=3.5.0
 geopandas>=0.13.2
 geoviews>=1.10.0
 nbsite
22 changes: 11 additions & 11 deletions src/eva/data/data_collections.py

@@ -65,9 +65,9 @@ def create_or_add_to_collection(self, collection_name, collection, concat_dimens
         """
 
         # If time_series collection name must also be time_series
-        if self.time_series and collection_name != 'time_series':
-            self.logger.abort('In create_or_add_to_collection: time_series collection must ' +
-                              'be \'time_series\'')
+        if self.time_series and 'time_series' not in collection_name:
+            self.logger.abort('In create_or_add_to_collection: time_series collection must ' +
+                              'have name containing \'time_series\'')
 
         # Collections should only be xarray datasets
         if not isinstance(collection, Dataset):
@@ -158,9 +158,9 @@ def add_variable_to_collection(self, collection_name, group_name, variable_name,
         """
 
         # If time_series collection name must also be time_series
-        if self.time_series and collection_name != 'time_series':
-            self.logger.abort('In add_variable_to_collection: time_series collection must ' +
-                              'be \'time_series\'')
+        if self.time_series and 'time_series' not in collection_name:
+            self.logger.abort('In add_variable_to_collection: time_series collection must ' +
+                              'have name containing \'time_series\'')
 
         # Assert that new variable is an xarray Dataarray
         if not isinstance(variable, DataArray):
@@ -211,9 +211,9 @@ def get_variable_data_array(self, collection_name, group_name, variable_name,
         """
 
         # If time_series collection name must also be time_series
-        if self.time_series and collection_name != 'time_series':
-            self.logger.abort('In get_variable_data_array: time_series collection must ' +
-                              'be \'time_series\'')
+        if self.time_series and 'time_series' not in collection_name:
+            self.logger.abort('In get_variable_data_array: time_series collection must ' +
+                              'have name containing \'time_series\'')
 
         group_variable_name = group_name + '::' + variable_name
         data_array = self._collections[collection_name][group_variable_name]
@@ -293,9 +293,9 @@ def get_variable_data(self, collection_name, group_name, variable_name,
         """
 
         # If time_series collection name must also be time_series
-        if self.time_series and collection_name != 'time_series':
+        if self.time_series and 'time_series' not in collection_name:
             self.logger.abort('In get_variable_data: time_series collection must ' +
-                              'be \'time_series\'')
+                              'have name containing \'time_series\'')
 
         variable_array = self.get_variable_data_array(collection_name, group_name, variable_name,
                                                       channels, levels, datatypes)
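
The relaxed check above is what allows several time-series collections to coexist: a collection no longer has to be named exactly 'time_series', it only has to contain that substring (the test config below produces 'control_time_series' and 'experiment_time_series'). A minimal sketch of the new rule, with a hypothetical helper name that is not part of Eva, for illustration only:

    # Sketch of the relaxed naming rule; 'is_valid_time_series_name' is a
    # hypothetical helper, not an Eva function.
    def is_valid_time_series_name(collection_name: str) -> bool:
        # Old rule: name had to equal 'time_series' exactly.
        # New rule: name only has to contain the substring 'time_series'.
        return 'time_series' in collection_name

    if __name__ == '__main__':
        for name in ['time_series', 'control_time_series',
                     'experiment_time_series', 'control']:
            print(name, is_valid_time_series_name(name))
        # -> True, True, True, False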
173 changes: 96 additions & 77 deletions src/eva/eva_driver.py

@@ -12,6 +12,7 @@
 from datetime import datetime
 import argparse
 import os
+from collections import defaultdict
 
 from eva.utilities.config import get
 from eva.utilities.logger import Logger
@@ -94,85 +95,103 @@ def read_transform_time_series(logger, timing, eva_dict, data_collections):
     None
     """
 
-    # Check for required keys
-    # -----------------------
-    required_keys = [
-        'begin_date',
-        'final_date',
-        'interval',
-        'collection',
-        'variables',
-    ]
-    for key in required_keys:
-        logger.assert_abort(key in eva_dict['time_series'], 'If running Eva in time series ' +
-                            f'mode the time series config must contain "{key}"')
-
-    # Write message that this is a time series run
-    logger.info('This instance of Eva is being used to accumulate a time series.')
-
-    # Optionally suppress the display of the collection
-    suppress_collection_display = get(eva_dict, logger, 'suppress_collection_display', False)
-
-    # Get the datasets configuration
-    time_series_config = eva_dict['time_series']
-
-    # Extract the dates of the time series
-    begin_date = time_series_config['begin_date']
-    final_date = time_series_config['final_date']
-    interval = time_series_config['interval']
-
-    # Convert begin and end dates from ISO strings to datetime objects
-    begin_date = datetime.fromisoformat(begin_date)
-    final_date = datetime.fromisoformat(final_date)
-
-    # Convert interval ISO string to timedelta object
-    interval = iso_duration_to_timedelta(logger, interval)
-
-    # Make list of dates from begin to end with interval
-    dates = []
-    date = begin_date
-    count = 0
-    while date <= final_date:
-        dates.append(date)
-        date += interval
-        count += 1
-        # Abort if count hits one million
-        logger.assert_abort(count < 1e6, 'You are planning to read more than one million ' +
-                            'time steps. This is likely an error. Please check your configuration.')
-
-    # Get the datasets configuration
-    datasets_config = get(eva_dict, logger, 'datasets')
-
-    # Assert that datasets_config is the same length as dates
-    logger.assert_abort(len(datasets_config) == len(dates), 'When running in time series mode ' +
-                        'the number of datasets must be the same as the number of dates.')
-
-    # Loop over datasets reading each one in turn, internally appending the data_collections
-    for ind, dataset_config in enumerate(datasets_config):
-
-        # Create a temporary collection for this time step
-        data_collections_tmp = DataCollections()
-
-        # Prepare diagnostic data
-        logger.info('Running data driver')
-        timing.start('DataDriverExecute')
-        data_driver(dataset_config, data_collections_tmp, timing, logger)
-        timing.stop('DataDriverExecute')
-
-        # Perform any transforms on the fly
-        if 'transforms' in eva_dict:
-            logger.info(f'Running transform driver')
-            timing.start('TransformDriverExecute')
-            transform_driver(eva_dict, data_collections_tmp, timing, logger)
-            timing.stop('TransformDriverExecute')
-
-        # Collapse data into time series
-        collapse_collection_to_time_series(logger, ind, dates, time_series_config, data_collections,
-                                           data_collections_tmp)
-
-    if not suppress_collection_display:
-        logger.info('Computing of Eva time series complete: status of collection:')
-        data_collections.display_collections()
+    # Iterate through list of time series dictionaries
+    for time_series_config in eva_dict['time_series']:
+
+        # Check for required keys
+        # -----------------------
+        required_keys = [
+            'begin_date',
+            'final_date',
+            'interval',
+            'collection',
+            'variables',
+        ]
+        for key in required_keys:
+            logger.assert_abort(key in time_series_config, 'If running Eva in time series ' +
+                                f'mode the time series config must contain "{key}"')
+
+        # Write message that this is a time series run
+        logger.info('This instance of Eva is being used to accumulate a time series.')
+
+        # Optionally suppress the display of the collection
+        suppress_collection_display = get(eva_dict, logger, 'suppress_collection_display', False)
+
+        # Extract the dates of the time series
+        begin_date = time_series_config['begin_date']
+        final_date = time_series_config['final_date']
+        interval = time_series_config['interval']
+
+        # Convert begin and end dates from ISO strings to datetime objects
+        begin_date = datetime.fromisoformat(begin_date)
+        final_date = datetime.fromisoformat(final_date)
+
+        # Convert interval ISO string to timedelta object
+        interval = iso_duration_to_timedelta(logger, interval)
+
+        # Make list of dates from begin to end with interval
+        dates = []
+        date = begin_date
+        count = 0
+        while date <= final_date:
+            dates.append(date)
+            date += interval
+            count += 1
+            # Abort if count hits one million
+            logger.assert_abort(count < 1e6, 'You are planning to read more than one million ' +
+                                'time steps. This is likely an error. Please check your ' +
+                                'configuration.')
+
+        # Get all datasets configuration
+        all_datasets = get(eva_dict, logger, 'datasets')
+
+        # Find all dataset_configs with collection name
+        datasets_config = []
+        for dataset in all_datasets:
+            if dataset['name'] == time_series_config["collection"]:
+                datasets_config.append(dataset)
+
+        # Save transforms to transform_dict based on collection
+        transform_dict = defaultdict(list)
+        if 'transforms' in eva_dict:
+            for transform in get(eva_dict, logger, 'transforms'):
+                # Get collection name
+                name = transform['new name'].split('::')[0]
+                if name == time_series_config['collection']:
+                    transform_dict['transforms'].append(transform)
+
+        # Assert that datasets_config is the same length as dates
+        logger.assert_abort(len(datasets_config) == len(dates), 'When running in time ' +
+                            'series mode the number of datasets must be the same as the ' +
+                            'number of dates.')
+
+        # Loop over datasets reading each one in turn, internally appending the data_collections
+        for ind, dataset_config in enumerate(datasets_config):
+
+            # Create a temporary collection for this time step
+            data_collections_tmp = DataCollections()
+
+            # Prepare diagnostic data
+            logger.info('Running data driver')
+            timing.start('DataDriverExecute')
+            data_driver(dataset_config, data_collections_tmp, timing, logger)
+            timing.stop('DataDriverExecute')
+
+            # Perform any transforms on the fly
+            if transform_dict:
+                logger.info(f'Running transform driver')
+                timing.start('TransformDriverExecute')
+                transform_driver(transform_dict, data_collections_tmp, timing, logger)
+                timing.stop('TransformDriverExecute')
+
+            # Collapse data into time series
+            date = dates[ind]
+            collapse_collection_to_time_series(logger, ind, date, time_series_config,
+                                               data_collections, data_collections_tmp)
+
+        if not suppress_collection_display:
+            logger.info('Computing of Eva time series complete: status of collection:')
+            data_collections.display_collections()
 
 
 # --------------------------------------------------------------------------------------------------
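
A notable behavior change above: instead of handing the entire eva_dict to transform_driver, the driver now builds a per-collection transform_dict keyed on the collection prefix of each transform's 'new name'. A self-contained sketch of that grouping, using hypothetical sample transforms shaped like those in the test config below:

    # Sketch of the per-collection transform filtering used in
    # read_transform_time_series; the sample transforms are hypothetical.
    from collections import defaultdict

    transforms = [
        {'transform': 'arithmetic', 'new name': 'control::ObsValueMinusHofx::${variable}'},
        {'transform': 'arithmetic', 'new name': 'experiment::ObsValueMinusHofx::${variable}'},
    ]

    collection = 'experiment'  # collection being accumulated in this time-series pass

    transform_dict = defaultdict(list)
    for transform in transforms:
        # The target collection is the prefix of 'new name' before the first '::'
        name = transform['new name'].split('::')[0]
        if name == collection:
            transform_dict['transforms'].append(transform)

    # Only the experiment transform survives, so transform_driver runs just
    # the transforms relevant to this collection.
    print(transform_dict['transforms'])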
119 changes: 119 additions & 0 deletions src/eva/tests/config/testIodaObsSpaceAmsuaN19_Multiple_TimeSeries.yaml

@@ -0,0 +1,119 @@
suppress_collection_display: false
datasets:
  - name: control
    type: IodaObsSpace
    filenames:
      - ${data_input_path}/ctrl_amsua_n19.20230726T030000Z.nc4
    channels: 3,8
    groups:
      - name: ObsValue
        variables:
          - brightnessTemperature
      - name: GsiHofXBc
      - name: hofx0
      - name: MetaData
      - name: oman
  - name: control
    type: IodaObsSpace
    filenames:
      - ${data_input_path}/ctrl_amsua_n19.20230726T090000Z.nc4
    channels: 3,8
    groups:
      - name: ObsValue
        variables:
          - brightnessTemperature
      - name: GsiHofXBc
      - name: hofx0
      - name: MetaData
      - name: oman
  - name: experiment
    type: IodaObsSpace
    filenames:
      - ${data_input_path}/exp_amsua_n19.20230726T030000Z.nc4
    channels: 3,8
    groups:
      - name: ObsValue
        variables:
          - brightnessTemperature
      - name: GsiHofXBc
      - name: hofx0
      - name: MetaData
      - name: oman
  - name: experiment
    type: IodaObsSpace
    filenames:
      - ${data_input_path}/exp_amsua_n19.20230726T090000Z.nc4
    channels: 3,8
    groups:
      - name: ObsValue
        variables:
          - brightnessTemperature
      - name: GsiHofXBc
      - name: hofx0
      - name: MetaData
      - name: oman
transforms:
  - transform: arithmetic
    new name: control::ObsValueMinusHofx::${variable}
    equals: control::ObsValue::${variable}-control::hofx0::${variable}
    for:
      variable: &id001
        - brightnessTemperature
  - transform: arithmetic
    new name: experiment::ObsValueMinusHofx::${variable}
    equals: experiment::ObsValue::${variable}-experiment::hofx0::${variable}
    for:
      variable: *id001
time_series:
  - begin_date: '2023-07-26T03:00:00'
    final_date: '2023-07-26T09:00:00'
    interval: PT6H
    collection: experiment
    variables:
      - ObsValueMinusHofx::brightnessTemperature
    aggregation_methods:
      - mean
    dimension: Location
  - begin_date: '2023-07-26T03:00:00'
    final_date: '2023-07-26T09:00:00'
    interval: PT6H
    collection: control
    variables:
      - ObsValueMinusHofx::brightnessTemperature
    aggregation_methods:
      - mean
    dimension: Location
graphics:
  plotting_backend: Emcpy
  figure_list:
    - figure:
        layout:
          - 1
          - 1
        title: Mean OmB | AMSU-A NOAA-19 | Ch 3 | ObsValueMinusHofx::brightnessTemperature
        output name: time_series/amsua_n19/brightnessTemperature_mean/3/time_series_compare_omb.png
      plots:
        - add_xlabel: Datetime
          add_ylabel: JEDI h(x)
          add_grid: null
          add_legend:
            loc: upper left
          layers:
            - type: LinePlot
              x:
                variable: control_time_series::MetaData::Dates
              y:
                variable: control_time_series::ObsValueMinusHofx::brightnessTemperature_mean
              channel: 3
              markersize: 5
              color: black
              label: Observation minus h(x) - ctrl
            - type: LinePlot
              x:
                variable: experiment_time_series::MetaData::Dates
              y:
                variable: experiment_time_series::ObsValueMinusHofx::brightnessTemperature_mean
              channel: 3
              markersize: 5
              color: blue
              label: Observation minus h(x) - exp
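
For each time_series entry above, the driver expands begin_date/final_date/interval into a list of dates; with a PT6H interval this config yields exactly two dates, matching the two datasets listed per collection. A sketch of that expansion, where the hard-coded timedelta(hours=6) stands in for Eva's iso_duration_to_timedelta parsing of 'PT6H':

    # Sketch of the date-list expansion performed by read_transform_time_series;
    # timedelta(hours=6) is a simplified stand-in for iso_duration_to_timedelta.
    from datetime import datetime, timedelta

    begin_date = datetime.fromisoformat('2023-07-26T03:00:00')
    final_date = datetime.fromisoformat('2023-07-26T09:00:00')
    interval = timedelta(hours=6)  # 'PT6H'

    dates = []
    date = begin_date
    while date <= final_date:
        dates.append(date)
        date += interval

    print(dates)
    # Two dates (03:00 and 09:00), so each collection must list exactly two
    # datasets, as in the config above.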