diff --git a/neural_lam/weather_dataset.py b/neural_lam/weather_dataset.py index e7122e9d..da1c7a30 100644 --- a/neural_lam/weather_dataset.py +++ b/neural_lam/weather_dataset.py @@ -76,7 +76,11 @@ def __init__( def __len__(self): if self.datastore.is_forecast: # for now we simply create a single sample for each analysis time - # and then the next ar_steps forecast times + # and then take the first (2 + ar_steps) forecast times. In + # addition we only use the first ensemble member (if ensemble data + # has been provided). + # This means that for each analysis time we get a single sample + if self.datastore.is_ensemble: warnings.warn( "only using first ensemble member, so dataset size is " @@ -84,9 +88,18 @@ def __len__(self): f"({self.da_state.ensemble_member.size})", UserWarning, ) - # XXX: we should maybe check that the 2+ar_steps actually fits in - # the elapsed_forecast_duration dimension, should that be checked - # here? + + # check that there are enough forecast steps available to create + # samples given the number of autoregressive steps requested + n_forecast_steps = self.da_state.elapsed_forecast_duration.size + if n_forecast_steps < 2 + self.ar_steps: + raise ValueError( + "The number of forecast steps available " + f"({n_forecast_steps}) is less than the required " + f"2+ar_steps (2+{self.ar_steps}={2 + self.ar_steps}) for " + "creating a sample with initial and target states." + ) + return self.da_state.analysis_time.size else: # sample_len = 2 + ar_steps