MeteoSwiss · gugerlir · Jan 8, 2024 · Dec 20, 2023 · Jan 3, 2024 · Jan 4, 2024
diff --git a/rainforest/common/utils.py b/rainforest/common/utils.py
@@ -520,7 +520,7 @@ def read_task_file(task_file):
             tasks_dic[int(line[0])] = line[1:]
     return tasks_dic
 
-def read_df(pattern, dbsystem = 'dask', sqlContext = None):
+def read_df(pattern, dbsystem='dask', sqlContext=None):
     """
     Reads a set of data contained in a folder as a spark or dask DataFrame
 
@@ -532,7 +532,7 @@ def read_df(pattern, dbsystem = 'dask', sqlContext = None):
     dbsystem : str
         Either "dask" if you want a Dask DataFrame or "spark" if you want a 
         spark dataframe
-    sqlContext : sqlContext instance
+    sqlContext : SQLContext or SparkSession instance
         sqlContext to use, required only if dbystem = 'spark'
 
     Returns

diff --git a/rainforest/database/database.py b/rainforest/database/database.py
@@ -11,7 +11,7 @@
 
 from pyspark import SparkConf
 from pyspark import SparkContext
-from pyspark.sql import SQLContext, DataFrame
+from pyspark.sql import SparkSession, DataFrame
 
 # This could benefit from some tweaks especially if the database becomes larger
 conf = SparkConf()
@@ -96,7 +96,7 @@ def __init__(self, config_file = None):
 
         """
         sparkContext = SparkContext(conf = conf)
-        self.sqlContext = SQLContext(sparkContext)
+        self.sqlContext = SparkSession(sparkContext)
         self.tables = TableDict()
         self.summaries = {}
         if config_file:
@@ -888,7 +888,7 @@ def update_radar_data(self, gauge_table_name,  output_folder,
 
         for fn in job_files:
             logging.info('Submitting job {}'.format(fn))
-            #subprocess.call('sbatch {:s}'.format(fn), shell = True)
+            subprocess.call('sbatch {:s}'.format(fn), shell = True)
 
 
 def _compare_config(config1, config2, keys = None):

diff --git a/rainforest/database/retrieve_radar_data.py b/rainforest/database/retrieve_radar_data.py
@@ -423,8 +423,8 @@ def process_all_timesteps(self):
 
             logging.info('Processing timestep '+str(tstep))
             # Set t-start -5 minutes to get all the files between, e.g., H:01 and H:10 and log at H:10
-            tstart = datetime.datetime.utcfromtimestamp(float(tstep)) - datetime.timedelta(minutes=5)
-            tend= datetime.datetime.utcfromtimestamp(float(tstep))
+            tstart = datetime.datetime.fromtimestamp(float(tstep), tz=datetime.timezone.utc) - datetime.timedelta(minutes=5)
+            tend= datetime.datetime.fromtimestamp(float(tstep), tz=datetime.timezone.utc)
 
             stations_to_get = self.tasks[tstep]
             # Change to the timestep where the data is logged
@@ -502,7 +502,8 @@ def process_all_timesteps(self):
 
                         status_file = rad_files['status'][tstamp]
 
-                        radar = Radar(r, rad_files['radar'][tstamp], status_file, vpr_file)
+                        radar = Radar(r, rad_files['radar'][tstamp], status_file, vpr_file,
+                                    temp_ref=self.temp_ref)
 
                         # Add ISO0_HEIGHT and height_over_iso0 to radar object
                         if (self.temp_ref == "ISO0_HEIGHT") or ("ISO0_HEIGHT" in self.other_variables):

diff --git a/rainforest/performance/eval_get_estimates.py b/rainforest/performance/eval_get_estimates.py
@@ -213,12 +213,14 @@ def _get_data(self):
 
         # Check if the RF-models are there
         for model in self.modellist:
+            self.model_files = {}
             path = self.qpefolder+'{}'.format(model)
-            if len(os.listdir(path)) == 0 :
+            if not os.path.exists(path) or (len(os.listdir(path)) == 0):
+                logging.info('Extracting {} files from archive'.format(model))
                 try:
                     path = self.qpefolder+'{}'.format(model)
-                    self.ref_files[ref] = retrieve_prod(path + '/', self.tstart, 
-                                                                    self.tend, ref)
+                    self.model_files[model] = retrieve_prod(path + '/', self.tstart, 
+                                                                    self.tend, model)
                     logging.info('Model data: {} taken from file archive!'.format(model))
                 except:
                     logging.error('No QPE maps available for {}, please check path or produce QPE maps'.format(model))