From 290a5455498eb871243adfab7e9dcddf099a2194 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 7 May 2024 13:54:34 +0200 Subject: [PATCH 01/22] Update persistence for lat/lon combined, new filter on gps_alt # 238 --- src/pypromice/process/L1toL2.py | 10 ++++++- src/pypromice/qc/persistence.py | 48 ++++++++++++++++++++++----------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index d986f391..02962e32 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -66,12 +66,20 @@ def toL2( except Exception: logger.exception('Flagging and fixing failed:') + ds = persistence_qc(ds) # Flag and remove persistence outliers if ds.attrs['format'] == 'TX': - ds = persistence_qc(ds) # Flag and remove persistence outliers # TODO: The configuration should be provided explicitly outlier_detector = ThresholdBasedOutlierDetector.default() ds = outlier_detector.filter_data(ds) # Flag and remove percentile outliers + # filtering gps_lat, gps_lon and gps_alt based on the difference to a baseline elevation + # right now baseline elevation is gapfilled monthly median elevation + baseline_elevation = (ds.gps_alt.resample(time='M').median() + .interp(time=ds.time,method='nearest') + .ffill(dim='time').bfill(dim='time')) + mask = (np.abs(ds.gps_alt - baseline_elevation) < 100) & ds.gps_alt.notnull() + ds[['gps_alt','gps_lon', 'gps_lat']] = ds4[['gps_alt','gps_lon', 'gps_lat']].where(mask) + T_100 = _getTempK(T_0) ds['rh_u_cor'] = correctHumidity(ds['rh_u'], ds['t_u'], T_0, T_100, ews, ei0) diff --git a/src/pypromice/qc/persistence.py b/src/pypromice/qc/persistence.py index d2ea5ef3..1b78d59a 100644 --- a/src/pypromice/qc/persistence.py +++ b/src/pypromice/qc/persistence.py @@ -17,6 +17,8 @@ DEFAULT_VARIABLE_THRESHOLDS = { "t": {"max_diff": 0.0001, "period": 2}, "p": {"max_diff": 0.0001, "period": 2}, + 
'gps_lat_lon':{"max_diff": 0.000001, "period": 12}, + 'gps_alt':{"max_diff": 0.0001, "period": 12}, # Relative humidity can be very stable around 100%. #"rh": {"max_diff": 0.0001, "period": 2}, } @@ -58,27 +60,43 @@ def persistence_qc( if variable_thresholds is None: variable_thresholds = DEFAULT_VARIABLE_THRESHOLDS - logger.debug(f"Running persistence_qc using {variable_thresholds}") + logger.info(f"Running persistence_qc using {variable_thresholds}") for k in variable_thresholds.keys(): - var_all = [ - k + "_u", - k + "_l", - k + "_i", - ] # apply to upper, lower boom, and instant + if k in ['t','p','rh','wspd','wdir', 'z_boom']: + var_all = [ + k + "_u", + k + "_l", + k + "_i", + ] # apply to upper, lower boom, and instant + else: + var_all = [k] max_diff = variable_thresholds[k]["max_diff"] # loading persistent limit period = variable_thresholds[k]["period"] # loading diff period for v in var_all: - if v in df: - mask = find_persistent_regions(df[v], period, max_diff) - n_masked = mask.sum() - n_samples = len(mask) - logger.debug( - f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples" - ) - # setting outliers to NaN - df.loc[mask, v] = np.nan + if (v in df) | (v == 'gps_lat_lon'): + if v != 'gps_lat_lon': + mask = find_persistent_regions(df[v], period, max_diff) + n_masked = mask.sum() + n_samples = len(mask) + logger.info( + f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples" + ) + # setting outliers to NaN + df.loc[mask, v] = np.nan + else: + mask = (find_persistent_regions(df['gps_lon'], period, max_diff) \ + & find_persistent_regions(df['gps_lat'], period, max_diff) ) + + n_masked = mask.sum() + n_samples = len(mask) + logger.info( + f"Applying persistent QC in {v}. 
Filtering {n_masked}/{n_samples} samples" + ) + # setting outliers to NaN + df.loc[mask, 'gps_lon'] = np.nan + df.loc[mask, 'gps_lat'] = np.nan # Back to xarray, and re-assign the original attrs ds_out = df.to_xarray() From 73774b137cc53cdbf58d030d45614a24b13c5d29 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 7 May 2024 16:04:52 +0200 Subject: [PATCH 02/22] Update persistence.py #240 added t_trad to persistence --- src/pypromice/qc/persistence.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pypromice/qc/persistence.py b/src/pypromice/qc/persistence.py index 1b78d59a..fbc048be 100644 --- a/src/pypromice/qc/persistence.py +++ b/src/pypromice/qc/persistence.py @@ -19,6 +19,7 @@ "p": {"max_diff": 0.0001, "period": 2}, 'gps_lat_lon':{"max_diff": 0.000001, "period": 12}, 'gps_alt':{"max_diff": 0.0001, "period": 12}, + 't_rad':{"max_diff": 0.0001, "period": 2}, # Relative humidity can be very stable around 100%. #"rh": {"max_diff": 0.0001, "period": 2}, } From 499e64882cc91d9bd27877dd6f26e299c342ae71 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 7 May 2024 16:06:39 +0200 Subject: [PATCH 03/22] recalculate rh after averaging p_vap and es #193 --- src/pypromice/process/L1toL2.py | 9 +++-- src/pypromice/process/L2toL3.py | 4 +-- src/pypromice/process/aws.py | 58 +++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 02962e32..b9aa0449 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -78,8 +78,13 @@ def toL2( .interp(time=ds.time,method='nearest') .ffill(dim='time').bfill(dim='time')) mask = (np.abs(ds.gps_alt - baseline_elevation) < 100) & ds.gps_alt.notnull() - ds[['gps_alt','gps_lon', 'gps_lat']] = ds4[['gps_alt','gps_lon', 'gps_lat']].where(mask) - + ds[['gps_alt','gps_lon', 
'gps_lat']] = ds[['gps_alt','gps_lon', 'gps_lat']].where(mask) + + # removing dlr and ulr that are missing t_rad + # this is done now becasue t_rad can be filtered either manually or with persistence + ds['dlr'] = ds.dlr.where(ds.t_rad.notnull()) + ds['ulr'] = ds.ulr.where(ds.t_rad.notnull()) + T_100 = _getTempK(T_0) ds['rh_u_cor'] = correctHumidity(ds['rh_u'], ds['t_u'], T_0, T_100, ews, ei0) diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index 9b8a4cee..a71a4028 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -161,7 +161,7 @@ def calcHeatFlux(T_0, T_h, Tsurf_h, rho_atm, WS_h, z_WS, z_T, nu, q_h, p_h, es_0 : int Saturation vapour pressure at the melting point (hPa). Default is 6.1071. eps : int - Default is 0.622. + Ratio of molar masses of vapor and dry air (0.622). gamma : int Flux profile correction (Paulson & Dyer). Default is 16.. L_sub : int @@ -313,7 +313,7 @@ def calcHumid(T_0, T_100, T_h, es_0, es_100, eps, p_h, RH_cor_h): T_h : xarray.DataArray Air temperature eps : int - DESCRIPTION + ratio of molar masses of vapor and dry air (0.622) es_0 : float Saturation vapour pressure at the melting point (hPa) es_100 : float diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index 7fa1e3de..1455d96b 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -734,6 +734,7 @@ def resampleL3(ds_h, t): L3 AWS hourly dataset ''' df_d = ds_h.to_dataframe().resample(t).mean() + # recalculating wind direction from averaged directional wind speeds for var in ['wdir_u','wdir_l','wdir_i']: if var in df_d.columns: @@ -742,12 +743,69 @@ def resampleL3(ds_h, t): df_d['wspd_y_'+var.split('_')[1]]) else: logger.info(var,'in dataframe but not','wspd_x_'+var.split('_')[1],'wspd_x_'+var.split('_')[1]) + + # recalculating relative humidity from average vapour pressure and average + # saturation vapor pressure + for var in ['rh_u','rh_l']: + lvl = var.split('_')[1] + if 
var in df_d.columns: + if ('t_'+lvl in ds_h.keys()): + es_wtr, es_cor = calculateSaturationVaporPressure(ds_h['t_'+lvl]) + p_vap = ds_h[var] / 100 * es_wtr + p_vap_cor = ds_h[var+'_cor'] / 100 * es_cor + + df_d[var] = (p_vap.to_dataframe(name='p_vap').p_vap.resample(t).mean() \ + / es_wtr.to_dataframe(name='es_wtr').es_wtr.resample(t).mean())*100 + df_d[var+'_cor'] = (p_vap_cor.to_dataframe(name='p_vap_cor').p_vap_cor.resample(t).mean() \ + / es_cor.to_dataframe(name='es_cor').es_cor.resample(t).mean())*100 + vals = [xr.DataArray(data=df_d[c], dims=['time'], coords={'time':df_d.index}, attrs=ds_h[c].attrs) for c in df_d.columns] ds_d = xr.Dataset(dict(zip(df_d.columns,vals)), attrs=ds_h.attrs) return ds_d +def calculateSaturationVaporPressure(t, T_0=273.15, T_100=373.15, es_0=6.1071, + es_100=1013.246, eps=0.622): + '''Calculate specific humidity + + Parameters + ---------- + T_0 : float + Steam point temperature. Default is 273.15. + T_100 : float + Steam point temperature in Kelvin + t : xarray.DataArray + Air temperature + es_0 : float + Saturation vapour pressure at the melting point (hPa) + es_100 : float + Saturation vapour pressure at steam point temperature (hPa) + + Returns + ------- + xarray.DataArray + Specific humidity data array + ''' + # Saturation vapour pressure above 0 C (hPa) + es_wtr = 10**(-7.90298 * (T_100 / (t + T_0) - 1) + 5.02808 * np.log10(T_100 / (t + T_0)) + - 1.3816E-7 * (10**(11.344 * (1 - (t + T_0) / T_100)) - 1) + + 8.1328E-3 * (10**(-3.49149 * (T_100 / (t + T_0) -1)) - 1) + np.log10(es_100)) + + # Saturation vapour pressure below 0 C (hPa) + es_ice = 10**(-9.09718 * (T_0 / (t + T_0) - 1) - 3.56654 + * np.log10(T_0 / (t + T_0)) + 0.876793 + * (1 - (t + T_0) / T_0) + + np.log10(es_0)) + + # Saturation vapour pressure (hPa) + es_cor = es_wtr + freezing = t < 0 + es_cor[freezing] = es_ice[freezing] + + return es_wtr, es_cor + + def _calcWindDir(wspd_x, wspd_y): '''Calculate wind direction in degrees From 
a5ee3a4c58a9ac59ddde88d6f2a769b16d709d78 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 7 May 2024 18:54:43 +0200 Subject: [PATCH 04/22] Update L1toL2.py --- src/pypromice/process/L1toL2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index b9aa0449..85b7ef55 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -74,9 +74,9 @@ def toL2( # filtering gps_lat, gps_lon and gps_alt based on the difference to a baseline elevation # right now baseline elevation is gapfilled monthly median elevation - baseline_elevation = (ds.gps_alt.resample(time='M').median() - .interp(time=ds.time,method='nearest') - .ffill(dim='time').bfill(dim='time')) + baseline_elevation = (ds.gps_alt.to_series().resample('M').median() + .interpolate(method='nearest') + .ffill().bfill() mask = (np.abs(ds.gps_alt - baseline_elevation) < 100) & ds.gps_alt.notnull() ds[['gps_alt','gps_lon', 'gps_lat']] = ds[['gps_alt','gps_lon', 'gps_lat']].where(mask) From d794872bf6eb3048ad6ce121b8e9c6feb9d45789 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 7 May 2024 19:00:33 +0200 Subject: [PATCH 05/22] Update L1toL2.py --- src/pypromice/process/L1toL2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 85b7ef55..d4ee0b69 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -76,7 +76,7 @@ def toL2( # right now baseline elevation is gapfilled monthly median elevation baseline_elevation = (ds.gps_alt.to_series().resample('M').median() .interpolate(method='nearest') - .ffill().bfill() + .ffill().bfill()) mask = (np.abs(ds.gps_alt - baseline_elevation) < 100) & ds.gps_alt.notnull() ds[['gps_alt','gps_lon', 'gps_lat']] = 
ds[['gps_alt','gps_lon', 'gps_lat']].where(mask) From 91c7a90df9c7ec67c6da6788d93b1b788cee9d08 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 7 May 2024 22:01:10 +0200 Subject: [PATCH 06/22] Update L1toL2.py --- src/pypromice/process/L1toL2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index d4ee0b69..edc729e6 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -75,7 +75,7 @@ def toL2( # filtering gps_lat, gps_lon and gps_alt based on the difference to a baseline elevation # right now baseline elevation is gapfilled monthly median elevation baseline_elevation = (ds.gps_alt.to_series().resample('M').median() - .interpolate(method='nearest') + .reindex(ds.time.to_series().index, method='nearest') .ffill().bfill()) mask = (np.abs(ds.gps_alt - baseline_elevation) < 100) & ds.gps_alt.notnull() ds[['gps_alt','gps_lon', 'gps_lat']] = ds[['gps_alt','gps_lon', 'gps_lat']].where(mask) From de1af0e4f3677317679cb287455cee9a8a7addc2 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 8 May 2024 11:16:52 +0200 Subject: [PATCH 07/22] update doc in aws.py --- src/pypromice/process/aws.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index 1455d96b..c3c86fda 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -723,7 +723,7 @@ def resampleL3(ds_h, t): Parameters ---------- ds_h : xarray.Dataset - L3 AWS daily dataset + L3 AWS dataset either at 10 min (for raw data) or hourly (for tx data) t : str Resample factor, same variable definition as in pandas.DataFrame.resample() @@ -731,7 +731,7 @@ def resampleL3(ds_h, t): Returns ------- ds_d : xarray.Dataset - L3 AWS hourly dataset + L3 AWS dataset resampled to the frequency 
defined by t ''' df_d = ds_h.to_dataframe().resample(t).mean() From 04aca15670556fc4a2f8a164bb71f0b4fd824c73 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 8 May 2024 11:47:04 +0200 Subject: [PATCH 08/22] simplifying rh resample - p_vap and p_vap_cor are the same - use .to_series() insterad of .to_dataframe() --- src/pypromice/process/aws.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index c3c86fda..1a186468 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -752,12 +752,11 @@ def resampleL3(ds_h, t): if ('t_'+lvl in ds_h.keys()): es_wtr, es_cor = calculateSaturationVaporPressure(ds_h['t_'+lvl]) p_vap = ds_h[var] / 100 * es_wtr - p_vap_cor = ds_h[var+'_cor'] / 100 * es_cor - df_d[var] = (p_vap.to_dataframe(name='p_vap').p_vap.resample(t).mean() \ - / es_wtr.to_dataframe(name='es_wtr').es_wtr.resample(t).mean())*100 - df_d[var+'_cor'] = (p_vap_cor.to_dataframe(name='p_vap_cor').p_vap_cor.resample(t).mean() \ - / es_cor.to_dataframe(name='es_cor').es_cor.resample(t).mean())*100 + df_d[var] = (p_vap.to_series().resample(t).mean() \ + / es_wtr.to_series().resample(t).mean())*100 + df_d[var+'_cor'] = (p_vap.to_series().resample(t).mean() \ + / es_cor.to_series().resample(t).mean())*100 vals = [xr.DataArray(data=df_d[c], dims=['time'], coords={'time':df_d.index}, attrs=ds_h[c].attrs) for c in df_d.columns] From ac2089609f60d0170d0e3fe92eac1b72d535d387 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 8 May 2024 14:19:18 +0200 Subject: [PATCH 09/22] Update thresholds.csv FRE station had wrongly defined limits Now set manually --- src/pypromice/qc/percentiles/thresholds.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pypromice/qc/percentiles/thresholds.csv 
b/src/pypromice/qc/percentiles/thresholds.csv index 9bf50727..6152165c 100644 --- a/src/pypromice/qc/percentiles/thresholds.csv +++ b/src/pypromice/qc/percentiles/thresholds.csv @@ -122,8 +122,8 @@ JAR,p_[ul],881.00,929.00, JAR,p_i,-119.00,-71.00, JAR,wspd_[uli],-11.62,29.82, JAR,t_[uli],-14.60,13.30,summer -FRE,p_[ul],638.31,799.82, -FRE,p_i,-361.69,-200.18, +FRE,p_[ul],800,1000, +FRE,p_i,-200,0, FRE,wspd_[uli],-12.00,22.62, FRE,t_[uli],-38.20,10.02,winter FRE,t_[uli],-36.55,18.07,spring From 11ba26e0b59075253f9823d7ed482a768799d58f Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 8 May 2024 15:33:08 +0200 Subject: [PATCH 10/22] Removing percentile QC as of #242 --- src/pypromice/process/L1toL2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index edc729e6..048de6af 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -67,10 +67,10 @@ def toL2( logger.exception('Flagging and fixing failed:') ds = persistence_qc(ds) # Flag and remove persistence outliers - if ds.attrs['format'] == 'TX': - # TODO: The configuration should be provided explicitly - outlier_detector = ThresholdBasedOutlierDetector.default() - ds = outlier_detector.filter_data(ds) # Flag and remove percentile outliers + # if ds.attrs['format'] == 'TX': + # # TODO: The configuration should be provided explicitly + # outlier_detector = ThresholdBasedOutlierDetector.default() + # ds = outlier_detector.filter_data(ds) # Flag and remove percentile outliers # filtering gps_lat, gps_lon and gps_alt based on the difference to a baseline elevation # right now baseline elevation is gapfilled monthly median elevation From 956bc88d57a4f89dbb2fa182dd6efb8d5ed82255 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 8 May 2024 16:00:02 +0200 Subject: [PATCH 11/22] 
adding wspd and rh to persistence --- src/pypromice/qc/persistence.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pypromice/qc/persistence.py b/src/pypromice/qc/persistence.py index fbc048be..273b523a 100644 --- a/src/pypromice/qc/persistence.py +++ b/src/pypromice/qc/persistence.py @@ -17,11 +17,11 @@ DEFAULT_VARIABLE_THRESHOLDS = { "t": {"max_diff": 0.0001, "period": 2}, "p": {"max_diff": 0.0001, "period": 2}, - 'gps_lat_lon':{"max_diff": 0.000001, "period": 12}, + 'gps_lat_lon':{"max_diff": 0.000001, "period": 12}, # gets special handling to remove simultaneously constant gps_lat and gps_lon 'gps_alt':{"max_diff": 0.0001, "period": 12}, 't_rad':{"max_diff": 0.0001, "period": 2}, - # Relative humidity can be very stable around 100%. - #"rh": {"max_diff": 0.0001, "period": 2}, + "rh": {"max_diff": 0.0001, "period": 2}, # gets special handling to allow constant 100% + "wspd": {"max_diff": 0.0001, "period": 6}, } @@ -79,6 +79,8 @@ def persistence_qc( if (v in df) | (v == 'gps_lat_lon'): if v != 'gps_lat_lon': mask = find_persistent_regions(df[v], period, max_diff) + if 'rh' in v: + mask = mask & (df[v]<99) n_masked = mask.sum() n_samples = len(mask) logger.info( From 2cb6efd09b03db5e40a0a1c7591a6a3517436e25 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 8 May 2024 17:48:56 +0200 Subject: [PATCH 12/22] Update variables.csv removing msg_i --- src/pypromice/process/variables.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pypromice/process/variables.csv b/src/pypromice/process/variables.csv index 7ecc27d7..36bafb14 100644 --- a/src/pypromice/process/variables.csv +++ b/src/pypromice/process/variables.csv @@ -89,4 +89,4 @@ wspd_i,wind_speed,Wind speed (instantaneous),m s-1,0,100,wdir_i wspd_x_i wspd_y_ wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,1,360,wspd_x_i wspd_y_i,all,all,4,physicalMeasurement,time lat lon 
alt,True,For meteorological observations wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,-100,100,wdir_i wspd_i,all,all,4,modelResult,time lat lon alt,True,For meteorological observations wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,-100,100,wdir_i wspd_i,all,all,4,modelResult,time lat lon alt,True,For meteorological observations -msg_i,message,Message string (instantaneous),-,,,,all,all,,qualityInformation,time lat lon alt,True,For meteorological observations +msg_i,message,Message string (instantaneous),-,,,,all,all,,qualityInformation,time lat lon alt,True,L0 only From 473869cef943a98b4e3ca2fbd458d78af61e3d6d Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 8 May 2024 18:44:17 +0200 Subject: [PATCH 13/22] better handling of subrfreezing conditions in es calculation --- src/pypromice/process/L1toL2.py | 5 +---- src/pypromice/process/aws.py | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 048de6af..b52e0b55 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -331,11 +331,8 @@ def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO f + 0.876793 * (1 - (T + T_0) / T_0) + np.log10(ei0)) - # Define freezing point. Why > -100? 
- freezing = (T < 0) & (T > -100).values - # Set to Groff & Gratch values when freezing, otherwise just rh - rh_cor = rh.where(~freezing, other = rh*(e_s_wtr / e_s_ice)) + rh_cor = xr.where(T>=0, rh, rh*(e_s_wtr / e_s_ice)) rh_cor = rh_cor.where(T.notnull()) return rh_cor diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index 1a186468..35f4e4bd 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -798,9 +798,7 @@ def calculateSaturationVaporPressure(t, T_0=273.15, T_100=373.15, es_0=6.1071, + np.log10(es_0)) # Saturation vapour pressure (hPa) - es_cor = es_wtr - freezing = t < 0 - es_cor[freezing] = es_ice[freezing] + es_cor = xr.where(t < 0, es_ice, es_wtr) return es_wtr, es_cor From 95430c1db869b70f6c3da203e7e51cdc3879257b Mon Sep 17 00:00:00 2001 From: BaptisteVandecrux Date: Thu, 9 May 2024 10:58:22 +0200 Subject: [PATCH 14/22] bug fix in usr_cor dsr_cor --- src/pypromice/process/L1toL2.py | 13 ++++++++----- src/pypromice/process/aws.py | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index b52e0b55..c000150e 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -132,6 +132,7 @@ def toL2( theta_sensor_rad, HourAngle_rad, ZenithAngle_rad, ZenithAngle_deg, lat, DifFrac, deg2rad) + # CorFac_all = xr.where(cc.notnull(), CorFac_all, 1) ds['dsr_cor'] = ds['dsr'].copy(deep=True) * CorFac_all # Apply correction AngleDif_deg = calcAngleDiff(ZenithAngle_rad, HourAngle_rad, # Calculate angle between sun and sensor @@ -158,9 +159,9 @@ def toL2( TOA_crit_nopass = (ds['dsr_cor'] > (0.9 * isr_toa + 10)) # Determine filter ds['dsr_cor'][TOA_crit_nopass] = np.nan # Apply filter and interpolate ds['usr_cor'][TOA_crit_nopass] = np.nan - ds['dsr_cor'] = ds['dsr_cor'].interpolate_na(dim='time', use_coordinate=False) - ds['usr_cor'] = ds['usr_cor'].interpolate_na(dim='time', use_coordinate=False) - + + ds['dsr_cor'] 
= ds.dsr_cor.where(ds.dsr.notnull()) + ds['usr_cor'] = ds.usr_cor.where(ds.usr.notnull()) # # Check sun position # sundown = ZenithAngle_deg >= 90 # _checkSunPos(ds, OKalbedos, sundown, sunonlowerdome, TOA_crit_nopass) @@ -331,9 +332,11 @@ def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO f + 0.876793 * (1 - (T + T_0) / T_0) + np.log10(ei0)) + # Define freezing point. Why > -100? + freezing = (T < 0) & (T > -100).values + # Set to Groff & Gratch values when freezing, otherwise just rh - rh_cor = xr.where(T>=0, rh, rh*(e_s_wtr / e_s_ice)) - rh_cor = rh_cor.where(T.notnull()) + rh_cor = rh.where(~freezing, other = rh*(e_s_wtr / e_s_ice)) return rh_cor diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index 35f4e4bd..91521a0f 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -224,7 +224,7 @@ def loadL0(self): except pd.errors.ParserError as e: # ParserError: Too many columns specified: expected 40 and found 38 - logger.info(f'-----> No msg_lat or msg_lon for {k}') + # logger.info(f'-----> No msg_lat or msg_lon for {k}') for item in ['msg_lat', 'msg_lon']: target['columns'].remove(item) # Also removes from self.config ds_list.append(self.readL0file(target)) From 5fe6e9bf4337dcd0e6c43565f887da9a4595b35d Mon Sep 17 00:00:00 2001 From: BaptisteVandecrux Date: Thu, 9 May 2024 13:46:27 +0200 Subject: [PATCH 15/22] smoothing and gap-filling of tilt --- src/pypromice/process/L1toL2.py | 44 ++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index c000150e..8a151d93 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -90,13 +90,15 @@ def toL2( T_0, T_100, ews, ei0) # Determiune cloud cover for on-ice stations - if not ds.attrs['bedrock']: - cc = calcCloudCoverage(ds['t_u'], T_0, eps_overcast, eps_clear, # Calculate cloud coverage - ds['dlr'], ds.attrs['station_id']) - ds['cc'] 
= (('time'), cc.data) - else: - # Default cloud cover for bedrock station for which tilt should be 0 anyway. - cc = 0.8 + cc = calcCloudCoverage(ds['t_u'], T_0, eps_overcast, eps_clear, # Calculate cloud coverage + ds['dlr'], ds.attrs['station_id']) + ds['cc'] = (('time'), cc.data) + + # removed by bav: station on bedrock can tilt + # anyways cc cannot be either a dataArray and a float + # if not ds.attrs['bedrock']: + # # Default cloud cover for bedrock station for which tilt should be 0 anyway. + # cc = 0.8 # Determine surface temperature ds['t_surf'] = calcSurfaceTemperature(T_0, ds['ulr'], ds['dlr'], # Calculate surface temperature @@ -116,6 +118,29 @@ def toL2( lat = ds['gps_lat'].mean() lon = ds['gps_lon'].mean() + # smoothing tilt + for v in ['tilt_x','tilt_y']: + threshold = 0.2 + + ds[v] = (ds[v].where( + (ds[v].resample(time='H').median().rolling( + time= 3*24, center=True, min_periods=2 + ).std().reindex(time=ds.time, method='bfill')) Date: Thu, 9 May 2024 14:47:17 +0200 Subject: [PATCH 16/22] using pandas resample instead of xarray's --- src/pypromice/process/L1toL2.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 8a151d93..975d3a3d 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -122,24 +122,23 @@ def toL2( for v in ['tilt_x','tilt_y']: threshold = 0.2 - ds[v] = (ds[v].where( - (ds[v].resample(time='H').median().rolling( - time= 3*24, center=True, min_periods=2 - ).std().reindex(time=ds.time, method='bfill')) Date: Thu, 9 May 2024 19:07:37 +0200 Subject: [PATCH 17/22] Removing msg_i from output file --- src/pypromice/process/variables.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pypromice/process/variables.csv b/src/pypromice/process/variables.csv index 36bafb14..2960259b 100644 --- a/src/pypromice/process/variables.csv +++ b/src/pypromice/process/variables.csv @@ 
-89,4 +89,4 @@ wspd_i,wind_speed,Wind speed (instantaneous),m s-1,0,100,wdir_i wspd_x_i wspd_y_ wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,1,360,wspd_x_i wspd_y_i,all,all,4,physicalMeasurement,time lat lon alt,True,For meteorological observations wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,-100,100,wdir_i wspd_i,all,all,4,modelResult,time lat lon alt,True,For meteorological observations wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,-100,100,wdir_i wspd_i,all,all,4,modelResult,time lat lon alt,True,For meteorological observations -msg_i,message,Message string (instantaneous),-,,,,all,all,,qualityInformation,time lat lon alt,True,L0 only +msg_i,message,Message string (instantaneous),-,,,,all,,,qualityInformation,time lat lon alt,True,L0 only From eee1cbde9898adbc589c89c6775edb283cf3caeb Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Mon, 13 May 2024 09:43:37 +0200 Subject: [PATCH 18/22] length of persistent periods now in hours --- src/pypromice/qc/persistence.py | 82 +++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 25 deletions(-) diff --git a/src/pypromice/qc/persistence.py b/src/pypromice/qc/persistence.py index 273b523a..de541ca5 100644 --- a/src/pypromice/qc/persistence.py +++ b/src/pypromice/qc/persistence.py @@ -76,30 +76,31 @@ def persistence_qc( period = variable_thresholds[k]["period"] # loading diff period for v in var_all: - if (v in df) | (v == 'gps_lat_lon'): - if v != 'gps_lat_lon': - mask = find_persistent_regions(df[v], period, max_diff) - if 'rh' in v: - mask = mask & (df[v]<99) - n_masked = mask.sum() - n_samples = len(mask) - logger.info( - f"Applying persistent QC in {v}. 
Filtering {n_masked}/{n_samples} samples" - ) - # setting outliers to NaN - df.loc[mask, v] = np.nan - else: - mask = (find_persistent_regions(df['gps_lon'], period, max_diff) \ - & find_persistent_regions(df['gps_lat'], period, max_diff) ) - - n_masked = mask.sum() - n_samples = len(mask) - logger.info( - f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples" - ) - # setting outliers to NaN - df.loc[mask, 'gps_lon'] = np.nan - df.loc[mask, 'gps_lat'] = np.nan + if v in df: + mask = find_persistent_regions(df[v], period, max_diff) + if 'rh' in v: + mask = mask & (df[v]<99) + n_masked = mask.sum() + n_samples = len(mask) + logger.info( + f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples" + ) + # setting outliers to NaN + df.loc[mask, v] = np.nan + elif v == 'gps_lat_lon': + mask = ( + find_persistent_regions(df['gps_lon'], period, max_diff) + & find_persistent_regions(df['gps_lat'], period, max_diff) + ) + + n_masked = mask.sum() + n_samples = len(mask) + logger.info( + f"Applying persistent QC in {v}. Filtering {n_masked}/{n_samples} samples" + ) + # setting outliers to NaN + df.loc[mask, 'gps_lon'] = np.nan + df.loc[mask, 'gps_lat'] = np.nan # Back to xarray, and re-assign the original attrs ds_out = df.to_xarray() @@ -131,7 +132,8 @@ def count_consecutive_persistent_values( ) -> pd.Series: diff = data.ffill().diff().abs() # forward filling all NaNs! 
mask: pd.Series = diff < max_diff - return count_consecutive_true(mask) + # return count_consecutive_true(mask) + return duration_consecutive_true(mask) def count_consecutive_true( @@ -161,3 +163,33 @@ def count_consecutive_true( is_first = series.astype("int").diff() == 1 offset = (is_first * cumsum).replace(0, np.nan).fillna(method="ffill").fillna(0) return ((cumsum - offset + 1) * series).astype("int") + + +def duration_consecutive_true( + series: Union[pd.Series, pd.DataFrame] +) -> Union[pd.Series, pd.DataFrame]: + """ + Froma a boolean series, calculates the duration, in hours, of the periods with connective true values. + + Examples + -------- + >>> duration_consecutive_true(pd.Series([False, True, False, False, True, True, True, False, True])) + pd.Series([0, 1, 0, 0, 1, 2, 3, 0, 1]) + + Parameters + ---------- + series + Boolean pandas Series or DataFrame + + Returns + ------- + consecutive_true_duration + Integer pandas Series or DataFrame with values representing the number of connective true values. 
+ + """ + # assert series.dtype == bool + cumsum = ((series.index - series.index[0]).total_seconds()/3600).to_series(index=series.index) + is_first = series.astype("int").diff() == 1 + offset = (is_first * cumsum).replace(0, np.nan).fillna(method="ffill").fillna(0) + + return (cumsum - offset) * series From a2b28cd06c3296810053f29bbcbd9567c56da02f Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Mon, 13 May 2024 10:25:30 +0200 Subject: [PATCH 19/22] minor edits from PR#241 - better doc for calculateSaturationVaporPressure - default threshold values converted to hours in persistence_qc - removed unecessary comment regarding cloud coverage calculation for bedrock stations --- src/pypromice/process/L1toL2.py | 6 ------ src/pypromice/process/aws.py | 4 +++- src/pypromice/qc/persistence.py | 13 +++++++------ 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 975d3a3d..7c0d5344 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -94,12 +94,6 @@ def toL2( ds['dlr'], ds.attrs['station_id']) ds['cc'] = (('time'), cc.data) - # removed by bav: station on bedrock can tilt - # anyways cc cannot be either a dataArray and a float - # if not ds.attrs['bedrock']: - # # Default cloud cover for bedrock station for which tilt should be 0 anyway. 
- # cc = 0.8 - # Determine surface temperature ds['t_surf'] = calcSurfaceTemperature(T_0, ds['ulr'], ds['dlr'], # Calculate surface temperature emissivity) diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index 91521a0f..58440595 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -784,7 +784,9 @@ def calculateSaturationVaporPressure(t, T_0=273.15, T_100=373.15, es_0=6.1071, Returns ------- xarray.DataArray - Specific humidity data array + Saturation vapour pressure with regard to water above 0 C (hPa) + xarray.DataArray + Saturation vapour pressure where subfreezing timestamps are with regards to ice (hPa) ''' # Saturation vapour pressure above 0 C (hPa) es_wtr = 10**(-7.90298 * (T_100 / (t + T_0) - 1) + 5.02808 * np.log10(T_100 / (t + T_0)) diff --git a/src/pypromice/qc/persistence.py b/src/pypromice/qc/persistence.py index de541ca5..e0a41830 100644 --- a/src/pypromice/qc/persistence.py +++ b/src/pypromice/qc/persistence.py @@ -14,13 +14,14 @@ logger = logging.getLogger(__name__) +# period is given in hours, 2 persistent 10 min values will be flagged if period < 0.333 DEFAULT_VARIABLE_THRESHOLDS = { - "t": {"max_diff": 0.0001, "period": 2}, - "p": {"max_diff": 0.0001, "period": 2}, - 'gps_lat_lon':{"max_diff": 0.000001, "period": 12}, # gets special handling to remove simultaneously constant gps_lat and gps_lon - 'gps_alt':{"max_diff": 0.0001, "period": 12}, - 't_rad':{"max_diff": 0.0001, "period": 2}, - "rh": {"max_diff": 0.0001, "period": 2}, # gets special handling to allow constant 100% + "t": {"max_diff": 0.0001, "period": 0.3}, + "p": {"max_diff": 0.0001, "period": 0.3}, + 'gps_lat_lon':{"max_diff": 0.000001, "period": 6}, # gets special handling to remove simultaneously constant gps_lat and gps_lon + 'gps_alt':{"max_diff": 0.0001, "period": 6}, + 't_rad':{"max_diff": 0.0001, "period": 0.3}, + "rh": {"max_diff": 0.0001, "period": 0.3}, # gets special handling to allow constant 100% "wspd": {"max_diff": 
0.0001, "period": 6}, } From 705de5f28d4d9d9276da2d7a373a7b12f7bb4931 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Mon, 13 May 2024 11:13:13 +0200 Subject: [PATCH 20/22] moved smoothTilt and smoothRot to separate function --- src/pypromice/process/L1toL2.py | 86 ++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 22 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 7c0d5344..9b74f3ef 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -111,28 +111,11 @@ def toL2( else: lat = ds['gps_lat'].mean() lon = ds['gps_lon'].mean() - - # smoothing tilt - for v in ['tilt_x','tilt_y']: - threshold = 0.2 - - ds[v] = ds[v].where( - ds[v].to_series().resample('H').median().rolling( - 3*24, center=True, min_periods=2 - ).std().reindex(ds.time, method='bfill').values Date: Mon, 13 May 2024 12:48:16 +0200 Subject: [PATCH 21/22] removing count_consecutive_true + fixed types in duration_consecutive_true --- src/pypromice/process/L1toL2.py | 2 +- src/pypromice/qc/persistence.py | 40 +++++---------------------------- 2 files changed, 6 insertions(+), 36 deletions(-) diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 9b74f3ef..73ca1a7d 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -272,7 +272,7 @@ def smoothTilt(da: xr.DataArray, threshold=0.2): xarray.DataArray either X or Y smoothed tilt inclinometer measurements ''' - # we caluclate the moving standard deviation over a 3-day sliding window + # we calculate the moving standard deviation over a 3-day sliding window # hourly resampling is necessary to make sure the same threshold can be used # for 10 min and hourly data moving_std_gap_filled = da.to_series().resample('H').median().rolling( diff --git a/src/pypromice/qc/persistence.py b/src/pypromice/qc/persistence.py index e0a41830..20ceb60c 100644 --- 
a/src/pypromice/qc/persistence.py +++ b/src/pypromice/qc/persistence.py @@ -133,44 +133,14 @@ def count_consecutive_persistent_values( ) -> pd.Series: diff = data.ffill().diff().abs() # forward filling all NaNs! mask: pd.Series = diff < max_diff - # return count_consecutive_true(mask) return duration_consecutive_true(mask) -def count_consecutive_true( - series: Union[pd.Series, pd.DataFrame] -) -> Union[pd.Series, pd.DataFrame]: - """ - Convert boolean series to integer series where the values represent the number of connective true values. - - Examples - -------- - >>> count_consecutive_true(pd.Series([False, True, False, False, True, True, True, False, True])) - pd.Series([0, 1, 0, 0, 1, 2, 3, 0, 1]) - - Parameters - ---------- - series - Boolean pandas Series or DataFrame - - Returns - ------- - consecutive_true_count - Integer pandas Series or DataFrame with values representing the number of connective true values. - - """ - # assert series.dtype == bool - cumsum = series.cumsum() - is_first = series.astype("int").diff() == 1 - offset = (is_first * cumsum).replace(0, np.nan).fillna(method="ffill").fillna(0) - return ((cumsum - offset + 1) * series).astype("int") - - def duration_consecutive_true( - series: Union[pd.Series, pd.DataFrame] -) -> Union[pd.Series, pd.DataFrame]: + series: pd.Series, +) -> pd.Series: """ - Froma a boolean series, calculates the duration, in hours, of the periods with connective true values. + From a boolean series, calculates the duration, in hours, of the periods with connective true values. Examples -------- @@ -179,12 +149,12 @@ def duration_consecutive_true( Parameters ---------- - series + pd.Series Boolean pandas Series or DataFrame Returns ------- - consecutive_true_duration + pd.Series Integer pandas Series or DataFrame with values representing the number of connective true values. 
"""

From 0ad2d31a1519a3474e161537e9c08dc1f9ee37ac Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Mon, 13 May 2024 13:49:56 +0200
Subject: [PATCH 22/22] switch back periods for t, p, rh to 2 hours in
 persistence_qc

---
 src/pypromice/qc/persistence.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/pypromice/qc/persistence.py b/src/pypromice/qc/persistence.py
index 20ceb60c..f59bf45c 100644
--- a/src/pypromice/qc/persistence.py
+++ b/src/pypromice/qc/persistence.py
@@ -16,12 +16,12 @@
 # period is given in hours, 2 persistent 10 min values will be flagged if period < 0.333
 DEFAULT_VARIABLE_THRESHOLDS = {
-    "t": {"max_diff": 0.0001, "period": 0.3},
-    "p": {"max_diff": 0.0001, "period": 0.3},
+    "t": {"max_diff": 0.0001, "period": 2},
+    "p": {"max_diff": 0.0001, "period": 2},
     'gps_lat_lon':{"max_diff": 0.000001, "period": 6}, # gets special handling to remove simultaneously constant gps_lat and gps_lon
     'gps_alt':{"max_diff": 0.0001, "period": 6},
-    't_rad':{"max_diff": 0.0001, "period": 0.3},
-    "rh": {"max_diff": 0.0001, "period": 0.3}, # gets special handling to allow constant 100%
+    't_rad':{"max_diff": 0.0001, "period": 2},
+    "rh": {"max_diff": 0.0001, "period": 2}, # gets special handling to allow constant 100%
     "wspd": {"max_diff": 0.0001, "period": 6},
 }