diff --git a/.github/workflows/CI_pylinter.yml b/.github/workflows/CI_pylinter.yml index 9c0b2a5..9da6ac9 100644 --- a/.github/workflows/CI_pylinter.yml +++ b/.github/workflows/CI_pylinter.yml @@ -32,9 +32,10 @@ jobs: python-version: ${{ matrix.python-version }} # Install any dependency we require - - name: Install dependancies + - name: Install dependencies run: | python -m pip install --upgrade pip + python -m pip install setuptools shell: bash # Here, let's install our module to make sure all the dependencies specified in setup.py are diff --git a/CHANGELOG b/CHANGELOG index 1c1a1b7..934390b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,8 @@ All notable changes to ampycloud will be documented in this file. The format is inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [v1.1.0] + - [regDaniel, 2024-04-09] Add flag for clouds above (MSA + MSA_HIT_BUFFER) and allow for NSC in METAR message ## [v1.0.0] ### Added - [regDaniel, 2023-11-09] Add minimum separation condition for grouping step diff --git a/docs/source/scope.rst b/docs/source/scope.rst index fe23e06..5825706 100644 --- a/docs/source/scope.rst +++ b/docs/source/scope.rst @@ -9,6 +9,16 @@ This has the following implications for ampycloud: passing them to ampycloud, e.g. by removing them or by converting them to cloud base heights. + * Note that regulation says that "if there are no clouds of operational significance + and no restriction on vertical visibility and the abbreviation 'CAVOK' is not + appropriate, the abbreviation 'NSC' should be used" (AMC1 MET.TR.205(e)(1)). + ampycloud cannot decide whether a 'CAVOK' is appropriate, and will therefore + always return 'NSC' if no clouds of operational significance are found. If no clouds + are detected at all by the ceilometers, ampycloud will return 'NCD'. Importantly, + users should bear in mind that ampycloud cannot handle CB and TCU cases, + such that any 'NCD'/'NSC' codes issued may need to be overwritten by the user in + certain situations. + * ampycloud can evidently be used for R&D work, but the code itself should not be seen as an R&D platform. diff --git a/src/ampycloud/data.py b/src/ampycloud/data.py index 6633dfa..56e7126 100644 --- a/src/ampycloud/data.py +++ b/src/ampycloud/data.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2023 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -93,16 +93,29 @@ def _cleanup_pdf(self, data: pd.DataFrame) -> pd.DataFrame: # Begin with a thorough inspection of the dataset data = utils.check_data_consistency(data, req_cols=self.DATA_COLS) - # Then also drop any hits that is too high + # By default we set this flag to false and overwrite if enough hits are present + self._clouds_above_msa_buffer = False + + # Drop any hits that are too high and check if they exceed the threshold for 1 OKTA + # if yes, set the flag clouds_above_msa_buffer to True if self.msa is not None: hit_alt_lim = self.msa + self.msa_hit_buffer logger.info('Cropping hits above MSA+buffer: %s ft', str(hit_alt_lim)) - # Type 1 or less hits above the cut threshold get turned to NaNs, to signal a - # non-detection below the MSA. Also change the hit type to 0 accordingly ! - data.loc[data[(data.alt > hit_alt_lim) & (data.type <= 1)].index, 'type'] = 0 - data.loc[data[(data.alt > hit_alt_lim) & (data.type <= 1)].index, 'alt'] = np.nan + # First layer and vervis hits above the cut threshold get turned to NaNs, to signal a + # non-detection below the MSA. Also change the hit type to 0 accordingly in order + # to create a "no hit detected" in the range of interest (i.e. below MSA). + above_msa_t1_or_less = data[(data.alt > hit_alt_lim) & (data.type <= 1)].index + data.loc[above_msa_t1_or_less, 'type'] = 0 + data.loc[above_msa_t1_or_less, 'alt'] = np.nan # Type 2 or more hits get cropped (there should be only 1 non-detection per time-stamp). - data = data.drop(data[(data.alt > hit_alt_lim) & (data.type > 1)].index) + above_msa_t2_or_more = data[(data.alt > hit_alt_lim) & (data.type > 1)].index + data = data.drop(above_msa_t2_or_more) + if len(above_msa_t1_or_less) + len(above_msa_t2_or_more) > self._prms['MAX_HITS_OKTA0']: + logger.info( + "Hits above MSA + MSA_HIT_BUFFER exceeded threshold MAX_HITS_OKTA0. Will add " + "flag 'high_clouds_detected' to indicate the presence of high clouds." + ) + self._clouds_above_msa_buffer = True return data @@ -186,8 +199,10 @@ def __init__(self, data: pd.DataFrame, prms: Optional[dict] = None, self._layers = None @log_func_call(logger) - def data_rescaled(self, dt_mode: Optional[str] = None, alt_mode: Optional[str] = None, - dt_kwargs: Optional[dict] = None, alt_kwargs: Optional[dict] = None) -> pd.DataFrame: + def data_rescaled( + self, dt_mode: Optional[str] = None, alt_mode: Optional[str] = None, + dt_kwargs: Optional[dict] = None, alt_kwargs: Optional[dict] = None + ) -> pd.DataFrame: """ Returns a copy of the data, rescaled according to the provided parameters. Args: @@ -296,13 +311,12 @@ def _get_min_sep_for_altitude(self, altitude: float) -> float: MIN_SEP_VALS """ - if len(self.prms['MIN_SEP_LIMS']) != \ - len(self.prms['MIN_SEP_VALS']) - 1: - raise AmpycloudError( - '"MIN_SEP_LIMS" must have one less item than "MIN_SEP_VALS".' - 'Got MIN_SEP_LIMS %i and MIN_SEP_VALS %i', - (self.prms['MIN_SEP_LIMS'], self.prms['MIN_SEP_VALS']) - ) + if len(self.prms['MIN_SEP_LIMS']) != len(self.prms['MIN_SEP_VALS']) - 1: + raise AmpycloudError( + '"MIN_SEP_LIMS" must have one less item than "MIN_SEP_VALS".' + f'Got MIN_SEP_LIMS {self.prms["MIN_SEP_LIMS"]} ' + f'and MIN_SEP_VALS {self.prms["MIN_SEP_VALS"]}.', + ) min_sep_val_id = np.searchsorted(self.prms['MIN_SEP_LIMS'], altitude) @@ -361,12 +375,11 @@ def _setup_sligrolay_pdf(self, which: str = 'slices') -> tuple[pd.DataFrame, npt ] # We want to raise early if 'which' is unknown. - if not which in ['slices', 'groups', 'layers']: + if which not in ['slices', 'groups', 'layers']: raise AmpycloudError( - 'Trying to initialize a data frame for %s ' + f'Trying to initialize a data frame for {which}, ' 'which is unknown. Keyword arg "which" must be one of' '"slices", "groups" or "layers"' - %which ) # If I am looking at the slices, also keep track of whether they are isolated, or not. @@ -519,9 +532,9 @@ def _calculate_sligrolay_base_height( # Which hits are in this sli/gro/lay ? in_sligrolay = self.data[which[:-1]+'_id'] == cid # Compute the base altitude - pdf.iloc[ind, pdf.columns.get_loc('alt_base')] = self._calculate_base_height_for_selection( - in_sligrolay, - ) + pdf.iloc[ + ind, pdf.columns.get_loc('alt_base') + ] = self._calculate_base_height_for_selection(in_sligrolay,) return pdf @log_func_call(logger) @@ -992,6 +1005,29 @@ def layers(self) -> pd.DataFrame: identified by the layering algorithm. """ return self._layers + @property + def clouds_above_msa_buffer(self) -> bool: + """ Returns whether a number of hits exceeding the threshold for 1 okta is detected above + MSA + MSA_HIT_BUFFER. + + Returns: + bool: whether high clouds were detected. + + """ + return self._clouds_above_msa_buffer + + def _ncd_or_nsc(self) -> str: + """ Return the METAR code for No Cloud Detected / No Significant Cloud. + Decision based on the attribute self._clouds_above_msa_buffer. + + Returns: + str: 'NCD' or 'NSC' + + """ + if self._clouds_above_msa_buffer: + return 'NSC' + return 'NCD' + def metar_msg(self, which: str = 'layers') -> str: """ Construct a METAR-like message for the identified cloud slices, groups, or layers. @@ -1025,7 +1061,7 @@ def metar_msg(self, which: str = 'layers') -> str: # Deal with the 0 layer situation if getattr(self, f'n_{which}') == 0: - return 'NCD' + return self._ncd_or_nsc() # Deal with the situation where layers have been found ... msg = sligrolay['code'] @@ -1036,6 +1072,10 @@ def metar_msg(self, which: str = 'layers') -> str: # Here, deal with the situations when all clouds are above the MSA if len(msg) == 0: - return 'NCD' + # first check if any significant clouds are in the interval [MSA, MSA+MSA_HIT_BUFFER] + sligrolay_in_buffer = sligrolay['significant'] * (sligrolay['alt_base'] >= msa_val) + if sligrolay_in_buffer.any(): + return 'NSC' # and return a NSC as it implies that the cloud is above the MSA + return self._ncd_or_nsc() # else, check for CBH above MSA + MSA_HIT_BUFFER return msg diff --git a/src/ampycloud/scaler.py b/src/ampycloud/scaler.py index ee115d5..e752319 100644 --- a/src/ampycloud/scaler.py +++ b/src/ampycloud/scaler.py @@ -221,10 +221,10 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: if fct == 'shift-and-scale': # In this case, the only data I may need to derive from the data is the shift. - if 'shift' in kwargs.keys(): + if 'shift' in kwargs: # Already set - do nothing return kwargs - if 'mode' in kwargs.keys(): + if 'mode' in kwargs: if kwargs['mode'] == 'do': kwargs['shift'] = np.nanmax(vals) elif kwargs['mode'] == 'undo': @@ -240,12 +240,12 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: if fct == 'minmax-scale': # In this case, the challenge lies with identifying min_val and max_val, knowing that the # user may specify a min_range value. - if 'min_val' in kwargs.keys() and 'max_val' in kwargs.keys(): + if 'min_val' in kwargs and 'max_val' in kwargs: # Already specified ... do nothing return kwargs - if 'mode' in kwargs.keys(): + if 'mode' in kwargs: if kwargs['mode'] == 'do': - if 'min_range' in kwargs.keys(): + if 'min_range' in kwargs: min_range = kwargs['min_range'] kwargs.pop('min_range', None) else: @@ -260,7 +260,7 @@ def convert_kwargs(vals: np.ndarray, fct: str, **kwargs: dict) -> dict: raise AmpycloudError(f"Mode unknown: {kwargs['mode']}") # 'mode' not set -> will default to 'do' - if 'min_range' in kwargs.keys(): + if 'min_range' in kwargs: min_range = kwargs['min_range'] kwargs.pop('min_range', None) else: diff --git a/test/ampycloud/test_core.py b/test/ampycloud/test_core.py index 75dc17c..1908d61 100644 --- a/test/ampycloud/test_core.py +++ b/test/ampycloud/test_core.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -82,7 +82,7 @@ def test_run(): # Test the ability to specific parameters locally only out = run(mock_data, prms={'MSA': 0}) - assert out.metar_msg() == 'NCD' + assert out.metar_msg() == 'NSC' assert dynamic.AMPYCLOUD_PRMS['MSA'] is None # Test that warnings are being raised if a bad parameter is being given diff --git a/test/ampycloud/test_data.py b/test/ampycloud/test_data.py index 75be198..0cbe18f 100644 --- a/test/ampycloud/test_data.py +++ b/test/ampycloud/test_data.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2021-2022 MeteoSwiss, contributors listed in AUTHORS. +Copyright (c) 2021-2024 MeteoSwiss, contributors listed in AUTHORS. Distributed under the terms of the 3-Clause BSD License. @@ -80,6 +80,33 @@ def test_ceilochunk_init(): reset_prms() +@mark.parametrize('alt,expected_flag', [ + param(1000, False, id='low clouds'), + param(15000, True, id='high clouds'), +]) +def test_clouds_above_msa_buffer_flag(alt: int, expected_flag: bool): + """ Test the high clouds flagging routine. """ + + dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = 3 + dynamic.AMPYCLOUD_PRMS['MSA'] = 10000 + dynamic.AMPYCLOUD_PRMS['MSA_HIT_BUFFER'] = 1000 + + n_ceilos = 4 + lookback_time = 1200 + rate = 30 + # Create some fake data to get started + # 1 very flat layer with no gaps + mock_data = mocker.mock_layers( + n_ceilos, lookback_time, rate, [ + {'alt': alt, 'alt_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0} + ] + ) + chunk = CeiloChunk(mock_data) + assert chunk.clouds_above_msa_buffer == expected_flag + + reset_prms() + + def test_ceilochunk_basic(): """ Test the basic methods of the CeiloChunk class. """ @@ -228,6 +255,41 @@ def test_ceilochunk_nocld(): assert chunk.metar_msg() == 'NCD' +@mark.parametrize('alt', [ + param(10500, id='in buffer'), + param(15000, id='above buffer'), +]) +def test_ceilochunk_highcld(alt): + """ Test the methods of CeiloChunks when high clouds are seen in the interval. """ + + dynamic.AMPYCLOUD_PRMS['MAX_HITS_OKTA0'] = 3 + dynamic.AMPYCLOUD_PRMS['MSA'] = 10000 + dynamic.AMPYCLOUD_PRMS['MSA_HIT_BUFFER'] = 1000 + + n_ceilos = 4 + lookback_time = 1200 + rate = 30 + # Create some fake data to get started + # 1 very flat layer with no gaps + mock_data = mocker.mock_layers( + n_ceilos, lookback_time, rate, + [{'alt': alt, 'alt_std': 10, 'sky_cov_frac': 0.1, 'period': 100, 'amplitude': 0}] + ) + + # Instantiate a CeiloChunk entity ... + chunk = CeiloChunk(mock_data) + + # Do the dance ... + chunk.find_slices() + chunk.find_groups() + chunk.find_layers() + + # Assert the final METAR code is correct + assert chunk.metar_msg() == 'NSC' + + reset_prms() + + def test_ceilochunk_2lay(): """ Test the methods of CeiloChunks when 2 layers are seen in the interval. """