From 4d9d89fc4a099191f5e090337874c6bf40358fef Mon Sep 17 00:00:00 2001 From: Jessy Barrette <30420025+JessyBarrette@users.noreply.github.com> Date: Fri, 6 Sep 2024 15:05:19 -0400 Subject: [PATCH 1/8] add some test data --- .../pme/wiper/2024-07-28 100000Z.txt | 14 ++++++++++++++ .../pme/wiper/2024-07-29 130000Z.txt | 14 ++++++++++++++ .../pme/wiper/2024-07-30 160000Z.txt | 14 ++++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 tests/parsers_test_files/pme/wiper/2024-07-28 100000Z.txt create mode 100644 tests/parsers_test_files/pme/wiper/2024-07-29 130000Z.txt create mode 100644 tests/parsers_test_files/pme/wiper/2024-07-30 160000Z.txt diff --git a/tests/parsers_test_files/pme/wiper/2024-07-28 100000Z.txt b/tests/parsers_test_files/pme/wiper/2024-07-28 100000Z.txt new file mode 100644 index 00000000..08b01e02 --- /dev/null +++ b/tests/parsers_test_files/pme/wiper/2024-07-28 100000Z.txt @@ -0,0 +1,14 @@ +5958-066383 +OS REV: 2.30 +Type: 0, Scrub: 1, Angle: 45 +Timeout: 10, Threshold: 200 +Time (sec), Bat (Volt), T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm) + 1722160800, +3.43, +25.154, 26, +6.0, +5.9, +84.5, +68.7, +79.6, 225.2, +1.1 + 1722171600, +3.48, +25.396, 27, +6.0, +6.0, +73.3, +70.8, +77.3, 230.3, +1.1 + 1722182400, +3.48, +25.396, 28, +6.0, +5.9, +76.5, +70.1, +78.6, 225.8, +1.1 + 1722193200, +3.48, +25.154, 29, +6.0, +6.0, +75.8, +70.4, +78.2, 230.1, +1.1 + 1722204000, +3.48, +25.396, 30, +6.0, +5.9, +75.4, +69.4, +79.1, 200.1, +1.1 + 1722214800, +3.43, +26.123, 31, +6.0, +5.9, +85.3, +67.4, +78.9, 225.5, +1.1 + 1722225600, +3.48, +25.639, 32, +6.0, +6.0, +76.1, +69.5, +77.7, 230.4, +1.1 + 1722236400, +3.48, +25.396, 33, +6.0, +5.9, +76.1, +68.8, +78.4, 218.3, +1.1 + 1722247200, +3.48, +25.396, 34, +6.0, +5.9, +73.9, +69.5, +77.3, 203.7, +1.1 diff --git a/tests/parsers_test_files/pme/wiper/2024-07-29 130000Z.txt b/tests/parsers_test_files/pme/wiper/2024-07-29 130000Z.txt new file mode 100644 index 00000000..ab9d7403 --- /dev/null +++ b/tests/parsers_test_files/pme/wiper/2024-07-29 130000Z.txt @@ -0,0 +1,14 @@ +5958-066383 +OS REV: 2.30 +Type: 0, Scrub: 1, Angle: 45 +Timeout: 10, Threshold: 200 +Time (sec), Bat (Volt), T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm) + 1722258000, +3.48, +24.912, 35, +6.0, +5.9, +76.4, +69.2, +79.5, 217.1, +1.1 + 1722268800, +3.43, +25.396, 36, +6.0, +5.9, +83.9, +67.1, +78.6, 226.7, +1.1 + 1722279600, +3.48, +24.912, 37, +6.0, +6.0, +74.8, +69.4, +77.6, 228.8, +1.1 + 1722290400, +3.48, +24.670, 38, +6.0, +5.9, +76.0, +70.3, +77.9, 204.8, +1.1 + 1722301200, +3.48, +24.670, 39, +6.0, +6.0, +75.8, +70.2, +78.9, 231.4, +1.1 + 1722312000, +3.48, +25.154, 40, +6.0, +5.9, +75.6, +68.3, +79.2, 224.8, +1.1 + 1722322800, +3.43, +25.396, 41, +5.9, +5.8, +84.3, +66.5, +74.9, 207.4, +1.0 + 1722333600, +3.48, +24.427, 42, +5.9, +6.0, +73.3, +69.4, +77.7, 230.7, +1.1 + 1722344400, +3.48, +24.427, 43, +5.9, +5.9, +78.1, +68.9, +77.3, 209.0, +1.1 diff --git a/tests/parsers_test_files/pme/wiper/2024-07-30 160000Z.txt b/tests/parsers_test_files/pme/wiper/2024-07-30 160000Z.txt new file mode 100644 index 00000000..73bec657 --- /dev/null +++ b/tests/parsers_test_files/pme/wiper/2024-07-30 160000Z.txt @@ -0,0 +1,14 @@ +5958-066383 +OS REV: 2.30 +Type: 0, Scrub: 1, Angle: 45 +Timeout: 10, Threshold: 200 +Time (sec), Bat (Volt), T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm) + 1722355200, +3.48, +23.700, 44, +5.9, +6.0, +73.6, +69.9, +78.2, 231.5, +1.1 + 1722366000, +3.48, +25.396, 45, +5.9, +5.9, +76.1, +69.4, +75.9, 209.6, +1.1 + 1722376800, +3.43, +25.154, 46, +6.0, +5.9, +83.3, +67.8, +79.2, 231.2, +1.1 + 1722387600, +3.48, +24.185, 47, +6.0, +6.0, +73.7, +70.1, +77.6, 231.6, +1.1 + 1722398400, +3.48, +24.427, 48, +6.0, +5.9, +76.5, +68.8, +79.5, 212.1, +1.1 + 1722409200, +3.48, +22.974, 49, +6.0, +6.0, +75.5, +70.1, +77.7, 228.9, +1.1 + 1722420000, +3.48, +22.974, 50, +6.0, +5.9, +76.6, +69.4, +78.3, 215.8, +1.1 + 1722430800, +3.42, +23.458, 51, +6.0, +5.9, +85.2, +67.9, +79.8, 231.7, +1.1 + 1722441600, +3.47, +24.185, 52, +6.0, +6.0, +73.2, +69.5, +78.0, 230.5, +1.1 From f77a3c3f9ecf2bf24f2a8c83515cd3d1c6ed6f8e Mon Sep 17 00:00:00 2001 From: Jessy Barrette <30420025+JessyBarrette@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:18:06 -0400 Subject: [PATCH 2/8] add pressure to pme parser --- ocean_data_parser/parsers/star_oddi.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ocean_data_parser/parsers/star_oddi.py b/ocean_data_parser/parsers/star_oddi.py index 11c9234e..cfeb7d70 100644 --- a/ocean_data_parser/parsers/star_oddi.py +++ b/ocean_data_parser/parsers/star_oddi.py @@ -31,6 +31,10 @@ "long_name": "Sound Velocity", "standard_name": "speed_of_sound_in_sea_water", }, + "pressure": { + "long_name": "Pressure", + "standard_name": "sea_water_pressure", + }, } From 6c623dc3908c9c52b088aa916f16af478a00254a Mon Sep 17 00:00:00 2001 From: Jessy Barrette <30420025+JessyBarrette@users.noreply.github.com> Date: Mon, 23 Sep 2024 13:59:24 -0400 Subject: [PATCH 3/8] deprecate pme.minidot_* parsers to pme.* --- ocean_data_parser/parsers/pme.py | 23 +++++++++++++++++++---- ocean_data_parser/read.py | 2 +- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/ocean_data_parser/parsers/pme.py b/ocean_data_parser/parsers/pme.py index 6eec784e..6383c937 100644 --- a/ocean_data_parser/parsers/pme.py +++ b/ocean_data_parser/parsers/pme.py @@ -56,8 +56,23 @@ global_attributes = {"Conventions": "CF-1.6"} - -def minidot_txt( +# Deprecated functions +def minidot_txt(*args, **kwargs): + """Rename minidot_txt to txt""" + logger.warning("minidot_txt is deprecated, use txt instead") + return txt(*args, **kwargs) + +def minidot_txts(*args, **kwargs): + """Rename minidot_txts to txts""" + logger.warning("minidot_txts is deprecated, use txts instead") + return txts(*args, **kwargs) + +def minidot_cat(*args, **kwargs): + """Rename minidot_cat to cat""" + logger.warning("minidot_cat is deprecated, use cat instead") + return cat(*args, **kwargs) + +def txt( path: str, rename_variables: bool = True, encoding: str = "utf-8", @@ -168,7 +183,7 @@ def _append_to_history(msg): return ds -def minidot_txts( +def txts( paths: Union[list, str], encoding: str = "utf-8", errors: str = "strict" ) -> xr.Dataset: """Parse PME Minidots txt files @@ -197,7 +212,7 @@ def minidot_txts( return xr.merge(datasets) -def minidot_cat( +def cat( path: str, encoding: str = "utf-8", errors: str = "strict" ) -> xr.Dataset: """cat reads PME MiniDot concatenated CAT files diff --git a/ocean_data_parser/read.py b/ocean_data_parser/read.py index 68116085..0981102b 100644 --- a/ocean_data_parser/read.py +++ b/ocean_data_parser/read.py @@ -98,7 +98,7 @@ def detect_file_format(file: str, encoding: str = "UTF-8") -> str: elif ext == "MON": parser = "van_essen_instruments.mon" elif ext == "txt" and re.match(r"\d+\-\d+\s*\nOS REV\:", header): - parser = "pme.minidot_txt" + parser = "pme.txt" elif ext == "txt" and re.match(r"Model\=.*\nFirmware\=.*\nSerial\=.*", header): parser = "rbr.rtext" elif ext == "txt" and "Front panel parameter change:" in header: From f6aa792d688159260fad6d5ff05a8739eaf21741 Mon Sep 17 00:00:00 2001 From: Jessy Barrette <30420025+JessyBarrette@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:46:15 -0400 Subject: [PATCH 4/8] capture all header metadata from both wiper and minidot data --- ocean_data_parser/parsers/pme.py | 67 +++++++++++++++++++++----------- tests/test_parsers.py | 2 +- 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/ocean_data_parser/parsers/pme.py b/ocean_data_parser/parsers/pme.py index 6383c937..3e5459df 100644 --- a/ocean_data_parser/parsers/pme.py +++ b/ocean_data_parser/parsers/pme.py @@ -54,7 +54,8 @@ "Q ()": "q", } -global_attributes = {"Conventions": "CF-1.6"} +default_global_attributes = {"Conventions": "CF-1.6"} + # Deprecated functions def minidot_txt(*args, **kwargs): @@ -62,22 +63,35 @@ def minidot_txt(*args, **kwargs): logger.warning("minidot_txt is deprecated, use txt instead") return txt(*args, **kwargs) + def minidot_txts(*args, **kwargs): """Rename minidot_txts to txts""" logger.warning("minidot_txts is deprecated, use txts instead") return txts(*args, **kwargs) + def minidot_cat(*args, **kwargs): """Rename minidot_cat to cat""" logger.warning("minidot_cat is deprecated, use cat instead") return cat(*args, **kwargs) +def _rename_variable(variable:str) -> str: + if variable in VARIABLE_RENAMING_MAPPING: + return VARIABLE_RENAMING_MAPPING[variable] + elif "I (mA)" in variable: + return variable.replace("I (mA)", "current").replace(" ", "_").lower() + elif " (Volt)" in variable: + return variable.replace(" (Volt)", "_volt").replace(" ", "_").lower() + else: + return variable.split("(")[0].strip().replace(" ", "_").lower() + def txt( path: str, rename_variables: bool = True, encoding: str = "utf-8", errors: str = "strict", timezone: str = "UTC", + global_attributes: dict = None, ) -> xr.Dataset: """Parse PME MiniDot txt file @@ -86,6 +100,8 @@ def txt( rename_variables (bool, optional): _description_. Defaults to True. encoding (str, optional): File encoding. Defaults to 'utf-8'. errors (str, optional): Error handling. Defaults to 'strict'. + timezone (str, optional): Timezone to localize the time. Defaults to 'UTC'. + global_attributes (dict, optional): Global attributes to add to the dataset. Defaults to {}. Returns: xarray.Dataset @@ -102,23 +118,26 @@ def _append_to_history(msg): errors=errors, ) as f: # Read the headre - serial_number = f.readline().replace("\n", "") - logger.debug("Parse file from serial number: %s", serial_number) - metadata = re.search( - ( - r"OS REV: (?P\d+\.\d+)\s" - r"Sensor Cal: (?P\d*)" - ), - f.readline(), - ) + header = [f.readline()] + while "Time (sec)" not in header[-1]: + header += [f.readline()] + + # Parse metadata from header + metadata = {} + metadata["serial_number"] = header[0].replace("\n", "") + metadata["software_version"] = re.search(r"OS REV: (\d+\.\d+)\s", header[1])[1] + if "Sensor Cal" in header[1]: + metadata["instrument_calibration"] = re.search(r"Sensor Cal: (\d*)",header[1])[1] + if len(header) > 2: + for key, value in re.findall("(\w+)\: ([^,\n]+)", "".join(header[2:-1])): + metadata[key.lower()] = value.strip() - # If metadata is null than it's likely not a minidot file if metadata is None: warnings.warn("Failed to read: {path}", RuntimeWarning) return pd.DataFrame(), None # Parse column names - columns = [item.strip() for item in f.readline().split(",")] + columns = [item.strip() for item in header[-1].split(",")] # Read the data with pandas df = pd.read_csv( @@ -139,12 +158,11 @@ def _append_to_history(msg): # Global attributes ds.attrs = { - **global_attributes, - **metadata.groupdict(), + **default_global_attributes, + **metadata, "instrument_manufacturer": "PME", - "instrument_model": "MiniDot", - "instrument_sn": serial_number, "history": "", + **(global_attributes or {}), } # Retrieve raw saturation values from minidot @@ -170,14 +188,19 @@ def _append_to_history(msg): for var in ds.variables: if var not in VARIABLE_ATTRIBUTES: logger.warning("Unknown variable: %s", var) + if "(" in var and ")" in var: + variable, unit = var.split("(") + unit = unit.replace(")", "") + ds[var].attrs.update({"units": unit}) continue ds[var].attrs.update(VARIABLE_ATTRIBUTES[var]) if rename_variables: - ds = ds.rename_vars(VARIABLE_RENAMING_MAPPING) - ds.attrs["history"] += ( - f"\n{pd.Timestamp.now().isoformat()} Rename variables: {VARIABLE_RENAMING_MAPPING}" - ) + variable_mapping = {variable: _rename_variable(variable) for variable in ds.variables} + ds = ds.rename_vars(variable_mapping) + ds.attrs["history"] += ( + f"\n{pd.Timestamp.now().isoformat()} Rename variables: {variable_mapping}" + ) ds = standardize_dataset(ds) return ds @@ -212,9 +235,7 @@ def txts( return xr.merge(datasets) -def cat( - path: str, encoding: str = "utf-8", errors: str = "strict" -) -> xr.Dataset: +def cat(path: str, encoding: str = "utf-8", errors: str = "strict") -> xr.Dataset: """cat reads PME MiniDot concatenated CAT files Args: diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 6b39a4ca..5fa61e6b 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -73,7 +73,7 @@ class TestPMEParsers: "path", glob("tests/parsers_test_files/pme/**/*.txt", recursive=True) ) def test_txt_parser(self, path, caplog): - ds = pme.minidot_txt(path) + ds = pme.txt(path) review_parsed_dataset(ds, path, caplog) From 54ae8ebaec74dadd3365d3dc2a9cc0e68e202c4d Mon Sep 17 00:00:00 2001 From: Jessy Barrette <30420025+JessyBarrette@users.noreply.github.com> Date: Mon, 23 Sep 2024 16:03:40 -0400 Subject: [PATCH 5/8] ruff fixes --- ocean_data_parser/parsers/pme.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ocean_data_parser/parsers/pme.py b/ocean_data_parser/parsers/pme.py index 3e5459df..4ed037c8 100644 --- a/ocean_data_parser/parsers/pme.py +++ b/ocean_data_parser/parsers/pme.py @@ -75,7 +75,8 @@ def minidot_cat(*args, **kwargs): logger.warning("minidot_cat is deprecated, use cat instead") return cat(*args, **kwargs) -def _rename_variable(variable:str) -> str: + +def _rename_variable(variable: str) -> str: if variable in VARIABLE_RENAMING_MAPPING: return VARIABLE_RENAMING_MAPPING[variable] elif "I (mA)" in variable: @@ -85,6 +86,7 @@ def _rename_variable(variable:str) -> str: else: return variable.split("(")[0].strip().replace(" ", "_").lower() + def txt( path: str, rename_variables: bool = True, @@ -127,7 +129,9 @@ def _append_to_history(msg): metadata["serial_number"] = header[0].replace("\n", "") metadata["software_version"] = re.search(r"OS REV: (\d+\.\d+)\s", header[1])[1] if "Sensor Cal" in header[1]: - metadata["instrument_calibration"] = re.search(r"Sensor Cal: (\d*)",header[1])[1] + metadata["instrument_calibration"] = re.search( + r"Sensor Cal: (\d*)", header[1] + )[1] if len(header) > 2: for key, value in re.findall("(\w+)\: ([^,\n]+)", "".join(header[2:-1])): metadata[key.lower()] = value.strip() @@ -196,7 +200,9 @@ def _append_to_history(msg): ds[var].attrs.update(VARIABLE_ATTRIBUTES[var]) if rename_variables: - variable_mapping = {variable: _rename_variable(variable) for variable in ds.variables} + variable_mapping = { + variable: _rename_variable(variable) for variable in ds.variables + } ds = ds.rename_vars(variable_mapping) ds.attrs["history"] += ( f"\n{pd.Timestamp.now().isoformat()} Rename variables: {variable_mapping}" From cdee82b65b0e0896b42b8ef2e099b8e6a6467a32 Mon Sep 17 00:00:00 2001 From: Jessy Barrette <30420025+JessyBarrette@users.noreply.github.com> Date: Mon, 23 Sep 2024 16:14:33 -0400 Subject: [PATCH 6/8] fix new warning with star_oddi parser dayfirst missing input --- ocean_data_parser/parsers/star_oddi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ocean_data_parser/parsers/star_oddi.py b/ocean_data_parser/parsers/star_oddi.py index cfeb7d70..67cd27b1 100644 --- a/ocean_data_parser/parsers/star_oddi.py +++ b/ocean_data_parser/parsers/star_oddi.py @@ -112,6 +112,7 @@ def _standardize_attributes(item): names=variables.keys(), parse_dates=["time"], date_format=date_format, + dayfirst=True, ) if "time" in df: df = df.set_index(["time"]) From 5bf8008da037e0726da9c20b4c1b3490855b88af Mon Sep 17 00:00:00 2001 From: Jessy Barrette <30420025+JessyBarrette@users.noreply.github.com> Date: Mon, 23 Sep 2024 16:40:10 -0400 Subject: [PATCH 7/8] specify dayfirst for star_oddi sensors format --- ocean_data_parser/parsers/star_oddi.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ocean_data_parser/parsers/star_oddi.py b/ocean_data_parser/parsers/star_oddi.py index 67cd27b1..11321aaf 100644 --- a/ocean_data_parser/parsers/star_oddi.py +++ b/ocean_data_parser/parsers/star_oddi.py @@ -144,9 +144,15 @@ def _standardize_attributes(item): ) ), "n_records": n_records, - "start_time": pd.to_datetime(start_time).isoformat(), - "end_time": pd.to_datetime(end_time).isoformat(), - "date_created": pd.to_datetime(metadata.pop("created")).isoformat(), + "start_time": pd.to_datetime( + start_time, format=date_format, dayfirst=True + ).isoformat(), + "end_time": pd.to_datetime( + end_time, format=date_format, dayfirst=True + ).isoformat(), + "date_created": pd.to_datetime( + metadata.pop("created"), format=date_format, dayfirst=True + ).isoformat(), "original_file_header": original_header, } # Add variable attributes From 3450ab2b50d34955b029454f770d1ef0f1f6d07a Mon Sep 17 00:00:00 2001 From: Jessy Barrette <30420025+JessyBarrette@users.noreply.github.com> Date: Mon, 23 Sep 2024 16:45:50 -0400 Subject: [PATCH 8/8] add to change log --- CHANGELOG.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19d63a84..5eead0b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,11 +5,23 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## `development` + +### Added + +- Add compatibility with PME wipers txt format. + +### Fixed + +- Fixed warning regarding star_oddi dayfirst=True missing input +- Rename pme parsers by removing `minidot_`. New functions are called `pme.txt`, +`pme.txts`, `pme.cat`. Maintain still a placeholder for those functions. + ## `0.6.1` - 2024-08-30 ### Added -- Add `onset.xlsx` parser +- Add `onset.xlsx` parser. - Make `onset.xlsx` and `onset.csv` raise a `pytz.exception.AmbiguousTimeError` when jumps associated with daylight saving time changes are detected. - Add `star_oddi.DAT` ctd test file and fix timestamp format handling.