make pme wiper compatible with pme parser (#113)

* add some pme wiper test data * add pressure to pme parser * deprecate pme.minidot_* parsers to pme.* * capture all header metadata from both wiper and minidot data * fix new warning with star_oddi parser dayfirst missing input * specify dayfirst for star_oddi sensors format
cioos-siooc · Sep 23, 2024 · 2e2b98f · 2e2b98f
1 parent f217e08
commit 2e2b98f
Show file tree

Hide file tree

Showing 8 changed files with 138 additions and 31 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,11 +5,23 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## `development`
+
+### Added
+
+- Add compatibility with PME wipers txt format.
+
+### Fixed
+
+- Fixed the warning raised by the star_oddi parser when the `dayfirst=True` input was missing.
+- Rename pme parsers by removing the `minidot_` prefix. The new functions are called `pme.txt`,
+`pme.txts` and `pme.cat`. The old `minidot_*` names are kept as deprecated placeholders.
+
 ## `0.6.1` - 2024-08-30
 
 ### Added
 
-- Add `onset.xlsx` parser
+- Add `onset.xlsx` parser.
 - Make `onset.xlsx` and `onset.csv` raise a `pytz.exception.AmbiguousTimeError`
    when jumps associated with daylight saving time changes are detected.
 - Add `star_oddi.DAT` ctd test file and fix timestamp format handling.

diff --git a/ocean_data_parser/parsers/pme.py b/ocean_data_parser/parsers/pme.py
@@ -54,15 +54,46 @@
     "Q ()": "q",
 }
 
-global_attributes = {"Conventions": "CF-1.6"}
+default_global_attributes = {"Conventions": "CF-1.6"}
 
 
-def minidot_txt(
+# Deprecated functions
+def minidot_txt(*args, **kwargs):
+    """Rename minidot_txt to txt"""
+    logger.warning("minidot_txt is deprecated, use txt instead")
+    return txt(*args, **kwargs)
+
+
+def minidot_txts(*args, **kwargs):
+    """Rename minidot_txts to txts"""
+    logger.warning("minidot_txts is deprecated, use txts instead")
+    return txts(*args, **kwargs)
+
+
+def minidot_cat(*args, **kwargs):
+    """Rename minidot_cat to cat"""
+    logger.warning("minidot_cat is deprecated, use cat instead")
+    return cat(*args, **kwargs)
+
+
+def _rename_variable(variable: str) -> str:
+    if variable in VARIABLE_RENAMING_MAPPING:
+        return VARIABLE_RENAMING_MAPPING[variable]
+    elif "I (mA)" in variable:
+        return variable.replace("I (mA)", "current").replace(" ", "_").lower()
+    elif " (Volt)" in variable:
+        return variable.replace(" (Volt)", "_volt").replace(" ", "_").lower()
+    else:
+        return variable.split("(")[0].strip().replace(" ", "_").lower()
+
+
+def txt(
     path: str,
     rename_variables: bool = True,
     encoding: str = "utf-8",
     errors: str = "strict",
     timezone: str = "UTC",
+    global_attributes: dict = None,
 ) -> xr.Dataset:
     """Parse PME MiniDot txt file
 
@@ -71,6 +102,8 @@ def minidot_txt(
         rename_variables (bool, optional): Rename the parsed variables. Defaults to True.
         encoding (str, optional): File encoding. Defaults to 'utf-8'.
         errors (str, optional): Error handling. Defaults to 'strict'.
+        timezone (str, optional): Timezone to localize the time. Defaults to 'UTC'.
+        global_attributes (dict, optional): Global attributes to add to the dataset. Defaults to None.
 
     Returns:
         xarray.Dataset
@@ -87,23 +120,28 @@ def _append_to_history(msg):
         errors=errors,
     ) as f:
         # Read the header
-        serial_number = f.readline().replace("\n", "")
-        logger.debug("Parse file from serial number: %s", serial_number)
-        metadata = re.search(
-            (
-                r"OS REV: (?P<software_version>\d+\.\d+)\s"
-                r"Sensor Cal: (?P<instrument_calibration>\d*)"
-            ),
-            f.readline(),
-        )
+        header = [f.readline()]
+        while "Time (sec)" not in header[-1]:
+            header += [f.readline()]
+
+        # Parse metadata from header
+        metadata = {}
+        metadata["serial_number"] = header[0].replace("\n", "")
+        metadata["software_version"] = re.search(r"OS REV: (\d+\.\d+)\s", header[1])[1]
+        if "Sensor Cal" in header[1]:
+            metadata["instrument_calibration"] = re.search(
+                r"Sensor Cal: (\d*)", header[1]
+            )[1]
+        if len(header) > 2:
+            for key, value in re.findall("(\w+)\: ([^,\n]+)", "".join(header[2:-1])):
+                metadata[key.lower()] = value.strip()
 
-        # If metadata is null than it's likely not a minidot file
         if metadata is None:
             warnings.warn("Failed to read: {path}", RuntimeWarning)
             return pd.DataFrame(), None
 
         # Parse column names
-        columns = [item.strip() for item in f.readline().split(",")]
+        columns = [item.strip() for item in header[-1].split(",")]
 
         # Read the data with pandas
         df = pd.read_csv(
@@ -124,12 +162,11 @@ def _append_to_history(msg):
 
     # Global attributes
     ds.attrs = {
-        **global_attributes,
-        **metadata.groupdict(),
+        **default_global_attributes,
+        **metadata,
         "instrument_manufacturer": "PME",
-        "instrument_model": "MiniDot",
-        "instrument_sn": serial_number,
         "history": "",
+        **(global_attributes or {}),
     }
 
     # Retrieve raw saturation values from minidot
@@ -155,20 +192,27 @@ def _append_to_history(msg):
     for var in ds.variables:
         if var not in VARIABLE_ATTRIBUTES:
             logger.warning("Unknown variable: %s", var)
+            if "(" in var and ")" in var:
+                variable, unit = var.split("(")
+                unit = unit.replace(")", "")
+                ds[var].attrs.update({"units": unit})
             continue
         ds[var].attrs.update(VARIABLE_ATTRIBUTES[var])
 
     if rename_variables:
-        ds = ds.rename_vars(VARIABLE_RENAMING_MAPPING)
-    ds.attrs["history"] += (
-        f"\n{pd.Timestamp.now().isoformat()} Rename variables: {VARIABLE_RENAMING_MAPPING}"
-    )
+        variable_mapping = {
+            variable: _rename_variable(variable) for variable in ds.variables
+        }
+        ds = ds.rename_vars(variable_mapping)
+        ds.attrs["history"] += (
+            f"\n{pd.Timestamp.now().isoformat()} Rename variables: {variable_mapping}"
+        )
 
     ds = standardize_dataset(ds)
     return ds
 
 
-def minidot_txts(
+def txts(
     paths: Union[list, str], encoding: str = "utf-8", errors: str = "strict"
 ) -> xr.Dataset:
     """Parse PME Minidots txt files
@@ -197,9 +241,7 @@ def minidot_txts(
     return xr.merge(datasets)
 
 
-def minidot_cat(
-    path: str, encoding: str = "utf-8", errors: str = "strict"
-) -> xr.Dataset:
+def cat(path: str, encoding: str = "utf-8", errors: str = "strict") -> xr.Dataset:
     """cat reads PME MiniDot concatenated CAT files
 
     Args:

diff --git a/ocean_data_parser/parsers/star_oddi.py b/ocean_data_parser/parsers/star_oddi.py
@@ -31,6 +31,10 @@
         "long_name": "Sound Velocity",
         "standard_name": "speed_of_sound_in_sea_water",
     },
+    "pressure": {
+        "long_name": "Pressure",
+        "standard_name": "sea_water_pressure",
+    },
 }
 
 
@@ -108,6 +112,7 @@ def _standardize_attributes(item):
             names=variables.keys(),
             parse_dates=["time"],
             date_format=date_format,
+            dayfirst=True,
         )
         if "time" in df:
             df = df.set_index(["time"])
@@ -139,9 +144,15 @@ def _standardize_attributes(item):
                 )
             ),
             "n_records": n_records,
-            "start_time": pd.to_datetime(start_time).isoformat(),
-            "end_time": pd.to_datetime(end_time).isoformat(),
-            "date_created": pd.to_datetime(metadata.pop("created")).isoformat(),
+            "start_time": pd.to_datetime(
+                start_time, format=date_format, dayfirst=True
+            ).isoformat(),
+            "end_time": pd.to_datetime(
+                end_time, format=date_format, dayfirst=True
+            ).isoformat(),
+            "date_created": pd.to_datetime(
+                metadata.pop("created"), format=date_format, dayfirst=True
+            ).isoformat(),
             "original_file_header": original_header,
         }
         # Add variable attributes

diff --git a/ocean_data_parser/read.py b/ocean_data_parser/read.py
@@ -98,7 +98,7 @@ def detect_file_format(file: str, encoding: str = "UTF-8") -> str:
     elif ext == "MON":
         parser = "van_essen_instruments.mon"
     elif ext == "txt" and re.match(r"\d+\-\d+\s*\nOS REV\:", header):
-        parser = "pme.minidot_txt"
+        parser = "pme.txt"
     elif ext == "txt" and re.match(r"Model\=.*\nFirmware\=.*\nSerial\=.*", header):
         parser = "rbr.rtext"
     elif ext == "txt" and "Front panel parameter change:" in header:

diff --git a/tests/parsers_test_files/pme/wiper/2024-07-28 100000Z.txt b/tests/parsers_test_files/pme/wiper/2024-07-28 100000Z.txt
@@ -0,0 +1,14 @@
+5958-066383
+OS REV: 2.30
+Type:   0, Scrub:   1, Angle:  45
+Timeout:  10, Threshold: 200
+Time (sec),  Bat (Volt),  T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm)
+ 1722160800, +3.43, +25.154,     26,  +6.0,  +5.9,  +84.5,  +68.7,  +79.6,  225.2,  +1.1
+ 1722171600, +3.48, +25.396,     27,  +6.0,  +6.0,  +73.3,  +70.8,  +77.3,  230.3,  +1.1
+ 1722182400, +3.48, +25.396,     28,  +6.0,  +5.9,  +76.5,  +70.1,  +78.6,  225.8,  +1.1
+ 1722193200, +3.48, +25.154,     29,  +6.0,  +6.0,  +75.8,  +70.4,  +78.2,  230.1,  +1.1
+ 1722204000, +3.48, +25.396,     30,  +6.0,  +5.9,  +75.4,  +69.4,  +79.1,  200.1,  +1.1
+ 1722214800, +3.43, +26.123,     31,  +6.0,  +5.9,  +85.3,  +67.4,  +78.9,  225.5,  +1.1
+ 1722225600, +3.48, +25.639,     32,  +6.0,  +6.0,  +76.1,  +69.5,  +77.7,  230.4,  +1.1
+ 1722236400, +3.48, +25.396,     33,  +6.0,  +5.9,  +76.1,  +68.8,  +78.4,  218.3,  +1.1
+ 1722247200, +3.48, +25.396,     34,  +6.0,  +5.9,  +73.9,  +69.5,  +77.3,  203.7,  +1.1
diff --git a/tests/parsers_test_files/pme/wiper/2024-07-29 130000Z.txt b/tests/parsers_test_files/pme/wiper/2024-07-29 130000Z.txt
@@ -0,0 +1,14 @@
+5958-066383
+OS REV: 2.30
+Type:   0, Scrub:   1, Angle:  45
+Timeout:  10, Threshold: 200
+Time (sec),  Bat (Volt),  T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm)
+ 1722258000, +3.48, +24.912,     35,  +6.0,  +5.9,  +76.4,  +69.2,  +79.5,  217.1,  +1.1
+ 1722268800, +3.43, +25.396,     36,  +6.0,  +5.9,  +83.9,  +67.1,  +78.6,  226.7,  +1.1
+ 1722279600, +3.48, +24.912,     37,  +6.0,  +6.0,  +74.8,  +69.4,  +77.6,  228.8,  +1.1
+ 1722290400, +3.48, +24.670,     38,  +6.0,  +5.9,  +76.0,  +70.3,  +77.9,  204.8,  +1.1
+ 1722301200, +3.48, +24.670,     39,  +6.0,  +6.0,  +75.8,  +70.2,  +78.9,  231.4,  +1.1
+ 1722312000, +3.48, +25.154,     40,  +6.0,  +5.9,  +75.6,  +68.3,  +79.2,  224.8,  +1.1
+ 1722322800, +3.43, +25.396,     41,  +5.9,  +5.8,  +84.3,  +66.5,  +74.9,  207.4,  +1.0
+ 1722333600, +3.48, +24.427,     42,  +5.9,  +6.0,  +73.3,  +69.4,  +77.7,  230.7,  +1.1
+ 1722344400, +3.48, +24.427,     43,  +5.9,  +5.9,  +78.1,  +68.9,  +77.3,  209.0,  +1.1
diff --git a/tests/parsers_test_files/pme/wiper/2024-07-30 160000Z.txt b/tests/parsers_test_files/pme/wiper/2024-07-30 160000Z.txt
@@ -0,0 +1,14 @@
+5958-066383
+OS REV: 2.30
+Type:   0, Scrub:   1, Angle:  45
+Timeout:  10, Threshold: 200
+Time (sec),  Bat (Volt),  T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm)
+ 1722355200, +3.48, +23.700,     44,  +5.9,  +6.0,  +73.6,  +69.9,  +78.2,  231.5,  +1.1
+ 1722366000, +3.48, +25.396,     45,  +5.9,  +5.9,  +76.1,  +69.4,  +75.9,  209.6,  +1.1
+ 1722376800, +3.43, +25.154,     46,  +6.0,  +5.9,  +83.3,  +67.8,  +79.2,  231.2,  +1.1
+ 1722387600, +3.48, +24.185,     47,  +6.0,  +6.0,  +73.7,  +70.1,  +77.6,  231.6,  +1.1
+ 1722398400, +3.48, +24.427,     48,  +6.0,  +5.9,  +76.5,  +68.8,  +79.5,  212.1,  +1.1
+ 1722409200, +3.48, +22.974,     49,  +6.0,  +6.0,  +75.5,  +70.1,  +77.7,  228.9,  +1.1
+ 1722420000, +3.48, +22.974,     50,  +6.0,  +5.9,  +76.6,  +69.4,  +78.3,  215.8,  +1.1
+ 1722430800, +3.42, +23.458,     51,  +6.0,  +5.9,  +85.2,  +67.9,  +79.8,  231.7,  +1.1
+ 1722441600, +3.47, +24.185,     52,  +6.0,  +6.0,  +73.2,  +69.5,  +78.0,  230.5,  +1.1
diff --git a/tests/test_parsers.py b/tests/test_parsers.py
@@ -73,7 +73,7 @@ class TestPMEParsers:
         "path", glob("tests/parsers_test_files/pme/**/*.txt", recursive=True)
     )
     def test_txt_parser(self, path, caplog):
-        ds = pme.minidot_txt(path)
+        ds = pme.txt(path)
         review_parsed_dataset(ds, path, caplog)