Skip to content

Commit

Permalink
make pme wiper compatible with pme parser (#113)
Browse files Browse the repository at this point in the history
* add some pme wiper test data
* add pressure to pme parser
* deprecate pme.minidot_* parsers to pme.*
* capture all header metadata from both wiper and minidot data
* fix new warning with star_oddi parser dayfirst missing input
* specify dayfirst for star_oddi sensors format
  • Loading branch information
JessyBarrette authored Sep 23, 2024
1 parent f217e08 commit 2e2b98f
Show file tree
Hide file tree
Showing 8 changed files with 138 additions and 31 deletions.
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,23 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## `development`

### Added

- Add compatibility with PME wipers txt format.

### Fixed

- Fixed the warning raised by the star_oddi parser when `dayfirst=True` was not specified.
- Rename pme parsers by removing `minidot_`. New functions are called `pme.txt`,
`pme.txts`, `pme.cat`. The old `minidot_*` names are kept as deprecated placeholders.

## `0.6.1` - 2024-08-30

### Added

- Add `onset.xlsx` parser
- Add `onset.xlsx` parser.
- Make `onset.xlsx` and `onset.csv` raise a `pytz.exception.AmbiguousTimeError`
when jumps associated with daylight saving time changes are detected.
- Add `star_oddi.DAT` ctd test file and fix timestamp format handling.
Expand Down
92 changes: 67 additions & 25 deletions ocean_data_parser/parsers/pme.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,46 @@
"Q ()": "q",
}

global_attributes = {"Conventions": "CF-1.6"}
default_global_attributes = {"Conventions": "CF-1.6"}


def minidot_txt(
# Deprecated functions
def minidot_txt(*args, **kwargs):
    """Deprecated alias of ``txt``.

    Logs a deprecation message and forwards every positional and keyword
    argument unchanged to ``txt``, returning whatever ``txt`` returns.
    """
    logger.warning("minidot_txt is deprecated, use txt instead")
    dataset = txt(*args, **kwargs)
    return dataset


def minidot_txts(*args, **kwargs):
    """Deprecated alias of ``txts``.

    Logs a deprecation message and forwards every positional and keyword
    argument unchanged to ``txts``, returning whatever ``txts`` returns.
    """
    logger.warning("minidot_txts is deprecated, use txts instead")
    dataset = txts(*args, **kwargs)
    return dataset


def minidot_cat(*args, **kwargs):
    """Deprecated alias of ``cat``.

    Logs a deprecation message and forwards every positional and keyword
    argument unchanged to ``cat``, returning whatever ``cat`` returns.
    """
    logger.warning("minidot_cat is deprecated, use cat instead")
    dataset = cat(*args, **kwargs)
    return dataset


def _rename_variable(variable: str) -> str:
    """Return the output variable name for a raw column header.

    Headers listed in ``VARIABLE_RENAMING_MAPPING`` use their mapped name.
    Any other header is converted to a lower-case, underscore-separated
    name:

    * ``"I (mA)"`` is replaced by ``"current"``
      (e.g. ``"Start I (mA)"`` -> ``"start_current"``);
    * ``" (Volt)"`` is replaced by ``"_volt"``;
    * otherwise everything from the first ``"("`` onward is dropped
      (e.g. ``"Cal Wipe Time (sec)"`` -> ``"cal_wipe_time"``).

    Args:
        variable (str): Column header as read from the file.

    Returns:
        str: The renamed variable.
    """
    # An explicit mapping always takes precedence over the generic rules.
    if variable in VARIABLE_RENAMING_MAPPING:
        return VARIABLE_RENAMING_MAPPING[variable]

    if "I (mA)" in variable:
        renamed = variable.replace("I (mA)", "current")
    elif " (Volt)" in variable:
        renamed = variable.replace(" (Volt)", "_volt")
    else:
        renamed = variable.split("(")[0].strip()

    # Shared normalisation: spaces become underscores, then lower-case.
    return renamed.replace(" ", "_").lower()


def txt(
path: str,
rename_variables: bool = True,
encoding: str = "utf-8",
errors: str = "strict",
timezone: str = "UTC",
global_attributes: dict = None,
) -> xr.Dataset:
"""Parse PME MiniDot txt file
Expand All @@ -71,6 +102,8 @@ def minidot_txt(
rename_variables (bool, optional): Rename variables to standardized names. Defaults to True.
encoding (str, optional): File encoding. Defaults to 'utf-8'.
errors (str, optional): Error handling. Defaults to 'strict'.
timezone (str, optional): Timezone to localize the time. Defaults to 'UTC'.
global_attributes (dict, optional): Global attributes to add to the dataset. Defaults to None.
Returns:
xarray.Dataset
Expand All @@ -87,23 +120,28 @@ def _append_to_history(msg):
errors=errors,
) as f:
# Read the header
serial_number = f.readline().replace("\n", "")
logger.debug("Parse file from serial number: %s", serial_number)
metadata = re.search(
(
r"OS REV: (?P<software_version>\d+\.\d+)\s"
r"Sensor Cal: (?P<instrument_calibration>\d*)"
),
f.readline(),
)
header = [f.readline()]
while "Time (sec)" not in header[-1]:
header += [f.readline()]

# Parse metadata from header
metadata = {}
metadata["serial_number"] = header[0].replace("\n", "")
metadata["software_version"] = re.search(r"OS REV: (\d+\.\d+)\s", header[1])[1]
if "Sensor Cal" in header[1]:
metadata["instrument_calibration"] = re.search(
r"Sensor Cal: (\d*)", header[1]
)[1]
if len(header) > 2:
for key, value in re.findall("(\w+)\: ([^,\n]+)", "".join(header[2:-1])):
metadata[key.lower()] = value.strip()

# If metadata is null then it's likely not a minidot file
if metadata is None:
warnings.warn("Failed to read: {path}", RuntimeWarning)
return pd.DataFrame(), None

# Parse column names
columns = [item.strip() for item in f.readline().split(",")]
columns = [item.strip() for item in header[-1].split(",")]

# Read the data with pandas
df = pd.read_csv(
Expand All @@ -124,12 +162,11 @@ def _append_to_history(msg):

# Global attributes
ds.attrs = {
**global_attributes,
**metadata.groupdict(),
**default_global_attributes,
**metadata,
"instrument_manufacturer": "PME",
"instrument_model": "MiniDot",
"instrument_sn": serial_number,
"history": "",
**(global_attributes or {}),
}

# Retrieve raw saturation values from minidot
Expand All @@ -155,20 +192,27 @@ def _append_to_history(msg):
for var in ds.variables:
if var not in VARIABLE_ATTRIBUTES:
logger.warning("Unknown variable: %s", var)
if "(" in var and ")" in var:
variable, unit = var.split("(")
unit = unit.replace(")", "")
ds[var].attrs.update({"units": unit})
continue
ds[var].attrs.update(VARIABLE_ATTRIBUTES[var])

if rename_variables:
ds = ds.rename_vars(VARIABLE_RENAMING_MAPPING)
ds.attrs["history"] += (
f"\n{pd.Timestamp.now().isoformat()} Rename variables: {VARIABLE_RENAMING_MAPPING}"
)
variable_mapping = {
variable: _rename_variable(variable) for variable in ds.variables
}
ds = ds.rename_vars(variable_mapping)
ds.attrs["history"] += (
f"\n{pd.Timestamp.now().isoformat()} Rename variables: {variable_mapping}"
)

ds = standardize_dataset(ds)
return ds


def minidot_txts(
def txts(
paths: Union[list, str], encoding: str = "utf-8", errors: str = "strict"
) -> xr.Dataset:
"""Parse PME Minidots txt files
Expand Down Expand Up @@ -197,9 +241,7 @@ def minidot_txts(
return xr.merge(datasets)


def minidot_cat(
path: str, encoding: str = "utf-8", errors: str = "strict"
) -> xr.Dataset:
def cat(path: str, encoding: str = "utf-8", errors: str = "strict") -> xr.Dataset:
"""cat reads PME MiniDot concatenated CAT files
Args:
Expand Down
17 changes: 14 additions & 3 deletions ocean_data_parser/parsers/star_oddi.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
"long_name": "Sound Velocity",
"standard_name": "speed_of_sound_in_sea_water",
},
"pressure": {
"long_name": "Pressure",
"standard_name": "sea_water_pressure",
},
}


Expand Down Expand Up @@ -108,6 +112,7 @@ def _standardize_attributes(item):
names=variables.keys(),
parse_dates=["time"],
date_format=date_format,
dayfirst=True,
)
if "time" in df:
df = df.set_index(["time"])
Expand Down Expand Up @@ -139,9 +144,15 @@ def _standardize_attributes(item):
)
),
"n_records": n_records,
"start_time": pd.to_datetime(start_time).isoformat(),
"end_time": pd.to_datetime(end_time).isoformat(),
"date_created": pd.to_datetime(metadata.pop("created")).isoformat(),
"start_time": pd.to_datetime(
start_time, format=date_format, dayfirst=True
).isoformat(),
"end_time": pd.to_datetime(
end_time, format=date_format, dayfirst=True
).isoformat(),
"date_created": pd.to_datetime(
metadata.pop("created"), format=date_format, dayfirst=True
).isoformat(),
"original_file_header": original_header,
}
# Add variable attributes
Expand Down
2 changes: 1 addition & 1 deletion ocean_data_parser/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def detect_file_format(file: str, encoding: str = "UTF-8") -> str:
elif ext == "MON":
parser = "van_essen_instruments.mon"
elif ext == "txt" and re.match(r"\d+\-\d+\s*\nOS REV\:", header):
parser = "pme.minidot_txt"
parser = "pme.txt"
elif ext == "txt" and re.match(r"Model\=.*\nFirmware\=.*\nSerial\=.*", header):
parser = "rbr.rtext"
elif ext == "txt" and "Front panel parameter change:" in header:
Expand Down
14 changes: 14 additions & 0 deletions tests/parsers_test_files/pme/wiper/2024-07-28 100000Z.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
5958-066383
OS REV: 2.30
Type: 0, Scrub: 1, Angle: 45
Timeout: 10, Threshold: 200
Time (sec), Bat (Volt), T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm)
1722160800, +3.43, +25.154, 26, +6.0, +5.9, +84.5, +68.7, +79.6, 225.2, +1.1
1722171600, +3.48, +25.396, 27, +6.0, +6.0, +73.3, +70.8, +77.3, 230.3, +1.1
1722182400, +3.48, +25.396, 28, +6.0, +5.9, +76.5, +70.1, +78.6, 225.8, +1.1
1722193200, +3.48, +25.154, 29, +6.0, +6.0, +75.8, +70.4, +78.2, 230.1, +1.1
1722204000, +3.48, +25.396, 30, +6.0, +5.9, +75.4, +69.4, +79.1, 200.1, +1.1
1722214800, +3.43, +26.123, 31, +6.0, +5.9, +85.3, +67.4, +78.9, 225.5, +1.1
1722225600, +3.48, +25.639, 32, +6.0, +6.0, +76.1, +69.5, +77.7, 230.4, +1.1
1722236400, +3.48, +25.396, 33, +6.0, +5.9, +76.1, +68.8, +78.4, 218.3, +1.1
1722247200, +3.48, +25.396, 34, +6.0, +5.9, +73.9, +69.5, +77.3, 203.7, +1.1
14 changes: 14 additions & 0 deletions tests/parsers_test_files/pme/wiper/2024-07-29 130000Z.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
5958-066383
OS REV: 2.30
Type: 0, Scrub: 1, Angle: 45
Timeout: 10, Threshold: 200
Time (sec), Bat (Volt), T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm)
1722258000, +3.48, +24.912, 35, +6.0, +5.9, +76.4, +69.2, +79.5, 217.1, +1.1
1722268800, +3.43, +25.396, 36, +6.0, +5.9, +83.9, +67.1, +78.6, 226.7, +1.1
1722279600, +3.48, +24.912, 37, +6.0, +6.0, +74.8, +69.4, +77.6, 228.8, +1.1
1722290400, +3.48, +24.670, 38, +6.0, +5.9, +76.0, +70.3, +77.9, 204.8, +1.1
1722301200, +3.48, +24.670, 39, +6.0, +6.0, +75.8, +70.2, +78.9, 231.4, +1.1
1722312000, +3.48, +25.154, 40, +6.0, +5.9, +75.6, +68.3, +79.2, 224.8, +1.1
1722322800, +3.43, +25.396, 41, +5.9, +5.8, +84.3, +66.5, +74.9, 207.4, +1.0
1722333600, +3.48, +24.427, 42, +5.9, +6.0, +73.3, +69.4, +77.7, 230.7, +1.1
1722344400, +3.48, +24.427, 43, +5.9, +5.9, +78.1, +68.9, +77.3, 209.0, +1.1
14 changes: 14 additions & 0 deletions tests/parsers_test_files/pme/wiper/2024-07-30 160000Z.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
5958-066383
OS REV: 2.30
Type: 0, Scrub: 1, Angle: 45
Timeout: 10, Threshold: 200
Time (sec), Bat (Volt), T (deg C), Wipes (), Cal Wipe Time (sec), Wipe Time (sec), Start I (mA), Ave I (mA), Peak I (mA), Final I (mA), Rsource (Ohm)
1722355200, +3.48, +23.700, 44, +5.9, +6.0, +73.6, +69.9, +78.2, 231.5, +1.1
1722366000, +3.48, +25.396, 45, +5.9, +5.9, +76.1, +69.4, +75.9, 209.6, +1.1
1722376800, +3.43, +25.154, 46, +6.0, +5.9, +83.3, +67.8, +79.2, 231.2, +1.1
1722387600, +3.48, +24.185, 47, +6.0, +6.0, +73.7, +70.1, +77.6, 231.6, +1.1
1722398400, +3.48, +24.427, 48, +6.0, +5.9, +76.5, +68.8, +79.5, 212.1, +1.1
1722409200, +3.48, +22.974, 49, +6.0, +6.0, +75.5, +70.1, +77.7, 228.9, +1.1
1722420000, +3.48, +22.974, 50, +6.0, +5.9, +76.6, +69.4, +78.3, 215.8, +1.1
1722430800, +3.42, +23.458, 51, +6.0, +5.9, +85.2, +67.9, +79.8, 231.7, +1.1
1722441600, +3.47, +24.185, 52, +6.0, +6.0, +73.2, +69.5, +78.0, 230.5, +1.1
2 changes: 1 addition & 1 deletion tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class TestPMEParsers:
"path", glob("tests/parsers_test_files/pme/**/*.txt", recursive=True)
)
def test_txt_parser(self, path, caplog):
ds = pme.minidot_txt(path)
ds = pme.txt(path)
review_parsed_dataset(ds, path, caplog)


Expand Down

0 comments on commit 2e2b98f

Please sign in to comment.