From 8d2bc96b701d22e528172fbdd81f9a1f172bfc15 Mon Sep 17 00:00:00 2001 From: Callum Rollo Date: Thu, 12 Dec 2024 16:41:38 +0100 Subject: [PATCH 1/2] add og1 output option to seaxexplorer processing --- pyglider/seaexplorer.py | 29 ++++++++++----- pyglider/utils.py | 81 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 9 deletions(-) diff --git a/pyglider/seaexplorer.py b/pyglider/seaexplorer.py index 7057444..6934b39 100644 --- a/pyglider/seaexplorer.py +++ b/pyglider/seaexplorer.py @@ -304,7 +304,7 @@ def _remove_fill_values(df, fill_value=9999): def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw', profile_filt_time=100, profile_min_time=300, - maxgap=10, interpolate=False, fnamesuffix=''): + maxgap=10, interpolate=False, fnamesuffix='', og_format=False): """ A little different than above, for the 4-file version of the data set. """ @@ -320,7 +320,12 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw', _log.info(f'Opening combined payload file {indir}/{id}-{kind}pld.parquet') sensor = pl.read_parquet(f'{indir}/{id}-{kind}pld.parquet') sensor = _remove_fill_values(sensor) - + if og_format: + # temporarily translate from og names to pyglider names + for pyglider_var, og_var in utils.pyglider_og_var_dict.items(): + if og_var in ncvar.keys(): + ncvar[pyglider_var] = ncvar.pop(og_var) + # build a new data set based on info in `deploymentyaml.` # We will use ctd as the interpolant ds = xr.Dataset() @@ -470,8 +475,6 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw', ds = ds.assign_coords(longitude=ds.longitude) ds = ds.assign_coords(latitude=ds.latitude) ds = ds.assign_coords(depth=ds.depth) - # ds = ds._get_distance_over_ground(ds) - ds = utils.fill_metadata(ds, deployment['metadata'], device_data) start = ds['time'].values[0] @@ -485,8 +488,6 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw', except: pass id0 = ds.attrs['deployment_name'] - outname = outdir + id0 + fnamesuffix + 
'.nc' - _log.info('writing %s', outname) if 'units' in ds.time.attrs.keys(): ds.time.attrs.pop('units') if 'calendar' in ds.time.attrs.keys(): @@ -494,9 +495,19 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw', if 'ad2cp_time' in list(ds): if 'units' in ds.ad2cp_time.attrs.keys(): ds.ad2cp_time.attrs.pop('units') - ds.to_netcdf(outname, 'w', - encoding={'time': {'units': 'seconds since 1970-01-01T00:00:00Z', - 'dtype': 'float64'}}) + if og_format: + ds = utils.add_og1_metadata(ds, deployment) + outname = f"{outdir}/{ds.attrs['id']}.nc" + _log.info('writing %s', outname) + ds.to_netcdf(outname, 'w', + encoding={'TIME': {'units': 'seconds since 1970-01-01T00:00:00Z', + 'dtype': 'float64'}}) + else: + outname = outdir + id0 + fnamesuffix + '.nc' + _log.info('writing %s', outname) + ds.to_netcdf(outname, 'w', + encoding={'time': {'units': 'seconds since 1970-01-01T00:00:00Z', + 'dtype': 'float64'}}) return outname diff --git a/pyglider/utils.py b/pyglider/utils.py index e55350c..afa5027 100644 --- a/pyglider/utils.py +++ b/pyglider/utils.py @@ -633,6 +633,87 @@ def find_gaps(sample_time, timebase, maxgap): return index +pyglider_og_var_dict = {'time': 'TIME', + 'longitude': 'LONGITUDE', + 'latitude': 'LATITUDE', + 'depth': 'DEPTH', + 'pressure': 'PRES'} + + +def add_og1_metadata(ds, deployment_yaml): + # Translate back to og names from pyglider names + for pyglider_var, og_var in pyglider_og_var_dict.items(): + if pyglider_var in ds.keys(): + ds = ds.rename({pyglider_var: og_var}) + + # add some global attributes + attrs = ds.attrs + attrs["start_date"] = attrs["time_coverage_start"] + ds.attrs = attrs + + # Add empty variables for OG1 + for variable, variable_dict in deployment_yaml['oceangliders_empty_variables'].items(): + attrs = {name: str(val) for name, val in variable_dict.items() if name != 'value'} + ds[variable] = xr.DataArray(variable_dict['value'], attrs=attrs) + + + # Add sensors + for device, device_dict in 
deployment_yaml['glider_devices'].items(): + attrs = {name: str(val) for name, val in device_dict.items() if name != 'value'} + sensor_name = f"SENSOR_{device}_{device_dict['sensor_serial_number']}" + ds[sensor_name] = xr.DataArray(attrs=attrs) + + # add GPS variables + for vname in ["LATITUDE", "LONGITUDE", "TIME"]: + ds[f"{vname}_GPS"] = ds[vname].copy() + nan_val = np.nan + if vname == 'TIME': + nan_val = np.datetime64("NaT") + ds[f"{vname}_GPS"].values[ds["dead_reckoning"].values != 0] = nan_val + ds[f"{vname}_GPS"].attrs["long_name"] = f"{vname.lower()} of each GPS location" + ds["LATITUDE_GPS"].attrs["URI"] = ( + "https://vocab.nerc.ac.uk/collection/OG1/current/LAT_GPS/" + ) + ds["LONGITUDE_GPS"].attrs["URI"] = ( + "https://vocab.nerc.ac.uk/collection/OG1/current/LON_GPS/" + ) + + ds["TRAJECTORY"] = xr.DataArray( + ds.attrs["id"], + attrs={"cf_role": "trajectory_id", "long_name": "trajectory name"}, + ) + ds["PLATFORM_MODEL"] = xr.DataArray( + ds.attrs["glider_model"], + attrs={ + "long_name": "model of the glider", + "platform_model_vocabulary": "None", + }, + ) + ds["PLATFORM_SERIAL_NUMBER"] = xr.DataArray( + f"sea{ds.attrs['glider_serial'].zfill(3)}", + attrs={"long_name": "glider serial number"}, + ) + ds["DEPLOYMENT_TIME"] = np.nanmin(ds.TIME.values) + ds["DEPLOYMENT_TIME"].attrs = { + "long_name": "date of deployment", + "standard_name": "time", + "units": "seconds since 1970-01-01T00:00:00Z", + "calendar": "gregorian", + } + ds["DEPLOYMENT_LATITUDE"] = ds.LATITUDE.values[0] + ds["DEPLOYMENT_LATITUDE"].attrs = {"long_name": "latitude of deployment"} + ds["DEPLOYMENT_LONGITUDE"] = ds.LONGITUDE.values[0] + ds["DEPLOYMENT_LONGITUDE"].attrs = {"long_name": "longitude of deployment"} + for var_name in ds.keys(): + if "time" in var_name.lower() and var_name is not "TIME": + if 'units' in ds[var_name].attrs.keys(): + ds[var_name].attrs.pop('units') + if 'calendar' in ds[var_name].attrs.keys(): + ds[var_name].attrs.pop('calendar') + ds = ds.rename_dims({'TIME': 
'N_MEASUREMENTS'}) + return ds + + def _parse_gliderxml_pos(fname): """ DEPRECATED: use slocum.parse_gliderState instead From 778a6b9731c96558b4d825c0ef00858a5bd4dbbf Mon Sep 17 00:00:00 2001 From: Callum Rollo Date: Thu, 12 Dec 2024 16:44:07 +0100 Subject: [PATCH 2/2] add example yml and script --- .../example-seaexplorer-og1/ocean_gliders.yml | 278 ++++++++++++++++++ .../process_deploymentRealTime.py | 22 ++ 2 files changed, 300 insertions(+) create mode 100644 tests/example-data/example-seaexplorer-og1/ocean_gliders.yml create mode 100644 tests/example-data/example-seaexplorer-og1/process_deploymentRealTime.py diff --git a/tests/example-data/example-seaexplorer-og1/ocean_gliders.yml b/tests/example-data/example-seaexplorer-og1/ocean_gliders.yml new file mode 100644 index 0000000..b06410b --- /dev/null +++ b/tests/example-data/example-seaexplorer-og1/ocean_gliders.yml @@ -0,0 +1,278 @@ +metadata: + # mandatory Ocean Gliders global attributes of fixed value, do not change! + featureType: trajectory + # mandatory Ocean Gliders global attributes to be specified by the user + title: OceanGliders trajectory file + platform: sub-surface gliders + platform_vocabulary: https://vocab.nerc.ac.uk/collection/L06/current/27/ + id: sea063_20210822T122401_R + contributor_name: Callum Rollo, Louise Biddle, Olle Petersson, Aleksandra Mazur, Marcus Melin + contributor_role: Data Processor, PI, Operator, Operator, Operator + contributor_email: "callum.rollo@voiceoftheocean.org, louise.biddle@voiceoftheocean.org, , , " + contributor_role_vocabulary: https://vocab.nerc.ac.uk/collection/W08 + contributing_institutions: Voice of the Ocean Foundation + contributing_institutions_role: Operator + contributing_institutions_role_vocabulary: https://vocab.nerc.ac.uk/collection/W08/current/ + rtqc_method: Quality control performed with IOOS QARTOD toolbox v2.1 https://github.com/ioos/ioos_qc + Conventions: "CF-1.10, ACDD-1.3, OG-1.0" + # Other attributes + acknowledgement: This study used 
data collected and made freely available by Voice + of the Ocean Foundation (https://voiceoftheocean.org) accessed from https://erddap.observations.voiceoftheocean.org/erddap/index.html + institution: Voice of the Ocean Foundation + license: Creative Commons Attribution 4.0 (https://creativecommons.org/licenses/by/4.0/) + This study used data collected and made freely available by Voice of the Ocean + Foundation (https://voiceoftheocean.org) accessed from https://erddap.observations.voiceoftheocean.org/erddap/index.html + format_version: IOOS_Glider_NetCDF_v2.0.nc + glider_model: SeaExplorer + glider_instrument_name: seaexplorer + keywords: AUVS, Autonomous Underwater Vehicles, Oceans, Ocean Pressure, Water Pressure, + Ocean Temperature, Water Temperature, Salinity/Density, Conductivity, Density, + Salinity + keywords_vocabulary: GCMD Science Keywords + metadata_link: http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html + Metadata_Conventions: CF-1.6, Unidata Dataset Discovery v1.0 + naming_authority: Voice of the Ocean Foundation + processing_level: Data provided as is with no expressed or implied assurance of + quality assurance or quality control. L0 + publisher_email: callum.rollo@voiceoftheocean.org + publisher_name: Callum Rollo + publisher_url: https://voiceoftheocean.org + references: created with pyglider https://github.com/c-proof/pyglider + source: Observational data from a profiling glider. + standard_name_vocabulary: CF STandard Name Table v49 + sea_name: Baltic + transmission_system: IRIDIUM + glider_name: Ljung + glider_serial: '63' + wmo_id: '6801707' + comment: Altitude set to 6m for the entire mission duration. 
Zt/zs set to 5m for + the entire mission duration + creator_email: callum.rollo@voiceoftheocean.org + creator_name: Callum Rollo + creator_url: https://observations.voiceoftheocean.org + deployment_id: '22' + deployment_name: SAMBA + project: SAMBA + project_url: https://voiceoftheocean.org/samba-smart-autonomous-monitoring-of-the-baltic-sea/ + summary: 'Part of SAMBA continuous monitoring' + +oceangliders_empty_variables: + WMO_IDENTIFIER: + long_name: "wmo id" + value: 6801706 + PLATFORM_MODEL: + long_name: "name of the glider" + platform_model_vocabulary: "http://vocab.nerc.ac.uk/collection/B76/current/B7600002/" + value: 6801706 + +glider_devices: + ctd: + make: RBR + model: legato + sensor_serial_number: '206523' + long_name: RBR legato CTD + make_model: RBR legato CTD + factory_calibrated: 'Yes' + calibration_date: '2021-03-01' + optics: + make: Wetlabs + model: FLNTU + sensor_serial_number: '6661' + long_name: Wetlabs ECO Puck FLNTU + make_model: Wetlabs FLNTU + factory_calibrated: 'Yes' + calibration_date: '2021-02-01' + calibration_parameters: + Chl_DarkCounts: 48 + Chl_SF: 0.0121 + NTU_DarkCounts: 49 + NTU_SF: 0.006 + oxygen: + make: JFE Advantech + model: AROD-FT + sensor_serial_number: '0044' + long_name: JFE Advantech RINKO-FT + make_model: JFE Advantech AROD_FT + factory_calibrated: 'Yes' + calibration_date: '2021-01-23' + +# map between glider variables and netcdf variables. This shouldn't +# change too much. 
+netcdf_variables: + timebase: + source: NAV_LATITUDE + # Time and Place: + TIME: + source: time + long_name: Time elapsed since 1970-01-01T00:00:00Z + standard_name: time + axis: T + observation_type: "measured" + time_vocabulary: https://vocab.nerc.ac.uk/collection/OG1/current/TIME/ + _FillValue: -1 + valid_min: 1e9 + valid_max: 4e9 + coordinates: time depth latitude longitude + + LATITUDE: + source: NAV_LATITUDE + long_name: latitude of each measurement and GPS location + standard_name: latitude + units: degrees_north + axis: Y + coordinates: time depth latitude longitude + conversion: nmea2deg + comment: "Estimated between surface fixes" + observation_type: measured + platform: platform + reference: WGS84 + _FillValue: -9999.9 + valid_min: -90.0 + valid_max: 90.0 + latitude_vocabulary: https://vocab.nerc.ac.uk/collection/OG1/current/LAT/ + coordinate_reference_frame: urn:ogc:crs:EPSG::4326 + + LONGITUDE: + source: NAV_LONGITUDE + long_name: longitude of each measurement and GPS location + standard_name: longitude + units: degrees_east + axis: X + coordinates: time depth latitude longitude + conversion: nmea2deg + comment: "Estimated between surface fixes" + observation_type: measured + platform: platform + reference: WGS84 + _FillValue: -9999.9 + valid_min: -180.0 + valid_max: 180.0 + longitude_vocabulary: https://vocab.nerc.ac.uk/collection/OG1/current/LON/ + coordinate_reference_frame: urn:ogc:crs:EPSG::4326 + + dead_reckoning: + source: DeadReckoning + long_name: dead reckoning + standard_name: dead_reckoning + units: None + coordinates: time depth latitude longitude + + heading: + source: Heading + long_name: glider heading angle + standard_name: platform_orientation + units: degrees + coordinates: time depth latitude longitude + + pitch: + source: Pitch + long_name: glider pitch angle + standard_name: platform_pitch_angle + units: degrees + coordinates: time depth latitude longitude + + roll: + source: Roll + long_name: glider roll angle + standard_name: 
platform_roll_angle + units: degrees + coordinates: time depth latitude longitude + + # data parameters + conductivity: + source: LEGATO_CONDUCTIVITY + long_name: water conductivity + standard_name: sea_water_electrical_conductivity + units: mS cm-1 + coordinates: time depth latitude longitude + instrument: instrument_ctd + valid_min: "0." + valid_max: "10." + observation_type: "measured" + accuracy: "0.0003" + precision: "0.0001" + resolution: "0.00002" + + temperature: + source: LEGATO_TEMPERATURE + long_name: water temperature + standard_name: sea_water_temperature + units: Celsius + coordinates: time depth latitude longitude + instrument: instrument_ctd + valid_min: "-5" + valid_max: "50" + observation_type: "measured" + accuracy: "0.002" + precision: "0.001" + resolution: "0.0002" + + pressure: + source: LEGATO_PRESSURE + long_name: water pressure + standard_name: sea_water_pressure + units: dbar + coordinates: time depth latitude longitude + valid_min: "0" + valid_max: "2000" + positive: "down" + reference_datum: "sea-surface" + instrument: "instrument_ctd" + observation_type: "measured" + accuracy: "1" + precision: "2" + resolution: "0.02" + comment: "ctd pressure sensor" + + salinity: + source: LEGATO_SALINITY + long_name: water salinity + standard_name: sea_water_salinity + units: kg m-3 + coordinates: time depth latitude longitude + instrument: instrument_ctd + valid_min: "0." + valid_max: "50." 
+ observation_type: "calculated" + accuracy: "0.0003" + precision: "0.0001" + resolution: "0.00002" + +# optics: + chlorophyll: + source: FLNTU_CHL_SCALED + long_name: chlorophyll + standard_name: concentration_of_chlorophyll_in_sea_water + units: mg m-3 + coordinates: time depth latitude longitude + # example of how to use median rather than mean when producing gridded average netCDF + average_method: median + + cdom: + source: FLNTU_NTU_SCALED + long_name: flntu variable + units: arbitrary + coordinates: time depth latitude longitude + # Use geometric mean for gridded netCDF. Useful for PAR + average_method: "geometric mean" + + + +# Oxygen + oxygen_concentration: + source: AROD_FT_DO + long_name: oxygen concentration + standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water + correct_oxygen: "True" + reference_salinity: "0" + units: umol l-1 + coordinates: time depth latitude longitude + + temperature_oxygen: + source: AROD_FT_TEMP + long_name: oxygen sensor temperature + standard_name: temperature_of_sensor_for_oxygen_in_sea_water + units: Celsius + coordinates: time depth latitude longitude + + diff --git a/tests/example-data/example-seaexplorer-og1/process_deploymentRealTime.py b/tests/example-data/example-seaexplorer-og1/process_deploymentRealTime.py new file mode 100644 index 0000000..1694b57 --- /dev/null +++ b/tests/example-data/example-seaexplorer-og1/process_deploymentRealTime.py @@ -0,0 +1,22 @@ +import logging +import os +import pyglider.seaexplorer as seaexplorer +logging.basicConfig(level='INFO') + +rawdir = './realtime_raw/' +rawncdir = './realtime_rawnc/' +deploymentyaml = './ocean_gliders.yml' +l0tsdir = './L0-timeseries/' + +if __name__ == '__main__': + # clean last processing... + os.system('rm ' + rawncdir + '* ' + l0tsdir + '* ') + + # turn seaexplorer zipped csvs into nc files. 
+ seaexplorer.raw_to_rawnc(rawdir, rawncdir, deploymentyaml) + # merge individual netcdf files into single netcdf files *.gli*.nc and *.pld1*.nc + seaexplorer.merge_parquet(rawncdir, rawncdir, deploymentyaml, kind='sub') + # Make OceanGliders valid netcdf file + outname = seaexplorer.raw_to_timeseries(rawncdir, l0tsdir, deploymentyaml, kind='sub', og_format=True) + +