Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

start refactor of get_profiles #124

Closed
wants to merge 12 commits into from
4 changes: 0 additions & 4 deletions pyglider/seaexplorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,6 @@ def _remove_fill_values(df, fill_value=9999):


def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
profile_filt_time=100, profile_min_time=300,
maxgap=10, interpolate=False, fnamesuffix=''):
"""
A little different than above, for the 4-file version of the data set.
Expand Down Expand Up @@ -433,9 +432,6 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
# some derived variables:
ds = utils.get_glider_depth(ds)
ds = utils.get_distance_over_ground(ds)
# ds = utils.get_profiles(ds)
ds = utils.get_profiles_new(ds, filt_time=profile_filt_time,
profile_min_time=profile_min_time)
ds = utils.get_derived_eos_raw(ds)

# somehow this comes out unsorted:
Expand Down
4 changes: 0 additions & 4 deletions pyglider/slocum.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,8 +767,6 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, *,
ds.attrs['deployment_end'] = str(end)
_log.debug(ds.depth.values[:100])
_log.debug(ds.depth.values[2000:2100])
ds = utils.get_profiles_new(
ds, filt_time=profile_filt_time, profile_min_time=profile_min_time)
_log.debug(ds.depth.values[:100])
_log.debug(ds.depth.values[2000:2100])

Expand Down Expand Up @@ -939,8 +937,6 @@ def binary_to_timeseries(indir, cachedir, outdir, deploymentyaml, *,
_log.debug(ds.depth.values[:100])
_log.debug(ds.depth.values[2000:2100])

ds = utils.get_profiles_new(
ds, filt_time=profile_filt_time, profile_min_time=profile_min_time)
_log.debug(ds.depth.values[:100])
_log.debug(ds.depth.values[2000:2100])

Expand Down
69 changes: 11 additions & 58 deletions pyglider/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np
from scipy.signal import argrelextrema
import gsw
import shutil
import logging
import yaml

Expand Down Expand Up @@ -84,67 +85,13 @@ def get_glider_depth(ds):
return ds


def get_profiles(ds, min_dp=10.0, inversion=3., filt_length=7,
                 min_nsamples=14):
    """
    Not currently used...

    Make two variables, *profile_index* and *profile_direction*, from the
    pressure record.  This version is good for lots of data, less good for
    sparse data.

    Parameters
    ----------
    ds : dataset-like (presumably an `xarray.Dataset` -- confirm with caller)
        Must expose a *pressure* variable; the new variables are attached
        on the *time* dimension.
    min_dp : float, default=10.0
        Minimum absolute pressure change between the two ends of a segment
        for the segment to be counted as a profile (same units as
        *pressure*).
    inversion : float, default=3.
        Unused; retained so existing callers keep working.
    filt_length : int, default=7
        Length, in samples, of the boxcar filter applied to the non-NaN
        pressure values before inflection points are located.
    min_nsamples : int, default=14
        Minimum number of non-NaN samples between two successive
        inflection points for the segment to be counted as a profile.

    Returns
    -------
    ds
        The same object, with *profile_index* and *profile_direction*
        added.
    """
    # Start from all-NaN arrays; samples never assigned to a segment stay NaN.
    profile = ds.pressure.values * np.nan
    direction = ds.pressure.values * np.nan
    pronum = 1
    lastpronum = 0

    # Work only on the valid pressure samples, smoothed with a boxcar filter.
    good = np.where(~np.isnan(ds.pressure))[0]
    p = np.convolve(ds.pressure.values[good],
                    np.ones(filt_length) / filt_length, 'same')
    dpall = np.diff(p)
    # An inflection is where consecutive smoothed differences change sign.
    inflect = np.where(dpall[:-1] * dpall[1:] < 0)[0]
    for start, stop in zip(inflect[:-1], inflect[1:]):
        nprofile = stop - start
        # Map back from the compressed (non-NaN) index to the full index.
        inds = np.arange(good[start], good[stop] + 1) + 1
        # NOTE(review): inds[[-1, 0]] orders the samples (last, first), so
        # dp = p_first - p_last, which is negative while pressure increases.
        # That looks opposite to the "1 = descending" convention written in
        # the attributes below -- confirm before relying on the sign.
        dp = np.diff(ds.pressure[inds[[-1, 0]]])
        # Use the min_dp argument (previously a hard-coded 10 ignored it).
        if (nprofile >= min_nsamples) and (np.abs(dp) > min_dp):
            _log.debug('Good')
            direction[inds] = np.sign(dp)
            profile[inds] = pronum
            lastpronum = pronum
            pronum += 1
        else:
            # Too short or too shallow: mark as "between profiles".
            profile[good[start]:good[stop]] = lastpronum + 0.5

    attrs = collections.OrderedDict([
        ('long_name', 'profile index'),
        ('units', '1'),
        ('comment',
         'N = inside profile N, N + 0.5 = between profiles N and N + 1'),
        ('sources', 'time pressure'),
        ('method', 'get_profiles'),
        ('min_dp', min_dp),
        ('filt_length', filt_length),
        ('min_nsamples', min_nsamples)])
    ds['profile_index'] = (('time'), profile, attrs)

    attrs = collections.OrderedDict([
        ('long_name', 'glider vertical speed direction'),
        ('units', '1'),
        ('comment',
         '-1 = ascending, 0 = inflecting or stalled, 1 = descending'),
        ('sources', 'time pressure'),
        ('method', 'get_profiles')])
    ds['profile_direction'] = (('time'), direction, attrs)
    return ds


def get_profiles_new(ds, min_dp=10.0, filt_time=100, profile_min_time=300):
def get_profiles(nc, min_dp=10.0, filt_time=100, profile_min_time=300):
"""
Find profiles in a glider timeseries:

Parameters
----------
ds : `xarray.Dataset`
nc : `str or path` path to netCDF file
Must have *time* coordinate and *pressure* as a variable
min_dp : float, default=10.0
Minimum distance a profile must transit to be considered a profile, in dbar.
Expand All @@ -157,6 +104,7 @@ def get_profiles_new(ds, min_dp=10.0, filt_time=100, profile_min_time=300):
Minimum time length of profile in s.
"""

ds = xr.open_dataset(nc)
profile = ds.pressure.values * 0
direction = ds.pressure.values * 0
pronum = 1
Expand Down Expand Up @@ -246,7 +194,12 @@ def get_profiles_new(ds, min_dp=10.0, filt_time=100, profile_min_time=300):
('sources', 'time pressure'),
('method', 'get_profiles_new')])
ds['profile_direction'] = (('time'), direction, attrs)
return ds
tempfile = "tmp.nc"
ds.to_netcdf(tempfile, 'w',
encoding={'time': {'units':
'seconds since 1970-01-01T00:00:00Z'}})
shutil.move(tempfile, nc)
return nc


def get_derived_eos_raw(ds):
Expand Down Expand Up @@ -711,6 +664,6 @@ def _get_deployment(deploymentyaml):
return deployment


__all__ = ['get_distance_over_ground', 'get_glider_depth', 'get_profiles_new',
__all__ = ['get_distance_over_ground', 'get_glider_depth', 'get_profiles',
'get_derived_eos_raw', "fill_metadata", "nmea2deg",
"gappy_fill_vertical", "oxygen_concentration_correction"]
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
seaexplorer.merge_parquet(rawncdir, rawncdir, deploymentyaml, kind='sub')
# Make level-0 timeseries netcdf file from the raw files...
outname = seaexplorer.raw_to_timeseries(rawncdir, l0tsdir, deploymentyaml, kind='sub')
outname = pgutils.get_profiles(outname)
ncprocess.extract_timeseries_profiles(outname, profiledir, deploymentyaml)
outname2 = ncprocess.make_gridfiles(outname, griddir, deploymentyaml)
# make profile netcdf files for ioos gdac...
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

# Make level-1 timeseries netcdf file from the raw files...
outname = seaexplorer.raw_to_timeseries(rawncdir, l0tsdir, deploymentyaml, kind='sub')
outname = pgutils.get_profiles(outname)
ncprocess.extract_timeseries_profiles(outname, profiledir, deploymentyaml)
outname2 = ncprocess.make_gridfiles(outname, griddir, deploymentyaml)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import shutil
import pyglider.ncprocess as ncprocess
import pyglider.utils as utils
import cproofutils.plotting as cpplot
import cproofutils.plotlinepmap as lpmap
import pyglider.slocum as slocum
Expand Down Expand Up @@ -47,8 +48,8 @@

if True:
# Make level-1 timeseries netcdf file from the raw files...
outname = slocum.raw_to_timeseries(rawdir, l1tsdir, deploymentyaml,
profile_filt_time=400, profile_min_time=100)
outname = slocum.raw_to_timeseries(rawdir, l1tsdir, deploymentyaml)
outname = utils.get_profiles(outname, filt_time=400, profile_min_time=100)
# make profile netcdf files for ioos gdac...
#ncprocess.extract_L1timeseries_profiles(outname, profiledir, deploymentyaml)
# make grid of dataset....
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
if do_direct:
# turn *.sdb and *.tbd into timeseries netcdf files
outname = slocum.binary_to_timeseries(
binarydir, cacdir, l1tsdir, deploymentyaml, search='*.[s|t]bd',
profile_filt_time=20, profile_min_time=20)
binarydir, cacdir, l1tsdir, deploymentyaml, search='*.[s|t]bd')
outname = pgutils.get_profiles(outname,filt_time=20, profile_min_time=20)
else:
# turn *.EBD and *.DBD into *.ebd.nc and *.dbd.nc netcdf files.
slocum.binary_to_rawnc(
Expand Down
6 changes: 6 additions & 0 deletions tests/test_pyglider.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pyglider.ncprocess as ncprocess
import pyglider.seaexplorer as seaexplorer
import pyglider.slocum as slocum
import pyglider.utils as pgutils

library_dir = Path(__file__).parent.parent.absolute()
example_dir = library_dir / 'tests/example-data/'
Expand All @@ -21,6 +22,7 @@
seaexplorer.merge_parquet(rawncdir, rawncdir, deploymentyaml, kind='sub')
outname = seaexplorer.raw_to_L0timeseries(rawncdir, l0tsdir,
deploymentyaml, kind='sub')
outname = pgutils.get_profiles(outname)
output = xr.open_dataset(outname)
# Open test data file
test_data = xr.open_dataset(
Expand Down Expand Up @@ -69,8 +71,10 @@ def test_example_seaexplorer_metadata():
l0tsdir_interp = str(example_dir / 'example-seaexplorer/L0-timeseries-test-interp/') + '/'

outname_interp = seaexplorer.raw_to_L0timeseries(rawncdir, l0tsdir_interp, interp_yaml, kind='sub')
outname_interp = pgutils.get_profiles(outname_interp)
output_interp = xr.open_dataset(outname_interp)


@pytest.mark.parametrize("var", variables)
def test_example_seaexplorer_interp_nrt(var):
assert output_interp[var].attrs == test_data[var].attrs
Expand All @@ -93,6 +97,7 @@ def test_example_seaexplorer_interp_nrt(var):
seaexplorer.merge_parquet(rawncdir, rawncdir, deploymentyaml_raw, kind='raw')
outname_raw = seaexplorer.raw_to_L0timeseries(rawncdir, l0tsdir,
deploymentyaml_raw, kind='raw')
outname_raw = pgutils.get_profiles(outname_raw)
output_raw = xr.open_dataset(outname_raw)
# Open test data file
test_data_raw = xr.open_dataset(
Expand Down Expand Up @@ -134,6 +139,7 @@ def test_example_seaexplorer_metadata_raw():
l0tsdir_interp_raw = str(example_dir / 'example-seaexplorer-raw/L0-timeseries-test-interp/') + '/'

outname_interp_raw = seaexplorer.raw_to_L0timeseries(rawncdir, l0tsdir_interp_raw, interp_yaml, kind='raw')
outname_interp_raw = pgutils.get_profiles(outname_interp_raw)
output_interp_raw = xr.open_dataset(outname_interp_raw)

@pytest.mark.parametrize("var", variables)
Expand Down
6 changes: 4 additions & 2 deletions tests/test_slocum.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pyglider.ncprocess as ncprocess
import pyglider.seaexplorer as seaexplorer
import pyglider.slocum as slocum
import pyglider.utils as pgutils



Expand All @@ -33,8 +34,9 @@

# turn *.sbd and *.tbd into timeseries netcdf files
outname_slocum = slocum.binary_to_timeseries(binarydir, cacdir, tsdir, deploymentyaml_slocum,
search='*.[s|t]bd', profile_filt_time=20,
profile_min_time=20)
search='*.[s|t]bd')
outname_slocum = pgutils.get_profiles(outname_slocum, filt_time=20, profile_min_time=20)

# make profiles...
ncprocess.extract_timeseries_profiles(outname_slocum, profiledir, deploymentyaml_slocum,
force=True)
Expand Down