diff --git a/ocean_data_parser/geo.py b/ocean_data_parser/geo.py
index 075d7304..de0c57d9 100644
--- a/ocean_data_parser/geo.py
+++ b/ocean_data_parser/geo.py
@@ -1,15 +1,14 @@
 import json
 import os

-from geographiclib.geodesic import Geodesic
-from shapely.geometry import shape, Point, Polygon
+from geographiclib.geodesic import Geodesic
+from shapely.geometry import Point, Polygon, shape


 def read_geojson(
     path: str,
     encoding: str = "UTF-8",
 ) -> dict:
-
     """Parse geojson files and return it as a dictionary.

     If features are available, generate a shapely feature object.
diff --git a/ocean_data_parser/metadata/pdc.py b/ocean_data_parser/metadata/pdc.py
index f906451e..3143b294 100644
--- a/ocean_data_parser/metadata/pdc.py
+++ b/ocean_data_parser/metadata/pdc.py
@@ -7,6 +7,7 @@

 logger = logging.getLogger(__name__)

+
 # def fgdc_to_acdd(xml)
 def fgdc_to_acdd(url=None, xml=None):
     """Convert PDC FGDC XML format to an ACDD 1.3 standard dictionary format."""
diff --git a/ocean_data_parser/read/__init__.py b/ocean_data_parser/read/__init__.py
index f150d15b..9718f53a 100644
--- a/ocean_data_parser/read/__init__.py
+++ b/ocean_data_parser/read/__init__.py
@@ -1 +1 @@
-from ocean_data_parser.read.auto import file, detect_file_format
+from ocean_data_parser.read.auto import detect_file_format, file
diff --git a/ocean_data_parser/read/amundsen.py b/ocean_data_parser/read/amundsen.py
index 35d14fed..1adf0035 100644
--- a/ocean_data_parser/read/amundsen.py
+++ b/ocean_data_parser/read/amundsen.py
@@ -1,14 +1,15 @@
 """Module use to handle int file format generated historically ArcticNet and the Amundsen inc."""

 __version__ = "0.1.0"
-import re
+import json
 import logging
 import os
-import json
+import re

 import pandas as pd
 from gsw import z_from_p
-from .utils import standardize_dataset, get_history_handler
+
+from .utils import get_history_handler, standardize_dataset

 logger = logging.getLogger(__name__)
 string_attributes = ["Cruise_Number", "Cruise_Name", "Station"]
diff --git a/ocean_data_parser/read/auto.py b/ocean_data_parser/read/auto.py
index dc6ae518..a9426e14 100644
--- a/ocean_data_parser/read/auto.py
+++ b/ocean_data_parser/read/auto.py
@@ -1,7 +1,7 @@
-from importlib import import_module
-import re
-import os
 import logging
+import os
+import re
+from importlib import import_module

 logger = logging.getLogger(__name__)

@@ -84,7 +84,6 @@ def detect_file_format(file: str, encoding: str = "UTF-8") -> str:


 def file(path, parser=None, **kwargs):
-
     # Review the file format if no parser is specified
     if parser is None:
         parser = detect_file_format(path)
diff --git a/ocean_data_parser/read/dfo/odf.py b/ocean_data_parser/read/dfo/odf.py
index cd8b4795..7f7e81c8 100644
--- a/ocean_data_parser/read/dfo/odf.py
+++ b/ocean_data_parser/read/dfo/odf.py
@@ -4,6 +4,7 @@
 the different data formats developped by the different Canadian DFO offices.
 """
 from typing import Union
+
 from ocean_data_parser.read.dfo.odf_source.process import (
     parse_odf,
     read_config,
diff --git a/ocean_data_parser/read/dfo/odf_source/attributes.py b/ocean_data_parser/read/dfo/odf_source/attributes.py
index f7357f2b..7cfe6971 100644
--- a/ocean_data_parser/read/dfo/odf_source/attributes.py
+++ b/ocean_data_parser/read/dfo/odf_source/attributes.py
@@ -10,6 +10,7 @@
 from difflib import get_close_matches

 import pandas as pd
+
 from ocean_data_parser.read.seabird import (
     get_seabird_instrument_from_header,
     get_seabird_processing_history,
diff --git a/ocean_data_parser/read/dfo/odf_source/process.py b/ocean_data_parser/read/dfo/odf_source/process.py
index 393c41dc..b1d4e024 100644
--- a/ocean_data_parser/read/dfo/odf_source/process.py
+++ b/ocean_data_parser/read/dfo/odf_source/process.py
@@ -19,9 +19,9 @@
 # )
 import ocean_data_parser.read.dfo.odf_source.attributes as attributes
 import ocean_data_parser.read.dfo.odf_source.parser as odf_parser
+from ocean_data_parser.geo import get_geo_code, get_nearest_station, read_geojson
 from ocean_data_parser.read import seabird
 from ocean_data_parser.read.utils import standardize_dataset
-from ocean_data_parser.geo import get_nearest_station, read_geojson, get_geo_code

 tqdm.pandas()

diff --git a/ocean_data_parser/read/electricblue.py b/ocean_data_parser/read/electricblue.py
index 4d1e17d1..1089def9 100644
--- a/ocean_data_parser/read/electricblue.py
+++ b/ocean_data_parser/read/electricblue.py
@@ -1,10 +1,11 @@
 import logging
+import re

 import pandas as pd
-import re
+
 from ocean_data_parser.read.utils import (
-    standardize_dataset,
     rename_variables_to_valid_netcdf,
+    standardize_dataset,
 )

 logger = logging.getLogger(__name__)
@@ -114,7 +115,6 @@ def csv(


 def log_csv(path, encoding="UTF-8", rename_variables=True):
-
     df = pd.read_csv(path, encoding=encoding, parse_dates=True, index_col=["time"])
     ds = df.to_xarray()
     # add default attributes
diff --git a/ocean_data_parser/read/nmea.py b/ocean_data_parser/read/nmea.py
index f77ce5a1..9a35f028 100644
--- a/ocean_data_parser/read/nmea.py
+++ b/ocean_data_parser/read/nmea.py
@@ -214,7 +214,6 @@ def rename_variable(name):
     nmea = []
     long_names = {}
     with open(path, encoding=encoding) as f:
-
         for row, line in enumerate(f):
             if not line:
                 continue
diff --git a/ocean_data_parser/read/onset.py b/ocean_data_parser/read/onset.py
index e4cd97a7..6d9801ea 100644
--- a/ocean_data_parser/read/onset.py
+++ b/ocean_data_parser/read/onset.py
@@ -88,7 +88,6 @@ def _parse_onset_time(time, timezone="UTC"):


 def _parse_onset_csv_header(header_lines):
-
     full_header = "\n".join(header_lines)
     header = {
         "instrument_manufacturer": "Onset",
@@ -157,7 +156,6 @@
     read_csv_kwargs: dict = None,
     standardize_variable_names: bool = True,
 ):
-
     """tidbit_csv parses the Onset Tidbit CSV format into a pandas dataframe

     Returns:
diff --git a/ocean_data_parser/read/rbr.py b/ocean_data_parser/read/rbr.py
index e64c8e42..579b728a 100644
--- a/ocean_data_parser/read/rbr.py
+++ b/ocean_data_parser/read/rbr.py
@@ -5,6 +5,7 @@
 import re

 import pandas as pd
+
 from ocean_data_parser.read.utils import test_parsed_dataset


diff --git a/ocean_data_parser/read/seabird.py b/ocean_data_parser/read/seabird.py
index 257ec422..6c194102 100644
--- a/ocean_data_parser/read/seabird.py
+++ b/ocean_data_parser/read/seabird.py
@@ -237,7 +237,6 @@ def read_number_line(line):
     header["history"] = []
     read_next_line = True
     while "*END*" not in line and line.startswith(("*", "#")):
-
         if read_next_line:
             line = f.readline()
         else:
@@ -490,7 +489,6 @@ def generate_binned_attributes(ds, seabird_header):
     ds.attrs["time_coverage_resolution"] = pd.Timedelta(bin_str).isoformat()
     for var in ds:
         if (len(ds.dims) == 1 and len(ds[var].dims) == 1) or binvar in ds[var].dims:
-
             ds[var].attrs["cell_method"] = f"{binvar}: mean (interval: {bin_str})"

     return ds
diff --git a/ocean_data_parser/read/star_oddi.py b/ocean_data_parser/read/star_oddi.py
index 0e5ddc51..164cb4c4 100644
--- a/ocean_data_parser/read/star_oddi.py
+++ b/ocean_data_parser/read/star_oddi.py
@@ -1,7 +1,7 @@
-import pandas as pd
+import logging
 import re

-import logging
+import pandas as pd

 logger = logging.getLogger(__name__)

diff --git a/ocean_data_parser/read/van_essen_instruments.py b/ocean_data_parser/read/van_essen_instruments.py
index 73a7767c..b1f49a1c 100644
--- a/ocean_data_parser/read/van_essen_instruments.py
+++ b/ocean_data_parser/read/van_essen_instruments.py
@@ -5,7 +5,7 @@

 import pandas as pd

-from .utils import test_parsed_dataset, standardize_dataset
+from .utils import standardize_dataset, test_parsed_dataset

 logger = logging.getLogger(__name__)

diff --git a/ocean_data_parser/tools/postgresql.py b/ocean_data_parser/tools/postgresql.py
index 6d628ecb..ef923eba 100644
--- a/ocean_data_parser/tools/postgresql.py
+++ b/ocean_data_parser/tools/postgresql.py
@@ -1,6 +1,6 @@
-from io import StringIO
 import csv
 import logging
+from io import StringIO

 logger = logging.getLogger(__name__)

diff --git a/scripts/pdc_amundsen_conversion.py b/scripts/pdc_amundsen_conversion.py
index 1ff8a414..8e952e71 100644
--- a/scripts/pdc_amundsen_conversion.py
+++ b/scripts/pdc_amundsen_conversion.py
@@ -4,13 +4,14 @@
 Those NetCDFs are then served by the PDC Hyrax and CIOOS ERDAP servers
 """

-from glob import glob
 import logging
+from glob import glob

-from ocean_data_parser.read import amundsen
-from ocean_data_parser.metadata import pdc
 from tqdm import tqdm

+from ocean_data_parser.metadata import pdc
+from ocean_data_parser.read import amundsen
+
 FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
 logging.basicConfig(
     filename="pdc-amundsen-conversion.log", level=logging.WARNING, format=FORMAT
diff --git a/setup.py b/setup.py
index 1a12747e..189bbe55 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,4 @@
-from setuptools import setup, find_packages
-
+from setuptools import find_packages, setup

 setup(
     name="ocean_data_parser",
diff --git a/tests/parser_detection_test.py b/tests/parser_detection_test.py
index 5d1c5cef..9b2468a3 100644
--- a/tests/parser_detection_test.py
+++ b/tests/parser_detection_test.py
@@ -1,10 +1,10 @@
 import logging
+import os
+import re
 import unittest
 from glob import glob
-import re
-import os

-from ocean_data_parser.read import file, detect_file_format
+from ocean_data_parser.read import detect_file_format, file

 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger()
diff --git a/tests/parsers_test.py b/tests/parsers_test.py
index 995f8c8e..3080903b 100644
--- a/tests/parsers_test.py
+++ b/tests/parsers_test.py
@@ -1,26 +1,27 @@
 import logging
+import os
 import re
 import unittest
 from glob import glob
-import os

-import xarray as xr
-import pandas as pd
 import numpy as np
+import pandas as pd
+import xarray as xr
+
 from ocean_data_parser.read import (
-    seabird,
-    van_essen_instruments,
-    onset,
     amundsen,
-    electricblue,
-    star_oddi,
-    rbr,
-    sunburst,
     dfo,
+    electricblue,
+    file,
     nmea,
+    onset,
     pme,
+    rbr,
+    seabird,
+    star_oddi,
+    sunburst,
+    van_essen_instruments,
 )
-from ocean_data_parser.read import file

 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger()
@@ -80,7 +81,6 @@ def ignore_from_attr(attr, expression, placeholder):

         test = standardize_dataset(test)
         if not ref.identical(test):
-
             # Global attributes
             for key in ref.attrs.keys():
                 if not_identical(ref.attrs[key], test.attrs.get(key)):
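Note: the changes above are limited to import reordering, import-group spacing, and removal of stray blank lines, which is consistent with a mechanical isort/black pass. The diff itself does not name the tooling, so the snippet below is only an illustrative sketch (using isort's public Python API, isort.code) showing that isort's default settings reproduce the new import order in ocean_data_parser/read/auto.py; it is not the project's documented workflow.

import isort

# Imports as they appear on the "-" lines of the ocean_data_parser/read/auto.py hunk.
old_imports = (
    "from importlib import import_module\n"
    "import re\n"
    "import os\n"
    "import logging\n"
)

# isort's default configuration alphabetizes each section and places plain
# "import x" statements before "from x import y" statements, matching the
# "+" lines of that hunk.
print(isort.code(old_imports))
# Expected output:
# import logging
# import os
# import re
# from importlib import import_module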