From cab69b83c7c184ddd78dd122a1aa972d619af34a Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Wed, 27 Dec 2023 16:18:20 -0500 Subject: [PATCH] ENH: Splitting out the datalib module into its own package. --- doc/source/conf.py | 2 +- podpac/core/algorithm/stats.py | 24 +- podpac/core/coordinates/polar_coordinates.py | 2 +- podpac/core/data/csv_source.py | 1 + podpac/core/data/dataset_source.py | 1 + podpac/core/data/file_source.py | 1 + podpac/core/data/zarr_source.py | 1 + .../nearest_neighbor_interpolator.py | 2 +- .../core/interpolation/none_interpolator.py | 1 + .../interpolation/test/test_interpolation.py | 1 + podpac/core/test/test_units.py | 6 +- podpac/core/utils.py | 2 + podpac/datalib/__init__.py | 26 +- podpac/datalib/cosmos_stations.py | 459 ------------ podpac/datalib/drought_monitor.py | 108 --- podpac/datalib/egi.py | 604 ---------------- podpac/datalib/gfs.py | 136 ---- podpac/datalib/intake_catalog.py | 206 ------ podpac/datalib/modis_pds.py | 342 --------- podpac/datalib/nasaCMR.py | 186 ----- podpac/datalib/nsidc_smap_opendap_url.txt | 1 - podpac/datalib/satutils.py | 295 -------- podpac/datalib/smap_egi.py | 309 -------- podpac/datalib/soilgrids.py | 119 --- podpac/datalib/soilscape.py | 646 ----------------- podpac/datalib/terraintiles.py | 556 -------------- podpac/datalib/test/__init__.py | 0 podpac/datalib/test/coordinates_for_tests.py | 47 -- podpac/datalib/test/test_cosmos.py | 32 - podpac/datalib/test/test_gfs.py | 61 -- podpac/datalib/test/test_modis.py | 16 - podpac/datalib/test/test_satutils.py | 42 -- podpac/datalib/test/test_smap_egi.py | 20 - podpac/datalib/test/test_soilgrids.py | 16 - podpac/datalib/test/test_soilscape.py | 41 -- podpac/datalib/test/test_terrain_tiles.py | 38 - podpac/datalib/test/test_weathercitizen.py | 21 - podpac/datalib/weathercitizen.py | 684 ------------------ .../datalib/weathercitizen_sensorburst_pb2.py | 585 --------------- setup.py | 13 +- 40 files changed, 44 insertions(+), 5609 deletions(-) delete mode 100644 podpac/datalib/cosmos_stations.py delete mode 100644 podpac/datalib/drought_monitor.py delete mode 100644 podpac/datalib/egi.py delete mode 100644 podpac/datalib/gfs.py delete mode 100644 podpac/datalib/intake_catalog.py delete mode 100644 podpac/datalib/modis_pds.py delete mode 100644 podpac/datalib/nasaCMR.py delete mode 100644 podpac/datalib/nsidc_smap_opendap_url.txt delete mode 100644 podpac/datalib/satutils.py delete mode 100644 podpac/datalib/smap_egi.py delete mode 100644 podpac/datalib/soilgrids.py delete mode 100644 podpac/datalib/soilscape.py delete mode 100644 podpac/datalib/terraintiles.py delete mode 100644 podpac/datalib/test/__init__.py delete mode 100644 podpac/datalib/test/coordinates_for_tests.py delete mode 100644 podpac/datalib/test/test_cosmos.py delete mode 100644 podpac/datalib/test/test_gfs.py delete mode 100644 podpac/datalib/test/test_modis.py delete mode 100644 podpac/datalib/test/test_satutils.py delete mode 100644 podpac/datalib/test/test_smap_egi.py delete mode 100644 podpac/datalib/test/test_soilgrids.py delete mode 100644 podpac/datalib/test/test_soilscape.py delete mode 100644 podpac/datalib/test/test_terrain_tiles.py delete mode 100644 podpac/datalib/test/test_weathercitizen.py delete mode 100644 podpac/datalib/weathercitizen.py delete mode 100644 podpac/datalib/weathercitizen_sensorburst_pb2.py diff --git a/doc/source/conf.py b/doc/source/conf.py index 22963c318..ff4ab5bae 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -85,7 +85,7 @@ # # This is also used if you do 
content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = 'en' +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/podpac/core/algorithm/stats.py b/podpac/core/algorithm/stats.py index 019316668..5cc8f9700 100644 --- a/podpac/core/algorithm/stats.py +++ b/podpac/core/algorithm/stats.py @@ -80,7 +80,7 @@ def chunk_size(self): chunk_size = podpac.settings["CHUNK_SIZE"] if chunk_size == "auto": - return 1024**2 # TODO + return 1024 ** 2 # TODO else: return chunk_size @@ -611,7 +611,7 @@ def reduce_chunked(self, xs, output): Nx = np.isfinite(x).sum(dim=self._dims) M1x = x.mean(dim=self._dims) Ex = x - M1x - Ex2 = Ex**2 + Ex2 = Ex ** 2 Ex3 = Ex2 * Ex M2x = (Ex2).sum(dim=self._dims) M3x = (Ex3).sum(dim=self._dims) @@ -632,13 +632,13 @@ def reduce_chunked(self, xs, output): n = Nb + Nx NNx = Nb * Nx - M3.data[b] += M3x + d**3 * NNx * (Nb - Nx) / n**2 + 3 * d * (Nb * M2x - Nx * M2b) / n - M2.data[b] += M2x + d**2 * NNx / n + M3.data[b] += M3x + d ** 3 * NNx * (Nb - Nx) / n ** 2 + 3 * d * (Nb * M2x - Nx * M2b) / n + M2.data[b] += M2x + d ** 2 * NNx / n M1.data[b] += d * Nx / n N.data[b] = n # calculate skew - skew = np.sqrt(N) * M3 / np.sqrt(M2**3) + skew = np.sqrt(N) * M3 / np.sqrt(M2 ** 3) return skew @@ -697,9 +697,9 @@ def reduce_chunked(self, xs, output): Nx = np.isfinite(x).sum(dim=self._dims) M1x = x.mean(dim=self._dims) Ex = x - M1x - Ex2 = Ex**2 + Ex2 = Ex ** 2 Ex3 = Ex2 * Ex - Ex4 = Ex2**2 + Ex4 = Ex2 ** 2 M2x = (Ex2).sum(dim=self._dims) M3x = (Ex3).sum(dim=self._dims) M4x = (Ex4).sum(dim=self._dims) @@ -724,18 +724,18 @@ def reduce_chunked(self, xs, output): M4.data[b] += ( M4x - + d**4 * NNx * (Nb**2 - NNx + Nx**2) / n**3 - + 6 * d**2 * (Nb**2 * M2x + Nx**2 * M2b) / n**2 + + d ** 4 * NNx * (Nb ** 2 - NNx + Nx ** 2) / n ** 3 + + 6 * d ** 2 * (Nb ** 2 * M2x + Nx ** 2 * M2b) / n ** 2 + 4 * d * (Nb * M3x - Nx * M3b) / n ) - M3.data[b] += M3x + d**3 * NNx * (Nb - Nx) / n**2 + 3 * d * (Nb * M2x - Nx * M2b) / n - M2.data[b] += M2x + d**2 * NNx / n + M3.data[b] += M3x + d ** 3 * NNx * (Nb - Nx) / n ** 2 + 3 * d * (Nb * M2x - Nx * M2b) / n + M2.data[b] += M2x + d ** 2 * NNx / n M1.data[b] += d * Nx / n N.data[b] = n # calculate kurtosis - kurtosis = N * M4 / M2**2 - 3 + kurtosis = N * M4 / M2 ** 2 - 3 return kurtosis diff --git a/podpac/core/coordinates/polar_coordinates.py b/podpac/core/coordinates/polar_coordinates.py index 4b8454c22..64394fccf 100644 --- a/podpac/core/coordinates/polar_coordinates.py +++ b/podpac/core/coordinates/polar_coordinates.py @@ -58,7 +58,7 @@ def __init__(self, center, radius, theta=None, theta_size=None, dims=None): def _validate_dims(self, d): val = d["value"] for dim in val: - if dim not in ["lat", "lon"]: # Hardcoding example. What is actually trying to be accomplished? + if dim not in ["lat", "lon"]: # Hardcoding example. What is actually trying to be accomplished? raise ValueError("PolarCoordinates dims must be 'lat' or 'lon', not '%s'" % dim) if val[0] == val[1]: raise ValueError("Duplicate dimension '%s'" % val[0]) diff --git a/podpac/core/data/csv_source.py b/podpac/core/data/csv_source.py index 7498edf23..d04c21e4e 100644 --- a/podpac/core/data/csv_source.py +++ b/podpac/core/data/csv_source.py @@ -44,6 +44,7 @@ class CSVRaw(FileKeysMixin, LoadFileMixin, BaseFileSource): -------- CSV : Interpolated CSV file datasource for general use. """ + # No support here for custom Dimension names? 
selection in dataset_source.py header = tl.Any(default_value=0).tag(attr=True) lat_key = tl.Union([tl.Unicode(), tl.Int()], default_value="lat").tag(attr=True) diff --git a/podpac/core/data/dataset_source.py b/podpac/core/data/dataset_source.py index 500499de5..2b24abb1e 100644 --- a/podpac/core/data/dataset_source.py +++ b/podpac/core/data/dataset_source.py @@ -45,6 +45,7 @@ class DatasetRaw(FileKeysMixin, LoadFileMixin, BaseFileSource): -------- Dataset : Interpolated xarray dataset source for general use. """ + # selection lets you use other dims # dataset = tl.Instance(xr.Dataset).tag(readonly=True) selection = tl.Dict(allow_none=True, default_value=None).tag(attr=True) diff --git a/podpac/core/data/file_source.py b/podpac/core/data/file_source.py index e93bb5eb2..b891c47ca 100644 --- a/podpac/core/data/file_source.py +++ b/podpac/core/data/file_source.py @@ -163,6 +163,7 @@ class FileKeysMixin(tl.HasTraits): cf_calendar : str calendar, when decoding CF datetimes """ + # Other dims? data_key = tl.Union([tl.Unicode(), tl.List(trait=tl.Unicode())]).tag(attr=True) lat_key = tl.Unicode(default_value="lat").tag(attr=True) diff --git a/podpac/core/data/zarr_source.py b/podpac/core/data/zarr_source.py index 7bc596aee..58c907ec8 100644 --- a/podpac/core/data/zarr_source.py +++ b/podpac/core/data/zarr_source.py @@ -52,6 +52,7 @@ class ZarrRaw(S3Mixin, FileKeysMixin, BaseFileSource): -------- Zarr : Interpolated Zarr Datasource for general use. """ + # Doesnt support other dims file_mode = tl.Unicode(default_value="r").tag(readonly=True) coordinate_index_type = "slice" diff --git a/podpac/core/interpolation/nearest_neighbor_interpolator.py b/podpac/core/interpolation/nearest_neighbor_interpolator.py index 784528f24..a73910068 100644 --- a/podpac/core/interpolation/nearest_neighbor_interpolator.py +++ b/podpac/core/interpolation/nearest_neighbor_interpolator.py @@ -310,7 +310,7 @@ def _get_uniform_index(self, dim, source, request, bounds=None): # Find all the 0.5 and 1.5's that were rounded to even numbers, and make sure they all round down I = (index % 0.5) == 0 rindex[I] = np.ceil(index[I]) - else: # "unbiased", that's the default np.around behavior, so do nothing + else: # "unbiased", that's the default np.around behavior, so do nothing pass stop_ind = int(source.size) diff --git a/podpac/core/interpolation/none_interpolator.py b/podpac/core/interpolation/none_interpolator.py index e160969df..74c5bef77 100644 --- a/podpac/core/interpolation/none_interpolator.py +++ b/podpac/core/interpolation/none_interpolator.py @@ -21,6 +21,7 @@ from podpac.core.coordinates.utils import get_timedelta from podpac.core.interpolation.selector import Selector, _higher_precision_time_coords1d, _higher_precision_time_stack + @common_doc(COMMON_INTERPOLATOR_DOCS) class NoneInterpolator(Interpolator): """None Interpolation""" diff --git a/podpac/core/interpolation/test/test_interpolation.py b/podpac/core/interpolation/test/test_interpolation.py index 9b7c270ff..98ef8fa8a 100644 --- a/podpac/core/interpolation/test/test_interpolation.py +++ b/podpac/core/interpolation/test/test_interpolation.py @@ -44,6 +44,7 @@ class InterpArray(InterpolationMixin, ArrayRaw): np.testing.assert_array_equal(iaso.data, aso.data) np.testing.assert_array_equal(abso.data, data) + from podpac.core.coordinates.utils import VALID_DIMENSION_NAMES diff --git a/podpac/core/test/test_units.py b/podpac/core/test/test_units.py index d0f27db73..6467c42ae 100644 --- a/podpac/core/test/test_units.py +++ b/podpac/core/test/test_units.py @@ -83,7 +83,7 
@@ def test_pow(self):
         dims=["lat", "lon", "alt"],
         attrs={"units": ureg.meter},
     )
-        assert (a**2).attrs["units"] == ureg.meter**2
+        assert (a ** 2).attrs["units"] == ureg.meter ** 2
 
     def test_set_to_value_using_UnitsDataArray_as_mask_does_nothing_if_mask_has_dim_not_in_array(self):
         a = UnitsDataArray(
@@ -273,7 +273,7 @@ def test_units_allpass(self):
         assert a6[0, 0].data[()] == False
 
         a7 = a1 * a2
-        assert a7[0, 0].to(ureg.m**2).data[()] == (1 * ureg.meter * ureg.inch).to(ureg.meter**2).magnitude
+        assert a7[0, 0].to(ureg.m ** 2).data[()] == (1 * ureg.meter * ureg.inch).to(ureg.meter ** 2).magnitude
 
         a8 = a2 / a1
         assert a8[0, 0].to_base_units().data[()] == (1 * ureg.inch / ureg.meter).to_base_units().magnitude
@@ -328,7 +328,7 @@ def test_ufuncs(self):
         np.mean(a1)
         np.min(a1)
         np.max(a1)
-        a1**2
+        a1 ** 2
 
         # These don't have units!
         np.dot(a2.T, a1)
diff --git a/podpac/core/utils.py b/podpac/core/utils.py
index bdeb6c31e..4871e092f 100644
--- a/podpac/core/utils.py
+++ b/podpac/core/utils.py
@@ -37,6 +37,7 @@
 from . import settings
 from podpac.core.coordinates.utils import VALID_DIMENSION_NAMES
 
+
 def common_doc(doc_dict):
     """Decorator: replaces commond fields in a function docstring
 
@@ -144,6 +145,7 @@ def validate(self, obj, value):
             super(OrderedDictTrait, self).validate(obj, value)
             return value
 
+
 else:
     OrderedDictTrait = tl.Dict
 
diff --git a/podpac/datalib/__init__.py b/podpac/datalib/__init__.py
index 218bc6cb6..633dd3808 100644
--- a/podpac/datalib/__init__.py
+++ b/podpac/datalib/__init__.py
@@ -1,24 +1,16 @@
 """
 Datalib Public API
 
-This module gets imported in the root __init__.py
+This module imports the podpacdatalib package and exposes its contents as podpac.datalib
 """
 
-import sys
+try:
+    from podpacdatalib import *
+except ModuleNotFoundError:
+    import logging
 
-from podpac.datalib.cosmos_stations import COSMOSStations
-from podpac.datalib.drought_monitor import DroughtCategory, DroughtMonitorCategory
-from podpac.datalib.egi import EGI
-from podpac.datalib.gfs import GFS, GFSLatest
-from podpac.datalib.modis_pds import MODIS
-from podpac.datalib.satutils import Landsat8, Sentinel2
-from podpac.datalib.smap_egi import SMAP
-from podpac.datalib.terraintiles import TerrainTiles
-from podpac.datalib.weathercitizen import WeatherCitizen
-from podpac.datalib.soilscape import SoilSCAPE20min
-from podpac.datalib import soilgrids
-
-# intake requires python >= 3.6
-if sys.version >= "3.6":
-    from podpac.datalib.intake_catalog import IntakeCatalog
+    _logger = logging.getLogger(__name__)
+    _logger.warning(
+        "The podpacdatalib module is not installed, but the user tried to import podpac.datalib, which depends on it."
+ ) diff --git a/podpac/datalib/cosmos_stations.py b/podpac/datalib/cosmos_stations.py deleted file mode 100644 index ebcd45fb5..000000000 --- a/podpac/datalib/cosmos_stations.py +++ /dev/null @@ -1,459 +0,0 @@ -from __future__ import division, unicode_literals, print_function, absolute_import - -import re -import json -import logging -from six import string_types -from dateutil import parser -from io import StringIO - -try: - import cPickle # Python 2.7 -except: - import _pickle as cPickle - -import numpy as np -import traitlets as tl - -# Optional dependencies -from lazy_import import lazy_module - -bs4 = lazy_module("bs4") - -import podpac -from podpac.core.utils import _get_from_url, cached_property -from podpac.data import DataSource -from podpac.compositor import TileCompositorRaw -from podpac.interpolators import InterpolationMixin - -_logger = logging.getLogger(__name__) - - -def _convert_str_to_vals(properties): - IGNORE_KEYS = ["sitenumber"] - for k, v in properties.items(): - if not isinstance(v, string_types) or k in IGNORE_KEYS: - continue - try: - if "," in v: - properties[k] = tuple([float(vv) for vv in v.split(",")]) - else: - properties[k] = float(v) - except ValueError: - try: - properties[k] = np.datetime64(v) - except ValueError: - pass - return properties - - -class COSMOSStation(DataSource): - _repr_keys = ["label", "network", "location"] - - url = tl.Unicode("http://cosmos.hwr.arizona.edu/Probes/StationDat/") - station_data = tl.Dict().tag(attr=True) - - @cached_property - def raw_data(self): - _logger.info("Downloading station data from {}".format(self.station_data_url)) - - r = _get_from_url(self.station_data_url) - if r is None: - raise ConnectionError( - "COSMOS data cannot be retrieved. Is the site {} down?".format(self.station_calibration_url) - ) - return r.text - - @cached_property - def data_columns(self): - return self.raw_data.split("\n", 1)[0].split(" ") - - @property - def site_number(self): - return str(self.station_data["sitenumber"]) - - @property - def station_data_url(self): - return self.url + self.site_number + "/smcounts.txt" - - @property - def station_calibration_url(self): - return self.url + self.site_number + "/calibrationInfo.php" - - @property - def station_properties_url(self): - return self.url + self.site_number + "/index.php" - - def get_data(self, coordinates, coordinates_index): - data = np.loadtxt(StringIO(self.raw_data), skiprows=1, usecols=self.data_columns.index("SOILM"))[ - coordinates_index[0] - ] - data[data > 100] = np.nan - data[data < 0] = np.nan - data /= 100.0 # Make it fractional - return self.create_output_array(coordinates, data=data.reshape(coordinates.shape)) - - def get_coordinates(self): - lat_lon = self.station_data["location"] - time = np.atleast_2d( - np.loadtxt( - StringIO(self.raw_data), - skiprows=1, - usecols=[self.data_columns.index("YYYY-MM-DD"), self.data_columns.index("HH:MM")], - dtype=str, - ) - ) - if time.size == 0: - time = np.datetime64("NaT") - else: - time = np.array([t[0] + "T" + t[1] for t in time], np.datetime64) - c = podpac.Coordinates([time, [lat_lon[0], lat_lon[1]]], ["time", ["lat", "lon"]]) - return c - - @property - def label(self): - return self.station_data["label"] - - @property - def network(self): - return self.station_data["network"] - - @property - def location(self): - return self.station_data["location"] - - @cached_property(use_cache_ctrl=True) - def calibration_data(self): - cd = _get_from_url(self.station_calibration_url) - if cd is None: - raise ConnectionError( - "COSMOS 
data cannot be retrieved. Is the site {} down?".format(self.station_calibration_url) - ) - cd = cd.json() - cd["items"] = [_convert_str_to_vals(i) for i in cd["items"]] - return cd - - @cached_property(use_cache_ctrl=True) - def site_properties(self): - r = _get_from_url(self.station_properties_url) - if r is None: - raise ConnectionError( - "COSMOS data cannot be retrieved. Is the site {} down?".format(self.station_properties_url) - ) - soup = bs4.BeautifulSoup(r.text, "lxml") - regex = re.compile("Soil Organic Carbon") - loc = soup.body.findAll(text=regex)[0].parent.parent - label, value = loc.findAll("div") - labels = [l.strip() for l in label.children if "br" not in str(l)] - values = [l.strip() for l in value.children if "br" not in str(l) and l.strip() != ""] - - properties = {k: v for k, v in zip(labels, values)} - - return _convert_str_to_vals(properties) - - -class COSMOSStationsRaw(TileCompositorRaw): - url = tl.Unicode("http://cosmos.hwr.arizona.edu/Probes/") - stations_url = tl.Unicode("sitesNoLegend.js") - dims = ["lat", "lon", "time"] - - from podpac.style import Style - - style = Style(colormap="jet") - - ## PROPERTIES - @cached_property(use_cache_ctrl=True) - def _stations_data_raw(self): - url = self.url + self.stations_url - r = _get_from_url(url) - if r is None: - raise ConnectionError("COSMOS data cannot be retrieved. Is the site {} down?".format(url)) - - t = r.text - - # Fix the JSON - t_f = re.sub(':\s?",', ': "",', t) # Missing closing parenthesis - if t_f[-5:] == ",\n]}\n": # errant comma - t_f = t_f[:-5] + "\n]}\n" - - return t_f - - @cached_property - def stations_data(self): - stations = json.loads(self._stations_data_raw) - stations["items"] = [_convert_str_to_vals(i) for i in stations["items"]] - return stations - - @cached_property(use_cache_ctrl=True) - def source_coordinates(self): - lat_lon = np.array(self.stations_value("location"))[self.has_data] - c = podpac.Coordinates([[lat_lon[:, 0], lat_lon[:, 1]]], ["lat_lon"]) - return c - - @cached_property - def has_data(self): - return ~(np.array(self.stations_value("lastdat")) == "YYYY-MM-DD") - - @cached_property - def sources(self): - return np.array([COSMOSStation(station_data=item) for item in self.stations_data["items"]])[self.has_data] - - @property - def available_data_keys(self): - return list(self.stations_data["items"][0].keys()) - - ## UTILITY FUNCTIONS - def stations_value(self, key, stations_data=None): - """Returns a list of values for all the station for a particular key - - Parameters - ----------- - key: str - Key describing the station data. See self.available_data_keys for available keys. - - Returns - -------- - list - A list of the values for the keys for each station - """ - if key not in self.available_data_keys: - raise ValueError("Input key {} is not in available keys {}".format(key, self.available_data_keys)) - - return self._stations_value(key, stations_data) - - def _stations_value(self, key, stations_data=None): - """helper function for stations_value""" - if stations_data is None: - stations_data = self.stations_data - - return [i[key] for i in stations_data["items"]] - - @property - def stations_label(self): - return self.stations_value("label") - - def label_from_latlon(self, lat_lon): - """Returns the COSMOS station's label given it's lat/lon coordinates - - Parameters - ----------- - lat_lon : podpac.Coordinates - The lat/lon locations whose station name will be returned. 
Note, the lat/lon coordinates have to match - exactly the coordinates given in station_data[N]['location'], where N is the station. - This should be Coordinates object with 'lat_lon' stacked coordinates as one of the dimensions. - - Returns - -------- - list - List of COSMOS station names corresponding to the given coordinates. If a coordinate has no match, then - "None" is returned. - """ - if "lon_lat" in lat_lon.dims: - lat_lon = lat_lon.transpose("lon_lat") - elif "lat_lon" not in lat_lon.dims: - raise ValueError("The coordinates object must have a stacked 'lat_lon' dimension.") - - labels_map = {s["location"]: s["label"] for s in self.stations_data["items"]} - labels = [labels_map.get(ll, None) for ll in lat_lon.xcoords["lat_lon"]] - return labels - - def latlon_from_label(self, label): - """Returns the lat/lon coordinates of COSMOS stations that match the given labels - - Parameters - ------------ - label: str, list - Strings that partially describe a COSMOS station label. - - Returns - -------- - podpac.Coordinates - The coordinates of the COSMOS stations matching the input data - """ - if not isinstance(label, list): - label = [label] - - ind = self._get_label_inds(label) - if ind.size == 0: - return podpac.Coordinates([]) # Empty - - lat_lon = np.array(self.stations_value("location"))[ind].squeeze() - c = podpac.Coordinates([[lat_lon[0], lat_lon[1]]], ["lat_lon"]) - - return c - - def _get_label_inds(self, label): - """Helper function to get source indices for partially matched labels""" - ind = [] - for lab in label: - ind.extend([i for i, l in enumerate(self.stations_label) if lab.lower() in l.lower()]) - - ind = np.unique(ind) - return ind - - def get_calibration_data(self, label=None, lat_lon=None): - """Returns the calibration information for a station. Users must supply a label or lat_lon coordinates. - - Parameters - ------------ - label: str, List (optional) - Labels describing the station. - - lat_lon: podpac.Coordinates (optional) - Coordinates of the COSMOS station. Note, this object has to have a 'lat_lon' dimension which matches exactly - with the COSMOS stations. - - Returns - -------- - list - A list of dictionaries containing the calibration data for the requested stations. - """ - - if label is None and lat_lon is None: - raise ValueError("Must supply either 'label' or 'lat_lon'") - - if lat_lon is not None: - label = self.label_from_latlon(lat_lon) - - if isinstance(label, string_types): - label = [label] - - inds = self._get_label_inds(label) - - return [self.sources[i].calibration_data for i in inds] - - def get_site_properties(self, label=None, lat_lon=None): - """Returns the site properties for a station. Users must supply a label or lat_lon coordinates. - - Parameters - ------------ - label: str, List (optional) - Labels describing the station. - - lat_lon: podpac.Coordinates (optional) - Coordinates of the COSMOS station. Note, this object has to have a 'lat_lon' dimension which matches exactly - with the COSMOS stations. - - Returns - -------- - list - A list of dictionaries containing the properties for the requested stations. - """ - - if label is None and lat_lon is None: - raise ValueError("Must supply either 'label' or 'lat_lon'") - - if lat_lon is not None: - label = self.label_from_latlon(lat_lon) - - if isinstance(label, string_types): - label = [label] - - inds = self._get_label_inds(label) - - return [self.sources[i].site_properties for i in inds] - - def get_station_data(self, label=None, lat_lon=None): - """Returns the station data. 
Users must supply a label or lat_lon coordinates. - - Parameters - ------------ - label: str, List (optional) - Labels describing the station. - - lat_lon: podpac.Coordinates (optional) - Coordinates of the COSMOS station. Note, this object has to have a 'lat_lon' dimension which matches exactly - with the COSMOS stations. - - Returns - -------- - list - A list of dictionaries containing the data for the requested stations. - """ - - if label is None and lat_lon is None: - raise ValueError("Must supply either 'label' or 'lat_lon'") - - if lat_lon is not None: - label = self.label_from_latlon(lat_lon) - - if isinstance(label, string_types): - label = [label] - - inds = self._get_label_inds(label) - - return [self.stations_data["items"][i] for i in inds] - - -class COSMOSStations(InterpolationMixin, COSMOSStationsRaw): - @tl.default("interpolation") - def _interpolation_default(self): - return {"method": "nearest", "params": {"use_selector": False, "remove_nan": False, "time_scale": "1,M"}} - - -if __name__ == "__main__": - bounds = {"lat": [40, 46], "lon": [-78, -68]} - cs = COSMOSStations( - cache_ctrl=["ram", "disk"], - interpolation={"method": "nearest", "params": {"use_selector": False, "remove_nan": True, "time_scale": "1,M"}}, - ) - csr = COSMOSStationsRaw( - cache_ctrl=["ram", "disk"], - interpolation={"method": "nearest", "params": {"use_selector": False, "remove_nan": True, "time_scale": "1,M"}}, - ) - - sd = cs.stations_data - ci = cs.source_coordinates.select(bounds) - ce = podpac.coordinates.merge_dims( - [podpac.Coordinates([podpac.crange("2018-05-01", "2018-06-01", "1,D", "time")]), ci] - ) - cg = podpac.Coordinates( - [ - podpac.clinspace(ci["lat"].bounds[1], ci["lat"].bounds[0], 12, "lat"), - podpac.clinspace(ci["lon"].bounds[1], ci["lon"].bounds[0], 16, "lon"), - ce["time"], - ] - ) - o = cs.eval(ce) - o_r = csr.eval(ce) - og = cs.eval(cg) - - # Test helper functions - labels = cs.stations_label - lat_lon = cs.latlon_from_label("Manitou") - labels = cs.label_from_latlon(lat_lon) - lat_lon2 = cs.latlon_from_label("No Match Here") - cal = cs.get_calibration_data("Manitou") - props = cs.get_site_properties("Manitou") - - from matplotlib import rcParams - - rcParams["axes.labelsize"] = 12 - rcParams["xtick.labelsize"] = 10 - rcParams["ytick.labelsize"] = 10 - rcParams["legend.fontsize"] = 8 - rcParams["lines.linewidth"] = 2 - rcParams["font.size"] = 12 - - import matplotlib.pyplot as plt - import matplotlib.dates as mdates - from pandas.plotting import register_matplotlib_converters - - register_matplotlib_converters() - - fig = plt.figure(figsize=(6.5, 3), dpi=300) - plt.plot(o.time, o.data, "o-") - ax = plt.gca() - plt.ylim(0, 1) - plt.legend(cs.label_from_latlon(ce)) - # plt.plot(o_r.time, o_r.data, ".-") - plt.ylabel("Soil Moisture ($m^3/m^3$)") - plt.xlabel("Date") - # plt.xticks(rotation=90) - fig.autofmt_xdate() - ax.fmt_xdata = mdates.DateFormatter("%m-%d") - plt.title("COSMOS Data for 2018 over lat (40, 46) by lon (-78,-68)") - plt.tight_layout() - plt.show() - - print("Done") diff --git a/podpac/datalib/drought_monitor.py b/podpac/datalib/drought_monitor.py deleted file mode 100644 index 54bc74ba4..000000000 --- a/podpac/datalib/drought_monitor.py +++ /dev/null @@ -1,108 +0,0 @@ -from podpac.algorithm import Algorithm -from podpac.data import Zarr -from podpac.style import Style -from podpac.utils import NodeTrait - - -class DroughtMonitorCategory(Zarr): - style = Style(clim=[0, 0.6], colormap="gist_earth_r") - - -class DroughtCategory(Algorithm): - # soil_moisture = 
NodeTrait().tag(attr=True, required=True) - # d0 = NodeTrait().tag(attr=True, required=True) - # d1 = NodeTrait().tag(attr=True, required=True) - # d2 = NodeTrait().tag(attr=True, required=True) - # d3 = NodeTrait().tag(attr=True, required=True) - # d4 = NodeTrait().tag(attr=True, required=True) - soil_moisture = NodeTrait().tag(attr=True) - d0 = NodeTrait().tag(attr=True) - d1 = NodeTrait().tag(attr=True) - d2 = NodeTrait().tag(attr=True) - d3 = NodeTrait().tag(attr=True) - d4 = NodeTrait().tag(attr=True) - - style = Style( - clim=[0, 6], - enumeration_colors={ - 0: (0.45098039, 0.0, 0.0, 1.0), - 1: (0.90196078, 0.0, 0.0, 1.0), - 2: (1.0, 0.66666667, 0.0, 1.0), - 3: (0.98823529, 0.82745098, 0.49803922, 1.0), - 4: (1.0, 1.0, 0.0, 1.0), - 5: (1.0, 1.0, 1.0, 0.0), - }, - ) - - def algorithm(self, inputs, coordinates): - sm = inputs["soil_moisture"] - d0 = inputs["d0"] - d1 = inputs["d1"] - d2 = inputs["d2"] - d3 = inputs["d3"] - d4 = inputs["d4"] - - return ( - (sm >= 0) * (sm < d4) * ((sm - 0) / (d4 - 0) + 0) - + (sm >= d4) * (sm < d3) * ((sm - d4) / (d3 - d4) + 1) - + (sm >= d3) * (sm < d2) * ((sm - d3) / (d2 - d3) + 2) - + (sm >= d2) * (sm < d1) * ((sm - d2) / (d1 - d2) + 3) - + (sm >= d1) * (sm < d0) * ((sm - d1) / (d0 - d1) + 4) - + (sm >= d0) * (sm < 0.75) * ((sm - d0) / (0.75 - d1) + 5) - + (sm >= 0.75) * 6 - ) - - -if __name__ == "__main__": - import os - import numpy as np - import podpac - - c = podpac.Coordinates([46.6, -123.5, "2018-06-01"], dims=["lat", "lon", "time"]) - - # local - path = "droughtmonitor/beta_parameters.zarr" - if not os.path.exists(path): - print("No local drought monitor data found at '%s'" % path) - else: - # drought monitor parameters - d0 = DroughtMonitorCategory(source=path, data_key="d0") - print(d0.coordinates) - print(d0.eval(c)) - - # drought category - mock_sm = podpac.data.Array(data=np.random.random(d0.coordinates.shape), coordinates=d0.coordinates) - - category = DroughtCategory( - soil_moisture=mock_sm, - d0=DroughtMonitorCategory(source=path, data_key="d0"), - d1=DroughtMonitorCategory(source=path, data_key="d1"), - d2=DroughtMonitorCategory(source=path, data_key="d2"), - d3=DroughtMonitorCategory(source=path, data_key="d3"), - d4=DroughtMonitorCategory(source=path, data_key="d4"), - ) - print(category.eval(c)) - - # s3 - bucket = "podpac-internal-test" - store = "drought_parameters.zarr" - path = "s3://%s/%s" % (bucket, store) - d0 = DroughtMonitorCategory(source=path, data_key="d0") - if not d0.s3.exists(path): - print("No drought monitor data found at '%s'. Check your AWS credentials." 
% path) - else: - print(d0.coordinates) - print(d0.eval(c)) - - # drought category algorithm - mock_sm = podpac.data.Array(source=np.random.random(d0.coordinates.shape), coordinates=d0.coordinates) - - category = DroughtCategory( - soil_moisture=mock_sm, - d0=DroughtMonitorCategory(source=path, data_key="d0"), - d1=DroughtMonitorCategory(source=path, data_key="d1"), - d2=DroughtMonitorCategory(source=path, data_key="d2"), - d3=DroughtMonitorCategory(source=path, data_key="d3"), - d4=DroughtMonitorCategory(source=path, data_key="d4"), - ) - print(category.eval(c)) diff --git a/podpac/datalib/egi.py b/podpac/datalib/egi.py deleted file mode 100644 index 5725584bd..000000000 --- a/podpac/datalib/egi.py +++ /dev/null @@ -1,604 +0,0 @@ -""" -PODPAC node to access the NASA EGI Programmatic Interface -https://developer.earthdata.nasa.gov/sdps/programmatic-access-docs#overview -""" - - -import os -from io import BytesIO -import socket -import logging -import copy -import zipfile -import xml.etree.ElementTree -from xml.etree.ElementTree import ParseError - -import requests -from six import string_types -from traitlets.traitlets import default -import numpy as np -import xarray as xr -import traitlets as tl -from lazy_import import lazy_module - -# optional imports -h5py = lazy_module("h5py") - -# Internal dependencies -from podpac import Coordinates, Node -from podpac.compositor import OrderedCompositor -from podpac.data import DataSource -from podpac.interpolators import InterpolationMixin -from podpac import authentication -from podpac import settings -from podpac import cached_property -from podpac.core.units import UnitsDataArray - -# Set up logging -_log = logging.getLogger(__name__) - - -# Base URLs -# https://developer.earthdata.nasa.gov/sdps/programmatic-access-docs#egiparameters -BASE_URL = "https://n5eil01u.ecs.nsidc.org/egi/request" - - -class EGI(InterpolationMixin, DataSource): - """ - PODPAC DataSource node to access the NASA EGI Programmatic Interface - https://developer.earthdata.nasa.gov/sdps/programmatic-access-docs#cmrparameters - - Parameters - ---------- - short_name : str - Specifies the short name of the collection used to find granules for the coverage requested. Required. - See https://developer.earthdata.nasa.gov/sdps/programmatic-access-docs#cmrparameters - data_key : str - Path to the subset data layer or group for Parameter Subsetting. Required. - Equivalent to "Coverage" paramter described in - https://developer.earthdata.nasa.gov/sdps/programmatic-access-docs#cmrparameters - lat_key : str - Key for latitude data in endpoint HDF-5 file. Required. - lon_key : str - Key for longitude data in endpoint HDF-5 file. Required. - min_bounds_span: dict, optional - Default is {}. When specified, gives the minimum bounds that will be used for a coordinate in the EGI query, so - it works properly. If a user specified a lat,lon point, the EGI query may fail since the min/max values for - lat/lon are the same. When specified, these bounds will be padded by the following for latitude (as an example): - [lat - min_bounds_span['lat'] / 2, lat + min_bounds_span['lat'] / 2] - base_url : str, optional - URL for EGI data endpoint. - Defaults to :str:`BASE_URL` - See https://developer.earthdata.nasa.gov/sdps/programmatic-access-docs#egiparameters - page_size : int, optional - Number of granules returned from CMR per HTTP call. Defaults to 20. 
- See https://developer.earthdata.nasa.gov/sdps/programmatic-access-docs#cmrparameters - updated_since : str, optional - Can be used to find granules recently updated in CMR. Optional. - See https://developer.earthdata.nasa.gov/sdps/programmatic-access-docs#cmrparameters - version : str, int, optional - Data product version. Optional. - Number input will be cast into a 3 character string NNN, i.e. 3 -> "003" - token : str, optional - EGI Token from authentication process. - See https://wiki.earthdata.nasa.gov/display/CMR/Creating+a+Token+Common - If undefined, the node will look for a token under the setting key "token@urs.earthdata.nasa.gov". - If this setting is not defined, the node will attempt to generate a token using - :attr:`self.username` and :attr:`self.password` - username : str, optional - Earthdata username (https://urs.earthdata.nasa.gov/) - If undefined, node will look for a username under setting key "username@urs.earthdata.nasa.gov" - password : str, optional - Earthdata password (https://urs.earthdata.nasa.gov/) - If undefined, node will look for a password under setting key "password@urs.earthdata.nasa.gov" - - Attributes - ---------- - data : :class:`podpac.UnitsDataArray` - The data array compiled from downloaded EGI data - """ - - base_url = tl.Unicode(default_value=BASE_URL).tag( - attr=True, required=False, default="https://n5eil01u.ecs.nsidc.org/egi/request" - ) - - # required - short_name = tl.Unicode().tag(attr=True, required=True) - data_key = tl.Unicode().tag(attr=True, required=True) - lat_key = tl.Unicode(allow_none=True).tag(attr=True, required=True) - lon_key = tl.Unicode(allow_none=True).tag(attr=True, required=True) - time_key = tl.Unicode(allow_none=True).tag(attr=True, required=True) - udims_overwrite = tl.List() - - min_bounds_span = tl.Dict(allow_none=True).tag(attr=True) - - @property - def udims(self): - if self.udims_overwrite: - return self.udims_overwrite - """ This needs to be implemented so this node will cache properly. 
See Datasource.eval.""" - raise NotImplementedError - - # optional - - # full list of supported formats ["GeoTIFF", "HDF-EOS5", "NetCDF4-CF", "NetCDF-3", "ASCII", "HDF-EOS", "KML"] - # response_format = tl.Enum(["HDF-EOS5"], default_value="HDF-EOS5", allow_none=True) - page_size = tl.Int(default_value=20) - version = tl.Union( - [tl.Unicode(default_value=None, allow_none=True), tl.Int(default_value=None, allow_none=True)] - ).tag(attr=True) - - @tl.validate("version") - def _version_to_str(self, proposal): - v = proposal["value"] - if isinstance(v, int): - return "{:03d}".format(v) - - if isinstance(v, string_types): - return v.zfill(3) - - return None - - updated_since = tl.Unicode(default_value=None, allow_none=True) - - # auth - username = tl.Unicode(allow_none=True) - - @tl.default("username") - def _username_default(self): - if "username@urs.earthdata.nasa.gov" in settings: - return settings["username@urs.earthdata.nasa.gov"] - - return None - - password = tl.Unicode(allow_none=True) - - @tl.default("password") - def _password_default(self): - if "password@urs.earthdata.nasa.gov" in settings: - return settings["password@urs.earthdata.nasa.gov"] - - return None - - token = tl.Unicode(allow_none=True) - - @tl.default("token") - def _token_default(self): - if "token@urs.earthdata.nasa.gov" in settings: - return settings["token@urs.earthdata.nasa.gov"] - - return None - - @property - def coverage(self): - return (self.data_key, self.lat_key, self.lon_key) - - # attributes - data = tl.Any(allow_none=True) - _url = tl.Unicode(allow_none=True) - - @cached_property - def source(self): - """ - URL Endpoint built from input parameters - - Returns - ------- - str - """ - url = copy.copy(self.base_url) - url += "?short_name={}".format(self.short_name) - - def _append(u, key, val): - u += "&{key}={val}".format(key=key, val=val) - return u - - url = _append(url, "Coverage", ",".join(self.coverage)) - - # Format could be customized - see response_format above - # For now we set to HDF5 - # url = _append(url, "format", self.response_format) - url = _append(url, "format", "HDF-EOS") - - if self.version: - url = _append(url, "version", self.version) - - if self.updated_since: - url = _append(url, "Updated_since", self.updated_since) - - # other parameters are included at eval time - return url - - @property - def coordinates(self): - if self.data is None: - _log.warning("No coordinates found in EGI source") - return Coordinates([], dims=[]) - - return Coordinates.from_xarray(self.data) - - def get_data(self, coordinates, coordinates_index): - if self.data is not None: - da = self.data.data[coordinates_index] - return da - else: - _log.warning("No data found in EGI source") - return np.array([]) - - def _eval(self, coordinates, output=None, _selector=None): - # download data for coordinate bounds, then handle that data as an H5PY node - zip_files = self._download(coordinates) - try: - self.data = self._read_zips(zip_files) # reads each file in zip archive and creates single dataarray - except KeyError as e: - print("This following error may occur if data_key, lat_key, or lon_key is not correct.") - print( - "This error may also occur if the specified area bounds are smaller than the dataset pixel size, in" - " which case EGI is returning no data." 
- ) - raise e - - # run normal eval once self.data is prepared - return super(EGI, self)._eval(coordinates, output=output, _selector=_selector) - - ########## - # Data I/O - ########## - def read_file(self, filelike): - """Interpret individual file from EGI zip archive. - - Parameters - ---------- - filelike : filelike - Reference to file inside EGI zip archive - - Returns - ------- - podpac.UnitsDataArray - - Raises - ------ - ValueError - """ - - raise NotImplementedError("read_file must be implemented for EGI DataSource") - - ## TODO: implement generic handler based on keys and dimensions - - # # load file - # hdf5_file = h5py.File(filelike) - - # # handle data - # data = hdf5_file[self.data_key] - # lat = hdf5_file[self.lat_key] if self.lat_key in hdf5_file else None - # lon = hdf5_file[self.lon_key] if self.lon_key in hdf5_file else None - # time = hdf5_file[self.time_key] if self.time_key in hdf5_file else None - - # # stacked coords - # if data.ndim == 2: - # c = Coordinates([(lat, lon), time], dims=['lat_lon', 'time']) - - # # gridded coords - # elif data.ndim == 3: - # c = Coordinates([lat, lon, time], dims=['lat', 'lon', 'time']) - # else: - # raise ValueError('Data must have either 2 or 3 dimensions') - - def append_file(self, all_data, data): - """Append new data - - Parameters - ---------- - all_data : podpac.UnitsDataArray - aggregated data - data : podpac.UnitsDataArray - new data to append - - Raises - ------ - NotImplementedError - """ - raise NotImplementedError() - - def _download(self, coordinates): - """ - Download data from EGI Interface within PODPAC coordinates - - Parameters - ---------- - coordinates : :class:`podpac.Coordinates` - PODPAC coordinates specifying spatial and temporal bounds - - Raises - ------ - ValueError - Error raised when no spatial or temporal bounds are provided - - Returns - ------- - zipfile.ZipFile - Returns zip file byte-str to downloaded data - """ - - # Ensure Coordinates are in decimal lat-lon - coordinates = coordinates.transform("epsg:4326") - self._authenticate() - - time_bounds = None - bbox = None - - if "time" in coordinates.udims: - time_bounds = [ - str(np.datetime64(bound, "s")) - for bound in coordinates["time"].bounds - if isinstance(bound, np.datetime64) - ] - if len(time_bounds) < 2: - raise ValueError("Time coordinates must be of type np.datetime64") - - if self.min_bounds_span != None and "time" in self.min_bounds_span: - time_span, time_unit = self.min_bounds_span["time"].split(",") - time_delta = np.timedelta64(int(time_span), time_unit) - time_bounds_dt = [np.datetime64(tb) for tb in time_bounds] - timediff = np.diff(time_bounds_dt) - if timediff < time_delta: - pad = (time_delta - timediff) / 2 - time_bounds = [str((time_bounds_dt[0] - pad)[0]), str((time_bounds_dt[1] + pad)[0])] - - if "lat" in coordinates.udims or "lon" in coordinates.udims: - lat = coordinates["lat"].bounds - lon = coordinates["lon"].bounds - if (self.min_bounds_span != None) and ("lat" in self.min_bounds_span) and ("lon" in self.min_bounds_span): - latdiff = np.diff(lat) - londiff = np.diff(lon) - if latdiff < self.min_bounds_span["lat"]: - pad = ((self.min_bounds_span["lat"] - latdiff) / 2)[0] - lat = [lat[0] - pad, lat[1] + pad] - - if londiff < self.min_bounds_span["lon"]: - pad = ((self.min_bounds_span["lon"] - londiff) / 2)[0] - lon = [lon[0] - pad, lon[1] + pad] - - bbox = "{},{},{},{}".format(lon[0], lat[0], lon[1], lat[1]) - - # TODO: do we actually want to limit an open query? 
- if time_bounds is None and bbox is None: - raise ValueError("No time or spatial coordinates requested") - - url = self.source - - if time_bounds is not None: - url += "&time={start_time},{end_time}".format(start_time=time_bounds[0], end_time=time_bounds[1]) - - if bbox is not None: - url += "&Bbox={bbox}".format(bbox=bbox) - - # admin parameters - url += "&token={token}&page_size={page_size}".format(token=self.token, page_size=self.page_size) - self._url = url # for debugging - - # iterate through pages to build up zipfiles containg data - return list(self._query_egi(url)) - - def _query_egi(self, url, page_num=1): - """Generator for getting zip files from EGI interface - - Parameters - ---------- - url : str - base url without page_num attached - page_num : int, optional - page_num to query - - Yields - ------ - zipfile.ZipFile - ZipFile of results from page - - Raises - ------ - ValueError - Raises value error if no granules available from EGI - """ - good_result = True - while good_result: - # create the full url - page_url = "{}&page_num={}".format(url, page_num) - _log.debug("Querying EGI url: {}".format(page_url)) - r = requests.get(page_url) - - if r.status_code != 200: - good_result = False - - # raise exception if the status is not 200 on the first page - if page_num == 1: - raise ValueError("Failed to download data from EGI Interface. EGI Reponse: {}".format(r.text)) - - # end iteration - elif r.status_code == 501 and "No granules returned by CMR" in r.text: - _log.debug("Last page returned from EGI Interface: {}".format(page_num - 1)) - - # not sure of response, so end iteration - else: - _log.warning("Page returned from EGI Interface with unknown response: {}".format(r.text)) - - else: - good_result = True - # most of the time, EGI returns a zip file - if ".zip" in r.headers["Content-Disposition"]: - # load content into file-like object and then read into zip file - f = BytesIO(r.content) - zip_file = zipfile.ZipFile(f) - - # if only one file exists, it will return the single file. 
This puts the single file in a zip archive - else: - filename = r.headers["Content-Disposition"].split('filename="')[1].replace('"', "") - f = BytesIO() - zip_file = zipfile.ZipFile(f, "w") - zip_file.writestr(filename, r.content) - - # yield the current zip file - yield zip_file - page_num += 1 - - def _read_zips(self, zip_files): - - all_data = None - _log.debug("Processing {} zip files from EGI response".format(len(zip_files))) - - for zip_file in zip_files: - for name in zip_file.namelist(): - if name.endswith("json"): - _log.debug("Ignoring file: {}".format(name)) - continue - - _log.debug("Reading file: {}".format(name)) - - # BytesIO - try: - bio = BytesIO(zip_file.read(name)) - except (zipfile.BadZipfile, EOFError) as e: - _log.warning(str(e)) - continue - - # read file - uda = self.read_file(bio) - - # TODO: this can likely be simpler and automated - if uda is not None: - if all_data is None: - all_data = uda.isel(lon=np.isfinite(uda.lon), lat=np.isfinite(uda.lat)) - else: - all_data = self.append_file(all_data, uda) - else: - _log.warning("No data returned from file: {}".format(name)) - - return all_data - - ###################################### - # Token and Authentication Handling # - ###################################### - def set_credentials(self, username=None, password=None): - """Shortcut to :func:`podpac.authentication.set_crendentials` using class member :attr:`self.hostname` for the hostname - - Parameters - ---------- - username : str, optional - Username to store in settings for `self.hostname`. - If no username is provided and the username does not already exist in the settings, - the user will be prompted to enter one. - password : str, optional - Password to store in settings for `self.hostname` - If no password is provided and the password does not already exist in the settings, - the user will be prompted to enter one. - """ - return authentication.set_credentials("urs.earthdata.nasa.gov", username=username, password=password) - - def _authenticate(self): - if self.token is None: - self.get_token() - - # if token's not valid, try getting a new token - if not self.token_valid(): - self.get_token() - - # if token is still not valid, throw error - if not self.token_valid(): - raise ValueError( - "Failed to get a valid token from EGI Interface. 
" - + "Try requesting a token manually using `self.get_token()`" - ) - - _log.debug("EGI Token valid") - - def token_valid(self): - """ - Validate EGI token set in :attr:`token` attribute of EGI Node - - Returns - ------- - Bool - True if token is valid, False if token is invalid - """ - r = requests.get("{base_url}?token={token}".format(base_url=self.base_url, token=self.token)) - - return r.status_code != 401 - - def get_token(self): - """ - Get token for EGI interface using Earthdata credentials - - Returns - ------- - str - Token for access to EGI interface - - Raises - ------ - ValueError - Raised if Earthdata username or password is unavailable - """ - # token access URL - url = "https://cmr.earthdata.nasa.gov/legacy-services/rest/tokens" - - if self.username is not None: - settings["username@urs.earthdata.nasa.gov"] = self.username - else: - raise ValueError("No Earthdata username available to request EGI token") - - if self.password is not None: - settings["password@urs.earthdata.nasa.gov"] = self.password - else: - raise ValueError("No Earthdata password available to request EGI token") - - _ip = self._get_ip() - request = """ - - {username} - {password} - podpac - {ip} - - """.format( - username=self.username, password=self.password, ip=_ip - ) - headers = {"Content-Type": "application/xml"} - r = requests.post(url, data=request, headers=headers) - - try: - tree = xml.etree.ElementTree.fromstring(r.text) - except ParseError: - _log.error("Failed to parse returned text from EGI interface: {}".format(r.text)) - return - - try: - token = [element.text for element in tree.findall("id")][0] - except IndexError: - _log.error("No token found in XML response from EGI: {}".format(r.text)) - return - - settings["token@urs.earthdata.nasa.gov"] = token - self.token = token - - def _get_ip(self): - """ - Utility to return a best guess at the IP address of the local machine. - Required by EGI authentication to get EGI token. - """ - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - try: - s.connect(("8.8.8.8", 80)) - ip = s.getsockname()[0] - except Exception: - ip = "127.0.0.1" - finally: - s.close() - - return ip - - @classmethod - def get_ui_spec(cls, help_as_html=False): - spec = super().get_ui_spec(help_as_html=help_as_html) - spec["attrs"]["username"] = {} - spec["attrs"]["password"] = {} - return spec diff --git a/podpac/datalib/gfs.py b/podpac/datalib/gfs.py deleted file mode 100644 index 2de2d4f31..000000000 --- a/podpac/datalib/gfs.py +++ /dev/null @@ -1,136 +0,0 @@ -from __future__ import division, unicode_literals, print_function, absolute_import - -import datetime - -import traitlets as tl -import numpy as np - -from lazy_import import lazy_module - -s3fs = lazy_module("s3fs") - -# Internal imports -from podpac.core.data.rasterio_source import RasterioRaw -from podpac.core.authentication import S3Mixin -from podpac.coordinates import Coordinates -from podpac.utils import cached_property, DiskCacheMixin -from podpac.compositor import TileCompositor - -BUCKET = "noaa-gfs-pds" - - -class GFSSourceRaw(DiskCacheMixin, RasterioRaw): - """Raw GFS data from S3 - - Attributes - ---------- - parameter : str - parameter, e.g. 'SOIM'. - level : str - depth, e.g. "0-10 m DPTH" - date : str - source date in '%Y%m%d' format, e.g. '20200130' - hour : str - source hour, e.g. '1200' - forecast : str - forecast time in hours from the source date and hour, e.g. 
'003' - """ - - parameter = tl.Unicode().tag(attr=True) - level = tl.Unicode().tag(attr=True) - date = tl.Unicode().tag(attr=True) - hour = tl.Unicode().tag(attr=True) - forecast = tl.Unicode().tag(attr=True) - - @property - def source(self): - return "s3://%s/%s/%s/%s/%s/%s" % (BUCKET, self.parameter, self.level, self.date, self.hour, self.forecast) - - -class GFS(S3Mixin, DiskCacheMixin, TileCompositor): - """Composited and interpolated GFS data from S3 - - Attributes - ---------- - parameter : str - parameter, e.g. 'SOIM'. - level : str - source depth, e.g. "0-10 m DPTH" - date : str - source date in '%Y%m%d' format, e.g. '20200130' - hour : str - source hour, e.g. '1200' - """ - - parameter = tl.Unicode().tag(attr=True, required=True) - level = tl.Unicode().tag(attr=True, required=True) - date = tl.Unicode().tag(attr=True, required=True) - hour = tl.Unicode().tag(attr=True, required=True) - - @property - def _repr_keys(self): - return ["parameter", "level", "date", "hour"] + super()._repr_keys - - @property - def prefix(self): - return "%s/%s/%s/%s/%s/" % (BUCKET, self.parameter, self.level, self.date, self.hour) - - @cached_property(use_cache_ctrl=True) - def forecasts(self): - return [path.replace(self.prefix, "") for path in self.s3.find(self.prefix)] - - @cached_property - def sources(self): - params = { - "parameter": self.parameter, - "level": self.level, - "date": self.date, - "hour": self.hour, - "cache_ctrl": self.cache_ctrl, - } - return np.array([GFSSourceRaw(forecast=forecast, **params) for forecast in self.forecasts]) - - @cached_property - def source_coordinates(self): - base_time = datetime.datetime.strptime("%s %s" % (self.date, self.hour), "%Y%m%d %H%M") - forecast_times = [base_time + datetime.timedelta(hours=int(h)) for h in self.forecasts] - return Coordinates( - [[dt.strftime("%Y-%m-%d %H:%M") for dt in forecast_times]], dims=["time"], validate_crs=False - ) - - -def GFSLatest(parameter=None, level=None, **kwargs): - """ - The latest composited and interpolated GFS data from S3 - - Arguments - --------- - parameter : str - parameter, e.g. 'SOIM'. - level : str - source depth, e.g. "0-10 m DPTH" - - Returns - ------- - node : GFS - GFS node with the latest forecast data available for the given parameter and level. 
- """ - - s3 = s3fs.S3FileSystem(anon=True) - - # get latest date - prefix = "%s/%s/%s/" % (BUCKET, parameter, level) - dates = [path.replace(prefix, "") for path in s3.ls(prefix)] - if not dates: - raise RuntimeError("No data found at '%s'" % prefix) - date = max(dates) - - # get latest hour - prefix = "%s/%s/%s/%s/" % (BUCKET, parameter, level, date) - hours = [path.replace(prefix, "") for path in s3.ls(prefix)] - if not hours: - raise RuntimeError("No data found at '%s'" % prefix) - hour = max(hours) - - # node - return GFS(parameter=parameter, level=level, date=date, hour=hour, **kwargs) diff --git a/podpac/datalib/intake_catalog.py b/podpac/datalib/intake_catalog.py deleted file mode 100644 index de56fee6e..000000000 --- a/podpac/datalib/intake_catalog.py +++ /dev/null @@ -1,206 +0,0 @@ -from __future__ import division, unicode_literals, print_function, absolute_import - -import logging -import datetime - -import traitlets as tl -import numpy as np - -# Helper utility for optional imports -from lazy_import import lazy_module - -# Internal imports -import podpac -from podpac import Coordinates -from podpac.utils import cached_property - -intake = lazy_module("intake") -# lazy_module('intake.catalog.local.LocalCatalogEntry') - - -class IntakeCatalog(podpac.data.DataSource): - """ - Support for Intake Catalogs (https://intake.readthedocs.io/en/latest/index.html) - This primarily supports CSV data sources while we expand for Intake Catalogs. - - Parameters - ---------- - uri : str, required - Intake Catalog uri (local path to catalog yml file, or remote uri) - See https://intake.readthedocs.io/en/latest/catalog.html#local-catalogs - source : str, required - Intake Catalog source - field : str, optional, - If source is a dataframe with multiple fields, this specifies the field to use for analysis.for - Can be defined in the metadata in the intake catalog source. - dims : dict, optional - Dictionary defining the coordinates dimensions in the intake catalog source. - Keys are the podpac dimensions (lat, lon, time, alt) in stacked or unstacked form. - Values are the identifiers which locate the coordinates in the datasource. - Can be defined in the metadata in the intake catalog source. - Examples: - {'lat': 'lat column', 'time': 'time column'} - {'lat_lon': ['lat column', 'lon column']} - {'time': 'time'} - crs : str, optional - Coordinate reference system of the coordinates. - Can be defined in the metadata in the intake catalog source. - query : str, optional - A pandas dataframe query which will sub-select the rows in the data. 
For example, self.source_data = self.datasource.read().query(self.query) - - - Attributes - ---------- - catalog : :class:`intake.catalog.Catalog` - Loaded intake catalog class - See https://intake.readthedocs.io/en/latest/api_base.html#intake.catalog.Catalog - dataset : :class:`intake.catalog.local.CatalogEntry` - Loaded intake catalog data source - See https://intake.readthedocs.io/en/latest/api_base.html#intake.catalog.entry.CatalogEntry - """ - - # input parameters - source = tl.Unicode().tag(attr=True, required=True) - uri = tl.Unicode().tag(attr=True, required=True) - - # optional input parameters - field = tl.Unicode(default_value=None, allow_none=True).tag(attr=True) - dims = tl.Dict(default_value=None, allow_none=True).tag(attr=True) - crs = tl.Unicode(default_value=None, allow_none=True).tag(attr=True) - query = tl.Unicode(default_value=None, allow_none=True).tag(attr=True) - - @cached_property - def catalog(self): - return intake.open_catalog(self.uri) - - @cached_property - def dataset(self): - return getattr(self.catalog, self.source) - - @cached_property - def source_data(self): - data = self.dataset.read() - if self.dataset.container == "dataframe" and self.query: - data = data.query(self.query) - return data - - # TODO: validators may not be necessary - - # @tl.validate('uri') - # def _validate_uri(self, proposed): - # p = proposed['value'] - # self.catalog = intake.open_catalog(p) - # self.dataset = getattr(self.catalog, self.source) - - # @tl.validate('source') - # def _validate_source(self, proposed): - # s = proposed['value'] - # self.dataset = getattr(self.catalog, s) - - @tl.validate("field") - def _validate_field(self, proposed): - f = proposed["value"] - - if self.dataset.container == "dataframe" and f is None: - raise ValueError("Field is required when source container is a dataframe") - - return f - - # # more strict checking - # if 'fields' not in self.dataset.metadata: - # raise ValueError('No fields defined in catalog metadata') - # if f not in self.dataset.metadata['fields'].keys(): - # raise ValueError('Field {} not defined in catalog'.format(f)) - - @tl.validate("dims") - def _validate_dims(self, proposed): - dims = proposed["value"] - - # TODO: this needs to be improved to expand validation - for dim in dims: - udims = dim.split("_") - if isinstance(dims[dim], list) and len(dims[dim]) != len(udims): - raise ValueError( - 'Native Coordinate dimension "{}" does not have an identifier defined'.format(dims[dim]) - ) - - return dims - - def get_coordinates(self): - """Get coordinates from catalog definition or input dims""" - - # look for dims in catalog - if self.dims is None: - if "dims" in self.dataset.metadata: - self.dims = self.dataset.metadata["dims"] - else: - raise ValueError("No coordinates dims defined in catalog or input") - - # look for crs in catalog - if self.crs is None: - if "crs" in self.dataset.metadata: - self.crs = self.dataset.metadata["crs"] - - source_data = self.source_data - c_data = [] - - # indentifiers are columns when container is a dataframe - if self.dataset.container == "dataframe": - for dim in self.dims: - c_data.append(source_data[self.dims[dim]].values) - - return Coordinates(c_data, dims=list(self.dims.keys())) - - ## TODO: this needs to be tested - elif self.dataset.container == "ndarray": - for dim in self.dims: - c_data.append(source_data[self.dims[dim]]) - - return Coordinates(c_data, dims=list(self.dims.keys())) - - else: - raise ValueError("podpac does not currently support dataset container 
{}".format(self.dataset.container)) - - def get_data(self, coordinates, coordinates_index): - """Get Data from intake catalog source definition""" - - data = self.source_data - - # dataframe container - if self.dataset.container == "dataframe": - - # look for field in catalog - if self.field is None: - if "field" in self.dataset.metadata: - self.field = self.dataset.metadata["field"] - else: - raise ValueError("No field defined in catalog or input") - - data = data[self.field] - - # create UnitDataArray with subselected data (idx) - uda = self.create_output_array(coordinates, data=data[coordinates_index]) - return uda - - -if __name__ == "__main__": - node = IntakeCatalog( - uri="../podpac-examples/notebooks/demos/intake/precip/catalog.yml", # path to catalog - source="southern_rockies", # name of the source within catalog - field="precip", # this can be defined in catalog source metadata - dims={"time": "time"}, # this can be defined in catalog source metadata - ) - - print("catalog") - print(node.catalog) - - print("dataset") - print(node.dataset) - - print("coordinates") - print(node.coordinates) - - print("eval") - print(node.eval(node.coordinates)) - - print("done") diff --git a/podpac/datalib/modis_pds.py b/podpac/datalib/modis_pds.py deleted file mode 100644 index 9b57c168c..000000000 --- a/podpac/datalib/modis_pds.py +++ /dev/null @@ -1,342 +0,0 @@ -""" -MODIS on AWS OpenData - -MODIS Coordinates Grids: https://modis-land.gsfc.nasa.gov/MODLAND_grid.html -""" - -import logging -import datetime - -import numpy as np -import traitlets as tl - -import podpac -from podpac.utils import cached_property -from podpac.compositor import TileCompositorRaw -from podpac.core.data.rasterio_source import RasterioRaw -from podpac.authentication import S3Mixin -from podpac.interpolators import InterpolationMixin - -_logger = logging.getLogger(__name__) - -BUCKET = "modis-pds" -PRODUCTS = ["MCD43A4.006", "MOD09GA.006", "MYD09GA.006", "MOD09GQ.006", "MYD09GQ.006"] -CRS = "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +R=6371007.181 +units=m +no_defs +type=crs" - -SINUSOIDAL_HORIZONTAL = { - "00": (-20014877.697641734, -18903390.490691263), - "01": (-18902927.177974734, -17791439.971025266), - "02": (-17790976.658308737, -16679489.451358264), - "03": (-16679026.138641736, -15567538.931691263), - "04": (-15567075.618974736, -14455588.412025262), - "05": (-14455125.099308735, -13343637.892358262), - "06": (-13343174.579641735, -12231687.372691263), - "07": (-12231224.059974736, -11119736.853025263), - "08": (-11119273.540308736, -10007786.333358264), - "09": (-10007323.020641735, -8895835.813691262), - "10": (-8895372.500974735, -7783885.294025263), - "11": (-7783421.981308736, -6671934.774358263), - "12": (-6671471.461641735, -5559984.254691264), - "13": (-5559520.941974737, -4448033.735025264), - "14": (-4447570.422308736, -3336083.215358263), - "15": (-3335619.902641736, -2224132.695691264), - "16": (-2223669.382974736, -1112182.176025264), - "17": (-1111718.863308736, -231.656358264), - "18": (231.656358264, 1111718.863308736), - "19": (1112182.176025264, 2223669.382974736), - "20": (2224132.695691264, 3335619.902641736), - "21": (3336083.215358264, 4447570.422308736), - "22": (4448033.735025263, 5559520.941974737), - "23": (5559984.254691265, 6671471.461641736), - "24": (6671934.774358264, 7783421.981308737), - "25": (7783885.294025264, 8895372.500974735), - "26": (8895835.813691264, 10007323.020641737), - "27": (10007786.333358264, 11119273.540308736), - "28": (11119736.853025265, 12231224.059974737), - "29": 
(12231687.372691264, 13343174.579641737), - "30": (13343637.892358264, 14455125.099308737), - "31": (14455588.412025264, 15567075.618974738), - "32": (15567538.931691265, 16679026.138641737), - "33": (16679489.451358264, 17790976.658308737), - "34": (17791439.971025266, 18902927.177974734), - "35": (18903390.490691263, 20014877.697641734), -} - -SINUSOIDAL_VERTICAL = { - "00": (10007323.020641735, 8895835.813691262), - "01": (8895372.500974735, 7783885.294025263), - "02": (7783421.981308736, 6671934.774358263), - "03": (6671471.461641735, 5559984.254691264), - "04": (5559520.941974737, 4448033.735025264), - "05": (4447570.422308736, 3336083.215358263), - "06": (3335619.902641736, 2224132.695691264), - "07": (2223669.382974736, 1112182.176025264), - "08": (1111718.863308736, 231.656358264), - "09": (-231.656358264, -1111718.863308736), - "10": (-1112182.176025264, -2223669.382974736), - "11": (-2224132.695691264, -3335619.902641736), - "12": (-3336083.215358264, -4447570.422308736), - "13": (-4448033.735025263, -5559520.941974737), - "14": (-5559984.254691265, -6671471.461641736), - "15": (-6671934.774358264, -7783421.981308737), - "16": (-7783885.294025264, -8895372.500974735), - "17": (-8895835.813691264, -10007323.020641737), -} - - -def _parse_modis_date(date): - return datetime.datetime.strptime(date, "%Y%j").strftime("%Y-%m-%d") - - -def _available(s3, *l): - prefix = "/".join([BUCKET] + list(l)) - return [obj.replace(prefix + "/", "") for obj in s3.ls(prefix) if "_scenes.txt" not in obj] - - -def get_tile_coordinates(h, v): - """use pre-fetched lat and lon bounds to get coordinates for a single tile""" - lat_start, lat_stop = SINUSOIDAL_VERTICAL[v] - lon_start, lon_stop = SINUSOIDAL_HORIZONTAL[h] - lat = podpac.clinspace(lat_start, lat_stop, 2400, name="lat") - lon = podpac.clinspace(lon_start, lon_stop, 2400, name="lon") - return podpac.Coordinates([lat, lon], crs=CRS) - - -class MODISSource(RasterioRaw): - """ - Individual MODIS data tile using AWS OpenData, with caching. - - Attributes - ---------- - product : str - MODIS product ('MCD43A4.006', 'MOD09GA.006', 'MYD09GA.006', 'MOD09GQ.006', or 'MYD09GQ.006') - horizontal : str - column in the MODIS Sinusoidal Tiling System, e.g. '21' - vertical : str - row in the MODIS Sinusoidal Tiling System, e.g. '07' - date : str - year and three-digit day of year, e.g. '2011260' - data_key : str - individual object (varies by product) - """ - - product = tl.Enum(values=PRODUCTS, help="MODIS product ID").tag(attr=True) - horizontal = tl.Unicode(help="column in the MODIS Sinusoidal Tiling System, e.g. '21'").tag(attr=True) - vertical = tl.Unicode(help="row in the MODIS Sinusoidal Tiling System, e.g. '07'").tag(attr=True) - date = tl.Unicode(help="year and three-digit day of year, e.g. 
'2011460'").tag(attr=True) - data_key = tl.Unicode(help="data to retrieve (varies by product)").tag(attr=True) - anon = tl.Bool(True) - check_exists = tl.Bool(True) - - _repr_keys = ["prefix", "data_key"] - - def init(self): - """validation""" - for key in ["horizontal", "vertical", "date", "data_key"]: - if not getattr(self, key): - raise ValueError("MODISSource '%s' required" % key) - if self.horizontal not in ["%02d" % h for h in range(36)]: - raise ValueError("MODISSource horizontal invalid ('%s' should be between '00' and '35')" % self.horizontal) - if self.vertical not in ["%02d" % v for v in range(36)]: - raise ValueError("MODISSource vertical invalid ('%s' should be between '00' and '17'" % self.vertical) - try: - _parse_modis_date(self.date) - except ValueError: - raise ValueError("MODISSource date invalid ('%s' should be year and doy, e.g. '2009260'" % self.date) - if self.check_exists and not self.exists: - raise ValueError("No S3 object found at '%s'" % self.source) - - @cached_property(use_cache_ctrl=True) - def filename(self): - _logger.info( - "Looking up source filename (product=%s, h=%s, v=%s, date=%s, data_key=%s)..." - % (self.product, self.horizontal, self.vertical, self.date, self.data_key) - ) - prefix = "/".join([BUCKET, self.product, self.horizontal, self.vertical, self.date]) - objs = [obj.replace(prefix + "/", "") for obj in self.s3.ls(prefix) if obj.endswith("%s.TIF" % self.data_key)] - if len(objs) == 0: - raise RuntimeError("No matches found for data_key='%s' at '%s'" % (self.data_key, prefix)) - if len(objs) > 1: - raise RuntimeError("Too many matches for data_key='%s' at '%s' (%s)" % (self.data_key, prefix, objs)) - return objs[0] - - @property - def prefix(self): - return "%s/%s/%s/%s" % (self.product, self.horizontal, self.vertical, self.date) - - @cached_property - def source(self): - return "s3://%s/%s/%s" % (BUCKET, self.prefix, self.filename) - - @cached_property - def exists(self): - return self.s3.exists(self.source) - - def get_coordinates(self): - # use pre-fetched coordinate bounds (instead of loading from the dataset) - spatial_coords = get_tile_coordinates(self.horizontal, self.vertical) - time_coords = podpac.Coordinates([_parse_modis_date(self.date)], ["time"], crs=spatial_coords.crs) - return podpac.coordinates.merge_dims([spatial_coords, time_coords]) - - -class MODISComposite(S3Mixin, TileCompositorRaw): - """MODIS whole-world compositor. 
- For documentation about the data, start here: https://ladsweb.modaps.eosdis.nasa.gov/search/order/1 - For information about the bands, see here: https://modis.gsfc.nasa.gov/about/specifications.php - - Attributes - ---------- - product : str - MODIS product ('MCD43A4.006', 'MOD09GA.006', 'MYD09GA.006', 'MOD09GQ.006', or 'MYD09GQ.006') - data_key : str - individual object (varies by product) - """ - - product = tl.Enum(values=PRODUCTS, help="MODIS product ID").tag(attr=True, required=True) - data_key = tl.Unicode(help="data to retrieve (varies by product)").tag(attr=True, required=True) - - tile_width = (1, 2400, 2400) - start_date = "2013-01-01" - end_date = datetime.date.today().strftime("%Y-%m-%d") - anon = tl.Bool(True) - - dims = ["time", "lat", "lon"] - - _repr_keys = ["product", "data_key"] - - @cached_property(use_cache_ctrl=True) - def tile_coordinates(self): - return [get_tile_coordinates(*hv) for hv in self.available_tiles] - - @cached_property(use_cache_ctrl=True) - def available_tiles(self): - _logger.info("Looking up available tiles...") - return [(h, v) for h in _available(self.s3, self.product) for v in _available(self.s3, self.product, h)] - - def select_sources(self, coordinates, _selector=None): - """2d select sources filtering""" - - # filter tiles spatially - ct = coordinates.transform(CRS) - tiles = [at for at, atc in zip(self.available_tiles, self.tile_coordinates) if ct.select(atc.bounds).size > 0] - sources = [] - for tile in tiles: - h, v = tile - available_dates = _available(self.s3, self.product, h, v) - dates = [_parse_modis_date(date) for date in available_dates] - date_coords = podpac.Coordinates([dates], dims=["time"]) - # Filter individual tiles temporally - if _selector is not None: - _, I = _selector(date_coords, ct, index_type="numpy") - else: - _, I = date_coords.intersect(ct, outer=True, return_index=True) - valid_dates = np.array(available_dates)[I] - valid_sources = [ - MODISSource( - product=self.product, - horizontal=h, - vertical=v, - date=date, - data_key=self.data_key, - check_exists=False, - cache_ctrl=self.cache_ctrl, - force_eval=self.force_eval, - cache_output=self.cache_output, - cache_dataset=True, - s3=self.s3, - ) - for date in valid_dates - ] - sources.extend(valid_sources) - self.set_trait("sources", sources) - return sources - - -class MODIS(InterpolationMixin, MODISComposite): - pass - - -if __name__ == "__main__": - from matplotlib import pyplot - - # ------------------------------------------------------------------------- - # basic modis source - # ------------------------------------------------------------------------- - - source = MODISSource( - product=PRODUCTS[0], - data_key="B01", - horizontal="01", - vertical="11", - date="2020009", - cache_ctrl=["disk"], - cache_dataset=True, - cache_output=False, - ) - - print("source: %s" % repr(source)) - print("path: %s" % source.source) - print("coordinates: %s", source.coordinates) - - # native coordinates - o1 = source.eval(source.coordinates) - - # cropped and resampled using EPSG:4326 coordinates - c = podpac.Coordinates([podpac.clinspace(-22, -20, 200), podpac.clinspace(-176, -174, 200)], dims=["lat", "lon"]) - o2 = source.eval(c) - - # ------------------------------------------------------------------------- - # modis tile with time - # ------------------------------------------------------------------------- - - tile = MODISTile( - product=PRODUCTS[0], data_key="B01", horizontal="01", vertical="11", cache_ctrl=["disk"], cache_output=False - ) - - print("tile: %s" % repr(tile)) 
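A side note on the tile demo above: `MODISTile` is not defined in this module (only `MODISSource`, `MODISComposite`, and `MODIS` are), so this block presumably predates a rename. The date strings it passes around are MODIS-style year plus three-digit day-of-year values; a minimal, illustrative sketch of the `_parse_modis_date` helper defined earlier in this file shows the conversion (not part of the original demo):

```python
import datetime

def _parse_modis_date(date):
    # year + three-digit day of year, e.g. "2020009" -> "2020-01-09"
    return datetime.datetime.strptime(date, "%Y%j").strftime("%Y-%m-%d")

print(_parse_modis_date("2020009"))  # 2020-01-09
print(_parse_modis_date("2020087"))  # 2020-03-27 (2020 is a leap year)
```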
- print( - "available dates: %s-%s (n=%d)" % (tile.available_dates[0], tile.available_dates[-1], len(tile.available_dates)) - ) - print("coordinates: %s" % tile.coordinates) - - # existing date - assert "2020009" in tile.available_dates - ct1 = podpac.Coordinates(["2020-01-09", c["lat"], c["lon"]], dims=["time", "lat", "lon"]) - o2 = tile.eval(ct1) - - # nearest date - assert "2020087" not in tile.available_dates - ct2 = podpac.Coordinates(["2020-03-27", c["lat"], c["lon"]], dims=["time", "lat", "lon"]) - o3 = tile.eval(ct2) - - # time-series - ct3 = podpac.Coordinates([["2019-01-01", "2019-02-01", "2019-03-01"], -21.45, -174.92], dims=["time", "lat", "lon"]) - o4 = tile.eval(ct3) - - # ------------------------------------------------------------------------- - # modis compositor - # ------------------------------------------------------------------------- - - node = MODIS(product=PRODUCTS[0], data_key="B01", cache_ctrl=["disk"], cache_output=False) - - print("node: %s" % repr(node)) - print("sources: n=%d" % len(node.sources)) - print(" .e.g: %s" % repr(node.sources[0])) - - # single tile - assert len(node.select_sources(ct2)) == 1 - o5 = node.eval(ct2) - - # time-series in a single tile - assert len(node.select_sources(ct3)) == 1 - o6 = node.eval(ct3) - - # multiple tiles - ct3 = podpac.Coordinates( - ["2020-01-09", podpac.clinspace(45, 55, 200), podpac.clinspace(-80, -40, 200)], dims=["time", "lat", "lon"] - ) - assert len(node.select_sources(ct3)) == 7 - o7 = node.eval(ct3) - - # o7.plot() - # pyplot.show() diff --git a/podpac/datalib/nasaCMR.py b/podpac/datalib/nasaCMR.py deleted file mode 100644 index 88173421e..000000000 --- a/podpac/datalib/nasaCMR.py +++ /dev/null @@ -1,186 +0,0 @@ -""" -Search using NASA CMR -""" - -from __future__ import division, unicode_literals, print_function, absolute_import -import json -import logging - -import requests -import numpy as np - -_logger = logging.getLogger(__name__) - -from podpac.core.utils import _get_from_url - -CMR_URL = r"https://cmr.earthdata.nasa.gov/search/" - - -def get_collection_entries(session=None, short_name=None, keyword=None, **kwargs): - """Uses NASA CMR to retrieve metadata about a collection - - Parameters - ----------- - session: :class:`requets.Session`, optional - An authenticated Earthdata login session - short_name: str, optional - The short name of the dataset - keyword: str, optional - Any keyword search parameters - **kwargs: str, optional - Any additional query parameters - - Returns - --------- - list: - A list of collection metadata dictionaries - - Examples: - ----------- - >>> # This make the following request https://cmr.earthdata.nasa.gov/search/collections.json?short_name=SPL2SMAP_S - >>> get_collection_id(short_name='SPL2SMAP_S') - ['C1522341104-NSIDC_ECS'] - """ - - base_url = CMR_URL + "collections.json?" 
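For context on the collection search below: `get_collection_entries` simply joins its keyword arguments into a query string on the CMR `collections.json` endpoint. A minimal sketch of that URL construction, using the `short_name` from the docstring example (no request is actually sent):

```python
CMR_URL = "https://cmr.earthdata.nasa.gov/search/"

kwargs = {"short_name": "SPL2SMAP_S"}
query_string = "&".join(k + "=" + v for k, v in kwargs.items())

print(CMR_URL + "collections.json?" + query_string)
# https://cmr.earthdata.nasa.gov/search/collections.json?short_name=SPL2SMAP_S
```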
- if short_name is not None: - kwargs["short_name"] = short_name - if keyword is not None: - kwargs["keyword"] = keyword - - query_string = "&".join([k + "=" + v for k, v in kwargs.items()]) - - # use generic requests session if `session` is not defined - if session is None: - session = requests - - pydict = _get_from_url(base_url + query_string, session).json() - - entries = pydict["feed"]["entry"] - - return entries - - -def get_collection_id(session=None, short_name=None, keyword=None, **kwargs): - """Uses NASA CMR to retrieve collection id - - Parameters - ----------- - session: :class:`requets.Session`, optional - An authenticated Earthdata login session - short_name: str, optional - The short name of the dataset - keyword: str, optional - Any keyword search parameters - **kwargs: str, optional - Any additional query parameters - - Returns - --------- - list - A list of collection id's (ideally only one) - - Examples: - ----------- - >>> # This make the following request https://cmr.earthdata.nasa.gov/search/collections.json?short_name=SPL2SMAP_S - >>> get_collection_id(short_name='SPL2SMAP_S') - ['C1522341104-NSIDC_ECS'] - """ - - entries = get_collection_entries(session=session, short_name=short_name, keyword=keyword, **kwargs) - if len(entries) > 1: - _logger.warning("Found more than 1 entry for collection_id search") - - collection_id = [e["id"] for e in entries] - - return collection_id - - -def search_granule_json(session=None, entry_map=None, **kwargs): - """Search for specific files from NASA CMR for a particular collection - - Parameters - ----------- - session: :class:`requets.Session`, optional - An authenticated Earthdata login session - entry_map: function - A function applied to each individual entry. Could be used to filter out certain data in an entry - **kwargs: dict - Additional query string parameters. - At minimum the provider, provider_id, concept_id, collection_concept_id, short_name, version, or entry_title - need to be provided for a granule search. - - Returns - --------- - list - Entries for each granule in the collection based on the search terms - """ - base_url = CMR_URL + "granules.json?" - - if not np.any( - [ - m not in kwargs - for m in [ - "provider", - "provider_id", - "concept_id", - "collection_concept_id", - "short_name", - "version", - "entry_title", - ] - ] - ): - raise ValueError( - "Need to provide either" - " provider, provider_id, concept_id, collection_concept_id, short_name, version or entry_title" - " for granule search." - ) - - if "page_size" not in kwargs: - kwargs["page_size"] = "2000" - - if entry_map is None: - entry_map = lambda x: x - - query_string = "&".join([k + "=" + str(v) for k, v in kwargs.items()]) - - if session is None: - session = requests - - url = base_url + query_string - if "page_num" not in kwargs: - entries = _get_all_granule_pages(session, url, entry_map) - else: - pydict = _get_from_url(url, session).json() - entries = list(map(entry_map, pydict["feed"]["entry"])) - - return entries - - -def _get_all_granule_pages(session, url, entry_map, max_paging_depth=1000000): - """Helper function for searching through all pages for a collection. 
- - Parameters - ----------- - session: :class:`requets.Session`, optional - An authenticated Earthdata login session - url: str - URL to website - entry_map: function - Function for mapping the entries to a desired format - max_paging_depth - """ - page_size = int([q for q in url.split("?")[1].split("&") if "page_size" in q][0].split("=")[1]) - max_pages = int(max_paging_depth / page_size) - - pydict = _get_from_url(url, session).json() - entries = list(map(entry_map, pydict["feed"]["entry"])) - - for i in range(1, max_pages): - page_url = url + "&page_num=%d" % (i + 1) - page_entries = _get_from_url(page_url, session).json()["feed"]["entry"] - if not page_entries: - break - entries.extend(list(map(entry_map, page_entries))) - return entries diff --git a/podpac/datalib/nsidc_smap_opendap_url.txt b/podpac/datalib/nsidc_smap_opendap_url.txt deleted file mode 100644 index e8c5b95e7..000000000 --- a/podpac/datalib/nsidc_smap_opendap_url.txt +++ /dev/null @@ -1 +0,0 @@ -https://n5eil02u.ecs.nsidc.org/opendap/SMAP \ No newline at end of file diff --git a/podpac/datalib/satutils.py b/podpac/datalib/satutils.py deleted file mode 100644 index 756876974..000000000 --- a/podpac/datalib/satutils.py +++ /dev/null @@ -1,295 +0,0 @@ -""" -Satellite data access using sat-utils (https://github.com/sat-utils) developed by Development Seed - -Supports access to: - -- Landsat 8 on AWS OpenData: https://registry.opendata.aws/landsat-8/ -- Sentinel 2 -""" - -import logging -import datetime -import os - -import numpy as np -import traitlets as tl -from lazy_import import lazy_module - -satsearch = lazy_module("satsearch") - -# Internal dependencies -import podpac -from podpac.compositor import TileCompositor -from podpac.core.data.rasterio_source import RasterioRaw -from podpac.core.units import UnitsDataArray -from podpac.authentication import S3Mixin -from podpac import settings - -_logger = logging.getLogger(__name__) - - -def _get_asset_info(item, name): - """for forwards/backwards compatibility, convert B0x to/from Bx as needed""" - - if name in item.assets: - return item.assets[name] - elif name.replace("B", "B0") in item.assets: - # Bx -> B0x - return item.assets[name.replace("B", "B0")] - elif name.replace("B0", "B") in item.assets: - # B0x -> Bx - return item.assets[name.replace("B0", "B")] - else: - available = [key for key in item.assets.keys() if key not in ["thumbnail", "overview", "info", "metadata"]] - raise KeyError("asset '%s' not found. 
Available assets: %s" % (name, avaialable)) - - -def _get_s3_url(item, asset_name): - """convert to s3:// urls - href: https://landsat-pds.s3.us-west-2.amazonaws.com/c1/L8/034/033/LC08_L1TP_034033_20201209_20201218_01_T1/LC08_L1TP_034033_20201209_20201218_01_T1_B2.TIF - url: s3://landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20201209_20201218_01_T1/LC08_L1TP_034033_20201209_20201218_01_T1_B2.TIF - """ - - info = _get_asset_info(item, asset_name) - - if info["href"].startswith("s3://"): - return info["href"] - - elif info["href"].startswith("https://"): - root, key = info["href"][8:].split("/", 1) - bucket = root.split(".")[0] - return "s3://%s/%s" % (bucket, key) - - else: - raise ValueError("Could not parse satutils asset href '%s'" % info["href"]) - - -class SatUtilsSource(RasterioRaw): - date = tl.Unicode(help="item.properties.datetime from sat-utils item").tag(attr=True) - - def get_coordinates(self): - # get spatial coordinates from rasterio over s3 - spatial_coordinates = super(SatUtilsSource, self).get_coordinates() - time = podpac.Coordinates([self.date], dims=["time"], crs=spatial_coordinates.crs) - return podpac.coordinates.merge_dims([spatial_coordinates, time]) - - -class SatUtils(S3Mixin, TileCompositor): - """ - PODPAC DataSource node to access the data using sat-utils developed by Development Seed - See https://github.com/sat-utils - - See :class:`podpac.compositor.OrderedCompositor` for more information. - - Parameters - ---------- - collection : str, optional - Specifies the collection for satellite data. - Options include "landsat-8-l1", "sentinel-2-l1c". - Defaults to all collections. - query : dict, optional - Dictionary of properties to query on, supports eq, lt, gt, lte, gte - Passed through to the sat-search module. - See https://github.com/sat-utils/sat-search/blob/master/tutorial-1.ipynb - Defaults to None - asset : str, optional - Asset to download from the satellite image. - The asset must be a band name or a common extension name, see https://github.com/radiantearth/stac-spec/tree/master/extensions/eo - See also the Assets section of this tutorial: https://github.com/sat-utils/sat-stac/blob/master/tutorial-2.ipynb - Defaults to "B3" (green) - min_bounds_span : dict, optional - Default is {}. When specified, gives the minimum bounds that will be used for a coordinate in the query, so - it works properly. If a user specified a lat, lon point, the query may fail since the min/max values for - lat/lon are the same. When specified, these bounds will be padded by the following for latitude (as an example): - [lat - min_bounds_span['lat'] / 2, lat + min_bounds_span['lat'] / 2] - """ - - stac_api_url = tl.Unicode().tag(attr=True) - collection = tl.Unicode(default_value=None, allow_none=True).tag(attr=True) - asset = tl.Unicode().tag(attr=True) - query = tl.Dict(default_value=None, allow_none=True).tag(attr=True) - anon = tl.Bool(default_value=False).tag(attr=True) - min_bounds_span = tl.Dict(allow_none=True).tag(attr=True) - - @tl.default("interpolation") - def _default_interpolation(self): - # this default interpolation enables NN interpolation without having to expand past the bounds of the query - # we're relying on satutils to give us the nearest neighboring tile here. - return {"method": "nearest", "params": {"respect_bounds": False}} - - @tl.default("stac_api_url") - def _get_stac_api_url_from_env(self): - if "STAC_API_URL" not in os.environ: - raise TypeError( - "STAC endpoint required. 
Please define the SatUtils 'stac_api_url' or 'STAC_API_URL' environmental variable" - ) - - return os.environ - - def select_sources(self, coordinates, _selector=None): - result = self.search(coordinates) - - if result.found() == 0: - _logger.warning( - "Sat Utils did not find any items for collection {}. Ensure that sat-stac is installed, or try with a different set of coordinates (self.search(coordinates)).".format( - self.collection - ) - ) - return [] - - return [ - SatUtilsSource(source=_get_s3_url(item, self.asset), date=item.properties["datetime"], anon=self.anon) - for item in result.items() - ] - - def search(self, coordinates): - """ - Query data from sat-utils interface within PODPAC coordinates - - Parameters - ---------- - coordinates : :class:`podpac.Coordinates` - PODPAC coordinates specifying spatial and temporal bounds - - Raises - ------ - ValueError - Error raised when no spatial or temporal bounds are provided - - Returns - ------- - search : :class:`satsearch.search.Search` - Results form sat-search - """ - - # Ensure Coordinates are in decimal lat-lon - coordinates = coordinates.transform("epsg:4326") - - time_bounds = None - if "time" in coordinates.udims: - time_bounds = [ - str(np.datetime64(bound, "s")) - for bound in coordinates["time"].bounds - if isinstance(bound, np.datetime64) - ] - if len(time_bounds) < 2: - raise ValueError("Time coordinates must be of type np.datetime64") - - if self.min_bounds_span != None and "time" in self.min_bounds_span: - time_span, time_unit = self.min_bounds_span["time"].split(",") - time_delta = np.timedelta64(int(time_span), time_unit) - time_bounds_dt = [np.datetime64(tb) for tb in time_bounds] - timediff = np.diff(time_bounds_dt) - if timediff < time_delta: - pad = (time_delta - timediff) / 2 - time_bounds = [str((time_bounds_dt[0] - pad)[0]), str((time_bounds_dt[1] + pad)[0])] - - bbox = None - if "lat" in coordinates.udims or "lon" in coordinates.udims: - lat = coordinates["lat"].bounds - lon = coordinates["lon"].bounds - if (self.min_bounds_span != None) and ("lat" in self.min_bounds_span) and ("lon" in self.min_bounds_span): - latdiff = np.diff(lat) - londiff = np.diff(lon) - if latdiff < self.min_bounds_span["lat"]: - pad = ((self.min_bounds_span["lat"] - latdiff) / 2)[0] - lat = [lat[0] - pad, lat[1] + pad] - - if londiff < self.min_bounds_span["lon"]: - pad = ((self.min_bounds_span["lon"] - londiff) / 2)[0] - lon = [lon[0] - pad, lon[1] + pad] - - bbox = [lon[0], lat[0], lon[1], lat[1]] - - # TODO: do we actually want to limit an open query? 
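The `min_bounds_span` handling above is easy to misread: when a query collapses to a point, each bound pair is symmetrically padded out to the minimum span before being handed to sat-search. A small numeric sketch of the latitude branch, with made-up values:

```python
import numpy as np

lat = np.array([35.0, 35.0])  # hypothetical point query: bounds collapse to one value
min_lat_span = 0.3            # e.g. min_bounds_span["lat"]

latdiff = np.diff(lat)        # array([0.]) -- smaller than the minimum span
if latdiff < min_lat_span:
    pad = ((min_lat_span - latdiff) / 2)[0]
    lat = [float(lat[0] - pad), float(lat[1] + pad)]

print(lat)  # [34.85, 35.15]
```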
- if time_bounds is None and bbox is None: - raise ValueError("No time or spatial coordinates requested") - - # search dict - search_kwargs = {} - - search_kwargs["url"] = self.stac_api_url - - if time_bounds is not None: - search_kwargs["datetime"] = "{start_time}/{end_time}".format( - start_time=time_bounds[0], end_time=time_bounds[1] - ) - - if bbox is not None: - search_kwargs["bbox"] = bbox - - if self.query is not None: - search_kwargs["query"] = self.query - else: - search_kwargs["query"] = {} - - if self.collection is not None: - search_kwargs["collections"] = [self.collection] - - # search with sat-search - _logger.debug("sat-search searching with {}".format(search_kwargs)) - search = satsearch.Search(**search_kwargs) - _logger.debug("sat-search found {} items".format(search.found())) - - return search - - -class Landsat8(SatUtils): - """ - Landsat 8 on AWS OpenData - https://registry.opendata.aws/landsat-8/ - - Leverages sat-utils (https://github.com/sat-utils) developed by Development Seed - - Parameters - ---------- - asset : str, optional - Asset to download from the satellite image. - For Landsat8, this includes: 'B01','B02','B03','B04','B05','B06','B07','B08','B09','B10','B11','B12' - The asset must be a band name or a common extension name, see https://github.com/radiantearth/stac-spec/tree/master/extensions/eo - See also the Assets section of this tutorial: https://github.com/sat-utils/sat-stac/blob/master/tutorial-2.ipynb - query : dict, optional - Dictionary of properties to query on, supports eq, lt, gt, lte, gte - Passed through to the sat-search module. - See https://github.com/sat-utils/sat-search/blob/master/tutorial-1.ipynb - Defaults to None - min_bounds_span : dict, optional - Default is {}. When specified, gives the minimum bounds that will be used for a coordinate in the query, so - it works properly. If a user specified a lat, lon point, the query may fail since the min/max values for - lat/lon are the same. When specified, these bounds will be padded by the following for latitude (as an example): - [lat - min_bounds_span['lat'] / 2, lat + min_bounds_span['lat'] / 2] - """ - - collection = "landsat-8-l1-c1" - anon = True - - -class Sentinel2(SatUtils): - """ - Sentinel 2 on AWS OpenData - https://registry.opendata.aws/sentinel-2/ - - Leverages sat-utils (https://github.com/sat-utils) developed by Development Seed. - - Note this data source requires the requester to pay, so you must set podpac settings["AWS_REQUESTER_PAYS"] = True - - Parameters - ---------- - asset : str, optional - Asset to download from the satellite image. - For Sentinel2, this includes: 'tki','B01','B02','B03','B04','B05','B06','B07','B08','B8A','B09','B10','B11','B12 - The asset must be a band name or a common extension name, see https://github.com/radiantearth/stac-spec/tree/master/extensions/eo - See also the Assets section of this tutorial: https://github.com/sat-utils/sat-stac/blob/master/tutorial-2.ipynb - query : dict, optional - Dictionary of properties to query on, supports eq, lt, gt, lte, gte - Passed through to the sat-search module. - See https://github.com/sat-utils/sat-search/blob/master/tutorial-1.ipynb - Defaults to None - min_bounds_span : dict, optional - Default is {}. When specified, gives the minimum bounds that will be used for a coordinate in the query, so - it works properly. If a user specified a lat, lon point, the query may fail since the min/max values for - lat/lon are the same. 
When specified, these bounds will be padded by the following for latitude (as an example): - [lat - min_bounds_span['lat'] / 2, lat + min_bounds_span['lat'] / 2] - """ - - collection = "sentinel-s2-l1c" diff --git a/podpac/datalib/smap_egi.py b/podpac/datalib/smap_egi.py deleted file mode 100644 index 53fe8475f..000000000 --- a/podpac/datalib/smap_egi.py +++ /dev/null @@ -1,309 +0,0 @@ -""" -PODPAC Nodes to access SMAP data via EGI Interface -""" - -from __future__ import division, unicode_literals, print_function, absolute_import - -import os -import copy -import logging -from datetime import datetime - -import requests -from six import string_types -import numpy as np -import xarray as xr -import traitlets as tl - -from podpac.datalib import nasaCMR - -# Set up logging -_log = logging.getLogger(__name__) - -# Helper utility for optional imports -from lazy_import import lazy_module, lazy_class - -h5py = lazy_module("h5py") -lazy_class("h5py.File") - -# fixing problem with older versions of numpy -if not hasattr(np, "isnat"): - - def isnat(a): - return a.astype(str) == "None" - - np.isnat = isnat - -# Internal dependencies -from podpac import Coordinates, UnitsDataArray, cached_property -from podpac.datalib import EGI - -BASE_URL = "https://n5eil01u.ecs.nsidc.org/egi/request" - -SMAP_PRODUCT_DICT = { - #'shortname': ['lat_key', 'lon_key', '_data_key', 'quality_flag', 'default_verison'] - "SPL4SMAU": ["/x", "/y", "/Analysis_Data/sm_surface_analysis", None, None], - "SPL4SMGP": ["/x", "/y", "/Geophysical_Data/sm_surface", None, 4], - "SPL4SMLM": ["/x", "/y", "/Land_Model_Constants_Data", None, 4], - "SPL3SMAP": [ - "/Soil_Moisture_Retrieval_Data/latitude", - "/Soil_Moisture_Retrieval_Data/longitude", - "/Soil_Moisture_Retrieval_Data/soil_moisture", - "/Soil_Moisture_Retrieval_Data/retrieval_qual_flag", - "003", - ], - "SPL3SMA": [ - "/Soil_Moisture_Retrieval_Data/latitude", - "/Soil_Moisture_Retrieval_Data/longitude", - "/Soil_Moisture_Retrieval_Data/soil_moisture", - "/Soil_Moisture_Retrieval_Data/retrieval_qual_flag", - "003", - ], - "SPL3SMP_AM": [ - "/Soil_Moisture_Retrieval_Data_AM/latitude", - "/Soil_Moisture_Retrieval_Data_AM/longitude", - "/Soil_Moisture_Retrieval_Data_AM/soil_moisture", - "/Soil_Moisture_Retrieval_Data_AM/retrieval_qual_flag", - "005", - ], - "SPL3SMP_PM": [ - "/Soil_Moisture_Retrieval_Data_PM/latitude", - "/Soil_Moisture_Retrieval_Data_PM/longitude", - "/Soil_Moisture_Retrieval_Data_PM/soil_moisture_pm", - "/Soil_Moisture_Retrieval_Data_PM/retrieval_qual_flag_pm", - "005", - ], - "SPL3SMP_E_AM": [ - "/Soil_Moisture_Retrieval_Data_AM/latitude", - "/Soil_Moisture_Retrieval_Data_AM/longitude", - "/Soil_Moisture_Retrieval_Data_AM/soil_moisture", - "/Soil_Moisture_Retrieval_Data_AM/retrieval_qual_flag", - "004", - ], - "SPL3SMP_E_PM": [ - "/Soil_Moisture_Retrieval_Data_PM/latitude_pm", - "/Soil_Moisture_Retrieval_Data_PM/longitude_pm", - "/Soil_Moisture_Retrieval_Data_PM/soil_moisture_pm", - "/Soil_Moisture_Retrieval_Data_PM/retrieval_qual_flag_pm", - "004", - ], -} - -SMAP_PRODUCTS = list(SMAP_PRODUCT_DICT.keys()) - - -class SMAP(EGI): - """ - SMAP Node. For more information about SMAP, see https://nsidc.org/data/smap - - SMAP interface using the EGI Data Portal - https://developer.earthdata.nasa.gov/sdps/programmatic-access-docs - with the base URL: https://n5eil01u.ecs.nsidc.org/egi/request - - To access data from this node, an Earthdata login is required. 
This can either be specified when - creating the node: - ```python - smap = SMAP(username="your_user_name", password="your_password") - ``` - OR you can set the following PODPAC settings: - ```python - podpac.settings["username@urs.earthdata.nasa.gov"] = "your_user_name" - podpac.settings["password@urs.earthdata.nasa.gov"] = "your_password" - podpac.settings.save() # To have this information persist - smap = SMAP() - ``` - - Parameters - ---------- - product : str - One of the :list:`SMAP_PRODUCTS` strings - check_quality_flags : bool, optional - Default is True. If True, data will be filtered based on the SMAP data quality flag, and only - high quality data is returned. - data_key : str, optional - Default will return soil moisture and is set automatically based on the product selected. Other - possible data keys can be found - - Attributes - ---------- - nan_vals : list - Nan values in SMAP data - username : str, optional - Earthdata username (https://urs.earthdata.nasa.gov/) - If undefined, node will look for a username under setting key "username@urs.earthdata.nasa.gov" - password : str, optional - Earthdata password (https://urs.earthdata.nasa.gov/) - If undefined, node will look for a password under setting key "password@urs.earthdata.nasa.gov" - """ - - product = tl.Enum(SMAP_PRODUCTS, default_value="SPL4SMAU").tag(attr=True) - nan_vals = [-9999.0] - min_bounds_span = tl.Dict(default_value={"lon": 0.3, "lat": 0.3, "time": "3,h"}).tag(attr=True) - check_quality_flags = tl.Bool(True).tag(attr=True, default=True) - quality_flag_key = tl.Unicode(allow_none=True).tag(attr=True) - data_key = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) - base_url = tl.Unicode(default_value=BASE_URL).tag(attr=True) - - @property - def short_name(self): - if "SPL3SMP" in self.product: - return self.product.replace("_AM", "").replace("_PM", "") - else: - return self.product - - # pull _data_key, lat_key, lon_key, and version from product dict - @cached_property - def _product_data(self): - return SMAP_PRODUCT_DICT[self.product] - - @property - def udims(self): - return ["lat", "lon", "time"] - - @property - def lat_key(self): - return self._product_data[0] - - @property - def lon_key(self): - return self._product_data[1] - - @property - def _data_key(self): - if self.data_key is None: - return self._product_data[2] - else: - return self.data_key - - @property - def quality_flag_key(self): - return self._product_data[3] - - @property - def version(self): - try: - return nasaCMR.get_collection_entries(short_name=self.product)[-1]["version_id"] - except: - _log.warning("Could not automatically retrieve newest product version id from NASA CMR.") - return self._product_data[4] - - @property - def coverage(self): - if self.quality_flag_key: - return (self._data_key, self.quality_flag_key, self.lat_key, self.lon_key) - else: - return (self._data_key, self.lat_key, self.lon_key) - - def read_file(self, filelike): - """Interpret individual SMAP file from EGI zip archive. 
- - Parameters - ---------- - filelike : filelike - Reference to file inside EGI zip archive - - Returns - ------- - podpac.UnitsDataArray - - Raises - ------ - ValueError - """ - ds = h5py.File(filelike, "r") - - # handle data - data = ds[self._data_key][()] - - if self.check_quality_flags and self.quality_flag_key: - flag = ds[self.quality_flag_key][()] - flag = flag > 0 - [flag] == np.nan - - data = np.array([data]) # add extra dimension for time slice - - # handle time - if "SPL3" in self.product: - # TODO: make this py2.7 compatible - # take the midpoint between the range identified in the file - t_start = np.datetime64(ds["Metadata/Extent"].attrs["rangeBeginningDateTime"].replace("Z", "")) - t_end = np.datetime64(ds["Metadata/Extent"].attrs["rangeEndingDateTime"].replace("Z", "")) - time = np.array([t_start + (t_end - t_start) / 2]) - time = time.astype("datetime64[D]") - - elif "SPL4" in self.product: - time_unit = ds["time"].attrs["units"].decode() - time = xr.coding.times.decode_cf_datetime(ds["time"][()][0], units=time_unit) - time = time.astype("datetime64[h]") - - # handle spatial coordinates - if "SPL3" in self.product: - - # take nan mean along each axis - lons = ds[self.lon_key][()] - lats = ds[self.lat_key][()] - lons[lons == self.nan_vals[0]] = np.nan - lats[lats == self.nan_vals[0]] = np.nan - - # short-circuit if all lat/lon are non - if np.all(np.isnan(lats)) and np.all(np.isnan(lons)): - return None - - # make podpac coordinates - lon = np.nanmean(lons, axis=0) - lat = np.nanmean(lats, axis=1) - c = Coordinates([time, lat, lon], dims=["time", "lat", "lon"]) - - elif "SPL4" in self.product: - # lat/lon coordinates in EPSG:6933 (https://epsg.io/6933) - lon = ds["x"][()] - lat = ds["y"][()] - - # short-circuit if all lat/lon are nan - if np.all(np.isnan(lat)) and np.all(np.isnan(lon)): - return None - - c = Coordinates([time, lat, lon], dims=["time", "lat", "lon"], crs="epsg:6933") - - # make units data array with coordinates and data - return UnitsDataArray.create(c, data=data) - - def append_file(self, all_data, data): - """Append data - - Parameters - ---------- - all_data : podpac.UnitsDataArray - aggregated data - data : podpac.UnitsDataArray - new data to append - - Raises - ------ - NotImplementedError - """ - if all_data.shape[1:] == data.shape[1:]: - data.lat.data[:] = all_data.lat.data - data.lon.data[:] = all_data.lon.data - else: - # select only data with finite coordinates - data = data.isel(lon=np.isfinite(data.lon), lat=np.isfinite(data.lat)) - - # select lat based on the old data - lat = all_data.lat.sel(lat=data.lat, method="nearest") - - # When the difference between old and new coordintaes are large, it means there are new coordinates - Ilat = (np.abs(lat.data - data.lat) > 1e-3).data - # Use the new data's coordinates for the new coordinates - lat.data[Ilat] = data.lat.data[Ilat] - - # Repeat for lon - lon = all_data.lon.sel(lon=data.lon, method="nearest") - Ilon = (np.abs(lon.data - data.lon) > 1e-3).data - lon.data[Ilon] = data.lon.data[Ilon] - - # Assign to data - data.lon.data[:] = lon.data - data.lat.data[:] = lat.data - - return all_data.combine_first(data) diff --git a/podpac/datalib/soilgrids.py b/podpac/datalib/soilgrids.py deleted file mode 100644 index 2b12659aa..000000000 --- a/podpac/datalib/soilgrids.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -SoilGrids - -See: https://maps.isric.org/ -""" - - -from podpac.data import WCS - - -class SoilGridsBase(WCS): - """Base SoilGrids WCS datasource.""" - - format = "geotiff_byte" - max_size = 16384 - 
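# Hedged annotation, not part of the original file: "geotiff_byte" appears to select
# the byte-valued GeoTIFF output format for the ISRIC WCS requests, and max_size
# presumably caps how large a single WCS request may be, so evaluations bigger than
# this are split into several smaller requests (compare the max_size=10000 node in
# the __main__ block at the bottom of this module).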
_repr_keys = ["layer"] - - -class SoilGridsWRB(SoilGridsBase): - """SoilGrids: WRB classes and probabilities (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/wrb.map" - - -class SoilGridsBDOD(SoilGridsBase): - """SoilGrids: Bulk density (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/bdod.map" - - -class SoilGridsCEC(SoilGridsBase): - """SoilGrids: Cation exchange capacity and ph 7 (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/cec.map" - - -class SoilGridsCFVO(SoilGridsBase): - """SoilGrids: Coarse fragments volumetric (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/cfvo.map" - - -class SoilGridsClay(SoilGridsBase): - """SoilGrids: Clay content (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/clay.map" - - -class SoilGridsNitrogen(SoilGridsBase): - """SoilGrids: Nitrogen (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/nitrogen.map" - - -class SoilGridsPHH2O(SoilGridsBase): - """SoilGrids: Soil pH in H2O (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/phh2o.map" - - -class SoilGridsSand(SoilGridsBase): - """SoilGrids: Sand content (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/sand.map" - - -class SoilGridsSilt(SoilGridsBase): - """SoilGrids: Silt content (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/silt.map" - - -class SoilGridsSOC(SoilGridsBase): - """SoilGrids: Soil organic carbon content (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/soc.map" - - -class SoilGridsOCS(SoilGridsBase): - """SoilGrids: Soil organic carbon stock (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/ocs.map" - - -class SoilGridsOCD(SoilGridsBase): - """SoilGrids: Organic carbon densities (WCS)""" - - source = "https://maps.isric.org/mapserv?map=/map/ocd.map" - - -if __name__ == "__main__": - import podpac - - c = podpac.Coordinates( - [podpac.clinspace(-132.9023, -53.6051, 346, name="lon"), podpac.clinspace(23.6293, 53.7588, 131, name="lat")] - ) - - print("layers") - print(SoilGridsSand.get_layers()) - - node = SoilGridsSand(layer="sand_0-5cm_mean") - print("node") - print(node) - - output = node.eval(c) - print("eval") - print(output) - - node_chunked = SoilGridsSand(layer="sand_0-5cm_mean", max_size=10000) - output_chunked = node_chunked.eval(c) - - from matplotlib import pyplot - - pyplot.figure() - pyplot.subplot(211) - output.plot() - pyplot.subplot(212) - output_chunked.plot() - pyplot.show(block=False) diff --git a/podpac/datalib/soilscape.py b/podpac/datalib/soilscape.py deleted file mode 100644 index b72aec591..000000000 --- a/podpac/datalib/soilscape.py +++ /dev/null @@ -1,646 +0,0 @@ -import logging -import datetime - -import traitlets as tl -import numpy as np - -import podpac -from podpac.core.utils import cached_property -from podpac.interpolators import InterpolationMixin - -_logger = logging.getLogger(__name__) - -SOILSCAPE_FILESERVER_BASE = "https://thredds.daac.ornl.gov/thredds/fileServer/ornldaac/1339" -CRS = "+proj=longlat +datum=WGS84 +vunits=cm" - -NODES = { - "BLMLand1STonzi_CA": [900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916], - "BLMLand2STonzi_CA": [ - 1000, - 1017, - 1018, - 1019, - 1020, - 1021, - 1022, - 1023, - 1024, - 1025, - 1026, - 1027, - 1028, - 1029, - 1030, - 1031, - ], - "BLMLand3NTonzi_CA": [1200, 1201, 1202, 1204, 1205, 1206], - "Canton_OK": [ - 101, - 102, - 103, - 104, - 105, - 106, - 107, - 108, - 109, - 110, - 111, - 112, - 113, - 114, - 115, - 116, - 117, - 118, - 
119, - 120, - 121, - ], - "Kendall_AZ": [1400, 1401, 1402, 1403, 1404, 1405, 1406, 1407, 1408, 1409], - "LuckyHills_AZ": [1500, 1501, 1502, 1503, 1504, 1505, 1506, 1507], - "MatthaeiGardens_MI": [ - 200, - 202, - 203, - 204, - 206, - 207, - 208, - 209, - 210, - 211, - 212, - 214, - 215, - 216, - 217, - 218, - 219, - 220, - 221, - 222, - 223, - 224, - 225, - 226, - 227, - 228, - 230, - ], - "NewHoganLakeN_CA": [701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 715], - "NewHoganLakeS_CA": [501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518], - "TerradOro_CA": [ - 1300, - 1301, - 1302, - 1303, - 1304, - 1305, - 1306, - 1307, - 1308, - 1309, - 1310, - 1311, - 1312, - 1313, - 1314, - 1315, - 1316, - 1317, - 1318, - 1319, - 1320, - 1321, - 1322, - 1323, - 801, - 802, - 803, - 804, - 805, - 806, - 807, - 808, - 809, - 810, - 811, - 812, - 813, - 814, - 815, - 816, - 817, - 818, - 819, - 820, - 821, - 822, - 823, - 824, - 825, - 827, - 828, - ], - "TonziRanch_CA": [401, 402, 403, 404, 405, 406, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420], - "Vaira_CA": [ - 600, - 642, - 644, - 645, - 646, - 649, - 651, - 653, - 656, - 659, - 661, - 662, - 664, - 665, - 666, - 667, - 668, - 669, - 670, - 671, - 675, - 676, - 679, - 680, - ], -} - -NODE2SITE = {node: site for site in NODES for node in NODES[site]} - -NODE_LOCATIONS = { - 101: (36.00210189819336, -98.6310806274414), - 102: (36.00204849243164, -98.63016510009766), - 103: (36.00210189819336, -98.62930297851562), - 104: (36.00208282470703, -98.62843322753906), - 105: (36.00148391723633, -98.6310806274414), - 106: (36.00153350830078, -98.63020324707031), - 107: (36.00153350830078, -98.62933349609375), - 108: (36.00148391723633, -98.62846374511719), - 109: (36.00094985961914, -98.63113403320312), - 110: (36.00091552734375, -98.63023376464844), - 111: (36.00090026855469, -98.62934875488281), - 112: (36.00094985961914, -98.62848663330078), - 113: (36.0009651184082, -98.62813568115234), - 114: (36.00080108642578, -98.63224792480469), - 115: (36.00051498413086, -98.63228607177734), - 116: (36.0003662109375, -98.631103515625), - 117: (36.00038146972656, -98.63026428222656), - 118: (36.000450134277344, -98.62934875488281), - 119: (36.00043487548828, -98.62848663330078), - 120: (36.00041580200195, -98.62813568115234), - 121: (36.0002326965332, -98.63236999511719), - 900: (38.39332962036133, -120.9057388305664), - 901: (38.39311218261719, -120.90608978271484), - 902: (38.39351272583008, -120.90546417236328), - 903: (38.3934326171875, -120.90485382080078), - 904: (38.39322280883789, -120.90523529052734), - 905: (38.3930549621582, -120.90473937988281), - 906: (38.39268493652344, -120.90612030029297), - 907: (38.392616271972656, -120.90575408935547), - 908: (38.39276885986328, -120.90546417236328), - 909: (38.392539978027344, -120.90435791015625), - 910: (38.39240646362305, -120.90538024902344), - 911: (38.39205551147461, -120.90554809570312), - 912: (38.39206314086914, -120.90470123291016), - 913: (38.39163589477539, -120.9051513671875), - 914: (38.391475677490234, -120.90571594238281), - 915: (38.39125442504883, -120.90613555908203), - 916: (38.390968322753906, -120.90583038330078), - 1000: (38.38666534423828, -120.90629577636719), - 1017: (38.38922119140625, -120.90509796142578), - 1018: (38.38896179199219, -120.90476989746094), - 1019: (38.38833236694336, -120.90593719482422), - 1020: (38.388309478759766, -120.90525817871094), - 1021: (38.38798904418945, -120.90575408935547), - 1022: 
(38.38800048828125, -120.90465545654297), - 1023: (38.38770294189453, -120.90603637695312), - 1024: (38.38750457763672, -120.905517578125), - 1025: (38.38759231567383, -120.90480041503906), - 1026: (38.38713836669922, -120.9060287475586), - 1027: (38.38721466064453, -120.90483856201172), - 1028: (38.38737487792969, -120.90460205078125), - 1029: (38.387081146240234, -120.90457153320312), - 1030: (38.38673400878906, -120.90526580810547), - 1031: (38.38658142089844, -120.90449523925781), - 1200: (38.47132110595703, -120.99401092529297), - 1201: (38.47126770019531, -120.99333190917969), - 1202: (38.471134185791016, -120.99433135986328), - 1204: (38.47080612182617, -120.99436950683594), - 1205: (38.47153854370117, -120.99373626708984), - 1206: (38.47169876098633, -120.99419403076172), - 1400: (31.736480712890625, -109.94183349609375), - 1401: (31.737104415893555, -109.94342041015625), - 1402: (31.736867904663086, -109.94287109375), - 1403: (31.736665725708008, -109.94261169433594), - 1404: (31.735862731933594, -109.94123840332031), - 1405: (31.735509872436523, -109.94096374511719), - 1406: (31.736804962158203, -109.94418334960938), - 1407: (31.736730575561523, -109.94657897949219), - 1408: (31.737119674682617, -109.94674682617188), - 1409: (31.737913131713867, -109.9466323852539), - 1500: (31.743972778320312, -110.05152130126953), - 1501: (31.742450714111328, -110.05266571044922), - 1502: (31.742727279663086, -110.05255889892578), - 1503: (31.743019104003906, -110.05261993408203), - 1504: (31.742687225341797, -110.05278015136719), - 1505: (31.744464874267578, -110.05230712890625), - 1506: (31.744050979614258, -110.05297088623047), - 1507: (31.742982864379883, -110.05318450927734), - 200: (42.29828643798828, -83.66435241699219), - 202: (42.2977409362793, -83.66523742675781), - 203: (42.29869842529297, -83.66448974609375), - 204: (42.297706604003906, -83.66458892822266), - 206: (42.298709869384766, -83.66460418701172), - 207: (42.29875183105469, -83.66545867919922), - 208: (42.29795837402344, -83.66423797607422), - 209: (42.29881286621094, -83.66336822509766), - 210: (42.29900360107422, -83.66426086425781), - 211: (42.29833984375, -83.66524505615234), - 212: (42.299034118652344, -83.66350555419922), - 214: (42.29782485961914, -83.66586303710938), - 215: (42.298744201660156, -83.66431427001953), - 216: (42.29877853393555, -83.66446685791016), - 217: (42.29825210571289, -83.66380310058594), - 218: (42.298919677734375, -83.6642837524414), - 219: (42.29914855957031, -83.66537475585938), - 220: (42.29893112182617, -83.66461944580078), - 221: (42.29887390136719, -83.66364288330078), - 222: (42.297340393066406, -83.6669921875), - 223: (42.299259185791016, -83.66424560546875), - 224: (42.29872512817383, -83.66317749023438), - 225: (42.29872512817383, -83.66317749023438), - 226: (42.29866027832031, -83.66433715820312), - 227: (42.29909896850586, -83.66386413574219), - 228: (42.29883575439453, -83.66429138183594), - 230: (42.29800033569336, -83.66378021240234), - 701: (38.17225646972656, -120.80365753173828), - 702: (38.17338943481445, -120.80694580078125), - 703: (38.17353057861328, -120.806396484375), - 704: (38.17322540283203, -120.80656433105469), - 705: (38.172794342041016, -120.80677795410156), - 706: (38.17293167114258, -120.80503845214844), - 707: (38.17230987548828, -120.80622100830078), - 708: (38.171714782714844, -120.8061752319336), - 709: (38.172157287597656, -120.80663299560547), - 710: (38.171875, -120.80497741699219), - 711: (38.17270278930664, -120.80281066894531), - 712: (38.172607421875, 
-120.80424499511719), - 713: (38.17242431640625, -120.80474853515625), - 715: (38.17243576049805, -120.80218505859375), - 501: (38.149559020996094, -120.78845977783203), - 502: (38.14886474609375, -120.78742218017578), - 503: (38.14878463745117, -120.78624725341797), - 504: (38.14914321899414, -120.7858657836914), - 505: (38.14955520629883, -120.78559112548828), - 506: (38.15018081665039, -120.78546905517578), - 507: (38.148681640625, -120.78858184814453), - 508: (38.14809799194336, -120.78727722167969), - 509: (38.14791488647461, -120.78558349609375), - 510: (38.148048400878906, -120.78516387939453), - 511: (38.148773193359375, -120.78800201416016), - 512: (38.1482048034668, -120.78649139404297), - 513: (38.14846420288086, -120.78553771972656), - 514: (38.14806365966797, -120.78932189941406), - 515: (38.1475944519043, -120.78753662109375), - 516: (38.145992279052734, -120.78764343261719), - 517: (38.147003173828125, -120.78844451904297), - 518: (38.14594650268555, -120.78685760498047), - 1300: (38.506004333496094, -120.79766082763672), - 1301: (38.506587982177734, -120.79779052734375), - 1302: (38.50718688964844, -120.79734802246094), - 1303: (38.50724792480469, -120.79829406738281), - 1304: (38.50733184814453, -120.7967529296875), - 1305: (38.506893157958984, -120.79652404785156), - 1306: (38.50655746459961, -120.79510498046875), - 1307: (38.507179260253906, -120.79487609863281), - 1308: (38.506568908691406, -120.79683685302734), - 1309: (38.5062255859375, -120.79696655273438), - 1310: (38.506229400634766, -120.79557037353516), - 1311: (38.50590896606445, -120.79559326171875), - 1312: (38.50571060180664, -120.79660034179688), - 1313: (38.505611419677734, -120.79607391357422), - 1314: (38.50545120239258, -120.79638671875), - 1315: (38.50564956665039, -120.79755401611328), - 1316: (38.50514221191406, -120.7978744506836), - 1317: (38.505279541015625, -120.79678344726562), - 1318: (38.50477981567383, -120.7974853515625), - 1319: (38.505733489990234, -120.79812622070312), - 1320: (38.506534576416016, -120.7989730834961), - 1321: (38.505680084228516, -120.7990951538086), - 1322: (38.50444030761719, -120.79913330078125), - 1323: (38.50520324707031, -120.7984848022461), - 801: (38.506587982177734, -120.79779052734375), - 802: (38.50718688964844, -120.79734802246094), - 803: (38.50724792480469, -120.79829406738281), - 804: (38.50733184814453, -120.7967529296875), - 805: (38.506893157958984, -120.79652404785156), - 806: (38.50655746459961, -120.79510498046875), - 807: (38.507179260253906, -120.79487609863281), - 808: (38.506568908691406, -120.79683685302734), - 809: (38.5062255859375, -120.79696655273438), - 810: (38.506229400634766, -120.79557037353516), - 811: (38.50590896606445, -120.79559326171875), - 812: (38.50571060180664, -120.79660034179688), - 813: (38.505611419677734, -120.79607391357422), - 814: (38.50545120239258, -120.79638671875), - 815: (38.50564956665039, -120.79755401611328), - 816: (38.50514221191406, -120.7978744506836), - 817: (38.505279541015625, -120.79678344726562), - 818: (38.50477981567383, -120.7974853515625), - 819: (38.505733489990234, -120.79812622070312), - 820: (38.506534576416016, -120.7989730834961), - 821: (38.505680084228516, -120.7990951538086), - 822: (38.50444030761719, -120.79913330078125), - 823: (38.50520324707031, -120.7984848022461), - 824: (38.50476837158203, -120.79838562011719), - 825: (38.50437927246094, -120.79798126220703), - 827: (38.50798797607422, -120.79448699951172), - 828: (38.5061149597168, -120.79402923583984), - 401: (38.431915283203125, 
-120.96541595458984), - 402: (38.431888580322266, -120.96483612060547), - 403: (38.4322509765625, -120.96546936035156), - 404: (38.43227767944336, -120.96485900878906), - 405: (38.43230438232422, -120.96441650390625), - 406: (38.43255615234375, -120.96488952636719), - 408: (38.432777404785156, -120.9669189453125), - 409: (38.433223724365234, -120.96697235107422), - 410: (38.43375015258789, -120.9669418334961), - 411: (38.43077850341797, -120.96622467041016), - 412: (38.43091583251953, -120.96663665771484), - 413: (38.43063735961914, -120.96785736083984), - 414: (38.43002700805664, -120.96749877929688), - 415: (38.43063735961914, -120.9666976928711), - 416: (38.43058395385742, -120.96700286865234), - 417: (38.43080520629883, -120.96736145019531), - 418: (38.43077850341797, -120.96808624267578), - 419: (38.43033218383789, -120.96736145019531), - 420: (38.43030548095703, -120.96785736083984), - 600: (38.41737365722656, -120.9493179321289), - 642: (38.41261291503906, -120.95011138916016), - 644: (38.41255569458008, -120.95069122314453), - 645: (38.41291809082031, -120.94975280761719), - 646: (38.41238784790039, -120.95085906982422), - 649: (38.41522216796875, -120.95005798339844), - 651: (38.41477966308594, -120.95024871826172), - 653: (38.41236114501953, -120.94789123535156), - 656: (38.41458511352539, -120.95175170898438), - 659: (38.414249420166016, -120.94966888427734), - 661: (38.41211700439453, -120.9535140991211), - 662: (38.41253662109375, -120.95446014404297), - 664: (38.41349411010742, -120.94889068603516), - 665: (38.414520263671875, -120.94795989990234), - 666: (38.416015625, -120.94960021972656), - 667: (38.414939880371094, -120.94808197021484), - 668: (38.41300582885742, -120.95339965820312), - 669: (38.41335678100586, -120.95158386230469), - 670: (38.41355514526367, -120.95103454589844), - 671: (38.416221618652344, -120.95032501220703), - 675: (38.414466857910156, -120.9533920288086), - 676: (38.41484832763672, -120.95587158203125), - 679: (38.4171142578125, -120.9493179321289), - 680: (38.41756057739258, -120.94951629638672), -} - - -def get_node_location(node): - """ - Get SoilSCAPE node location by id. - - Arguments - --------- - node : int - node id - - Returns - ------- - location : tuple - (lat, lon) coordinates - """ - - if node not in NODE_LOCATIONS: - _logger.info("Looking up location for '%s' node %d" % (NODE2SITE[node], node)) - source = SoilSCAPENode(site=NODE2SITE[node], node=node) - NODE_LOCATIONS[node] = (source.lat, source.lon) - return NODE_LOCATIONS[node] - - -def get_site_coordinates(site, time=None, depth=None): - """ - Get location coordinates for the given SoilSCAPE site. - - Arguments - --------- - site : str - SoilSCAPE site, e.g. 'Canton_OK' - time : array, datetime64, str - datetime(s). Default is the current time. - depth : float, array - depth(s). 
Default: [4, 13, 30] (all available depths) - - Returns - ------- - coords : Coordinates - Coordinates with (lat_lon) for all nodes at the site and the given time and depth - """ - - if site not in NODES: - raise ValueError("site '%s' not found" % site) - - if time is None: - time = np.datetime64(datetime.datetime.now()) # now - - if depth is None: - depth = [4, 13, 30] # all - - lats = [] - lons = [] - for node in NODES[site]: - try: - lat, lon = get_node_location(node) - except: - _logger.exception("Could not get coordinates for '%s' node '%s'" % (NODE2SITE[node], node)) - continue - lats.append(lat) - lons.append(lon) - - return podpac.Coordinates([[lats, lons], time, depth], dims=["lat_lon", "time", "alt"], crs=CRS) - - -class SoilSCAPENode(podpac.core.data.dataset_source.DatasetRaw): - """SoilSCAPE 20min soil moisture for a particular node. - - Data is loaded from the THREDDS https fileserver. - - Attributes - ---------- - site : str - SoilSCAPE site, e.g. 'Canton_OK'. - node : int - SoilSCAPE node id. - rescale : float - Default is 0.01. The soilscape soil moisture is multiplied by this number. - Soilscape soil moisture by default is absolute volumetric percentage, so we can rescale that to absolute volumetric fraction. - """ - - alt_key = "depth" - site = tl.Enum(list(NODES)).tag(attr=True) - node = tl.Int().tag(attr=True) - cache_dataset = tl.Bool(True) - rescale = tl.Float(0.01) - coordinate_index_type = "numpy" - - _repr_keys = ["site", "node"] - - @cached_property - def dims(self): - """dataset coordinate dims""" - lookup = {self.lat_key: "lat", self.lon_key: "lon", self.alt_key: "alt", self.time_key: "time"} - return [lookup[dim] for dim in self.dataset.dims] + ["lat", "lon"] - - def get_data(self, coordinates, coordinates_index): - """{get_data}""" - - if not isinstance(self.data_key, list): - data = self.dataset[self.data_key] - data = data.transpose(*self.dataset.dims) - else: - data = self.dataset[self.data_key].to_array(dim="output") - tdims = tuple(self.dataset.dims) + ("output",) - data = data.transpose(*tdims) - - # add dims for lat and lon - data = data.data.reshape(data.shape + (1, 1)) - - return self.create_output_array(coordinates, data[coordinates_index] * self.rescale) - - @property - def lat(self): - return self.dataset.lat.item() - - @property - def lon(self): - return self.dataset.lon.item() - - @property - def physicalid(self): - # note: this should be the same as the node number - return self.dataset.physicalid.item() - - def get_coordinates(self): - coordinates = super(SoilSCAPENode, self).get_coordinates() - coordinates.set_trait("crs", CRS) - return coordinates - - @tl.validate("node") - def _validate_node(self, d): - if d["value"] not in NODES[self.site]: - raise ValueError("Site '%s' does not have a node n%d" % (self.site, d["value"])) - - return d["value"] - - @property - def source(self): - return "{base_url}/{filename}.nc".format(base_url=SOILSCAPE_FILESERVER_BASE, filename=self.filename) - - @property - def filename(self): - return "soil_moist_20min_{site}_n{node}".format(site=self.site, node=self.node) - - -class SoilSCAPE20minRaw(podpac.compositor.TileCompositorRaw): - """Raw SoilSCAPE 20min soil moisture data for an entire site. - - Data is loaded from the THREDDS https fileserver. - - Attributes - ---------- - site : str - SoilSCAPE site, e.g. 'Canton_OK'. - exclude : list - data points with these quality flags will be excluded. Default excludes [1, 2, 3, 4]. 
- Flags:: - * 0 - (G) Good (Standard for all data) - * 1 - (D) Dubious (Automatically flagged, spikes etc.,) - * 2 - (I) Interpolated / Estimated - * 3 - (B) Bad (Manually flagged) - * 4 - (M) Missing - * 5 - (C) Exceeds field size (Negative SM values, fixed at 0.1 percent) - """ - - site = tl.Enum(list(NODES), allow_none=True, default_value=None).tag(attr=True) - exclude = tl.List([1, 2, 3, 4]).tag(attr=True) - dataset_expires = tl.Any() - data_key = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) - - @tl.validate("dataset_expires") - def _validate_dataset_expires(self, d): - podpac.core.cache.utils.expiration_timestamp(d["value"]) - return d["value"] - - @property - def _repr_keys(self): - keys = [] - if self.site is not None: - keys.append("site") - return keys - - @podpac.cached_property - def nodes(self): - if self.site is not None: - return [(self.site, node) for node in NODES[self.site]] - else: - return [(site, node) for site in NODES for node in NODES[site]] - - @podpac.cached_property - def source_coordinates(self): - latlons = np.array([NODE_LOCATIONS[node[1]] for node in self.nodes]) - return podpac.Coordinates([latlons.T.tolist()], ["lat_lon"]) - - @podpac.cached_property - def sources(self): - return [self._make_source(site, node) for site, node in self.nodes] - - def _make_source(self, site, node): - return SoilSCAPENode( - site=site, - node=node, - cache_ctrl=self.cache_ctrl, - dataset_expires=self.dataset_expires, - data_key=self.data_key, - ) - - def make_coordinates(self, time=None, depth=None): - """ - Make coordinates with the site locations and the given time and depth. - - Arguments - --------- - time : array, datetime64, str - datetime(s). Default is the current time. - depth : float, array - depth(s). Default: [4, 13, 30] (all available depths) - - Returns - ------- - coords : Coordinates - Coordinates with (lat_lon) for all nodes at the site and the given time and depth - """ - - return get_site_coordinates(self.site, time=time, depth=depth) - - @property - def available_sites(self): - return list(NODES.keys()) - - -class SoilSCAPE20min(InterpolationMixin, SoilSCAPE20minRaw): - """SoilSCAPE 20min soil moisture data for an entire site, with interpolation.""" - - pass diff --git a/podpac/datalib/terraintiles.py b/podpac/datalib/terraintiles.py deleted file mode 100644 index 5e35d6884..000000000 --- a/podpac/datalib/terraintiles.py +++ /dev/null @@ -1,556 +0,0 @@ -""" -Terrain Tiles - -Hosted on AWS S3 -https://registry.opendata.aws/terrain-tiles/ - -Description - Gridded elevation tiles -Resource type - S3 Bucket -Amazon Resource Name (ARN) - arn:aws:s3:::elevation-tiles-prod -AWS Region - us-east-1 - -Documentation: https://mapzen.com/documentation/terrain-tiles/ - -Attribution ------------ -- Some source adapted from https://github.com/tilezen/joerd -- See required attribution when using terrain tiles: - https://github.com/tilezen/joerd/blob/master/docs/attribution.md - -Attributes ----------- -TILE_FORMATS : list - list of support tile formats - -Notes ------ -See https://github.com/racemap/elevation-service/blob/master/tileset.js -for example skadi implementation -""" - -import os -import re -from itertools import product -import logging -from io import BytesIO - -import traitlets as tl -import numpy as np - -import podpac -from podpac.core.data.rasterio_source import RasterioRaw -from podpac.compositor import TileCompositorRaw -from podpac.interpolators import InterpolationMixin -from podpac.interpolators import RasterioInterpolator, ScipyGrid, 
ScipyPoint -from podpac.utils import cached_property -from podpac.authentication import S3Mixin - -#### -# private module attributes -#### - -# create log for module -_logger = logging.getLogger(__name__) -ZOOM_SIZES = [ - 8271.5169531233, - 39135.75848200978, - 19567.87924100587, - 9783.939620502935, - 4891.969810250487, - 2445.9849051252454, - 1222.9924525636013, - 611.4962262818025, - 305.7481131408976, - 152.8740565714275, - 76.43702828571375, - 38.218514142856876, - 19.109257072407146, - 9.554628536203573, - 4.777314268103609, -] - - -class TerrainTilesSourceRaw(RasterioRaw): - """DataSource to handle individual TerrainTiles raster files - - Parameters - ---------- - source : str - Path to the sourcefile on S3 - - Attributes - ---------- - dataset : :class:`rasterio.io.DatasetReader` - rasterio dataset - """ - - anon = tl.Bool(True) - - @tl.default("crs") - def _default_crs(self): - if "geotiff" in self.source: - return "EPSG:3857" - if "terrarium" in self.source: - return "EPSG:3857" - if "normal" in self.source: - return "EPSG:3857" - - def download(self, path="terraintiles"): - """ - Download the TerrainTile file from S3 to a local file. - This is a convience method for users and not used by PODPAC machinery. - - Parameters - ---------- - path : str - Subdirectory to put files. Defaults to 'terraintiles'. - Within this directory, the tile files will retain the same directory structure as on S3. - """ - - filename = os.path.split(self.source)[1] # get filename off of source - joined_path = os.path.join(path, os.path.split(self.source)[0].replace("s3://", "")) # path to file - filepath = os.path.abspath(os.path.join(joined_path, filename)) - - # make the directory if it hasn't been made already - if not os.path.exists(joined_path): - os.makedirs(joined_path) - - # download the file - _logger.debug("Downloading terrain tile {} to filepath: {}".format(self.source, filepath)) - self.s3.get(self.source, filepath) - - # this is a little crazy, but I get floating point issues with indexing if i don't round to 7 decimal digits - def get_coordinates(self): - coordinates = super(TerrainTilesSourceRaw, self).get_coordinates() - - for dim in coordinates: - coordinates[dim] = np.round(coordinates[dim].coordinates, 6) - - return coordinates - - -class TerrainTilesComposite(TileCompositorRaw): - """Terrain Tiles gridded elevation tiles data library - - Hosted on AWS S3 - https://registry.opendata.aws/terrain-tiles/ - - Description - Gridded elevation tiles - Resource type - S3 Bucket - Amazon Resource Name (ARN) - arn:aws:s3:::elevation-tiles-prod - AWS Region - us-east-1 - - Documentation: https://mapzen.com/documentation/terrain-tiles/ - - Parameters - ---------- - zoom : int - Zoom level of tiles, in [0, ..., 14]. Defaults to 7. A value of "-1" will automatically determine the zoom level. - WARNING: When automatic zoom is used, evaluating points (stacked lat,lon) uses the maximum zoom level (level 14) - tile_format : str - One of ['geotiff', 'terrarium', 'normal']. Defaults to 'geotiff' - PODPAC node can only evaluate 'geotiff' formats. - Other tile_formats can be specified for :meth:`download` - No support for 'skadi' formats at this time. - bucket : str - Bucket of the terrain tiles. 
- Defaults to 'elevation-tiles-prod' - """ - - # parameters - zoom = tl.Int(default_value=-1).tag(attr=True) - tile_format = tl.Enum(["geotiff", "terrarium", "normal"], default_value="geotiff").tag(attr=True) - bucket = tl.Unicode(default_value="elevation-tiles-prod").tag(attr=True) - sources = [] # these are loaded as needed - urls = tl.List(trait=tl.Unicode()).tag(attr=True) # Maps directly to sources - dims = ["lat", "lon"] - anon = tl.Bool(True) - - def _zoom(self, coordinates): - if self.zoom >= 0: - return self.zoom - crds = coordinates.transform("EPSG:3857") - if coordinates.is_stacked("lat") or coordinates.is_stacked("lon"): - return len(ZOOM_SIZES) - 1 - steps = [] - for crd in crds.values(): - if crd.name not in ["lat", "lon"]: - continue - if crd.size == 1: - continue - if isinstance(crd, podpac.coordinates.UniformCoordinates1d): - steps.append(np.abs(crd.step)) - elif isinstance(crd, podpac.coordinates.ArrayCoordinates1d): - steps.append(np.abs(np.diff(crd.coordinates)).min()) - else: - continue - if not steps: - return len(ZOOM_SIZES) - 1 - - step = min(steps) / 2 - zoom = 0 - for z, zs in enumerate(ZOOM_SIZES): - zoom = z - if zs < step: - break - return zoom - - def select_sources(self, coordinates, _selector=None): - # get all the tile sources for the requested zoom level and coordinates - sources = get_tile_urls(self.tile_format, self._zoom(coordinates), coordinates) - urls = ["s3://{}/{}".format(self.bucket, s) for s in sources] - - # create TerrainTilesSourceRaw classes for each url source - self.sources = self._create_composite(urls) - if self.trait_is_defined("interpolation") and self.interpolation is not None: - for s in self.sources: - if s.has_trait("interpolation"): - s.set_trait("interpolation", self.interpolation) - return self.sources - - def find_coordinates(self): - return [podpac.coordinates.union([source.coordinates for source in self.sources])] - - def download(self, path="terraintiles"): - """ - Download active terrain tile source files to local directory - - Parameters - ---------- - path : str - Subdirectory to put files. Defaults to 'terraintiles'. - Within this directory, the tile files will retain the same directory structure as on S3. - """ - - try: - for source in self.sources[0].sources: - source.download(path) - except tl.TraitError as e: - raise ValueError("No terrain tile sources selected. Evaluate node at coordinates to select sources.") from e - - def _create_composite(self, urls): - # Share the s3 connection - sample_source = TerrainTilesSourceRaw( - source=urls[0], - cache_ctrl=self.cache_ctrl, - force_eval=self.force_eval, - cache_output=self.cache_output, - cache_dataset=True, - ) - return [ - TerrainTilesSourceRaw( - source=url, - s3=sample_source.s3, - cache_ctrl=self.cache_ctrl, - force_eval=self.force_eval, - cache_output=self.cache_output, - cache_dataset=True, - ) - for url in urls - ] - - -class TerrainTiles(InterpolationMixin, TerrainTilesComposite): - """Terrain Tiles gridded elevation tiles data library - - Hosted on AWS S3 - https://registry.opendata.aws/terrain-tiles/ - - Description - Gridded elevation tiles - Resource type - S3 Bucket - Amazon Resource Name (ARN) - arn:aws:s3:::elevation-tiles-prod - AWS Region - us-east-1 - - Documentation: https://mapzen.com/documentation/terrain-tiles/ - - Parameters - ---------- - zoom : int - Zoom level of tiles. Defaults to 6. - tile_format : str - One of ['geotiff', 'terrarium', 'normal']. Defaults to 'geotiff' - PODPAC node can only evaluate 'geotiff' formats. 
- Other tile_formats can be specified for :meth:`download` - No support for 'skadi' formats at this time. - bucket : str - Bucket of the terrain tiles. - Defaults to 'elevation-tiles-prod' - """ - - pass - - -############ -# Utilities -############ -def get_tile_urls(tile_format, zoom, coordinates=None): - """Get tile urls for a specific zoom level and geospatial coordinates - - Parameters - ---------- - tile_format : str - format of the tile to get - zoom : int - zoom level - coordinates : :class:`podpac.Coordinates`, optional - only return tiles within coordinates - - Returns - ------- - list of str - list of tile urls - """ - - # get all the tile definitions for the requested zoom level - tiles = _get_tile_tuples(zoom, coordinates) - - # get source urls - return [_tile_url(tile_format, x, y, z) for (x, y, z) in tiles] - - -############ -# Private Utilites -############ - - -def _get_tile_tuples(zoom, coordinates=None): - """Query for tiles within podpac coordinates - - This method allows you to get the available tiles in a given spatial area. - This will work for all :attr:`TILE_FORMAT` types - - Parameters - ---------- - coordinates : :class:`podpac.coordinates.Coordinates` - Find available tiles within coordinates - zoom : int, optional - zoom level - - Raises - ------ - TypeError - Description - - Returns - ------- - list of tuple - list of tile tuples (x, y, zoom) for zoom level and coordinates - """ - - # if no coordinates are supplied, get all tiles for zoom level - if coordinates is None: - # get whole world - tiles = _get_tiles_grid([-90, 90], [-180, 180], zoom) - - # down select tiles based on coordinates - else: - _logger.debug("Getting tiles for coordinates {}".format(coordinates)) - - if "lat" not in coordinates.udims or "lon" not in coordinates.udims: - raise TypeError("input coordinates must have lat and lon dimensions to get tiles") - - # transform to WGS84 (epsg:4326) to use the mapzen example for transforming coordinates to tilespace - # it doesn't seem to conform to standard google tile indexing - c = coordinates.transform("epsg:4326") - - # point coordinates - if "lat_lon" in c.dims or "lon_lat" in c.dims: - lat_lon = zip(c["lat"].coordinates, c["lon"].coordinates) - - tiles = [] - for (lat, lon) in lat_lon: - tile = _get_tiles_point(lat, lon, zoom) - if tile not in tiles: - tiles.append(tile) - - # gridded coordinates - else: - lat_bounds = c["lat"].bounds - lon_bounds = c["lon"].bounds - - tiles = _get_tiles_grid(lat_bounds, lon_bounds, zoom) - - return tiles - - -def _tile_url(tile_format, x, y, zoom): - """Build S3 URL prefix - - The S3 bucket is organized {tile_format}/{z}/{x}/{y}.tif - - Parameters - ---------- - tile_format : str - One of 'terrarium', 'normal', 'geotiff' - zoom : int - zoom level - x : int - x tilespace coordinate - y : int - x tilespace coordinate - - Returns - ------- - str - Bucket prefix - - Raises - ------ - TypeError - """ - - tile_url = "{tile_format}/{zoom}/{x}/{y}.{ext}" - ext = {"geotiff": "tif", "normal": "png", "terrarium": "png"} - - return tile_url.format(tile_format=tile_format, zoom=zoom, x=x, y=y, ext=ext[tile_format]) - - -def _get_tiles_grid(lat_bounds, lon_bounds, zoom): - """ - Convert geographic bounds into a list of tile coordinates at given zoom. 
- Adapted from https://github.com/tilezen/joerd - - Parameters - ---------- - lat_bounds : :class:`np.array` of float - [min, max] bounds from lat (y) coordinates - lon_bounds : :class:`np.array` of float - [min, max] bounds from lon (x) coordinates - zoom : int - zoom level - - Returns - ------- - list of tuple - list of tuples (x, y, zoom) describing the tiles to cover coordinates - """ - - # convert to mercator - xm_min, ym_min = _mercator(lat_bounds[1], lon_bounds[0]) - xm_max, ym_max = _mercator(lat_bounds[0], lon_bounds[1]) - - # convert to tile-space bounding box - xmin, ymin = _mercator_to_tilespace(xm_min, ym_min, zoom) - xmax, ymax = _mercator_to_tilespace(xm_max, ym_max, zoom) - - # generate a list of tiles - xs = range(xmin, xmax + 1) - ys = range(ymin, ymax + 1) - - tiles = [(x, y, zoom) for (y, x) in product(ys, xs)] - return tiles - - -def _get_tiles_point(lat, lon, zoom): - """Get tiles at a single point and zoom level - - Parameters - ---------- - lat : float - latitude - lon : float - longitude - zoom : int - zoom level - - Returns - ------- - tuple - (x, y, zoom) tile url - """ - xm, ym = _mercator(lat, lon) - x, y = _mercator_to_tilespace(xm, ym, zoom) - - return x, y, zoom - - -def _mercator(lat, lon): - """Convert latitude, longitude to x, y mercator coordinate at given zoom - Adapted from https://github.com/tilezen/joerd - - Parameters - ---------- - lat : float - latitude - lon : float - longitude - - Returns - ------- - tuple - (x, y) float mercator coordinates - """ - # convert to radians - x1, y1 = lon * np.pi / 180, lat * np.pi / 180 - - # project to mercator - x, y = x1, np.log(np.tan(0.25 * np.pi + 0.5 * y1) + 1e-32) - - return x, y - - -def _mercator_to_tilespace(xm, ym, zoom): - """Convert mercator to tilespace coordinates - - Parameters - ---------- - x : float - mercator x coordinate - y : float - mercator y coordinate - zoom : int - zoom level - - Returns - ------- - tuple - (x, y) int tile coordinates - """ - - tiles = 2**zoom - diameter = 2 * np.pi - x = int(tiles * (xm + np.pi) / diameter) - y = int(tiles * (np.pi - ym) / diameter) - - return x, y - - -if __name__ == "__main__": - from podpac import Coordinates, clinspace - - c = Coordinates([clinspace(40, 43, 1000), clinspace(-76, -72, 1000)], dims=["lat", "lon"]) - c2 = Coordinates( - [clinspace(40, 43, 1000), clinspace(-76, -72, 1000), ["2018-01-01", "2018-01-02"]], dims=["lat", "lon", "time"] - ) - - print("TerrainTiles") - node = TerrainTiles(tile_format="geotiff", zoom=8) - output = node.eval(c) - print(output) - - output = node.eval(c2) - print(output) - - print("TerrainTiles cached") - node = TerrainTiles(tile_format="geotiff", zoom=8, cache_ctrl=["ram", "disk"]) - output = node.eval(c) - print(output) - - # tile urls - print("get tile urls") - print(np.array(get_tile_urls("geotiff", 1))) - print(np.array(get_tile_urls("geotiff", 9, coordinates=c))) - - print("done") diff --git a/podpac/datalib/test/__init__.py b/podpac/datalib/test/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/podpac/datalib/test/coordinates_for_tests.py b/podpac/datalib/test/coordinates_for_tests.py deleted file mode 100644 index bc04085e0..000000000 --- a/podpac/datalib/test/coordinates_for_tests.py +++ /dev/null @@ -1,47 +0,0 @@ -import numpy as np - -import podpac -import podpac.datalib - -# Create some nodes to help get realistic coordinates -cosmos = podpac.datalib.cosmos_stations.COSMOSStations() -soilscape = podpac.datalib.soilscape.SoilSCAPE20min(site="Canton_OK") - -# Now do the 
coordinates -time_points = podpac.crange("2016-01-01", "2016-02-01", "1,D", "time") -# Soilscape coordinates -soilscape_points = soilscape.make_coordinates(time="2016-01-01") -soilscape_region = podpac.Coordinates( - [ - podpac.clinspace(soilscape_points["lat"].bounds[1], soilscape_points["lat"].bounds[0], 64), - podpac.clinspace(soilscape_points["lon"].bounds[0], soilscape_points["lon"].bounds[1], 64), - "2016-01-01", - 4.0, - ], - dims=["lat", "lon", "time", "alt"], -) -soilscape_timeseries = podpac.coordinates.merge_dims( - [soilscape_points[:2].drop("time"), podpac.Coordinates([time_points], crs=soilscape_points.crs)] -) - -# COSMOS coordinates -cosmos_points = cosmos.source_coordinates.select({"lat": [36, 37], "lon": [-98, -97]}) -cosmos_region = podpac.Coordinates( - [ - podpac.clinspace(cosmos_points["lat"].bounds[1], cosmos_points["lat"].bounds[0], 64), - podpac.clinspace(cosmos_points["lon"].bounds[0], cosmos_points["lon"].bounds[1], 64), - "2016-01-01", - ], - dims=["lat", "lon", "time"], -) -cosmos_timeseries = podpac.coordinates.merge_dims( - [cosmos_points, podpac.Coordinates([time_points], crs=cosmos_points.crs)] -) - -COORDINATES = { - "soilscape_points": soilscape_points, - "soilscape_region": soilscape_region, - "soilscape_timeseries": soilscape_timeseries, - "cosmos_region": cosmos_region, - "cosmos_timeseries": cosmos_timeseries, -} diff --git a/podpac/datalib/test/test_cosmos.py b/podpac/datalib/test/test_cosmos.py deleted file mode 100644 index 842a97488..000000000 --- a/podpac/datalib/test/test_cosmos.py +++ /dev/null @@ -1,32 +0,0 @@ -import numpy as np -import pytest - -from .coordinates_for_tests import COORDINATES -import podpac.datalib.cosmos_stations -from podpac import Coordinates, clinspace - - -@pytest.mark.integration -class TestCOSMOS(object): - def test_common_coordinates(self): - point_interpolation = { - "method": "nearest", - "params": {"use_selector": False, "remove_nan": True, "time_scale": "1,M", "respect_bounds": False}, - } - cosmos = podpac.datalib.cosmos_stations.COSMOSStations() - cosmos_raw = podpac.datalib.cosmos_stations.COSMOSStationsRaw() - cosmos_filled = podpac.datalib.cosmos_stations.COSMOSStations(interpolation=point_interpolation) - for ck, c in COORDINATES.items(): - if ck != "cosmos_region": - continue - print("Evaluating: ", ck) - o_f = cosmos_filled.eval(c) - assert np.any(np.isfinite(o_f.data)) - o = cosmos.eval(c) - o_r = cosmos.eval(c) - if "soilscape" in ck: - assert np.any(np.isnan(o.data)) - assert np.any(np.isnan(o_r.data)) - continue - assert np.any(np.isfinite(o.data)) - assert np.any(np.isfinite(o_r.data)) diff --git a/podpac/datalib/test/test_gfs.py b/podpac/datalib/test/test_gfs.py deleted file mode 100644 index 9d098195f..000000000 --- a/podpac/datalib/test/test_gfs.py +++ /dev/null @@ -1,61 +0,0 @@ -import datetime - -import pytest -import s3fs - -import podpac -from podpac.datalib import gfs - - -@pytest.mark.skip("Broken, GFS data source structure changed. 
") -@pytest.mark.integration -class TestGFS(object): - parameter = "SOIM" - level = "0-10 m DPTH" - - @classmethod - def setup_class(cls): - # find an existing date - s3 = s3fs.S3FileSystem(anon=True) - prefix = "%s/%s/%s/" % (gfs.BUCKET, cls.parameter, cls.level) - dates = [path.replace(prefix, "") for path in s3.ls(prefix)] - cls.date = dates[0] - - def test_source(self): - # specify source datetime and forecast - gfs_soim = gfs.GFSSourceRaw( - parameter=self.parameter, - level=self.level, - date=self.date, - hour="1200", - forecast="003", - anon=True, - ) - - o = gfs_soim.eval(gfs_soim.coordinates) - - def test_composited(self): - # specify source datetime, select forecast at evaluation from time coordinates - gfs_soim = gfs.GFS(parameter=self.parameter, level=self.level, date=self.date, hour="1200", anon=True) - - # whole world forecast at 15:30 - forecast_time = datetime.datetime.strptime(self.date + " 15:30", "%Y%m%d %H:%M") - coords = gfs_soim.sources[0].coordinates - c = podpac.Coordinates([coords["lat"], coords["lon"], forecast_time], dims=["lat", "lon", "time"]) - o = gfs_soim.eval(c) - - # time series: get the forecast at lat=42, lon=275 every hour for 6 hours - start = forecast_time - stop = forecast_time + datetime.timedelta(hours=6) - c = podpac.Coordinates([42, 282, podpac.crange(start, stop, "1,h")], dims=["lat", "lon", "time"]) - o = gfs_soim.eval(c) - - def test_latest(self): - # get latest source, select forecast at evaluation - gfs_soim = gfs.GFSLatest(parameter=self.parameter, level=self.level, anon=True) - - # latest whole world forecast - forecast_time = datetime.datetime.strptime(gfs_soim.date + " " + gfs_soim.hour, "%Y%m%d %H%M") - coords = gfs_soim.sources[0].coordinates - c = podpac.Coordinates([coords["lat"], coords["lon"], forecast_time], dims=["lat", "lon", "time"]) - o = gfs_soim.eval(c) diff --git a/podpac/datalib/test/test_modis.py b/podpac/datalib/test/test_modis.py deleted file mode 100644 index d46af200f..000000000 --- a/podpac/datalib/test/test_modis.py +++ /dev/null @@ -1,16 +0,0 @@ -import numpy as np -import pytest - -from .coordinates_for_tests import COORDINATES -import podpac.datalib -from podpac import Coordinates, clinspace - - -@pytest.mark.integration -class TestMODIS(object): - def test_common_coordinates(self): - modis = podpac.datalib.modis_pds.MODIS(product="MCD43A4.006", data_key="B01") # Band 01, 620 - 670nm - for ck, c in COORDINATES.items(): - print("Evaluating: ", ck) - o = modis.eval(c) - assert np.any(np.isfinite(o.data)) diff --git a/podpac/datalib/test/test_satutils.py b/podpac/datalib/test/test_satutils.py deleted file mode 100644 index 92ea3ffb9..000000000 --- a/podpac/datalib/test/test_satutils.py +++ /dev/null @@ -1,42 +0,0 @@ -import datetime - -import numpy as np -import pytest - -import podpac - -STAC_API_URL = "https://earth-search.aws.element84.com/v0" - - -@pytest.mark.integration -class TestLandsat8(object): - def test_landsat8(self): - lat = [39.5, 40.5] - lon = [-110, -105] - time = ["2020-12-09", "2020-12-10"] - c = podpac.Coordinates([lat, lon, time], dims=["lat", "lon", "time"]) - - node = podpac.datalib.satutils.Landsat8( - stac_api_url=STAC_API_URL, - asset="B01", - ) - output = node.eval(c) - assert np.isfinite(output).sum() > 0 - - -@pytest.mark.skip(reason="requester pays") -@pytest.mark.integration -class TestSentinel2(object): - def test_sentinel2(self): - lat = [39.5, 40.5] - lon = [-110, -105] - time = ["2020-12-09", "2020-12-10"] - c = podpac.Coordinates([lat, lon, time], dims=["lat", "lon", "time"]) - - 
with podpac.settings: - podpac.settings["AWS_REQUESTER_PAYS"] = True - node = podpac.datalib.satutils.Sentinel2( - stac_api_url=STAC_API_URL, asset="B01", aws_region_name="eu-central-1" - ) - output = node.eval(c) - assert np.isfinite(output).sum() > 0 diff --git a/podpac/datalib/test/test_smap_egi.py b/podpac/datalib/test/test_smap_egi.py deleted file mode 100644 index 3b76de774..000000000 --- a/podpac/datalib/test/test_smap_egi.py +++ /dev/null @@ -1,20 +0,0 @@ -import pytest -import podpac - - -@pytest.mark.integration -class TestSMAP_EGI(object): - def test_eval_level_3(self): - # level 3 access - c = podpac.Coordinates( - [ - podpac.clinspace(-82, -81, 10), - podpac.clinspace(38, 39, 10), - podpac.clinspace("2015-07-06", "2015-07-08", 10), - ], - dims=["lon", "lat", "time"], - ) - - node = podpac.datalib.smap_egi.SMAP(product="SPL3SMP_AM") - output = node.eval(c) - print(output) diff --git a/podpac/datalib/test/test_soilgrids.py b/podpac/datalib/test/test_soilgrids.py deleted file mode 100644 index 6cbcfc283..000000000 --- a/podpac/datalib/test/test_soilgrids.py +++ /dev/null @@ -1,16 +0,0 @@ -import numpy as np -import pytest - -from .coordinates_for_tests import COORDINATES -import podpac.datalib -from podpac import Coordinates, clinspace - - -@pytest.mark.integration -class TestSoilGrids(object): - def test_common_coordinates(self): - soil_organic_carbon = podpac.datalib.soilgrids.SoilGridsSOC(layer="soc_0-5cm_Q0.95") - for ck, c in COORDINATES.items(): - print("Evaluating: ", ck) - o = soil_organic_carbon.eval(c) - assert np.any(np.isfinite(o.data)) diff --git a/podpac/datalib/test/test_soilscape.py b/podpac/datalib/test/test_soilscape.py deleted file mode 100644 index a7ea0c6a2..000000000 --- a/podpac/datalib/test/test_soilscape.py +++ /dev/null @@ -1,41 +0,0 @@ -import numpy as np -import pytest - -from .coordinates_for_tests import COORDINATES -import podpac.datalib - - -@pytest.mark.integration -class TestSoilscape(object): - def test_common_coordinates(self): - point_interpolation = { - "method": "nearest", - "params": {"use_selector": False, "remove_nan": True, "time_scale": "1,M", "respect_bounds": False}, - } - soilscape = podpac.datalib.soilscape.SoilSCAPE20min( - site="Canton_OK", data_key="soil_moisture", interpolation=point_interpolation - ) - for ck, c in COORDINATES.items(): - if "cosmos" in ck: - continue - print("Evaluating: ", ck) - o = soilscape.eval(c) - assert np.any(np.isfinite(o.data)) - - def test_site_raw(self): - sm = podpac.datalib.soilscape.SoilSCAPE20minRaw(site="Canton_OK", data_key="soil_moisture") - coords_source = sm.make_coordinates(time=sm.sources[0].coordinates["time"][:5]) - coords_interp_time = sm.make_coordinates(time="2016-01-01") - coords_interp_alt = sm.make_coordinates(time=sm.sources[0].coordinates["time"][:5], depth=5) - o1 = sm.eval(coords_source) - o2 = sm.eval(coords_interp_time) - o3 = sm.eval(coords_interp_alt) - - def test_site_interpolated(self): - sm = podpac.datalib.soilscape.SoilSCAPE20min(site="Canton_OK", data_key="soil_moisture") - coords_source = sm.make_coordinates(time=sm.sources[0].coordinates["time"][:5]) - coords_interp_time = sm.make_coordinates(time="2016-01-01") - coords_interp_alt = sm.make_coordinates(time=sm.sources[0].coordinates["time"][:5], depth=5) - o1 = sm.eval(coords_source) - o2 = sm.eval(coords_interp_time) - o3 = sm.eval(coords_interp_alt) diff --git a/podpac/datalib/test/test_terrain_tiles.py b/podpac/datalib/test/test_terrain_tiles.py deleted file mode 100644 index 7a9b59acc..000000000 --- 
a/podpac/datalib/test/test_terrain_tiles.py +++ /dev/null @@ -1,38 +0,0 @@ -import numpy as np -import pytest - -from .coordinates_for_tests import COORDINATES -from podpac.datalib.terraintiles import TerrainTiles, get_tile_urls -from podpac import Coordinates, clinspace - - -@pytest.mark.integration -class TestTerrainTiles(object): - def test_common_coordinates(self): - node = TerrainTiles() - for ck, c in COORDINATES.items(): - print("Evaluating: ", ck) - o = node.eval(c) - assert np.any(np.isfinite(o.data)) - - def test_terrain_tiles(self): - c = Coordinates([clinspace(40, 43, 1000), clinspace(-76, -72, 1000)], dims=["lat", "lon"]) - c2 = Coordinates( - [clinspace(40, 43, 1000), clinspace(-76, -72, 1000), ["2018-01-01", "2018-01-02"]], - dims=["lat", "lon", "time"], - ) - - node = TerrainTiles(tile_format="geotiff", zoom=8) - output = node.eval(c) - assert np.any(np.isfinite(output)) - - output = node.eval(c2) - assert np.any(np.isfinite(output)) - - node = TerrainTiles(tile_format="geotiff", zoom=8, cache_ctrl=["ram", "disk"]) - output = node.eval(c) - assert np.any(np.isfinite(output)) - - # tile urls - print(np.array(get_tile_urls("geotiff", 1))) - print(np.array(get_tile_urls("geotiff", 9, coordinates=c))) diff --git a/podpac/datalib/test/test_weathercitizen.py b/podpac/datalib/test/test_weathercitizen.py deleted file mode 100644 index 46788fe2d..000000000 --- a/podpac/datalib/test/test_weathercitizen.py +++ /dev/null @@ -1,21 +0,0 @@ -import warnings -import pytest -import podpac - - -@pytest.mark.integration -class TestWeatherCitizen(object): - data_key = "pressure" - uuid = "re5wm615" - - def test_eval_source_coordinates(self): - node = podpac.datalib.weathercitizen.WeatherCitizen(data_key=self.data_key, uuid=self.uuid) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="parsing timezone aware datetimes is deprecated") - o = node.eval(node.coordinates[:3]) - - def test_eval_interpolated(self): - node = podpac.datalib.weathercitizen.WeatherCitizen(data_key=self.data_key, uuid=self.uuid) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="parsing timezone aware datetimes is deprecated") - o = node.eval(podpac.Coordinates([0, 0], dims=["lat", "lon"])) diff --git a/podpac/datalib/weathercitizen.py b/podpac/datalib/weathercitizen.py deleted file mode 100644 index 391de7225..000000000 --- a/podpac/datalib/weathercitizen.py +++ /dev/null @@ -1,684 +0,0 @@ -""" -Weather Citizen - -Crowd sourced environmental observations from mobile devices (https://weathercitizen.org) - -- Documentation: https://weathercitizen.org/docs -- API: https://api.weathercitizen.org - -Requires - -- requests: `pip install requests` -- pandas: `pip install pandas` - -Optionally: - -- read_protobuf: `pip install read-protobuf` - decodes sensor burst media files -""" - -import json -from datetime import datetime, timedelta -import logging -from copy import deepcopy - -import traitlets as tl -import pandas as pd -import numpy as np -import requests - -from podpac.interpolators import InterpolationMixin -from podpac.core.data.datasource import DataSource, COMMON_DATA_DOC -from podpac.core.utils import common_doc, trait_is_defined -from podpac.core.coordinates import Coordinates, UniformCoordinates1d, ArrayCoordinates1d, StackedCoordinates - - -URL = "https://api.weathercitizen.org/" -DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" # always UTC (ISO 8601 / RFC 3339 format) - -# create log for module -_logger = logging.getLogger(__name__) - - -class 
WeatherCitizen(InterpolationMixin, DataSource): - """DataSource to handle WeatherCitizen data - - Attributes - ---------- - source : str - Collection (database) to pull data from. - Defaults to "geosensors" which is the primary data collection - data_key : str, int - Data key of interest, default "properties.pressure" - uuid : str, list(str), options - String or list of strings to filter data by uuid - device : str, list(str), ObjectId, list(ObjectId), optional - String or list of strings to filter data by device object id - version : string, list(str), optional - String or list of strings to filter data to filter data by WeatherCitizen version - query : dict, optional - Arbitrary pymongo query to apply to data. - Note that certain fields in this query may be overriden if other keyword arguments are specified - verbose : bool, optional - Display log messages or progress - """ - - source = tl.Unicode(allow_none=True, default_value="geosensors").tag(attr=True, required=True) - data_key = tl.Unicode(allow_none=True, default_value="properties.pressure").tag(attr=True) - uuid = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) - device = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) - version = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) - query = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) - verbose = tl.Bool(allow_none=True, default_value=True).tag(attr=True) - override_limit = tl.Bool(allow_none=True, default_value=False).tag(attr=True) - - @common_doc(COMMON_DATA_DOC) - def get_coordinates(self): - """{get_coordinates}""" - - # TODO: how to limit data retrieval for large queries? - - # query parameters - start_time = datetime(2016, 1, 1, 1, 0, 0) # before WeatherCitizen existed - projection = {"properties.time": 1, "geometry.coordinates": 1} - - # make sure data_key exists in dataset - key = "properties.%s" % self.data_key - query = {key: {"$exists": True}} - - # handle if the user specifies and query and the data_key is already in that query - if self.query is not None and self.data_key in self.query: - query = deepcopy(self.query) - query[key]["$exists"] = True - - # check the length of the matched items - length = get( - collection=self.source, - start_time=start_time, - uuid=self.uuid, - device=self.device, - version=self.version, - query=query, - projection=projection, - verbose=self.verbose, - return_length=True, - ) - - # add some kind of stop on querying above a certain length? - if length > 10000 and not self.override_limit: - raise ValueError( - "More than {} data points match this WeatherCitizen query. Please reduce the scope of your query.".format( - length - ) - ) - - items = get( - collection=self.source, - start_time=start_time, - uuid=self.uuid, - device=self.device, - version=self.version, - query=query, - projection=projection, - verbose=self.verbose, - ) - - lat = [item["geometry"]["coordinates"][1] for item in items] - lon = [item["geometry"]["coordinates"][0] for item in items] - time = [item["properties"]["time"] for item in items] - - return Coordinates([[lat, lon, time]], dims=["lat_lon_time"]) - - @common_doc(COMMON_DATA_DOC) - def get_data(self, coordinates, coordinates_index): - """{get_data}""" - - # TODO: how to limit data retrieval for large queries? 
- - # default coordinate bounds for queries - time_bounds = [datetime(2016, 1, 1, 1, 0, 0), None] # before WeatherCitizen existed - lat_bounds = [-90, 90] - lon_bounds = [-180, 180] - - # override bounds - if "time" in coordinates.udims: - time_bounds = coordinates["time"].bounds - if "lat" in coordinates.udims: - lat_bounds = coordinates["lat"].bounds - if "lon" in coordinates.udims: - lon_bounds = coordinates["lon"].bounds - - box = [[lon_bounds[0], lat_bounds[0]], [lon_bounds[1], lat_bounds[1]]] - - # make sure data_key exists in dataset - key = "properties.%s" % self.data_key - query = {key: {"$exists": True}} - - # handle if the user specifies and query and the data_key is already in that query - if self.query is not None and self.data_key in self.query: - query = deepcopy(self.query) - query[key]["$exists"] = True - - # only project data key - projection = {key: 1} - - # check the length of the matched items - length = get( - collection=self.source, - start_time=time_bounds[0], - end_time=time_bounds[1], - box=box, - uuid=self.uuid, - device=self.device, - version=self.version, - query=query, - projection=projection, - verbose=self.verbose, - return_length=True, - ) - - # add some kind of stop on querying above a certain length? - if length > 10000 and not self.override_limit: - raise ValueError( - "More than {} data points match this WeatherCitizen query. Please reduce the scope of your query.".format( - length - ) - ) - - items = get( - collection=self.source, - start_time=time_bounds[0], - end_time=time_bounds[1], - box=box, - uuid=self.uuid, - device=self.device, - version=self.version, - query=query, - projection=projection, - verbose=self.verbose, - ) - - data = np.array([item["properties"][self.data_key] for item in items]) - - return self.create_output_array(coordinates, data=data) - - -############## -# Standalone functions -############## -def get( - collection="geosensors", - start_time=None, - end_time=None, - box=None, - near=None, - uuid=None, - device=None, - version=None, - query=None, - projection=None, - verbose=False, - dry_run=False, - return_length=False, -): - """Get documents from the server for devices in a timerange - - Parameters - ---------- - collection : str, list(str) - Collection(s) to query - start_time : str, datetime, optional - String or datetime for start of timerange (>=). - Defaults to 1 hour ago. - This input must be compatible with pandas `pd.to_datetime(start_time, utc=True)` - Input assumes UTC by default, but will recognize timezone string EDT, UTC, etc. For example "2019-09-01 08:00 EDT" - end_time : str, datetime, optional - Same as `start_time` but specifies end of time range (<). - Defaults to now. - box : list(list(float)), optional - Geo bounding box described as 2-d array of bottom-left and top-right corners. - If specified, `near` will be ignored. - Contents: [[ , (bottom left coordinates) ], [ , (upper right coordinates) ]] - For example: [[-83, 36], [-81, 34]] - near : tuple([float, float], int), optional - Geo bounding box described as 2-d near with a center point and a radius (km) from center point. - This input will be ignored if box is defined. 
- Contents: ([, ], ) - For example: ([-72.544655, 40.932559], 16000) - uuid : str, list(str), options - String or list of strings to filter data by uuid - device : str, list(str), ObjectId, list(ObjectId), optional - String or list of strings to filter data by device object id - version : string, list(str), optional - String or list of strings to filter data to filter data by WeatherCitizen version - query : dict, optional - Arbitrary pymongo query to apply to data. - Note that certain fields in this query may be overriden if other keyword arguments are specified - projection: dict, optional - Specify what fields should or should not be returned. - Dict keys are field names. - Dict values should be set to 1 to include field (and exclude all others) or set to 0 to exclude field and include all others - verbose : bool, optional - Display log messages or progress - dry_run : bool, optional - Return urls of queries instead of the actual query. - Returns a list of str with urls for each collections. - Defaults to False. - return_length : bool, optional - Return length of the documents that match the query - - Returns - ------- - list - List of items from server matching query. - If `dry_run` is True, returns a list or url strings for query. - """ - - # always make collection a list - if isinstance(collection, str): - collection = [collection] - - # get query string for each collection in list - query_strs = [ - _build_query( - collection=coll, - start_time=start_time, - end_time=end_time, - box=box, - near=near, - uuid=uuid, - device=device, - version=version, - query=query, - projection=projection, - ) - for coll in collection - ] - - # dry run - if dry_run: - return query_strs - - if verbose: - print("Querying WeatherCitizen API") - - # only return the length of the matched documents - if return_length: - length = 0 - for query_str in query_strs: - length += _get(query_str, verbose=verbose, return_length=return_length) - - if verbose: - print("Returned {} records".format(length)) - - return length - - # start query at page 0 with no items - # iterate through collections aggregating items - items = [] - for query_str in query_strs: - items += _get(query_str, verbose=verbose) - - if verbose: - print("\r") - print("Downloaded {} records".format(len(items))) - - return items - - -def get_record(collection, obj_id, url=URL): - """Get a single record from a collection by obj_id - - Parameters - ---------- - collection : str - Collection name - obj_id : str - Object id - """ - - # check url - if url[-1] != "/": - url = "{}/".format(url) - - # query the server - r = requests.get(url + collection + "/" + obj_id) - - if r.status_code != 200: - raise ValueError("Failed to query the server with status {}.\n\nResponse:\n {}".format(r.status_code, r.text)) - - return r.json() - - -def get_file(media, save=False, output_path=None): - """Get media file - - Parameters - ---------- - media : str, dict - Media record or media record object id in the media or geomedia collections. 
- save : bool, optional - Save to file - output_path : None, optional - If save is True, output the file to different file path - - Returns - ------- - bytes - If output_path is None, returns raw file content as bytes - - Raises - ------ - ValueError - Description - """ - - if isinstance(media, str): - media_id = media - elif isinstance(media, dict): - media_id = media["_id"] - - try: - record = get_record("media", media_id) - except ValueError: - try: - record = get_record("geomedia", media_id) - - except ValueError: - raise ValueError("Media id {} not found in the database".format(media_id)) - - # get file - r = requests.get(record["file"]["url"]) - - if r.status_code != 200: - raise ValueError( - "Failed to download binary data with status code {}.\n\nResponse:\n {}".format(r.status_code, r.text) - ) - - # save to file if output_path is not None - if save: - if output_path is None: - output_path = record["properties"]["filename"] - with open(output_path, "wb") as f: - f.write(r.content) - else: - return r.content - - -def read_sensorburst(media): - """Download and read sensorburst records. - - Requires: - - read-protobuf: `pip install read-protobuf` - - sensorburst_pb2: Download from https://api.weathercitizen.org/static/sensorburst_pb2.py - - Once downloaded, put this file in the directory as your analysis - - Parameters - ---------- - media : str, dict, list of str, list of dict - Media record(s) or media record object id(s) in the media or geomedia collections. - - Returns - ------- - pd.DataFrame - Returns pandas dataframe of records - """ - - try: - from read_protobuf import read_protobuf - except ImportError: - raise ImportError( - "Reading sensorburst requires `read_protobuf` module. Install using `pip install read-protobuf`." - ) - - # import sensorburst definition - try: - from podpac.datalib import weathercitizen_sensorburst_pb2 as sensorburst_pb2 - except ImportError: - try: - import sensorburst_pb2 - except ImportError: - raise ImportError( - "Processing WeatherCitizen protobuf requires `sensorburst_pb2.py` in the current working directory. Download from https://api.weathercitizen.org/static/sensorburst_pb2.py." 
- ) - - if isinstance(media, (str, dict)): - media = [media] - - # get pb content - pbs = [get_file(m) for m in media] - - # initialize protobuf object - Burst = sensorburst_pb2.Burst() - - # get the first dataframe - df = read_protobuf(pbs[0], Burst) - - # append later dataframes - if len(pbs) > 1: - for pb in pbs[1:]: - df = df.append(read_protobuf(pb, Burst), sort=False) - - return df - - -def to_dataframe(items): - """Create normalized dataframe from records - - Parameters - ---------- - items : list of dict - Record items returned from `get()` - """ - df = pd.json_normalize(items) - - # Convert geometry.coordinates to lat and lon - df["lat"] = df["geometry.coordinates"].apply(lambda coord: coord[1] if coord and coord is not np.nan else None) - df["lon"] = df["geometry.coordinates"].apply(lambda coord: coord[0] if coord and coord is not np.nan else None) - df = df.drop(["geometry.coordinates"], axis=1) - - # break up all the arrays so the data is easier to use - arrays = [ - "properties.accelerometer", - "properties.gravity", - "properties.gyroscope", - "properties.linear_acceleration", - "properties.magnetic_field", - "properties.orientation", - "properties.rotation_vector", - ] - - for col in arrays: - df[col + "_0"] = df[col].apply(lambda val: val[0] if val and val is not np.nan else None) - df[col + "_1"] = df[col].apply(lambda val: val[1] if val and val is not np.nan else None) - df[col + "_2"] = df[col].apply(lambda val: val[2] if val and val is not np.nan else None) - - df = df.drop([col], axis=1) - - return df - - -def to_csv(items, filename="weathercitizen-data.csv"): - """Convert items to CSV output - - Parameters - ---------- - items : list of dict - Record items returned from `get()` - """ - - df = to_dataframe(items) - - df.to_csv(filename) - - -def update_progress(current, total): - """ - Parameters - ---------- - current : int, float - current number - total : int, floar - total number - """ - - if total == 0: - return - - progress = float(current / total) - bar_length = 20 - block = int(round(bar_length * progress)) - text = "Progress: |{0}| [{1} / {2}]".format("#" * block + " " * (bar_length - block), current, total) - - print("\r", text, end="") - - -def _build_query( - collection="geosensors", - start_time=None, - end_time=None, - box=None, - near=None, - uuid=None, - device=None, - version=None, - query=None, - projection=None, -): - """Build a query string for a single collection. 
- See :func:`get` for type definitions of each input - - Returns - ------- - string - query string - """ - - if query is None: - query = {} - - # filter by time - # default to 1 hour ago - one_hour_ago = (datetime.utcnow() - timedelta(hours=1)).strftime(DATE_FORMAT) - if start_time is not None: - start_time = pd.to_datetime(start_time, utc=True, infer_datetime_format=True).strftime(DATE_FORMAT) - query["properties.time"] = {"$gte": start_time} - else: - query["properties.time"] = {"$gte": one_hour_ago} - - # default to now - if end_time is not None: - end_time = pd.to_datetime(end_time, utc=True, infer_datetime_format=True).strftime(DATE_FORMAT) - query["properties.time"]["$lte"] = end_time - - # geo bounding box - if box is not None: - if len(box) != 2: - raise ValueError("box parameter must be a list of length 2") - - query["geometry"] = {"$geoWithin": {"$box": box}} - - # geo bounding circle - if near is not None: - if len(near) != 2 or not isinstance(near, tuple): - raise ValueError("near parameter must be a tuple of length 2") - - query["geometry"] = {"$near": {"$geometry": {"type": "Point", "coordinates": near[0]}, "$maxDistance": near[1]}} - - # specify uuid - if uuid is not None: - if isinstance(uuid, str): - query["properties.uuid"] = uuid - elif isinstance(uuid, list): - query["properties.uuid"] = {"$in": uuid} - - # specify device - if device is not None: - if isinstance(device, str): - query["properties.device"] = device - elif isinstance(device, list): - query["properties.device"] = {"$in": device} - - # specify version - if version is not None: - if isinstance(version, str): - query["version"] = version - elif isinstance(version, list): - query["version"] = {"$in": version} - - # add collection to query string and handle projection - if projection is not None: - query_str = "{}?where={}&projection={}".format(collection, json.dumps(query), json.dumps(projection)) - else: - query_str = "{}?where={}".format(collection, json.dumps(query)) - - return query_str - - -def _get(query, items=None, url=URL, verbose=False, return_length=False): - """Internal method to query API. - See `get` for interface. - - Parameters - ---------- - query : dict, str - query dict or string - if dict, it will be converted into a string with json.dumps() - items : list, optional - aggregated items as this method is recursively called. Defaults to []. - url : str, optional - API url. Defaults to module URL. 
- verbose : bool, optional - Display log messages or progress - return_length : bool, optional - Return length of the documents that match the query - - Returns - ------- - list - - Raises - ------ - ValueError - Description - """ - - # if items are none, set to [] - if items is None: - items = [] - - # check url - if url[-1] != "/": - url = "{}/".format(url) - - # query the server - r = requests.get(url + query) - - if r.status_code != 200: - raise ValueError("Failed to query the server with status {}.\n\nResponse:\n {}".format(r.status_code, r.text)) - - # get json out of response - resp = r.json() - - # return length only if requested - if return_length: - return resp["_meta"]["total"] - - # return documents - if len(resp["_items"]): - - # show progress - if verbose: - current_page = resp["_meta"]["page"] - total_pages = round(resp["_meta"]["total"] / resp["_meta"]["max_results"]) - update_progress(current_page, total_pages) - - # append items - items += resp["_items"] - - # get next set, if in links - if "_links" in resp and "next" in resp["_links"]: - return _get(resp["_links"]["next"]["href"], items=items) - else: - return items - else: - return items diff --git a/podpac/datalib/weathercitizen_sensorburst_pb2.py b/podpac/datalib/weathercitizen_sensorburst_pb2.py deleted file mode 100644 index 94d4f09fa..000000000 --- a/podpac/datalib/weathercitizen_sensorburst_pb2.py +++ /dev/null @@ -1,585 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: sensorburst.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="sensorburst.proto", - package="sensorburst", - syntax="proto3", - serialized_options=_b("H\003"), - serialized_pb=_b( - '\n\x11sensorburst.proto\x12\x0bsensorburst"\xbf\x04\n\x06Record\x12\x0c\n\x04time\x18\x01 \x01(\x03\x12\x0c\n\x04long\x18\x02 \x01(\x02\x12\x0b\n\x03lat\x18\x03 \x01(\x02\x12\x10\n\x08\x61ltitude\x18\x04 \x01(\x02\x12\x13\n\x0btemperature\x18\x05 \x01(\x02\x12\x10\n\x08pressure\x18\x06 \x01(\x02\x12\r\n\x05light\x18\x07 \x01(\x02\x12\x11\n\tproximity\x18\x08 \x01(\x05\x12\x17\n\x0f\x61\x63\x63\x65lerometer_x\x18\t \x01(\x02\x12\x17\n\x0f\x61\x63\x63\x65lerometer_y\x18\n \x01(\x02\x12\x17\n\x0f\x61\x63\x63\x65lerometer_z\x18\x0b \x01(\x02\x12\x1d\n\x15linear_acceleration_x\x18\x0c \x01(\x02\x12\x1d\n\x15linear_acceleration_y\x18\r \x01(\x02\x12\x1d\n\x15linear_acceleration_z\x18\x0e \x01(\x02\x12\x15\n\rorientation_x\x18\x0f \x01(\x02\x12\x15\n\rorientation_y\x18\x10 \x01(\x02\x12\x15\n\rorientation_z\x18\x11 \x01(\x02\x12\x18\n\x10magnetic_field_x\x18\x12 \x01(\x02\x12\x18\n\x10magnetic_field_y\x18\x13 \x01(\x02\x12\x18\n\x10magnetic_field_z\x18\x14 \x01(\x02\x12\x13\n\x0bgyroscope_x\x18\x15 \x01(\x02\x12\x13\n\x0bgyroscope_y\x18\x16 \x01(\x02\x12\x13\n\x0bgyroscope_z\x18\x17 \x01(\x02\x12\x11\n\tgravity_x\x18\x18 \x01(\x02\x12\x11\n\tgravity_y\x18\x19 \x01(\x02\x12\x11\n\tgravity_z\x18\x1a \x01(\x02"-\n\x05\x42urst\x12$\n\x07records\x18\x01 \x03(\x0b\x32\x13.sensorburst.RecordB\x02H\x03\x62\x06proto3' - ), -) - - -_RECORD = _descriptor.Descriptor( - name="Record", - full_name="sensorburst.Record", - 
filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="time", - full_name="sensorburst.Record.time", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="long", - full_name="sensorburst.Record.long", - index=1, - number=2, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="lat", - full_name="sensorburst.Record.lat", - index=2, - number=3, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="altitude", - full_name="sensorburst.Record.altitude", - index=3, - number=4, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="temperature", - full_name="sensorburst.Record.temperature", - index=4, - number=5, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="pressure", - full_name="sensorburst.Record.pressure", - index=5, - number=6, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="light", - full_name="sensorburst.Record.light", - index=6, - number=7, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="proximity", - full_name="sensorburst.Record.proximity", - index=7, - number=8, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="accelerometer_x", - full_name="sensorburst.Record.accelerometer_x", - index=8, - number=9, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="accelerometer_y", - full_name="sensorburst.Record.accelerometer_y", - index=9, - number=10, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="accelerometer_z", - full_name="sensorburst.Record.accelerometer_z", - index=10, - number=11, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="linear_acceleration_x", - full_name="sensorburst.Record.linear_acceleration_x", - index=11, - number=12, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="linear_acceleration_y", - full_name="sensorburst.Record.linear_acceleration_y", - index=12, - number=13, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="linear_acceleration_z", - full_name="sensorburst.Record.linear_acceleration_z", - index=13, - number=14, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="orientation_x", - full_name="sensorburst.Record.orientation_x", - index=14, - number=15, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="orientation_y", - full_name="sensorburst.Record.orientation_y", - index=15, - number=16, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="orientation_z", - full_name="sensorburst.Record.orientation_z", - index=16, - number=17, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="magnetic_field_x", - full_name="sensorburst.Record.magnetic_field_x", - index=17, - number=18, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="magnetic_field_y", - full_name="sensorburst.Record.magnetic_field_y", - index=18, - number=19, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, 
- serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="magnetic_field_z", - full_name="sensorburst.Record.magnetic_field_z", - index=19, - number=20, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="gyroscope_x", - full_name="sensorburst.Record.gyroscope_x", - index=20, - number=21, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="gyroscope_y", - full_name="sensorburst.Record.gyroscope_y", - index=21, - number=22, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="gyroscope_z", - full_name="sensorburst.Record.gyroscope_z", - index=22, - number=23, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="gravity_x", - full_name="sensorburst.Record.gravity_x", - index=23, - number=24, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="gravity_y", - full_name="sensorburst.Record.gravity_y", - index=24, - number=25, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="gravity_z", - full_name="sensorburst.Record.gravity_z", - index=25, - number=26, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=35, - serialized_end=610, -) - - -_BURST = _descriptor.Descriptor( - name="Burst", - full_name="sensorburst.Burst", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="records", - full_name="sensorburst.Burst.records", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ) - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - 
serialized_start=612, - serialized_end=657, -) - -_BURST.fields_by_name["records"].message_type = _RECORD -DESCRIPTOR.message_types_by_name["Record"] = _RECORD -DESCRIPTOR.message_types_by_name["Burst"] = _BURST -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Record = _reflection.GeneratedProtocolMessageType( - "Record", - (_message.Message,), - { - "DESCRIPTOR": _RECORD, - "__module__": "sensorburst_pb2" - # @@protoc_insertion_point(class_scope:sensorburst.Record) - }, -) -_sym_db.RegisterMessage(Record) - -Burst = _reflection.GeneratedProtocolMessageType( - "Burst", - (_message.Message,), - { - "DESCRIPTOR": _BURST, - "__module__": "sensorburst_pb2" - # @@protoc_insertion_point(class_scope:sensorburst.Burst) - }, -) -_sym_db.RegisterMessage(Burst) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/setup.py b/setup.py index ce510f010..0db210fa1 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ ], "aws": ["awscli>=1.16", "boto3>=1.9.200", "s3fs>=0.4"], "algorithms": ["numexpr>=2.6"], + "datalib": ["podpacdatalib"], "notebook": [ "jupyterlab", "ipyleaflet", @@ -60,9 +61,7 @@ "sat-search>=0.2", "sat-stac>=0.3", ], - "node_ui": [ - "numpydoc" - ], + "node_ui": ["numpydoc"], "dev": [ "pylint>=1.8.2", "pytest-cov>=2.5.1", @@ -79,7 +78,13 @@ if sys.version_info.major == 2: extras_require["dev"] += ["pytest>=3.3.2"] else: - extras_require["dev"] += ["sphinx>=2.3, <3.0", "sphinx-rtd-theme>=0.4", "sphinx-autobuild>=0.7", "pytest>=5.0", "numpydoc"] + extras_require["dev"] += [ + "sphinx>=2.3, <3.0", + "sphinx-rtd-theme>=0.4", + "sphinx-autobuild>=0.7", + "pytest>=5.0", + "numpydoc", + ] if sys.version >= "3.6": extras_require["dev"] += [
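The new "datalib" extra is the only functional change in setup.py above (the other hunks are formatting only): it delegates the removed podpac.datalib sources to the separate podpacdatalib distribution. The following is an illustrative sketch of how the split-out package would be used after this change; the podpacdatalib import path and the TerrainTiles entry point are assumptions carried over from the old podpac.datalib API, not something this patch defines.

    # Sketch only, not part of the patch. Assumes `pip install podpac[datalib]` has
    # pulled in the separate podpacdatalib distribution and that it preserves the
    # old podpac.datalib module layout (import path below is assumed).
    import podpac
    from podpacdatalib import terraintiles  # assumed: formerly podpac.datalib.terraintiles

    # Build coordinates with the core podpac API and evaluate the node as before.
    coords = podpac.Coordinates(
        [podpac.clinspace(40, 41, 32, "lat"), podpac.clinspace(-106, -105, 32, "lon")]
    )
    elevation = terraintiles.TerrainTiles()  # assumed: same class name as the removed module
    output = elevation.eval(coords)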