diff --git a/MANIFEST.in b/MANIFEST.in index 7038e37a..074f775c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,6 +7,7 @@ graft xpublish prune docs prune tests prune notebooks +prune examples prune *.egg-info global-exclude *.nc diff --git a/codecov.yml b/codecov.yml index f07c1c5b..94d671bb 100644 --- a/codecov.yml +++ b/codecov.yml @@ -21,3 +21,7 @@ coverage: default: threshold: 0% if_not_found: success + +ignore: + - "examples" + - "docs" diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 00000000..65825d75 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,63 @@ +# XPublish Exploring Setup + +## Using Mamba + +In many cases, conda times out while trying to create this environment. In that case, install [Micromamba](https://mamba.readthedocs.io/en/latest/micromamba-installation.html) first and then run these commands. + +```bash +micromamba env create -f environment.yaml +micromamba activate xpublish-exploring +``` + +## Using conda + +> Note: This method may time out + +Create the environment and activate: + +```bash +conda env create -f environment.yaml +conda activate xpublish-exploring +``` + +## Starting the Server + +Execute `demo.py` and you should see similar output indicating success: + +`Uvicorn running on http://0.0.0.0:9000 (Press CTRL+C to quit)` + +After seeing this message, you can then navigate to this address in your browser: + +http://localhost:9000/docs + +Here you should see a web page that allows you to inspect and test the available endpoints from Xpublish. + +# XPublish Tutorial + +## Exploring available datasets + +Navigate to http://localhost:9000/datasets to return a list of available datasets running on your xpublish host. These are defined in the `TutorialsDataset` plugin class in `demo.py`. + +Inspect one of those datasets by appending the dataset name to the URL. For example, http://localhost:9000/datasets/air_temperature/ + +This returns an xarray model view of that dataset. 
+ +## Calculating the mean value + +XPublish's plugin system allows custom operations to be run against the data on the server. In the example `mean.py`, a custom plugin has been defined that allows the mean of a specific variable to be calculated. The power of this ecosystem is that any dataset in xpublish is interoperable with any sensible plugin. + +For the end-user, you can choose which variable to calculate using the URL syntax `/[dataset]/[variable]/mean`. For example, http://localhost:9000/datasets/air_temperature/air/mean + +## Working with Custom Bounding Boxes + +Another custom plugin is defined in `lme.py`. You can access this plugin at http://localhost:9000/lme/ + +The plugin defines several regions of interest (not actual LMEs, but several areas of interest from GMRI). The key or `lme_ID` references a bounding box which can be used to subset any of the datasets, assuming those overlap. + +This plugin will provide a subset of data based on the bounding box defined for each LME. The URL structure is `/[dataset]/lme/[lme_ID]`, for example, http://localhost:9000/datasets/air_temperature/lme/EC/ + +> Note: This will currently return "NaN" when the regions don't overlap. This is something we can work on as part of this breakout session. + +## Combining the Plugins + +Plugins can be combined by adding their URLs together. For example, to find the mean of a specific region, you could access http://localhost:9000/datasets/air_temperature/lme/EC/air/mean adding the variable and mean calculation to the end. diff --git a/examples/demo.py b/examples/demo.py new file mode 100644 index 00000000..c834d605 --- /dev/null +++ b/examples/demo.py @@ -0,0 +1,57 @@ +import xarray as xr +from requests import HTTPError + +from xpublish import Plugin, Rest, hookimpl + + +class TutorialDataset(Plugin): + """Demonstrates how to create a plugin to load a dataset for demo purposes. + + This uses the default xarray tutorial datasets. 
+ """ + + name: str = 'xarray-tutorial-datasets' + + @hookimpl + def get_datasets(self): + """Returns a list of available datasets. + + This function returns a list of the available datasets that can be loaded using the xarray.tutorial.file_formats module. + """ + return list(xr.tutorial.file_formats) + + @hookimpl + def get_dataset(self, dataset_id: str): + """Retrieves a dataset from the xarray tutorial dataset by the given dataset ID. + + Args: + dataset_id (str): The ID of the dataset to retrieve. + + Returns: + xarray.Dataset: The retrieved dataset, or None if the dataset could not be loaded. + """ + try: + ds = xr.tutorial.open_dataset(dataset_id) + if ds.cf.coords['longitude'].dims[0] == 'longitude': + ds = ds.assign_coords(longitude=(((ds.longitude + 180) % 360) - 180)).sortby( + 'longitude' + ) + # TODO: Yeah this should not be assumed... but for regular grids we will viz with rioxarray so for now we will assume + ds = ds.rio.write_crs(4326) + return ds + except HTTPError: + return None + + +rest = Rest({}) +rest.register_plugin(TutorialDataset()) + +### For this tutorial, the following lines activate the other plugins (comment them out to disable): + +from lme import LmeSubsetPlugin +from mean import MeanPlugin + +rest.register_plugin(MeanPlugin()) +rest.register_plugin(LmeSubsetPlugin()) + +rest.serve() diff --git a/examples/environment.yaml b/examples/environment.yaml new file mode 100644 index 00000000..d5310f78 --- /dev/null +++ b/examples/environment.yaml @@ -0,0 +1,25 @@ +name: xpublish-exploring +channels: + - conda-forge +dependencies: + - python=3.11 + - xarray-datatree + - xpublish>=0.3.1 + - xpublish-edr + - xpublish-opendap + - dask + - distributed + - netcdf4 + - zarr + - s3fs + - fsspec + - cf_xarray + - kerchunk + - h5py + - intake-xarray + - h5netcdf + - pydap + - h5pyd + - regionmask + - ipykernel + - pooch diff --git a/examples/lme.py b/examples/lme.py new file mode 100644 index 00000000..21152874 --- /dev/null +++ b/examples/lme.py @@ -0,0 
+1,107 @@ +from typing import Sequence + +from fastapi import APIRouter + +from xpublish import Dependencies, Plugin, hookimpl + +regions = { + 'GB': {'bbox': [-69.873, -65.918, 40.280, 42.204], 'name': 'Georges Bank'}, + 'GOM': {'bbox': [-70.975, -65.375, 40.375, 45.125], 'name': 'Gulf Of Maine'}, + 'MAB': { + 'bbox': [-77.036, -70.005, 35.389, 41.640], + 'name': 'MidAtlantic Bight', + }, + 'NESHELF': { + 'bbox': [-77.45, -66.35, 34.50, 44.50], + 'name': 'North East Shelf', + }, + 'SS': {'bbox': [-66.775, -65.566, 41.689, 45.011], 'name': 'Scotian Shelf'}, + 'EC': {'bbox': [-81.75, -65.375, 25.000, 45.125], 'name': 'East Coast'}, + 'NEC': {'bbox': [-81.45, -63.30, 28.70, 44.80], 'name': 'Northeast Coast'}, +} + +DEFAULT_TAGS = ['lme', 'large marine ecosystem', 'subset'] + + +class LmeSubsetPlugin(Plugin): + """The LmeSubsetPlugin class is a FastAPI plugin that provides an API for retrieving information about Large Marine Ecosystems (LMEs) and generating datasets for specific LME regions. + + The plugin defines two routers: + - The `app_router` provides a GET endpoint at `/lme` that returns a dictionary of LME names and their IDs. + - The `dataset_router` provides a GET endpoint at `/lme/{region_id}` that takes a dataset ID and a region ID, and returns a subset of the dataset for the specified region. + + The `get_region_dataset` function is used to generate the dataset subset by slicing the dataset along the latitude dimension based on the bounding box of the specified region. + """ + + name: str = 'lme-subset-plugin' + + app_router_prefix: str = '/lme' + app_router_tags: Sequence[str] = DEFAULT_TAGS + + dataset_router_prefix: str = '/lme' + dataset_router_tags: Sequence[str] = DEFAULT_TAGS + + @hookimpl + def app_router(self): + """Provides an API router for retrieving a list of LME regions. 
+ + The `app_router` function returns an instance of `APIRouter` with the following configuration: + - Prefix: The value of `self.app_router_prefix` + - Tags: A list of values from `self.app_router_tags` + + The router includes a single GET endpoint at the root path ("/") that returns a dictionary mapping region keys to their names. + """ + router = APIRouter(prefix=self.app_router_prefix, tags=list(self.app_router_tags)) + + @router.get('/') + def get_lme_regions(): + return {key: value['name'] for key, value in regions.items()} + + return router + + @hookimpl + def dataset_router(self, deps: Dependencies): + """Defines a dataset router that allows accessing datasets for specific regions. + + The `dataset_router` function creates a FastAPI router that provides an endpoint for retrieving a dataset for a specific region. The region is identified by its `region_id`, and the dataset is identified by its `dataset_id`. + + The function uses the `Dependencies` object to access the dataset and perform the necessary slicing operations to extract the data for the specified region. The `get_region_dataset` function is defined within the `dataset_router` function and is responsible for the actual data retrieval and slicing. + + The router is then populated with the necessary routes and returned for inclusion in the main application. 
+ """ + router = APIRouter(prefix=self.dataset_router_prefix, tags=list(self.dataset_router_tags)) + + def get_region_dataset(dataset_id: str, region_id: str): + region = regions[region_id] + bbox = region['bbox'] + + # lat_slice = slice(bbox[2], bbox[3]) + # air_temperature lats are descending + lat_slice = slice(bbox[3], bbox[2]) + # lon_slice = slice(bbox[0], bbox[1]) + + # print(lat_slice, lon_slice) + + dataset = deps.dataset(dataset_id) + + sliced = dataset.cf.sel(latitude=lat_slice) + + return sliced + + region_deps = Dependencies( + dataset_ids=deps.dataset_ids, + dataset=get_region_dataset, + cache=deps.cache, + plugins=deps.plugins, + plugin_manager=deps.plugin_manager, + ) + + all_plugins = list(deps.plugin_manager().get_plugins()) + this_plugin = [p for p in all_plugins if p.name == self.name] + + for new_router in deps.plugin_manager().subset_hook_caller( + 'dataset_router', remove_plugins=this_plugin + )(deps=region_deps): + router.include_router(new_router, prefix='/{region_id}') + + return router diff --git a/examples/mean.py b/examples/mean.py new file mode 100644 index 00000000..b5cb10b5 --- /dev/null +++ b/examples/mean.py @@ -0,0 +1,46 @@ +from typing import Sequence + +from fastapi import APIRouter, Depends, HTTPException + +from xpublish import Dependencies, Plugin, hookimpl + + +class MeanPlugin(Plugin): + """Provides a plugin that adds a dataset router for computing the mean of variables in a dataset. + + The `MeanPlugin` class defines the following: + - `name`: The name of the plugin, set to 'mean'. + - `dataset_router_prefix`: The prefix for the dataset router, set to an empty string. + - `dataset_router_tags`: The tags for the dataset router, set to ['mean']. + + The `dataset_router` method creates an APIRouter with the defined prefix and tags, and adds a GET endpoint for computing the mean of a variable in the dataset. If the variable is not found in the dataset, an HTTPException is raised with a 404 status code. 
+ """ + + name: str = 'mean' + + dataset_router_prefix: str = '' + dataset_router_tags: Sequence[str] = ['mean'] + + @hookimpl + def dataset_router(self, deps: Dependencies): + """Provides a route to retrieve the mean value of a variable in a dataset. + + Args: + deps (Dependencies): The dependencies for plugin routers + """ + router = APIRouter(prefix=self.dataset_router_prefix, tags=list(self.dataset_router_tags)) + + @router.get('/{var_name}/mean') + def get_mean(var_name: str, dataset=Depends(deps.dataset)): + if var_name not in dataset.variables: + raise HTTPException( + status_code=404, + detail=f"Variable '{var_name}' not found in dataset", + ) + + mean = dataset[var_name].mean() + if mean.isnull(): + return 'NaN' + return float(mean) + + return router