From f9c57ba4a714c84522a8ff24b3fe5ec24c43689f Mon Sep 17 00:00:00 2001 From: Alex Merose Date: Sat, 23 Mar 2024 22:50:39 +0530 Subject: [PATCH] Added script arguments. --- demo/sst | 50 --------------------------------------- demo/sst.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 3 +++ 3 files changed, 66 insertions(+), 50 deletions(-) delete mode 100644 demo/sst create mode 100755 demo/sst.py diff --git a/demo/sst b/demo/sst deleted file mode 100644 index 178bfa4..0000000 --- a/demo/sst +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python3 -"""Demo of calculating global average sea surface temperature (SST) with SQL. - -Please run the following to access the ERA5 dataset: -``` -gcloud auth application-default login -``` -""" -import xarray as xr -import xarray_sql as qr - -from coiled import Cluster - -cluster = Cluster( - region='us-central1', - worker_memory='16 GiB', - spot_policy='spot_with_fallback', - arm=True, -) -client = cluster.get_client() -cluster.adapt(minimum=1, maximum=100) - -era5_ds = xr.open_zarr( - 'gs://gcp-public-data-arco-era5/ar/' - '1959-2022-full_37-1h-0p25deg-chunk-1.zarr-v2', - chunks={'time': 240, 'level': 1} -) -print('dataset opened.') -# TODO(alxmrs): Slice to small time range based on script args. -era5_sst_ds = era5_ds[['sea_surface_temperature']].sel( - level=1000, # surface level only. -) - -# chunk sizes determined from VM memory limit of 16 GiB. -c = qr.Context() -c.create_table('era5', era5_sst_ds, chunks=dict(time=24)) - -print('beginning query.') -df = c.sql(""" -SELECT - DATE("time") as date, - AVG("sea_surface_temperature") as daily_avg_sst -FROM - "era5" -GROUP BY - DATE("time") -""") - -# TODO(alxmrs): time slice should be in file name. 
-df.to_csv('global_avg_sst_*.cvs') \ No newline at end of file diff --git a/demo/sst.py b/demo/sst.py new file mode 100755 index 0000000..1482ec5 --- /dev/null +++ b/demo/sst.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +"""Demo of calculating global average sea surface temperature (SST) with SQL. + +Please run the following to set up cloud resources: +``` +gcloud auth application-default login +coiled setup +``` +""" +import argparse +import xarray as xr +import xarray_sql as qr + +parser = argparse.ArgumentParser() +parser.add_argument('--start', type=str, default='2020-01-01', help='start time ISO string') +parser.add_argument('--end', type=str, default='2020-01-02', help='end time ISO string') +parser.add_argument('--cluster', action='store_true', help='deploy on coiled cluster') + +args = parser.parse_args() + +if args.cluster: + from coiled import Cluster + + cluster = Cluster( + region='us-central1', + worker_memory='16 GiB', + spot_policy='spot_with_fallback', + arm=True, + ) + client = cluster.get_client() + cluster.adapt(minimum=1, maximum=100) +else: + from dask.distributed import LocalCluster + cluster = LocalCluster(processes=False) + client = cluster.get_client() + +era5_ds = xr.open_zarr( + 'gs://gcp-public-data-arco-era5/ar/' + '1959-2022-full_37-1h-0p25deg-chunk-1.zarr-v2', + chunks={'time': 240, 'level': 1} +) +print('dataset opened.') +era5_sst_ds = era5_ds[['sea_surface_temperature']].sel( + time=slice(args.start, args.end), + level=1000, # surface level only. +) + +c = qr.Context() +# chunk sizes determined from VM memory limit of 16 GiB. 
+c.create_table('era5', era5_sst_ds, chunks=dict(time=24)) + +print('beginning query.') +df = c.sql(""" +SELECT + DATE("time") as date, + AVG("sea_surface_temperature") as daily_avg_sst +FROM + "era5" +GROUP BY + DATE("time") +""") + +df.to_csv(f'global_avg_sst_{args.start}-{args.end}_*.csv') diff --git a/pyproject.toml b/pyproject.toml index 2d6eeb9..2242507 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,9 @@ dev = [ "pyink", "py-spy" ] +demo = [ + "coiled" +] [project.urls] Homepage = "https://github.com/alxmrs/xarray-sql"