From 9a9f72b9c69bc653b5dcfc3622b251c63973dc3c Mon Sep 17 00:00:00 2001
From: Alex Merose
Date: Sat, 23 Mar 2024 20:55:21 +0530
Subject: [PATCH] Simplest version of the SST Demo.

---
 demo/sst | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 demo/sst

diff --git a/demo/sst b/demo/sst
new file mode 100644
index 0000000..6e38b6e
--- /dev/null
+++ b/demo/sst
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+"""Demo of calculating global average sea surface temperature (SST) with SQL.
+
+Opens the public ARCO-ERA5 Zarr store, registers the SST variable as a SQL
+table via xarray_sql, computes the per-day average, and writes it to CSV.
+
+Please run the following to access the ERA5 dataset:
+```
+gcloud auth application-default login
+```
+"""
+import xarray as xr
+import xarray_sql as qr
+
+# TODO(alxmrs): Add coiled or dask cluster code.
+
+# Open the dataset with explicit chunking along `time` and `level`.
+era5_ds = xr.open_zarr(
+    'gs://gcp-public-data-arco-era5/ar/'
+    '1959-2022-full_37-1h-0p25deg-chunk-1.zarr-v2',
+    chunks={'time': 240, 'level': 1}
+)
+print('dataset opened.')
+# TODO(alxmrs): Slice to small time range based on script args.
+era5_sst_ds = era5_ds[['sea_surface_temperature']].sel(
+    level=1000,  # surface level only.
+)
+
+# chunk sizes determined from VM memory limit of 16 GB.
+c = qr.Context()
+c.create_table('era5', era5_sst_ds, chunks=dict(time=24))
+
+print('beginning query.')
+# Group the samples by calendar date and average SST within each day.
+df = c.sql("""
+SELECT
+  DATE("time") as date,
+  AVG("sea_surface_temperature") as daily_avg_sst
+FROM
+  "era5"
+GROUP BY
+  DATE("time")
+""")
+
+# TODO(alxmrs): time slice should be in file name.
+# NOTE(review): presumably `df` is a dask DataFrame, so `*` expands to one
+# file per partition -- confirm against xarray_sql.
+df.to_csv('global_avg_sst_*.csv')