diff --git a/notebooks/working_notebook.ipynb b/notebooks/working_notebook.ipynb new file mode 100644 index 0000000..e40434e --- /dev/null +++ b/notebooks/working_notebook.ipynb @@ -0,0 +1,1397 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### First implementation of RGDR's dbscan-based clustering & dimensionality reduction\n", + "This notebook outlines the current status of the RGDR implementation." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First we will load some example data and resample it using the `AdventCalendar`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as xr\n", + "import s2spy.time\n", + "import s2spy.rgdr\n", + "\n", + "file_path = '../tests/test_rgdr/test_data'\n", + "field = xr.open_dataset(f'{file_path}/sst_daily_1979-2018_5deg_Pacific_175_240E_25_50N.nc')\n", + "target = xr.open_dataset(f'{file_path}/tf5_nc5_dendo_80d77.nc')\n", + "\n", + "cal = s2spy.time.AdventCalendar((8, 31), freq = \"30d\")\n", + "field_resampled = cal.resample(field)\n", + "target_resampled = cal.resample(target)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The target timeseries comes from pre-clustered land surface temperature data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "target_timeseries = target_resampled.sel(cluster=3).ts" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The precursor field consists of sea surface temperature data, with latitude and longitude dimensions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With `_rgdr.correlation` we can determine the correlation coefficients and p-values:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "field_resampled['corr'], field_resampled['p_val'] = (\n", + " s2spy.rgdr._rgdr.correlation(field_resampled.sst,\n", + " target_timeseries.sel(i_interval=0),\n", + " corr_dim='anchor_year')\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.Dataset>\n", + "Dimensions: (anchor_year: 39, i_interval: 12, latitude: 5, longitude: 13)\n", + "Coordinates:\n", + " index (anchor_year, i_interval) int64 0 1 2 3 4 ... 464 465 466 467\n", + " interval (anchor_year, i_interval) object (1980-08-01, 1980-08-31] .....\n", + " * latitude (latitude) float64 47.5 42.5 37.5 32.5 27.5\n", + " * longitude (longitude) float64 177.5 182.5 187.5 ... 227.5 232.5 237.5\n", + " * anchor_year (anchor_year) int64 1980 1981 1982 1983 ... 2015 2016 2017 2018\n", + " * i_interval (i_interval) int64 0 1 2 3 4 5 6 7 8 9 10 11\n", + " target (i_interval) bool True False False False ... False False False\n", + " tfreq int64 5\n", + " n_clusters int64 6\n", + " cluster int64 3\n", + "Data variables:\n", + " sst (anchor_year, i_interval, latitude, longitude) float64 284.2...\n", + " corr (i_interval, latitude, longitude) float64 -0.08036 ... -0.00...\n", + " p_val (i_interval, latitude, longitude) float64 0.6267 ... 0.9864