From ad17b83a3dda119ad32a59862066b059c0b2b3d2 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 4 Dec 2024 22:34:42 -0500 Subject: [PATCH] Update notebook (#327) Co-authored-by: Aimee Barciauskas --- noaa-cdr-sst.ipynb | 1445 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 1403 insertions(+), 42 deletions(-) diff --git a/noaa-cdr-sst.ipynb b/noaa-cdr-sst.ipynb index af96ecc3..a72deb04 100644 --- a/noaa-cdr-sst.ipynb +++ b/noaa-cdr-sst.ipynb @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "id": "2f69f0bb-316b-452c-b1ba-4d7ef4afcf67", "metadata": {}, "outputs": [], @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "1532c33b-804f-49fa-9fa9-0eb42ea87e26", "metadata": {}, "outputs": [], @@ -75,7 +75,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "id": "06bbec92-3974-4859-8bda-353afc7800b9", "metadata": {}, "outputs": [], @@ -90,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "id": "77fb94c8-870f-4c9e-8421-ac9c17402122", "metadata": {}, "outputs": [], @@ -106,7 +106,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "id": "abefd6fa-386a-4e07-a7c8-219d3730eeeb", "metadata": {}, "outputs": [], @@ -116,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "id": "79a4228a-0e17-4b07-9144-f24fe06db832", "metadata": {}, "outputs": [], @@ -129,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "id": "5fd0c082-8d5e-46a8-a994-fee80baa4ecc", "metadata": {}, "outputs": [], @@ -148,36 +148,33 @@ { "data": { "text/plain": [ - "" + "'JKZCDPMTJ3ETZFY2KXKG'" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "store" + "store.commit(\"first 2 days of 202408 data\")" ] }, { "cell_type": "code", "execution_count": 10, - "id": "55ebbc5f-add2-4de8-81f6-5aaf64d9e2b6", + "id": "9387e1ff-46c1-45fd-9796-0457538209a7", "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "expected type IcechunkStore, but got type ", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mvirtual_ds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvirtualize\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_icechunk\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstore\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/github/virtualizarr/virtualizarr/accessor.py:58\u001b[0m, in \u001b[0;36mVirtualiZarrDatasetAccessor.to_icechunk\u001b[0;34m(self, store, append_dim)\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;124;03mWrite an xarray dataset to an Icechunk store.\u001b[39;00m\n\u001b[1;32m 46\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;124;03mappend_dim: str, optional\u001b[39;00m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mvirtualizarr\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mwriters\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01micechunk\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dataset_to_icechunk\n\u001b[0;32m---> 58\u001b[0m \u001b[43mdataset_to_icechunk\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstore\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mappend_dim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mappend_dim\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/github/virtualizarr/virtualizarr/writers/icechunk.py:47\u001b[0m, in \u001b[0;36mdataset_to_icechunk\u001b[0;34m(ds, store, append_dim)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m 43\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124micechunk\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m and \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mzarr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m version 3 libraries are required to use this function\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 44\u001b[0m )\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(store, IcechunkStore):\n\u001b[0;32m---> 47\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexpected type IcechunkStore, but got type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(store)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m store\u001b[38;5;241m.\u001b[39msupports_writes:\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msupplied store does not support writes\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mTypeError\u001b[0m: expected type IcechunkStore, but got type " - ] + "data": { + "text/plain": [ + "b'x^cx\\xd3\\xe2\\x06\\x00\\x04\\x16\\x01\\xb7'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -206,10 +203,457 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "b6271bd1-bc0b-4901-9901-91aabe508cf7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 66MB\n",
+       "Dimensions:  (time: 2, lon: 1440, zlev: 1, lat: 720)\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 16B 2024-08-01T12:00:00 2024-08-02T12:00:00\n",
+       "  * lon      (lon) float32 6kB 0.125 0.375 0.625 0.875 ... 359.4 359.6 359.9\n",
+       "  * zlev     (zlev) float32 4B 0.0\n",
+       "  * lat      (lat) float32 3kB -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n",
+       "Data variables:\n",
+       "    ice      (time, zlev, lat, lon) float64 17MB ...\n",
+       "    sst      (time, zlev, lat, lon) float64 17MB ...\n",
+       "    anom     (time, zlev, lat, lon) float64 17MB ...\n",
+       "    err      (time, zlev, lat, lon) float64 17MB ...\n",
+       "Attributes: (12/37)\n",
+       "    Conventions:                CF-1.6, ACDD-1.3\n",
+       "    cdm_data_type:              Grid\n",
+       "    comment:                    Data was converted from NetCDF-3 to NetCDF-4 ...\n",
+       "    creator_email:              oisst-help@noaa.gov\n",
+       "    creator_url:                https://www.ncei.noaa.gov/\n",
+       "    date_created:               2024-08-16T09:12:00Z\n",
+       "    ...                         ...\n",
+       "    source:                     ICOADS, NCEP_GTS, GSFC_ICE, NCEP_ICE, Pathfin...\n",
+       "    standard_name_vocabulary:   CF Standard Name Table (v40, 25 January 2017)\n",
+       "    summary:                    NOAAs 1/4-degree Daily Optimum Interpolation ...\n",
+       "    time_coverage_end:          2024-08-01T23:59:59Z\n",
+       "    time_coverage_start:        2024-08-01T00:00:00Z\n",
+       "    title:                      NOAA/NCEI 1/4 Degree Daily Optimum Interpolat...
" + ], + "text/plain": [ + " Size: 66MB\n", + "Dimensions: (time: 2, lon: 1440, zlev: 1, lat: 720)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 16B 2024-08-01T12:00:00 2024-08-02T12:00:00\n", + " * lon (lon) float32 6kB 0.125 0.375 0.625 0.875 ... 359.4 359.6 359.9\n", + " * zlev (zlev) float32 4B 0.0\n", + " * lat (lat) float32 3kB -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n", + "Data variables:\n", + " ice (time, zlev, lat, lon) float64 17MB ...\n", + " sst (time, zlev, lat, lon) float64 17MB ...\n", + " anom (time, zlev, lat, lon) float64 17MB ...\n", + " err (time, zlev, lat, lon) float64 17MB ...\n", + "Attributes: (12/37)\n", + " Conventions: CF-1.6, ACDD-1.3\n", + " cdm_data_type: Grid\n", + " comment: Data was converted from NetCDF-3 to NetCDF-4 ...\n", + " creator_email: oisst-help@noaa.gov\n", + " creator_url: https://www.ncei.noaa.gov/\n", + " date_created: 2024-08-16T09:12:00Z\n", + " ... ...\n", + " source: ICOADS, NCEP_GTS, GSFC_ICE, NCEP_ICE, Pathfin...\n", + " standard_name_vocabulary: CF Standard Name Table (v40, 25 January 2017)\n", + " summary: NOAAs 1/4-degree Daily Optimum Interpolation ...\n", + " time_coverage_end: 2024-08-01T23:59:59Z\n", + " time_coverage_start: 2024-08-01T00:00:00Z\n", + " title: NOAA/NCEI 1/4 Degree Daily Optimum Interpolat..." + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ds = xr.open_zarr(store, consolidated=False, zarr_format=3)\n", "ds" @@ -225,7 +669,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "190c25f9-e000-4b17-83eb-cf551141dfea", "metadata": {}, "outputs": [], @@ -240,7 +684,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "af330082-207a-4f08-aefe-fc15aa8b2eb3", "metadata": {}, "outputs": [], @@ -256,17 +700,451 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "103b44d2-124a-4de5-8074-e997fd5a1698", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 17MB\n",
+       "Dimensions:  (time: 2, zlev: 1, lat: 720, lon: 1440)\n",
+       "Coordinates:\n",
+       "    lat      (lat) float32 3kB ManifestArray<shape=(720,), dtype=float32, chu...\n",
+       "    lon      (lon) float32 6kB ManifestArray<shape=(1440,), dtype=float32, ch...\n",
+       "    time     (time) float32 8B ManifestArray<shape=(2,), dtype=float32, chunk...\n",
+       "    zlev     (zlev) float32 4B ManifestArray<shape=(1,), dtype=float32, chunk...\n",
+       "Data variables:\n",
+       "    anom     (time, zlev, lat, lon) int16 4MB ManifestArray<shape=(2, 1, 720,...\n",
+       "    ice      (time, zlev, lat, lon) int16 4MB ManifestArray<shape=(2, 1, 720,...\n",
+       "    sst      (time, zlev, lat, lon) int16 4MB ManifestArray<shape=(2, 1, 720,...\n",
+       "    err      (time, zlev, lat, lon) int16 4MB ManifestArray<shape=(2, 1, 720,...\n",
+       "Attributes: (12/37)\n",
+       "    Conventions:                CF-1.6, ACDD-1.3\n",
+       "    title:                      NOAA/NCEI 1/4 Degree Daily Optimum Interpolat...\n",
+       "    references:                 Reynolds, et al.(2007) Daily High-Resolution-...\n",
+       "    source:                     ICOADS, NCEP_GTS, GSFC_ICE, NCEP_ICE, Pathfin...\n",
+       "    id:                         oisst-avhrr-v02r01.20240803.nc\n",
+       "    naming_authority:           gov.noaa.ncei\n",
+       "    ...                         ...\n",
+       "    time_coverage_start:        2024-08-03T00:00:00Z\n",
+       "    time_coverage_end:          2024-08-03T23:59:59Z\n",
+       "    metadata_link:              https://doi.org/10.25921/RE9P-PT57\n",
+       "    ncei_template_version:      NCEI_NetCDF_Grid_Template_v2.0\n",
+       "    comment:                    Data was converted from NetCDF-3 to NetCDF-4 ...\n",
+       "    sensor:                     Thermometer, AVHRR
" + ], + "text/plain": [ + " Size: 17MB\n", + "Dimensions: (time: 2, zlev: 1, lat: 720, lon: 1440)\n", + "Coordinates:\n", + " lat (lat) float32 3kB ManifestArray\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 133MB\n",
+       "Dimensions:  (lat: 720, time: 4, zlev: 1, lon: 1440)\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 3kB -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n",
+       "  * lon      (lon) float32 6kB 0.125 0.375 0.625 0.875 ... 359.4 359.6 359.9\n",
+       "  * zlev     (zlev) float32 4B 0.0\n",
+       "  * time     (time) datetime64[ns] 32B 2024-08-01T12:00:00 ... 2024-08-04T12:...\n",
+       "Data variables:\n",
+       "    ice      (time, zlev, lat, lon) float64 33MB ...\n",
+       "    anom     (time, zlev, lat, lon) float64 33MB ...\n",
+       "    err      (time, zlev, lat, lon) float64 33MB ...\n",
+       "    sst      (time, zlev, lat, lon) float64 33MB ...\n",
+       "Attributes: (12/37)\n",
+       "    Conventions:                CF-1.6, ACDD-1.3\n",
+       "    cdm_data_type:              Grid\n",
+       "    comment:                    Data was converted from NetCDF-3 to NetCDF-4 ...\n",
+       "    creator_email:              oisst-help@noaa.gov\n",
+       "    creator_url:                https://www.ncei.noaa.gov/\n",
+       "    date_created:               2024-08-18T09:12:00Z\n",
+       "    ...                         ...\n",
+       "    source:                     ICOADS, NCEP_GTS, GSFC_ICE, NCEP_ICE, Pathfin...\n",
+       "    standard_name_vocabulary:   CF Standard Name Table (v40, 25 January 2017)\n",
+       "    summary:                    NOAAs 1/4-degree Daily Optimum Interpolation ...\n",
+       "    time_coverage_end:          2024-08-03T23:59:59Z\n",
+       "    time_coverage_start:        2024-08-03T00:00:00Z\n",
+       "    title:                      NOAA/NCEI 1/4 Degree Daily Optimum Interpolat...
" + ], + "text/plain": [ + " Size: 133MB\n", + "Dimensions: (lat: 720, time: 4, zlev: 1, lon: 1440)\n", + "Coordinates:\n", + " * lat (lat) float32 3kB -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n", + " * lon (lon) float32 6kB 0.125 0.375 0.625 0.875 ... 359.4 359.6 359.9\n", + " * zlev (zlev) float32 4B 0.0\n", + " * time (time) datetime64[ns] 32B 2024-08-01T12:00:00 ... 2024-08-04T12:...\n", + "Data variables:\n", + " ice (time, zlev, lat, lon) float64 33MB ...\n", + " anom (time, zlev, lat, lon) float64 33MB ...\n", + " err (time, zlev, lat, lon) float64 33MB ...\n", + " sst (time, zlev, lat, lon) float64 33MB ...\n", + "Attributes: (12/37)\n", + " Conventions: CF-1.6, ACDD-1.3\n", + " cdm_data_type: Grid\n", + " comment: Data was converted from NetCDF-3 to NetCDF-4 ...\n", + " creator_email: oisst-help@noaa.gov\n", + " creator_url: https://www.ncei.noaa.gov/\n", + " date_created: 2024-08-18T09:12:00Z\n", + " ... ...\n", + " source: ICOADS, NCEP_GTS, GSFC_ICE, NCEP_ICE, Pathfin...\n", + " standard_name_vocabulary: CF Standard Name Table (v40, 25 January 2017)\n", + " summary: NOAAs 1/4-degree Daily Optimum Interpolation ...\n", + " time_coverage_end: 2024-08-03T23:59:59Z\n", + " time_coverage_start: 2024-08-03T00:00:00Z\n", + " title: NOAA/NCEI 1/4 Degree Daily Optimum Interpolat..." + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ds = xr.open_zarr(read_store, consolidated=False, zarr_format=3)\n", "ds" @@ -350,9 +1711,9 @@ ], "metadata": { "kernelspec": { - "display_name": "virtualizarr", + "display_name": ".venv", "language": "python", - "name": "virtualizarr" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -364,7 +1725,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.10" + "version": "3.11.9" } }, "nbformat": 4,