diff --git a/figures/figure-1.png b/figures/figure-1.png
new file mode 100644
index 0000000..b3188af
Binary files /dev/null and b/figures/figure-1.png differ
diff --git a/figures/figure-2.png b/figures/figure-2.png
new file mode 100644
index 0000000..9ffd184
Binary files /dev/null and b/figures/figure-2.png differ
diff --git a/figures/figure-3.png b/figures/figure-3.png
new file mode 100644
index 0000000..860e66d
Binary files /dev/null and b/figures/figure-3.png differ
diff --git a/notebooks/.ipynb_checkpoints/portable-full-comparison-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/portable-full-comparison-checkpoint.ipynb
new file mode 100644
index 0000000..cc56db2
--- /dev/null
+++ b/notebooks/.ipynb_checkpoints/portable-full-comparison-checkpoint.ipynb
@@ -0,0 +1,1313 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "6c9b37e2-2daa-4283-a228-ea581498de0c",
+ "metadata": {
+ "tags": [],
+ "user_expressions": []
+ },
+ "source": [
+ "## A/B testing access times for ICESat-2 ATL03 HDF5 files in the cloud\n",
+ "\n",
+ "This notebook requires cloud-optimized versions of each HDF5 test file; the S3 links it uses are listed in `test_dict` below.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "3b78fb94-10ae-48cb-8e30-521b2c8b7822",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import xarray as xr\n",
+ "import h5py\n",
+ "import fsspec\n",
+ "import s3fs\n",
+ "import logging\n",
+ "import re\n",
+ "import time\n",
+ "import numpy as np\n",
+ "import zarr\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "\n",
+ "from h5coro import h5coro, s3driver, filedriver\n",
+ "driver = s3driver.S3Driver\n",
+ "\n",
+ "logger = logging.getLogger('fsspec')\n",
+ "logger.setLevel(logging.DEBUG)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "431d900d-0656-4b75-af6b-82f0f171d5f8",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "xarray v2024.6.0\n",
+ "h5py v3.11.0\n",
+ "fsspec v2024.6.0\n",
+ "h5coro v0.0.6\n",
+ "zarr v2.18.2\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the name and version of each core library used by the benchmarks.\n",
+ "tracked_libraries = (xr, h5py, fsspec, h5coro, zarr)\n",
+ "for library in tracked_libraries:\n",
+ "    version_label = f'{library.__name__} v{library.__version__}'\n",
+ "    print(version_label)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7998cd99-6034-4a1b-9ae5-d651bc265bff",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "To list the available test files:\n",
+ "\n",
+ "```bash\n",
+ "aws s3 ls s3://its-live-data/test-space/cloud-experiments/h5cloud/ --recursive\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "9850faac-f534-4bc2-9214-c8dababe0f52",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Benchmark inputs, keyed by approximate granule size. Each entry maps a format label\n",
+ "# to its S3 link and names the HDF5 group and variable that the benchmarks read.\n",
+ "# Labels containing \"kerchunk\" point to .json files rather than .h5 files.\n",
+ "test_dict = {\n",
+ "    \"1GB\": {\n",
+ "        \"links\": {\n",
+ "            \"original\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.h5\",\n",
+ "            \"original-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.json\",\n",
+ "            \"page-only-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\",\n",
+ "            # NOTE(review): this label used to reuse the 4mb link, so the 4mb file was timed twice;\n",
+ "            # confirm the 8mb object exists in the bucket (see the `aws s3 ls` command above).\n",
+ "            \"page-only-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-8mb.h5\",\n",
+ "            \"rechunked-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-4mb.h5\",\n",
+ "            \"rechunked-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.h5\",\n",
+ "            \"rechunked-8mb-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.json\",\n",
+ "        },\n",
+ "        \"group\": \"/gt1l/heights\",\n",
+ "        \"variable\": \"h_ph\"\n",
+ "    },\n",
+ "    \"7GB\": {\n",
+ "        \"links\": {\n",
+ "            \"original\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5\",\n",
+ "            \"original-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.json\",\n",
+ "            \"page-only-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5\",\n",
+ "            \"page-only-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5\",\n",
+ "            \"rechunked-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5\",\n",
+ "            \"rechunked-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5\",\n",
+ "            \"rechunked-8mb-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.json\",\n",
+ "        },\n",
+ "        \"group\": \"/gt1l/heights\",\n",
+ "        \"variable\": \"h_ph\"\n",
+ "    }\n",
+ "}\n",
+ "\n",
+ "# anon=True requests anonymous (unsigned) S3 access, so no hub credentials are used here;\n",
+ "# every link in test_dict lives in the its-live-data bucket.\n",
+ "fs = s3fs.S3FileSystem(anon=True)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4d166627-6144-40bf-884d-2188e5c764ba",
+ "metadata": {
+ "tags": [],
+ "user_expressions": []
+ },
+ "source": [
+ "## [h5coro](https://github.com/ICESat2-SlideRule/h5coro/)\n",
+ "\n",
+ "**h5coro** is optimized for reading HDF5 data in high-latency high-throughput environments. It accomplishes this through a few key design decisions:\n",
+ "* __All reads are concurrent.__ Each dataset and/or attribute read by **h5coro** is performed in its own thread.\n",
+ "* __Intelligent range gets__ are used to read as many dataset chunks as possible in each read operation. This drastically reduces the number of HTTP requests to S3 and means there is no longer a need to re-chunk the data (it actually works better on smaller chunk sizes due to the granularity of the request).\n",
+ "* __Block caching__ is used to minimize the number of GET requests made to S3. S3 has a large first-byte latency (we've measured it at ~60ms on our systems), which means there is a large penalty for each read operation performed. **h5coro** performs all reads to S3 as large block reads and then maintains data in a local cache for access to smaller amounts of data within those blocks.\n",
+ "* __The system is serverless__ and does not depend on any external services to read the data. This means it scales naturally as the user application scales, and it reduces overall system complexity.\n",
+ "* __No metadata repository is needed.__ The structure of the file is cached as it is read, so successive reads of other datasets in the same file do not have to re-read and rebuild the file's directory structure.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "efe41d4a-1947-438b-a3c3-7ab954d75e13",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Processing format: original, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.h5\n",
+ "Skipping s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.json\n",
+ "Processing format: page-only-4mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\n",
+ "Processing format: page-only-8mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\n",
+ "Processing format: rechunked-4mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-4mb.h5\n",
+ "Processing format: rechunked-8mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.h5\n",
+ "Skipping s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.json\n",
+ "Processing format: original, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5\n",
+ "Skipping s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.json\n",
+ "Processing format: page-only-4mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5\n",
+ "Processing format: page-only-8mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5\n",
+ "Processing format: rechunked-4mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5\n",
+ "Processing format: rechunked-8mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5\n",
+ "Skipping s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.json\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " tool | \n",
+ " dataset | \n",
+ " cloud-aware | \n",
+ " format | \n",
+ " file | \n",
+ " time | \n",
+ " shape | \n",
+ " mean | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " h5coro | \n",
+ " 1GB | \n",
+ " no | \n",
+ " original | \n",
+ " s3://its-live-data/test-space/cloud-experiment... | \n",
+ " 15.046684 | \n",
+ " (9720204,) | \n",
+ " 386.067383 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " h5coro | \n",
+ " 1GB | \n",
+ " no | \n",
+ " page-only-4mb | \n",
+ " s3://its-live-data/test-space/cloud-experiment... | \n",
+ " 19.098737 | \n",
+ " (9720204,) | \n",
+ " 386.067383 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " h5coro | \n",
+ " 1GB | \n",
+ " no | \n",
+ " page-only-8mb | \n",
+ " s3://its-live-data/test-space/cloud-experiment... | \n",
+ " 9.906620 | \n",
+ " (9720204,) | \n",
+ " 386.067383 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " h5coro | \n",
+ " 1GB | \n",
+ " no | \n",
+ " rechunked-4mb | \n",
+ " s3://its-live-data/test-space/cloud-experiment... | \n",
+ " 14.992573 | \n",
+ " (9720204,) | \n",
+ " 386.067383 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " h5coro | \n",
+ " 1GB | \n",
+ " no | \n",
+ " rechunked-8mb | \n",
+ " s3://its-live-data/test-space/cloud-experiment... | \n",
+ " 17.416383 | \n",
+ " (9720204,) | \n",
+ " 386.067383 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " h5coro | \n",
+ " 7GB | \n",
+ " no | \n",
+ " original | \n",
+ " s3://its-live-data/test-space/cloud-experiment... | \n",
+ " 53.325380 | \n",
+ " (46484912,) | \n",
+ " 1035.163086 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " h5coro | \n",
+ " 7GB | \n",
+ " no | \n",
+ " page-only-4mb | \n",
+ " s3://its-live-data/test-space/cloud-experiment... | \n",
+ " 61.361221 | \n",
+ " (46484912,) | \n",
+ " 1035.163086 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " h5coro | \n",
+ " 7GB | \n",
+ " no | \n",
+ " page-only-8mb | \n",
+ " s3://its-live-data/test-space/cloud-experiment... | \n",
+ " 53.559258 | \n",
+ " (46484912,) | \n",
+ " 1035.163086 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " h5coro | \n",
+ " 7GB | \n",
+ " no | \n",
+ " rechunked-4mb | \n",
+ " s3://its-live-data/test-space/cloud-experiment... | \n",
+ " 52.564497 | \n",
+ " (46484912,) | \n",
+ " 1035.163086 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " h5coro | \n",
+ " 7GB | \n",
+ " no | \n",
+ " rechunked-8mb | \n",
+ " s3://its-live-data/test-space/cloud-experiment... | \n",
+ " 59.514650 | \n",
+ " (46484912,) | \n",
+ " 1035.163086 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " tool dataset cloud-aware format \\\n",
+ "0 h5coro 1GB no original \n",
+ "1 h5coro 1GB no page-only-4mb \n",
+ "2 h5coro 1GB no page-only-8mb \n",
+ "3 h5coro 1GB no rechunked-4mb \n",
+ "4 h5coro 1GB no rechunked-8mb \n",
+ "5 h5coro 7GB no original \n",
+ "6 h5coro 7GB no page-only-4mb \n",
+ "7 h5coro 7GB no page-only-8mb \n",
+ "8 h5coro 7GB no rechunked-4mb \n",
+ "9 h5coro 7GB no rechunked-8mb \n",
+ "\n",
+ " file time shape \\\n",
+ "0 s3://its-live-data/test-space/cloud-experiment... 15.046684 (9720204,) \n",
+ "1 s3://its-live-data/test-space/cloud-experiment... 19.098737 (9720204,) \n",
+ "2 s3://its-live-data/test-space/cloud-experiment... 9.906620 (9720204,) \n",
+ "3 s3://its-live-data/test-space/cloud-experiment... 14.992573 (9720204,) \n",
+ "4 s3://its-live-data/test-space/cloud-experiment... 17.416383 (9720204,) \n",
+ "5 s3://its-live-data/test-space/cloud-experiment... 53.325380 (46484912,) \n",
+ "6 s3://its-live-data/test-space/cloud-experiment... 61.361221 (46484912,) \n",
+ "7 s3://its-live-data/test-space/cloud-experiment... 53.559258 (46484912,) \n",
+ "8 s3://its-live-data/test-space/cloud-experiment... 52.564497 (46484912,) \n",
+ "9 s3://its-live-data/test-space/cloud-experiment... 59.514650 (46484912,) \n",
+ "\n",
+ " mean \n",
+ "0 386.067383 \n",
+ "1 386.067383 \n",
+ "2 386.067383 \n",
+ "3 386.067383 \n",
+ "4 386.067383 \n",
+ "5 1035.163086 \n",
+ "6 1035.163086 \n",
+ "7 1035.163086 \n",
+ "8 1035.163086 \n",
+ "9 1035.163086 "
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Time a full read (plus mean) of one variable from every HDF5 link using h5coro.\n",
+ "# One record per (dataset, format) pair is collected and turned into a DataFrame at the end.\n",
+ "h5coro_beanchmarks = []\n",
+ "\n",
+ "for key, dataset in test_dict.items():\n",
+ "    # The group/variable are the same for every link of a dataset, so resolve them once.\n",
+ "    group = dataset[\"group\"]\n",
+ "    variable = dataset['variable']\n",
+ "    dataset_path = f'{group}/{variable}'\n",
+ "    for k, link in dataset[\"links\"].items():\n",
+ "        # Kerchunk entries are .json files, not HDF5, so they are not read with h5coro.\n",
+ "        if \"kerchunk\" in k or link.endswith(\".json\"):\n",
+ "            print(f\"Skipping {link}\")\n",
+ "            continue\n",
+ "        print (f\"Processing format: {k}, link: {link}\")\n",
+ "        # h5coro is given the resource as bucket/key, without the s3:// scheme.\n",
+ "        resource = link.replace(\"s3://\", \"\")\n",
+ "        # perf_counter is monotonic and high resolution, unlike the adjustable wall clock.\n",
+ "        start = time.perf_counter()\n",
+ "        if link.startswith(\"s3://nasa-cryo-persistent/\"):\n",
+ "            h5obj = h5coro.H5Coro(resource, s3driver.S3Driver)\n",
+ "        else:\n",
+ "            # NOTE(review): \"annon\" (sic) is presumably the key spelling h5coro expects;\n",
+ "            # confirm against the h5coro S3 driver before changing it.\n",
+ "            h5obj = h5coro.H5Coro(resource, s3driver.S3Driver, credentials={\"annon\": True})\n",
+ "        ds = h5obj.readDatasets(datasets=[dataset_path], block=True)\n",
+ "        data = ds[dataset_path]\n",
+ "        data_mean = np.mean(data)\n",
+ "        elapsed = time.perf_counter() - start\n",
+ "\n",
+ "        h5coro_beanchmarks.append({\"tool\": \"h5coro\",\n",
+ "                                   \"dataset\": key,\n",
+ "                                   \"cloud-aware\": \"no\",\n",
+ "                                   \"format\": k,\n",
+ "                                   \"file\": link,\n",
+ "                                   \"time\": elapsed,\n",
+ "                                   \"shape\": data.shape,\n",
+ "                                   \"mean\": data_mean})\n",
+ "\n",
+ "df = pd.DataFrame(h5coro_beanchmarks)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "42821313-904d-4b1b-a139-9cc5b05021d1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "