diff --git a/.gitignore b/.gitignore index c8c6efb..9b5e924 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ docs /notebooks/*files/ *.pyc __pycache__/ - +/site_libs/manuscript-notebook/ +.ipynb_checkpoints/ diff --git a/README.md b/README.md index 4ebd16d..3d86a4a 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ This repository contains use case gathering, benchmarking, and prototyping work related to cloud-optimization of ICESat-2 data, with the overall goal of better enabling cloud access patterns for the ICESat-2 community. The audience of this repository includes ICESat-2 data providers and tool and service developers with experience and interest in developing solutions to improve the performance of ICESat-2 data in the cloud. +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nsidc/cloud-optimized-icesat2/main?labpath=notebooks%2F) + ## Level of Support diff --git a/_quarto.yml b/_quarto.yml index d4673a6..3c8a5a7 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -2,9 +2,7 @@ project: type: manuscript render: - paper.qmd - - notebooks/portable-full-comparison.ipynb - - notebooks/h5py.ipynb - - optimize.py + - notebooks/h5py-atl03.ipynb output-dir: docs manuscript: article: paper.qmd diff --git a/notebooks/environment.yml b/environment.yml similarity index 85% rename from notebooks/environment.yml rename to environment.yml index 85dc00a..55a4fb2 100644 --- a/notebooks/environment.yml +++ b/environment.yml @@ -3,14 +3,16 @@ channels: - conda-forge dependencies: - python=3.11 - - jupyterlab>3 + - jupyterlab>4 - fsspec>=2024.05 - s3fs + - numpy<2.0 - matplotlib-base - pandas - xarray - dask - distributed + - dask-labextension - geopandas - h5py>=3.10 - zarr diff --git a/figures/figure-4.png b/figures/figure-4.png new file mode 100644 index 0000000..f9c6a05 Binary files /dev/null and b/figures/figure-4.png differ diff --git a/figures/figure-5.png b/figures/figure-5.png index 860e66d..ee40319 100644 Binary files 
a/figures/figure-5.png and b/figures/figure-5.png differ diff --git a/notebooks/portable-full-comparison.ipynb b/notebooks/all-access-patterns.ipynb similarity index 66% rename from notebooks/portable-full-comparison.ipynb rename to notebooks/all-access-patterns.ipynb index 2ace505..84e5123 100644 --- a/notebooks/portable-full-comparison.ipynb +++ b/notebooks/all-access-patterns.ipynb @@ -10,12 +10,12 @@ "source": [ "## Testing access time for ICESat-2 ATL03 HDF5 files in the cloud.\n", "\n", - "This notebook requires that we have cloud optimized versions of an HDF5 file:\n" + "This notebook requires that we have cloud optimized versions of an HDF5 file.\n" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 1, "id": "3b78fb94-10ae-48cb-8e30-521b2c8b7822", "metadata": { "tags": [] @@ -44,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 60, "id": "431d900d-0656-4b75-af6b-82f0f171d5f8", "metadata": { "tags": [] @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 61, "id": "9850faac-f534-4bc2-9214-c8dababe0f52", "metadata": { "tags": [] @@ -91,7 +91,7 @@ "outputs": [], "source": [ "test_dict = {\n", - " \"1GB\": {\n", + " \"ATL03-1GB\": {\n", " \"links\": {\n", " \"original\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.h5\",\n", " \"original-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.json\",\n", @@ -102,20 +102,7 @@ " \"rechunked-8mb-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.json\",\n", " \n", " },\n", - " \"group\": \"/gt1l/heights\",\n", - " \"variable\": \"h_ph\"\n", - " },\n", - " \"7GB\": {\n", - " \"links\": {\n", - " \"original\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5\",\n", - " 
\"original-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.json\",\n", - " \"page-only-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5\",\n", - " \"page-only-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5\",\n", - " \"rechunked-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5\",\n", - " \"rechunked-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5\",\n", - " \"rechunked-8mb-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.json\",\n", - " },\n", - " \"group\": \"/gt1l/heights\",\n", + " \"group\": \"/gt1l/heights\", # For simplicity we only read one variable\n", " \"variable\": \"h_ph\"\n", " }\n", "}\n", @@ -144,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 62, "id": "efe41d4a-1947-438b-a3c3-7ab954d75e13", "metadata": { "tags": [] @@ -205,13 +192,13 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 63, "id": "42821313-904d-4b1b-a139-9cc5b05021d1", "metadata": {}, "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA0oAAAIhCAYAAABwnkrAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABn+klEQVR4nO3deXRN1///8deVOTIQRJLKRMyzhtZQomquKlpKzepjLlWEUtLW0BpKVdH2o0RRfGqobxVV81iUqNY8ldbYIjEmJOf3R1fuzz1JSAg3kudjrbtW7j777PM+V+LmlX3OvhbDMAwBAAAAAKxy2bsAAAAAAMhqCEoAAAAAYEJQAgAAAAATghIAAAAAmBCUAAAAAMCEoAQAAAAAJgQlAAAAADAhKAEAAACACUEJAAAAAEwISgDSJSoqShaLRX///fd9+0ZERMhisaR4NGjQ4DFUah8Wi0VRUVF2OXZERIQiIiIe+3HPnDmjqKgoxcTEpNiW/P2SmR7FmOmR3tc3JCREL774Yqrbdu3aJYvFolmzZlnbZs2aZfPz4erqKj8/P9WuXVtjxozRhQsXUoyT/Bqk9pgyZYpNLan16d69e4bP/3FYsGCBSpcuLTc3N1ksllS/pwDgcXO0dwEAsqfChQtr7ty5Nm158uSxTzF4JM6cOaP33ntPISEhqlChgs22N954I9OD8aMYMyuYOXOmSpQoodu3b+vChQvavHmzPvroI40fP14LFizQCy+8kGKflStXytvb26YtNDTU5nn16tU1fvx4m7aCBQtm/gk8pIsXL6pdu3Zq0KCBpk6dKhcXFxUrVszeZQEAQQnAo+Hm5qZnn33WrjUkJibqzp07cnFxsWsdOVGhQoVUqFChLD9mVlCmTBmFh4dbn7do0UJvvfWWatSooebNm+vIkSMpAs7TTz+t/Pnz33PcPHny2P1n8F5u3rwpV1dXHT58WLdv31bbtm1Vq1atTBn7xo0bcnd3z5SxAORcXHoHIEPOnz+v1q1by9vbWwULFlTnzp0VGxv7wOP99ddf+s9//qPAwEA5OzsrICBAr7zyis6fP2/tc+rUKbVt21a+vr5ycXFRyZIlNWHCBCUlJVn7nDx5UhaLRWPHjtXIkSMVGhoqFxcXrVu3TpK0bNkyVa1aVe7u7vL09FTdunW1bdu2dNV45coVvf322ypcuLBcXFzk6+urRo0a6eDBg/fc77ffflPTpk2VN29eubq6qkKFCoqOjrbpk3z51cmTJ23a169fL4vFovXr11vbDMPQ2LFjFRwcLFdXV1WqVEkrVqxI1zlI0q1btzRkyBCFhobK2dlZTz31lHr16qUrV67Y9Eu+hGzJkiUqV66cXF1dVbhwYU2ePNmmvsqVK0uSOnXqZL20K/nyw9Quk0se9/vvv1fFihXl5uamkiVL6vvvv7e+FiVLllTu3LlVpUoV7dq1y2Z/85jmS9fuftx9qZxhGJo6daoqVKggNzc35c2bV6+88oqOHz9uM/7Dvr6ZKSgoSBMmTNDVq1f1+eefP9ZjJ3/vzZkzR/3795efn5/c3NxUq1Yt7dmzJ0X/Xbt26aWXXpKPj49cXV1VsWJFLVy40KZP8r/Vjz/+qM6dO6tAgQJyd3dX69atVaNGDUlSq1atUvzbpefnNvn7Yvfu3XrllVeUN29eFSlSRNLDf8/t2rVLr732mkJCQuTm5qaQkBC1bt1af/zxR6rnt27dOvXo0UP58+dXvnz51Lx5c505cybFazZv3jxVrVpVHh4e8vDwUIUKFTRjxgybPj/99JPq1KkjLy8vubu7q3r16lqzZs29/ukAZDKCEoAMadGihYoVK6ZFixZp8ODBmjdvnt56660U/Y4dOyYfHx85OjqqSJEiGjp0qG7evGnT56+//lLlypW1ZMkS9e/fXytWrNCkSZPk7e2ty5cvS/r3spxq1arpxx9/1AcffKBly5bphRde0IABA9S7d+8Ux508ebLWrl2r8ePHa8WKFSpRooTmzZu
npk2bysvLS998841mzJihy5cvKyIiQps3b77n+V69elU1atTQ559/rk6dOun//u//NH36dBUrVkxnz55Nc79Dhw6pWrVq+v333zV58mQtXrxYpUqVUseOHTV27Nj0vNQpvPfee4qMjFTdunW1dOlS9ejRQ127dtWhQ4fuu69hGHr55Zc1fvx4tWvXTsuXL1f//v0VHR2t559/XvHx8Tb9Y2Ji1K9fP7311ltasmSJqlWrpr59+1ov5apUqZJmzpwpSRo2bJi2bdumbdu26Y033rhnHXv37tWQIUMUGRmpxYsXy9vbW82bN9eIESP03//+V6NHj9bcuXMVGxurF198McX3zN0aN25sPW7y4+OPP5YklS5d2tqvW7du6tevn1544QUtXbpUU6dO1e+//65q1arZBPKHeX3vfp3v3LmT4pGYmJjuMZI1atRIDg4O2rhxY4ptybOl9xp/48aN8vT0lJOTk0qVKqUJEyZkqI533nlHx48f13//+1/997//1ZkzZxQREWETMNetW6fq1avrypUrmj59ur777jtVqFBBrVq1srkfK1nnzp3l5OSkr7/+Wt9++61GjRqlzz77TJI0evRobdu2TVOnTpWkDP/cNm/eXGFhYfrf//6n6dOnW9sf5nvu5MmTKl68uCZNmqRVq1bpo48+0tmzZ1W5cuVU79d844035OTkpHnz5mns2LFav3692rZta9Nn+PDhev311xUQEKBZs2ZpyZIl6tChg034mjNnjurVqycvLy9FR0dr4cKF8vHxUf369QlLwONkAEA6jBgxwpBkjB071qa9Z8+ehqurq5GUlGRtGzp0qDF16lRj7dq1xvLly43evXsbjo6ORs2aNY3ExERrv86dOxtOTk7G/v370zzu4MGDDUnGzz//bNPeo0cPw2KxGIcOHTIMwzBOnDhhSDKKFCliJCQkWPslJiYaAQEBRtmyZW2OffXqVcPX19eoVq3aPc/7/fffNyQZq1evvmc/ScaIESOsz1977TXDxcXFOHXqlE2/hg0bGu7u7saVK1cMwzCMmTNnGpKMEydO2PRbt26dIclYt26dYRiGcfnyZcPV1dVo1qyZTb8tW7YYkoxatWrds76VK1em+u+3YMECQ5LxxRdfWNuCg4MNi8VixMTE2PStW7eu4eXlZVy/ft0wDMPYuXOnIcmYOXNmiuMlf7/cLTg42HBzczP+/PNPa1tMTIwhyfD397eOaxiGsXTpUkOSsWzZsnuOebeDBw8a+fLlM2rXrm3Ex8cbhmEY27ZtMyQZEyZMsOl7+vRpw83NzRg0aJBhGA//+iafn6R7Pu5+rZL/7Xfu3JnmmAULFjRKliyZ4jUwP5566imb/Xr27Gl89dVXxoYNG4ylS5car7/+uiHJaNu27X3PI/l7r1KlSjY/1ydPnjScnJyMN954w9pWokQJo2LFisbt27dtxnjxxRcNf39/689c8rm2b98+zeP973//s7Zl5Oc2+TUZPnx4irEf9nvO7M6dO8a1a9eM3LlzG5988om1Pfn8evbsadN/7NixhiTj7NmzhmEYxvHjxw0HBwfj9ddfT/MY169fN3x8fIwmTZrYtCcmJhrly5c3qlSpkua+ADIXM0oAMuSll16yeV6uXDndunXLZoWukSNHqkePHqpdu7YaNWqkTz/9VB9++KE2btyo7777ztpvxYoVql27tkqWLJnm8dauXatSpUqpSpUqNu0dO3aUYRhau3ZtivqcnJyszw8dOqQzZ86oXbt2ypXr//+X5+HhoRYtWmj79u26ceNGmsdfsWKFihUrluoN9feydu1a1alTR4GBgSnqvnHjRrov+0u2bds23bp1S6+//rpNe7Vq1RQcHJyuepKPf7dXX31VuXPnTvFX6tKlS6t8+fI2bW3atFFcXJx2796dodrvVqFCBT311FPW58n/9hERETb3lCS3my9xSsu5c+fUoEED+fv7a8mSJXJ2dpYkff/997JYLGrbtq3NDIyfn5/Kly9
vvbTxYV/fZDVq1NDOnTtTPGbPnp3uMe5mGEaq7T/99JPN+D/88IPN9s8++0ydOnVSzZo11bRpU82ZM0e9e/fWnDlzUr18LjVt2rSxudQxODhY1apVs17OevToUR08eND6mt39+jZq1Ehnz55NMRvXokWLdB37QX5u0xr7Yb7nrl27psjISIWFhcnR0VGOjo7y8PDQ9evXdeDAgRTHSu3/x7vHXL16tRITE9WrV680z33r1q26dOmSOnToYPOaJiUlqUGDBtq5c6euX7+e5v4AMg+LOQDIkHz58tk8T14o4V6XSElS27ZtNWDAAG3fvl3NmjWT9O9ldfe7Of+ff/5RSEhIivaAgADr9rv5+/un2D+19uQxkpKSdPny5TRv/L548aKCgoLuWWNadad1zNTqTs94kuTn55diW2ptqe3v6OioAgUK2LRbLBb5+fmlqOdex8lo7Xfz8fGxeZ4caNJqv3Xr1n3HvHr1qho1aqTbt29rxYoVNqvBnT9/XoZhpLnaW+HChSU9/OubzNvb22Zhhodx/fp1/fPPPypbtmyKbeXLl7/vYg5mbdu21ZQpU7R9+3ZVrFjxvv3Tei327t0rSdbLFgcMGKABAwakOob58rTUfiZS8yA/t2mN/TDfc23atNGaNWv07rvvqnLlyvLy8pLFYlGjRo1S/T/vfv8/Xrx4UZLu+f9e8uv6yiuvpNnn0qVLyp07d5rbAWQOghKAx+ruvw4XKFBAf/755z3758uXL9V7gZJvkDb/smheQCD5F5e0xsiVK5fy5s2b5vHTU+PD1O3q6ipJKe4RMv+CmXwe586dSzHmuXPnUg2T5v3v3Lmjixcv2oQlwzB07tw568IMd4+Z2nHuriUruH37tlq0aKFjx45p06ZNKX4BzZ8/vywWizZt2pTq6ofJbQ/7+j4Ky5cvV2JiYqZ9Rlby7NTdP4P3ktZrkfxaJX8PDxkyRM2bN091jOLFi9s8T+/nYD3Iz21mf8ZWbGysvv/+e40YMUKDBw+2tsfHx+vSpUsPNGbyz96ff/6ZYrY5WfLr+umnn6a5amFWXOYdyI649A7AY5G82tvdb/wNGzbUunXr7nmzfJ06dbR///4Ul3vNnj1bFotFtWvXvudxixcvrqeeekrz5s2zuYzp+vXrWrRokXVFrbQ0bNhQhw8fTnGJ3/3UqVNHa9euTbHi1ezZs+Xu7m59HZJ/Af/1119t+i1btszm+bPPPitXV9cUn021devWdF2eVqdOHUn/3iR+t0WLFun69evW7cl+//1368xBsnnz5snT01OVKlWSlP7ZxEepS5cuWr9+vRYvXmy9zOluL774ogzD0F9//aXw8PAUj+TZmod9fTPbqVOnNGDAAHl7e6tbt26ZMmby5X/pXTL8m2++sfmZ+eOPP7R161ZrcCtevLiKFi2qvXv3pvrahoeHy9PT84Fqfdif28xgsVhkGEaKgP3f//73gRbnkKR69erJwcFB06ZNS7NP9erVlSdPHu3fvz/N1zV59gvAo8WMEoBMtWnTJo0aNUrNmjVT4cKFdevWLa1YsUJffPGFnn/+eTVp0sTa9/3339eKFStUs2ZNvfPOOypbtqyuXLmilStXqn///ipRooTeeustzZ49W40bN9b777+v4OBgLV++XFOnTlWPHj3u+8GUuXLl0tixY/X666/rxRdfVLdu3RQfH69x48bpypUr+vDDD++5f79+/bRgwQI1bdpUgwcPVpUqVXTz5k1t2LBBL774YppBbcSIEfr+++9Vu3ZtDR8+XD4+Ppo7d66WL1+usWPHWi8Pq1y5sooXL64BAwbozp07yps3r5YsWZJiVa+8efNqwIABGjlypN544w29+uqrOn36tKKiotJ1aVjdunVVv359RUZGKi4uTtWrV9evv/6qESNGqGLFimrXrp1N/4CAAL300kuKioqSv7+/5syZo9WrV+ujjz6y/oJapEgRubm5ae7cuSpZsqQ8PDwUEBBgvbzwURs3bpy+/vp
r9enTR7lz59b27dut27y8vFSqVClVr15d//nPf9SpUyft2rVLNWvWVO7cuXX27Flt3rxZZcuWVY8ePR769X0Yv/32m/U+lAsXLmjTpk2aOXOmHBwctGTJkhSXS97PvHnztHjxYjVu3FjBwcG6cuWK/ve//2n+/Pnq2LFjinvP0nLhwgU1a9ZMXbt2VWxsrEaMGCFXV1cNGTLE2ufzzz9Xw4YNVb9+fXXs2FFPPfWULl26pAMHDmj37t363//+l6Hakz3sz21m8PLyUs2aNTVu3Djlz59fISEh2rBhg2bMmPHAH54dEhKid955Rx988IFu3rxp/aiF/fv36++//9Z7770nDw8Pffrpp+rQoYMuXbqkV155Rb6+vrp48aL27t2rixcv3jNoAchE9lpFAsCTJXllqYsXL9q0m1dtO3LkiNGoUSPjqaeeMlxcXAxXV1ejbNmyxqhRo4xbt26lGPf06dNG586dDT8/P8PJyckICAgwWrZsaZw/f97a548//jDatGlj5MuXz3BycjKKFy9ujBs3zmY1rORV78aNG5dq/UuXLjWeeeYZw9XV1cidO7dRp04dY8uWLek698uXLxt9+/Y1goKCDCcnJ8PX19do3LixcfDgQWsfmVa9MwzD2Ldvn9GkSRPD29vbcHZ2NsqXL5/qCnGHDx826tWrZ3h5eRkFChQw+vTpYyxfvtxm1TvDMIykpCRjzJgxRmBgoOHs7GyUK1fO+L//+z+jVq1a6VqV7ebNm0ZkZKQRHBxsODk5Gf7+/kaPHj2My5cv2/QLDg42GjdubHz77bdG6dKlDWdnZyMkJMT4+OOPU4z5zTffGCVKlDCcnJxsXoO0Vr1r3LhxijEkGb169bJpS+3f0zxmhw4d0lxdzvx6fPXVV8Yzzzxj5M6d23BzczOKFClitG/f3ti1a5e1z8O+vmmdn2GkvkJg8s9O8sPZ2dnw9fU1atWqZYwePdq4cOFCinHS+jm827Zt24w6depYf6bc3d2NypUrG1OnTrX5mUlL8ip0X3/9tfHmm28aBQoUMFxcXIznnnvO5vVKtnfvXqNly5aGr6+v4eTkZPj5+RnPP/+8MX369BTnmtoKf6mtepcsPT+393pNHvZ77s8//zRatGhh5M2b1/D09DQaNGhg/Pbbb0ZwcLDRoUOH+56fefXKZLNnzzYqV65suLq6Gh4eHkbFihVT/N+wYcMGo3HjxoaPj4/h5ORkPPXUU0bjxo1TfZ0APBoWw0hjSR0AQI4UEhKiMmXKWD+UEznL+vXrVbt2bf3vf/+754ICAJDdcY8SAAAAAJgQlAAAAADAhEvvAAAAAMCEGSUAAAAAMCEoAQAAAIAJQQkAAAAATLL9B84mJSXpzJkz8vT0lMVisXc5AAAAAOzEMAxdvXpVAQEBypXr3nNG2T4onTlzRoGBgfYuAwAAAEAWcfr0aRUqVOiefbJ9UPL09JT074vh5eVl52oAAAAA2EtcXJwCAwOtGeFesn1QSr7czsvLi6AEAAAAIF235LCYAwAAAACYEJQAAAAAwISgBAAAAAAm2f4epfQwDEN37txRYmKivUtBDuDg4CBHR0eWqwcAAMjCcnxQSkhI0NmzZ3Xjxg17l4IcxN3dXf7+/nJ2drZ3KQAAAEhFjg5KSUlJOnHihBwcHBQQECBnZ2f+yo9HyjAMJSQk6OLFizpx4oSKFi163w87AwAAwOOXo4NSQkKCkpKSFBgYKHd3d3uXgxzCzc1NTk5O+uOPP5SQkCBXV1d7lwQAAAAT/pQt8Rd9PHZ8zwEAAGRt/LYGAAAAACYEJQAAAAAwISgBAAAAgAlBKZvr2LGjLBZLisfRo0ftXZqNkydPymKxKCYmxt6lAAAAADl71bucokGDBpo5c6ZNW4ECBTI8TkJCAp/7AwAAgByBGaUcwMXFRX5+fjYPBwcHbdiwQVWqVJGLi4v8/f01ePBg3blzx7pfRESEevfurf79+yt//vyqW7eu1q9fL4vFolWrVql
ixYpyc3PT888/rwsXLmjFihUqWbKkvLy81Lp1a5sP8V25cqVq1KihPHnyKF++fHrxxRd17Ngx6/bQ0FBJUsWKFWWxWBQREfHYXh8AAADAjKCUQ/31119q1KiRKleurL1792ratGmaMWOGRo4cadMvOjpajo6O2rJliz7//HNre1RUlKZMmaKtW7fq9OnTatmypSZNmqR58+Zp+fLlWr16tT799FNr/+vXr6t///7auXOn1qxZo1y5cqlZs2ZKSkqSJO3YsUOS9NNPP+ns2bNavHjxY3gVAAAAgNRx6V0O8P3338vDw8P6vGHDhipWrJgCAwM1ZcoUWSwWlShRQmfOnFFkZKSGDx9u/ZyfsLAwjR071rrvuXPnJEkjR45U9erVJUldunTRkCFDdOzYMRUuXFiS9Morr2jdunWKjIyUJLVo0cKmphkzZsjX11f79+9XmTJlrJcC5suXT35+fo/olQAAAADShxmlHKB27dqKiYmxPiZPnqwDBw6oatWqslgs1n7Vq1fXtWvX9Oeff1rbwsPDUx2zXLly1q8LFiwod3d3a0hKbrtw4YL1+bFjx9SmTRsVLlxYXl5e1kvtTp06lWnnCQAAAGQWZpRygNy5cyssLMymzTAMm5CU3CbJpj137typjunk5GT92mKx2DxPbku+rE6SmjRposDAQH355ZcKCAhQUlKSypQpo4SEhAc7KQAAAOARYkYphypVqpS2bt1qDUeStHXrVnl6euqpp57K1GP9888/OnDggIYNG6Y6deqoZMmSunz5sk2f5NX0EhMTM/XYAAAAwINgRimH6tmzpyZNmqQ+ffqod+/eOnTokEaMGKH+/ftb70/KLHnz5lW+fPn0xRdfyN/fX6dOndLgwYNt+vj6+srNzU0rV65UoUKF5OrqKm9v70ytAwAAZK4DJUrau4RUlTx4wN4lIBtgRimHeuqpp/TDDz9ox44dKl++vLp3764uXbpo2LBhmX6sXLlyaf78+frll19UpkwZvfXWWxo3bpxNH0dHR02ePFmff/65AgIC1LRp00yvAwAAAEgvi3H3tVfZUFxcnLy9vRUbGysvLy+bbbdu3dKJEycUGhoqV1dXO1WInIjvPQBAdsCMEp4098oGZswoAQAAAIAJQQkAAAAATAhKAAAAAGBCUAIAAAAAE5YHBwAAQLbyWfe19i4hTb2mP2/vEpBOzCgBAAAAgAlBCQAAAABMCEoAAAAAYEJQAgAAAAATFnNIQ8jg5Y/tWCc/bPxYjhMVFaWlS5cqJiYm3ftERESoQoUKmjRpkl3rAAAAAB4nglIOMmDAAPXp0ydD+yxevFhOTk6PqCIAAAAgayIo5QCGYSgxMVEeHh7y8PDI0L4+Pj6PqCoAAAAg6+IepSdUfHy83nzzTfn6+srV1VU1atTQzp07JUnr16+XxWLRqlWrFB4eLhcXF23atElRUVGqUKGCdYw7d+7ozTffVJ48eZQvXz5FRkaqQ4cOevnll619IiIi1K9fP+vzkJAQjR49Wp07d5anp6eCgoL0xRdf2NQWGRmpYsWKyd3dXYULF9a7776r27dvP8qXAwAAAMhUBKUn1KBBg7Ro0SJFR0dr9+7dCgsLU/369XXp0iWbPmPGjNGBAwdUrly5FGN89NFHmjt3rmbOnKktW7YoLi5OS5cuve+xJ0yYoPDwcO3Zs0c9e/ZUjx49dPDgQet2T09PzZo1S/v379cnn3yiL7/8UhMnTsyU8wYAAAAeB4LSE+j69euaNm2axo0bp4YNG6pUqVL68ssv5ebmphkzZlj7vf/++6pbt66KFCmifPnypRjn008/1ZAhQ9SsWTOVKFFCU6ZMUZ48ee57/EaNGqlnz54KCwtTZGSk8ufPr/Xr11u3Dxs2TNWqVVNISIiaNGmit99+WwsXLsyMUwcAAAAeC+5RegIdO3ZMt2/fVvXq1a1tTk5OqlKlig4cOKDKlStLksLDw9McIzY2VufPn1eVKlWsbQ4ODnr66aeVlJR
0z+PfPTtlsVjk5+enCxcuWNu+/fZbTZo0SUePHtW1a9d0584deXl5Zfg8AQAAAHthRukJZBiGpH9Dirn97rbcuXPfd6zUxrgf8yp4FovFGq62b9+u1157TQ0bNtT333+vPXv2aOjQoUpISLjvuAAAAEBWQVB6AoWFhcnZ2VmbN2+2tt2+fVu7du1SyZIl0zWGt7e3ChYsqB07dljbEhMTtWfPnoeqbcuWLQoODtbQoUMVHh6uokWL6o8//nioMQEAAIDHjUvvnkC5c+dWjx49NHDgQPn4+CgoKEhjx47VjRs31KVLF+3duzdd4/Tp00djxoxRWFiYSpQooU8//VSXL19OMcuUEWFhYTp16pTmz5+vypUra/ny5VqyZMkDjwcAAADYA0EpDSc/bGzvEu7pww8/VFJSktq1a6erV68qPDxcq1atUt68edM9RmRkpM6dO6f27dvLwcFB//nPf1S/fn05ODg8cF1NmzbVW2+9pd69eys+Pl6NGzfWu+++q6ioqAceEwAAAHjcLEZ6bkp5gsXFxcnb21uxsbEpFhS4deuWTpw4odDQULm6utqpwqwjKSlJJUuWVMuWLfXBBx/Yu5xsje89AEB2cKBE+i75f9zWRnxm7xLS1Gv68/YuIUe7VzYwY0YpB/vjjz/0448/qlatWoqPj9eUKVN04sQJtWnTxt6lAQAAAHbFYg45WK5cuTRr1ixVrlxZ1atX1759+/TTTz+le0EIAAAAILtiRikHCwwM1JYtW+xdBgAAAJDlMKMEAAAAACYEJQAAAAAwISgBAAAAgIldg9K0adNUrlw5eXl5ycvLS1WrVtWKFSus2zt27CiLxWLzePbZZ+1YMQAAAICcwK6LORQqVEgffvihwsLCJEnR0dFq2rSp9uzZo9KlS0uSGjRooJkzZ1r3cXZ2tkutAAAAAHIOuwalJk2a2DwfNWqUpk2bpu3bt1uDkouLi/z8/OxRHgAAAIAcKsssD56YmKj//e9/un79uqpWrWptX79+vXx9fZUnTx7VqlVLo0aNkq+vb5rjxMfHKz4+3vo8Li7uwQqK8n6w/R7oWLGP71hPiKioKC1dulQxMTH2LiWFWbNmqV+/frpy5Yq9SwEAZJbH+b6fUfyeANiF3Rdz2Ldvnzw8POTi4qLu3btryZIlKlWqlCSpYcOGmjt3rtauXasJEyZo586dev75522CkNmYMWPk7e1tfQQGBj6uU0EWNmbMGFksFvXr18/epQAAAOAJYPcZpeLFiysmJkZXrlzRokWL1KFDB23YsEGlSpVSq1atrP3KlCmj8PBwBQcHa/ny5WrevHmq4w0ZMkT9+/e3Po+LiyMs5XA7d+7UF198oXLlytm7FAAAADwh7D6j5OzsrLCwMIWHh2vMmDEqX768Pvnkk1T7+vv7Kzg4WEeOHElzPBcXF+sqesmP7CgiIkK9e/dW7969lSdPHuXLl0/Dhg2TYRiSpDlz5ig8PFyenp7y8/NTmzZtdOHCBZsxli1bpqJFi8rNzU21a9dWdHS0LBaLzSVlW7duVc2aNeXm5qbAwEC9+eabun79+j1ri4+P15tvvilfX1+5urqqRo0a2rlzp3X7+vXrZbFYtGbNGoWHh8vd3V3VqlXToUOHUh1v48aNcnJy0rlz52za3377bdWsWfOetVy7dk2vv/66vvzyS+XNmzfF9pCQEI0cOVLt27eXh4eHgoOD9d133+nixYtq2rSpPDw8VLZsWe3atSvFvkuXLlWxYsXk6uqqunXr6vTp0/esBQAAAE8OuwclM8Mw0ry07p9//tHp06fl7+//mKvKmqKjo+Xo6Kiff/5ZkydP1sSJE/Xf//5XkpSQkKAPPvhAe/fu1dKlS3XixAl17NjRuu/Jkyf1yiuv6OWXX1ZMTIy6deumoUOH2oy/b98+1a9fX82bN9evv/6qBQsWaPPmzerdu/c96xo0aJAWLVqk6Oho7d69W2FhYapfv74uXbpk02/o0KGaMGGCdu3aJUd
HR3Xu3DnV8WrWrKnChQvr66+/trbduXNHc+bMUadOne5ZS69evdS4cWO98MILafaZOHGiqlevrj179qhx48Zq166d2rdvr7Zt21rrb9++vTWEStKNGzc0atQoRUdHa8uWLYqLi9Nrr712z1oAAADw5LDrpXfvvPOOGjZsqMDAQF29elXz58/X+vXrtXLlSl27dk1RUVFq0aKF/P39dfLkSb3zzjvKnz+/mjVrZs+ys4zAwEBNnDhRFotFxYsX1759+zRx4kR17drVJnQULlxYkydPVpUqVXTt2jV5eHho+vTpKl68uMaNGyfp30sgf/vtN40aNcq637hx49SmTRvrfT1FixbV5MmTVatWLU2bNk2urq4parp+/bqmTZumWbNmqWHDhpKkL7/8UqtXr9aMGTM0cOBAa99Ro0apVq1akqTBgwercePGunXrVqrjdunSRTNnzrTuv3z5ct24cUMtW7ZM8/WZP3++du/ebTOblZpGjRqpW7dukqThw4dr2rRpqly5sl599VVJUmRkpKpWrarz589bV2C8ffu2pkyZomeeeUbSv6G1ZMmS2rFjh6pUqXLP4wEAACDrs+uM0vnz59WuXTsVL15cderU0c8//6yVK1eqbt26cnBw0L59+9S0aVMVK1ZMHTp0ULFixbRt2zZ5enras+ws49lnn5XFYrE+r1q1qo4cOaLExETt2bNHTZs2VXBwsDw9PRURESFJOnXqlCTp0KFDqly5ss145l/wf/nlF82aNUseHh7WR/369ZWUlKQTJ05o9OjRNttOnTqlY8eO6fbt26pevbp1HCcnJ1WpUkUHDhywGf/ue4aSZwnNlwcm69ixo44ePart27dLkr766iu1bNlSuXPn1qZNm2zqmDt3rk6fPq2+fftqzpw5qQavtOooWLCgJKls2bIp2u6uzdHRUeHh4dbnJUqUUJ48eVKcIwAAAJ5Mdp1RmjFjRprb3NzctGrVqsdYTfZx69Yt1atXT/Xq1dOcOXNUoEABnTp1SvXr11dCQoKkfy9xvDtkJbfdLSkpSd26ddObb76Z4hhBQUHq3r27zYxOQECALl++LEmpjm1uc3Jysn6dvC0pKSnVc/L19VWTJk00c+ZMFS5cWD/88IPWr18vSQoPD7dZRrxgwYJas2aNLly4oKefftranpiYqI0bN2rKlCmKj4+Xg4NDmnWkpzbz+aTVBgAAgCeP3Ve9w4NLnl25+3nRokV18OBB/f333/rwww+tK/6ZFyMoUaKEfvjhB5s2c59KlSrp999/V1hYWKrH9/HxkY+Pj01bWFiYnJ2dtXnzZrVp00bSv5ep7dq166GX5n7jjTf02muvqVChQipSpIh11srNzS1FjXXq1NG+ffts2jp16qQSJUooMjLSGpIe1J07d7Rr1y7rLNyhQ4d05coVlShR4qHGBQAAQNaQ5RZzQPqdPn1a/fv316FDh/TNN9/o008/Vd++fRUUFCRnZ2d9+umnOn78uJYtW6YPPvjAZt9u3brp4MGDioyM1OHDh7Vw4ULNmjVL0v+fFYmMjNS2bdvUq1cvxcTE6MiRI1q2bJn69OmTZk25c+dWjx49NHDgQK1cuVL79+9X165ddePGDXXp0uWhzrd+/fry9vbWyJEj77uIg6enp8qUKWPzyJ07t/Lly6cyZco8VB3SvzNOffr00c8//6zdu3erU6dOevbZZ7k/CQAAIJtgRiktT8CnYLdv3143b95UlSpV5ODgoD59+ug///mPLBaLZs2apXfeeUeTJ09WpUqVNH78eL300kvWfUNDQ/Xtt9/q7bff1ieffKKqVatq6NCh6tGjh1xcXCT9e+/Ohg0bNHToUD333HMyDENFihSx+Xyr1Hz44YdKSkpSu3btdPXqVYWHh2vVqlWpLs+dEbly5VLHjh01evRotW/f/qHGelju7u6KjIxUmzZt9Oeff6pGjRr66quv7FoTAAAAMo/FMN+Yks3ExcXJ29tbsbGxKT5T6datWzpx4oR
CQ0Pve8N/VhMREaEKFSpo0qRJmTbmqFGjNH369Cz9eUBdu3bV+fPntWzZMnuX8lCe5O89AMiWorztXUHasvAfbw+UKGnvElK1NuIze5eQpl7Tn7d3CTnavbKBGTNKOdjUqVNVuXJl5cuXT1u2bNG4cePu+xlJ9hIbG6udO3dq7ty5+u677+xdDgAAALI5glIOduTIEY0cOVKXLl1SUFCQ3n77bQ0ZMsTeZaWqadOm2rFjh7p166a6devauxwAAABkcwSlJ1Ty0tgPY+LEiZo4ceLDF/MYZMb5AgAAAOnFqncAAAAAYEJQAgAAAAATghIAAAAAmBCUAAAAAMCEoAQAAAAAJgQlAAAAADBhefA0lI0u+9iOta/Dvsd2rCdFVFSUli5dqpiYGHuXksKsWbPUr18/Xblyxd6lAAAA4BFhRgnZ1p07dzRs2DCFhobKzc1NhQsX1vvvv6+kpCR7lwYAAIAsjhklZFsfffSRpk+frujoaJUuXVq7du1Sp06d5O3trb59+9q7PAAAAGRhzCg9oSIiItS7d2/17t1befLkUb58+TRs2DAZhiFJmjNnjsLDw+Xp6Sk/Pz+1adNGFy5csBlj2bJlKlq0qNzc3FS7dm1FR0fLYrHYXFK2detW1axZU25ubgoMDNSbb76p69ev37O2+Ph4vfnmm/L19ZWrq6tq1KihnTt3WrevX79eFotFa9asUXh4uNzd3VWtWjUdOnQo1fE2btwoJycnnTt3zqb97bffVs2aNdOsY9u2bWratKkaN26skJAQvfLKK6pXr5527dpl7RMSEqKRI0eqffv28vDwUHBwsL777jtdvHhRTZs2lYeHh8qWLWuzT7KlS5eqWLFicnV1Vd26dXX69Ol7vi4AAAB4chCUnmDR0dFydHTUzz//rMmTJ2vixIn673//K0lKSEjQBx98oL1792rp0qU6ceKEOnbsaN335MmTeuWVV/Tyyy8rJiZG3bp109ChQ23G37dvn+rXr6/mzZvr119/1YIFC7R582b17t37nnUNGjRIixYtUnR0tHbv3q2wsDDVr19fly5dsuk3dOhQTZgwQbt27ZKjo6M6d+6c6ng1a9ZU4cKF9fXXX1vb7ty5ozlz5qhTp05p1lGjRg2tWbNGhw8fliTt3btXmzdvVqNGjWz6TZw4UdWrV9eePXvUuHFjtWvXTu3bt1fbtm2t9bdv394aQiXpxo0bGjVqlKKjo7VlyxbFxcXptddeu+frAgAAgCcHl949wQIDAzVx4kRZLBYVL15c+/bt08SJE9W1a1eb0FG4cGFNnjxZVapU0bVr1+Th4aHp06erePHiGjdunCSpePHi+u233zRq1CjrfuPGjVObNm3Ur18/SVLRokU1efJk1apVS9OmTZOrq2uKmq5fv65p06Zp1qxZatiwoSTpyy+/1OrVqzVjxgwNHDjQ2nfUqFGqVauWJGnw4MFq3Lixbt26leq4Xbp00cyZM637L1++XDdu3FDLli3TfH0iIyMVGxurEiVKyMHBQYmJiRo1apRat25t069Ro0bq1q2bJGn48OGaNm2aKleurFdffdU6TtWqVXX+/Hn5+flJkm7fvq0pU6bomWeekfRvaC1ZsqR27NihKlWqpFkTAAAAngzMKD3Bnn32WVksFuvzqlWr6siRI0pMTNSePXvUtGlTBQcHy9PTUxEREZKkU6dOSZIOHTqkypUr24xn/gX/l19+0axZs+Th4WF91K9fX0lJSTpx4oRGjx5ts+3UqVM6duyYbt++rerVq1vHcXJyUpUqVXTgwAGb8cuVK2f92t/fX5JSXB6YrGPHjjp69Ki2b98uSfrqq6/UsmVL5c6dW5s2bbKpY+7cuZKkBQsWaM6cOZo3b552796t6OhojR8/XtHR0WnWUbBgQUlS2bJlU7TdXZujo6PCw8Otz0uUKKE8efKkOEcAAAA8mZhRyoZu3bqlevXqqV69epozZ44KFCigU6dOqX79+kpISJAkGYZhE7KS2+6
WlJSkbt266c0330xxjKCgIHXv3t1mRicgIECXL1+WpFTHNrc5OTlZv07eltaKdL6+vmrSpIlmzpypwoUL64cfftD69eslSeHh4TbLiCcHm4EDB2rw4MHWS+LKli2rP/74Q2PGjFGHDh3uWUd6ajOfT1ptAAAAePIQlJ5gybMrdz8vWrSoDh48qL///lsffvihAgMDJSnFYgQlSpTQDz/8YNNm7lOpUiX9/vvvCgsLS/X4Pj4+8vHxsWkLCwuTs7OzNm/erDZt2kj69zK1Xbt2WS/he1BvvPGGXnvtNRUqVEhFihSxzlq5ubmlWuONGzeUK5ftpKmDg0OmLA9+584d7dq1yzoLd+jQIV25ckUlSpR46LEBAABgf1x69wQ7ffq0+vfvr0OHDumbb77Rp59+qr59+yooKEjOzs769NNPdfz4cS1btkwffPCBzb7dunXTwYMHFRkZqcOHD2vhwoWaNWuWpP8/KxIZGalt27apV69eiomJ0ZEjR7Rs2TL16dMnzZpy586tHj16aODAgVq5cqX279+vrl276saNG+rSpctDnW/9+vXl7e2tkSNH3nMRh2RNmjTRqFGjtHz5cp08eVJLlizRxx9/rGbNmj1UHdK/M059+vTRzz//rN27d6tTp0569tlnuT8JAAAgm2BGKQ37Ouyzdwn31b59e928eVNVqlSRg4OD+vTpo//85z+yWCyaNWuW3nnnHU2ePFmVKlXS+PHj9dJLL1n3DQ0N1bfffqu3335bn3zyiapWraqhQ4eqR48ecnFxkfTvvTsbNmzQ0KFD9dxzz8kwDBUpUkStWrW6Z10ffvihkpKS1K5dO129elXh4eFatWqV8ubN+1DnmytXLnXs2FGjR49W+/bt79v/008/1bvvvquePXvqwoULCggIULdu3TR8+PCHqkOS3N3dFRkZqTZt2ujPP/9UjRo19NVXXz30uAAAAMgaLIb5xpRsJi4uTt7e3oqNjZWXl5fNtlu3bunEiRMKDQ1NdaW1rCwiIkIVKlTQpEmTMm3MUaNGafr06Vn684C6du2q8+fPa9myZfYu5aE8yd97AJAtRXnbu4K0RcXau4I0HShR0t4lpGptxGf2LiFNvaY/b+8ScrR7ZQMzZpRysKlTp6py5crKly+ftmzZonHjxt33M5LsJTY2Vjt37tTcuXP13Xff2bscAAAem7LRZe/fyU4W2rsA4BEiKOVgR44c0ciRI3Xp0iUFBQXp7bff1pAhQ+xdVqqaNm2qHTt2qFu3bqpbt669ywEAAEA2R1B6QiUvjf0wJk6cqIkTJz58MY9BZpwvAAAAkF6segcAAAAAJgQlAAAAADAhKAEAAACACUEJAAAAAEwISgAAAABgQlACAAAAABOWB0/D4/yk6ZIHDzy2Y6Xl5MmTCg0N1Z49e1ShQoVHeqyQkBD169dP/fr1y7QxIyIiVKFCBU2aNCnTxkyvjh076sqVK1q6dOljPzYAAAAeDWaUkOMYhqGGDRvKYrEQbgAAAJAqglI2kZCQYO8SnhiTJk2SxWKxdxkAAADIwghKT6iIiAj17t1b/fv3V/78+VW3bl3t379fjRo1koeHhwoWLKh27drp77//tu6TlJSkjz76SGFhYXJxcVFQUJBGjRplM+7x48dVu3Ztubu7q3z58tq2bZt1W1RUVIrL8iZNmqSQkBDr844dO+rll1/W+PHj5e/vr3z58qlXr166fft2mucyc+ZMeXt7a/Xq1ZJ03/O4fv262rdvLw8PD/n7+2vChAnpft327t2rjz/+WF999VWKbSdPnpTFYtHChQv13HPPyc3NTZUrV9bhw4e1c+dOhYeHy8PDQw0aNNDFixdT7P/ee+/J19dXXl5e6tatG+EVAADgCUZQeoJFR0fL0dFRW7Zs0YcffqhatWqpQoUK2rVrl1auXKnz58+rZcuW1v5DhgzRRx99pHfffVf79+/XvHnzVLBgQZsxhw4dqgEDBigmJkbFihVT69atdefOnQzVtW7
dOh07dkzr1q1TdHS0Zs2apVmzZqXad/z48RowYIBWrVqlunXr6uzZs/c9j4EDB2rdunVasmSJfvzxR61fv16//PLLfeu6ceOGWrdurSlTpsjPzy/NfiNGjNCwYcO0e/duOTo6qnXr1ho0aJA++eQTbdq0SceOHdPw4cNt9lmzZo0OHDigdevW6ZtvvtGSJUv03nvvpe8FAwAAQJbDYg5PsLCwMI0dO1aSNHz4cFWqVEmjR4+2bv/qq68UGBiow4cPy9/fX5988ommTJmiDh06SJKKFCmiGjVq2Iw5YMAANW7cWNK/MySlS5fW0aNHVaJEiXTXlTdvXk2ZMkUODg4qUaKEGjdurDVr1qhr1642/YYMGaLo6GitX79eZcuWlSRNmzbtnucREBCgGTNmaPbs2apbt66kfwNjoUKF7lvXW2+9pWrVqqlp06b37DdgwADVr19fktS3b1+1bt1aa9asUfXq1SVJXbp0SRH8nJ2d9dVXX8nd3V2lS5fW+++/r4EDB+qDDz5Qrlz8PQIAAOBJQ1B6goWHh1u//uWXX7Ru3Tp5eHik6Hfs2DFduXJF8fHxqlOnzj3HLFeunPVrf39/SdKFCxcyFJRKly4tBwcHm3H27dtn02fChAm6fv26du3apcKFC6f7PG7evKmEhARVrVrV2u7j46PixYtbn48ePdomaO3fv18xMTFau3at9uzZc9/6734NkmfckoNcctuFCxds9ilfvrzc3d2tz6tWrapr167p9OnTCg4Ovu8xAQAAkLXwp+4nWO7cua1fJyUlqUmTJoqJibF5HDlyRDVr1pSbm1u6xnRycrJ+nbzgQVJSkiQpV65cMgzDpn9q9x7dPUbyOMljJHvuueeUmJiohQsX2rTf7zzMx09N9+7dbfYNCAjQ2rVrdezYMeXJk0eOjo5ydPz3bwQtWrRQRETEfV8Dc5v5fNLCohEAAABPJmaUsolKlSpp0aJFCgkJsYaAuxUtWlRubm5as2aN3njjjQc6RoECBXTu3DkZhmENADExMQ80VpUqVdSnTx/Vr19fDg4OGjhwYLrOIywsTE5OTtq+fbuCgoIkSZcvX9bhw4dVq1YtSf/OMPn4+NjsN3jw4BTnXbZsWU2cOFFNmjR5oHO42969e3Xz5k1rIN2+fbs8PDzSdUkgAAAAsh5mlLKJXr166dKlS2rdurV27Nih48eP68cff1Tnzp2VmJgoV1dXRUZGatCgQZo9e7aOHTum7du3a8aMGek+RkREhC5evKixY8fq2LFj+uyzz7RixYoHrrlq1apasWKF3n//fU2cODFd5+Hh4aEuXbpo4MCBWrNmjX777Td17NjxvvcB+fn5qUyZMjYPSQoKClJoaOgDn0OyhIQEdenSRfv379eKFSs0YsQI9e7dm/uTAAAAnlDMKKWh5MED9i4hQwICArRlyxZFRkaqfv36io+PV3BwsBo0aGD9Zf3dd9+Vo6Ojhg8frjNnzsjf31/du3dP9zFKliypqVOnavTo0frggw/UokULDRgwQF988cUD1129enUtX75cjRo1koODg9588837nse4ceN07do1vfTSS/L09NTbb7+t2NjYB64hM9SpU0dFixZVzZo1FR8fr9dee01RUVF2rQkAAAAPzmKk56aPJ1hcXJy8vb0VGxsrLy8vm223bt3SiRMnFBoaKldXVztViJyI7z0AyGKivO1dQZrKhgbZu4Q0LRyTsY8QeVzWRnxm7xLS1Gv68/YuIUe7VzYw47ogAAAAADCxa1CaNm2aypUrJy8vL3l5eVnvWUlmGIaioqIUEBAgNzc3RURE6Pfff7djxQAAAAByArsGpUKFCunDDz/Url27tGvXLj3//PNq2rSpNQyNHTtWH3/8saZMmaKdO3fKz89PdevW1dWrV+1ZNgAAAIBszq5BqUmTJmrUqJGKFSumYsWKadSoUfLw8ND27dtlGIYmTZqkoUOHqnnz5ipTpoyio6N148YNzZs3z55lAwAAAMjmssw
9SomJiZo/f76uX7+uqlWr6sSJEzp37pzq1atn7ePi4qJatWpp69ataY4THx+vuLg4mwcAAAAAZITdg9K+ffvk4eEhFxcXde/eXUuWLFGpUqV07tw5SVLBggVt+hcsWNC6LTVjxoyRt7e39REYGPhI6wcAAACQ/dg9KBUvXlwxMTHavn27evTooQ4dOmj//v3W7RaLxaa/YRgp2u42ZMgQxcbGWh+nT59+ZLUDAAAAyJ7s/oGzzs7OCgsLkySFh4dr586d+uSTTxQZGSlJOnfunPz9/a39L1y4kGKW6W4uLi5ycXF5tEUDAAAAyNbsPqNkZhiG4uPjFRoaKj8/P61evdq6LSEhQRs2bFC1atXsWCEAAACA7M6uM0rvvPOOGjZsqMDAQF29elXz58/X+vXrtXLlSlksFvXr10+jR49W0aJFVbRoUY0ePVru7u5q06bNI6/ts+5rH/kxkmWFT2g+efKkQkNDtWfPHlWoUOGRHiskJET9+vVTv379Mm3MiIgIVahQQZMmTcq0MdOrY8eOunLlipYuXfrYjw0AAIBHw65B6fz582rXrp3Onj0rb29vlStXTitXrlTdunUlSYMGDdLNmzfVs2dPXb58Wc8884x+/PFHeXp62rNsPKHOnTungQMHavXq1bp69aqKFy+ud955R6+88oq9SwMAAEAWY9egNGPGjHtut1gsioqKUlRU1OMp6AmWkJAgZ2dne5eRpbVr106xsbFatmyZ8ufPr3nz5qlVq1batWuXKlasaO/yAAAAkIVkuXuUkD4RERHq3bu3+vfvr/z586tu3brav3+/GjVqJA8PDxUsWFDt2rXT33//bd0nKSlJH330kcLCwuTi4qKgoCCNGjXKZtzjx4+rdu3acnd3V/ny5bVt2zbrtqioqBSX5U2aNEkhISHW5x07dtTLL7+s8ePHy9/fX/ny5VOvXr10+/btNM9l5syZ8vb2tt6Pdr/zuH79utq3by8PDw/5+/trwoQJ6XrNtm3bpj59+qhKlSoqXLiwhg0bpjx58mj37t2S/r380GKxaOHChXruuefk5uamypUr6/Dhw9q5c6fCw8Pl4eGhBg0a6OLFiynGf++99+Tr6ysvLy9169ZNCQkJ6aoLAAAAWQ9B6QkWHR0tR0dHbdmyRR9++KFq1aqlChUqaNeuXVq5cqXOnz+vli1bWvsPGTJEH330kd59913t379f8+bNS7GC4NChQzVgwADFxMSoWLFiat26te7cuZOhutatW6djx45p3bp1io6O1qxZszRr1qxU+44fP14DBgzQqlWrVLduXZ09e/a+5zFw4ECtW7dOS5Ys0Y8//qj169frl19+uW9dNWrU0IIFC3Tp0iUlJSVp/vz5io+PV0REhE2/ESNGaNiwYdq9e7ccHR3VunVrDRo0SJ988ok2bdqkY8eOafjw4Tb7rFmzRgcOHNC6dev0zTffaMmSJXrvvfcy9LoBAAAg67D78uB4cGFhYRo7dqwkafjw4apUqZJGjx5t3f7VV18pMDBQhw8flr+/vz755BNNmTJFHTp0kCQVKVJENWrUsBlzwIABaty4saR/Z0hKly6to0ePqkSJEumuK2/evJoyZYocHBxUokQJNW7cWGvWrFHXrl1t+g0ZMkTR0dFav369ypYtK0maNm3aPc8jICBAM2bM0OzZs633skVHR6tQoUL3rWvBggVq1aqV8uXLJ0dHR7m7u2vJkiUqUqRIitegfv36kqS+ffuqdevWWrNmjapXry5J6tKlS4rg5+zsrK+++kru7u4qXbq03n//fQ0cOFAffPCBcuXi7xEAAABPGoLSEyw8PNz69S+//KJ169bJw8MjRb9jx47pypUrio+PV506de45Zrly5axfJ39+1YULFzIUlEqXLi0HBwebcfbt22fTZ8KECbp+/bp27dqlwoULp/s8bt68qYSEBFWtWtXa7uPjo+LFi1ufjx492iZo7d+/X0FBQRo2bJguX76sn376Sfnz59fSpUv
16quvatOmTdagZn4Nkmfc7t5esGBBXbhwwaa28uXLy93d3fq8atWqunbtmk6fPq3g4OB7vFoAAADIighKT7DcuXNbv05KSlKTJk300Ucfpejn7++v48ePp2tMJycn69cWi8U6tiTlypVLhmHY9E/t3qO7x0geJ3mMZM8995yWL1+uhQsXavDgwek+jyNHjtz3HLp3725zqV5AQICOHTumKVOm6LffflPp0qUl/RtuNm3apM8++0zTp09Ptf7k18DcZj6ftCTvDwAAgCcLQSmbqFSpkhYtWqSQkBA5Oqb8Zy1atKjc3Ny0Zs0avfHGGw90jAIFCujcuXMyDMMaAGJiYh5orCpVqqhPnz6qX7++HBwcNHDgwHSdR1hYmJycnLR9+3YFBQVJki5fvqzDhw+rVq1akv6dYfLx8bHZ78aNG5KU4jI4BweHdIeee9m7d69u3rwpNzc3SdL27dvl4eGRrksCAQAAkPVw80Q20atXL126dEmtW7fWjh07dPz4cf3444/q3LmzEhMT5erqqsjISA0aNEizZ8/WsWPHtH379vsu0X63iIgIXbx4UWPHjtWxY8f02WefacWKFQ9cc9WqVbVixQq9//77mjhxYrrOw8PDQ126dNHAgQO1Zs0a/fbbb+rYseN97wMqUaKEwsLC1K1bN+3YsUPHjh3ThAkTtHr1ar388ssPfA7JEhIS1KVLF+3fv18rVqzQiBEj1Lt3b+5PAgAAeEIxo5SGXtOft3cJGRIQEKAtW7YoMjJS9evXV3x8vIKDg9WgQQPrL+vvvvuuHB0dNXz4cJ05c0b+/v7q3r17uo9RsmRJTZ06VaNHj9YHH3ygFi1aaMCAAfriiy8euO7q1atr+fLlatSokRwcHPTmm2/e9zzGjRuna9eu6aWXXpKnp6fefvttxcbG3vM4Tk5O+uGHHzR48GA1adJE165dU1hYmKKjo9WoUaMHrj9ZnTp1VLRoUdWsWVPx8fF67bXX+PwvAACAJ5jFMN90ks3ExcXJ29tbsbGx8vLystl269YtnThxQqGhoXJ1dbVThciJ+N4DgCwmytveFaSpbGiQvUtI08IxGfsIkcdlbcRn9i4hTU/aH+Ozm3tlAzOuCwIAAAAAE4ISAAAAAJgQlAAAAADAhKAEAAAAACYEJSlTPkcHyAi+5wAAALK2HL08uLOzs3LlyqUzZ86oQIECcnZ2tn6QKvAoGIahhIQEXbx4Ubly5ZKzs7O9SwIAAEAqcnRQypUrl0JDQ3X27FmdOXPG3uUgB3F3d1dQUBAfSAsAAJBF5eigJP07qxQUFKQ7d+4oMTHR3uUgB3BwcJCjoyOzlwAAAFlYjg9KkmSxWOTk5CQnJyd7lwIAAAAgC+C6HwAAAAAwISgBAAAAgAlBCQAAAABMCEoAAAAAYEJQAgAAAAATghIAAAAAmLA8OAAAeCxCBi+3dwlpOulq7woAZDXMKAEAAACACUEJAAAAAEwISgAAAABgQlACAAAAABOCEgAAAACYEJQAAAAAwISgBAAAAAAmBCUAAAAAMCEoAQAAAIAJQQkAAAAATAhKAAAAAGBCUAIAAAAAE4ISAAAAAJgQlAAAAADAhKAEAAAAACYEJQAAAAAwISgBAAAAgAlBCQAAAABMCEoAAAAAYEJQAgAAAAATghIAAAAAmBCUAAAAAMDErkFpzJgxqly5sjw9PeXr66uXX35Zhw4dsunTsWNHWSwWm8ezzz5rp4oBAAAA5AR2DUobNmxQr169tH37dq1evVp37txRvXr1dP36dZt+DRo00NmzZ62PH374wU4VAwAAAMgJHO158JUrV9o8nzlzpnx9ffXLL7+oZs2a1nYXFxf5+fk97vIAAAAA5FBZ6h6l2NhYSZKPj49N+/r16+Xr66tixYqpa9euunDhQppjxMfHKy4uzuYBAAAAABmRZYKSYRjq37+/atSooTJlyljbGzZsqLlz52rt2rWaMGGCdu7cqeeff17x8fGpjjNmzBh5e3tbH4G
BgY/rFAAAAABkE3a99O5uvXv31q+//qrNmzfbtLdq1cr6dZkyZRQeHq7g4GAtX75czZs3TzHOkCFD1L9/f+vzuLg4whIAAACADMkSQalPnz5atmyZNm7cqEKFCt2zr7+/v4KDg3XkyJFUt7u4uMjFxeVRlAkAAAAgh7BrUDIMQ3369NGSJUu0fv16hYaG3neff/75R6dPn5a/v/9jqBAAAABATmTXe5R69eqlOXPmaN68efL09NS5c+d07tw53bx5U5J07do1DRgwQNu2bdPJkye1fv16NWnSRPnz51ezZs3sWToAAACAbMyuM0rTpk2TJEVERNi0z5w5Ux07dpSDg4P27dun2bNn68qVK/L391ft2rW1YMECeXp62qFiAAAAADmB3S+9uxc3NzetWrXqMVUDAAAAAP/KMsuDAwAAAEBWQVACAAAAABOCEgAAAACYEJQAAAAAwISgBAAAAAAmBCUAAAAAMCEoAQAAAIAJQQkAAAAATAhKAAAAAGBCUAIAAAAAE4ISAAAAAJgQlAAAAADAhKAEAAAAACYEJQAAAAAwISgBAAAAgAlBCQAAAABMHigoHTt2TMOGDVPr1q114cIFSdLKlSv1+++/Z2pxAAAAAGAPGQ5KGzZsUNmyZfXzzz9r8eLFunbtmiTp119/1YgRIzK9QAAAAAB43DIclAYPHqyRI0dq9erVcnZ2trbXrl1b27Zty9TiAAAAAMAeMhyU9u3bp2bNmqVoL1CggP75559MKQoAAAAA7CnDQSlPnjw6e/ZsivY9e/boqaeeypSiAAAAAMCeMhyU2rRpo8jISJ07d04Wi0VJSUnasmWLBgwYoPbt2z+KGgEAAADgscpwUBo1apSCgoL01FNP6dq1aypVqpRq1qypatWqadiwYY+iRgAAAAB4rBwzuoOTk5Pmzp2r999/X3v27FFSUpIqVqyookWLPor6AAAAAOCxy3BQSlakSBEVKVIkM2sBAAAAgCwhw0HJMAx9++23WrdunS5cuKCkpCSb7YsXL8604gAAAADAHjIclPr27asvvvhCtWvXVsGCBWWxWB5FXQAAAABgNxkOSnPmzNHixYvVqFGjR1EPAAAAANhdhle98/b2VuHChR9FLQAAAACQJWQ4KEVFRem9997TzZs3H0U9AAAAAGB3Gb707tVXX9U333wjX19fhYSEyMnJyWb77t27M604AAAAALCHDAeljh076pdfflHbtm1ZzAEAAABAtpThoLR8+XKtWrVKNWrUeBT1AAAAAIDdZfgepcDAQHl5eT2KWgAAAAAgS8hwUJowYYIGDRqkkydPPoJyAAAAAMD+MnzpXdu2bXXjxg0VKVJE7u7uKRZzuHTpUqYVBwAAAAD2kOGgNGnSpEdQBgAAAABkHRkOSh06dHgUdQAAAABAlpGuoBQXF2ddwCEuLu6efVnoAQAAAMCTLl1BKW/evDp79qx8fX2VJ0+eVD87yTAMWSwWJSYmZnqRAAAAAPA4pSsorV27Vj4+PpKkdevWPdKCAAAAAMDe0hWUatWqpcKFC2vnzp2qVavWo64JAAAAAOwq3Z+jdPLkSS6rAwAAAJAjZPgDZwEAAAAgu8vQ8uD79+/XuXPn7tmnXLlyD1UQAAAAANhbhoJSnTp1ZBhGinaLxcKqdwAAAACyjQwFpZ9//lkFChR4VLUAAAAAQJaQoXuUgoKCFBwcfM9HRowZM0aVK1eWp6enfH199fLLL+vQoUM2fQzDUFRUlAICAuTm5qaIiAj9/vvvGToOAAAAAGSEXRdz2LBhg3r16qXt27dr9erVunPnjurVq6fr169b+4wdO1Yff/yxpkyZop07d8rPz09169bV1atX7Vg5AAAAgOws3Zfe1apVS87Ozpl68JUrV9o8nzlzpnx9ffXLL7+oZs2aMgxDkyZN0tChQ9W8eXNJUnR0tAoWLKh58+apW7dumVoPAAAAAEgZmFFat26d8uTJ8whLkWJjYyVJPj4+kqQTJ07o3Llzqle
vnrWPi4uLatWqpa1bt6Y6Rnx8vOLi4mweAAAAAJARWeZzlAzDUP/+/VWjRg2VKVNGkqxLkRcsWNCmb8GCBdNcpnzMmDHy9va2PgIDAx9t4QAAAACynSwTlHr37q1ff/1V33zzTYptFovF5nnyUuSpGTJkiGJjY62P06dPP5J6AQAAAGRfGVoe/FHp06ePli1bpo0bN6pQoULWdj8/P0n/ziz5+/tb2y9cuJBilimZi4uLXFxcHm3BAAAAALI1u84oGYah3r17a/HixVq7dq1CQ0NttoeGhsrPz0+rV6+2tiUkJGjDhg2qVq3a4y4XAAAAQA6R4RmlxMREzZo1S2vWrNGFCxeUlJRks33t2rXpHqtXr16aN2+evvvuO3l6elrvO/L29pabm5ssFov69eun0aNHq2jRoipatKhGjx4td3d3tWnTJqOlAwAAAEC6ZDgo9e3bV7NmzVLjxo1VpkyZNO8VSo9p06ZJkiIiImzaZ86cqY4dO0qSBg0apJs3b6pnz566fPmynnnmGf3444/y9PR84OMCAAAAwL1kOCjNnz9fCxcuVKNGjR764IZh3LePxWJRVFSUoqKiHvp4AAAAAJAeGb5HydnZWWFhYY+iFgAAAADIEjIclN5++2198skn6ZoNAgAAAIAnUYYvvdu8ebPWrVunFStWqHTp0nJycrLZvnjx4kwrDgAAAADsIcNBKU+ePGrWrNmjqAUAAAAAsoQMB6WZM2c+ijoAAAAAIMuw6wfOAgAAAEBWlOEZJUn69ttvtXDhQp06dUoJCQk223bv3p0phQEAAACAvWR4Rmny5Mnq1KmTfH19tWfPHlWpUkX58uXT8ePH1bBhw0dRIwAAAAA8VhkOSlOnTtUXX3yhKVOmyNnZWYMGDdLq1av15ptvKjY29lHUCAAAAACPVYaD0qlTp1StWjVJkpubm65evSpJateunb755pvMrQ4AAAAA7CDDQcnPz0///POPJCk4OFjbt2+XJJ04cYIPoQUAAACQLWQ4KD3//PP6v//7P0lSly5d9NZbb6lu3bpq1aoVn68EAAAAIFvI8Kp3X3zxhZKSkiRJ3bt3l4+PjzZv3qwmTZqoe/fumV4gAAAAADxuGQ5KuXLlUq5c/38iqmXLlmrZsmWmFgUAAAAA9vRAHzi7adMmtW3bVlWrVtVff/0lSfr666+1efPmTC0OAAAAAOwhw0Fp0aJFql+/vtzc3LRnzx7Fx8dLkq5evarRo0dneoEAAAAA8LhlOCiNHDlS06dP15dffiknJydre7Vq1bR79+5MLQ4AAAAA7CHDQenQoUOqWbNminYvLy9duXIlM2oCAAAAALvKcFDy9/fX0aNHU7Rv3rxZhQsXzpSiAAAAAMCeMhyUunXrpr59++rnn3+WxWLRmTNnNHfuXA0YMEA9e/Z8FDUCAAAAwGOV4eXBBw0apNjYWNWuXVu3bt1SzZo15eLiogEDBqh3796PokYAAAAAeKwyHJQkadSoURo6dKj279+vpKQklSpVSh4eHpldGwAAAADYxQMFJUlyd3dXeHh4ZtYCAAAAAFlCuoNS586d09Xvq6++euBiAAAAACArSHdQmjVrloKDg1WxYkUZhvEoawIAAAAAu0p3UOrevbvmz5+v48ePq3Pnzmrbtq18fHweZW0AAAAAYBfpXh586tSpOnv2rCIjI/V///d/CgwMVMuWLbVq1SpmmAAAAABkKxn6HCUXFxe1bt1aq1ev1v79+1W6dGn17NlTwcHBunbt2qOqEQAAAAAeqwx/4Gwyi8Uii8UiwzCUlJSUmTUBAAAAgF1lKCjFx8frm2++Ud26dVW8eHHt27dPU6ZM0alTp/gcJQAAAADZRroXc+jZs6fmz5+voKAgderUSfPnz1e+fPkeZW0AAAAAYBfpDkrTp09XUFCQQkNDtWHDBm3YsCHVfosXL8604gAAAADAHtIdlNq3by+LxfIoawEAAACALCFDHzgLAAAAADnBA696BwAAAADZFUE
JAAAAAEwISgAAAABgQlACAAAAABOCEgAAAACYEJQAAAAAwISgBAAAAAAmBCUAAAAAMCEoAQAAAIAJQQkAAAAATAhKAAAAAGBCUAIAAAAAE4ISAAAAAJjYNSht3LhRTZo0UUBAgCwWi5YuXWqzvWPHjrJYLDaPZ5991j7FAgAAAMgx7BqUrl+/rvLly2vKlClp9mnQoIHOnj1rffzwww+PsUIAAAAAOZGjPQ/esGFDNWzY8J59XFxc5Ofn95gqAgAAAAA7B6X0WL9+vXx9fZUnTx7VqlVLo0aNkq+vb5r94+PjFR8fb30eFxf3OMrEfYQMXm7vEtJ08sPG9i4BAAAAWUyWXsyhYcOGmjt3rtauXasJEyZo586dev75522CkNmYMWPk7e1tfQQGBj7GigEAAABkB1l6RqlVq1bWr8uUKaPw8HAFBwdr+fLlat68ear7DBkyRP3797c+j4uLIywBAAAAyJAsHZTM/P39FRwcrCNHjqTZx8XFRS4uLo+xKgAAAADZTZa+9M7sn3/+0enTp+Xv72/vUgAAAABkY3adUbp27ZqOHj1qfX7ixAnFxMTIx8dHPj4+ioqKUosWLeTv76+TJ0/qnXfeUf78+dWsWTM7Vg0AAAAgu7NrUNq1a5dq165tfZ58b1GHDh00bdo07du3T7Nnz9aVK1fk7++v2rVra8GCBfL09LRXyQAAAAByALsGpYiICBmGkeb2VatWPcZqAAAAAOBfT9RiDgCyhgMlStq7hDSVPHjA3iUAAIBs4IlazAEAAAAAHgeCEgAAAACYcOkdgGzls+5r7V1CmnpNf97eJQAAgHRiRgkAAAAATAhKAAAAAGBCUAIAAAAAE4ISAAAAAJgQlAAAAADAhKAEAAAAACYEJQAAAAAwISgBAAAAgAlBCQAAAABMCEoAAAAAYEJQAgAAAAATghIAAAAAmBCUAAAAAMCEoAQAAAAAJgQlAAAAADAhKAEAAACACUEJAAAAAEwISgAAAABgQlACAAAAABOCEgAAAACYEJQAAAAAwISgBAAAAAAmBCUAAAAAMCEoAQAAAIAJQQkAAAAATBztXQBgd1He9q4gdVGx9q4AAAAgx2JGCQAAAABMCEoAAAAAYEJQAgAAAAATghIAAAAAmBCUAAAAAMCEoAQAAAAAJgQlAAAAADAhKAEAAACACUEJAAAAAEwISgAAAABgQlACAAAAABOCEgAAAACYEJQAAAAAwISgBAAAAAAmBCUAAAAAMCEoAQAAAIAJQQkAAAAATOwalDZu3KgmTZooICBAFotFS5cutdluGIaioqIUEBAgNzc3RURE6Pfff7dPsQAAAAByDLsGpevXr6t8+fKaMmVKqtvHjh2rjz/+WFOmTNHOnTvl5+enunXr6urVq4+5UgAAAAA5iaM9D96wYUM1bNgw1W2GYWjSpEkaOnSomjdvLkmKjo5WwYIFNW/ePHXr1u1xlgoAAAAgB8my9yidOHFC586dU7169axtLi4uqlWrlrZu3ZrmfvHx8YqLi7N5AAAAAEBGZNmgdO7cOUlSwYIFbdoLFixo3ZaaMWPGyNvb2/oIDAx8pHUCAAAAyH6ybFBKZrFYbJ4bhpGi7W5DhgxRbGys9XH69OlHXSIAAACAbMau9yjdi5+fn6R/Z5b8/f2t7RcuXEgxy3Q3FxcXubi4PPL6AAAAAGRfWXZGKTQ0VH5+flq9erW1LSEhQRs2bFC1atXsWBkAAACA7M6uM0rXrl3T0aNHrc9PnDihmJgY+fj4KCgoSP369dPo0aNVtGhRFS1aVKNHj5a7u7vatGljx6oBAAAAZHd2DUq7du1S7dq1rc/79+8vSerQoYNmzZqlQYMG6ebNm+rZs6cuX76sZ555Rj/++KM8PT3tVTIAAACAHMCuQSkiIkKGYaS53WKxKCoqSlFRUY+vKAAAAAA5Xpa9RwkAAAAA7IWgBAAAAAAmBCUAAAAAMCEoAQAAAIAJQQkAAAAATAhKAAAAAGBCUAIAAAA
AE4ISAAAAAJgQlAAAAADAhKAEAAAAACYEJQAAAAAwISgBAAAAgImjvQsAkLqy0WXtXUKaFtq7AAAAgEeMGSUAAAAAMCEoAQAAAIAJQQkAAAAATAhKAAAAAGBCUAIAAAAAE4ISAAAAAJgQlAAAAADAhKAEAAAAACYEJQAAAAAwISgBAAAAgAlBCQAAAABMCEoAAAAAYEJQAgAAAAATghIAAAAAmBCUAAAAAMCEoAQAAAAAJgQlAAAAADAhKAEAAACACUEJAAAAAEwISgAAAABgQlACAAAAABOCEgAAAACYEJQAAAAAwISgBAAAAAAmBCUAAAAAMCEoAQAAAIAJQQkAAAAATAhKAAAAAGBCUAIAAAAAE4ISAAAAAJgQlAAAAADAhKAEAAAAACYEJQAAAAAwISgBAAAAgEmWDkpRUVGyWCw2Dz8/P3uXBQAAACCbc7R3AfdTunRp/fTTT9bnDg4OdqwGAAAAQE6Q5YOSo6Mjs0gAAAAAHqssfemdJB05ckQBAQEKDQ3Va6+9puPHj9+zf3x8vOLi4mweAAAAAJARWTooPfPMM5o9e7ZWrVqlL7/8UufOnVO1atX0zz//pLnPmDFj5O3tbX0EBgY+xooBAAAAZAdZOig1bNhQLVq0UNmyZfXCCy9o+fLlkqTo6Og09xkyZIhiY2Otj9OnTz+ucgEAAABkE1n+HqW75c6dW2XLltWRI0fS7OPi4iIXF5fHWBUAAACA7CZLzyiZxcfH68CBA/L397d3KQAAAACysSwdlAYMGKANGzboxIkT+vnnn/XKK68oLi5OHTp0sHdpAAAAALKxLH3p3Z9//qnWrVvr77//VoECBfTss89q+/btCg4OtndpAAAAALKxLB2U5s+fb+8SAAAAAORAWfrSOwAAAACwB4ISAAAAAJgQlAAAAADAhKAEAAAAACYEJQAAAAAwISgBAAAAgAlBCQAAAABMCEoAAAAAYEJQAgAAAAATghIAAAAAmBCUAAAAAMCEoAQAAAAAJgQlAAAAADAhKAEAAACACUEJAAAAAEwISgAAAABgQlACAAAAABOCEgAAAACYEJQAAAAAwISgBAAAAAAmBCUAAAAAMCEoAQAAAIAJQQkAAAAATAhKAAAAAGBCUAIAAAAAE4ISAAAAAJgQlAAAAADAhKAEAAAAACYEJQAAAAAwISgBAAAAgAlBCQAAAABMCEoAAAAAYEJQAgAAAAATghIAAAAAmBCUAAAAAMCEoAQAAAAAJgQlAAAAADAhKAEAAACACUEJAAAAAEwISgAAAABgQlACAAAAABOCEgAAAACYEJQAAAAAwISgBAAAAAAmBCUAAAAAMCEoAQAAAIAJQQkAAAAATJ6IoDR16lSFhobK1dVVTz/9tDZt2mTvkgAAAABkY1k+KC1YsED9+vXT0KFDtWfPHj333HNq2LChTp06Ze/SAAAAAGRTWT4offzxx+rSpYveeOMNlSxZUpMmTVJgYKCmTZtm79IAAAAAZFOO9i7gXhISEvTLL79o8ODBNu316tXT1q1bU90nPj5e8fHx1uexsbGSpLi4uEdXKO4rKf6GvUtIU5zFsHcJqUq8mWjvEtJ0LTHr1nYz4bq9S0gT/w8hp+O94MHwfpBxvBcgLcmvv2Hc/2c+Swelv//+W4mJiSpYsKBNe8GCBXXu3LlU9xkzZozee++9FO2BgYGPpEY8+bztXUCaDti7gDRVsXcB93L0JXtXkKaBM+1dAYC0ZN33Aon3gwfAewHu4+rVq/L2vvdPfpYOSsksFovNc8MwUrQlGzJkiPr37299npSUpEuXLilfvnxp7gNkd3FxcQoMDNTp06fl5eVl73IAAHbAewHwb464evWqAgIC7ts3Swel/Pnzy8HBIcXs0YULF1LMMiVzcXGRi4uLTVuePHkeVYnAE8XLy4s3RwDI4XgvQE53v5mkZFl6MQdnZ2c9/fTTWr16tU376tWrVa1aNTtVBQAAACC7y9IzSpLUv39/tWvXTuHh4ap
ataq++OILnTp1St27d7d3aQAAAACyqSwflFq1aqV//vlH77//vs6ePasyZcrohx9+UHBwsL1LA54YLi4uGjFiRIrLUgEAOQfvBUDGWIz0rI0HAAAAADlIlr5HCQAAAADsgaAEAAAAACYEJQAAAAAwISgBGfDPP//I19dXJ0+elCStX79eFotFV65csWtd2c2UKVP00ktZ91PVAYD3g8eD9wPYE0EJyIAxY8aoSZMmCgkJSfc+yW+e5sfBgwcfXaGZaOPGjWrSpIkCAgJksVi0dOnSVPsdPXpUnTt3VlBQkFxcXPTUU0+pTp06mjt3ru7cuWPtd/dr4OjoqKCgIPXv31/x8fHWPl27dtXOnTu1efPmR316APBAcuL7QUhISKr19+rVy6Yf7wfILrL88uBAVnHz5k3NmDFDP/zwwwPtf+jQIZtPQi9QoEBmlZbC7du35eTklCljXb9+XeXLl1enTp3UokWLVPvs2LFDL7zwgkqXLq3PPvtMJUqU0LVr17R//35Nnz5dZcqUUfny5a39Z86cqQYNGuj27dvau3evOnXqpNy5c+uDDz6Q9O8Stm3atNGnn36qGjVqZMp5AEBmyanvBzt37lRiYqL1+W+//aa6devq1VdftbbxfoBsxQCQLosWLTLy589v07Zu3TpDkvHTTz8ZTz/9tOHm5mZUrVrVOHjwYIo+ly9fvuf4M2bMMEqVKmU4Ozsbfn5+Rq9evazb/vjjD+Oll14ycufObXh6ehqvvvqqce7cOev2ESNGGOXLlzdmzJhhhIaGGhaLxUhKSrrvfhklyViyZIlNW1JSklGyZEnj6aefNhITE1PdLykp6Z5jdO7c2WjUqJFN2/r16w1nZ2fjxo0bD1wvADwKvB/8q2/fvkaRIkWs/8fzfoDshkvvgHTauHGjwsPDU902dOhQTZgwQbt27ZKjo6M6d+6cok/FihXl7++vOnXqaN26dTbbpk2bpl69euk///mP9u3bp2XLliksLEySZBiGXn75ZV26dEkbNmzQ6tWrdezYMbVq1cpmjKNHj2rhwoVatGiRYmJiJCld+z2smJgYHThwQAMGDFCuXKn/l2KxWNLc//Dhw1q3bp2eeeYZm/bw8HDdvn1bO3bsyNR6AeBh8X4gJSQkaM6cOercubP1/3jeD5Dt2DupAU+Kpk2bGp07d7Zpu/sviMmWL19uSDJu3rxpGIZhHDx40Pjiiy+MX375xdi6davRo0cPw2KxGBs2bLDuExAQYAwdOjTV4/7444+Gg4ODcerUKWvb77//bkgyduzYYRjGv39BdHJyMi5cuJCh/TJKqfz1b/78+YYkY/fu3da28+fPG7lz57Y+PvvsM5sxXF1djdy5cxsuLi6GJOPFF180EhISUhwvb968xqxZsx6oVgB4VHg/MIwFCxYYDg4Oxl9//WVt4/0A2Q0zSkA63bx5U66urqluK1eunPVrf39/SdKFCxckScWLF1fXrl1VqVIlVa1aVVOnTlXjxo01fvx4a78zZ86oTp06qY594MABBQYGKjAw0NpWqlQp5cmTRwcOHLC2BQcH21znnt79MsvdfyXMly+fYmJiFBMTozx58ighIcGm78SJExUTE6O9e/fq+++/1+HDh9WuXbsUY7q5uenGjRuZXisAPAzeD6QZM2aoYcOGCggISLGN9wNkFyzmAKRT/vz5dfny5VS33X2jbPIbRFJSUppjPfvss5ozZ46kf//zvxfDMFK9VMHcnjt37gfa72EVLVpUknTw4EFVqFBBkuTg4GC9VMTRMeV/M35+ftbtxYsX19WrV9W6dWuNHDnS2i5Jly5deqQ3OQPAg8jp7wd//PGHfvrpJy1evNimnfcDZDfMKAHpVLFiRe3fvz9TxtqzZ4/1L42enp4KCQnRmjVrUu1bqlQpnTp1SqdPn7a27d+/X7GxsSpZsmSax3jQ/TKqYsWKKlGihMaPH3/PXwbuxcHBQdK/f6VNduzYMd26dUsVK1bMlDoBILPk9Pe
DmTNnytfXV40bN7Zp5/0A2Q0zSkA61a9fX0OGDNHly5eVN2/edO83adIkhYSEqHTp0tabXxctWqRFixZZ+0RFRal79+7y9fVVw4YNdfXqVW3ZskV9+vTRCy+8oHLlyun111/XpEmTdOfOHfXs2VO1atVK82ZiSQ+8n9m1a9d09OhR6/MTJ04oJiZGPj4+CgoKksVi0cyZM1W3bl1Vr15dQ4YMUcmSJXX79m1t3LhRFy9etL7xJbty5YrOnTunpKQkHTlyRO+//76KFStm84a9adMmFS5cWEWKFEl3rQDwOOTU9wPp39mxmTNnqkOHDilmiHg/QLZjv9ujgCfPs88+a0yfPt36PLWlXvfs2WNIMk6cOGEYhmF89NFHRpEiRQxXV1cjb968Ro0aNYzly5enGHv69OlG8eLFDScnJ8Pf39/o06ePdVt6l4M1u99+J06cMCQZ69atS/Ock8/R/OjQoYNNv0OHDhkdOnQwChUqZDg6Ohre3t5GzZo1jc8//9y4ffu2td/dY1gsFsPf399o1aqVcezYMZvx6tWrZ4wZMybNugDAnnLi+4FhGMaqVasMScahQ4fS7MP7AbILi2EYxuMOZ8CT6ocfftCAAQP022+/pbn06ZNk/fr1atasmY4fP56hv4o+ar/99pvq1Kmjw4cPy9vb297lAEAKvB88HrwfwJ649A7IgEaNGunIkSP666+/bFYPelKtXLlS77zzTpZ6U5SkM2fOaPbs2bwpAsiyeD94PHg/gD0xowQAAAAAJk/+XDEAAAAAZDKCEgAAAACYEJQAAAAAwISgBAAAAAAmBCUAAAAAMCEoAQAAAIAJQQkAAJOoqChVqFDB3mUAAOyIoAQAeOJYLJZ7Pjp27GjvEgEATzhHexcAAEBGnT171vr1ggULNHz4cB06dMja5ubmZo+yAADZCDNKAIAnjp+fn/Xh7e0ti8Vi0zZv3jwVKVJEzs7OKl68uL7++mub/U+dOqWmTZvKw8NDXl5eatmypc6fP2+nswEAZEUEJQBAtrJkyRL17dtXb7/9tn777Td169ZNnTp10rp16yRJhmHo5Zdf1qVLl7RhwwatXr1ax44dU6tWrexcOQAgK+HSOwBAtjJ+/Hh17NhRPXv2lCT1799f27dv1/jx41W7dm399NNP+vXXX3XixAkFBgZKkr7++muVLl1aO3fuVOXKle1ZPgAgi2BGCQCQrRw4cEDVq1e3aatevboOHDhg3R4YGGgNSZJUqlQp5cmTx9oHAACCEgAg27FYLDbPDcOwtt39dVp9AAAgKAEAspWSJUtq8+bNNm1bt25VyZIlJf07e3Tq1CmdPn3aun3//v2KjY219gEAgHuUAADZysCBA9WyZUtVqlRJderU0f/93/9p8eLF+umnnyRJL7zwgsqVK6fXX39dkyZN0p07d9SzZ0/VqlVL4eHhdq4eAJBVMKMEAMhWXn75ZX3yyScaN26cSpcurc8//1wzZ85URESEpH8vy1u6dKny5s2rmjVr6oUXXlDhwoW1YMEC+xYOAMhSLIZhGPYuAgAAAACyEmaUAAAAAMCEoAQAAAAAJgQlAAAAADAhKAEAAACACUEJAAAAAEwISgAAAABgQlACAAAAABOCEgAAAACYEJQAAAAAwISgBAAAAAAmBCUAAAAAMPl/kr/L9FL8SdQAAAAASUVORK5CYII=", + "image/png": "", "text/plain": [ "
" ] @@ -253,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "ff56958f-8c1d-4fd7-b885-6efb81af8da7", "metadata": { "tags": [] @@ -263,20 +250,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Processing format: original, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.h5\n", - "Processing format: original-kerchunk, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.json\n", - "Processing format: page-only-4mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\n", - "Processing format: page-only-8mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\n", - "Processing format: rechunked-4mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-4mb.h5\n", - "Processing format: rechunked-8mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.h5\n", - "Processing format: rechunked-8mb-kerchunk, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.json\n", - "Processing format: original, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5\n", - "Processing format: original-kerchunk, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.json\n", - "Processing format: page-only-4mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5\n", - "Processing format: page-only-8mb, link: 
s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5\n", - "Processing format: rechunked-4mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5\n", - "Processing format: rechunked-8mb, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5\n", - "Processing format: rechunked-8mb-kerchunk, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.json\n" + "Processing format: original, link: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.h5\n" ] } ], @@ -308,24 +282,21 @@ " )\n", " return ds[variable]\n", "\n", + "\n", "for key, dataset in test_dict.items():\n", " for k, link in dataset[\"links\"].items():\n", + " # log_filename = f\"logs/fsspec-xarray-{key}-{k}-default.log\" \n", + " # # Create a new FileHandler for each iteration\n", + " # file_handler = logging.FileHandler(log_filename)\n", + " # file_handler.setLevel(logging.DEBUG)\n", "\n", - " log_filename = f\"logs/fsspec-xarray-{key}-{k}-default.log\"\n", - "\n", - " \n", - " # Create a new FileHandler for each iteration\n", - " file_handler = logging.FileHandler(log_filename)\n", - " file_handler.setLevel(logging.DEBUG)\n", - "\n", - " # Add the handler to the root logger\n", - " logging.getLogger().addHandler(file_handler)\n", + " # # Add the handler to the root logger\n", + " # logging.getLogger().addHandler(file_handler)\n", " print (f\"Processing format: {k}, link: {link}\")\n", " start = time.time()\n", " if \"kerchunk\" in k or link.endswith(\".json\"):\n", " ds = kerchunk_result(link, dataset[\"group\"], dataset[\"variable\"])\n", " data_mean = ds.mean()\n", - " \n", " elapsed = time.time() - start\n", " kerchunk_benchmarks.append(\n", " {\"tool\": 
\"kerchunk\",\n", @@ -354,8 +325,8 @@ " \"bytes_requested\": fo.cache.total_requested_bytes,\n", " \"shape\": ds[dataset[\"variable\"]].values.shape,\n", " \"mean\": data_mean})\n", - " logging.getLogger().removeHandler(file_handler)\n", - " file_handler.close()" + " # logging.getLogger().removeHandler(file_handler)\n", + " # file_handler.close()" ] }, { @@ -370,15 +341,24 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 59, "id": "a7aacf16-8276-4a50-b5af-3103056d73f4", "metadata": {}, "outputs": [ { "data": { - "image/png": "", "text/plain": [ - "
" + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" ] }, "metadata": {}, @@ -386,24 +366,34 @@ } ], "source": [ - "df = pd.DataFrame.from_dict(kerchunk_benchmarks + regular_xarray_benchmarks)\n", + "df = pd.DataFrame.from_dict(kerchunk_benchmarks + regular_xarray_benchmarks + regular_h5py_benchmarks)\n", "\n", - "pivot_df = df.pivot_table(index=['tool','dataset'], columns=['format'], values='time', aggfunc='mean')\n", + "plt.figure(figsize=(20, 5)) \n", + "plt.style.use('ggplot')\n", + "# plt.style.use('grayscale')\n", "\n", - "# Plotting\n", - "pivot_df.plot(kind='bar', figsize=(15, 5))\n", + "pivot_df = df.pivot_table(index=['tool',], columns=['format', ], values='time', aggfunc='mean')\n", + "baseline_original = pivot_df['original'].max()\n", + "kerchunk_original = pivot_df['original-kerchunk'].max()\n", "\n", - "plt.title(\"Out of the box I/O parameters\", fontsize=10)\n", - "plt.suptitle('Cloud-optimized HDF5 access performance (less is better)', fontsize=14)\n", + "# Plotting\n", + "pivot_df.plot(kind='barh', figsize=(20, 8), fontsize=14, width=0.8)\n", "\n", - "plt.xlabel('Tool')\n", - "plt.ylabel('Time in seconds')\n", - "# plt.xticks(rotation=90)\n", - "plt.legend(title='Format')\n", - "# plt.grid(True)\n", + "plt.suptitle('Cloud-optimized HDF5 performance (less is better)', fontsize=18)\n", + "plt.title(\"Out of the box I/O parameters\", fontsize=14)\n", + "plt.xlabel('Mean Time (S)')\n", + "plt.ylabel('Library', fontsize=16)\n", "plt.xticks(rotation=0)\n", - "# plt.grid(True)\n", - "# plt.grid(axis='y', which='major', linestyle='-')\n", + "plt.legend(title='Format', fontsize=14)\n", + "plt.grid(False)\n", + "\n", + "\n", + "plt.axvline(x=baseline_original, color='red', linestyle='--', linewidth=2, label=f\"Baseline: {baseline_original:.2f}\")\n", + "\n", + "\n", + "plt.tight_layout()\n", + "plt.savefig(\"stats.png\", transparent=True, dpi=150)\n", + "\n", "plt.show()" ] }, @@ -420,7 +410,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 57, "id": 
"98c29558-de50-44af-87e9-074092fcd0ac", "metadata": { "tags": [] @@ -430,24 +420,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.json\n", - "Unable to synchronously open file (file signature not found)\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.json\n", - "Unable to synchronously open file (file signature not found)\n", "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.json\n", - "Unable to synchronously open file (file signature not found)\n", "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5\n", "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5\n", "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5\n", - "Processing: 
s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.json\n", - "Unable to synchronously open file (file signature not found)\n" + "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5\n" ] } ], @@ -460,17 +437,8 @@ " if \"kerchunk\" in k or link.endswith(\".json\"):\n", " continue \n", " print (f\"Processing: {link}\")\n", - " log_filename = f\"logs/fsspec-h5py-{key}-{k}_default.log\"\n", - " \n", - " # Create a new FileHandler for each iteration\n", - " file_handler = logging.FileHandler(log_filename)\n", - " file_handler.setLevel(logging.DEBUG)\n", - "\n", - " # Add the handler to the root logger\n", - " logging.getLogger().addHandler(file_handler)\n", - " # this is mostly IO so no perf_counter is needed\n", " start = time.time()\n", - " fo = fs.open(link, mode=\"rb\")\n", + " fo = fs.open(link, cache_type=None, mode=\"rb\")\n", " with h5py.File(fo) as f:\n", " path = f\"{dataset['group']}/{dataset['variable']}\"\n", " data = f[path][:]\n", @@ -486,10 +454,6 @@ " \"shape\": data.shape,\n", " \"bytes_requested\": fo.cache.total_requested_bytes,\n", " \"mean\": data_mean})\n", - "\n", - " logging.getLogger().removeHandler(file_handler) \n", - " file_handler.close()\n", - " \n", " except Exception as e:\n", " print(e)" ] @@ -507,7 +471,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 56, "id": "d8fa6dca-f408-4298-beca-f2839d4c3b67", "metadata": { "tags": [] @@ -515,7 +479,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -1031,7 +995,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/notebooks/benchmarks.csv b/notebooks/benchmarks.csv new file mode 100644 index 0000000..e4c48ed --- /dev/null +++ b/notebooks/benchmarks.csv @@ -0,0 +1,44 @@ +,tool,dataset,cloud-aware,format,file,time,shape,bytes_requested,mean +0,h5py,7GB,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5,25.779539585113525,"(46484912,)",289026695.0,1035.1631 +1,h5py,7GB,no,page-only-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5,68.79631090164185,"(46484912,)",1036723526.0,1035.1631 +2,h5py,7GB,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5,62.35629677772522,"(46484912,)",947145210.0,1035.1631 +3,h5py,7GB,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5,27.586012840270996,"(46484912,)",286737116.0,1035.1631 +4,h5py,7GB,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5,27.63655662536621,"(46484912,)",269539164.0,1035.1631 +5,kerchunk,1GB,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.json,3.2203612327575684,"(9720204,)",," Size: 4B +array(386.06738, dtype=float32)" +6,kerchunk,1GB,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.json,4.984490156173706,"(9720204,)",," Size: 4B +array(386.06738, dtype=float32)" 
+7,kerchunk,7GB,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.json,14.271384954452515,"(46484912,)",," Size: 4B +array(1035.1631, dtype=float32)" +8,kerchunk,7GB,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.json,8.327512979507446,"(46484912,)",," Size: 4B +array(1035.1631, dtype=float32)" +9,xarray,1GB,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.h5,216.67426800727844,"(9720204,)",3125613354.0," Size: 4B +array(386.06738, dtype=float32)" +10,xarray,1GB,no,page-only-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5,224.73643493652344,"(9720204,)",3671050532.0," Size: 4B +array(386.06738, dtype=float32)" +11,xarray,1GB,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5,227.53268146514893,"(9720204,)",3671050532.0," Size: 4B +array(386.06738, dtype=float32)" +12,xarray,1GB,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-4mb.h5,15.311556816101074,"(9720204,)",162702540.0," Size: 4B +array(386.06738, dtype=float32)" +13,xarray,1GB,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.h5,13.107250928878784,"(9720204,)",136218854.0," Size: 4B +array(386.06738, dtype=float32)" +14,xarray,7GB,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5,281.35928440093994,"(46484912,)",3350528419.0," Size: 4B +array(1035.1631, dtype=float32)" 
+15,xarray,7GB,no,page-only-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5,435.6504566669464,"(46484912,)",5526810548.0," Size: 4B +array(1035.1631, dtype=float32)" +16,xarray,7GB,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5,359.13231015205383,"(46484912,)",5337618697.0," Size: 4B +array(1035.1631, dtype=float32)" +17,xarray,7GB,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5,156.73291158676147,"(46484912,)",2064608594.0," Size: 4B +array(1035.1631, dtype=float32)" +18,xarray,7GB,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5,41.19576835632324,"(46484912,)",432504529.0," Size: 4B +array(1035.1631, dtype=float32)" +19,h5coro,1GB,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.h5,10.852986097335815,"(9720204,)",,386.06738 +20,h5coro,1GB,no,page-only-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5,8.163445472717285,"(9720204,)",,386.06738 +21,h5coro,1GB,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5,6.128530740737915,"(9720204,)",,386.06738 +22,h5coro,1GB,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-4mb.h5,19.339251279830933,"(9720204,)",,386.06738 +23,h5coro,1GB,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.h5,10.59383749961853,"(9720204,)",,386.06738 
+24,h5coro,7GB,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5,24.180256843566895,"(46484912,)",,1035.1631 +25,h5coro,7GB,no,page-only-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5,51.16006398200989,"(46484912,)",,1035.1631 +26,h5coro,7GB,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5,23.30572533607483,"(46484912,)",,1035.1631 +27,h5coro,7GB,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5,34.204936504364014,"(46484912,)",,1035.1631 +28,h5coro,7GB,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5,33.829309940338135,"(46484912,)",,1035.1631 diff --git a/notebooks/byte_ranges.pkl.gz b/notebooks/byte_ranges.pkl.gz new file mode 100644 index 0000000..9991478 Binary files /dev/null and b/notebooks/byte_ranges.pkl.gz differ diff --git a/notebooks/h5logger.py b/notebooks/h5logger.py new file mode 100644 index 0000000..a3e5757 --- /dev/null +++ b/notebooks/h5logger.py @@ -0,0 +1,140 @@ +import re +import numpy as np +import pandas as pd +import logging +import s3fs +import fsspec +import time +import h5py +from datetime import datetime +from uuid import uuid4 + + +def parse_fsspec_log(log_path): + """ + This method only parses fsspec logs that have a FileSize: attached to them. 
+ """ + head_line = re.compile('\s*(read: 0 - \d+)') + fsize_line = re.compile('FileSize: (\d+)') + # range_line = re.compile('\s* read: (?P[0-9]+) - (?P[0-9]+)') + range_line = re.compile('\s* read: (?P[0-9]+) - (?P[0-9]+)(?:\s*,\s*.*?:\s*(?P[0-9]+)\s*hits,\s*(?P[0-9]+)\s*misses)?') + + + + ranges = list() + with open(log_path) as logtxt: + for line in logtxt: + if head_line.match(line): + break + else: + raise RuntimeError('HEAD line not found in the log file') + + for line in logtxt: + match = fsize_line.match(line) + if match: + fsize = int(match.group(1)) + break + else: + raise RuntimeError('FILESIZE line not found in the log file') + + logtxt.seek(0) + for line in logtxt: + match = range_line.match(line) + if match: + start=int(match.group(1)) + end=int(match.group(2)) + hits=match.group(3) + missed=match.group(4) + rsize=end-start+1 + + ranges.append({"start": start, "end": end, "size": rsize, "hits": hits, "missed": missed}) + + df = pd.DataFrame(ranges, columns=['start', 'end', 'size', 'hits', 'missed']) + return df + +def read_file(info): + h5py_fsspec_benchmarks = {} + ranges = None + file_size = None + block_size = None + iteration, dataset, variables, flavor, url, optimized_read, driver, default_io_params, optimized_io_params = info + if url.endswith(".json"): + return {} + io_params = default_io_params + if optimized_read: + if "rechunked" in url or "page" in url: + optimized = "yes" + print(f"Reading: {url} with optimized I/O parameters") + io_params = optimized_io_params + block_size = io_params["fsspec_params"]["block_size"] + else: + # we cannot read the original file with optimized parameters + optimized = "no" + print(f"Reading: {url} with default parameters") + else: + optimized = "no" + print(f"Reading: {url} with default parameters") + cache_type = io_params["fsspec_params"]["cache_type"] + + # this is mostly IO so no perf_counter is needed + start = time.time() + if driver == "fsspec": + fs = s3fs.S3FileSystem(anon=True) + logger = 
logging.getLogger('fsspec') + logger.setLevel(logging.DEBUG) + file_info = fs.info(url) + file_size = file_info['size'] + file_name = url.split("/")[-1] + current_time = datetime.now() + formatted_time = current_time.strftime(f"%Y-%m-%d_%H-%M-%S-{uuid4()}") + log_filename = f"logs/fsspec-{file_name}-{driver}-{optimized}-{formatted_time}.log" + # Create a new FileHandler for each iteration + file_handler = logging.FileHandler(log_filename) + file_handler.setLevel(logging.DEBUG) + # Add the handler to the root logger + logging.getLogger().addHandler(file_handler) + with fs.open(url, mode="rb", **io_params["fsspec_params"]) as fo: + with h5py.File(fo, **io_params["h5py_params"]) as f: + for variable in variables: + data = f[variable][:] + data_mean = data.mean() + req_bytes = fo.cache.total_requested_bytes + logger.debug(f"FileSize: {file_size}") + logging.getLogger().removeHandler(file_handler) + file_handler.close() + ranges = parse_fsspec_log(log_filename) + else: + cloud_params = { + "mode": "r", + "driver": "ros3", + "aws_region": "us-west-2".encode("utf-8") + } + with h5py.File(url, **io_params["h5py_params"], **cloud_params) as f: + for variable in variables: + data = f[variable][:] + data_mean = data.mean() + req_bytes = None # not available + elapsed = time.time() - start + return { + "benchmark": { + "iteration": iteration, + "library": "h5py", + "driver": driver, + "dataset": dataset, + "optimized-read": optimized, + "format": flavor, + "file": url, + "time": elapsed, + "shape": data.shape, + "bytes_requested": req_bytes, + "mean": data_mean}, + "ranges": { + "file": url, + "optimized-read": optimized, + "cache_type": cache_type, + "block_size": block_size, + "time": time, + "bytes_requested": req_bytes, + "file_size": file_size, + "ranges": ranges} + } diff --git a/notebooks/h5py-atl03-benchmarks.csv b/notebooks/h5py-atl03-benchmarks.csv new file mode 100644 index 0000000..5b8b471 --- /dev/null +++ b/notebooks/h5py-atl03-benchmarks.csv @@ -0,0 +1,21 @@ 
+,iteration,library,driver,dataset,optimized-read,format,file,time,shape,bytes_requested,mean +0,0,h5py,ros3,ATL03-7GB,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5,2007.8612365722656,"(46484912,)",,-66.14486441580091 +1,0,h5py,ros3,ATL03-7GB,no,page-only-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5,2142.443084716797,"(46484912,)",,-66.14486441580091 +2,0,h5py,ros3,ATL03-7GB,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5,2124.307473897934,"(46484912,)",,-66.14486441580091 +3,0,h5py,ros3,ATL03-7GB,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5,288.2669167518616,"(46484912,)",,-66.14486441580091 +4,0,h5py,ros3,ATL03-7GB,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5,274.670058965683,"(46484912,)",,-66.14486441580091 +5,0,h5py,fsspec,ATL03-7GB,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5,2062.315348148346,"(46484912,)",0.0,-66.14486441580091 +6,0,h5py,fsspec,ATL03-7GB,no,page-only-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5,2226.6228652000427,"(46484912,)",0.0,-66.14486441580091 +7,0,h5py,fsspec,ATL03-7GB,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5,2231.902267932892,"(46484912,)",0.0,-66.14486441580091 +8,0,h5py,fsspec,ATL03-7GB,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5,325.2194719314575,"(46484912,)",0.0,-66.14486441580091 
+9,0,h5py,fsspec,ATL03-7GB,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5,362.87884545326233,"(46484912,)",0.0,-66.14486441580091 +10,0,h5py,ros3,ATL03-7GB,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5,1997.6913211345673,"(46484912,)",,-66.14486441580091 +11,0,h5py,ros3,ATL03-7GB,yes,page-only-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5,87.55571722984314,"(46484912,)",,-66.14486441580091 +12,0,h5py,ros3,ATL03-7GB,yes,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5,95.51402950286865,"(46484912,)",,-66.14486441580091 +13,0,h5py,ros3,ATL03-7GB,yes,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5,81.75408124923706,"(46484912,)",,-66.14486441580091 +14,0,h5py,ros3,ATL03-7GB,yes,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5,75.82156276702881,"(46484912,)",,-66.14486441580091 +15,0,h5py,fsspec,ATL03-7GB,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5,2091.811399459839,"(46484912,)",0.0,-66.14486441580091 +16,0,h5py,fsspec,ATL03-7GB,yes,page-only-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5,159.82449340820312,"(46484912,)",771751936.0,-66.14486441580091 +17,0,h5py,fsspec,ATL03-7GB,yes,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5,113.62834787368774,"(46484912,)",754974720.0,-66.14486441580091 
+18,0,h5py,fsspec,ATL03-7GB,yes,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5,171.39278054237366,"(46484912,)",654311424.0,-66.14486441580091 +19,0,h5py,fsspec,ATL03-7GB,yes,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5,169.21530079841614,"(46484912,)",645922816.0,-66.14486441580091 diff --git a/notebooks/h5py-atl03.ipynb b/notebooks/h5py-atl03.ipynb new file mode 100644 index 0000000..ffce00d --- /dev/null +++ b/notebooks/h5py-atl03.ipynb @@ -0,0 +1,387 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "12725ef5-255d-4b78-b4db-c27717db0d25", + "metadata": {}, + "source": [ + "# Testing access times to cloud optimized HDF5 files with the fsspec and ROS3 drivers.\n", + "\n", + "This notebook tests both I/O drivers on cloud optimized HDF5 files from the ICESat-2 mission. \n", + "\n", + "> Note: The ROS3 driver is only available in the Conda distribution of h5py" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3ac69e2f-87bc-4253-acab-54e2b0fa0348", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "h5py v3.11.0\n", + "fsspec v2024.9.0\n" + ] + } + ], + "source": [ + "import fsspec\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import h5py\n", + "\n", + "from dask.distributed import Client, LocalCluster\n", + "import dask.bag as db\n", + "from dask.diagnostics import ProgressBar\n", + "\n", + "from h5logger import parse_fsspec_log, read_file\n", + "\n", + "\n", + "for library in (h5py, fsspec):\n", + " print(f'{library.__name__} v{library.__version__}')" + ] + }, + { + "cell_type": "markdown", + "id": "94d6203f-2af2-4c64-9b4f-2354c480bd4c", + "metadata": {}, + "source": [ + "The following dictionary is generic enough that we can use it for different datasets; we only require file URLs
and the variables we want to read from them using h5py. \n", + "The tests take for granted that the original file has no cloud optimizations and cannot be read using cloud optimized patterns. The next check verifies whether the keywords \"paged\" or \"rechunked\" are present in the file name; if so, the file is presumed to be cloud optimized. \n", + "\n", + "This notebook uses dask to speed up the testing: we issue requests to each file at the same time, first looping with default parameters to learn what happens when we access the different flavors without knowing that some are cloud optimized. Then we use optimized I/O parameters. We do the same for both fsspec and the HDF5 native driver ROS3. " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "87720bcc-7764-4c01-87cb-81d3eb1aa1b2", + "metadata": {}, + "outputs": [], + "source": [ + "test_dict = {\n", + " \"ATL03-7GB\": {\n", + " \"files\": {\n", + " \"original\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5\",\n", + " \"page-only-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5\",\n", + " \"page-only-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5\",\n", + " \"rechunked-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5\",\n", + " \"rechunked-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5\",\n", + " },\n", + " \"variables\": [\"/gt1l/heights/h_ph\", \"/gt1l/heights/lat_ph\", \"/gt1l/heights/lon_ph\"]\n", + " },\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "410ddda8-8182-4cf5-a825-09f8ba7021c6", + "metadata": {}, + "outputs": [], + "source": [ + "# If there is a dask_client cluster
let's not create new ones.\n", + "if \"dask_client\" not in locals():\n", + " cluster = LocalCluster(threads_per_worker=1)\n", + " dask_client = Client(cluster)\n", + " dask_client" + ] + }, + { + "cell_type": "markdown", + "id": "823228b5-6700-4abb-8c28-4f8e69d76431", + "metadata": {}, + "source": [ + "The importance of caching and over-reads with remote files\n", + "\n", + "* **simple**: Caches entire files on disk.\n", + "* **blockcache**: Caches file data in chunks (blocks) in memory.\n", + "* **bytes**: Caches entire files in memory.\n", + "* **none**: Does not use caching on any request" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3c5225bb-361b-4146-ad01-eff90b762d9f", + "metadata": {}, + "outputs": [], + "source": [ + "num_runs = 1\n", + "benchmarks = []\n", + "ranges = []\n", + "\n", + "# The real default is readahead with a 5MB block size; we disable it to measure real times without caching anything\n", + "default_io_params = {\n", + " \"fsspec_params\": {\n", + " \"skip_instance_cache\": True,\n", + " \"cache_type\": \"none\"\n", + " # \"cache_type\": \"first\", # could be first, or caching the entire file with simple, \n", + " # \"block_size\": 4*1024*1024\n", + " },\n", + " \"h5py_params\": {}\n", + "}\n", + "\n", + "# we can fine-tune these\n", + "optimized_io_params ={\n", + " \"fsspec_params\": {\n", + " \"cache_type\": \"blockcache\", # could be first, or caching the entire file with simple, \n", + " \"block_size\": 8*1024*1024\n", + " },\n", + " \"h5py_params\" : {\n", + " \"page_buf_size\": 16*1024*1024,\n", + " \"rdcc_nbytes\": 4*1024*1024\n", + " }\n", + "}\n", + "\n", + "for optimized_read in [False, True]:\n", + " for driver in [\"ros3\", \"fsspec\"]:\n", + " for run in range(num_runs): # Running N times\n", + " for dataset_name, dataset_item in test_dict.items():\n", + " # Inner loop (parallelized)\n", + " urls = dataset_item[\"files\"].items() \n", + " benchmark_list = [(run, dataset_name, dataset_item[\"variables\"],
flavor, url, optimized_read, driver, default_io_params, optimized_io_params) for flavor, url in urls]\n", + " bag = db.from_sequence(benchmark_list, npartitions=len(benchmark_list))\n", + " result = bag.map(read_file)\n", + " with ProgressBar():\n", + " results = result.compute()\n", + " for result in results:\n", + " if len(result[\"benchmark\"]):\n", + " benchmarks.append(result[\"benchmark\"])\n", + " # For now we can only log I/O with fsspec\n", + " if result[\"benchmark\"][\"driver\"] == \"fsspec\":\n", + " ranges.append(result[\"ranges\"])\n", + " \n", + "df = pd.DataFrame.from_dict(benchmarks)" + ] + }, + { + "cell_type": "markdown", + "id": "c9c48844-8ba6-456f-b6a7-05ad782ea07c", + "metadata": {}, + "source": [ + "Now that we have collected the information we need, we are going to plot how the drivers and the parameters performed.\n", + "The \"baseline\" is what HDF5 and fsspec do when they don't use cloud optimized parameters on a non-optimized file. This is where we see the worst performance, due to many small serial requests to cloud storage. Presumably, the best case would be when we use optimized I/O that aligns to the scheme used for a cloud optimized file. E.g., if a file was optimized using paged aggregation and page sizes of 4MB, the best performance should be when we tell the I/O driver that we should read 4MB at a time. " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "96f782cf-c2da-4523-ac19-6cef6b865579", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10, 6)) \n", + "plt.style.use('ggplot')\n", + "\n", + "x_max = max(df[\"time\"])\n", + "pivot_df = df.pivot_table(index=['driver', 'optimized-read'], columns=['format', ], values='time', aggfunc='mean')\n", + "baseline_original = pivot_df['original'].max()\n", + "\n", + "# Plotting\n", + "pivot_df.plot(kind='barh', figsize=(20, 8), fontsize=14, width=0.5)\n", + "\n", + "plt.xlim(0, x_max)\n", + "\n", + "plt.suptitle('Cloud-optimized HDF5 performance (less is better)', fontsize=18)\n", + "# plt.title(\"Default I/O parameters (ATL03_20181120182818_08110112_006_02.h5: 7GB)\", fontsize=14)\n", + "plt.xlabel('Mean Time (S)')\n", + "plt.ylabel('Access Pattern', fontsize=16)\n", + "plt.xticks(rotation=0)\n", + "plt.legend(title='Format', fontsize=14, loc='upper right', bbox_to_anchor=(1.15, 1.015))\n", + "plt.grid(False)\n", + "\n", + "plt.axvline(x=baseline_original, color='red', linestyle='--', linewidth=2, label=f\"Baseline: {baseline_original:.2f}\")\n", + "\n", + "plt.tight_layout()\n", + "plt.savefig(\"h5py-default.png\", transparent=True, dpi=150)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "c39fb04d-f35d-43e6-a506-26b5d7edc0c5", + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(\"h5py-atl03-benchmarks.csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "4b3fbc1c-25de-49a8-b58d-5f845cb7f53b", + "metadata": {}, + "source": [ + "The following cell is experimental, it plots the access pattern signature and the reads on a remote HDF5 file, optimized or not, we can only record the info using fsspec for now as ROS3 logging requires to compile h5py from scratch using custom flags. " + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "2fb9f951-6a83-4e09-b3e0-0c00f67eea73", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib.lines import Line2D\n", + "import matplotlib.patches as patches\n", + "import numpy as np\n", + "\n", + "fig, axs = plt.subplots(ncols=1, nrows=len(ranges), figsize=(18, 18), sharex=True)\n", + "\n", + "for index, range_stats in enumerate(ranges):\n", + " rdf = range_stats[\"ranges\"]\n", + " file_size = range_stats[\"file_size\"]\n", + "\n", + " bins = [0, 1 * 1024, 10 * 1024, 100 * 1024, np.inf]\n", + " colors = ['red', 'orange', 'purple', 'blue']\n", + " labels = ['< 1KB', '1KB - 10KB', '10KB - 100KB', '> 100KB']\n", + " rdf['color'] = pd.cut(rdf['size'], bins=bins, labels=colors)\n", + " rdf['label'] = pd.cut(rdf['size'], bins=bins, labels=labels)\n", + "\n", + " for i, row in rdf.iterrows():\n", + " rect = patches.Rectangle((row['start'], 0), row['end']-row['start'], 1, \n", + " linewidth=1, edgecolor=row['color'], facecolor=row['color'], alpha=0.3)\n", + " axs[index].add_patch(rect)\n", + "\n", + " axs[index].set_xlim(0, 1.1e9)\n", + " axs[index].set_ylim(0, 1)\n", + " axs[index].set_yticklabels(\"\")\n", + " axs[index].set_yticks([])\n", + " xticks = [\n", + " 1024*1024,\n", + " 10*1024*1024,\n", + " 100*1024*1024,\n", + " 1024*1024*1024,\n", + " 10*1024*1024*1024,\n", + " ]\n", + " xtick_labels = [\n", + " '1 MB',\n", + " '10 MB',\n", + " '100 MB',\n", + " '1 GB',\n", + " '10GB'\n", + " ]\n", + " axs[index].set_xticks(xticks)\n", + " axs[index].set_xticklabels(xtick_labels)\n", + "\n", + "# The last axis will retain the x-ticks\n", + "axs[-1].tick_params(axis='x', which='both', bottom=True, labelbottom=True)\n", + "\n", + "# Create custom legend handles\n", + "legend_elements = [Line2D([0], [0], color=color, lw=2, label=label) for color, label in zip(colors, labels)]\n", + "# plt.legend(handles=legend_elements, title=\"Request Size\", loc='upper right')\n", + "\n", + "handles, labels = axs[0].get_legend_handles_labels()\n", + 
"fig.legend(handles=legend_elements, loc='upper right')\n", + "\n", + "# plt.suptitle(f'ATL06 Read Pattern. File Size: {round(file_size/1e6,2)} MB, Total Requests:{len(rdf)}, Requests <10kb: {len(rdf[rdf[\"size\"]<10000])}', fontsize=18)\n", + "plt.tight_layout()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1441c37f-60f6-42da-8c4f-a3399331d5e8", + "metadata": {}, + "outputs": [], + "source": [ + "# import holoviews as hv\n", + "# from holoviews.operation.datashader import rasterize\n", + "# import datashader as ds\n", + "# hv.extension(\"bokeh\")\n", + "\n", + "# xticks = [\n", + "# (1024*1024, '1MB'),\n", + "# (10*1024*1024, '10MB'),\n", + "# (100*1024*1024, '100MB'),\n", + "# (1024*1024*1024, '1GB'),\n", + "# (1024*1024*1024*10, '10GB')\n", + "# ]\n", + "\n", + "# # Function to create Rectangles\n", + "# def get_rectangles(ranges):\n", + "# rectangles = hv.Rectangles([]) # Start with an empty set of rectangles\n", + "# for i, row in ranges[0][\"ranges\"].iterrows():\n", + "# rect = (row['start'], 0, row['end'], 1) # Define rectangle bounds\n", + "# rectangles = rectangles * hv.Rectangles([rect]).opts(\n", + "# color=row['color'], \n", + "# line_color=row['color'], \n", + "# line_width=1\n", + "# )\n", + "# return rectangles\n", + "\n", + "# # Create an overlay of all rectangles\n", + "# rectangles = get_rectangles(ranges)\n", + "\n", + "# # Rasterize the plot using Datashader\n", + "# rasterized_rectangles = rasterize(rectangles, width=1200, height=300)\n", + "\n", + "# # Customize the plot with xticks and limits\n", + "# rasterized_rectangles.opts(\n", + "# xlabel='File Offset', ylabel='', xticks=xticks, \n", + "# xlim=(0, file_size), ylim=(0, 1), \n", + "# show_legend=True, legend_position='top_right'\n", + "# )\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/h5py-atl06.ipynb b/notebooks/h5py-atl06.ipynb new file mode 100644 index 0000000..d17fdc8 --- /dev/null +++ b/notebooks/h5py-atl06.ipynb @@ -0,0 +1,312 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "12725ef5-255d-4b78-b4db-c27717db0d25", + "metadata": {}, + "source": [ + "# Testing ROS3 and fsspec with h5py on cloud optimized HDF5 files \n", + "\n", + "This notebook tests both I/O drivers on cloud optimized HDF5 files from the ICESat-2 mission. \n", + "\n", + "> Note: The ROS3 driver is only available in the Conda distribution of h5py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ac69e2f-87bc-4253-acab-54e2b0fa0348", + "metadata": {}, + "outputs": [], + "source": [ + "import fsspec\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import h5py\n", + "\n", + "from dask.distributed import Client, LocalCluster\n", + "import dask.bag as db\n", + "from dask.diagnostics import ProgressBar\n", + "\n", + "from h5logger import parse_fsspec_log, read_file\n", + "\n", + "\n", + "for library in (h5py, fsspec):\n", + " print(f'{library.__name__} v{library.__version__}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87720bcc-7764-4c01-87cb-81d3eb1aa1b2", + "metadata": {}, + "outputs": [], + "source": [ + "test_dict = {\n", + " \"ATL06\": {\n", + " \"files\": {\n", + " \"original\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5\",\n", + " \"page-only-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5\",\n", + " \"rechunked-2mb\": 
\"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5\",\n", + " \"rechunked-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5\",\n", + " \"rechunked-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5\",\n", + " },\n", + " \"variables\": [\"/gt1l/land_ice_segments/h_li\", \"/gt1l/land_ice_segments/latitude\", \"/gt1l/land_ice_segments/longitude\"]\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "410ddda8-8182-4cf5-a825-09f8ba7021c6", + "metadata": {}, + "outputs": [], + "source": [ + "if \"dask_client\" not in locals():\n", + " cluster = LocalCluster(threads_per_worker=1)\n", + " dask_client = Client(cluster)\n", + " dask_client" + ] + }, + { + "cell_type": "markdown", + "id": "823228b5-6700-4abb-8c28-4f8e69d76431", + "metadata": {}, + "source": [ + "The importance of caching and over-reads with remote files\n", + "\n", + "* **simple**: Caches entire files on disk.\n", + "* **blockcache**: Caches file data in chunks (blocks) on disk.\n", + "* **bytes**: Caches entire files in memory.\n", + "* **none**: Does not use caching on any request" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c5225bb-361b-4146-ad01-eff90b762d9f", + "metadata": {}, + "outputs": [], + "source": [ + "num_runs = 1\n", + "benchmarks = []\n", + "ranges = []\n", + "\n", + "#the real default is readahead with 5MB of block sizes, we disabled to test real times without caching anything\n", + "default_io_params = {\n", + " \"fsspec_params\": {\n", + " \"skip_instance_cache\": True,\n", + " \"cache_type\": \"none\"\n", + " # \"cache_type\": \"first\", # could be first, or cachiing the entier file with simple, \n", + " # \"block_size\": 4*1024*1024\n", + " },\n", + " \"h5py_params\": {}\n", + "}\n", 
+ "\n", + "# we can fine-tune these\n", + "optimized_io_params ={\n", + " \"fsspec_params\": {\n", + " \"cache_type\": \"blockcache\", # could be first, or cachiing the entier file with simple, \n", + " \"block_size\": 8*1024*1024\n", + " },\n", + " \"h5py_params\" : {\n", + " \"page_buf_size\": 16*1024*1024,\n", + " \"rdcc_nbytes\": 4*1024*1024\n", + " }\n", + "}\n", + "\n", + "for optimized_read in [False, True]:\n", + " for driver in [\"ros3\", \"fsspec\"]:\n", + " for run in range(num_runs): # Running N times\n", + " for dataset_name, dataset_item in test_dict.items():\n", + " # Inner loop (parallelized)\n", + " urls = dataset_item[\"files\"].items() \n", + " benchmark_list = [(run, dataset_name, dataset_item[\"variables\"], flavor, url, optimized_read, driver, default_io_params, optimized_io_params) for flavor, url in urls]\n", + " bag = db.from_sequence(benchmark_list, npartitions=len(benchmark_list))\n", + " result = bag.map(read_file)\n", + " with ProgressBar():\n", + " results = result.compute()\n", + " for result in results:\n", + " if len(result[\"benchmark\"]):\n", + " benchmarks.append(result[\"benchmark\"])\n", + " # For now we can only log I/O with fsspec\n", + " if result[\"benchmark\"][\"driver\"] == \"fsspec\":\n", + " ranges.append(result[\"ranges\"])\n", + " \n", + "df = pd.DataFrame.from_dict(benchmarks)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c672ed19-0733-4ebd-8863-61b110b855fb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96f782cf-c2da-4523-ac19-6cef6b865579", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 6)) \n", + "plt.style.use('ggplot')\n", + "\n", + "x_max = max(df[\"time\"])\n", + "pivot_df = df.pivot_table(index=['driver', 'optimized-read'], columns=['format', ], values='time', aggfunc='mean')\n", + "baseline_original = pivot_df['original'].max()\n", + "\n", + "# Plotting\n", + 
"pivot_df.plot(kind='barh', figsize=(20, 8), fontsize=14, width=0.5)\n", + "\n", + "plt.xlim(0, x_max)\n", + "\n", + "plt.suptitle('Cloud-optimized HDF5 performance (less is better)', fontsize=18)\n", + "# plt.title(\"Default I/O parameters (ATL03_20181120182818_08110112_006_02.h5: 7GB)\", fontsize=14)\n", + "plt.xlabel('Mean Time (S)')\n", + "plt.ylabel('Access Pattern', fontsize=16)\n", + "plt.xticks(rotation=0)\n", + "plt.legend(title='Format', fontsize=14, loc='upper right', bbox_to_anchor=(1.15, 1.015))\n", + "plt.grid(False)\n", + "\n", + "plt.axvline(x=baseline_original, color='red', linestyle='--', linewidth=2, label=f\"Baseline: {baseline_original:.2f}\")\n", + "\n", + "plt.tight_layout()\n", + "plt.savefig(\"stats-default.png\", transparent=True, dpi=150)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c39fb04d-f35d-43e6-a506-26b5d7edc0c5", + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(\"h5py-benchmarks.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "345d2b51-10d0-4db9-9810-38a6b13b19d8", + "metadata": {}, + "outputs": [], + "source": [ + "ranges[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fb9f951-6a83-4e09-b3e0-0c00f67eea73", + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib.lines import Line2D\n", + "import matplotlib.patches as patches\n", + "import numpy as np\n", + "\n", + "fig, axs = plt.subplots(ncols=1, nrows=len(ranges), figsize=(18, 18), sharex=True)\n", + "\n", + "for index, range_stats in enumerate(ranges):\n", + " rdf = range_stats[\"ranges\"]\n", + " file_size = range_stats[\"file_size\"]\n", + "\n", + " bins = [0, 1 * 1024, 10 * 1024, 100 * 1024, np.inf]\n", + " colors = ['red', 'orange', 'purple', 'blue']\n", + " labels = ['< 1KB', '1KB - 10KB', '10KB - 100KB', '> 100KB']\n", + " rdf['color'] = pd.cut(rdf['size'], bins=bins, labels=colors)\n", + " rdf['label'] = pd.cut(rdf['size'], bins=bins, 
labels=labels)\n", + "\n", + " for i, row in rdf.iterrows():\n", + " rect = patches.Rectangle((row['start'], 0), row['end']-row['start'], 1, \n", + " linewidth=1, edgecolor=row['color'], facecolor=row['color'], alpha=0.3)\n", + " axs[index].add_patch(rect)\n", + "\n", + " axs[index].set_xlim(0, 1.1e8)\n", + " axs[index].set_ylim(0, 1) \n", + " axs[index].set_yticklabels(\"\")\n", + " axs[index].set_yticks([])\n", + "\n", + "\n", + "# The last axis will retain the x-ticks\n", + "axs[-1].tick_params(axis='x', which='both', bottom=True, labelbottom=True)\n", + "\n", + "# Create custom legend handles\n", + "legend_elements = [Line2D([0], [0], color=color, lw=2, label=label) for color, label in zip(colors, labels)]\n", + "# plt.legend(handles=legend_elements, title=\"Request Size\", loc='upper right')\n", + "\n", + "handles, labels = axs[0].get_legend_handles_labels()\n", + "fig.legend(handles=legend_elements, loc='upper right')\n", + "\n", + "plt.suptitle(f'ATL06 Read Pattern. File Size: {round(file_size/1e6,2)} MB, Total Requests:{len(rdf)}, Requests <10kb: {len(rdf[rdf[\"size\"]<10000])}', fontsize=18)\n", + "plt.tight_layout()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1441c37f-60f6-42da-8c4f-a3399331d5e8", + "metadata": {}, + "outputs": [], + "source": [ + "# import holoviews as hv\n", + "# hv.extension(\"bokeh\")\n", + "\n", + "# xticks = [\n", + "# (1024, '1KB'),\n", + "# (1024*1024, '1MB'),\n", + "# (10*1024*1024, '10MB'),\n", + "# (100*1024*1024, '100MB'),\n", + "# (1024*1024*1024, '1GB')\n", + "# ]\n", + "\n", + "# rectangles = hv.Overlay()\n", + "\n", + "# for index, row in rdf.iterrows():\n", + "# # Create a rectangle for each row\n", + "# rect = hv.Rectangles((row['start'], 0, row['end'], 1), label=row['label']).opts(\n", + "# color=row['color'],\n", + "# line_color=row['color'],\n", + "# line_width=1,\n", + "# alpha=0.7 # Optional: Set transparency for better visibility\n", + "# )\n", + "# rectangles 
*= rect # Overlay the rectangle on top of the previous ones\n", + "\n", + "# # Customize and display the plot\n", + "# rectangles.opts(\n", + "# width=1200, height=300, xlim=(0, file_size), ylim=(0, 1),\n", + "# xlabel='File Offset', ylabel='', xticks=xticks, show_legend=True, legend_position='top_right'\n", + "# )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/h5py-benchmarks.csv b/notebooks/h5py-benchmarks.csv new file mode 100644 index 0000000..a534636 --- /dev/null +++ b/notebooks/h5py-benchmarks.csv @@ -0,0 +1,61 @@ +,iteration,library,driver,dataset,optimized-read,format,file,time,shape,bytes_requested,mean +0,0,h5py,ros3,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,8.009551525115967,"(120035,)",,-12.659630361199927 +1,0,h5py,ros3,ATL06,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,3.4555869102478027,"(120035,)",,-12.659630361199927 +2,0,h5py,ros3,ATL06,no,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,6.641781330108643,"(120035,)",,-12.659630361199927 +3,0,h5py,ros3,ATL06,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,10.559669017791748,"(120035,)",,-12.659630361199927 
+4,0,h5py,ros3,ATL06,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,11.996437788009644,"(120035,)",,-12.659630361199927 +5,1,h5py,ros3,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,6.271223068237305,"(120035,)",,-12.659630361199927 +6,1,h5py,ros3,ATL06,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,4.8463568687438965,"(120035,)",,-12.659630361199927 +7,1,h5py,ros3,ATL06,no,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,8.921051025390625,"(120035,)",,-12.659630361199927 +8,1,h5py,ros3,ATL06,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,5.880865573883057,"(120035,)",,-12.659630361199927 +9,1,h5py,ros3,ATL06,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,3.2046899795532227,"(120035,)",,-12.659630361199927 +10,2,h5py,ros3,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,6.800616264343262,"(120035,)",,-12.659630361199927 +11,2,h5py,ros3,ATL06,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,4.855101585388184,"(120035,)",,-12.659630361199927 +12,2,h5py,ros3,ATL06,no,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,4.025922060012817,"(120035,)",,-12.659630361199927 
+13,2,h5py,ros3,ATL06,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,5.30383563041687,"(120035,)",,-12.659630361199927 +14,2,h5py,ros3,ATL06,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,3.963735580444336,"(120035,)",,-12.659630361199927 +15,0,h5py,fsspec,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,9.075090408325195,"(120035,)",0.0,-12.659630361199927 +16,0,h5py,fsspec,ATL06,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,11.841322183609009,"(120035,)",0.0,-12.659630361199927 +17,0,h5py,fsspec,ATL06,no,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,5.047335863113403,"(120035,)",0.0,-12.659630361199927 +18,0,h5py,fsspec,ATL06,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,5.137251138687134,"(120035,)",0.0,-12.659630361199927 +19,0,h5py,fsspec,ATL06,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,5.244110345840454,"(120035,)",0.0,-12.659630361199927 +20,1,h5py,fsspec,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,8.759339094161987,"(120035,)",0.0,-12.659630361199927 +21,1,h5py,fsspec,ATL06,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,11.117734670639038,"(120035,)",0.0,-12.659630361199927 
+22,1,h5py,fsspec,ATL06,no,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,5.022376298904419,"(120035,)",0.0,-12.659630361199927 +23,1,h5py,fsspec,ATL06,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,4.962268829345703,"(120035,)",0.0,-12.659630361199927 +24,1,h5py,fsspec,ATL06,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,5.486270189285278,"(120035,)",0.0,-12.659630361199927 +25,2,h5py,fsspec,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,9.126240730285645,"(120035,)",0.0,-12.659630361199927 +26,2,h5py,fsspec,ATL06,no,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,10.805852890014648,"(120035,)",0.0,-12.659630361199927 +27,2,h5py,fsspec,ATL06,no,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,5.048081636428833,"(120035,)",0.0,-12.659630361199927 +28,2,h5py,fsspec,ATL06,no,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,4.96171498298645,"(120035,)",0.0,-12.659630361199927 +29,2,h5py,fsspec,ATL06,no,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,4.986459255218506,"(120035,)",0.0,-12.659630361199927 +30,0,h5py,ros3,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,10.310771942138672,"(120035,)",,-12.659630361199927 
+31,0,h5py,ros3,ATL06,yes,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,6.174020051956177,"(120035,)",,-12.659630361199927 +32,0,h5py,ros3,ATL06,yes,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,6.522758960723877,"(120035,)",,-12.659630361199927 +33,0,h5py,ros3,ATL06,yes,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,4.112564325332642,"(120035,)",,-12.659630361199927 +34,0,h5py,ros3,ATL06,yes,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,4.797414779663086,"(120035,)",,-12.659630361199927 +35,1,h5py,ros3,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,10.215899229049683,"(120035,)",,-12.659630361199927 +36,1,h5py,ros3,ATL06,yes,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,2.9037656784057617,"(120035,)",,-12.659630361199927 +37,1,h5py,ros3,ATL06,yes,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,5.924455881118774,"(120035,)",,-12.659630361199927 +38,1,h5py,ros3,ATL06,yes,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,6.5820512771606445,"(120035,)",,-12.659630361199927 +39,1,h5py,ros3,ATL06,yes,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,8.97324538230896,"(120035,)",,-12.659630361199927 
+40,2,h5py,ros3,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,6.3302013874053955,"(120035,)",,-12.659630361199927 +41,2,h5py,ros3,ATL06,yes,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,5.722134113311768,"(120035,)",,-12.659630361199927 +42,2,h5py,ros3,ATL06,yes,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,6.119923830032349,"(120035,)",,-12.659630361199927 +43,2,h5py,ros3,ATL06,yes,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,10.62471055984497,"(120035,)",,-12.659630361199927 +44,2,h5py,ros3,ATL06,yes,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,3.990654468536377,"(120035,)",,-12.659630361199927 +45,0,h5py,fsspec,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,9.118420839309692,"(120035,)",0.0,-12.659630361199927 +46,0,h5py,fsspec,ATL06,yes,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,4.45331072807312,"(120035,)",16777216.0,-12.659630361199927 +47,0,h5py,fsspec,ATL06,yes,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,4.095282077789307,"(120035,)",16777216.0,-12.659630361199927 +48,0,h5py,fsspec,ATL06,yes,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,3.2909836769104004,"(120035,)",8388608.0,-12.659630361199927 
+49,0,h5py,fsspec,ATL06,yes,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,3.6027727127075195,"(120035,)",8388608.0,-12.659630361199927 +50,1,h5py,fsspec,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,8.456340074539185,"(120035,)",0.0,-12.659630361199927 +51,1,h5py,fsspec,ATL06,yes,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,8.179700374603271,"(120035,)",16777216.0,-12.659630361199927 +52,1,h5py,fsspec,ATL06,yes,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,3.8282923698425293,"(120035,)",16777216.0,-12.659630361199927 +53,1,h5py,fsspec,ATL06,yes,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,6.5400710105896,"(120035,)",8388608.0,-12.659630361199927 +54,1,h5py,fsspec,ATL06,yes,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,3.28332781791687,"(120035,)",8388608.0,-12.659630361199927 +55,2,h5py,fsspec,ATL06,no,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01.h5,8.094577550888062,"(120035,)",0.0,-12.659630361199927 +56,2,h5py,fsspec,ATL06,yes,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_page_8mb.h5,10.045564651489258,"(120035,)",16777216.0,-12.659630361199927 +57,2,h5py,fsspec,ATL06,yes,rechunked-2mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-2mb-repacked.h5,4.747626543045044,"(120035,)",16777216.0,-12.659630361199927 
+58,2,h5py,fsspec,ATL06,yes,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,2.4647138118743896,"(120035,)",8388608.0,-12.659630361199927 +59,2,h5py,fsspec,ATL06,yes,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl06/ATL06_20200811143458_07210811_006_01_rechunked-4mb-repacked.h5,6.894026041030884,"(120035,)",8388608.0,-12.659630361199927 diff --git a/notebooks/h5py-default.png b/notebooks/h5py-default.png new file mode 100644 index 0000000..2bba631 Binary files /dev/null and b/notebooks/h5py-default.png differ diff --git a/notebooks/h5py-original.csv b/notebooks/h5py-original.csv new file mode 100644 index 0000000..25e6353 --- /dev/null +++ b/notebooks/h5py-original.csv @@ -0,0 +1,11 @@ +,tool,dataset,cloud-aware,format,file,time,shape,bytes_requested,mean +0,h5py-ros3,7GB,yes,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5,609.4096472263336,"(46484912,)",,1035.1631 +1,h5py-ros3,7GB,yes,page-only-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5,638.2754406929016,"(46484912,)",,1035.1631 +2,h5py-ros3,7GB,yes,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5,653.3336625099182,"(46484912,)",,1035.1631 +3,h5py-ros3,7GB,yes,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5,81.40565180778503,"(46484912,)",,1035.1631 +4,h5py-ros3,7GB,yes,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5,76.86670875549316,"(46484912,)",,1035.1631 
+5,h5py-fsspec,7GB,yes,original,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5,618.8997149467468,"(46484912,)",0.0,1035.1631 +6,h5py-fsspec,7GB,yes,page-only-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5,652.322696685791,"(46484912,)",0.0,1035.1631 +7,h5py-fsspec,7GB,yes,page-only-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5,663.9397876262665,"(46484912,)",0.0,1035.1631 +8,h5py-fsspec,7GB,yes,rechunked-4mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5,70.95594787597656,"(46484912,)",0.0,1035.1631 +9,h5py-fsspec,7GB,yes,rechunked-8mb,s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5,70.55050539970398,"(46484912,)",0.0,1035.1631 diff --git a/notebooks/h5py.ipynb b/notebooks/h5py.ipynb deleted file mode 100644 index aaaf877..0000000 --- a/notebooks/h5py.ipynb +++ /dev/null @@ -1,308 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "12725ef5-255d-4b78-b4db-c27717db0d25", - "metadata": {}, - "source": [ - "# Testing ROS3 and fsspec with h5py on cloud optimized HDF5 files \n", - "\n", - "This notebook tests both I/O drivers on cloud optimized HDF5 files from the ICESat-2 mission. 
\n", - "\n", - "> Note: The ROS3 driver is only available in the Conda distribution of h5py" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "3ac69e2f-87bc-4253-acab-54e2b0fa0348", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "xarray v2024.6.0\n", - "h5py v3.11.0\n", - "fsspec v2024.6.0\n", - "h5coro v0.0.6\n", - "zarr v2.18.2\n" - ] - } - ], - "source": [ - "import xarray as xr\n", - "import h5py\n", - "import fsspec\n", - "import s3fs\n", - "import logging\n", - "import re\n", - "import time\n", - "import numpy as np\n", - "import zarr\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "\n", - "\n", - "from h5coro import h5coro, s3driver, filedriver\n", - "driver = s3driver.S3Driver\n", - "\n", - "logger = logging.getLogger('fsspec')\n", - "logger.setLevel(logging.DEBUG)\n", - "\n", - "for library in (xr, h5py, fsspec, h5coro, zarr):\n", - " print(f'{library.__name__} v{library.__version__}')" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "87720bcc-7764-4c01-87cb-81d3eb1aa1b2", - "metadata": {}, - "outputs": [], - "source": [ - "test_dict = {\n", - " \"1GB\": {\n", - " \"links\": {\n", - " \"original\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.h5\",\n", - " \"original-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.json\",\n", - " \"page-only-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\",\n", - " \"page-only-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\",\n", - " \"rechunked-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-4mb.h5\",\n", - " \"rechunked-8mb\": 
\"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.h5\",\n", - " \"rechunked-8mb-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.json\",\n", - " \n", - " },\n", - " \"group\": \"/gt1l/heights\",\n", - " \"variable\": \"h_ph\"\n", - " },\n", - " \"7GB\": {\n", - " \"links\": {\n", - " \"original\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5\",\n", - " \"original-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.json\",\n", - " \"page-only-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5\",\n", - " \"page-only-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5\",\n", - " \"rechunked-4mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5\",\n", - " \"rechunked-8mb\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5\",\n", - " \"rechunked-8mb-kerchunk\": \"s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.json\",\n", - " },\n", - " \"group\": \"/gt1l/heights\",\n", - " \"variable\": \"h_ph\"\n", - " }\n", - "}\n", - "\n", - "fs = s3fs.S3FileSystem(anon=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "54990389-9e89-476f-928a-af4844caa595", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.h5\n", - "Processing: 
s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5\n" - ] - } - ], - "source": [ - "h5py_fsspec_benchmarks = []\n", - "\n", - "\n", - "for key, dataset in test_dict.items():\n", - " for k, link in dataset[\"links\"].items():\n", - " try:\n", - " if \"kerchunk\" in k or link.endswith(\".json\"):\n", - " continue \n", - " print (f\"Processing: {link}\")\n", - " log_filename = f\"logs/fsspec-h5py-{key}-{k}_default.log\"\n", - " \n", - " # Create a new FileHandler for each iteration\n", - " file_handler = logging.FileHandler(log_filename)\n", - " file_handler.setLevel(logging.DEBUG)\n", - "\n", - " # Add the handler to the root logger\n", - " logging.getLogger().addHandler(file_handler)\n", - " # this is mostly IO so no perf_counter is needed\n", - " io_params = {\n", - " \"fsspec_params\": {},\n", - " \"h5py_params\": 
{}\n", - " }\n", - " \n", - " if \"rechunked\" in link or \"page\" in link:\n", - " io_params ={\n", - " \"fsspec_params\": {\n", - " \"cache_type\": \"blockcache\",\n", - " \"block_size\": 8*1024*1024\n", - " },\n", - " \"h5py_params\" : {\n", - " \"page_buf_size\": 32*1024*1024,\n", - " \"rdcc_nbytes\": 4*1024*1024\n", - " }\n", - " }\n", - " \n", - " start = time.time()\n", - " fo = fs.open(link, mode=\"rb\", **io_params[\"fsspec_params\"])\n", - " with h5py.File(fo, **io_params[\"h5py_params\"]) as f:\n", - " path = f\"{dataset['group']}/{dataset['variable']}\"\n", - " data = f[path][:]\n", - " data_mean = data.mean()\n", - " elapsed = time.time() - start\n", - " h5py_fsspec_benchmarks.append(\n", - " {\"tool\": \"h5py-fsspec\",\n", - " \"dataset\": key,\n", - " \"cloud-aware\": \"yes\",\n", - " \"format\": k,\n", - " \"file\": link,\n", - " \"time\": elapsed,\n", - " \"shape\": data.shape,\n", - " \"bytes_requested\": fo.cache.total_requested_bytes,\n", - " \"mean\": data_mean})\n", - "\n", - " logging.getLogger().removeHandler(file_handler) \n", - " file_handler.close()\n", - " \n", - " except Exception as e:\n", - " print(e)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "4efd9495-5a7f-4502-a9d4-20c7494f16db", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01-page-only-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-4mb.h5\n", - "Processing: 
s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20230618223036_13681901_006_01_rechunked-100k-page-8mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02-page-only-8mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-4mb.h5\n", - "Processing: s3://its-live-data/test-space/cloud-experiments/h5cloud/atl03/ATL03_20181120182818_08110112_006_02_rechunked-100k-page-8mb.h5\n" - ] - } - ], - "source": [ - "h5py_ros3_benchmarks = []\n", - "\n", - "for key, dataset in test_dict.items():\n", - " for k, link in dataset[\"links\"].items():\n", - " try:\n", - " if \"kerchunk\" in k or link.endswith(\".json\"):\n", - " continue\n", - " print (f\"Processing: {link}\")\n", - "\n", - " h5py_params = {\n", - " \"mode\": \"r\",\n", - " \"driver\": \"ros3\",\n", - " \"aws_region\": \"us-west-2\".encode(\"utf-8\"),\n", - " }\n", - "\n", - " \n", - " if \"rechunked\" in link or \"page\" in link:\n", - " h5py_params[\"page_buf_size\"] = 32*1024*1024\n", - " h5py_params[\"rdcc_nbytes\"] = 4*1024*1024\n", - " \n", - " start = time.time()\n", - " with h5py.File(link, **h5py_params) as f:\n", - " path = f\"{dataset['group']}/{dataset['variable']}\"\n", - " data = f[path][:]\n", - " data_mean = data.mean()\n", - " elapsed = time.time() - start\n", - " h5py_ros3_benchmarks.append(\n", - " {\"tool\": \"h5py-ros3\",\n", - " \"dataset\": key,\n", - " \"cloud-aware\": \"yes\",\n", - " \"format\": k,\n", - " \"file\": link,\n", - " \"time\": elapsed,\n", - " \"shape\": data.shape,\n", - " \"bytes_requested\": None, # metrics not easily available in ROS3\n", - " 
\"mean\": data_mean})\n", - "\n", - " \n", - " except Exception as e:\n", - " print(e)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "96f782cf-c2da-4523-ac19-6cef6b865579", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "df = pd.DataFrame.from_dict(h5py_ros3_benchmarks + h5py_fsspec_benchmarks)\n", - "\n", - "pivot_df = df.pivot_table(index=['tool','dataset'], columns=['format'], values='time', aggfunc='mean')\n", - "\n", - "# Plotting\n", - "pivot_df.plot(kind='bar', figsize=(10, 6))\n", - "\n", - "plt.suptitle('Cloud-optimized HDF5 I/O performance (less is better)', fontsize=14)\n", - "plt.title(\"Informed I/O parameters\", fontsize=10)\n", - "plt.xlabel('Tool')\n", - "plt.ylabel('Time (seconds)')\n", - "plt.xticks(rotation=0)\n", - "plt.legend(title='Format')\n", - "plt.tight_layout()\n", - "ax = plt.gca()\n", - "plt.grid(False)\n", - "# ax.yaxis.grid(True)\n", - "\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/logs/.placeholder b/notebooks/logs/.placeholder deleted file mode 100644 index e69de29..0000000 diff --git a/notebooks/optimize.py b/notebooks/optimize-atl03.py similarity index 100% rename from notebooks/optimize.py rename to notebooks/optimize-atl03.py diff --git a/notebooks/optimize-atl06.py b/notebooks/optimize-atl06.py new file mode 100755 index 0000000..2add637 --- /dev/null +++ b/notebooks/optimize-atl06.py @@ -0,0 +1,573 @@ +#!/usr/bin/env pythonw +"""Creates a cloud-optimized version of an ICESat-2 HDF5 file.""" + +# Copyright (c) 2024, University of Washington +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. 
def h5_s2b(s: str) -> bytes:
    """Returns `s` as ASCII bytes for use with the low-level h5py calls.

    Characters that have no ASCII representation are dropped. A value that
    cannot be encoded this way (for example one that is already ``bytes`` and
    therefore has no ``encode`` method) is handed back unchanged.

    Args:
        s: Python string

    Returns:
        `s` as bytes
    """
    try:
        return s.encode("ascii", "ignore")
    except (UnicodeEncodeError, AttributeError):
        # Not an encodable str: pass the value through untouched.
        return s
def init(argv=None):
    """Parses arguments.

    Args:
        argv: Optional list of argument strings to parse. The default (None)
            makes argparse read the process command line, which is what the
            original zero-argument call did.

    Returns:
        The ``argparse.Namespace`` holding ``infile``, ``outfile`` and every
        tuning option. All numeric options are ``int`` values, including
        their defaults.
    """

    parser = argparse.ArgumentParser(
        description="""Re-writes ICESat-2 HDF5 files using cloud optimization techniques."""
    )

    parser.add_argument("infile", help="Input HDF5 file.")

    parser.add_argument("outfile", help="Output HDF5 file.")

    def add_int_option(name, default, help_text):
        # Every tunable is an optional integer reachable as -name or --name.
        parser.add_argument(
            f"-{name}",
            f"--{name}",
            required=False,
            type=int,
            default=default,
            help=help_text,
        )

    add_int_option(
        "min_chunk",
        500,
        """Do not resize any dataset with chunksize < this size (num elements).""",
    )
    add_int_option(
        "seg_chunk",
        100000,
        """New chunksize for segment (non_photon) datasets (num elements).""",
    )
    add_int_option(
        "ph_chunk",
        1000000,
        """New chunksize for any photon datasets (num elements).""",
    )
    add_int_option(
        "gzip",
        -1,
        """New gzip level (if dataset is compressed; 0=disable, -1=do not change)""",
    )

    parser.add_argument(
        "-fs_space_page",
        "--fs_space_page",
        dest="fs_space_page",
        action="store_true",
        required=False,
        default=False,
        help="Use H5F_FSPACE_STRATEGY_PAGE.",
    )

    # The defaults below are the integer 0 (previously the float 0.0) so that
    # they have the same type as values parsed through type=int.
    add_int_option(
        "fs_page_size",
        0,
        """Filespace page size (bytes), 0=do not change (requires -fs_space_page).""",
    )
    add_int_option(
        "cache_slots",
        0,
        """Number of cache slots, 0=do not change. (requires -fs_space_page)""",
    )
    add_int_option(
        "chunk_cache",
        0,
        """Size of the chunk cache (bytes), 0=do not change. (requires -fs_space_page)""",
    )
    add_int_option(
        "user_blocksize",
        0,
        """User block size (bytes), 0=do not change.""",
    )
    add_int_option(
        "meta_blocksize",
        0,
        """Metadata block size (bytes), 0=do not change.""",
    )

    return parser.parse_args(argv)
def full_path(obj):
    """Returns the full pathname of an object.

    The path is built recursively by joining the object's `name` onto the
    path of its `parent`. The root object (name "/") and any object without
    a parent yield an empty string.
    """

    if obj.parent is None or obj.name == "/":
        return ""
    return os.path.join(full_path(obj.parent), obj.name)


# ------------------------------------------------------------------------------
def print_stats_hdr():
    """Prints the stats header: a row of column titles and a dashed rule."""

    row = " {0:40s} {1:7s} {2:10s} {3:10s} {4:7s} {5:10s} {6:10s}"
    titles = (
        "name",
        "src_zip",
        "src_size",
        "src_chunks",
        "dst_zip",
        "dst_nbytes",
        "dst_chunks",
    )
    rules = (
        "----",
        "-------",
        "----------",
        "----------",
        "-------",
        "----------",
        "----------",
    )
    print(row.format(*titles))
    print(row.format(*rules))


# ------------------------------------------------------------------------------
def print_stats(stats: dict):
    """Prints one stats row, laid out under the columns of print_stats_hdr().

    Args:
        stats: mapping that provides a string under "name" and integers under
            "src_gzip", "src_size", "src_chunks", "dst_gzip", "dst_nbytes"
            and "dst_chunks". Other keys are ignored.
    """

    columns = (
        "name",
        "src_gzip",
        "src_size",
        "src_chunks",
        "dst_gzip",
        "dst_nbytes",
        "dst_chunks",
    )
    print(
        " {0:40s} {1:7d} {2:10d} {3:10d} {4:7d} {5:10d} {6:10d}".format(
            *(stats[key] for key in columns)
        )
    )


# ------------------------------------------------------------------------------
def copy_attributes(src, dest):
    """Copies attributes from an input object to an output object.

    Args:
        src: object whose `attrs` are read.
        dest: object whose `attrs` receive a copy of each attribute.

    When `src` is an h5py.Dataset, the attributes CLASS, NAME, DIMENSION_LIST
    and REFERENCE_LIST are not copied: they belong to dimension scales and
    the datasets they are attached to.
    """

    if isinstance(src, h5py.Dataset):
        skipped = ("CLASS", "NAME", "DIMENSION_LIST", "REFERENCE_LIST")
    else:
        skipped = ()

    for name, value in src.attrs.items():
        if name in skipped:
            continue
        # Values that come back as plain Python objects (e.g. str) carry no
        # .dtype; passing dtype=None lets h5py infer the type from the data
        # instead of failing with AttributeError.
        dest.attrs.create(name, data=value, dtype=getattr(value, "dtype", None))
def copy_dataset(args, src, dst, name, photons=False):
    """Copies a dataset from an input group to an output group.

    The dataset is re-created in `dst` with the source's shape, dtype,
    maxshape, shuffle flag and fill value, then filled with the source data.
    Compression level and the chunk size of the first dimension may be
    changed according to `args`.

    Args:
        args: parsed options; reads `gzip`, `min_chunk`, `ph_chunk` and
            `seg_chunk`.
        src: input group containing the dataset `name`.
        dst: output group in which the dataset is created.
        name: name of the dataset inside `src` (and `dst`).
        photons: when True a resized first dimension uses `args.ph_chunk`,
            otherwise `args.seg_chunk`.

    Returns:
        A dict of statistics about the source and destination datasets; the
        row is also printed through print_stats().
    """

    src_dset = src[name]
    chunks = src_dset.chunks
    maxshape = src_dset.maxshape
    compress = src_dset.compression
    compress_opts = src_dset.compression_opts

    stats = {
        "name": name,
        "src_gzip": -1 if compress_opts is None else compress_opts,
        "src_size": src_dset.size,
        # Byte count, not element count (the original stored .size here).
        "src_nbytes": src_dset.nbytes,
        "src_chunks": -1 if chunks is None else chunks[0],
        "dst_gzip": 0,
        "dst_nbytes": 0,
        "dst_chunks": 0,
    }

    # Optionally modify compression (for compressed datasets)
    if compress == "gzip":
        if args.gzip == 0:
            compress = None
            compress_opts = None
        elif args.gzip > 0:
            compress_opts = args.gzip

    # Optionally modify chunking (for select chunked datasets)
    #
    # This simplistic approach is probably not optimal for 2D or 3D
    # datasets, but it's a start.
    #
    # The 'variable' dimensions for ICESat-2 data are the left-most.
    # Do not change any chunksize less than args and do not change
    # any chunksize that is equal to maxshape.
    #
    # `chunks` is None for datasets without a chunk layout and `maxshape` is
    # empty for datasets without dimensions, so both are checked before
    # being indexed.
    if chunks and maxshape and maxshape[0] is None and chunks[0] > args.min_chunk:
        resized = list(chunks)
        resized[0] = args.ph_chunk if photons else args.seg_chunk
        chunks = tuple(resized)

    dst_dset = dst.create_dataset(
        name,
        shape=src_dset.shape,
        dtype=src_dset.dtype,
        maxshape=src_dset.maxshape,
        chunks=chunks,
        compression=compress,
        compression_opts=compress_opts,
        shuffle=src_dset.shuffle,
        fillvalue=src_dset.fillvalue,
        track_order=True,
    )
    dst_dset.write_direct(np.array(src_dset))
    if src_dset.is_scale:
        # Keep dimension scales as scales, under the same scale name.
        dst_dset.make_scale(h5py.h5ds.get_scale_name(src_dset.id))

    copy_attributes(src_dset, dst_dset)

    dst_opts = dst_dset.compression_opts
    dst_chunks = dst_dset.chunks
    stats["dst_gzip"] = -1 if dst_opts is None else dst_opts
    stats["dst_size"] = dst_dset.size
    stats["dst_nbytes"] = dst_dset.nbytes
    stats["dst_chunks"] = -1 if dst_chunks is None else dst_chunks[0]

    print_stats(stats)

    return stats
def copy_group(args, src, dst, name):
    """Copies a group from an input file to an output file.

    Copies the group's attributes, then its child datasets, then recurses
    into its child groups. A header for the per-dataset stats rows is
    printed for every group visited.

    Args:
        args: parsed options, forwarded to copy_dataset().
        src: input file or group that contains `name`.
        dst: output file or group that receives the copy.
        name: name of the group to copy; "/" means `dst` itself is the target
            and no new group is created.

    Could do something with stats here. ie:
    min/max within each group, etc. Not sure its worth it now.
    """

    src_grp = src[name]
    # The root group already exists in the output; everything else is created.
    dst_grp = dst if name == "/" else dst.create_group(name, track_order=True)

    # Copy group attributes
    copy_attributes(src_grp, dst_grp)

    print(f"\nGroup: {full_path(src_grp)}\n")
    print_stats_hdr()

    # Datasets directly inside a group with this name are chunked with the
    # photon chunk size; all others use the segment chunk size.
    # NOTE(review): the original comment said this hack only works for ATL03,
    # but "land_ice_segments" looks like an ATL06 group name - confirm.
    photons = name == "land_ice_segments"

    # Copy child datasets
    for item in src_grp.keys():
        if not isinstance(src_grp[item], h5py.Dataset):
            continue
        if item == "control":
            continue
        copy_dataset(args, src_grp, dst_grp, item, photons=photons)

    # Copy child groups
    for item in src_grp.keys():
        if isinstance(src_grp[item], h5py.Group):
            copy_group(args, src_grp, dst_grp, item)
def _exit_with_error(message, *open_ids):
    """Prints `message`, closes the given low-level file ids and exits with status 3."""
    print(message)
    for fid in open_ids:
        fid.close()
    sys.exit(3)


def _read_creation_settings(fid):
    """Returns (fs_strategy, fspace_page_size, user_blocksize) from a file's creation plist."""
    fcpl = fid.get_create_plist()
    return (
        fcpl.get_file_space_strategy(),
        fcpl.get_file_space_page_size(),
        fcpl.get_userblock(),
    )


def _read_access_settings(fid):
    """Returns (cache_info, meta_blocksize) from a file's access plist."""
    fapl = fid.get_access_plist()
    return fapl.get_cache(), fapl.get_meta_block_size()


def _print_settings(
    label, fs_strategy, fspace_page_size, cache_info, user_blocksize, meta_blocksize
):
    """Prints the file-space and cache settings, each line prefixed with `label`."""
    strategies = [
        "H5F_FSPACE_STRATEGY_FSM_AGGR",
        "H5F_FSPACE_STRATEGY_PAGE",
        "H5F_FSPACE_STRATEGY_AGGR",
        "H5F_FSPACE_STRATEGY_NONE",
    ]
    print("--")
    print(f"{label} fs_strategy = {strategies[fs_strategy[0]]}")
    print(f"{label} fspace_page_size = {fspace_page_size}")
    print(f"{label} cache_slots = {cache_info[1]}")
    print(f"{label} chunk_cache = {cache_info[2]}")
    print(f"{label} userblock_size = {user_blocksize}")
    print(f"{label} meta_blocksize = {meta_blocksize}")
    print("--")


def optimize_file(args):
    """Copies an ICESat-2 HDF5 file to a new cloud-optimized file.

    Uses the low-level h5py API to open/query the input file and to create
    the output file. I started using existing code that was written with an
    old h5py and then realized (too late) new h5py supports all these things.

    So, then switches to the high-level API to actually copy the data.

    Args:
        args: parsed options; reads `infile`, `outfile`, `fs_space_page`,
            `fs_page_size`, `chunk_cache`, `cache_slots`, `user_blocksize`
            and `meta_blocksize`, and forwards `args` to copy_group().

    Any failure is reported on stdout and ends the process with exit status 3.
    """

    if not os.path.isfile(args.infile):
        print(f"{args.infile} : file does not exist.")
        sys.exit(3)

    # Open the input file
    try:
        in_fid = h5f.open(h5_s2b(args.infile), h5f.ACC_RDONLY)
    except Exception as e:
        _exit_with_error(f"Error opening input file - {e}")

    # Get info from the file creation plist
    try:
        fs_strategy, fspace_page_size, user_blocksize = _read_creation_settings(in_fid)
    except Exception as e:
        _exit_with_error(f"Error reading the file creation plist - {e}", in_fid)

    # Get info from the file access plist
    try:
        cache_info, meta_blocksize = _read_access_settings(in_fid)
    except Exception as e:
        _exit_with_error(f"Error reading the file access plist - {e}", in_fid)

    _print_settings(
        "Original", fs_strategy, fspace_page_size, cache_info, user_blocksize, meta_blocksize
    )

    # Apply any optional argument values
    if args.fs_space_page:
        fs_strategy = list(fs_strategy)
        cache_info = list(cache_info)
        fs_strategy[0] = 1  # index of H5F_FSPACE_STRATEGY_PAGE
        if args.fs_page_size > 0.0:
            fspace_page_size = args.fs_page_size
        if args.chunk_cache > 0.0:
            cache_info[2] = args.chunk_cache
        if args.cache_slots > 0.0:
            cache_info[1] = args.cache_slots
        fs_strategy = tuple(fs_strategy)
        cache_info = tuple(cache_info)

    if args.user_blocksize > 0.0:
        user_blocksize = args.user_blocksize
    if args.meta_blocksize > 0.0:
        meta_blocksize = args.meta_blocksize

    # Create a new file create property list
    try:
        fcpl = h5p.create(h5p.FILE_CREATE)
        fcpl.set_link_creation_order(h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED)
        fcpl.set_attr_creation_order(h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED)
        fcpl.set_file_space_strategy(fs_strategy[0], fs_strategy[1], fs_strategy[2])
        fcpl.set_file_space_page_size(fspace_page_size)
        fcpl.set_userblock(user_blocksize)
    except Exception as e:
        _exit_with_error(f"Error creating the file creation plist - {e}", in_fid)

    # Create a new file access property list
    try:
        fapl = h5p.create(h5p.FILE_ACCESS)
        fapl.set_cache(cache_info[0], cache_info[1], cache_info[2], cache_info[3])
        fapl.set_meta_block_size(meta_blocksize)
    except Exception as e:
        _exit_with_error(f"Error creating the file access plist - {e}", in_fid)

    # Create the output file
    try:
        out_fid = h5f.create(h5_s2b(args.outfile), h5f.ACC_TRUNC, fcpl=fcpl, fapl=fapl)
    except Exception as e:
        # The input file is still open at this point, so release it too.
        _exit_with_error(f"Error creating the output file - {e}", in_fid)

    # Read the settings back from the new file to report what actually applied
    try:
        fs_strategy, fspace_page_size, user_blocksize = _read_creation_settings(out_fid)
    except Exception as e:
        _exit_with_error(f"Error reading the file creation plist - {e}", in_fid, out_fid)

    try:
        cache_info, meta_blocksize = _read_access_settings(out_fid)
    except Exception as e:
        _exit_with_error(f"Error reading the file access plist - {e}", in_fid, out_fid)

    _print_settings(
        "Revised", fs_strategy, fspace_page_size, cache_info, user_blocksize, meta_blocksize
    )

    # Close the files so we can reopen them in the high-level API
    in_fid.close()
    out_fid.close()

    # Read from input file and write to output file
    try:
        beg_time = time.time()

        with h5py.File(args.infile, "r") as src, h5py.File(args.outfile, "r+") as dst:
            copy_group(args, src, dst, "/")

            # Attach dimension scales in the output file exactly how it is in
            # the input file
            def attach(name, obj):
                if isinstance(obj, h5py.Dataset):
                    for idx, dim in enumerate(obj.dims, start=0):
                        for scale in dim.values():
                            print(f"Dimscale {scale.name} attached to {obj.name} at dim #{idx}")
                            dst[obj.name].dims[idx].attach_scale(dst[scale.name])

            src.visititems(attach)

        end_time = time.time()

    except Exception as e:
        # Both low-level ids were closed above and the `with` block has
        # already released the high-level files, so nothing is left to close.
        print(f"Error reading/writing data - {e}")
        print(traceback.format_exc())
        sys.exit(3)

    # Print file sizes/change
    src_stats = os.stat(args.infile)
    dst_stats = os.stat(args.outfile)
    src_mb = src_stats.st_size / (1024 * 1024)
    dst_mb = dst_stats.st_size / (1024 * 1024)
    schange = (dst_mb / src_mb) * 100.0
    print("--")
    print("Total read/write time=", end_time - beg_time)
    print(f"File Sizes (MiB): input={src_mb}, output={dst_mb}, change={schange}%")
    print("--")
print(f"Revised cache_slots = {cache_slots}") + print(f"Revised chunk_cache = {chunk_cache}") + print(f"Revised userblock_size = {user_blocksize}") + print(f"Revised meta_blocksize = {meta_blocksize}") + print("--") + + # Close the files so we can reopen them in the high-level API + in_fid.close() + out_fid.close() + + # Read from input file and write to output file + try: + beg_time = time.time() + + with h5py.File(args.infile, "r") as src, h5py.File(args.outfile, "r+") as dst: + copy_group(args, src, dst, "/") + + # Attach dimension scales in the output file exactly how it is in + # the input file + def attach(name, obj): + if isinstance(obj, h5py.Dataset): + for idx, dim in enumerate(obj.dims, start=0): + for scale in dim.values(): + print(f"Dimscale {scale.name} attached to {obj.name} at dim #{idx}") + dst[obj.name].dims[idx].attach_scale(dst[scale.name]) + + src.visititems(attach) + + end_time = time.time() + + except Exception as e: + print(f"Error reading/writing data - {e}") + print(traceback.format_exc()) + in_fid.close() + out_fid.close() + sys.exit(3) + + # Print file sizes/change + src_stats = os.stat(args.infile) + dst_stats = os.stat(args.outfile) + src_mb = src_stats.st_size / (1024 * 1024) + dst_mb = dst_stats.st_size / (1024 * 1024) + schange = (dst_mb / src_mb) * 100.0 + print("--") + print("Total read/write time=", end_time - beg_time) + print(f"File Sizes (MiB): input={src_mb}, output={dst_mb}, change={schange}%") + print("--") + + +# ------------------------------------------------------------------------------ +def run(): + args = init() + optimize_file(args) + + +# ------------------------------------------------------------------------------- +if __name__ == "__main__": + run() diff --git a/notebooks/ros3.png b/notebooks/ros3.png deleted file mode 100644 index 9a5653f..0000000 Binary files a/notebooks/ros3.png and /dev/null differ diff --git a/notebooks/stats-default.png b/notebooks/stats-default.png new file mode 100644 index 
0000000..8f91bde Binary files /dev/null and b/notebooks/stats-default.png differ diff --git a/notebooks/stats.png b/notebooks/stats.png deleted file mode 100644 index 3ecc72c..0000000 Binary files a/notebooks/stats.png and /dev/null differ diff --git a/paper.qmd b/paper.qmd index 571b039..1ee2e74 100644 --- a/paper.qmd +++ b/paper.qmd @@ -118,13 +118,13 @@ shows how reads (Rn) are done in order to access file metadata, In the first rea #### **Background and data selection** -As a result of community feedback and “hack weeks” organized by NSIDC and UW eScience Institute in 2023[@h5cloud2023], NSIDC started the Cloud Optimized Format Investigation (COFI) project to improve access to HDF5 from the ICESat-2 mission. A spaceborne lidar that retrieves surface topography of the Earth’s ice sheets, land and oceans [@NEUMANN2019111325]. Because of its complexity, large size and importance for cryospheric studies we targeted the ATL03 dataset. ATL03 core data are geolocated photon heights from the ICESat-2 ATLAS instrument. Each file contains 1003 geophysical variables in 6 data groups. Although our research was focused on this dataset, most of our findings are applicable to any dataset stored in HDF5 and NetCDF4. +As a result of community feedback and “hack weeks” organized by NSIDC and UW eScience Institute in 2023[@h5cloud2023], NSIDC started the Cloud Optimized Format Investigation (COFI) project to improve access to HDF5 from the ICESat-2 mission, a spaceborne lidar that retrieves surface topography of the Earth’s ice sheets, land and oceans [@NEUMANN2019111325]. Because of its complexity, large size and importance for cryospheric studies we targeted the ATL03 data product. The most relevant variables in ATL03 are geolocated photon heights from the ICESat-2 ATLAS instrument. Each ATL03 file contains 1003 geophysical variables in 6 data groups. Although our research was focused on this dataset, most of our findings are applicable to any dataset stored in HDF5 and NetCDF4. 
## Methodology -We tested access times to original and different configurations of cloud-optimized HDF5 ATL03 files [list files tested] stored in AWS S3 buckets in region us-west-2, the region hosting NASA’s Earthdata Cloud archives. Files were accessed using Python tools commonly used by Earth scientists: h5py and Xarray[@Hoyer2017-su]. h5py is a Python wrapper around the HDF5 C API. xarray^[`h5py` is a dependency of Xarray] is a widely used Python package for working with n-dimensional data. We also tested access times using h5coro, a python package optimized for reading HDF5 files from S3 buckets and kerchunk, a tool that creates an efficient lookup table for file chunks to allow performant partial reads of files. +We tested access times to original and different configurations of cloud-optimized HDF5 [ATL03 files](https://its-live-data.s3.amazonaws.com/index.html#test-space/cloud-experiments/h5cloud/) stored in AWS S3 buckets in region us-west-2, the region hosting NASA’s Earthdata Cloud archives. Files were accessed using Python tools commonly used by Earth scientists: h5py and Xarray[@Hoyer2017-su]. h5py is a Python wrapper around the HDF5 C API. xarray^[`h5py` is a dependency of Xarray] is a widely used Python package for working with n-dimensional data. We also tested access times using h5coro, a python package optimized for reading HDF5 files from S3 buckets and kerchunk, a tool that creates an efficient lookup table for file chunks to allow performant partial reads of files. -The test files were originally cloud optimized by “repacking” them, using a relatively new feature in the HDF5 C API called “paged aggregation”. Page aggregation does 2 things: it collects file-level metadata from datasets and stores it on dedicated metadata blocks in the file; and it forces the library to write both data and metadata using these fixed-size pages. 
Aggregation allows client libraries to read file metadata with only a few requests and uses the page size used in the aggregation as the minimal request size, overriding the 1 request per chunk behavior. +The test files were originally cloud optimized by “repacking” them, using a relatively new feature in the HDF5 C API called “paged aggregation”. Page aggregation does 2 things: first, it collects file-level metadata from datasets and stores it on dedicated metadata blocks at the front of the file; second, it forces the library to write both data and metadata using these fixed-size pages. Aggregation allows client libraries to read file metadata with only a few requests using the page size as a fixed request size, overriding the 1 request per chunk behavior. ::: {#fig-2 fig-env="figure*"} @@ -158,7 +158,7 @@ for an HTTP request, especially when we have to read them sequentially. Because | page-only-8mb | paged-aggregated file with 4mb per pag8 | ~1% | 1.5km | (10000,) | 8MB | 35kb | | rechunked-4mb | page-aggregated and bigger chunk sizes | ~1% | 10km | (100000,) | 4MB | 400kb | | rechunked-8mb | page-aggregated and bigger chunk sizes | ~1% | 10km | (100000,) | 8MB | 400kb | -| rechunked-8mb-kerchunk | kerchunk sidecar of the last paged-aggregated file | N/A | 10km | (10000,) | 8MB | 400kb | +| rechunked-8mb-kerchunk | kerchunk sidecar of the last paged-aggregated file | N/A | 10km | (100000,) | 8MB | 400kb | This table represents the different configurations we used for our tests in 2 file sizes. It's worth noticing that we encountered a few outlier cases where compression and chunk sizes led page aggregation to an increase in file size of approximately 10% which was above the desired value for NSIDC (5% max) We tested these files using the most common libraries to handle HDF5 and 2 different I/O drivers that support remote access to AWS S3, fsspec and the native S3. 
The results of our testing is explained in the next section and the code @@ -167,14 +167,30 @@ to reproduce the results is in the attached notebooks. ## Results +::: {#fig-4 fig-env="figure*"} + +![](figures/figure-4.png) + +shows that using paged aggregation alone is not a complete solution. This behavior is caused by over-reads of data now distributed in pages and the internals of HDF5 not knowing how to optimize +the requests. This means that if we cloud optimize alone and use the same code, in some cases we'll make access to these files even slower. A very important thing to notice here is that rechunking the file, in this case using 10X bigger chunks, results in a predictable 10X improvement in access times without any cloud optimization involved. +Having fewer chunks generates less metadata and bigger requests; in general it is recommended that chunk sizes range between 1MB and 10MB[Add citation, S3 and HDF5] and, if we have enough memory and bandwidth, even +bigger (Pangeo recommends up to 100MB chunks)[Add citation.] + +::: + + + ::: {#fig-5 fig-env="figure*"} ![](figures/figure-5.png) -Benchmarks show that cloud optimizing ATL03 files improved access times at least an order of magnitude when used with aligned I/O patterns, this is telling the library about the cloud optimization and page size. +shows that once the I/O configuration is aligned with the chunking in the file, access times are on par with cloud optimized access patterns like Kerchunk/Zarr. +These numbers are from in-region execution. Out-of-region access is considerably slower for the non cloud optimized case. ::: + + ## Recommendations We have split our recommendations for the ATL03 product into 3 main categories, creating the files, accessing the files, and future tool development. 
@@ -204,11 +220,11 @@ will be filled but that is not the case and we will end up with unused space [Se ### Recommended access patterns -Placeholder +In progress ### Recommended tooling development -Placeholder +In progress ### Mission implementation