From f3f4da4d1cf45f6c205ae4aefac25d562323a9b4 Mon Sep 17 00:00:00 2001 From: Raphael Hagen Date: Tue, 12 Mar 2024 22:35:03 -0600 Subject: [PATCH 1/8] added test and docs opt installs in pyproj --- pyproject.toml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9e1c6d39..51be335d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,4 @@ + [project] name = "virtualizarr" description = "Create virtual Zarr stores from archival data using xarray API" @@ -18,13 +19,29 @@ classifiers = [ "Programming Language :: Python :: 3.11", ] requires-python = ">=3.9" +dynamic = ["version"] dependencies = [ "xarray", "kerchunk", "pydantic", "packaging", ] -dynamic = ["version"] + +[project.optional-dependencies] +test = [ + "pre-commit", + "pytest-mypy", + "pytest", +] + +docs = [ + "sphinx", + "pangeo-sphinx-book-theme", + "sphinx-autodoc-typehints", +] + + + [project.urls] Home = "https://github.com/TomNicholas/VirtualiZarr" From 6c357bbc59ad6f3b1b2f6fd67240d9c78812a3da Mon Sep 17 00:00:00 2001 From: Raphael Hagen Date: Tue, 12 Mar 2024 23:09:37 -0600 Subject: [PATCH 2/8] roughout of sphinx docs --- docs/Makefile | 20 ++++++++++++++++ docs/conf.py | 49 ++++++++++++++++++++++++++++++++++++++ docs/dev_status_roadmap.md | 7 ++++++ docs/how_it_works.md | 14 +++++++++++ docs/index.md | 36 ++++++++++++++++++++++++++++ docs/installation.md | 22 +++++++++++++++++ docs/make.bat | 35 +++++++++++++++++++++++++++ docs/usage.md | 49 ++++++++++++++++++++++++++++++++++++++ pyproject.toml | 5 +++- 9 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/dev_status_roadmap.md create mode 100644 docs/how_it_works.md create mode 100644 docs/index.md create mode 100644 docs/installation.md create mode 100644 docs/make.bat create mode 100644 docs/usage.md diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 
00000000..d4bb2cbb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..caacc3ce --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,49 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'virtualizarr' +copyright = '2024, Thomas Nicholas' +author = 'Thomas Nicholas' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + + +extensions = [ + "myst_nb", + "sphinx.ext.autodoc", + "sphinx.ext.extlinks", + "sphinx_autodoc_typehints", + "sphinx_copybutton", + "sphinx_togglebutton", + "sphinx_design", +] +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "pangeo_sphinx_book_theme" +html_theme_options = { + 
"repository_url": "https://github.com/TomNicholas/VirtualiZarr", + "repository_branch": "main", + "path_to_docs": "docs", +} + +html_logo = "_static/_future_logo.png" + +html_static_path = ['_static'] + + +# issues +# pangeo logo +# dark mode/lm switch +# needs to add api ref diff --git a/docs/dev_status_roadmap.md b/docs/dev_status_roadmap.md new file mode 100644 index 00000000..b86aa8d6 --- /dev/null +++ b/docs/dev_status_roadmap.md @@ -0,0 +1,7 @@ +# Development Status and Roadmap + +VirtualiZarr is ready to use for many of the tasks that we are used to using kerchunk for, but the most general and powerful vision of this library can only be implemented once certain changes upstream in Zarr have occurred. + +VirtualiZarr is therefore evolving in tandem with developments in the Zarr Specification, which then need to be implemented in specific Zarr reader implementations (especially the Zarr-Python V3 implementation). There is an [overall roadmap for this integration with Zarr](https://hackmd.io/t9Myqt0HR7O0nq6wiHWCDA), whose final completion requires acceptance of at least two new Zarr Enhancement Proposals (the ["Chunk Manifest"](https://github.com/zarr-developers/zarr-specs/issues/287) and ["Virtual Concatenation"](https://github.com/zarr-developers/zarr-specs/issues/288) ZEPs). + +Whilst we wait for these upstream changes, in the meantime VirtualiZarr aims to provide utility in a significant subset of cases, for example by enabling writing virtualized zarr stores out to the existing kerchunk references format, so that they can be read by fsspec today. \ No newline at end of file diff --git a/docs/how_it_works.md b/docs/how_it_works.md new file mode 100644 index 00000000..06fd8be9 --- /dev/null +++ b/docs/how_it_works.md @@ -0,0 +1,14 @@ +# How Does This Work? + + +I'm glad you asked! 
We can think of the problem of providing virtualized zarr-like access to a set of legacy files in some other format as a series of steps: + +1) **Read byte ranges** - We use the various [kerchunk file format backends](https://fsspec.github.io/kerchunk/reference.html#file-format-backends) to determine which byte ranges within a given legacy file would have to be read in order to get a specific chunk of data we want. +2) **Construct a representation of a single file (or array within a file)** - Kerchunk's backends return a nested dictionary representing an entire file, but we instead immediately parse this dict and wrap it up into a set of `ManifestArray` objects. The record of where to look to find the file and the byte ranges is stored under the `ManifestArray.manifest` attribute, in a `ChunkManifest` object. Both steps (1) and (2) are handled by the `'virtualizarr'` xarray backend, which returns one `xarray.Dataset` object per file, each wrapping multiple `ManifestArray` instances (as opposed to e.g. numpy/dask arrays). +3) **Deduce the concatenation order** - The desired order of concatenation can either be inferred from the order in which the datasets are supplied (which is what `xr.combine_nested` assumes), or it can be read from the coordinate data in the files (which is what `xr.combine_by_coords` does). If the ordering information is not present as a coordinate (e.g. because it's in the filename), a pre-processing step might be required. +4) **Check that the desired concatenation is valid** - Whether called explicitly by the user or implicitly via `xr.combine_nested/combine_by_coords/open_mfdataset`, `xr.concat` is used to concatenate/stack the wrapped `ManifestArray` objects. When doing this xarray will spend time checking that the array objects and any coordinate indexes can be safely aligned and concatenated. 
Along with opening files, and loading coordinates in step (3), this is the main reason why `xr.open_mfdataset` can take a long time to return a dataset created from a large number of files. +5) **Combine into one big dataset** - `xr.concat` dispatches to the `concat/stack` methods of the underlying `ManifestArray` objects. These perform concatenation by merging their respective Chunk Manifests. Using xarray's `combine_*` methods means that we can handle multi-dimensional concatenations as well as merging many different variables. +6) **Serialize the combined result to disk** - The resultant `xr.Dataset` object wraps `ManifestArray` objects which contain the complete list of byte ranges for every chunk we might want to read. We now serialize this information to disk, either using the [kerchunk specification](https://fsspec.github.io/kerchunk/spec.html#version-1), or in future we plan to use [new Zarr extensions](https://github.com/zarr-developers/zarr-specs/issues/287) to write valid Zarr stores directly. +7) **Open the virtualized dataset from disk** - The virtualized zarr store can now be read from disk, skipping all the work we did above. Chunk reads from this store will be redirected to read the corresponding bytes in the original legacy files. + +**Note:** Using the `kerchunk` library alone will perform a similar set of steps overall, but because (3), (4), (5), and (6) are all performed by the `kerchunk.combine.MultiZarrToZarr` function, and no internal abstractions are exposed, the design is much less modular, and the use cases are limited by kerchunk's API surface. 
diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..68327af0 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,36 @@ +# VirtualiZarr + +**VirtualiZarr creates virtual Zarr stores for cloud-friendly access to archival data, using familiar xarray syntax.** + +VirtualiZarr grew out of [discussions](https://github.com/fsspec/kerchunk/issues/377) on the [kerchunk repository](https://github.com/fsspec/kerchunk), and is an attempt to provide the game-changing power of kerchunk in a zarr-native way, and with a familiar array-like API. + +## What's the difference between VirtualiZarr and Kerchunk? + +The Kerchunk idea solves an incredibly important problem: accessing big archival datasets via a cloud-optimized pattern, but without copying or modifying the original data in any way. This is a win-win-win for users, data engineers, and data providers. Users see fast-opening zarr-compliant stores that work performantly with libraries like xarray and dask, data engineers can provide this speed by adding a lightweight virtualization layer on top of existing data (without having to ask anyone's permission), and data providers don't have to change anything about their legacy files for them to be used in a cloud-optimized way. + +However, kerchunk's current design is limited: +- Store-level abstractions make combining datasets complicated and idiosyncratic, and require duplicating logic that already exists in libraries like xarray, +- The kerchunk format for storing on-disk references requires the caller to understand it, usually via [`fsspec`](https://github.com/fsspec/filesystem_spec) (which is currently only implemented in python). 
+ +VirtualiZarr aims to build on the excellent ideas of kerchunk whilst solving the above problems: +- Using array-level abstractions instead is more modular, easier to reason about, allows convenient wrapping by high-level tools like xarray, and is simpler to parallelize, +- Writing the virtualized arrays out as a valid Zarr store directly (through new Zarr Extensions) will allow for Zarr implementations in any language to read the archival data. + +## Licence + +Apache 2.0 + +## Site Contents + +```{toctree} +:maxdepth: 2 + +self +installation +usage +how_it_works +dev_status_roadmap + + +``` + diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 00000000..a4297a97 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,22 @@ +# Installation + + +```shell +git clone https://github.com/TomNicholas/VirtualiZarr +cd VirtualiZarr +pip install -e . +``` + + +## Install Test Dependencies + +```shell +pip install -e '.[test]' +``` + + +## Install Docs Dependencies + +```shell +pip install -e '.[docs]' +``` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..32bb2452 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 00000000..da06735c --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,49 @@ +# Usage + + +Let's say you have a bunch of legacy files (e.g. netCDF) which together tile to form a large dataset. Let's imagine you already know how to use xarray to open these files and combine the opened dataset objects into one complete dataset. (If you don't then read the [xarray docs page on combining data](https://docs.xarray.dev/en/stable/user-guide/combining.html).) + +```python +ds = xr.open_mfdataset( + '/my/files*.nc', + engine='h5netcdf', + combine='by_coords', # 'by_coords' requires reading coord data to determine concatenation order +) +ds # the complete lazy xarray dataset +``` + +However, you don't want to run this set of xarray operations every time you open this dataset, as running commands like `xr.open_mfdataset` can be expensive. Instead you would prefer to just be able to open a virtualized Zarr store (i.e. `xr.open_dataset('my_virtual_store.zarr')`), as that would open instantly, but still give access to the same data underneath. + +**`VirtualiZarr` allows you to use the same xarray incantation you would normally use to open and combine all your files, but cache that result as a virtual Zarr store.** + +What's being cached here, you ask? We're effectively caching the result of performing all the various consistency checks that xarray performs when it combines newly-encountered datasets together. Once you have the new virtual Zarr store xarray is able to assume that this checking has already been done, and trusts your Zarr store enough to just open it instantly. 
+ +Creating the virtual store looks very similar to how we normally open data with xarray: + +```python +import virtualizarr # required for the xarray backend and accessor to be present + +virtual_ds = xr.open_mfdataset( + '/my/files*.nc', + engine='virtualizarr', # virtualizarr registers an xarray IO backend that returns ManifestArray objects + combine='by_coords', # 'by_coords' still requires actually reading coordinate data +) + +virtual_ds # now wraps a bunch of virtual ManifestArray objects directly + +# cache the combined dataset pattern to disk, in this case using the existing kerchunk specification for reference files +virtual_ds.virtualize.to_kerchunk('combined.json', format='json') +``` + +Now you can open your shiny new Zarr store instantly: + +```python +fs = fsspec.filesystem('reference', fo='combined.json') +m = fs.get_mapper('') + +ds = xr.open_dataset(m, engine='kerchunk', chunks={}) # normal xarray.Dataset object, wrapping dask/numpy arrays etc. +``` + +(Since we serialized the cached results using the kerchunk specification then opening this zarr store still requires using fsspec via the kerchunk xarray backend.) + +No data has been loaded or copied in this process, we have merely created an on-disk lookup table that points xarray into the specific parts of the original netCDF files when it needs to read each chunk. 
\ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 51be335d..eb15bb9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,11 +38,14 @@ docs = [ "sphinx", "pangeo-sphinx-book-theme", "sphinx-autodoc-typehints", + "sphinx_copybutton", + "sphinx_togglebutton", + "sphinx_design", + "myst_nb" ] - [project.urls] Home = "https://github.com/TomNicholas/VirtualiZarr" Documentation = "https://github.com/TomNicholas/VirtualiZarr/blob/main/README.md" From 33fe32c624527698a8327ef240cfb99c3f52e2d5 Mon Sep 17 00:00:00 2001 From: Raphael Hagen Date: Wed, 13 Mar 2024 19:32:35 -0600 Subject: [PATCH 3/8] add api + lint --- docs/api.md | 21 +++++++++++++++++++++ docs/conf.py | 13 ++++++------- docs/dev_status_roadmap.md | 2 +- docs/index.md | 3 +-- docs/usage.md | 2 +- 5 files changed, 30 insertions(+), 11 deletions(-) create mode 100644 docs/api.md diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 00000000..0a4f41a8 --- /dev/null +++ b/docs/api.md @@ -0,0 +1,21 @@ +# API Reference + + +## Manifests + +```{eval-rst} +.. autoclass:: virtualizarr.manifests.manifest.ChunkManifest + :members: +``` + +```{eval-rst} +.. autoclass:: virtualizarr.manifests.ManifestArray + :members: +``` + +## Xarray + +```{eval-rst} +.. 
autoclass:: virtualizarr.xarray.open_dataset_via_kerchunk + :members: +``` diff --git a/docs/conf.py b/docs/conf.py index caacc3ce..3c8f1236 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,9 +6,9 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = 'virtualizarr' -copyright = '2024, Thomas Nicholas' -author = 'Thomas Nicholas' +project = "virtualizarr" +copyright = "2024, Thomas Nicholas" +author = "Thomas Nicholas" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration @@ -23,9 +23,8 @@ "sphinx_togglebutton", "sphinx_design", ] -templates_path = ['_templates'] -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] - +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- @@ -40,7 +39,7 @@ html_logo = "_static/_future_logo.png" -html_static_path = ['_static'] +html_static_path = ["_static"] # issues diff --git a/docs/dev_status_roadmap.md b/docs/dev_status_roadmap.md index b86aa8d6..375ec327 100644 --- a/docs/dev_status_roadmap.md +++ b/docs/dev_status_roadmap.md @@ -4,4 +4,4 @@ VirtualiZarr is ready to use for many of the tasks that we are used to using ker VirtualiZarr is therefore evolving in tandem with developments in the Zarr Specification, which then need to be implemented in specific Zarr reader implementations (especially the Zarr-Python V3 implementation). 
There is an [overall roadmap for this integration with Zarr](https://hackmd.io/t9Myqt0HR7O0nq6wiHWCDA), whose final completion requires acceptance of at least two new Zarr Enhancement Proposals (the ["Chunk Manifest"](https://github.com/zarr-developers/zarr-specs/issues/287) and ["Virtual Concatenation"](https://github.com/zarr-developers/zarr-specs/issues/288) ZEPs). -Whilst we wait for these upstream changes, in the meantime VirtualiZarr aims to provide utility in a significant subset of cases, for example by enabling writing virtualized zarr stores out to the existing kerchunk references format, so that they can be read by fsspec today. \ No newline at end of file +Whilst we wait for these upstream changes, in the meantime VirtualiZarr aims to provide utility in a significant subset of cases, for example by enabling writing virtualized zarr stores out to the existing kerchunk references format, so that they can be read by fsspec today. diff --git a/docs/index.md b/docs/index.md index 68327af0..a5300f3e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -30,7 +30,6 @@ installation usage how_it_works dev_status_roadmap - +api ``` - diff --git a/docs/usage.md b/docs/usage.md index da06735c..29dcfcd7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -46,4 +46,4 @@ ds = xr.open_dataset(m, engine='kerchunk', chunks={}) # normal xarray.Dataset o (Since we serialized the cached results using the kerchunk specification then opening this zarr store still requires using fsspec via the kerchunk xarray backend.) -No data has been loaded or copied in this process, we have merely created an on-disk lookup table that points xarray into the specific parts of the original netCDF files when it needs to read each chunk. \ No newline at end of file +No data has been loaded or copied in this process, we have merely created an on-disk lookup table that points xarray into the specific parts of the original netCDF files when it needs to read each chunk. 
From d318e8ba3013295af6ee9858791548b82d51a7f6 Mon Sep 17 00:00:00 2001 From: Raphael Hagen Date: Sun, 17 Mar 2024 13:40:28 -0600 Subject: [PATCH 4/8] updated to pydata_sphinx_theme --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 3c8f1236..fd8919eb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,7 @@ # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = "pangeo_sphinx_book_theme" +html_theme = "pydata_sphinx_theme" html_theme_options = { "repository_url": "https://github.com/TomNicholas/VirtualiZarr", "repository_branch": "main", From 0e8edd26330ebf72576dac00a4d46140314a4c10 Mon Sep 17 00:00:00 2001 From: Raphael Hagen Date: Sun, 17 Mar 2024 13:41:17 -0600 Subject: [PATCH 5/8] added pydata_sphinx_theme to pyproject tests --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b37d8a7b..afba4918 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ test = [ docs = [ "sphinx", - "pangeo-sphinx-book-theme", + "pydata_sphinx_theme", "sphinx-autodoc-typehints", "sphinx_copybutton", "sphinx_togglebutton", From b2e7112dd84ddcd82788522d3d1ac18ff6153922 Mon Sep 17 00:00:00 2001 From: Raphael Hagen Date: Sun, 17 Mar 2024 13:45:16 -0600 Subject: [PATCH 6/8] updated main.yml workflow to ignore docs change --- .github/workflows/main.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 000e3285..0626b551 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -2,11 +2,11 @@ name: CI on: push: - branches: - - main + branches: [ "main" ] + paths-ignore: + - 'docs/**' pull_request: - branches: - - main + branches: [ "main" ] schedule: - cron: "0 0 * * *" From 
dd4e1be611005cd49047e18650c13c3060b5830c Mon Sep 17 00:00:00 2001 From: Raphael Hagen Date: Sun, 17 Mar 2024 13:46:09 -0600 Subject: [PATCH 7/8] nit docs main.yml --- .github/workflows/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0626b551..457d278a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,6 +7,8 @@ on: - 'docs/**' pull_request: branches: [ "main" ] + paths-ignore: + - 'docs/**' schedule: - cron: "0 0 * * *" From 350d806d3cc2267604475320a4841274c1b53bc6 Mon Sep 17 00:00:00 2001 From: Raphael Hagen Date: Sun, 17 Mar 2024 14:03:43 -0600 Subject: [PATCH 8/8] lint + doc.yml & readthedocs.yml --- .readthedocs.yml | 23 +++++++++++++++++++++++ ci/doc.yml | 15 +++++++++++++++ pyproject.toml | 14 -------------- virtualizarr/__init__.py | 6 ++---- 4 files changed, 40 insertions(+), 18 deletions(-) create mode 100644 .readthedocs.yml create mode 100644 ci/doc.yml diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..08a0fa94 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,23 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-20.04 + tools: + python: "mambaforge-4.10" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally declare the Python requirements required to build your docs +conda: + environment: ci/doc.yml +python: + install: + - method: pip + path: . 
diff --git a/ci/doc.yml b/ci/doc.yml new file mode 100644 index 00000000..075d2cfd --- /dev/null +++ b/ci/doc.yml @@ -0,0 +1,15 @@ +name: virtualizarr-docs +channels: + - conda-forge + - nodefaults +dependencies: + - python>=3.9 + - "sphinx" + - "pydata_sphinx_theme" + - "sphinx-autodoc-typehints" + - "sphinx_copybutton" + - "sphinx_togglebutton" + - "sphinx_design" + - "myst_nb" + - pip: + - -e .. diff --git a/pyproject.toml b/pyproject.toml index 1d3c0719..6a45df11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,6 @@ dynamic = ["version"] dependencies = [ "xarray", "kerchunk", - "ujson", "pydantic", "packaging", ] @@ -35,22 +34,9 @@ test = [ "pytest", ] -docs = [ - "sphinx", - "pydata_sphinx_theme", - "sphinx-autodoc-typehints", - "sphinx_copybutton", - "sphinx_togglebutton", - "sphinx_design", - "myst_nb" -] -[project.optional-dependencies] -test = [ - "pytest" -] [project.urls] Home = "https://github.com/TomNicholas/VirtualiZarr" diff --git a/virtualizarr/__init__.py b/virtualizarr/__init__.py index 790ff02c..485fd87e 100644 --- a/virtualizarr/__init__.py +++ b/virtualizarr/__init__.py @@ -1,5 +1,3 @@ from .manifests import ManifestArray # type: ignore # noqa -from .xarray import ( # type: ignore # noqa - VirtualiZarrDatasetAccessor, - open_dataset_via_kerchunk, -) +from .xarray import VirtualiZarrDatasetAccessor # type: ignore # noqa +from .xarray import open_dataset_via_kerchunk # noqa: F401