diff --git a/.github/workflows/test-gpu.yml b/.github/workflows/test-gpu.yml index 02f681c4d..97b2c689c 100644 --- a/.github/workflows/test-gpu.yml +++ b/.github/workflows/test-gpu.yml @@ -85,3 +85,10 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} fail_ci_if_error: true verbose: true + + - name: Remove 'run-gpu-ci' Label + if: always() + uses: actions-ecosystem/action-remove-labels@v1 + with: + labels: "run-gpu-ci" + github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 05c0c2a72..ed2d362cb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.8 + rev: v0.7.0 hooks: - id: ruff types_or: [python, pyi, jupyter] @@ -14,7 +14,7 @@ repos: exclude_types: - markdown - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py index 2cd21b5fc..3b43def7c 100644 --- a/src/anndata/_io/specs/registry.py +++ b/src/anndata/_io/specs/registry.py @@ -395,10 +395,62 @@ def read_elem_as_dask( chunks, optional length `n`, the same `n` as the size of the underlying array. Note that the minor axis dimension must match the shape for sparse. + Defaults to `(1000, adata.shape[1])` for CSR sparse, + `(adata.shape[0], 1000)` for CSC sparse, + and the on-disk chunking for dense. 
Returns ------- DaskArray + + Examples + -------- + + Setting up our example: + + >>> from scanpy.datasets import pbmc3k + >>> import tempfile + >>> import anndata as ad + >>> import zarr + + >>> tmp_path = tempfile.gettempdir() + >>> zarr_path = tmp_path + "/adata.zarr" + + >>> adata = pbmc3k() + >>> adata.layers["dense"] = adata.X.toarray() + >>> adata.write_zarr(zarr_path) + + Reading a sparse matrix from a zarr store lazily, with custom chunk size and default: + + >>> g = zarr.open(zarr_path) + >>> adata.X = ad.experimental.read_elem_as_dask(g["X"]) + >>> adata.X + dask.array + >>> adata.X = ad.experimental.read_elem_as_dask( + ... g["X"], chunks=(500, adata.shape[1]) + ... ) + >>> adata.X + dask.array + + Reading a dense matrix from a zarr store lazily: + + >>> adata.layers["dense"] = ad.experimental.read_elem_as_dask(g["layers/dense"]) + >>> adata.layers["dense"] + dask.array + + Making a new anndata object from on-disk, with custom chunks: + + >>> adata = ad.AnnData( + ... obs=ad.io.read_elem(g["obs"]), + ... var=ad.io.read_elem(g["var"]), + ... uns=ad.io.read_elem(g["uns"]), + ... obsm=ad.io.read_elem(g["obsm"]), + ... varm=ad.io.read_elem(g["varm"]), + ... ) + >>> adata.X = ad.experimental.read_elem_as_dask( + ... g["X"], chunks=(500, adata.shape[1]) + ... ) + >>> adata.layers["dense"] = ad.experimental.read_elem_as_dask(g["layers/dense"]) """ return DaskReader(_LAZY_REGISTRY).read_elem(elem, chunks=chunks)