Commit 1200b83

Merge branch 'main' into fix-pollution

flying-sheep authored Aug 31, 2023
2 parents 76e1c26 + 22f33bb
Showing 41 changed files with 481 additions and 154 deletions.
12 changes: 6 additions & 6 deletions .azure-pipelines.yml
@@ -13,13 +13,13 @@ jobs:
       vmImage: "ubuntu-22.04"
     strategy:
       matrix:
-        Python310:
-          python.version: "3.10"
+        Python3.11:
+          python.version: "3.11"
           RUN_COVERAGE: yes
-        Python38:
+        Python3.8:
           python.version: "3.8"
         PreRelease:
-          python.version: "3.10"
+          python.version: "3.11"
           PRERELEASE_DEPENDENCIES: yes
     steps:
       - task: UsePythonVersion@0
@@ -85,8 +85,8 @@ jobs:
   steps:
     - task: UsePythonVersion@0
       inputs:
-        versionSpec: "3.10"
-      displayName: "Use Python 3.10"
+        versionSpec: "3.11"
+      displayName: "Use Python 3.11"

     - script: |
        python -m pip install --upgrade pip
8 changes: 6 additions & 2 deletions .github/workflows/check-pr-milestoned.yml
@@ -20,12 +20,16 @@ on:
       - synchronize

 env:
-  LABELS: ${{ join( github.event.pull_request.labels.*.name, '|' ) }}
+  LABELS: ${{ join(github.event.pull_request.labels.*.name, '|') }}

 jobs:
   check-milestone:
     name: "Triage: Check Milestone"
     runs-on: ubuntu-latest
     steps:
-      - if: github.event.pull_request.milestone == null && contains( env.LABELS, 'no milestone' ) == false
+      - name: Check if merging isn’t blocked
+        if: contains(env.LABELS, 'DON’T MERGE')
+        run: exit 1
+      - name: Check if a milestone is necessary and exists
+        if: github.event.pull_request.milestone == null && contains(env.LABELS, 'no milestone') == false
         run: exit 1
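Note on the expressions above: `join(github.event.pull_request.labels.*.name, '|')` flattens the PR's label names into one `|`-separated string, so `contains()` is a plain substring test on it. A standalone Python sketch of that behavior (the label names are hypothetical):

```python
# Mirror of what the workflow's join()/contains() pair computes.
labels = "|".join(["bug", "DON’T MERGE"])  # like env.LABELS

print("DON’T MERGE" in labels)   # True  -> the first step runs `exit 1`
print("no milestone" in labels)  # False -> the milestone check still applies
```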
22 changes: 22 additions & 0 deletions .github/workflows/codespell.yml
@@ -0,0 +1,22 @@
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
2 changes: 1 addition & 1 deletion .github/workflows/test-gpu.yml
@@ -51,7 +51,7 @@ jobs:
           micromamba-version: "1.3.1-0"
           environment-name: anndata-gpu-ci
           create-args: >-
-            python=3.10
+            python=3.11
             cupy
             numba
             pytest
11 changes: 9 additions & 2 deletions .pre-commit-config.yaml
@@ -5,12 +5,12 @@ repos:
       - id: black
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: "v0.0.282"
+    rev: "v0.0.286"
     hooks:
       - id: ruff
         args: ["--fix"]
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v3.0.1
+    rev: v3.0.2
     hooks:
       - id: prettier
   - repo: https://github.com/pre-commit/pre-commit-hooks
@@ -26,3 +26,10 @@ repos:
       - id: detect-private-key
       - id: no-commit-to-branch
         args: ["--branch=main"]
+
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.5
+    hooks:
+      - id: codespell
+        additional_dependencies:
+          - tomli
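The `tomli` entry is presumably there so codespell can read a `[tool.codespell]` table from `pyproject.toml` on interpreters that lack the stdlib `tomllib` (added in Python 3.11); that reading of this repo's setup is an assumption. A minimal sketch of the lookup:

```python
# Assumes a pyproject.toml with a [tool.codespell] table exists;
# on Python >= 3.11, the stdlib `tomllib` works the same way.
import tomli

with open("pyproject.toml", "rb") as f:
    codespell_cfg = tomli.load(f).get("tool", {}).get("codespell", {})
print(codespell_cfg)  # e.g. {'skip': '*.ipynb', ...}
```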
2 changes: 1 addition & 1 deletion .readthedocs.yml
@@ -2,7 +2,7 @@ version: 2
 build:
   os: ubuntu-20.04
   tools:
-    python: "3.10"
+    python: "3.11"
 sphinx:
   configuration: docs/conf.py
   fail_on_warning: true # do not change or you will be fired
4 changes: 2 additions & 2 deletions README.md
@@ -3,8 +3,8 @@
 [![Coverage](https://codecov.io/gh/scverse/anndata/branch/main/graph/badge.svg?token=IN1mJN1Wi8)](https://codecov.io/gh/scverse/anndata)
 [![Docs](https://readthedocs.com/projects/icb-anndata/badge/?version=latest)](https://anndata.readthedocs.io)
 [![PyPI](https://img.shields.io/pypi/v/anndata.svg)](https://pypi.org/project/anndata)
-[![PyPIDownloadsMonth](https://img.shields.io/pypi/dm/scanpy?logo=PyPI&color=blue)](https://pypi.org/project/anndata)
-[![PyPIDownloadsTotal](https://pepy.tech/badge/anndata)](https://pepy.tech/project/anndata)
+[![Downloads](https://static.pepy.tech/badge/anndata/month)](https://pepy.tech/project/anndata)
+[![Downloads](https://static.pepy.tech/badge/anndata)](https://pepy.tech/project/anndata)
 [![Stars](https://img.shields.io/github/stars/scverse/anndata?logo=GitHub&color=yellow)](https://github.com/scverse/anndata/stargazers)
 [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](http://numfocus.org)

20 changes: 18 additions & 2 deletions anndata/__init__.py
@@ -12,6 +12,13 @@
     "anndata is not correctly installed. Please install it, e.g. with pip."
 )

+# Allowing notes to be added to exceptions. See: https://github.com/scverse/anndata/issues/868
+import sys
+
+if sys.version_info < (3, 11):
+    # Backport package for exception groups
+    import exceptiongroup  # noqa: F401
+
 from ._core.anndata import AnnData
 from ._core.merge import concat
 from ._core.raw import Raw
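For context on the hunk above: `BaseException.add_note` only exists on Python ≥ 3.11, and the `exceptiongroup` backport also patches traceback rendering so that manually attached `__notes__` are displayed on older interpreters. A minimal sketch, not anndata API; the helper name `add_note_compat` is hypothetical:

```python
import sys

if sys.version_info < (3, 11):
    # Backport: also makes tracebacks display __notes__
    import exceptiongroup  # noqa: F401


def add_note_compat(err: BaseException, msg: str) -> None:
    """Attach a note to an exception across Python versions."""
    if hasattr(err, "add_note"):  # built in on Python >= 3.11
        err.add_note(msg)
    else:
        err.__notes__ = [*getattr(err, "__notes__", []), msg]


try:
    raise ValueError("shapes do not match")
except ValueError as e:
    add_note_compat(e, "hint: check that obs names are aligned")
    raise
```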
@@ -34,8 +41,17 @@
 )
 from . import experimental

-# backwards compat / shortcut for default format
-read = read_h5ad
+
+def read(*args, **kwargs):
+    import warnings
+
+    warnings.warn(
+        "`anndata.read` is deprecated, use `anndata.read_h5ad` instead. "
+        "`ad.read` will be removed in mid 2024.",
+        FutureWarning,
+    )
+    return read_h5ad(*args, **kwargs)
+

 __all__ = [
     "__version__",
15 changes: 10 additions & 5 deletions anndata/_core/aligned_mapping.py
@@ -244,12 +244,17 @@ def _validate_value(self, val: V, key: str) -> V:
         if (
             hasattr(val, "index")
             and isinstance(val.index, cabc.Collection)
-            and not (val.index == self.dim_names).all()
+            and not val.index.equals(self.dim_names)
         ):
             # Could probably also re-order index if it’s contained
-            raise ValueError(
-                f"value.index does not match parent’s axis {self.axes[0]} names"
-            )
+            try:
+                pd.testing.assert_index_equal(val.index, self.dim_names)
+            except AssertionError as e:
+                msg = f"value.index does not match parent’s axis {self.axes[0]} names:\n{e}"
+                raise ValueError(msg) from None
+            else:
+                msg = "Index.equals and pd.testing.assert_index_equal disagree"
+                raise AssertionError(msg)
         return super()._validate_value(val, key)

     @property
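Note on the index check above: `Index.equals` (plus `assert_index_equal` for the error message) replaces the old elementwise `==`, which raises on length mismatch and treats NaN as unequal to itself. A standalone illustration:

```python
import pandas as pd

a = pd.Index(["c1", "c2", float("nan")])
b = pd.Index(["c1", "c2", float("nan")])

print(a.equals(b))     # True: length- and NaN-aware comparison
print((a == b).all())  # False: NaN != NaN elementwise
# (a == pd.Index(["c1"])).all() would raise ValueError (length mismatch)

try:
    pd.testing.assert_index_equal(a, pd.Index(["c1", "c2", "c3"]))
except AssertionError as e:
    print(e)  # describes exactly which values differ, as reused above
```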
@@ -300,7 +305,7 @@ class LayersBase(AlignedMapping):
     attrname = "layers"
     axes = (0, 1)

-    # TODO: I thought I had a more elegant solution to overiding this...
+    # TODO: I thought I had a more elegant solution to overriding this...
     def copy(self) -> "Layers":
         d = self._actual_class(self.parent)
         for k, v in self.items():
18 changes: 11 additions & 7 deletions anndata/_core/anndata.py
@@ -1,6 +1,8 @@
 """\
 Main class and helper functions.
 """
+from __future__ import annotations
+
 import warnings
 import collections.abc as cabc
 from collections import OrderedDict
@@ -19,7 +21,7 @@
 import numpy as np
 from numpy import ma
 import pandas as pd
-from pandas.api.types import infer_dtype, is_string_dtype, is_categorical_dtype
+from pandas.api.types import infer_dtype, is_string_dtype
 from scipy import sparse
 from scipy.sparse import issparse, csr_matrix

@@ -648,7 +650,7 @@ def X(self, value: Optional[Union[np.ndarray, sparse.spmatrix]]):

         # If indices are both arrays, we need to modify them
         # so we don’t set values like coordinates
-        # This can occur if there are succesive views
+        # This can occur if there are successive views
         if (
             self.is_view
             and isinstance(self._oidx, np.ndarray)
@@ -665,7 +667,7 @@ def X(self, value: Optional[Union[np.ndarray, sparse.spmatrix]]):
         ):
             if not np.isscalar(value) and self.shape != value.shape:
                 # For assigning vector of values to 2d array or matrix
-                # Not neccesary for row of 2d array
+                # Not necessary for row of 2d array
                 value = value.reshape(self.shape)
             if self.isbacked:
                 if self.is_view:
@@ -1114,9 +1116,11 @@ def __getitem__(self, index: Index) -> "AnnData":
         oidx, vidx = self._normalize_indices(index)
         return AnnData(self, oidx=oidx, vidx=vidx, asview=True)

-    def _remove_unused_categories(self, df_full, df_sub, uns):
+    def _remove_unused_categories(
+        self, df_full: pd.DataFrame, df_sub: pd.DataFrame, uns: dict[str, Any]
+    ):
         for k in df_full:
-            if not is_categorical_dtype(df_full[k]):
+            if not isinstance(df_full[k].dtype, pd.CategoricalDtype):
                 continue
             all_categories = df_full[k].cat.categories
             with pd.option_context("mode.chained_assignment", None):
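The `isinstance(..., pd.CategoricalDtype)` check above is the forward-compatible replacement for pandas' deprecated `is_categorical_dtype`. A sketch of the pattern (not the method's exact body), including the category pruning the method's name refers to:

```python
import pandas as pd

df = pd.DataFrame({"cell_type": pd.Categorical(["B", "T", "B"])})
sub = df[df["cell_type"] == "B"].copy()  # a subset, as after slicing

if isinstance(sub["cell_type"].dtype, pd.CategoricalDtype):  # the new check
    sub["cell_type"] = sub["cell_type"].cat.remove_unused_categories()

print(sub["cell_type"].cat.categories.tolist())  # ['B']; 'T' was pruned
```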
@@ -1373,7 +1377,7 @@ def obs_vector(self, k: str, *, layer: Optional[str] = None) -> np.ndarray:

         Returns
         -------
-        A one dimensional nd array, with values for each obs in the same order
+        A one dimensional ndarray, with values for each obs in the same order
         as :attr:`obs_names`.
         """
         if layer == "X":
@@ -1405,7 +1409,7 @@ def var_vector(self, k, *, layer: Optional[str] = None) -> np.ndarray:

         Returns
         -------
-        A one dimensional nd array, with values for each var in the same order
+        A one dimensional ndarray, with values for each var in the same order
         as :attr:`var_names`.
         """
         if layer == "X":
44 changes: 22 additions & 22 deletions anndata/_core/merge.py
@@ -4,27 +4,25 @@
 from __future__ import annotations

 from collections import OrderedDict
-from collections.abc import Mapping, MutableSet
-from functools import reduce, singledispatch
-from itertools import repeat
-from operator import and_, or_, sub
-from typing import (
-    Any,
+from collections.abc import (
     Callable,
     Collection,
+    Mapping,
+    MutableSet,
     Iterable,
-    Optional,
-    Tuple,
-    TypeVar,
-    Union,
-    Literal,
     Sequence,
 )
+from functools import reduce, singledispatch
+from itertools import repeat
+from operator import and_, or_, sub
+from typing import Any, Optional, TypeVar, Union, Literal
 import typing
 from warnings import warn, filterwarnings

 from natsort import natsorted
 import numpy as np
 import pandas as pd
+from pandas.api.extensions import ExtensionDtype
 from scipy import sparse
 from scipy.sparse import spmatrix
@@ -96,7 +94,7 @@ def not_missing(v) -> bool:


 # We need to be able to check for equality of arrays to know which are the same.
-# Unfortunatley equality of arrays is poorly defined.
+# Unfortunately equality of arrays is poorly defined.
 # * `np.array_equal` does not work for sparse arrays
 # * `np.array_equal(..., equal_nan=True)` does not work for null values at the moment
 #   (see https://github.com/numpy/numpy/issues/16377)
@@ -211,7 +209,7 @@ def unify_dtypes(dfs: Iterable[pd.DataFrame]) -> list[pd.DataFrame]:
     df_dtypes = [dict(df.dtypes) for df in dfs]
     columns = reduce(lambda x, y: x.union(y), [df.columns for df in dfs])

-    dtypes = {col: list() for col in columns}
+    dtypes: dict[str, list[np.dtype | ExtensionDtype]] = {col: [] for col in columns}
     for col in columns:
         for df in df_dtypes:
             dtypes[col].append(df.get(col, None))
@@ -235,7 +233,9 @@ def unify_dtypes(dfs: Iterable[pd.DataFrame]) -> list[pd.DataFrame]:
     return dfs


-def try_unifying_dtype(col: list) -> pd.core.dtypes.base.ExtensionDtype | None:
+def try_unifying_dtype(
+    col: Sequence[np.dtype | ExtensionDtype],
+) -> pd.core.dtypes.base.ExtensionDtype | None:
     """
     If dtypes can be unified, returns the dtype they would be unified to.
@@ -248,26 +248,26 @@ def try_unifying_dtype(col: list) -> pd.core.dtypes.base.ExtensionDtype | None:
         A list of dtypes to unify. Can be numpy/ pandas dtypes, or None (which denotes
         a missing value)
     """
-    dtypes = set()
+    dtypes: set[pd.CategoricalDtype] = set()
     # Categorical
-    if any([pd.api.types.is_categorical_dtype(x) for x in col]):
+    if any(isinstance(dtype, pd.CategoricalDtype) for dtype in col):
         ordered = False
         for dtype in col:
-            if pd.api.types.is_categorical_dtype(dtype):
+            if isinstance(dtype, pd.CategoricalDtype):
                 dtypes.add(dtype)
                 ordered = ordered | dtype.ordered
             elif not pd.isnull(dtype):
                 return False
         if len(dtypes) > 0 and not ordered:
             categories = reduce(
                 lambda x, y: x.union(y),
-                [x.categories for x in dtypes if not pd.isnull(x)],
+                [dtype.categories for dtype in dtypes if not pd.isnull(dtype)],
             )

             return pd.CategoricalDtype(natsorted(categories), ordered=False)
     # Boolean
-    elif all([pd.api.types.is_bool_dtype(x) or x is None for x in col]):
-        if any([x is None for x in col]):
+    elif all(pd.api.types.is_bool_dtype(dtype) or dtype is None for dtype in col):
+        if any(dtype is None for dtype in col):
             return pd.BooleanDtype()
         else:
             return None
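Run standalone, the categorical branch above computes the naturally sorted union of all categories as a new unordered dtype; for example:

```python
from functools import reduce

import pandas as pd
from natsort import natsorted

dtypes = [
    pd.CategoricalDtype(["s1", "s10", "s2"]),
    pd.CategoricalDtype(["s2", "s3"]),
]
categories = reduce(lambda x, y: x.union(y), (d.categories for d in dtypes))
unified = pd.CategoricalDtype(natsorted(categories), ordered=False)
print(list(unified.categories))  # ['s1', 's2', 's3', 's10'] (natural order)
```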
@@ -942,7 +942,7 @@ def merge_outer(mappings, batch_keys, *, join_index="-", merge=merge_unique):
     return out


-def _resolve_dim(*, dim: str = None, axis: int = None) -> Tuple[int, str]:
+def _resolve_dim(*, dim: str = None, axis: int = None) -> tuple[int, str]:
     _dims = ("obs", "var")
     if (dim is None and axis is None) or (dim is not None and axis is not None):
         raise ValueError(
@@ -1042,7 +1042,7 @@ def concat(
         incrementing integer labels.
     index_unique
         Whether to make the index unique by using the keys. If provided, this
-        is the delimeter between "{orig_idx}{index_unique}{key}". When `None`,
+        is the delimiter between "{orig_idx}{index_unique}{key}". When `None`,
         the original indices are kept.
     fill_value
         When `join="outer"`, this is the value that will be used to fill the introduced
6 changes: 5 additions & 1 deletion anndata/_core/raw.py
@@ -29,7 +29,11 @@ def __init__(
         self._n_obs = adata.n_obs
         # construct manually
         if adata.isbacked == (X is None):
-            self._X = X
+            # Move from GPU to CPU since it's large and not always used
+            if isinstance(X, (CupyArray, CupySparseMatrix)):
+                self._X = X.get()
+            else:
+                self._X = X
             self._var = _gen_dataframe(var, self.X.shape[1], ["var_names"])
             self._varm = AxisArrays(self, 1, varm)
         elif X is None:  # construct from adata
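`X.get()` above is CuPy's device-to-host copy, returning a `numpy.ndarray`; standalone (requires a CUDA-capable environment):

```python
import cupy as cp
import numpy as np

gpu_x = cp.arange(6, dtype=cp.float32).reshape(2, 3)
cpu_x = gpu_x.get()  # copy from GPU memory to host
assert isinstance(cpu_x, np.ndarray)
```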
2 changes: 1 addition & 1 deletion anndata/_io/read.py
@@ -441,7 +441,7 @@ def _read_text(
         else:
             data.append(np.array(line_list, dtype=dtype))
     # logg.msg(" read data into list of lists", t=True, v=4)
-    # transfrom to array, this takes a long time and a lot of memory
+    # transform to array, this takes a long time and a lot of memory
     # but it’s actually the same thing as np.genfromtxt does
     # - we don’t use the latter as it would involve another slicing step
     # in the end, to separate row_names from float data, slicing takes
(Diffs for the remaining changed files are not shown; 41 files changed in total.)