diff --git a/anndata/__init__.py b/anndata/__init__.py index 17289c6de..b3b1c674e 100644 --- a/anndata/__init__.py +++ b/anndata/__init__.py @@ -1,4 +1,5 @@ """Annotated multivariate observation data.""" +from __future__ import annotations try: # See https://github.com/maresb/hatch-vcs-footgun-example from setuptools_scm import get_version @@ -8,9 +9,8 @@ try: from ._version import __version__ except ModuleNotFoundError: - raise RuntimeError( - "anndata is not correctly installed. Please install it, e.g. with pip." - ) from None + msg = "anndata is not correctly installed. Please install it, e.g. with pip." + raise RuntimeError(msg) from None # Allowing notes to be added to exceptions. See: https://github.com/scverse/anndata/issues/868 import sys @@ -34,10 +34,10 @@ read_zarr, ) from ._warnings import ( + ExperimentalFeatureWarning, + ImplicitModificationWarning, OldFormatWarning, WriteWarning, - ImplicitModificationWarning, - ExperimentalFeatureWarning, ) if True: # Bypass isort, this needs to come last diff --git a/anndata/_core/access.py b/anndata/_core/access.py index 7dacaa6dc..43e23932e 100644 --- a/anndata/_core/access.py +++ b/anndata/_core/access.py @@ -1,16 +1,19 @@ +from __future__ import annotations + from functools import reduce -from typing import NamedTuple, Tuple +from typing import TYPE_CHECKING, NamedTuple -from . import anndata +if TYPE_CHECKING: + from . import anndata class ElementRef(NamedTuple): - parent: "anndata.AnnData" + parent: anndata.AnnData attrname: str - keys: Tuple[str, ...] = () + keys: tuple[str, ...] = () def __str__(self) -> str: - return f".{self.attrname}" + "".join(map(lambda x: f"['{x}']", self.keys)) + return f".{self.attrname}" + "".join(f"['{x}']" for x in self.keys) @property def _parent_el(self): diff --git a/anndata/_core/aligned_mapping.py b/anndata/_core/aligned_mapping.py index 7d11262b5..2d3eab666 100644 --- a/anndata/_core/aligned_mapping.py +++ b/anndata/_core/aligned_mapping.py @@ -1,26 +1,34 @@ +from __future__ import annotations + +import warnings from abc import ABC, abstractmethod from collections import abc as cabc +from collections.abc import Iterator, Mapping, Sequence from copy import copy -from typing import Union, Optional, Type, ClassVar, TypeVar -from typing import Iterator, Mapping, Sequence -from typing import Tuple, List, Dict -import warnings +from typing import ( + TYPE_CHECKING, + ClassVar, + TypeVar, + Union, +) import numpy as np import pandas as pd from scipy.sparse import spmatrix -from ..utils import deprecated, ensure_df_homogeneous, dim_len -from . import raw, anndata -from .views import as_view, view_update +from anndata._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning +from anndata.compat import AwkArray + +from ..utils import deprecated, dim_len, ensure_df_homogeneous from .access import ElementRef from .index import _subset -from anndata.compat import AwkArray -from anndata._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning +from .views import as_view, view_update +if TYPE_CHECKING: + from . import anndata, raw OneDIdx = Union[Sequence[int], Sequence[bool], slice] -TwoDIdx = Tuple[OneDIdx, OneDIdx] +TwoDIdx = tuple[OneDIdx, OneDIdx] I = TypeVar("I", OneDIdx, TwoDIdx, covariant=True) # TODO: pd.DataFrame only allowed in AxisArrays? 
@@ -36,16 +44,16 @@ class AlignedMapping(cabc.MutableMapping, ABC): _allow_df: ClassVar[bool] """If this mapping supports heterogeneous DataFrames""" - _view_class: ClassVar[Type["AlignedViewMixin"]] + _view_class: ClassVar[type[AlignedViewMixin]] """The view class for this aligned mapping.""" - _actual_class: ClassVar[Type["AlignedActualMixin"]] + _actual_class: ClassVar[type[AlignedActualMixin]] """The actual class (which has it’s own data) for this aligned mapping.""" def __repr__(self): return f"{type(self).__name__} with keys: {', '.join(self.keys())}" - def _ipython_key_completions_(self) -> List[str]: + def _ipython_key_completions_(self) -> list[str]: return list(self.keys()) def _validate_value(self, val: V, key: str) -> V: @@ -69,17 +77,14 @@ def _validate_value(self, val: V, key: str) -> V: right_shape = tuple(self.parent.shape[a] for a in self.axes) actual_shape = tuple(dim_len(val, a) for a, _ in enumerate(self.axes)) if actual_shape[i] is None and isinstance(val, AwkArray): + msg = f"The AwkwardArray is of variable length in dimension {i}." raise ValueError( - f"The AwkwardArray is of variable length in dimension {i}.", + msg, f"Try ak.to_regular(array, {i}) before including the array in AnnData", ) else: - raise ValueError( - f"Value passed for key {key!r} is of incorrect shape. " - f"Values of {self.attrname} must match dimensions " - f"{self.axes} of parent. Value had shape {actual_shape} while " - f"it should have had {right_shape}." - ) + msg = f"Value passed for key {key!r} is of incorrect shape. Values of {self.attrname} must match dimensions {self.axes} of parent. Value had shape {actual_shape} while it should have had {right_shape}." + raise ValueError(msg) if not self._allow_df and isinstance(val, pd.DataFrame): name = self.attrname.title().rstrip("s") @@ -90,13 +95,11 @@ def _validate_value(self, val: V, key: str) -> V: @abstractmethod def attrname(self) -> str: """What attr for the AnnData is this?""" - pass @property @abstractmethod - def axes(self) -> Tuple[int, ...]: + def axes(self) -> tuple[int, ...]: """Which axes of the parent is this aligned to?""" - pass @property @abstractmethod @@ -104,7 +107,7 @@ def is_view(self) -> bool: pass @property - def parent(self) -> Union["anndata.AnnData", "raw.Raw"]: + def parent(self) -> anndata.AnnData | raw.Raw: return self._parent def copy(self): @@ -117,7 +120,7 @@ def copy(self): d[k] = v.copy() return d - def _view(self, parent: "anndata.AnnData", subset_idx: I): + def _view(self, parent: anndata.AnnData, subset_idx: I): """Returns a subset copy-on-write view of the object.""" return self._view_class(self, parent, subset_idx) @@ -127,7 +130,7 @@ def as_dict(self) -> dict: class AlignedViewMixin: - parent: "anndata.AnnData" + parent: anndata.AnnData """Reference to parent AnnData view""" attrname: str @@ -177,7 +180,7 @@ def __len__(self) -> int: class AlignedActualMixin: - _data: Dict[str, V] + _data: dict[str, V] """Underlying mapping to the data""" is_view = False @@ -216,7 +219,7 @@ def attrname(self) -> str: return f"{self.dim}m" @property - def axes(self) -> Tuple[int]: + def axes(self) -> tuple[int]: """Axes of the parent this is aligned to""" return (self._axis,) @@ -225,7 +228,7 @@ def dim(self) -> str: """Name of the dimension this aligned to.""" return self._dimnames[self._axis] - def flipped(self) -> "AxisArraysBase": + def flipped(self) -> AxisArraysBase: """Transpose.""" new = self.copy() new.dimension = abs(self._axis - 1) @@ -265,9 +268,9 @@ def dim_names(self) -> pd.Index: class 
AxisArrays(AlignedActualMixin, AxisArraysBase): def __init__( self, - parent: Union["anndata.AnnData", "raw.Raw"], + parent: anndata.AnnData | raw.Raw, axis: int, - vals: Union[Mapping, AxisArraysBase, None] = None, + vals: Mapping | AxisArraysBase | None = None, ): self._parent = parent if axis not in (0, 1): @@ -282,7 +285,7 @@ class AxisArraysView(AlignedViewMixin, AxisArraysBase): def __init__( self, parent_mapping: AxisArraysBase, - parent_view: "anndata.AnnData", + parent_view: anndata.AnnData, subset_idx: OneDIdx, ): self.parent_mapping = parent_mapping @@ -306,7 +309,7 @@ class LayersBase(AlignedMapping): axes = (0, 1) # TODO: I thought I had a more elegant solution to overriding this... - def copy(self) -> "Layers": + def copy(self) -> Layers: d = self._actual_class(self.parent) for k, v in self.items(): d[k] = v.copy() @@ -314,7 +317,7 @@ def copy(self) -> "Layers": class Layers(AlignedActualMixin, LayersBase): - def __init__(self, parent: "anndata.AnnData", vals: Optional[Mapping] = None): + def __init__(self, parent: anndata.AnnData, vals: Mapping | None = None): self._parent = parent self._data = dict() if vals is not None: @@ -325,7 +328,7 @@ class LayersView(AlignedViewMixin, LayersBase): def __init__( self, parent_mapping: LayersBase, - parent_view: "anndata.AnnData", + parent_view: anndata.AnnData, subset_idx: TwoDIdx, ): self.parent_mapping = parent_mapping @@ -351,7 +354,7 @@ def attrname(self) -> str: return f"{self.dim}p" @property - def axes(self) -> Tuple[int, int]: + def axes(self) -> tuple[int, int]: """Axes of the parent this is aligned to""" return self._axis, self._axis @@ -364,9 +367,9 @@ def dim(self) -> str: class PairwiseArrays(AlignedActualMixin, PairwiseArraysBase): def __init__( self, - parent: "anndata.AnnData", + parent: anndata.AnnData, axis: int, - vals: Optional[Mapping] = None, + vals: Mapping | None = None, ): self._parent = parent if axis not in (0, 1): @@ -381,7 +384,7 @@ class PairwiseArraysView(AlignedViewMixin, PairwiseArraysBase): def __init__( self, parent_mapping: PairwiseArraysBase, - parent_view: "anndata.AnnData", + parent_view: anndata.AnnData, subset_idx: OneDIdx, ): self.parent_mapping = parent_mapping diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index b631b79f8..8aecece40 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -3,60 +3,66 @@ """ from __future__ import annotations -import warnings import collections.abc as cabc +import warnings from collections import OrderedDict +from collections.abc import Iterable, Mapping, MutableMapping, Sequence from copy import copy, deepcopy from enum import Enum from functools import partial, singledispatch from pathlib import Path -from os import PathLike from textwrap import dedent -from typing import Any, Union, Optional, Literal # Meta -from typing import Iterable, Sequence, Mapping, MutableMapping # Generic ABCs -from typing import Tuple, List # Generic +from typing import ( # Meta # Generic ABCs # Generic + TYPE_CHECKING, + Any, + Literal, +) import h5py -from natsort import natsorted import numpy as np -from numpy import ma import pandas as pd +from natsort import natsorted +from numpy import ma from pandas.api.types import infer_dtype, is_string_dtype from scipy import sparse -from scipy.sparse import issparse, csr_matrix +from scipy.sparse import csr_matrix, issparse from anndata._warnings import ImplicitModificationWarning -from .raw import Raw -from .index import _normalize_indices, _subset, Index, Index1D, get_vector -from .file_backing import 
AnnDataFileManager, to_memory + +from .. import utils +from ..compat import ( + CupyArray, + CupySparseMatrix, + DaskArray, + ZappyArray, + ZarrArray, + _move_adj_mtx, +) +from ..logging import anndata_logger as logger +from ..utils import convert_to_dict, dim_len, ensure_df_homogeneous from .access import ElementRef from .aligned_mapping import ( AxisArrays, AxisArraysView, - PairwiseArrays, - PairwiseArraysView, Layers, LayersView, + PairwiseArrays, + PairwiseArraysView, ) +from .file_backing import AnnDataFileManager, to_memory +from .index import Index, Index1D, _normalize_indices, _subset, get_vector +from .raw import Raw +from .sparse_dataset import sparse_dataset from .views import ( ArrayView, - DictView, DataFrameView, - as_view, + DictView, _resolve_idxs, + as_view, ) -from .sparse_dataset import sparse_dataset -from .. import utils -from ..utils import convert_to_dict, ensure_df_homogeneous, dim_len -from ..logging import anndata_logger as logger -from ..compat import ( - ZarrArray, - ZappyArray, - DaskArray, - CupyArray, - CupySparseMatrix, - _move_adj_mtx, -) + +if TYPE_CHECKING: + from os import PathLike class StorageType(Enum): @@ -97,9 +103,8 @@ def _check_2d_shape(X): Assure that X is always 2D: Unlike numpy we always deal with 2D arrays. """ if X.dtype.names is None and len(X.shape) != 2: - raise ValueError( - f"X needs to be 2-dimensional, not {len(X.shape)}-dimensional." - ) + msg = f"X needs to be 2-dimensional, not {len(X.shape)}-dimensional." + raise ValueError(msg) def _mk_df_error( @@ -143,7 +148,7 @@ def mk_index(l: int) -> pd.Index: df = pd.DataFrame( anno, index=anno[index_name], - columns=[k for k in anno.keys() if k != index_name], + columns=[k for k in anno if k != index_name], ) break else: @@ -190,7 +195,8 @@ def _gen_dataframe_1d( attr: Literal["obs", "var"], length: int | None = None, ): - raise ValueError(f"Cannot convert {type(anno)} to {attr} DataFrame") + msg = f"Cannot convert {type(anno)} to {attr} DataFrame" + raise ValueError(msg) class AnnData(metaclass=utils.DeprecationMixinMeta): @@ -310,28 +316,29 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): def __init__( self, - X: Optional[Union[np.ndarray, sparse.spmatrix, pd.DataFrame]] = None, - obs: Optional[Union[pd.DataFrame, Mapping[str, Iterable[Any]]]] = None, - var: Optional[Union[pd.DataFrame, Mapping[str, Iterable[Any]]]] = None, - uns: Optional[Mapping[str, Any]] = None, - obsm: Optional[Union[np.ndarray, Mapping[str, Sequence[Any]]]] = None, - varm: Optional[Union[np.ndarray, Mapping[str, Sequence[Any]]]] = None, - layers: Optional[Mapping[str, Union[np.ndarray, sparse.spmatrix]]] = None, - raw: Optional[Mapping[str, Any]] = None, - dtype: Optional[Union[np.dtype, type, str]] = None, - shape: Optional[Tuple[int, int]] = None, - filename: Optional[PathLike] = None, - filemode: Optional[Literal["r", "r+"]] = None, + X: np.ndarray | sparse.spmatrix | pd.DataFrame | None = None, + obs: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, + var: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, + uns: Mapping[str, Any] | None = None, + obsm: np.ndarray | Mapping[str, Sequence[Any]] | None = None, + varm: np.ndarray | Mapping[str, Sequence[Any]] | None = None, + layers: Mapping[str, np.ndarray | sparse.spmatrix] | None = None, + raw: Mapping[str, Any] | None = None, + dtype: np.dtype | type | str | None = None, + shape: tuple[int, int] | None = None, + filename: PathLike | None = None, + filemode: Literal["r", "r+"] | None = None, asview: bool = False, *, - obsp: 
Optional[Union[np.ndarray, Mapping[str, Sequence[Any]]]] = None, - varp: Optional[Union[np.ndarray, Mapping[str, Sequence[Any]]]] = None, + obsp: np.ndarray | Mapping[str, Sequence[Any]] | None = None, + varp: np.ndarray | Mapping[str, Sequence[Any]] | None = None, oidx: Index1D = None, vidx: Index1D = None, ): if asview: if not isinstance(X, AnnData): - raise ValueError("`X` has to be an AnnData object.") + msg = "`X` has to be an AnnData object." + raise ValueError(msg) self._init_as_view(X, oidx, vidx) else: self._init_as_actual( @@ -351,21 +358,21 @@ def __init__( filemode=filemode, ) - def _init_as_view(self, adata_ref: "AnnData", oidx: Index, vidx: Index): + def _init_as_view(self, adata_ref: AnnData, oidx: Index, vidx: Index): if adata_ref.isbacked and adata_ref.is_view: - raise ValueError( - "Currently, you cannot index repeatedly into a backed AnnData, " - "that is, you cannot make a view of a view." - ) + msg = "Currently, you cannot index repeatedly into a backed AnnData, that is, you cannot make a view of a view." + raise ValueError(msg) self._is_view = True if isinstance(oidx, (int, np.integer)): if not (-adata_ref.n_obs <= oidx < adata_ref.n_obs): - raise IndexError(f"Observation index `{oidx}` is out of range.") + msg = f"Observation index `{oidx}` is out of range." + raise IndexError(msg) oidx += adata_ref.n_obs * (oidx < 0) oidx = slice(oidx, oidx + 1, 1) if isinstance(vidx, (int, np.integer)): if not (-adata_ref.n_vars <= vidx < adata_ref.n_vars): - raise IndexError(f"Variable index `{vidx}` is out of range.") + msg = f"Variable index `{vidx}` is out of range." + raise IndexError(msg) vidx += adata_ref.n_vars * (vidx < 0) vidx = slice(vidx, vidx + 1, 1) if adata_ref.is_view: @@ -446,9 +453,8 @@ def _init_as_actual( # init from AnnData if isinstance(X, AnnData): if any((obs, var, uns, obsm, varm, obsp, varp)): - raise ValueError( - "If `X` is a dict no further arguments must be provided." - ) + msg = "If `X` is a dict no further arguments must be provided." + raise ValueError(msg) X, obs, var, uns, obsm, varm, obsp, varp, layers, raw = ( X._X, X.obs, @@ -486,11 +492,11 @@ def _init_as_actual( break else: class_names = ", ".join(c.__name__ for c in StorageType.classes()) - raise ValueError( - f"`X` needs to be of one of {class_names}, not {type(X)}." - ) + msg = f"`X` needs to be of one of {class_names}, not {type(X)}." + raise ValueError(msg) if shape is not None: - raise ValueError("`shape` needs to be `None` if `X` is not `None`.") + msg = "`shape` needs to be `None` if `X` is not `None`." + raise ValueError(msg) _check_2d_shape(X) # if type doesn’t match, a copy is made, otherwise, use a view if dtype is not None: @@ -530,7 +536,8 @@ def _init_as_actual( if isinstance(attr.index, pd.RangeIndex): attr.index = idx elif not idx.equals(attr.index): - raise ValueError(f"Index of {attr_name} must match {x_name} of X.") + msg = f"Index of {attr_name} must match {x_name} of X." 
+ raise ValueError(msg) # unstructured annotations self.uns = uns or OrderedDict() @@ -576,8 +583,8 @@ def get_size(X): return X.__sizeof__() size = 0 - attrs = list(["_X", "_obs", "_var"]) - attrs_multi = list(["_uns", "_obsm", "_varm", "varp", "_obsp", "_layers"]) + attrs = ["_X", "_obs", "_var"] + attrs_multi = ["_uns", "_obsm", "_varm", "varp", "_obsp", "_layers"] for attr in attrs + attrs_multi: if attr in attrs_multi: keys = getattr(self, attr).keys() @@ -591,10 +598,7 @@ def get_size(X): return size def _gen_repr(self, n_obs, n_vars) -> str: - if self.isbacked: - backed_at = f" backed at {str(self.filename)!r}" - else: - backed_at = "" + backed_at = f" backed at {str(self.filename)!r}" if self.isbacked else "" descr = f"AnnData object with n_obs × n_vars = {n_obs} × {n_vars}{backed_at}" for attr in [ "obs", @@ -619,18 +623,16 @@ def __repr__(self) -> str: def __eq__(self, other): """Equality testing""" - raise NotImplementedError( - "Equality comparisons are not supported for AnnData objects, " - "instead compare the desired attributes." - ) + msg = "Equality comparisons are not supported for AnnData objects, instead compare the desired attributes." + raise NotImplementedError(msg) @property - def shape(self) -> Tuple[int, int]: + def shape(self) -> tuple[int, int]: """Shape of data matrix (:attr:`n_obs`, :attr:`n_vars`).""" return self.n_obs, self.n_vars @property - def X(self) -> Optional[Union[np.ndarray, sparse.spmatrix, ArrayView]]: + def X(self) -> np.ndarray | sparse.spmatrix | ArrayView | None: """Data matrix of shape :attr:`n_obs` × :attr:`n_vars`.""" if self.isbacked: if not self.file.is_open: @@ -661,12 +663,11 @@ def X(self) -> Optional[Union[np.ndarray, sparse.spmatrix, ArrayView]]: # return X @X.setter - def X(self, value: Optional[Union[np.ndarray, sparse.spmatrix]]): + def X(self, value: np.ndarray | sparse.spmatrix | None): if value is None: if self.isbacked: - raise NotImplementedError( - "Cannot currently remove data matrix from backed object." - ) + msg = "Cannot currently remove data matrix from backed object." + raise NotImplementedError(msg) if self.is_view: self._init_as_actual(self.copy()) self._X = None @@ -716,17 +717,15 @@ def X(self, value: Optional[Union[np.ndarray, sparse.spmatrix]]): else: self._X = value else: - raise ValueError( - f"Data matrix has wrong shape {value.shape}, " - f"need to be {self.shape}." - ) + msg = f"Data matrix has wrong shape {value.shape}, need to be {self.shape}." + raise ValueError(msg) @X.deleter def X(self): self.X = None @property - def layers(self) -> Union[Layers, LayersView]: + def layers(self) -> Layers | LayersView: """\ Dictionary-like object with values of the same dimensions as :attr:`X`. @@ -793,11 +792,12 @@ def raw(self) -> Raw: return self._raw @raw.setter - def raw(self, value: "AnnData"): + def raw(self, value: AnnData): if value is None: del self.raw elif not isinstance(value, AnnData): - raise ValueError("Can only init raw attribute with an AnnData object.") + msg = "Can only init raw attribute with an AnnData object." + raise ValueError(msg) else: if self.is_view: self._init_as_actual(self.copy()) @@ -821,7 +821,8 @@ def n_vars(self) -> int: def _set_dim_df(self, value: pd.DataFrame, attr: str): if not isinstance(value, pd.DataFrame): - raise ValueError(f"Can only assign pd.DataFrame to {attr}.") + msg = f"Can only assign pd.DataFrame to {attr}." 
+ raise ValueError(msg) value_idx = self._prep_dim_index(value.index, attr) if self.is_view: self._init_as_actual(self.copy()) @@ -836,16 +837,13 @@ def _prep_dim_index(self, value, attr: str) -> pd.Index: If a pd.Index is passed, this will use a reference, otherwise a new index object is created. """ if self.shape[attr == "var"] != len(value): - raise ValueError( - f"Length of passed value for {attr}_names is {len(value)}, but this AnnData has shape: {self.shape}" - ) + msg = f"Length of passed value for {attr}_names is {len(value)}, but this AnnData has shape: {self.shape}" + raise ValueError(msg) if isinstance(value, pd.Index) and not isinstance( value.name, (str, type(None)) ): - raise ValueError( - f"AnnData expects .{attr}.index.name to be a string or None, " - f"but you passed a name of type {type(value.name).__name__!r}" - ) + msg = f"AnnData expects .{attr}.index.name to be a string or None, but you passed a name of type {type(value.name).__name__!r}" + raise ValueError(msg) else: value = pd.Index(value) if not isinstance(value.name, (str, type(None))): @@ -863,7 +861,7 @@ def _prep_dim_index(self, value, attr: str) -> pd.Index: Inferred to be: {infer_dtype(value)} """ - ), # noqa + ), stacklevel=2, ) # fmt: on @@ -935,9 +933,8 @@ def uns(self) -> MutableMapping: @uns.setter def uns(self, value: MutableMapping): if not isinstance(value, MutableMapping): - raise ValueError( - "Only mutable mapping types (e.g. dict) are allowed for `.uns`." - ) + msg = "Only mutable mapping types (e.g. dict) are allowed for `.uns`." + raise ValueError(msg) if isinstance(value, DictView): value = value.copy() if self.is_view: @@ -949,7 +946,7 @@ def uns(self): self.uns = OrderedDict() @property - def obsm(self) -> Union[AxisArrays, AxisArraysView]: + def obsm(self) -> AxisArrays | AxisArraysView: """\ Multi-dimensional annotation of observations (mutable structured :class:`~numpy.ndarray`). @@ -972,7 +969,7 @@ def obsm(self): self.obsm = dict() @property - def varm(self) -> Union[AxisArrays, AxisArraysView]: + def varm(self) -> AxisArrays | AxisArraysView: """\ Multi-dimensional annotation of variables/features (mutable structured :class:`~numpy.ndarray`). @@ -995,7 +992,7 @@ def varm(self): self.varm = dict() @property - def obsp(self) -> Union[PairwiseArrays, PairwiseArraysView]: + def obsp(self) -> PairwiseArrays | PairwiseArraysView: """\ Pairwise annotation of observations, a mutable mapping with array-like values. @@ -1018,7 +1015,7 @@ def obsp(self): self.obsp = dict() @property - def varp(self) -> Union[PairwiseArrays, PairwiseArraysView]: + def varp(self) -> PairwiseArrays | PairwiseArraysView: """\ Pairwise annotation of variables/features, a mutable mapping with array-like values. 
@@ -1040,25 +1037,25 @@ def varp(self, value): def varp(self): self.varp = dict() - def obs_keys(self) -> List[str]: + def obs_keys(self) -> list[str]: """List keys of observation annotation :attr:`obs`.""" return self._obs.keys().tolist() - def var_keys(self) -> List[str]: + def var_keys(self) -> list[str]: """List keys of variable annotation :attr:`var`.""" return self._var.keys().tolist() - def obsm_keys(self) -> List[str]: + def obsm_keys(self) -> list[str]: """List keys of observation annotation :attr:`obsm`.""" return list(self._obsm.keys()) - def varm_keys(self) -> List[str]: + def varm_keys(self) -> list[str]: """List keys of variable annotation :attr:`varm`.""" return list(self._varm.keys()) - def uns_keys(self) -> List[str]: + def uns_keys(self) -> list[str]: """List keys of unstructured annotation.""" - return sorted(list(self._uns.keys())) + return sorted(self._uns.keys()) @property def isbacked(self) -> bool: @@ -1071,7 +1068,7 @@ def is_view(self) -> bool: return self._is_view @property - def filename(self) -> Optional[Path]: + def filename(self) -> Path | None: """\ Change to backing mode by setting the filename of a `.h5ad` file. @@ -1083,7 +1080,7 @@ def filename(self) -> Optional[Path]: return self.file.filename @filename.setter - def filename(self, filename: Optional[PathLike]): + def filename(self, filename: PathLike | None): # convert early for later comparison filename = None if filename is None else Path(filename) # change from backing-mode back to full loading into memory @@ -1107,10 +1104,7 @@ def filename(self, filename: Optional[PathLike]): else: # change from memory to backing-mode # write the content of self to disk - if self.raw is not None: - as_dense = ("X", "raw/X") - else: - as_dense = ("X",) + as_dense = ("X", "raw/X") if self.raw is not None else ("X",) self.write(filename, as_dense=as_dense) # open new file for accessing self.file.open(filename, "r+") @@ -1122,7 +1116,7 @@ def _set_backed(self, attr, value): write_attribute(self.file._file, attr, value) - def _normalize_indices(self, index: Optional[Index]) -> Tuple[slice, slice]: + def _normalize_indices(self, index: Index | None) -> tuple[slice, slice]: return _normalize_indices(index, self.obs_names, self.var_names) # TODO: this is not quite complete... @@ -1140,7 +1134,7 @@ def __delitem__(self, index: Index): if obs == slice(None): del self._var.iloc[var, :] - def __getitem__(self, index: Index) -> "AnnData": + def __getitem__(self, index: Index) -> AnnData: """Returns a sliced view of the object.""" oidx, vidx = self._normalize_indices(index) return AnnData(self, oidx=oidx, vidx=vidx, asview=True) @@ -1188,7 +1182,8 @@ def rename_categories(self, key: str, categories: Sequence[Any]): New categories, the same number as the old categories. """ if isinstance(categories, Mapping): - raise ValueError("Only list-like `categories` is supported.") + msg = "Only list-like `categories` is supported." + raise ValueError(msg) if key in self.obs: old_categories = self.obs[key].cat.categories.tolist() self.obs[key] = self.obs[key].cat.rename_categories(categories) @@ -1196,7 +1191,8 @@ def rename_categories(self, key: str, categories: Sequence[Any]): old_categories = self.var[key].cat.categories.tolist() self.var[key] = self.var[key].cat.rename_categories(categories) else: - raise ValueError(f"{key} is neither in `.obs` nor in `.var`.") + msg = f"{key} is neither in `.obs` nor in `.var`." 
+ raise ValueError(msg) # this is not a good solution # but depends on the scanpy conventions for storing the categorical key # as `groupby` in the `params` slot @@ -1219,7 +1215,7 @@ def rename_categories(self, key: str, categories: Sequence[Any]): f"Omitting {k1}/{k2} as old categories do not match." ) - def strings_to_categoricals(self, df: Optional[pd.DataFrame] = None): + def strings_to_categoricals(self, df: pd.DataFrame | None = None): """\ Transform string annotations to categoricals. @@ -1261,11 +1257,8 @@ def strings_to_categoricals(self, df: Optional[pd.DataFrame] = None): if not np.array_equal(c.categories, sorted_categories): c = c.reorder_categories(sorted_categories) if dont_modify: - raise RuntimeError( - "Please call `.strings_to_categoricals()` on full " - "AnnData, not on this view. You might encounter this" - "error message while copying or writing to disk." - ) + msg = "Please call `.strings_to_categoricals()` on full AnnData, not on this view. You might encounter this error message while copying or writing to disk." + raise RuntimeError(msg) df[key] = c logger.info(f"... storing {key!r} as categorical") @@ -1291,10 +1284,11 @@ def _inplace_subset_obs(self, index: Index1D): # TODO: Update, possibly remove def __setitem__( - self, index: Index, val: Union[int, float, np.ndarray, sparse.spmatrix] + self, index: Index, val: int | float | np.ndarray | sparse.spmatrix ): if self.is_view: - raise ValueError("Object is view and cannot be accessed with `[]`.") + msg = "Object is view and cannot be accessed with `[]`." + raise ValueError(msg) obs, var = self._normalize_indices(index) if not self.isbacked: self._X[obs, var] = val @@ -1306,7 +1300,7 @@ def __setitem__( def __len__(self) -> int: return self.shape[0] - def transpose(self) -> "AnnData": + def transpose(self) -> AnnData: """\ Transpose whole object. @@ -1314,15 +1308,10 @@ def transpose(self) -> AnnData: Ignores `.raw`. """ - if not self.isbacked: - X = self.X - else: - X = self.file["X"] + X = self.X if not self.isbacked else self.file["X"] if self.is_view: - raise ValueError( - "You’re trying to transpose a view of an `AnnData`, " - "which is currently not implemented. Call `.copy()` before transposing." - ) + msg = "You’re trying to transpose a view of an `AnnData`, which is currently not implemented. Call `.copy()` before transposing." + raise ValueError(msg) def t_csr(m: sparse.spmatrix) -> sparse.csr_matrix: return m.T.tocsr() if sparse.isspmatrix_csr(m) else m.T @@ -1362,7 +1351,8 @@ def to_df(self, layer=None) -> pd.DataFrame: if layer is not None: X = self.layers[layer] elif not self._has_X(): - raise ValueError("X is None, cannot convert to dataframe.") + msg = "X is None, cannot convert to dataframe." + raise ValueError(msg) else: X = self.X if issparse(X): @@ -1376,20 +1366,19 @@ def _get_X(self, use_raw=False, layer=None): """ is_layer = layer is not None if use_raw and is_layer: - raise ValueError( - "Cannot use expression from both layer and raw. You provided:" - f"`use_raw={use_raw}` and `layer={layer}`" - ) + msg = f"Cannot use expression from both layer and raw. You provided: `use_raw={use_raw}` and `layer={layer}`" + raise ValueError(msg) if is_layer: return self.layers[layer] elif use_raw: if self.raw is None: - raise ValueError("This AnnData doesn’t have a value in `.raw`.") + msg = "This AnnData doesn’t have a value in `.raw`."
+ raise ValueError(msg) return self.raw.X else: return self.X - def obs_vector(self, k: str, *, layer: Optional[str] = None) -> np.ndarray: + def obs_vector(self, k: str, *, layer: str | None = None) -> np.ndarray: """\ Convenience function for returning a 1 dimensional ndarray of values from :attr:`X`, :attr:`layers`\\ `[k]`, or :attr:`obs`. @@ -1421,7 +1410,7 @@ def obs_vector(self, k: str, *, layer: Optional[str] = None) -> np.ndarray: layer = None return get_vector(self, k, "obs", "var", layer=layer) - def var_vector(self, k, *, layer: Optional[str] = None) -> np.ndarray: + def var_vector(self, k, *, layer: str | None = None) -> np.ndarray: """\ Convenience function for returning a 1 dimensional ndarray of values from :attr:`X`, :attr:`layers`\\ `[k]`, or :attr:`obs`. @@ -1480,10 +1469,8 @@ def _mutated_copy(self, **kwargs): if self.isbacked and ( "X" not in kwargs or (self.raw is not None and "raw" not in kwargs) ): - raise NotImplementedError( - "This function does not currently handle backed objects " - "internally, this should be dealt with before." - ) + msg = "This function does not currently handle backed objects internally, this should be dealt with before." + raise NotImplementedError(msg) new = {} for key in ["obs", "var", "obsm", "varm", "obsp", "varp", "layers"]: @@ -1505,7 +1492,7 @@ def _mutated_copy(self, **kwargs): new["raw"] = self.raw.copy() return AnnData(**new) - def to_memory(self, copy=False) -> "AnnData": + def to_memory(self, copy=False) -> AnnData: """Return a new AnnData object with all backed arrays loaded into memory. Params @@ -1550,7 +1537,7 @@ def to_memory(self, copy=False) -> "AnnData": return AnnData(**new) - def copy(self, filename: Optional[PathLike] = None) -> "AnnData": + def copy(self, filename: PathLike | None = None) -> AnnData: """Full copy, optionally on disk.""" if not self.isbacked: if self.is_view and self._has_X(): @@ -1567,25 +1554,22 @@ def copy(self, filename: Optional[PathLike] = None) -> "AnnData": from .._io import read_h5ad, write_h5ad if filename is None: - raise ValueError( - "To copy an AnnData object in backed mode, " - "pass a filename: `.copy(filename='myfilename.h5ad')`. " - "To load the object into memory, use `.to_memory()`." - ) + msg = "To copy an AnnData object in backed mode, pass a filename: `.copy(filename='myfilename.h5ad')`. To load the object into memory, use `.to_memory()`." + raise ValueError(msg) mode = self.file._filemode write_h5ad(filename, self) return read_h5ad(filename, backed=mode) def concatenate( self, - *adatas: "AnnData", + *adatas: AnnData, join: str = "inner", batch_key: str = "batch", - batch_categories: Sequence[Any] = None, - uns_merge: Optional[str] = None, - index_unique: Optional[str] = "-", + batch_categories: Sequence[Any] | None = None, + uns_merge: str | None = None, + index_unique: str | None = "-", fill_value=None, - ) -> "AnnData": + ) -> AnnData: """\ Concatenate along the observations axis. @@ -1797,7 +1781,7 @@ def concatenate( [0., 0., 2., 1.], [0., 6., 5., 0.]], dtype=float32) """ - from .merge import concat, merge_outer, merge_dataframes, merge_same + from .merge import concat, merge_dataframes, merge_outer, merge_same warnings.warn( "The AnnData.concatenate method is deprecated in favour of the " @@ -1808,13 +1792,14 @@ def concatenate( ) if self.isbacked: - raise ValueError("Currently, concatenate only works in memory mode.") + msg = "Currently, concatenate only works in memory mode." 
+ raise ValueError(msg) if len(adatas) == 0: return self.copy() elif len(adatas) == 1 and not isinstance(adatas[0], AnnData): adatas = adatas[0] # backwards compatibility - all_adatas = (self,) + tuple(adatas) + all_adatas = (self, *tuple(adatas)) out = concat( all_adatas, @@ -1879,41 +1864,33 @@ def _check_uniqueness(self): utils.warn_names_duplicates("var") def __contains__(self, key: Any): - raise AttributeError( - "AnnData has no attribute __contains__, don’t check `in adata`." - ) + msg = "AnnData has no attribute __contains__, don’t check `in adata`." + raise AttributeError(msg) def _check_dimensions(self, key=None): - if key is None: - key = {"obsm", "varm"} - else: - key = {key} + key = {"obsm", "varm"} if key is None else {key} if "obsm" in key: obsm = self._obsm if ( - not all([dim_len(o, 0) == self.n_obs for o in obsm.values()]) + not all(dim_len(o, 0) == self.n_obs for o in obsm.values()) and len(obsm.dim_names) != self.n_obs ): - raise ValueError( - "Observations annot. `obsm` must have number of rows of `X`" - f" ({self.n_obs}), but has {len(obsm)} rows." - ) + msg = f"Observations annot. `obsm` must have number of rows of `X` ({self.n_obs}), but has {len(obsm)} rows." + raise ValueError(msg) if "varm" in key: varm = self._varm if ( - not all([dim_len(v, 0) == self.n_vars for v in varm.values()]) + not all(dim_len(v, 0) == self.n_vars for v in varm.values()) and len(varm.dim_names) != self.n_vars ): - raise ValueError( - "Variables annot. `varm` must have number of columns of `X`" - f" ({self.n_vars}), but has {len(varm)} rows." - ) + msg = f"Variables annot. `varm` must have number of columns of `X` ({self.n_vars}), but has {len(varm)} rows." + raise ValueError(msg) def write_h5ad( self, - filename: Optional[PathLike] = None, - compression: Optional[Literal["gzip", "lzf"]] = None, - compression_opts: Union[int, Any] = None, + filename: PathLike | None = None, + compression: Literal["gzip", "lzf"] | None = None, + compression_opts: int | Any = None, as_dense: Sequence[str] = (), ): """\ @@ -1981,7 +1958,8 @@ def write_h5ad( from .._io import write_h5ad if filename is None and not self.isbacked: - raise ValueError("Provide a filename!") + msg = "Provide a filename!" + raise ValueError(msg) if filename is None: filename = self.filename @@ -2033,8 +2011,8 @@ def write_loom(self, filename: PathLike, write_obsm_varm: bool = False): def write_zarr( self, - store: Union[MutableMapping, PathLike], - chunks: Union[bool, int, Tuple[int, ...], None] = None, + store: MutableMapping | PathLike, + chunks: bool | int | tuple[int, ...] | None = None, ): """\ Write a hierarchical Zarr array store. @@ -2050,7 +2028,7 @@ def write_zarr( write_zarr(store, self, chunks=chunks) - def chunked_X(self, chunk_size: Optional[int] = None): + def chunked_X(self, chunk_size: int | None = None): """\ Return an iterator over the rows of the data matrix :attr:`X`. 
@@ -2073,7 +2051,7 @@ def chunked_X(self, chunk_size: Optional[int] = None): def chunk_X( self, - select: Union[int, Sequence[int], np.ndarray] = 1000, + select: int | Sequence[int] | np.ndarray = 1000, replace: bool = True, ): """\ @@ -2099,7 +2077,8 @@ def chunk_X( elif isinstance(select, (np.ndarray, cabc.Sequence)): choice = np.asarray(select) else: - raise ValueError("select should be int or array") + msg = "select should be int or array" + raise ValueError(msg) reverse = None if self.isbacked: diff --git a/anndata/_core/file_backing.py b/anndata/_core/file_backing.py index eca759dd5..b6e001622 100644 --- a/anndata/_core/file_backing.py +++ b/anndata/_core/file_backing.py @@ -1,14 +1,19 @@ +from __future__ import annotations + +from collections.abc import Iterator, Mapping from functools import singledispatch -from os import PathLike from pathlib import Path -from typing import Optional, Union, Iterator, Literal -from collections.abc import Mapping +from typing import TYPE_CHECKING, Literal import h5py -from . import anndata +from ..compat import AwkArray, DaskArray, ZarrArray, ZarrGroup from .sparse_dataset import BaseCompressedSparseDataset -from ..compat import ZarrArray, ZarrGroup, DaskArray, AwkArray + +if TYPE_CHECKING: + from os import PathLike + + from . import anndata class AnnDataFileManager: @@ -16,9 +21,9 @@ class AnnDataFileManager: def __init__( self, - adata: "anndata.AnnData", - filename: Optional[PathLike] = None, - filemode: Optional[Literal["r", "r+"]] = None, + adata: anndata.AnnData, + filename: PathLike | None = None, + filemode: Literal["r", "r+"] | None = None, ): self._adata = adata self.filename = filename @@ -41,13 +46,13 @@ def __iter__(self) -> Iterator[str]: def __getitem__( self, key: str - ) -> Union[h5py.Group, h5py.Dataset, BaseCompressedSparseDataset]: + ) -> h5py.Group | h5py.Dataset | BaseCompressedSparseDataset: return self._file[key] def __setitem__( self, key: str, - value: Union[h5py.Group, h5py.Dataset, BaseCompressedSparseDataset], + value: h5py.Group | h5py.Dataset | BaseCompressedSparseDataset, ): self._file[key] = value @@ -59,20 +64,21 @@ def filename(self) -> Path: return self._filename @filename.setter - def filename(self, filename: Optional[PathLike]): + def filename(self, filename: PathLike | None): self._filename = None if filename is None else Path(filename) def open( self, - filename: Optional[PathLike] = None, - filemode: Optional[Literal["r", "r+"]] = None, + filename: PathLike | None = None, + filemode: Literal["r", "r+"] | None = None, ): if filename is not None: self.filename = filename if filemode is not None: self._filemode = filemode if self.filename is None: - raise ValueError("Cannot open backing file if backing not initialized.") + msg = "Cannot open backing file if backing not initialized." 
+ raise ValueError(msg) self._file = h5py.File(self.filename, self._filemode) def close(self): @@ -141,7 +147,8 @@ def _(x, copy=False): @singledispatch def filename(x): - raise NotImplementedError(f"Not implemented for {type(x)}") + msg = f"Not implemented for {type(x)}" + raise NotImplementedError(msg) @filename.register(h5py.Group) diff --git a/anndata/_core/index.py b/anndata/_core/index.py index 859c1bcdd..e934c58f5 100644 --- a/anndata/_core/index.py +++ b/anndata/_core/index.py @@ -1,18 +1,21 @@ +from __future__ import annotations + import collections.abc as cabc +from collections.abc import Sequence from functools import singledispatch from itertools import repeat -from typing import Union, Sequence, Optional, Tuple import h5py import numpy as np import pandas as pd -from scipy.sparse import spmatrix, issparse +from scipy.sparse import issparse, spmatrix + from ..compat import AwkArray, DaskArray, Index, Index1D def _normalize_indices( - index: Optional[Index], names0: pd.Index, names1: pd.Index -) -> Tuple[slice, slice]: + index: Index | None, names0: pd.Index, names1: pd.Index +) -> tuple[slice, slice]: # deal with tuples of length 1 if isinstance(index, tuple) and len(index) == 1: index = index[0] @@ -21,7 +24,8 @@ def _normalize_indices( index: Index = index.values if isinstance(index, tuple): if len(index) > 2: - raise ValueError("AnnData can only be sliced in rows and columns.") + msg = "AnnData can only be sliced in rows and columns." + raise ValueError(msg) # deal with pd.Series # TODO: The series should probably be aligned first if isinstance(index[1], pd.Series): @@ -35,17 +39,15 @@ def _normalize_indices( def _normalize_index( - indexer: Union[ - slice, - np.integer, - int, - str, - Sequence[Union[int, np.integer]], - np.ndarray, - pd.Index, - ], + indexer: slice + | np.integer + | int + | str + | Sequence[int | np.integer] + | np.ndarray + | pd.Index, index: pd.Index, -) -> Union[slice, int, np.ndarray]: # ndarray of int +) -> slice | int | np.ndarray: # ndarray of int if not isinstance(index, pd.RangeIndex): assert ( index.dtype != float and index.dtype != int @@ -84,27 +86,23 @@ def name_idx(i): return indexer # Might not work for range indexes elif issubclass(indexer.dtype.type, np.bool_): if indexer.shape != index.shape: - raise IndexError( - f"Boolean index does not match AnnData’s shape along this " - f"dimension. Boolean index has shape {indexer.shape} while " - f"AnnData index has shape {index.shape}." - ) + msg = f"Boolean index does not match AnnData’s shape along this dimension. Boolean index has shape {indexer.shape} while AnnData index has shape {index.shape}." + raise IndexError(msg) positions = np.where(indexer)[0] return positions # np.ndarray[int] else: # indexer should be string array positions = index.get_indexer(indexer) if np.any(positions < 0): not_found = indexer[positions < 0] - raise KeyError( - f"Values {list(not_found)}, from {list(indexer)}, " - "are not valid obs/ var names or indices." - ) + msg = f"Values {list(not_found)}, from {list(indexer)}, are not valid obs/var names or indices."
+ raise KeyError(msg) return positions # np.ndarray[int] else: - raise IndexError(f"Unknown indexer {indexer!r} of type {type(indexer)}") + msg = f"Unknown indexer {indexer!r} of type {type(indexer)}" + raise IndexError(msg) -def unpack_index(index: Index) -> Tuple[Index1D, Index1D]: +def unpack_index(index: Index) -> tuple[Index1D, Index1D]: if not isinstance(index, tuple): return index, slice(None) elif len(index) == 2: @@ -112,11 +110,12 @@ def unpack_index(index: Index) -> Tuple[Index1D, Index1D]: elif len(index) == 1: return index[0], slice(None) else: - raise IndexError("invalid number of indices") + msg = "invalid number of indices" + raise IndexError(msg) @singledispatch -def _subset(a: Union[np.ndarray, pd.DataFrame], subset_idx: Index): +def _subset(a: np.ndarray | pd.DataFrame, subset_idx: Index): # Select as combination of indexes, not coordinates # Correcting for indexing behaviour of np.ndarray if all(isinstance(x, cabc.Iterable) for x in subset_idx): @@ -184,13 +183,11 @@ def get_vector(adata, k, coldim, idxdim, layer=None): in_idx = k in idx if (in_col + in_idx) == 2: - raise ValueError( - f"Key {k} could be found in both .{idxdim}_names and .{coldim}.columns" - ) + msg = f"Key {k} could be found in both .{idxdim}_names and .{coldim}.columns" + raise ValueError(msg) elif (in_col + in_idx) == 0: - raise KeyError( - f"Could not find key {k} in .{idxdim}_names or .{coldim}.columns." - ) + msg = f"Could not find key {k} in .{idxdim}_names or .{coldim}.columns." + raise KeyError(msg) elif in_col: return getattr(adata, coldim)[k].values elif in_idx: diff --git a/anndata/_core/merge.py b/anndata/_core/merge.py index dd193005f..18d060770 100644 --- a/anndata/_core/merge.py +++ b/anndata/_core/merge.py @@ -3,34 +3,37 @@ """ from __future__ import annotations +import typing from collections import OrderedDict from collections.abc import ( Callable, Collection, + Iterable, Mapping, MutableSet, - Iterable, Sequence, ) from functools import reduce, singledispatch from itertools import repeat from operator import and_, or_, sub -from typing import Any, Optional, TypeVar, Union, Literal -import typing -from warnings import warn, filterwarnings +from typing import Any, Literal, TypeVar +from warnings import filterwarnings, warn -from natsort import natsorted import numpy as np import pandas as pd -from pandas.api.extensions import ExtensionDtype +from natsort import natsorted from scipy import sparse -from scipy.sparse import spmatrix -from .anndata import AnnData -from ..compat import AwkArray, DaskArray, CupySparseMatrix, CupyArray, CupyCSRMatrix +from anndata._warnings import ExperimentalFeatureWarning + +from ..compat import AwkArray, CupyArray, CupyCSRMatrix, CupySparseMatrix, DaskArray from ..utils import asarray, dim_len +from .anndata import AnnData from .index import _subset, make_slice -from anndata._warnings import ExperimentalFeatureWarning + +if typing.TYPE_CHECKING: + from pandas.api.extensions import ExtensionDtype + from scipy.sparse import spmatrix T = TypeVar("T") @@ -62,14 +65,14 @@ def copy(self): def add(self, val): self.dict[val] = None - def union(self, *vals) -> "OrderedSet": + def union(self, *vals) -> OrderedSet: return reduce(or_, vals, self) def discard(self, val): if val in self: del self.dict[val] - def difference(self, *vals) -> "OrderedSet": + def difference(self, *vals) -> OrderedSet: return reduce(sub, vals, self) @@ -119,9 +122,8 @@ def equal_dask_array(a, b) -> bool: return True if a.shape != b.shape: return False - if isinstance(b, DaskArray): - if 
tokenize(a) == tokenize(b): - return True + if isinstance(b, DaskArray) and tokenize(a) == tokenize(b): + return True return da.equal(a, b, where=~(da.isnan(a) == da.isnan(b))).all() @@ -220,7 +222,7 @@ def unify_dtypes(dfs: Iterable[pd.DataFrame]) -> list[pd.DataFrame]: dfs = [df.copy(deep=False) for df in dfs] new_dtypes = {} - for col in dtypes.keys(): + for col in dtypes: target_dtype = try_unifying_dtype(dtypes[col]) if target_dtype is not None: new_dtypes[col] = target_dtype @@ -291,11 +293,8 @@ def check_combinable_cols(cols: list[pd.Index], join: Literal["inner", "outer"]) if len(problem_cols) > 0: problem_cols = list(problem_cols) - raise pd.errors.InvalidIndexError( - f"Cannot combine dataframes as some contained duplicated column names - " - "causing ambiguity.\n\n" - f"The problem columns are: {problem_cols}" - ) + msg = f"Cannot combine dataframes as some contained duplicated column names - causing ambiguity.\n\nThe problem columns are: {problem_cols}" + raise pd.errors.InvalidIndexError(msg) # TODO: open PR or feature request to cupy @@ -340,7 +339,7 @@ def _cpblock_diag(mats, format=None, dtype=None): ################### -def unique_value(vals: Collection[T]) -> Union[T, MissingVal]: +def unique_value(vals: Collection[T]) -> T | MissingVal: """ Given a collection vals, returns the unique value (if one exists), otherwise returns MissingValue. @@ -352,7 +351,7 @@ def unique_value(vals: Collection[T]) -> Union[T, MissingVal]: return unique_val -def first(vals: Collection[T]) -> Union[T, MissingVal]: +def first(vals: Collection[T]) -> T | MissingVal: """ Given a collection of vals, return the first non-missing one.If they're all missing, return MissingVal. @@ -363,7 +362,7 @@ def first(vals: Collection[T]) -> Union[T, MissingVal]: return MissingVal -def only(vals: Collection[T]) -> Union[T, MissingVal]: +def only(vals: Collection[T]) -> T | MissingVal: """Return the only value in the collection, otherwise MissingVal.""" if len(vals) == 1: return vals[0] @@ -436,7 +435,7 @@ def merge_only(ds: Collection[Mapping]) -> Mapping: def resolve_merge_strategy( - strategy: Union[str, Callable, None] + strategy: str | Callable | None, ) -> Callable[[Collection[Mapping]], Mapping]: if not isinstance(strategy, Callable): strategy = MERGE_STRATEGIES[strategy] @@ -647,7 +646,8 @@ def _apply_to_awkward(self, el: AwkArray, *, axis, fill_value=None): return el[self.new_idx] else: # outer join # TODO: this code isn't actually hit, we should refactor - raise Exception("This should be unreachable, please open an issue.") + msg = "This should be unreachable, please open an issue." + raise Exception(msg) else: if len(self.new_idx) > len(self.old_idx): el = ak.pad_none(el, 1, axis=axis) # axis == 0 @@ -715,9 +715,8 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): isinstance(a, pd.DataFrame) or a is MissingVal or 0 in a.shape for a in arrays ): - raise NotImplementedError( - "Cannot concatenate a dataframe with other array types." - ) + msg = "Cannot concatenate a dataframe with other array types." + raise NotImplementedError(msg) # TODO: behaviour here should be chosen through a merge strategy df = pd.concat( unify_dtypes([f(x) for f, x in zip(reindexers, arrays)]), @@ -732,9 +731,8 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): if not all( isinstance(a, AwkArray) or a is MissingVal or 0 in a.shape for a in arrays ): - raise NotImplementedError( - "Cannot concatenate an AwkwardArray with other array types." 
- ) + msg = "Cannot concatenate an AwkwardArray with other array types." + raise NotImplementedError(msg) return ak.concatenate([f(a) for f, a in zip(reindexers, arrays)], axis=axis) elif any(isinstance(a, CupySparseMatrix) for a in arrays): @@ -752,9 +750,8 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): import cupy as cp if not all(isinstance(a, CupyArray) or 0 in a.shape for a in arrays): - raise NotImplementedError( - "Cannot concatenate a cupy array with other array types." - ) + msg = "Cannot concatenate a cupy array with other array types." + raise NotImplementedError(msg) return cp.concatenate( [ f(cp.asarray(x), fill_value=fill_value, axis=1 - axis) @@ -809,9 +806,8 @@ def gen_inner_reindexers(els, new_index, axis: Literal[0, 1] = 0): reindexers = [Reindexer(df_indices(el), common_ind) for el in els] elif any(isinstance(el, AwkArray) for el in els if not_missing(el)): if not all(isinstance(el, AwkArray) for el in els if not_missing(el)): - raise NotImplementedError( - "Cannot concatenate an AwkwardArray with other array types." - ) + msg = "Cannot concatenate an AwkwardArray with other array types." + raise NotImplementedError(msg) common_keys = intersect_keys(el.fields for el in els) reindexers = [ Reindexer(pd.Index(el.fields), pd.Index(list(common_keys))) for el in els @@ -837,9 +833,8 @@ def gen_outer_reindexers(els, shapes, new_index: pd.Index, *, axis=0): import awkward as ak if not all(isinstance(el, AwkArray) for el in els if not_missing(el)): - raise NotImplementedError( - "Cannot concatenate an AwkwardArray with other array types." - ) + msg = "Cannot concatenate an AwkwardArray with other array types." + raise NotImplementedError(msg) warn( "Outer joins on awkward.Arrays will have different return values in the future." "For details, and to offer input, please see:\n\n\t" @@ -942,16 +937,17 @@ def merge_outer(mappings, batch_keys, *, join_index="-", merge=merge_unique): return out -def _resolve_dim(*, dim: str = None, axis: int = None) -> tuple[int, str]: +def _resolve_dim(*, dim: str | None = None, axis: int | None = None) -> tuple[int, str]: _dims = ("obs", "var") if (dim is None and axis is None) or (dim is not None and axis is not None): - raise ValueError( - f"Must pass exactly one of `dim` or `axis`. Got: dim={dim}, axis={axis}." - ) + msg = f"Must pass exactly one of `dim` or `axis`. Got: dim={dim}, axis={axis}." + raise ValueError(msg) elif dim is not None and dim not in _dims: - raise ValueError(f"`dim` must be one of ('obs', 'var'), was {dim}") + msg = f"`dim` must be one of ('obs', 'var'), was {dim}" + raise ValueError(msg) elif axis is not None and axis not in (0, 1): - raise ValueError(f"`axis` must be either 0 or 1, was {axis}") + msg = f"`axis` must be either 0 or 1, was {axis}" + raise ValueError(msg) if dim is not None: return _dims.index(dim), dim else: @@ -984,26 +980,23 @@ def concat_Xs(adatas, reindexers, axis, fill_value): if all(X is None for X in Xs): return None elif any(X is None for X in Xs): - raise NotImplementedError( - "Some (but not all) of the AnnData's to be concatenated had no .X value. " - "Concatenation is currently only implemented for cases where all or none of" - " the AnnData's have .X assigned." - ) + msg = "Some (but not all) of the AnnData's to be concatenated had no .X value. Concatenation is currently only implemented for cases where all or none of the AnnData's have .X assigned." 
+ raise NotImplementedError(msg) else: return concat_arrays(Xs, reindexers, axis=axis, fill_value=fill_value) def concat( - adatas: Union[Collection[AnnData], "typing.Mapping[str, AnnData]"], + adatas: Collection[AnnData] | typing.Mapping[str, AnnData], *, axis: Literal[0, 1] = 0, join: Literal["inner", "outer"] = "inner", - merge: Union[StrategiesLiteral, Callable, None] = None, - uns_merge: Union[StrategiesLiteral, Callable, None] = None, - label: Optional[str] = None, - keys: Optional[Collection] = None, - index_unique: Optional[str] = None, - fill_value: Optional[Any] = None, + merge: StrategiesLiteral | Callable | None = None, + uns_merge: StrategiesLiteral | Callable | None = None, + label: str | None = None, + keys: Collection | None = None, + index_unique: str | None = None, + fill_value: Any | None = None, pairwise: bool = False, ) -> AnnData: """Concatenates AnnData objects along an axis. @@ -1181,10 +1174,8 @@ def concat( if isinstance(adatas, Mapping): if keys is not None: - raise TypeError( - "Cannot specify categories in both mapping keys and using `keys`. " - "Only specify this once." - ) + msg = "Cannot specify categories in both mapping keys and using `keys`. Only specify this once." + raise TypeError(msg) keys, adatas = list(adatas.keys()), list(adatas.values()) else: adatas = list(adatas) diff --git a/anndata/_core/raw.py b/anndata/_core/raw.py index baa951c56..abf8fd25a 100644 --- a/anndata/_core/raw.py +++ b/anndata/_core/raw.py @@ -1,27 +1,32 @@ -from typing import Union, Mapping, Sequence, Tuple +from __future__ import annotations + +from typing import TYPE_CHECKING import h5py import numpy as np import pandas as pd -from scipy import sparse from scipy.sparse import issparse +from ..compat import CupyArray, CupySparseMatrix from . 
import anndata -from .index import _normalize_index, _subset, unpack_index, get_vector from .aligned_mapping import AxisArrays +from .index import _normalize_index, _subset, get_vector, unpack_index from .sparse_dataset import BaseCompressedSparseDataset, sparse_dataset -from ..compat import CupyArray, CupySparseMatrix +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + from scipy import sparse # TODO: Implement views for Raw class Raw: def __init__( self, - adata: "anndata.AnnData", - X: Union[np.ndarray, sparse.spmatrix, None] = None, - var: Union[pd.DataFrame, Mapping[str, Sequence], None] = None, - varm: Union[AxisArrays, Mapping[str, np.ndarray], None] = None, + adata: anndata.AnnData, + X: np.ndarray | sparse.spmatrix | None = None, + var: pd.DataFrame | Mapping[str, Sequence] | None = None, + varm: AxisArrays | Mapping[str, np.ndarray] | None = None, ): from .anndata import _gen_dataframe @@ -48,7 +53,8 @@ def __init__( self._var = adata.var.copy() self._varm = AxisArrays(self, 1, adata.varm.copy()) elif adata.isbacked: - raise ValueError("Cannot specify X if adata is backed") + msg = "Cannot specify X if adata is backed" + raise ValueError(msg) def _get_X(self, layer=None): if layer is not None: @@ -56,7 +62,7 @@ def _get_X(self, layer=None): return self.X @property - def X(self) -> Union[BaseCompressedSparseDataset, np.ndarray, sparse.spmatrix]: + def X(self) -> BaseCompressedSparseDataset | np.ndarray | sparse.spmatrix: # TODO: Handle unsorted array of integer indices for h5py.Datasets if not self._adata.isbacked: return self._X @@ -68,10 +74,8 @@ def X(self) -> Union[BaseCompressedSparseDataset, np.ndarray, sparse.spmatrix]: elif "raw.X" in self._adata.file: X = self._adata.file["raw.X"] # Backwards compat else: - raise AttributeError( - f"Could not find dataset for raw X in file: " - f"{self._adata.file.filename}." - ) + msg = f"Could not find dataset for raw X in file: {self._adata.file.filename}." + raise AttributeError(msg) if isinstance(X, h5py.Group): X = sparse_dataset(X) # Check if we need to subset @@ -119,10 +123,7 @@ def __getitem__(self, index): if isinstance(oidx, (int, np.integer)): oidx = slice(oidx, oidx + 1, 1) - if not self._adata.isbacked: - X = _subset(self.X, (oidx, vidx)) - else: - X = None + X = _subset(self.X, (oidx, vidx)) if not self._adata.isbacked else None var = self._var.iloc[vidx] new = Raw(self._adata, X=X, var=var) @@ -191,12 +192,12 @@ def obs_vector(self, k: str) -> np.ndarray: # This exists to accommodate AlignedMappings, # until we implement a proper RawView or get rid of Raw in favor of modes. 
class _RawViewHack: - def __init__(self, raw: Raw, vidx: Union[slice, np.ndarray]): + def __init__(self, raw: Raw, vidx: slice | np.ndarray): self.parent_raw = raw self.vidx = vidx @property - def shape(self) -> Tuple[int, int]: + def shape(self) -> tuple[int, int]: return self.parent_raw.n_obs, len(self.var_names) @property diff --git a/anndata/_core/sparse_dataset.py b/anndata/_core/sparse_dataset.py index 83eb4befa..071205b19 100644 --- a/anndata/_core/sparse_dataset.py +++ b/anndata/_core/sparse_dataset.py @@ -12,17 +12,18 @@ # - think about supporting the COO format from __future__ import annotations -from abc import ABC import collections.abc as cabc -from itertools import accumulate, chain -from typing import Literal, Union, NamedTuple, Tuple, Sequence, Iterable, Type import warnings +from abc import ABC +from itertools import accumulate, chain +from typing import TYPE_CHECKING, Literal, NamedTuple import h5py import numpy as np import scipy.sparse as ss from scipy.sparse import _sparsetools -from anndata.compat import ZarrGroup, H5Group + +from anndata.compat import H5Group, ZarrGroup from ..compat import _read_attr @@ -32,13 +33,16 @@ except ImportError: _cs_matrix = ss.spmatrix -from .index import unpack_index, Index, _subset +from .index import Index, _subset, unpack_index + +if TYPE_CHECKING: + from collections.abc import Iterable, Sequence class BackedFormat(NamedTuple): format: str - backed_type: Type["BackedSparseMatrix"] - memory_type: Type[ss.spmatrix] + backed_type: type[BackedSparseMatrix] + memory_type: type[ss.spmatrix] class BackedSparseMatrix(_cs_matrix): @@ -74,9 +78,8 @@ def _set_many(self, i: Iterable[int], j: Iterable[int], x): return else: - raise ValueError( - "You cannot change the sparsity structure of a SparseDataset." - ) + msg = "You cannot change the sparsity structure of a SparseDataset." + raise ValueError(msg) # replace where possible # mask = offsets > -1 # # offsets[mask] @@ -192,7 +195,7 @@ def slice_as_int(s: slice, l: int) -> int: def get_compressed_vectors( x: BackedSparseMatrix, row_idxs: Iterable[int] -) -> Tuple[Sequence, Sequence, Sequence]: +) -> tuple[Sequence, Sequence, Sequence]: slices = [slice(*(x.indptr[i : i + 2])) for i in row_idxs] data = np.concatenate([x.data[s] for s in slices]) indices = np.concatenate([x.indices[s] for s in slices]) @@ -202,7 +205,7 @@ def get_compressed_vectors( def get_compressed_vector( x: BackedSparseMatrix, idx: int -) -> Tuple[Sequence, Sequence, Sequence]: +) -> tuple[Sequence, Sequence, Sequence]: s = slice(*(x.indptr[idx : idx + 2])) data = x.data[s] indices = x.indices[s] @@ -214,21 +217,24 @@ def get_format(data: ss.spmatrix) -> str: for fmt, _, memory_class in FORMATS: if isinstance(data, memory_class): return fmt - raise ValueError(f"Data type {type(data)} is not supported.") + msg = f"Data type {type(data)} is not supported." + raise ValueError(msg) -def get_memory_class(format: str) -> Type[ss.spmatrix]: +def get_memory_class(format: str) -> type[ss.spmatrix]: for fmt, _, memory_class in FORMATS: if format == fmt: return memory_class - raise ValueError(f"Format string {format} is not supported.") + msg = f"Format string {format} is not supported." + raise ValueError(msg) -def get_backed_class(format: str) -> Type[BackedSparseMatrix]: +def get_backed_class(format: str) -> type[BackedSparseMatrix]: for fmt, backed_class, _ in FORMATS: if format == fmt: return backed_class - raise ValueError(f"Format string {format} is not supported.") + msg = f"Format string {format} is not supported." 
+ raise ValueError(msg) def _get_group_format(group) -> str: @@ -258,7 +264,8 @@ def backend(self) -> Literal["zarr", "hdf5"]: elif isinstance(self.group, H5Group): return "hdf5" else: - raise ValueError(f"Unknown group type {type(self.group)}") + msg = f"Unknown group type {type(self.group)}" + raise ValueError(msg) @property def dtype(self) -> np.dtype: @@ -284,7 +291,7 @@ def name(self) -> str: return self.group.name @property - def shape(self) -> Tuple[int, int]: + def shape(self) -> tuple[int, int]: shape = _read_attr(self.group.attrs, "shape", None) if shape is None: # TODO warn @@ -304,7 +311,7 @@ def value(self) -> ss.spmatrix: def __repr__(self) -> str: return f"{type(self).__name__}: backend {self.backend}, shape {self.shape}, data_dtype {self.dtype}" - def __getitem__(self, index: Union[Index, Tuple[()]]) -> Union[float, ss.spmatrix]: + def __getitem__(self, index: Index | tuple[()]) -> float | ss.spmatrix: row, col = self._normalize_index(index) mtx = self._to_backed() sub = mtx[row, col] @@ -316,8 +323,8 @@ def __getitem__(self, index: Union[Index, Tuple[()]]) -> Union[float, ss.spmatri return sub def _normalize_index( - self, index: Union[Index, Tuple[()]] - ) -> Tuple[np.ndarray, np.ndarray]: + self, index: Index | tuple[()] + ) -> tuple[np.ndarray, np.ndarray]: if index == (): index = slice(None) row, col = unpack_index(index) @@ -325,7 +332,7 @@ def _normalize_index( row, col = np.ix_(row, col) return row, col - def __setitem__(self, index: Union[Index, Tuple[()]], value): + def __setitem__(self, index: Index | tuple[()], value): warnings.warn( "__setitem__ will likely be removed in the near future. We do not recommend relying on its stability.", PendingDeprecationWarning, @@ -343,19 +350,14 @@ def append(self, sparse_matrix: ss.spmatrix): # Check input if not ss.isspmatrix(sparse_matrix): - raise NotImplementedError( - "Currently, only sparse matrices of equivalent format can be " - "appended to a SparseDataset." - ) + msg = "Currently, only sparse matrices of equivalent format can be appended to a SparseDataset." + raise NotImplementedError(msg) if self.format not in {"csr", "csc"}: - raise NotImplementedError( - f"The append method for format {self.format} " f"is not implemented." - ) + msg = f"The append method for format {self.format} is not implemented." + raise NotImplementedError(msg) if self.format != get_format(sparse_matrix): - raise ValueError( - f"Matrices must have same format. Currently are " - f"{self.format!r} and {get_format(sparse_matrix)!r}" - ) + msg = f"Matrices must have same format. Currently are {self.format!r} and {get_format(sparse_matrix)!r}" + raise ValueError(msg) # shape if self.format == "csr": @@ -369,7 +371,8 @@ def append(self, sparse_matrix: ss.spmatrix): ), "CSC matrices must have same size of dimension 0 to be appended." 
new_shape = (shape[0], shape[1] + sparse_matrix.shape[1]) else: - assert False, "We forgot to update this branching to a new format" + msg = "We forgot to update this branching to a new format" + raise AssertionError(msg) if "h5sparse_shape" in self.group.attrs: del self.group.attrs["h5sparse_shape"] self.group.attrs["shape"] = new_shape diff --git a/anndata/_core/views.py b/anndata/_core/views.py index 7e38266a6..3718d99df 100644 --- a/anndata/_core/views.py +++ b/anndata/_core/views.py @@ -1,28 +1,32 @@ from __future__ import annotations +import warnings from contextlib import contextmanager from copy import deepcopy -from collections.abc import Sequence, KeysView, Callable, Iterable from functools import reduce, singledispatch, wraps -from typing import Any, ClassVar, Literal -import warnings +from typing import TYPE_CHECKING, Any, ClassVar, Literal import numpy as np import pandas as pd from pandas.api.types import is_bool_dtype from scipy import sparse -import anndata from anndata._warnings import ImplicitModificationWarning -from .access import ElementRef + from ..compat import ( - ZappyArray, AwkArray, - DaskArray, CupyArray, CupyCSCMatrix, CupyCSRMatrix, + DaskArray, + ZappyArray, ) +from .access import ElementRef + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, KeysView, Sequence + + import anndata @contextmanager @@ -79,7 +83,7 @@ class _ViewMixin(_SetItemMixin): def __init__( self, *args, - view_args: tuple["anndata.AnnData", str, tuple[str, ...]] = None, + view_args: tuple[anndata.AnnData, str, tuple[str, ...]] | None = None, **kwargs, ): if view_args is not None: @@ -100,7 +104,7 @@ class ArrayView(_SetItemMixin, np.ndarray): def __new__( cls, input_array: Sequence[Any], - view_args: tuple["anndata.AnnData", str, tuple[str, ...]] = None, + view_args: tuple[anndata.AnnData, str, tuple[str, ...]] | None = None, ): arr = np.asanyarray(input_array).view(cls) @@ -172,7 +176,7 @@ class DaskArrayView(_SetItemMixin, DaskArray): def __new__( cls, input_array: DaskArray, - view_args: tuple["anndata.AnnData", str, tuple[str, ...]] = None, + view_args: tuple[anndata.AnnData, str, tuple[str, ...]] | None = None, ): arr = super().__new__( cls, @@ -226,7 +230,7 @@ class CupyArrayView(_ViewMixin, CupyArray): def __new__( cls, input_array: Sequence[Any], - view_args: tuple["anndata.AnnData", str, tuple[str, ...]] = None, + view_args: tuple[anndata.AnnData, str, tuple[str, ...]] | None = None, ): import cupy as cp @@ -260,7 +264,8 @@ def drop(self, *args, inplace: bool = False, **kw): @singledispatch def as_view(obj, view_args): - raise NotImplementedError(f"No view type has been registered for {type(obj)}") + msg = f"No view type has been registered for {type(obj)}" + raise NotImplementedError(msg) @as_view.register(np.ndarray) @@ -316,9 +321,10 @@ def as_view_cupy_csc(mtx, view_args): try: - from ..compat import awkward as ak import weakref + from ..compat import awkward as ak + # Registry to store weak references from AwkwardArrayViews to their parent AnnData container _registry = weakref.WeakValueDictionary() _PARAM_NAME = "_view_args" @@ -362,10 +368,8 @@ def as_view_awkarray(array, view_args): # A better solution might be based on xarray-style "attrs", once this is implemented # https://github.com/scikit-hep/awkward/issues/1391#issuecomment-1412297114 if type(array).__name__ != "Array": - raise NotImplementedError( - "Cannot create a view of an awkward array with __array__ parameter. " - "Please open an issue in the AnnData repo and describe your use-case." 
- ) + msg = "Cannot create a view of an awkward array with __array__ parameter. Please open an issue in the AnnData repo and describe your use-case." + raise NotImplementedError(msg) array = ak.with_parameter(array, _PARAM_NAME, (parent_key, attrname, keys)) array = ak.with_parameter(array, "__list__", "AwkwardArrayView") return array diff --git a/anndata/_io/__init__.py b/anndata/_io/__init__.py index f305d42ab..7bb6ba506 100644 --- a/anndata/_io/__init__.py +++ b/anndata/_io/__init__.py @@ -1,3 +1,6 @@ +from __future__ import annotations + +from .h5ad import read_h5ad, write_h5ad from .read import ( read_csv, read_excel, @@ -8,7 +11,6 @@ read_umi_tools, read_zarr, ) -from .h5ad import read_h5ad, write_h5ad from .write import write_csvs, write_loom diff --git a/anndata/_io/h5ad.py b/anndata/_io/h5ad.py index 5ba94e8bd..d7dd4cd8e 100644 --- a/anndata/_io/h5ad.py +++ b/anndata/_io/h5ad.py @@ -1,41 +1,50 @@ +from __future__ import annotations + import re from functools import partial -from warnings import warn from pathlib import Path from types import MappingProxyType -from typing import Callable, Type, TypeVar, Union, Literal -from typing import Collection, Sequence, Mapping +from typing import ( + TYPE_CHECKING, + Callable, + Literal, + TypeVar, +) +from warnings import warn import h5py import numpy as np import pandas as pd from scipy import sparse -from .._core.sparse_dataset import BaseCompressedSparseDataset -from .._core.file_backing import AnnDataFileManager, filename +from anndata._warnings import OldFormatWarning + from .._core.anndata import AnnData +from .._core.file_backing import AnnDataFileManager, filename +from .._core.sparse_dataset import BaseCompressedSparseDataset from ..compat import ( - _from_fixed_length_strings, - _decode_structured_array, _clean_uns, + _decode_structured_array, + _from_fixed_length_strings, ) from ..experimental import read_dispatched +from .specs import read_elem, write_elem from .utils import ( H5PY_V3, + _read_legacy_raw, + idx_chunks_along_axis, report_read_key_on_error, report_write_key_on_error, - idx_chunks_along_axis, - _read_legacy_raw, ) -from .specs import read_elem, write_elem -from anndata._warnings import OldFormatWarning +if TYPE_CHECKING: + from collections.abc import Collection, Mapping, Sequence T = TypeVar("T") def write_h5ad( - filepath: Union[Path, str], + filepath: Path | str, adata: AnnData, *, as_dense: Sequence[str] = (), @@ -48,11 +57,11 @@ def write_h5ad( as_dense = list(as_dense) as_dense[as_dense.index("raw.X")] = "raw/X" if any(val not in {"X", "raw/X"} for val in as_dense): - raise NotImplementedError( - "Currently, only `X` and `raw/X` are supported values in `as_dense`" - ) + msg = "Currently, only `X` and `raw/X` are supported values in `as_dense`" + raise NotImplementedError(msg) if "raw/X" in as_dense and adata.raw is None: - raise ValueError("Cannot specify writing `raw/X` to dense if it doesn’t exist.") + msg = "Cannot specify writing `raw/X` to dense if it doesn’t exist." 
+ raise ValueError(msg) adata.strings_to_categoricals() if adata.raw is not None: @@ -122,7 +131,7 @@ def write_sparse_as_dense(f, key, value, dataset_kwargs=MappingProxyType({})): del f[key] -def read_h5ad_backed(filename: Union[str, Path], mode: Literal["r", "r+"]) -> AnnData: +def read_h5ad_backed(filename: str | Path, mode: Literal["r", "r+"]) -> AnnData: d = dict(filename=filename, filemode=mode) f = h5py.File(filename, mode) @@ -151,11 +160,11 @@ def read_h5ad_backed(filename: Union[str, Path], mode: Literal["r", "r+"]) -> An def read_h5ad( - filename: Union[str, Path], - backed: Union[Literal["r", "r+"], bool, None] = None, + filename: str | Path, + backed: Literal["r", "r+"] | bool | None = None, *, as_sparse: Sequence[str] = (), - as_sparse_fmt: Type[sparse.spmatrix] = sparse.csr_matrix, + as_sparse_fmt: type[sparse.spmatrix] = sparse.csr_matrix, chunk_size: int = 6000, # TODO, probably make this 2d chunks ) -> AnnData: """\ @@ -197,20 +206,15 @@ def read_h5ad( return read_h5ad_backed(filename, mode) if as_sparse_fmt not in (sparse.csr_matrix, sparse.csc_matrix): - raise NotImplementedError( - "Dense formats can only be read to CSR or CSC matrices at this time." - ) - if isinstance(as_sparse, str): - as_sparse = [as_sparse] - else: - as_sparse = list(as_sparse) + msg = "Dense formats can only be read to CSR or CSC matrices at this time." + raise NotImplementedError(msg) + as_sparse = [as_sparse] if isinstance(as_sparse, str) else list(as_sparse) for i in range(len(as_sparse)): if as_sparse[i] in {("raw", "X"), "raw.X"}: as_sparse[i] = "raw/X" elif as_sparse[i] not in {"raw/X", "X"}: - raise NotImplementedError( - "Currently only `X` and `raw/X` can be read as sparse." - ) + msg = "Currently only `X` and `raw/X` can be read as sparse." + raise NotImplementedError(msg) rdasp = partial( read_dense_as_sparse, sparse_format=as_sparse_fmt, axis_chunk=chunk_size @@ -225,7 +229,7 @@ def callback(func, elem_name: str, elem, iospec): # This is covering up backwards compat in the anndata initializer # In most cases we should be able to call `func(elen[k])` instead k: read_dispatched(elem[k], callback) - for k in elem.keys() + for k in elem if not k.startswith("raw.") } ) @@ -256,9 +260,9 @@ def callback(func, elem_name: str, elem, iospec): def _read_raw( - f: Union[h5py.File, AnnDataFileManager], + f: h5py.File | AnnDataFileManager, as_sparse: Collection[str] = (), - rdasp: Callable[[h5py.Dataset], sparse.spmatrix] = None, + rdasp: Callable[[h5py.Dataset], sparse.spmatrix] | None = None, *, attrs: Collection[str] = ("X", "var", "varm"), ) -> dict: @@ -338,7 +342,8 @@ def read_dense_as_sparse( elif sparse_format == sparse.csc_matrix: return read_dense_as_csc(dataset, axis_chunk) else: - raise ValueError(f"Cannot read dense array as type: {sparse_format}") + msg = f"Cannot read dense array as type: {sparse_format}" + raise ValueError(msg) def read_dense_as_csr(dataset, axis_chunk=6000): diff --git a/anndata/_io/read.py b/anndata/_io/read.py index 91115a81b..5c34fe24e 100644 --- a/anndata/_io/read.py +++ b/anndata/_io/read.py @@ -1,11 +1,12 @@ -from pathlib import Path +from __future__ import annotations + +import bz2 +import gzip +from collections import OrderedDict from os import PathLike, fspath +from pathlib import Path from types import MappingProxyType -from typing import Union, Optional, Mapping, Tuple -from typing import Iterable, Iterator, Generator -from collections import OrderedDict -import gzip -import bz2 +from typing import TYPE_CHECKING from warnings import warn import h5py @@ 
-17,6 +18,9 @@ from ..compat import _deprecate_positional_args from .utils import is_float +if TYPE_CHECKING: + from collections.abc import Generator, Iterable, Iterator, Mapping + try: from .zarr import read_zarr except ImportError as _e: @@ -27,9 +31,9 @@ def read_zarr(*_, **__): def read_csv( - filename: Union[PathLike, Iterator[str]], - delimiter: Optional[str] = ",", - first_column_names: Optional[bool] = None, + filename: PathLike | Iterator[str], + delimiter: str | None = ",", + first_column_names: bool | None = None, dtype: str = "float32", ) -> AnnData: """\ @@ -53,9 +57,7 @@ def read_csv( return read_text(filename, delimiter, first_column_names, dtype) -def read_excel( - filename: PathLike, sheet: Union[str, int], dtype: str = "float32" -) -> AnnData: +def read_excel(filename: PathLike, sheet: str | int, dtype: str = "float32") -> AnnData: """\ Read `.xlsx` (Excel) file. @@ -118,12 +120,10 @@ def read_hdf(filename: PathLike, key: str) -> AnnData: with h5py.File(filename, "r") as f: # the following is necessary in Python 3, because only # a view and not a list is returned - keys = [k for k in f.keys()] + keys = list(f.keys()) if key == "": - raise ValueError( - f"The file {filename} stores the following sheets:\n{keys}\n" - f"Call read/read_hdf5 with one of them." - ) + msg = f"The file {filename} stores the following sheets:\n{keys}\nCall read/read_hdf5 with one of them." + raise ValueError(msg) # read array X = f[key][()] # try to find row and column names @@ -137,7 +137,7 @@ def read_hdf(filename: PathLike, key: str) -> AnnData: def _fmt_loom_axis_attrs( input: Mapping, idx_name: str, dimm_mapping: Mapping[str, Iterable[str]] -) -> Tuple[pd.DataFrame, Mapping[str, np.ndarray]]: +) -> tuple[pd.DataFrame, Mapping[str, np.ndarray]]: axis_df = pd.DataFrame() axis_mapping = {} for key, names in dimm_mapping.items(): @@ -163,9 +163,9 @@ def read_loom( cleanup: bool = False, X_name: str = "spliced", obs_names: str = "CellID", - obsm_names: Optional[Mapping[str, Iterable[str]]] = None, + obsm_names: Mapping[str, Iterable[str]] | None = None, var_names: str = "Gene", - varm_names: Optional[Mapping[str, Iterable[str]]] = None, + varm_names: Mapping[str, Iterable[str]] | None = None, dtype: str = "float32", obsm_mapping: Mapping[str, Iterable[str]] = MappingProxyType({}), varm_mapping: Mapping[str, Iterable[str]] = MappingProxyType({}), @@ -225,10 +225,8 @@ def read_loom( FutureWarning, ) if obsm_mapping != {}: - raise ValueError( - "Received values for both `obsm_names` and `obsm_mapping`. This is " - "ambiguous, only pass `obsm_mapping`." - ) + msg = "Received values for both `obsm_names` and `obsm_mapping`. This is ambiguous, only pass `obsm_mapping`." + raise ValueError(msg) obsm_mapping = obsm_names if varm_names is not None: warn( @@ -237,17 +235,15 @@ def read_loom( FutureWarning, ) if varm_mapping != {}: - raise ValueError( - "Received values for both `varm_names` and `varm_mapping`. This is " - "ambiguous, only pass `varm_mapping`." - ) + msg = "Received values for both `varm_names` and `varm_mapping`. This is ambiguous, only pass `varm_mapping`." 
+ raise ValueError(msg) varm_mapping = varm_names filename = fspath(filename) # allow passing pathlib.Path objects from loompy import connect with connect(filename, "r", **kwargs) as lc: - if X_name not in lc.layers.keys(): + if X_name not in lc.layers: X_name = "" X = lc.layers[X_name].sparse().T.tocsr() if sparse else lc.layers[X_name][()].T X = X.astype(dtype, copy=False) @@ -257,7 +253,7 @@ def read_loom( layers["matrix"] = ( lc.layers[""].sparse().T.tocsr() if sparse else lc.layers[""][()].T ) - for key in lc.layers.keys(): + for key in lc.layers: if key != "": layers[key] = ( lc.layers[key].sparse().T.tocsr() @@ -320,9 +316,9 @@ def read_mtx(filename: PathLike, dtype: str = "float32") -> AnnData: def read_text( - filename: Union[PathLike, Iterator[str]], - delimiter: Optional[str] = None, - first_column_names: Optional[bool] = None, + filename: PathLike | Iterator[str], + delimiter: str | None = None, + first_column_names: bool | None = None, dtype: str = "float32", ) -> AnnData: """\ @@ -368,8 +364,8 @@ def _iter_lines(file_like: Iterable[str]) -> Generator[str, None, None]: def _read_text( f: Iterator[str], - delimiter: Optional[str], - first_column_names: Optional[bool], + delimiter: str | None, + first_column_names: bool | None, dtype: str, ) -> AnnData: comments = [] @@ -385,7 +381,8 @@ def _read_text( comments.append(comment) else: if delimiter is not None and delimiter not in line: - raise ValueError(f"Did not find delimiter {delimiter!r} in first line.") + msg = f"Did not find delimiter {delimiter!r} in first line." + raise ValueError(msg) line_list = line.split(delimiter) # the first column might be row names, so check the last if not is_float(line_list[-1]): @@ -447,10 +444,8 @@ def _read_text( # in the end, to separate row_names from float data, slicing takes # a lot of memory and CPU time if data[0].size != data[-1].size: - raise ValueError( - f"Length of first line ({data[0].size}) is different " - f"from length of last line ({data[-1].size})." - ) + msg = f"Length of first line ({data[0].size}) is different from length of last line ({data[-1].size})." + raise ValueError(msg) data = np.array(data, dtype=dtype) # logg.msg(" constructed array from list of list", t=True, v=4) # transform row_names diff --git a/anndata/_io/specs/__init__.py b/anndata/_io/specs/__init__.py index 28281a1e0..ceff8b3d6 100644 --- a/anndata/_io/specs/__init__.py +++ b/anndata/_io/specs/__init__.py @@ -1,6 +1,15 @@ +from __future__ import annotations + from . 
import methods -from .registry import write_elem, get_spec, read_elem, Reader, Writer, IOSpec -from .registry import _REGISTRY # noqa: F401 +from .registry import ( + _REGISTRY, # noqa: F401 + IOSpec, + Reader, + Writer, + get_spec, + read_elem, + write_elem, +) __all__ = [ "methods", diff --git a/anndata/_io/specs/methods.py b/anndata/_io/specs/methods.py index 6ca9102b7..a9b63e860 100644 --- a/anndata/_io/specs/methods.py +++ b/anndata/_io/specs/methods.py @@ -1,11 +1,10 @@ from __future__ import annotations -from os import PathLike from collections.abc import Mapping -from itertools import product from functools import partial -from typing import Union, Literal +from itertools import product from types import MappingProxyType +from typing import TYPE_CHECKING, Literal from warnings import warn import h5py @@ -15,24 +14,30 @@ import anndata as ad from anndata import AnnData, Raw +from anndata._core import views from anndata._core.index import _normalize_indices from anndata._core.merge import intersect_keys from anndata._core.sparse_dataset import CSCDataset, CSRDataset, sparse_dataset -from anndata._core import views +from anndata._io.utils import H5PY_V3, check_key +from anndata._warnings import OldFormatWarning from anndata.compat import ( + AwkArray, + CupyArray, + CupyCSCMatrix, + CupyCSRMatrix, + DaskArray, ZarrArray, ZarrGroup, - DaskArray, - _read_attr, - _from_fixed_length_strings, _decode_structured_array, + _from_fixed_length_strings, + _read_attr, ) -from anndata._io.utils import check_key, H5PY_V3 -from anndata._warnings import OldFormatWarning -from anndata.compat import AwkArray, CupyArray, CupyCSRMatrix, CupyCSCMatrix from .registry import _REGISTRY, IOSpec, read_elem, read_elem_partial +if TYPE_CHECKING: + from os import PathLike + H5Array = h5py.Dataset H5Group = h5py.Group H5File = h5py.File @@ -224,16 +229,10 @@ def read_partial( def _read_partial(group, *, items=None, indices=(slice(None), slice(None))): if group is None: return None - if items is None: - keys = intersect_keys((group,)) - else: - keys = intersect_keys((group, items)) + keys = intersect_keys((group,)) if items is None else intersect_keys((group, items)) result = {} for k in keys: - if isinstance(items, Mapping): - next_items = items.get(k, None) - else: - next_items = None + next_items = items.get(k, None) if isinstance(items, Mapping) else None result[k] = read_elem_partial(group[k], items=next_items, indices=indices) return result @@ -358,9 +357,8 @@ def write_basic_dask_h5(f, k, elem, _writer, dataset_kwargs=MappingProxyType({}) import dask.config as dc if dc.get("scheduler", None) == "dask.distributed": - raise ValueError( - "Cannot write dask arrays to hdf5 when using distributed scheduler" - ) + msg = "Cannot write dask arrays to hdf5 when using distributed scheduler" + raise ValueError(msg) g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs) da.store(elem, g) @@ -590,7 +588,7 @@ def read_awkward(elem, _reader): form = _read_attr(elem.attrs, "form") length = _read_attr(elem.attrs, "length") - container = {k: _reader.read_elem(elem[k]) for k in elem.keys()} + container = {k: _reader.read_elem(elem[k]) for k in elem} return ak.from_buffers(form, length, container) @@ -608,15 +606,13 @@ def write_dataframe(f, key, df, _writer, dataset_kwargs=MappingProxyType({})): # Check arguments for reserved in ("_index",): if reserved in df.columns: - raise ValueError(f"{reserved!r} is a reserved name for dataframe columns.") + msg = f"{reserved!r} is a reserved name for dataframe 
columns." + raise ValueError(msg) group = f.require_group(key) col_names = [check_key(c) for c in df.columns] group.attrs["column-order"] = col_names - if df.index.name is not None: - index_name = df.index.name - else: - index_name = "_index" + index_name = df.index.name if df.index.name is not None else "_index" group.attrs["_index"] = check_key(index_name) # ._values is "the best" array representation. It's the true array backing the @@ -688,7 +684,7 @@ def read_dataframe_0_1_0(elem, _reader): return df -def read_series(dataset: h5py.Dataset) -> Union[np.ndarray, pd.Categorical]: +def read_series(dataset: h5py.Dataset) -> np.ndarray | pd.Categorical: # For reading older dataframes if "categories" in dataset.attrs: if isinstance(dataset, ZarrArray): @@ -713,10 +709,7 @@ def read_series(dataset: h5py.Dataset) -> Union[np.ndarray, pd.Categorical]: def read_partial_dataframe_0_1_0( elem, *, items=None, indices=(slice(None), slice(None)) ): - if items is None: - items = slice(None) - else: - items = list(items) + items = slice(None) if items is None else list(items) return read_elem(elem)[items].iloc[indices[0]] diff --git a/anndata/_io/specs/registry.py b/anndata/_io/specs/registry.py index 07ddbb744..216aa8bac 100644 --- a/anndata/_io/specs/registry.py +++ b/anndata/_io/specs/registry.py @@ -1,14 +1,16 @@ from __future__ import annotations -from collections.abc import Mapping, Callable, Iterable +from collections.abc import Callable, Iterable, Mapping from dataclasses import dataclass from functools import singledispatch, wraps from types import MappingProxyType -from typing import Any, Union +from typing import TYPE_CHECKING, Any +from anndata._io.utils import report_read_key_on_error, report_write_key_on_error from anndata.compat import _read_attr -from anndata._types import StorageType, GroupStorageType -from anndata._io.utils import report_write_key_on_error, report_read_key_on_error + +if TYPE_CHECKING: + from anndata._types import GroupStorageType, StorageType # TODO: This probably should be replaced by a hashable Mapping due to conversion b/w "_" and "-" # TODO: Should filetype be included in the IOSpec if it changes the encoding? Or does the intent that these things be "the same" overrule that? @@ -66,7 +68,7 @@ def __init__(self): self.write: dict[ tuple[type, type | tuple[type, str], frozenset[str]], Callable ] = {} - self.write_specs: dict[Union[type, tuple[type, str]], IOSpec] = {} + self.write_specs: dict[type | tuple[type, str], IOSpec] = {} def register_write( self, @@ -82,10 +84,8 @@ def register_write( if src_type in self.write_specs and (spec != self.write_specs[src_type]): # First check for consistency current_spec = self.write_specs[src_type] - raise TypeError( - "Cannot overwrite IO specifications. Attempted to overwrite encoding " - f"for {src_type} from {current_spec} to {spec}" - ) + msg = f"Cannot overwrite IO specifications. 
Attempted to overwrite encoding for {src_type} from {current_spec} to {spec}" + raise TypeError(msg) else: self.write_specs[src_type] = spec @@ -140,9 +140,8 @@ def get_reader( if (src_type, spec, modifiers) in self.read: return self.read[(src_type, spec, modifiers)] else: - raise IORegistryError._from_read_parts( - "read", _REGISTRY.read, src_type, spec - ) + msg = "read" + raise IORegistryError._from_read_parts(msg, _REGISTRY.read, src_type, spec) def has_reader( self, src_type: type, spec: IOSpec, modifiers: frozenset[str] = frozenset() @@ -170,8 +169,9 @@ def get_partial_reader( if (src_type, spec, modifiers) in self.read_partial: return self.read_partial[(src_type, spec, modifiers)] else: + msg = "read_partial" raise IORegistryError._from_read_parts( - "read_partial", _REGISTRY.read_partial, src_type, spec + msg, _REGISTRY.read_partial, src_type, spec ) def get_spec(self, elem: Any) -> IOSpec: @@ -187,7 +187,8 @@ def get_spec(self, elem: Any) -> IOSpec: @singledispatch def proc_spec(spec) -> IOSpec: - raise NotImplementedError(f"proc_spec not defined for type: {type(spec)}.") + msg = f"proc_spec not defined for type: {type(spec)}." + raise NotImplementedError(msg) @proc_spec.register(IOSpec) @@ -212,9 +213,7 @@ def get_spec( class Reader: - def __init__( - self, registry: IORegistry, callback: Union[Callable, None] = None - ) -> None: + def __init__(self, registry: IORegistry, callback: Callable | None = None) -> None: self.registry = registry self.callback = callback @@ -241,18 +240,16 @@ class Writer: def __init__( self, registry: IORegistry, - callback: Union[ - Callable[ - [ - GroupStorageType, - str, - StorageType, - dict, - ], - None, + callback: Callable[ + [ + GroupStorageType, + str, + StorageType, + dict, ], None, - ] = None, + ] + | None = None, ): self.registry = registry self.callback = callback diff --git a/anndata/_io/utils.py b/anndata/_io/utils.py index 906a82e1e..3973e6152 100644 --- a/anndata/_io/utils.py +++ b/anndata/_io/utils.py @@ -4,12 +4,13 @@ from typing import Callable, Literal from warnings import warn -from packaging import version import h5py +from packaging import version -from .._core.sparse_dataset import BaseCompressedSparseDataset from anndata.compat import H5Group, ZarrGroup, add_note +from .._core.sparse_dataset import BaseCompressedSparseDataset + # For allowing h5py v3 # https://github.com/scverse/anndata/issues/442 H5PY_V3 = version.parse(h5py.__version__).major >= 3 @@ -109,7 +110,8 @@ def check_key(key): # elif issubclass(typ, bytes): # return key else: - raise TypeError(f"{key} of type {typ} is an invalid key. Should be str.") + msg = f"{key} of type {typ} is an invalid key. Should be str." + raise TypeError(msg) # ------------------------------------------------------------------------------- @@ -147,8 +149,6 @@ def write_attribute(*args, **kwargs): class AnnDataReadError(OSError): """Error caused while trying to read in AnnData.""" - pass - def _get_parent(elem): try: @@ -174,7 +174,7 @@ def re_raise_error(e, elem, key, op=Literal["read", "writ"]): parent = _get_parent(elem) add_note( e, - f"Error raised while {op}ing key {key!r} of {type(elem)} to " f"{parent}", + f"Error raised while {op}ing key {key!r} of {type(elem)} to {parent}", ) raise e @@ -263,7 +263,8 @@ def _read_legacy_raw( if modern_raw: if any(k.startswith("raw.") for k in f): what = f"File {f.filename}" if hasattr(f, "filename") else "Store" - raise ValueError(f"{what} has both legacy and current raw formats.") + msg = f"{what} has both legacy and current raw formats." 
+ raise ValueError(msg) return modern_raw raw = {} diff --git a/anndata/_io/write.py b/anndata/_io/write.py index da801888e..95f8dcbd2 100644 --- a/anndata/_io/write.py +++ b/anndata/_io/write.py @@ -1,15 +1,20 @@ +from __future__ import annotations + +import math import warnings -from pathlib import Path from os import PathLike, fspath +from pathlib import Path +from typing import TYPE_CHECKING -import pandas as pd -import math import numpy as np +import pandas as pd from scipy.sparse import issparse -from .. import AnnData -from ..logging import get_logger from .._warnings import WriteWarning +from ..logging import get_logger + +if TYPE_CHECKING: + from .. import AnnData logger = get_logger(__name__) @@ -80,12 +85,13 @@ def write_loom(filename: PathLike, adata: AnnData, write_obsm_varm: bool = False col_attrs[col_dim] = col_names.values if adata.X is None: - raise ValueError("loompy does not accept empty matrices as data") + msg = "loompy does not accept empty matrices as data" + raise ValueError(msg) if write_obsm_varm: - for key in adata.obsm.keys(): + for key in adata.obsm: col_attrs[key] = adata.obsm[key] - for key in adata.varm.keys(): + for key in adata.varm: row_attrs[key] = adata.varm[key] elif len(adata.obsm.keys()) > 0 or len(adata.varm.keys()) > 0: logger.warning( @@ -95,7 +101,7 @@ def write_loom(filename: PathLike, adata: AnnData, write_obsm_varm: bool = False ) layers = {"": adata.X.T} - for key in adata.layers.keys(): + for key in adata.layers: layers[key] = adata.layers[key].T from loompy import create diff --git a/anndata/_io/zarr.py b/anndata/_io/zarr.py index d85e1fc19..c6f990251 100644 --- a/anndata/_io/zarr.py +++ b/anndata/_io/zarr.py @@ -1,32 +1,36 @@ -from collections.abc import MutableMapping +from __future__ import annotations + from pathlib import Path -from typing import TypeVar, Union +from typing import TYPE_CHECKING, TypeVar from warnings import warn import numpy as np -from scipy import sparse import pandas as pd import zarr +from scipy import sparse + +from anndata._warnings import OldFormatWarning from .._core.anndata import AnnData from ..compat import ( - _from_fixed_length_strings, _clean_uns, + _from_fixed_length_strings, ) from ..experimental import read_dispatched, write_dispatched +from .specs import read_elem from .utils import ( - report_read_key_on_error, _read_legacy_raw, + report_read_key_on_error, ) -from .specs import read_elem -from anndata._warnings import OldFormatWarning +if TYPE_CHECKING: + from collections.abc import MutableMapping T = TypeVar("T") def write_zarr( - store: Union[MutableMapping, str, Path], + store: MutableMapping | str | Path, adata: AnnData, chunks=None, **ds_kwargs, @@ -50,7 +54,7 @@ def callback(func, s, k, elem, dataset_kwargs, iospec): write_dispatched(f, "/", adata, callback=callback, dataset_kwargs=ds_kwargs) -def read_zarr(store: Union[str, Path, MutableMapping, zarr.Group]) -> AnnData: +def read_zarr(store: str | Path | MutableMapping | zarr.Group) -> AnnData: """\ Read from a hierarchical Zarr array store. 
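The signature changes in zarr.py above (Union[MutableMapping, str, Path] becoming MutableMapping | str | Path, with MutableMapping now imported only under TYPE_CHECKING) work because each touched module gains from __future__ import annotations: annotations are left unevaluated, so the | union syntax and the type-checking-only import add no runtime cost and run on Python versions older than 3.10. A self-contained sketch of that combination, with a hypothetical load function standing in for the real readers:

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Needed only for the annotation below; never imported at runtime.
    from collections.abc import MutableMapping


def load(store: MutableMapping | str | Path) -> str:
    # With deferred annotations, "MutableMapping | str | Path" stays a string
    # at runtime, so neither the name nor the | union has to be evaluated.
    return str(store)


print(load(Path("example.zarr")))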
@@ -62,10 +66,7 @@ def read_zarr(store: Union[str, Path, MutableMapping, zarr.Group]) -> AnnData: if isinstance(store, Path): store = str(store) - if isinstance(store, zarr.Group): - f = store - else: - f = zarr.open(store, mode="r") + f = store if isinstance(store, zarr.Group) else zarr.open(store, mode="r") # Read with handling for backwards compat def callback(func, elem_name: str, elem, iospec): diff --git a/anndata/_types.py b/anndata/_types.py index d5b6b1c5c..7f57e380f 100644 --- a/anndata/_types.py +++ b/anndata/_types.py @@ -1,6 +1,8 @@ """ Defines some useful types for this library. Should probably be cleaned up before thinking about exporting. """ +from __future__ import annotations + from typing import Union from anndata.compat import H5Array, H5Group, ZarrArray, ZarrGroup diff --git a/anndata/_warnings.py b/anndata/_warnings.py index 5bc0c461c..786c79a0a 100644 --- a/anndata/_warnings.py +++ b/anndata/_warnings.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + class WriteWarning(UserWarning): pass @@ -5,8 +8,6 @@ class WriteWarning(UserWarning): class OldFormatWarning(PendingDeprecationWarning): """Raised when a file in an old file format is read.""" - pass - class ImplicitModificationWarning(UserWarning): """\ @@ -20,10 +21,6 @@ class ImplicitModificationWarning(UserWarning): ImplicitModificationWarning: Transforming to str index. """ - pass - class ExperimentalFeatureWarning(Warning): """Raised when an unstable experimental feature is used.""" - - pass diff --git a/anndata/compat/__init__.py b/anndata/compat/__init__.py index f69276bff..7e2411f95 100644 --- a/anndata/compat/__init__.py +++ b/anndata/compat/__init__.py @@ -1,20 +1,20 @@ from __future__ import annotations +import os +from codecs import decode +from collections.abc import Mapping from contextlib import AbstractContextManager from dataclasses import dataclass, field - from functools import singledispatch, wraps -from codecs import decode -from inspect import signature, Parameter -import os +from inspect import Parameter, signature from pathlib import Path -from typing import Any, Tuple, Union, Mapping, Optional +from typing import Any, Union from warnings import warn import h5py -from scipy.sparse import spmatrix import numpy as np import pandas as pd +from scipy.sparse import spmatrix from .exceptiongroups import add_note # noqa: F401 @@ -24,7 +24,7 @@ class Empty: Index1D = Union[slice, int, str, np.int64, np.ndarray] -Index = Union[Index1D, Tuple[Index1D, Index1D], spmatrix] +Index = Union[Index1D, tuple[Index1D, Index1D], spmatrix] H5Group = h5py.Group H5Array = h5py.Dataset @@ -105,12 +105,16 @@ def __repr__(): try: + from cupy import ndarray as CupyArray from cupyx.scipy.sparse import ( - spmatrix as CupySparseMatrix, - csr_matrix as CupyCSRMatrix, csc_matrix as CupyCSCMatrix, ) - from cupy import ndarray as CupyArray + from cupyx.scipy.sparse import ( + csr_matrix as CupyCSRMatrix, + ) + from cupyx.scipy.sparse import ( + spmatrix as CupySparseMatrix, + ) except ImportError: class CupySparseMatrix: @@ -140,7 +144,7 @@ def __repr__(): @singledispatch -def _read_attr(attrs: Mapping, name: str, default: Optional[Any] = Empty): +def _read_attr(attrs: Mapping, name: str, default: Any | None = Empty): if default is Empty: return attrs[name] else: @@ -149,7 +153,7 @@ def _read_attr(attrs: Mapping, name: str, default: Optional[Any] = Empty): @_read_attr.register(h5py.AttributeManager) def _read_attr_hdf5( - attrs: h5py.AttributeManager, name: str, default: Optional[Any] = Empty + attrs: 
h5py.AttributeManager, name: str, default: Any | None = Empty ): """ Read an HDF5 attribute and perform all necessary conversions. @@ -200,7 +204,7 @@ def _from_fixed_length_strings(value): def _decode_structured_array( - arr: np.ndarray, dtype: Optional[np.dtype] = None, copy: bool = False + arr: np.ndarray, dtype: np.dtype | None = None, copy: bool = False ) -> np.ndarray: """ h5py 3.0 now reads all strings as bytes. There is a helper method which can convert these to strings, @@ -250,7 +254,7 @@ def _to_fixed_length_strings(value: np.ndarray) -> np.ndarray: ############################# -def _clean_uns(adata: "AnnData"): # noqa: F821 +def _clean_uns(adata: AnnData): # noqa: F821 """ Compat function for when categorical keys were stored in uns. This used to be buggy because when storing categorical columns in obs and var with @@ -342,7 +346,7 @@ def inner_f(*args, **kwargs): # extra_args > 0 args_msg = [ - "{}={}".format(name, arg) + f"{name}={arg}" for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:]) ] args_msg = ", ".join(args_msg) diff --git a/anndata/compat/exceptiongroups.py b/anndata/compat/exceptiongroups.py index f64090017..6cfef914c 100644 --- a/anndata/compat/exceptiongroups.py +++ b/anndata/compat/exceptiongroups.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import sys @@ -6,7 +8,7 @@ def add_note(err: BaseException, msg: str) -> BaseException: Adds a note to an exception inplace and returns it. """ if sys.version_info < (3, 11): - err.__notes__ = getattr(err, "__notes__", []) + [msg] + err.__notes__ = [*getattr(err, "__notes__", []), msg] else: err.add_note(msg) return err diff --git a/anndata/core.py b/anndata/core.py index c4b254c0e..8e6ef0382 100644 --- a/anndata/core.py +++ b/anndata/core.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from warnings import warn warn("Please only import from anndata, not anndata.core", DeprecationWarning) diff --git a/anndata/experimental/__init__.py b/anndata/experimental/__init__.py index 13667e214..486f14e8d 100644 --- a/anndata/experimental/__init__.py +++ b/anndata/experimental/__init__.py @@ -1,12 +1,12 @@ from __future__ import annotations -from .multi_files import AnnCollection -from .pytorch import AnnLoader +from anndata._core.sparse_dataset import CSCDataset, CSRDataset, sparse_dataset +from anndata._io.specs import IOSpec, read_elem, write_elem -from anndata._io.specs import read_elem, write_elem, IOSpec -from anndata._core.sparse_dataset import sparse_dataset, CSRDataset, CSCDataset from ._dispatch_io import read_dispatched, write_dispatched from .merge import concat_on_disk +from .multi_files import AnnCollection +from .pytorch import AnnLoader __all__ = [ "AnnCollection", diff --git a/anndata/experimental/_dispatch_io.py b/anndata/experimental/_dispatch_io.py index 2df14b4f1..4df4d417a 100644 --- a/anndata/experimental/_dispatch_io.py +++ b/anndata/experimental/_dispatch_io.py @@ -1,11 +1,11 @@ from __future__ import annotations from types import MappingProxyType -from typing import Callable, Any +from typing import TYPE_CHECKING, Any, Callable - -from anndata._io.specs import IOSpec -from anndata._types import StorageType, GroupStorageType +if TYPE_CHECKING: + from anndata._io.specs import IOSpec + from anndata._types import GroupStorageType, StorageType def read_dispatched( @@ -39,7 +39,7 @@ def read_dispatched( :doc:`/tutorials/notebooks/{read,write}_dispatched` """ - from anndata._io.specs import Reader, _REGISTRY + from anndata._io.specs import _REGISTRY, Reader reader = 
Reader(_REGISTRY, callback=callback) @@ -90,7 +90,7 @@ def write_dispatched( :doc:`/tutorials/notebooks/{read,write}_dispatched` """ - from anndata._io.specs import Writer, _REGISTRY + from anndata._io.specs import _REGISTRY, Writer writer = Writer(_REGISTRY, callback=callback) diff --git a/anndata/experimental/merge.py b/anndata/experimental/merge.py index c16aead46..d9e689210 100644 --- a/anndata/experimental/merge.py +++ b/anndata/experimental/merge.py @@ -1,18 +1,14 @@ +from __future__ import annotations + import os import shutil +from collections.abc import Collection, Iterable, Mapping, MutableMapping, Sequence from functools import singledispatch from pathlib import Path from typing import ( Any, Callable, - Collection, - Iterable, Literal, - Mapping, - Optional, - Sequence, - Union, - MutableMapping, ) import numpy as np @@ -104,12 +100,13 @@ def _gen_slice_to_append( @singledispatch -def as_group(store, *args, **kwargs) -> Union[ZarrGroup, H5Group]: - raise NotImplementedError("This is not yet implemented.") +def as_group(store, *args, **kwargs) -> ZarrGroup | H5Group: + msg = "This is not yet implemented." + raise NotImplementedError(msg) @as_group.register(os.PathLike) -def _(store: os.PathLike, *args, **kwargs) -> Union[ZarrGroup, H5Group]: +def _(store: os.PathLike, *args, **kwargs) -> ZarrGroup | H5Group: if store.suffix == ".h5ad": import h5py @@ -120,7 +117,7 @@ def _(store: os.PathLike, *args, **kwargs) -> Union[ZarrGroup, H5Group]: @as_group.register(str) -def _(store: str, *args, **kwargs) -> Union[ZarrGroup, H5Group]: +def _(store: str, *args, **kwargs) -> ZarrGroup | H5Group: return as_group(Path(store), *args, **kwargs) @@ -135,7 +132,7 @@ def _(store, *args, **kwargs): ################### -def read_as_backed(group: Union[ZarrGroup, H5Group]): +def read_as_backed(group: ZarrGroup | H5Group): """ Read the group until BaseCompressedSparseDataset, Array or EAGER_TYPES are encountered. 
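as_group above is a functools.singledispatch function: the base implementation now raises through the msg/raise pattern, while os.PathLike and str inputs are routed to registered overloads (strings are normalized to Path and re-dispatched). The isolated sketch below mirrors that dispatch shape; open_store and its return values are hypothetical stand-ins, not the anndata implementation:

from __future__ import annotations

import os
from functools import singledispatch
from pathlib import Path


@singledispatch
def open_store(store, *args, **kwargs):
    # Fallback for unregistered types, using the same msg/raise style as the patch.
    msg = "This is not yet implemented."
    raise NotImplementedError(msg)


@open_store.register(os.PathLike)
def _(store: os.PathLike, *args, **kwargs) -> str:
    # In the real code the suffix decides which backend group to open (e.g. h5py for .h5ad).
    return f"pathlike store with suffix {Path(store).suffix!r}"


@open_store.register(str)
def _(store: str, *args, **kwargs) -> str:
    # Strings are normalized to Path and dispatched again, as the patch does.
    return open_store(Path(store))


print(open_store("example.h5ad"))  # dispatches str -> os.PathLike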
@@ -156,7 +153,7 @@ def callback(func, elem_name: str, elem, iospec): return read_dispatched(group, callback=callback) -def _df_index(df: Union[ZarrGroup, H5Group]) -> pd.Index: +def _df_index(df: ZarrGroup | H5Group) -> pd.Index: index_key = df.attrs["_index"] return pd.Index(read_elem(df[index_key])) @@ -167,9 +164,9 @@ def _df_index(df: Union[ZarrGroup, H5Group]) -> pd.Index: def write_concat_dense( - arrays: Sequence[Union[ZarrArray, H5Array]], - output_group: Union[ZarrGroup, H5Group], - output_path: Union[ZarrGroup, H5Group], + arrays: Sequence[ZarrArray | H5Array], + output_group: ZarrGroup | H5Group, + output_path: ZarrGroup | H5Group, axis: Literal[0, 1] = 0, reindexers: Reindexer = None, fill_value=None, @@ -196,8 +193,8 @@ def write_concat_dense( def write_concat_sparse( datasets: Sequence[BaseCompressedSparseDataset], - output_group: Union[ZarrGroup, H5Group], - output_path: Union[ZarrGroup, H5Group], + output_group: ZarrGroup | H5Group, + output_path: ZarrGroup | H5Group, max_loaded_elems: int, axis: Literal[0, 1] = 0, reindexers: Reindexer = None, @@ -235,7 +232,7 @@ def write_concat_sparse( def _write_concat_mappings( mappings, - output_group: Union[ZarrGroup, H5Group], + output_group: ZarrGroup | H5Group, keys, path, max_loaded_elems, @@ -269,7 +266,7 @@ def _write_concat_mappings( def _write_concat_arrays( - arrays: Sequence[Union[ZarrArray, H5Array, BaseCompressedSparseDataset]], + arrays: Sequence[ZarrArray | H5Array | BaseCompressedSparseDataset], output_group, output_path, max_loaded_elems, @@ -281,15 +278,15 @@ def _write_concat_arrays( init_elem = arrays[0] init_type = type(init_elem) if not all(isinstance(a, init_type) for a in arrays): - raise NotImplementedError( - f"All elements must be the same type instead got types: {[type(a) for a in arrays]}" - ) + msg = f"All elements must be the same type instead got types: {[type(a) for a in arrays]}" + raise NotImplementedError(msg) if reindexers is None: if join == "inner": reindexers = gen_inner_reindexers(arrays, new_index=None, axis=axis) else: - raise NotImplementedError("Cannot reindex arrays with outer join.") + msg = "Cannot reindex arrays with outer join." + raise NotImplementedError(msg) if isinstance(init_elem, BaseCompressedSparseDataset): expected_sparse_fmt = ["csr", "csc"][axis] @@ -304,9 +301,8 @@ def _write_concat_arrays( fill_value, ) else: - raise NotImplementedError( - f"Concat of following not supported: {[a.format for a in arrays]}" - ) + msg = f"Concat of following not supported: {[a.format for a in arrays]}" + raise NotImplementedError(msg) else: write_concat_dense( arrays, output_group, output_path, axis, reindexers, fill_value @@ -314,9 +310,7 @@ def _write_concat_arrays( def _write_concat_sequence( - arrays: Sequence[ - Union[pd.DataFrame, BaseCompressedSparseDataset, H5Array, ZarrArray] - ], + arrays: Sequence[pd.DataFrame | BaseCompressedSparseDataset | H5Array | ZarrArray], output_group, output_path, max_loaded_elems, @@ -334,14 +328,14 @@ def _write_concat_sequence( if join == "inner": reindexers = gen_inner_reindexers(arrays, None, axis=axis) else: - raise NotImplementedError("Cannot reindex dataframes with outer join.") + msg = "Cannot reindex dataframes with outer join." + raise NotImplementedError(msg) if not all( isinstance(a, pd.DataFrame) or a is MissingVal or 0 in a.shape for a in arrays ): - raise NotImplementedError( - "Cannot concatenate a dataframe with other array types." - ) + msg = "Cannot concatenate a dataframe with other array types." 
+ raise NotImplementedError(msg) df = concat_arrays( arrays=arrays, reindexers=reindexers, @@ -365,9 +359,8 @@ def _write_concat_sequence( join, ) else: - raise NotImplementedError( - f"Concatenation of these types is not yet implemented: {[type(a) for a in arrays] } with axis={axis}." - ) + msg = f"Concatenation of these types is not yet implemented: {[type(a) for a in arrays]} with axis={axis}." + raise NotImplementedError(msg) def _write_alt_mapping(groups, output_group, alt_dim, alt_indices, merge): @@ -401,26 +394,21 @@ def _write_dim_annot(groups, output_group, dim, concat_indices, label, label_col def concat_on_disk( - in_files: Union[ - Collection[Union[str, os.PathLike]], - MutableMapping[str, Union[str, os.PathLike]], - ], - out_file: Union[str, os.PathLike], + in_files: Collection[str | os.PathLike] | MutableMapping[str, str | os.PathLike], + out_file: str | os.PathLike, *, overwrite: bool = False, max_loaded_elems: int = 100_000_000, axis: Literal[0, 1] = 0, join: Literal["inner", "outer"] = "inner", - merge: Union[ - StrategiesLiteral, Callable[[Collection[Mapping]], Mapping], None - ] = None, - uns_merge: Union[ - StrategiesLiteral, Callable[[Collection[Mapping]], Mapping], None - ] = None, - label: Optional[str] = None, - keys: Optional[Collection[str]] = None, - index_unique: Optional[str] = None, - fill_value: Optional[Any] = None, + merge: StrategiesLiteral | Callable[[Collection[Mapping]], Mapping] | None = None, + uns_merge: StrategiesLiteral + | Callable[[Collection[Mapping]], Mapping] + | None = None, + label: str | None = None, + keys: Collection[str] | None = None, + index_unique: str | None = None, + fill_value: Any | None = None, pairwise: bool = False, ) -> None: """Concatenates multiple AnnData objects along a specified axis using their @@ -502,26 +490,27 @@ def concat_on_disk( """ # Argument normalization if pairwise: - raise NotImplementedError("pairwise concatenation not yet implemented") + msg = "pairwise concatenation not yet implemented" + raise NotImplementedError(msg) if join != "inner": - raise NotImplementedError("only inner join is currently supported") + msg = "only inner join is currently supported" + raise NotImplementedError(msg) merge = resolve_merge_strategy(merge) uns_merge = resolve_merge_strategy(uns_merge) if len(in_files) <= 1: if len(in_files) == 1: if not overwrite and Path(out_file).is_file(): - raise FileExistsError( + msg = ( f"File “{out_file}” already exists and `overwrite` is set to False" ) + raise FileExistsError(msg) shutil.copy2(in_files[0], out_file) return if isinstance(in_files, Mapping): if keys is not None: - raise TypeError( - "Cannot specify categories in both mapping keys and using `keys`. " - "Only specify this once." - ) + msg = "Cannot specify categories in both mapping keys and using `keys`. Only specify this once." 
+ raise TypeError(msg) keys, in_files = list(in_files.keys()), list(in_files.values()) else: in_files = list(in_files) @@ -546,7 +535,8 @@ def concat_on_disk( # All groups must be anndata if not all(g.attrs.get("encoding-type") == "anndata" for g in groups): - raise ValueError("All groups must be anndata") + msg = "All groups must be anndata" + raise ValueError(msg) # Write metadata output_group.attrs.update({"encoding-type": "anndata", "encoding-version": "0.1.0"}) diff --git a/anndata/experimental/multi_files/__init__.py b/anndata/experimental/multi_files/__init__.py index 86d4e8f44..956ebb8d2 100644 --- a/anndata/experimental/multi_files/__init__.py +++ b/anndata/experimental/multi_files/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from ._anncollection import AnnCollection __all__ = ["AnnCollection"] diff --git a/anndata/experimental/multi_files/_anncollection.py b/anndata/experimental/multi_files/_anncollection.py index 741cd412b..8f116f8c8 100644 --- a/anndata/experimental/multi_files/_anncollection.py +++ b/anndata/experimental/multi_files/_anncollection.py @@ -1,18 +1,20 @@ -from collections.abc import Mapping +from __future__ import annotations + +import warnings +from collections.abc import Mapping, Sequence from functools import reduce -from h5py import Dataset +from typing import Callable, Literal, Union + import numpy as np import pandas as pd -import warnings - -from typing import Dict, Union, Optional, Sequence, Callable, Literal +from h5py import Dataset +from ..._core.aligned_mapping import AxisArrays from ..._core.anndata import AnnData -from ..._core.index import _normalize_indices, _normalize_index, Index -from ..._core.views import _resolve_idx +from ..._core.index import Index, _normalize_index, _normalize_indices from ..._core.merge import concat_arrays, inner_concat_aligned_mapping from ..._core.sparse_dataset import BaseCompressedSparseDataset -from ..._core.aligned_mapping import AxisArrays +from ..._core.views import _resolve_idx ATTRS = ["obs", "obsm", "layers"] @@ -84,7 +86,7 @@ def _resolve_idx(self, oidx, vidx): if len(self.adatas) == 1: return [u_oidx], oidx, vidx, reverse - iter_limits = list(zip([0] + self.limits, self.limits)) + iter_limits = list(zip([0, *self.limits], self.limits)) n_adatas_used = 0 for lower, upper in iter_limits: @@ -135,21 +137,16 @@ def iterate_axis( Set to `True` to drop a batch with the length lower than `batch_size`. """ if axis not in (0, 1): - raise ValueError("Axis should be either 0 or 1.") + msg = "Axis should be either 0 or 1." 
+ raise ValueError(msg) n = self.shape[axis] - if shuffle: - indices = np.random.permutation(n).tolist() - else: - indices = list(range(n)) + indices = np.random.permutation(n).tolist() if shuffle else list(range(n)) for i in range(0, n, batch_size): idx = indices[i : min(i + batch_size, n)] - if axis == 1: - batch = self[:, idx] - else: - batch = self[idx] + batch = self[:, idx] if axis == 1 else self[idx] # only happens if the last batch is smaller than batch_size if len(batch) < batch_size and drop_last: continue @@ -182,7 +179,8 @@ def __init__( def __getitem__(self, key, use_convert=True): if self._keys is not None and key not in self._keys: - raise KeyError(f"No {key} in {self.attr} view") + msg = f"No {key} in {self.attr} view" + raise KeyError(msg) arrs = [] for i, oidx in enumerate(self.adatas_oidx): @@ -191,15 +189,9 @@ def __getitem__(self, key, use_convert=True): arr = getattr(self.adatas[i], self.attr)[key] - if self.adatas_vidx is not None: - vidx = self.adatas_vidx[i] - else: - vidx = None + vidx = self.adatas_vidx[i] if self.adatas_vidx is not None else None - if vidx is not None: - idx = oidx, vidx - else: - idx = oidx + idx = (oidx, vidx) if vidx is not None else oidx if isinstance(arr, pd.DataFrame): arrs.append(arr.iloc[idx]) @@ -314,10 +306,7 @@ def _lazy_init_attr(self, attr, set_vidx=False): if self.convert is not None: attr_convert = _select_convert(attr, self.convert) - if attr == "obs": - obs_names = self.obs_names - else: - obs_names = None + obs_names = self.obs_names if attr == "obs" else None setattr( self, @@ -567,8 +556,8 @@ def attrs_keys(self): return self.reference.attrs_keys -DictCallable = Dict[str, Callable] -ConvertType = Union[Callable, DictCallable, Dict[str, DictCallable]] +DictCallable = dict[str, Callable] +ConvertType = Union[Callable, DictCallable, dict[str, DictCallable]] class AnnCollection(_ConcatViewMixin, _IterateViewMixin): @@ -661,23 +650,21 @@ class AnnCollection(_ConcatViewMixin, _IterateViewMixin): def __init__( self, - adatas: Union[Sequence[AnnData], Dict[str, AnnData]], - join_obs: Optional[Literal["inner", "outer"]] = "inner", - join_obsm: Optional[Literal["inner"]] = None, - join_vars: Optional[Literal["inner"]] = None, - label: Optional[str] = None, - keys: Optional[Sequence[str]] = None, - index_unique: Optional[str] = None, - convert: Optional[ConvertType] = None, + adatas: Sequence[AnnData] | dict[str, AnnData], + join_obs: Literal["inner", "outer"] | None = "inner", + join_obsm: Literal["inner"] | None = None, + join_vars: Literal["inner"] | None = None, + label: str | None = None, + keys: Sequence[str] | None = None, + index_unique: str | None = None, + convert: ConvertType | None = None, harmonize_dtypes: bool = True, indices_strict: bool = True, ): if isinstance(adatas, Mapping): if keys is not None: - raise TypeError( - "Cannot specify categories in both mapping keys and using `keys`. " - "Only specify this once." - ) + msg = "Cannot specify categories in both mapping keys and using `keys`. Only specify this once." 
+ raise TypeError(msg) keys, adatas = list(adatas.keys()), list(adatas.values()) else: adatas = list(adatas) @@ -685,7 +672,7 @@ def __init__( # check if the variables are the same in all adatas self.adatas_vidx = [None for adata in adatas] vars_names_list = [adata.var_names for adata in adatas] - vars_eq = all([adatas[0].var_names.equals(vrs) for vrs in vars_names_list[1:]]) + vars_eq = all(adatas[0].var_names.equals(vrs) for vrs in vars_names_list[1:]) if vars_eq: self.var_names = adatas[0].var_names elif join_vars == "inner": @@ -699,10 +686,8 @@ def __init__( self.adatas_vidx.append(adata_vidx) self.var_names = var_names else: - raise ValueError( - "Adatas have different variables. " - "Please specify join_vars='inner' for intersection." - ) + msg = "Adatas have different variables. Please specify join_vars='inner' for intersection." + raise ValueError(msg) concat_indices = pd.concat( [pd.Series(a.obs_names) for a in adatas], ignore_index=True @@ -761,7 +746,7 @@ def __init__( a0_attr = getattr(adatas[0], attr) new_keys = [] for key in keys: - if key in ai_attr.keys(): + if key in ai_attr: a0_ashape = a0_attr[key].shape ai_ashape = ai_attr[key].shape if ( @@ -897,7 +882,7 @@ def lazy_attr(self, attr, key=None): @property def has_backed(self): """`True` if `adatas` have backed AnnData objects, `False` otherwise.""" - return any([adata.isbacked for adata in self.adatas]) + return any(adata.isbacked for adata in self.adatas) @property def attrs_keys(self): @@ -929,7 +914,7 @@ def __repr__(self): class LazyAttrData(_IterateViewMixin): - def __init__(self, adset: AnnCollection, attr: str, key: Optional[str] = None): + def __init__(self, adset: AnnCollection, attr: str, key: str | None = None): self.adset = adset self.attr = attr self.key = key @@ -943,10 +928,7 @@ def __getitem__(self, index): if len(index) > 1: vidx = index[1] - if oidx is None: - view = self.adset[index] - else: - view = self.adset[oidx] + view = self.adset[index] if oidx is None else self.adset[oidx] attr_arr = getattr(view, self.attr) if self.key is not None: attr_arr = attr_arr[self.key] diff --git a/anndata/experimental/pytorch/__init__.py b/anndata/experimental/pytorch/__init__.py index d4fbffce7..36c9441fe 100644 --- a/anndata/experimental/pytorch/__init__.py +++ b/anndata/experimental/pytorch/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from ._annloader import AnnLoader __all__ = ["AnnLoader"] diff --git a/anndata/experimental/pytorch/_annloader.py b/anndata/experimental/pytorch/_annloader.py index 15ad53a52..45b12f38c 100644 --- a/anndata/experimental/pytorch/_annloader.py +++ b/anndata/experimental/pytorch/_annloader.py @@ -1,18 +1,22 @@ -from scipy.sparse import issparse -from math import ceil +from __future__ import annotations + from copy import copy from functools import partial -from typing import Dict, Union, Sequence +from math import ceil +from typing import TYPE_CHECKING import numpy as np +from scipy.sparse import issparse from ..._core.anndata import AnnData from ..multi_files._anncollection import AnnCollection, _ConcatViewMixin +if TYPE_CHECKING: + from collections.abc import Sequence try: import torch - from torch.utils.data import Sampler, BatchSampler, DataLoader + from torch.utils.data import BatchSampler, DataLoader, Sampler except ImportError: Sampler, BatchSampler, DataLoader = object, object, object @@ -83,10 +87,7 @@ def compose_convert(arr): if attr not in convert: new_convert[attr] = top_convert else: - if isinstance(attrs_keys, list): - as_ks = None - else: - as_ks = 
attrs_keys[attr] + as_ks = None if isinstance(attrs_keys, list) else attrs_keys[attr] new_convert[attr] = _convert_on_top(convert[attr], top_convert, as_ks) return new_convert @@ -123,7 +124,7 @@ class AnnLoader(DataLoader): def __init__( self, - adatas: Union[Sequence[AnnData], Dict[str, AnnData]], + adatas: Sequence[AnnData] | dict[str, AnnData], batch_size: int = 1, shuffle: bool = False, use_default_converter: bool = True, @@ -133,11 +134,7 @@ def __init__( if isinstance(adatas, AnnData): adatas = [adatas] - if ( - isinstance(adatas, list) - or isinstance(adatas, tuple) - or isinstance(adatas, dict) - ): + if isinstance(adatas, (list, tuple, dict)): join_obs = kwargs.pop("join_obs", "inner") join_obsm = kwargs.pop("join_obsm", None) label = kwargs.pop("label", None) @@ -162,7 +159,8 @@ def __init__( elif isinstance(adatas, _ConcatViewMixin): dataset = copy(adatas) else: - raise ValueError("adata should be of type AnnData or AnnCollection.") + msg = "adata should be of type AnnData or AnnCollection." + raise ValueError(msg) if use_default_converter: pin_memory = kwargs.pop("pin_memory", False) diff --git a/anndata/logging.py b/anndata/logging.py index f5feac09c..a2a890c51 100644 --- a/anndata/logging.py +++ b/anndata/logging.py @@ -1,5 +1,7 @@ -import os +from __future__ import annotations + import logging +import os _previous_memory_usage = None diff --git a/anndata/readwrite.py b/anndata/readwrite.py index dfe5a7074..f3d07f732 100644 --- a/anndata/readwrite.py +++ b/anndata/readwrite.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from warnings import warn warn("Please only import from anndata, not anndata.readwrite", DeprecationWarning) diff --git a/anndata/tests/conftest.py b/anndata/tests/conftest.py index ef8a3b50e..e16197cc7 100644 --- a/anndata/tests/conftest.py +++ b/anndata/tests/conftest.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import warnings import pytest @@ -5,7 +7,6 @@ import anndata from anndata.tests.helpers import subset_func # noqa: F401 - # TODO: Should be done in pyproject.toml, see anndata/conftest.py warnings.filterwarnings("ignore", category=anndata.OldFormatWarning) diff --git a/anndata/tests/helpers.py b/anndata/tests/helpers.py index d87d3285c..c4c65ec61 100644 --- a/anndata/tests/helpers.py +++ b/anndata/tests/helpers.py @@ -1,34 +1,33 @@ from __future__ import annotations -from contextlib import contextmanager -from functools import singledispatch, wraps, partial +import random import re -from string import ascii_letters -from typing import Tuple, Optional, Type -from collections.abc import Mapping, Collection import warnings +from collections.abc import Collection, Mapping +from contextlib import contextmanager +from functools import partial, singledispatch, wraps +from string import ascii_letters import h5py import numpy as np import pandas as pd -from pandas.api.types import is_numeric_dtype import pytest +from pandas.api.types import is_numeric_dtype from scipy import sparse -import random from anndata import AnnData, Raw -from anndata._core.views import ArrayView -from anndata._core.sparse_dataset import BaseCompressedSparseDataset from anndata._core.aligned_mapping import AlignedMapping -from anndata.utils import asarray +from anndata._core.sparse_dataset import BaseCompressedSparseDataset +from anndata._core.views import ArrayView from anndata.compat import ( AwkArray, - DaskArray, - CupySparseMatrix, CupyArray, CupyCSCMatrix, CupyCSRMatrix, + CupySparseMatrix, + DaskArray, ) +from anndata.utils import asarray # Give this to 
gen_adata when dask array support is expected. GEN_ADATA_DASK_ARGS = dict( @@ -110,7 +109,8 @@ def gen_awkward(shape, dtype=np.int32): import awkward as ak if shape[0] is None: - raise ValueError("The first dimension must be fixed-length.") + msg = "The first dimension must be fixed-length." + raise ValueError(msg) rng = random.Random(123) shape = np.array(shape) @@ -152,24 +152,24 @@ def gen_typed_df_t2_size(m, n, index=None, columns=None) -> pd.DataFrame: # TODO: Use hypothesis for this? def gen_adata( - shape: Tuple[int, int], + shape: tuple[int, int], X_type=sparse.csr_matrix, X_dtype=np.float32, # obs_dtypes, # var_dtypes, - obsm_types: "Collection[Type]" = ( + obsm_types: Collection[type] = ( sparse.csr_matrix, np.ndarray, pd.DataFrame, AwkArray, ), - varm_types: "Collection[Type]" = ( + varm_types: Collection[type] = ( sparse.csr_matrix, np.ndarray, pd.DataFrame, AwkArray, ), - layers_types: "Collection[Type]" = (sparse.csr_matrix, np.ndarray, pd.DataFrame), + layers_types: Collection[type] = (sparse.csr_matrix, np.ndarray, pd.DataFrame), sparse_fmt: str = "csr", ) -> AnnData: """\ @@ -295,9 +295,8 @@ def spmatrix_bool_subset(index, min_size=2): def array_subset(index, min_size=2): if len(index) < min_size: - raise ValueError( - f"min_size (={min_size}) must be smaller than len(index) (={len(index)}" - ) + msg = f"min_size (={min_size}) must be smaller than len(index) (={len(index)})" + raise ValueError(msg) return np.random.choice( index, size=np.random.randint(min_size, len(index), ()), replace=False ) @@ -305,9 +304,8 @@ def array_int_subset(index, min_size=2): def array_int_subset(index, min_size=2): if len(index) < min_size: - raise ValueError( - f"min_size (={min_size}) must be smaller than len(index) (={len(index)}" - ) + msg = f"min_size (={min_size}) must be smaller than len(index) (={len(index)})" + raise ValueError(msg) return np.random.choice( np.arange(len(index)), size=np.random.randint(min_size, len(index), ()), @@ -475,7 +473,7 @@ def assert_equal_awkarray(a, b, exact=False, elem_name=None): @assert_equal.register(Mapping) def assert_equal_mapping(a, b, exact=False, elem_name=None): assert set(a.keys()) == set(b.keys()), format_msg(elem_name) - for k in a.keys(): + for k in a: if elem_name is None: elem_name = "" assert_equal(a[k], b[k], exact, f"{elem_name}/{k}") @@ -531,7 +529,7 @@ def assert_is_not_none(x): # can't put an assert in a lambda @assert_equal.register(AnnData) def assert_adata_equal( - a: AnnData, b: AnnData, exact: bool = False, elem_name: Optional[str] = None + a: AnnData, b: AnnData, exact: bool = False, elem_name: str | None = None ): """\ Check whether two AnnData objects are equivalent, @@ -602,7 +600,7 @@ def _(a): @contextmanager -def pytest_8_raises(exc_cls, *, match: str | re.Pattern = None): +def pytest_8_raises(exc_cls, *, match: str | re.Pattern | None = None): """Error handling using pytest 8's support for __notes__.
See: https://github.com/pytest-dev/pytest/pull/11227 @@ -645,9 +643,8 @@ def as_cupy_type(val, typ=None): elif issubclass(input_typ, sparse.csc_matrix): typ = CupyCSCMatrix else: - raise NotImplementedError( - f"No default target type for input type {input_typ}" - ) + msg = f"No default target type for input type {input_typ}" + raise NotImplementedError(msg) if issubclass(typ, CupyArray): import cupy as cp @@ -656,25 +653,24 @@ def as_cupy_type(val, typ=None): val = val.toarray() return cp.array(val) elif issubclass(typ, CupyCSRMatrix): - import cupyx.scipy.sparse as cpsparse import cupy as cp + import cupyx.scipy.sparse as cpsparse if isinstance(val, np.ndarray): return cpsparse.csr_matrix(cp.array(val)) else: return cpsparse.csr_matrix(val) elif issubclass(typ, CupyCSCMatrix): - import cupyx.scipy.sparse as cpsparse import cupy as cp + import cupyx.scipy.sparse as cpsparse if isinstance(val, np.ndarray): return cpsparse.csc_matrix(cp.array(val)) else: return cpsparse.csc_matrix(val) else: - raise NotImplementedError( - f"Conversion from {type(val)} to {typ} not implemented" - ) + msg = f"Conversion from {type(val)} to {typ} not implemented" + raise NotImplementedError(msg) BASE_MATRIX_PARAMS = [ diff --git a/anndata/tests/test_anncollection.py b/anndata/tests/test_anncollection.py index b8def9508..aaef199e7 100644 --- a/anndata/tests/test_anncollection.py +++ b/anndata/tests/test_anncollection.py @@ -1,10 +1,11 @@ -import pytest -import anndata as ad -import numpy as np +from __future__ import annotations +import numpy as np +import pytest from scipy.sparse import csr_matrix, issparse - from sklearn.preprocessing import LabelEncoder + +import anndata as ad from anndata.experimental.multi_files import AnnCollection _dense = lambda a: a.toarray() if issparse(a) else a diff --git a/anndata/tests/test_annot.py b/anndata/tests/test_annot.py index 025c7d5a6..0ea609906 100644 --- a/anndata/tests/test_annot.py +++ b/anndata/tests/test_annot.py @@ -1,12 +1,13 @@ """Test handling of values in `obs`/ `var`""" -from natsort import natsorted +from __future__ import annotations + import numpy as np import pandas as pd +import pytest +from natsort import natsorted import anndata as ad -import pytest - @pytest.mark.parametrize("dtype", [object, "string"]) def test_str_to_categorical(dtype): diff --git a/anndata/tests/test_awkward.py b/anndata/tests/test_awkward.py index 87280d5a2..993fb91de 100644 --- a/anndata/tests/test_awkward.py +++ b/anndata/tests/test_awkward.py @@ -1,15 +1,16 @@ """Tests related to awkward arrays""" -import pytest +from __future__ import annotations + import numpy as np import numpy.testing as npt +import pandas as pd +import pytest -from anndata.tests.helpers import assert_equal, gen_adata, gen_awkward +import anndata +from anndata import AnnData, ImplicitModificationWarning, read_h5ad from anndata.compat import awkward as ak -from anndata import ImplicitModificationWarning +from anndata.tests.helpers import assert_equal, gen_adata, gen_awkward from anndata.utils import dim_len -from anndata import AnnData, read_h5ad -import anndata -import pandas as pd @pytest.mark.parametrize( diff --git a/anndata/tests/test_backed_sparse.py b/anndata/tests/test_backed_sparse.py index 2809b7c9f..1450641bf 100644 --- a/anndata/tests/test_backed_sparse.py +++ b/anndata/tests/test_backed_sparse.py @@ -1,15 +1,16 @@ +from __future__ import annotations + import h5py import numpy as np import pytest +import zarr from scipy import sparse import anndata as ad from anndata._core.anndata import AnnData 
from anndata._core.sparse_dataset import sparse_dataset -from anndata.tests.helpers import assert_equal, subset_func from anndata.experimental import read_dispatched - -import zarr +from anndata.tests.helpers import assert_equal, subset_func subset_func2 = subset_func @@ -92,10 +93,7 @@ def test_dataset_append_memory(tmp_path, sparse_format, append_method, diskfmt): ) # diskfmt is either h5ad or zarr a = sparse_format(sparse.random(100, 100)) b = sparse_format(sparse.random(100, 100)) - if diskfmt == "zarr": - f = zarr.open_group(path, "a") - else: - f = h5py.File(path, "a") + f = zarr.open_group(path, "a") if diskfmt == "zarr" else h5py.File(path, "a") ad._io.specs.write_elem(f, "mtx", a) diskmtx = sparse_dataset(f["mtx"]) @@ -121,10 +119,7 @@ def test_dataset_append_disk(tmp_path, sparse_format, append_method, diskfmt): a = sparse_format(sparse.random(10, 10)) b = sparse_format(sparse.random(10, 10)) - if diskfmt == "zarr": - f = zarr.open_group(path, "a") - else: - f = h5py.File(path, "a") + f = zarr.open_group(path, "a") if diskfmt == "zarr" else h5py.File(path, "a") ad._io.specs.write_elem(f, "a", a) ad._io.specs.write_elem(f, "b", b) a_disk = sparse_dataset(f["a"]) @@ -152,10 +147,7 @@ def test_wrong_shape(tmp_path, sparse_format, a_shape, b_shape, diskfmt): a_mem = sparse.random(*a_shape, format=sparse_format) b_mem = sparse.random(*b_shape, format=sparse_format) - if diskfmt == "zarr": - f = zarr.open_group(path, "a") - else: - f = h5py.File(path, "a") + f = zarr.open_group(path, "a") if diskfmt == "zarr" else h5py.File(path, "a") ad._io.specs.write_elem(f, "a", a_mem) ad._io.specs.write_elem(f, "b", b_mem) @@ -172,10 +164,7 @@ def test_wrong_formats(tmp_path, diskfmt): ) # diskfmt is either h5ad or zarr base = sparse.random(100, 100, format="csr") - if diskfmt == "zarr": - f = zarr.open_group(path, "a") - else: - f = h5py.File(path, "a") + f = zarr.open_group(path, "a") if diskfmt == "zarr" else h5py.File(path, "a") ad._io.specs.write_elem(f, "base", base) disk_mtx = sparse_dataset(f["base"]) diff --git a/anndata/tests/test_base.py b/anndata/tests/test_base.py index ffdc2a04d..6911b870f 100644 --- a/anndata/tests/test_base.py +++ b/anndata/tests/test_base.py @@ -1,14 +1,14 @@ from __future__ import annotations -from functools import partial -from itertools import product import re import warnings +from functools import partial +from itertools import product import numpy as np -from numpy import ma import pandas as pd import pytest +from numpy import ma from scipy import sparse as sp from scipy.sparse import csr_matrix, issparse @@ -16,7 +16,6 @@ from anndata._core.raw import Raw from anndata.tests.helpers import assert_equal, gen_adata - # some test objects that we use below adata_dense = AnnData(np.array([[1, 2], [3, 4]])) adata_dense.layers["test"] = adata_dense.X @@ -642,7 +641,7 @@ def assert_eq_not_id(a, b): map_copy = getattr(adata_copy, attr) assert map_sprs is not map_copy assert_eq_not_id(map_sprs.keys(), map_copy.keys()) - for key in map_sprs.keys(): + for key in map_sprs: assert_eq_not_id(map_sprs[key], map_copy[key]) diff --git a/anndata/tests/test_concatenate.py b/anndata/tests/test_concatenate.py index 7660638e8..6ad1c9992 100644 --- a/anndata/tests/test_concatenate.py +++ b/anndata/tests/test_concatenate.py @@ -1,33 +1,34 @@ +from __future__ import annotations + +import warnings from collections.abc import Hashable from copy import deepcopy -from itertools import chain, product from functools import partial, singledispatch -from typing import Any, List, Callable 
-import warnings +from itertools import chain, product +from typing import Any, Callable import numpy as np -from numpy import ma import pandas as pd import pytest +from boltons.iterutils import default_exit, remap, research +from numpy import ma from scipy import sparse -from boltons.iterutils import research, remap, default_exit - from anndata import AnnData, Raw, concat -from anndata._core.index import _subset from anndata._core import merge +from anndata._core.index import _subset +from anndata.compat import AwkArray, DaskArray from anndata.tests import helpers from anndata.tests.helpers import ( - assert_equal, - as_dense_dask_array, - gen_adata, - GEN_ADATA_DASK_ARGS, BASE_MATRIX_PARAMS, - DASK_MATRIX_PARAMS, CUPY_MATRIX_PARAMS, + DASK_MATRIX_PARAMS, + GEN_ADATA_DASK_ARGS, + as_dense_dask_array, + assert_equal, + gen_adata, ) from anndata.utils import asarray -from anndata.compat import DaskArray, AwkArray @singledispatch @@ -958,7 +959,7 @@ def map_values(mapping, path, key, old_parent, new_parent, new_items): return ret -def permute_nested_values(dicts: "List[dict]", gen_val: "Callable[[int], Any]"): +def permute_nested_values(dicts: list[dict], gen_val: Callable[[int], Any]): """ This function permutes the values of a nested mapping, for testing that out merge method work regardless of the values types. @@ -1081,7 +1082,7 @@ def test_concatenate_uns(unss, merge_strategy, result, value_gen): """ # So we can see what the initial pattern was meant to be print(merge_strategy, "\n", unss, "\n", result) - result, *unss = permute_nested_values([result] + unss, value_gen) + result, *unss = permute_nested_values([result, *unss], value_gen) adatas = [uns_ad(uns) for uns in unss] with pytest.warns(FutureWarning, match=r"concatenate method is deprecated"): merged = AnnData.concatenate(*adatas, uns_merge=merge_strategy).uns @@ -1301,7 +1302,7 @@ def test_concat_size_0_dim(axis, join_type, merge_strategy, shape): check_filled_like(result.X[axis_idx], elem_name="X") check_filled_like(result.X[altaxis_idx], elem_name="X") - for k, elem in getattr(result, "layers").items(): + for k, elem in result.layers.items(): check_filled_like(elem[axis_idx], elem_name=f"layers/{k}") check_filled_like(elem[altaxis_idx], elem_name=f"layers/{k}") @@ -1384,9 +1385,10 @@ def test_concat_X_dtype(): # Tests how dask plays with other types on concatenation. 
def test_concat_different_types_dask(merge_strategy, array_type): + import dask.array as da from scipy import sparse + import anndata as ad - import dask.array as da varm_array = sparse.random(5, 20, density=0.5, format="csr") diff --git a/anndata/tests/test_concatenate_disk.py b/anndata/tests/test_concatenate_disk.py index 1ffbb63ef..bf76640ad 100644 --- a/anndata/tests/test_concatenate_disk.py +++ b/anndata/tests/test_concatenate_disk.py @@ -1,23 +1,21 @@ -from typing import Mapping +from __future__ import annotations + +from collections.abc import Mapping import numpy as np import pandas as pd import pytest from scipy import sparse -from anndata.experimental.merge import concat_on_disk, as_group -from anndata.experimental import write_elem, read_elem - from anndata import AnnData, concat +from anndata.experimental import read_elem, write_elem +from anndata.experimental.merge import as_group, concat_on_disk from anndata.tests.helpers import ( assert_equal, gen_adata, ) - - from anndata.utils import asarray - GEN_ADATA_OOC_CONCAT_ARGS = dict( obsm_types=( sparse.csr_matrix, @@ -101,7 +99,8 @@ def get_array_type(array_type, axis): if array_type == "array": return asarray else: - raise NotImplementedError(f"array_type {array_type} not implemented") + msg = f"array_type {array_type} not implemented" + raise NotImplementedError(msg) def test_anndatas_without_reindex( diff --git a/anndata/tests/test_dask.py b/anndata/tests/test_dask.py index 5a20cc5d1..c0439e0c1 100644 --- a/anndata/tests/test_dask.py +++ b/anndata/tests/test_dask.py @@ -1,19 +1,22 @@ """ For tests using dask """ -import anndata as ad +from __future__ import annotations + import pandas as pd -from anndata._core.anndata import AnnData import pytest + +import anndata as ad +from anndata._core.anndata import AnnData +from anndata.compat import DaskArray +from anndata.experimental import read_elem, write_elem +from anndata.experimental.merge import as_group from anndata.tests.helpers import ( - as_dense_dask_array, GEN_ADATA_DASK_ARGS, - gen_adata, + as_dense_dask_array, assert_equal, + gen_adata, ) -from anndata.experimental import write_elem, read_elem -from anndata.experimental.merge import as_group -from anndata.compat import DaskArray pytest.importorskip("dask.array") @@ -242,6 +245,7 @@ def test_assign_X(adata): """Check if assignment works""" import dask.array as da import numpy as np + from anndata.compat import DaskArray adata.X = da.ones(adata.X.shape) @@ -305,8 +309,8 @@ def test_dask_to_memory_copy_unbacked(): def test_to_memory_raw(): - import numpy as np import dask.array as da + import numpy as np orig = gen_adata((20, 10), **GEN_ADATA_DASK_ARGS) orig.X = da.ones((20, 10)) @@ -326,8 +330,8 @@ def test_to_memory_raw(): def test_to_memory_copy_raw(): - import numpy as np import dask.array as da + import numpy as np orig = gen_adata((20, 10), **GEN_ADATA_DASK_ARGS) orig.X = da.ones((20, 10)) diff --git a/anndata/tests/test_dask_view_mem.py b/anndata/tests/test_dask_view_mem.py index d1648d02d..b713a23eb 100644 --- a/anndata/tests/test_dask_view_mem.py +++ b/anndata/tests/test_dask_view_mem.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import pytest import anndata as ad @@ -42,8 +44,8 @@ def alloc_cache(): da.random.random(*size), layers=dict(m=da.random.random(*size)), obsm=dict(m=da.random.random(*size)), - obs=dict(m=da.random.random((N))), - var=dict(m=da.random.random((N))), + obs=dict(m=da.random.random(N)), + var=dict(m=da.random.random(N)), varm=dict(m=da.random.random(*size)), ) subset = adata[:10, 
:][:, :10] diff --git a/anndata/tests/test_deprecations.py b/anndata/tests/test_deprecations.py index d632e3ea2..1b60471d7 100644 --- a/anndata/tests/test_deprecations.py +++ b/anndata/tests/test_deprecations.py @@ -3,6 +3,8 @@ This includes correct behaviour as well as throwing warnings. """ +from __future__ import annotations + import warnings import h5py @@ -10,9 +12,8 @@ import pytest from scipy import sparse -from anndata import AnnData import anndata as ad - +from anndata import AnnData from anndata.tests.helpers import assert_equal @@ -101,12 +102,11 @@ def test_dtype_warning(): def test_deprecated_write_attribute(tmp_path): pth = tmp_path / "file.h5" A = np.random.randn(20, 10) - from anndata._io.utils import read_attribute, write_attribute from anndata._io.specs import read_elem + from anndata._io.utils import read_attribute, write_attribute - with h5py.File(pth, "w") as f: - with pytest.warns(DeprecationWarning, match="write_elem"): - write_attribute(f, "written_attribute", A) + with h5py.File(pth, "w") as f, pytest.warns(DeprecationWarning, match="write_elem"): + write_attribute(f, "written_attribute", A) with h5py.File(pth, "r") as f: elem_A = read_elem(f["written_attribute"]) @@ -129,6 +129,7 @@ def test_deprecated_read(tmp_path): def test_deprecated_sparse_dataset_values(): import zarr + from anndata.experimental import sparse_dataset, write_elem mtx = sparse.random(50, 50, format="csr") @@ -138,7 +139,7 @@ def test_deprecated_sparse_dataset_values(): mtx_backed = sparse_dataset(g["mtx"]) with pytest.warns(FutureWarning, match="Please use .to_memory()"): - mtx_backed.value + mtx_backed.value # noqa: B018 with pytest.warns(FutureWarning, match="Please use .format"): - mtx_backed.format_str + mtx_backed.format_str # noqa: B018 diff --git a/anndata/tests/test_get_vector.py b/anndata/tests/test_get_vector.py index ca2ce18a7..baf0fd7d6 100644 --- a/anndata/tests/test_get_vector.py +++ b/anndata/tests/test_get_vector.py @@ -1,7 +1,9 @@ +from __future__ import annotations + import numpy as np import pandas as pd -from scipy import sparse import pytest +from scipy import sparse import anndata as ad diff --git a/anndata/tests/test_gpu.py b/anndata/tests/test_gpu.py index 434567ca8..c6f49a696 100644 --- a/anndata/tests/test_gpu.py +++ b/anndata/tests/test_gpu.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import pytest from scipy import sparse @@ -16,8 +18,8 @@ def test_gpu(): @pytest.mark.gpu def test_adata_raw_gpu(): - from cupyx.scipy import sparse as cupy_sparse import cupy as cp + from cupyx.scipy import sparse as cupy_sparse adata = AnnData( X=cupy_sparse.random(500, 50, density=0.01, format="csr", dtype=cp.float32) @@ -28,8 +30,8 @@ def test_adata_raw_gpu(): @pytest.mark.gpu def test_raw_gpu(): - from cupyx.scipy import sparse as cupy_sparse import cupy as cp + from cupyx.scipy import sparse as cupy_sparse adata = AnnData( X=cupy_sparse.random(500, 50, density=0.01, format="csr", dtype=cp.float32) diff --git a/anndata/tests/test_hdf5_backing.py b/anndata/tests/test_hdf5_backing.py index ab308e363..61c0c905c 100644 --- a/anndata/tests/test_hdf5_backing.py +++ b/anndata/tests/test_hdf5_backing.py @@ -1,16 +1,18 @@ +from __future__ import annotations + from pathlib import Path import joblib -import pytest import numpy as np +import pytest from scipy import sparse import anndata as ad from anndata.tests.helpers import ( - as_dense_dask_array, GEN_ADATA_DASK_ARGS, - gen_adata, + as_dense_dask_array, assert_equal, + gen_adata, subset_func, ) from anndata.utils import asarray 
diff --git a/anndata/tests/test_helpers.py b/anndata/tests/test_helpers.py index f540c43f4..0b25dc3f1 100644 --- a/anndata/tests/test_helpers.py +++ b/anndata/tests/test_helpers.py @@ -1,21 +1,23 @@ +from __future__ import annotations + from string import ascii_letters +import numpy as np import pandas as pd import pytest -import numpy as np from scipy import sparse import anndata as ad +from anndata.compat import add_note from anndata.tests.helpers import ( + asarray, assert_equal, - gen_awkward, - report_name, gen_adata, - asarray, + gen_awkward, pytest_8_raises, + report_name, ) from anndata.utils import dim_len -from anndata.compat import add_note # Testing to see if all error types can have the key name appended. # Currently fails for 22/118 since they have required arguments. Not sure what to do about that. @@ -79,7 +81,8 @@ def test_gen_awkward(shape, datashape): # Does this work for every warning? def test_report_name(): def raise_error(): - raise Exception("an error occurred!") + msg = "an error occurred!" + raise Exception(msg) letters = np.array(list(ascii_letters)) tag = "".join(np.random.permutation(letters)) @@ -116,7 +119,7 @@ def test_assert_equal(): # exact=False, # ) adata2 = adata.copy() - to_modify = list(adata2.layers.keys())[0] + to_modify = next(iter(adata2.layers.keys())) del adata2.layers[to_modify] with pytest.raises(AssertionError) as missing_layer_error: assert_equal(adata, adata2) @@ -272,6 +275,5 @@ def test_check_error_notes_success(error, match): ], ) def test_check_error_notes_failure(error, match): - with pytest.raises(AssertionError): - with pytest_8_raises(Exception, match=match): - raise error + with pytest.raises(AssertionError), pytest_8_raises(Exception, match=match): + raise error diff --git a/anndata/tests/test_inplace_subset.py b/anndata/tests/test_inplace_subset.py index b90421965..110d2574a 100644 --- a/anndata/tests/test_inplace_subset.py +++ b/anndata/tests/test_inplace_subset.py @@ -1,11 +1,13 @@ +from __future__ import annotations + import numpy as np import pytest from scipy import sparse from anndata.tests.helpers import ( + as_dense_dask_array, assert_equal, gen_adata, - as_dense_dask_array, ) from anndata.utils import asarray diff --git a/anndata/tests/test_io_backwards_compat.py b/anndata/tests/test_io_backwards_compat.py index a060d1779..fb12c8161 100644 --- a/anndata/tests/test_io_backwards_compat.py +++ b/anndata/tests/test_io_backwards_compat.py @@ -1,10 +1,12 @@ +from __future__ import annotations + from pathlib import Path +import pandas as pd import pytest +from scipy import sparse import anndata as ad -import pandas as pd -from scipy import sparse from anndata.tests.helpers import assert_equal ARCHIVE_PTH = Path(__file__).parent / "data/archives" diff --git a/anndata/tests/test_io_conversion.py b/anndata/tests/test_io_conversion.py index dd5e9ab61..29a5d27e9 100644 --- a/anndata/tests/test_io_conversion.py +++ b/anndata/tests/test_io_conversion.py @@ -1,13 +1,15 @@ """\ This file contains tests for conversion made during io. 
""" +from __future__ import annotations + import h5py import numpy as np import pytest from scipy import sparse import anndata as ad -from anndata.tests.helpers import gen_adata, assert_equal +from anndata.tests.helpers import assert_equal, gen_adata @pytest.fixture( diff --git a/anndata/tests/test_io_dispatched.py b/anndata/tests/test_io_dispatched.py index 4f6a526b3..f3c739158 100644 --- a/anndata/tests/test_io_dispatched.py +++ b/anndata/tests/test_io_dispatched.py @@ -1,17 +1,19 @@ +from __future__ import annotations + import re -from scipy import sparse import h5py import zarr +from scipy import sparse import anndata as ad from anndata.experimental import ( read_dispatched, - write_dispatched, read_elem, + write_dispatched, write_elem, ) -from anndata.tests.helpers import gen_adata, assert_equal +from anndata.tests.helpers import assert_equal, gen_adata def test_read_dispatched_w_regex(): @@ -79,7 +81,7 @@ def test_read_dispatched_null_case(): def test_write_dispatched_chunks(): - from itertools import repeat, chain + from itertools import chain, repeat def determine_chunks(elem_shape, specified_chunks): chunk_iterator = chain(specified_chunks, repeat(None)) diff --git a/anndata/tests/test_io_elementwise.py b/anndata/tests/test_io_elementwise.py index 48f93c405..bfb4ba4be 100644 --- a/anndata/tests/test_io_elementwise.py +++ b/anndata/tests/test_io_elementwise.py @@ -9,19 +9,18 @@ import numpy as np import pandas as pd import pytest -from scipy import sparse import zarr +from scipy import sparse import anndata as ad -from anndata._io.specs import _REGISTRY, get_spec, IOSpec +from anndata._io.specs import _REGISTRY, IOSpec, get_spec, read_elem, write_elem from anndata._io.specs.registry import IORegistryError -from anndata.compat import _read_attr, H5Group, ZarrGroup -from anndata._io.specs import write_elem, read_elem +from anndata.compat import H5Group, ZarrGroup, _read_attr from anndata.tests.helpers import ( - assert_equal, as_cupy_type, - pytest_8_raises, + assert_equal, gen_adata, + pytest_8_raises, ) @@ -38,7 +37,7 @@ def store(request, tmp_path) -> H5Group | ZarrGroup: elif request.param == "zarr": store = zarr.open(tmp_path / "test.zarr", "w") else: - assert False + raise AssertionError() try: yield store @@ -132,7 +131,7 @@ def test_io_spec_raw(store): write_elem(store, "adata", adata) - assert "raw" == _read_attr(store["adata/raw"].attrs, "encoding-type") + assert _read_attr(store["adata/raw"].attrs, "encoding-type") == "raw" from_disk = read_elem(store["adata"]) assert_equal(from_disk.raw, adata.raw) @@ -144,7 +143,7 @@ def test_write_anndata_to_root(store): write_elem(store, "/", adata) from_disk = read_elem(store) - assert "anndata" == _read_attr(store.attrs, "encoding-type") + assert _read_attr(store.attrs, "encoding-type") == "anndata" assert_equal(from_disk, adata) @@ -270,10 +269,7 @@ def test_read_zarr_from_group(tmp_path, consolidated): if consolidated: zarr.convenience.consolidate_metadata(z.store) - if consolidated: - read_func = zarr.open_consolidated - else: - read_func = zarr.open + read_func = zarr.open_consolidated if consolidated else zarr.open with read_func(pth) as z: assert_equal(ad.read_zarr(z["table/table"]), adata) diff --git a/anndata/tests/test_io_partial.py b/anndata/tests/test_io_partial.py index b75e5ccf1..e2709ac78 100644 --- a/anndata/tests/test_io_partial.py +++ b/anndata/tests/test_io_partial.py @@ -1,14 +1,18 @@ +from __future__ import annotations + from importlib.util import find_spec -from anndata import AnnData -from anndata._io.specs 
import read_elem -from anndata._io.specs.registry import read_elem_partial -from anndata._io import write_h5ad, write_zarr -from scipy.sparse import csr_matrix from pathlib import Path + +import h5py import numpy as np import pytest import zarr -import h5py +from scipy.sparse import csr_matrix + +from anndata import AnnData +from anndata._io import write_h5ad, write_zarr +from anndata._io.specs import read_elem +from anndata._io.specs.registry import read_elem_partial X = np.array([[1.0, 0.0, 3.0], [4.0, 0.0, 6.0], [0.0, 8.0, 0.0]], dtype="float32") X_check = np.array([[4.0, 0.0], [0.0, 8.0]], dtype="float32") @@ -70,15 +74,15 @@ def test_read_partial_adata(tmp_path, accessor): assert np.all(part.keys() == adata_sbs.var.keys()) assert np.all(part.index == adata_sbs.var.index) - for key in storage["obsm"].keys(): + for key in storage["obsm"]: part = read_elem_partial(storage["obsm"][key], indices=(obs_idx,)) assert np.all(part == adata_sbs.obsm[key]) - for key in storage["varm"].keys(): + for key in storage["varm"]: part = read_elem_partial(storage["varm"][key], indices=(var_idx,)) np.testing.assert_equal(part, adata_sbs.varm[key]) - for key in storage["obsp"].keys(): + for key in storage["obsp"]: part = read_elem_partial(storage["obsp"][key], indices=(obs_idx, obs_idx)) part = part.toarray() assert np.all(part == adata_sbs.obsp[key]) diff --git a/anndata/tests/test_io_utils.py b/anndata/tests/test_io_utils.py index 637e7e5b5..9f7428900 100644 --- a/anndata/tests/test_io_utils.py +++ b/anndata/tests/test_io_utils.py @@ -1,16 +1,18 @@ +from __future__ import annotations + from contextlib import nullcontext -import pytest -import zarr import h5py import pandas as pd +import pytest +import zarr import anndata as ad from anndata._io.specs.registry import IORegistryError -from anndata.compat import _clean_uns from anndata._io.utils import ( report_read_key_on_error, ) +from anndata.compat import _clean_uns from anndata.experimental import read_elem, write_elem from anndata.tests.helpers import pytest_8_raises diff --git a/anndata/tests/test_io_warnings.py b/anndata/tests/test_io_warnings.py index 284d86ecc..dfc33ccf1 100644 --- a/anndata/tests/test_io_warnings.py +++ b/anndata/tests/test_io_warnings.py @@ -1,6 +1,8 @@ +from __future__ import annotations + +import warnings from importlib.util import find_spec from pathlib import Path -import warnings import pytest diff --git a/anndata/tests/test_layers.py b/anndata/tests/test_layers.py index 4f9f00973..4b6a7f287 100644 --- a/anndata/tests/test_layers.py +++ b/anndata/tests/test_layers.py @@ -1,11 +1,13 @@ -from importlib.util import find_spec +from __future__ import annotations + import warnings +from importlib.util import find_spec -import pytest import numpy as np import pandas as pd +import pytest -from anndata import AnnData, read_loom, read_h5ad +from anndata import AnnData, read_h5ad, read_loom from anndata.tests.helpers import gen_typed_df_t2_size X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) diff --git a/anndata/tests/test_obsmvarm.py b/anndata/tests/test_obsmvarm.py index 1c08e7545..e1e802a9d 100644 --- a/anndata/tests/test_obsmvarm.py +++ b/anndata/tests/test_obsmvarm.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import joblib import numpy as np import pandas as pd diff --git a/anndata/tests/test_obspvarp.py b/anndata/tests/test_obspvarp.py index 8ff025d76..5b3e063d3 100644 --- a/anndata/tests/test_obspvarp.py +++ b/anndata/tests/test_obspvarp.py @@ -1,4 +1,6 @@ # TODO: These tests should share code with test_layers, and 
test_obsmvarm +from __future__ import annotations + import warnings import joblib diff --git a/anndata/tests/test_raw.py b/anndata/tests/test_raw.py index 5686a4edc..7e4689d60 100644 --- a/anndata/tests/test_raw.py +++ b/anndata/tests/test_raw.py @@ -1,10 +1,11 @@ +from __future__ import annotations + import numpy as np import pytest import anndata as ad from anndata._core.anndata import ImplicitModificationWarning -from anndata.tests.helpers import assert_equal, gen_adata, GEN_ADATA_DASK_ARGS - +from anndata.tests.helpers import GEN_ADATA_DASK_ARGS, assert_equal, gen_adata # ------------------------------------------------------------------------------- # Some test data diff --git a/anndata/tests/test_readwrite.py b/anndata/tests/test_readwrite.py index 05047fbea..49b718a42 100644 --- a/anndata/tests/test_readwrite.py +++ b/anndata/tests/test_readwrite.py @@ -1,26 +1,26 @@ +from __future__ import annotations + +import re +import warnings from contextlib import contextmanager from importlib.util import find_spec -from os import PathLike from pathlib import Path -import re from string import ascii_letters -import warnings import h5py import numpy as np import pandas as pd import pytest -from scipy.sparse import csr_matrix, csc_matrix import zarr +from scipy.sparse import csc_matrix, csr_matrix import anndata as ad from anndata._io.specs.registry import IORegistryError -from anndata.compat import _read_attr, DaskArray - +from anndata.compat import DaskArray, _read_attr from anndata.tests.helpers import ( - gen_adata, - assert_equal, as_dense_dask_array, + assert_equal, + gen_adata, pytest_8_raises, ) @@ -498,7 +498,7 @@ def md5_path(pth: Path) -> bytes: checksum.update(buf) return checksum.digest() - def hash_dir_contents(dir: Path) -> "dict[str, bytes]": + def hash_dir_contents(dir: Path) -> dict[str, bytes]: root_pth = str(dir) return { str(k)[len(root_pth) :]: md5_path(k) for k in dir.rglob("*") if k.is_file() @@ -661,7 +661,7 @@ def test_write_string_types(tmp_path, diskfmt): with pytest_8_raises(TypeError, match=r"writing key 'obs'") as exc_info: write(adata_pth) - assert str("b'c'") in str(exc_info.value) + assert "b'c'" in str(exc_info.value) @pytest.mark.parametrize( diff --git a/anndata/tests/test_repr.py b/anndata/tests/test_repr.py index 862b9cd65..18fffb74f 100644 --- a/anndata/tests/test_repr.py +++ b/anndata/tests/test_repr.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import re from string import ascii_letters diff --git a/anndata/tests/test_structured_arrays.py b/anndata/tests/test_structured_arrays.py index 580a72554..81b6be22f 100644 --- a/anndata/tests/test_structured_arrays.py +++ b/anndata/tests/test_structured_arrays.py @@ -1,12 +1,13 @@ -from itertools import product, combinations +from __future__ import annotations + +from itertools import combinations, product import numpy as np import pytest -from anndata.tests.helpers import gen_vstr_recarray - -from anndata import AnnData import anndata as ad +from anndata import AnnData +from anndata.tests.helpers import gen_vstr_recarray @pytest.fixture(params=["h5ad", "zarr"]) diff --git a/anndata/tests/test_transpose.py b/anndata/tests/test_transpose.py index a7c010b3e..8b86a9b23 100644 --- a/anndata/tests/test_transpose.py +++ b/anndata/tests/test_transpose.py @@ -1,8 +1,9 @@ -from scipy import sparse +from __future__ import annotations import pytest +from scipy import sparse -from anndata.tests.helpers import gen_adata, assert_equal +from anndata.tests.helpers import assert_equal, gen_adata def 
test_transpose_orig(): @@ -58,9 +59,7 @@ def test_transposed_contents(adata): else: assert adata.X is t.X is None - assert_equal( - {k: v.T for k, v in adata.layers.items()}, {k: v for k, v in t.layers.items()} - ) + assert_equal({k: v.T for k, v in adata.layers.items()}, dict(t.layers.items())) assert_equal(adata.obs, t.var) assert_equal(adata.var, t.obs) assert_equal(dict(adata.obsm), dict(t.varm)) diff --git a/anndata/tests/test_uns.py b/anndata/tests/test_uns.py index 013c0dea5..ef0f4f8fe 100644 --- a/anndata/tests/test_uns.py +++ b/anndata/tests/test_uns.py @@ -1,6 +1,7 @@ +from __future__ import annotations + import numpy as np import pandas as pd - import pytest from anndata import AnnData diff --git a/anndata/tests/test_utils.py b/anndata/tests/test_utils.py index 7d766f304..f57fc5d6e 100644 --- a/anndata/tests/test_utils.py +++ b/anndata/tests/test_utils.py @@ -1,11 +1,14 @@ -import pandas as pd -from scipy import sparse +from __future__ import annotations + from itertools import repeat + +import pandas as pd import pytest +from scipy import sparse import anndata as ad -from anndata.utils import make_index_unique from anndata.tests.helpers import gen_typed_df +from anndata.utils import make_index_unique def test_make_index_unique(): diff --git a/anndata/tests/test_views.py b/anndata/tests/test_views.py index 7945d17ad..02bd1c4fe 100644 --- a/anndata/tests/test_views.py +++ b/anndata/tests/test_views.py @@ -1,30 +1,31 @@ +from __future__ import annotations + from copy import deepcopy from operator import mul import joblib import numpy as np -from scipy import sparse import pandas as pd import pytest +from dask.base import normalize_token, tokenize +from scipy import sparse import anndata as ad from anndata._core.index import _normalize_index -from anndata._core.views import ArrayView, SparseCSRView, SparseCSCView +from anndata._core.views import ArrayView, SparseCSCView, SparseCSRView from anndata.compat import CupyCSCMatrix -from anndata.utils import asarray from anndata.tests.helpers import ( - gen_adata, - subset_func, - slice_subset, - single_subset, - assert_equal, - GEN_ADATA_DASK_ARGS, BASE_MATRIX_PARAMS, - DASK_MATRIX_PARAMS, CUPY_MATRIX_PARAMS, + DASK_MATRIX_PARAMS, + GEN_ADATA_DASK_ARGS, + assert_equal, + gen_adata, + single_subset, + slice_subset, + subset_func, ) -from dask.base import tokenize, normalize_token - +from anndata.utils import asarray # ------------------------------------------------------------------------------ # Some test data @@ -277,7 +278,7 @@ def test_not_set_subset_X(matrix_type_base, subset_func): assert subset.is_view subset.X[:, internal_idx] = 1 assert not subset.is_view - assert not np.any(asarray(adata.X != orig_X_val)) + assert not np.any(asarray(orig_X_val != adata.X)) assert init_hash == joblib.hash(adata) @@ -317,7 +318,7 @@ def test_not_set_subset_X_dask(matrix_type_no_gpu, subset_func): assert subset.is_view subset.X[:, internal_idx] = 1 assert not subset.is_view - assert not np.any(asarray(adata.X != orig_X_val)) + assert not np.any(asarray(orig_X_val != adata.X)) assert init_hash == tokenize(adata) @@ -336,11 +337,11 @@ def test_set_scalar_subset_X(matrix_type, subset_func): if isinstance(adata.X, CupyCSCMatrix): # Comparison broken for CSC matrices # https://github.com/cupy/cupy/issues/7757 - assert asarray((orig_X_val.tocsr() != adata.X.tocsr())).sum() == mul( + assert asarray(orig_X_val.tocsr() != adata.X.tocsr()).sum() == mul( *adata_subset.shape ) else: - assert asarray((orig_X_val != adata.X)).sum() == mul(*adata_subset.shape) + 
assert asarray(orig_X_val != adata.X).sum() == mul(*adata_subset.shape) # TODO: Use different kind of subsetting for adata and view @@ -634,10 +635,7 @@ def test_view_mixin_copies_data(adata, array_type: type, attr): view = adata[:50] - if attr == "X": - arr_view = view.X - else: - arr_view = getattr(view, attr)["arr"] + arr_view = view.X if attr == "X" else getattr(view, attr)["arr"] arr_view_copy = arr_view.copy() diff --git a/anndata/tests/test_x.py b/anndata/tests/test_x.py index 9ec8800e6..87766a24f 100644 --- a/anndata/tests/test_x.py +++ b/anndata/tests/test_x.py @@ -1,16 +1,16 @@ """Tests for the attribute .X""" +from __future__ import annotations + import numpy as np import pandas as pd +import pytest from scipy import sparse import anndata as ad from anndata import AnnData +from anndata.tests.helpers import assert_equal, gen_adata from anndata.utils import asarray -import pytest - -from anndata.tests.helpers import gen_adata, assert_equal - UNLABELLED_ARRAY_TYPES = [ pytest.param(sparse.csr_matrix, id="csr"), pytest.param(sparse.csc_matrix, id="csc"), @@ -91,7 +91,7 @@ def test_init_x_as_none_explicit_shape(): assert adata.shape == shape -@pytest.mark.parametrize("shape", SINGULAR_SHAPES + [pytest.param((5, 3), id="(5, 3)")]) +@pytest.mark.parametrize("shape", [*SINGULAR_SHAPES, pytest.param((5, 3), id="(5, 3)")]) def test_transpose_with_X_as_none(shape): adata = gen_adata(shape, X_type=lambda x: None) adataT = adata.transpose() diff --git a/anndata/utils.py b/anndata/utils.py index fa7e3810d..269e91173 100644 --- a/anndata/utils.py +++ b/anndata/utils.py @@ -1,15 +1,20 @@ +from __future__ import annotations + import warnings -from functools import wraps, singledispatch -from typing import Mapping, Any, Sequence, Union +from functools import singledispatch, wraps +from typing import TYPE_CHECKING, Any import h5py -import pandas as pd import numpy as np +import pandas as pd from scipy import sparse -from .logging import get_logger from ._core.sparse_dataset import BaseCompressedSparseDataset from .compat import CupyArray, CupySparseMatrix +from .logging import get_logger + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence logger = get_logger(__name__) @@ -58,11 +63,9 @@ def convert_to_dict_dict(obj: dict): @convert_to_dict.register(np.ndarray) def convert_to_dict_ndarray(obj: np.ndarray): if obj.dtype.fields is None: - raise TypeError( - "Can only convert np.ndarray with compound dtypes to dict, " - f"passed array had “{obj.dtype}”." - ) - return {k: obj[k] for k in obj.dtype.fields.keys()} + msg = f"Can only convert np.ndarray with compound dtypes to dict, passed array had “{obj.dtype}”." 
+ raise TypeError(msg) + return {k: obj[k] for k in obj.dtype.fields} @convert_to_dict.register(type(None)) @@ -88,13 +91,11 @@ def _size_at_depth(layout, depth, lateral_context, **kwargs): if layout.is_numpy: # if it's an embedded rectilinear array, we have to deal with its shape # which might not be 1-dimensional - if layout.is_unknown: - shape = (0,) - else: - shape = layout.shape + shape = (0,) if layout.is_unknown else layout.shape numpy_axis = lateral_context["axis"] - depth + 1 if not (1 <= numpy_axis < len(shape)): - raise TypeError(f"axis={lateral_context['axis']} is too deep") + msg = f"axis={lateral_context['axis']} is too deep" + raise TypeError(msg) lateral_context["out"] = shape[numpy_axis] return ak.contents.EmptyArray() @@ -103,7 +104,8 @@ def _size_at_depth(layout, depth, lateral_context, **kwargs): # Strings are implemented like an array of lists of uint8 (ListType(NumpyType(...))) # which results in an extra hierarchy-level that shouldn't show up in dim_len # See https://github.com/scikit-hep/awkward/discussions/1654#discussioncomment-3736747 - raise TypeError(f"axis={lateral_context['axis']} is too deep") + msg = f"axis={lateral_context['axis']} is too deep" + raise TypeError(msg) if layout.is_regular: # if it's a regular list, you want the size @@ -121,9 +123,8 @@ def _size_at_depth(layout, depth, lateral_context, **kwargs): # currently, we don't recurse into records # in theory we could, just not sure how to do it at the moment # Would need to consider cases like: scalars, unevenly sized values - raise TypeError( - f"Cannot recurse into record type found at axis={lateral_context['axis']}" - ) + msg = f"Cannot recurse into record type found at axis={lateral_context['axis']}" + raise TypeError(msg) elif layout.is_union: # if it's a union, you could get the result of each union branch @@ -154,7 +155,8 @@ def dim_len_awkward(array, axis): Code adapted from @jpivarski's solution in https://github.com/scikit-hep/awkward/discussions/1654#discussioncomment-3521574 """ if axis < 0: # negative axis is another can of worms... maybe later - raise NotImplementedError("Does not support negative axis") + msg = "Does not support negative axis" + raise NotImplementedError(msg) elif axis == 0: return len(array) else: @@ -253,7 +255,7 @@ def warn_names_duplicates(attr: str): def ensure_df_homogeneous( df: pd.DataFrame, name: str -) -> Union[np.ndarray, sparse.csr_matrix]: +) -> np.ndarray | sparse.csr_matrix: # TODO: rename this function, I would not expect this to return a non-dataframe if all(isinstance(dt, pd.SparseDtype) for dt in df.dtypes): arr = df.sparse.to_coo().tocsr() @@ -274,10 +276,8 @@ def convert_dictionary_to_structured_array(source: Mapping[str, Sequence[Any]]): for col in source.values() ] except UnicodeEncodeError as e: - raise ValueError( - "Currently only support ascii strings. " - "Don’t use “ö” etc. for sample annotation." - ) from e + msg = "Currently only support ascii strings. Don’t use “ö” etc. for sample annotation." 
+ raise ValueError(msg) from e # if old_index_key not in source: # names.append(new_index_key) diff --git a/benchmarks/benchmarks/readwrite.py b/benchmarks/benchmarks/readwrite.py index a19bd5218..e2032d278 100644 --- a/benchmarks/benchmarks/readwrite.py +++ b/benchmarks/benchmarks/readwrite.py @@ -19,21 +19,21 @@ * io for backed objects * Reading dense as sparse, writing sparse as dense """ +from __future__ import annotations + +import sys import tempfile from pathlib import Path -import sys from typing import ClassVar -from memory_profiler import memory_usage import numpy as np import pooch - -from .utils import sedate, get_peak_mem, get_actualsize +from memory_profiler import memory_usage # from . import datasets - import anndata +from .utils import get_actualsize, get_peak_mem, sedate PBMC_3K_URL = "http://falexwolf.de/data/pbmc3k_raw.h5ad" diff --git a/benchmarks/benchmarks/utils.py b/benchmarks/benchmarks/utils.py index 8d16da522..62b4f66e6 100644 --- a/benchmarks/benchmarks/utils.py +++ b/benchmarks/benchmarks/utils.py @@ -1,12 +1,14 @@ +from __future__ import annotations + +import gc +import sys from string import ascii_lowercase from time import sleep -from memory_profiler import memory_usage import numpy as np import pandas as pd +from memory_profiler import memory_usage from scipy import sparse -import sys -import gc from anndata import AnnData @@ -61,9 +63,8 @@ def gen_indexer(adata, dim, index_kind, ratio): index_kinds = {"slice", "intarray", "boolarray", "strarray"} if index_kind not in index_kinds: - raise ValueError( - f"Argument 'index_kind' must be one of {index_kinds}. Was {index_kind}." - ) + msg = f"Argument 'index_kind' must be one of {index_kinds}. Was {index_kind}." + raise ValueError(msg) axis = dimnames.index(dim) subset = [slice(None), slice(None)] @@ -95,7 +96,7 @@ def gen_indexer(adata, dim, index_kind, ratio): def take_repeated_view(adata, *, dim, index_kind, ratio=0.9, nviews=10): v = adata views = [] - for i in range(nviews): + for _i in range(nviews): subset = gen_indexer(v, dim, index_kind, ratio) v = v[subset] views.append(v) @@ -114,10 +115,10 @@ def gen_adata(n_obs, n_var, attr_set): if "obs,var" in attr_set: adata.obs = pd.DataFrame( {k: np.random.randint(0, 100, n_obs) for k in ascii_lowercase}, - index=["cell{}".format(i) for i in range(n_obs)], + index=[f"cell{i}" for i in range(n_obs)], ) adata.var = pd.DataFrame( {k: np.random.randint(0, 100, n_var) for k in ascii_lowercase}, - index=["gene{}".format(i) for i in range(n_var)], + index=[f"gene{i}" for i in range(n_var)], ) return adata diff --git a/conftest.py b/conftest.py index fe4dace31..a98969b53 100644 --- a/conftest.py +++ b/conftest.py @@ -2,13 +2,16 @@ # 1. to allow ignoring warnings without test collection failing on CI # 2. as a pytest plugin/config that applies to doctests as well # TODO: Fix that, e.g. with the `pytest -p anndata.testing._pytest` pattern. 
+from __future__ import annotations -from pathlib import Path +from typing import TYPE_CHECKING import pytest from anndata.compat import chdir +if TYPE_CHECKING: + from pathlib import Path doctest_marker = pytest.mark.usefixtures("doctest_env") @@ -27,7 +30,7 @@ def pytest_itemcollected(item): """Define behavior of pytest.mark.gpu and doctests.""" from importlib.util import find_spec - is_gpu = len([mark for mark in item.iter_markers(name="gpu")]) > 0 + is_gpu = len(list(item.iter_markers(name="gpu"))) > 0 if is_gpu: item.add_marker( pytest.mark.skipif(not find_spec("cupy"), reason="Cupy not installed.") diff --git a/docs/conf.py b/docs/conf.py index 0e2c3f96a..a25b0f6cf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,9 +1,13 @@ +from __future__ import annotations + import sys -from pathlib import Path from datetime import datetime from importlib import metadata +from pathlib import Path +from typing import TYPE_CHECKING -from sphinx.application import Sphinx +if TYPE_CHECKING: + from sphinx.application import Sphinx HERE = Path(__file__).parent sys.path[:0] = [str(HERE / "extensions")]