Commit 1200b83

Merge branch 'main' into fix-pollution

flying-sheep authored Aug 31, 2023
2 parents 76e1c26 + 22f33bb
Showing 41 changed files with 481 additions and 154 deletions.
12 changes: 6 additions & 6 deletions .azure-pipelines.yml
@@ -13,13 +13,13 @@ jobs:
       vmImage: "ubuntu-22.04"
     strategy:
       matrix:
-        Python310:
-          python.version: "3.10"
+        Python3.11:
+          python.version: "3.11"
           RUN_COVERAGE: yes
-        Python38:
+        Python3.8:
           python.version: "3.8"
         PreRelease:
-          python.version: "3.10"
+          python.version: "3.11"
           PRERELEASE_DEPENDENCIES: yes
     steps:
       - task: UsePythonVersion@0
@@ -85,8 +85,8 @@ jobs:
   steps:
     - task: UsePythonVersion@0
       inputs:
-        versionSpec: "3.10"
-      displayName: "Use Python 3.10"
+        versionSpec: "3.11"
+      displayName: "Use Python 3.11"

     - script: |
        python -m pip install --upgrade pip
8 changes: 6 additions & 2 deletions .github/workflows/check-pr-milestoned.yml
@@ -20,12 +20,16 @@ on:
       - synchronize

 env:
-  LABELS: ${{ join( github.event.pull_request.labels.*.name, '|' ) }}
+  LABELS: ${{ join(github.event.pull_request.labels.*.name, '|') }}

 jobs:
   check-milestone:
     name: "Triage: Check Milestone"
     runs-on: ubuntu-latest
     steps:
-      - if: github.event.pull_request.milestone == null && contains( env.LABELS, 'no milestone' ) == false
+      - name: Check if merging isn’t blocked
+        if: contains(env.LABELS, 'DON’T MERGE')
+        run: exit 1
+      - name: Check if a milestone is necessary and exists
+        if: github.event.pull_request.milestone == null && contains(env.LABELS, 'no milestone') == false
         run: exit 1
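Note on the expressions above: `join(github.event.pull_request.labels.*.name, '|')` flattens the PR's label names into one `|`-separated string, so `contains()` is a plain substring test on it. A standalone Python sketch of that behavior (the label names are hypothetical):

```python
# Mirror of what the workflow's join()/contains() pair computes.
labels = "|".join(["bug", "DON’T MERGE"])  # like env.LABELS

print("DON’T MERGE" in labels)   # True  -> the first step runs `exit 1`
print("no milestone" in labels)  # False -> the milestone check still applies
```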
22 changes: 22 additions & 0 deletions .github/workflows/codespell.yml
@@ -0,0 +1,22 @@
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
2 changes: 1 addition & 1 deletion .github/workflows/test-gpu.yml
@@ -51,7 +51,7 @@ jobs:
           micromamba-version: "1.3.1-0"
           environment-name: anndata-gpu-ci
           create-args: >-
-            python=3.10
+            python=3.11
             cupy
             numba
             pytest
11 changes: 9 additions & 2 deletions .pre-commit-config.yaml
@@ -5,12 +5,12 @@ repos:
       - id: black
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: "v0.0.282"
+    rev: "v0.0.286"
     hooks:
       - id: ruff
         args: ["--fix"]
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v3.0.1
+    rev: v3.0.2
     hooks:
       - id: prettier
   - repo: https://github.com/pre-commit/pre-commit-hooks
@@ -26,3 +26,10 @@ repos:
       - id: detect-private-key
       - id: no-commit-to-branch
         args: ["--branch=main"]
+
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.5
+    hooks:
+      - id: codespell
+        additional_dependencies:
+          - tomli
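The `tomli` entry is presumably there so codespell can read a `[tool.codespell]` table from `pyproject.toml` on interpreters that lack the stdlib `tomllib` (added in Python 3.11); that reading of this repo's setup is an assumption. A minimal sketch of the lookup:

```python
# Assumes a pyproject.toml with a [tool.codespell] table exists;
# on Python >= 3.11, the stdlib `tomllib` works the same way.
import tomli

with open("pyproject.toml", "rb") as f:
    codespell_cfg = tomli.load(f).get("tool", {}).get("codespell", {})
print(codespell_cfg)  # e.g. {'skip': '*.ipynb', ...}
```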
2 changes: 1 addition & 1 deletion .readthedocs.yml
@@ -2,7 +2,7 @@ version: 2
 build:
   os: ubuntu-20.04
   tools:
-    python: "3.10"
+    python: "3.11"
 sphinx:
   configuration: docs/conf.py
   fail_on_warning: true # do not change or you will be fired
4 changes: 2 additions & 2 deletions README.md
@@ -3,8 +3,8 @@
 [![Coverage](https://codecov.io/gh/scverse/anndata/branch/main/graph/badge.svg?token=IN1mJN1Wi8)](https://codecov.io/gh/scverse/anndata)
 [![Docs](https://readthedocs.com/projects/icb-anndata/badge/?version=latest)](https://anndata.readthedocs.io)
 [![PyPI](https://img.shields.io/pypi/v/anndata.svg)](https://pypi.org/project/anndata)
-[![PyPIDownloadsMonth](https://img.shields.io/pypi/dm/scanpy?logo=PyPI&color=blue)](https://pypi.org/project/anndata)
-[![PyPIDownloadsTotal](https://pepy.tech/badge/anndata)](https://pepy.tech/project/anndata)
+[![Downloads](https://static.pepy.tech/badge/anndata/month)](https://pepy.tech/project/anndata)
+[![Downloads](https://static.pepy.tech/badge/anndata)](https://pepy.tech/project/anndata)
 [![Stars](https://img.shields.io/github/stars/scverse/anndata?logo=GitHub&color=yellow)](https://github.com/scverse/anndata/stargazers)
 [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](http://numfocus.org)

20 changes: 18 additions & 2 deletions anndata/__init__.py
@@ -12,6 +12,13 @@
     "anndata is not correctly installed. Please install it, e.g. with pip."
 )

+# Allowing notes to be added to exceptions. See: https://github.com/scverse/anndata/issues/868
+import sys
+
+if sys.version_info < (3, 11):
+    # Backport package for exception groups
+    import exceptiongroup  # noqa: F401
+
 from ._core.anndata import AnnData
 from ._core.merge import concat
 from ._core.raw import Raw
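For context on the hunk above: `BaseException.add_note` only exists on Python ≥ 3.11, and the `exceptiongroup` backport also patches traceback rendering so that manually attached `__notes__` are displayed on older interpreters. A minimal sketch, not anndata API; the helper name `add_note_compat` is hypothetical:

```python
import sys

if sys.version_info < (3, 11):
    # Backport: also makes tracebacks display __notes__
    import exceptiongroup  # noqa: F401


def add_note_compat(err: BaseException, msg: str) -> None:
    """Attach a note to an exception across Python versions."""
    if hasattr(err, "add_note"):  # built in on Python >= 3.11
        err.add_note(msg)
    else:
        err.__notes__ = [*getattr(err, "__notes__", []), msg]


try:
    raise ValueError("shapes do not match")
except ValueError as e:
    add_note_compat(e, "hint: check that obs names are aligned")
    raise
```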
@@ -34,8 +41,17 @@
 )
 from . import experimental

-# backwards compat / shortcut for default format
-read = read_h5ad
+
+def read(*args, **kwargs):
+    import warnings
+
+    warnings.warn(
+        "`anndata.read` is deprecated, use `anndata.read_h5ad` instead. "
+        "`ad.read` will be removed in mid 2024.",
+        FutureWarning,
+    )
+    return read_h5ad(*args, **kwargs)
+

 __all__ = [
     "__version__",
15 changes: 10 additions & 5 deletions anndata/_core/aligned_mapping.py
@@ -244,12 +244,17 @@ def _validate_value(self, val: V, key: str) -> V:
         if (
             hasattr(val, "index")
             and isinstance(val.index, cabc.Collection)
-            and not (val.index == self.dim_names).all()
+            and not val.index.equals(self.dim_names)
         ):
             # Could probably also re-order index if it’s contained
-            raise ValueError(
-                f"value.index does not match parent’s axis {self.axes[0]} names"
-            )
+            try:
+                pd.testing.assert_index_equal(val.index, self.dim_names)
+            except AssertionError as e:
+                msg = f"value.index does not match parent’s axis {self.axes[0]} names:\n{e}"
+                raise ValueError(msg) from None
+            else:
+                msg = "Index.equals and pd.testing.assert_index_equal disagree"
+                raise AssertionError(msg)
         return super()._validate_value(val, key)

     @property
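Note on the index check above: `Index.equals` (plus `assert_index_equal` for the error message) replaces the old elementwise `==`, which raises on length mismatch and treats NaN as unequal to itself. A standalone illustration:

```python
import pandas as pd

a = pd.Index(["c1", "c2", float("nan")])
b = pd.Index(["c1", "c2", float("nan")])

print(a.equals(b))     # True: length- and NaN-aware comparison
print((a == b).all())  # False: NaN != NaN elementwise
# (a == pd.Index(["c1"])).all() would raise ValueError (length mismatch)

try:
    pd.testing.assert_index_equal(a, pd.Index(["c1", "c2", "c3"]))
except AssertionError as e:
    print(e)  # describes exactly which values differ, as reused above
```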
@@ -300,7 +305,7 @@ class LayersBase(AlignedMapping):
     attrname = "layers"
     axes = (0, 1)

-    # TODO: I thought I had a more elegant solution to overiding this...
+    # TODO: I thought I had a more elegant solution to overriding this...
     def copy(self) -> "Layers":
         d = self._actual_class(self.parent)
         for k, v in self.items():
18 changes: 11 additions & 7 deletions anndata/_core/anndata.py
@@ -1,6 +1,8 @@
 """\
 Main class and helper functions.
 """
+from __future__ import annotations
+
 import warnings
 import collections.abc as cabc
 from collections import OrderedDict
@@ -19,7 +21,7 @@
 import numpy as np
 from numpy import ma
 import pandas as pd
-from pandas.api.types import infer_dtype, is_string_dtype, is_categorical_dtype
+from pandas.api.types import infer_dtype, is_string_dtype
 from scipy import sparse
 from scipy.sparse import issparse, csr_matrix

@@ -648,7 +650,7 @@ def X(self, value: Optional[Union[np.ndarray, sparse.spmatrix]]):

         # If indices are both arrays, we need to modify them
         # so we don’t set values like coordinates
-        # This can occur if there are succesive views
+        # This can occur if there are successive views
         if (
             self.is_view
             and isinstance(self._oidx, np.ndarray)
@@ -665,7 +667,7 @@ def X(self, value: Optional[Union[np.ndarray, sparse.spmatrix]]):
         ):
             if not np.isscalar(value) and self.shape != value.shape:
                 # For assigning vector of values to 2d array or matrix
-                # Not neccesary for row of 2d array
+                # Not necessary for row of 2d array
                 value = value.reshape(self.shape)
             if self.isbacked:
                 if self.is_view:
@@ -1114,9 +1116,11 @@ def __getitem__(self, index: Index) -> "AnnData":
         oidx, vidx = self._normalize_indices(index)
         return AnnData(self, oidx=oidx, vidx=vidx, asview=True)

-    def _remove_unused_categories(self, df_full, df_sub, uns):
+    def _remove_unused_categories(
+        self, df_full: pd.DataFrame, df_sub: pd.DataFrame, uns: dict[str, Any]
+    ):
         for k in df_full:
-            if not is_categorical_dtype(df_full[k]):
+            if not isinstance(df_full[k].dtype, pd.CategoricalDtype):
                 continue
             all_categories = df_full[k].cat.categories
             with pd.option_context("mode.chained_assignment", None):
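The `isinstance(..., pd.CategoricalDtype)` check above is the forward-compatible replacement for pandas' deprecated `is_categorical_dtype`. A sketch of the pattern (not the method's exact body), including the category pruning the method's name refers to:

```python
import pandas as pd

df = pd.DataFrame({"cell_type": pd.Categorical(["B", "T", "B"])})
sub = df[df["cell_type"] == "B"].copy()  # a subset, as after slicing

if isinstance(sub["cell_type"].dtype, pd.CategoricalDtype):  # the new check
    sub["cell_type"] = sub["cell_type"].cat.remove_unused_categories()

print(sub["cell_type"].cat.categories.tolist())  # ['B']; 'T' was pruned
```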
@@ -1373,7 +1377,7 @@ def obs_vector(self, k: str, *, layer: Optional[str] = None) -> np.ndarray:

         Returns
         -------
-        A one dimensional nd array, with values for each obs in the same order
+        A one dimensional ndarray, with values for each obs in the same order
         as :attr:`obs_names`.
         """
         if layer == "X":
@@ -1405,7 +1409,7 @@ def var_vector(self, k, *, layer: Optional[str] = None) -> np.ndarray:

         Returns
         -------
-        A one dimensional nd array, with values for each var in the same order
+        A one dimensional ndarray, with values for each var in the same order
         as :attr:`var_names`.
         """
         if layer == "X":
44 changes: 22 additions & 22 deletions anndata/_core/merge.py
@@ -4,27 +4,25 @@
 from __future__ import annotations

 from collections import OrderedDict
-from collections.abc import Mapping, MutableSet
-from functools import reduce, singledispatch
-from itertools import repeat
-from operator import and_, or_, sub
-from typing import (
-    Any,
+from collections.abc import (
     Callable,
     Collection,
+    Mapping,
+    MutableSet,
     Iterable,
-    Optional,
-    Tuple,
-    TypeVar,
-    Union,
-    Literal,
     Sequence,
 )
+from functools import reduce, singledispatch
+from itertools import repeat
+from operator import and_, or_, sub
+from typing import Any, Optional, TypeVar, Union, Literal
 import typing
 from warnings import warn, filterwarnings

 from natsort import natsorted
 import numpy as np
 import pandas as pd
+from pandas.api.extensions import ExtensionDtype
 from scipy import sparse
 from scipy.sparse import spmatrix
@@ -96,7 +94,7 @@ def not_missing(v) -> bool:


 # We need to be able to check for equality of arrays to know which are the same.
-# Unfortunatley equality of arrays is poorly defined.
+# Unfortunately equality of arrays is poorly defined.
 # * `np.array_equal` does not work for sparse arrays
 # * `np.array_equal(..., equal_nan=True)` does not work for null values at the moment
 #   (see https://github.com/numpy/numpy/issues/16377)
@@ -211,7 +209,7 @@ def unify_dtypes(dfs: Iterable[pd.DataFrame]) -> list[pd.DataFrame]:
     df_dtypes = [dict(df.dtypes) for df in dfs]
     columns = reduce(lambda x, y: x.union(y), [df.columns for df in dfs])

-    dtypes = {col: list() for col in columns}
+    dtypes: dict[str, list[np.dtype | ExtensionDtype]] = {col: [] for col in columns}
     for col in columns:
         for df in df_dtypes:
             dtypes[col].append(df.get(col, None))
@@ -235,7 +233,9 @@ def unify_dtypes(dfs: Iterable[pd.DataFrame]) -> list[pd.DataFrame]:
     return dfs


-def try_unifying_dtype(col: list) -> pd.core.dtypes.base.ExtensionDtype | None:
+def try_unifying_dtype(
+    col: Sequence[np.dtype | ExtensionDtype],
+) -> pd.core.dtypes.base.ExtensionDtype | None:
     """
     If dtypes can be unified, returns the dtype they would be unified to.
@@ -248,26 +248,26 @@ def try_unifying_dtype(col: list) -> pd.core.dtypes.base.ExtensionDtype | None:
         A list of dtypes to unify. Can be numpy/ pandas dtypes, or None (which denotes
         a missing value)
     """
-    dtypes = set()
+    dtypes: set[pd.CategoricalDtype] = set()
     # Categorical
-    if any([pd.api.types.is_categorical_dtype(x) for x in col]):
+    if any(isinstance(dtype, pd.CategoricalDtype) for dtype in col):
         ordered = False
         for dtype in col:
-            if pd.api.types.is_categorical_dtype(dtype):
+            if isinstance(dtype, pd.CategoricalDtype):
                 dtypes.add(dtype)
                 ordered = ordered | dtype.ordered
             elif not pd.isnull(dtype):
                 return False
         if len(dtypes) > 0 and not ordered:
             categories = reduce(
                 lambda x, y: x.union(y),
-                [x.categories for x in dtypes if not pd.isnull(x)],
+                [dtype.categories for dtype in dtypes if not pd.isnull(dtype)],
             )

             return pd.CategoricalDtype(natsorted(categories), ordered=False)
     # Boolean
-    elif all([pd.api.types.is_bool_dtype(x) or x is None for x in col]):
-        if any([x is None for x in col]):
+    elif all(pd.api.types.is_bool_dtype(dtype) or dtype is None for dtype in col):
+        if any(dtype is None for dtype in col):
             return pd.BooleanDtype()
         else:
             return None
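Run standalone, the categorical branch above computes the naturally sorted union of all categories as a new unordered dtype; for example:

```python
from functools import reduce

import pandas as pd
from natsort import natsorted

dtypes = [
    pd.CategoricalDtype(["s1", "s10", "s2"]),
    pd.CategoricalDtype(["s2", "s3"]),
]
categories = reduce(lambda x, y: x.union(y), (d.categories for d in dtypes))
unified = pd.CategoricalDtype(natsorted(categories), ordered=False)
print(list(unified.categories))  # ['s1', 's2', 's3', 's10'] (natural order)
```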
@@ -942,7 +942,7 @@ def merge_outer(mappings, batch_keys, *, join_index="-", merge=merge_unique):
     return out


-def _resolve_dim(*, dim: str = None, axis: int = None) -> Tuple[int, str]:
+def _resolve_dim(*, dim: str = None, axis: int = None) -> tuple[int, str]:
     _dims = ("obs", "var")
     if (dim is None and axis is None) or (dim is not None and axis is not None):
         raise ValueError(
@@ -1042,7 +1042,7 @@ def concat(
         incrementing integer labels.
     index_unique
         Whether to make the index unique by using the keys. If provided, this
-        is the delimeter between "{orig_idx}{index_unique}{key}". When `None`,
+        is the delimiter between "{orig_idx}{index_unique}{key}". When `None`,
         the original indices are kept.
     fill_value
         When `join="outer"`, this is the value that will be used to fill the introduced
6 changes: 5 additions & 1 deletion anndata/_core/raw.py
@@ -29,7 +29,11 @@ def __init__(
         self._n_obs = adata.n_obs
         # construct manually
         if adata.isbacked == (X is None):
-            self._X = X
+            # Move from GPU to CPU since it's large and not always used
+            if isinstance(X, (CupyArray, CupySparseMatrix)):
+                self._X = X.get()
+            else:
+                self._X = X
             self._var = _gen_dataframe(var, self.X.shape[1], ["var_names"])
             self._varm = AxisArrays(self, 1, varm)
         elif X is None:  # construct from adata
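`X.get()` above is CuPy's device-to-host copy, returning a `numpy.ndarray`; standalone (requires a CUDA-capable environment):

```python
import cupy as cp
import numpy as np

gpu_x = cp.arange(6, dtype=cp.float32).reshape(2, 3)
cpu_x = gpu_x.get()  # copy from GPU memory to host
assert isinstance(cpu_x, np.ndarray)
```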
2 changes: 1 addition & 1 deletion anndata/_io/read.py
@@ -441,7 +441,7 @@ def _read_text(
         else:
             data.append(np.array(line_list, dtype=dtype))
     # logg.msg(" read data into list of lists", t=True, v=4)
-    # transfrom to array, this takes a long time and a lot of memory
+    # transform to array, this takes a long time and a lot of memory
     # but it’s actually the same thing as np.genfromtxt does
     # - we don’t use the latter as it would involve another slicing step
     # in the end, to separate row_names from float data, slicing takes
(Diffs for the remaining changed files are not shown; 41 files changed in total.)