Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

anndata 0.8 compat #8

Merged
merged 9 commits into from
May 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mudata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
from ._core.io import *
from ._core.config import set_options

__version__ = "0.1.2"
__version__ = "0.2.0"
__anndataversion__ = "0.1.0"
__mudataversion__ = "0.1.0"
166 changes: 66 additions & 100 deletions mudata/_core/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,29 @@


def _write_h5mu(file: h5py.File, mdata: MuData, write_data=True, **kwargs):
from anndata._io.utils import write_attribute
from anndata._io.specs.registry import write_elem
from .. import __version__, __mudataversion__, __anndataversion__

write_attribute(
write_elem(
file,
"obs",
mdata.strings_to_categoricals(mdata._shrink_attr("obs", inplace=False)),
dataset_kwargs=kwargs,
)
write_attribute(
write_elem(
file,
"var",
mdata.strings_to_categoricals(mdata._shrink_attr("var", inplace=False)),
dataset_kwargs=kwargs,
)
write_attribute(file, "obsm", mdata.obsm, dataset_kwargs=kwargs)
write_attribute(file, "varm", mdata.varm, dataset_kwargs=kwargs)
write_attribute(file, "obsp", mdata.obsp, dataset_kwargs=kwargs)
write_attribute(file, "varp", mdata.varp, dataset_kwargs=kwargs)
write_attribute(file, "uns", mdata.uns, dataset_kwargs=kwargs)
write_elem(file, "obsm", dict(mdata.obsm), dataset_kwargs=kwargs)
write_elem(file, "varm", dict(mdata.varm), dataset_kwargs=kwargs)
write_elem(file, "obsp", dict(mdata.obsp), dataset_kwargs=kwargs)
write_elem(file, "varp", dict(mdata.varp), dataset_kwargs=kwargs)
write_elem(file, "uns", dict(mdata.uns), dataset_kwargs=kwargs)

write_attribute(file, "obsmap", mdata.obsmap, dataset_kwargs=kwargs)
write_attribute(file, "varmap", mdata.varmap, dataset_kwargs=kwargs)
write_elem(file, "obsmap", dict(mdata.obsmap), dataset_kwargs=kwargs)
write_elem(file, "varmap", dict(mdata.varmap), dataset_kwargs=kwargs)

attrs = file.attrs
attrs["axis"] = mdata.axis
Expand All @@ -66,18 +66,18 @@ def _write_h5mu(file: h5py.File, mdata: MuData, write_data=True, **kwargs):
adata.strings_to_categoricals(adata.raw.var)

if write_data:
write_attribute(group, "X", adata.X, dataset_kwargs=kwargs)
write_elem(group, "X", adata.X, dataset_kwargs=kwargs)
if adata.raw is not None:
write_h5ad_raw(group, "raw", adata.raw)
write_elem(group, "raw", adata.raw)

write_attribute(group, "obs", adata.obs, dataset_kwargs=kwargs)
write_attribute(group, "var", adata.var, dataset_kwargs=kwargs)
write_attribute(group, "obsm", adata.obsm, dataset_kwargs=kwargs)
write_attribute(group, "varm", adata.varm, dataset_kwargs=kwargs)
write_attribute(group, "obsp", adata.obsp, dataset_kwargs=kwargs)
write_attribute(group, "varp", adata.varp, dataset_kwargs=kwargs)
write_attribute(group, "layers", adata.layers, dataset_kwargs=kwargs)
write_attribute(group, "uns", adata.uns, dataset_kwargs=kwargs)
write_elem(group, "obs", adata.obs, dataset_kwargs=kwargs)
write_elem(group, "var", adata.var, dataset_kwargs=kwargs)
write_elem(group, "obsm", dict(adata.obsm), dataset_kwargs=kwargs)
write_elem(group, "varm", dict(adata.varm), dataset_kwargs=kwargs)
write_elem(group, "obsp", dict(adata.obsp), dataset_kwargs=kwargs)
write_elem(group, "varp", dict(adata.varp), dataset_kwargs=kwargs)
write_elem(group, "layers", dict(adata.layers), dataset_kwargs=kwargs)
write_elem(group, "uns", dict(adata.uns), dataset_kwargs=kwargs)

attrs = group.attrs
attrs["encoding-type"] = "anndata"
Expand Down Expand Up @@ -112,7 +112,7 @@ def write_zarr(
Matrices - sparse or dense - are currently stored as they are.
"""
import zarr
from anndata._io.utils import write_attribute
from anndata._io.specs.registry import write_elem
from anndata._io.zarr import write_zarr as anndata_write_zarr
from .. import __version__, __mudataversion__, __anndataversion__

Expand All @@ -124,26 +124,26 @@ def write_zarr(
store = str(store)
file = zarr.open(store, mode="w")
mdata = data
write_attribute(
write_elem(
file,
"obs",
mdata.strings_to_categoricals(mdata._shrink_attr("obs", inplace=False)),
dataset_kwargs=kwargs,
)
write_attribute(
write_elem(
file,
"var",
mdata.strings_to_categoricals(mdata._shrink_attr("var", inplace=False)),
dataset_kwargs=kwargs,
)
write_attribute(file, "obsm", mdata.obsm, dataset_kwargs=kwargs)
write_attribute(file, "varm", mdata.varm, dataset_kwargs=kwargs)
write_attribute(file, "obsp", mdata.obsp, dataset_kwargs=kwargs)
write_attribute(file, "varp", mdata.varp, dataset_kwargs=kwargs)
write_attribute(file, "uns", mdata.uns, dataset_kwargs=kwargs)
write_elem(file, "obsm", dict(mdata.obsm), dataset_kwargs=kwargs)
write_elem(file, "varm", dict(mdata.varm), dataset_kwargs=kwargs)
write_elem(file, "obsp", dict(mdata.obsp), dataset_kwargs=kwargs)
write_elem(file, "varp", dict(mdata.varp), dataset_kwargs=kwargs)
write_elem(file, "uns", dict(mdata.uns), dataset_kwargs=kwargs)

write_attribute(file, "obsmap", mdata.obsmap, dataset_kwargs=kwargs)
write_attribute(file, "varmap", mdata.varmap, dataset_kwargs=kwargs)
write_elem(file, "obsmap", dict(mdata.obsmap), dataset_kwargs=kwargs)
write_elem(file, "varmap", dict(mdata.varmap), dataset_kwargs=kwargs)

attrs = file.attrs
attrs["axis"] = mdata.axis
Expand All @@ -160,22 +160,20 @@ def write_zarr(

if write_data:
if chunks is not None and not isinstance(adata.X, sparse.spmatrix):
write_attribute(
group, "X", adata.X, dataset_kwargs=dict(chunks=chunks, **kwargs)
)
write_elem(group, "X", adata.X, dataset_kwargs=dict(chunks=chunks, **kwargs))
else:
write_attribute(group, "X", adata.X, dataset_kwargs=kwargs)
write_elem(group, "X", adata.X, dataset_kwargs=kwargs)
if adata.raw is not None:
write_zarr_raw(group, "raw", adata.raw)
write_elem(group, "raw", adata.raw)

write_attribute(group, "obs", adata.obs, dataset_kwargs=kwargs)
write_attribute(group, "var", adata.var, dataset_kwargs=kwargs)
write_attribute(group, "obsm", adata.obsm, dataset_kwargs=kwargs)
write_attribute(group, "varm", adata.varm, dataset_kwargs=kwargs)
write_attribute(group, "obsp", adata.obsp, dataset_kwargs=kwargs)
write_attribute(group, "varp", adata.varp, dataset_kwargs=kwargs)
write_attribute(group, "layers", adata.layers, dataset_kwargs=kwargs)
write_attribute(group, "uns", adata.uns, dataset_kwargs=kwargs)
write_elem(group, "obs", adata.obs, dataset_kwargs=kwargs)
write_elem(group, "var", adata.var, dataset_kwargs=kwargs)
write_elem(group, "obsm", dict(adata.obsm), dataset_kwargs=kwargs)
write_elem(group, "varm", dict(adata.varm), dataset_kwargs=kwargs)
write_elem(group, "obsp", dict(adata.obsp), dataset_kwargs=kwargs)
write_elem(group, "varp", dict(adata.varp), dataset_kwargs=kwargs)
write_elem(group, "layers", dict(adata.layers), dataset_kwargs=kwargs)
write_elem(group, "uns", dict(adata.uns), dataset_kwargs=kwargs)

attrs = group.attrs
attrs["encoding-type"] = "anndata"
Expand Down Expand Up @@ -227,7 +225,7 @@ def write_h5ad(filename: PathLike, mod: str, data: Union[MuData, AnnData]):

Ideally this is merged later to anndata._io.h5ad.write_h5ad.
"""
from anndata._io.utils import write_attribute
from anndata._io.specs.registry import write_elem
from anndata._io.h5ad import write_h5ad
from .. import __version__, __anndataversion__

Expand Down Expand Up @@ -257,20 +255,20 @@ def write_h5ad(filename: PathLike, mod: str, data: Union[MuData, AnnData]):
filepath = Path(filename)

if not (adata.isbacked and Path(adata.filename) == Path(filepath)):
write_attribute(fmd, f"X", adata.X)
write_elem(fmd, f"X", adata.X)

# NOTE: Calling write_attribute() does not allow writing .raw into .h5mu modalities
# NOTE: .raw, when present, is written by its own write_elem() call under the mod/{mod}/raw key
if adata.raw is not None:
write_h5ad_raw(f, f"mod/{mod}/raw", adata.raw)
write_elem(f, f"mod/{mod}/raw", adata.raw)

write_attribute(fmd, "obs", adata.obs)
write_attribute(fmd, "var", adata.var)
write_attribute(fmd, "obsm", adata.obsm)
write_attribute(fmd, "varm", adata.varm)
write_attribute(fmd, "obsp", adata.obsp)
write_attribute(fmd, "varp", adata.varp)
write_attribute(fmd, "layers", adata.layers)
write_attribute(fmd, "uns", adata.uns)
write_elem(fmd, "obs", adata.obs)
write_elem(fmd, "var", adata.var)
write_elem(fmd, "obsm", dict(adata.obsm))
write_elem(fmd, "varm", dict(adata.varm))
write_elem(fmd, "obsp", dict(adata.obsp))
write_elem(fmd, "varp", dict(adata.varp))
write_elem(fmd, "layers", dict(adata.layers))
write_elem(fmd, "uns", dict(adata.uns))

attrs = fmd.attrs
attrs["encoding-type"] = "anndata"
Expand All @@ -282,39 +280,6 @@ def write_h5ad(filename: PathLike, mod: str, data: Union[MuData, AnnData]):
write_anndata = write_h5ad


def write_h5ad_raw(f, key, raw, **kwargs):
    """
    Write a raw slot under ``key`` of an open HDF5 container.

    Replicates write_raw() in anndata/_io/h5ad.py but takes the target
    key as an argument, which allows writing raw slots to modalities
    inside .h5mu files.

    Parameters
    ----------
    f
        Container providing ``create_group()``; the new group is created in it.
    key
        Path of the group to create for the raw slot.
    raw
        Object exposing ``shape``, ``X``, ``var`` and ``varm``.
    **kwargs
        Forwarded to every write_attribute() call as ``dataset_kwargs``.
    """
    from anndata._io.utils import write_attribute, EncodingVersions

    # Tag the new group so that it is recognised as a raw encoding.
    raw_group = f.create_group(key)
    raw_attrs = raw_group.attrs
    raw_attrs["encoding-type"] = "raw"
    raw_attrs["encoding-version"] = EncodingVersions.raw.value
    raw_attrs["shape"] = raw.shape

    # Each component is written relative to f, below the freshly created group.
    for slot in ("X", "var", "varm"):
        write_attribute(f, f"{key}/{slot}", getattr(raw, slot), dataset_kwargs=kwargs)


def write_zarr_raw(f, key, raw, **kwargs):
    """
    Write a raw slot under ``key`` of an open zarr container.

    Replicates write_raw() in anndata/_io/zarr.py but takes the target
    key as an argument, which allows writing raw slots to modalities
    inside .zarr stores.

    Parameters
    ----------
    f
        Container providing ``create_group()``; the new group is created in it.
    key
        Path of the group to create for the raw slot.
    raw
        Object exposing ``shape``, ``X``, ``var`` and ``varm``.
    **kwargs
        Forwarded to every write_attribute() call as ``dataset_kwargs``.
    """
    from anndata._io.utils import EncodingVersions
    from anndata._io.zarr import write_attribute

    # Tag the new group so that it is recognised as a raw encoding.
    raw_group = f.create_group(key)
    raw_attrs = raw_group.attrs
    raw_attrs["encoding-type"] = "raw"
    raw_attrs["encoding-version"] = EncodingVersions.raw.value
    raw_attrs["shape"] = raw.shape

    # Each component is written relative to f, below the freshly created group.
    for slot in ("X", "var", "varm"):
        write_attribute(f, f"{key}/{slot}", getattr(raw, slot), dataset_kwargs=kwargs)


def write(filename: PathLike, data: Union[MuData, AnnData]):
"""
Write MuData or AnnData to an HDF5 file
Expand Down Expand Up @@ -385,7 +350,7 @@ def read_h5mu(filename: PathLike, backed: Union[str, bool, None] = None):
"r+",
], "Argument `backed` should be boolean, or r/r+, or None"

from anndata._io.utils import read_attribute
from anndata._io.specs.registry import read_elem
from anndata._io.h5ad import read_dataframe

if backed is True or not backed:
Expand Down Expand Up @@ -425,7 +390,7 @@ def read_h5mu(filename: PathLike, backed: Union[str, bool, None] = None):

d[k] = mods
else:
d[k] = read_attribute(f[k])
d[k] = read_elem(f[k])

if "axis" in f.attrs:
d["axis"] = f.attrs["axis"]
Expand All @@ -444,8 +409,8 @@ def read_zarr(store: Union[str, Path, MutableMapping, zarr.Group]):
The filename, a :class:`~typing.MutableMapping`, or a Zarr storage class.
"""
import zarr
from anndata._io.specs.registry import read_elem
from anndata._io.zarr import (
read_attribute,
read_zarr as anndata_read_zarr,
read_dataframe,
_read_legacy_raw,
Expand All @@ -472,7 +437,7 @@ def read_zarr(store: Union[str, Path, MutableMapping, zarr.Group]):
mods[m] = ad
d[k] = mods
else: # Base case
d[k] = read_attribute(f[k])
d[k] = read_elem(f[k])

mu = MuData._init_from_dict_(**d)
mu.file = manager
Expand All @@ -482,7 +447,8 @@ def read_zarr(store: Union[str, Path, MutableMapping, zarr.Group]):

def _read_zarr_mod(g: zarr.Group, manager: MuDataFileManager = None, backed: bool = False) -> dict:
import zarr
from anndata._io.zarr import read_attribute, read_dataframe, _read_legacy_raw
from anndata._io.specs.registry import read_elem
from anndata._io.zarr import read_dataframe, _read_legacy_raw
from anndata import Raw

d = {}
Expand All @@ -500,9 +466,9 @@ def _read_zarr_mod(g: zarr.Group, manager: MuDataFileManager = None, backed: boo
raise ValueError()
d["dtype"] = dtype
if not backed:
d["X"] = read_attribute(X)
d["X"] = read_elem(X)
elif k != "raw":
d[k] = read_attribute(g[k])
d[k] = read_elem(g[k])
ad = AnnData(**d)
if manager is not None:
ad.file = AnnDataFileManager(ad, os.path.basename(g.name), manager)
Expand All @@ -511,7 +477,7 @@ def _read_zarr_mod(g: zarr.Group, manager: MuDataFileManager = None, backed: boo
g,
d.get("raw"),
read_dataframe,
read_attribute,
read_elem,
attrs=("var", "varm") if backed else ("var", "varm", "X"),
)
if raw:
Expand All @@ -522,7 +488,7 @@ def _read_zarr_mod(g: zarr.Group, manager: MuDataFileManager = None, backed: boo
def _read_h5mu_mod(
g: "h5py.Group", manager: MuDataFileManager = None, backed: bool = False
) -> dict:
from anndata._io.utils import read_attribute
from anndata._io.specs.registry import read_elem
from anndata._io.h5ad import read_dataframe, _read_raw
from anndata import Raw

Expand All @@ -541,9 +507,9 @@ def _read_h5mu_mod(
raise ValueError()
d["dtype"] = dtype
if not backed:
d["X"] = read_attribute(X)
d["X"] = read_elem(X)
elif k != "raw":
d[k] = read_attribute(g[k])
d[k] = read_elem(g[k])
ad = AnnData(**d)
if manager is not None:
ad.file = AnnDataFileManager(ad, os.path.basename(g.name), manager)
Expand Down Expand Up @@ -576,7 +542,7 @@ def read_h5ad(
"r+",
], "Argument `backed` should be boolean, or r/r+, or None"

from anndata._io.utils import read_attribute
from anndata._io.specs.registry import read_elem
from anndata._io.h5ad import read_dataframe, _read_raw

d = {}
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ requires = [
"numpy",
"pandas",
"h5py",
"anndata < 0.8",
"anndata >= 0.8",
]

[tool.flit.metadata.requires-extra]
Expand Down