From 1aaa6629535457788670fa7213e9ff747d79946b Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Fri, 29 Mar 2024 01:34:49 +0100
Subject: [PATCH 1/2] fix precommit

---
 CHANGELOG.md                                |   4 +
 README.md                                   |  11 +-
 docs/api.md                                 |  11 +-
 src/spatialdata_io/__init__.py              |   2 +
 src/spatialdata_io/_constants/_constants.py |  57 ++++
 src/spatialdata_io/readers/visium_hd.py     | 350 ++++++++++++++++++++
 6 files changed, 425 insertions(+), 10 deletions(-)
 create mode 100644 src/spatialdata_io/readers/visium_hd.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fba2d92d..ca932661 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning][].
 
 ## [0.1.2] - xxxx-xx-xx
 
+### Added
+
+-   (Visium HD) added reader
+
 ### Fixed
 
 -   (Xenium) reader for 1.0.1 (paper data) and unknown versions
diff --git a/README.md b/README.md
index f6c817a3..b324dafd 100644
--- a/README.md
+++ b/README.md
@@ -12,15 +12,16 @@
 
 This package contains reader functions to load common spatial omics formats into SpatialData. Currently, we provide support for:
 
--   NanoString CosMx
--   MCMICRO (output data)
--   Steinbock (output data)
 -   10x Genomics Visium
+-   10x Genomics Visium HD
 -   10x Genomics Xenium
--   Curio Seeker
 -   Akoya PhenoCycler (formerly CODEX)
--   Vizgen MERSCOPE (MERFISH)
+-   Curio Seeker
 -   DBiT-seq
+-   MCMICRO (output data)
+-   NanoString CosMx
+-   Steinbock (output data)
+-   Vizgen MERSCOPE (MERFISH)
 
 ## Getting started
 
diff --git a/docs/api.md b/docs/api.md
index 280df0cb..a00ef427 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -15,14 +15,15 @@ I/O for the `spatialdata` project.
     :toctree: generated
 
     codex
-    curio
     cosmx
+    curio
+    dbit
+    mcmicro
+    merscope
+    steinbock
     visium
+    visium_hd
     xenium
-    steinbock
-    merscope
-    mcmicro
-    dbit
 ```
 
 ### Conversion functions
diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py
index e4eb41ae..f2d3cfcb 100644
--- a/src/spatialdata_io/__init__.py
+++ b/src/spatialdata_io/__init__.py
@@ -8,6 +8,7 @@
 from spatialdata_io.readers.merscope import merscope
 from spatialdata_io.readers.steinbock import steinbock
 from spatialdata_io.readers.visium import visium
+from spatialdata_io.readers.visium_hd import visium_hd
 from spatialdata_io.readers.xenium import (
     xenium,
     xenium_aligned_image,
@@ -26,6 +27,7 @@
     "xenium_aligned_image",
     "xenium_explorer_selection",
     "dbit",
+    "visium_hd",
 ]
 
 __version__ = version("spatialdata-io")
diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py
index c485214d..cd22c021 100644
--- a/src/spatialdata_io/_constants/_constants.py
+++ b/src/spatialdata_io/_constants/_constants.py
@@ -227,3 +227,60 @@ class DbitKeys(ModeEnum):
     BARCODE_POSITION = "barcode_list"
     # image
     IMAGE_LOWRES_FILE = "tissue_lowres_image.png"
+
+
+@unique
+class VisiumHDKeys(ModeEnum):
+    """Keys for *10X Genomics Visium hd* formatted dataset."""
+
+    # directories
+    SPATIAL = "spatial"
+    BINNED = "binned_outputs"
+    DEFAULT_BIN = "square_008um"
+    MICROSCOPE_IMAGE = "microscope_image"
+
+    # counts and locations files
+    FILTERED_COUNTS_FILE = "filtered_feature_bc_matrix.h5"
+    RAW_COUNTS_FILE = "raw_feature_bc_matrix.h5"
+    TISSUE_POSITIONS_FILE = "tissue_positions.parquet"
+
+    # images
+    IMAGE_HIRES_FILE = "spatial/tissue_hires_image.png"
+    IMAGE_LOWRES_FILE = "spatial/tissue_lowres_image.png"
+    IMAGE_CYTASSIST = "spatial/cytassist_image.tiff"
+
+    # scalefactors
+    SCALEFACTORS_FILE = "scalefactors_json.json"
+
+    # scalefactors keys
+    SCALEFACTORS_HIRES = "tissue_hires_scalef"
+    SCALEFACTORS_LOWRES = "tissue_lowres_scalef"
+    SCALEFACTORS_SPOT_DIAMETER_FULLRES = "spot_diameter_fullres"
+    SCALEFACTORS_BIN_SIZE_UM = "bin_size_um"
+    SCALEFACTORS_MICRONS_PER_PIXEL = "microns_per_pixel"
+
+    # locations keys
+    LOCATIONS_X = "pxl_col_in_fullres"
+    LOCATIONS_Y = "pxl_row_in_fullres"
+    BARCODE = "barcode"
+    IN_TISSUE = "in_tissue"
+    ARRAY_ROW = "array_row"
+    ARRAY_COL = "array_col"
+
+    # table keys
+    REGION_KEY = "region"
+    INSTANCE_KEY = "location_id"
+
+    # feature slice file (it contains transformation matrices in the metadata)
+    FEATURE_SLICE_FILE = "feature_slice.h5"
+
+    # METADATA_KEYS
+    METADATA_JSON = "metadata_json"
+    HD_LAYOUT_JSON = "hd_layout_json"
+    TRANSFORM = "transform"
+    TRANSFORM_MATRICES = "transform_matrices"
+    CYTASSIST_COLROW_TO_SPOT_COLROW = ("cytassist_colrow_to_spot_colrow",)
+    SPOT_COLROW_TO_CYTASSIST_COLROW = ("spot_colrow_to_cytassist_colrow",)
+    MICROSCOPE_COLROW_TO_SPOT_COLROW = ("microscope_colrow_to_spot_colrow",)
+    SPOT_COLROW_TO_MICROSCOPE_COLROW = ("spot_colrow_to_microscope_colrow",)
+    FILE_FORMAT = "file_format"
diff --git a/src/spatialdata_io/readers/visium_hd.py b/src/spatialdata_io/readers/visium_hd.py
new file mode 100644
index 00000000..23be547f
--- /dev/null
+++ b/src/spatialdata_io/readers/visium_hd.py
@@ -0,0 +1,350 @@
+from __future__ import annotations
+
+import json
+import os
+import re
+import warnings
+from collections.abc import Mapping
+from pathlib import Path
+from types import MappingProxyType
+from typing import Any
+
+import h5py
+import numpy as np
+import pandas as pd
+import scanpy as sc
+from dask_image.imread import imread
+from imageio import imread as imread2
+from multiscale_spatial_image import MultiscaleSpatialImage
+from spatial_image import SpatialImage
+from spatialdata import SpatialData
+from spatialdata.models import Image2DModel, ShapesModel, TableModel
+from spatialdata.transformations import (
+    Affine,
+    Identity,
+    Scale,
+    Sequence,
+    set_transformation,
+)
+from xarray import DataArray
+
+from spatialdata_io._constants._constants import VisiumHDKeys
+from spatialdata_io._docs import inject_docs
+
+
+@inject_docs(vx=VisiumHDKeys)
+def visium_hd(
+    path: str | Path,
+    dataset_id: str,
+    filtered_counts_file: bool = True,
+    bin_size: int | list[int] | None = None,
+    fullres_image_file: str | Path | None = None,
+    load_all_images: bool = False,
+    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
+    image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
+    anndata_kwargs: Mapping[str, Any] = MappingProxyType({}),
+) -> SpatialData:
+    """
+    Read *10x Genomics* Visium HD formatted dataset.
+
+    .. seealso::
+
+        - `Space Ranger output
+            <https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/output/overview>`_.
+
+    Parameters
+    ----------
+    path
+        Path to directory containing the *10x Genomics* Visium HD output.
+    dataset_id
+        Unique identifier of the dataset.
+    filtered_counts_file
+        It sets the value of `counts_file` to ``{vx.FILTERED_COUNTS_FILE!r}`` (when `True`) or to
+        ``{vx.RAW_COUNTS_FILE!r}`` (when `False`).
+    bin_size
+        When specified, load the data of a specific bin size, or a list of bin sizes. By default, it loads all the
+        available bin sizes.
+    fullres_image_file
+        Path to the full-resolution image. By default the image is searched in the ``{vx.MICROSCOPE_IMAGE!r}``
+        directory.
+    load_all_images
+        If `False`, load only the full resolution, high resolution and low resolution images. If `True`, also the
+        following images: ``{vx.IMAGE_CYTASSIST!r}``.
+    imread_kwargs
+        Keyword arguments for :func:`imageio.imread`.
+    image_models_kwargs
+        Keyword arguments for :class:`spatialdata.models.Image2DModel`.
+    anndata_kwargs
+        Keyword arguments for :func:`anndata.read_h5ad`.
+
+    Returns
+    -------
+    SpatialData object for the Visium HD data.
+    """
+    path = Path(path)
+    tables = {}
+    shapes = {}
+    images: dict[str, Any] = {}
+
+    def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None) -> None:
+        _load_image(
+            path=path,
+            images=images,
+            suffix=suffix,
+            dataset_id=dataset_id,
+            imread_kwargs=imread_kwargs,
+            image_models_kwargs=image_models_kwargs,
+            scale_factors=scale_factors,
+        )
+
+    metadata, hd_layout = _parse_metadata(path)
+    transform_matrices = _get_transform_matrices(metadata, hd_layout)
+    file_format = hd_layout[VisiumHDKeys.FILE_FORMAT]
+    if file_format != "1.0":
+        warnings.warn(
+            f"File format {file_format} is not supported. A more recent file format may be supported in a newer version"
+            f"of the spatialdata-io package.",
+            UserWarning,
+            stacklevel=2,
+        )
+
+    path_bins = path / VisiumHDKeys.BINNED
+    all_bin_sizes = sorted(
+        [bin_size for bin_size in os.listdir(path_bins) if os.path.isdir(os.path.join(path_bins, bin_size))]
+    )
+    if bin_size is None:
+        bin_sizes = all_bin_sizes
+    elif isinstance(bin_size, int) or isinstance(bin_size, list) and len(bin_size) == 0:
+        if f"square_{bin_size:03}um" not in all_bin_sizes:
+            warnings.warn(
+                f"Requested bin size {bin_size} not found (available {all_bin_sizes}). Using all available bins.",
+                UserWarning,
+                stacklevel=2,
+            )
+            bin_sizes = all_bin_sizes
+        else:
+            bin_sizes = [f"square_{bin_size:03}um"]
+
+    # iterate over the given bins and load the data
+    for bin_size_str in bin_sizes:
+        path_bin = path_bins / bin_size_str
+        counts_file = VisiumHDKeys.FILTERED_COUNTS_FILE if filtered_counts_file else VisiumHDKeys.RAW_COUNTS_FILE
+        adata = sc.read_10x_h5(
+            path_bin / counts_file,
+            gex_only=False,
+            **anndata_kwargs,
+        )
+
+        path_bin_spatial = path_bin / VisiumHDKeys.SPATIAL
+
+        with open(path_bin_spatial / VisiumHDKeys.SCALEFACTORS_FILE) as file:
+            scalefactors = json.load(file)
+
+        # consistency check
+        found_bin_size = re.search(r"\d{3}", bin_size_str)
+        assert found_bin_size is not None
+        assert float(found_bin_size.group()) == scalefactors[VisiumHDKeys.SCALEFACTORS_BIN_SIZE_UM]
+        assert np.isclose(
+            scalefactors[VisiumHDKeys.SCALEFACTORS_BIN_SIZE_UM]
+            / scalefactors[VisiumHDKeys.SCALEFACTORS_SPOT_DIAMETER_FULLRES],
+            scalefactors[VisiumHDKeys.SCALEFACTORS_MICRONS_PER_PIXEL],
+        )
+
+        tissue_positions_file = path_bin_spatial / VisiumHDKeys.TISSUE_POSITIONS_FILE
+
+        # read coordinates and set up adata.obs and adata.obsm
+        coords = pd.read_parquet(tissue_positions_file)
+        assert all(
+            coords.columns.values
+            == [
+                VisiumHDKeys.BARCODE,
+                VisiumHDKeys.IN_TISSUE,
+                VisiumHDKeys.ARRAY_ROW,
+                VisiumHDKeys.ARRAY_COL,
+                VisiumHDKeys.LOCATIONS_Y,
+                VisiumHDKeys.LOCATIONS_X,
+            ]
+        )
+        coords.set_index(VisiumHDKeys.BARCODE, inplace=True, drop=True)
+        coords_filtered = coords.loc[adata.obs.index]
+        adata.obs = pd.merge(adata.obs, coords_filtered, how="left", left_index=True, right_index=True)
+        # compatibility to legacy squidpy
+        adata.obsm["spatial"] = adata.obs[[VisiumHDKeys.LOCATIONS_X, VisiumHDKeys.LOCATIONS_Y]].values
+        # dropping the spatial coordinates (will be stored in shapes)
+        adata.obs.drop(
+            columns=[
+                VisiumHDKeys.LOCATIONS_X,
+                VisiumHDKeys.LOCATIONS_Y,
+            ],
+            inplace=True,
+        )
+        adata.obs[VisiumHDKeys.INSTANCE_KEY] = np.arange(len(adata))
+
+        # scaling
+        transform_original = Identity()
+        transform_lowres = Scale(
+            np.array(
+                [
+                    scalefactors[VisiumHDKeys.SCALEFACTORS_LOWRES],
+                    scalefactors[VisiumHDKeys.SCALEFACTORS_LOWRES],
+                ]
+            ),
+            axes=("x", "y"),
+        )
+        transform_hires = Scale(
+            np.array(
+                [
+                    scalefactors[VisiumHDKeys.SCALEFACTORS_HIRES],
+                    scalefactors[VisiumHDKeys.SCALEFACTORS_HIRES],
+                ]
+            ),
+            axes=("x", "y"),
+        )
+        # parse shapes
+        circles = ShapesModel.parse(
+            adata.obsm["spatial"],
+            geometry=0,
+            radius=scalefactors[VisiumHDKeys.SCALEFACTORS_SPOT_DIAMETER_FULLRES] / 2.0,
+            index=adata.obs[VisiumHDKeys.INSTANCE_KEY].copy(),
+            transformations={
+                "global": transform_original,
+                "downscaled_hires": transform_hires,
+                "downscaled_lowres": transform_lowres,
+            },
+        )
+        shapes_name = dataset_id + "_" + bin_size_str
+        shapes[shapes_name] = circles
+
+        # parse table
+        adata.obs[VisiumHDKeys.REGION_KEY] = shapes_name
+        adata.obs[VisiumHDKeys.REGION_KEY] = adata.obs[VisiumHDKeys.REGION_KEY].astype("category")
+        tables[bin_size_str] = TableModel.parse(
+            adata,
+            region=shapes_name,
+            region_key=str(VisiumHDKeys.REGION_KEY),
+            instance_key=str(VisiumHDKeys.INSTANCE_KEY),
+        )
+
+    # read full resolution image
+    if fullres_image_file is not None:
+        fullres_image_file = Path(fullres_image_file)
+    else:
+        path_fullres = path / VisiumHDKeys.MICROSCOPE_IMAGE
+        if path_fullres.exists():
+            fullres_image_filenames = [
+                f for f in os.listdir(path_fullres) if os.path.isfile(os.path.join(path_fullres, f))
+            ]
+            if len(fullres_image_filenames) > 1:
+                warnings.warn(
+                    f"Multiple files found in {path_fullres}, using the first one: {fullres_image_filenames[0]}. Please"
+                    " specify the path to the full resolution image manually using the `fullres_image_file` argument.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+            fullres_image_filename = fullres_image_filenames[0]
+            fullres_image_file = path_fullres / fullres_image_filename
+
+    if fullres_image_file is not None:
+        load_image(
+            path=fullres_image_file,
+            suffix="_full_image",
+            scale_factors=[2, 2, 2, 2],
+        )
+
+    # hires image
+    load_image(
+        path=path / VisiumHDKeys.IMAGE_HIRES_FILE,
+        suffix="_hires_image",
+    )
+    set_transformation(
+        images[dataset_id + "_hires_image"],
+        {"downscaled_hires": Identity()},
+        set_all=True,
+    )
+
+    # lowres image
+    load_image(
+        path=path / VisiumHDKeys.IMAGE_LOWRES_FILE,
+        suffix="_lowres_image",
+    )
+    set_transformation(
+        images[dataset_id + "_lowres_image"],
+        {"downscaled_lowres": Identity()},
+        set_all=True,
+    )
+
+    # cytassist image
+    if load_all_images:
+        load_image(
+            path=path / VisiumHDKeys.IMAGE_CYTASSIST,
+            suffix="_cytassist_image",
+        )
+    image = images[dataset_id + "_cytassist_image"]
+    affine0 = transform_matrices["cytassist_colrow_to_spot_colrow"]
+    affine1 = transform_matrices["spot_colrow_to_microscope_colrow"]
+    set_transformation(image, Sequence([affine0, affine1]), "global")
+
+    sdata = SpatialData(tables=tables, images=images, shapes=shapes)
+
+    return sdata
+
+
+def _load_image(
+    path: Path,
+    images: dict[str, SpatialImage | MultiscaleSpatialImage],
+    suffix: str,
+    dataset_id: str,
+    imread_kwargs: Mapping[str, Any],
+    image_models_kwargs: Mapping[str, Any],
+    scale_factors: list[int] | None,
+) -> None:
+    if path.exists():
+        if path.suffix != ".btf":
+            data = imread(path, **imread_kwargs)
+            if len(data.shape) == 4:
+                # this happens for the cytassist, hires and lowres images; the umi image doesn't need processing
+                data = data.squeeze().transpose(2, 0, 1)
+        else:
+            if "MAX_IMAGE_PIXELS" in imread_kwargs:
+                from PIL import Image as ImagePIL
+
+                ImagePIL.MAX_IMAGE_PIXELS = dict(imread_kwargs).pop("MAX_IMAGE_PIXELS")
+            # dask_image doesn't recognize .btf automatically and imageio v3 throws error due to pixel limit -> use imageio v2
+            data = imread2(path, **imread_kwargs).squeeze().transpose(2, 0, 1)
+        image = DataArray(data, dims=("c", "y", "x"))
+        parsed = Image2DModel.parse(image, scale_factors=scale_factors, **image_models_kwargs)
+        images[dataset_id + suffix] = parsed
+    else:
+        warnings.warn(f"File {path} does not exist, skipping it.", UserWarning, stacklevel=2)
+    return None
+
+
+def _get_affine(coefficients: list[int]) -> Affine:
+    matrix = np.array(coefficients).reshape(3, 3)
+    # the last row doesn't match with machine precision, let's check the matrix it's still close to a homogeneous
+    # matrix, and fix this
+    assert np.allclose(matrix[2], [0, 0, 1], atol=1e-2), matrix
+    matrix[2] = [0, 0, 1]
+    return Affine(matrix, input_axes=("x", "y"), output_axes=("x", "y"))
+
+
+def _parse_metadata(path: Path) -> tuple[dict[str, Any], dict[str, Any]]:
+    with h5py.File(path / VisiumHDKeys.FEATURE_SLICE_FILE, "r") as f5:
+        metadata = json.loads(dict(f5.attrs)[VisiumHDKeys.METADATA_JSON])
+        hd_layout = json.loads(metadata[VisiumHDKeys.HD_LAYOUT_JSON])
+    return metadata, hd_layout
+
+
+def _get_transform_matrices(metadata: dict[str, Any], hd_layout: dict[str, Any]) -> dict[str, Affine]:
+    transform_matrices = {}
+
+    for key in [
+        VisiumHDKeys.CYTASSIST_COLROW_TO_SPOT_COLROW,
+        VisiumHDKeys.SPOT_COLROW_TO_CYTASSIST_COLROW,
+        VisiumHDKeys.MICROSCOPE_COLROW_TO_SPOT_COLROW,
+        VisiumHDKeys.SPOT_COLROW_TO_MICROSCOPE_COLROW,
+    ]:
+        data = metadata[VisiumHDKeys.TRANSFORM_MATRICES][key]
+        transform_matrices[key.value] = _get_affine(data)
+
+    return transform_matrices

From b5b5742afe8f586c7339ffd4138f3e3fc8129a76 Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Fri, 29 Mar 2024 02:44:45 +0100
Subject: [PATCH 2/2] add visium hd reader; fix tests

---
 CHANGELOG.md                                  |  2 +-
 src/spatialdata_io/_constants/_constants.py   |  2 +-
 .../converters/legacy_anndata.py              |  6 +--
 src/spatialdata_io/readers/visium_hd.py       | 49 +++++++++++++------
 4 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ca932661..b8888528 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning][].
 
 ### Added
 
--   (Visium HD) added reader
+-   (Visium HD) added reader, coauthored by @LLehner
 
 ### Fixed
 
diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py
index cd22c021..08928215 100644
--- a/src/spatialdata_io/_constants/_constants.py
+++ b/src/spatialdata_io/_constants/_constants.py
@@ -235,8 +235,8 @@ class VisiumHDKeys(ModeEnum):
 
     # directories
     SPATIAL = "spatial"
-    BINNED = "binned_outputs"
     DEFAULT_BIN = "square_008um"
+    BIN_PREFIX = "square_"
     MICROSCOPE_IMAGE = "microscope_image"
 
     # counts and locations files
diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py
index 17406a9a..c4fa5aa6 100644
--- a/src/spatialdata_io/converters/legacy_anndata.py
+++ b/src/spatialdata_io/converters/legacy_anndata.py
@@ -8,7 +8,7 @@
     SpatialData,
     get_centroids,
     get_extent,
-    join_sdata_spatialelement_table,
+    join_spatialelement_table,
     to_circles,
 )
 from spatialdata._core.operations._utils import transform_to_data_extent
@@ -149,8 +149,8 @@ def to_legacy_anndata(
     shapes = to_circles(element)
     circles_sdata = SpatialData(tables={table_name: table}, shapes={region_name: shapes})
 
-    joined_elements, new_table = join_sdata_spatialelement_table(
-        sdata=circles_sdata, spatial_element_name=region_name, table_name=table_name, how="inner", match_rows="left"
+    joined_elements, new_table = join_spatialelement_table(
+        sdata=circles_sdata, spatial_element_names=region_name, table_name=table_name, how="inner", match_rows="left"
     )
 
     # the table after the filtering must not be empty
diff --git a/src/spatialdata_io/readers/visium_hd.py b/src/spatialdata_io/readers/visium_hd.py
index 23be547f..e54e8759 100644
--- a/src/spatialdata_io/readers/visium_hd.py
+++ b/src/spatialdata_io/readers/visium_hd.py
@@ -35,7 +35,7 @@
 @inject_docs(vx=VisiumHDKeys)
 def visium_hd(
     path: str | Path,
-    dataset_id: str,
+    dataset_id: str | None = None,
     filtered_counts_file: bool = True,
     bin_size: int | list[int] | None = None,
     fullres_image_file: str | Path | None = None,
@@ -57,7 +57,7 @@ def visium_hd(
     path
         Path to directory containing the *10x Genomics* Visium HD output.
     dataset_id
-        Unique identifier of the dataset.
+        Unique identifier of the dataset. If `None`, it tries to infer it from the file name of the feature slice file.
     filtered_counts_file
         It sets the value of `counts_file` to ``{vx.FILTERED_COUNTS_FILE!r}`` (when `True`) or to
         ``{vx.RAW_COUNTS_FILE!r}`` (when `False`).
@@ -86,6 +86,12 @@ def visium_hd(
     shapes = {}
     images: dict[str, Any] = {}
 
+    if dataset_id is None:
+        dataset_id = _infer_dataset_id(path)
+        filename_prefix = f"{dataset_id}_"
+    else:
+        filename_prefix = ""
+
     def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None) -> None:
         _load_image(
             path=path,
@@ -97,7 +103,7 @@ def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None)
             scale_factors=scale_factors,
         )
 
-    metadata, hd_layout = _parse_metadata(path)
+    metadata, hd_layout = _parse_metadata(path, filename_prefix)
     transform_matrices = _get_transform_matrices(metadata, hd_layout)
     file_format = hd_layout[VisiumHDKeys.FILE_FORMAT]
     if file_format != "1.0":
@@ -108,9 +114,13 @@ def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None)
             stacklevel=2,
         )
 
-    path_bins = path / VisiumHDKeys.BINNED
+    path_bins = path
     all_bin_sizes = sorted(
-        [bin_size for bin_size in os.listdir(path_bins) if os.path.isdir(os.path.join(path_bins, bin_size))]
+        [
+            bin_size
+            for bin_size in os.listdir(path_bins)
+            if os.path.isdir(os.path.join(path_bins, bin_size)) and bin_size.startswith(VisiumHDKeys.BIN_PREFIX)
+        ]
     )
     if bin_size is None:
         bin_sizes = all_bin_sizes
@@ -246,7 +256,7 @@ def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None)
 
     if fullres_image_file is not None:
         load_image(
-            path=fullres_image_file,
+            path=path / fullres_image_file,
             suffix="_full_image",
             scale_factors=[2, 2, 2, 2],
         )
@@ -279,14 +289,22 @@ def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None)
             path=path / VisiumHDKeys.IMAGE_CYTASSIST,
             suffix="_cytassist_image",
         )
-    image = images[dataset_id + "_cytassist_image"]
-    affine0 = transform_matrices["cytassist_colrow_to_spot_colrow"]
-    affine1 = transform_matrices["spot_colrow_to_microscope_colrow"]
-    set_transformation(image, Sequence([affine0, affine1]), "global")
+        image = images[dataset_id + "_cytassist_image"]
+        affine0 = transform_matrices["cytassist_colrow_to_spot_colrow"]
+        affine1 = transform_matrices["spot_colrow_to_microscope_colrow"]
+        set_transformation(image, Sequence([affine0, affine1]), "global")
 
-    sdata = SpatialData(tables=tables, images=images, shapes=shapes)
+    return SpatialData(tables=tables, images=images, shapes=shapes)
 
-    return sdata
+
+def _infer_dataset_id(path: Path) -> str:
+    suffix = f"_{VisiumHDKeys.FEATURE_SLICE_FILE.value}"
+    files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f)) and f.endswith(suffix)]
+    if len(files) == 0 or len(files) > 1:
+        raise ValueError(
+            f"Cannot infer `dataset_id` from the feature slice file in {path}, please pass `dataset_id` as an argument."
+        )
+    return files[0].replace(suffix, "")
 
 
 def _load_image(
@@ -328,8 +346,8 @@ def _get_affine(coefficients: list[int]) -> Affine:
     return Affine(matrix, input_axes=("x", "y"), output_axes=("x", "y"))
 
 
-def _parse_metadata(path: Path) -> tuple[dict[str, Any], dict[str, Any]]:
-    with h5py.File(path / VisiumHDKeys.FEATURE_SLICE_FILE, "r") as f5:
+def _parse_metadata(path: Path, filename_prefix: str) -> tuple[dict[str, Any], dict[str, Any]]:
+    with h5py.File(path / f"{filename_prefix}{VisiumHDKeys.FEATURE_SLICE_FILE.value}", "r") as f5:
         metadata = json.loads(dict(f5.attrs)[VisiumHDKeys.METADATA_JSON])
         hd_layout = json.loads(metadata[VisiumHDKeys.HD_LAYOUT_JSON])
     return metadata, hd_layout
@@ -338,6 +356,9 @@ def _parse_metadata(path: Path) -> tuple[dict[str, Any], dict[str, Any]]:
 def _get_transform_matrices(metadata: dict[str, Any], hd_layout: dict[str, Any]) -> dict[str, Affine]:
     transform_matrices = {}
 
+    # not used
+    transform_matrices["hd_layout_transform"] = _get_affine(hd_layout[VisiumHDKeys.TRANSFORM])
+
     for key in [
         VisiumHDKeys.CYTASSIST_COLROW_TO_SPOT_COLROW,
         VisiumHDKeys.SPOT_COLROW_TO_CYTASSIST_COLROW,