From 748f6fd6741e95c6895120fb8b23837a7eb18786 Mon Sep 17 00:00:00 2001
From: alikhuseynov <alikhuseynov@gmail.com>
Date: Fri, 31 May 2024 16:19:55 +0200
Subject: [PATCH] support VisiumHD loading

---
 R/graph_wrappers.R |   7 ++
 R/read.R           | 212 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 196 insertions(+), 23 deletions(-)

diff --git a/R/graph_wrappers.R b/R/graph_wrappers.R
index 787ad9f4..46754cde 100644
--- a/R/graph_wrappers.R
+++ b/R/graph_wrappers.R
@@ -580,6 +580,13 @@ setMethod(
         match(bcs_use2, visium_row_col$barcode),
         c("col", "row")
     ]
+    if (is.na(coords_use) |> any()) {
+      # use "array_" cols from colData
+      coords_use <- 
+        colData(x)[, grep("array_", names(colData(x)))] |> 
+        as.data.frame() |> suppressWarnings()
+        colnames(coords_use) <- gsub("array_", "", colnames(coords_use))
+    }
     # So adjacent spots are equidistant
     coords_use$row <- coords_use$row * sqrt(3)
     g <- dnearneigh(as.matrix(coords_use),
diff --git a/R/read.R b/R/read.R
index 05c0bdb0..54055b16 100644
--- a/R/read.R
+++ b/R/read.R
@@ -10,6 +10,9 @@
 #' @inheritParams SpatialExperiment::read10xVisium
 #' @inheritParams findVisiumGraph
 #' @inheritParams SpatialFeatureExperiment
+#' @param bin_size \code{c(character)}, use this only when loading VisiumHD. 
+#'  Specify which bin resolution to load, default is \code{NULL} which assumes that data is standard Visium.
+#'  Eg, single resolution is \code{c("8")}, if to load all three resolutions, use \code{c("2", "8", "16")}.
 #' @param type Either "HDF5", and the matrix will be represented as
 #'   \code{TENxMatrix}, or "sparse", and the matrix will be read as a
 #'   \code{dgCMatrix}.
@@ -18,6 +21,8 @@
 #'   \code{outs} directory under the directory specified in the \code{samples}
 #'   argument, as in Space Ranger output. Change the \code{dirs} argument if you
 #'   have moved or renamed the output directory.
+#' @param add_Graph \code{c(local)}, if to add spatial neighborhood graph for spots and only if \code{c(data = "filtered")}.
+#'  Default is \code{c(TRUE)}
 #' @param unit Whether to use pixels in full resolution image or microns as the
 #'   unit. If using microns, then spacing between spots in pixels will be used
 #'   to convert the coordinates into microns, as the spacing is known to be 100
@@ -51,6 +56,25 @@
 #'     type = "sparse", data = "filtered",
 #'     load = FALSE
 #' ))
+#' 
+#' # load VisiumHD
+#' # path to "binned_outputs" directory containing:
+#' └── binned_outputs
+#'     ├── square_002um
+#'     ├── square_008um
+#'     └── square_016um
+#' dir_hd <- "~/Downloads/Visium_HD_Mouse_Brain/binned_outputs/"
+#' # this is public dataset -> https://www.10xgenomics.com/datasets/visium-hd-cytassist-gene-expression-libraries-of-mouse-brain-he
+#' sfe_hd <-
+#' read10xVisiumSFE(samples = list.files(dir_hd), # 1:3 resolutions
+#'                  dirs = dir_hd,
+#'                  bin_size = c("8", "16"), # this defines which of 1:3 resolutions to load
+#'                  type = "HDF5", # Note, "sparse" -> takes longer to load
+#'                  data = "filtered", # spots under tissue
+#'                  images = c("lowres"), # for now low res. image only
+#'                  add_Graph = FALSE # Note, if VisiumHD this can take time for 2 or 8µm res.
+#'                  )
+#'                   
 read10xVisiumSFE <- function(samples = "",
                              dirs = file.path(samples, "outs"),
                              sample_id = paste0(
@@ -60,30 +84,49 @@ read10xVisiumSFE <- function(samples = "",
                                      seq_along(samples)
                                  )
                              ),
+                             bin_size = NULL,
                              type = c("HDF5", "sparse"),
                              data = c("filtered", "raw"),
                              images = c("lowres", "hires"),
                              unit = c("full_res_image_pixel", "micron"),
+                             add_Graph = TRUE,
                              style = "W", zero.policy = NULL, load = FALSE) {
     type <- match.arg(type)
     data <- match.arg(data)
     unit <- match.arg(unit)
+    
     images <- match.arg(images, several.ok = TRUE)
     img_fns <- c(
-        lowres="tissue_lowres_image.png",
-        hires="tissue_hires_image.png")
+      lowres = "tissue_lowres_image.png",
+      hires = "tissue_hires_image.png")
     img_fns <- img_fns[images]
-
+    
+    # supports VisiumHD
+    if (!is.null(bin_size)) {
+      # sanity, to make sure it is VisiumHD using file pattern
+      sanity_passed <- 
+        grep("binned_out|square|um$", c(dirs, list.files(dirs))) |> any()
+      if (sanity_passed) {
+        # match sample names with bin_size
+        samples <- grep(paste0(bin_size, collapse = "|"), samples, value = TRUE)
+        sample_id <- samples
+        is_VisiumHD <- TRUE
+        }
+      } else { is_VisiumHD <- FALSE }
+    
     # Read one sample at a time, in order to get spot diameter one sample at a time
-    # TODO: need support for VisiumHD ----
     sfes <- lapply(seq_along(samples), function(i) {
-        o <- read10xVisium(dirs[i], sample_id[i], type, data, images, load = FALSE)
+        o <- .read10xVisium(if (is_VisiumHD) dirs else dirs[i],
+                            sample_id[i],
+                            if (is_VisiumHD) bin_size[i] else "", 
+                            type, data, images, load = FALSE)
         imgData(o) <- NULL
-
-        scalefactors <- fromJSON(file = file.path(
-            dirs[i], "spatial",
-            "scalefactors_json.json"
-        ))
+        scalefactors <- 
+          fromJSON(file = file.path(
+            if (is_VisiumHD) dirs else dirs[i], 
+            if (is_VisiumHD) samples[i] else "", 
+            "spatial", "scalefactors_json.json"))
+        
         o <- .spe_to_sfe(o,
                          colGeometries = NULL, rowGeometries = NULL,
                          annotGeometries = NULL, spatialCoordsNames = NULL,
@@ -92,7 +135,7 @@ read10xVisiumSFE <- function(samples = "",
                          unit = unit
         )
         # Add spatial enrichment if present
-        fn <- file.path(dirs[i], "spatial", "spatial_enrichment.csv")
+        fn <- file.path(dirs[i], if (is_VisiumHD) samples[i] else "", "spatial", "spatial_enrichment.csv")
         if (file.exists(fn)) {
             enrichment <- read.csv(fn)
             row_inds <- match(rownames(o), enrichment$Feature.ID)
@@ -111,7 +154,8 @@ read10xVisiumSFE <- function(samples = "",
             rowData(o) <- cbind(rowData(o), enrichment2)
         }
         # Add barcode fluorescence intensity if present
-        fn2 <- file.path(dirs[i], "spatial", "barcode_fluorescence_intensity.csv")
+        fn2 <- file.path(dirs[i], if (is_VisiumHD) samples[i] else "", 
+                         "spatial", "barcode_fluorescence_intensity.csv")
         if (file.exists(fn2)) {
             fluo <- read.csv(fn2)
             row_inds <- match(colnames(o), fluo$barcode)
@@ -119,37 +163,56 @@ read10xVisiumSFE <- function(samples = "",
             fluo$in_tissue <- NULL
             colData(o) <- cbind(colData(o), fluo[row_inds,])
         }
-
         names_use <- paste("tissue", images, "scalef", sep = "_")
         scale_imgs <- unlist(scalefactors[names_use])
+
         # Convert to microns and set extent for image
         if (unit == "micron") {
+          message(">>> Converting pixels to microns")
+          # for VisiumHD
+          if (is_VisiumHD)
+            scale_fct <- 
+              as.integer(bin_size[i]) / scalefactors$microns_per_pixel
+          else 
             scale_fct <- .pixel2micron(o)
             cg <- spotPoly(o)
             cg$geometry <- cg$geometry * scale_fct
             spotPoly(o) <- cg
             # Scale factors for images
             scale_imgs <- scale_imgs / scale_fct
-        } else {
-            scale_imgs <- scalefactors[paste("tissue", images, "scalef",
-                                             sep = "_")]
-        }
+            } else { 
+              scale_imgs <- scalefactors[names_use]
+            }
+
+        # add sample id to SFE
+        spotPoly(o)$sample_id <- sampleIDs(o)
+        
         # Set up ImgData
-        img_fns2 <- file.path(dirs[i], "spatial", img_fns)
+        img_fns2 <- file.path(if (is_VisiumHD) dirs else dirs[i],
+                              if (is_VisiumHD) samples[i] else "", 
+                              "spatial", img_fns)
+        scale_imgs_out <- scale_imgs
+        
         img_dfs <- lapply(seq_along(img_fns), function(j) {
             .get_imgData(img_fns2[j], sample_id = sample_id[i],
                          image_id = names(img_fns)[j],
                          extent = NULL, scale_fct = scale_imgs[[j]],
                          flip = TRUE)
         })
+        
         img_df <- do.call(rbind, img_dfs)
         imgData(o) <- img_df
+        
         # Create Visium graph for filtered data
         if (data == "filtered") {
-            colGraph(o, "visium") <- findVisiumGraph(
-                o, sample_id = "all", style = style,
-                zero.policy = zero.policy
-            )
+          if (add_Graph) {
+            message(paste0(">>> Adding spatial neighborhood graph to ", 
+                           sample_id[i], "\n"))
+            colGraph(o, "visium") <- 
+              findVisiumGraph(o, sample_id = "all", 
+                              style = style,
+                              zero.policy = zero.policy)
+          } else { return(o) }
         }
         o
     })
@@ -157,6 +220,109 @@ read10xVisiumSFE <- function(samples = "",
     out
 }
 
+# Modified version of SpatialExperiment::read10xVisium to support VisiumHD
+.read10xVisium <-
+  function(samples = "", # eg, path to "./binned_outputs"
+           sample_id = paste0("sample", sprintf("%02d", seq_along(samples))),
+           bin_size = "8",
+           type = c("HDF5", "sparse"), 
+           data = c("filtered", "raw"), 
+           images = "lowres", 
+           load = TRUE)
+  {
+    if (!requireNamespace("DropletUtils", quietly = TRUE)) {
+      warning("DropletUtils package must be installed to use read10xVisium()")
+    }
+    type <- match.arg(type)
+    data <- match.arg(data)
+    imgs <- c("lowres", "hires", "detected", "aligned")
+    imgs <- match.arg(images, imgs, several.ok = TRUE)
+    # check if input is VisiumHD
+    if (any(grep("square_", list.files(samples))))
+      VisiumHD <- TRUE 
+    else 
+      VisiumHD <- FALSE
+    if (VisiumHD) {
+      samples <- file.path(samples, 
+                           grep(paste0(bin_size, collapse = "|"), 
+                                list.files(samples), value = TRUE))
+      # sanity
+      if (any(length(samples) != length(bin_size))) {
+        # match samples and bin_size
+        samples <- 
+          grep(paste0(bin_size, collapse = "|"), 
+               samples, value = TRUE)
+        }
+      sids <- basename(samples)
+      names(samples) <- sids
+      } else {
+        if (is.null(sids <- names(samples))) {
+          if (is.null(sids <- sample_id)) {
+            stop("'sample_id' mustn't be NULL when 'samples' are unnamed")
+          } else if (!is.character(sample_id) && length(unique(sample_id)) != 
+                     length(samples)) 
+            stop("'sample_id' should contain as many unique values as 'samples'")
+        } else if (length(unique(sids)) != length(samples)) 
+          stop("names of 'samples' should be unique")
+        names(samples) <- sids
+        i <- basename(samples) != "outs"
+        samples[i] <- file.path(samples[i], "outs")
+      }
+    message(paste0(">>> 10X ", ifelse(VisiumHD, "VisiumHD", "Visium"), 
+                   " data will be loaded: ", basename(sids), "\n"))
+    fns <- paste0(data, "_feature_bc_matrix", switch(type, HDF5 = ".h5", ""))
+    counts <- file.path(samples, fns)
+    dir <- file.path(samples, "spatial")
+    suffix <- c("", "_list")
+    if (VisiumHD) {
+      xyz <- file.path(dir, "tissue_positions.parquet")
+      } else {
+        xyz <- file.path(rep(dir, each = length(suffix)), sprintf("tissue_positions%s.csv", 
+                                                                rep(suffix, length(sids))))
+      }
+    xyz <- xyz[file.exists(xyz)]
+    sfs <- file.path(dir, "scalefactors_json.json")
+    names(xyz) <- names(sfs) <- sids
+    img_fns <- list(lowres = "tissue_lowres_image.png", hires = "tissue_hires_image.png", 
+                    detected = "detected_tissue_image.jpg", aligned = "aligned_fiducials.jpg")
+    img_fns <- img_fns[imgs]
+    img_fns <- lapply(dir, file.path, img_fns)
+    img_fns <- unlist(img_fns)
+    nan <- !file.exists(img_fns)
+    if (all(nan)) {
+      stop(sprintf("No matching files found for 'images=c(%s)", 
+                   paste(dQuote(imgs), collapse = ", ")))
+      } else if (any(nan)) {
+        message("Skipping missing images\n  ", paste(img_fns[nan], 
+                                                   collapse = "\n  "))
+      img_fns <- img_fns[!nan]
+      }
+    img <- SpatialExperiment::readImgData(samples, sids, img_fns, sfs, load)
+    spel <- lapply(seq_along(counts), function(i) {
+      sce <- DropletUtils::read10xCounts(samples = counts[i], 
+                                         sample.names = sids[i], 
+                                         col.names = TRUE, 
+                                         row.names = "symbol")
+      if (VisiumHD) {
+        spd <- 
+          arrow::read_parquet(xyz[i]) |> 
+          as.data.frame()
+        rownames(spd) <- spd$barcode
+        } else {
+          spd <- SpatialExperiment:::.read_xyz(xyz[i])
+        }
+      obs <- intersect(colnames(sce), rownames(spd))
+      sce <- sce[, obs]
+      spd <- spd[obs, ]
+      SpatialExperiment::SpatialExperiment(assays = assays(sce), rowData = DataFrame(symbol = rowData(sce)$Symbol), 
+                                           sample_id = sids[i], colData = DataFrame(spd), 
+                                           spatialCoordsNames = c("pxl_col_in_fullres", "pxl_row_in_fullres"))
+      })
+    spe <- do.call(cbind, spel)
+    SpatialExperiment::imgData(spe) <- img
+    return(spe)
+  }
+
 #' @importFrom sf st_nearest_feature st_distance
 #' @importFrom stats median
 .pixel2micron <- function(sfe) {
@@ -627,7 +793,7 @@ readVizgen <- function(data_dir,
     # NOTE: might take some time to run
     if (use_bboxes && is.null(polys)) {
         message(">>> Creating bounding boxes from `cell_metadata`")
-        # TODO: rewrite to use the now much faster df2sf ----
+        # TODO: rewrite bboxes_sfc using much faster df2sf ----
         bboxes_sfc <-
             bplapply(seq_len(nrow(metadata)),
                      function(i) {