AlexsLemonade · sjspielman · Nov 20, 2024 · Nov 19, 2024 · Nov 19, 2024 · Nov 19, 2024
@@ -50,20 +50,21 @@ fi
 # We'll define file names with absolute paths for robustness
 
 # First, download the fetal kidney reference (Stewart et al)
-kidney_ref_url="https://datasets.cellxgene.cziscience.com/40ebb8e4-1a25-4a33-b8ff-02d1156e4e9b.rds"
-kidney_ref_file="${PWD}/scratch/fetal_kidney.rds"
+kidney_ref_url="https://datasets.cellxgene.cziscience.com/fcb6225a-b329-4ac9-8a3f-559ca9bac50e.h5ad"
+kidney_ref_file="${PWD}/scratch/fetal_kidney.h5ad"
+kidney_ref_file_seurat="${PWD}/scratch/fetal_kidney_seurat.rds" # will be used by 00b
 if [[ ! -f $kidney_ref_file ]]; then
   curl -o $kidney_ref_file $kidney_ref_url
 fi
-# Second, download the homologs file for gene ID conversion
+# Second, download the Seurat/Azimuth homologs file for gene ID conversion
 homologs_url="https://seurat.nygenome.org/azimuth/references/homologs.rds"
 homologs_file="${PWD}/scratch/homologs.rds"
 if [[ ! -f $homologs_file ]]; then
   curl -o $homologs_file $homologs_url
 fi
 
 # Create the fetal references for label transfer (Stewart et al and Cao et al)
-Rscript scripts/prepare-fetal-references.R --kidney_ref_file "${kidney_ref_file}"
+Rscript scripts/prepare-fetal-references.R --kidney_ref_file "${kidney_ref_file}" --kidney_ref_file_seurat ${kidney_ref_file_seurat}
 
 # Characterize the fetal kidney reference (Stewart et al.)
 # This step does not directly contribute to the final annotations
@@ -72,7 +73,7 @@ if [[ $RUN_EXPLORATORY -eq 1 ]]; then
       output_format = 'html_document',
       output_file = '00b_characterization_fetal_kidney_reference_Stewart.html',
       output_dir = '${notebook_output_dir}/00-reference',
-      params = list(fetal_kidney_path = '${kidney_ref_file}'))"
+      params = list(fetal_kidney_path = '${kidney_ref_file_seurat}'))"
 fi
 
 
@@ -104,8 +105,9 @@ for sample_dir in ${data_dir}/${project_id}/SCPCS*; do
                     output_dir = '${sample_notebook_dir}')"
 
     # Label transfer from the Stewart reference
+    # Note that this reference has ensembl IDs
     Rscript -e "rmarkdown::render('${notebook_template_dir}/02b_label-transfer_fetal_kidney_reference_Stewart.Rmd',
-                    params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', homologs_file = '${homologs_file}', testing = ${TESTING}),
+                    params = list(scpca_project_id = '${project_id}', sample_id = '${sample_id}', testing = ${TESTING}),
                     output_format = 'html_document',
                     output_file = '02b_fetal_kidney_reference_Stewart_${sample_id}.html',
                     output_dir = '${sample_notebook_dir}')"

@@ -187,6 +187,7 @@ We thus decided to test and compare two fetal (kidney) references that could be
 
 We first wanted to try the human fetal kidney atlas to transfer label into the Wilms tumor samples using azimuth.
 You can find more about the human kidney atlas here: https://www.kidneycellatlas.org/ [3]
+The reference was obtained from CELLxGENE: <https://cellxgene.cziscience.com/collections/120e86b4-1195-48c5-845b-b98054105eec>, specifically the download for `Fetal kidney dataset: full`. 
 
 ##### Human Azimuth fetal reference from Cao et al.
 

@@ -7,9 +7,9 @@ params:
   lfc_threshold: 1
   rate1_threshold: 0.5
   seed: 12345
-  fetal_kidney_path: "../scratch/fetal_kidney.rds"
-output: 
-  html_document: 
+  fetal_kidney_path: "../scratch/fetal_kidney_seurat.rds"
+output:
+  html_document:
     toc: yes
     toc_float: yes
     code_folding: hide
@@ -30,9 +30,8 @@ knitr::opts_chunk$set(
 
 # Introduction
 
-The aim is to characterize the human fetal kidney from the kidney cell atlas.
-You can find more about the human kidney atlas here: https://www.kidneycellatlas.org/ [1]
-The rds data can be download using the download link: <https://datasets.cellxgene.cziscience.com/40ebb8e4-1a25-4a33-b8ff-02d1156e4e9b.rds>.
+The aim is to characterize the human fetal kidney from the kidney cell atlas obtained from CELLxGENE.
+You can find more about the human kidney atlas here: https://www.kidneycellatlas.org/ [1].
 The reference was downloaded and created in the script `scripts/prepare-fetal-references.R`.
 
 ## Packages
@@ -90,12 +89,12 @@ d1 | d2
 
 Here, we use an unbiased approach to find transcripts that characterized the different compartments and cell types.
 
-This is just to get markers genes of the different population, in case some could be of interest for the Wilms tumor annotations. 
+This is just to get markers genes of the different population, in case some could be of interest for the Wilms tumor annotations.
 
-We run `DElegate::FindAllMarkers2()` to find markers of the different clusters and manually check if they do make sense. 
-`DElegate::FindAllMarkers2()` is an improved version of `Seurat::FindAllMarkers()` based on pseudobulk differential expression method. 
+We run `DElegate::FindAllMarkers2()` to find markers of the different clusters and manually check if they do make sense.
+`DElegate::FindAllMarkers2()` is an improved version of `Seurat::FindAllMarkers()` based on pseudobulk differential expression method.
 Please check the preprint from Hafemeister and Halbritter: https://www.biorxiv.org/content/10.1101/2023.03.28.534443v2
-and tool described here: https://github.com/cancerbits/DElegate 
+and tool described here: https://github.com/cancerbits/DElegate
 
 ### Find marker genes for each of the compartment
 
@@ -188,9 +187,9 @@ show((((p1 / p3) + plot_layout(heights = c(3, 2)) | p2)) + plot_layout(widths =
 write_csv(de_results, file = file.path(path_to_output, "00a_marker_cell-type_fetal_kidney_Stewart.csv"))
 ```
 
-## References 
+## References
 
-- [1] https://www.science.org/doi/10.1126/science.aat5031 
+- [1] https://www.science.org/doi/10.1126/science.aat5031
 
 ## Session info
 

@@ -161,10 +161,10 @@ srat_assay <- "RNA"
 #  removes features that are not present in the reference
 DefaultAssay(srat) <- srat_assay
 query <- prepare_query(
-  srat,
-  rownames(reference),
-  srat_assay,
-  params$homologs_file
+  query = srat,
+  reference_rownames = rownames(reference),
+  assay = srat_assay,
+  homolog_file = params$homologs_file
 )
 ```
 

@@ -6,7 +6,6 @@ params:
   scpca_project_id: "SCPCP000006"
   sample_id: "SCPCS000176"
   seed: 12345
-  homologs_file: "../scratch/homologs.rds"
   testing: 0
 output: 
   html_document: 
@@ -166,7 +165,7 @@ query <- prepare_query(
   srat,
   rownames(reference),
   srat_assay,
-  params$homologs_file
+  convert_gene_names = FALSE
 )
 ```
 

@@ -22,22 +22,26 @@
 #' @param query The Seurat object which will undergo label transfer
 #' @param reference_rownames The rownames (aka, features) in the reference object
 #' @param assay Name of assay in query to prepare
+#' @param convert_gene_names Whether ensembl IDs should be converted to gene symbols
 #' @param homolog_file Path to the homologs.rds file obtained from Seurat
 #'
 #' @return Seurat object prepared for label transfer
 prepare_query <- function(
     query,
     reference_rownames,
     assay = NULL,
+    convert_gene_names = TRUE,
     homolog_file = homologs_file) {
-  # Convert the query (sample) row names from ensembl IDs to gene names to match what
+  # If specified, convert the query (sample) row names from ensembl IDs to gene names to match what
   # the Azimuth reference uses
   # Source: https://github.com/satijalab/azimuth/blob/243ee5db80fcbffa3452c944254a325a3da2ef9e/R/azimuth.R#L99-L104
-  query <- Azimuth::ConvertGeneNames(
-    object = query,
-    reference.names = reference_rownames,
-    homolog.table = homolog_file
-  )
+  if (convert_gene_names) {
+    query <- Azimuth::ConvertGeneNames(
+      object = query,
+      reference.names = reference_rownames,
+      homolog.table = homolog_file
+    )
+  }
 
   # Calculate nCount_RNA and nFeature_RNA if the query does not
   # contain them already

@@ -18,15 +18,23 @@
 library(optparse)
 library(Seurat)
 library(Azimuth)
+library(zellkonverter)
+library(SingleCellExperiment)
 
 # Parse arguments --------------------------------------------------------------
 # set up arguments
 option_list <- list(
   make_option(
     opt_str = c("--kidney_ref_file"),
     type = "character",
-    default = "scratch/fetal_kidney.rds",
-    help = "The relative path from the current directory to the fetal kidney atlas downloaded from CELLxGENE"
+    default = "scratch/fetal_kidney.h5ad",
+    help = "The relative path from the current directory to the H5AD (AnnData) fetal kidney atlas downloaded from CELLxGENE"
+  ),
+  make_option(
+    opt_str = c("--kidney_ref_file_seurat"),
+    type = "character",
+    default = "scratch/fetal_kidney_seurat.rds",
+    help = "The relative path from the current directory to file to export with the Seurat-formatted kidney reference"
   ),
   make_option(
     opt_str = c("-d", "--output_dir"),
@@ -124,9 +132,21 @@ cao_annotation_levels <- c("annotation.l1", "annotation.l2", "organ")
 
 # Read in data
 if (!file.exists(opts$kidney_ref_file)) {
-  stop("The kidney reference file could not be found.")
+  stop("The H5AD kidney reference file could not be found.")
 }
-seurat <- readRDS(opts$kidney_ref_file)
+# Use raw=TRUE to get the raw counts as an alternative experiment
+sce_kidney <- zellkonverter::readH5AD(opts$kidney_ref_file, raw = TRUE)
+
+raw_sce_counts <- assay(altExp(sce_kidney), "X")
+rownames(raw_sce_counts) <- rownames(sce_kidney)
+
+seurat <- SeuratObject::CreateSeuratObject(
+  counts = raw_sce_counts,
+  assay = "RNA",
+  project = "kidneyatlas"
+) |>
+  AddMetaData(sce_kidney$compartment, "compartment") |>
+  AddMetaData(sce_kidney$cell_type, "cell_type")
 
 # Transform and dimension reduction
 set.seed(opts$seed)
@@ -150,13 +170,7 @@ fetal_kidney <- AzimuthReference(
   dims = 1:50,
   k.param = 31,
   plotref = "umap",
-  plot.metadata = NULL,
-  ori.index = NULL,
-  colormap = NULL,
-  assays = NULL,
   metadata = stewart_annotation_levels,
-  reference.version = "0.0.0",
-  verbose = FALSE
 )
 
 # format for label transfer
@@ -165,6 +179,8 @@ stewart_ref_list <- prepare_azimuth_reference(fetal_kidney, stewart_annotation_l
 # export formatted reference
 saveRDS(stewart_ref_list, stewart_ref_file)
 
+# export Seurat object for analysis in ../notebook/00b_characterize_fetal_kidney_reference_Stewart.Rmd
+saveRDS(s, opts$kidney_ref_file_seurat)
 
 # Prepare Cao (full fetal organ) reference ------------------------------