From c8176e521f022d3d1376f956042125524ef18911 Mon Sep 17 00:00:00 2001
From: Nirmayi <ynirmayi@gmail.com>
Date: Mon, 28 Oct 2024 13:58:07 +0100
Subject: [PATCH] update readme

---
 README.md | 304 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 167 insertions(+), 137 deletions(-)
diff --git a/README.md b/README.md
index 9ddb773..e2f934e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Spatial decomposition
+# Spatial Decomposition
 
 
 <!--
@@ -9,10 +9,10 @@ Do not edit this file directly.
 Estimation of cell type proportions per spot in 2D space from spatial
 transcriptomic data coupled with corresponding single-cell data
 
-Path to source:
-[`src`](https://github.com/openproblems-bio/task_spatial_decomposition/src)
+Repository:
+[openproblems-bio/task_spatial_decomposition](https://github.com/openproblems-bio/task_spatial_decomposition)
 
-## Motivation
+## Description
 
 Spatial decomposition (also often referred to as Spatial deconvolution)
 is applicable to spatial transcriptomics data where the transcription
@@ -25,16 +25,16 @@ type/states estimates are presented as proportion values, representing
 the proportion of the cells at each capture location that belong to a
 given cell type.
 
-## Description
-
 We distinguish between *reference-based* decomposition and *de novo*
 decomposition, where the former leverage external data (e.g., scRNA-seq
 or scNuc-seq) to guide the inference process, while the latter only work
 with the spatial data. We require that all datasets have an associated
 reference single cell data set, but methods are free to ignore this
-information. Due to the lack of real datasets with the necessary
-ground-truth, this task makes use of a simulated dataset generated by
-creating cell-aggregates by sampling from a Dirichlet distribution. The
+information.
+
+Due to the lack of real datasets with the necessary ground-truth, this
+task makes use of a simulated dataset generated by creating
+cell-aggregates by sampling from a Dirichlet distribution. The
 ground-truth dataset consists of the spatial expression matrix, XY
 coordinates of the spots, true cell-type proportions for each spot, and
 the reference single-cell data (from which cell aggregated were
@@ -46,32 +46,33 @@ simulated).
 |:-----------------|:-------------------|
 | Giovanni Palla   | author, maintainer |
 | Scott Gigante    | author             |
-| Sai Nirmayi Yasa | author             |
+| Sai Nirmayi Yasa | contributor        |
 
 ## API
 
 ``` mermaid
-flowchart LR
-  file_common_dataset("Common Dataset")
-  comp_process_dataset[/"Data processor"/]
-  file_single_cell("Single cell data")
-  file_spatial_masked("Spatial masked")
-  file_solution("Solution")
-  comp_control_method[/"Control method"/]
-  comp_method[/"Method"/]
-  comp_metric[/"Metric"/]
-  file_output("Output")
-  file_score("Score")
+flowchart TB
+  file_common_dataset("<a href='https://github.com/openproblems-bio/task_spatial_decomposition#file-format-common-dataset'>Common Dataset</a>")
+  comp_process_dataset[/"<a href='https://github.com/openproblems-bio/task_spatial_decomposition#component-type-data-processor'>Data processor</a>"/]
+  file_single_cell("<a href='https://github.com/openproblems-bio/task_spatial_decomposition#file-format-single-cell-data'>Single cell data</a>")
+  file_solution("<a href='https://github.com/openproblems-bio/task_spatial_decomposition#file-format-solution'>Solution</a>")
+  file_spatial_masked("<a href='https://github.com/openproblems-bio/task_spatial_decomposition#file-format-spatial-masked'>Spatial masked</a>")
+  comp_control_method[/"<a href='https://github.com/openproblems-bio/task_spatial_decomposition#component-type-control-method'>Control method</a>"/]
+  comp_method[/"<a href='https://github.com/openproblems-bio/task_spatial_decomposition#component-type-method'>Method</a>"/]
+  comp_metric[/"<a href='https://github.com/openproblems-bio/task_spatial_decomposition#component-type-metric'>Metric</a>"/]
+  file_output("<a href='https://github.com/openproblems-bio/task_spatial_decomposition#file-format-output'>Output</a>")
+  file_score("<a href='https://github.com/openproblems-bio/task_spatial_decomposition#file-format-score'>Score</a>")
+  file_simulated_dataset("<a href='https://github.com/openproblems-bio/task_spatial_decomposition#file-format-common-dataset'>Common Dataset</a>")
   file_common_dataset---comp_process_dataset
   comp_process_dataset-->file_single_cell
-  comp_process_dataset-->file_spatial_masked
   comp_process_dataset-->file_solution
+  comp_process_dataset-->file_spatial_masked
   file_single_cell---comp_control_method
   file_single_cell---comp_method
-  file_spatial_masked---comp_control_method
-  file_spatial_masked---comp_method
   file_solution---comp_control_method
   file_solution---comp_metric
+  file_spatial_masked---comp_control_method
+  file_spatial_masked---comp_method
   comp_control_method-->file_output
   comp_method-->file_output
   comp_metric-->file_score
@@ -98,46 +99,43 @@ Format:
 
 </div>
 
-Slot description:
+Data structure:
 
 <div class="small">
 
-| Slot                         | Type      | Description                                                                                                         |
-|:-----------------------------|:----------|:--------------------------------------------------------------------------------------------------------------------|
-| `obs["cell_type"]`           | `string`  | Cell type label IDs.                                                                                                |
-| `obs["batch"]`               | `string`  | A batch identifier. This label is very context-dependent and may be a combination of the tissue, assay, donor, etc. |
-| `var["hvg"]`                 | `boolean` | Whether or not the feature is considered to be a ‘highly variable gene’.                                            |
-| `var["hvg_score"]`           | `double`  | A ranking of the features by hvg.                                                                                   |
-| `obsm["X_pca"]`              | `double`  | (*Optional*) The resulting PCA embedding.                                                                           |
-| `layers["counts"]`           | `integer` | Raw counts.                                                                                                         |
-| `uns["cell_type_names"]`     | `string`  | (*Optional*) Cell type names corresponding to values in `cell_type`.                                                |
-| `uns["dataset_id"]`          | `string`  | A unique identifier for the dataset.                                                                                |
-| `uns["dataset_name"]`        | `string`  | Nicely formatted name.                                                                                              |
-| `uns["dataset_url"]`         | `string`  | (*Optional*) Link to the original source of the dataset.                                                            |
-| `uns["dataset_reference"]`   | `string`  | (*Optional*) Bibtex reference of the paper in which the dataset was published.                                      |
-| `uns["dataset_summary"]`     | `string`  | Short description of the dataset.                                                                                   |
-| `uns["dataset_description"]` | `string`  | Long description of the dataset.                                                                                    |
-| `uns["dataset_organism"]`    | `string`  | (*Optional*) The organism of the sample in the dataset.                                                             |
+| Slot | Type | Description |
+|:---|:---|:---|
+| `obs["cell_type"]` | `string` | Cell type label IDs. |
+| `obs["batch"]` | `string` | A batch identifier. This label is very context-dependent and may be a combination of the tissue, assay, donor, etc. |
+| `var["hvg"]` | `boolean` | Whether or not the feature is considered to be a ‘highly variable gene’. |
+| `var["hvg_score"]` | `double` | A ranking of the features by hvg. |
+| `obsm["X_pca"]` | `double` | (*Optional*) The resulting PCA embedding. |
+| `layers["counts"]` | `integer` | Raw counts. |
+| `uns["cell_type_names"]` | `string` | (*Optional*) Cell type names corresponding to values in `cell_type`. |
+| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
+| `uns["dataset_name"]` | `string` | Nicely formatted name. |
+| `uns["dataset_url"]` | `string` | (*Optional*) Link to the original source of the dataset. |
+| `uns["dataset_reference"]` | `string` | (*Optional*) BibTeX reference of the paper in which the dataset was published. |
+| `uns["dataset_summary"]` | `string` | Short description of the dataset. |
+| `uns["dataset_description"]` | `string` | Long description of the dataset. |
+| `uns["dataset_organism"]` | `string` | (*Optional*) The organism of the sample in the dataset. |
 
 </div>
 
 ## Component type: Data processor
 
-Path:
-[`src/process_dataset`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/process_dataset)
-
 A spatial decomposition dataset processor.
 
 Arguments:
 
 <div class="small">
 
-| Name                      | Type   | Description                                                                                                                                                     |
-|:--------------------------|:-------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `--input`                 | `file` | A subset of the common dataset.                                                                                                                                 |
-| `--output_single_cell`    | `file` | (*Output*) The single-cell data file used as reference for the spatial data.                                                                                    |
-| `--output_spatial_masked` | `file` | (*Output*) The spatial data file containing transcription profiles for each capture location, without cell-type proportions for each spot.                      |
-| `--output_solution`       | `file` | (*Output*) The spatial data file containing transcription profiles for each capture location, with true cell-type proportions for each spot / capture location. |
+| Name | Type | Description |
+|:---|:---|:---|
+| `--input` | `file` | A subset of the common dataset. |
+| `--output_single_cell` | `file` | (*Output*) The single-cell data file used as reference for the spatial data. |
+| `--output_spatial_masked` | `file` | (*Output*) The spatial data file containing transcription profiles for each capture location, without cell-type proportions for each spot. |
+| `--output_solution` | `file` | (*Output*) The spatial data file containing transcription profiles for each capture location, with true cell-type proportions for each spot / capture location. |
 
 </div>
 
@@ -146,7 +144,7 @@ Arguments:
 The single-cell data file used as reference for the spatial data
 
 Example file:
-`resources_test/spatial_decomposition/cxg_mouse_pancreas_atlas/single_cell_ref.h5ad`
+`resources_test/task_spatial_decomposition/cxg_mouse_pancreas_atlas/single_cell_ref.h5ad`
 
 Format:
 
@@ -159,148 +157,139 @@ Format:
 
 </div>
 
-Slot description:
+Data structure:
 
 <div class="small">
 
-| Slot                     | Type      | Description                                                                                                                      |
-|:-------------------------|:----------|:---------------------------------------------------------------------------------------------------------------------------------|
-| `obs["cell_type"]`       | `string`  | Cell type label IDs.                                                                                                             |
-| `obs["batch"]`           | `string`  | (*Optional*) A batch identifier. This label is very context-dependent and may be a combination of the tissue, assay, donor, etc. |
-| `layers["counts"]`       | `integer` | Raw counts.                                                                                                                      |
-| `uns["cell_type_names"]` | `string`  | Cell type names corresponding to values in `cell_type`.                                                                          |
-| `uns["dataset_id"]`      | `string`  | A unique identifier for the dataset.                                                                                             |
+| Slot | Type | Description |
+|:---|:---|:---|
+| `obs["cell_type"]` | `string` | Cell type label IDs. |
+| `obs["batch"]` | `string` | (*Optional*) A batch identifier. This label is very context-dependent and may be a combination of the tissue, assay, donor, etc. |
+| `layers["counts"]` | `integer` | Raw counts. |
+| `uns["cell_type_names"]` | `string` | Cell type names corresponding to values in `cell_type`. |
+| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
 
 </div>
 
-## File format: Spatial masked
+## File format: Solution
 
 The spatial data file containing transcription profiles for each capture
-location, without cell-type proportions for each spot.
+location, with true cell-type proportions for each spot / capture
+location.
 
 Example file:
-`resources_test/spatial_decomposition/cxg_mouse_pancreas_atlas/spatial_masked.h5ad`
+`resources_test/task_spatial_decomposition/cxg_mouse_pancreas_atlas/solution.h5ad`
 
 Format:
 
 <div class="small">
 
     AnnData object
-     obsm: 'coordinates'
+     obsm: 'spatial', 'proportions_true'
      layers: 'counts'
-     uns: 'cell_type_names', 'dataset_id'
+     uns: 'cell_type_names', 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'normalization_id'
 
 </div>
 
-Slot description:
+Data structure:
 
 <div class="small">
 
-| Slot                     | Type      | Description                                                               |
-|:-------------------------|:----------|:--------------------------------------------------------------------------|
-| `obsm["coordinates"]`    | `double`  | XY coordinates for each spot.                                             |
-| `layers["counts"]`       | `integer` | Raw counts.                                                               |
-| `uns["cell_type_names"]` | `string`  | Cell type names corresponding to columns of `proportions_pred` in output. |
-| `uns["dataset_id"]`      | `string`  | A unique identifier for the dataset.                                      |
+| Slot | Type | Description |
+|:---|:---|:---|
+| `obsm["spatial"]` | `double` | XY coordinates for each spot. |
+| `obsm["proportions_true"]` | `double` | True cell type proportions for each spot. |
+| `layers["counts"]` | `integer` | Raw counts. |
+| `uns["cell_type_names"]` | `string` | Cell type names corresponding to columns of `proportions_true`. |
+| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
+| `uns["dataset_name"]` | `string` | Nicely formatted name. |
+| `uns["dataset_url"]` | `string` | (*Optional*) Link to the original source of the dataset. |
+| `uns["dataset_reference"]` | `string` | (*Optional*) BibTeX reference of the paper in which the dataset was published. |
+| `uns["dataset_summary"]` | `string` | Short description of the dataset. |
+| `uns["dataset_description"]` | `string` | Long description of the dataset. |
+| `uns["dataset_organism"]` | `string` | (*Optional*) The organism of the sample in the dataset. |
+| `uns["normalization_id"]` | `string` | Which normalization was used. |
 
 </div>
 
-## File format: Solution
+## File format: Spatial masked
 
 The spatial data file containing transcription profiles for each capture
-location, with true cell-type proportions for each spot / capture
-location.
+location, without cell-type proportions for each spot.
 
 Example file:
-`resources_test/spatial_decomposition/cxg_mouse_pancreas_atlas/solution.h5ad`
+`resources_test/task_spatial_decomposition/cxg_mouse_pancreas_atlas/spatial_masked.h5ad`
 
 Format:
 
 <div class="small">
 
     AnnData object
-     obsm: 'coordinates', 'proportions_true'
+     obsm: 'spatial'
      layers: 'counts'
-     uns: 'cell_type_names', 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'normalization_id'
+     uns: 'cell_type_names', 'dataset_id'
 
 </div>
 
-Slot description:
+Data structure:
 
 <div class="small">
 
-| Slot                         | Type      | Description                                                                    |
-|:-----------------------------|:----------|:-------------------------------------------------------------------------------|
-| `obsm["coordinates"]`        | `double`  | XY coordinates for each spot.                                                  |
-| `obsm["proportions_true"]`   | `double`  | True cell type proportions for each spot.                                      |
-| `layers["counts"]`           | `integer` | Raw counts.                                                                    |
-| `uns["cell_type_names"]`     | `string`  | Cell type names corresponding to columns of `proportions`.                     |
-| `uns["dataset_id"]`          | `string`  | A unique identifier for the dataset.                                           |
-| `uns["dataset_name"]`        | `string`  | Nicely formatted name.                                                         |
-| `uns["dataset_url"]`         | `string`  | (*Optional*) Link to the original source of the dataset.                       |
-| `uns["dataset_reference"]`   | `string`  | (*Optional*) Bibtex reference of the paper in which the dataset was published. |
-| `uns["dataset_summary"]`     | `string`  | Short description of the dataset.                                              |
-| `uns["dataset_description"]` | `string`  | Long description of the dataset.                                               |
-| `uns["dataset_organism"]`    | `string`  | (*Optional*) The organism of the sample in the dataset.                        |
-| `uns["normalization_id"]`    | `string`  | Which normalization was used.                                                  |
+| Slot | Type | Description |
+|:---|:---|:---|
+| `obsm["spatial"]` | `double` | XY coordinates for each spot. |
+| `layers["counts"]` | `integer` | Raw counts. |
+| `uns["cell_type_names"]` | `string` | Cell type names corresponding to columns of `proportions_pred` in output. |
+| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
 
 </div>
 
 ## Component type: Control method
 
-Path:
-[`src/control_methods`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/control_methods)
-
 Quality control methods for verifying the pipeline.
 
 Arguments:
 
 <div class="small">
 
-| Name                     | Type   | Description                                                                                                                                          |
-|:-------------------------|:-------|:-----------------------------------------------------------------------------------------------------------------------------------------------------|
-| `--input_single_cell`    | `file` | The single-cell data file used as reference for the spatial data.                                                                                    |
-| `--input_spatial_masked` | `file` | The spatial data file containing transcription profiles for each capture location, without cell-type proportions for each spot.                      |
-| `--input_solution`       | `file` | The spatial data file containing transcription profiles for each capture location, with true cell-type proportions for each spot / capture location. |
-| `--output`               | `file` | (*Output*) Spatial data with estimated proportions.                                                                                                  |
+| Name | Type | Description |
+|:---|:---|:---|
+| `--input_single_cell` | `file` | The single-cell data file used as reference for the spatial data. |
+| `--input_spatial_masked` | `file` | The spatial data file containing transcription profiles for each capture location, without cell-type proportions for each spot. |
+| `--input_solution` | `file` | The spatial data file containing transcription profiles for each capture location, with true cell-type proportions for each spot / capture location. |
+| `--output` | `file` | (*Output*) Spatial data with estimated proportions. |
 
 </div>
 
 ## Component type: Method
 
-Path:
-[`src/methods`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/methods)
-
 A spatial composition method.
 
 Arguments:
 
 <div class="small">
 
-| Name                     | Type   | Description                                                                                                                     |
-|:-------------------------|:-------|:--------------------------------------------------------------------------------------------------------------------------------|
-| `--input_single_cell`    | `file` | The single-cell data file used as reference for the spatial data.                                                               |
+| Name | Type | Description |
+|:---|:---|:---|
+| `--input_single_cell` | `file` | The single-cell data file used as reference for the spatial data. |
 | `--input_spatial_masked` | `file` | The spatial data file containing transcription profiles for each capture location, without cell-type proportions for each spot. |
-| `--output`               | `file` | (*Output*) Spatial data with estimated proportions.                                                                             |
+| `--output` | `file` | (*Output*) Spatial data with estimated proportions. |
 
 </div>
 
 ## Component type: Metric
 
-Path:
-[`src/metrics`](https://github.com/openproblems-bio/openproblems-v2/tree/main/src/metrics)
-
 A spatial decomposition metric.
 
 Arguments:
 
 <div class="small">
 
-| Name               | Type   | Description                                                                                                                                          |
-|:-------------------|:-------|:-----------------------------------------------------------------------------------------------------------------------------------------------------|
-| `--input_method`   | `file` | Spatial data with estimated proportions.                                                                                                             |
+| Name | Type | Description |
+|:---|:---|:---|
+| `--input_method` | `file` | Spatial data with estimated proportions. |
 | `--input_solution` | `file` | The spatial data file containing transcription profiles for each capture location, with true cell-type proportions for each spot / capture location. |
-| `--output`         | `file` | (*Output*) Metric score file.                                                                                                                        |
+| `--output` | `file` | (*Output*) Metric score file. |
 
 </div>
 
@@ -309,35 +298,31 @@ Arguments:
 Spatial data with estimated proportions.
 
 Example file:
-`resources_test/spatial_decomposition/cxg_mouse_pancreas_atlas/output.h5ad`
-
-Description:
-
-Spatial data file with estimated cell type proportions.
+`resources_test/task_spatial_decomposition/cxg_mouse_pancreas_atlas/output.h5ad`
 
 Format:
 
 <div class="small">
 
     AnnData object
-     obsm: 'coordinates', 'proportions_pred'
+     obsm: 'spatial', 'proportions_pred'
      layers: 'counts'
      uns: 'cell_type_names', 'dataset_id', 'method_id'
 
 </div>
 
-Slot description:
+Data structure:
 
 <div class="small">
 
-| Slot                       | Type      | Description                                                |
-|:---------------------------|:----------|:-----------------------------------------------------------|
-| `obsm["coordinates"]`      | `double`  | XY coordinates for each spot.                              |
-| `obsm["proportions_pred"]` | `double`  | Estimated cell type proportions for each spot.             |
-| `layers["counts"]`         | `integer` | Raw counts.                                                |
-| `uns["cell_type_names"]`   | `string`  | Cell type names corresponding to columns of `proportions`. |
-| `uns["dataset_id"]`        | `string`  | A unique identifier for the dataset.                       |
-| `uns["method_id"]`         | `string`  | A unique identifier for the method.                        |
+| Slot | Type | Description |
+|:---|:---|:---|
+| `obsm["spatial"]` | `double` | XY coordinates for each spot. |
+| `obsm["proportions_pred"]` | `double` | Estimated cell type proportions for each spot. |
+| `layers["counts"]` | `integer` | Raw counts. |
+| `uns["cell_type_names"]` | `string` | Cell type names corresponding to columns of `proportions_pred`. |
+| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
+| `uns["method_id"]` | `string` | A unique identifier for the method. |
 
 </div>
 
@@ -346,7 +331,7 @@ Slot description:
 Metric score file.
 
 Example file:
-`resources_test/spatial_decomposition/cxg_mouse_pancreas_atlas/score.h5ad`
+`resources_test/task_spatial_decomposition/cxg_mouse_pancreas_atlas/score.h5ad`
 
 Format:
 
@@ -357,16 +342,61 @@ Format:
 
 </div>
 
-Slot description:
+Data structure:
 
 <div class="small">
 
-| Slot                   | Type     | Description                                                                                  |
-|:-----------------------|:---------|:---------------------------------------------------------------------------------------------|
-| `uns["dataset_id"]`    | `string` | A unique identifier for the dataset.                                                         |
-| `uns["method_id"]`     | `string` | A unique identifier for the method.                                                          |
-| `uns["metric_ids"]`    | `string` | One or more unique metric identifiers.                                                       |
+| Slot | Type | Description |
+|:---|:---|:---|
+| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
+| `uns["method_id"]` | `string` | A unique identifier for the method. |
+| `uns["metric_ids"]` | `string` | One or more unique metric identifiers. |
 | `uns["metric_values"]` | `double` | The metric values obtained for the given prediction. Must be of same length as ‘metric_ids’. |
 
 </div>
 
+## File format: Simulated Dataset
+
+A subset of the common dataset.
+
+Example file:
+`resources_test/task_spatial_decomposition/cxg_mouse_pancreas_atlas/simulated_dataset.h5ad`
+
+Format:
+
+<div class="small">
+
+    AnnData object
+     obs: 'cell_type', 'batch'
+     var: 'hvg', 'hvg_score'
+     obsm: 'X_pca', 'spatial', 'proportions_true'
+     layers: 'counts'
+     uns: 'cell_type_names', 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism'
+
+</div>
+
+Data structure:
+
+<div class="small">
+
+| Slot | Type | Description |
+|:---|:---|:---|
+| `obs["cell_type"]` | `string` | Cell type label IDs. |
+| `obs["batch"]` | `string` | A batch identifier. This label is very context-dependent and may be a combination of the tissue, assay, donor, etc. |
+| `var["hvg"]` | `boolean` | Whether or not the feature is considered to be a ‘highly variable gene’. |
+| `var["hvg_score"]` | `double` | A ranking of the features by hvg. |
+| `obsm["X_pca"]` | `double` | The resulting PCA embedding. |
+| `obsm["spatial"]` | `double` | (*Optional*) XY coordinates for each spot. |
+| `obsm["proportions_true"]` | `double` | (*Optional*) True cell type proportions for each spot. |
+| `layers["counts"]` | `integer` | Raw counts. |
+| `uns["cell_type_names"]` | `string` | (*Optional*) Cell type names corresponding to values in `cell_type`. |
+| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
+| `uns["dataset_name"]` | `string` | Nicely formatted name. |
+| `uns["dataset_url"]` | `string` | (*Optional*) Link to the original source of the dataset. |
+| `uns["dataset_reference"]` | `string` | (*Optional*) BibTeX reference of the paper in which the dataset was published. |
+| `uns["dataset_summary"]` | `string` | Short description of the dataset. |
+| `uns["dataset_description"]` | `string` | Long description of the dataset. |
+| `uns["dataset_organism"]` | `string` | (*Optional*) The organism of the sample in the dataset. |
+
+</div>
+