Skip to content

Commit

Permalink
Merge pull request #789 from UTSouthwesternDSSR/main
Browse files Browse the repository at this point in the history
celltype/tumor annotation for non-ETP T-ALL (SCPCP000003)
  • Loading branch information
jaclyn-taroni authored Oct 8, 2024
2 parents 2eb3fc0 + 44e7bf8 commit ff20cd2
Show file tree
Hide file tree
Showing 33 changed files with 563 additions and 47 deletions.
32 changes: 16 additions & 16 deletions .github/workflows/docker_cell-type-nonETP-ALL-03.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@ concurrency:
cancel-in-progress: true

on:
# pull_request:
# branches:
# - main
# paths:
# - "analyses/cell-type-nonETP-ALL-03/Dockerfile"
# - "analyses/cell-type-nonETP-ALL-03/.dockerignore"
# - "analyses/cell-type-nonETP-ALL-03/renv.lock"
# - "analyses/cell-type-nonETP-ALL-03/conda-lock.yml"
# push:
# branches:
# - main
# paths:
# - "analyses/cell-type-nonETP-ALL-03/Dockerfile"
# - "analyses/cell-type-nonETP-ALL-03/.dockerignore"
# - "analyses/cell-type-nonETP-ALL-03/renv.lock"
# - "analyses/cell-type-nonETP-ALL-03/conda-lock.yml"
pull_request:
branches:
- main
paths:
- "analyses/cell-type-nonETP-ALL-03/Dockerfile"
- "analyses/cell-type-nonETP-ALL-03/.dockerignore"
- "analyses/cell-type-nonETP-ALL-03/renv.lock"
- "analyses/cell-type-nonETP-ALL-03/conda-lock.yml"
push:
branches:
- main
paths:
- "analyses/cell-type-nonETP-ALL-03/Dockerfile"
- "analyses/cell-type-nonETP-ALL-03/.dockerignore"
- "analyses/cell-type-nonETP-ALL-03/renv.lock"
- "analyses/cell-type-nonETP-ALL-03/conda-lock.yml"
workflow_dispatch:
inputs:
push-ecr:
Expand Down
50 changes: 40 additions & 10 deletions .github/workflows/run_cell-type-nonETP-ALL-03.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,23 @@ concurrency:

on:
workflow_dispatch:
# workflow_call:
# pull_request:
# branches:
# - main
# paths:
# - analyses/cell-type-nonETP-ALL-03/**
# - "!analyses/cell-type-nonETP-ALL-03/Dockerfile"
# - "!analyses/cell-type-nonETP-ALL-03/.dockerignore"
# - .github/workflows/run_cell-type-nonETP-ALL-03.yml
workflow_call:
pull_request:
branches:
- main
paths:
- analyses/cell-type-nonETP-ALL-03/**
- "!analyses/cell-type-nonETP-ALL-03/Dockerfile"
- "!analyses/cell-type-nonETP-ALL-03/.dockerignore"
- .github/workflows/run_cell-type-nonETP-ALL-03.yml

jobs:
run-module:
if: github.repository_owner == 'AlexsLemonade'
runs-on: ubuntu-latest
defaults:
run:
shell: bash -el {0}

steps:
- name: Checkout repo
Expand All @@ -47,16 +50,43 @@ jobs:
- name: Set up pandoc
uses: r-lib/actions/setup-pandoc@v2

- name: Install system dependencies
run: |
sudo apt-get install -y libcurl4-openssl-dev \
libhdf5-dev \
libglpk40 \
libxml2-dev \
libfontconfig1-dev \
libharfbuzz-dev \
libfribidi-dev \
libtiff5-dev
- name: Set up renv
uses: r-lib/actions/setup-renv@v2
with:
working-directory: ${{ env.MODULE_PATH }}

- name: Set up conda
# Note that this creates and activates an environment named 'test' by default
uses: conda-incubator/setup-miniconda@v3
with:
miniforge-version: latest

- name: Install conda-lock and activate locked conda environment
run: |
conda install conda-lock
conda-lock install --name openscpca-cell-type-nonETP-ALL-03 ${MODULE_PATH}/conda-lock.yml
# Update this step as needed to download the desired data
- name: Download test data
run: ./download-data.py --test-data --format SCE
run: |
./download-data.py --projects SCPCP000003 --test-data --format SCE
./download-results.py --projects SCPCP000003 --test-data --modules doublet-detection
- name: Run analysis module
run: |
cd ${MODULE_PATH}
# run module script(s) here
Rscript scripts/00-01_processing_rds.R
Rscript scripts/02-03_annotation.R
Rscript scripts/multipanel_plot.R
4 changes: 0 additions & 4 deletions analyses/cell-type-nonETP-ALL-03/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
/results/*
!/results/README.md

#Plots should not be committed
/plots/*
!/plots/.gitkeep

# Ignore the scratch directory (but keep it present)
/scratch/*
!/scratch/.gitkeep
4 changes: 3 additions & 1 deletion analyses/cell-type-nonETP-ALL-03/Azimuth_BM_level1.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
tissueType,cellName,geneSymbolmore1,geneSymbolmore2,fullName,ontologyID
tissueType,cellName,ensembl_id_positive_marker,ensembl_id_negative_marker,fullName,ontologyID
Immune system,B,"ENSG00000163534,ENSG00000132704,ENSG00000012124,ENSG00000138639,ENSG00000153064,ENSG00000156738,ENSG00000116191,ENSG00000104894,ENSG00000133789,ENSG00000105369",,B cell,CL_0000945
Immune system,CD4 T,"ENSG00000172005,ENSG00000138795,ENSG00000168685,ENSG00000081059,ENSG00000227507,ENSG00000104660,ENSG00000198851,ENSG00000167286,ENSG00000160654,ENSG00000139193",,CD4 T cell,CL_0000624
Immune system,CD8 T,"ENSG00000172116,ENSG00000153563,ENSG00000184613,ENSG00000167286,ENSG00000198851,ENSG00000160307,ENSG00000100450,ENSG00000160654,ENSG00000227191,ENSG00000271503",,CD8 T cell,CL_0000625
Expand All @@ -13,4 +13,6 @@ Immune system,Late Eryth,"ENSG00000196188,ENSG00000112212,ENSG00000204010,ENSG00
Immune system,Plasma,"ENSG00000115884,ENSG00000222037,ENSG00000211640,ENSG00000048462,ENSG00000211673,ENSG00000240505,ENSG00000211685,ENSG00000167476,ENSG00000143297,ENSG00000243466",,Plasma cell,CL_0000786
Immune system,Platelet,"ENSG00000150681,ENSG00000187699,ENSG00000088726,ENSG00000169704,ENSG00000163737,ENSG00000163736,ENSG00000153071,ENSG00000113140,ENSG00000176783,ENSG00000124491",,Platelet,CL_0000233
Immune system,Stromal,"ENSG00000115461,ENSG00000047457,ENSG00000091513,ENSG00000011465,ENSG00000139329,ENSG00000164692,ENSG00000147571,ENSG00000041982,ENSG00000152583,ENSG00000112175",,Stromal cell,CL_0000499
Immune system,Blast,"ENSG00000002586,ENSG00000173762,ENSG00000124766,ENSG00000177606,ENSG00000117632,ENSG00000123416,ENSG00000167286",,Blast cell,CL_0000055
Immune system,Cancer,"ENSG00000026508,ENSG00000119888,ENSG00000141736,ENSG00000086205,ENSG00000111057,ENSG00000007062",,Cancer cell,CL_0001064
Immune system,Pre Eryth,"ENSG00000081237,ENSG00000170180,ENSG00000175792,ENSG00000072274,ENSG00000110195,ENSG00000135218,ENSG00000115232,ENSG00000244734,ENSG00000223609,ENSG00000133742",,Erythroid-like and erythroid precursor cell,CL_0000038
59 changes: 57 additions & 2 deletions analyses/cell-type-nonETP-ALL-03/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,65 @@
# A template docker file for creating a new analysis
FROM ubuntu:22.04

FROM bioconductor/r-ver:3.19

# Labels following the Open Containers Initiative (OCI) recommendations
# For more information, see https://specs.opencontainers.org/image-spec/annotations/?v=v1.0.1
LABEL org.opencontainers.image.title="openscpca/cell-type-nonETP-ALL-03"
LABEL org.opencontainers.image.description="Docker image for the OpenScPCA analysis module 'cell-type-nonETP-ALL-03'"
LABEL org.opencontainers.image.authors="OpenScPCA [email protected]"
LABEL org.opencontainers.image.source="https://github.com/AlexsLemonade/OpenScPCA-analysis/tree/main/templates/analysis-module"

# Set an environment variable to allow checking if we are in an OpenScPCA container
ENV OPENSCPCA_DOCKER=TRUE

# set a name for the conda environment
ARG ENV_NAME=openscpca-cell-type-nonETP-ALL-03

# set environment variables to install conda
ENV PATH="/opt/conda/bin:${PATH}"

# Install conda via miniforge
# adapted from https://github.com/conda-forge/miniforge-images/blob/master/ubuntu/Dockerfile
RUN curl -L "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" -o /tmp/miniforge.sh \
&& bash /tmp/miniforge.sh -b -p /opt/conda \
&& rm -f /tmp/miniforge.sh \
&& conda clean --tarballs --index-cache --packages --yes \
&& find /opt/conda -follow -type f -name '*.a' -delete \
&& find /opt/conda -follow -type f -name '*.pyc' -delete \
&& conda clean --force-pkgs-dirs --all --yes

# Activate conda environments in bash
RUN ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh \
&& echo ". /opt/conda/etc/profile.d/conda.sh" >> /etc/skel/.bashrc \
&& echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc

# Install conda-lock
RUN conda install --channel=conda-forge --name=base conda-lock \
&& conda clean --all --yes

# Install renv
RUN Rscript -e "install.packages('renv')"

# Disable the renv cache to install packages directly into the R library
ENV RENV_CONFIG_CACHE_ENABLED=FALSE

# Copy conda lock file to image
COPY conda-lock.yml conda-lock.yml

# restore from conda-lock.yml file and clean up to reduce image size
RUN conda-lock install -n ${ENV_NAME} conda-lock.yml \
&& conda clean --all --yes

# Copy the renv.lock file from the host environment to the image
COPY renv.lock renv.lock

# restore from renv.lock file and clean up to reduce image size
RUN Rscript -e 'renv::restore()' \
&& rm -rf ~/.cache/R/renv \
&& rm -rf /tmp/downloaded_packages \
&& rm -rf /tmp/Rtmp*

# Activate conda environment on bash launch
RUN echo "conda activate ${ENV_NAME}" >> ~/.bashrc

# Set CMD to bash to activate the environment when launching
CMD ["/bin/bash"]
14 changes: 11 additions & 3 deletions analyses/cell-type-nonETP-ALL-03/README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Non-ETP T-ALL Annotation (SCPCP000003)

This analysis module will include code to annotate cell types in non-ETP T-ALL from SCPCP000003 (n=11) present on the ScPCA portal.
This analysis module will include code to annotate cell types and tumor/normal status in non-ETP T-ALL from SCPCP000003 (n=11) present on the ScPCA portal.

## Description

We first aim to annotate the cell types in non-ETP T-ALL, and use the annotated B cells in the sample as the "normal" cells to identify tumor cells, since T-ALL is caused by the clonal proliferation of immature T-cells [<https://www.nature.com/articles/s41375-018-0127-8>].

- We use the cell type marker (`Azimuth_BM_level1.csv`) from [Azimuth Human Bone Marrow reference](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Bone%20Marrow). In total, there are 14 cell types: B, CD4T, CD8T, Other T, DC, Monocytes, Macrophages, NK, Early Erythrocytes, Late Erythrocytes, Plasma, Platelet, Stromal, and Hematopoietic Stem and Progenitor Cells (HSPC). Based on the exploratory analysis, we believe that most of the cells in these samples do not express adequate markers to be distinguished at finer cell type level (eg. naive vs memory, CD14 vs CD16 etc.), and majority of the cells should belong to T-cells. In addition, we include the marker genes for cancer cell in immune system from [ScType](https://sctype.app/database.php) database.
- We use the cell type markers (`Azimuth_BM_level1.csv`) from the [Azimuth Human Bone Marrow reference](https://azimuth.hubmapconsortium.org/references/#Human%20-%20Bone%20Marrow). In total, there are 14 cell types: B, CD4T, CD8T, Other T, DC, Monocytes, Macrophages, NK, Early Erythrocytes, Late Erythrocytes, Plasma, Platelet, Stromal, and Hematopoietic Stem and Progenitor Cells (HSPC). Based on the exploratory analysis, we believe that most of the cells in these samples do not express adequate markers to be distinguished at a finer cell type level (e.g., naive vs memory, CD14 vs CD16, etc.), and that the majority of the cells should be T-cells. In addition, we include the marker genes for blast cells [[Bhasin et al. (2023)](https://www.nature.com/articles/s41598-023-39152-z)] as well as for erythroid precursor and cancer cells in the immune system [[ScType](https://sctype.app/database.php) database].

- Since ScType annotates cell types at cluster level using marker genes provided by user or from the built-in database, we employ [self-assembling manifold](https://github.com/atarashansky/self-assembling-manifold/tree/master) (SAM) algorithm, a soft feature selection strategy for better separation of homogeneous cell types.

Expand Down Expand Up @@ -42,7 +42,7 @@ The `scripts/00-01_processing_rds.R` requires the processed SingleCellExperiment
../../download-results.py --projects SCPCP000003 --modules doublet-detection
```

As for the annotation, `scripts/02-03_annotation.R` requires cell type marker gene file, `Azimuth_BM_level1.csv`, as an input for ScType. This excel file contains a list of positive marker genes in Ensembl ID under `geneSymbolmore1` for each cell type, and *TMEM56* is not detected in our dataset, thus it is being removed as part of the markers for Late Eryth. As of now, there is no negative marker genes provided under `geneSymbolmore2`.
As for the annotation, `scripts/02-03_annotation.R` requires the cell type marker gene file, `Azimuth_BM_level1.csv`, as an input for ScType. This CSV file contains a list of positive marker genes as Ensembl IDs under `ensembl_id_positive_marker` for each cell type; *TMEM56* and *CD235a* are not detected in our dataset, so they have been removed from the markers for Late Eryth and Pre Eryth, respectively. As of now, there are no negative marker genes provided under `ensembl_id_negative_marker`.

## Output files

Expand All @@ -52,6 +52,14 @@ Running `scripts/00-01_processing_rds.R` will generate two types of output:

- umap plots showing leiden clustering in `plots/`

Running `scripts/02-03_annotation.R` will generate several outputs:

- updated `rds` objects in `scratch/`

- UMAP plots showing cell type and CopyKat prediction (if any) and dotplots showing the features added with `AddModuleScore()` in `plots/`

- ScType results of top 10 possible cell types in a cluster (`_sctype_top10_celltypes_perCluster.txt`) and metadata file tabulating leiden cluster, cell type, low confidence cell type, and CopyKat prediction for each cell (`_metadata.txt`) in `results/`

## Software requirements

To run the analysis, execute the R scripts in R or RStudio (version 4.4.0). The main libraries used are:
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit ff20cd2

Please sign in to comment.