From 45a276da50d8b5d0b1f412f4eba491b741fc51d4 Mon Sep 17 00:00:00 2001
From: Tobi Alegbe <obba2@cam.ac.uk>
Date: Sun, 21 Apr 2024 21:11:59 +0100
Subject: [PATCH 1/4] Fix celltypist model path not working

---
 bin/run_celltypist.py                      | 3 +--
 modules/nf-core/modules/celltypist/main.nf | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/bin/run_celltypist.py b/bin/run_celltypist.py
index 276974b2..2768e3f5 100755
--- a/bin/run_celltypist.py
+++ b/bin/run_celltypist.py
@@ -145,7 +145,7 @@ def run_celltypist(samplename, filtered_matrix_h5, celltypist_model,
     
     # Indeed, the `model` argument defaults to `Immune_All_Low.pkl`.
     logging.info("celltypist_model: " + celltypist_model)
-    celltypist_model1 = celltypist_model.split('.')[0]
+    celltypist_model1 = os.path.splitext(os.path.basename(celltypist_model))[0]
     # print(os.listdir('/tmp/.celltypist/data/models'))
     model = models.Model.load(model = celltypist_model ) # model = 'Immune_All_Low.pkl')
     model.description
@@ -172,7 +172,6 @@ def run_celltypist(samplename, filtered_matrix_h5, celltypist_model,
     
     adata = predictions.to_adata()
 
-    
     predictions.to_table(folder = output_dir, prefix = samplename + '___'+celltypist_model1+'___')
     Data = adata.obs
     Data= Data.drop('cell_barcode',axis=1)
diff --git a/modules/nf-core/modules/celltypist/main.nf b/modules/nf-core/modules/celltypist/main.nf
index b55bdb33..b970bd8c 100755
--- a/modules/nf-core/modules/celltypist/main.nf
+++ b/modules/nf-core/modules/celltypist/main.nf
@@ -1,6 +1,6 @@
 
 process CELLTYPIST {
-    tag "${samplename}"
+    tag "${model}_${sample}"
     label 'process_medium_memory'
     publishDir "${params.outdir}/celltype/celltypist/${model}/${sample}/", mode: "${params.celltypist.copy_mode}", overwrite: true,
 	  saveAs: {filename -> filename.replaceFirst("outputs/","").replaceFirst("figures/","") }
@@ -27,7 +27,7 @@ process CELLTYPIST {
       tuple val(sample), path("outputs/plot_prob/*_*.pdf"), emit: sample_plots_prob_pdf
 
     script:
-      model="${celltypist_model}".replaceFirst(".pkl","")
+      model="${celltypist_model}".replaceAll(/^.*[\\/]/, "").replaceFirst(/\\..+$/, "")
 
       filtered_matrix_h5_path = file("${filtered_matrix_h5}/../filtered_feature_bc_matrix.h5")
       if (filtered_matrix_h5_path.exists()){

From 7d25af19748702473a252ec78239551199cea8d0 Mon Sep 17 00:00:00 2001
From: Matiss <dr.matiss.ozols@gmail.com>
Date: Tue, 23 Apr 2024 13:57:05 +0100
Subject: [PATCH 2/4] Update README.md

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index d42296c1..0091a3b8 100755
--- a/README.md
+++ b/README.md
@@ -4,8 +4,7 @@
 
 ## Introduction
 
-**nf-core/yascp** is a bioinformatics best-practice analysis pipeline for deconvolution, qc, clustering, integration of a single cell datasets.
-This is a large scale single-cell pipeline developed initially for processing Cardinal project samples, however it is applicable to any other scRNA analysis. The pipeline has been inspired by [deconvolution](https://github.com/wtsi-hgi/nf_scrna_deconvolution.git ), [cellbender](https://github.com/wtsi-hgi/nf_cellbender ) and [qc](https://github.com/wtsi-hgi/nf_qc_cluster/tree/main ) pipelines initially developed in Anderson lab. 
+**nf-core/yascp** is a bioinformatics best-practice analysis pipeline tailored for deconvolution, quality control, clustering, and integration of single-cell datasets. Developed under the leadership of N.Soranzo at the Human Genetics Informatics (HGI), this large-scale single-cell pipeline was originally crafted for the Cardinal project (profiling UKBB and ELGH participants) but is versatile enough for broader scRNA analysis applications. The foundational ideas were inspired by earlier pipelines from Anderson lab but has been expanded, specifically those for [deconvolution](https://github.com/wtsi-hgi/nf_scrna_deconvolution.git), [cellbender](https://github.com/wtsi-hgi/nf_cellbender), and [quality control and clustering](https://github.com/wtsi-hgi/nf_qc_cluster/tree/main). This ensures a robust integration of proven methodologies tailored to meet the demands of expansive single-cell data analysis.
 
 Input requires a tsv seperated file [(please read detailed documentation here)](https://github.com/wtsi-hgi/yascp/tree/yascp_docs) with paths and if running in an genotype  additional input is required to be provided in an input.nf file pointing to the vcf location. This pipeline is designed to be used any large scale single cell experiments.
 

From 891baa6c8533a311ba851fb1543462780c16abda Mon Sep 17 00:00:00 2001
From: Matiss <dr.matiss.ozols@gmail.com>
Date: Tue, 23 Apr 2024 16:22:54 +0100
Subject: [PATCH 3/4] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0091a3b8..4583e3f0 100755
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 ## Introduction
 
-**nf-core/yascp** is a bioinformatics best-practice analysis pipeline tailored for deconvolution, quality control, clustering, and integration of single-cell datasets. Developed under the leadership of N.Soranzo at the Human Genetics Informatics (HGI), this large-scale single-cell pipeline was originally crafted for the Cardinal project (profiling UKBB and ELGH participants) but is versatile enough for broader scRNA analysis applications. The foundational ideas were inspired by earlier pipelines from Anderson lab but has been expanded, specifically those for [deconvolution](https://github.com/wtsi-hgi/nf_scrna_deconvolution.git), [cellbender](https://github.com/wtsi-hgi/nf_cellbender), and [quality control and clustering](https://github.com/wtsi-hgi/nf_qc_cluster/tree/main). This ensures a robust integration of proven methodologies tailored to meet the demands of expansive single-cell data analysis.
+**nf-core/yascp** is a bioinformatics best-practice analysis pipeline tailored for deconvolution, quality control, clustering, and integration of single-cell datasets. Developed under the leadership of N. Soranzo and Human Genetics Informatics (HGI), this large-scale single-cell pipeline was originally crafted for the Cardinal project (profiling UKBB and ELGH participants) but is versatile enough for broader scRNA analysis applications. The foundational ideas were inspired by earlier pipelines from the Anderson lab, specifically those for [deconvolution](https://github.com/wtsi-hgi/nf_scrna_deconvolution.git), [cellbender](https://github.com/wtsi-hgi/nf_cellbender), and [quality control and clustering](https://github.com/wtsi-hgi/nf_qc_cluster/tree/main), and have since been expanded. This ensures a robust integration of proven methodologies tailored to meet the demands of expansive single-cell data analysis.
 
 Input requires a tsv seperated file [(please read detailed documentation here)](https://github.com/wtsi-hgi/yascp/tree/yascp_docs) with paths and if running in an genotype  additional input is required to be provided in an input.nf file pointing to the vcf location. This pipeline is designed to be used any large scale single cell experiments.
 

From 245d02f1519ad31893e12797830fa41d83352f9e Mon Sep 17 00:00:00 2001
From: Tobi Alegbe <obba2@cam.ac.uk>
Date: Thu, 25 Apr 2024 10:04:41 +0100
Subject: [PATCH 4/4] Slight change in regex back to original

---
 modules/nf-core/modules/celltypist/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/nf-core/modules/celltypist/main.nf b/modules/nf-core/modules/celltypist/main.nf
index b970bd8c..d063eede 100755
--- a/modules/nf-core/modules/celltypist/main.nf
+++ b/modules/nf-core/modules/celltypist/main.nf
@@ -27,7 +27,7 @@ process CELLTYPIST {
       tuple val(sample), path("outputs/plot_prob/*_*.pdf"), emit: sample_plots_prob_pdf
 
     script:
-      model="${celltypist_model}".replaceAll(/^.*[\\/]/, "").replaceFirst(/\\..+$/, "")
+      model="${celltypist_model}".replaceAll(/^.*[\\/]/, "").replaceFirst(".pkl","")
 
       filtered_matrix_h5_path = file("${filtered_matrix_h5}/../filtered_feature_bc_matrix.h5")
       if (filtered_matrix_h5_path.exists()){