From a75153a2dd2d6f5e290edfa2dd0899fd725f0f25 Mon Sep 17 00:00:00 2001
From: Stef Piatek <stefan.piatek@nhs.net>
Date: Fri, 10 May 2019 10:05:09 +0100
Subject: [PATCH 1/2] Add panelcn.MOPS Dockerfile

Also break up the copywriter Dockerfile steps so that they can be reused
---
 docker-images/copywriter/Dockerfile   |  5 +++--
 docker-images/panelcn-mops/Dockerfile | 12 ++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100755 docker-images/panelcn-mops/Dockerfile

diff --git a/docker-images/copywriter/Dockerfile b/docker-images/copywriter/Dockerfile
index ac63f8d..6a13fdc 100755
--- a/docker-images/copywriter/Dockerfile
+++ b/docker-images/copywriter/Dockerfile
@@ -4,7 +4,8 @@ RUN apt-get update \
     && apt-get install -y --no-install-recommends libcurl4-openssl-dev libxml2-dev libssl-dev \
     && rm -rf /var/lib/apt/lists/*
 
-RUN R -e "install.packages(c('optparse', 'dplyr', 'tidyr', 'stringr', 'BiocManager')); \
-    BiocManager::install('CopywriteR', version = '3.8');" 
+RUN R -e "install.packages(c('optparse', 'dplyr', 'tidyr', 'stringr', 'BiocManager'));"
+
+RUN R -e "BiocManager::install('CopywriteR', version = '3.8');" 
 
 CMD ["bash"]
diff --git a/docker-images/panelcn-mops/Dockerfile b/docker-images/panelcn-mops/Dockerfile
new file mode 100755
index 0000000..a340618
--- /dev/null
+++ b/docker-images/panelcn-mops/Dockerfile
@@ -0,0 +1,15 @@
+FROM r-base:3.5.1
+
+# Development libraries installed before the R packages below.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends libcurl4-openssl-dev libxml2-dev libssl-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Shared R packages; the same RUN line is used in the copywriter Dockerfile so the step can be reused.
+RUN R -e "install.packages(c('optparse', 'dplyr', 'tidyr', 'stringr', 'BiocManager'));"
+
+# Caller-specific packages: purrr via install.packages() and panelcn.mops via BiocManager (version 3.8).
+RUN R -e "install.packages(c('purrr')); \
+    BiocManager::install('panelcn.mops', version = '3.8');"
+
+CMD ["bash"]

From 7e25fb7f3590a6fdb12db543a68b7d6c47e192da Mon Sep 17 00:00:00 2001
From: Stef Piatek <stefan.piatek@nhs.net>
Date: Fri, 10 May 2019 10:07:23 +0100
Subject: [PATCH 2/2] Add panelcn.MOPS to callers

---
 .../panelcn_mops/panelcn_mops_runner.R        | 62 ++++++++++++++++
 cnv_patissier.py                              | 27 +++++--
 scripts/panelcn_mops.py                       | 70 +++++++++++++++++++
 .../output_parsing/panelcn_mops/results.txt   |  5 ++
 tests/unit/test_panelcn_mops.py               | 61 ++++++++++++++++
 5 files changed, 218 insertions(+), 7 deletions(-)
 create mode 100755 cnv-caller-resources/panelcn_mops/panelcn_mops_runner.R
 create mode 100755 scripts/panelcn_mops.py
 create mode 100755 tests/test_files/output_parsing/panelcn_mops/results.txt
 create mode 100755 tests/unit/test_panelcn_mops.py

diff --git a/cnv-caller-resources/panelcn_mops/panelcn_mops_runner.R b/cnv-caller-resources/panelcn_mops/panelcn_mops_runner.R
new file mode 100755
index 0000000..63f414c
--- /dev/null
+++ b/cnv-caller-resources/panelcn_mops/panelcn_mops_runner.R
@@ -0,0 +1,68 @@
+library(dplyr)
+library(optparse)
+library(panelcn.mops)
+library(purrr)
+
+
+# Detect CNVs for one unknown sample against the normal panel.
+#
+# Relies on the script-level objects `unknowns`, `normals`, `count_windows`,
+# `selected_gene` and `unknown_samples`, which are created further down, so it
+# can only be called once those exist.
+#
+# sample_number: column of `unknowns` (and row of `unknown_samples`) to test.
+# Returns the first table from createResultTable() without its "CN2" rows and
+# with Sample set to the sample name taken from samples.tsv.
+detect_cnvs <- function(sample_number) {
+  # Set up test data: the unknown sample's counts plus those of the normals
+  test_and_control <- unknowns[, sample_number]
+  elementMetadata(test_and_control) <- cbind(elementMetadata(test_and_control), elementMetadata(normals))
+  # Detect CNVs
+  result_list <- runPanelcnMops(test_and_control, countWindows = count_windows, selectedGenes = selected_gene, maxControls = 30)
+  # Output results from S4 object to table
+  file_names <- colnames(elementMetadata(unknowns))
+  raw_results <- createResultTable(
+    resultlist = result_list, XandCB = test_and_control,
+    countWindows = count_windows,
+    selectedGenes = selected_gene,
+    sampleNames = file_names
+  )
+
+  # Process table for output
+  raw_results[[1]] %>%
+    filter(CN != "CN2") %>%
+    mutate(Sample = unknown_samples$sample_name[sample_number])
+}
+
+
+# Parse options
+option_list <- list(
+  make_option(c("--output-path"), type = "character"),
+  make_option(c("--gene"), type = "character"),
+  # NOTE(review): --chrom-prefix is parsed but never read in this script
+  make_option(c("--chrom-prefix"), type = "character", default = NULL)
+)
+opt <- parse_args(OptionParser(option_list = option_list), convert_hyphens_to_underscores = TRUE)
+
+# Set up parameters
+bed <- file.path(opt$output_path, "capture.bed")
+split_bed <- file.path(opt$output_path, "capture_split.bed")
+splitROIs(bed, split_bed)
+
+# samples.tsv is tab-separated, so split on tabs only (values may contain spaces)
+all_samples <- read.table(file.path(opt$output_path, "samples.tsv"), header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+unknown_samples <- all_samples[all_samples$sample_type == "unknown", ]
+normal_samples <- all_samples[all_samples$sample_type == "normal_panel", ]
+selected_gene <- opt$gene
+
+# Get counts
+count_windows <- getWindows(split_bed)
+normals <- countBamListInGRanges(countWindows = count_windows, bam.files = normal_samples$bam_path, read.width = 150)
+unknowns <- countBamListInGRanges(countWindows = count_windows, bam.files = unknown_samples$bam_path, read.width = 150)
+
+# Run CNV calling. seq_len() yields no indices when there are no unknown
+# samples; seq(1, 0) would instead count down through 1 and 0.
+called_cnvs <- map_dfr(.x = seq_len(nrow(unknown_samples)), .f = detect_cnvs)
+
+# Save results
+write.table(called_cnvs, file = file.path(opt$output_path, "calls.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
diff --git a/cnv_patissier.py b/cnv_patissier.py
index fe22c55..b99b311 100755
--- a/cnv_patissier.py
+++ b/cnv_patissier.py
@@ -3,14 +3,26 @@
 import pathlib
 
 from scripts.db_session import DbSession
-from scripts import utils, copywriter, codex2, cnv_kit, decon, excavator2, exome_depth, gatk, savvy_cnv, xhmm
-
+from scripts import (
+    utils,
+    copywriter,
+    codex2,
+    cnv_kit,
+    decon,
+    excavator2,
+    exome_depth,
+    gatk,
+    panelcn_mops,
+    savvy_cnv,
+    xhmm,
+)
 
 
 def init_db(capture_name):
     db_path = f"{utils.get_cnv_patissier_dir()}/output/{capture_name}.sqlite"
     DbSession.global_init(db_path)
 
+
 if __name__ == "__main__":
     parser = ArgumentParser(description="Ochrestrating your CNV-caller bakeoff")
     parser.add_argument("capture_name", help="After following the setup in the README.md, please give the capture name")
@@ -18,7 +30,7 @@ def init_db(capture_name):
 
     start_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%m-%S")
 
-    capture_name = args.capture_name 
+    capture_name = args.capture_name
     init_db(capture_name)
     cnv_pat_dir = utils.get_cnv_patissier_dir()
     sample_sheet_path = pathlib.Path(cnv_pat_dir, "input", capture_name, "sample-sheets")
@@ -26,11 +38,11 @@ def init_db(capture_name):
     genes = [path.stem for path in list(sample_sheet_path.glob("*.txt"))]
     assert genes, "No genes found in path!"
 
-    for gene in genes:
+    for gene in sorted(genes):
         # cnv_caller = cnv_kit.CNVKit(capture_name, gene, start_time)
         # cnv_caller.main()
         cnv_caller = codex2.CODEX2(capture_name, gene, start_time)
-        cnv_caller.main()        
+        cnv_caller.main()
         cnv_caller = copywriter.Copywriter(capture_name, gene, start_time)
         cnv_caller.main()
         cnv_caller = decon.DECoN(capture_name, gene, start_time)
@@ -45,10 +57,11 @@ def init_db(capture_name):
         cnv_caller.main()
         cnv_caller = gatk.GATKCase(capture_name, gene, start_time)
         cnv_caller.main()
+        cnv_caller = panelcn_mops.panelcnMOPS(capture_name, gene, start_time)
+        cnv_caller.main()
         cnv_caller = savvy_cnv.SavvyCNV(capture_name, gene, start_time)
         cnv_caller.main()
         cnv_caller = xhmm.XHMM(capture_name, gene, start_time)
         cnv_caller.main()
 
-
-print("Congrats, you're all done")
\ No newline at end of file
+    print("Congrats, you're all done")
diff --git a/scripts/panelcn_mops.py b/scripts/panelcn_mops.py
new file mode 100755
index 0000000..f2074bf
--- /dev/null
+++ b/scripts/panelcn_mops.py
@@ -0,0 +1,90 @@
+"""
+panelcn.MOPS set up using basic instructions in manual https://bioconductor.org/packages/release/bioc/vignettes/panelcn.mops/inst/doc/panelcn.mops.pdf
+
+Used splitting of BED file with defaults (100bp bins with 50 bp overlap)
+"""
+
+import csv
+import pathlib
+
+from . import base_classes
+
+
+class panelcnMOPS(base_classes.BaseCNVTool):
+    """Run panelcn.MOPS through its docker image and parse the calls file it writes."""
+
+    def __init__(self, capture, gene, start_time, normal_panel=True):
+        # NOTE(review): run_type is set before super().__init__() -- presumably the
+        # base class reads it there; confirm before reordering these statements
+        self.run_type = "panelcn_mops"
+        super().__init__(capture, gene, start_time, normal_panel=normal_panel)
+        self.extra_db_fields = ["gene", "exon", "rc", "medrc", "rc.norm", "medrc.norm", "lowqual", "cn"]
+        self.settings = {**self.settings, "docker_image": "stefpiatek/panelcn_mops:1.4.0"}
+
+    def parse_output_file(self, file_path, sample_id):
+        """Return the CNV calls for one sample from a tab-separated calls file.
+
+        Column names are lower-cased, ``chr`` becomes ``chrom`` (with the configured
+        chromosome prefix added) and ``sample`` becomes ``sample_id``. ``alt`` is set
+        to "DEL" for a copy number below 2 and to "DUP" for one above 2.
+
+        Raises:
+            Exception: if a row for the sample reports a copy number of exactly 2.
+        """
+        cnvs = []
+        with open(file_path, "r") as handle:
+            for row in csv.DictReader(handle, delimiter="\t"):
+                if row["Sample"] != sample_id:
+                    continue
+                cnv = {key.lower(): value for key, value in row.items()}
+                cnv["chrom"] = f"{self.settings['chromosome_prefix']}{cnv.pop('chr')}"
+                cnv["sample_id"] = cnv.pop("sample")
+                # the CN column holds values such as "CN1"
+                copy_number = int(cnv["cn"].lstrip("CN"))
+                if copy_number < 2:
+                    cnv["alt"] = "DEL"
+                elif copy_number > 2:
+                    cnv["alt"] = "DUP"
+                else:
+                    raise Exception(f"row doesn't have a copy number change {cnv}")
+                cnvs.append(cnv)
+
+        return cnvs
+
+    def run_command(self, args):
+        """Run the panelcn.MOPS R runner script via docker with the given arguments."""
+        self.run_docker_subprocess(["Rscript", "/mnt/cnv-caller-resources/panelcn_mops/panelcn_mops_runner.R", *args])
+
+    def run_workflow(self):
+        """Write the runner's inputs, run it and return ``(output_paths, sample_names)``.
+
+        ``capture.bed`` is the capture BED with each region named
+        ``gene.chrom.start.end``; ``samples.tsv`` lists every unknown and normal BAM
+        with its sample name and type. Both are written to ``self.output_base``.
+        """
+        bed_path = self.settings["capture_path"].replace("/mnt", base_classes.cnv_pat_dir)
+
+        pathlib.Path(self.output_base).mkdir(parents=True, exist_ok=True)
+
+        with open(bed_path, "r") as input_bed, open(f"{self.output_base}/capture.bed", "w") as output_bed:
+            for line in input_bed:
+                if not line.strip():
+                    # blank lines (e.g. at the end of the file) have no fields to unpack
+                    continue
+                chrom, start, end, gene = line.split()
+                output_bed.write(f"{chrom}\t{start}\t{end}\t{gene}.{chrom}.{start}.{end}\n")
+
+        unknown_bams = set(self.settings["unknown_bams"])
+        with open(f"{self.output_base}/samples.tsv", "w") as handle:
+            handle.write("bam_path\tsample_name\tsample_type\n")
+            for bam in self.settings["unknown_bams"] + self.settings["normal_bams"]:
+                sample_type = "unknown" if bam in unknown_bams else "normal_panel"
+                handle.write(f"{bam}\t{self.bam_to_sample[bam]}\t{sample_type}\n")
+
+        self.run_command([f"--output-path={self.docker_output_base}", f"--gene={self.gene}"])
+
+        sample_names = [f"{self.bam_to_sample[unknown_bam]}" for unknown_bam in self.settings["unknown_bams"]]
+        # all samples share one calls file; parse_output_file() filters it by sample
+        output_paths = [f"{self.output_base}/calls.tsv"] * len(sample_names)
+
+        return output_paths, sample_names
diff --git a/tests/test_files/output_parsing/panelcn_mops/results.txt b/tests/test_files/output_parsing/panelcn_mops/results.txt
new file mode 100755
index 0000000..0d60e10
--- /dev/null
+++ b/tests/test_files/output_parsing/panelcn_mops/results.txt
@@ -0,0 +1,5 @@
+Sample	Chr	Gene	Exon	Start	End	RC	medRC	RC.norm	medRC.norm	lowQual	CN
+sample_1	2	test	test.NA.chr2.5700.5750	5700	5750	1430	2546	1537	2508		CN1
+sample_1	17	test	test.NA.chr17.100.4000	100	4000	1430	2546	1537	2508		CN1
+sample_2	2	test	test.NA.chr2.5700.5750	5700	5750	5430	2546	3537	2508		CN3
+sample_2	17	test	test.NA.chr17.100.4000	100	4000	5430	2546	3537	2508		CN3
diff --git a/tests/unit/test_panelcn_mops.py b/tests/unit/test_panelcn_mops.py
new file mode 100755
index 0000000..8d67ed2
--- /dev/null
+++ b/tests/unit/test_panelcn_mops.py
@@ -0,0 +1,63 @@
+import pathlib
+
+from scripts.panelcn_mops import panelcnMOPS
+
+
+class TestParseOutputFile:
+    """Check panelcnMOPS.parse_output_file against the panelcn_mops results.txt fixture."""
+
+    def setup_method(self):
+        # setup_method is pytest's native per-test hook; the nose-style `setup`
+        # name is no longer run by pytest 8
+        self.caller = panelcnMOPS("capture", "gene_1", "time")
+        self.caller.settings = {"chromosome_prefix": "chr"}
+        self.output = pathlib.Path("tests/test_files/output_parsing/panelcn_mops/results.txt")
+        self.del_expected_output = [
+            {
+                "chrom": "chr2",
+                "start": "5700",
+                "end": "5750",
+                "gene": "test",
+                "exon": "test.NA.chr2.5700.5750",
+                "alt": "DEL",
+                "sample_id": "sample_1",
+                "rc": "1430",
+                "medrc": "2546",
+                "rc.norm": "1537",
+                "medrc.norm": "2508",
+                "lowqual": "",
+                "cn": "CN1",
+            },
+            {
+                "chrom": "chr17",
+                "start": "100",
+                "end": "4000",
+                "gene": "test",
+                "exon": "test.NA.chr17.100.4000",
+                "alt": "DEL",
+                "sample_id": "sample_1",
+                "rc": "1430",
+                "medrc": "2546",
+                "rc.norm": "1537",
+                "medrc.norm": "2508",
+                "lowqual": "",
+                "cn": "CN1",
+            },
+        ]
+
+    def test_del(self):
+        parsed = self.caller.parse_output_file(self.output, "sample_1")
+        assert parsed == self.del_expected_output
+
+    def test_dup(self):
+        # build fresh dicts so the DEL expectations are left untouched
+        expected_output = [
+            {**row, "rc": "5430", "rc.norm": "3537", "alt": "DUP", "sample_id": "sample_2", "cn": "CN3"}
+            for row in self.del_expected_output
+        ]
+        parsed = self.caller.parse_output_file(self.output, "sample_2")
+        assert parsed == expected_output
+
+    def test_normal(self):
+        parsed = self.caller.parse_output_file(self.output, "sample_3")
+        assert parsed == []