-
Notifications
You must be signed in to change notification settings - Fork 4
/
run_full_pipeline.snakefile
81 lines (66 loc) · 3.56 KB
/
run_full_pipeline.snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Workflow settings; every `config[...]` lookup in this file reads from here.
configfile: "config.yaml"

# Shared helper code.
# NOTE(review): `pd` is used later in this file without a visible import --
# presumably it is brought in by helpers.py; confirm.
include: "helpers.py"
#include: "facets.snakefile"

# Choose the input-parsing sub-workflow from the `multisample` config flag.
include: "phyloWGSparsing_multi.snakefile" if config["multisample"] == True else "phyloWGSparsing.snakefile"

include: "run_pwgs.snakefile"
# Sample sheet: a CSV (path taken from config["sampleCSVpath"]) that must
# provide a "patient" and a "sample" column. The two lists are later paired
# element-wise to build the workflow targets.
DATA = pd.read_csv(config["sampleCSVpath"])
PATIENTS, SAMPLES = (DATA[column].tolist() for column in ("patient", "sample"))
# Default target: one tree_likelihoods.txt per (patient, sample) pair, paired
# element-wise (zip) from the sample sheet.
# In multisample mode the {sample} wildcard is filled with the patient id.
# NOTE(review): presumably multi-sample runs are named after the patient --
# confirm against phyloWGSparsing_multi.snakefile.
rule all:
    input:
        expand(
            config["parsedOutputPWGS"] + "{patient}/{sample}/tree_likelihoods.txt",
            zip,
            patient=PATIENTS,
            sample=PATIENTS if config["multisample"] == True else SAMPLES
        )
# Run the write_results.py script from config["pwgsPythonDir"] on a sample's
# trees.zip, producing the three result archives for that sample.
#
# input : <outputdirPhyloOutput>/{patient}/{sample}/trees.zip
# output: {sample}.summ.json.gz, {sample}.muts.json.gz and {sample}.mutass.zip
#         in the same directory.
#
# The input is a plain wildcard pattern rather than a lambda returning a
# string with "{patient}"/"{sample}" placeholders: the pattern form is
# resolved by Snakemake's normal wildcard substitution and does not depend on
# placeholders inside input-function return values being formatted.
rule write_results:
    input:
        config["outputdirPhyloOutput"] + "{patient}/{sample}/trees.zip"
    params:
        pwgsPythonDir = config["pwgsPythonDir"]
    output:
        mutass_zip = config["outputdirPhyloOutput"] + "{patient}/{sample}/{sample}.mutass.zip",
        muts_json_gz = config["outputdirPhyloOutput"] + "{patient}/{sample}/{sample}.muts.json.gz",
        summ_json_gz = config["outputdirPhyloOutput"] + "{patient}/{sample}/{sample}.summ.json.gz"
    shell:
        # Positional order passed to the script: run name, trees archive,
        # summary, mutations, mutation assignments.
        "python2 {params.pwgsPythonDir}/write_results.py --allow-polyclonal --include-polyclonal {wildcards.sample} {input} {output.summ_json_gz} {output.muts_json_gz} {output.mutass_zip}"
# Stage one sample's result files into the per-patient witness directory
# (<outputdirWitness>/{patient}/) and re-run the witness indexer.
#
# input : the three archives written by write_results for {patient}/{sample}
# output: {sample}.mutass.zip (copied as-is) plus the decompressed
#         {sample}.muts.json and {sample}.summ.json
#
# Only the declared inputs are touched. The previous implementation copied
# every '*.gz' / '*.mutass.zip' that `find` returned under the whole patient
# directory and then ran `gunzip {sample}*.gz`; that could pick up files of
# other samples (possibly still being written by a concurrent job) and the
# glob also matched samples sharing a name prefix (e.g. S1 / S10).
# NOTE(review): if witness relies on additional '*.gz' files from the patient
# directory that are not declared here, add them as explicit inputs.
rule move_and_unzip_files:
    input:
        mutass_file = config["outputdirPhyloOutput"] + "{patient}/{sample}/{sample}.mutass.zip",
        muts_json = config["outputdirPhyloOutput"] + "{patient}/{sample}/{sample}.muts.json.gz",
        summ_json = config["outputdirPhyloOutput"] + "{patient}/{sample}/{sample}.summ.json.gz"
    params:
        witness_fold = config["outputdirWitness"] + "{patient}/",
        pwgsPythonDir = config["pwgsPythonDir"]
    output:
        mutass_file = config["outputdirWitness"] + "{patient}/{sample}.mutass.zip",
        muts_json = config["outputdirWitness"] + "{patient}/{sample}.muts.json",
        summ_json = config["outputdirWitness"] + "{patient}/{sample}.summ.json"
    run:
        shell("mkdir -p {params.witness_fold}")
        # The assignment archive is copied unchanged.
        shell("cp {input.mutass_file} {output.mutass_file}")
        # Decompress straight to the declared outputs; the source .gz files
        # stay in place and no intermediate .gz is left in the witness folder.
        shell("gunzip -c {input.muts_json} > {output.muts_json}")
        shell("gunzip -c {input.summ_json} > {output.summ_json}")
        # Refresh the witness index so the newly staged run is picked up.
        shell("python2 {params.pwgsPythonDir}/witness/index_data.py")
# Run parse_pwgs_output.py (from config["pipeline_folder"]) for one sample and
# write its results under <parsedOutputPWGS>/{patient}/{sample}/.
#
# input : the sample's CNV and SSM input files plus the staged witness
#         summary (.summ.json) and mutation-assignment archive (.mutass.zip)
# output: tree_likelihoods.txt in the sample's parsed-output directory
# params: k_trees is forwarded to the script as --k.
#
# Inputs are plain wildcard patterns instead of lambdas returning strings with
# "{patient}"/"{sample}" placeholders, so resolution goes through Snakemake's
# normal wildcard substitution.
# NOTE(review): config["pipeline_folder"] is concatenated directly with the
# script name, so it must end with a path separator.
rule run_final_parser_ms:
    input:
        cnv_input = config["outputdirPhyloInput"] + "{patient}/{sample}.cnvs.txt",
        ssm_input = config["outputdirPhyloInput"] + "{patient}/{sample}.ssm.txt",
        summary_file = config["outputdirWitness"] + "{patient}/{sample}.summ.json",
        mutasgn_path = config["outputdirWitness"] + "{patient}/{sample}.mutass.zip"
    params:
        output_parsed_pwgs_results = config["parsedOutputPWGS"],
        k_trees = config["k_trees"],
        pwgs_pipeline = config["pipeline_folder"]
    output:
        #dynamic(config['parsedOutputPWGS'] + "{patient}/{sample}/{treenumber}.csv"),
        config["parsedOutputPWGS"] + "{patient}/{sample}/tree_likelihoods.txt"
    run:
        shell('mkdir -p {params.output_parsed_pwgs_results}{wildcards.patient}/{wildcards.sample}')
        shell(
            'python2 {params.pwgs_pipeline}parse_pwgs_output.py'
            ' --k {params.k_trees}'
            ' --cnv_input {input.cnv_input}'
            ' --ssm_input {input.ssm_input}'
            ' --summary_file {input.summary_file}'
            ' --mutasgn_path {input.mutasgn_path}'
            ' --output_folder {params.output_parsed_pwgs_results}{wildcards.patient}/{wildcards.sample}'
        )