forked from Kennedy-Lab-UW/Duplex-Sequencing
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bash_template.sh
executable file
·157 lines (126 loc) · 4.73 KB
/
bash_template.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/bin/bash
# DS bash script
# Version 2.1
#
# Step 1: Configure the run.
# Wipe the terminal before anything is printed. This happens before strict
# mode is enabled, so a failing `clear` does not abort the script.
clear
# Strict mode:
#   -e           stop on any error
#   -u           throw an error on calling an unassigned variable
#   -o pipefail  stop on an error inside a pipeline
set -euo pipefail

# DEFAULTS -- template values, edit per run.
DSpath=""            # program path (directory holding the DS python scripts)
alignRef=""          # reference genome handed to bwa
runIdentifier=""     # prefix for every output file, the log and the output folder
read1in="seq1.fq"    # read 1 input, passed to tag_to_header.py --infile1
read2in="seq2.fq"    # read 2 input, passed to tag_to_header.py --infile2
iSize="-1"           # passed to ConsensusMaker.py --isize
minMem="3"           # minimum family size
maxMem="1000"        # maximum family size
cutOff="0.7"         # consensus cutoff
nCutOff="1"          # consensus N cutoff
readLength="0"       # raw read length; reduced below by barcode + spacer
barcodeLength="12"   # barcode length (tag_to_header.py --taglen)
spacerLength="5"     # spacer length (tag_to_header.py --spacerlen)
filtersSet="os"      # filters (ConsensusMaker.py --filt)
readTypes="dpm"      # read types (ConsensusMaker.py --read_type)
repFilt="9"          # repetitive tag filter length
readOut="1000000"    # exported only; not otherwise referenced in this script
Ncores="18"          # thread count for `bwa aln -t`

# NONDEFAULTS -- derived from the values above.
# Final read length: what is left once barcode and spacer are removed.
readLength=$(( readLength - barcodeLength - spacerLength ))
# Log file name and output folder both derive from the run identifier.
logFile="${runIdentifier}.log.txt"
output_folder="${runIdentifier}"

# Export every run parameter (logFile is intentionally not in this list,
# matching the original script).
export \
  DSpath alignRef runIdentifier \
  read1in read2in \
  iSize minMem maxMem cutOff nCutOff \
  readLength barcodeLength spacerLength \
  filtersSet readTypes repFilt readOut Ncores \
  output_folder
# Bring the required tools onto PATH via the Environment Modules Project
# (http://modules.sourceforge.net). Modules are loaded in the same order as
# before; under `set -e` the first failing load stops the script.
for requiredModule in Python BWA SAMtools; do
  module load "${requiredModule}"
done
# Print out the options used for this run, both to the terminal and to the
# log file.
# All expansions are quoted so that a run identifier or path containing
# whitespace or glob characters is logged verbatim and cannot redirect
# `touch`/`tee` to unintended files.
touch "${logFile}"
{
  # Each line is "<label> <value>", the same layout `echo label value` gives.
  printf '%s %s\n' "Run identifier: " "${runIdentifier}"
  printf '%s %s\n' "Program path: " "${DSpath}"
  printf '%s %s\n' "Reference genome: " "${alignRef}"
  printf '%s %s\n' "Barcode length: " "${barcodeLength}"
  printf '%s %s\n' "Spacer length: " "${spacerLength}"
  printf '%s %s\n' "Post-tag_to_header read length: " "${readLength}"
  printf '%s %s\n' "Repetitive tag filter length: " "${repFilt}"
  printf '%s %s\n' "Minimum family size: " "${minMem}"
  printf '%s %s\n' "Maximum family size: " "${maxMem}"
  printf '%s %s\n' "Consensus cutoff: " "${cutOff}"
  printf '%s %s\n' "Consensus N cutoff: " "${nCutOff}"
  printf '%s %s\n' "Read types: " "${readTypes}"
  printf '%s %s\n' "Filters: " "${filtersSet}"
  # Blank separator line before the per-step messages.
  printf '\n'
} | tee -a "${logFile}"
# Step 2: Run tag_to_header.py on input files
# Announce the start of the run with a timestamp (terminal + log file).
echo "Starting Run" | tee -a "${logFile}"
echo "tag_to_header starting" | tee -a "${logFile}"
date | tee -a "${logFile}"
echo "" | tee -a "${logFile}"
# Expansions are quoted so paths or identifiers containing whitespace reach
# the script as single arguments.
# NOTE(review): the later steps read ${runIdentifier}.seq1.smi.fq,
# ${runIdentifier}.seq2.smi.fq and ${runIdentifier}.pe.tagcounts, which are
# presumably written by this step -- confirm against tag_to_header.py.
python "${DSpath}/tag_to_header.py" \
  --infile1 "${read1in}" \
  --infile2 "${read2in}" \
  --outprefix "${runIdentifier}" \
  --tagstats \
  --spacerlen "${spacerLength}" \
  --taglen "${barcodeLength}"
# Step 3: Align sequences
# Each read file is aligned separately with `bwa aln`, then the two
# alignments are paired into a single SAM file with `bwa sampe`.
# File names and redirect targets are quoted: an unquoted redirect target
# containing whitespace fails with "ambiguous redirect".
echo "Aligning with BWA" | tee -a "${logFile}"
date | tee -a "${logFile}"
bwa aln -t "${Ncores}" "${alignRef}" "${runIdentifier}.seq1.smi.fq" \
  > "${runIdentifier}.seq1.aln"
bwa aln -t "${Ncores}" "${alignRef}" "${runIdentifier}.seq2.smi.fq" \
  > "${runIdentifier}.seq2.aln"
bwa sampe -s "${alignRef}" \
  "${runIdentifier}.seq1.aln" "${runIdentifier}.seq2.aln" \
  "${runIdentifier}.seq1.smi.fq" "${runIdentifier}.seq2.smi.fq" \
  > "${runIdentifier}.pe.sam"
# Step 4: Sort aligned sequences
# Convert the paired-end SAM to BAM and sort it. The second argument to
# `samtools sort` is an output prefix; the next step reads
# ${runIdentifier}.pe.sort.bam.
# NOTE(review): `samtools sort <in> <out.prefix>` is the legacy (pre-1.x)
# calling convention; newer samtools releases expect `-o <file>` instead --
# confirm the version provided by the SAMtools module before changing it.
echo "Sorting aligned sequences" | tee -a "${logFile}"
date | tee -a "${logFile}"
# Quoted so a run identifier with whitespace stays a single file name.
samtools view -Sbu "${runIdentifier}.pe.sam" \
  | samtools sort - "${runIdentifier}.pe.sort"
# Step 5: Run Consensus Maker
# Runs ConsensusMaker.py on the sorted paired-end BAM with the family-size,
# cutoff, read-type and filter settings configured at the top of the script,
# writing ${runIdentifier}.sscs.bam.
echo "Starting Consensus Maker" | tee -a "${logFile}"
date | tee -a "${logFile}"
# Every expansion is quoted so each value is passed as exactly one argument.
python "${DSpath}/ConsensusMaker.py" \
  --infile "${runIdentifier}.pe.sort.bam" \
  --tagfile "${runIdentifier}.pe.tagcounts" \
  --outfile "${runIdentifier}.sscs.bam" \
  --minmem "${minMem}" \
  --maxmem "${maxMem}" \
  --readlength "${readLength}" \
  --cutoff "${cutOff}" \
  --Ncutoff "${nCutOff}" \
  --read_type "${readTypes}" \
  --filt "${filtersSet}" \
  --isize "${iSize}"
# Step 6: Sort SSCSs
# Sort the SSCS BAM written by the previous step; the result is read by the
# Duplex Maker step as ${runIdentifier}.sscs.sort.bam.
# NOTE(review): uses the legacy `samtools sort <in> <out.prefix>` form --
# confirm it matches the samtools version loaded by the SAMtools module.
echo "Sorting SSCSs" | tee -a "${logFile}"
date | tee -a "${logFile}"
# Quoted so a run identifier with whitespace stays a single file name.
samtools view -bu "${runIdentifier}.sscs.bam" \
  | samtools sort - "${runIdentifier}.sscs.sort"
# Step 7: Run Duplex Maker
# Runs DuplexMaker.py on the sorted SSCS BAM, writing ${runIdentifier}.dcs.bam.
# NOTE(review): the following step reads ${runIdentifier}.dcs.r1.fq and
# ${runIdentifier}.dcs.r2.fq, presumably also produced here -- confirm
# against DuplexMaker.py.
echo "Starting Duplex Maker" | tee -a "${logFile}"
date | tee -a "${logFile}"
# Every expansion is quoted so each value is passed as exactly one argument.
python "${DSpath}/DuplexMaker.py" \
  --infile "${runIdentifier}.sscs.sort.bam" \
  --outfile "${runIdentifier}.dcs.bam" \
  --Ncutoff "${nCutOff}" \
  --readlength "${readLength}"
# Step 8: Align DCSs
# Same aln/aln/sampe sequence as the initial alignment, applied to the DCS
# FASTQ files; the paired result is written to ${runIdentifier}.dcs.sam.
# File names and redirect targets are quoted: an unquoted redirect target
# containing whitespace fails with "ambiguous redirect".
echo "Aligning DCSs" | tee -a "${logFile}"
date | tee -a "${logFile}"
bwa aln -t "${Ncores}" "${alignRef}" "${runIdentifier}.dcs.r1.fq" \
  > "${runIdentifier}.dcs.r1.aln"
bwa aln -t "${Ncores}" "${alignRef}" "${runIdentifier}.dcs.r2.fq" \
  > "${runIdentifier}.dcs.r2.aln"
bwa sampe -s "${alignRef}" \
  "${runIdentifier}.dcs.r1.aln" "${runIdentifier}.dcs.r2.aln" \
  "${runIdentifier}.dcs.r1.fq" "${runIdentifier}.dcs.r2.fq" \
  > "${runIdentifier}.dcs.sam"
# Step 9: Sort aligned DCSs
# Convert the DCS SAM to BAM and sort it; the indexing step reads the result
# as ${runIdentifier}.dcs.aln.sort.bam.
# NOTE(review): uses the legacy `samtools sort <in> <out.prefix>` form --
# confirm it matches the samtools version loaded by the SAMtools module.
echo "Sorting aligned DCSs" | tee -a "${logFile}"
date | tee -a "${logFile}"
# Quoted so a run identifier with whitespace stays a single file name.
samtools view -Sbu "${runIdentifier}.dcs.sam" \
  | samtools sort - "${runIdentifier}.dcs.aln.sort"
# Step 10: Index sorted DCSs
# Build the index for the sorted DCS BAM produced by the previous step.
echo "Indexing sorted DCSs" | tee -a "${logFile}"
date | tee -a "${logFile}"
# Quoted so a run identifier with whitespace stays a single file name.
samtools index "${runIdentifier}.dcs.aln.sort.bam"
# Step 11: Clean up
# Log the end of the run, then hand the current working directory and the
# output folder name to clean.py.
echo "Finishing with run.. " "${runIdentifier}" | tee -a "${logFile}"
echo "Cleaning.." | tee -a "${logFile}"
date | tee -a "${logFile}"
# "$(pwd)" and the folder name are quoted so a working directory or run
# identifier containing whitespace is passed as a single argument.
python "${DSpath}/clean.py" \
  --scripts_folder "$(pwd)" \
  --output_folder "${output_folder}"