-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.sh
executable file
·183 lines (164 loc) · 7.8 KB
/
main.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/bin/bash
set -eo pipefail
$(command -v qsub > /dev/null 2> /dev/null) || (echo "This tool submits jobs to a queue, please run on a cluster of some sort"; exit 1)
# Usage message
function Usage() {
echo -e "\
Usage: $(basename $0) <handler> proj.conf \n\
Where: <handler> is one of:
1 or Quality_Assessment \n\
2 or Sequence_Trimming \n\
3 or Read_Mapping \n\
4 or SAM_Processing \n\
5 or Quantify_Summarize \n\
And: proj.conf is the full file path to the configuration file
" >&2
exit 1
}
# Export the function
export -f Usage
# Function to find maximum memory
function MaxMem() {
local qsub="$1"
local memraw=$(echo "${qsub}" | grep -oE 'mem=[[:alnum:]]+' | cut -f 2 -d '=')
local digits=$(echo "${memraw}" | grep -oE '[[:digit:]]+')
if $(echo "${memraw}" | grep -i 'g' > /dev/null 2> /dev/null); then
local maxmem="${digits}G"
elif $(echo "${memraw}" | grep -i 'm' > /dev/null 2> /dev/null); then
local maxmem="${digits}M"
elif $(echo "${memraw}" | grep -i 'k' > /dev/null 2> /dev/null); then
local maxmem="${digits}K"
else
local maxmem="${digits}"
fi
echo "${maxmem}"
}
# Export the function
export -f MaxMem
# Function to find number of threads requested
function Nthreads() {
local qsub="$1"
local nthreads=$(echo "${qsub}" | grep -oE 'pe smp [[:digit:]]+' | cut -f 3 -d ' ')
echo "${nthreads}"
}
# Export the function
export -f Nthreads
# Check to make sure our samples exist
function checkSamples() {
local sample_list="$1" # Sample ist
[[ -f "${sample_list}" ]] || (echo "Cannot find sample list ${sample_list}" >&2; exit 1)
for sample in $(<${sample_list}); do [[ -f "${sample}" ]] || (echo "Cannot find sample ${sample}" >&2; exit 1); done
}
# Export the function to be used elsewhere
export -f checkSamples
# Where is 'sequence_handling' located?
SEQUENCE_HANDLING=$(pwd -P)
# For clusters that don't use login shells
$(type module > /dev/null 2> /dev/null) || function module() { eval $(/usr/bin/modulecmd bash $*); }
export -f module
# If we don't have two arguments
if [[ "$#" -ne 2 ]]; then Usage; fi # Display the usage message and exit
ROUTINE="$1" # What routine are we running?
CONFIG="$2" # Where is our config file?
# Find full path to the config file
# Because SGE is stupid...
CONFIG="$(realpath ${CONFIG})"
# For the home directory
CONFIG="${CONFIG/'~/'/${HOME}/}"
# For a directory above us
CONFIG="${CONFIG/'../'/$(pwd -P | rev | cut -f 2- -d '/' | rev)/}"
# For this directory
CONFIG="${CONFIG/'./'/$(pwd -P)/}"
# Check the existence of the config file
[[ -f "${CONFIG}" ]] || (echo "Cannot find configuration file" >&2; exit 1)
source "${CONFIG}" # Source it, providing parameters and software
bash "${CONFIG}" > /dev/null 2> /dev/null # Load any modules
# Make our output directory
[[ -z "${OUT_DIR}" ]] && OUT_DIR="${SEQUENCE_HANDLING}/${PROJECT}"
mkdir -p "${OUT_DIR}"
[[ -z "${EMAIL}" ]] && QSUB_EMAIL='' || QSUB_EMAIL="-m abe -M ${EMAIL}"
case "${ROUTINE}" in
1|Quality_Assessment)
echo "$(basename $0): Assessing quality..." >&2
checkSamples ${RAW_SAMPLES}
echo "${SEQUENCE_HANDLING}/scripts/assessQuality.sh --sample-list ${RAW_SAMPLES} --outdir ${OUT_DIR} --project ${PROJECT}" | qsub ${QA_QSUB} ${QSUB_EMAIL} -N Quality_Assessment
;;
2|Sequence_Trimming)
echo "$(basename $0): Trimming read sequences..." >&2
checkSamples ${RAW_SAMPLES}
# Partition samples into forward, reverse, and single-ended samples
if [[ -z "${FORWARD_NAMING}" || -z "${REVERSE_NAMING}" ]]; then
declare -a FORWARD=()
declare -a REVERSE=()
declare -a SINGLES=($(<${RAW_SAMPLES}))
else
declare -a FORWARD=($(grep -E "${FORWARD_NAMING}" "${RAW_SAMPLES}" | sort))
declare -a REVERSE=($(grep -E "${REVERSE_NAMING}" "${RAW_SAMPLES}" | sort))
declare -a SINGLES=($(grep -vEf <(echo -e "${FORWARD_NAMING}\n${REVERSE_NAMING}") "${RAW_SAMPLES}" | sort))
fi
$(${PHRED64}) && QUAL='-64' || QUAL=''
# Submit paired samples
[[ "${#FORWARD[@]}" -ne "${#REVERSE[@]}" ]] && (echo "Unequal numbers of forward and reverse samples" >&2; exit 1)
for i in $(seq ${#FORWARD[@]}); do
NAME=$(basename ${FORWARD[$(($i - 1))]} ${FORWARD_NAMING})
echo "${SEQUENCE_HANDLING}/scripts/trimFASTQ.sh --forward ${FORWARD[$(($i - 1))]} --reverse ${REVERSE[$(($i - 1))]} --outdir ${OUT_DIR} --adapters ${ADAPTERS} --project ${PROJECT} ${QUAL}" | qsub ${ST_QSUB} ${QSUB_EMAIL} -N "Trim_${NAME}_paired"
done
# Submit single samples
for i in $(seq ${#SINGLES[@]}); do
NAME=$(basename ${SINGLES[$((i - 1))]})
echo "${SEQUENCE_HANDLING}/scripts/trimFASTQ.sh --forward ${SINGLES[$(($i - 1))]} --outdir ${OUT_DIR} --adapters ${ADAPTERS} ${QUAL}" | qsub ${ST_QSUB} ${QSUB_EMAIL} -N "Trim_${NAME}"
done
;;
3|Read_Mapping)
echo "$(basename $0): Mapping reads..." >&2
checkSamples "${TRIMMED_LIST}"
NTHREADS=$(Nthreads "${RM_QSUB}")
# Partition samples into forward, reverse, and single-ended reads
declare -a FORWARD=($(grep -E "${FORWARD_TRIMMED}" ${TRIMMED_LIST} | sort))
declare -a REVERSE=($(grep -E "${REVERSE_TRIMMED}" ${TRIMMED_LIST} | sort))
declare -a SINGLES=($(grep -E "${SINGLES_TRIMMED}" ${TRIMMED_LIST} | sort))
# Submit paired samples
[[ "${#FORWARD[@]}" -ne "${#REVERSE[@]}" ]] && (echo "Unequal numbers of forward and reverse samples" >&2; exit 1)
for i in $(seq ${#FORWARD[@]}); do
NAME=$(basename ${FORWARD[$(($i - 1))]} ${FORWARD_TRIMMED})
echo "${SEQUENCE_HANDLING}/scripts/starMap.sh --forward ${FORWARD[$(($i - 1))]} --reverse ${REVERSE[$(($i - 1))]} --index ${REF_IND} --genome ${REF_GEN} --threads ${NTHREADS} --sample-name ${NAME} --project ${PROJECT} --outdir ${OUT_DIR}" | qsub ${RM_QSUB} ${QSUB_EMAIL} -N "Map_${NAME}_paired"
done
# Submit single samples
for i in $(seq ${#SINGLES[@]}); do
NAME=$(basename ${SINGLES[$(($i - 1))]} ${FORWARD_TRIMMED})
echo "${SEQUENCE_HANDLING}/scripts/starMap.sh --forward ${SINGLES[$(($i - 1))]} --index ${REF_IND} --genome ${REF_GEN} --threads ${NTHREADS} --sample-name ${NAME} --project ${PROJECT} --outdir ${OUT_DIR}" | qsub ${RM_QSUB} ${QSUB_EMAIL} -N "Map_${NAME}_single"
done
;;
4|SAM_Processing)
echo "$(basename $0): Processing SAM files" >&2
checkSamples "${MAPPED_LIST}"
MAXMEM=$(MaxMem "${SP_QSUB}")
case ${INDEX_TYPE} in
BAI)
INDEXCMD=''
;;
CSI)
INDEXCMD='--csi'
;;
*)
echo "Unknown index type ${INDEX_TYPE}, please choose from 'BAI' or 'CSI'"
exit 1
;;
esac
for sample in $(<${MAPPED_LIST}); do
echo "${SEQUENCE_HANDLING}/scripts/processSAM.sh --sam-file ${sample} --reference ${REF_GEN} --outdir ${OUT_DIR} --max-mem ${MAXMEM} --project ${PROJECT} ${INDEXCMD}" | qsub ${SP_QSUB} ${QSUB_EMAIL} -N "Process_$(basename ${sample} .sam)"
done
;;
5|Quantify_Summarize)
echo "$(basename $0): Counting genes..." >&2
checkSamples "${BAM_LIST}"
echo "${SEQUENCE_HANDLING}/scripts/countFeatures.sh --sample-list ${BAM_LIST} --annotation ${REF_ANN} --outdir ${OUT_DIR} --project ${PROJECT} --structural ${STRUCTURAL} --expression ${DSTAT_EXPR}" | qsub ${QS_QSUB} ${QSUB_EMAIL} -N "Quantify_Summarize"
;;
Variant_Calling)
echo "$(basename $0): Calling variants..." >&2
echo "Variant_Calling is not yet implemented, exiting..." >&2; exit 1
;;
*)
Usage
;;
esac