-
Notifications
You must be signed in to change notification settings - Fork 4
/
picker.sh
executable file
·127 lines (104 loc) · 2.27 KB
/
picker.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/bin/bash
#
# picker.sh
#
# Chiu Laboratory
# University of California, San Francisco
#
# Copyright (C) 2015 Scot Federman - All Rights Reserved
# metaPORE has been released under a modified BSD license.
# Please see license file for details.
nt_DB="/reference/BLASTDB_01_27_2015/nt"
# if [[ $1 ]]
# then
# search_term=$1
# else
# search_term="Zaire"
# fi
# unified_file="../realtime_test1.unified"
optspec=":s:hu:"
bold=$(tput bold)
normal=$(tput sgr0)
scriptname=${0##*/}
while getopts "$optspec" option; do
case "${option}" in
s) search_term=${OPTARG};;
h) HELP=1;;
u) unified_file=${OPTARG};;
:) echo "Option -$OPTARG requires an argument." >&2
exit 1
;;
esac
done
if [[ $HELP -eq 1 || $# -lt 1 ]]
then
cat <<USAGE
${bold}${scriptname}${normal}
This program will pick a representative gi from a MetaPORE unified file.
${bold}Command Line Switches:${normal}
-h Show this help & ignore all other switches
-s Specify search term
-u Specify unified file
${bold}Usage:${normal}
Pick representative gi for Zaire ebolavirus
$scriptname -u realtime_test1.unified -s "Zaire ebolavirus"
USAGE
exit
fi
#create correctly ordered list of gi (by frequency)
gi_list=$(grep "$search_term" "$unified_file" | awk '{print $4}' | sort | uniq -c | sort -rn | awk '{print $2}')
#then, look down list for:
#1. complete genome
#2. complete sequence
#3. partial sequence/individual gene
#4. top remaining sequence
gi_counter=0
for gi in $gi_list
do
((gi_counter++))
gi_type=$(blastdbcmd -db $nt_DB -entry $gi | head -1)
if [[ "$gi_type" =~ "omplete genome" ]]
then
gi_to_pick=$gi
break
fi
# echo -e "$gi_counter\t$gi\t$gi_type"
done
if [[ ! $gi_to_pick ]]
then
for gi in $gi_list
do
((gi_counter++))
gi_type=$(blastdbcmd -db $nt_DB -entry $gi | head -1)
if [[ "$gi_type" =~ "omplete sequence" ]]
then
gi_to_pick=$gi
break
fi
# echo -e "$gi_counter\t$gi\t$gi_type"
done
fi
if [[ ! $gi_to_pick ]]
then
for gi in $gi_list
do
((gi_counter++))
gi_type=$(blastdbcmd -db $nt_DB -entry $gi | head -1)
if [[ "$gi_type" =~ "partial sequence" ]]
then
gi_to_pick=$gi
break
fi
# echo -e "$gi_counter\t$gi\t$gi_type"
done
fi
if [[ ! $gi_to_pick ]]
then
for gi in $gi_list
do
((gi_counter++))
gi_to_pick=$gi
break
done
fi
echo "$gi_to_pick"