forked from chiulab/surpi
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ribo_snap_bac_euk.sh
executable file
·74 lines (63 loc) · 2.59 KB
/
ribo_snap_bac_euk.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/bin/bash
#
# ribo_snap_bac_euk.sh
#
# Chiu Laboratory
# University of California, San Francisco
#
#
# Copyright (C) 2014 Samia Naccache - All Rights Reserved
# SURPI has been released under a modified BSD license.
# Please see license file for details.
#
scriptname=${0##*/}
if [ $# -lt 4 ]
then
echo "Usage: $scriptname <.sorted> <BAC/EUK> <cores> <folder containing SNAP databases>"
exit 65
fi
###
inputfile=$1
inputfile_type=$2
cores=$3
directory=$4
###
nopathf=${1##*/}
basef=${nopathf%.annotated}
if [ $inputfile_type == "BAC" ]
then
SNAP_index_Large="$directory/snap_index_23sRNA"
SNAP_index_Small="$directory/snap_index_rdp_typed_iso_goodq_9210seqs"
fi
if [ $inputfile_type == "EUK" ]
then
SNAP_index_Large="$directory/snap_index_28s_rRNA_gene_NOT_partial_18s_spacer_5.8s.fa"
SNAP_index_Small="$directory/snap_index_18s_rRNA_gene_not_partial.fa"
fi
awk '{print ">"$1"\n"$10}' $inputfile > $inputfile.fasta # if Bacteria.annotated file has full quality, convert from Sam -> Fastq at this step
fasta_to_fastq $inputfile.fasta > $inputfile.fakq # if Bacteria.annotated file has full quality, convert from Sam -> Fastq at this step
crop_reads.csh $inputfile.fakq 10 75 > $inputfile.fakq.crop
# snap against large ribosomal subunit
snap single $SNAP_index_Large $inputfile.fakq.crop -o $inputfile.noLargeS.unmatched.sam -t $cores -x -f -h 250 -d 18 -n 200 -F u
egrep -v "^@" $inputfile.noLargeS.unmatched.sam | awk '{if($3 == "*") print "@"$1"\n"$10"\n""+"$1"\n"$11}' > $(echo "$inputfile".noLargeS.unmatched.sam | sed 's/\(.*\)\..*/\1/').fastq
echo -e "$(date)\t$scriptname\tDone: first snap alignment"
# snap against small ribosomal subunit
snap single $SNAP_index_Small $inputfile.noLargeS.unmatched.fastq -o $inputfile.noSmallS_LargeS.sam -t $cores -h 250 -d 18 -n 200 -F u
echo -e "$(date)\t$scriptname\tDone: second snap alignment"
# convert snap unmatched to ribo output to header format
awk '{print$1}' $inputfile.noSmallS_LargeS.sam | sed '/^@/d' > $inputfile.noSmallS_LargeS.header.sam
# retrieve reads from original $inputfile
extractSamFromSam.sh $inputfile.noSmallS_LargeS.header.sam $inputfile $basef.noRibo.annotated
echo -e "$(date)\t$scriptname\tCreated $inputfile.noRibo.annotated"
table_generator.sh $basef.noRibo.annotated SNAP N Y N N
rm -f $inputfile.noLargeS.sam
rm -f $inputfile.noLargeS.matched.sam
rm -f $inputfile.noLargeS.unmatched.sam
rm -f $inputfile.noSmallS_LargeS.sam
rm -f $inputfile.noSmallS_LargeS.sam.header
rm -f $inputfile.noLargeS.unmatched.fastq
rm -f $inputfile.fakq
rm -f $inputfile.fakq.crop
rm -f $inputfile.fasta
rm -f $inputfile.sorted
rm -f $inputfile.noSmallS_LargeS.header.sam