forked from chiulab/surpi
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extractAlltoFast.sh
executable file
·98 lines (90 loc) · 2.79 KB
/
extractAlltoFast.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/bin/bash
#
# extractAlltoFast.sh
#
# Retrieve FASTA or FASTQ records from a parent file, selecting the reads named in an input file that is either BLASTN-style (m8 table, SAM file, or plain list of headers), FASTA, or FASTQ
# Chiu Laboratory
# University of California, San Francisco
# January, 2014
#
# --- uses the C program "fqextract.c" (http://www.biostars.org/p/10353/) ---
#
# Copyright (C) 2014 Samia Naccache - All Rights Reserved
# SURPI has been released under a modified BSD license.
# Please see license file for details.
# Name this script was invoked as, with any leading directory stripped;
# it is used in the usage text and as a tag in every progress line.
scriptname="${0##*/}"

# Six positional arguments are required; with fewer, print the usage line
# and stop with status 65.
if (( $# < 6 )); then
  echo "Usage: $scriptname <inputfile> <Input File Type [BLASTN/FASTA/FASTQ]> <Parent file> <Parent File Type [FASTA/FASTQ]> <Output file> <Output format: [FASTA/FASTQ]>"
  exit 65
fi

### Positional arguments, bound to descriptive names
inputfile="$1"         # file naming the reads to retrieve
inputfile_type="$2"    # BLASTN, FASTA or FASTQ
parentfile="$3"        # file the records are pulled from
parentfile_type="$4"   # FASTA or FASTQ
outputfile="$5"        # where the retrieved records are written
output_format="$6"     # FASTA or FASTQ
###
#######################################
# Convert 4-line FASTQ records on stdin into 2-line FASTA records on stdout.
# The first sed drops every 4th line (quality), the second drops every 3rd
# remaining line (the "+" separator), and the third turns a leading "@"
# into ">".
#######################################
fastq_to_fasta() {
  sed "n;n;n;d" | sed "n;n;d" | sed "s/^@/>/g"
}

#######################################
# Write the read names found in an input file to a header list, one per line.
# Globals:   scriptname (read, log tag)
# Arguments: $1 - input file type: BLASTN, FASTA or FASTQ
#            $2 - input file
#            $3 - header list file to create
# Outputs:   one progress line on stdout; a warning on stderr when the type
#            is not recognised
# Returns:   0 when a header list was written, 1 when the type is unknown
#            (nothing is written in that case)
#######################################
write_header_list() {
  local in_type=$1 in_file=$2 header_list=$3

  case "$in_type" in
    BLASTN)
      # The first whitespace-delimited column of every line is the read name.
      awk '{print$1}' "$in_file" > "$header_list"
      # Log text kept verbatim for anything that parses the log; the step
      # above neither de-duplicates nor prepends "@".
      echo -e "$(date)\t$scriptname\tuniqued blastn file, replaced beginning with @"
      ;;
    FASTA)
      # Every line containing ">" is treated as a header; all ">" are removed.
      grep ">" "$in_file" | sed 's/>//g' > "$header_list"
      echo -e "$(date)\t$scriptname\tDone preparing input Fasta file "
      ;;
    FASTQ)
      # NOTE(review): quality lines that happen to start with "@" are matched
      # as well and end up in the list as extra names - confirm whether the
      # inputs can contain such lines.
      grep "^@" "$in_file" | sed 's/@//g' > "$header_list"
      echo -e "$(date)\t$scriptname\tDone preparing input Fastq file"
      ;;
    *)
      echo -e "$(date)\t$scriptname\tWARNING: unrecognised input file type '$in_type', nothing extracted" >&2
      return 1
      ;;
  esac
  return 0
}

#######################################
# Pull the records named in a header list out of the parent file.
# A FASTA parent always yields FASTA (the output format is not consulted);
# a FASTQ parent yields FASTQ or FASTA depending on the output format.
# Globals:   scriptname (read, log tag)
# Arguments: $1 - header list (one read name per line)
#            $2 - parent file
#            $3 - parent file type: FASTA or FASTQ
#            $4 - output file
#            $5 - output format: FASTA or FASTQ
# Outputs:   writes the output file; a warning on stderr when the parent type
#            or output format is not recognised (no output file is written)
#######################################
extract_records() {
  local header_list=$1 parent=$2 parent_type=$3 out=$4 out_format=$5

  case "$parent_type" in
    FASTA)
      seqtk subseq "$parent" "$header_list" > "$out"
      ;;
    FASTQ)
      case "$out_format" in
        FASTQ)
          # fqextract takes the name list as its argument and the FASTQ
          # stream on stdin.
          fqextract "$header_list" < "$parent" > "$out"
          ;;
        FASTA)
          # Piped straight into the converter, so no intermediate
          # .ex.fq file is needed.
          fqextract "$header_list" < "$parent" | fastq_to_fasta > "$out"
          ;;
        *)
          echo -e "$(date)\t$scriptname\tWARNING: unrecognised output format '$out_format', no output written" >&2
          ;;
      esac
      ;;
    *)
      echo -e "$(date)\t$scriptname\tWARNING: unrecognised parent file type '$parent_type', no output written" >&2
      ;;
  esac
}

#######################################
# Entry point: build the header list next to the input file, extract the
# matching records from the parent file, then remove the header list.
# Every input type goes through the same extraction step, so the
# FASTQ-input / FASTQ-parent / FASTQ-output combination now writes to the
# requested output file like all the others.
# Globals:   scriptname (read, log tag)
# Arguments: $1 input file, $2 input file type, $3 parent file,
#            $4 parent file type, $5 output file, $6 output format
# Returns:   0 also when the input type is unrecognised (nothing is done)
#######################################
extract_all_to_fast() {
  local in_file=$1 in_type=$2 parent=$3 parent_type=$4 out=$5 out_format=$6
  local header_list="$in_file.header"

  echo -e "$(date)\t$scriptname\tprepare $in_type file"
  write_header_list "$in_type" "$in_file" "$header_list" || return 0
  extract_records "$header_list" "$parent" "$parent_type" "$out" "$out_format"
  rm -f -- "$header_list"
}

extract_all_to_fast "$inputfile" "$inputfile_type" "$parentfile" \
  "$parentfile_type" "$outputfile" "$output_format"