-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_fasta.py
56 lines (46 loc) · 1.67 KB
/
extract_fasta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#
# Copyright (c) 2018-2020 Tuukka Norri
# This code is licensed under MIT license (see LICENSE for details).
#
import argparse
import os
import sys
from Bio import SeqIO
# Input: FASTA.
# Output: The specified sequence.
def extract_fasta(fh, dst, seq_id, new_id, reverse_complement, remove_gaps, log):
if log:
print("Reading FASTA…", file = sys.stderr)
seq = None
for rec in SeqIO.parse(fh, format = "fasta"):
if log:
print("Found record: %s" % rec.id, file = sys.stderr)
if rec.id == seq_id:
print(">%s" % new_id, file = dst)
if reverse_complement:
table = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', '-': '-', 'N': 'N'}
for i in reversed(range(len(rec.seq))):
if remove_gaps and '-' == rec.seq[i]:
continue
dst.write(table[rec.seq[i]])
else:
# FIXME: this is really inefficient.
for i in range(len(rec.seq)):
if remove_gaps and '-' == rec.seq[i]:
continue
dst.write(rec.seq[i])
dst.write("\n")
return True
return False
print("Sequence identifier not found in FASTA.", file = sys.stderr)
sys.exit(os.EX_DATAERR)
if __name__ == "__main__":
parser = argparse.ArgumentParser("Output the specified sequence in a FASTA file.")
parser.add_argument('--fasta', required = True, type = argparse.FileType('rU'))
parser.add_argument('--seq-id', required = True)
parser.add_argument('--reverse-complement', action = "store_true")
parser.add_argument('--remove-gaps', action = "store_true")
args = parser.parse_args()
if not extract_fasta(args.fasta, sys.stdout, args.seq_id, args.reverse_complement, args.remove_gaps, True):
print("Sequence identifier not found in FASTA.", file = sys.stderr)
sys.exit(os.EX_DATAERR)