-
Notifications
You must be signed in to change notification settings - Fork 1
/
splitFasta.py
60 lines (44 loc) · 1.7 KB
/
splitFasta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from src.FeaturesData import *
import click
# Top-level click group, configured for command chaining and allowed to run
# when no subcommand is given. Nothing visible in this file registers a
# command on it. Documented with comments rather than a docstring because
# click would display a docstring as the group's help text.
@click.group(chain=True, invoke_without_command=True)
def cli():
    return None
# Helper for splitFastaFile: parse a FASTA file into {record_id: sequence}.
def _read_fasta_records(fasta_path):
    """Parse *fasta_path* into an insertion-ordered ``{record_id: sequence}`` dict.

    The record id is the first whitespace-delimited token of the header line
    with the leading ``>`` removed. Sequence lines belonging to one record are
    concatenated without their line terminators.

    Raises:
        OSError: if the file cannot be opened or read.
        ValueError: if the file contains no header line, or if non-blank
            sequence data appears before the first header.
    """
    records = {}
    record_id = None
    chunks = []
    with open(fasta_path, 'r') as handle:
        # Iterate the handle directly so the whole file is never held as a
        # list of lines in addition to the parsed records.
        for line in handle:
            if line.startswith('>'):
                if record_id is not None:
                    records[record_id] = ''.join(chunks)
                record_id = line.split()[0][1:]
                chunks = []
                continue
            # Strip only the newline: slicing off the last character would
            # drop a real residue when the file has no trailing newline.
            text = line.rstrip('\n')
            if record_id is None:
                if text.strip():
                    raise ValueError(
                        'sequence data found before the first ">" header in '
                        + str(fasta_path))
                continue
            chunks.append(text)
    if record_id is None:
        raise ValueError('no ">" header line found in ' + str(fasta_path))
    records[record_id] = ''.join(chunks)
    return records


# Contract of the command below (kept out of the docstring, which click shows
# verbatim as --help text):
#   input     -- path of the FASTA file to split.
#   outdir    -- existing directory that receives the part files, named
#                "<input stem>_part_<N>.fasta" with N starting at 1.
#   batchsize -- maximum number of sequences per part file; must be >= 1.
# Records sharing an id are stored once: the later sequence replaces the
# earlier one, at the position of the first occurrence.
# Each sequence is written as a single line under its ">id" header.
# Raises click.BadParameter for a non-positive batchsize, and re-raises
# OSError / ValueError from reading the input after printing a short notice.
@click.command()
@click.option('--input', required=True, help='Input FASTA file ')
@click.option('--outdir', required=True, help='Output folder')
@click.option('--batchsize', required=False, default=1000, help='Number of sequences to be split per file. [default: 1000]')
def splitFastaFile( input:Path, outdir:Path, batchsize:int) :
    """Split a FASTA file into numbered part files of at most BATCHSIZE sequences."""
    if batchsize < 1:
        raise click.BadParameter('must be a positive integer',
                                 param_hint='--batchsize')

    # TODO implement a better FASTA file check
    try:
        seqData = _read_fasta_records(input)
    except (OSError, ValueError):
        print("Input FASTA file error")
        raise

    records = list(seqData.items())
    stem = Path(input).stem
    # Slicing in fixed strides guarantees every part holds at most `batchsize`
    # records; a hand-maintained counter is easy to get off by one at the
    # point where a new part file is started.
    for part, start in enumerate(range(0, len(records), batchsize), start=1):
        partPath = Path(outdir) / '{}_part_{}.fasta'.format(stem, part)
        # The context manager closes every part file, including the last one.
        with open(partPath, 'w') as outputFile:
            for name, seq in records[start:start + batchsize]:
                print('>', name, sep='', file=outputFile)
                print(seq, file=outputFile)
# Script entry point: invoke the click command, which takes its options
# (--input, --outdir, --batchsize) from the command line.
if __name__ == "__main__":
    splitFastaFile()