forked from jgurtowski/nanocorr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
partition.py
54 lines (40 loc) · 1.26 KB
/
partition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python
import sys
import os
from itertools import starmap, chain, imap
from operator import itemgetter
from jbio.io.file import iterator_over_file_from_extension as ioffe
from jbio.fasta import record_to_string as fasta_record_to_string
from jbio.functional import compose
# Two numeric limits plus at least one input file are required
# (sys.argv[0] is the script name, hence four entries in total).
if len(sys.argv) < 4:
    sys.exit("partition.py <reads_per_file (int)> <files_per_dir (int)> file1.fa [file2.fa ...]")
def pstr(num):
    """Return *num* as a decimal string, zero-padded to at least 4 characters.

    Used to build the fixed-width directory and file names of the partition
    layout (e.g. ``0001``). Values wider than four characters are returned
    unpadded and untruncated.
    """
    width = 4
    return "%0*d" % (width, num)
# ---------------------------------------------------------------------------
# Split the records of all input files into numbered partition files.
#
# Layout produced in the current working directory:
#   0001/p0001, 0001/p0002, ...   at most `rpf` records per file
#   0002/p0001, ...               at most `fpd` files per directory
#   ReadIndex.txt                 one "<read name>\t<partition file>" line
#                                 per record written
# ---------------------------------------------------------------------------

# reads-per-file and files-per-directory limits, in that order.
(rpf, fpd) = map(int, sys.argv[1:3])
if rpf < 1 or fpd < 1:
    # A zero limit would otherwise surface as a ZeroDivisionError from the
    # modulo tests in the loop below; fail early with a readable message.
    sys.exit("reads_per_file and files_per_dir must be positive integers")
in_files = sys.argv[3:]

# One record iterator per input file, chained into a single record stream.
openers = map(ioffe, in_files)
input_data = chain.from_iterable(openers)

total_reads = 0  # number of records written so far
dnum = 0         # number of the current output directory (first is 0001)
fnum = 0         # number of the current file within that directory
fh = None        # handle of the partition file currently being written

readidx_fh = open("ReadIndex.txt", "w")
try:
    for record in input_data:
        if total_reads % rpf == 0:
            # The current file is full (or this is the very first record):
            # rotate to the next partition file.
            if total_reads % (rpf * fpd) == 0:
                # The current directory is full as well: start a new one and
                # restart the per-directory file numbering.
                dnum += 1
                fnum = 0
                # NOTE(review): os.mkdir raises if the directory already
                # exists, so a re-run in the same working directory aborts
                # here rather than overwriting earlier output.
                os.mkdir(pstr(dnum))
            fnum += 1
            if fh is not None:
                fh.close()
            current_file = "%s/p%s" % (pstr(dnum), pstr(fnum))
            fh = open(current_file, "w")

        # Keep only the first whitespace-delimited token of the record name.
        clean_name = str(record.name).split()[0]
        # presumably `record` is a namedtuple-like object (it provides
        # `_replace`); verify against jbio's record type.
        clean_record = record._replace(name=clean_name)
        readidx_fh.write(clean_name + "\t" + current_file + "\n")
        fh.write(fasta_record_to_string(clean_record))
        fh.write("\n")
        total_reads += 1
finally:
    # Always release both handles, including on error. `fh` is still None
    # when the inputs contained no records, so it must be checked before
    # closing (the unconditional close used to raise AttributeError).
    readidx_fh.close()
    if fh is not None:
        fh.close()