-
Notifications
You must be signed in to change notification settings - Fork 42
/
beagle_split_by_n_samples.py
executable file
·50 lines (39 loc) · 1.13 KB
/
beagle_split_by_n_samples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python3
"""Split beagle (can be gzip-compressed) into multiple beagle files with split_num samples each
Usage:
<program> input_beagle split_num
"""
# Modules
import contextlib
import gzip
import math
import os
import sys
# Functions
def myopen(_file, mode="rt"):
    """Open a plain or gzip-compressed file, chosen by the ``.gz`` suffix.

    Args:
        _file: Path of the file to open, given as ``str``, ``bytes`` or an
            ``os.PathLike`` object such as ``pathlib.Path``.
        mode: Mode string handed unchanged to the opener; defaults to
            text-mode reading (``"rt"``).

    Returns:
        The file object returned by ``gzip.open`` when the path ends in
        ``.gz``, otherwise the one returned by the built-in ``open``.
    """
    # os.fsdecode turns PathLike and bytes paths into str, so the suffix test
    # works for every path type that open() and gzip.open() accept themselves.
    opener = gzip.open if os.fsdecode(_file).endswith(".gz") else open
    return opener(_file, mode=mode)
# Parsing user input
INFO_COLUMNS = 3        # leading columns repeated in every output file
COLUMNS_PER_SAMPLE = 3  # the sample count is derived as len(data) / 3


def parse_args(argv):
    """Return ``(input_beagle, split_num)`` parsed from *argv*.

    Args:
        argv: Argument list laid out like ``sys.argv``: program name, input
            path, then the number of samples wanted per output file.

    Returns:
        A tuple of the input path (``str``) and ``split_num`` (``int``).

    Raises:
        SystemExit: With status 1, after printing the module usage text, when
            an argument is missing, ``split_num`` is not an integer, or
            ``split_num`` is smaller than 1.
    """
    try:
        input_beagle = argv[1]
        split_num = int(argv[2])
    except (IndexError, ValueError):
        # Only the failures that mean "bad command line"; a bare except would
        # also swallow KeyboardInterrupt and SystemExit.
        print(__doc__)
        sys.exit(1)
    if split_num < 1:
        # Zero would divide by zero when sizing the outputs, and a negative
        # value would silently produce no output at all.
        print(__doc__)
        sys.exit(1)
    return input_beagle, split_num


def count_output_files(fields, split_num):
    """Return how many output files a row made of *fields* requires.

    Args:
        fields: One input line already split on tabs.
        split_num: Number of samples wanted per output file (at least 1).
    """
    num_samples = len(fields[INFO_COLUMNS:]) // COLUMNS_PER_SAMPLE
    return math.ceil(num_samples / split_num)


def split_columns(fields, split_num, num_files):
    """Split one row into *num_files* rows of ``split_num`` samples each.

    Every returned row starts with the leading info columns. Each sample
    occupies ``COLUMNS_PER_SAMPLE`` consecutive columns, so a chunk is
    ``COLUMNS_PER_SAMPLE * split_num`` columns wide; slicing by ``split_num``
    columns alone would cut samples apart and drop most of the data.

    Args:
        fields: One input line already split on tabs.
        split_num: Number of samples wanted per output file.
        num_files: Number of rows to produce; rows past the end of the data
            contain only the info columns.

    Returns:
        A list of *num_files* lists of column strings.
    """
    info = fields[:INFO_COLUMNS]
    data = fields[INFO_COLUMNS:]
    width = COLUMNS_PER_SAMPLE * split_num
    return [info + data[i * width:(i + 1) * width] for i in range(num_files)]


def split_beagle(input_beagle, split_num, output_prefix="split_"):
    """Write the columns of *input_beagle* into files of ``split_num`` samples.

    Output files are named ``<output_prefix><index>.beagle`` and every input
    line, the first one included, is written to each of them.

    Args:
        input_beagle: Path of the input file; opened through ``myopen``.
        split_num: Number of samples wanted per output file (at least 1).
        output_prefix: Text placed before the file index in output names.

    Returns:
        The number of output files written (0 for an empty input).
    """
    outputs = None
    # ExitStack closes the input and every output, even if a write fails.
    with contextlib.ExitStack() as stack:
        infile = stack.enter_context(myopen(input_beagle))
        for line in infile:
            fields = line.strip().split("\t")
            if outputs is None:
                # Size the outputs once, from the first line. Re-checking each
                # line for a "marker" prefix would reopen (and truncate) the
                # outputs whenever a marker id happens to start with that word.
                outputs = [
                    stack.enter_context(
                        myopen(output_prefix + str(i) + ".beagle", "wt")
                    )
                    for i in range(count_output_files(fields, split_num))
                ]
            rows = split_columns(fields, split_num, len(outputs))
            for handle, row in zip(outputs, rows):
                handle.write("\t".join(row) + "\n")
    return 0 if outputs is None else len(outputs)


def main(argv=None):
    """Run the command-line program using *argv* (defaults to ``sys.argv``)."""
    input_beagle, split_num = parse_args(sys.argv if argv is None else argv)
    split_beagle(input_beagle, split_num)


# Let's go
if __name__ == "__main__":
    main()