forked from AstraZeneca-NGS/VarDict
-
Notifications
You must be signed in to change notification settings - Fork 0
/
splitBed.pl
executable file
·48 lines (43 loc) · 977 Bytes
/
splitBed.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/perl -w
# Split bed file
use strict;
# Usage: splitBed.pl <bed_file> <number_of_parts>
#
# Distributes the records of a BED file over <number_of_parts> output files
# named "<basename of bed_file>.1" .. "<basename of bed_file>.N", written to
# the current directory.  Records sharing the same 4th column (tab separated)
# are always kept together in one output file.  Each group is appended to the
# output file that currently holds the fewest records, so the parts end up
# roughly balanced.  "track" and "browser" header lines are dropped.
use File::Basename qw(basename);

my $bed  = shift;
my $size = shift;

# Fail early with a usage message instead of a division-by-zero or a silent
# no-op when the arguments are missing or the part count is not positive.
die "Usage: $0 <bed_file> <number_of_parts>\n"
    unless defined $bed
        && defined $size
        && $size =~ /^\d+$/
        && $size > 0;

# Read the whole input first, grouping records by their 4th column.
# @order remembers the first appearance of each group so the distribution
# below is reproducible from run to run (hash order is not).
open(my $in, '<', $bed) or die "Cannot open $bed for reading: $!\n";
my %genes;
my @order;
while (my $line = <$in>) {
    chomp $line;
    next if ( $line =~ /^track/i || $line =~ /^browser/i );
    my @a = split(/\t/, $line);
    # Records with fewer than four columns are grouped under the empty name.
    my $gene = defined $a[3] ? $a[3] : '';
    push(@order, $gene) unless exists $genes{$gene};
    push(@{ $genes{$gene} }, $line);
}
close($in);

# Output files are named after the input's base name, computed in-process
# so that the user supplied path never reaches a shell.
my $base = basename($bed);
my @beds = ();    # output file handles, one per part
my @cur  = ();    # number of records written to each part so far
for my $i (1 .. $size) {
    my $file = "$base.$i";
    open(my $out, '>', $file) or die "Cannot open $file for writing: $!\n";
    push(@beds, $out);
    push(@cur, 0);
}

# Greedy balancing: every group goes to the currently smallest part
# (the lowest numbered one on ties).
foreach my $gene (@order) {
    my $v = $genes{$gene};
    my $N = 0;
    for my $i (1 .. $#cur) {
        $N = $i if ( $cur[$i] < $cur[$N] );
    }
    my $out = $beds[$N];
    foreach my $rec (@$v) {
        print $out "$rec\n";
    }
    $cur[$N] += @$v;
}

# Buffered write errors only surface at close time, so check it.
for my $i (0 .. $#beds) {
    close($beds[$i]) or die "Error closing $base." . ($i + 1) . ": $!\n";
}