forked from Axolotl233/Simple_Script
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Gff3.convert.bed.allcds.pl
66 lines (63 loc) · 1.65 KB
/
Gff3.convert.bed.allcds.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#! perl
use strict;
use warnings;
my $f = shift;
my $class = shift;
$class //= "cds";
my $up_down = shift;
$up_down //= 0;
if($class eq "cds"){
&get_cds($f);
}elsif($class eq "gene"){
&get_gene($f,$up_down);
}
sub get_gene{
my $g = shift @_;
my $ex = shift @_;
open (F,$g)||die"$!";
open (O,">$g.mrna.bed")||die "$!";
while (<F>){
chomp;
my @a=split(/\s+/,$_);
if ($a[2] eq 'mRNA'){
my $s = ($a[3] - $ex < 0)? 0 : $a[3] - 2000;
my $e = $a[4] + $ex;
$a[8]=~/ID=([^;]+);.*?;?Parent=([^;]+)/;
print O "$a[0]\t$2\t$1\t$s-$e\n";
}
}
close O;
}
sub get_cds{
my $g = shift @_;
my %gff;
open (F,$g)||die"$!";
while (<F>) {
chomp;
my @a=split(/\s+/,$_);
if ($a[2] eq 'mRNA'){
$a[8]=~/ID=([^;]+);.*?;?Parent=([^;]+)/;
my ($trans,$gene)=($1,$2);
$gff{gene}{$a[0]}{$gene}{$trans}=$a[4]-$a[3]+1;
}elsif ($a[2] eq 'CDS') {
$a[8]=~/ID=([^;]+);Parent=([^;]+)/;
my ($CDS,$trans)=($1,$2);
$gff{CDS}{$trans}{$a[3]}=$a[4];
}
}
close F;
open(O,">$g.CDS.bed");
for my $chr (sort keys %{$gff{gene}}){
for my $gene (sort keys %{$gff{gene}{$chr}}){
my @trans=sort{$gff{gene}{$chr}{$gene}{$b} <=> $gff{gene}{$chr}{$gene}{$a}} keys %{$gff{gene}{$chr}{$gene}};
my $trans=$trans[0];
print O "$chr\t$gene\t$trans\t";
my @pos;
for my $s (sort{$a<=>$b} keys %{$gff{CDS}{$trans}}){
push @pos,"$s-$gff{CDS}{$trans}{$s}";
}
print O join(";",@pos),"\n";
}
}
close O;
}