forked from Axolotl233/Simple_Script
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Fasta.filter.cdspep.pl
58 lines (45 loc) · 1.21 KB
/
Fasta.filter.cdspep.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#! perl
use warnings;
use strict;
use File::Basename;
use Bio::SeqIO;
my $cds = shift;
my $pep = shift;
unless ($pep){
print STDERR "USAGE : perl $0 file.cds file.pep\n";
exit;
}
print STDERR"##### cds and pep must have same sequence name\n";
(my $pname = basename $pep) =~ s/\.pep(\.fa)?//;
(my $cname = basename $cds) =~ s/\.cds(\.fa)?//;
open O,'>',"$pname.fix.pep" or die "can't create pep file\n";
my %h;
my %c;
my $seqio_obj = Bio::SeqIO -> new (-file => $pep, -format => "fasta");
while(my $seq_obj = $seqio_obj -> next_seq){
my $seq = $seq_obj -> seq;
my $id = $seq_obj -> display_id;
$h{$id} = $seq;
}
foreach my $key (keys %h){
unless($h{$key} =~ /\w\*\w/) {
if($h{$key} =~/\*$/){
print O ">$key\n"."$h{$key}\n";
$c{$key} += 1;
}
}
}
close O;
open O,'>',"$cname.fix.cds" or die "can't create cds file\n";
my $m = Bio::SeqIO -> new (-file => $cds, -format => "fasta");
while(my $seq_obj = $m -> next_seq){
my $id = $seq_obj -> display_id;
next unless exists $c{$id};
if ($c{$id} > 1){
print STDERR "$id duplicate\n";
}
my $seq = $seq_obj -> seq;
print O ">$id\n$seq\n";
}
print STDERR "##### All done\n";
close O;