-
Notifications
You must be signed in to change notification settings - Fork 5
/
gff3_to_refgene.pl
72 lines (57 loc) · 2.37 KB
/
gff3_to_refgene.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# 导入 -> 系统 package
use strict;
use warnings;
use File::Spec;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use Getopt::Long;
# 定义 -> 常量
use constant SCRIPTDIR => (File::Spec->splitpath(File::Spec->rel2abs($0)))[1];
use constant PWD => $ENV{"PWD"};
# 定义 -> 核心变量
my $gff3ToGenePred = "/home/genesky/software/ucsc/bin/gff3ToGenePred";
my $retrieve_seq_from_fasta = "/home/genesky/software/annovar/2018Apr16/retrieve_seq_from_fasta.pl";
# 检测 -> 脚本输入
my ($gff3, $genome, $prefix, $output_dir, $if_help);
GetOptions(
"gff3=s" => \$gff3,
"genome|g=s" => \$genome,
"prefix|p=s" => \$prefix,
"output_dir|o=s" => \$output_dir,
"help|h" => \$if_help,
);
die help() if (defined $if_help or not defined $gff3 or not defined $prefix);
###################################################################### 主程序
my $refGene0 = "$output_dir/$prefix\_refGene0.txt";
my $refGeneMrna = "$output_dir/$prefix\_refGeneMrna.fa";
my $refGene = "$output_dir/$prefix\_refGene.txt";
system "$gff3ToGenePred -useName $gff3 $refGene0";
system "sed -i s/transcript\://g $refGene0";
system "sed -i s/gene\://g $refGene0";
system "nl $refGene0 > $refGene";
system "rm $refGene0";
if (defined($genome)){
if (substr("$genome", -2) eq 'gz'){
my $tmp = substr("$genome", 0, -3);
gunzip $genome => $tmp or die "gunzip failed: $GunzipError\n";
system "perl $retrieve_seq_from_fasta --format refGene --seqfile $tmp $refGene --out $refGeneMrna";
system "rm $tmp";
}else{
system "perl $retrieve_seq_from_fasta --format refGene --seqfile $genome $refGene --out $refGeneMrna";
}
}
###################################################################### 子函数
sub help{
my $info = "
Program: GFF3 转 refGene
Version: 2019-11-08
Contact: 341 zhangqing
Usage: perl ".(File::Spec->splitpath(File::Spec->rel2abs($0)))[2]." [options]
Options:
--gff3 GFF3文件,例:Arabidopsis_thaliana.TAIR10.45.gff3,必填
--genome/-g 基因组文件,例:Arabidopsis_thaliana.TAIR10.dna.toplevel.fa,选填
--prefix/-p 结果文件前缀(物种名称或其首字母缩写),必填
--output_dir/-o 结果输出目录
--help/-h 查看帮助文档
\n";
return $info;
}