Skip to content

Commit

Permalink
testing version for compleasm on genome level integration
Browse files Browse the repository at this point in the history
  • Loading branch information
KatharinaHoff committed Nov 27, 2023
1 parent 297c39a commit 98d338c
Showing 1 changed file with 58 additions and 2 deletions.
60 changes: 58 additions & 2 deletions scripts/braker.pl
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,9 @@
$pubs{'gffread'} = "\nPertea, G., & Pertea, M. (2020). GFF utilities: GffRead and GffCompare. F1000Research, 9.\n";
$pubs{'tsebra'} = "\nGabriel, L., Hoff, K. J., Bruna, T., Borodovsky, M., & Stanke, M. (2021). TSEBRA: transcript selector for BRAKER. BMC Bioinformatics, 22:566.\n";
$pubs{'braker3'} = "\nGabriel, L., Bruna, T., Hoff, K. J., Ebel, M., Lomsadze, A., Borodovsky, M., & Stanke, M. (2023). BRAKER3: Fully Automated Genome Annotation Using RNA-Seq and Protein Evidence with GeneMark-ETP, AUGUSTUS and TSEBRA. bioRxiv, https://doi.org/10.1101/2023.06.10.544449.\n";
$pubs{'busco'} = "\nSimao, F. A., Waterhouse, R. M., Ioannidis, P., Kriventseva, E. V., & Zdobnov, E. M. (2015). BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs. Bioinformatics, 31(19), 3210-3212.\n";
$pubs{'miniprot'} = "\nLi, H. (2023). Protein-to-genome alignment with miniprot. Bioinformatics, 30(1):btad014.\n";
$pubs{'compleasm'} = "\nHuang, N., & Li, H. (2023). compleasm: a faster and more accurate reimplementation of BUSCO. Bioinformatics 39(10):btad595.\n";


# Make paths to input files absolute ###########################################
Expand Down Expand Up @@ -877,7 +880,9 @@
set_AUGUSTUS_SCRIPTS_PATH();
fix_AUGUSTUS_CONFIG_PATH();
set_PYTHON3_PATH();
set_COMPLEASM_PATH();
if (defined($busco_lineage)){
set_COMPLEASM_PATH(); # todo: make sure that compleasm to hints actually passes the path, or make the hints script a hints parser, only
}

if($UTR eq "on" || $addUTR eq "on"){
set_JAVA_PATH();
Expand Down Expand Up @@ -1364,7 +1369,7 @@
my @tmp_prot_seq;
foreach (@prot_seq_files) {
push(@tmp_prot_seq, $_);
check_fasta_headers($_, 0);
check_fasta_headers($_, 0); # todo: this generates a header map that looks like it's genome headers, needs to be fixed
}
@prot_seq_files = @tmp_prot_seq;
}
Expand Down Expand Up @@ -1448,6 +1453,11 @@
run_prothint();
}

# make BUSCO hints with compleasm
if (defined($busco_lineage)) {
make_compleasm_hints();
}

# make hints from RNA-Seq
if ( !$ETPmode && @bam ) {
make_rnaseq_hints();
Expand Down Expand Up @@ -4495,6 +4505,52 @@ sub make_bam_file {
}
}

#################### make_compleasm_hints ######################################
# * make hints from compleasm with BUSCOs by calling compleasm_to_hints.py
#   (located via find() in the AUGUSTUS bin/scripts/config paths) on $genome
#   with lineage $busco_lineage and $CPU threads
# * this will only pick up BUSCOs without frameshift that are
#   complete/duplicated
# * the hints are trimmed by 3 nt on each end, converted to CDSpart
# * M hints (enforced in prediction)
# * the resulting $otherfilesDir/compleasm_hints.gff is appended to
#   $otherfilesDir/hintsfile.gff
# * stdout and stderr of the call go to
#   $errorfilesDir/compleasm_to_hints.stderr
# * takes no arguments; calls clean_abort() if the command fails or if a
#   file cannot be opened or closed
################################################################################

sub make_compleasm_hints {
    print LOG "\# "
        . (localtime)
        . ": Running compleasm and converting the output to hints\n" if ($v > 2);
    my $compleasm_hints = "$otherfilesDir/compleasm_hints.gff";
    my $hintsfile       = "$otherfilesDir/hintsfile.gff";

    # call compleasm_to_hints.py from Augustus scripts
    $string = find(
        "compleasm_to_hints.py", $AUGUSTUS_BIN_PATH,
        $AUGUSTUS_SCRIPTS_PATH,  $AUGUSTUS_CONFIG_PATH
    );
    $errorfile = "$errorfilesDir/compleasm_to_hints.stderr";
    $cmdString = "$string -p $COMPLEASM_PATH/compleasm.py -g $genome -d $busco_lineage -t $CPU "
        . "-o $compleasm_hints 1> $errorfile 2>&1";
    print LOG "$cmdString\n" if ($v > 3);

    # The command has to be executed before its output can be read; without
    # this call $compleasm_hints never exists and the open below always fails.
    system("$cmdString") == 0
        or clean_abort("$AUGUSTUS_CONFIG_PATH/species/$species",
            $useexisting, "ERROR in file " . __FILE__
            . " at line ". __LINE__
            . "\nFailed to execute: $cmdString!\n");

    open(my $chints_fh, "<", $compleasm_hints) or
        clean_abort("$AUGUSTUS_CONFIG_PATH/species/$species",
            $useexisting, "ERROR in file " . __FILE__
            . " at line ". __LINE__
            . "\nFailed to open $compleasm_hints!\n");
    # append mode: hints that are already in hintsfile.gff are kept
    open(my $hints_fh, ">>", $hintsfile) or
        clean_abort("$AUGUSTUS_CONFIG_PATH/species/$species",
            $useexisting, "ERROR in file " . __FILE__ ." at line "
            . __LINE__ ."\nfailed to open file $hintsfile!\n");

    while (my $line = <$chints_fh>) {
        print {$hints_fh} $line;
    }

    close($chints_fh) or clean_abort("$AUGUSTUS_CONFIG_PATH/species/$species",
        $useexisting, "ERROR in file " . __FILE__
        . " at line ". __LINE__
        . "\nFailed to close $compleasm_hints!\n");

    # buffered write errors only surface at close, so this check matters
    close($hints_fh) or clean_abort("$AUGUSTUS_CONFIG_PATH/species/$species",
        $useexisting, "ERROR in file " . __FILE__ ." at line "
        . __LINE__ ."\nfailed to close file $hintsfile!\n");

    print LOG "\# " . (localtime)
        . ": Generating hints from compleasm (genome level) finished.\n" if ($v > 2);
    return;
}

####################### make_rnaseq_hints ######################################
# * make hints from BAM files
# * merge hints files from different BAM files
Expand Down

0 comments on commit 98d338c

Please sign in to comment.