diff --git a/scripts/newreference.py b/scripts/newreference.py index 0d7b9e5..063dafb 100644 --- a/scripts/newreference.py +++ b/scripts/newreference.py @@ -3,18 +3,27 @@ from Bio.SeqFeature import SeqFeature, FeatureLocation, Seq import shutil import argparse +import sys def new_reference(referencefile, outgenbank, outfasta, gene): ref = SeqIO.read(referencefile, "genbank") + startofgene = None + endofgene = None for feature in ref.features: if feature.type == 'source': ref_source_feature = feature - if feature.type =='gene': + if feature.type =='gene' or feature.type == 'CDS': a = list(feature.qualifiers.items())[0][-1][0] if a == gene: startofgene = int(list(feature.location)[0]) endofgene = int(list(feature.location)[-1])+1 + # If user provides a --gene 'some name' is not found, print a warning and use the entire genome. + # Otherwise do not print a warning. + if(gene is not None and startofgene is None and endofgene is None): + print(f"ERROR: No '{gene}' was found under 'gene' or 'CDS' features in the GenBank file.", file=sys.stderr) + sys.exit(1) + record = ref[startofgene:endofgene] source_feature = SeqFeature(FeatureLocation(start=0, end=len(record)), type='source', qualifiers=ref_source_feature.qualifiers)