From 8cd6a1384301161117252f8e6f612f7332bd9416 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 8 Mar 2024 13:57:13 -0800 Subject: [PATCH] newreference: If gene is given but not found in genbank, error out Currently, if the --gene option is used and the gene name is not found, the script will use the entire genome which may cause some silent undesired behaviors. This commit changes that such that the script will error out if the gene is not found in the GenBank file as this indicates the gene name may be misspelled or the user may be using the wrong GenBank file. If the --gene option is not used, the script will continue to process the entire genome as expected. Co-authored-by: Jover Lee --- scripts/newreference.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/newreference.py b/scripts/newreference.py index c6b47ec..063dafb 100644 --- a/scripts/newreference.py +++ b/scripts/newreference.py @@ -3,6 +3,7 @@ from Bio.SeqFeature import SeqFeature, FeatureLocation, Seq import shutil import argparse +import sys def new_reference(referencefile, outgenbank, outfasta, gene): ref = SeqIO.read(referencefile, "genbank") @@ -17,6 +18,12 @@ def new_reference(referencefile, outgenbank, outfasta, gene): startofgene = int(list(feature.location)[0]) endofgene = int(list(feature.location)[-1])+1 + # If user provides a --gene 'some name' is not found, print a warning and use the entire genome. + # Otherwise do not print a warning. + if(gene is not None and startofgene is None and endofgene is None): + print(f"ERROR: No '{gene}' was found under 'gene' or 'CDS' features in the GenBank file.", file=sys.stderr) + sys.exit(1) + record = ref[startofgene:endofgene] source_feature = SeqFeature(FeatureLocation(start=0, end=len(record)), type='source', qualifiers=ref_source_feature.qualifiers)