nextstrain · j23414 · Mar 12, 2024 · Mar 8, 2024 · Mar 8, 2024 · jameshadfield
diff --git a/scripts/newreference.py b/scripts/newreference.py
@@ -3,18 +3,27 @@
 from Bio.SeqFeature import SeqFeature, FeatureLocation, Seq
 import shutil
 import argparse
+import sys
 
 def new_reference(referencefile, outgenbank, outfasta, gene):
     ref = SeqIO.read(referencefile, "genbank")
+    startofgene = None
+    endofgene = None
     for feature in ref.features:
         if feature.type == 'source':
             ref_source_feature = feature
-        if feature.type =='gene':
+        if feature.type =='gene' or feature.type == 'CDS':
             a = list(feature.qualifiers.items())[0][-1][0]
             if a == gene:
                 startofgene = int(list(feature.location)[0])
                 endofgene =  int(list(feature.location)[-1])+1
 
+    # If user provides a --gene 'some name' is not found, print a warning and use the entire genome.
+    # Otherwise do not print a warning.
+    if(gene is not None and startofgene is None and endofgene is None):
+        print(f"ERROR: No '{gene}' was found under 'gene' or 'CDS' features in the GenBank file.", file=sys.stderr)
+        sys.exit(1)
+
     record = ref[startofgene:endofgene]
     source_feature =  SeqFeature(FeatureLocation(start=0, end=len(record)), type='source',
                                  qualifiers=ref_source_feature.qualifiers)