From 4af099649a73840556a4f24171be9c1bb0c07c8a Mon Sep 17 00:00:00 2001 From: James Hadfield Date: Wed, 29 Nov 2023 14:51:03 +1300 Subject: [PATCH] [ancestral] Check invalid args up-front Check that provided arguments are compatible. Where possible we use argparse built-ins, but they don't cover everything we want to check. This checking isn't intended to replace conditional checks in the code which check for arg existence (as demonstrated by the additional conditional added here), however by checking for invalid combinations up-front we can exit quickly. --- augur/ancestral.py | 41 ++++++++++------- ...no-acid-sequences-with-missing-arguments.t | 15 ------- .../functional/ancestral/cram/invalid-args.t | 44 +++++++++++++++++++ 3 files changed, 70 insertions(+), 30 deletions(-) delete mode 100644 tests/functional/ancestral/cram/infer-amino-acid-sequences-with-missing-arguments.t create mode 100644 tests/functional/ancestral/cram/invalid-args.t diff --git a/augur/ancestral.py b/augur/ancestral.py index 64b25d6fe..fde4b5bbe 100644 --- a/augur/ancestral.py +++ b/augur/ancestral.py @@ -228,15 +228,32 @@ def register_parser(parent_subparsers): return parser -def run(args): - # Validate arguments. +def check_arg_combinations(args, is_vcf): + """ + Check that provided arguments are compatible. + Where possible we use argparse built-ins, but they don't cover everything we want to check. + This checking shouldn't be used by downstream code to assume arguments exist, however by checking for + invalid combinations up-front we can exit quickly. + """ aa_arguments = (args.annotation, args.genes, args.translations) if any(aa_arguments) and not all(aa_arguments): raise AugurError("For amino acid sequence reconstruction, you must provide an annotation file, a list of genes, and a template path to amino acid sequences.") + if args.output_sequences and args.output_vcf: + raise AugurError("Both sequence (fasta) and VCF output have been requested, but these are incompatible.") + + if is_vcf and args.output_sequences: + raise AugurError("Sequence (fasta) output has been requested but the input alignment is VCF.") + + if not is_vcf and args.output_vcf: + raise AugurError("VCF output has been requested but the input alignment is not VCF.") + + +def run(args): # check alignment type, set flags, read in if VCF is_vcf = any([args.alignment.lower().endswith(x) for x in ['.vcf', '.vcf.gz']]) ref = None + check_arg_combinations(args, is_vcf) try: T = read_tree(args.tree) @@ -351,19 +368,13 @@ def run(args): write_json(anc_seqs, out_name) print("ancestral mutations written to", out_name, file=sys.stdout) - if args.output_sequences: - if args.output_vcf: - # TODO: This should be an error and we should check for this - # unsupported combination of arguments at the beginning of the - # script to avoid wasting time for users. - print("WARNING: augur only supports sequence output for FASTA alignments and not for VCFs.", file=sys.stderr) - else: - records = [ - SeqRecord(Seq(node_data["sequence"]), id=node_name, description="") - for node_name, node_data in anc_seqs["nodes"].items() - ] - SeqIO.write(records, args.output_sequences, "fasta") - print("ancestral sequences FASTA written to", args.output_sequences, file=sys.stdout) + if not is_vcf and args.output_sequences: + records = [ + SeqRecord(Seq(node_data["sequence"]), id=node_name, description="") + for node_name, node_data in anc_seqs["nodes"].items() + ] + SeqIO.write(records, args.output_sequences, "fasta") + print("ancestral sequences FASTA written to", args.output_sequences, file=sys.stdout) # If VCF, output VCF including new ancestral seqs if is_vcf: diff --git a/tests/functional/ancestral/cram/infer-amino-acid-sequences-with-missing-arguments.t b/tests/functional/ancestral/cram/infer-amino-acid-sequences-with-missing-arguments.t deleted file mode 100644 index 3fd014296..000000000 --- a/tests/functional/ancestral/cram/infer-amino-acid-sequences-with-missing-arguments.t +++ /dev/null @@ -1,15 +0,0 @@ -Setup - - $ source "$TESTDIR"/_setup.sh - -Try to infer ancestral amino acid sequences without all required arguments. -This should fail. - - $ ${AUGUR} ancestral \ - > --tree $TESTDIR/../data/tree.nwk \ - > --alignment $TESTDIR/../data/aligned.fasta \ - > --annotation $TESTDIR/../data/zika_outgroup.gb \ - > --genes ENV PRO \ - > --output-node-data "$CRAMTMP/$TESTFILE/ancestral_mutations.json" > /dev/null - ERROR: For amino acid sequence reconstruction, you must provide an annotation file, a list of genes, and a template path to amino acid sequences. - [2] diff --git a/tests/functional/ancestral/cram/invalid-args.t b/tests/functional/ancestral/cram/invalid-args.t new file mode 100644 index 000000000..40075dc58 --- /dev/null +++ b/tests/functional/ancestral/cram/invalid-args.t @@ -0,0 +1,44 @@ +Setup + + $ source "$TESTDIR"/_setup.sh + +Input FASTA + VCF output is not possible + + $ ${AUGUR} ancestral \ + > --tree $TESTDIR/../data/tree.nwk \ + > --alignment $TESTDIR/../data/aligned.fasta \ + > --output-vcf "$CRAMTMP/$TESTFILE/output.vcf" > /dev/null + ERROR: VCF output has been requested but the input alignment is not VCF. + [2] + +Input VCF + FASTA output is not possible (Note that the input file doesn't exist, but we exit before that's checked) + + $ ${AUGUR} ancestral \ + > --tree $TESTDIR/../data/tree.nwk \ + > --alignment $TESTDIR/../data/snps.vcf \ + > --output-sequences "$CRAMTMP/$TESTFILE/output.fasta" > /dev/null + ERROR: Sequence (fasta) output has been requested but the input alignment is VCF. + [2] + +Output FASTA _and_ VCF is not possible + + $ ${AUGUR} ancestral \ + > --tree $TESTDIR/../data/tree.nwk \ + > --alignment $TESTDIR/../data/aligned.fasta \ + > --output-vcf "$CRAMTMP/$TESTFILE/output.vcf" \ + > --output-sequences "$CRAMTMP/$TESTFILE/output.fasta" > /dev/null + ERROR: Both sequence (fasta) and VCF output have been requested, but these are incompatible. + [2] + + +Try to infer ancestral amino acid sequences without all required arguments. +This should fail. + + $ ${AUGUR} ancestral \ + > --tree $TESTDIR/../data/tree.nwk \ + > --alignment $TESTDIR/../data/aligned.fasta \ + > --annotation $TESTDIR/../data/zika_outgroup.gb \ + > --genes ENV PRO \ + > --output-node-data "$CRAMTMP/$TESTFILE/ancestral_mutations.json" > /dev/null + ERROR: For amino acid sequence reconstruction, you must provide an annotation file, a list of genes, and a template path to amino acid sequences. + [2]