diff --git a/augur/ancestral.py b/augur/ancestral.py index 64b25d6fe..fde4b5bbe 100644 --- a/augur/ancestral.py +++ b/augur/ancestral.py @@ -228,15 +228,32 @@ def register_parser(parent_subparsers): return parser -def run(args): - # Validate arguments. +def check_arg_combinations(args, is_vcf): + """ + Check that provided arguments are compatible. + Where possible we use argparse built-ins, but they don't cover everything we want to check. + This checking shouldn't be used by downstream code to assume arguments exist, however by checking for + invalid combinations up-front we can exit quickly. + """ aa_arguments = (args.annotation, args.genes, args.translations) if any(aa_arguments) and not all(aa_arguments): raise AugurError("For amino acid sequence reconstruction, you must provide an annotation file, a list of genes, and a template path to amino acid sequences.") + if args.output_sequences and args.output_vcf: + raise AugurError("Both sequence (fasta) and VCF output have been requested, but these are incompatible.") + + if is_vcf and args.output_sequences: + raise AugurError("Sequence (fasta) output has been requested but the input alignment is VCF.") + + if not is_vcf and args.output_vcf: + raise AugurError("VCF output has been requested but the input alignment is not VCF.") + + +def run(args): # check alignment type, set flags, read in if VCF is_vcf = any([args.alignment.lower().endswith(x) for x in ['.vcf', '.vcf.gz']]) ref = None + check_arg_combinations(args, is_vcf) try: T = read_tree(args.tree) @@ -351,19 +368,13 @@ def run(args): write_json(anc_seqs, out_name) print("ancestral mutations written to", out_name, file=sys.stdout) - if args.output_sequences: - if args.output_vcf: - # TODO: This should be an error and we should check for this - # unsupported combination of arguments at the beginning of the - # script to avoid wasting time for users. - print("WARNING: augur only supports sequence output for FASTA alignments and not for VCFs.", file=sys.stderr) - else: - records = [ - SeqRecord(Seq(node_data["sequence"]), id=node_name, description="") - for node_name, node_data in anc_seqs["nodes"].items() - ] - SeqIO.write(records, args.output_sequences, "fasta") - print("ancestral sequences FASTA written to", args.output_sequences, file=sys.stdout) + if not is_vcf and args.output_sequences: + records = [ + SeqRecord(Seq(node_data["sequence"]), id=node_name, description="") + for node_name, node_data in anc_seqs["nodes"].items() + ] + SeqIO.write(records, args.output_sequences, "fasta") + print("ancestral sequences FASTA written to", args.output_sequences, file=sys.stdout) # If VCF, output VCF including new ancestral seqs if is_vcf: diff --git a/tests/functional/ancestral/cram/infer-amino-acid-sequences-with-missing-arguments.t b/tests/functional/ancestral/cram/infer-amino-acid-sequences-with-missing-arguments.t deleted file mode 100644 index 3fd014296..000000000 --- a/tests/functional/ancestral/cram/infer-amino-acid-sequences-with-missing-arguments.t +++ /dev/null @@ -1,15 +0,0 @@ -Setup - - $ source "$TESTDIR"/_setup.sh - -Try to infer ancestral amino acid sequences without all required arguments. -This should fail. - - $ ${AUGUR} ancestral \ - > --tree $TESTDIR/../data/tree.nwk \ - > --alignment $TESTDIR/../data/aligned.fasta \ - > --annotation $TESTDIR/../data/zika_outgroup.gb \ - > --genes ENV PRO \ - > --output-node-data "$CRAMTMP/$TESTFILE/ancestral_mutations.json" > /dev/null - ERROR: For amino acid sequence reconstruction, you must provide an annotation file, a list of genes, and a template path to amino acid sequences. - [2] diff --git a/tests/functional/ancestral/cram/invalid-args.t b/tests/functional/ancestral/cram/invalid-args.t new file mode 100644 index 000000000..40075dc58 --- /dev/null +++ b/tests/functional/ancestral/cram/invalid-args.t @@ -0,0 +1,44 @@ +Setup + + $ source "$TESTDIR"/_setup.sh + +Input FASTA + VCF output is not possible + + $ ${AUGUR} ancestral \ + > --tree $TESTDIR/../data/tree.nwk \ + > --alignment $TESTDIR/../data/aligned.fasta \ + > --output-vcf "$CRAMTMP/$TESTFILE/output.vcf" > /dev/null + ERROR: VCF output has been requested but the input alignment is not VCF. + [2] + +Input VCF + FASTA output is not possible (Note that the input file doesn't exist, but we exit before that's checked) + + $ ${AUGUR} ancestral \ + > --tree $TESTDIR/../data/tree.nwk \ + > --alignment $TESTDIR/../data/snps.vcf \ + > --output-sequences "$CRAMTMP/$TESTFILE/output.fasta" > /dev/null + ERROR: Sequence (fasta) output has been requested but the input alignment is VCF. + [2] + +Output FASTA _and_ VCF is not possible + + $ ${AUGUR} ancestral \ + > --tree $TESTDIR/../data/tree.nwk \ + > --alignment $TESTDIR/../data/aligned.fasta \ + > --output-vcf "$CRAMTMP/$TESTFILE/output.vcf" \ + > --output-sequences "$CRAMTMP/$TESTFILE/output.fasta" > /dev/null + ERROR: Both sequence (fasta) and VCF output have been requested, but these are incompatible. + [2] + + +Try to infer ancestral amino acid sequences without all required arguments. +This should fail. + + $ ${AUGUR} ancestral \ + > --tree $TESTDIR/../data/tree.nwk \ + > --alignment $TESTDIR/../data/aligned.fasta \ + > --annotation $TESTDIR/../data/zika_outgroup.gb \ + > --genes ENV PRO \ + > --output-node-data "$CRAMTMP/$TESTFILE/ancestral_mutations.json" > /dev/null + ERROR: For amino acid sequence reconstruction, you must provide an annotation file, a list of genes, and a template path to amino acid sequences. + [2]