Skip to content

Commit

Permalink
[ancestral] Check invalid args up-front
Browse files Browse the repository at this point in the history
Check that provided arguments are compatible. Where possible we use
argparse built-ins, but they don't cover everything we want to check.
This checking isn't intended to replace conditional checks in the code
which check for arg existence (as demonstrated by the additional
conditional added here); however, by checking for invalid combinations
up-front we can exit quickly.
  • Loading branch information
jameshadfield committed Nov 29, 2023
1 parent 7cb3848 commit 4af0996
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 30 deletions.
41 changes: 26 additions & 15 deletions augur/ancestral.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,15 +228,32 @@ def register_parser(parent_subparsers):

return parser

def run(args):
# Validate arguments.
def check_arg_combinations(args, is_vcf):
    """
    Reject incompatible combinations of command-line arguments.

    argparse built-ins are used where possible, but they cannot express
    every constraint checked here. Running these checks up-front lets the
    command exit quickly on an invalid combination. Downstream code must not
    treat a successful return as proof that any given argument exists.

    Parameters
    ----------
    args
        Parsed arguments; the attributes read are ``annotation``, ``genes``,
        ``translations``, ``output_sequences`` and ``output_vcf``.
    is_vcf
        Truthy when the input alignment is a VCF file.

    Raises
    ------
    AugurError
        If only some of the amino-acid reconstruction arguments were given,
        if both FASTA and VCF output were requested, or if the requested
        output format does not match the input alignment format.
    """
    # Amino acid reconstruction is all-or-nothing: the three arguments must
    # either all be supplied or all be omitted.
    aa_inputs = [args.annotation, args.genes, args.translations]
    supplied = sum(1 for value in aa_inputs if value)
    if 0 < supplied < len(aa_inputs):
        raise AugurError("For amino acid sequence reconstruction, you must provide an annotation file, a list of genes, and a template path to amino acid sequences.")

    wants_fasta = bool(args.output_sequences)
    wants_vcf = bool(args.output_vcf)

    # The order of these checks decides which message is reported when more
    # than one condition holds, so it is kept fixed.
    if wants_fasta and wants_vcf:
        raise AugurError("Both sequence (fasta) and VCF output have been requested, but these are incompatible.")

    if wants_fasta and is_vcf:
        raise AugurError("Sequence (fasta) output has been requested but the input alignment is VCF.")

    if wants_vcf and not is_vcf:
        raise AugurError("VCF output has been requested but the input alignment is not VCF.")


def run(args):
# check alignment type, set flags, read in if VCF
is_vcf = any([args.alignment.lower().endswith(x) for x in ['.vcf', '.vcf.gz']])
ref = None
check_arg_combinations(args, is_vcf)

try:
T = read_tree(args.tree)
Expand Down Expand Up @@ -351,19 +368,13 @@ def run(args):
write_json(anc_seqs, out_name)
print("ancestral mutations written to", out_name, file=sys.stdout)

if args.output_sequences:
if args.output_vcf:
# TODO: This should be an error and we should check for this
# unsupported combination of arguments at the beginning of the
# script to avoid wasting time for users.
print("WARNING: augur only supports sequence output for FASTA alignments and not for VCFs.", file=sys.stderr)
else:
records = [
SeqRecord(Seq(node_data["sequence"]), id=node_name, description="")
for node_name, node_data in anc_seqs["nodes"].items()
]
SeqIO.write(records, args.output_sequences, "fasta")
print("ancestral sequences FASTA written to", args.output_sequences, file=sys.stdout)
if not is_vcf and args.output_sequences:
records = [
SeqRecord(Seq(node_data["sequence"]), id=node_name, description="")
for node_name, node_data in anc_seqs["nodes"].items()
]
SeqIO.write(records, args.output_sequences, "fasta")
print("ancestral sequences FASTA written to", args.output_sequences, file=sys.stdout)

# If VCF, output VCF including new ancestral seqs
if is_vcf:
Expand Down

This file was deleted.

44 changes: 44 additions & 0 deletions tests/functional/ancestral/cram/invalid-args.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
Setup

$ source "$TESTDIR"/_setup.sh

Input FASTA + VCF output is not possible

$ ${AUGUR} ancestral \
> --tree $TESTDIR/../data/tree.nwk \
> --alignment $TESTDIR/../data/aligned.fasta \
> --output-vcf "$CRAMTMP/$TESTFILE/output.vcf" > /dev/null
ERROR: VCF output has been requested but the input alignment is not VCF.
[2]

Input VCF + FASTA output is not possible (Note that the input file doesn't exist, but we exit before that's checked)

$ ${AUGUR} ancestral \
> --tree $TESTDIR/../data/tree.nwk \
> --alignment $TESTDIR/../data/snps.vcf \
> --output-sequences "$CRAMTMP/$TESTFILE/output.fasta" > /dev/null
ERROR: Sequence (fasta) output has been requested but the input alignment is VCF.
[2]

Output FASTA _and_ VCF is not possible

$ ${AUGUR} ancestral \
> --tree $TESTDIR/../data/tree.nwk \
> --alignment $TESTDIR/../data/aligned.fasta \
> --output-vcf "$CRAMTMP/$TESTFILE/output.vcf" \
> --output-sequences "$CRAMTMP/$TESTFILE/output.fasta" > /dev/null
ERROR: Both sequence (fasta) and VCF output have been requested, but these are incompatible.
[2]


Try to infer ancestral amino acid sequences without all required arguments.
This should fail.

$ ${AUGUR} ancestral \
> --tree $TESTDIR/../data/tree.nwk \
> --alignment $TESTDIR/../data/aligned.fasta \
> --annotation $TESTDIR/../data/zika_outgroup.gb \
> --genes ENV PRO \
> --output-node-data "$CRAMTMP/$TESTFILE/ancestral_mutations.json" > /dev/null
ERROR: For amino acid sequence reconstruction, you must provide an annotation file, a list of genes, and a template path to amino acid sequences.
[2]

0 comments on commit 4af0996

Please sign in to comment.