From 70a21e3762a717377c2df2b902cf852ace4a777c Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 15 Oct 2024 11:24:08 -0700
Subject: [PATCH] Parse unnamed inputs

Preparing for sequence support, which allows unnamed inputs.
---
 augur/merge.py                               | 44 ++++++++++++++++----
 tests/functional/merge/cram/merge-metadata.t |  2 +-
 2 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/augur/merge.py b/augur/merge.py
index c49e77a39..38b3eccbc 100644
--- a/augur/merge.py
+++ b/augur/merge.py
@@ -186,7 +186,7 @@ def get_metadata(
 ) -> List[NamedMetadata]:
     # Validate --metadata arguments
     try:
-        metadata = parse_named_inputs(input_metadata)
+        metadata = parse_inputs(input_metadata, require_names=True)
     except UnnamedInputError as e:
         raise AugurError(dedent(f"""\
             All metadata inputs must be assigned a name, e.g. with NAME=FILE.
@@ -558,18 +558,41 @@ def quote(s):
     return quoted if quoted else shquote('')
 
 
-def parse_named_inputs(inputs: Sequence[str]):
-    if unnamed := [x for x in inputs if "=" not in x or x.startswith("=")]:
-        raise UnnamedInputError(unnamed)
+def parse_inputs(inputs: Sequence[str], require_names: bool = False):
+    """
+    Parse inputs into tuples of (name, file).
+    name is an empty string for unnamed inputs.
+
+    If names are required, this function can raise UnnamedInputError or DuplicateInputNameError.
+    If names are optional, this function can raise InvalidNamedInputError or DuplicateInputNameError.
+    """
+    # These are only used for error checking.
+    # The original order of inputs should still be used at the end.
+    invalid_named_inputs: List[str] = []
+    named_inputs: List[str] = []
+    unnamed_inputs: List[str] = []
+    for x in inputs:
+        if x.startswith("="):
+            invalid_named_inputs.append(x)
+        elif "=" in x:
+            named_inputs.append(x)
+        else:
+            unnamed_inputs.append(x)
 
-    named_inputs = pairs(inputs)
+    if require_names:
+        if bad_inputs := [*invalid_named_inputs, *unnamed_inputs]:
+            raise UnnamedInputError(bad_inputs)
+    elif invalid_named_inputs:
+        raise InvalidNamedInputError(invalid_named_inputs)
+
+    input_pairs = pairs(inputs)
 
     if duplicate_names := [name for name, count
-                                in count_unique(name for name, _ in named_inputs)
-                                if count > 1]:
+                                in count_unique(name for name, _ in input_pairs)
+                                if name != "" and count > 1]:
         raise DuplicateInputNameError(duplicate_names)
 
-    return named_inputs
+    return input_pairs
 
 
 class UnnamedInputError(Exception):
@@ -577,6 +600,11 @@ def __init__(self, unnamed: Sequence[str]):
         self.unnamed = unnamed
 
 
+class InvalidNamedInputError(Exception):
+    def __init__(self, invalid: Sequence[str]):
+        self.invalid = invalid
+
+
 class DuplicateInputNameError(Exception):
     def __init__(self, duplicates: Sequence[str]):
         self.duplicates = duplicates
diff --git a/tests/functional/merge/cram/merge-metadata.t b/tests/functional/merge/cram/merge-metadata.t
index 87b29ad79..c2270f235 100644
--- a/tests/functional/merge/cram/merge-metadata.t
+++ b/tests/functional/merge/cram/merge-metadata.t
@@ -246,8 +246,8 @@ Metadata names are required.
   
   The following inputs were missing a name:
   
-    'x.tsv'
     '=y.tsv'
+    'x.tsv'
   
   [2]
 