Skip to content

Commit

Permalink
Merge pull request #18884 from jmchilton/test_format
Browse files Browse the repository at this point in the history
Implement Pydantic model for workflow test format.
  • Loading branch information
mvdbeek authored Sep 25, 2024
2 parents 82b8893 + a38f610 commit 3c92414
Show file tree
Hide file tree
Showing 20 changed files with 1,076 additions and 418 deletions.
98 changes: 97 additions & 1 deletion lib/galaxy/tool_util/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,23 @@
"""

from typing import (
Any,
Dict,
List,
Optional,
Union,
)

from pydantic import BaseModel
from pydantic import (
AnyUrl,
BaseModel,
ConfigDict,
RootModel,
)
from typing_extensions import (
NotRequired,
TypedDict,
)

from .parameters import (
input_models_for_tool_source,
Expand All @@ -18,13 +30,15 @@
from .parser.interface import (
Citation,
HelpContent,
OutputCompareType,
ToolSource,
XrefDict,
)
from .parser.output_models import (
from_tool_source,
ToolOutput,
)
from .verify.assertion_models import assertions


class ParsedTool(BaseModel):
Expand Down Expand Up @@ -73,3 +87,85 @@ def parse_tool(tool_source: ToolSource) -> ParsedTool:
xrefs=xrefs,
help=help,
)


class StrictModel(BaseModel):
    # Shared base for the test-format models: keys that are not declared as
    # fields are rejected (extra="forbid") rather than accepted.
    model_config = ConfigDict(extra="forbid")


class BaseTestOutputModel(StrictModel):
    # Assertions that can be made about a single dataset-like output.
    # Every field is optional and defaults to None (not specified).

    # Where the expected content lives: Galaxy-style "file" or CWL-style
    # "path" / "location".
    file: Optional[str] = None
    path: Optional[str] = None
    location: Optional[AnyUrl] = None

    # presumably the expected datatype extension - TODO confirm against the verify code
    ftype: Optional[str] = None
    sort: Optional[bool] = None
    # Comparison method; restricted to the members of OutputCompareType.
    compare: Optional[OutputCompareType] = None
    checksum: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None
    asserts: Optional[assertions] = None

    # Tolerances - NOTE(review): which compare methods use each of these is
    # decided by the verify code, not here.
    delta: Optional[int] = None
    delta_frac: Optional[float] = None
    lines_diff: Optional[int] = None

    decompress: Optional[bool] = None


class TestDataOutputAssertions(BaseTestOutputModel):
    # Adds no fields of its own; everything comes from BaseTestOutputModel.
    pass


class TestCollectionCollectionElementAssertions(StrictModel):
    # Both fields map a string key to a per-element assertion. The value type
    # is written as a string because the TestCollectionElementAssertion alias
    # is only defined further down the module.
    elements: Optional[Dict[str, "TestCollectionElementAssertion"]] = None
    element_tests: Optional[Dict[str, "TestCollectionElementAssertion"]] = None


class TestCollectionDatasetElementAssertions(BaseTestOutputModel):
    # Adds no fields of its own; everything comes from BaseTestOutputModel.
    pass


# A collection element is checked either with the dataset-style assertions or
# with the nested-collection assertions.
TestCollectionElementAssertion = Union[
TestCollectionDatasetElementAssertions, TestCollectionCollectionElementAssertions
]
# The model above refers to this alias by string name; rebuild it now that the
# name exists so those forward references can be resolved.
TestCollectionCollectionElementAssertions.model_rebuild()


class CollectionAttributes(StrictModel):
    # Optional attribute that can be asserted on a collection output.
    collection_type: Optional[str] = None


class TestCollectionOutputAssertions(StrictModel):
    # Assertions for a collection output: per-element checks keyed by string,
    # plus optional collection-level attributes.
    elements: Optional[Dict[str, TestCollectionElementAssertion]] = None
    element_tests: Optional[Dict[str, TestCollectionElementAssertion]] = None
    attributes: Optional[CollectionAttributes] = None


# Scalar values that may be given for an output in place of an assertion mapping.
TestOutputLiteral = Union[bool, int, float, str]

# Everything a test may state about one output: collection assertions, dataset
# assertions, or a bare literal.
TestOutputAssertions = Union[TestCollectionOutputAssertions, TestDataOutputAssertions, TestOutputLiteral]

# Job description; kept as an untyped mapping, so its contents are not
# validated by these models.
JobDict = Dict[str, Any]


class TestJob(StrictModel):
    # One entry of a test document: an optional description, the job to run and
    # the assertions to make about each named output.

    # Defaults to None so the key may be omitted. An Optional[...] annotation
    # without a default is still a *required* field in Pydantic v2, which would
    # reject test entries lacking "doc" even though TestJobDict declares the
    # key as NotRequired.
    doc: Optional[str] = None
    job: JobDict
    # Maps an output name to what is asserted about that output.
    outputs: Dict[str, TestOutputAssertions]


# Root model for a whole test document: a list of TestJob entries.
Tests = RootModel[List[TestJob]]

# TODO: typed dict versions of all the above for verify code - make this Dict[str, Any] here more
# specific.
OutputChecks = Union[TestOutputLiteral, Dict[str, Any]]
OutputsDict = Dict[str, OutputChecks]


class TestJobDict(TypedDict):
    """Plain-dict shape of a single test entry.

    Only ``outputs`` must be present; ``doc`` and ``job`` may be omitted.
    """

    doc: NotRequired[str]
    job: NotRequired[JobDict]
    outputs: OutputsDict


# A whole test document in plain-dict form: a list of TestJobDict entries.
TestDicts = List[TestJobDict]
12 changes: 11 additions & 1 deletion lib/galaxy/tool_util/parser/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
ABCMeta,
abstractmethod,
)
from enum import Enum
from os.path import join
from typing import (
Any,
Expand Down Expand Up @@ -49,9 +50,18 @@ class AssertionDict(TypedDict):
XmlInt = Union[str, int]


class OutputCompareType(str, Enum):
    """Names of the comparison methods accepted for a test output's ``compare``.

    The class mixes in ``str``, so each member is also a string equal to its
    own value (and each member's name is identical to its value).
    """

    diff = "diff"
    re_match = "re_match"
    sim_size = "sim_size"
    re_match_multiline = "re_match_multiline"
    contains = "contains"
    image_diff = "image_diff"


class ToolSourceTestOutputAttributes(TypedDict):
object: NotRequired[Optional[Any]]
compare: str
compare: OutputCompareType
lines_diff: int
delta: int
delta_frac: Optional[float]
Expand Down
3 changes: 2 additions & 1 deletion lib/galaxy/tool_util/parser/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
DynamicOptions,
HelpContent,
InputSource,
OutputCompareType,
PageSource,
PagesSource,
RequiredFiles,
Expand Down Expand Up @@ -834,7 +835,7 @@ def __parse_test_attributes(
value_object = json.loads(attrib.pop("value_json"))

# Method of comparison
compare: str = attrib.pop("compare", "diff").lower()
compare: OutputCompareType = cast(OutputCompareType, attrib.pop("compare", "diff").lower())
# Number of lines to allow to vary in logs (for dates, etc)
lines_diff: int = int(attrib.pop("lines_diff", "0"))
# Allow a file size to vary if sim_size compare
Expand Down
40 changes: 40 additions & 0 deletions lib/galaxy/tool_util/validate_test_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python

import argparse
import sys

import yaml

from galaxy.tool_util.models import Tests

# Text passed to argparse as the description of this command-line utility.
DESCRIPTION = """
A small utility to verify the Planemo test format.
This script doesn't use semantic information about tools or workflows so only
the structure of the file is checked and things like inputs matching up is not
included.
"""


def validate_test_file(test_file: str) -> None:
    """Load *test_file* as YAML and validate its structure against ``Tests``.

    Returns ``None`` when the document is valid. Errors raised while opening
    the file, by ``yaml.safe_load`` or by ``Tests.model_validate`` are not
    caught here and propagate to the caller.
    """
    # Open in binary mode so the YAML loader determines the encoding itself
    # instead of decoding with the platform's locale-dependent default.
    with open(test_file, "rb") as f:
        # Not named "json": that would shadow the standard-library module name.
        test_document = yaml.safe_load(f)
    Tests.model_validate(test_document)


def arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser, which takes one positional ``test_file``."""
    cli = argparse.ArgumentParser(description=DESCRIPTION)
    cli.add_argument("test_file")
    return cli


def main(argv=None) -> None:
    """Command-line entry point: parse arguments and validate the named test file.

    ``argv`` is the list of arguments to parse; when it is ``None`` the
    process arguments (``sys.argv[1:]``) are used.
    """
    cli_args = sys.argv[1:] if argv is None else argv
    parsed = arg_parser().parse_args(cli_args)
    validate_test_file(parsed.test_file)


if __name__ == "__main__":
main()
58 changes: 34 additions & 24 deletions lib/galaxy/tool_util/verify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,8 @@ def files_image_diff(file1: str, file2: str, attributes: Optional[Dict[str, Any]
# TODO: After tool-util with this included is published, refactor planemo.test._check_output
# to use this function. There is already a comment there about breaking fewer abstractions.
# https://github.com/galaxyproject/planemo/blob/master/planemo/test/_check_output.py
# TODO: Also migrate the logic for checking non-dictionaries out of Planemo - this function now
# does that check also.
def verify_file_path_against_dict(
get_filename: GetFilenameT,
get_location: GetLocationT,
Expand All @@ -621,30 +623,38 @@ def verify_file_contents_against_dict(
test_properties,
test_data_target_dir: Optional[str] = None,
) -> None:
# Support Galaxy-like file location (using "file") or CWL-like ("path" or "location").
expected_file = test_properties.get("file", None)
if expected_file is None:
expected_file = test_properties.get("path", None)
if expected_file is None:
location = test_properties.get("location")
if location:
if location.startswith(("http://", "https://")):
assert get_location
expected_file = get_location(location)
else:
expected_file = location.split("file://", 1)[-1]

if "asserts" in test_properties:
test_properties["assert_list"] = to_test_assert_list(test_properties["asserts"])
verify(
item_label,
output_content,
attributes=test_properties,
filename=expected_file,
get_filename=get_filename,
keep_outputs_dir=test_data_target_dir,
verify_extra_files=None,
)
expected_file: Optional[str] = None
if isinstance(test_properties, dict):
# Support Galaxy-like file location (using "file") or CWL-like ("path" or "location").
expected_file = test_properties.get("file", None)
if expected_file is None:
expected_file = test_properties.get("path", None)
if expected_file is None:
location = test_properties.get("location")
if location:
if location.startswith(("http://", "https://")):
assert get_location
expected_file = get_location(location)
else:
expected_file = location.split("file://", 1)[-1]

if "asserts" in test_properties:
test_properties["assert_list"] = to_test_assert_list(test_properties["asserts"])
verify(
item_label,
output_content,
attributes=test_properties,
filename=expected_file,
get_filename=get_filename,
keep_outputs_dir=test_data_target_dir,
verify_extra_files=None,
)
else:
output_value = json.loads(output_content.decode("utf-8"))
if test_properties != output_value:
template = "Output [%s] value [%s] does not match expected value [%s]."
message = template % (item_label, output_value, test_properties)
raise AssertionError(message)


__all__ = [
Expand Down
Loading

0 comments on commit 3c92414

Please sign in to comment.