From 98b4d215ee763c75e43c655ace98dcfc495f84dd Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 4 Nov 2024 12:20:55 +0100 Subject: [PATCH 1/3] Increase test coverage for DocumentValidator's root tag validation To detect false positives. --- server/galaxyls/tests/unit/test_validation.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/server/galaxyls/tests/unit/test_validation.py b/server/galaxyls/tests/unit/test_validation.py index 7904a23..8a4a104 100644 --- a/server/galaxyls/tests/unit/test_validation.py +++ b/server/galaxyls/tests/unit/test_validation.py @@ -36,6 +36,19 @@ class TestDocumentValidatorClass: ('\n', True), ("test", False), ("", False), + ("", False), + ( + '\n\n', + False, + ), + ( + '\n\n', + False, + ), + ( + '\n\n None: @@ -45,3 +58,43 @@ def test_has_valid_root_returns_expected(self, source: str, expected: bool) -> N actual = validator.has_valid_root(document) assert actual == expected + + @pytest.mark.parametrize( + "source, expected", + [ + ("", None), + (" ", None), + (" \n ", None), + ("test", None), + ("<", ""), + ("", "tool"), + (" ", "tool"), + ("\n", "tool"), + ("\n ", "tool"), + (" \n ", "tool"), + ("unexpected ", "tool"), + (" unexpected ", "tool"), + ("\nunexpected\n ", "tool"), + ('', "tool"), + ('\n', "tool"), + ("", "macros"), + ('\n', "macros"), + ("", "test"), + ('', "test"), + ("", "visualization"), + ('\n\n', None), + ( + '\n\n', + "visualization", + ), + ], + ) + def test_get_document_root_tag_returns_expected(self, source: str, expected: bool) -> None: + document = TestUtils.to_document(source) + validator = DocumentValidator() + + actual = validator.get_document_root_tag(document) + + assert actual == expected From a13ab131d5aae80d81215d077f51b9333f59a74f Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 4 Nov 2024 12:22:09 +0100 Subject: [PATCH 2/3] Increase MAX_PEEK_CONTENT to 1000 in DocumentValidator As some documents may contain extra definitions before the root tag. --- server/galaxyls/services/validation.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/galaxyls/services/validation.py b/server/galaxyls/services/validation.py index 5a1d093..435e7aa 100644 --- a/server/galaxyls/services/validation.py +++ b/server/galaxyls/services/validation.py @@ -5,9 +5,7 @@ from galaxyls.services.xml.types import DocumentType -MAX_PEEK_CONTENT = 100 -TAG_GROUP_NAME = "tag" -TAG_REGEX = rf"[\n\s]*?.*?[\n\s]*?<(?!\?)(?P<{TAG_GROUP_NAME}>[\w]*)" +MAX_PEEK_CONTENT = 1000 class DocumentValidator: From 1ad7558ccf866b19c13db769cba39dfdf52e4094 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Mon, 4 Nov 2024 12:22:59 +0100 Subject: [PATCH 3/3] Refactor DocumentValidator to improve regex matching Fixes some false positives when detecting a valid tool document. --- server/galaxyls/services/validation.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/server/galaxyls/services/validation.py b/server/galaxyls/services/validation.py index 435e7aa..b5c58cd 100644 --- a/server/galaxyls/services/validation.py +++ b/server/galaxyls/services/validation.py @@ -6,6 +6,9 @@ from galaxyls.services.xml.types import DocumentType MAX_PEEK_CONTENT = 1000 +TAG_GROUP_NAME = "root_tag" +TAG_REGEX = r"[\n\s]*?.*?[\n\s]*?<(?!\?)(?!\!)(?P[\w]*)" +SUPPORTED_ROOT_TAGS = [e.name.lower() for e in DocumentType if e != DocumentType.UNKNOWN] class DocumentValidator: @@ -17,17 +20,15 @@ def has_valid_root(cls, document: Document) -> bool: or is an empty document.""" if DocumentValidator.is_empty_document(document): return True - root = DocumentValidator._get_document_root_tag(document) - if root is not None: - root_tag = root.upper() - supported = [e.name for e in DocumentType if e != DocumentType.UNKNOWN] - return root_tag == "" or root_tag in supported + root_tag = DocumentValidator.get_document_root_tag(document) + if root_tag is not None: + return root_tag == "" or root_tag in SUPPORTED_ROOT_TAGS return False @classmethod def is_tool_document(cls, document: Document) -> bool: """Checks if the document's root element is .""" - root = DocumentValidator._get_document_root_tag(document) + root = DocumentValidator.get_document_root_tag(document) if root is not None: root_tag = root.upper() return root_tag == DocumentType.TOOL.name @@ -39,11 +40,11 @@ def is_empty_document(cls, document: Document) -> bool: return not document.source or document.source.isspace() @classmethod - def _get_document_root_tag(cls, document: Document) -> Optional[str]: + def get_document_root_tag(cls, document: Document) -> Optional[str]: """Checks the first MAX_PEEK_CONTENT characters of the document for a root tag and returns the name of the tag if found.""" content_peek = document.source[:MAX_PEEK_CONTENT] - match = re.match(TAG_REGEX, content_peek) + match = re.search(TAG_REGEX, content_peek) if match: group = match.group(TAG_GROUP_NAME) return group