Skip to content

Commit

Permalink
Refactor DocumentValidator to improve regex matching
Browse files Browse the repository at this point in the history
Fixes some false positives when detecting a valid tool document.
  • Loading branch information
davelopez committed Nov 4, 2024
1 parent a13ab13 commit 1ad7558
Showing 1 changed file with 9 additions and 8 deletions.
17 changes: 9 additions & 8 deletions server/galaxyls/services/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
from galaxyls.services.xml.types import DocumentType

# Upper bound on how many leading characters of a document's source are
# inspected when searching for the root tag.
MAX_PEEK_CONTENT = 1000
# Name of the capture group in TAG_REGEX that holds the tag name.
TAG_GROUP_NAME = "root_tag"
# Lazily consumes optional whitespace / same-line text, then matches a "<" that
# is not immediately followed by "?" or "!" (so openers such as "<?xml" or
# "<!--" are never captured) and captures the run of word characters after it.
# NOTE: `[\w]*` may match nothing, so the captured name can be an empty string.
TAG_REGEX = r"[\n\s]*?.*?[\n\s]*?<(?!\?)(?!\!)(?P<root_tag>[\w]*)"
# Lower-cased names of every DocumentType member except UNKNOWN.
SUPPORTED_ROOT_TAGS = [e.name.lower() for e in DocumentType if e != DocumentType.UNKNOWN]


class DocumentValidator:
Expand All @@ -17,17 +20,15 @@ def has_valid_root(cls, document: Document) -> bool:
or is an empty document."""
if DocumentValidator.is_empty_document(document):
return True
root = DocumentValidator._get_document_root_tag(document)
if root is not None:
root_tag = root.upper()
supported = [e.name for e in DocumentType if e != DocumentType.UNKNOWN]
return root_tag == "" or root_tag in supported
root_tag = DocumentValidator.get_document_root_tag(document)
if root_tag is not None:
return root_tag == "" or root_tag in SUPPORTED_ROOT_TAGS
return False

@classmethod
def is_tool_document(cls, document: Document) -> bool:
"""Checks if the document's root element is <tool>."""
root = DocumentValidator._get_document_root_tag(document)
root = DocumentValidator.get_document_root_tag(document)
if root is not None:
root_tag = root.upper()
return root_tag == DocumentType.TOOL.name
Expand All @@ -39,11 +40,11 @@ def is_empty_document(cls, document: Document) -> bool:
return not document.source or document.source.isspace()

@classmethod
def _get_document_root_tag(cls, document: Document) -> Optional[str]:
def get_document_root_tag(cls, document: Document) -> Optional[str]:
"""Checks the first MAX_PEEK_CONTENT characters of the document for a root tag and
returns the name of the tag if found."""
content_peek = document.source[:MAX_PEEK_CONTENT]
match = re.match(TAG_REGEX, content_peek)
match = re.search(TAG_REGEX, content_peek)
if match:
group = match.group(TAG_GROUP_NAME)
return group
Expand Down

0 comments on commit 1ad7558

Please sign in to comment.