Skip to content

Commit

Permalink
🎉 add scoring block for logical regular expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
ZenithClown committed Dec 2, 2024
1 parent aab7246 commit ff6da6e
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 0 deletions.
1 change: 1 addition & 0 deletions nlpurify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

# init-time options registrations
from nlpurify.scoring import fuzzy
from nlpurify.scoring import regexp

from nlpurify.feature import (
selection as feature_selection
Expand Down
2 changes: 2 additions & 0 deletions nlpurify/scoring/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
fuzzy_score,
LogicalFuzzy
)

from nlpurify.scoring.regexp import LogicalRegexp
2 changes: 2 additions & 0 deletions nlpurify/scoring/regexp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@
multiple expressions and binds them under a logical operator to find a
sequence and score it.
"""

from nlpurify.scoring.regexp.logical import LogicalRegexp
38 changes: 38 additions & 0 deletions nlpurify/scoring/regexp/logical.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,48 @@
and/or conditional statements for the end user.
"""

import re

from typing import List, Iterable

from nlpurify.scoring.baseclass import BaseLogicalOperator

class LogicalRegexp(BaseLogicalOperator):
    """
    An Extension to Perform Logical Operations for Regular Expression

    Regular expressions are handy when searching for keywords in a
    sequence of text. This object combines multiple regular expressions
    and binds them under a logical operator to find a sequence and
    score it.

    :type  string: str
    :param string: The original string in which every reference
        pattern is searched.

    :type  references: str
    :param references: One or more regular expression patterns, given
        as separate positional arguments. :meth:`scores` yields one
        score per pattern, in the order the patterns were supplied.
    """

    def __init__(self, string: str, *references: str) -> None:
        # NOTE(review): ``scores`` reads ``self.string`` and
        # ``self.references``; these are presumably set by the base
        # class initializer - confirm against ``BaseLogicalOperator``.
        super().__init__(string, *references)

    def scores(self) -> Iterable[float]:
        """
        Finds if the Expression is Found in the Statement

        Each reference pattern is searched anywhere in the string and
        mapped to a boolean-like score.

        Caveat:: the score is either ``100`` if the pattern matches at
        least once, else ``0``; the number of matches is not reflected.

        :rtype:  list
        :return: A list with one score (``100`` or ``0``) per reference
            pattern, in the same order as the references.
        """

        # ``re.search`` stops at the first match, whereas ``re.findall``
        # scans the whole string and builds a list that was only ever
        # used for its truthiness. Both agree on "matched at least once"
        # (including patterns that match the empty string).
        return [
            100 if re.search(pattern, self.string) else 0
            for pattern in self.references
        ]

0 comments on commit ff6da6e

Please sign in to comment.