Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🛠💣 Introduce Scoring Module #19

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions nlpurify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@

# ? package follows https://peps.python.org/pep-0440/
# ? https://python-semver.readthedocs.io/en/latest/advanced/convert-pypi-to-semver.html
__version__ = "v2.0.0.a0"
__version__ = "v2.1.0.dev0"

# init-time options registrations
from nlpurify import fuzzy
from nlpurify.scoring import fuzzy
from nlpurify.scoring import regexp

from nlpurify.feature import (
selection as feature_selection
Expand Down
13 changes: 13 additions & 0 deletions nlpurify/scoring/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# -*- encoding: utf-8 -*-

"""
The module provides scoring methods for text matching using different
modules like fuzzywuzzy, regular expressions, etc.
"""

from nlpurify.scoring.fuzzy import (
fuzzy_score,
LogicalFuzzy
)

from nlpurify.scoring.regexp import LogicalRegexp
58 changes: 58 additions & 0 deletions nlpurify/scoring/baseclass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# -*- encoding: utf-8 -*-

from typing import List, Iterable
from abc import ABC, abstractmethod

class BaseLogicalOperator(ABC):
    """
    Abstract Base Class for Threshold Based Logical Scoring

    The class stores a string and any number of reference strings.
    A child class implements :meth:`scores` to produce one score per
    reference, and :meth:`evaluate` compares each score against a
    threshold and combines the outcomes with ``all`` or ``any``.

    :type  string: str
    :param string: The original string against which the reference
        values are scored.

    :type  references: str
    :param references: Any number of reference strings, stored as a
        tuple in the order they are passed.
    """

    def __init__(self, string : str, *references : str) -> None:
        self.string = string

        # tuple of any n-reference strings used for scoring
        self.references = references


    @abstractmethod
    def scores(self, *args, **kwargs) -> Iterable[float]:
        """
        The Abstract Class Definition for Calculation of Individual Scores

        The individual scores are calculated in this function in the
        child classes and the abstract method is used as a placeholder.
        The abstract method ensures that all the child classes use the
        same naming convention and thus :meth:`evaluate` can safely
        invoke it from the base class.
        """

        pass


    def evaluate(self, thresh : int, logic : str, operator : str = ">=") -> bool:
        """
        Evaluate the Final Score using Logical Operators

        Each value returned by :meth:`scores` is compared against the
        threshold using the comparison operator, and the individual
        outcomes are reduced to a single boolean using the logic. The
        operator like :attr:`>=`, :attr:`<=` is dynamic and thus
        provides additional controls to the logical operations as it
        can be used to select either side of the threshold.

        The comparison and the reduction are resolved from fixed
        lookup tables and no code is built from the arguments, thus
        any iterable of scores (including generators) is supported.

        .. versionchanged:: v2.0.0 gh#19 gh#18 gh#20
            The function ``scores()`` is now used to calculate the
            individual scores and the function ``evaluate()`` is now used
            to evaluate the final score using logical operators. The
            method is now available as an abstract method in the
            baseclass and returns an iterable of values.

        :type  thresh: int
        :param thresh: The threshold against which each individual
            score is compared.

        :type  logic: str
        :param logic: The logical operator which is either :attr:`all`
            i.e., and condition and :attr:`any` which is or condition.

        :type  operator: str
        :param operator: The comparison operator, one of :attr:`>=`,
            :attr:`<=`, :attr:`>`, :attr:`<`, :attr:`==` or :attr:`!=`.
            Defaults to :attr:`>=`.

        :rtype:   bool
        :returns: The combined outcome of all the comparisons.

        :raises ValueError: If the logic or the operator is not one
            of the supported values.
        """

        # imported under an alias as the ``operator`` argument shadows
        # the standard library module of the same name in this scope
        import operator as _op

        reducers = {"all" : all, "any" : any}
        comparators = {
            ">=" : _op.ge,
            "<=" : _op.le,
            ">" : _op.gt,
            "<" : _op.lt,
            "==" : _op.eq,
            "!=" : _op.ne,
        }

        try:
            reducer = reducers[logic.strip()]
        except KeyError:
            raise ValueError(
                f"logic must be one of {sorted(reducers)}, got {logic!r}"
            ) from None

        try:
            compare = comparators[operator.strip()]
        except KeyError:
            raise ValueError(
                f"operator must be one of {sorted(comparators)}, got {operator!r}"
            ) from None

        return reducer([compare(score, thresh) for score in self.scores()])
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
based on similarity, and thus extending the logic to cluster, etc.
"""

from nlpurify.fuzzy.wrapper import fuzzy_score # noqa: F401, F403 # pyright: ignore[reportMissingImports]
from nlpurify.fuzzy.logical import LogicalFuzzy # noqa: F401, F403 # pyright: ignore[reportMissingImports]
from nlpurify.scoring.fuzzy.wrapper import fuzzy_score # noqa: F401, F403 # pyright: ignore[reportMissingImports]
from nlpurify.scoring.fuzzy.logical import LogicalFuzzy # noqa: F401, F403 # pyright: ignore[reportMissingImports]
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
and/or conditional statements for the end user.
"""

from typing import List
from nlpurify.fuzzy.wrapper import fuzzy_score
from typing import Iterable, List

class LogicalFuzzy:
from nlpurify.scoring.fuzzy.wrapper import fuzzy_score
from nlpurify.scoring.baseclass import BaseLogicalOperator

class LogicalFuzzy(BaseLogicalOperator):
"""
The Logical Fuzzy is an Extension of Scoring for Logical Operation

Expand Down Expand Up @@ -40,7 +42,7 @@ class LogicalFuzzy:
logical_fuzzy = nlpurify.fuzzy.LogicalFuzzy(statement, "quick", "foxy")

# let's check the individual score of `quick` and `foxy` against statement
print(logical_fuzzy.fuzzy_scores())
print(logical_fuzzy.scores())
>> [100, 75]

# code to check if any of the value is <= 80
Expand All @@ -58,17 +60,14 @@ def __init__(
*references : List[str],
method : str = "partial_ratio"
) -> None:
self.string = string

# list of any n-reference strings for fuzzy scoring
self.references = references
super().__init__(string, *references)

# mandatory keyword arguments which determines the fuzzy
# the method is any of the supported argument of the fuzzy
self.method = method


def fuzzy_scores(self) -> list:
def scores(self) -> Iterable[float]:
"""
Calculate Fuzzy Score of Each Reference to Statement

Expand All @@ -81,28 +80,3 @@ def fuzzy_scores(self) -> list:
fuzzy_score(self.string, reference, self.method)
for reference in self.references
]


def evaluate(self, thresh : int, logic : str, operator : str = ">="):
"""
Evaluate the Final Score using Operators

The operator like :attr:`>=`, :attr:`<=` is dynamic and thus
provides additional controls to the logical operations as it
can now be used to efficiently negate both the side of the
curve for sequence matching. The operator is used internally
and is evaluated using the :func:`eval()` to determine the
final result and provide the score.

:type logic: str
:param logic: The logical operator which is either :attr:`all`
i.e., and condition and :attr:`any` which is or condition.

:type operator: str
:param operator: The deterministic operator which can be used
to efficiently control both the side of the curve for
fuzzy scoring.
"""

scores = self.fuzzy_scores()
return eval(f"{logic}([score {operator} {thresh} for score in {scores}])")
File renamed without changes.
11 changes: 11 additions & 0 deletions nlpurify/scoring/regexp/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# -*- encoding: utf-8 -*-

"""
Regular expressions are a powerful way to classify texts and
score them based on wildcard patterns, similar to those allowed by the
linux shell ``grep`` command. The module extends this ability to use
multiple expressions and binds them under a logical operator to find
a sequence and score the same.
"""

from nlpurify.scoring.regexp.logical import LogicalRegexp
55 changes: 55 additions & 0 deletions nlpurify/scoring/regexp/logical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# -*- encoding: utf-8 -*-

"""
Creates an Extension to Perform Logical Operations

Logical operations like ``and`` or ``or`` can be implemented with the
module. This reduces manual intervention like using repeated for-loop
and/or conditional statements for the end user.
"""

import re

from typing import List, Iterable

from nlpurify.scoring.baseclass import BaseLogicalOperator

class LogicalRegexp(BaseLogicalOperator):
    """
    An Extension to Perform Logical Operations for Regular Expression

    Regular expressions are handy when searching for keywords in a
    sequence of text. This object combines multiple regular expressions
    and binds them under a logical operator to find a sequence and
    score the same.

    :type  string: str
    :param string: The original string against which the reference
        values are to be checked and validated.

    :type  references: str
    :param references: Any number of regular expression patterns that
        are searched in the string. The score is also a n-length
        list, one value per pattern.
    """

    def __init__(self, string : str, *references : str) -> None:
        super().__init__(string, *references)


    def scores(self) -> Iterable[float]:
        """
        Finds if the Expression is Found in the Statement

        Given n-references to search in the statement, the function
        returns ``100`` or ``0`` for each reference, in the order of
        the references, using the boolean outcome as a score.

        Caveat:: the score is either ``100`` if the pattern matches
        anywhere in the string else ``0``, the number of matches is
        not considered.

        :rtype:   list
        :returns: A list of ``100`` or ``0``, one for each reference.
        """

        # only the existence of a match is scored, thus ``re.search``
        # is sufficient and stops at the first match of the pattern
        return [
            100 if re.search(pattern, self.string) else 0
            for pattern in self.references
        ]