Skip to content

Commit

Permalink
🐛 bugfix names, return identifier, parameter name conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
ZenithClown committed Nov 6, 2024
1 parent ee23a14 commit 65eeb73
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 10 deletions.
14 changes: 5 additions & 9 deletions nlpurify/feature/selection/nltk.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def tokenize_text(text : str, regexp : bool = False, vanilla : bool = False, **k
return tokens[tokenize_method]


def remove_stopwords(text : str, language : str = "english", rtype : object = str, **kwargs) -> Union[str, list]:
def remove_stopwords(text : str, language : str = "english", rtype : object = str, **kwargs) -> str | list:
"""
Function to Remove Stopwords from a Raw Text using NLTK
Expand Down Expand Up @@ -294,20 +294,16 @@ def remove_stopwords(text : str, language : str = "english", rtype : object = st
features from a given text message.
"""

tokenize = kwargs.get("tokenize", True)
normalize = kwargs.get("normalize", True)
tokenize_ = kwargs.get("tokenize", True)
normalize_ = kwargs.get("normalize", True)

stopwords_ = stopwords.words(language) # defaults to english

# ? normalize the text using nlpurify.normalizeText()
# else, left to the user's discretion or additional functionalities
text = normalize(
text,
uniform_text_case = "lower",
strip_line_breaks = True
) if normalize else text
text = normalize(text, **kwargs) if normalize_ else text

tokens = tokenize_text(text, **kwargs) if tokenize else text
tokens = tokenize_text(text, **kwargs) if tokenize_ else text
tokens = [word for word in tokens if word not in stopwords_]

# ensure return type of the data, else raise error
Expand Down
2 changes: 1 addition & 1 deletion nlpurify/fuzzy/logical.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,5 +103,5 @@ def evaluate(self, thresh : int, logic : str, operator : str = ">="):
fuzzy scoring.
"""

scores = self._fuzzy_score_()
scores = self.fuzzy_scores()
return eval(f"{logic}([score {operator} {thresh} for score in {scores}])")

0 comments on commit 65eeb73

Please sign in to comment.