Skip to content

Commit

Permalink
Increase test coverage to 100%
Browse files Browse the repository at this point in the history
  • Loading branch information
quillcraftsman committed Jul 20, 2024
1 parent 96d246e commit 72bd65a
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ test:
pytest

coverage:
pytest -s --cov --cov-report html --cov-fail-under 97
pytest -s --cov --cov-report html --cov-fail-under 100

yamllint:
yamllint -d relaxed .
Expand Down
4 changes: 2 additions & 2 deletions find_similar/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,11 @@ def split_text_and_digits(text):
:param text: enter text
:return: list of separated texts
"""
regex = r"^\D+[0]\D+$"
regex = r"^\D+[0]\D+$" # so0os
match = re.search(regex, text, re.MULTILINE)
if match:
return [text]
# Проверяем на вольты и амперы
# Проверяем на вольты и амперы 55В -> 55 v
regex = r"\d+[.]?\d?[в|а|В|А|B|A|a]{1}$"
match = re.search(regex, text, re.MULTILINE)
if match:
Expand Down
45 changes: 45 additions & 0 deletions testing/test_algorithm/test_tokenize.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Tests for tokenize
"""
from unittest import mock
import pytest
from find_similar.calc_models import LanguageNotFoundException
from find_similar.tokenize import (
Expand All @@ -15,6 +16,7 @@
HashebleSet,
replace_yio,
add_nltk_stopwords,
get_stopwords_from_nltk,
)


Expand Down Expand Up @@ -47,6 +49,23 @@ def test_split_text_and_digits():
assert result == ["1", "some", "2", "string", "5", "with", "9"]


def test_split_text_and_digits_match():
    """
    Check split_text_and_digits on text ending with a number followed by
    a unit letter: "Voltage 55В" must come back as ["55", "v"].
    """
    expected = ["55", "v"]
    assert split_text_and_digits("Voltage 55В") == expected


def test_split_text_and_digits_other_match():
    """
    Check the early-return branch of split_text_and_digits: text matching
    ^\\D+[0]\\D+$ (a single zero surrounded by non-digits) is returned
    unsplit, wrapped in a one-element list.
    """
    text = "so0os"
    result = split_text_and_digits(text)
    assert result == [text]


def test_get_normal_form():
"""
Test get_normal_form
Expand Down Expand Up @@ -161,3 +180,29 @@ def test_remove_or_not_stopwords():
assert tokenize(text, "russian", remove_stopwords=True) == result
result = {"что", "я", "о", "круг"}
assert tokenize(text, "russian", remove_stopwords=False) == result


def test_get_stopwords_from_nltk_lookup_error():
    """
    Check that get_stopwords_from_nltk lets LookupError propagate when the
    patched stopwords object always fails and nltk.download does nothing.
    """

    class FailingStopwords:
        """
        Replacement for find_similar.tokenize.stopwords whose lookup
        always fails.
        """
        def words(self, *args, **kwargs):
            """
            Raise LookupError regardless of the arguments passed.
            """
            raise LookupError

    def noop_download(*args, **kwargs):  # pylint:disable=unused-argument
        """
        Replacement for nltk.download that accepts anything and does nothing.
        """

    # Build the patchers first, then enter them in the same order as the
    # original nested blocks: stopwords, download, and finally the
    # expectation that LookupError escapes.
    patch_stopwords = mock.patch(
        'find_similar.tokenize.stopwords', FailingStopwords()
    )
    patch_download = mock.patch('nltk.download', noop_download)
    with patch_stopwords, patch_download, pytest.raises(LookupError):
        get_stopwords_from_nltk('english')

0 comments on commit 72bd65a

Please sign in to comment.