Increase test coverage to 100%

findsimilar · Jul 20, 2024 · 72bd65a · 72bd65a
1 parent 96d246e
commit 72bd65a
Show file tree

Hide file tree

Showing 3 changed files with 48 additions and 3 deletions.
diff --git a/Makefile b/Makefile
@@ -2,7 +2,7 @@ test:
 	pytest
 
 coverage:
-	pytest -s --cov --cov-report html --cov-fail-under 97
+	pytest -s --cov --cov-report html --cov-fail-under 100
 
 yamllint:
 	yamllint -d relaxed .

diff --git a/find_similar/tokenize.py b/find_similar/tokenize.py
@@ -101,11 +101,11 @@ def split_text_and_digits(text):
     :param text: enter text
     :return: list of separated texts
     """
-    regex = r"^\D+[0]\D+$"
+    regex = r"^\D+[0]\D+$"  # so0os
     match = re.search(regex, text, re.MULTILINE)
     if match:
         return [text]
-    # Проверяем на вольты и амперы
+    # Проверяем на вольты и амперы 55В -> 55 v
     regex = r"\d+[.]?\d?[в|а|В|А|B|A|a]{1}$"
     match = re.search(regex, text, re.MULTILINE)
     if match:

diff --git a/testing/test_algorithm/test_tokenize.py b/testing/test_algorithm/test_tokenize.py
@@ -1,6 +1,7 @@
 """
 Tests for tokenize
 """
+from unittest import mock
 import pytest
 from find_similar.calc_models import LanguageNotFoundException
 from find_similar.tokenize import (
@@ -15,6 +16,7 @@
     HashebleSet,
     replace_yio,
     add_nltk_stopwords,
+    get_stopwords_from_nltk,
 )
 
 
@@ -47,6 +49,23 @@ def test_split_text_and_digits():
     assert result == ["1", "some", "2", "string", "5", "with", "9"]
 
 
+def test_split_text_and_digits_match():
+    """
+    Test split_text_and_digits when the volts/amperes pattern matches (55В -> 55, v)
+    """
+    input_str = "Voltage 55В"
+    result = split_text_and_digits(input_str)
+    assert result == ["55", "v"]
+
+
+def test_split_text_and_digits_other_match():
+    """
+    Test split_text_and_digits returns [text] unsplit when ^\\D+[0]\\D+$ matches
+    """
+    input_str = "so0os"
+    assert split_text_and_digits(input_str) == [input_str]
+
+
 def test_get_normal_form():
     """
     Test get_normal_form
@@ -161,3 +180,29 @@ def test_remove_or_not_stopwords():
     assert tokenize(text, "russian", remove_stopwords=True) == result
     result = {"что", "я", "о", "круг"}
     assert tokenize(text, "russian", remove_stopwords=False) == result
+
+
+def test_get_stopwords_from_nltk_lookup_error():
+    """
+    Test get_stopwords_from_nltk raises LookupError when stopwords.words always fails
+    """
+
+    class MockStopwords:
+        """
+        Stand-in for nltk.corpus.stopwords whose words() always raises LookupError
+        """
+        def words(self, *args, **kwargs):
+            """
+            Replacement for stopwords.words that raises LookupError on every call
+            """
+            raise LookupError
+
+    def mock_download(*args, **kwargs):  # pylint:disable=unused-argument
+        """
+        No-op replacement for nltk.download (accepts any arguments, does nothing)
+        """
+
+    with mock.patch('find_similar.tokenize.stopwords', MockStopwords()):
+        with mock.patch('nltk.download', mock_download):
+            with pytest.raises(LookupError):
+                get_stopwords_from_nltk('english')