From 7b5f10940d912589d34c30ad75e92ca979af1a70 Mon Sep 17 00:00:00 2001 From: Tyler Barrus Date: Sat, 15 Jan 2022 11:59:35 -0500 Subject: [PATCH] update word_probability deprication warning --- spellchecker/spellchecker.py | 4 +- tests/__init__.py | 2 +- tests/spellchecker_test.py | 461 ++++++++++++++++++++--------------- 3 files changed, 261 insertions(+), 206 deletions(-) diff --git a/spellchecker/spellchecker.py b/spellchecker/spellchecker.py index 4b61b39..ed6f9e6 100644 --- a/spellchecker/spellchecker.py +++ b/spellchecker/spellchecker.py @@ -155,7 +155,7 @@ def word_usage_frequency(self, word: KeyT, total_words: typing.Optional[int] = N word = ensure_unicode(word) return self._word_frequency.dictionary[word] / total_words - @deprecated("Deprecated as of version 0.6.1; use word_usage_frequency instead") + @deprecated("Deprecated as of version 0.6.5; use word_usage_frequency instead") def word_probability(self, word: KeyT, total_words: typing.Optional[int] = None) -> float: """Calculate the frequency to the `word` provided as seen across the entire dictionary; function was a misnomar and is therefore @@ -169,7 +169,7 @@ def word_probability(self, word: KeyT, total_words: typing.Optional[int] = None) Note: Deprecated as of version 0.6.1; use `word_usage_frequency` instead Note: - Will be removed in version 0.6.3""" + Will be removed in version 0.6.4""" return self.word_usage_frequency(word, total_words) def correction(self, word: KeyT) -> str: diff --git a/tests/__init__.py b/tests/__init__.py index 0ed4c3c..a381faf 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +1 @@ -''' Testing Module ''' +""" Testing Module """ diff --git a/tests/spellchecker_test.py b/tests/spellchecker_test.py index aba0100..3930bfb 100644 --- a/tests/spellchecker_test.py +++ b/tests/spellchecker_test.py @@ -1,5 +1,5 @@ -#coding=UTF-8 -''' Unittest class ''' +# coding=UTF-8 +""" Unittest class """ import unittest import os @@ -7,197 +7,250 @@ from spellchecker import SpellChecker from spellchecker.utils import fail_after + class TestSpellChecker(unittest.TestCase): - ''' test the spell checker class ''' + """test the spell checker class""" def test_correction(self): - ''' test spell checker corrections ''' + """test spell checker corrections""" spell = SpellChecker() - self.assertEqual(spell.correction('ths'), 'the') - self.assertEqual(spell.correction('ergo'), 'ergo') + self.assertEqual(spell.correction("ths"), "the") + self.assertEqual(spell.correction("ergo"), "ergo") # self.assertEqual(spell.correction('alot'), 'a lot') - self.assertEqual(spell.correction('this'), 'this') - self.assertEqual(spell.correction('-'), '-') - self.assertEqual(spell.correction('1213'), '1213') - self.assertEqual(spell.correction('1213.9'), '1213.9') + self.assertEqual(spell.correction("this"), "this") + self.assertEqual(spell.correction("-"), "-") + self.assertEqual(spell.correction("1213"), "1213") + self.assertEqual(spell.correction("1213.9"), "1213.9") def test_candidates(self): - ''' test spell checker candidates ''' + """test spell checker candidates""" spell = SpellChecker() - cands = {'tes', 'thas', 'tis', 'thse', 'thes', 'thus', 'ohs', 'thu', - 'thy', 'thi', 'tas', 'tus', 'thos', 'ahs', 'tho', 'tha', - 'thsi', 'tos', 'the', 'this', 'iths'} - self.assertEqual(spell.candidates('ths'), cands) - self.assertEqual(spell.candidates('the'), {'the'}) - self.assertEqual(spell.candidates('-'), {'-'}) + cands = { + "tes", + "thas", + "tis", + "thse", + "thes", + "thus", + "ohs", + "thu", + "thy", + "thi", + "tas", + "tus", + "thos", + "ahs", + "tho", + "tha", + "thsi", + "tos", + "the", + "this", + "iths", + } + self.assertEqual(spell.candidates("ths"), cands) + self.assertEqual(spell.candidates("the"), {"the"}) + self.assertEqual(spell.candidates("-"), {"-"}) # something that cannot exist... should return just the same element... - self.assertEqual(spell.candidates('manasaeds'), {'manasaeds'}) + self.assertEqual(spell.candidates("manasaeds"), {"manasaeds"}) def test_words(self): - ''' test the parsing of words ''' + """test the parsing of words""" spell = SpellChecker() - res = ['This', 'is', 'a', 'test', 'of', 'this'] - self.assertEqual(spell.split_words('This is a test of this'), res) + res = ["This", "is", "a", "test", "of", "this"] + self.assertEqual(spell.split_words("This is a test of this"), res) def test_words_more_complete(self): - ''' test the parsing of words ''' + """test the parsing of words""" spell = SpellChecker() - res = ['This', 'is', 'a', 'test', 'of', 'the', 'word', 'parser', 'It', 'should', 'work', 'correctly'] - self.assertEqual(spell.split_words('This is a test of the word parser. It should work correctly!!!'), res) + res = ["This", "is", "a", "test", "of", "the", "word", "parser", "It", "should", "work", "correctly"] + self.assertEqual(spell.split_words("This is a test of the word parser. It should work correctly!!!"), res) def test_word_frequency(self): - ''' test word frequency ''' + """test word frequency""" spell = SpellChecker() # if the default load changes so will this... - self.assertEqual(spell.word_frequency['the'], 76138318) + self.assertEqual(spell.word_frequency["the"], 76138318) def test_word_usage_frequency(self): - ''' test the word usage frequency calculation ''' + """test the word usage frequency calculation""" spell = SpellChecker() # if the default load changes so will this... - num = spell.word_frequency['the'] + num = spell.word_frequency["the"] denom = spell.word_frequency.total_words - self.assertEqual(spell.word_usage_frequency('the'), num / denom) + self.assertEqual(spell.word_usage_frequency("the"), num / denom) # deprecated! @fail_after("0.6.4") def test_word_probability_calc(self): - ''' test the word probability calculation ''' + """test the word probability calculation""" spell = SpellChecker() # if the default load changes so will this... - num = spell.word_frequency['the'] + num = spell.word_frequency["the"] denom = spell.word_frequency.total_words - self.assertEqual(spell.word_probability('the'), num / denom) + self.assertEqual(spell.word_probability("the"), num / denom) def test_word_known(self): - ''' test if the word is a `known` word or not ''' + """test if the word is a `known` word or not""" spell = SpellChecker() - self.assertEqual(spell.known(['this']), {'this'}) - self.assertEqual(spell.known(['sherlock']), {'sherlock'}) - self.assertEqual(spell.known(['holmes']), {'holmes'}) - self.assertEqual(spell.known(['known']), {'known'}) + self.assertEqual(spell.known(["this"]), {"this"}) + self.assertEqual(spell.known(["sherlock"]), {"sherlock"}) + self.assertEqual(spell.known(["holmes"]), {"holmes"}) + self.assertEqual(spell.known(["known"]), {"known"}) - self.assertEqual(spell.known(['-']), set()) - self.assertEqual(spell.known(['foobar']), set()) - self.assertEqual(spell.known(['ths']), set()) - self.assertEqual(spell.known(['ergos']), set()) + self.assertEqual(spell.known(["-"]), set()) + self.assertEqual(spell.known(["foobar"]), set()) + self.assertEqual(spell.known(["ths"]), set()) + self.assertEqual(spell.known(["ergos"]), set()) def test_unknown_words(self): - ''' test the unknown word functionality ''' + """test the unknown word functionality""" spell = SpellChecker() - self.assertEqual(spell.unknown(['this']), set()) - self.assertEqual(spell.unknown(['sherlock']), set()) - self.assertEqual(spell.unknown(['holmes']), set()) - self.assertEqual(spell.unknown(['known']), set()) - self.assertEqual(spell.unknown(['-']), set()) + self.assertEqual(spell.unknown(["this"]), set()) + self.assertEqual(spell.unknown(["sherlock"]), set()) + self.assertEqual(spell.unknown(["holmes"]), set()) + self.assertEqual(spell.unknown(["known"]), set()) + self.assertEqual(spell.unknown(["-"]), set()) - self.assertEqual(spell.unknown(['foobar']), {'foobar'}) - self.assertEqual(spell.unknown(['ths']), {'ths'}) - self.assertEqual(spell.unknown(['ergos']), {'ergos'}) + self.assertEqual(spell.unknown(["foobar"]), {"foobar"}) + self.assertEqual(spell.unknown(["ths"]), {"ths"}) + self.assertEqual(spell.unknown(["ergos"]), {"ergos"}) def test_word_in(self): - ''' test the use of the `in` operator ''' + """test the use of the `in` operator""" spell = SpellChecker() - self.assertTrue('key' in spell) - self.assertFalse('wantthis' in spell) # a known excluded word + self.assertTrue("key" in spell) + self.assertFalse("wantthis" in spell) # a known excluded word def test_word_contains(self): - ''' test the contains functionality ''' + """test the contains functionality""" spell = SpellChecker() - self.assertEqual(spell['the'], 76138318) + self.assertEqual(spell["the"], 76138318) def test_spanish_dict(self): - ''' test loading in the spanish dictionary ''' - spell = SpellChecker(language='es') - res = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - 'á','é', 'í', 'ñ', 'ó', 'ú', 'ü'] + """test loading in the spanish dictionary""" + spell = SpellChecker(language="es") + res = [ + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "á", + "é", + "í", + "ñ", + "ó", + "ú", + "ü", + ] self.assertEqual(sorted(list(spell.word_frequency.letters)), res) - self.assertTrue('mañana' in spell) - self.assertEqual(spell['que'], 37141781) + self.assertTrue("mañana" in spell) + self.assertEqual(spell["que"], 37141781) def test_multiple_dicts(self): - ''' test loading in multiple dictionaries ''' - es = SpellChecker(language='es') - self.assertEqual(es['a'], 25517457) - en = SpellChecker(language='en') - self.assertEqual(en['a'], 48779620) - spell = SpellChecker(language=['en', 'es']) - self.assertEqual(spell['a'], 74297077) # 48779620 + 25517457 + """test loading in multiple dictionaries""" + es = SpellChecker(language="es") + self.assertEqual(es["a"], 25517457) + en = SpellChecker(language="en") + self.assertEqual(en["a"], 48779620) + spell = SpellChecker(language=["en", "es"]) + self.assertEqual(spell["a"], 74297077) # 48779620 + 25517457 def test_missing_dictionary(self): try: - SpellChecker(language='no') + SpellChecker(language="no") except ValueError as ex: - msg = 'The provided dictionary language (no) does not exist!' + msg = "The provided dictionary language (no) does not exist!" self.assertEqual(str(ex), msg) def test_load_external_dictionary(self): - ''' test loading a local dictionary ''' + """test loading a local dictionary""" here = os.path.dirname(__file__) - filepath = '{}/resources/small_dictionary.json'.format(here) + filepath = "{}/resources/small_dictionary.json".format(here) spell = SpellChecker(language=None, local_dictionary=filepath) - self.assertEqual(spell['a'], 1) - self.assertTrue('apple' in spell) + self.assertEqual(spell["a"], 1) + self.assertTrue("apple" in spell) def test_edit_distance_one(self): - ''' test a case where edit distance must be one ''' + """test a case where edit distance must be one""" here = os.path.dirname(__file__) - filepath = '{}/resources/small_dictionary.json'.format(here) + filepath = "{}/resources/small_dictionary.json".format(here) spell = SpellChecker(language=None, local_dictionary=filepath, distance=1) - self.assertEqual(spell.candidates('hike'), {'bike'}) + self.assertEqual(spell.candidates("hike"), {"bike"}) def test_edit_distance_two(self): - ''' test a case where edit distance must be two ''' + """test a case where edit distance must be two""" here = os.path.dirname(__file__) - filepath = '{}/resources/small_dictionary.json'.format(here) + filepath = "{}/resources/small_dictionary.json".format(here) spell = SpellChecker(language=None, local_dictionary=filepath) - self.assertEqual(spell.candidates('ale'), {'a', 'apple'}) + self.assertEqual(spell.candidates("ale"), {"a", "apple"}) def test_edit_distance_one_property(self): - ''' check the property setting of the distance property ''' + """check the property setting of the distance property""" spell = SpellChecker(distance=1) self.assertEqual(spell.distance, 1) spell.distance = 2 self.assertEqual(spell.distance, 2) def test_edit_distance_invalud(self): - ''' check the property setting of the distance property on invalid inputs ''' + """check the property setting of the distance property on invalid inputs""" spell = SpellChecker(distance=None) self.assertEqual(spell.distance, 2) spell.distance = 1 self.assertEqual(spell.distance, 1) - spell.distance = 'string' + spell.distance = "string" self.assertEqual(spell.distance, 2) def test_load_text_file(self): - ''' test loading a text file ''' + """test loading a text file""" here = os.path.dirname(__file__) - filepath = '{}/resources/small_doc.txt'.format(here) + filepath = "{}/resources/small_doc.txt".format(here) spell = SpellChecker(language=None) # just from this doc! spell.word_frequency.load_text_file(filepath) - self.assertEqual(spell['a'], 3) - self.assertEqual(spell['storm'], 2) - self.assertFalse('awesome' in spell) - self.assertTrue(spell['whale']) - self.assertTrue('waves' in spell) + self.assertEqual(spell["a"], 3) + self.assertEqual(spell["storm"], 2) + self.assertFalse("awesome" in spell) + self.assertTrue(spell["whale"]) + self.assertTrue("waves" in spell) def test_remove_words(self): - ''' test is a word is removed ''' + """test is a word is removed""" spell = SpellChecker() - self.assertEqual(spell['the'], 76138318) - spell.word_frequency.remove_words(['the']) - self.assertEqual(spell['the'], 0) + self.assertEqual(spell["the"], 76138318) + spell.word_frequency.remove_words(["the"]) + self.assertEqual(spell["the"], 0) def test_remove_word(self): - ''' test a single word removed ''' + """test a single word removed""" spell = SpellChecker() - self.assertEqual(spell['the'], 76138318) - spell.word_frequency.remove('the') - self.assertEqual(spell['the'], 0) + self.assertEqual(spell["the"], 76138318) + spell.word_frequency.remove("the") + self.assertEqual(spell["the"], 0) def test_remove_by_threshold(self): - ''' test removing everything below a certain threshold ''' + """test removing everything below a certain threshold""" spell = SpellChecker() cnt = 0 for key in spell.word_frequency.keys(): @@ -212,7 +265,7 @@ def test_remove_by_threshold(self): self.assertEqual(cnt, 0) def test_remove_by_threshold_using_items(self): - ''' test removing everything below a certain threshold; using items to test ''' + """test removing everything below a certain threshold; using items to test""" spell = SpellChecker() cnt = 0 for _, val in spell.word_frequency.items(): @@ -227,144 +280,144 @@ def test_remove_by_threshold_using_items(self): self.assertEqual(cnt, 0) def test_add_word(self): - ''' test adding a word ''' + """test adding a word""" spell = SpellChecker() - self.assertEqual(spell['appt'], 0) - spell.word_frequency.add('appt') - self.assertEqual(spell['appt'], 1) + self.assertEqual(spell["appt"], 0) + spell.word_frequency.add("appt") + self.assertEqual(spell["appt"], 1) def test_checking_odd_word(self): - ''' test checking a word that is really a number ''' + """test checking a word that is really a number""" spell = SpellChecker() - self.assertEqual(spell.edit_distance_1('12345'), {'12345'}) + self.assertEqual(spell.edit_distance_1("12345"), {"12345"}) def test_unique_words(self): - ''' test the unique word count ''' + """test the unique word count""" spell = SpellChecker() self.assertEqual(spell.word_frequency.unique_words, len(list(spell.word_frequency.keys()))) def test_import_export_json(self): - ''' test the export functionality as json ''' + """test the export functionality as json""" here = os.path.dirname(__file__) - filepath = '{}/resources/small_dictionary.json'.format(here) + filepath = "{}/resources/small_dictionary.json".format(here) spell = SpellChecker(language=None, local_dictionary=filepath) - spell.word_frequency.add('meh') - new_filepath = '{}/resources/small_dictionary_new.json'.format(here) + spell.word_frequency.add("meh") + new_filepath = "{}/resources/small_dictionary_new.json".format(here) spell.export(new_filepath, gzipped=False) sp = SpellChecker(language=None, local_dictionary=new_filepath) - self.assertTrue('meh' in sp) - self.assertFalse('bananna' in sp) + self.assertTrue("meh" in sp) + self.assertFalse("bananna" in sp) os.remove(new_filepath) def test_import_export_gzip(self): - ''' test the export functionality as gzip ''' + """test the export functionality as gzip""" here = os.path.dirname(__file__) - filepath = '{}/resources/small_dictionary.json'.format(here) + filepath = "{}/resources/small_dictionary.json".format(here) spell = SpellChecker(language=None, local_dictionary=filepath) - spell.word_frequency.add('meh') - new_filepath = '{}/resources/small_dictionary_new.json.gz'.format(here) + spell.word_frequency.add("meh") + new_filepath = "{}/resources/small_dictionary_new.json.gz".format(here) spell.export(new_filepath, gzipped=True) sp = SpellChecker(language=None, local_dictionary=new_filepath) - self.assertTrue('meh' in sp) - self.assertFalse('bananna' in sp) + self.assertTrue("meh" in sp) + self.assertFalse("bananna" in sp) os.remove(new_filepath) def test_capitalization_when_case_sensitive_defaults_to_false(self): - ''' test that capitalization doesn't affect in comparisons ''' + """test that capitalization doesn't affect in comparisons""" spell = SpellChecker(language=None) - spell.word_frequency.add('Bob') - spell.word_frequency.add('Bob') - spell.word_frequency.add('Bab') - self.assertEqual('Bob' in spell, True) - self.assertEqual('BOb' in spell, True) - self.assertEqual('BOB' in spell, True) - self.assertEqual('bob' in spell, True) + spell.word_frequency.add("Bob") + spell.word_frequency.add("Bob") + spell.word_frequency.add("Bab") + self.assertEqual("Bob" in spell, True) + self.assertEqual("BOb" in spell, True) + self.assertEqual("BOB" in spell, True) + self.assertEqual("bob" in spell, True) - words = ['Bb', 'bb', 'BB'] - self.assertEqual(spell.unknown(words), {'bb'}) + words = ["Bb", "bb", "BB"] + self.assertEqual(spell.unknown(words), {"bb"}) - known_words = ['BOB', 'bOb'] - self.assertEqual(spell.known(known_words), {'bob'}) + known_words = ["BOB", "bOb"] + self.assertEqual(spell.known(known_words), {"bob"}) - self.assertEqual(spell.candidates('BB'), {'bob', 'bab'}) - self.assertEqual(spell.correction('BB'), 'bob') + self.assertEqual(spell.candidates("BB"), {"bob", "bab"}) + self.assertEqual(spell.correction("BB"), "bob") def test_large_words(self): - ''' test checking for words that are clearly larger than the largest dictionary word ''' + """test checking for words that are clearly larger than the largest dictionary word""" spell = SpellChecker(language=None, distance=2) - spell.word_frequency.add('Bob') + spell.word_frequency.add("Bob") - words = ['Bb', 'bb', 'BB'] - self.assertEqual(spell.unknown(words), {'bb'}) + words = ["Bb", "bb", "BB"] + self.assertEqual(spell.unknown(words), {"bb"}) - known_words = ['BOB', 'bOb'] - self.assertEqual(spell.known(known_words), {'bob'}) + known_words = ["BOB", "bOb"] + self.assertEqual(spell.known(known_words), {"bob"}) - self.assertEqual(spell.correction('bobs'), 'bob') - self.assertEqual(spell.correction('bobb'), 'bob') - self.assertEqual(spell.correction('bobby'), 'bob') + self.assertEqual(spell.correction("bobs"), "bob") + self.assertEqual(spell.correction("bobb"), "bob") + self.assertEqual(spell.correction("bobby"), "bob") self.assertEqual(spell.word_frequency.longest_word_length, 3) - self.assertEqual(spell.correction('bobbys'), 'bobbys') + self.assertEqual(spell.correction("bobbys"), "bobbys") def test_extremely_large_words(self): - ''' test when a word is just extreamly large ''' + """test when a word is just extreamly large""" spell = SpellChecker() - horrible_word = 'thisisnotarealisticwordthisisnotarealisticwordthisisnotarealisticwordthisisnotarealisticword' + horrible_word = "thisisnotarealisticwordthisisnotarealisticwordthisisnotarealisticwordthisisnotarealisticword" self.assertEqual(spell.correction(horrible_word), horrible_word) def test_capitalization_when_case_sensitive_true(self): - ''' test that capitalization affects comparisons ''' + """test that capitalization affects comparisons""" spell = SpellChecker(language=None, case_sensitive=True) - spell.word_frequency.add('Bob') - self.assertEqual('Bob' in spell, True) - self.assertEqual('BOb' in spell, False) - self.assertEqual('BOB' in spell, False) - self.assertEqual('bob' in spell, False) + spell.word_frequency.add("Bob") + self.assertEqual("Bob" in spell, True) + self.assertEqual("BOb" in spell, False) + self.assertEqual("BOB" in spell, False) + self.assertEqual("bob" in spell, False) - words = ['Bb', 'bb', 'BB'] - self.assertEqual(spell.unknown(words), {'Bb', 'bb', 'BB'}) + words = ["Bb", "bb", "BB"] + self.assertEqual(spell.unknown(words), {"Bb", "bb", "BB"}) - case_variant_words = ['BOB', 'bOb'] + case_variant_words = ["BOB", "bOb"] self.assertEqual(spell.known(case_variant_words), set()) - self.assertEqual(spell.candidates('Bb'), {'Bob'}) - self.assertEqual(spell.candidates('bob'), {'Bob'}) - self.assertEqual(spell.correction('Bb'), 'Bob') - self.assertEqual(spell.correction('bob'), 'Bob') - self.assertEqual(spell.unknown(['bob']), {'bob'}) + self.assertEqual(spell.candidates("Bb"), {"Bob"}) + self.assertEqual(spell.candidates("bob"), {"Bob"}) + self.assertEqual(spell.correction("Bb"), "Bob") + self.assertEqual(spell.correction("bob"), "Bob") + self.assertEqual(spell.unknown(["bob"]), {"bob"}) def test_capitalization_when_language_set(self): - ''' test that capitalization doesn't affect comparisons when language not None''' + """test that capitalization doesn't affect comparisons when language not None""" spell = SpellChecker(language="en") - self.assertEqual(spell.known(['Bike']), {'bike'}) + self.assertEqual(spell.known(["Bike"]), {"bike"}) def test_pop(self): - ''' test the popping of a word ''' + """test the popping of a word""" spell = SpellChecker() - self.assertEqual('apple' in spell, True) - self.assertGreater(spell.word_frequency.pop('apple'), 1) - self.assertEqual('apple' in spell, False) + self.assertEqual("apple" in spell, True) + self.assertGreater(spell.word_frequency.pop("apple"), 1) + self.assertEqual("apple" in spell, False) def test_pop_default(self): - ''' test the default value being set for popping a word ''' + """test the default value being set for popping a word""" spell = SpellChecker() - self.assertEqual('appleies' in spell, False) - self.assertEqual(spell.word_frequency.pop('appleies', False), False) + self.assertEqual("appleies" in spell, False) + self.assertEqual(spell.word_frequency.pop("appleies", False), False) def test_adding_unicode(self): - ''' test adding a unicode word to the dictionary ''' + """test adding a unicode word to the dictionary""" spell = SpellChecker() - spell.word_frequency.load_words(['mañana']) - self.assertEqual('ñ' in spell.word_frequency.letters, True) + spell.word_frequency.load_words(["mañana"]) + self.assertEqual("ñ" in spell.word_frequency.letters, True) here = os.path.dirname(__file__) - new_filepath = '{}/resources/small_dictionary_new.json.gz'.format(here) + new_filepath = "{}/resources/small_dictionary_new.json.gz".format(here) spell.export(new_filepath, gzipped=True) spell2 = SpellChecker(language=None, local_dictionary=new_filepath) @@ -373,61 +426,63 @@ def test_adding_unicode(self): os.remove(new_filepath) def test_tokenizer_file(self): - """ def using a custom tokenizer for file loading """ + """def using a custom tokenizer for file loading""" + def tokens(txt): for x in txt.split(): yield x here = os.path.dirname(__file__) - filepath = '{}/resources/small_doc.txt'.format(here) + filepath = "{}/resources/small_doc.txt".format(here) spell = SpellChecker(language=None) # just from this doc! spell.word_frequency.load_text_file(filepath, tokenizer=tokens) - self.assertEqual(spell['a'], 3) - self.assertEqual(spell['storm'], 1) - self.assertEqual(spell['storm.'], 1) - self.assertFalse('awesome' in spell) - self.assertTrue(spell['whale']) - self.assertTrue('sea.' in spell) + self.assertEqual(spell["a"], 3) + self.assertEqual(spell["storm"], 1) + self.assertEqual(spell["storm."], 1) + self.assertFalse("awesome" in spell) + self.assertTrue(spell["whale"]) + self.assertTrue("sea." in spell) def test_tokenizer_provided(self): - """ Test passing in a tokenizer """ + """Test passing in a tokenizer""" + def tokens(txt): for x in txt.split(): yield x here = os.path.dirname(__file__) - filepath = '{}/resources/small_doc.txt'.format(here) + filepath = "{}/resources/small_doc.txt".format(here) spell = SpellChecker(language=None, tokenizer=tokens) # just from this doc! spell.word_frequency.load_text_file(filepath) - self.assertEqual(spell['a'], 3) - self.assertEqual(spell['storm'], 1) - self.assertEqual(spell['storm.'], 1) - self.assertFalse('awesome' in spell) - self.assertTrue(spell['whale']) - self.assertTrue('sea.' in spell) + self.assertEqual(spell["a"], 3) + self.assertEqual(spell["storm"], 1) + self.assertEqual(spell["storm."], 1) + self.assertFalse("awesome" in spell) + self.assertTrue(spell["whale"]) + self.assertTrue("sea." in spell) def test_bytes_input(self): - """ Test using bytes instead of unicode as input """ + """Test using bytes instead of unicode as input""" var = b"bike" here = os.path.dirname(__file__) - filepath = '{}/resources/small_dictionary.json'.format(here) + filepath = "{}/resources/small_dictionary.json".format(here) spell = SpellChecker(language=None, local_dictionary=filepath) self.assertTrue(var in spell) self.assertEqual(spell[var], 60) def test_split_words(self): - ''' test using split_words ''' + """test using split_words""" spell = SpellChecker() res = spell.split_words("This isn't a good test, but it is a test!!!!") self.assertEqual(set(res), set(["This", "isn't", "a", "good", "test", "but", "it", "is", "a", "test"])) def test_iter_spellchecker(self): - """ Test using the iterator on the SpellChecker """ + """Test using the iterator on the SpellChecker""" here = os.path.dirname(__file__) - filepath = '{}/resources/small_dictionary.json'.format(here) + filepath = "{}/resources/small_dictionary.json".format(here) spell = SpellChecker(language=None, local_dictionary=filepath) @@ -438,9 +493,9 @@ def test_iter_spellchecker(self): self.assertEqual(cnt, len(spell.word_frequency.dictionary)) def test_iter_word_frequency(self): - """ Test using the iterator on the WordFrequency """ + """Test using the iterator on the WordFrequency""" here = os.path.dirname(__file__) - filepath = '{}/resources/small_dictionary.json'.format(here) + filepath = "{}/resources/small_dictionary.json".format(here) spell = SpellChecker(language=None, local_dictionary=filepath) @@ -451,23 +506,23 @@ def test_iter_word_frequency(self): self.assertEqual(cnt, len(spell.word_frequency.dictionary)) def test_case_sensitive_parse_words(self): - """ Test using the parse words to generate a case sensitive dict """ + """Test using the parse words to generate a case sensitive dict""" spell = SpellChecker(language=None, case_sensitive=True) spell.word_frequency.load_text("This is a Test of the test!") self.assertTrue("This" in spell) self.assertFalse("this" in spell) def test_case_insensitive_parse_words(self): - """ Test using the parse words to generate a case insensitive dict """ + """Test using the parse words to generate a case insensitive dict""" spell = SpellChecker(language=None, case_sensitive=False) spell.word_frequency.load_text("This is a Test of the test!") # in makes sure it is lower case in this instance self.assertTrue("this" in spell) def test_language_list(self): - """ Test pulling a list of supported languages """ + """Test pulling a list of supported languages""" langs = SpellChecker.languages() - self.assertIn('en', langs) - self.assertIn('de', langs) - self.assertIn('es', langs) - self.assertNotIn('ja', langs) + self.assertIn("en", langs) + self.assertIn("de", langs) + self.assertIn("es", langs) + self.assertNotIn("ja", langs)