diff --git a/.github/badges/coverage.svg b/.github/badges/coverage.svg
index 565169e..ac4943a 100644
--- a/.github/badges/coverage.svg
+++ b/.github/badges/coverage.svg
@@ -9,13 +9,13 @@
-
+
   coverage
   coverage
-  28%
-  28%
+  47%
+  46%
diff --git a/kebbie/layout.py b/kebbie/layout.py
index 4efc1f1..4c020e7 100644
--- a/kebbie/layout.py
+++ b/kebbie/layout.py
@@ -124,7 +124,10 @@ def _extract_infos( # noqa: C901
                 pass
             else:
                 # Other special characters are ignored
-                continue
+                char = None
+
+            if char is None:
+                continue
 
             # Save the character and its key information
             # Save it only if it's not already in a previous klayer
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..cd4f487
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,21 @@
+import random
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def no_requests(monkeypatch):
+    # Make sure unit-tests don't use any resources pulled from the internet
+    # using `requests`
+    monkeypatch.delattr("requests.sessions.Session.request")
+
+
+@pytest.fixture(scope="session")
+def monkeypatch_session():
+    with pytest.MonkeyPatch.context() as mp:
+        yield mp
+
+
+@pytest.fixture
+def seeded():
+    random.seed(36)
diff --git a/tests/test_gesture.py b/tests/test_gesture.py
new file mode 100644
index 0000000..2493c85
--- /dev/null
+++ b/tests/test_gesture.py
@@ -0,0 +1,29 @@
+from kebbie.gesture import MAX_N_POINTS_PER_DIST, MIN_N_POINTS_PER_DIST, make_swipe_gesture
+from kebbie.utils import euclidian_dist
+
+
+def test_make_swipe_gesture_between_2_points(seeded):
+    control_points = [(0, 0), (100, 100)]
+
+    points = make_swipe_gesture(control_points)
+
+    d = euclidian_dist(control_points[0], control_points[1])
+    assert int(d * MIN_N_POINTS_PER_DIST) <= len(points) <= int(d * MAX_N_POINTS_PER_DIST)
+
+
+def test_make_swipe_gesture_between_more_points(seeded):
+    points = make_swipe_gesture([(0, 0), (100, 100), (50, 80), (-100, 10)])
+    assert len(points) > 4
+
+
+def test_make_swipe_gesture_single_control_point():
+    assert make_swipe_gesture([(0, 0)]) == [(0, 0)]
+
+
+def test_make_swipe_gesture_same_points():
+    assert make_swipe_gesture([(0, 0), (0, 0)]) == [(0, 0)]
+
+
+def test_make_swipe_gesture_too_small_points(seeded):
+    points = make_swipe_gesture([(0, 0), (0.1, 0)])
+    assert len(points) > 2
diff --git a/tests/test_layout.py b/tests/test_layout.py
new file mode 100644
index 0000000..017f7a3
--- /dev/null
+++ b/tests/test_layout.py
@@ -0,0 +1,93 @@
+import pytest
+
+from kebbie.layout import LayoutHelper
+
+
+@pytest.fixture
+def layout():
+    return LayoutHelper()
+
+
+def test_get_existing_key_info_same_layer(layout):
+    q_info = layout.get_key_info("q")
+    w_info = layout.get_key_info("w")
+
+    q_width, q_height, q_x_center, q_y_center, q_layer = q_info
+    w_width, w_height, w_x_center, w_y_center, w_layer = w_info
+    assert q_width == w_width
+    assert q_height == w_height
+    assert q_x_center < w_x_center
+    assert q_y_center == w_y_center
+    assert q_layer == w_layer
+
+
+def test_get_existing_key_info_different_layer(layout):
+    q_info = layout.get_key_info("q")
+    uq_info = layout.get_key_info("Q")
+
+    q_width, q_height, q_x_center, q_y_center, q_layer = q_info
+    uq_width, uq_height, uq_x_center, uq_y_center, uq_layer = uq_info
+    assert q_width == uq_width
+    assert q_height == uq_height
+    assert q_x_center == uq_x_center
+    assert q_y_center == uq_y_center
+    assert q_layer < uq_layer
+
+
+def test_get_existing_key_info_accent(layout):
+    e_info = layout.get_key_info("e")
+    é_info = layout.get_key_info("é")
+
+    e_width, e_height, e_x_center, e_y_center, e_layer = e_info
+    é_width, é_height, é_x_center, é_y_center, é_layer = é_info
+    assert e_width == é_width
+    assert e_height == é_height
+    assert e_layer < é_layer
+
+
+def test_get_non_existing_key_info(layout):
+    with pytest.raises(KeyError):
+        layout.get_key_info("☯")
+
+
+def test_get_key_within_bounds(layout):
+    f_info = layout.get_key_info("f")
+    f_width, f_height, f_x_center, f_y_center, f_layer = f_info
+
+    assert layout.get_key((f_x_center, f_y_center), 0) == "f"
+    assert layout.get_key((f_x_center + f_width / 3, f_y_center + f_height / 3), 0) == "f"
+    assert layout.get_key((f_x_center - f_width / 3, f_y_center - f_height / 3), 0) == "f"
+
+    assert layout.get_key((f_x_center, f_y_center), 1) == "F"
+
+
+def test_get_key_outside_of_bounds(layout):
+    f_info = layout.get_key_info("f")
+    f_width, f_height, f_x_center, f_y_center, f_layer = f_info
+
+    assert layout.get_key((f_x_center + f_width + 1, f_y_center), 0) != "f"
+    assert layout.get_key((f_x_center, f_y_center + f_height + 1), 0) != "f"
+
+
+def test_get_closest_border_key(layout):
+    assert layout.get_key((-5000, -5000), 0) == "q"
+
+
+def test_ignore_additional_keyboard_layers():
+    layout = LayoutHelper(ignore_layers_after=0)
+    with pytest.raises(KeyError):
+        layout.get_key_info("Q")
+
+
+@pytest.mark.parametrize("k", [" ", "."])
+def test_special_keys_that_should_exist(layout, k):
+    try:
+        layout.get_key_info(k)
+    except KeyError:
+        pytest.fail(f"Key `{k}` is not part of the layout")
+
+
+@pytest.mark.parametrize("k", ["shift", "SHIFT", "mic"])
+def test_special_keys_that_should_not_exist(layout, k):
+    with pytest.raises(KeyError):
+        layout.get_key_info(k)
diff --git a/tests/test_noise_model.py b/tests/test_noise_model.py
new file mode 100644
index 0000000..8763cbd
--- /dev/null
+++ b/tests/test_noise_model.py
@@ -0,0 +1,383 @@
+import shutil
+
+import pytest
+import requests
+
+import kebbie
+from kebbie.noise_model import NoiseModel, Typo
+
+
+@pytest.fixture(scope="session")
+def tmp_cache():
+    return "/tmp/kebbie_test"
+
+
+class MockCommonTypos:
+    def __init__(self):
+        self.text = "\n".join(
+            [
+                "\t".join(
+                    ["intvite", "invite", "IN", "invite", "google_wave_intvite(2)", "google_wave_invite(38802)"]
+                ),
+                "\t".join(["goole", "google", "RM", "goo(g)le", "my_goole_wave(1)", "my_google_wave(35841)"]),
+                "\t".join(["goolge", "google", "R1", "goo[l/g]e", "a_goolge_wave(1)", "a_google_wave(42205)"]),
+                "\t".join(["waze", "wave", "R2", "wa[z:v]e", "google_waxe_invite(2)", "google_wave_invite(38802)"]),
+            ]
+        )
+
+
+@pytest.fixture(scope="session")
+def noisy(monkeypatch_session, tmp_cache):
+    # Change the cache directory to a temporary folder, to not impact the
+    # current cache
+    monkeypatch_session.setattr(kebbie.noise_model, "CACHE_DIR", tmp_cache)
+
+    # Make sure the cache folder is empty
+    try:
+        shutil.rmtree(tmp_cache)
+    except FileNotFoundError:
+        pass
+
+    # Patch `requests` temporarily, so a custom list of common typos is used
+    with monkeypatch_session.context() as m:
+
+        def mock_get(*args, **kwargs):
+            return MockCommonTypos()
+
+        m.setattr(requests, "get", mock_get)
+
+        # Create a clean noise model (which will populate the cache with the
+        # mocked list of common typos)
+        # Note that we initialize it with all typo probabilities set to 0, and
+        # each test will individually change these probabilities
+        return NoiseModel(lang="en-US", typo_probs={t: 0.0 for t in Typo}, x_ratio=float("inf"), y_ratio=float("inf"))
+
+
+def test_retrieve_common_typos_cached(noisy):
+    # The common typos were retrieved in the fixture, and cached
+    # So, when rebuilding another noise model, the data from the cache should
+    # be retrieved (without pulling the data from the internet)
+    noisy2 = NoiseModel("en-US")
+    assert noisy.common_typos == noisy2.common_typos
+
+
+def test_common_typos_with_unsupported_language(noisy):
+    noisy2 = NoiseModel("en-US")
+    noisy2.lang = "fr-FR"  # We don't have a list of common typos for French
+    assert noisy2._get_common_typos() == {}
+
+
+@pytest.mark.parametrize(
+    "x", ["great", "Great", "GREAT", "grEAT", "éthéré", "éthÉré", "한국", "I'm", "in-depth", "gr8t"]
+)
+def test_correctable_words(noisy, x):
+    assert noisy._is_correctable(x)
+
+
+@pytest.mark.parametrize("x", ["667", ";", "???"])
+def test_non_correctable_words(noisy, x):
+    assert not noisy._is_correctable(x)
+
+
+def test_create_swipe_gesture_for_correctable_word(noisy, seeded):
+    points = noisy.swipe("make")
+    assert len(points) > 4
+
+
+def test_cant_create_swipe_gesture_for_non_correctable_word(noisy, seeded):
+    assert noisy.swipe("667") is None
+
+
+def test_perfect_fuzzy_type_normal_word(noisy, seeded):
+    word = "great"
+    keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+    assert len(keystrokes) == len(word)
+    assert all(k is not None for k in keystrokes)
+    assert typed_word == word
+    assert len(typos) == 0
+
+
+def test_perfect_fuzzy_type_word_with_characters_from_another_layer(noisy, seeded):
+    word = "greAt"
+    keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+    assert len(keystrokes) == len(word)
+    assert keystrokes[3] is None
+    assert typed_word == word
+    assert len(typos) == 0
+
+
+def test_perfect_fuzzy_type_word_with_unknown_characters(noisy, seeded):
+    word = "gr☂t"
+    keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+    assert len(keystrokes) == len(word)
+    assert keystrokes[2] is None
+    assert typed_word == word
+    assert len(typos) == 0
+
+
+def test_very_fuzzy_typing(noisy, seeded, monkeypatch):
+    word = "great"
+    with monkeypatch.context() as m:
+        # Set Gaussian standard deviation to 1, which means more chance
+        # to type outside of the intended key press (giving a fuzzy typo)
+        m.setattr(noisy, "x_ratio", 1)
+        m.setattr(noisy, "y_ratio", 1)
+
+        keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+    assert len(keystrokes) == len(word)
+    assert typed_word != word
+    assert len(typos) > 0
+    assert all(t == Typo.SUBSTITUTE_CHAR for t in typos)
+
+
+def test_introduce_typos_no_typos(noisy):
+    s = "This"
+    noisy_s, typos = noisy._introduce_typos(s, error_free=True)
+    assert noisy_s == s
+    assert len(typos) == 0
+
+
+def test_introduce_typos_common_typos(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.COMMON_TYPO, 1)
+
+        noisy_s, typos = noisy._introduce_typos("wave")
+
+    assert noisy_s == "waze"
+    assert len(typos) == 1 and typos[0] == Typo.COMMON_TYPO
+
+
+def test_introduce_typos_simplify_accent(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.SIMPLIFY_ACCENT, 1)
+
+        noisy_s, typos = noisy._introduce_typos("cassé")
+
+    assert noisy_s == "casse"
+    assert len(typos) == 1 and typos[0] == Typo.SIMPLIFY_ACCENT
+
+
+def test_introduce_typos_simplify_case(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.SIMPLIFY_CASE, 1)
+
+ noisy_s, typos = noisy._introduce_typos("This") + + assert noisy_s == "this" + assert len(typos) == 1 and typos[0] == Typo.SIMPLIFY_CASE + + +def test_introduce_typos_dont_simplify_case_for_full_uppercase(noisy, seeded, monkeypatch): + s = "HELLYEAH" + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.SIMPLIFY_CASE, 1) + + noisy_s, typos = noisy._introduce_typos(s) + + assert noisy_s == s + assert len(typos) == 0 + + +def test_introduce_typos_transposition(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.TRANSPOSE_CHAR, 1) + + noisy_s, typos = noisy._introduce_typos("hi") + + assert noisy_s == "ih" + assert len(typos) == 1 and typos[0] == Typo.TRANSPOSE_CHAR + + +def test_introduce_typos_no_transposition_on_different_layer(noisy, seeded, monkeypatch): + s = "Hi" + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.TRANSPOSE_CHAR, 1) + + noisy_s, typos = noisy._introduce_typos(s) + + assert noisy_s == s + assert len(typos) == 0 + + +def test_introduce_typos_no_transposition_on_unknown_character(noisy, seeded, monkeypatch): + s = "a⛬" + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.TRANSPOSE_CHAR, 1) + + noisy_s, typos = noisy._introduce_typos(s) + + assert noisy_s == s + assert len(typos) == 0 + + +def test_introduce_typos_delete_spelling_symbol(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.DELETE_SPELLING_SYMBOL, 1) + + noisy_s, typos = noisy._introduce_typos("I'm") + + assert noisy_s == "Im" + assert len(typos) == 1 and typos[0] == Typo.DELETE_SPELLING_SYMBOL + + +def test_introduce_typos_add_spelling_symbol(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.ADD_SPELLING_SYMBOL, 1) + + noisy_s, typos = noisy._introduce_typos("I'm") + + assert noisy_s == "I''m" + assert len(typos) == 1 and typos[0] == Typo.ADD_SPELLING_SYMBOL + + +def test_introduce_typos_delete_space(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.DELETE_SPACE, 1) + + noisy_s, typos = noisy._introduce_typos("This is") + + assert noisy_s == "Thisis" + assert len(typos) == 1 and typos[0] == Typo.DELETE_SPACE + + +def test_introduce_typos_delete_single_space(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.DELETE_SPACE, 1) + + noisy_s, typos = noisy._introduce_typos(" ") + + assert noisy_s == "" + assert len(typos) == 1 and typos[0] == Typo.DELETE_SPACE + + +def test_introduce_typos_add_space(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.ADD_SPACE, 1) + + noisy_s, typos = noisy._introduce_typos("This is") + + assert noisy_s == "This is" + assert len(typos) == 1 and typos[0] == Typo.ADD_SPACE + + +def test_introduce_typos_delete_punctuation(noisy, seeded, monkeypatch): 
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_PUNCTUATION, 1)
+
+        noisy_s, typos = noisy._introduce_typos("This, and")
+
+    assert noisy_s == "This and"
+    assert len(typos) == 1 and typos[0] == Typo.DELETE_PUNCTUATION
+
+
+def test_introduce_typos_add_punctuation(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_PUNCTUATION, 1)
+
+        noisy_s, typos = noisy._introduce_typos("This, and")
+
+    assert noisy_s == "This,, and"
+    assert len(typos) == 1 and typos[0] == Typo.ADD_PUNCTUATION
+
+
+def test_introduce_typos_delete_char(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("hi")
+
+    # Note, the last character is never deleted, because this is an auto-completion case
+    assert noisy_s == "i"
+    assert len(typos) == 1 and typos[0] == Typo.DELETE_CHAR
+
+
+def test_introduce_typos_add_char(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("hi")
+
+    assert noisy_s == "hhii"
+    assert len(typos) == 2 and all(t == Typo.ADD_CHAR for t in typos)
+
+
+def test_introduce_typos_dont_add_typos_on_numbers(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("he11o")
+
+    assert noisy_s == "hhee11oo"
+
+
+def test_introduce_typos_dont_add_typos_on_unknown_characters(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("h⛬e")
+
+    assert noisy_s == "hh⛬ee"
+
+
+def test_perfect_type_till_space(noisy, seeded):
+    keystrokes, typed, n_words, typos = noisy.type_till_space(["This", "is", "great"])
+
+    assert len(keystrokes) == len("This")
+    assert typed == "This"
+    assert n_words == 1
+    assert len(typos) == 0
+
+
+def test_imperfect_type_till_space_both_fuzzy_and_other_typo(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set Gaussian standard deviation to 1, which means more chance
+        # to type outside of the intended key press (giving a fuzzy typo)
+        m.setattr(noisy, "x_ratio", 1)
+        m.setattr(noisy, "y_ratio", 1)
+
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.SIMPLIFY_CASE, 1)
+
+        keystrokes, typed, n_words, typos = noisy.type_till_space(["Hello", "there"])
+
+    assert len(keystrokes) == len("Hello")
+    assert typed != "Hello"
+    assert n_words == 1
+    assert len(typos) > 2
+    assert Typo.SIMPLIFY_CASE in typos and Typo.SUBSTITUTE_CHAR in typos
+
+
+def test_type_till_space_with_missing_space_typo(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_SPACE, 1)
+        m.setattr(kebbie.noise_model, "FRONT_DELETION_MULTIPLIER", 1)
+
+        keystrokes, typed, n_words, typos = noisy.type_till_space(["Hello", "there"])
+
+    assert len(keystrokes) == len("Hello") + len("there")
+    assert typed == "Hellothere"
+    assert n_words == 2
+    assert len(typos) > 0 and all(t == Typo.DELETE_SPACE for t in typos)
diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py
new file mode 100644
index 0000000..7624829
--- /dev/null
+++ b/tests/test_tokenizer.py
@@ -0,0 +1,56 @@
+import pytest
+
+from kebbie.tokenizer import BasicTokenizer
+
+
+@pytest.fixture
+def tokenizer():
+    return BasicTokenizer()
+
+
+@pytest.mark.parametrize("x", ['I"m', "I“m", "I”m", "I„m"])
+def test_normalization_of_double_quotes(tokenizer, x):
+    assert tokenizer.preprocess(x) == "I m"
+
+
+@pytest.mark.parametrize("x", ["I'm", "I’m", "Iʻm", "I‘m", "I´m", "Iʼm"])
+def test_normalization_of_single_quotes(tokenizer, x):
+    assert tokenizer.preprocess(x) == "I'm"
+
+
+@pytest.mark.parametrize("x", ["in-depth", "in–depth", "in—depth", "in‑depth", "in−depth", "inーdepth"])
+def test_normalization_of_dash(tokenizer, x):
+    assert tokenizer.preprocess(x) == "in-depth"
+
+
+@pytest.mark.parametrize(
+    "x, out",
+    [
+        ("this.", "this "),
+        ("this․", "this "),
+        ("this...", "this "),
+        ("this…", "this "),
+        ("this,", "this "),
+        ("this‚", "this "),
+    ],
+)
+def test_normalization_of_other_symbols(tokenizer, x, out):
+    assert tokenizer.preprocess(x) == out
+
+
+@pytest.mark.parametrize(
+    "x", ["a,b", "a, b", "a ,b", "a , b", "a . b", "a ... b", "a. b", "a... b", "a? b", "a !b", "a : b"]
+)
+def test_punctuations_removal(tokenizer, x):
+    assert tokenizer.preprocess(x) == "a b"
+
+
+@pytest.mark.parametrize(
+    "inp, out", [("Several words", ["Several", "words"]), (" Several words ", ["Several", "words"])]
+)
+def test_word_split(tokenizer, inp, out):
+    assert tokenizer.word_split(inp) == out
+
+
+def test_update_context(tokenizer):
+    assert tokenizer.update_context("This is ", "great") == "This is great "