diff --git a/.github/badges/coverage.svg b/.github/badges/coverage.svg
index 565169e..ac4943a 100644
--- a/.github/badges/coverage.svg
+++ b/.github/badges/coverage.svg
@@ -9,13 +9,13 @@
 [SVG text garbled in extraction: coverage badge value updated from "28%" / "28%" to "47%" / "46%"; the "coverage" label is unchanged]
diff --git a/kebbie/layout.py b/kebbie/layout.py
index 4efc1f1..4c020e7 100644
--- a/kebbie/layout.py
+++ b/kebbie/layout.py
@@ -124,7 +124,10 @@ def _extract_infos( # noqa: C901
             pass
         else:
             # Other special characters are ignored
-            continue
+            char = None
+
+        if char is None:
+            continue
 
         # Save the character and its key information
         # Save it only if it's not already in a previous klayer
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..cd4f487
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,21 @@
+import random
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def no_requests(monkeypatch):
+    # Make sure unit tests never pull resources from the internet
+    # through `requests`
+    monkeypatch.delattr("requests.sessions.Session.request")
+
+
+@pytest.fixture(scope="session")
+def monkeypatch_session():
+    with pytest.MonkeyPatch.context() as mp:
+        yield mp
+
+
+@pytest.fixture
+def seeded():
+    random.seed(36)
diff --git a/tests/test_gesture.py b/tests/test_gesture.py
new file mode 100644
index 0000000..2493c85
--- /dev/null
+++ b/tests/test_gesture.py
@@ -0,0 +1,29 @@
+from kebbie.gesture import MAX_N_POINTS_PER_DIST, MIN_N_POINTS_PER_DIST, make_swipe_gesture
+from kebbie.utils import euclidian_dist
+
+
+def test_make_swipe_gesture_between_2_points(seeded):
+    control_points = [(0, 0), (100, 100)]
+
+    points = make_swipe_gesture(control_points)
+
+    d = euclidian_dist(control_points[0], control_points[1])
+    assert int(d * MIN_N_POINTS_PER_DIST) <= len(points) <= int(d * MAX_N_POINTS_PER_DIST)
+
+
+def test_make_swipe_gesture_between_more_points(seeded):
+    points = make_swipe_gesture([(0, 0), (100, 100), (50, 80), (-100, 10)])
+    assert len(points) > 4
+
+
+def test_make_swipe_gesture_single_control_point():
+    assert make_swipe_gesture([(0, 0)]) == [(0, 0)]
+
+
+def test_make_swipe_gesture_same_points():
+    assert make_swipe_gesture([(0, 0), (0, 0)]) == [(0, 0)]
+
+
+def test_make_swipe_gesture_too_small_points(seeded):
+    points = make_swipe_gesture([(0, 0), (0.1, 0)])
+    assert len(points) > 2
diff --git a/tests/test_layout.py b/tests/test_layout.py
new file mode 100644
index 0000000..017f7a3
--- /dev/null
+++ b/tests/test_layout.py
@@ -0,0 +1,93 @@
+import pytest
+
+from kebbie.layout import LayoutHelper
+
+
+@pytest.fixture
+def layout():
+    return LayoutHelper()
+
+
+def test_get_existing_key_info_same_layer(layout):
+    q_info = layout.get_key_info("q")
+    w_info = layout.get_key_info("w")
+
+    q_width, q_height, q_x_center, q_y_center, q_layer = q_info
+    w_width, w_height, w_x_center, w_y_center, w_layer = w_info
+    assert q_width == w_width
+    assert q_height == w_height
+    assert q_x_center < w_x_center
+    assert q_y_center == w_y_center
+    assert q_layer == w_layer
+
+
+def test_get_existing_key_info_different_layer(layout):
+    q_info = layout.get_key_info("q")
+    uq_info = layout.get_key_info("Q")
+
+    q_width, q_height, q_x_center, q_y_center, q_layer = q_info
+    uq_width, uq_height, uq_x_center, uq_y_center, uq_layer = uq_info
+    assert q_width == uq_width
+    assert q_height == uq_height
+    assert q_x_center == uq_x_center
+    assert q_y_center == uq_y_center
+    assert q_layer < uq_layer
+
+
+def test_get_existing_key_info_accent(layout):
+    e_info = layout.get_key_info("e")
+    é_info = layout.get_key_info("é")
+
+    e_width, e_height, e_x_center, e_y_center, e_layer = e_info
+    é_width, é_height, é_x_center, é_y_center, é_layer = é_info
+    assert e_width == é_width
+    assert e_height == é_height
+    assert e_layer < é_layer
+
+
+def test_get_non_existing_key_info(layout):
+    with pytest.raises(KeyError):
+        layout.get_key_info("☯")
+
+
+def test_get_key_within_bounds(layout):
+    f_info = layout.get_key_info("f")
+    f_width, f_height, f_x_center, f_y_center, f_layer = f_info
+
+    assert layout.get_key((f_x_center, f_y_center), 0) == "f"
+    assert layout.get_key((f_x_center + f_width / 3, f_y_center + f_height / 3), 0) == "f"
+    assert layout.get_key((f_x_center - f_width / 3, f_y_center - f_height / 3), 0) == "f"
+
+    assert layout.get_key((f_x_center, f_y_center), 1) == "F"
+
+
+def test_get_key_outside_of_bounds(layout):
+    f_info = layout.get_key_info("f")
+    f_width, f_height, f_x_center, f_y_center, f_layer = f_info
+
+    assert layout.get_key((f_x_center + f_width + 1, f_y_center), 0) != "f"
+    assert layout.get_key((f_x_center, f_y_center + f_height + 1), 0) != "f"
+
+
+def test_get_closest_border_key(layout):
+    assert layout.get_key((-5000, -5000), 0) == "q"
+
+
+def test_ignore_additional_keyboard_layers():
+    layout = LayoutHelper(ignore_layers_after=0)
+    with pytest.raises(KeyError):
+        layout.get_key_info("Q")
+
+
+@pytest.mark.parametrize("k", [" ", "."])
+def test_special_keys_that_should_exist(layout, k):
+    try:
+        layout.get_key_info(k)
+    except KeyError:
+        pytest.fail(f"Key `{k}` is not part of the layout")
+
+
+@pytest.mark.parametrize("k", ["shift", "SHIFT", "mic"])
+def test_special_keys_that_should_not_exist(layout, k):
+    with pytest.raises(KeyError):
+        layout.get_key_info(k)
diff --git a/tests/test_noise_model.py b/tests/test_noise_model.py
new file mode 100644
index 0000000..8763cbd
--- /dev/null
+++ b/tests/test_noise_model.py
@@ -0,0 +1,383 @@
+import shutil
+
+import pytest
+import requests
+
+import kebbie
+from kebbie.noise_model import NoiseModel, Typo
+
+
+@pytest.fixture(scope="session")
+def tmp_cache():
+    return "/tmp/kebbie_test"
+
+
+class MockCommonTypos:
+    def __init__(self):
+        self.text = "\n".join(
+            [
+                "\t".join(
+                    ["intvite", "invite", "IN", "invite", "google_wave_intvite(2)", "google_wave_invite(38802)"]
+                ),
+                "\t".join(["goole", "google", "RM", "goo(g)le", "my_goole_wave(1)", "my_google_wave(35841)"]),
+                "\t".join(["goolge", "google", "R1", "goo[l/g]e", "a_goolge_wave(1)", "a_google_wave(42205)"]),
+                "\t".join(["waze", "wave", "R2", "wa[z:v]e", "google_waxe_invite(2)", "google_wave_invite(38802)"]),
+            ]
+        )
+
+
+@pytest.fixture(scope="session")
+def noisy(monkeypatch_session, tmp_cache):
+    # Change the cache directory to a temporary folder, to not impact the
+    # current cache
+    monkeypatch_session.setattr(kebbie.noise_model, "CACHE_DIR", tmp_cache)
+
+    # Make sure the cache folder is empty
+    try:
+        shutil.rmtree(tmp_cache)
+    except FileNotFoundError:
+        pass
+
+    # Patch `requests` temporarily, so a custom list of common typos is used
+    with monkeypatch_session.context() as m:
+
+        def mock_get(*args, **kwargs):
+            return MockCommonTypos()
+
+        m.setattr(requests, "get", mock_get)
+
+        # Create a clean noise model (which will populate the cache with the
+        # mocked list of common typos)
+        # Note that we initialize it with all typo probabilities set to 0, and
+        # each test will individually change these probabilities
+        return NoiseModel(lang="en-US", typo_probs={t: 0.0 for t in Typo}, x_ratio=float("inf"), y_ratio=float("inf"))
+
+
+def test_retrieve_common_typos_cached(noisy):
+    # The common typos were retrieved in the fixture, and cached
+    # So, when building another noise model, the data should be retrieved
+    # from the cache (without pulling anything from the internet)
+    noisy2 = NoiseModel("en-US")
+    assert noisy.common_typos == noisy2.common_typos
+
+
+def test_common_typos_with_unsupported_language(noisy):
+    noisy2 = NoiseModel("en-US")
+    noisy2.lang = "fr-FR"  # We don't have a list of common typos for French
+    assert noisy2._get_common_typos() == {}
+
+
+@pytest.mark.parametrize(
+    "x", ["great", "Great", "GREAT", "grEAT", "éthéré", "éthÉré", "한국", "I'm", "in-depth", "gr8t"]
+)
+def test_correctable_words(noisy, x):
+    assert noisy._is_correctable(x)
+
+
+@pytest.mark.parametrize("x", ["667", ";", "???"])
+def test_non_correctable_words(noisy, x):
+    assert not noisy._is_correctable(x)
+
+
+def test_create_swipe_gesture_for_correctable_word(noisy, seeded):
+    points = noisy.swipe("make")
+    assert len(points) > 4
+
+
+def test_cant_create_swipe_gesture_for_non_correctable_word(noisy, seeded):
+    assert noisy.swipe("667") is None
+
+
+def test_perfect_fuzzy_type_normal_word(noisy, seeded):
+    word = "great"
+    keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+    assert len(keystrokes) == len(word)
+    assert all(k is not None for k in keystrokes)
+    assert typed_word == word
+    assert len(typos) == 0
+
+
+def test_perfect_fuzzy_type_word_with_characters_from_another_layer(noisy, seeded):
+    word = "greAt"
+    keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+    assert len(keystrokes) == len(word)
+    assert keystrokes[3] is None
+    assert typed_word == word
+    assert len(typos) == 0
+
+
+def test_perfect_fuzzy_type_word_with_unknown_characters(noisy, seeded):
+    word = "gr☂t"
+    keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+    assert len(keystrokes) == len(word)
+    assert keystrokes[2] is None
+    assert typed_word == word
+    assert len(typos) == 0
+
+
+def test_very_fuzzy_typing(noisy, seeded, monkeypatch):
+    word = "great"
+    with monkeypatch.context() as m:
+        # Set the Gaussian standard deviation to 1, which makes it much more
+        # likely to tap outside the intended key (giving a fuzzy typo)
+        m.setattr(noisy, "x_ratio", 1)
+        m.setattr(noisy, "y_ratio", 1)
+
+        keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+        assert len(keystrokes) == len(word)
+        assert typed_word != word
+        assert len(typos) > 0
+        assert all(t == Typo.SUBSTITUTE_CHAR for t in typos)
+
+
+def test_introduce_typos_no_typos(noisy):
+    s = "This"
+    noisy_s, typos = noisy._introduce_typos(s, error_free=True)
+    assert noisy_s == s
+    assert len(typos) == 0
+
+
+def test_introduce_typos_common_typos(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.COMMON_TYPO, 1)
+
+        noisy_s, typos = noisy._introduce_typos("wave")
+
+        assert noisy_s == "waze"
+        assert len(typos) == 1 and typos[0] == Typo.COMMON_TYPO
+
+
+def test_introduce_typos_simplify_accent(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.SIMPLIFY_ACCENT, 1)
+
+        noisy_s, typos = noisy._introduce_typos("cassé")
+
+        assert noisy_s == "casse"
+        assert len(typos) == 1 and typos[0] == Typo.SIMPLIFY_ACCENT
+
+
+def test_introduce_typos_simplify_case(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.SIMPLIFY_CASE, 1)
+
+        noisy_s, typos = noisy._introduce_typos("This")
+
+        assert noisy_s == "this"
+        assert len(typos) == 1 and typos[0] == Typo.SIMPLIFY_CASE
+
+
+def test_introduce_typos_dont_simplify_case_for_full_uppercase(noisy, seeded, monkeypatch):
+    s = "HELLYEAH"
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.SIMPLIFY_CASE, 1)
+
+        noisy_s, typos = noisy._introduce_typos(s)
+
+        assert noisy_s == s
+        assert len(typos) == 0
+
+
+def test_introduce_typos_transposition(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.TRANSPOSE_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("hi")
+
+        assert noisy_s == "ih"
+        assert len(typos) == 1 and typos[0] == Typo.TRANSPOSE_CHAR
+
+
+def test_introduce_typos_no_transposition_on_different_layer(noisy, seeded, monkeypatch):
+    s = "Hi"
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.TRANSPOSE_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos(s)
+
+        assert noisy_s == s
+        assert len(typos) == 0
+
+
+def test_introduce_typos_no_transposition_on_unknown_character(noisy, seeded, monkeypatch):
+    s = "a⛬"
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.TRANSPOSE_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos(s)
+
+        assert noisy_s == s
+        assert len(typos) == 0
+
+
+def test_introduce_typos_delete_spelling_symbol(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_SPELLING_SYMBOL, 1)
+
+        noisy_s, typos = noisy._introduce_typos("I'm")
+
+        assert noisy_s == "Im"
+        assert len(typos) == 1 and typos[0] == Typo.DELETE_SPELLING_SYMBOL
+
+
+def test_introduce_typos_add_spelling_symbol(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_SPELLING_SYMBOL, 1)
+
+        noisy_s, typos = noisy._introduce_typos("I'm")
+
+        assert noisy_s == "I''m"
+        assert len(typos) == 1 and typos[0] == Typo.ADD_SPELLING_SYMBOL
+
+
+def test_introduce_typos_delete_space(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_SPACE, 1)
+
+        noisy_s, typos = noisy._introduce_typos("This is")
+
+        assert noisy_s == "Thisis"
+        assert len(typos) == 1 and typos[0] == Typo.DELETE_SPACE
+
+
+def test_introduce_typos_delete_single_space(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_SPACE, 1)
+
+        noisy_s, typos = noisy._introduce_typos(" ")
+
+        assert noisy_s == ""
+        assert len(typos) == 1 and typos[0] == Typo.DELETE_SPACE
+
+
+def test_introduce_typos_add_space(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_SPACE, 1)
+
+        noisy_s, typos = noisy._introduce_typos("This is")
+
+        assert noisy_s == "This  is"
+        assert len(typos) == 1 and typos[0] == Typo.ADD_SPACE
+
+
+def test_introduce_typos_delete_punctuation(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_PUNCTUATION, 1)
+
+        noisy_s, typos = noisy._introduce_typos("This, and")
+
+        assert noisy_s == "This and"
+        assert len(typos) == 1 and typos[0] == Typo.DELETE_PUNCTUATION
+
+
+def test_introduce_typos_add_punctuation(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_PUNCTUATION, 1)
+
+        noisy_s, typos = noisy._introduce_typos("This, and")
+
+        assert noisy_s == "This,, and"
+        assert len(typos) == 1 and typos[0] == Typo.ADD_PUNCTUATION
+
+
+def test_introduce_typos_delete_char(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("hi")
+
+        # Note: the last character is never deleted, because that would turn this into an auto-completion case
+        assert noisy_s == "i"
+        assert len(typos) == 1 and typos[0] == Typo.DELETE_CHAR
+
+
+def test_introduce_typos_add_char(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("hi")
+
+        assert noisy_s == "hhii"
+        assert len(typos) == 2 and all(t == Typo.ADD_CHAR for t in typos)
+
+
+def test_introduce_typos_dont_add_typos_on_numbers(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("he11o")
+
+        assert noisy_s == "hhee11oo"
+
+
+def test_introduce_typos_dont_add_typos_on_unknown_characters(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("h⛬e")
+
+        assert noisy_s == "hh⛬ee"
+
+
+def test_perfect_type_till_space(noisy, seeded):
+    keystrokes, typed, n_words, typos = noisy.type_till_space(["This", "is", "great"])
+
+    assert len(keystrokes) == len("This")
+    assert typed == "This"
+    assert n_words == 1
+    assert len(typos) == 0
+
+
+def test_imperfect_type_till_space_both_fuzzy_and_other_typo(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set the Gaussian standard deviation to 1, which makes it much more
+        # likely to tap outside the intended key (giving a fuzzy typo)
+        m.setattr(noisy, "x_ratio", 1)
+        m.setattr(noisy, "y_ratio", 1)
+
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.SIMPLIFY_CASE, 1)
+
+        keystrokes, typed, n_words, typos = noisy.type_till_space(["Hello", "there"])
+
+        assert len(keystrokes) == len("Hello")
+        assert typed != "Hello"
+        assert n_words == 1
+        assert len(typos) > 2
+        assert Typo.SIMPLIFY_CASE in typos and Typo.SUBSTITUTE_CHAR in typos
+
+
+def test_type_till_space_with_missing_space_typo(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_SPACE, 1)
+        m.setattr(kebbie.noise_model, "FRONT_DELETION_MULTIPLIER", 1)
+
+        keystrokes, typed, n_words, typos = noisy.type_till_space(["Hello", "there"])
+
+        assert len(keystrokes) == len("Hello") + len("there")
+        assert typed == "Hellothere"
+        assert n_words == 2
+        assert len(typos) > 0 and all(t == Typo.DELETE_SPACE for t in typos)
diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py
new file mode 100644
index 0000000..7624829
--- /dev/null
+++ b/tests/test_tokenizer.py
@@ -0,0 +1,56 @@
+import pytest
+
+from kebbie.tokenizer import BasicTokenizer
+
+
+@pytest.fixture
+def tokenizer():
+    return BasicTokenizer()
+
+
+@pytest.mark.parametrize("x", ['I"m', "I“m", "I”m", "I„m"])
+def test_normalization_of_double_quotes(tokenizer, x):
+    assert tokenizer.preprocess(x) == "I m"
+
+
+@pytest.mark.parametrize("x", ["I'm", "I’m", "Iʻm", "I‘m", "I´m", "Iʼm"])
+def test_normalization_of_single_quotes(tokenizer, x):
+    assert tokenizer.preprocess(x) == "I'm"
+
+
+@pytest.mark.parametrize("x", ["in-depth", "in–depth", "in—depth", "in‑depth", "in−depth", "inーdepth"])
+def test_normalization_of_dash(tokenizer, x):
+    assert tokenizer.preprocess(x) == "in-depth"
+
+
+@pytest.mark.parametrize(
+    "x, out",
+    [
+        ("this.", "this "),
+        ("this․", "this "),
+        ("this...", "this "),
+        ("this…", "this "),
+        ("this,", "this "),
+        ("this‚", "this "),
+    ],
+)
+def test_normalization_of_other_symbols(tokenizer, x, out):
+    assert tokenizer.preprocess(x) == out
+
+
+@pytest.mark.parametrize(
+    "x", ["a,b", "a, b", "a ,b", "a , b", "a . b", "a ... b", "a. b", "a... b", "a? b", "a !b", "a : b"]
+)
+def test_punctuations_removal(tokenizer, x):
+    assert tokenizer.preprocess(x) == "a b"
+
+
+@pytest.mark.parametrize(
+    "inp, out", [("Several words", ["Several", "words"]), (" Several words ", ["Several", "words"])]
+)
+def test_word_split(tokenizer, inp, out):
+    assert tokenizer.word_split(inp) == out
+
+
+def test_update_context(tokenizer):
+    assert tokenizer.update_context("This is ", "great") == "This is great "