diff --git a/.github/badges/coverage.svg b/.github/badges/coverage.svg
index 565169e..ac4943a 100644
--- a/.github/badges/coverage.svg
+++ b/.github/badges/coverage.svg
@@ -9,13 +9,13 @@
-
+
   coverage
   coverage
-  28%
-  28%
+  47%
+  46%
diff --git a/kebbie/layout.py b/kebbie/layout.py
index 4efc1f1..4c020e7 100644
--- a/kebbie/layout.py
+++ b/kebbie/layout.py
@@ -124,7 +124,10 @@ def _extract_infos( # noqa: C901
                 pass
             else:
                 # Other special characters are ignored
-                continue
+                char = None
+
+            if char is None:
+                continue
 
             # Save the character and its key information
             # Save it only if it's not already in a previous klayer
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..cd4f487
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,21 @@
+import random
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def no_requests(monkeypatch):
+    # Make sure unit-tests don't use any resources pulled from the internet
+    # using `requests`
+    monkeypatch.delattr("requests.sessions.Session.request")
+
+
+@pytest.fixture(scope="session")
+def monkeypatch_session():
+    with pytest.MonkeyPatch.context() as mp:
+        yield mp
+
+
+@pytest.fixture
+def seeded():
+    random.seed(36)
diff --git a/tests/test_gesture.py b/tests/test_gesture.py
new file mode 100644
index 0000000..2493c85
--- /dev/null
+++ b/tests/test_gesture.py
@@ -0,0 +1,29 @@
+from kebbie.gesture import MAX_N_POINTS_PER_DIST, MIN_N_POINTS_PER_DIST, make_swipe_gesture
+from kebbie.utils import euclidian_dist
+
+
+def test_make_swipe_gesture_between_2_points(seeded):
+    control_points = [(0, 0), (100, 100)]
+
+    points = make_swipe_gesture(control_points)
+
+    d = euclidian_dist(control_points[0], control_points[1])
+    assert int(d * MIN_N_POINTS_PER_DIST) <= len(points) <= int(d * MAX_N_POINTS_PER_DIST)
+
+
+def test_make_swipe_gesture_between_more_points(seeded):
+    points = make_swipe_gesture([(0, 0), (100, 100), (50, 80), (-100, 10)])
+    assert len(points) > 4
+
+
+def test_make_swipe_gesture_single_control_point():
+    assert make_swipe_gesture([(0, 0)]) == [(0, 0)]
+
+
+def test_make_swipe_gesture_same_points():
+    assert make_swipe_gesture([(0, 0), (0, 0)]) == [(0, 0)]
+
+
+def test_make_swipe_gesture_too_small_points(seeded):
+    points = make_swipe_gesture([(0, 0), (0.1, 0)])
+    assert len(points) > 2
diff --git a/tests/test_layout.py b/tests/test_layout.py
new file mode 100644
index 0000000..017f7a3
--- /dev/null
+++ b/tests/test_layout.py
@@ -0,0 +1,93 @@
+import pytest
+
+from kebbie.layout import LayoutHelper
+
+
+@pytest.fixture
+def layout():
+    return LayoutHelper()
+
+
+def test_get_existing_key_info_same_layer(layout):
+    q_info = layout.get_key_info("q")
+    w_info = layout.get_key_info("w")
+
+    q_width, q_height, q_x_center, q_y_center, q_layer = q_info
+    w_width, w_height, w_x_center, w_y_center, w_layer = w_info
+    assert q_width == w_width
+    assert q_height == w_height
+    assert q_x_center < w_x_center
+    assert q_y_center == w_y_center
+    assert q_layer == w_layer
+
+
+def test_get_existing_key_info_different_layer(layout):
+    q_info = layout.get_key_info("q")
+    uq_info = layout.get_key_info("Q")
+
+    q_width, q_height, q_x_center, q_y_center, q_layer = q_info
+    uq_width, uq_height, uq_x_center, uq_y_center, uq_layer = uq_info
+    assert q_width == uq_width
+    assert q_height == uq_height
+    assert q_x_center == uq_x_center
+    assert q_y_center == uq_y_center
+    assert q_layer < uq_layer
+
+
+def test_get_existing_key_info_accent(layout):
+    e_info = layout.get_key_info("e")
+    é_info = layout.get_key_info("é")
+
+    e_width, e_height, e_x_center, e_y_center, e_layer = e_info
+    é_width, é_height, é_x_center, é_y_center, é_layer = é_info
+    assert e_width == é_width
+    assert e_height == é_height
+    assert e_layer < é_layer
+
+
+def test_get_non_existing_key_info(layout):
+    with pytest.raises(KeyError):
+        layout.get_key_info("☯")
+
+
+def test_get_key_within_bounds(layout):
+    f_info = layout.get_key_info("f")
+    f_width, f_height, f_x_center, f_y_center, f_layer = f_info
+
+    assert layout.get_key((f_x_center, f_y_center), 0) == "f"
+    assert layout.get_key((f_x_center + f_width / 3, f_y_center + f_height / 3), 0) == "f"
+    assert layout.get_key((f_x_center - f_width / 3, f_y_center - f_height / 3), 0) == "f"
+
+    assert layout.get_key((f_x_center, f_y_center), 1) == "F"
+
+
+def test_get_key_outside_of_bounds(layout):
+    f_info = layout.get_key_info("f")
+    f_width, f_height, f_x_center, f_y_center, f_layer = f_info
+
+    assert layout.get_key((f_x_center + f_width + 1, f_y_center), 0) != "f"
+    assert layout.get_key((f_x_center, f_y_center + f_height + 1), 0) != "f"
+
+
+def test_get_closest_border_key(layout):
+    assert layout.get_key((-5000, -5000), 0) == "q"
+
+
+def test_ignore_additional_keyboard_layers():
+    layout = LayoutHelper(ignore_layers_after=0)
+    with pytest.raises(KeyError):
+        layout.get_key_info("Q")
+
+
+@pytest.mark.parametrize("k", [" ", "."])
+def test_special_keys_that_should_exist(layout, k):
+    try:
+        layout.get_key_info(k)
+    except KeyError:
+        pytest.fail(f"Key `{k}` is not part of the layout")
+
+
+@pytest.mark.parametrize("k", ["shift", "SHIFT", "mic"])
+def test_special_keys_that_should_not_exist(layout, k):
+    with pytest.raises(KeyError):
+        layout.get_key_info(k)
diff --git a/tests/test_noise_model.py b/tests/test_noise_model.py
new file mode 100644
index 0000000..8763cbd
--- /dev/null
+++ b/tests/test_noise_model.py
@@ -0,0 +1,383 @@
+import shutil
+
+import pytest
+import requests
+
+import kebbie
+from kebbie.noise_model import NoiseModel, Typo
+
+
+@pytest.fixture(scope="session")
+def tmp_cache():
+    return "/tmp/kebbie_test"
+
+
+class MockCommonTypos:
+    def __init__(self):
+        self.text = "\n".join(
+            [
+                "\t".join(
+                    ["intvite", "invite", "IN", "invite", "google_wave_intvite(2)", "google_wave_invite(38802)"]
+                ),
+                "\t".join(["goole", "google", "RM", "goo(g)le", "my_goole_wave(1)", "my_google_wave(35841)"]),
+                "\t".join(["goolge", "google", "R1", "goo[l/g]e", "a_goolge_wave(1)", "a_google_wave(42205)"]),
+                "\t".join(["waze", "wave", "R2", "wa[z:v]e", "google_waxe_invite(2)", "google_wave_invite(38802)"]),
+            ]
+        )
+
+
+@pytest.fixture(scope="session")
+def noisy(monkeypatch_session, tmp_cache):
+    # Change the cache directory to a temporary folder, to not impact the
+    # current cache
+    monkeypatch_session.setattr(kebbie.noise_model, "CACHE_DIR", tmp_cache)
+
+    # Make sure the cache folder is empty
+    try:
+        shutil.rmtree(tmp_cache)
+    except FileNotFoundError:
+        pass
+
+    # Patch `requests` temporarily, so a custom list of common typos is used
+    with monkeypatch_session.context() as m:
+
+        def mock_get(*args, **kwargs):
+            return MockCommonTypos()
+
+        m.setattr(requests, "get", mock_get)
+
+        # Create a clean noise model (which will populate the cache with the
+        # mocked list of common typos)
+        # Note that we initialize it with all typo probabilities set to 0, and
+        # each test will individually change these probabilities
+        return NoiseModel(lang="en-US", typo_probs={t: 0.0 for t in Typo}, x_ratio=float("inf"), y_ratio=float("inf"))
+
+
+def test_retrieve_common_typos_cached(noisy):
+    # The common typos were retrieved in the fixture, and cached
+    # So, when rebuilding another noise model, the data from the cache should
+    # be retrieved (without pulling the data from the internet)
+    noisy2 = NoiseModel("en-US")
+    assert noisy.common_typos == noisy2.common_typos
+
+
+def test_common_typos_with_unsupported_language(noisy):
+    noisy2 = NoiseModel("en-US")
+    noisy2.lang = "fr-FR"  # We don't have a list of common typos for French
+    assert noisy2._get_common_typos() == {}
+
+
+@pytest.mark.parametrize(
+    "x", ["great", "Great", "GREAT", "grEAT", "éthéré", "éthÉré", "한국", "I'm", "in-depth", "gr8t"]
+)
+def test_correctable_words(noisy, x):
+    assert noisy._is_correctable(x)
+
+
+@pytest.mark.parametrize("x", ["667", ";", "???"])
+def test_non_correctable_words(noisy, x):
+    assert not noisy._is_correctable(x)
+
+
+def test_create_swipe_gesture_for_correctable_word(noisy, seeded):
+    points = noisy.swipe("make")
+    assert len(points) > 4
+
+
+def test_cant_create_swipe_gesture_for_non_correctable_word(noisy, seeded):
+    assert noisy.swipe("667") is None
+
+
+def test_perfect_fuzzy_type_normal_word(noisy, seeded):
+    word = "great"
+    keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+    assert len(keystrokes) == len(word)
+    assert all(k is not None for k in keystrokes)
+    assert typed_word == word
+    assert len(typos) == 0
+
+
+def test_perfect_fuzzy_type_word_with_characters_from_another_layer(noisy, seeded):
+    word = "greAt"
+    keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+    assert len(keystrokes) == len(word)
+    assert keystrokes[3] is None
+    assert typed_word == word
+    assert len(typos) == 0
+
+
+def test_perfect_fuzzy_type_word_with_unknown_characters(noisy, seeded):
+    word = "gr☂t"
+    keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+    assert len(keystrokes) == len(word)
+    assert keystrokes[2] is None
+    assert typed_word == word
+    assert len(typos) == 0
+
+
+def test_very_fuzzy_typing(noisy, seeded, monkeypatch):
+    word = "great"
+    with monkeypatch.context() as m:
+        # Set Gaussian standard deviation to 1, which means more chance
+        # to type outside of the intended key press (giving a fuzzy typo)
+        m.setattr(noisy, "x_ratio", 1)
+        m.setattr(noisy, "y_ratio", 1)
+
+        keystrokes, typed_word, typos = noisy._fuzzy_type(word)
+
+    assert len(keystrokes) == len(word)
+    assert typed_word != word
+    assert len(typos) > 0
+    assert all(t == Typo.SUBSTITUTE_CHAR for t in typos)
+
+
+def test_introduce_typos_no_typos(noisy):
+    s = "This"
+    noisy_s, typos = noisy._introduce_typos(s, error_free=True)
+    assert noisy_s == s
+    assert len(typos) == 0
+
+
+def test_introduce_typos_common_typos(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.COMMON_TYPO, 1)
+
+        noisy_s, typos = noisy._introduce_typos("wave")
+
+    assert noisy_s == "waze"
+    assert len(typos) == 1 and typos[0] == Typo.COMMON_TYPO
+
+
+def test_introduce_typos_simplify_accent(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.SIMPLIFY_ACCENT, 1)
+
+        noisy_s, typos = noisy._introduce_typos("cassé")
+
+    assert noisy_s == "casse"
+    assert len(typos) == 1 and typos[0] == Typo.SIMPLIFY_ACCENT
+
+
+def test_introduce_typos_simplify_case(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.SIMPLIFY_CASE, 1)
+
+ noisy_s, typos = noisy._introduce_typos("This") + + assert noisy_s == "this" + assert len(typos) == 1 and typos[0] == Typo.SIMPLIFY_CASE + + +def test_introduce_typos_dont_simplify_case_for_full_uppercase(noisy, seeded, monkeypatch): + s = "HELLYEAH" + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.SIMPLIFY_CASE, 1) + + noisy_s, typos = noisy._introduce_typos(s) + + assert noisy_s == s + assert len(typos) == 0 + + +def test_introduce_typos_transposition(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.TRANSPOSE_CHAR, 1) + + noisy_s, typos = noisy._introduce_typos("hi") + + assert noisy_s == "ih" + assert len(typos) == 1 and typos[0] == Typo.TRANSPOSE_CHAR + + +def test_introduce_typos_no_transposition_on_different_layer(noisy, seeded, monkeypatch): + s = "Hi" + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.TRANSPOSE_CHAR, 1) + + noisy_s, typos = noisy._introduce_typos(s) + + assert noisy_s == s + assert len(typos) == 0 + + +def test_introduce_typos_no_transposition_on_unknown_character(noisy, seeded, monkeypatch): + s = "a⛬" + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.TRANSPOSE_CHAR, 1) + + noisy_s, typos = noisy._introduce_typos(s) + + assert noisy_s == s + assert len(typos) == 0 + + +def test_introduce_typos_delete_spelling_symbol(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.DELETE_SPELLING_SYMBOL, 1) + + noisy_s, typos = noisy._introduce_typos("I'm") + + assert noisy_s == "Im" + assert len(typos) == 1 and typos[0] == Typo.DELETE_SPELLING_SYMBOL + + +def test_introduce_typos_add_spelling_symbol(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.ADD_SPELLING_SYMBOL, 1) + + noisy_s, typos = noisy._introduce_typos("I'm") + + assert noisy_s == "I''m" + assert len(typos) == 1 and typos[0] == Typo.ADD_SPELLING_SYMBOL + + +def test_introduce_typos_delete_space(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.DELETE_SPACE, 1) + + noisy_s, typos = noisy._introduce_typos("This is") + + assert noisy_s == "Thisis" + assert len(typos) == 1 and typos[0] == Typo.DELETE_SPACE + + +def test_introduce_typos_delete_single_space(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.DELETE_SPACE, 1) + + noisy_s, typos = noisy._introduce_typos(" ") + + assert noisy_s == "" + assert len(typos) == 1 and typos[0] == Typo.DELETE_SPACE + + +def test_introduce_typos_add_space(noisy, seeded, monkeypatch): + with monkeypatch.context() as m: + # Set probability of typo to 1 for this test, to ensure it's generated + m.setitem(noisy.probs, Typo.ADD_SPACE, 1) + + noisy_s, typos = noisy._introduce_typos("This is") + + assert noisy_s == "This is" + assert len(typos) == 1 and typos[0] == Typo.ADD_SPACE + + +def test_introduce_typos_delete_punctuation(noisy, seeded, monkeypatch): 
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_PUNCTUATION, 1)
+
+        noisy_s, typos = noisy._introduce_typos("This, and")
+
+    assert noisy_s == "This and"
+    assert len(typos) == 1 and typos[0] == Typo.DELETE_PUNCTUATION
+
+
+def test_introduce_typos_add_punctuation(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_PUNCTUATION, 1)
+
+        noisy_s, typos = noisy._introduce_typos("This, and")
+
+    assert noisy_s == "This,, and"
+    assert len(typos) == 1 and typos[0] == Typo.ADD_PUNCTUATION
+
+
+def test_introduce_typos_delete_char(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("hi")
+
+    # Note, the last character is never deleted, because this is an auto-completion case
+    assert noisy_s == "i"
+    assert len(typos) == 1 and typos[0] == Typo.DELETE_CHAR
+
+
+def test_introduce_typos_add_char(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("hi")
+
+    assert noisy_s == "hhii"
+    assert len(typos) == 2 and all(t == Typo.ADD_CHAR for t in typos)
+
+
+def test_introduce_typos_dont_add_typos_on_numbers(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("he11o")
+
+    assert noisy_s == "hhee11oo"
+
+
+def test_introduce_typos_dont_add_typos_on_unknown_characters(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.ADD_CHAR, 1)
+
+        noisy_s, typos = noisy._introduce_typos("h⛬e")
+
+    assert noisy_s == "hh⛬ee"
+
+
+def test_perfect_type_till_space(noisy, seeded):
+    keystrokes, typed, n_words, typos = noisy.type_till_space(["This", "is", "great"])
+
+    assert len(keystrokes) == len("This")
+    assert typed == "This"
+    assert n_words == 1
+    assert len(typos) == 0
+
+
+def test_imperfect_type_till_space_both_fuzzy_and_other_typo(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set Gaussian standard deviation to 1, which means more chance
+        # to type outside of the intended key press (giving a fuzzy typo)
+        m.setattr(noisy, "x_ratio", 1)
+        m.setattr(noisy, "y_ratio", 1)
+
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.SIMPLIFY_CASE, 1)
+
+        keystrokes, typed, n_words, typos = noisy.type_till_space(["Hello", "there"])
+
+    assert len(keystrokes) == len("Hello")
+    assert typed != "Hello"
+    assert n_words == 1
+    assert len(typos) > 2
+    assert Typo.SIMPLIFY_CASE in typos and Typo.SUBSTITUTE_CHAR in typos
+
+
+def test_type_till_space_with_missing_space_typo(noisy, seeded, monkeypatch):
+    with monkeypatch.context() as m:
+        # Set probability of typo to 1 for this test, to ensure it's generated
+        m.setitem(noisy.probs, Typo.DELETE_SPACE, 1)
+        m.setattr(kebbie.noise_model, "FRONT_DELETION_MULTIPLIER", 1)
+
+        keystrokes, typed, n_words, typos = noisy.type_till_space(["Hello", "there"])
+
+    assert len(keystrokes) == len("Hello") + len("there")
+    assert typed == "Hellothere"
+    assert n_words == 2
+    assert len(typos) > 0 and all(t == Typo.DELETE_SPACE for t in typos)
diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py
new file mode 100644
index 0000000..7624829
--- /dev/null
+++ b/tests/test_tokenizer.py
@@ -0,0 +1,56 @@
+import pytest
+
+from kebbie.tokenizer import BasicTokenizer
+
+
+@pytest.fixture
+def tokenizer():
+    return BasicTokenizer()
+
+
+@pytest.mark.parametrize("x", ['I"m', "I“m", "I”m", "I„m"])
+def test_normalization_of_double_quotes(tokenizer, x):
+    assert tokenizer.preprocess(x) == "I m"
+
+
+@pytest.mark.parametrize("x", ["I'm", "I’m", "Iʻm", "I‘m", "I´m", "Iʼm"])
+def test_normalization_of_single_quotes(tokenizer, x):
+    assert tokenizer.preprocess(x) == "I'm"
+
+
+@pytest.mark.parametrize("x", ["in-depth", "in–depth", "in—depth", "in‑depth", "in−depth", "inーdepth"])
+def test_normalization_of_dash(tokenizer, x):
+    assert tokenizer.preprocess(x) == "in-depth"
+
+
+@pytest.mark.parametrize(
+    "x, out",
+    [
+        ("this.", "this "),
+        ("this․", "this "),
+        ("this...", "this "),
+        ("this…", "this "),
+        ("this,", "this "),
+        ("this‚", "this "),
+    ],
+)
+def test_normalization_of_other_symbols(tokenizer, x, out):
+    assert tokenizer.preprocess(x) == out
+
+
+@pytest.mark.parametrize(
+    "x", ["a,b", "a, b", "a ,b", "a , b", "a . b", "a ... b", "a. b", "a... b", "a? b", "a !b", "a : b"]
+)
+def test_punctuations_removal(tokenizer, x):
+    assert tokenizer.preprocess(x) == "a b"
+
+
+@pytest.mark.parametrize(
+    "inp, out", [("Several words", ["Several", "words"]), (" Several words ", ["Several", "words"])]
+)
+def test_word_split(tokenizer, inp, out):
+    assert tokenizer.word_split(inp) == out
+
+
+def test_update_context(tokenizer):
+    assert tokenizer.update_context("This is ", "great") == "This is great "