Release v2.1.0
- Support the Sudachi tokenizer: #20. The snippet below shows how to select a splitting mode when constructing a `WordTokenizer`.
```python
from tiny_tokenizer import SentenceTokenizer
from tiny_tokenizer import WordTokenizer


if __name__ == "__main__":
    sentence_tokenizer = SentenceTokenizer()
    # You can choose the Sudachi splitting mode ("A", "B", or "C"):
    # https://github.com/WorksApplications/SudachiPy#as-a-python-package
    tokenizer = WordTokenizer(tokenizer="Sudachi", mode="A")

    sentence = "我輩は猫である."  # "I am a cat."
    print("input: ", sentence)
    result = tokenizer.tokenize(sentence)
    print(result)
```
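
The example above constructs a `SentenceTokenizer` but never calls it. As a minimal sketch of how the two classes can be combined, assuming `SentenceTokenizer.tokenize` splits a document into sentence strings (the document text and the loop are illustrative only):

```python
from tiny_tokenizer import SentenceTokenizer
from tiny_tokenizer import WordTokenizer


if __name__ == "__main__":
    sentence_tokenizer = SentenceTokenizer()
    word_tokenizer = WordTokenizer(tokenizer="Sudachi", mode="A")

    # Illustrative two-sentence document: "I am a cat. I have no name yet."
    document = "我輩は猫である。名前はまだない。"

    # Split into sentences first, then word-tokenize each sentence.
    for sentence in sentence_tokenizer.tokenize(document):
        print(sentence, "->", word_tokenizer.tokenize(sentence))
```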