From fd4561c7dbc09ab9958942dd9db7a66291f2f075 Mon Sep 17 00:00:00 2001
From: yokomotod
Date: Thu, 24 Jun 2021 14:21:01 +0900
Subject: [PATCH] fix: add fallback for not in trie words

---
 sudachipy/dictionarylib/doublearraylexicon.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/sudachipy/dictionarylib/doublearraylexicon.py b/sudachipy/dictionarylib/doublearraylexicon.py
index 39337f8..fb77ee8 100644
--- a/sudachipy/dictionarylib/doublearraylexicon.py
+++ b/sudachipy/dictionarylib/doublearraylexicon.py
@@ -79,13 +79,21 @@ def size(self) -> int:
 
     def get_word_id(self, headword: str, pos_id: int, reading_form: str) -> int:
         for wid, _ in self.lookup(headword.encode('utf-8'), 0):
-            info = self.word_infos.get_word_info(wid)
-            if info.surface == headword \
-                    and info.pos_id == pos_id \
-                    and info.reading_form == reading_form:
+            if self._compare_word_id(wid, headword, pos_id, reading_form):
                 return wid
+
+        for wid in range(self.word_infos.size()):
+            if self._compare_word_id(wid, headword, pos_id, reading_form):
+                return wid
+
         return -1
 
+    def _compare_word_id(self, wid: int, headword: str, pos_id: int, reading_form: str) -> bool:
+        info = self.word_infos.get_word_info(wid)
+        return info.surface == headword \
+            and info.pos_id == pos_id \
+            and info.reading_form == reading_form
+
     def get_dictionary_id(self, word_id: int) -> int:
         return 0
 