From 242828c5235d58da0c8bb055487848c74d9f6c00 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Tue, 21 May 2024 11:46:03 +0900 Subject: [PATCH] add tests --- .../attributes/MorphemeAttributeImplTest.kt | 73 +++++++++++++++++++ test-scripts/01-integration-test.py | 21 ++++++ 2 files changed, 94 insertions(+) create mode 100644 src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt diff --git a/src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt b/src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt new file mode 100644 index 0000000..1df0434 --- /dev/null +++ b/src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024 Works Applications Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package com.worksap.nlp.lucene.sudachi.ja.attributes
+
+import com.worksap.nlp.sudachi.Config
+import com.worksap.nlp.sudachi.DictionaryFactory
+import com.worksap.nlp.sudachi.Morpheme
+import com.worksap.nlp.test.TestDictionary
+import com.worksap.nlp.lucene.aliases.ToXContent
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertNull
+import kotlin.test.assertTrue
+import org.junit.Before
+import org.junit.Rule
+
+/** Unit tests for [MorphemeAttributeImpl]. */
+class MorphemeAttributeImplTest {
+  @JvmField @Rule var testDic = TestDictionary("system")
+  private lateinit var config: Config
+
+  /** Tokenizes [text] with a freshly created dictionary; null when no morpheme is produced. */
+  fun getFirstMorpheme(text: String): Morpheme? =
+      DictionaryFactory().create(config).create().tokenize(text).firstOrNull()
+
+  /** Reads `config/sudachi/sudachi.json` under the test dictionary root into [config]. */
+  @Before
+  fun setup() {
+    val configDir = testDic.root.toPath().resolve("config/sudachi")
+    config = Config.fromFile(configDir.resolve("sudachi.json"))
+  }
+
+  /** A new attribute holds no morpheme; after `setMorpheme` it returns the given one. */
+  @Test
+  fun setMorpheme() {
+    val morphemeAtt = MorphemeAttributeImpl()
+    assertNull(morphemeAtt.getMorpheme())
+
+    val morph = requireNotNull(getFirstMorpheme("東京都")) { "no morpheme for input" }
+    morphemeAtt.setMorpheme(morph)
+    assertEquals(morph, morphemeAtt.getMorpheme())
+  }
+
+  /** `reflectWith` must hand the reflector the "morpheme" key with a [ToXContent] value. */
+  @Test
+  fun reflectMorpheme() {
+    val morphemeAtt = MorphemeAttributeImpl()
+    morphemeAtt.setMorpheme(requireNotNull(getFirstMorpheme("東京都")) { "no morpheme for input" })
+    var reflected = false // the checks below are vacuous if the reflector is never called
+    morphemeAtt.reflectWith(
+        fun(attClass, key, value) {
+          assertEquals(MorphemeAttribute::class.java, attClass)
+          assertEquals("morpheme", key)
+          assertTrue(value is ToXContent)
+          reflected = true
+        })
+    assertTrue(reflected, "reflectWith never invoked the reflector")
+  }
+}
diff --git a/test-scripts/01-integration-test.py b/test-scripts/01-integration-test.py
index b310391..383b8db 100644
--- a/test-scripts/01-integration-test.py
+++ b/test-scripts/01-integration-test.py
@@ -97,6 +97,27 @@ def test_tokenize_using_sudachi_tokenizer(self):
         self.assertEqual(6, tokens[3]["end_offset"])
         return
 
+    def test_explain_tokenizer_details(self):
+        body = {"tokenizer": "sudachi_tokenizer",
+                "text": "すだち", "explain": True}
+        resp = es_instance.analyze(body)
+        self.assertEqual(200, resp.status)
+
+        morpheme = json.loads(resp.data)[
+            "detail"]["tokenizer"]["tokens"][0]["morpheme"]
+        self.assertIn("surface", morpheme)
+        self.assertEqual("すだち", morpheme["surface"])
+        self.assertIn("dictionaryForm", morpheme)
+        self.assertEqual("すだち", morpheme["dictionaryForm"])
+        self.assertIn("normalizedForm", morpheme)
+        self.assertEqual("酢橘", morpheme["normalizedForm"])
+        self.assertIn("readingForm", morpheme)
+        self.assertEqual("スダチ", morpheme["readingForm"])
+        self.assertIn("partOfSpeech", morpheme)
+        self.assertEqual(["名詞", "普通名詞", "一般", "*", "*", "*"],
+                         morpheme["partOfSpeech"])
+        return
+
 
 class TestICUFiltered(unittest.TestCase):
     # requires analysis-icu plugin installed