Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mh-northlander committed May 21, 2024
1 parent e3b33b5 commit 242828c
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Copyright (c) 2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.worksap.nlp.lucene.sudachi.ja.attributes

import com.worksap.nlp.sudachi.Config
import com.worksap.nlp.sudachi.DictionaryFactory
import com.worksap.nlp.sudachi.Morpheme
import com.worksap.nlp.test.TestDictionary
import com.worksap.nlp.lucene.aliases.ToXContent
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertNull
import kotlin.test.assertTrue
import org.junit.Before
import org.junit.Rule

/**
 * Unit tests for [MorphemeAttributeImpl]: storing a morpheme on the attribute and exposing it
 * through `reflectWith`.
 */
class MorphemeAttributeImplTest {
  // JUnit rule built for the "system" test dictionary; setup() reads its root directory.
  @JvmField @Rule val testDic = TestDictionary("system")

  // Assigned by setup() before every test, hence lateinit.
  private lateinit var config: Config

  /**
   * Tokenizes [text] with a tokenizer created from [config] and returns the first morpheme.
   *
   * @return the first morpheme, or `null` when tokenization produced no morphemes.
   */
  fun getFirstMorpheme(text: String): Morpheme? {
    val tokenizer = DictionaryFactory().create(config).create()
    return tokenizer.tokenize(text).firstOrNull()
  }

  /** Loads `config/sudachi/sudachi.json` from the test dictionary root into [config]. */
  @Before
  fun setup() {
    val configDir = testDic.root.toPath().resolve("config/sudachi")
    config = Config.fromFile(configDir.resolve("sudachi.json"))
  }

  /** A fresh attribute holds no morpheme; after `setMorpheme` it returns the one that was set. */
  @Test
  fun setMorpheme() {
    val morphemeAtt = MorphemeAttributeImpl()
    assertNull(morphemeAtt.getMorpheme())

    val morph = checkNotNull(getFirstMorpheme("東京都")) { "tokenizer returned no morphemes" }
    morphemeAtt.setMorpheme(morph)
    assertEquals(morph, morphemeAtt.getMorpheme())
  }

  /** `reflectWith` must report the morpheme under the "morpheme" key as a [ToXContent] value. */
  @Test
  fun reflectMorpheme() {
    val morphemeAtt = MorphemeAttributeImpl()
    val morph = checkNotNull(getFirstMorpheme("東京都")) { "tokenizer returned no morphemes" }
    morphemeAtt.setMorpheme(morph)

    // Track the callback so the test cannot pass vacuously when nothing is reflected.
    var reflected = false
    morphemeAtt.reflectWith { attClass, key, value ->
      reflected = true
      assertEquals(MorphemeAttribute::class.java, attClass)
      assertEquals("morpheme", key)
      assertTrue(value is ToXContent)
    }
    assertTrue(reflected, "reflectWith did not invoke the reflector")
  }
}
21 changes: 21 additions & 0 deletions test-scripts/01-integration-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,27 @@ def test_tokenize_using_sudachi_tokenizer(self):
self.assertEqual(6, tokens[3]["end_offset"])
return

def test_explain_tokenizer_details(self):
    """Check the morpheme details reported by an explain-mode analyze call.

    Sends "すだち" through `sudachi_tokenizer` with `explain` enabled and
    verifies each expected field of the first token's `morpheme` entry.
    """
    request = {
        "tokenizer": "sudachi_tokenizer",
        "text": "すだち",
        "explain": True,
    }
    resp = es_instance.analyze(request)
    self.assertEqual(200, resp.status)

    first_token = json.loads(resp.data)["detail"]["tokenizer"]["tokens"][0]
    morpheme = first_token["morpheme"]

    # (field name, expected value) pairs, checked in this order.
    expected_fields = (
        ("surface", "すだち"),
        ("dictionaryForm", "すだち"),
        ("normalizedForm", "酢橘"),
        ("readingForm", "スダチ"),
        ("partOfSpeech", ["名詞", "普通名詞", "一般", "*", "*", "*"]),
    )
    for field, expected in expected_fields:
        # Presence first, so a missing key fails as an assertion rather than a KeyError.
        self.assertIn(field, morpheme)
        self.assertEqual(expected, morpheme[field])
    return


class TestICUFiltered(unittest.TestCase):
# requires analysis-icu plugin installed
Expand Down

0 comments on commit 242828c

Please sign in to comment.