Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate dictionary factory #259

Merged
merged 2 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/main/java/com/worksap/nlp/sudachi/Dictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,19 @@
*/
public interface Dictionary extends AutoCloseable {

/**
* Creates a {@code Dictionary} from the given configuration.
*
* <p>
* This static factory is the replacement for the deprecated
* {@link DictionaryFactory#create(Config)}. The current implementation always
* returns a new {@link JapaneseDictionary}. {@code Dictionary} is
* {@link AutoCloseable}, so the caller should close the returned instance
* (for example with try-with-resources) once it is no longer needed.
*
* @param config
* configuration of the dictionary to create
* @return a newly created {@link Dictionary}
* @throws IOException
* if reading a file fails
*/
public static Dictionary load(Config config) throws IOException {
return new JapaneseDictionary(config);
}

/**
* Creates a tokenizer instance.
*
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/com/worksap/nlp/sudachi/DictionaryFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@

/**
* Build a {@link Dictionary} instance from a dictionary file.
*
* @deprecated use {@link Dictionary#load} instead
*/
@Deprecated
public class DictionaryFactory {

/**
Expand All @@ -47,7 +50,10 @@ public Dictionary create() throws IOException {
* @return {@link Dictionary}
* @throws IOException
* if reading a file fails
*
* @deprecated use {@link Dictionary#load(Config)} instead
*/
@Deprecated
public Dictionary create(Config config) throws IOException {
// Delegate to the replacement API so that the deprecated and the new entry
// point share a single construction path and cannot drift apart.
return Dictionary.load(config);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class JapaneseDictionaryTest {
@Test
fun instantiateConfigWithoutCharDef() {
val config = setupMinimumConfig()
val jdict = DictionaryFactory().create(config)
val jdict = Dictionary.load(config)

assertNotNull(jdict)
assertNotNull(jdict.tokenizer())
Expand All @@ -87,7 +87,7 @@ class JapaneseDictionaryTest {
@Test
fun throwExceptionOnDictionaryUsageAfterClose() {
val config = setupMinimumConfig()
val jdict = DictionaryFactory().create(config)
val jdict = Dictionary.load(config)
jdict.close()

assertFailsWith(IllegalStateException::class) { jdict.tokenizer() }
Expand All @@ -96,7 +96,7 @@ class JapaneseDictionaryTest {
@Test
fun throwExceptionOnTokenizerUsageAfterClose() {
val config = setupMinimumConfig()
val jdict = DictionaryFactory().create(config)
val jdict = Dictionary.load(config)
val tok = jdict.tokenizer()
jdict.close()

Expand Down Expand Up @@ -189,7 +189,7 @@ abc,1,1,4675,AbC,名詞,普通名詞,一般,*,*,*,エービーシー,,,,,""")
.clearUserDictionaries()
.systemDictionary(sdict)
.addUserDictionary(udict)
val mdict = DictionaryFactory().create(cfg)
val mdict = Dictionary.load(cfg)

val found = mdict.lookup("ABC")
assertEquals(4, found.size)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Works Applications Co., Ltd.
* Copyright (c) 2022-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -40,7 +40,7 @@ class JapaneseTokenizerMaskTest {
cfg0.addOovProviderPlugin(CaptureOtherWords::class.java)
cfg0.addOovProviderPlugin(SimpleOovProviderPlugin::class.java)
val cfg = cfg0.withFallback(TestDictionary.user0Cfg())
val dic = DictionaryFactory().create(cfg) as JapaneseDictionary
val dic = Dictionary.load(cfg) as JapaneseDictionary
val tokenizer = dic.tokenizer()

assertEquals(2, dic.oovProviderPlugins.size)
Expand All @@ -61,7 +61,7 @@ class JapaneseTokenizerMaskTest {
fun correctMasksWithSecondProvider() {
val cfg = TestDictionary.user0Cfg()
cfg.addOovProviderPlugin(CaptureOtherWords::class.java)
val dic = DictionaryFactory().create(cfg) as JapaneseDictionary
val dic = Dictionary.load(cfg) as JapaneseDictionary
val tokenizer = dic.tokenizer()

assertIs<SimpleOovProviderPlugin>(dic.oovProviderPlugins[0])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ public void zeroLengthMorpheme() {
@Test
public void disableEmptyMorpheme() throws IOException {
Config config = TestDictionary.INSTANCE.user1Cfg();
dict = new DictionaryFactory().create(Config.empty().withFallback(config).allowEmptyMorpheme(false));
dict = Dictionary.load(Config.empty().withFallback(config).allowEmptyMorpheme(false));
tokenizer = (JapaneseTokenizer) dict.tokenizer();

List<Morpheme> s = tokenizer.tokenize("…");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,7 +32,7 @@ public class JoinNumericPluginTest {
public void setUp() throws IOException {
Config config = TestDictionary.INSTANCE.user0Cfg()
.characterDefinition(getClass().getClassLoader().getResource("joinnumeric/char.def"));
Dictionary dict = new DictionaryFactory().create(config);
Dictionary dict = Dictionary.load(config);
tokenizer = (JapaneseTokenizer) dict.tokenizer();

plugin = new JoinNumericPlugin();
Expand Down
14 changes: 7 additions & 7 deletions src/test/java/com/worksap/nlp/sudachi/OovProviderPluginTest.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Works Applications Co., Ltd.
* Copyright (c) 2022-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -49,7 +49,7 @@ class OovProviderPluginTest {
val cfg = TestDictionary.user0Cfg()
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "*")
val inst = DictionaryFactory().create(cfg) as JapaneseDictionary
val inst = Dictionary.load(cfg) as JapaneseDictionary
val plugin = assertIs<FakeOovProvider>(inst.oovProviderPlugins.last())
assertEquals(4, plugin.posId)
}
Expand All @@ -60,7 +60,7 @@ class OovProviderPluginTest {
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "new")
.add(USER_POS, USER_POS_ALLOW)
val inst = DictionaryFactory().create(cfg) as JapaneseDictionary
val inst = Dictionary.load(cfg) as JapaneseDictionary
val plugin = assertIs<FakeOovProvider>(inst.oovProviderPlugins.last())
assertEquals(8, plugin.posId)
}
Expand All @@ -71,15 +71,15 @@ class OovProviderPluginTest {
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "*")
.add(USER_POS, "test")
assertFails { DictionaryFactory().create(cfg) }
assertFails { Dictionary.load(cfg) }
}

@Test
fun failInvalidPos() {
val cfg = TestDictionary.user0Cfg()
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "test")
assertFails { DictionaryFactory().create(cfg) }
assertFails { Dictionary.load(cfg) }
}

@Test
Expand All @@ -91,7 +91,7 @@ class OovProviderPluginTest {
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "new")
.add(USER_POS, USER_POS_ALLOW)
val inst = DictionaryFactory().create(cfg) as JapaneseDictionary
val inst = Dictionary.load(cfg) as JapaneseDictionary
val oovPlugins = inst.oovProviderPlugins
val p1 = assertIs<FakeOovProvider>(oovPlugins[oovPlugins.size - 2])
assertEquals(8, p1.posId)
Expand All @@ -105,7 +105,7 @@ class OovProviderPluginTest {
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "new")
.add(USER_POS, USER_POS_ALLOW)
val dict = DictionaryFactory().create(cfg) as JapaneseDictionary
val dict = Dictionary.load(cfg) as JapaneseDictionary
val plugin = assertIs<FakeOovProvider>(dict.oovProviderPlugins.last())
assertEquals(8, plugin.posId)
val tokinzer = dict.tokenizer()
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/com/worksap/nlp/sudachi/PosMatcherTest.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Works Applications Co., Ltd.
* Copyright (c) 2022-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,7 @@ import kotlin.test.*

class PosMatcherTest {

private val dic = DictionaryFactory().create(TestDictionary.user2Cfg()) as JapaneseDictionary
private val dic = Dictionary.load(TestDictionary.user2Cfg()) as JapaneseDictionary
private val tok = dic.tokenizer()

@Test
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/com/worksap/nlp/sudachi/RegexOovProviderTest.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Works Applications Co., Ltd.
* Copyright (c) 2022-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,7 @@ class RegexOovProviderTest {
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "*")
@Suppress("UNCHECKED_CAST") block(cfg, pluginCfg as Config.PluginConf<RegexOovProvider>)
// prepend our OOV configuration to the main configuration
return DictionaryFactory().create(cfg.withFallback(TestDictionary.user0Cfg())).tokenizer()
return Dictionary.load(cfg.withFallback(TestDictionary.user0Cfg())).tokenizer()
}

@Test
Expand Down
6 changes: 3 additions & 3 deletions src/test/java/com/worksap/nlp/sudachi/TestDictionary.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -67,11 +67,11 @@ object TestDictionary {

/** System only */
fun user0(): JapaneseDictionary {
return DictionaryFactory().create(user0Cfg()) as JapaneseDictionary
return Dictionary.load(user0Cfg()) as JapaneseDictionary
}

/** System + One User dictionary */
fun user1(): JapaneseDictionary {
return DictionaryFactory().create(user1Cfg()) as JapaneseDictionary
return Dictionary.load(user1Cfg()) as JapaneseDictionary
}
}
4 changes: 2 additions & 2 deletions src/test/java/com/worksap/nlp/sudachi/TextNormalizerTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ import kotlin.test.*
class TextNormalizerTest {

private val dic =
DictionaryFactory()
.create(TestDictionary.user2Cfg().characterDefinition(CharacterCategory.loadDefault()))
Dictionary.load(
TestDictionary.user2Cfg().characterDefinition(CharacterCategory.loadDefault()))
as JapaneseDictionary

@Test
Expand Down
12 changes: 6 additions & 6 deletions src/test/java/com/worksap/nlp/sudachi/UserDictionaryTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -37,7 +37,7 @@ public void fullUserDict() throws IOException {
}
config.addUserDictionary(instance.getUserDict2());

try (Dictionary dict = new DictionaryFactory().create(config)) {
try (Dictionary dict = Dictionary.load(config)) {
Tokenizer tokenizer = dict.tokenizer();
List<Morpheme> morphs = tokenizer.tokenize("ぴさる");
assertThat(morphs.size(), is(1));
Expand All @@ -54,14 +54,14 @@ public void openTooManyUserDict() throws IOException {
for (int i = 0; i < 15; i++) {
config.addUserDictionary(instance.getUserDict1());
}
new DictionaryFactory().create(config);
Dictionary.load(config);
}

@Test
public void splitForUserDict() throws IOException {
TestDictionary td = TestDictionary.INSTANCE;
Config config = td.user0Cfg().addUserDictionary(td.getUserDict2()).addUserDictionary(td.getUserDict1());
try (Dictionary dict = new DictionaryFactory().create(config)) {
try (Dictionary dict = Dictionary.load(config)) {
Tokenizer tokenizer = dict.tokenizer();
List<Morpheme> morphs = tokenizer.tokenize("東京府");
assertThat(morphs.size(), is(1));
Expand All @@ -76,7 +76,7 @@ public void splitForUserDict() throws IOException {
@Test
public void userDefinedPos() throws IOException {
Config config = TestDictionary.INSTANCE.user2Cfg();
try (Dictionary dict = new DictionaryFactory().create(config)) {
try (Dictionary dict = Dictionary.load(config)) {
Tokenizer tokenizer = dict.tokenizer();
List<Morpheme> morphs = tokenizer.tokenize("すだちかぼす");
assertThat(morphs.size(), is(2));
Expand All @@ -88,7 +88,7 @@ public void userDefinedPos() throws IOException {

TestDictionary td = TestDictionary.INSTANCE;
config = td.user0Cfg().addUserDictionary(td.getUserDict2()).addUserDictionary(td.getUserDict1());
try (Dictionary dict = new DictionaryFactory().create(config)) {
try (Dictionary dict = Dictionary.load(config)) {
Tokenizer tokenizer = dict.tokenizer();
List<Morpheme> morphs = tokenizer.tokenize("すだちかぼす");
assertThat(morphs.size(), is(2));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -71,7 +71,7 @@ class TestDic {
fun load(): Dictionary {
val config = Config.fromClasspath(config).systemDictionary(systemDic)
userDics.forEach { config.addUserDictionary(it) }
return DictionaryFactory().create(config)
return Dictionary.load(config)
}
}

Expand Down
Loading