Skip to content

Commit

Permalink
Bump up ES (Elasticsearch) version
Browse files Browse the repository at this point in the history
  • Loading branch information
kazuma-t committed Nov 11, 2024
1 parent 692f2d1 commit 6ab77a8
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 16 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ repositories {
}

dependencies {
implementation 'com.worksap.nlp:sudachi:0.7.0', 'com.github.takawitter:trie4j:0.9.8', 'com.google.guava:guava:27.0.1-jre'
implementation 'com.worksap.nlp:sudachi:0.7.4', 'com.github.takawitter:trie4j:0.9.8', 'com.google.guava:guava:27.0.1-jre'
compileOnly "org.elasticsearch:elasticsearch:${elasticsearchVersion}"
testImplementation "org.elasticsearch:elasticsearch:${elasticsearchVersion}", "org.elasticsearch.test:framework:${elasticsearchVersion}", 'org.mockito:mockito-core:2.27.0', "org.apache.logging.log4j:log4j-core:2.17.1"
}
Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
elasticsearchVersion=7.17.1
elasticsearchVersion=8.15.2
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexService.IndexCreationContext;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.CharFilterFactory;
Expand Down Expand Up @@ -64,7 +65,7 @@ public class ChikkarSynonymGraphTokenFilterFactory extends AbstractTokenFilterFa
*/
public ChikkarSynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name,
Settings settings) {
super(indexSettings, name, settings);
super(name, settings);

// get the filter setting params
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
Expand All @@ -84,7 +85,7 @@ public TokenStream create(TokenStream tokenStream) {
}

@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer,
public TokenFilterFactory getChainAwareTokenFilterFactory(IndexCreationContext context, TokenizerFactory tokenizer,
List<CharFilterFactory> charFilters, List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters) {
final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ public Builder(boolean dedup) {
public ChikkarSynonymMap build(Chikkar chikkar) throws IOException {
ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
// TODO: are we using the best sharing options?
org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<>(
FST.INPUT_TYPE.BYTE4, outputs);
org.apache.lucene.util.fst.FSTCompiler<BytesRef> fstCompiler =
new org.apache.lucene.util.fst.FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs).build();

BytesRefBuilder scratch = new BytesRefBuilder();
ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();
Expand Down Expand Up @@ -143,10 +143,10 @@ public ChikkarSynonymMap build(Chikkar chikkar) throws IOException {
}

scratch.setLength(scratchOutput.getPosition());
builder.add(Util.toUTF32(input, scratchIntsRef), scratch.toBytesRef());
fstCompiler.add(Util.toUTF32(input, scratchIntsRef), scratch.toBytesRef());
}

FST<BytesRef> fst = builder.finish();
FST<BytesRef> fst = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
// remove unused relation manager
chikkar.clearRelation();
return new ChikkarSynonymMap(chikkar, fst, maxHorizontalContext);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexService.IndexCreationContext;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.*;

Expand Down Expand Up @@ -60,7 +61,7 @@ public class ChikkarSynonymTokenFilterFactory extends AbstractTokenFilterFactory
*/
public ChikkarSynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name,
Settings settings) {
super(indexSettings, name, settings);
super(name, settings);

// get the filter setting params
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
Expand All @@ -81,7 +82,7 @@ public TokenStream create(TokenStream tokenStream) {
}

@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer,
public TokenFilterFactory getChainAwareTokenFilterFactory(IndexCreationContext context, TokenizerFactory tokenizer,
List<CharFilterFactory> charFilters, List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters) {
final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexService.IndexCreationContext;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
Expand Down Expand Up @@ -1211,7 +1212,7 @@ TokenFilterFactory createChikkarSynonymGraphFactory(Path configPath, String dict

assertTrue(factory instanceof ChikkarSynonymGraphTokenFilterFactory);

return factory.getChainAwareTokenFilterFactory(tokenizer, charFilters, tokenFilters, null);
return factory.getChainAwareTokenFilterFactory(IndexCreationContext.CREATE_INDEX, tokenizer, charFilters, tokenFilters, null);
}

TokenFilterFactory createChikkarSynonymFactory(Path configPath, String dictPath, String dictId,
Expand All @@ -1237,7 +1238,7 @@ TokenFilterFactory createChikkarSynonymFactory(Path configPath, String dictPath,

assertTrue(factory instanceof ChikkarSynonymTokenFilterFactory);

return factory.getChainAwareTokenFilterFactory(tokenizer, charFilters, tokenFilters, null);
return factory.getChainAwareTokenFilterFactory(IndexCreationContext.CREATE_INDEX, tokenizer, charFilters, tokenFilters, null);
}

TokenFilterFactory createSynonymFactory(Path configPath, String dictPath, TokenizerFactory tokenizer,
Expand All @@ -1257,7 +1258,7 @@ TokenFilterFactory createSynonymFactory(Path configPath, String dictPath, Tokeni

TokenFilterFactory factory = new ModSynonymTokenFilterFactory(indexSettings, env, "mod_synonym", settings);

return factory.getChainAwareTokenFilterFactory(tokenizer, charFilters, tokenFilters, null);
return factory.getChainAwareTokenFilterFactory(IndexCreationContext.CREATE_INDEX, tokenizer, charFilters, tokenFilters, null);
}

void assertSynonymsEquals(List<TokenAttribute> expected, List<TokenAttribute> actual) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexService.IndexCreationContext;
import org.elasticsearch.index.analysis.*;

import java.io.Reader;
Expand All @@ -45,7 +46,7 @@ public class ModSynonymTokenFilterFactory extends AbstractTokenFilterFactory {
protected final AnalysisMode analysisMode;

public ModSynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
super(name, settings);
this.settings = settings;

if (settings.get("ignore_case") != null) {
Expand Down Expand Up @@ -76,7 +77,7 @@ public TokenStream create(TokenStream tokenStream) {
}

@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer,
public TokenFilterFactory getChainAwareTokenFilterFactory(IndexCreationContext context, TokenizerFactory tokenizer,
List<CharFilterFactory> charFilters, List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters) {
final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters, allFilters);
Expand Down Expand Up @@ -140,7 +141,7 @@ Reader getRulesFromSettings(Environment env) {
}
rulesReader = new StringReader(sb.toString());
} else if (settings.get("synonyms_path") != null) {
rulesReader = Analysis.getReaderFromFile(env, settings, "synonyms_path");
rulesReader = Analysis.getReaderFromFile(env, settings.get("synonyms_path"), "synonyms");
} else {
throw new IllegalArgumentException(
"synonym requires either `synonyms` or `synonyms_path` to be configured");
Expand Down

0 comments on commit 6ab77a8

Please sign in to comment.