Skip to content

Commit

Permalink
introduce MorphemeImplBase to satisfy sonar
Browse files Browse the repository at this point in the history
  • Loading branch information
mh-northlander committed Nov 25, 2024
1 parent 1aa19ec commit 81f3e62
Show file tree
Hide file tree
Showing 12 changed files with 196 additions and 198 deletions.
4 changes: 2 additions & 2 deletions src/main/java/com/worksap/nlp/sudachi/Dictionary.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -92,7 +92,7 @@ public Morpheme oovMorpheme(short posId, String surface, String reading, String
* surface of the morpheme
* @return an oov morpheme with given information
*/
default public Morpheme oovMorpheme(short posId, String surface) {
public default Morpheme oovMorpheme(short posId, String surface) {
return oovMorpheme(posId, surface, surface, surface, surface);
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/worksap/nlp/sudachi/JapaneseDictionary.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -148,7 +148,7 @@ public List<Morpheme> lookup(CharSequence surface) {
int[] wordIds = wordLookup.getWordsIds();
for (int word = 0; word < numWords; ++word) {
int wordId = wordIds[word];
Morpheme morpheme = new SingleMorphemeImpl(this, wordId);
Morpheme morpheme = new SingleMorphemeImpl(getGrammar(), getLexicon(), wordId);
morphemes.add(morpheme);
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/com/worksap/nlp/sudachi/LatticeNode.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -109,28 +109,28 @@ public interface LatticeNode {
/**
* @return the text of node.
*/
default public String getSurface() {
public default String getSurface() {
return getStrings().getSurface();
}

/**
* @return the reading form of node.
*/
default public String getReading() {
public default String getReading() {
return getStrings().getReading();
}

/**
* @return the normalized form of node.
*/
default public String getNormalizedForm() {
public default String getNormalizedForm() {
return getStrings().getNormalizedForm();
}

/**
* @return the dictionary form of node.
*/
default public String getDictionaryForm() {
public default String getDictionaryForm() {
return getStrings().getDictionaryForm();
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/worksap/nlp/sudachi/LatticeNodeImpl.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down
106 changes: 22 additions & 84 deletions src/main/java/com/worksap/nlp/sudachi/MorphemeImpl.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,6 +19,7 @@
import java.util.List;

import com.worksap.nlp.sudachi.dictionary.POS;
import com.worksap.nlp.sudachi.dictionary.Grammar;
import com.worksap.nlp.sudachi.dictionary.WordInfo;

/**
Expand All @@ -28,7 +29,7 @@
* @see MorphemeList
* @see SingleMorphemeImpl
*/
class MorphemeImpl implements Morpheme {
class MorphemeImpl extends MorphemeImplBase {
private final MorphemeList list;
private final int index;

Expand All @@ -40,112 +41,49 @@ class MorphemeImpl implements Morpheme {
this.index = index;
}

@Override
public int begin() {
return list.getBegin(index);
}

@Override
public int end() {
return list.getEnd(index);
protected Grammar getGrammar() {
return list.grammar;
}

@Override
public String surface() {
return list.getSurface(index);
protected WordInfo getWordInfo() {
return node().getWordInfo();
}

@Override
public POS partOfSpeech() {
WordInfo wi = getWordInfo();
return list.grammar.getPartOfSpeechString(wi.getPOSId());
protected StringsCache strings() {
return node().getStrings();
}

@Override
public short partOfSpeechId() {
WordInfo wi = getWordInfo();
return wi.getPOSId();
private LatticeNodeImpl node() {
LatticeNodeImpl n = node;
if (n == null) {
n = list.node(index);
node = n;
}
return n;
}

@Override
public String dictionaryForm() {
return strings().getDictionaryForm();
public int begin() {
return list.getBegin(index);
}

@Override
public String normalizedForm() {
return strings().getNormalizedForm();
public int end() {
return list.getEnd(index);
}

@Override
public String readingForm() {
return strings().getReading();
public String surface() {
return list.getSurface(index);
}

@Override
public List<Morpheme> split(Tokenizer.SplitMode mode) {
return list.split(mode, index);
}

@Override
public boolean isOOV() {
return node().isOOV();
}

@Override
public int getWordId() {
return node().getWordId();
}

@Override
public int getDictionaryId() {
return node().getDictionaryId();
}

@Override
public int[] getSynonymGroupIds() {
WordInfo wi = getWordInfo();
return wi.getSynonymGroupIds();
}

@Override
public String getUserData() {
WordInfo wi = getWordInfo();
return wi.getUserData();
}

private LatticeNodeImpl node() {
LatticeNodeImpl n = node;
if (n == null) {
n = list.node(index);
node = n;
}
return n;
}

/* internal for test */ WordInfo getWordInfo() {
return node().getWordInfo();
}

private StringsCache strings() {
return node().getStrings();
}

@Override
public String toString() {
final StringBuilder sb = new StringBuilder(getClass().getSimpleName());
sb.append("{");
sb.append("begin=").append(begin());
sb.append(", end=").append(end());
sb.append(", surface=").append(surface());
sb.append(", pos=").append(partOfSpeechId()).append('/').append(partOfSpeech());
int wordId = getWordId();
sb.append(", wid=(").append(WordId.dic(wordId)).append(',').append(WordId.word(wordId));
sb.append(")}");
return sb.toString();
}

/* internal */ boolean isCompatible(JapaneseDictionary dictionary) {
return dictionary.grammar == this.list.grammar;
}
}
123 changes: 123 additions & 0 deletions src/main/java/com/worksap/nlp/sudachi/MorphemeImplBase.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
* Copyright (c) 2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.worksap.nlp.sudachi;

import java.util.List;

import com.worksap.nlp.sudachi.dictionary.Grammar;
import com.worksap.nlp.sudachi.dictionary.POS;
import com.worksap.nlp.sudachi.dictionary.WordInfo;

/**
 * Shared skeleton for the {@link Morpheme} implementations of this package.
 *
 * <p>
 * Subclasses provide the grammar, the word info, the cached strings and the
 * positional accessors; the remaining accessors implemented here are derived
 * from those.
 *
 * @see MorphemeImpl
 * @see SingleMorphemeImpl
 */
abstract class MorphemeImplBase implements Morpheme {

    /** @return the grammar used to resolve the part-of-speech of this morpheme. */
    protected abstract Grammar getGrammar();

    /** @return the word info backing this morpheme. */
    protected abstract WordInfo getWordInfo();

    /** @return the strings (dictionary/normalized/reading forms) of this morpheme. */
    protected abstract StringsCache strings();

    @Override
    public abstract int begin();

    @Override
    public abstract int end();

    @Override
    public abstract String surface();

    @Override
    public abstract List<Morpheme> split(Tokenizer.SplitMode mode);

    @Override
    public abstract int getWordId();

    /** Resolves the POS id stored in the word info through the grammar. */
    @Override
    public POS partOfSpeech() {
        // Keep the call order: word info first, then grammar, then the POS id.
        final WordInfo info = getWordInfo();
        final Grammar grammar = getGrammar();
        return grammar.getPartOfSpeechString(info.getPOSId());
    }

    /** Returns the POS id stored in the word info. */
    @Override
    public short partOfSpeechId() {
        return getWordInfo().getPOSId();
    }

    @Override
    public String dictionaryForm() {
        final StringsCache cache = strings();
        return cache.getDictionaryForm();
    }

    @Override
    public String normalizedForm() {
        final StringsCache cache = strings();
        return cache.getNormalizedForm();
    }

    @Override
    public String readingForm() {
        final StringsCache cache = strings();
        return cache.getReading();
    }

    /** Decides OOV-ness from the word id alone, via {@code WordId.isOov}. */
    @Override
    public boolean isOOV() {
        final int wid = getWordId();
        return WordId.isOov(wid);
    }

    /**
     * Returns {@code -1} for an OOV morpheme, otherwise the dictionary part of
     * the word id as extracted by {@code WordId.dic}.
     */
    @Override
    public int getDictionaryId() {
        return isOOV() ? -1 : WordId.dic(getWordId());
    }

    @Override
    public int[] getSynonymGroupIds() {
        return getWordInfo().getSynonymGroupIds();
    }

    @Override
    public String getUserData() {
        return getWordInfo().getUserData();
    }

    /**
     * Renders the morpheme as
     * {@code SimpleClassName{begin=.., end=.., surface=.., pos=id/POS, wid=(dic,word)}}.
     */
    @Override
    public String toString() {
        // Accessors are read in the same order as the text is laid out.
        final String typeName = getClass().getSimpleName();
        final int from = begin();
        final int to = end();
        final String text = surface();
        final short posId = partOfSpeechId();
        final POS pos = partOfSpeech();
        final int wid = getWordId();
        return typeName + "{" + "begin=" + from + ", end=" + to + ", surface=" + text
                + ", pos=" + posId + '/' + pos
                + ", wid=(" + WordId.dic(wid) + ',' + WordId.word(wid) + ")}";
    }

    /**
     * Internal: tells whether this morpheme shares its grammar instance
     * (reference equality) with the given dictionary.
     */
    /* internal */ boolean isCompatible(JapaneseDictionary dictionary) {
        final Grammar expected = dictionary.grammar;
        return expected == getGrammar();
    }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down
13 changes: 9 additions & 4 deletions src/main/java/com/worksap/nlp/sudachi/PosMatcher.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Works Applications Co., Ltd.
* Copyright (c) 2022-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -121,12 +121,17 @@ private void checkCompatibility(PosMatcher other) {
*/
@Override
public boolean test(Morpheme morpheme) {
    // Sanity check, evaluated only when assertions are enabled (-ea).
    // The former nested-ternary instanceof chain over MorphemeImpl and
    // SingleMorphemeImpl duplicated what isCompatible(Morpheme) does through
    // the common MorphemeImplBase type, so a single assertion is kept.
    assert isCompatible(morpheme);
    // The answer is the entry of `matching` at the morpheme's POS id.
    return matching.get(morpheme.partOfSpeechId());
}

/**
 * Checks that the morpheme is a {@link MorphemeImplBase} which reports itself
 * compatible with this matcher's dictionary. Any other {@link Morpheme}
 * implementation (or {@code null}) is treated as incompatible.
 */
private boolean isCompatible(Morpheme morpheme) {
    return morpheme instanceof MorphemeImplBase && ((MorphemeImplBase) morpheme).isCompatible(dictionary);
}

/**
* Iterates POS tags which are matched by this matcher
*
Expand Down
Loading

0 comments on commit 81f3e62

Please sign in to comment.