Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NeedlemanWunschGotoh Algorithm #34

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ target
*.iws
*.ipynb
*.egg-info
*~
java.hprof.txt

.classpath
.project
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,33 +21,107 @@

import eu.interedition.collatex.dekker.DekkerAlgorithm;
import eu.interedition.collatex.medite.MediteAlgorithm;
import eu.interedition.collatex.needlemanwunsch.NeedlemanWunschAlgorithm;
import eu.interedition.collatex.needlemanwunsch.*;
import eu.interedition.collatex.needlemanwunschgotoh.*;
import eu.interedition.collatex.matching.*;
import eu.interedition.collatex.simple.SimpleToken;
import eu.interedition.collatex.util.GreedyStringTilingAlgorithm;
import eu.interedition.collatex.util.VertexMatch;

import java.util.Comparator;
import java.util.SortedSet;
import java.util.function.Function;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* @author <a href="http://gregor.middell.net/">Gregor Middell</a>
* @author Ronald Haentjens Dekker
*/
public class CollationAlgorithmFactory {
protected final static Logger LOG = Logger.getLogger("CollationAlgorithmFactory");

/**
 * Creates a collation algorithm backed by {@link DekkerAlgorithm}.
 *
 * @param comparator the token comparator handed to the algorithm's constructor
 * @return a new {@link DekkerAlgorithm} instance
 */
public static CollationAlgorithm dekker(Comparator<Token> comparator) {
    final CollationAlgorithm algorithm = new DekkerAlgorithm(comparator);
    return algorithm;
}


/**
 * Creates a Needleman-Wunsch collation algorithm.
 *
 * @param comparator the token comparator handed to the algorithm's constructor
 * @return a new {@code NeedlemanWunschAlgorithm} instance
 */
public static CollationAlgorithm needlemanWunsch(Comparator<Token> comparator) {
// NOTE(review): two consecutive return statements appear here; this looks like the
// removed and the added line of a diff shown without +/- markers. Presumably only the
// fully qualified variant below is current - confirm against the actual file.
return new NeedlemanWunschAlgorithm(comparator);
return new eu.interedition.collatex.needlemanwunsch.NeedlemanWunschAlgorithm(comparator);
}


/**
 * Creates a Needleman-Wunsch-Gotoh collation algorithm that uses the given scorer.
 *
 * @param scorer the scorer handed to the algorithm's constructor
 * @return a new {@code NeedlemanWunschGotohAlgorithm} instance
 */
public static CollationAlgorithm needlemanWunschGotoh(StringMetricScorer scorer) {
    final CollationAlgorithm algorithm =
        new eu.interedition.collatex.needlemanwunschgotoh.NeedlemanWunschGotohAlgorithm(scorer);
    return algorithm;
}

/**
 * Creates a Needleman-Wunsch-Gotoh collation algorithm with the default scorer,
 * a freshly constructed {@code TrigramRatioScorer}.
 *
 * @return the algorithm produced by {@link #needlemanWunschGotoh(StringMetricScorer)}
 */
public static CollationAlgorithm needlemanWunschGotoh() {
    final StringMetricScorer defaultScorer = new TrigramRatioScorer();
    return needlemanWunschGotoh(defaultScorer);
}


/**
 * Creates a greedy-string-tiling collation algorithm with a minimum tile length of 2.
 *
 * @param comparator the token comparator forwarded to the two-argument overload
 * @return the algorithm produced by the two-argument overload
 */
public static CollationAlgorithm greedyStringTiling(Comparator<Token> comparator) {
    final int defaultMinimumTileLength = 2;
    return greedyStringTiling(comparator, defaultMinimumTileLength);
}

/**
 * Creates a greedy-string-tiling collation algorithm.
 *
 * @param comparator the token comparator handed to {@link GreedyStringTilingAlgorithm}
 * @param minimumTileLength the minimum tile length handed to {@link GreedyStringTilingAlgorithm}
 * @return a new {@link GreedyStringTilingAlgorithm} instance
 */
// NOTE(review): two signature lines follow (one taking int, one taking Integer); this
// looks like the removed and the added line of a diff shown without +/- markers.
// Presumably only the Integer variant is current - confirm against the actual file.
public static CollationAlgorithm greedyStringTiling(Comparator<Token> comparator, int minimumTileLength) {
public static CollationAlgorithm greedyStringTiling(Comparator<Token> comparator,
Integer minimumTileLength) {
return new GreedyStringTilingAlgorithm(comparator, minimumTileLength);
}

public static CollationAlgorithm medite(Comparator<Token> comparator, Function<SortedSet<VertexMatch.WithToken>, Integer> matchEvaluator) {

/**
 * Creates a Medite collation algorithm that uses
 * {@code SimpleToken.TOKEN_MATCH_EVALUATOR} as its match evaluator.
 *
 * @param comparator the token comparator forwarded to the two-argument overload
 * @return the algorithm produced by the two-argument overload
 */
public static CollationAlgorithm medite(Comparator<Token> comparator) {
return medite(comparator, SimpleToken.TOKEN_MATCH_EVALUATOR);
}

/**
 * Creates a Medite collation algorithm.
 *
 * @param comparator the token comparator handed to {@link MediteAlgorithm}
 * @param matchEvaluator the match evaluator handed to {@link MediteAlgorithm}
 * @return a new {@link MediteAlgorithm} instance
 */
public static CollationAlgorithm medite(
        Comparator<Token> comparator,
        Function<SortedSet<VertexMatch.WithToken>, Integer> matchEvaluator) {
    final CollationAlgorithm algorithm = new MediteAlgorithm(comparator, matchEvaluator);
    return algorithm;
}


/**
 * Creates a token comparator selected by name.
 *
 * <p>Recognized names:
 * <ul>
 *   <li>{@code "equality"} - an {@code EqualityTokenComparator};</li>
 *   <li>{@code "levenshtein.distance"} - an {@code EditDistanceTokenComparator}; if at
 *       least one argument is given, {@code args[0]} is cast to {@link Integer} and
 *       passed to its constructor;</li>
 *   <li>{@code "levenshtein.ratio"} - an {@code EditDistanceRatioTokenComparator}; if at
 *       least one argument is given, {@code args[0]} is cast to {@link Double} and
 *       passed to its constructor.</li>
 * </ul>
 * Any other name yields an {@code EqualityTokenComparator}.
 *
 * @param name the comparator name; must not be {@code null}
 * @param args optional constructor arguments; only {@code args[0]} is consulted
 * @return the comparator selected by {@code name}
 * @throws NullPointerException if {@code name} is {@code null}
 * @throws ClassCastException if {@code args[0]} does not have the type expected by the
 *         selected comparator
 */
public static Comparator<Token> createComparator(String name, Object... args) {
    if (LOG.isLoggable(Level.CONFIG)) {
        LOG.log(Level.CONFIG, "Comparator: {0}", name);
    }
    switch (name) {
        case "equality":
            // The missing 'return' here used to discard the new comparator and fall
            // through into the "levenshtein.distance" branch.
            return new EqualityTokenComparator();
        case "levenshtein.distance":
            return args.length >= 1 ?
                new EditDistanceTokenComparator((Integer) args[0]) :
                new EditDistanceTokenComparator();
        case "levenshtein.ratio":
            return args.length >= 1 ?
                new EditDistanceRatioTokenComparator((Double) args[0]) :
                new EditDistanceRatioTokenComparator();
        default:
            return new EqualityTokenComparator();
    }
}

/**
 * Creates a collation algorithm selected by name.
 *
 * <p>Recognized names are {@code "dekker"}, {@code "gst"}, {@code "medite"},
 * {@code "needleman-wunsch"} and {@code "needleman-wunsch-gotoh"}; any other name
 * yields the Dekker algorithm. For {@code "gst"}, {@code "medite"} and
 * {@code "needleman-wunsch-gotoh"} an optional first argument is cast and forwarded
 * to the corresponding factory method (minimum tile length, match evaluator and
 * scorer respectively); without it the factory method's no-option overload is used.
 *
 * @param name the algorithm name; must not be {@code null}
 * @param comparator the token comparator forwarded to the selected factory method
 *        (not used by {@code "needleman-wunsch-gotoh"})
 * @param args optional algorithm-specific arguments; only {@code args[0]} is consulted
 * @return the algorithm selected by {@code name}
 */
public static CollationAlgorithm createAlgorithm(String name, Comparator<Token> comparator,
                                                 Object... args) {
    if (LOG.isLoggable(Level.CONFIG)) {
        LOG.log(Level.CONFIG, "Algorithm: {0}", name);
    }
    switch (name) {
        case "gst":
            if (args.length >= 1) {
                return greedyStringTiling(comparator, (Integer) args[0]);
            }
            return greedyStringTiling(comparator);
        case "medite":
            if (args.length >= 1) {
                return medite(comparator, (Function<SortedSet<VertexMatch.WithToken>, Integer>) args[0]);
            }
            return medite(comparator);
        case "needleman-wunsch":
            return needlemanWunsch(comparator);
        case "needleman-wunsch-gotoh":
            if (args.length >= 1) {
                return needlemanWunschGotoh((eu.interedition.collatex.matching.StringMetricScorer) args[0]);
            }
            return needlemanWunschGotoh();
        case "dekker":
        default:
            // Unknown names fall back to the Dekker algorithm.
            return dekker(comparator);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,16 @@
* @author <a href="http://gregor.middell.net/">Gregor Middell</a>
*/
public class VariantGraph {
final VariantGraph.Vertex start;
final VariantGraph.Vertex end;
VariantGraph.Vertex start;
VariantGraph.Vertex end;
final Map<Vertex, Set<Set<Vertex>>> transpositionIndex = new HashMap<>();

/**
 * Creates a variant graph and sets it up by calling {@link #init()}.
 */
public VariantGraph() {
    // NOTE(review): init() is public and this class is not final, so a subclass
    // override would run here before the subclass's own constructor body - confirm
    // that this is intended.
    init();
}

public void init() {
this.start = new VariantGraph.Vertex(this);
this.end = new VariantGraph.Vertex(this);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ public final class EditDistance {
private static final int MAX_DISTANCE_COMPARISON = 2500;

/**
 * Computes the edit distance between two strings with a substitution cost of 1.
 *
 * @param str1 the first string
 * @param str2 the second string
 * @return the result of the three-argument overload called with a substitution cost of 1
 */
public static int compute(String str1, String str2) {
    final int defaultSubstitutionCost = 1;
    return compute(str1, str2, defaultSubstitutionCost);
}

public static int compute(String str1, String str2, int subst_cost) {
if ((str1.length() * str2.length() > MAX_DISTANCE_COMPARISON)) {
return MAX_DISTANCE_COMPARISON;
}
Expand Down Expand Up @@ -53,7 +57,7 @@ public static int compute(String str1, String str2) {
final char str1Char = str1Chars[i - 1];
for (int j = 1; j <= str2Length; j++) {
final char str2Char = str2Chars[j - 1];
final int cost = (str1Char == str2Char ? 0 : 1);
final int cost = (str1Char == str2Char ? 0 : subst_cost);
matrix[i][j] = min3(matrix[i - 1][j] + 1, matrix[i][j - 1] + 1, matrix[i - 1][j - 1] + cost);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright (c) 2015 The Interedition Development Group.
*
* This file is part of CollateX.
*
* CollateX is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CollateX is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CollateX. If not, see <http://www.gnu.org/licenses/>.
*/

package eu.interedition.collatex.matching;

import eu.interedition.collatex.Token;
import eu.interedition.collatex.simple.SimpleToken;

import java.util.Comparator;

/**
 * Token comparator that treats two tokens as equal when the score a
 * {@code LevenshteinRatioScorer} assigns to their normalized forms reaches a threshold.
 *
 * <p>Both tokens are cast to {@link SimpleToken}; passing any other token type fails
 * with a {@link ClassCastException}.
 */
public class EditDistanceRatioTokenComparator implements Comparator<Token> {

    /** Minimum score at which two tokens compare as equal. */
    private final double threshold;

    /** Scorer applied to the normalized forms of the two tokens. */
    private final LevenshteinRatioScorer scorer;

    /**
     * Creates a comparator with a threshold of 0.6.
     */
    public EditDistanceRatioTokenComparator() {
        this(0.6);
    }

    /**
     * Creates a comparator with the given threshold.
     *
     * @param threshold the minimum score at which two tokens compare as equal
     */
    public EditDistanceRatioTokenComparator(double threshold) {
        this.threshold = threshold;
        this.scorer = new LevenshteinRatioScorer();
    }

    /**
     * Compares the normalized forms of two tokens.
     *
     * @param token_a the first token; must be a {@link SimpleToken}
     * @param token_b the second token; must be a {@link SimpleToken}
     * @return 0 if the score of the two normalized forms is at least the threshold,
     *         otherwise the lexicographic comparison of the normalized forms
     */
    @Override
    public int compare(Token token_a, Token token_b) {
        final String left = ((SimpleToken) token_a).getNormalized();
        final String right = ((SimpleToken) token_b).getNormalized();
        if (scorer.score(left, right) >= threshold) {
            return 0;
        }
        return left.compareTo(right);
    }
}
Empty file.
Loading