Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mapped c to greek letter ψ in the greeklish generator #18

Open
wants to merge 3 commits into
base: 5.4.2
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public class GreeklishConverter {
private final boolean generateGreekVariants;

// Constructor
public GreeklishConverter(int maxExpansions, boolean generateGreekVariants) {
public GreeklishConverter(int maxExpansions, boolean generateGreekVariants, boolean useSpecialMapping) {

// Initialize greekWords list
this.greekWords = new ArrayList<String>();
Expand All @@ -68,7 +68,7 @@ public GreeklishConverter(int maxExpansions, boolean generateGreekVariants) {
this.reverseStemmer = new GreekReverseStemmer();

// Initialize greeklish generator
this.greeklishGenerator = new GreeklishGenerator(maxExpansions);
this.greeklishGenerator = new GreeklishGenerator(maxExpansions, useSpecialMapping);

// Initialize setting for generating greek variants
this.generateGreekVariants = generateGreekVariants;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ public class GreeklishGenerator {
{ "φ", "f", "ph" }, { "χ", "x", "h", "ch" }, { "ψ", "ps" },
{ "ω", "w", "o", "v" } };

/**
* Alternative string conversions that are registered only when the
* special mapping is enabled. Each entry uses the same layout as the
* regular conversion table: the first element is the greek character and
* the remaining elements are its greeklish replacements. The constructor
* puts these entries into the conversions after the regular ones, under
* the same character key (presumably replacing the regular entry for that
* character), so with the special mapping ψ is rendered as "c" or "ps".
*/
private static final String[][] specialConvertStrings = new String[][] {
{ "ψ", "c", "ps"}
};

/**
* The maximum greeklish expansions per greek token.
*/
Expand Down Expand Up @@ -102,7 +109,7 @@ public class GreeklishGenerator {
private String initialToken;

// Constructor
public GreeklishGenerator(int maxExpansions) {
public GreeklishGenerator(int maxExpansions, boolean useSpecialMapping) {

this.maxExpansions = maxExpansions;

Expand All @@ -120,6 +127,13 @@ public GreeklishGenerator(int maxExpansions) {
conversions.put(convertString[0].charAt(0),
Arrays.copyOfRange(convertString, 1, convertString.length));
}

if(useSpecialMapping) {
for (String[] convertString : specialConvertStrings) {
conversions.put(convertString[0].charAt(0),
Arrays.copyOfRange(convertString, 1, convertString.length));
}
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ public class GreeklishTokenFilter extends TokenFilter {
private GreeklishConverter greeklishConverter;

// Constructor
public GreeklishTokenFilter(TokenStream tokenStream, int maxExpansions, boolean generateGreekVariants) {
public GreeklishTokenFilter(TokenStream tokenStream, int maxExpansions, boolean generateGreekVariants, boolean useSpecialMapping) {
super(tokenStream);
this.greeklishConverter = new GreeklishConverter(maxExpansions, generateGreekVariants);
this.greeklishConverter = new GreeklishConverter(maxExpansions, generateGreekVariants, useSpecialMapping);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
public class GreeklishTokenFilterFactory extends AbstractTokenFilterFactory {
private final int maxExpansions;
private final boolean generateGreekVariants;
private final boolean useSpecialMapping;

@Inject
public GreeklishTokenFilterFactory(IndexSettings indexSettings,
Expand All @@ -21,11 +22,12 @@ public GreeklishTokenFilterFactory(IndexSettings indexSettings,
super(indexSettings, name, settings);
this.maxExpansions = settings.getAsInt("max_expansions", 20);
this.generateGreekVariants = settings.getAsBoolean("greek_variants", true);
this.useSpecialMapping = settings.getAsBoolean("use_special_mapping", false);
}

@Override
public TokenStream create(TokenStream tokenStream) {
return new GreeklishTokenFilter(tokenStream, maxExpansions, generateGreekVariants);
return new GreeklishTokenFilter(tokenStream, maxExpansions, generateGreekVariants, useSpecialMapping);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ public class GreeklishConverterTest {

private static final boolean GENERATE_GREEK_VARIANTS = true;

private static final boolean USE_SPECIAL_MAPPING_ON = true;
private static final boolean USE_SPECIAL_MAPPING_OFF = false;

private GreeklishConverter converter;

/**
Expand All @@ -23,6 +26,12 @@ public class GreeklishConverterTest {
private static final String[] greekWords = { "αυτοκινητο", "ομπρελα",
"ξεσκεπαστοσ"};

/**
* a sample of greek input for the special mapping test. It contains the
* letter ψ, so that its greeklish counterparts can be checked when the
* converter is created with the special mapping turned on.
*/
private static final String[] greekWordsSpecial = { "ωιψηυ"};

/**
* the greeklish counterparts that should be generated from the
* greek words.
Expand All @@ -35,6 +44,19 @@ public class GreeklishConverterTest {
{"kseskepastos", "xeskepastos", "kseskepastou", "xeskepastwn", "kseskepastoi"}
};

/**
* the greeklish counterparts that should be generated from the
* special greek words when the special mapping is turned on. The inner
* array at index i holds the expected forms for greekWordsSpecial[i];
* the forms include both the "c" and the "ps" rendering of ψ.
*/
private static final String[][] generatedGreeklishWordsSpecial = {
{
"oichu", "wichi", "wichu", "vipsiy",
"oipsiy", "wipsiy", "viciy", "oiciy",
"wiciy", "vipshy", "oipshy", "wipshy",
"vichy", "oichy", "wichy"
}
};

/**
* these words should not be processed by the converter.
*/
Expand All @@ -47,7 +69,7 @@ public class GreeklishConverterTest {

@BeforeClass
public void setUp() {
this.converter = new GreeklishConverter(MAX_EXPANSIONS, GENERATE_GREEK_VARIANTS);
this.converter = new GreeklishConverter(MAX_EXPANSIONS, GENERATE_GREEK_VARIANTS, USE_SPECIAL_MAPPING_OFF);
}

@BeforeMethod
Expand Down Expand Up @@ -85,11 +107,32 @@ public void testGreekTokenConversionForValidWords() {
}
}

/**
 * Verifies that, with the special mapping turned on, the converter
 * generates every expected greeklish form for each word of the special
 * sample.
 */
@Test
public void testGreekTokenConversionForValidWordsSpecial() {
    int newMaxExpansions = 20;

    // Use a local converter instead of overwriting the shared field: the
    // shared converter is created only once (in the @BeforeClass setup)
    // with the special mapping off, so replacing it here would leak the
    // special mapping into the tests that run after this one.
    GreeklishConverter specialConverter = new GreeklishConverter(
            newMaxExpansions, GENERATE_GREEK_VARIANTS, USE_SPECIAL_MAPPING_ON);

    for (int i = 0; i < greekWordsSpecial.length; i++) {
        greeklishWords = specialConverter.convert(
                greekWordsSpecial[i].toCharArray(),
                greekWordsSpecial[i].length());

        populateConvertedStringsList();

        Assert.assertFalse(greeklishWords.isEmpty(),
                "Greeklish words should be generated");

        // The expected forms are matched to the input word by index.
        for (String greeklishWord : generatedGreeklishWordsSpecial[i]) {
            Assert.assertTrue(convertedGreeklishStrings
                    .contains(greeklishWord),
                    "It should contain greeklish word: " + greeklishWord);
        }
    }
}

@Test
public void testMaxGreeklishExpansions() {
int newMaxExpansions = 2;
boolean generateGreekVariants = false;
converter = new GreeklishConverter(newMaxExpansions, generateGreekVariants);
converter = new GreeklishConverter(newMaxExpansions, generateGreekVariants, USE_SPECIAL_MAPPING_OFF);

greeklishWords = converter.convert(greekWords[0].toCharArray(),
greekWords[0].length());
Expand All @@ -114,7 +157,7 @@ public void testMaxGreeklishExpansions() {
public void testGreekVariantsGeneration() {
int newMaxExpansions = 1;
boolean generateGreekVariants = false;
converter = new GreeklishConverter(newMaxExpansions, generateGreekVariants);
converter = new GreeklishConverter(newMaxExpansions, generateGreekVariants, USE_SPECIAL_MAPPING_OFF);

greeklishWords = converter.convert(greekWords[0].toCharArray(),
greekWords[0].length());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
public class GreeklishGeneratorTest {

private static final int MAX_EXPANSIONS = 10;
private static final boolean USE_SPECIAL_MAPPING_ON = true;
private static final boolean USE_SPECIAL_MAPPING_OFF = false;

/**
* a sample of greek words to generate their greeklish
Expand All @@ -19,6 +21,12 @@ public class GreeklishGeneratorTest {
private static final String[] greekWords = { "αυτοκινητο", "ομπρελα",
"ξεσκεπαστοσ", };

/**
* a special sample of greek words to generate their greeklish
* counterparts.
*/
private static final String[] greekWordsSpecial = { "ωιψηυ" };

/**
* the greeklish counterparts that should be generated from the
* greek words.
Expand All @@ -29,6 +37,17 @@ public class GreeklishGeneratorTest {
"omprela", "obrela", "kseskepastos", "xeskepastos"
};

/**
* the special greeklish counterparts that should be generated from the
* greek words.
*/
private static final String[] generatedGreeklishWordsSpecial = {
"oichu", "wichi", "wichu", "vipsiy",
"oipsiy", "wipsiy", "viciy", "oiciy",
"wiciy", "vipshy", "oipshy", "wipshy",
"vichy", "oichy", "wichy"
};

private GreeklishGenerator generator;

private List<String> inputGreekList = new ArrayList<String>();
Expand All @@ -39,7 +58,7 @@ public class GreeklishGeneratorTest {

@BeforeClass
public void populateInputGreekList() {
this.generator = new GreeklishGenerator(MAX_EXPANSIONS);
this.generator = new GreeklishGenerator(MAX_EXPANSIONS, USE_SPECIAL_MAPPING_OFF);

for (String word : greekWords) {
inputGreekList.add(word);
Expand Down Expand Up @@ -72,7 +91,7 @@ public void testGreekTokenConversionForValidWords() {
@Test
public void testMaxGreeklishWordGenerations() {
int newMaxExpansions = 2;
generator = new GreeklishGenerator(newMaxExpansions);
generator = new GreeklishGenerator(newMaxExpansions, USE_SPECIAL_MAPPING_OFF);

greeklishWords = generator.generateGreeklishWords(inputGreekList);

Expand All @@ -82,6 +101,31 @@ public void testMaxGreeklishWordGenerations() {

}

/**
 * Verifies that, with the special mapping turned on, the generator
 * produces every expected greeklish form for the special sample.
 */
@Test
public void testGreekTokenConversionForValidWordsSpecial() {
    // Build a local input list. The shared inputGreekList is populated
    // only once in the @BeforeClass method, so clearing and refilling it
    // here would make the other tests run against the special sample.
    List<String> specialInput = new ArrayList<String>();
    for (String word : greekWordsSpecial) {
        specialInput.add(word);
    }

    // A local generator keeps the special mapping from leaking into the
    // shared generator used by the other tests.
    int newMaxExpansions = 20;
    GreeklishGenerator specialGenerator =
            new GreeklishGenerator(newMaxExpansions, USE_SPECIAL_MAPPING_ON);

    // The whole list is converted in one call, so a single pass suffices.
    greeklishWords = specialGenerator.generateGreeklishWords(specialInput);

    populateConvertedStringsList();

    Assert.assertFalse(greeklishWords.isEmpty(),
            "Greeklish words should be generated");
    for (String greeklishWord : generatedGreeklishWordsSpecial) {
        Assert.assertTrue(
                convertedGreeklishStrings.contains(greeklishWord),
                "It should contain the greeklish word: "
                        + greeklishWord);
    }
}

private final void populateConvertedStringsList() {
for (StringBuilder word : greeklishWords) {
convertedGreeklishStrings.add(word.toString());
Expand Down