Skip to content

Commit

Permalink
apply abbreviations
Browse files Browse the repository at this point in the history
  • Loading branch information
danemadsen committed Aug 13, 2024
1 parent 57f460b commit e2774c8
Showing 1 changed file with 26 additions and 0 deletions.
26 changes: 26 additions & 0 deletions src/cleaners.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,29 @@
#include <algorithm>
#include <iostream>
#include <sstream>
#include <unordered_map>

// Lookup table from a lowercase abbreviation (written without a trailing
// period) to the spelled-out word that is emitted in its place.
//
// NOTE: intentionally left non-const. The lookup site in this file reads the
// table through operator[], which is not available on a const
// std::unordered_map; switching that call to find()/at() is a prerequisite
// for making this table const.
std::unordered_map<std::string, std::string> abbreviations = {
    {"mrs", "missus"},  // previously misspelled as "misess"
    {"mr", "mister"},
    {"dr", "doctor"},
    {"st", "saint"},
    {"co", "company"},
    {"jr", "junior"},
    {"maj", "major"},
    {"gen", "general"},
    {"drs", "doctors"},
    {"rev", "reverend"},
    {"lt", "lieutenant"},
    {"hon", "honorable"},
    {"sgt", "sergeant"},
    {"capt", "captain"},
    {"esq", "esquire"},
    {"ltd", "limited"},
    {"col", "colonel"},
    {"ft", "foot"},
    {"pty", "proprietary"}
};

std::vector<std::string> split_into_threes(const std::string& str) {
std::vector<std::string> parts;
Expand Down Expand Up @@ -177,6 +200,9 @@ namespace DeepPhonemizer {
std::vector<std::string> number_words = numbers_to_words(cleaned_word);
words.insert(words.end(), number_words.begin(), number_words.end());
}
else if (abbreviations.find(word) != abbreviations.end()) {
words.push_back(abbreviations[word]);
}
else {
words.push_back(word);
}
Expand Down

0 comments on commit e2774c8

Please sign in to comment.