Skip to content

Commit

Permalink
fix getSubwords for EOS
Browse files Browse the repository at this point in the history
Summary: Within initNgrams, subwords aren't computed if the word is EOS, but getSubwords still computed them.

Reviewed By: EdouardGrave

Differential Revision: D6550445

fbshipit-source-id: 918cc8e70e4578cbab6bab28e158901753a8876f
  • Loading branch information
cpuhrsch authored and facebook-github-bot committed Dec 13, 2017
1 parent b928c9f commit c9de932
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions src/dictionary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ const std::vector<int32_t> Dictionary::getSubwords(
return getSubwords(i);
}
std::vector<int32_t> ngrams;
computeSubwords(BOW + word + EOW, ngrams);
if (word != EOS) {
computeSubwords(BOW + word + EOW, ngrams);
}
return ngrams;
}

Expand All @@ -94,7 +96,9 @@ void Dictionary::getSubwords(const std::string& word,
ngrams.push_back(i);
substrings.push_back(words_[i].word);
}
computeSubwords(BOW + word + EOW, ngrams, substrings);
if (word != EOS) {
computeSubwords(BOW + word + EOW, ngrams, substrings);
}
}

bool Dictionary::discard(int32_t id, real rand) const {
Expand Down Expand Up @@ -296,7 +300,9 @@ void Dictionary::addSubwords(std::vector<int32_t>& line,
const std::string& token,
int32_t wid) const {
if (wid < 0) { // out of vocab
computeSubwords(BOW + token + EOW, line);
if (token != EOS) {
computeSubwords(BOW + token + EOW, line);
}
} else {
if (args_->maxn <= 0) { // in vocab w/o subwords
line.push_back(wid);
Expand Down

0 comments on commit c9de932

Please sign in to comment.