Skip to content

Commit

Permalink
Merge pull request #691 from openzim/diacritics_insensitive_suggestions
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr authored Apr 27, 2022
2 parents 82b4df1 + c35c75b commit 8e05f1f
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/suggestion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,10 @@ Xapian::Enquire& SuggestionSearch::getEnquire() const

auto enquire = std::unique_ptr<Xapian::Enquire>(new Xapian::Enquire(mp_internalDb->m_database));

auto query = mp_internalDb->parseQuery(m_query);
const auto unaccentedQuery = removeAccents(m_query);
auto query = mp_internalDb->parseQuery(unaccentedQuery);
if (mp_internalDb->m_verbose) {
std::cout << "Parsed query '" << m_query << "' to " << query.get_description() << std::endl;
std::cout << "Parsed query '" << unaccentedQuery << "' to " << query.get_description() << std::endl;
}
enquire->set_query(query);

Expand Down
36 changes: 36 additions & 0 deletions test/suggestion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,42 @@ TEST(Suggestion, searchByTitle)
ASSERT_EQ(expectedResult , resultSet);
}

// Checks that suggestion lookup ignores letter case and diacritics, while a
// visually identical character from another script (a homograph) is still
// treated as a different letter.
TEST(Suggestion, caseDiacriticsAndHomogrpaphsHandling) {
  // Titles stored in the temporary archive. "nonberlin" only contains the
  // search term as part of a longer word; the last title spells "berlin"
  // with a cyrillic letter in place of the latin 'e'.
  std::vector<std::string> articleTitles{
    "nonberlin",
    "simply berlin",
    "accented bérlin",
    "uppercase BERLIN",
    "homograph bеrlin", // е is cyrillic
  };

  TempZimArchive tempArchive("testZim");
  const zim::Archive archive = tempArchive.createZimFromTitles(articleTitles);

  // Runs a suggestion search with the result limit set to the archive's
  // entry count, exactly as every assertion below requires.
  const auto suggestionsFor = [&archive](const std::string& query) {
    return getSuggestions(archive, query, archive.getEntryCount());
  };

  // Expected for every latin spelling of the query, whatever its case or
  // accents.
  const std::vector<std::string> latinMatches{
    "accented bérlin",
    "simply berlin",
    "uppercase BERLIN",
  };

  // Expected when the query itself contains the cyrillic homograph.
  const std::vector<std::string> homographMatches{"homograph bеrlin"};

  ASSERT_EQ(suggestionsFor("berlin"), latinMatches);
  ASSERT_EQ(suggestionsFor("BERLIN"), latinMatches);
  ASSERT_EQ(suggestionsFor("bêřlïñ"), latinMatches);

  // е in the query string "bеrlin" below is a cyrillic character
  ASSERT_EQ(suggestionsFor("bеrlin"), homographMatches);
}

TEST(Suggestion, resultsGreaterThanLimit) {
std::vector<std::string> titles = {
"foobar b",
Expand Down

0 comments on commit 8e05f1f

Please sign in to comment.