Skip to content

Commit

Permalink
Merge pull request #1016 from kiwix/fix_query_with_dot
Browse files Browse the repository at this point in the history
Do not index book's name as a phrase.
  • Loading branch information
kelson42 authored Nov 8, 2023
2 parents e89f4e2 + 07ff4ea commit 37274f7
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/library.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ void Library::updateBookDB(const Book& book)
}
indexer.index_text(normalizeText(book.getCreator()), 1, "A");
indexer.index_text(normalizeText(book.getPublisher()), 1, "XP");
indexer.index_text(normalizeText(book.getName()), 1, "XN");
doc.add_term("XN"+normalizeText(book.getName()));
indexer.index_text(normalizeText(book.getCategory()), 1, "XC");

for ( const auto& tag : split(normalizeText(book.getTags()), ";") ) {
Expand Down
29 changes: 25 additions & 4 deletions test/library.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ const char sampleLibraryXML[] = R"(
creator="Wikibooks"
publisher="Kiwix & Some Enthusiasts"
date="2021-04-11"
name="wikibooks_de"
name="wikibooks.de"
tags="unittest;wikibooks;_category:wikibooks"
articleCount="12"
mediaCount="0"
Expand Down Expand Up @@ -680,17 +680,38 @@ TEST_F(LibraryTest, filterByPublisher)

TEST_F(LibraryTest, filterByName)
{
EXPECT_FILTER_RESULTS(kiwix::Filter().name("wikibooks_de"),
EXPECT_FILTER_RESULTS(kiwix::Filter().name("wikibooks.de"),
"An example ZIM archive"
);

EXPECT_FILTER_RESULTS(kiwix::Filter().query("name:wikibooks_de"),
// Parsing the query with the `name:` prefix splits the token on the dot, as if it were two sentences.
// This produces the query "XNwikibook@1 PHRASE 2 XNde@2".
// I haven't found a syntax that prevents splitting on the dot.
EXPECT_FILTER_RESULTS(kiwix::Filter().query("name:wikibooks.de"),
/* no results */
);

EXPECT_FILTER_RESULTS(kiwix::Filter().name("wikibooks"),
/* no results */
);

// Wikibooks is in `tags` so it matches.
EXPECT_FILTER_RESULTS(kiwix::Filter().query("wikibooks"),
"An example ZIM archive"
);

EXPECT_FILTER_RESULTS(kiwix::Filter().query("wikibooks_de"),
// But "wikibooks.de" is only in name and `query` doesn't look in name.
EXPECT_FILTER_RESULTS(kiwix::Filter().query("wikibooks.de"),
/* no results */
);

EXPECT_FILTER_RESULTS(kiwix::Filter().name("wikipedia_en_ray_charles"),
"Ray Charles"
);

EXPECT_FILTER_RESULTS(kiwix::Filter().query("name:wikipedia_en_ray_charles"),
"Ray Charles"
);
}

TEST_F(LibraryTest, filterByCategory)
Expand Down

0 comments on commit 37274f7

Please sign in to comment.