From 81c57ad4a2758cc9e38864bee48d39313f1abdab Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 5 Mar 2024 16:29:58 +0100 Subject: [PATCH] Adjust rewriting of number terms for string fields in streaming search. --- .../src/tests/query/streaming_query_test.cpp | 25 ++++++++++++ .../searchlib/query/streaming/querynode.cpp | 39 ++++++++++++++----- 2 files changed, 55 insertions(+), 9 deletions(-) diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 5559e194c5e0..2129cb6805a1 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -380,6 +380,31 @@ TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too) } } +TEST(StreamingQueryTest, negative_integer_is_rewritten_if_allowed_for_string_field) +{ + const char term[7] = {TERM_UNIQ, 3, 1, 'c', 2, '-', '5'}; + vespalib::stringref stackDump(term, sizeof(term)); + EXPECT_EQ(7u, stackDump.size()); + AllowRewrite empty("c"); + const Query q(empty, stackDump); + EXPECT_TRUE(q.valid()); + auto& root = q.getRoot(); + auto& equiv = dynamic_cast(root); + EXPECT_EQ(2u, equiv.get_terms().size()); + { + auto& qt = *equiv.get_terms()[0]; + EXPECT_EQ("c", qt.index()); + EXPECT_EQ(vespalib::stringref("-5"), qt.getTerm()); + EXPECT_EQ(3u, qt.uniqueId()); + } + { + auto& qt = *equiv.get_terms()[1]; + EXPECT_EQ("c", qt.index()); + EXPECT_EQ(vespalib::stringref("5"), qt.getTerm()); + EXPECT_EQ(0u, qt.uniqueId()); + } +} + TEST(StreamingQueryTest, test_get_query_parts) { QueryBuilder builder; diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 16406bffd3dd..55301132a184 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -14,10 +14,13 @@ #include #include #include +#include #include #include LOG_SETUP(".vsm.querynode"); 
+using search::queryeval::SplitFloat; + namespace search::streaming { namespace { @@ -29,7 +32,7 @@ bool disableRewrite(const QueryNode * qn) { } bool possibleFloat(const QueryTerm & qt, const QueryTerm::string & term) { - return !qt.encoding().isBase10Integer() && qt.encoding().isFloat() && (term.find('.') != QueryTerm::string::npos); + return qt.encoding().isFloat() && ((term.find('.') != QueryTerm::string::npos) || (term.find('-') != QueryTerm::string::npos)); } } @@ -139,14 +142,32 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor qt->setUniqueId(queryRep.getUniqueId()); qt->setRanked( ! queryRep.hasNoRankFlag()); if (allowRewrite && possibleFloat(*qt, ssTerm) && factory.allow_float_terms_rewrite(ssIndex)) { - auto phrase = std::make_unique(factory.create(), ssIndex, arity); - auto dotPos = ssTerm.find('.'); - phrase->add_term(std::make_unique(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD, normalize_mode)); - phrase->add_term(std::make_unique(factory.create(), ssTerm.substr(dotPos + 1), ssIndex, TermType::WORD, normalize_mode)); - auto eqn = std::make_unique(factory.create(), 2); - eqn->add_term(std::move(qt)); - eqn->add_term(std::move(phrase)); - qn = std::move(eqn); + /* + * Tokenize number term and add an alternative + * phrase or term when searching for numbers in string + * fields. See + * CreateBlueprintVisitorHelper::handleNumberTermAsText() + * for similar code used for indexed search. 
+ */ + SplitFloat splitter(ssTerm); + std::unique_ptr alt_qt; + if (splitter.parts() > 1) { + auto phrase = std::make_unique(factory.create(), ssIndex, splitter.parts()); + for (size_t i = 0; i < splitter.parts(); ++i) { + phrase->add_term(std::make_unique(factory.create(), splitter.getPart(i), ssIndex, TermType::WORD, normalize_mode)); + } + alt_qt = std::move(phrase); + } else if (splitter.parts() == 1 && ssTerm != splitter.getPart(0)) { + alt_qt = std::make_unique(factory.create(), splitter.getPart(0), ssIndex, TermType::WORD, normalize_mode); + } + if (alt_qt) { + auto eqn = std::make_unique(factory.create(), 2); + eqn->add_term(std::move(qt)); + eqn->add_term(std::move(alt_qt)); + qn = std::move(eqn); + } else { + qn = std::move(qt); + } } else { qn = std::move(qt); }