enhance: Rename textmatch to text_match #37290

Merged: 1 commit merged on Nov 3, 2024

2 changes: 1 addition & 1 deletion internal/parser/planparserv2/Plan.g4
@@ -57,7 +57,7 @@ NE: '!=';

LIKE: 'like' | 'LIKE';
EXISTS: 'exists' | 'EXISTS';
-TEXTMATCH: 'TextMatch'|'textmatch'|'TEXTMATCH';
+TEXTMATCH: 'text_match'|'TEXT_MATCH';

ADD: '+';
SUB: '-';
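
Note on the grammar change above: the lexer now accepts only the text_match and TEXT_MATCH spellings, so filter expressions written with the old TextMatch or textmatch forms will no longer parse. Below is a minimal client-side sketch (not part of this PR) of a filter using the new spelling, assuming a running Milvus server, an existing collection named "docs", and a match-enabled VARCHAR field "text" (hypothetical names):

from pymilvus import connections, Collection

connections.connect(host="localhost", port="19530")

collection = Collection("docs")  # hypothetical collection with a match-enabled VARCHAR field "text"
collection.load()                # query requires the collection to be loaded

# After this change, text_match / TEXT_MATCH are the accepted spellings;
# the old TextMatch / textmatch forms are rejected by the parser.
res = collection.query(
    expr="text_match(text, 'milvus')",
    output_fields=["text"],
)
print(res)
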
2 changes: 1 addition & 1 deletion internal/parser/planparserv2/generated/PlanLexer.interp

Large diffs are not rendered by default.

648 changes: 322 additions & 326 deletions internal/parser/planparserv2/generated/plan_lexer.go

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/parser/planparserv2/plan_parser_v2_test.go
@@ -216,15 +216,15 @@ func TestExpr_TextMatch(t *testing.T) {
assert.NoError(t, err)

exprStrs := []string{
-`TextMatch(VarCharField, "query")`,
+`text_match(VarCharField, "query")`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}

unsupported := []string{
-`TextMatch(not_exist, "query")`,
-`TextMatch(BoolField, "query")`,
+`text_match(not_exist, "query")`,
+`text_match(BoolField, "query")`,
}
for _, exprStr := range unsupported {
assertInvalidExpr(t, helper, exprStr)
4 changes: 2 additions & 2 deletions tests/python_client/chaos/checker.py
@@ -1397,7 +1397,7 @@ def __init__(self, collection_name=None, shards_num=2, replica_number=1, schema=
self.c_wrap.load(replica_number=replica_number) # do load before query
self.insert_data()
key_word = self.word_freq.most_common(1)[0][0]
-self.term_expr = f"TextMatch({self.text_field_name}, '{key_word}')"
+self.term_expr = f"TEXT_MATCH({self.text_field_name}, '{key_word}')"

@trace()
def query(self):
@@ -1408,7 +1408,7 @@ def query(self):
@exception_handler()
def run_task(self):
key_word = self.word_freq.most_common(1)[0][0]
-self.term_expr = f"TextMatch({self.text_field_name}, '{key_word}')"
+self.term_expr = f"TEXT_MATCH({self.text_field_name}, '{key_word}')"
res, result = self.query()
return res, result

2 changes: 1 addition & 1 deletion tests/python_client/common/common_func.py
@@ -227,7 +227,7 @@ def generate_text_match_expr(query_dict):

def process_node(node):
if isinstance(node, dict) and 'field' in node and 'value' in node:
-return f"TextMatch({node['field']}, '{node['value']}')"
+return f"TEXT_MATCH({node['field']}, '{node['value']}')"
elif isinstance(node, dict) and 'not' in node:
return f"not {process_node(node['not'])}"
elif isinstance(node, list):
6 changes: 3 additions & 3 deletions tests/python_client/testcases/test_bulk_insert.py
@@ -899,7 +899,7 @@ def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities
query_data = [r[expr_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{expr_field} in {query_data}", output_fields=[expr_field])
assert len(res) == len(query_data)
-res, _ = self.collection_wrap.query(expr=f"TextMatch({df.text_field}, 'milvus')", output_fields=[df.text_field])
+res, _ = self.collection_wrap.query(expr=f"text_match({df.text_field}, 'milvus')", output_fields=[df.text_field])
if nullable is False:
assert len(res) == entities
else:
@@ -1052,7 +1052,7 @@ def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_d
query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field])
assert len(res) == len(query_data)
-res, _ = self.collection_wrap.query(expr=f"TextMatch({df.text_field}, 'milvus')", output_fields=[df.text_field])
+res, _ = self.collection_wrap.query(expr=f"TEXT_MATCH({df.text_field}, 'milvus')", output_fields=[df.text_field])
if nullable is False:
assert len(res) == entities
else:
@@ -1218,7 +1218,7 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable
query_data = [r[expr_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{expr_field} in {query_data}", output_fields=[expr_field])
assert len(res) == len(query_data)
-res, _ = self.collection_wrap.query(expr=f"TextMatch({df.text_field}, 'milvus')", output_fields=[df.text_field])
+res, _ = self.collection_wrap.query(expr=f"TEXT_MATCH({df.text_field}, 'milvus')", output_fields=[df.text_field])
if not nullable:
assert len(res) == entities
else:
4 changes: 2 additions & 2 deletions tests/python_client/testcases/test_full_text_search.py
@@ -2236,7 +2236,7 @@ def test_full_text_search_default(
token = random.choice(tokens)
search_data = [fake.text().lower() + f" {token} " for _ in range(nq)]
if expr == "text_match":
-filter = f"TextMatch(text, '{token}')"
+filter = f"TEXT_MATCH(text, '{token}')"
res, _ = collection_w.query(
expr=filter,
)
@@ -2431,7 +2431,7 @@ def test_full_text_search_with_jieba_tokenizer(
limit = 100
search_data = [fake.text().lower() + " " + random.choice(tokens) for _ in range(nq)]
if expr == "text_match":
-filter = f"TextMatch(text, '{tokens[0]}')"
+filter = f"text_match(text, '{tokens[0]}')"
res, _ = collection_w.query(
expr=filter,
)
26 changes: 13 additions & 13 deletions tests/python_client/testcases/test_query.py
@@ -4538,7 +4538,7 @@ def test_query_text_match_normal(
# query single field for one token
for field in text_fields:
token = wf_map[field].most_common()[0][0]
-expr = f"TextMatch({field}, '{token}')"
+expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
assert len(res) > 0
@@ -4562,7 +4562,7 @@
for word, count in wf_map[field].most_common(10):
top_10_tokens.append(word)
string_of_top_10_words = " ".join(top_10_tokens)
-expr = f"TextMatch({field}, '{string_of_top_10_words}')"
+expr = f"text_match({field}, '{string_of_top_10_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -4677,7 +4677,7 @@ def test_query_text_match_custom_analyzer(self):
# query single field for one word
for field in text_fields:
token = list(wf_map[field].keys())[0]
-expr = f"TextMatch({field}, '{token}')"
+expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -4691,7 +4691,7 @@
for word, count in wf_map[field].most_common(10):
top_10_tokens.append(word)
string_of_top_10_words = " ".join(top_10_tokens)
-expr = f"TextMatch({field}, '{string_of_top_10_words}')"
+expr = f"text_match({field}, '{string_of_top_10_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -4793,7 +4793,7 @@ def test_query_text_match_with_combined_expression_for_single_field(self):
wf_counter = Counter(wf_map[field])
pd_tmp_res_list = []
for word, count in wf_counter.most_common(2):
-tmp = f"TextMatch({field}, '{word}')"
+tmp = f"text_match({field}, '{word}')"
log.info(f"tmp expr {tmp}")
expr_list.append(tmp)
manual_result = df_new[
@@ -5074,7 +5074,7 @@ def test_query_text_match_with_multi_lang(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
-expr = f"TextMatch({field}, '{token}')"
+expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -5089,7 +5089,7 @@
for word, count in wf_map[field].most_common(3):
multi_words.append(word)
string_of_multi_words = " ".join(multi_words)
-expr = f"TextMatch({field}, '{string_of_multi_words}')"
+expr = f"text_match({field}, '{string_of_multi_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -5194,7 +5194,7 @@ def test_query_text_match_with_addition_inverted_index(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
-expr = f"TextMatch({field}, '{token}')"
+expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
pandas_res = df_split[df_split.apply(lambda row: token in row[field], axis=1)]
@@ -5311,7 +5311,7 @@ def test_query_text_match_with_non_varchar_fields_expr(self, combine_op):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[0][0]
-tm_expr = f"TextMatch({field}, '{token}')"
+tm_expr = f"text_match({field}, '{token}')"
int_expr = "age > 10"
combined_expr = f"{tm_expr} {combine_op} {int_expr}"
log.info(f"expr: {combined_expr}")
@@ -5445,7 +5445,7 @@ def test_query_text_match_with_some_empty_string(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
-expr = f"TextMatch({field}, '{token}')"
+expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -5459,7 +5459,7 @@
for word, count in wf_map[field].most_common(3):
multi_words.append(word)
string_of_multi_words = " ".join(multi_words)
-expr = f"TextMatch({field}, '{string_of_multi_words}')"
+expr = f"text_match({field}, '{string_of_multi_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -5563,7 +5563,7 @@ def test_query_text_match_with_nullable(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
-expr = f"TextMatch({field}, '{token}')"
+expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=text_fields)
log.info(f"res len {len(res)}, \n{res}")
@@ -5577,7 +5577,7 @@
for word, count in wf_map[field].most_common(3):
multi_words.append(word)
string_of_multi_words = " ".join(multi_words)
-expr = f"TextMatch({field}, '{string_of_multi_words}')"
+expr = f"text_match({field}, '{string_of_multi_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=text_fields)
log.info(f"res len {len(res)}, {res}")
4 changes: 2 additions & 2 deletions tests/python_client/testcases/test_search.py
@@ -13402,7 +13402,7 @@ def test_search_with_text_match_filter_normal(
search_data = [[random.random() for _ in range(dim)]]
for field in text_fields:
token = wf_map[field].most_common()[0][0]
-expr = f"TextMatch({field}, '{token}')"
+expr = f"text_match({field}, '{token}')"
manual_result = df_split[
df_split.apply(lambda row: token in row[field], axis=1)
]
@@ -13427,7 +13427,7 @@
for word, count in wf_map[field].most_common(10):
top_10_tokens.append(word)
string_of_top_10_words = " ".join(top_10_tokens)
-expr = f"TextMatch({field}, '{string_of_top_10_words}')"
+expr = f"text_match({field}, '{string_of_top_10_words}')"
log.info(f"expr {expr}")
res_list, _ = collection_w.search(
data=search_data,
4 changes: 2 additions & 2 deletions tests/restful_client_v2/testcases/test_vector_operations.py
@@ -1977,7 +1977,7 @@ def test_search_vector_with_text_match_filter(self, tokenizer):
vector_to_search = [[random.random() for _ in range(dim)]]
for field in text_fields:
token = wf_map[field].most_common()[0][0]
-expr = f"TextMatch({field}, '{token}')"
+expr = f"text_match({field}, '{token}')"
logger.info(f"expr: {expr}")
rsp = self.vector_client.vector_search({"collectionName": name, "data":vector_to_search, "filter": f"{expr}", "outputFields": ["*"]})
assert rsp['code'] == 0, rsp
@@ -2813,7 +2813,7 @@ def test_query_vector_with_text_match_filter(self, tokenizer):
time.sleep(5)
for field in text_fields:
token = wf_map[field].most_common()[0][0]
-expr = f"TextMatch({field}, '{token}')"
+expr = f"text_match({field}, '{token}')"
logger.info(f"expr: {expr}")
rsp = self.vector_client.vector_query({"collectionName": name, "filter": f"{expr}", "outputFields": ["*"]})
assert rsp['code'] == 0, rsp