From 249dc4d9eba597175ca3d6126a3f9b3be43964d7 Mon Sep 17 00:00:00 2001 From: yanliang567 <82361606+yanliang567@users.noreply.github.com> Date: Tue, 20 Aug 2024 14:20:56 +0800 Subject: [PATCH] test: Add tests for upsert with auto id (#35556) Related issue: #34668 --------- Signed-off-by: yanliang567 --- .../python_client/base/collection_wrapper.py | 4 +- tests/python_client/requirements.txt | 4 +- tests/python_client/testcases/test_insert.py | 105 +++++++++++++----- tests/python_client/testcases/test_utility.py | 4 +- 4 files changed, 81 insertions(+), 36 deletions(-) diff --git a/tests/python_client/base/collection_wrapper.py b/tests/python_client/base/collection_wrapper.py index 2bb9fcb82abe1..2fae11cbec0df 100644 --- a/tests/python_client/base/collection_wrapper.py +++ b/tests/python_client/base/collection_wrapper.py @@ -339,10 +339,10 @@ def upsert(self, data, partition_name=None, timeout=None, check_task=None, check return res, check_result @trace() - def compact(self, timeout=None, check_task=None, check_items=None, **kwargs): + def compact(self, is_clustering=False, timeout=None, check_task=None, check_items=None, **kwargs): timeout = TIMEOUT if timeout is None else timeout func_name = sys._getframe().f_code.co_name - res, check = api_request([self.collection.compact, timeout], **kwargs) + res, check = api_request([self.collection.compact, is_clustering, timeout], **kwargs) check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run() return res, check_result diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt index 928f09bc0e489..523983569cdaa 100644 --- a/tests/python_client/requirements.txt +++ b/tests/python_client/requirements.txt @@ -12,8 +12,8 @@ allure-pytest==2.7.0 pytest-print==0.2.1 pytest-level==0.1.1 pytest-xdist==2.5.0 -pymilvus==2.5.0rc45 -pymilvus[bulk_writer]==2.5.0rc45 +pymilvus==2.5.0rc70 +pymilvus[bulk_writer]==2.5.0rc70 pytest-rerunfailures==9.1.1 
git+https://github.com/Projectplace/pytest-tags ndg-httpsclient diff --git a/tests/python_client/testcases/test_insert.py b/tests/python_client/testcases/test_insert.py index 04bae701a4aa3..ea44b0664b89c 100644 --- a/tests/python_client/testcases/test_insert.py +++ b/tests/python_client/testcases/test_insert.py @@ -513,7 +513,7 @@ def test_insert_exceed_varchar_limit(self): data = [vectors, ["limit_1___________", "limit_2___________"], ['1', '2']] error = {ct.err_code: 999, - ct.err_msg: "invalid input, length of string exceeds max length"} + ct.err_msg: "length of string exceeds max length"} collection_w.insert( data, check_task=CheckTasks.err_res, check_items=error) @@ -815,16 +815,6 @@ def insert(thread_i): t.join() assert collection_w.num_entities == ct.default_nb * thread_num - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason="Currently primary keys are not unique") - def test_insert_multi_threading_auto_id(self): - """ - target: test concurrent insert auto_id=True collection - method: 1.create auto_id=True collection 2.concurrent insert - expected: verify primary keys unique - """ - pass - @pytest.mark.tags(CaseLabel.L1) def test_insert_multi_times(self, dim): """ @@ -1211,11 +1201,11 @@ def test_insert_with_invalid_partition_name(self): check_items=error) @pytest.mark.tags(CaseLabel.L2) - def test_insert_invalid_with_pk_varchar_auto_id_true(self): + def test_insert_with_pk_varchar_auto_id_true(self): """ target: test insert invalid with pk varchar and auto id true method: set pk varchar max length < 18, insert data - expected: raise exception + expected: varchar pk supports auto_id=true """ string_field = cf.gen_string_field(is_primary=True, max_length=6) embedding_field = cf.gen_float_vec_field() @@ -1547,8 +1537,56 @@ def test_upsert_data_pk_exist(self, start): res = collection_w.query(exp, output_fields=[default_float_name])[0] assert [res[i][default_float_name] for i in range(upsert_nb)] == float_values.to_list() - 
@pytest.mark.tags(CaseLabel.L2) - def test_upsert_with_primary_key_string(self): + @pytest.mark.tags(CaseLabel.L0) + def test_upsert_with_auto_id(self): + """ + target: test upsert with auto id + method: 1. create a collection with autoID=true + 2. upsert 10 entities with non-existing pks + verify: success, and the pks are auto-generated + 3. query 10 entities to get the existing pks + 4. upsert 10 entities with existing pks + verify: success, and the pks are re-generated, and the new pks are visibly + """ + dim = 32 + collection_w, _, _, insert_ids, _ = self.init_collection_general(pre_upsert, auto_id=True, + dim=dim, insert_data=True, with_json=False) + nb = 10 + start = ct.default_nb * 10 + data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False) + res_upsert1 = collection_w.upsert(data=data)[0] + collection_w.flush() + # assert the pks are auto-generated, and num_entities increased for upsert with non_existing pks + assert res_upsert1.primary_keys[0] > insert_ids[-1] + assert collection_w.num_entities == ct.default_nb + nb + + # query 10 entities to get the existing pks + res_q = collection_w.query(expr='', limit=nb)[0] + print(f"res_q: {res_q}") + existing_pks = [res_q[i][ct.default_int64_field_name] for i in range(nb)] + existing_count = collection_w.query(expr=f"{ct.default_int64_field_name} in {existing_pks}", + output_fields=[ct.default_count_output])[0] + assert nb == existing_count[0].get(ct.default_count_output) + # upsert 10 entities with the existing pks + start = ct.default_nb * 20 + data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False) + data[0] = existing_pks + res_upsert2 = collection_w.upsert(data=data)[0] + collection_w.flush() + # assert the new pks are auto-generated again + assert res_upsert2.primary_keys[0] > res_upsert1.primary_keys[-1] + existing_count = collection_w.query(expr=f"{ct.default_int64_field_name} in {existing_pks}", + output_fields=[ct.default_count_output])[0] + assert 0 == 
existing_count[0].get(ct.default_count_output) + res_q = collection_w.query(expr=f"{ct.default_int64_field_name} in {res_upsert2.primary_keys}", + output_fields=["*"])[0] + assert nb == len(res_q) + current_count = collection_w.query(expr='', output_fields=[ct.default_count_output])[0] + assert current_count[0].get(ct.default_count_output) == ct.default_nb + nb + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("auto_id", [True, False]) + def test_upsert_with_primary_key_string(self, auto_id): """ target: test upsert with string primary key method: 1. create a collection with pk string @@ -1558,11 +1596,18 @@ def test_upsert_with_primary_key_string(self): """ c_name = cf.gen_unique_str(pre_upsert) fields = [cf.gen_string_field(), cf.gen_float_vec_field(dim=ct.default_dim)] - schema = cf.gen_collection_schema(fields=fields, primary_field=ct.default_string_field_name) + schema = cf.gen_collection_schema(fields=fields, primary_field=ct.default_string_field_name, + auto_id=auto_id) collection_w = self.init_collection_wrap(name=c_name, schema=schema) vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(2)] - collection_w.insert([["a", "b"], vectors]) - collection_w.upsert([[" a", "b "], vectors]) + if not auto_id: + collection_w.insert([["a", "b"], vectors]) + res_upsert = collection_w.upsert([[" a", "b "], vectors])[0] + assert res_upsert.primary_keys[0] == " a" and res_upsert.primary_keys[1] == "b " + else: + collection_w.insert([vectors]) + res_upsert = collection_w.upsert([[" a", "b "], vectors])[0] + assert res_upsert.primary_keys[0] != " a" and res_upsert.primary_keys[1] != "b " assert collection_w.num_entities == 4 @pytest.mark.tags(CaseLabel.L2) @@ -2046,7 +2091,7 @@ def test_upsert_partition_name_nonexistent(self): check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("insert and upsert have removed the [] error check") + @pytest.mark.xfail(reason="insert and upsert have removed the [] 
error check") def test_upsert_multi_partitions(self): """ target: test upsert two partitions @@ -2066,20 +2111,20 @@ def test_upsert_multi_partitions(self): check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason="smellthemoon: behavior changed") - def test_upsert_with_auto_id(self): + def test_upsert_with_auto_id_pk_type_dismacth(self): """ - target: test upsert with auto id - method: 1. create a collection with autoID=true - 2. upsert data no pk + target: test upsert with auto_id and pk type dismatch + method: 1. create a collection with pk int64 and auto_id=True + 2. upsert with pk string type dismatch expected: raise exception """ - collection_w = self.init_collection_general(pre_upsert, auto_id=True, is_index=False)[0] - error = {ct.err_code: 999, - ct.err_msg: "Upsert don't support autoid == true"} - float_vec_values = cf.gen_vectors(ct.default_nb, ct.default_dim) - data = [[np.float32(i) for i in range(ct.default_nb)], [str(i) for i in range(ct.default_nb)], - float_vec_values] + dim = 16 + collection_w = self.init_collection_general(pre_upsert, auto_id=False, + dim=dim, insert_data=True, with_json=False)[0] + nb = 10 + data = cf.gen_default_list_data(dim=dim, nb=nb, with_json=False) + data[0] = [str(i) for i in range(nb)] + error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"} collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) diff --git a/tests/python_client/testcases/test_utility.py b/tests/python_client/testcases/test_utility.py index f4eccf19597cc..ee578b0d8efeb 100644 --- a/tests/python_client/testcases/test_utility.py +++ b/tests/python_client/testcases/test_utility.py @@ -731,7 +731,7 @@ def test_index_process_collection_empty(self): cw = self.init_collection_wrap(name=c_name) self.index_wrap.init_index(cw.collection, default_field_name, default_index_params) res, _ = 
self.utility_wrap.index_building_progress(c_name) - exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0} + exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0, 'state': 'Finished'} assert res == exp_res @pytest.mark.tags(CaseLabel.L2) @@ -822,7 +822,7 @@ def test_wait_index_collection_empty(self): cw.create_index(default_field_name, default_index_params) assert self.utility_wrap.wait_for_index_building_complete(c_name)[0] res, _ = self.utility_wrap.index_building_progress(c_name) - exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0} + exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0, 'state': 'Finished'} assert res == exp_res @pytest.mark.tags(CaseLabel.L1)