diff --git a/tests/python_client/base/collection_wrapper.py b/tests/python_client/base/collection_wrapper.py index bca67fefa6da1..1a01ab5706f10 100644 --- a/tests/python_client/base/collection_wrapper.py +++ b/tests/python_client/base/collection_wrapper.py @@ -336,10 +336,10 @@ def upsert(self, data, partition_name=None, timeout=None, check_task=None, check return res, check_result @trace() - def compact(self, timeout=None, check_task=None, check_items=None, **kwargs): + def compact(self, is_clustering=False, timeout=None, check_task=None, check_items=None, **kwargs): timeout = TIMEOUT if timeout is None else timeout func_name = sys._getframe().f_code.co_name - res, check = api_request([self.collection.compact, timeout], **kwargs) + res, check = api_request([self.collection.compact, is_clustering, timeout], **kwargs) check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run() return res, check_result diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt index 9c6d3e555dab6..82b5fb33a69fc 100644 --- a/tests/python_client/requirements.txt +++ b/tests/python_client/requirements.txt @@ -12,8 +12,8 @@ allure-pytest==2.7.0 pytest-print==0.2.1 pytest-level==0.1.1 pytest-xdist==2.5.0 -pymilvus==2.4.5rc11 -pymilvus[bulk_writer]==2.4.5rc11 +pymilvus==2.4.6rc4 +pymilvus[bulk_writer]==2.4.6rc4 pytest-rerunfailures==9.1.1 git+https://github.com/Projectplace/pytest-tags ndg-httpsclient diff --git a/tests/python_client/testcases/test_alias.py b/tests/python_client/testcases/test_alias.py index c0f2dbda5e4be..c3d18d53cbfc3 100644 --- a/tests/python_client/testcases/test_alias.py +++ b/tests/python_client/testcases/test_alias.py @@ -74,10 +74,9 @@ def test_alias_create_operation_default(self): alias_name = cf.gen_unique_str(prefix) self.utility_wrap.create_alias(collection_w.name, alias_name) - collection_alias, _ = self.collection_wrap.init_collection(name=alias_name, - check_task=CheckTasks.check_collection_property, - check_items={exp_name: alias_name, - exp_schema: default_schema}) + collection_alias = self.init_collection_wrap(name=alias_name, + check_task=CheckTasks.check_collection_property, + check_items={exp_name: alias_name, exp_schema: default_schema}) # assert collection is equal to alias according to partitions assert [p.name for p in collection_w.partitions] == [ p.name for p in collection_alias.partitions] @@ -110,10 +109,9 @@ def test_alias_alter_operation_default(self): alias_a_name = cf.gen_unique_str(prefix) self.utility_wrap.create_alias(collection_1.name, alias_a_name) - collection_alias_a, _ = self.collection_wrap.init_collection(name=alias_a_name, - check_task=CheckTasks.check_collection_property, - check_items={exp_name: alias_a_name, - exp_schema: default_schema}) + collection_alias_a = self.init_collection_wrap(name=alias_a_name, + check_task=CheckTasks.check_collection_property, + check_items={exp_name: alias_a_name, exp_schema: default_schema}) # assert collection is equal to alias according to partitions assert [p.name for p in collection_1.partitions] == [ p.name for p in collection_alias_a.partitions] @@ -132,10 +130,9 @@ def test_alias_alter_operation_default(self): alias_b_name = cf.gen_unique_str(prefix) self.utility_wrap.create_alias(collection_2.name, alias_b_name) - collection_alias_b, _ = self.collection_wrap.init_collection(name=alias_b_name, - check_task=CheckTasks.check_collection_property, - check_items={exp_name: alias_b_name, - exp_schema: default_schema}) + collection_alias_b = 
self.init_collection_wrap(name=alias_b_name, + check_task=CheckTasks.check_collection_property, + check_items={exp_name: alias_b_name, exp_schema: default_schema}) # assert collection is equal to alias according to partitions assert [p.name for p in collection_2.partitions] == [ p.name for p in collection_alias_b.partitions] @@ -177,10 +174,9 @@ def test_alias_drop_operation_default(self): alias_name = cf.gen_unique_str(prefix) self.utility_wrap.create_alias(collection_w.name, alias_name) # collection_w.create_alias(alias_name) - collection_alias, _ = self.collection_wrap.init_collection(name=alias_name, - check_task=CheckTasks.check_collection_property, - check_items={exp_name: alias_name, - exp_schema: default_schema}) + collection_alias = self.init_collection_wrap(name=alias_name, + check_task=CheckTasks.check_collection_property, + check_items={exp_name: alias_name, exp_schema: default_schema}) # assert collection is equal to alias according to partitions assert [p.name for p in collection_w.partitions] == [ p.name for p in collection_alias.partitions] @@ -406,7 +402,7 @@ def test_enable_mmap_by_alias(self): """ self._connect() c_name = cf.gen_unique_str("collection") - collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema) + collection_w = self.init_collection_wrap(c_name, schema=default_schema) alias_name = cf.gen_unique_str(prefix) self.utility_wrap.create_alias(collection_w.name, alias_name) collection_alias, _ = self.collection_wrap.init_collection(name=alias_name, @@ -414,7 +410,7 @@ def test_enable_mmap_by_alias(self): check_items={exp_name: alias_name, exp_schema: default_schema}) collection_alias.set_properties({'mmap.enabled': True}) - pro = collection_w.describe().get("properties") + pro = collection_w.describe()[0].get("properties") assert pro["mmap.enabled"] == 'True' collection_w.set_properties({'mmap.enabled': False}) pro = collection_alias.describe().get("properties") diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py index 361f869cb3fa8..f0257896be47a 100644 --- a/tests/python_client/testcases/test_collection.py +++ b/tests/python_client/testcases/test_collection.py @@ -616,7 +616,7 @@ def test_collection_auto_id_inconsistent(self, auto_id): int_field = cf.gen_int64_field(is_primary=True, auto_id=auto_id) vec_field = cf.gen_float_vec_field(name='vec') schema, _ = self.collection_schema_wrap.init_collection_schema([int_field, vec_field], auto_id=not auto_id) - collection_w = self.collection_wrap.init_collection(cf.gen_unique_str(prefix), schema=schema)[0] + collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), schema=schema) assert collection_w.schema.auto_id is auto_id diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index 5167fb45b30b1..6dbb5f1f46d79 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -923,24 +923,24 @@ def test_turn_off_index_mmap(self): """ self._connect() c_name = cf.gen_unique_str(prefix) - collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema) + collection_w = self.init_collection_wrap(c_name, schema=default_schema) collection_w.insert(cf.gen_default_list_data()) collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name=ct.default_index_name) collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True}) - assert collection_w.index().params["mmap.enabled"] == 
'True' + assert collection_w.index()[0].params["mmap.enabled"] == 'True' collection_w.load() collection_w.release() collection_w.alter_index(ct.default_index_name, {'mmap.enabled': False}) collection_w.load() - assert collection_w.index().params["mmap.enabled"] == 'False' + assert collection_w.index()[0].params["mmap.enabled"] == 'False' vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)] collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, default_search_exp) collection_w.release() collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True}) - assert collection_w.index().params["mmap.enabled"] == 'True' + assert collection_w.index()[0].params["mmap.enabled"] == 'True' collection_w.load() collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, @@ -958,12 +958,11 @@ def test_drop_mmap_index(self, index, params): expected: search success """ self._connect() - c_name = cf.gen_unique_str(prefix) - collection_w, _ = self.collection_wrap.init_collection(c_name, schema=cf.gen_default_collection_schema()) + collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False)[0] default_index = {"index_type": index, "params": params, "metric_type": "L2"} collection_w.create_index(field_name, default_index, index_name=f"mmap_index_{index}") collection_w.alter_index(f"mmap_index_{index}", {'mmap.enabled': True}) - assert collection_w.index().params["mmap.enabled"] == 'True' + assert collection_w.index()[0].params["mmap.enabled"] == 'True' collection_w.drop_index(index_name=f"mmap_index_{index}") collection_w.create_index(field_name, default_index, index_name=f"index_{index}") collection_w.load() @@ -984,21 +983,21 @@ def test_rebuild_mmap_index(self): """ self._connect() c_name = cf.gen_unique_str(prefix) - collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema) + collection_w = self.init_collection_general(c_name, insert_data=True, is_index=False)[0] collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name=ct.default_index_name) collection_w.set_properties({'mmap.enabled': True}) - pro = collection_w.describe().get("properties") + pro = collection_w.describe()[0].get("properties") assert pro["mmap.enabled"] == 'True' collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True}) - assert collection_w.index().params["mmap.enabled"] == 'True' + assert collection_w.index()[0].params["mmap.enabled"] == 'True' collection_w.insert(cf.gen_default_list_data()) collection_w.flush() # check if mmap works after rebuild index collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name=ct.default_index_name) - assert collection_w.index().params["mmap.enabled"] == 'True' + assert collection_w.index()[0].params["mmap.enabled"] == 'True' collection_w.load() collection_w.release() @@ -1006,8 +1005,8 @@ def test_rebuild_mmap_index(self): # check if mmap works after reloading and rebuilding index. 
collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name=ct.default_index_name) - assert collection_w.index().params["mmap.enabled"] == 'True' - pro = collection_w.describe().get("properties") + assert collection_w.index()[0].params["mmap.enabled"] == 'True' + pro = collection_w.describe()[0].get("properties") assert pro["mmap.enabled"] == 'True' collection_w.load() @@ -1305,7 +1304,7 @@ def test_create_inverted_index_on_array_field(self): """ target: test create scalar index on array field method: 1.create collection, and create index - expected: Raise exception + expected: supported create inverted index on array since 2.4.x """ # 1. create a collection schema = cf.gen_array_collection_schema() @@ -1313,6 +1312,9 @@ def test_create_inverted_index_on_array_field(self): # 2. create index scalar_index_params = {"index_type": "INVERTED"} collection_w.create_index(ct.default_int32_array_field_name, index_params=scalar_index_params) + res, _ = self.utility_wrap.index_building_progress(collection_w.name, ct.default_int32_array_field_name) + exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0, 'state': 'Finished'} + assert res == exp_res @pytest.mark.tags(CaseLabel.L1) def test_create_inverted_index_no_vector_index(self): diff --git a/tests/python_client/testcases/test_insert.py b/tests/python_client/testcases/test_insert.py index 66bd54cce5f53..91d7766c4d4a8 100644 --- a/tests/python_client/testcases/test_insert.py +++ b/tests/python_client/testcases/test_insert.py @@ -513,7 +513,7 @@ def test_insert_exceed_varchar_limit(self): data = [vectors, ["limit_1___________", "limit_2___________"], ['1', '2']] error = {ct.err_code: 999, - ct.err_msg: "invalid input, length of string exceeds max length"} + ct.err_msg: "length of string exceeds max length"} collection_w.insert( data, check_task=CheckTasks.err_res, check_items=error) @@ -815,16 +815,6 @@ def insert(thread_i): t.join() assert collection_w.num_entities == ct.default_nb * thread_num - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason="Currently primary keys are not unique") - def test_insert_multi_threading_auto_id(self): - """ - target: test concurrent insert auto_id=True collection - method: 1.create auto_id=True collection 2.concurrent insert - expected: verify primary keys unique - """ - pass - @pytest.mark.tags(CaseLabel.L1) def test_insert_multi_times(self, dim): """ @@ -1211,11 +1201,11 @@ def test_insert_with_invalid_partition_name(self): check_items=error) @pytest.mark.tags(CaseLabel.L2) - def test_insert_invalid_with_pk_varchar_auto_id_true(self): + def test_insert_with_pk_varchar_auto_id_true(self): """ target: test insert invalid with pk varchar and auto id true method: set pk varchar max length < 18, insert data - expected: raise exception + expected: varchar pk supports auto_id=true """ string_field = cf.gen_string_field(is_primary=True, max_length=6) embedding_field = cf.gen_float_vec_field() @@ -1547,8 +1537,56 @@ def test_upsert_data_pk_exist(self, start): res = collection_w.query(exp, output_fields=[default_float_name])[0] assert [res[i][default_float_name] for i in range(upsert_nb)] == float_values.to_list() - @pytest.mark.tags(CaseLabel.L2) - def test_upsert_with_primary_key_string(self): + @pytest.mark.tags(CaseLabel.L0) + def test_upsert_with_auto_id(self): + """ + target: test upsert with auto id + method: 1. create a collection with autoID=true + 2. upsert 10 entities with non-existing pks + verify: success, and the pks are auto-generated + 3. 
query 10 entities to get the existing pks + 4. upsert 10 entities with existing pks + verify: success, and the pks are re-generated, and the new pks are visible + """ + dim = 32 + collection_w, _, _, insert_ids, _ = self.init_collection_general(pre_upsert, auto_id=True, + dim=dim, insert_data=True, with_json=False) + nb = 10 + start = ct.default_nb * 10 + data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False) + res_upsert1 = collection_w.upsert(data=data)[0] + collection_w.flush() + # assert the pks are auto-generated, and num_entities increased for upsert with non_existing pks + assert res_upsert1.primary_keys[0] > insert_ids[-1] + assert collection_w.num_entities == ct.default_nb + nb + + # query 10 entities to get the existing pks + res_q = collection_w.query(expr='', limit=nb)[0] + print(f"res_q: {res_q}") + existing_pks = [res_q[i][ct.default_int64_field_name] for i in range(nb)] + existing_count = collection_w.query(expr=f"{ct.default_int64_field_name} in {existing_pks}", + output_fields=[ct.default_count_output])[0] + assert nb == existing_count[0].get(ct.default_count_output) + # upsert 10 entities with the existing pks + start = ct.default_nb * 20 + data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False) + data[0] = existing_pks + res_upsert2 = collection_w.upsert(data=data)[0] + collection_w.flush() + # assert the new pks are auto-generated again + assert res_upsert2.primary_keys[0] > res_upsert1.primary_keys[-1] + existing_count = collection_w.query(expr=f"{ct.default_int64_field_name} in {existing_pks}", + output_fields=[ct.default_count_output])[0] + assert 0 == existing_count[0].get(ct.default_count_output) + res_q = collection_w.query(expr=f"{ct.default_int64_field_name} in {res_upsert2.primary_keys}", + output_fields=["*"])[0] + assert nb == len(res_q) + current_count = collection_w.query(expr='', output_fields=[ct.default_count_output])[0] + assert current_count[0].get(ct.default_count_output) == ct.default_nb + nb + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("auto_id", [True, False]) + def test_upsert_with_primary_key_string(self, auto_id): """ target: test upsert with string primary key method: 1. 
create a collection with pk string @@ -1558,11 +1596,18 @@ def test_upsert_with_primary_key_string(self): """ c_name = cf.gen_unique_str(pre_upsert) fields = [cf.gen_string_field(), cf.gen_float_vec_field(dim=ct.default_dim)] - schema = cf.gen_collection_schema(fields=fields, primary_field=ct.default_string_field_name) + schema = cf.gen_collection_schema(fields=fields, primary_field=ct.default_string_field_name, + auto_id=auto_id) collection_w = self.init_collection_wrap(name=c_name, schema=schema) vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(2)] - collection_w.insert([["a", "b"], vectors]) - collection_w.upsert([[" a", "b "], vectors]) + if not auto_id: + collection_w.insert([["a", "b"], vectors]) + res_upsert = collection_w.upsert([[" a", "b "], vectors])[0] + assert res_upsert.primary_keys[0] == " a" and res_upsert.primary_keys[1] == "b " + else: + collection_w.insert([vectors]) + res_upsert = collection_w.upsert([[" a", "b "], vectors])[0] + assert res_upsert.primary_keys[0] != " a" and res_upsert.primary_keys[1] != "b " assert collection_w.num_entities == 4 @pytest.mark.tags(CaseLabel.L2) @@ -2046,7 +2091,7 @@ def test_upsert_partition_name_nonexistent(self): check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("insert and upsert have removed the [] error check") + @pytest.mark.xfail(reason="insert and upsert have removed the [] error check") def test_upsert_multi_partitions(self): """ target: test upsert two partitions @@ -2065,21 +2110,20 @@ def test_upsert_multi_partitions(self): collection_w.upsert(data=data, partition_name=["partition_1", "partition_2"], check_task=CheckTasks.err_res, check_items=error) - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason="smellthemoon: behavior changed") - def test_upsert_with_auto_id(self): + def test_upsert_with_auto_id_pk_type_mismatch(self): """ - target: test upsert with auto id - method: 1. create a collection with autoID=true - 2. upsert data no pk + target: test upsert with auto_id and pk type mismatch + method: 1. create a collection with pk int64 and auto_id=True + 2. 
upsert with pk string type mismatch + expected: raise exception """ - collection_w = self.init_collection_general(pre_upsert, auto_id=True, is_index=False)[0] - error = {ct.err_code: 999, - ct.err_msg: "Upsert don't support autoid == true"} - float_vec_values = cf.gen_vectors(ct.default_nb, ct.default_dim) - data = [[np.float32(i) for i in range(ct.default_nb)], [str(i) for i in range(ct.default_nb)], - float_vec_values] + dim = 16 + collection_w = self.init_collection_general(pre_upsert, auto_id=False, + dim=dim, insert_data=True, with_json=False)[0] + nb = 10 + data = cf.gen_default_list_data(dim=dim, nb=nb, with_json=False) + data[0] = [str(i) for i in range(nb)] + error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"} collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) diff --git a/tests/python_client/testcases/test_issues.py b/tests/python_client/testcases/test_issues.py index 1dad8133ff23f..4b79d253a6405 100644 --- a/tests/python_client/testcases/test_issues.py +++ b/tests/python_client/testcases/test_issues.py @@ -27,7 +27,7 @@ def test_issue_30607(self, par_key_field, use_upsert): schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], auto_id=False, partition_key_field=par_key_field) c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, num_partitions=9) + collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=9) # insert nb = 500 @@ -61,7 +61,7 @@ def test_issue_30607(self, par_key_field, use_upsert): seeds = 200 rand_ids = random.sample(range(0, num_entities), seeds) rand_ids = [str(rand_ids[i]) for i in range(len(rand_ids))] - res = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field]) + res, _ = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field]) # verify every the random id exists assert len(res) == len(rand_ids) @@ -69,8 +69,8 @@ for i in range(len(res)): pk = res[i].get("pk") parkey_value = res[i].get(par_key_field) - res_parkey = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'", - output_fields=["pk", par_key_field]) + res_parkey, _ = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'", + output_fields=["pk", par_key_field]) if len(res_parkey) != 1: log.info(f"dirty data found: pk {pk} with parkey {parkey_value}") dirty_count += 1 diff --git a/tests/python_client/testcases/test_partition_key.py b/tests/python_client/testcases/test_partition_key.py index 284bad956492f..d630b32691e7f 100644 --- a/tests/python_client/testcases/test_partition_key.py +++ b/tests/python_client/testcases/test_partition_key.py @@ -24,7 +24,7 @@ def test_partition_key_on_field_schema(self, par_key_field): vector_field = cf.gen_float_vec_field() schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], auto_id=True) c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema) + collection_w = self.init_collection_wrap(name=c_name, schema=schema) assert len(collection_w.partitions) == ct.default_partition_num # insert @@ -53,23 +53,24 @@ def test_partition_key_on_field_schema(self, par_key_field): expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in 
["{string_prefix}1","{string_prefix}3","{string_prefix}5"]', output_fields=[int64_field.name, string_field.name], check_task=CheckTasks.check_search_results, - check_items={"nq": nq, "limit": ct.default_limit})[0] + check_items={"nq": nq, "limit": entities_per_parkey})[0] # search with partition key filter only or with non partition key res2 = collection_w.search(data=search_vectors, anns_field=vector_field.name, param=ct.default_search_params, limit=entities_per_parkey, expr=f'{int64_field.name} in [1,3,5]', output_fields=[int64_field.name, string_field.name], check_task=CheckTasks.check_search_results, - check_items={"nq": nq, "limit": ct.default_limit})[0] + check_items={"nq": nq, "limit": entities_per_parkey})[0] # search with partition key filter only or with non partition key res3 = collection_w.search(data=search_vectors, anns_field=vector_field.name, param=ct.default_search_params, limit=entities_per_parkey, expr=f'{string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]', output_fields=[int64_field.name, string_field.name], check_task=CheckTasks.check_search_results, - check_items={"nq": nq, "limit": ct.default_limit})[0] + check_items={"nq": nq, "limit": entities_per_parkey})[0] # assert the results persist - assert res1.ids == res2.ids == res3.ids + for i in range(nq): + assert res1[i].ids == res2[i].ids == res3[i].ids @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -89,14 +90,14 @@ def test_partition_key_on_collection_schema(self, par_key_field, index_on_par_ke schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], auto_id=False, partition_key_field=par_key_field) c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, num_partitions=9) + collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=9) # insert nb = 1000 string_prefix = cf.gen_str_by_length(length=6) entities_per_parkey = 20 for n in range(entities_per_parkey): - pk_values = [str(i) for i in range(n * nb, (n+1)*nb)] + pk_values = [str(i) for i in range(n * nb, (n + 1) * nb)] int64_values = [i for i in range(0, nb)] string_values = [string_prefix + str(i) for i in range(0, nb)] float_vec_values = gen_vectors(nb, ct.default_dim) @@ -120,7 +121,7 @@ def test_partition_key_on_collection_schema(self, par_key_field, index_on_par_ke expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]', output_fields=[int64_field.name, string_field.name], check_task=CheckTasks.check_search_results, - check_items={"nq": nq, "limit": ct.default_limit})[0] + check_items={"nq": nq, "limit": entities_per_parkey})[0] @pytest.mark.tags(CaseLabel.L1) def test_partition_key_off_in_field_but_enable_in_schema(self): @@ -139,8 +140,7 @@ def test_partition_key_off_in_field_but_enable_in_schema(self): err_msg = "fail to create collection" c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, - num_partitions=10) + collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=10) assert len(collection_w.partitions) == 10 @pytest.mark.skip("need more investigation") @@ -152,44 +152,7 @@ def test_partition_key_bulk_insert(self): 2. bulk insert data 3. 
verify the data bulk inserted and be searched successfully """ - self._connect() - pk_field = cf.gen_int64_field(name='pk', is_primary=True) - int64_field = cf.gen_int64_field() - string_field = cf.gen_string_field(is_partition_key=True) - vector_field = cf.gen_float_vec_field() - schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], - auto_id=True) - c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, - num_partitions=10) - # bulk insert - nb = 1000 - string_prefix = cf.gen_str_by_length(length=6) - entities_per_parkey = 20 - for n in range(entities_per_parkey): - pk_values = [str(i) for i in range(n * nb, (n+1)*nb)] - int64_values = [i for i in range(0, nb)] - string_values = [string_prefix + str(i) for i in range(0, nb)] - float_vec_values = gen_vectors(nb, ct.default_dim) - data = [pk_values, int64_values, string_values, float_vec_values] - collection_w.insert(data) - - # flush - collection_w.flush() - # build index - collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index) - # load - collection_w.load() - # search - nq = 10 - search_vectors = gen_vectors(nq, ct.default_dim) - # search with mixed filtered - res1 = collection_w.search(data=search_vectors, anns_field=vector_field.name, - param=ct.default_search_params, limit=entities_per_parkey, - expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]', - output_fields=[int64_field.name, string_field.name], - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, "limit": ct.default_limit})[0] + pass class TestPartitionKeyInvalidParams(TestcaseBase): @@ -212,8 +175,7 @@ def test_max_partitions(self): schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], auto_id=True) c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, - num_partitions=max_partition) + collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=max_partition) assert len(collection_w.partitions) == max_partition # insert @@ -233,10 +195,9 @@ def test_max_partitions(self): num_partitions = max_partition + 1 err_msg = f"partition number ({num_partitions}) exceeds max configuration ({max_partition})" c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, - num_partitions=num_partitions, - check_task=CheckTasks.err_res, - check_items={"err_code": 1100, "err_msg": err_msg}) + self.init_collection_wrap(name=c_name, schema=schema, num_partitions=num_partitions, + check_task=CheckTasks.err_res, + check_items={"err_code": 1100, "err_msg": err_msg}) @pytest.mark.tags(CaseLabel.L1) def test_min_partitions(self): @@ -257,8 +218,7 @@ def test_min_partitions(self): schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], partition_key_field=int64_field.name) c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, - num_partitions=min_partition) + collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition) assert len(collection_w.partitions) == min_partition # insert @@ -279,14 +239,12 @@ def test_min_partitions(self): # create a collection with min partitions - 1 err_msg = "The specified num_partitions should be greater than or 
equal to 1" c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, - num_partitions=min_partition - 1, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, - num_partitions=min_partition - 3, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition - 1, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) + self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition - 3, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("is_par_key", [None, "", "invalid", 0.1, [], {}, ()]) @@ -298,9 +256,9 @@ def test_invalid_partition_key_values(self, is_par_key): """ self._connect() err_msg = "Param is_partition_key must be bool type" - int64_field = cf.gen_int64_field(is_partition_key=is_par_key, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + cf.gen_int64_field(is_partition_key=is_par_key, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("num_partitions", [True, False, "", "invalid", 0.1, [], {}, ()]) @@ -319,10 +277,9 @@ def test_invalid_partitions_values(self, num_partitions): err_msg = "invalid num_partitions type" c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, - num_partitions=num_partitions, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + self.init_collection_wrap(name=c_name, schema=schema, num_partitions=num_partitions, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) @pytest.mark.tags(CaseLabel.L0) def test_partition_key_on_multi_fields(self): @@ -338,30 +295,30 @@ def test_partition_key_on_multi_fields(self): string_field = cf.gen_string_field(is_partition_key=True) vector_field = cf.gen_float_vec_field() err_msg = "Expected only one partition key field" - schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], - auto_id=True, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], + auto_id=True, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) # both defined in collection schema err_msg = "Param partition_key_field must be str type" int64_field = cf.gen_int64_field() string_field = cf.gen_string_field() - schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], - partition_key_field=[int64_field.name, string_field.name], - auto_id=True, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], + partition_key_field=[int64_field.name, string_field.name], + auto_id=True, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) # one defined in field schema, one defined in collection schema err_msg = "Expected only one partition key field" int64_field = cf.gen_int64_field(is_partition_key=True) string_field = cf.gen_string_field() 
- schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], - partition_key_field=string_field.name, - auto_id=True, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], + partition_key_field=string_field.name, + auto_id=True, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("is_int64_primary", [True, False]) @@ -384,9 +341,9 @@ def test_partition_key_on_primary_key(self, is_int64_primary): err_msg = "the partition key field must not be primary field" c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + self.init_collection_wrap(name=c_name, schema=schema, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) # if settings on collection schema if is_int64_primary: @@ -399,9 +356,9 @@ def test_partition_key_on_primary_key(self, is_int64_primary): err_msg = "the partition key field must not be primary field" c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + self.init_collection_wrap(name=c_name, schema=schema, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) @pytest.mark.tags(CaseLabel.L0) def test_partition_key_on_and_off(self): @@ -416,21 +373,21 @@ def test_partition_key_on_and_off(self): string_field = cf.gen_string_field() vector_field = cf.gen_float_vec_field() err_msg = "Expected only one partition key field" - schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], - partition_key_field=vector_field.name, - auto_id=True, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], + partition_key_field=vector_field.name, + auto_id=True, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) # if two fields with same type string_field = cf.gen_string_field(name="string1", is_partition_key=True) string_field2 = cf.gen_string_field(name="string2") err_msg = "Expected only one partition key field" - schema = cf.gen_collection_schema(fields=[pk_field, string_field, string_field2, vector_field], - partition_key_field=string_field2.name, - auto_id=True, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + cf.gen_collection_schema(fields=[pk_field, string_field, string_field2, vector_field], + partition_key_field=string_field2.name, + auto_id=True, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("field_type", [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR, DataType.FLOAT, @@ -458,12 +415,12 @@ def test_partition_key_on_invalid_type_fields(self, field_type): vector_field = cf.gen_binary_vec_field(is_partition_key=(field_type == DataType.BINARY_VECTOR)) err_msg = "Partition key field type must be DataType.INT64 or DataType.VARCHAR" - schema = cf.gen_collection_schema(fields=[pk_field, int8_field, int16_field, int32_field, - bool_field, float_field, double_field, json_field, - 
int64_field, string_field, vector_field], - auto_id=True, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + cf.gen_collection_schema(fields=[pk_field, int8_field, int16_field, int32_field, + bool_field, float_field, double_field, json_field, + int64_field, string_field, vector_field], + auto_id=True, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) @pytest.mark.tags(CaseLabel.L1) def test_partition_key_on_not_existed_fields(self): @@ -478,11 +435,11 @@ def test_partition_key_on_not_existed_fields(self): string_field = cf.gen_string_field() vector_field = cf.gen_float_vec_field() err_msg = "the specified partition key field {non_existing_field} not exist" - schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], - partition_key_field="non_existing_field", - auto_id=True, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], + partition_key_field="non_existing_field", + auto_id=True, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) @pytest.mark.tags(CaseLabel.L1) def test_partition_key_on_empty_and_num_partitions_set(self): @@ -497,18 +454,17 @@ def test_partition_key_on_empty_and_num_partitions_set(self): string_field = cf.gen_string_field() vector_field = cf.gen_float_vec_field() err_msg = "the specified partition key field {} not exist" - schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], - partition_key_field="", auto_id=True, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], + partition_key_field="", auto_id=True, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) schema = cf.gen_default_collection_schema() err_msg = "num_partitions should only be specified with partition key field enabled" c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, - num_partitions=200, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + self.init_collection_wrap(name=c_name, schema=schema, num_partitions=200, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("invalid_data", [99, True, None, [], {}, ()]) @@ -528,7 +484,7 @@ def test_partition_key_insert_invalid_data(self, invalid_data): partition_key_field=string_field.name, auto_id=False) c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema) + collection_w = self.init_collection_wrap(name=c_name, schema=schema) # insert nb = 10 @@ -541,7 +497,7 @@ def test_partition_key_insert_invalid_data(self, invalid_data): data = [pk_values, int64_values, string_values, float_vec_values] err_msg = "expect string input" - self.collection_wrap.insert(data, check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg}) + collection_w.insert(data, check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg}) class TestPartitionApiForbidden(TestcaseBase): @@ -564,23 +520,23 @@ def test_create_partition(self): vector_field = cf.gen_float_vec_field() schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, 
vector_field], auto_id=True) c_name = cf.gen_unique_str("par_key") - collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema) + collection_w = self.init_collection_wrap(name=c_name, schema=schema) # create partition err_msg = "disable create partition if partition key mode is used" partition_name = cf.gen_unique_str("partition") - self.collection_wrap.create_partition(partition_name, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) - self.partition_wrap.init_partition(collection_w, partition_name, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + collection_w.create_partition(partition_name, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) + self.init_partition_wrap(collection_w, partition_name, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) # get partition is allowed - partitions = self.collection_wrap.partitions + partitions = collection_w.partitions collection_w.partition(partitions[0].name) - self.partition_wrap.init_partition(collection_w, partitions[0].name) - assert self.partition_wrap.name == partitions[0].name + partition_w = self.init_partition_wrap(collection_w, partitions[0].name) + assert partition_w.name == partitions[0].name # has partition is allowed assert collection_w.has_partition(partitions[0].name) assert self.utility_wrap.has_partition(collection_w.name, partitions[0].name) @@ -594,21 +550,21 @@ def test_create_partition(self): string_values = [string_prefix + str(i) for i in range(0, nb)] float_vec_values = gen_vectors(nb, ct.default_dim) data = [int64_values, string_values, float_vec_values] - self.collection_wrap.insert(data) + collection_w.insert(data) err_msg = "not support manually specifying the partition names if partition key mode is used" - self.partition_wrap.insert(data, check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) - self.collection_wrap.insert(data, partition_name=partitions[0].name, - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + partition_w.insert(data, check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) + collection_w.insert(data, partition_name=partitions[0].name, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) err_msg = "disable load partitions if partition key mode is used" - self.partition_wrap.load(check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) - self.collection_wrap.load(partition_names=[partitions[0].name], - check_task=CheckTasks.err_res, - check_items={"err_code": 2, "err_msg": err_msg}) + partition_w.load(check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) + collection_w.load(partition_names=[partitions[0].name], + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) # flush collection_w.flush() @@ -621,26 +577,26 @@ def test_create_partition(self): nq = 10 search_vectors = gen_vectors(nq, ct.default_dim) # search with mixed filtered - res1 = self.collection_wrap.search(data=search_vectors, anns_field=vector_field.name, - param=ct.default_search_params, limit=entities_per_parkey, - expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]', - output_fields=[int64_field.name, string_field.name], - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, "limit": 
ct.default_limit})[0] + res1 = collection_w.search(data=search_vectors, anns_field=vector_field.name, + param=ct.default_search_params, limit=entities_per_parkey, + expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]', + output_fields=[int64_field.name, string_field.name], + check_task=CheckTasks.check_search_results, + check_items={"nq": nq, "limit": ct.default_limit})[0] pks = res1[0].ids[:3] err_msg = "not support manually specifying the partition names if partition key mode is used" - self.collection_wrap.search(data=search_vectors, anns_field=vector_field.name, partition_names=[partitions[0].name], - param=ct.default_search_params, limit=entities_per_parkey, - expr=f'{int64_field.name} in [1,3,5]', - output_fields=[int64_field.name, string_field.name], - check_task=CheckTasks.err_res, - check_items={"err_code": nq, "err_msg": err_msg}) - self.partition_wrap.search(data=search_vectors, anns_field=vector_field.name, - params=ct.default_search_params, limit=entities_per_parkey, - expr=f'{string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]', - output_fields=[int64_field.name, string_field.name], - check_task=CheckTasks.err_res, - check_items={"err_code": nq, "err_msg": err_msg}) + collection_w.search(data=search_vectors, anns_field=vector_field.name, partition_names=[partitions[0].name], + param=ct.default_search_params, limit=entities_per_parkey, + expr=f'{int64_field.name} in [1,3,5]', + output_fields=[int64_field.name, string_field.name], + check_task=CheckTasks.err_res, + check_items={"err_code": nq, "err_msg": err_msg}) + partition_w.search(data=search_vectors, anns_field=vector_field.name, + params=ct.default_search_params, limit=entities_per_parkey, + expr=f'{string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]', + output_fields=[int64_field.name, string_field.name], + check_task=CheckTasks.err_res, + check_items={"err_code": nq, "err_msg": err_msg}) # partition loading progress is allowed self.utility_wrap.loading_progress(collection_name=collection_w.name) @@ -652,18 +608,22 @@ def test_create_partition(self): self.utility_wrap.wait_for_loading_complete(collection_name=collection_w.name, partition_names=[partitions[0].name]) # partition flush is allowed: #24165 - self.partition_wrap.flush() + partition_w.flush() # partition delete is not allowed - self.partition_wrap.delete(expr=f'{pk_field.name} in {pks}', - check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg}) - self.collection_wrap.delete(expr=f'{pk_field.name} in {pks}', partition_name=partitions[0].name, - check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg}) + partition_w.delete(expr=f'{pk_field.name} in {pks}', + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) + collection_w.delete(expr=f'{pk_field.name} in {pks}', partition_name=partitions[0].name, + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) # partition query is not allowed - self.partition_wrap.query(expr=f'{pk_field.name} in {pks}', - check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg}) - self.collection_wrap.query(expr=f'{pk_field.name} in {pks}', partition_names=[partitions[0].name], - check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg}) + partition_w.query(expr=f'{pk_field.name} in {pks}', + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": 
err_msg}) + collection_w.query(expr=f'{pk_field.name} in {pks}', partition_names=[partitions[0].name], + check_task=CheckTasks.err_res, + check_items={"err_code": 2, "err_msg": err_msg}) # partition upsert is not allowed # self.partition_wrap.upsert(data=data, check_task=CheckTasks.err_res, # check_items={"err_code": 2, "err_msg": err_msg}) @@ -671,10 +631,10 @@ def test_create_partition(self): # chek_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg}) # partition release err_msg = "disable release partitions if partition key mode is used" - self.partition_wrap.release(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg}) + partition_w.release(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg}) # partition drop err_msg = "disable drop partition if partition key mode is used" - self.partition_wrap.drop(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg}) + partition_w.drop(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg}) # # partition bulk insert # self.utility_wrap.do_bulk_insert(collection_w.name, files, partition_names=[partitions[0].name], diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py index 9c583e56394da..bc6811b15f307 100644 --- a/tests/python_client/testcases/test_query.py +++ b/tests/python_client/testcases/test_query.py @@ -3838,7 +3838,7 @@ def test_counts_expression_sparse_vectors(self, index): self._connect() c_name = cf.gen_unique_str(prefix) schema = cf.gen_default_sparse_schema() - collection_w, _ = self.collection_wrap.init_collection(c_name, schema=schema) + collection_w = self.init_collection_wrap(c_name, schema=schema) data = cf.gen_default_list_sparse_data() collection_w.insert(data) params = cf.get_index_params_params(index) diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index fcf3694fa7cba..9155b4f31f4e5 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -4676,9 +4676,9 @@ def test_binary_indexed_over_max_dim(self, dim): self._connect() c_name = cf.gen_unique_str(prefix) binary_schema = cf.gen_default_binary_collection_schema(dim=dim) - self.collection_wrap.init_collection(c_name, schema=binary_schema, - check_task=CheckTasks.err_res, - check_items={"err_code": 65535, "err_msg": f"invalid dimension {dim}."}) + self.init_collection_wrap(c_name, schema=binary_schema, + check_task=CheckTasks.err_res, + check_items={"err_code": 999, "err_msg": f"invalid dimension: {dim}."}) class TestSearchBase(TestcaseBase): @@ -5173,8 +5173,9 @@ def test_each_index_with_mmap_enabled_search(self, index): expected: search success """ self._connect() - c_name = cf.gen_unique_str(prefix) - collection_w, _ = self.collection_wrap.init_collection(c_name, schema=cf.gen_default_collection_schema()) + nb = 2000 + dim = 32 + collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False)[0] params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "L2"} collection_w.create_index(field_name, default_index, index_name="mmap_index") @@ -5183,13 +5184,18 @@ def test_each_index_with_mmap_enabled_search(self, index): # search collection_w.load() search_params = cf.gen_search_param(index)[0] - vector = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)] - collection_w.search(vector, 
default_search_field, search_params, ct.default_limit) + vector = [[random.random() for _ in range(dim)] for _ in range(default_nq)] + collection_w.search(vector, default_search_field, search_params, ct.default_limit, + output_fields=["*"], + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "limit": ct.default_limit}) # enable mmap collection_w.release() collection_w.alter_index("mmap_index", {'mmap.enabled': False}) collection_w.load() collection_w.search(vector, default_search_field, search_params, ct.default_limit, + output_fields=["*"], check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "limit": ct.default_limit}) @@ -5204,29 +5210,27 @@ def test_enable_mmap_search_for_binary_indexes(self, index): """ self._connect() dim = 64 - c_name = cf.gen_unique_str(prefix) - default_schema = cf.gen_default_binary_collection_schema(auto_id=False, dim=dim, - primary_field=ct.default_int64_field_name) - collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema) + nb = 2000 + collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False, is_binary=True)[0] params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "JACCARD"} - collection_w.create_index("binary_vector", default_index, index_name="binary_idx_name") + collection_w.create_index(ct.default_binary_vec_field_name, default_index, index_name="binary_idx_name") collection_w.alter_index("binary_idx_name", {'mmap.enabled': True}) collection_w.set_properties({'mmap.enabled': True}) collection_w.load() - pro = collection_w.describe().get("properties") + pro = collection_w.describe()[0].get("properties") assert pro["mmap.enabled"] == 'True' - assert collection_w.index().params["mmap.enabled"] == 'True' + assert collection_w.index()[0].params["mmap.enabled"] == 'True' # search - binary_vectors = cf.gen_binary_vectors(3000, dim)[1] + binary_vectors = cf.gen_binary_vectors(default_nq, dim)[1] search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}} - output_fields = [default_string_field_name] - collection_w.search(binary_vectors[:default_nq], "binary_vector", search_params, + output_fields = ["*"] + collection_w.search(binary_vectors, ct.default_binary_vec_field_name, search_params, default_limit, default_search_string_exp, output_fields=output_fields, check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "limit": ct.default_top_k}) + check_items={"nq": default_nq, + "limit": default_limit}) class TestSearchDSL(TestcaseBase):