Skip to content

Commit

Permalink
test: Add tests for upsert with auto id (milvus-io#35556)
Browse files Browse the repository at this point in the history
Related issue: milvus-io#34668

---------

Signed-off-by: yanliang567 <[email protected]>
  • Loading branch information
yanliang567 authored Aug 20, 2024
1 parent e09dc3b commit 249dc4d
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 36 deletions.
4 changes: 2 additions & 2 deletions tests/python_client/base/collection_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,10 +339,10 @@ def upsert(self, data, partition_name=None, timeout=None, check_task=None, check
return res, check_result

@trace()
def compact(self, is_clustering=False, timeout=None, check_task=None, check_items=None, **kwargs):
    """
    Trigger compaction on the wrapped collection and run the standard response check.

    :param is_clustering: when True, request a clustering compaction instead of a
                          regular one (forwarded positionally to Collection.compact)
    :param timeout: seconds to wait; defaults to the module-level TIMEOUT
    :param check_task: name of the check routine for ResponseChecker
    :param check_items: expected values handed to the check routine
    :return: tuple of (api response, check result)
    """
    timeout = TIMEOUT if timeout is None else timeout
    # record the wrapper's own name so ResponseChecker can dispatch on it
    func_name = sys._getframe().f_code.co_name
    # api_request calls target(*args): compact(is_clustering, timeout)
    res, check = api_request([self.collection.compact, is_clustering, timeout], **kwargs)
    check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
    return res, check_result

Expand Down
4 changes: 2 additions & 2 deletions tests/python_client/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ allure-pytest==2.7.0
pytest-print==0.2.1
pytest-level==0.1.1
pytest-xdist==2.5.0
pymilvus==2.5.0rc70
pymilvus[bulk_writer]==2.5.0rc70
pytest-rerunfailures==9.1.1
git+https://github.com/Projectplace/pytest-tags
ndg-httpsclient
Expand Down
105 changes: 75 additions & 30 deletions tests/python_client/testcases/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ def test_insert_exceed_varchar_limit(self):
data = [vectors, ["limit_1___________",
"limit_2___________"], ['1', '2']]
error = {ct.err_code: 999,
ct.err_msg: "invalid input, length of string exceeds max length"}
ct.err_msg: "length of string exceeds max length"}
collection_w.insert(
data, check_task=CheckTasks.err_res, check_items=error)

Expand Down Expand Up @@ -815,16 +815,6 @@ def insert(thread_i):
t.join()
assert collection_w.num_entities == ct.default_nb * thread_num

@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip(reason="Currently primary keys are not unique")
def test_insert_multi_threading_auto_id(self):
    """
    target: test concurrent insert auto_id=True collection
    method: 1.create auto_id=True collection 2.concurrent insert
    expected: verify primary keys unique
    """
    # Placeholder: body intentionally empty while the skip reason above holds;
    # implement the concurrent-insert check once auto-generated pks are unique.
    pass

@pytest.mark.tags(CaseLabel.L1)
def test_insert_multi_times(self, dim):
"""
Expand Down Expand Up @@ -1211,11 +1201,11 @@ def test_insert_with_invalid_partition_name(self):
check_items=error)

@pytest.mark.tags(CaseLabel.L2)
def test_insert_invalid_with_pk_varchar_auto_id_true(self):
def test_insert_with_pk_varchar_auto_id_true(self):
"""
target: test insert invalid with pk varchar and auto id true
method: set pk varchar max length < 18, insert data
expected: raise exception
expected: varchar pk supports auto_id=true
"""
string_field = cf.gen_string_field(is_primary=True, max_length=6)
embedding_field = cf.gen_float_vec_field()
Expand Down Expand Up @@ -1547,8 +1537,56 @@ def test_upsert_data_pk_exist(self, start):
res = collection_w.query(exp, output_fields=[default_float_name])[0]
assert [res[i][default_float_name] for i in range(upsert_nb)] == float_values.to_list()

@pytest.mark.tags(CaseLabel.L2)
def test_upsert_with_primary_key_string(self):
@pytest.mark.tags(CaseLabel.L0)
def test_upsert_with_auto_id(self):
    """
    target: test upsert with auto id
    method: 1. create a collection with autoID=true
            2. upsert 10 entities with non-existing pks
               verify: success, and the pks are auto-generated
            3. query 10 entities to get the existing pks
            4. upsert 10 entities with existing pks
               verify: success, and the pks are re-generated, and the new pks are visible
    """
    dim = 32
    collection_w, _, _, insert_ids, _ = self.init_collection_general(pre_upsert, auto_id=True,
                                                                     dim=dim, insert_data=True,
                                                                     with_json=False)
    nb = 10
    # start far beyond the initially inserted range so generated field values don't collide
    start = ct.default_nb * 10
    data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False)
    res_upsert1 = collection_w.upsert(data=data)[0]
    collection_w.flush()
    # upsert with non-existing pks acts as insert: pks are freshly auto-generated
    # (greater than every previously issued id) and num_entities grows by nb
    assert res_upsert1.primary_keys[0] > insert_ids[-1]
    assert collection_w.num_entities == ct.default_nb + nb

    # query 10 entities to get the existing pks
    res_q = collection_w.query(expr='', limit=nb)[0]
    print(f"res_q: {res_q}")
    existing_pks = [res_q[i][ct.default_int64_field_name] for i in range(nb)]
    existing_count = collection_w.query(expr=f"{ct.default_int64_field_name} in {existing_pks}",
                                        output_fields=[ct.default_count_output])[0]
    assert nb == existing_count[0].get(ct.default_count_output)

    # upsert 10 entities with the existing pks
    start = ct.default_nb * 20
    data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False)
    data[0] = existing_pks
    res_upsert2 = collection_w.upsert(data=data)[0]
    collection_w.flush()
    # upsert on existing pks removes the old rows and auto-generates new pks again
    assert res_upsert2.primary_keys[0] > res_upsert1.primary_keys[-1]
    existing_count = collection_w.query(expr=f"{ct.default_int64_field_name} in {existing_pks}",
                                        output_fields=[ct.default_count_output])[0]
    assert 0 == existing_count[0].get(ct.default_count_output)
    # the re-generated pks must be queryable, and total row count unchanged by step 4
    res_q = collection_w.query(expr=f"{ct.default_int64_field_name} in {res_upsert2.primary_keys}",
                               output_fields=["*"])[0]
    assert nb == len(res_q)
    current_count = collection_w.query(expr='', output_fields=[ct.default_count_output])[0]
    assert current_count[0].get(ct.default_count_output) == ct.default_nb + nb

@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
def test_upsert_with_primary_key_string(self, auto_id):
    """
    target: test upsert with string primary key
    method: 1. create a collection with pk string (auto_id parametrized)
            2. insert 2 entities, then upsert 2 entities with pks " a" / "b "
    expected: auto_id=False keeps the caller-provided string pks verbatim;
              auto_id=True re-generates the pks instead
    """
    # NOTE(review): part of the original docstring was hidden in the diff view;
    # the method/expected lines above are reconstructed from the code — confirm upstream.
    c_name = cf.gen_unique_str(pre_upsert)
    fields = [cf.gen_string_field(), cf.gen_float_vec_field(dim=ct.default_dim)]
    schema = cf.gen_collection_schema(fields=fields, primary_field=ct.default_string_field_name,
                                      auto_id=auto_id)
    collection_w = self.init_collection_wrap(name=c_name, schema=schema)
    vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(2)]
    if not auto_id:
        # explicit pks: upsert must keep the provided strings, whitespace included
        collection_w.insert([["a", "b"], vectors])
        res_upsert = collection_w.upsert([[" a", "b "], vectors])[0]
        assert res_upsert.primary_keys[0] == " a" and res_upsert.primary_keys[1] == "b "
    else:
        # auto_id: pk column is omitted on insert; pks passed to upsert are not kept
        collection_w.insert([vectors])
        res_upsert = collection_w.upsert([[" a", "b "], vectors])[0]
        assert res_upsert.primary_keys[0] != " a" and res_upsert.primary_keys[1] != "b "
    # 2 inserted + 2 upserted (non-matching pks) = 4 rows either way
    assert collection_w.num_entities == 4

@pytest.mark.tags(CaseLabel.L2)
Expand Down Expand Up @@ -2046,7 +2091,7 @@ def test_upsert_partition_name_nonexistent(self):
check_task=CheckTasks.err_res, check_items=error)

@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("insert and upsert have removed the [] error check")
@pytest.mark.xfail("insert and upsert have removed the [] error check")
def test_upsert_multi_partitions(self):
"""
target: test upsert two partitions
Expand All @@ -2066,20 +2111,20 @@ def test_upsert_multi_partitions(self):
check_task=CheckTasks.err_res, check_items=error)

@pytest.mark.tags(CaseLabel.L2)
def test_upsert_with_auto_id_pk_type_dismacth(self):
    """
    target: test upsert with a pk column whose type mismatches the schema
    method: 1. create a collection with an int64 pk
            2. upsert data whose pk column is string typed
    expected: raise exception
    """
    # NOTE(review): the method name says "auto_id" but the collection below is
    # created with auto_id=False, and "dismacth" misspells "mismatch" — confirm
    # intent / rename upstream (name kept here to avoid breaking test selection).
    dim = 16
    collection_w = self.init_collection_general(pre_upsert, auto_id=False,
                                                dim=dim, insert_data=True, with_json=False)[0]
    nb = 10
    data = cf.gen_default_list_data(dim=dim, nb=nb, with_json=False)
    # replace the int64 pk column with strings to trigger the schema type check
    data[0] = [str(i) for i in range(nb)]
    error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
    collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)

@pytest.mark.tags(CaseLabel.L2)
Expand Down
4 changes: 2 additions & 2 deletions tests/python_client/testcases/test_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,7 @@ def test_index_process_collection_empty(self):
cw = self.init_collection_wrap(name=c_name)
self.index_wrap.init_index(cw.collection, default_field_name, default_index_params)
res, _ = self.utility_wrap.index_building_progress(c_name)
exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0}
exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0, 'state': 'Finished'}
assert res == exp_res

@pytest.mark.tags(CaseLabel.L2)
Expand Down Expand Up @@ -822,7 +822,7 @@ def test_wait_index_collection_empty(self):
cw.create_index(default_field_name, default_index_params)
assert self.utility_wrap.wait_for_index_building_complete(c_name)[0]
res, _ = self.utility_wrap.index_building_progress(c_name)
exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0}
exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0, 'state': 'Finished'}
assert res == exp_res

@pytest.mark.tags(CaseLabel.L1)
Expand Down

0 comments on commit 249dc4d

Please sign in to comment.