Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bug]: Returns empty when querying with varchar primary key value with spaces #612

Closed
1 task done
ThreadDao opened this issue Nov 6, 2023 · 3 comments
Closed
1 task done
Assignees

Comments

@ThreadDao
Copy link
Contributor

Is there an existing issue for this?

  • I have searched the existing issues

Current Behavior

  • milvus image: master-20231102-d39ffba5-amd64

  • go-sdk branch: master

  • Test steps:

  1. create collection with varchar pk and Strong consistency level
  2. insert 3000 entities with pks: ["0", "1", "2", .... "2999"]
  3. flush -> index -> load
  4. upsert pks: ["0", "1", ... "9"] with new vectors -> query and verify the upsert succeeded
  5. upsert pks_1: [" 0 ", " 1 ", ... " 9 "] with new vector_1 -> query pks_1 and get an empty result. Expected: the query returns pks_1 and vector_1
  • Test Cases:
// TestUpsertVarcharPk exercises upsert on a collection whose primary key is a
// varchar field.
//
// Flow:
//  1. upsert the existing keys "0".."9" and verify the vectors were replaced;
//  2. upsert the space-padded keys " 0 ".." 9 " — these differ from the
//     unpadded keys, so the upsert is expected to act as a fresh insert;
//  3. query both key sets and verify each returns its own keys and vectors.
func TestUpsertVarcharPk(t *testing.T) {
	t.Parallel()
	ctx := createContext(t, time.Second*common.DefaultTimeout)
	// connect
	mc := createMilvusClient(ctx, t)

	// create -> insert [0, 3000) -> flush -> index -> load
	cp := CollectionParams{CollectionFieldsType: VarcharBinaryVec, AutoID: false, EnableDynamicField: true,
		ShardsNum: common.DefaultShards, Dim: common.DefaultDim}

	// Check the index constructor's error instead of discarding it, so a bad
	// index definition fails here rather than somewhere inside PrepareCollection.
	idx, err := entity.NewIndexBinIvfFlat(entity.JACCARD, 16)
	common.CheckErr(t, err, true)
	ip := IndexParams{BuildIndex: true, Index: idx, FieldName: common.DefaultBinaryVecFieldName, async: false}
	collName := PrepareCollection(ctx, t, mc, cp, WithIndexParams(ip), WithCreateOption(client.WithConsistencyLevel(entity.ClStrong)))

	upsertNb := 10
	outputFields := []string{common.DefaultVarcharFieldName, common.DefaultBinaryVecFieldName}

	// upsert existing entities [0, 10), varchar pks: ["0", ... "9"]
	varcharColumn, binaryColumn := common.GenDefaultVarcharData(0, upsertNb, common.DefaultDim)
	_, err = mc.Upsert(ctx, collName, "", varcharColumn, binaryColumn)
	common.CheckErr(t, err, true)

	// query and verify the updated entities
	pkColumn := entity.NewColumnVarChar(common.DefaultVarcharFieldName, varcharColumn.(*entity.ColumnVarChar).Data()[:upsertNb])
	resSet, err := mc.QueryByPks(ctx, collName, []string{}, pkColumn, []string{common.DefaultBinaryVecFieldName})
	common.CheckErr(t, err, true)
	require.ElementsMatch(t, binaryColumn.(*entity.ColumnBinaryVector).Data()[:upsertNb],
		resSet.GetColumn(common.DefaultBinaryVecFieldName).(*entity.ColumnBinaryVector).Data())

	// upsert varchar pks padded with spaces: [" 0 ", ... " 9 "]
	varcharValues := make([]string, 0, upsertNb)
	for i := 0; i < upsertNb; i++ {
		varcharValues = append(varcharValues, " "+strconv.Itoa(i)+" ")
	}
	varcharColumn1 := entity.NewColumnVarChar(common.DefaultVarcharFieldName, varcharValues)
	_, binaryColumn1 := common.GenDefaultVarcharData(0, upsertNb, common.DefaultDim)
	ids, err := mc.Upsert(ctx, collName, "", varcharColumn1, binaryColumn1)
	common.CheckErr(t, err, true)
	log.Println(ids.FieldData())
	require.ElementsMatch(t, ids.(*entity.ColumnVarChar).Data(), varcharValues)

	// query the old varchar pks (no spaces): ["0", ... "9"]
	// The error is checked before resSet is touched: on failure the result
	// set may be nil and logging it first would panic and hide the real error.
	resSet, err = mc.QueryByPks(ctx, collName, []string{}, pkColumn, outputFields)
	common.CheckErr(t, err, true)
	log.Println(resSet.GetColumn(common.DefaultVarcharFieldName).(*entity.ColumnVarChar).Data())
	require.ElementsMatch(t, varcharColumn.(*entity.ColumnVarChar).Data()[:upsertNb], resSet.GetColumn(common.DefaultVarcharFieldName).(*entity.ColumnVarChar).Data())
	require.ElementsMatch(t, binaryColumn.(*entity.ColumnBinaryVector).Data()[:upsertNb], resSet.GetColumn(common.DefaultBinaryVecFieldName).(*entity.ColumnBinaryVector).Data())

	// query a single space-padded pk; logged only, as a debugging aid
	resSet, err = mc.QueryByPks(ctx, collName, []string{}, entity.NewColumnVarChar(common.DefaultVarcharFieldName, []string{" 1 "}), outputFields)
	common.CheckErr(t, err, true)
	log.Println(resSet.GetColumn(common.DefaultVarcharFieldName).(*entity.ColumnVarChar).Data())

	// query all space-padded pks and verify they return the newly upserted entities
	pkColumn1 := entity.NewColumnVarChar(common.DefaultVarcharFieldName, varcharColumn1.Data()[:upsertNb])
	resSet, err = mc.QueryByPks(ctx, collName, []string{}, pkColumn1, outputFields)
	common.CheckErr(t, err, true)
	log.Println(resSet.GetColumn(common.DefaultVarcharFieldName).(*entity.ColumnVarChar).Data())
	require.ElementsMatch(t, varcharColumn1.Data()[:upsertNb], resSet.GetColumn(common.DefaultVarcharFieldName).(*entity.ColumnVarChar).Data())
	require.ElementsMatch(t, binaryColumn1.(*entity.ColumnBinaryVector).Data()[:upsertNb], resSet.GetColumn(common.DefaultBinaryVecFieldName).(*entity.ColumnBinaryVector).Data())
}
  • Test logs:
=== CONT  TestUpsertVarcharPk
2023/11/06 18:28:57 milvus_client.go:14: (ApiRequest): func [NewDefaultGrpcClient], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m59.999977174s]) 10.102.10.20:19530]
2023/11/06 18:28:57 milvus_client.go:21: (ApiResponse): func [NewDefaultGrpcClient], results: [0xc0000888c0]
2023/11/06 18:28:57 milvus_client.go:14: (ApiRequest): func [CreateCollection], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m59.974341751s]) RThh 0xc00009d680 2 [0xa578c0]]
2023/11/06 18:28:57 milvus_client.go:21: (ApiResponse): func [CreateCollection], results: []
2023/11/06 18:28:57 milvus_client.go:14: (ApiRequest): func [Insert], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m59.949837786s]) RThh  [0xc0004044e0 0xc000404510 0xc000404540 0xc000404570 0xc0004045a0]]
2023/11/06 18:28:57 milvus_client.go:21: (ApiResponse): func [Insert], results: [0xc0005da030]
2023/11/06 18:28:57 milvus_client.go:14: (ApiRequest): func [Flush], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m59.833695467s]) RThh false]
2023/11/06 18:29:00 milvus_client.go:21: (ApiResponse): func [Flush], results: []
2023/11/06 18:29:00 milvus_client.go:14: (ApiRequest): func [CreateIndex], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m57.071817841s]) RThh binaryVec false 0xc0003e8990 []]
2023/11/06 18:29:02 milvus_client.go:21: (ApiResponse): func [CreateIndex], results: []
2023/11/06 18:29:02 milvus_client.go:14: (ApiRequest): func [LoadCollection], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m55.180661571s]) RThh []]
2023/11/06 18:29:07 milvus_client.go:21: (ApiResponse): func [LoadCollection], results: []
2023/11/06 18:29:07 milvus_client.go:14: (ApiRequest): func [Upsert], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m49.546809185s]) RThh  [0xc00018f320 0xc00018f350]]
2023/11/06 18:29:07 milvus_client.go:21: (ApiResponse): func [Upsert], results: [0xc0004b4cf0]
2023/11/06 18:29:07 milvus_client.go:14: (ApiRequest): func [QueryByPks], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m49.514009888s]) RThh [] 0xc0004b4d20 [binaryVec] []]
2023/11/06 18:29:07 milvus_client.go:21: (ApiResponse): func [QueryByPks], results: [[0xc0004b5710 0xc0004b5740]]
2023/11/06 18:29:07 milvus_client.go:14: (ApiRequest): func [Upsert], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m49.352220728s]) RThh  [0xc0004b5770 0xc0004b57d0]]
2023/11/06 18:29:07 milvus_client.go:21: (ApiResponse): func [Upsert], results: [0xc000154330]
2023/11/06 18:29:07 upsert_test.go:160: type:VarChar scalars:<string_data:<data:" 0 " data:" 1 " data:" 2 " data:" 3 " data:" 4 " data:" 5 " data:" 6 " data:" 7 " data:" 8 " data:" 9 " > > 
2023/11/06 18:29:07 milvus_client.go:14: (ApiRequest): func [QueryByPks], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m49.309037223s]) RThh [] 0xc0004b4d20 [varchar binaryVec] []]
2023/11/06 18:29:08 milvus_client.go:21: (ApiResponse): func [QueryByPks], results: [[0xc000154a50 0xc000154a80]]
2023/11/06 18:29:08 upsert_test.go:165: [0 1 2 3 4 5 6 7 8 9]
2023/11/06 18:29:08 milvus_client.go:14: (ApiRequest): func [QueryByPks], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m48.949210388s]) RThh [] 0xc000154ab0 [varchar binaryVec] []]
2023/11/06 18:29:08 milvus_client.go:21: (ApiResponse): func [QueryByPks], results: [[0xc00018fe30 0xc00018fe60]]
2023/11/06 18:29:08 upsert_test.go:173: []
2023/11/06 18:29:08 milvus_client.go:14: (ApiRequest): func [QueryByPks], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m48.752538357s]) RThh [] 0xc00018fe90 [varchar binaryVec] []]
2023/11/06 18:29:08 milvus_client.go:21: (ApiResponse): func [QueryByPks], results: [[0xc00019c330 0xc00019c360]]
2023/11/06 18:29:08 upsert_test.go:177: []
    upsert_test.go:179: 
        	Error Trace:	/home/zong/zong/go-projects/milvus-sdk-go/test/testcases/upsert_test.go:179
        	Error:      	elements differ
        	            	
        	            	extra elements in list A:
        	            	([]interface {}) (len=10) {
        	            	 (string) (len=3) " 0 ",
        	            	 (string) (len=3) " 1 ",
        	            	 (string) (len=3) " 2 ",
        	            	 (string) (len=3) " 3 ",
        	            	 (string) (len=3) " 4 ",
        	            	 (string) (len=3) " 5 ",
        	            	 (string) (len=3) " 6 ",
        	            	 (string) (len=3) " 7 ",
        	            	 (string) (len=3) " 8 ",
        	            	 (string) (len=3) " 9 "
        	            	}
        	            	
        	            	
        	            	listA:
        	            	([]string) (len=10) {
        	            	 (string) (len=3) " 0 ",
        	            	 (string) (len=3) " 1 ",
        	            	 (string) (len=3) " 2 ",
        	            	 (string) (len=3) " 3 ",
        	            	 (string) (len=3) " 4 ",
        	            	 (string) (len=3) " 5 ",
        	            	 (string) (len=3) " 6 ",
        	            	 (string) (len=3) " 7 ",
        	            	 (string) (len=3) " 8 ",
        	            	 (string) (len=3) " 9 "
        	            	}
        	            	
        	            	
        	            	listB:
        	            	([]string) <nil>
        	Test:       	TestUpsertVarcharPk
2023/11/06 18:29:08 milvus_client.go:14: (ApiRequest): func [DropCollection], args: [context.Background.WithDeadline(2023-11-06 18:30:57.271162994 +0800 CST m=+120.003349848 [1m48.553825915s]) RThh]
2023/11/06 18:29:08 milvus_client.go:21: (ApiResponse): func [DropCollection], results: []

Expected Behavior

Return pks: [" 0 ", " 1 ", ... " 9 "]

Steps To Reproduce

No response

Environment

No response

Anything else?

No response

@ThreadDao
Copy link
Contributor Author

/assign @congqixia

@congqixia
Copy link
Contributor

Should be fixed by #619
/assign @ThreadDao
could you please verify?

@ThreadDao
Copy link
Contributor Author

Fixed

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants