diff --git a/client/collection.go b/client/collection.go index 142aac915..98b0cb4be 100644 --- a/client/collection.go +++ b/client/collection.go @@ -218,7 +218,8 @@ func (c *GrpcClient) validateSchema(sch *entity.Schema) error { if field.DataType == entity.FieldTypeFloatVector || field.DataType == entity.FieldTypeBinaryVector || field.DataType == entity.FieldTypeBFloat16Vector || - field.DataType == entity.FieldTypeFloat16Vector { + field.DataType == entity.FieldTypeFloat16Vector || + field.DataType == entity.FieldTypeSparseVector { vectors++ } } diff --git a/entity/rows.go b/entity/rows.go index 33799d410..f4a7ff6ca 100644 --- a/entity/rows.go +++ b/entity/rows.go @@ -389,6 +389,10 @@ func AnyToColumns(rows []interface{}, schemas ...*Schema) ([]Column, error) { } col := NewColumnBFloat16Vector(field.Name, int(dim), data) nameColumns[field.Name] = col + case FieldTypeSparseVector: + data := make([]SparseEmbedding, 0, rowsLen) + col := NewColumnSparseVectors(field.Name, data) + nameColumns[field.Name] = col } } diff --git a/test/common/response_check.go b/test/common/response_check.go index 1db4d8053..869d61090 100644 --- a/test/common/response_check.go +++ b/test/common/response_check.go @@ -190,8 +190,6 @@ func CheckOutputFields(t *testing.T, actualColumns []entity.Column, expFields [] for _, actualColumn := range actualColumns { actualFields = append(actualFields, actualColumn.Name()) } - log.Printf("actual fields: %v", actualFields) - log.Printf("expected fields: %v", expFields) require.ElementsMatchf(t, expFields, actualFields, fmt.Sprintf("Expected search output fields: %v, actual: %v", expFields, actualFields)) } diff --git a/test/common/utils.go b/test/common/utils.go index a16bfedef..02358e16a 100644 --- a/test/common/utils.go +++ b/test/common/utils.go @@ -33,6 +33,7 @@ const ( DefaultBinaryVecFieldName = "binaryVec" DefaultFloat16VecFieldName = "fp16Vec" DefaultBFloat16VecFieldName = "bf16Vec" + DefaultSparseVecFieldName = "sparseVec" DefaultDynamicNumberField = "dynamicNumber" DefaultDynamicStringField = "dynamicString" DefaultDynamicBoolField = "dynamicBool" @@ -51,9 +52,10 @@ const ( DefaultShards = int32(2) DefaultNb = 3000 DefaultNq = 5 - DefaultTopK = 10 - TestCapacity = 100 // default array field capacity - TestMaxLen = 100 // default varchar field max length + //DefaultNq = 1 + DefaultTopK = 10 + TestCapacity = 100 // default array field capacity + TestMaxLen = 100 // default varchar field max length ) // const default value from milvus @@ -223,6 +225,21 @@ func GenBinaryVector(dim int64) []byte { return vector } +func GenSparseVector(maxLen int) entity.SparseEmbedding { + length := 1 + rand.Intn(1+maxLen) + positions := make([]uint32, length) + values := make([]float32, length) + for i := 0; i < length; i++ { + positions[i] = uint32(2*i + 1) + values[i] = rand.Float32() + } + vector, err := entity.NewSliceSparseEmbedding(positions, values) + if err != nil { + log.Fatalf("Generate vector failed %s", err) + } + return vector +} + // --- common utils --- // --- gen fields --- @@ -405,6 +422,13 @@ func GenColumnData(start int, nb int, fieldType entity.FieldType, fieldName stri bf16Vectors = append(bf16Vectors, vec) } return entity.NewColumnBFloat16Vector(fieldName, int(opt.dim), bf16Vectors) + case entity.FieldTypeSparseVector: + vectors := make([]entity.SparseEmbedding, 0, nb) + for i := start; i < start+nb; i++ { + vec := GenSparseVector(opt.maxLenSparse) + vectors = append(vectors, vec) + } + return entity.NewColumnSparseVectors(fieldName, vectors) default: return nil } @@ -984,6 +1008,53 @@ func GenDefaultArrayRows(start int, nb int, dim int64, enableDynamicField bool, return rows } +func GenDefaultSparseRows(start int, nb int, dim int64, maxLenSparse int, enableDynamicField bool) []interface{} { + rows := make([]interface{}, 0, nb) + type BaseRow struct { + Int64 int64 `json:"int64" milvus:"name:int64"` + Varchar string `json:"varchar" milvus:"name:varchar"` + FloatVec []float32 `json:"floatVec" milvus:"name:floatVec"` + SparseVec entity.SparseEmbedding `json:"sparseVec" milvus:"name:sparseVec"` + } + + type DynamicRow struct { + Int64 int64 `json:"int64" milvus:"name:int64"` + Varchar string `json:"varchar" milvus:"name:varchar"` + FloatVec []float32 `json:"floatVec" milvus:"name:floatVec"` + SparseVec entity.SparseEmbedding `json:"sparseVec" milvus:"name:sparseVec"` + Dynamic Dynamic `json:"dynamic" milvus:"name:dynamic"` + } + + for i := start; i < start+nb; i++ { + baseRow := BaseRow{ + Int64: int64(i), + Varchar: strconv.Itoa(i), + FloatVec: GenFloatVector(dim), + SparseVec: GenSparseVector(maxLenSparse), + } + // json and dynamic field + dynamicJSON := Dynamic{ + Number: int32(i), + String: strconv.Itoa(i), + Bool: i%2 == 0, + List: []int64{int64(i), int64(i + 1)}, + } + if enableDynamicField { + dynamicRow := DynamicRow{ + Int64: baseRow.Int64, + Varchar: baseRow.Varchar, + FloatVec: baseRow.FloatVec, + SparseVec: baseRow.SparseVec, + Dynamic: dynamicJSON, + } + rows = append(rows, dynamicRow) + } else { + rows = append(rows, &baseRow) + } + } + return rows +} + func GenAllVectorsRows(start int, nb int, dim int64, enableDynamicField bool) []interface{} { rows := make([]interface{}, 0, nb) type BaseRow struct { @@ -1234,11 +1305,28 @@ var SupportBinIvfFlatMetricType = []entity.MetricType{ entity.HAMMING, } +var UnsupportedSparseVecMetricsType = []entity.MetricType{ + entity.L2, + entity.COSINE, + entity.JACCARD, + entity.HAMMING, + entity.SUBSTRUCTURE, + entity.SUPERSTRUCTURE, +} + // GenAllFloatIndex gen all float vector index -func GenAllFloatIndex() []entity.Index { +func GenAllFloatIndex(metricTypes ...entity.MetricType) []entity.Index { nlist := 128 var allFloatIndex []entity.Index - for _, metricType := range SupportFloatMetricType { + var allMetricTypes []entity.MetricType + log.Println(metricTypes) + if len(metricTypes) == 0 { + allMetricTypes = SupportFloatMetricType + } else { + allMetricTypes = metricTypes + } + for _, metricType := range allMetricTypes { + log.Println(metricType) idxFlat, _ := entity.NewIndexFlat(metricType) idxIvfFlat, _ := entity.NewIndexIvfFlat(metricType, nlist) idxIvfSq8, _ := entity.NewIndexIvfSQ8(metricType, nlist) @@ -1279,6 +1367,11 @@ func GenSearchVectors(nq int, dim int64, dataType entity.FieldType) []entity.Vec vector := GenBFloat16Vector(dim) vectors = append(vectors, entity.BFloat16Vector(vector)) } + case entity.FieldTypeSparseVector: + for i := 0; i < nq; i++ { + vec := GenSparseVector(int(dim)) + vectors = append(vectors, vec) + } } return vectors } diff --git a/test/common/utils_client.go b/test/common/utils_client.go index da730684e..d81cd3156 100644 --- a/test/common/utils_client.go +++ b/test/common/utils_client.go @@ -114,9 +114,10 @@ func GenSchema(name string, autoID bool, fields []*entity.Field, opts ...CreateS // GenColumnDataOption -- create column data -- type GenColumnDataOption func(opt *genDataOpt) type genDataOpt struct { - dim int64 - ElementType entity.FieldType - capacity int64 + dim int64 + ElementType entity.FieldType + capacity int64 + maxLenSparse int } func WithVectorDim(dim int64) GenColumnDataOption { @@ -137,4 +138,10 @@ func WithArrayCapacity(capacity int64) GenColumnDataOption { } } +func WithSparseVectorLen(length int) GenColumnDataOption { + return func(opt *genDataOpt) { + opt.maxLenSparse = length + } +} + // -- create column data -- diff --git a/test/testcases/collection_test.go b/test/testcases/collection_test.go index 1beb3590d..7111055cc 100644 --- a/test/testcases/collection_test.go +++ b/test/testcases/collection_test.go @@ -640,6 +640,50 @@ func TestCreateMultiVectorExceed(t *testing.T) { common.CheckErr(t, errCreateCollection, false, "maximum vector field's number should be limited to 4") } +// specify dim for sparse vector -> error +func TestCreateCollectionSparseVectorWithDim(t *testing.T) { + ctx := createContext(t, time.Second*common.DefaultTimeout) + mc := createMilvusClient(ctx, t) + allFields := []*entity.Field{ + common.GenField(common.DefaultIntFieldName, entity.FieldTypeInt64, common.WithIsPrimaryKey(true), common.WithAutoID(false)), + common.GenField(common.DefaultSparseVecFieldName, entity.FieldTypeSparseVector, common.WithDim(common.DefaultDim)), + } + collName := common.GenRandomString(6) + schema := common.GenSchema(collName, false, allFields) + + // create collection + errCreateCollection := mc.CreateCollection(ctx, schema, common.DefaultShards) + common.CheckErr(t, errCreateCollection, false, "dim should not be specified for sparse vector field sparseVec(0)") +} + +// create collection with sparse vector +func TestCreateCollectionSparseVector(t *testing.T) { + ctx := createContext(t, time.Second*common.DefaultTimeout) + mc := createMilvusClient(ctx, t) + allFields := []*entity.Field{ + common.GenField(common.DefaultIntFieldName, entity.FieldTypeInt64, common.WithIsPrimaryKey(true), common.WithAutoID(false)), + common.GenField(common.DefaultVarcharFieldName, entity.FieldTypeVarChar, common.WithMaxLength(common.TestMaxLen)), + common.GenField(common.DefaultSparseVecFieldName, entity.FieldTypeSparseVector), + } + collName := common.GenRandomString(6) + schema := common.GenSchema(collName, false, allFields) + + // create collection + errCreateCollection := mc.CreateCollection(ctx, schema, common.DefaultShards) + common.CheckErr(t, errCreateCollection, true) + + // describe collection + collection, err := mc.DescribeCollection(ctx, collName) + common.CheckErr(t, err, true) + common.CheckCollection(t, collection, collName, common.DefaultShards, schema, common.DefaultConsistencyLevel) + require.Len(t, collection.Schema.Fields, 3) + for _, field := range collection.Schema.Fields { + if field.DataType == entity.FieldTypeSparseVector { + require.Equal(t, common.DefaultSparseVecFieldName, field.Name) + } + } +} + // -- Get Collection Statistics -- func TestGetStaticsCollectionNotExisted(t *testing.T) { diff --git a/test/testcases/groupby_search_test.go b/test/testcases/groupby_search_test.go index 264b8e43f..04ee32fd8 100644 --- a/test/testcases/groupby_search_test.go +++ b/test/testcases/groupby_search_test.go @@ -171,6 +171,70 @@ func TestSearchGroupByFloatDefault(t *testing.T) { } } +// test groupBy search sparse vector +func TestGroupBySearchSparseVector(t *testing.T) { + t.Parallel() + idxInverted, _ := entity.NewIndexSparseInverted(entity.IP, 0.3) + idxWand, _ := entity.NewIndexSparseWAND(entity.IP, 0.2) + for _, idx := range []entity.Index{idxInverted, idxWand} { + ctx := createContext(t, time.Second*common.DefaultTimeout*2) + // connect + mc := createMilvusClient(ctx, t) + + // create -> insert [0, 3000) -> flush -> index -> load + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + collName := createCollection(ctx, t, mc, cp, client.WithConsistencyLevel(entity.ClStrong)) + + // insert data + dp := DataParams{DoInsert: true, CollectionName: collName, CollectionFieldsType: Int64VarcharSparseVec, start: 0, + nb: 200, dim: common.DefaultDim, EnableDynamicField: true} + for i := 0; i < 100; i++ { + _, _ = insertData(ctx, t, mc, dp) + } + mc.Flush(ctx, collName, false) + + // index and load + idxHnsw, _ := entity.NewIndexHNSW(entity.L2, 8, 96) + mc.CreateIndex(ctx, collName, common.DefaultFloatVecFieldName, idxHnsw, false) + mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idx, false) + mc.LoadCollection(ctx, collName, false) + + // groupBy search + queryVec := common.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeSparseVector) + sp, _ := entity.NewIndexSparseInvertedSearchParam(0.2) + resGroupBy, _ := mc.Search(ctx, collName, []string{}, "", []string{common.DefaultIntFieldName, common.DefaultVarcharFieldName}, queryVec, + common.DefaultSparseVecFieldName, entity.IP, common.DefaultTopK, sp, client.WithGroupByField(common.DefaultVarcharFieldName)) + + // verify each topK entity is the top1 of the whole group + hitsNum := 0 + total := 0 + for _, rs := range resGroupBy { + for i := 0; i < rs.ResultCount; i++ { + groupByValue, _ := rs.GroupByValue.Get(i) + pkValue, _ := rs.IDs.GetAsInt64(i) + expr := fmt.Sprintf("%s == '%v' ", common.DefaultVarcharFieldName, groupByValue) + + // search filter with groupByValue is the top1 + resFilter, _ := mc.Search(ctx, collName, []string{}, expr, []string{common.DefaultIntFieldName, + common.DefaultVarcharFieldName}, queryVec, common.DefaultSparseVecFieldName, entity.IP, 1, sp) + filterTop1Pk, _ := resFilter[0].IDs.GetAsInt64(0) + if filterTop1Pk == pkValue { + hitsNum += 1 + } + total += 1 + } + } + + // verify hits rate + hitsRate := float32(hitsNum) / float32(total) + _str := fmt.Sprintf("GroupBy search with field %s, nq=%d and limit=%d , then hitsNum= %d, hitsRate=%v\n", + common.DefaultSparseVecFieldName, common.DefaultNq, common.DefaultTopK, hitsNum, hitsRate) + log.Println(_str) + require.GreaterOrEqualf(t, hitsRate, float32(0.8), _str) + } +} + // binary vector -> not supported func TestSearchGroupByBinaryDefault(t *testing.T) { t.Parallel() diff --git a/test/testcases/hybrid_search_test.go b/test/testcases/hybrid_search_test.go index 2111b4953..a43ae2164 100644 --- a/test/testcases/hybrid_search_test.go +++ b/test/testcases/hybrid_search_test.go @@ -311,3 +311,54 @@ func TestHybridSearchMultiVectorsRangeSearch(t *testing.T) { } } } + +func TestHybridSearchSparseVector(t *testing.T) { + t.Skip("https://github.com/milvus-io/milvus/pull/32177") + t.Parallel() + idxInverted := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_INVERTED_INDEX", map[string]string{"drop_ratio_build": "0.2", "metric_type": "IP"}) + idxWand := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_WAND", map[string]string{"drop_ratio_build": "0.3", "metric_type": "IP"}) + for _, idx := range []entity.Index{idxInverted, idxWand} { + ctx := createContext(t, time.Second*common.DefaultTimeout*2) + // connect + mc := createMilvusClient(ctx, t) + + // create -> insert [0, 3000) -> flush -> index -> load + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + + dp := DataParams{DoInsert: true, CollectionFieldsType: Int64VarcharSparseVec, start: 0, nb: common.DefaultNb * 3, + dim: common.DefaultDim, EnableDynamicField: true} + + // index params + idxHnsw, _ := entity.NewIndexHNSW(entity.L2, 8, 96) + ips := []IndexParams{ + {BuildIndex: true, Index: idx, FieldName: common.DefaultSparseVecFieldName, async: false}, + {BuildIndex: true, Index: idxHnsw, FieldName: common.DefaultFloatVecFieldName, async: false}, + } + collName := prepareCollection(ctx, t, mc, cp, WithDataParams(dp), WithIndexParams(ips), WithCreateOption(client.WithConsistencyLevel(entity.ClStrong))) + + // search + queryVec1 := common.GenSearchVectors(common.DefaultNq, common.DefaultDim*2, entity.FieldTypeSparseVector) + queryVec2 := common.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeFloatVector) + sp1, _ := entity.NewIndexSparseInvertedSearchParam(0.2) + sp2, _ := entity.NewIndexHNSWSearchParam(20) + expr := fmt.Sprintf("%s > 1", common.DefaultIntFieldName) + sReqs := []*client.ANNSearchRequest{ + client.NewANNSearchRequest(common.DefaultSparseVecFieldName, entity.IP, expr, queryVec1, sp1, common.DefaultTopK), + client.NewANNSearchRequest(common.DefaultFloatVecFieldName, entity.L2, "", queryVec2, sp2, common.DefaultTopK), + } + for _, reranker := range []client.Reranker{ + client.NewRRFReranker(), + client.NewWeightedReranker([]float64{0.5, 0.6}), + } { + // hybrid search + searchRes, errSearch := mc.HybridSearch(ctx, collName, []string{}, common.DefaultTopK, []string{"*"}, reranker, sReqs) + common.CheckErr(t, errSearch, true) + common.CheckSearchResult(t, searchRes, common.DefaultNq, common.DefaultTopK) + common.CheckErr(t, errSearch, true) + outputFields := []string{common.DefaultIntFieldName, common.DefaultVarcharFieldName, common.DefaultFloatVecFieldName, + common.DefaultSparseVecFieldName, common.DefaultDynamicFieldName} + common.CheckOutputFields(t, searchRes[0].Fields, outputFields) + } + } +} diff --git a/test/testcases/index_test.go b/test/testcases/index_test.go index e6e03cc08..07d7c01d2 100644 --- a/test/testcases/index_test.go +++ b/test/testcases/index_test.go @@ -670,6 +670,200 @@ func TestCreateAutoIndexScalarFields(t *testing.T) { } } +// TODO https://github.com/milvus-io/milvus-sdk-go/issues/726 +func TestCreateIndexSparseVector(t *testing.T) { + t.Parallel() + idxInverted := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_INVERTED_INDEX", map[string]string{"drop_ratio_build": "0.2", "metric_type": "IP"}) + idxWand := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_WAND", map[string]string{"drop_ratio_build": "0.3", "metric_type": "IP"}) + + for _, idx := range []entity.Index{idxInverted, idxWand} { + ctx := createContext(t, time.Second*common.DefaultTimeout) + //connect + mc := createMilvusClient(ctx, t) + + // create collection with all datatype + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: 300} + collName := createCollection(ctx, t, mc, cp) + + // insert + dp := DataParams{CollectionName: collName, PartitionName: "", CollectionFieldsType: Int64VarcharSparseVec, + start: 0, nb: common.DefaultNb, dim: common.DefaultDim, EnableDynamicField: true, WithRows: false} + _, _ = insertData(ctx, t, mc, dp, common.WithSparseVectorLen(100)) + mc.Flush(ctx, collName, false) + + // create index + err := mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idx, false) + common.CheckErr(t, err, true) + + // describe index + idx2, err := mc.DescribeIndex(ctx, collName, common.DefaultSparseVecFieldName) + common.CheckErr(t, err, true) + common.CheckIndexResult(t, idx2, idx) + } +} + +// TODO https://github.com/milvus-io/milvus-sdk-go/issues/726 +func TestCreateIndexSparseVector2(t *testing.T) { + t.Parallel() + idxInverted1, _ := entity.NewIndexSparseInverted(entity.IP, 0.2) + idxWand1, _ := entity.NewIndexSparseWAND(entity.IP, 0.3) + for _, idx := range []entity.Index{idxInverted1, idxWand1} { + ctx := createContext(t, time.Second*common.DefaultTimeout) + //connect + mc := createMilvusClient(ctx, t) + + // create collection with all datatype + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: 300} + collName := createCollection(ctx, t, mc, cp) + + // insert + dp := DataParams{CollectionName: collName, PartitionName: "", CollectionFieldsType: Int64VarcharSparseVec, + start: 0, nb: common.DefaultNb, dim: common.DefaultDim, EnableDynamicField: true, WithRows: false} + _, _ = insertData(ctx, t, mc, dp, common.WithSparseVectorLen(100)) + mc.Flush(ctx, collName, false) + + // create index + err := mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idx, false) + common.CheckErr(t, err, true) + + // describe index + idx2, err := mc.DescribeIndex(ctx, collName, common.DefaultSparseVecFieldName) + expIndex := entity.NewGenericIndex(common.DefaultSparseVecFieldName, idx.IndexType(), idx.Params()) + common.CheckErr(t, err, true) + common.CheckIndexResult(t, idx2, expIndex) + } +} + +// create index on sparse vector with invalid params +func TestCreateSparseIndexInvalidParams(t *testing.T) { + for _, indexType := range []entity.IndexType{"SPARSE_INVERTED_INDEX", "SPARSE_WAND"} { + ctx := createContext(t, time.Second*common.DefaultTimeout) + //connect + mc := createMilvusClient(ctx, t) + + // create collection with all datatype + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: 300} + collName := createCollection(ctx, t, mc, cp) + + // insert + dp := DataParams{CollectionName: collName, PartitionName: "", CollectionFieldsType: Int64VarcharSparseVec, + start: 0, nb: common.DefaultNb, dim: common.DefaultDim, EnableDynamicField: true, WithRows: false} + _, _ = insertData(ctx, t, mc, dp, common.WithSparseVectorLen(100)) + mc.Flush(ctx, collName, false) + + // create index with invalid metric type + for _, mt := range common.UnsupportedSparseVecMetricsType { + idx := entity.NewGenericIndex(common.DefaultSparseVecFieldName, indexType, map[string]string{"drop_ratio_build": "0.2", "metric_type": string(mt)}) + err := mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idx, false) + common.CheckErr(t, err, false, "only IP is the supported metric type for sparse index") + } + + // create index with invalid drop_ratio_build + for _, drb := range []string{"a", "-0.1", "1.3"} { + idx := entity.NewGenericIndex(common.DefaultSparseVecFieldName, indexType, map[string]string{"drop_ratio_build": drb, "metric_type": "IP"}) + err := mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idx, false) + common.CheckErr(t, err, false, "must be in range [0, 1)") + } + + // create index and describe index + idx := entity.NewGenericIndex(common.DefaultSparseVecFieldName, indexType, map[string]string{"drop_ratio_build": "0", "metric_type": "IP"}) + err := mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idx, false) + common.CheckErr(t, err, true) + + descIdx, _ := mc.DescribeIndex(ctx, collName, common.DefaultSparseVecFieldName) + common.CheckIndexResult(t, descIdx, idx) + } +} + +func TestCreateSparseIndexInvalidParams2(t *testing.T) { + ctx := createContext(t, time.Second*common.DefaultTimeout) + //connect + mc := createMilvusClient(ctx, t) + + // create collection with all datatype + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: 300} + collName := createCollection(ctx, t, mc, cp) + + // insert + dp := DataParams{CollectionName: collName, PartitionName: "", CollectionFieldsType: Int64VarcharSparseVec, + start: 0, nb: common.DefaultNb, dim: common.DefaultDim, EnableDynamicField: true, WithRows: false} + _, _ = insertData(ctx, t, mc, dp, common.WithSparseVectorLen(100)) + mc.Flush(ctx, collName, false) + + // create index with invalid metric type + for _, mt := range common.UnsupportedSparseVecMetricsType { + idx, _ := entity.NewIndexSparseInverted(mt, 0.2) + err := mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idx, false) + common.CheckErr(t, err, false, "only IP is the supported metric type for sparse index") + + idxWand, _ := entity.NewIndexSparseWAND(mt, 0.2) + err = mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idxWand, false) + common.CheckErr(t, err, false, "only IP is the supported metric type for sparse index") + } + + // create index with invalid drop_ratio_build + for _, drb := range []float64{-0.3, 1.3} { + _, err := entity.NewIndexSparseInverted(entity.IP, drb) + common.CheckErr(t, err, false, "must be in range [0, 1)") + + _, err = entity.NewIndexSparseWAND(entity.IP, drb) + common.CheckErr(t, err, false, "must be in range [0, 1)") + } + + // create index and describe index + idx, _ := entity.NewIndexSparseInverted(entity.IP, 0.1) + err := mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idx, false) + common.CheckErr(t, err, true) + + descIdx, _ := mc.DescribeIndex(ctx, collName, common.DefaultSparseVecFieldName) + expIdx := entity.NewGenericIndex(common.DefaultSparseVecFieldName, idx.IndexType(), idx.Params()) + common.CheckIndexResult(t, descIdx, expIdx) +} + +//create sparse unsupported index: other vector index and scalar index and auto index +func TestCreateSparseUnsupportedIndex(t *testing.T) { + ctx := createContext(t, time.Second*common.DefaultTimeout) + //connect + mc := createMilvusClient(ctx, t) + + // create collection with all datatype + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: 300} + collName := createCollection(ctx, t, mc, cp) + + // insert + dp := DataParams{CollectionName: collName, PartitionName: "", CollectionFieldsType: Int64VarcharSparseVec, + start: 0, nb: common.DefaultNb, dim: common.DefaultDim, EnableDynamicField: true, WithRows: false} + _, _ = insertData(ctx, t, mc, dp, common.WithSparseVectorLen(100)) + mc.Flush(ctx, collName, false) + + // create unsupported vector index on sparse field + autoIdx, _ := entity.NewIndexAUTOINDEX(entity.IP) + vectorIndex := append(common.GenAllFloatIndex(entity.IP), autoIdx) + for _, idx := range vectorIndex { + err := mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idx, false) + common.CheckErr(t, err, false, "data type should be FloatVector, Float16Vector or BFloat16Vector", + "HNSW only support float vector data type") + } + + // create scalar index on sparse vector + for _, idx := range []entity.Index{ + entity.NewScalarIndex(), + entity.NewScalarIndexWithType(entity.Trie), + entity.NewScalarIndexWithType(entity.Sorted), + entity.NewScalarIndexWithType(entity.Inverted), + } { + err := mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idx, false) + common.CheckErr(t, err, false, "TRIE are only supported on varchar field", + "STL_SORT are only supported on numeric field", "HNSW only support float vector data type", + "INVERTED are not supported on SparseFloatVector field") + } +} + // test new index by Generic index func TestCreateIndexGeneric(t *testing.T) { ctx := createContext(t, time.Second*common.DefaultTimeout) diff --git a/test/testcases/insert_test.go b/test/testcases/insert_test.go index 20a505f63..5d07646cb 100644 --- a/test/testcases/insert_test.go +++ b/test/testcases/insert_test.go @@ -6,6 +6,7 @@ import ( "context" "fmt" "log" + "math" "math/rand" "strconv" "testing" @@ -426,7 +427,7 @@ func TestInsertEmptyArray(t *testing.T) { collName := createCollection(ctx, t, mc, cp) // prepare and insert data - var capacity int64 = 0 + var capacity int64 dp := DataParams{CollectionName: collName, PartitionName: "", CollectionFieldsType: Int64FloatVecArray, start: 0, nb: common.DefaultNb, dim: common.DefaultDim, EnableDynamicField: true, WithRows: false} _, _ = insertData(ctx, t, mc, dp, common.WithArrayCapacity(capacity)) @@ -525,3 +526,132 @@ func TestInsertArrayDataCapacityExceed(t *testing.T) { common.CheckErr(t, err, false, "array length exceeds max capacity") } } + +// test insert sparse vector column and rows +func TestInsertSparseData(t *testing.T) { + ctx := createContext(t, time.Second*common.DefaultTimeout) + // connect + mc := createMilvusClient(ctx, t) + + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + collName := createCollection(ctx, t, mc, cp) + + // insert data column + intColumn1 := common.GenColumnData(0, common.DefaultNb, entity.FieldTypeInt64, common.DefaultIntFieldName) + data := []entity.Column{ + intColumn1, + common.GenColumnData(0, common.DefaultNb, entity.FieldTypeVarChar, common.DefaultVarcharFieldName), + common.GenColumnData(0, common.DefaultNb, entity.FieldTypeFloatVector, common.DefaultFloatVecFieldName, common.WithVectorDim(common.DefaultDim)), + common.GenColumnData(0, common.DefaultNb, entity.FieldTypeSparseVector, common.DefaultSparseVecFieldName, common.WithSparseVectorLen(20)), + } + ids, err := mc.Insert(ctx, collName, "", data...) + common.CheckErr(t, err, true) + common.CheckInsertResult(t, ids, intColumn1) + + // insert rows + rows := common.GenDefaultSparseRows(common.DefaultNb, common.DefaultNb, common.DefaultDim, 50, true) + ids2, err := mc.InsertRows(ctx, collName, "", rows) + common.CheckErr(t, err, true) + require.Equal(t, ids2.Len(), common.DefaultNb) + + // flush and verify + err = mc.Flush(ctx, collName, false) + common.CheckErr(t, err, true) + stats, _ := mc.GetCollectionStatistics(ctx, collName) + require.Equal(t, strconv.Itoa(common.DefaultNb*2), stats[common.RowCount]) +} + +// the dimension of a sparse embedding can be any value from 0 to (maximum of uint32 - 1) +func TestInsertSparseDataMaxDim(t *testing.T) { + // invalid sparse vector: positions >= uint32 + ctx := createContext(t, time.Second*common.DefaultTimeout) + // connect + mc := createMilvusClient(ctx, t) + + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + collName := createCollection(ctx, t, mc, cp) + + // insert data column + pkColumn := common.GenColumnData(0, 1, entity.FieldTypeInt64, common.DefaultIntFieldName) + data := []entity.Column{ + pkColumn, + common.GenColumnData(0, 1, entity.FieldTypeVarChar, common.DefaultVarcharFieldName), + common.GenColumnData(0, 1, entity.FieldTypeFloatVector, common.DefaultFloatVecFieldName, common.WithVectorDim(common.DefaultDim)), + } + // sparse vector with max dim + positions := []uint32{0, math.MaxUint32 - 10, math.MaxUint32 - 1} + values := []float32{0.453, 5.0776, 100.098} + sparseVec, err := entity.NewSliceSparseEmbedding(positions, values) + common.CheckErr(t, err, true) + data = append(data, entity.NewColumnSparseVectors(common.DefaultSparseVecFieldName, []entity.SparseEmbedding{sparseVec})) + ids, err := mc.Insert(ctx, collName, "", data...) + common.CheckErr(t, err, true) + common.CheckInsertResult(t, ids, pkColumn) +} + +func TestInsertSparseInvalidVector(t *testing.T) { + // invalid sparse vector: len(positions) != len(values) + positions := []uint32{1, 10} + values := []float32{0.4, 5.0, 0.34} + _, err := entity.NewSliceSparseEmbedding(positions, values) + common.CheckErr(t, err, false, "invalid sparse embedding input, positions shall have same number of values") + + // invalid sparse vector: positions >= uint32 + ctx := createContext(t, time.Second*common.DefaultTimeout) + // connect + mc := createMilvusClient(ctx, t) + + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + collName := createCollection(ctx, t, mc, cp) + + // insert data column + data := []entity.Column{ + common.GenColumnData(0, 1, entity.FieldTypeInt64, common.DefaultIntFieldName), + common.GenColumnData(0, 1, entity.FieldTypeVarChar, common.DefaultVarcharFieldName), + common.GenColumnData(0, 1, entity.FieldTypeFloatVector, common.DefaultFloatVecFieldName, common.WithVectorDim(common.DefaultDim)), + } + // invalid sparse vector: position > (maximum of uint32 - 1) + positions = []uint32{math.MaxUint32} + values = []float32{0.4} + sparseVec, err := entity.NewSliceSparseEmbedding(positions, values) + common.CheckErr(t, err, true) + data1 := append(data, entity.NewColumnSparseVectors(common.DefaultSparseVecFieldName, []entity.SparseEmbedding{sparseVec})) + _, err = mc.Insert(ctx, collName, "", data1...) + common.CheckErr(t, err, false, "invalid index in sparse float vector: must be less than 2^32-1") + + // invalid sparse vector: empty position and values + positions = []uint32{} + values = []float32{} + sparseVec, err = entity.NewSliceSparseEmbedding(positions, values) + common.CheckErr(t, err, true) + data2 := append(data, entity.NewColumnSparseVectors(common.DefaultSparseVecFieldName, []entity.SparseEmbedding{sparseVec})) + _, err = mc.Insert(ctx, collName, "", data2...) + common.CheckErr(t, err, false, "empty sparse float vector row") +} + +func TestInsertSparseVectorSamePosition(t *testing.T) { + // invalid sparse vector: positions >= uint32 + ctx := createContext(t, time.Second*common.DefaultTimeout) + // connect + mc := createMilvusClient(ctx, t) + + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + collName := createCollection(ctx, t, mc, cp) + + //insert data column + data := []entity.Column{ + common.GenColumnData(0, 1, entity.FieldTypeInt64, common.DefaultIntFieldName), + common.GenColumnData(0, 1, entity.FieldTypeVarChar, common.DefaultVarcharFieldName), + common.GenColumnData(0, 1, entity.FieldTypeFloatVector, common.DefaultFloatVecFieldName, common.WithVectorDim(common.DefaultDim)), + } + //invalid sparse vector: position > (maximum of uint32 - 1) + sparseVec, err := entity.NewSliceSparseEmbedding([]uint32{2, 10, 2}, []float32{0.4, 0.5, 0.6}) + common.CheckErr(t, err, true) + data = append(data, entity.NewColumnSparseVectors(common.DefaultSparseVecFieldName, []entity.SparseEmbedding{sparseVec})) + _, err = mc.Insert(ctx, collName, "", data...) + common.CheckErr(t, err, false, "unsorted or same indices in sparse float vector") +} diff --git a/test/testcases/load_release_test.go b/test/testcases/load_release_test.go index 9354baf29..a421a41c1 100644 --- a/test/testcases/load_release_test.go +++ b/test/testcases/load_release_test.go @@ -817,3 +817,61 @@ func TestMmapAlterIndex(t *testing.T) { common.CheckOutputFields(t, searchRes[0].Fields, []string{common.DefaultIntFieldName, common.DefaultFloatFieldName, common.DefaultFloatVecFieldName}) } } + +// test search when mmap sparse collection +func TestMmapSparseCollection(t *testing.T) { + t.Parallel() + idxInverted, _ := entity.NewIndexSparseInverted(entity.IP, 0) + idxWand, _ := entity.NewIndexSparseWAND(entity.IP, 0) + for _, idx := range []entity.Index{idxInverted, idxWand} { + ctx := createContext(t, time.Second*common.DefaultTimeout*2) + // connect + mc := createMilvusClient(ctx, t) + + // create -> insert [0, 3000) -> flush -> index -> load + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + + dp := DataParams{DoInsert: true, CollectionFieldsType: Int64VarcharSparseVec, start: 0, nb: common.DefaultNb * 5, + dim: common.DefaultDim, EnableDynamicField: true} + + // index params + idxHnsw, _ := entity.NewIndexHNSW(entity.L2, 8, 96) + ips := []IndexParams{ + {BuildIndex: true, Index: idx, FieldName: common.DefaultSparseVecFieldName, async: false}, + {BuildIndex: true, Index: idxHnsw, FieldName: common.DefaultFloatVecFieldName, async: false}, + } + collName := prepareCollection(ctx, t, mc, cp, WithDataParams(dp), WithIndexParams(ips), WithCreateOption(client.WithConsistencyLevel(entity.ClStrong))) + + // alter mmap + mc.ReleaseCollection(ctx, collName) + // alter index and enable mmap + err := mc.AlterIndex(ctx, collName, common.DefaultSparseVecFieldName, client.WithMmap(true)) + common.CheckErr(t, err, false, fmt.Sprintf("index type %s does not support mmap", idx.IndexType())) + err = mc.AlterIndex(ctx, collName, common.DefaultFloatVecFieldName, client.WithMmap(true)) + common.CheckErr(t, err, true) + err = mc.AlterCollection(ctx, collName, entity.Mmap(true)) + common.CheckErr(t, err, true) + err = mc.LoadCollection(ctx, collName, false) + common.CheckErr(t, err, true) + + // search with floatVec field + outputFields := []string{common.DefaultIntFieldName, common.DefaultVarcharFieldName, common.DefaultFloatVecFieldName, + common.DefaultSparseVecFieldName, common.DefaultDynamicFieldName} + queryVecFloat := common.GenSearchVectors(1, common.DefaultDim, entity.FieldTypeFloatVector) + sp, _ := entity.NewIndexSparseInvertedSearchParam(0) + resSearch, errSearch := mc.Search(ctx, collName, []string{}, "", []string{"*"}, queryVecFloat, common.DefaultFloatVecFieldName, + entity.L2, common.DefaultTopK, sp) + common.CheckErr(t, errSearch, true) + common.CheckSearchResult(t, resSearch, 1, common.DefaultTopK) + common.CheckOutputFields(t, resSearch[0].Fields, outputFields) + + // search with sparse vector field + queryVecSparse := common.GenSearchVectors(1, common.DefaultDim, entity.FieldTypeSparseVector) + resSearch, errSearch = mc.Search(ctx, collName, []string{}, "", []string{"*"}, queryVecSparse, common.DefaultSparseVecFieldName, + entity.IP, common.DefaultTopK, sp) + common.CheckErr(t, errSearch, true) + common.CheckSearchResult(t, resSearch, 1, common.DefaultTopK) + common.CheckOutputFields(t, resSearch[0].Fields, outputFields) + } +} diff --git a/test/testcases/main_test.go b/test/testcases/main_test.go index 74d21e5c6..db7e16fc5 100644 --- a/test/testcases/main_test.go +++ b/test/testcases/main_test.go @@ -242,13 +242,14 @@ func createVarcharCollectionWithDataIndex(ctx context.Context, t *testing.T, mc } const ( - Int64FloatVec CollectionFieldsType = "PkInt64FloatVec" // int64 + float + floatVec - Int64BinaryVec CollectionFieldsType = "Int64BinaryVec" // int64 + float + binaryVec - VarcharBinaryVec CollectionFieldsType = "PkVarcharBinaryVec" // varchar + binaryVec - Int64FloatVecJSON CollectionFieldsType = "PkInt64FloatVecJson" // int64 + float + floatVec + json - Int64FloatVecArray CollectionFieldsType = "Int64FloatVecArray" // int64 + float + floatVec + all array - AllVectors CollectionFieldsType = "AllVectors" // int64 + fp32Vec + fp16Vec + binaryVec - AllFields CollectionFieldsType = "AllFields" // all scalar fields + floatVec + Int64FloatVec CollectionFieldsType = "PkInt64FloatVec" // int64 + float + floatVec + Int64BinaryVec CollectionFieldsType = "Int64BinaryVec" // int64 + float + binaryVec + VarcharBinaryVec CollectionFieldsType = "PkVarcharBinaryVec" // varchar + binaryVec + Int64FloatVecJSON CollectionFieldsType = "PkInt64FloatVecJson" // int64 + float + floatVec + json + Int64FloatVecArray CollectionFieldsType = "Int64FloatVecArray" // int64 + float + floatVec + all array + Int64VarcharSparseVec CollectionFieldsType = "Int64VarcharSparseVec" // int64 + varchar + float32Vec + sparseVec + AllVectors CollectionFieldsType = "AllVectors" // int64 + fp32Vec + fp16Vec + binaryVec + AllFields CollectionFieldsType = "AllFields" // all scalar fields + floatVec ) func createCollection(ctx context.Context, t *testing.T, mc *base.MilvusClient, cp CollectionParams, opts ...client.CreateCollectionOption) string { @@ -271,6 +272,14 @@ func createCollection(ctx context.Context, t *testing.T, mc *base.MilvusClient, case Int64FloatVecArray: fields = common.GenDefaultFields(cp.AutoID) fields = append(fields, common.GenAllArrayFieldsWithCapacity(cp.MaxCapacity)...) + case Int64VarcharSparseVec: + fields = []*entity.Field{ + common.GenField(common.DefaultIntFieldName, entity.FieldTypeInt64, common.WithIsPrimaryKey(true), common.WithAutoID(cp.AutoID)), + common.GenField(common.DefaultVarcharFieldName, entity.FieldTypeVarChar, common.WithMaxLength(cp.MaxLength)), + common.GenField(common.DefaultFloatVecFieldName, entity.FieldTypeFloatVector, common.WithDim(cp.Dim)), + common.GenField(common.DefaultSparseVecFieldName, entity.FieldTypeSparseVector), + } + case AllVectors: fields = []*entity.Field{ common.GenField(common.DefaultIntFieldName, entity.FieldTypeInt64, common.WithIsPrimaryKey(true), common.WithAutoID(cp.AutoID)), @@ -347,7 +356,15 @@ func insertData(ctx context.Context, t *testing.T, mc *base.MilvusClient, dp Dat intColumn, floatColumn, vecColumn := common.GenDefaultColumnData(dp.start, dp.nb, dp.dim) data = append(data, intColumn, floatColumn, vecColumn) } - + case Int64VarcharSparseVec: + if dp.WithRows { + rows = common.GenDefaultSparseRows(dp.start, dp.nb, dp.dim, dp.maxLenSparse, dp.EnableDynamicField) + } else { + intColumn, _, vecColumn := common.GenDefaultColumnData(dp.start, dp.nb, dp.dim) + varColumn := common.GenColumnData(dp.start, dp.nb, entity.FieldTypeVarChar, common.DefaultVarcharFieldName) + sparseColumn := common.GenColumnData(dp.start, dp.nb, entity.FieldTypeSparseVector, common.DefaultSparseVecFieldName, opts...) + data = append(data, intColumn, varColumn, vecColumn, sparseColumn) + } case AllVectors: if dp.WithRows { rows = common.GenAllVectorsRows(dp.start, dp.nb, dp.dim, dp.EnableDynamicField) diff --git a/test/testcases/option.go b/test/testcases/option.go index 171f69669..6a88973d0 100644 --- a/test/testcases/option.go +++ b/test/testcases/option.go @@ -33,6 +33,7 @@ type DataParams struct { EnableDynamicField bool // whether insert dynamic field data WithRows bool DoInsert bool + maxLenSparse int } func (d DataParams) IsEmpty() bool { diff --git a/test/testcases/query_test.go b/test/testcases/query_test.go index 0b384fb55..1aa41fdc7 100644 --- a/test/testcases/query_test.go +++ b/test/testcases/query_test.go @@ -1075,6 +1075,49 @@ func TestQueryCountAfterDml(t *testing.T) { require.Equal(t, int64(common.DefaultNb+upsertNb), countAfterCompact.GetColumn(common.QueryCountFieldName).(*entity.ColumnInt64).Data()[0]) } +func TestQuerySparseVector(t *testing.T) { + t.Parallel() + idxInverted := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_INVERTED_INDEX", map[string]string{"drop_ratio_build": "0.2", "metric_type": "IP"}) + idxWand := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_WAND", map[string]string{"drop_ratio_build": "0.3", "metric_type": "IP"}) + for _, idx := range []entity.Index{idxInverted, idxWand} { + ctx := createContext(t, time.Second*common.DefaultTimeout*2) + // connect + mc := createMilvusClient(ctx, t) + + // create -> insert [0, 3000) -> flush -> index -> load + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: false, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + collName := createCollection(ctx, t, mc, cp) + + // index + idxHnsw, _ := entity.NewIndexHNSW(entity.L2, 8, 96) + mc.CreateIndex(ctx, collName, common.DefaultFloatVecFieldName, idxHnsw, false) + mc.CreateIndex(ctx, collName, common.DefaultSparseVecFieldName, idx, false) + + // insert + intColumn, _, floatColumn := common.GenDefaultColumnData(0, common.DefaultNb, common.DefaultDim) + varColumn := common.GenColumnData(0, common.DefaultNb, entity.FieldTypeVarChar, common.DefaultVarcharFieldName) + sparseColumn := common.GenColumnData(0, common.DefaultNb, entity.FieldTypeSparseVector, common.DefaultSparseVecFieldName) + mc.Insert(ctx, collName, "", intColumn, varColumn, floatColumn, sparseColumn) + mc.Flush(ctx, collName, false) + mc.LoadCollection(ctx, collName, false) + + // count(*) + countRes, _ := mc.Query(ctx, collName, []string{}, fmt.Sprintf("%s >=0", common.DefaultIntFieldName), []string{common.QueryCountFieldName}) + require.Equal(t, int64(common.DefaultNb), countRes.GetColumn(common.QueryCountFieldName).(*entity.ColumnInt64).Data()[0]) + + // query + queryResult, err := mc.Query(ctx, collName, []string{}, fmt.Sprintf("%s == 0", common.DefaultIntFieldName), []string{"*"}) + common.CheckErr(t, err, true) + expIntColumn := entity.NewColumnInt64(common.DefaultIntFieldName, intColumn.(*entity.ColumnInt64).Data()[:1]) + expVarcharColumn := entity.NewColumnVarChar(common.DefaultVarcharFieldName, varColumn.(*entity.ColumnVarChar).Data()[:1]) + expVecColumn := entity.NewColumnFloatVector(common.DefaultFloatVecFieldName, int(common.DefaultDim), floatColumn.(*entity.ColumnFloatVector).Data()[:1]) + expSparseColumn := entity.NewColumnSparseVectors(common.DefaultSparseVecFieldName, sparseColumn.(*entity.ColumnSparseFloatVector).Data()[:1]) + common.CheckOutputFields(t, queryResult, []string{common.DefaultIntFieldName, common.DefaultVarcharFieldName, common.DefaultFloatVecFieldName, common.DefaultSparseVecFieldName}) + common.CheckQueryResult(t, queryResult, []entity.Column{expIntColumn, expVarcharColumn, expVecColumn, expSparseColumn}) + } +} + // TODO offset and limit // TODO consistency level // TODO ignore growing diff --git a/test/testcases/search_test.go b/test/testcases/search_test.go index f7021f5a9..598953b28 100644 --- a/test/testcases/search_test.go +++ b/test/testcases/search_test.go @@ -5,6 +5,7 @@ package testcases import ( "fmt" "log" + "math/rand" "testing" "time" @@ -167,7 +168,7 @@ func TestSearchEmptyCollection(t *testing.T) { ctx, collName, []string{common.DefaultPartition}, "", - []string{common.DefaultFloatFieldName}, + []string{"*"}, //[]entity.Vector{entity.FloatVector([]float32{0.1, 0.2})}, common.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeFloatVector), common.DefaultFloatVecFieldName, @@ -179,6 +180,44 @@ func TestSearchEmptyCollection(t *testing.T) { } } +func TestSearchEmptyCollection2(t *testing.T) { + ctx := createContext(t, time.Second*common.DefaultTimeout*2) + // connect + mc := createMilvusClient(ctx, t) + + // create -> insert [0, 3000) -> flush -> index -> load + cp := CollectionParams{CollectionFieldsType: AllFields, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + + dp := DataParams{DoInsert: false} + + // index params + ips := GenDefaultIndexParamsForAllVectors() + collName := prepareCollection(ctx, t, mc, cp, WithDataParams(dp), WithIndexParams(ips), WithCreateOption(client.WithConsistencyLevel(entity.ClStrong))) + + // search + type mNameVec struct { + fieldName string + metricType entity.MetricType + queryVec []entity.Vector + } + nameVecs := []mNameVec{ + {fieldName: common.DefaultFloatVecFieldName, metricType: entity.L2, queryVec: common.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeFloatVector)}, + {fieldName: common.DefaultFloat16VecFieldName, metricType: entity.L2, queryVec: common.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeFloat16Vector)}, + {fieldName: common.DefaultBFloat16VecFieldName, metricType: entity.L2, queryVec: common.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeBFloat16Vector)}, + {fieldName: common.DefaultBinaryVecFieldName, metricType: entity.JACCARD, queryVec: common.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeBinaryVector)}, + } + sp, _ := entity.NewIndexHNSWSearchParam(100) + for _, nv := range nameVecs { + resSearch, errSearch := mc.Search(ctx, collName, []string{}, "", []string{"*"}, nv.queryVec, nv.fieldName, + nv.metricType, common.DefaultTopK, sp) + common.CheckErr(t, errSearch, true) + for _, res := range resSearch { + require.Nil(t, res) + } + } +} + // test search with partition names []string{}, []string{""} func TestSearchEmptyPartitions(t *testing.T) { ctx := createContext(t, time.Second*common.DefaultTimeout) @@ -645,7 +684,7 @@ func TestSearchInvalidVectors(t *testing.T) { ctx, collName, []string{}, "", - []string{common.DefaultIntFieldName}, + []string{"*"}, invalidVector.vectors, common.DefaultFloatVecFieldName, entity.L2, @@ -1551,7 +1590,210 @@ func TestSearchMultiVectors(t *testing.T) { } // TODO iterator search } +} + +func TestSearchSparseVector(t *testing.T) { + t.Skip("https://github.com/milvus-io/milvus-sdk-go/issues/725") + t.Parallel() + idxInverted := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_INVERTED_INDEX", map[string]string{"drop_ratio_build": "0.2", "metric_type": "IP"}) + idxWand := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_WAND", map[string]string{"drop_ratio_build": "0.3", "metric_type": "IP"}) + for _, idx := range []entity.Index{idxInverted, idxWand} { + ctx := createContext(t, time.Second*common.DefaultTimeout*2) + // connect + mc := createMilvusClient(ctx, t) + + // create -> insert [0, 3000) -> flush -> index -> load + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + + dp := DataParams{DoInsert: true, CollectionFieldsType: Int64VarcharSparseVec, start: 0, nb: common.DefaultNb * 4, + dim: common.DefaultDim, EnableDynamicField: true} + + // index params + idxHnsw, _ := entity.NewIndexHNSW(entity.L2, 8, 96) + ips := []IndexParams{ + {BuildIndex: true, Index: idx, FieldName: common.DefaultSparseVecFieldName, async: false}, + {BuildIndex: true, Index: idxHnsw, FieldName: common.DefaultFloatVecFieldName, async: false}, + } + collName := prepareCollection(ctx, t, mc, cp, WithDataParams(dp), WithIndexParams(ips), WithCreateOption(client.WithConsistencyLevel(entity.ClStrong))) + + // search + queryVec := common.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeSparseVector) + sp, _ := entity.NewIndexSparseInvertedSearchParam(0.2) + resSearch, errSearch := mc.Search(ctx, collName, []string{}, "", []string{"*"}, queryVec, common.DefaultSparseVecFieldName, + entity.IP, common.DefaultTopK, sp) + common.CheckErr(t, errSearch, true) + common.CheckSearchResult(t, resSearch, common.DefaultNq, common.DefaultTopK) + outputFields := []string{common.DefaultIntFieldName, common.DefaultVarcharFieldName, common.DefaultFloatVecFieldName, + common.DefaultSparseVecFieldName, common.DefaultDynamicFieldName} + common.CheckOutputFields(t, resSearch[0].Fields, outputFields) + } +} + +// test search with invalid sparse vector +func TestSearchInvalidSparseVector(t *testing.T) { + t.Skip("https://github.com/milvus-io/milvus/issues/32368") + t.Parallel() + idxInverted := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_INVERTED_INDEX", map[string]string{"drop_ratio_build": "0.2", "metric_type": "IP"}) + idxWand := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_WAND", map[string]string{"drop_ratio_build": "0.3", "metric_type": "IP"}) + for _, idx := range []entity.Index{idxInverted, idxWand} { + ctx := createContext(t, time.Second*common.DefaultTimeout*2) + // connect + mc := createMilvusClient(ctx, t) + + // create -> insert [0, 3000) -> flush -> index -> load + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + + dp := DataParams{DoInsert: true, CollectionFieldsType: Int64VarcharSparseVec, start: 0, nb: common.DefaultNb, + dim: common.DefaultDim, EnableDynamicField: true} + + // index params + idxHnsw, _ := entity.NewIndexHNSW(entity.L2, 8, 96) + ips := []IndexParams{ + {BuildIndex: true, Index: idx, FieldName: common.DefaultSparseVecFieldName, async: false}, + {BuildIndex: true, Index: idxHnsw, FieldName: common.DefaultFloatVecFieldName, async: false}, + } + collName := prepareCollection(ctx, t, mc, cp, WithDataParams(dp), WithIndexParams(ips), WithCreateOption(client.WithConsistencyLevel(entity.ClStrong))) + sp, _ := entity.NewIndexSparseInvertedSearchParam(0) + + _, errSearch := mc.Search(ctx, collName, []string{}, "", []string{"*"}, []entity.Vector{}, common.DefaultSparseVecFieldName, + entity.IP, common.DefaultTopK, sp) + common.CheckErr(t, errSearch, false, "nq (number of search vector per search request) should be in range [1, 16384]") + + vector1, err := entity.NewSliceSparseEmbedding([]uint32{}, []float32{}) + common.CheckErr(t, err, true) + searchRes, errSearch := mc.Search(ctx, collName, []string{}, "", []string{"*"}, []entity.Vector{vector1}, common.DefaultSparseVecFieldName, + entity.IP, common.DefaultTopK, sp) + common.CheckErr(t, errSearch, true) + require.Len(t, searchRes, 0) + + positions := make([]uint32, 100) + values := make([]float32, 100) + for i := 0; i < 100; i++ { + positions[i] = uint32(1) + values[i] = rand.Float32() + } + vector, err := entity.NewSliceSparseEmbedding(positions, values) + searchRes, errSearch = mc.Search(ctx, collName, []string{}, "", []string{"*"}, []entity.Vector{vector}, common.DefaultSparseVecFieldName, + entity.IP, common.DefaultTopK, sp) + common.CheckErr(t, errSearch, false, "unsorted or same indices in sparse float vector") + } +} + +// TODO https://github.com/milvus-io/milvus-sdk-go/issues/725 +func TestSearchEmptySparseCollection(t *testing.T) { + t.Parallel() + idxInverted := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_INVERTED_INDEX", map[string]string{"drop_ratio_build": "0.2", "metric_type": "IP"}) + for _, idx := range []entity.Index{idxInverted} { + ctx := createContext(t, time.Second*common.DefaultTimeout*2) + // connect + mc := createMilvusClient(ctx, t) + + // create -> insert [0, 3000) -> flush -> index -> load + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + + dp := DataParams{DoInsert: false} + + // index params + idxHnsw, _ := entity.NewIndexHNSW(entity.L2, 8, 96) + ips := []IndexParams{ + {BuildIndex: true, Index: idx, FieldName: common.DefaultSparseVecFieldName, async: false}, + {BuildIndex: true, Index: idxHnsw, FieldName: common.DefaultFloatVecFieldName, async: false}, + } + collName := prepareCollection(ctx, t, mc, cp, WithDataParams(dp), WithIndexParams(ips), WithCreateOption(client.WithConsistencyLevel(entity.ClStrong))) + + // search + sp, _ := entity.NewIndexSparseInvertedSearchParam(0) + queryVec := common.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeSparseVector) + resSearch, errSearch := mc.Search(ctx, collName, []string{}, "", []string{"*"}, queryVec, common.DefaultSparseVecFieldName, + entity.IP, common.DefaultTopK, sp) + common.CheckErr(t, errSearch, true) + require.Empty(t, resSearch) + //require.Len(t, resSearch, common.DefaultNq) + //for _, res := range resSearch { + // require.Nil(t, res) + //} + } +} + +func TestSearchSparseVectorPagination(t *testing.T) { + t.Parallel() + idxInverted, _ := entity.NewIndexSparseInverted(entity.IP, 0.2) + idxWand, _ := entity.NewIndexSparseWAND(entity.IP, 0.2) + for _, idx := range []entity.Index{idxInverted, idxWand} { + ctx := createContext(t, time.Second*common.DefaultTimeout*2) + // connect + mc := createMilvusClient(ctx, t) + + // create -> insert [0, 3000) -> flush -> index -> load + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + + dp := DataParams{DoInsert: true, CollectionFieldsType: Int64VarcharSparseVec, start: 0, nb: common.DefaultNb * 4, + dim: common.DefaultDim, EnableDynamicField: true} + + // index params + idxHnsw, _ := entity.NewIndexHNSW(entity.L2, 8, 96) + ips := []IndexParams{ + {BuildIndex: true, Index: idx, FieldName: common.DefaultSparseVecFieldName, async: false}, + {BuildIndex: true, Index: idxHnsw, FieldName: common.DefaultFloatVecFieldName, async: false}, + } + collName := prepareCollection(ctx, t, mc, cp, WithDataParams(dp), WithIndexParams(ips), WithCreateOption(client.WithConsistencyLevel(entity.ClStrong))) + + // search + queryVec := common.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeSparseVector) + sp, _ := entity.NewIndexSparseInvertedSearchParam(0.2) + resSearch, errSearch := mc.Search(ctx, collName, []string{}, "", []string{"*"}, queryVec, common.DefaultSparseVecFieldName, + entity.IP, common.DefaultTopK, sp) + common.CheckErr(t, errSearch, true) + + pageSearch, errSearch := mc.Search(ctx, collName, []string{}, "", []string{"*"}, queryVec, common.DefaultSparseVecFieldName, + entity.IP, 5, sp, client.WithOffset(5)) + common.CheckErr(t, errSearch, true) + for i := 0; i < len(resSearch); i++ { + require.Equal(t, resSearch[i].IDs.(*entity.ColumnInt64).Data()[5:], pageSearch[i].IDs.(*entity.ColumnInt64).Data()) + } + } +} + +// test sparse vector unsupported search: range search, TODO iterator search +func TestSearchSparseVectorNotSupported(t *testing.T) { + // invalid sparse search params + for _, dropRatio := range []float64{1.2, -0.3, 1} { + _, err := entity.NewIndexSparseInvertedSearchParam(dropRatio) + common.CheckErr(t, err, false, fmt.Sprintf("invalid dropRatio for search: %v, must be in range [0, 1)", dropRatio)) + } + + ctx := createContext(t, time.Second*common.DefaultTimeout*2) + // connect + mc := createMilvusClient(ctx, t) + + // create -> insert [0, 3000) -> flush -> index -> load + cp := CollectionParams{CollectionFieldsType: Int64VarcharSparseVec, AutoID: false, EnableDynamicField: true, + ShardsNum: common.DefaultShards, Dim: common.DefaultDim, MaxLength: common.TestMaxLen} + + dp := DataParams{DoInsert: true, CollectionFieldsType: Int64VarcharSparseVec, start: 0, nb: common.DefaultNb * 2, + dim: common.DefaultDim, EnableDynamicField: true} + + // index params + idxHnsw, _ := entity.NewIndexHNSW(entity.L2, 8, 96) + idxWand := entity.NewGenericIndex(common.DefaultSparseVecFieldName, "SPARSE_WAND", map[string]string{"drop_ratio_build": "0.3", "metric_type": "IP"}) + ips := []IndexParams{ + {BuildIndex: true, Index: idxWand, FieldName: common.DefaultSparseVecFieldName, async: false}, + {BuildIndex: true, Index: idxHnsw, FieldName: common.DefaultFloatVecFieldName, async: false}, + } + collName := prepareCollection(ctx, t, mc, cp, WithDataParams(dp), WithIndexParams(ips), WithCreateOption(client.WithConsistencyLevel(entity.ClStrong))) + // range search + queryVec := common.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeSparseVector) + sp, _ := entity.NewIndexSparseInvertedSearchParam(0.3) + sp.AddRadius(10) + sp.AddRangeFilter(100) + _, errSearch := mc.Search(ctx, collName, []string{}, "", []string{"*"}, queryVec, common.DefaultSparseVecFieldName, + entity.IP, common.DefaultTopK, sp) + common.CheckErr(t, errSearch, false, "RangeSearch not supported for current index type") } // TODO offset and limit