From a1b5d7d6d31f660d157478a06dda0b6ba5d3481f Mon Sep 17 00:00:00 2001 From: pingliu Date: Mon, 20 May 2024 16:21:38 +0800 Subject: [PATCH 001/126] doc: [skip-e2e] change milvus docker image version to v2.4.1 (#33170) Signed-off-by: ping.liu --- scripts/standalone_embed.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/standalone_embed.sh b/scripts/standalone_embed.sh index 4284c9cfb9a77..647b73d37de0f 100755 --- a/scripts/standalone_embed.sh +++ b/scripts/standalone_embed.sh @@ -42,7 +42,7 @@ EOF --health-start-period=90s \ --health-timeout=20s \ --health-retries=3 \ - milvusdb/milvus:v2.4.0 \ + milvusdb/milvus:v2.4.1 \ milvus run standalone 1> /dev/null } From 7eeb120aab0b63ee36bf204bc0900445172ac4bd Mon Sep 17 00:00:00 2001 From: congqixia Date: Mon, 20 May 2024 20:47:38 +0800 Subject: [PATCH 002/126] enhance: Add lint rules for client pkg and fix problems (#33180) See also #31293 --------- Signed-off-by: Congqi Xia --- Makefile | 7 +- client/.golangci.yml | 172 +++++++++++++ client/client_config.go | 3 +- client/client_test.go | 2 +- client/column/columns.go | 8 +- client/column/sparse.go | 1 + client/column/sparse_test.go | 3 +- client/column/varchar.go | 5 +- client/database_test.go | 5 +- client/entity/sparse.go | 2 +- client/example/database/main.go | 52 ++++ client/example/playground/main.go | 120 +++++++-- client/go.mod | 1 + client/go.sum | 2 + client/index.go | 3 +- client/index_test.go | 7 +- client/interceptors.go | 2 +- client/interceptors_test.go | 8 +- client/maintenance_test.go | 7 +- client/partition.go | 3 +- client/partition_test.go | 5 +- client/read.go | 2 +- client/read_options.go | 1 + client/read_test.go | 7 +- client/ruleguard/rules.go | 409 ++++++++++++++++++++++++++++++ client/write_test.go | 7 +- 26 files changed, 793 insertions(+), 51 deletions(-) create mode 100644 client/.golangci.yml create mode 100644 client/ruleguard/rules.go diff --git a/Makefile b/Makefile index 24cf720f10e97..6ed5b46d19680 100644 --- a/Makefile +++ b/Makefile @@ -142,20 +142,25 @@ lint-fix: getdeps @$(INSTALL_PATH)/gofumpt -l -w internal/ @$(INSTALL_PATH)/gofumpt -l -w cmd/ @$(INSTALL_PATH)/gofumpt -l -w pkg/ + @$(INSTALL_PATH)/gofumpt -l -w client/ @$(INSTALL_PATH)/gofumpt -l -w tests/integration/ @echo "Running gci fix" @$(INSTALL_PATH)/gci write cmd/ --skip-generated -s standard -s default -s "prefix(github.com/milvus-io)" --custom-order @$(INSTALL_PATH)/gci write internal/ --skip-generated -s standard -s default -s "prefix(github.com/milvus-io)" --custom-order @$(INSTALL_PATH)/gci write pkg/ --skip-generated -s standard -s default -s "prefix(github.com/milvus-io)" --custom-order + @$(INSTALL_PATH)/gci write client/ --skip-generated -s standard -s default -s "prefix(github.com/milvus-io)" --custom-order @$(INSTALL_PATH)/gci write tests/ --skip-generated -s standard -s default -s "prefix(github.com/milvus-io)" --custom-order @echo "Running golangci-lint auto-fix" - @source $(PWD)/scripts/setenv.sh && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --fix --timeout=30m --config $(PWD)/.golangci.yml; cd pkg && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --fix --timeout=30m --config $(PWD)/.golangci.yml + @source $(PWD)/scripts/setenv.sh && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --fix --timeout=30m --config $(PWD)/.golangci.yml; + @source $(PWD)/scripts/setenv.sh && cd pkg && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --fix --timeout=30m --config $(PWD)/.golangci.yml + @source $(PWD)/scripts/setenv.sh && cd client && GO111MODULE=on 
$(INSTALL_PATH)/golangci-lint run --fix --timeout=30m --config $(PWD)/client/.golangci.yml #TODO: Check code specifications by golangci-lint static-check: getdeps @echo "Running $@ check" @source $(PWD)/scripts/setenv.sh && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --timeout=30m --config $(PWD)/.golangci.yml @source $(PWD)/scripts/setenv.sh && cd pkg && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --timeout=30m --config $(PWD)/.golangci.yml + @source $(PWD)/scripts/setenv.sh && cd client && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --timeout=30m --config $(PWD)/client/.golangci.yml verifiers: build-cpp getdeps cppcheck fmt static-check diff --git a/client/.golangci.yml b/client/.golangci.yml new file mode 100644 index 0000000000000..5c90b6d694fc3 --- /dev/null +++ b/client/.golangci.yml @@ -0,0 +1,172 @@ +run: + go: "1.20" + skip-dirs: + - build + - configs + - deployments + - docs + - scripts + - internal/core + - cmake_build + skip-files: + - partial_search_test.go + +linters: + disable-all: true + enable: + - gosimple + - govet + - ineffassign + - staticcheck + - decorder + - depguard + - gofmt + - goimports + - gosec + - revive + - unconvert + - misspell + - typecheck + - durationcheck + - forbidigo + - gci + - whitespace + - gofumpt + - gocritic + +linters-settings: + gci: + sections: + - standard + - default + - prefix(github.com/milvus-io) + custom-order: true + gofumpt: + lang-version: "1.18" + module-path: github.com/milvus-io + goimports: + local-prefixes: github.com/milvus-io + revive: + rules: + - name: unused-parameter + disabled: true + - name: var-naming + severity: warning + disabled: false + arguments: + - ["ID"] # Allow list + - name: context-as-argument + severity: warning + disabled: false + arguments: + - allowTypesBefore: "*testing.T" + - name: datarace + severity: warning + disabled: false + - name: duplicated-imports + severity: warning + disabled: false + - name: waitgroup-by-value + severity: warning + disabled: false + - name: indent-error-flow + severity: warning + disabled: false + arguments: + - "preserveScope" + - name: range-val-in-closure + severity: warning + disabled: false + - name: range-val-address + severity: warning + disabled: false + - name: string-of-int + severity: warning + disabled: false + misspell: + locale: US + gocritic: + enabled-checks: + - ruleguard + settings: + ruleguard: + failOnError: true + rules: "ruleguard/rules.go" + depguard: + rules: + main: + deny: + - pkg: "errors" + desc: not allowed, use github.com/cockroachdb/errors + - pkg: "github.com/pkg/errors" + desc: not allowed, use github.com/cockroachdb/errors + - pkg: "github.com/pingcap/errors" + desc: not allowed, use github.com/cockroachdb/errors + - pkg: "golang.org/x/xerrors" + desc: not allowed, use github.com/cockroachdb/errors + - pkg: "github.com/go-errors/errors" + desc: not allowed, use github.com/cockroachdb/errors + - pkg: "io/ioutil" + desc: ioutil is deprecated after 1.16, 1.17, use os and io package instead + - pkg: "github.com/tikv/client-go/rawkv" + desc: not allowed, use github.com/tikv/client-go/v2/txnkv + - pkg: "github.com/tikv/client-go/v2/rawkv" + desc: not allowed, use github.com/tikv/client-go/v2/txnkv + forbidigo: + forbid: + - '^time\.Tick$' + - 'return merr\.Err[a-zA-Z]+' + - 'merr\.Wrap\w+\(\)\.Error\(\)' + - '\.(ErrorCode|Reason) = ' + - 'Reason:\s+\w+\.Error\(\)' + - 'errors.New\((.+)\.GetReason\(\)\)' + - 'commonpb\.Status\{[\s\n]*ErrorCode:[\s\n]*.+[\s\S\n]*?\}' + - 'os\.Open\(.+\)' + - 'os\.ReadFile\(.+\)' + - 
'os\.WriteFile\(.+\)' + - "runtime.NumCPU" + - "runtime.GOMAXPROCS(0)" + #- 'fmt\.Print.*' WIP + +issues: + exclude-use-default: false + exclude-rules: + - path: .+_test\.go + linters: + - forbidigo + exclude: + - should have a package comment + - should have comment + - should be of the form + - should not use dot imports + - which can be annoying to use + # Binds to all network interfaces + - G102 + # Use of unsafe calls should be audited + - G103 + # Errors unhandled + - G104 + # file/folder Permission + - G301 + - G302 + # Potential file inclusion via variable + - G304 + # Deferring unsafe method like *os.File Close + - G307 + # TLS MinVersion too low + - G402 + # Use of weak random number generator math/rand + - G404 + # Unused parameters + - SA1019 + # defer return errors + - SA5001 + + # Maximum issues count per one linter. Set to 0 to disable. Default is 50. + max-issues-per-linter: 0 + # Maximum count of issues with the same text. Set to 0 to disable. Default is 3. + max-same-issues: 0 + +service: + # use the fixed version to not introduce new linters unexpectedly + golangci-lint-version: 1.55.2 diff --git a/client/client_config.go b/client/client_config.go index 63a4f6d2b8565..01f82877f7967 100644 --- a/client/client_config.go +++ b/client/client_config.go @@ -10,10 +10,11 @@ import ( "time" "github.com/cockroachdb/errors" - "github.com/milvus-io/milvus/pkg/util/crypto" "google.golang.org/grpc" "google.golang.org/grpc/backoff" "google.golang.org/grpc/keepalive" + + "github.com/milvus-io/milvus/pkg/util/crypto" ) const ( diff --git a/client/client_test.go b/client/client_test.go index f23a9d9941d83..c6d0867ee8af3 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -31,7 +31,7 @@ func (s *ClientSuite) TestNewClient() { s.NotNil(c) }) - s.Run("emtpy_addr", func() { + s.Run("empty_addr", func() { _, err := New(ctx, &ClientConfig{}) s.Error(err) s.T().Log(err) diff --git a/client/column/columns.go b/client/column/columns.go index 8a2a52d87941f..a30b064e15235 100644 --- a/client/column/columns.go +++ b/client/column/columns.go @@ -239,7 +239,7 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { data := x.FloatVector.GetData() dim := int(vectors.GetDim()) if end < 0 { - end = int(len(data) / dim) + end = len(data) / dim } vector := make([][]float32, 0, end-begin) // shall not have remanunt for i := begin; i < end; i++ { @@ -262,7 +262,7 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { dim := int(vectors.GetDim()) blen := dim / 8 if end < 0 { - end = int(len(data) / blen) + end = len(data) / blen } vector := make([][]byte, 0, end-begin) for i := begin; i < end; i++ { @@ -281,7 +281,7 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { data := x.Float16Vector dim := int(vectors.GetDim()) if end < 0 { - end = int(len(data) / dim) + end = len(data) / dim } vector := make([][]byte, 0, end-begin) for i := begin; i < end; i++ { @@ -300,7 +300,7 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { data := x.Bfloat16Vector dim := int(vectors.GetDim()) if end < 0 { - end = int(len(data) / dim) + end = len(data) / dim } vector := make([][]byte, 0, end-begin) // shall not have remanunt for i := begin; i < end; i++ { diff --git a/client/column/sparse.go b/client/column/sparse.go index b9d20fd616ded..cc02e3ee2ffe2 100644 --- a/client/column/sparse.go +++ b/client/column/sparse.go @@ -22,6 +22,7 @@ import ( "math" "github.com/cockroachdb/errors" + 
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/entity" ) diff --git a/client/column/sparse_test.go b/client/column/sparse_test.go index 387df9efe7d7c..564f223ff1532 100644 --- a/client/column/sparse_test.go +++ b/client/column/sparse_test.go @@ -21,9 +21,10 @@ import ( "math/rand" "testing" - "github.com/milvus-io/milvus/client/v2/entity" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/milvus-io/milvus/client/v2/entity" ) func TestColumnSparseEmbedding(t *testing.T) { diff --git a/client/column/varchar.go b/client/column/varchar.go index 9ed1646450189..63aff96ae94c8 100644 --- a/client/column/varchar.go +++ b/client/column/varchar.go @@ -17,9 +17,10 @@ package column import ( - "errors" "fmt" + "github.com/cockroachdb/errors" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/entity" ) @@ -70,7 +71,7 @@ func (c *ColumnVarChar) FieldData() *schemapb.FieldData { } data := make([]string, 0, c.Len()) for i := 0; i < c.Len(); i++ { - data = append(data, string(c.values[i])) + data = append(data, c.values[i]) } fd.Field = &schemapb.FieldData_Scalars{ Scalars: &schemapb.ScalarField{ diff --git a/client/database_test.go b/client/database_test.go index f46a0cafb8b7b..d7555d7d5aa44 100644 --- a/client/database_test.go +++ b/client/database_test.go @@ -5,11 +5,12 @@ import ( "fmt" "testing" + mock "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/pkg/util/merr" - mock "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" ) type DatabaseSuite struct { diff --git a/client/entity/sparse.go b/client/entity/sparse.go index 00f41c60d355e..2bded8f6e8f2b 100644 --- a/client/entity/sparse.go +++ b/client/entity/sparse.go @@ -56,7 +56,7 @@ func (e sliceSparseEmbedding) FieldType() FieldType { } func (e sliceSparseEmbedding) Get(idx int) (uint32, float32, bool) { - if idx < 0 || idx >= int(e.len) { + if idx < 0 || idx >= e.len { return 0, 0, false } return e.positions[idx], e.values[idx], true diff --git a/client/example/database/main.go b/client/example/database/main.go index 5b978b6261549..0069923d9a2c6 100644 --- a/client/example/database/main.go +++ b/client/example/database/main.go @@ -5,6 +5,7 @@ import ( "log" milvusclient "github.com/milvus-io/milvus/client/v2" + "github.com/milvus-io/milvus/client/v2/entity" ) const ( @@ -34,4 +35,55 @@ func main() { log.Fatal("failed to list databases", err.Error()) } log.Println("=== Databases: ", dbNames) + + schema := entity.NewSchema().WithName("hello_milvus"). + WithField(entity.NewField().WithName("ID").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)). 
+ WithField(entity.NewField().WithName("Vector").WithDataType(entity.FieldTypeFloatVector).WithDim(128)) + + if err := c.CreateCollection(ctx, milvusclient.NewCreateCollectionOption("hello_milvus", schema)); err != nil { + log.Fatal("failed to create collection:", err.Error()) + } + + collections, err := c.ListCollections(ctx, milvusclient.NewListCollectionOption()) + if err != nil { + log.Fatal("failed to list collections,", err.Error()) + } + + for _, collectionName := range collections { + collection, err := c.DescribeCollection(ctx, milvusclient.NewDescribeCollectionOption(collectionName)) + if err != nil { + log.Fatal(err.Error()) + } + log.Println(collection.Name) + for _, field := range collection.Schema.Fields { + log.Println("=== Field: ", field.Name, field.DataType, field.AutoID) + } + } + + c.CreateDatabase(ctx, milvusclient.NewCreateDatabaseOption("test")) + c.UsingDatabase(ctx, milvusclient.NewUsingDatabaseOption("test")) + + schema = entity.NewSchema().WithName("hello_milvus"). + WithField(entity.NewField().WithName("ID").WithDataType(entity.FieldTypeVarChar).WithMaxLength(64).WithIsPrimaryKey(true)). + WithField(entity.NewField().WithName("Vector").WithDataType(entity.FieldTypeFloatVector).WithDim(128)) + + if err := c.CreateCollection(ctx, milvusclient.NewCreateCollectionOption("hello_milvus", schema)); err != nil { + log.Fatal("failed to create collection:", err.Error()) + } + + collections, err = c.ListCollections(ctx, milvusclient.NewListCollectionOption()) + if err != nil { + log.Fatal("failed to list collections,", err.Error()) + } + + for _, collectionName := range collections { + collection, err := c.DescribeCollection(ctx, milvusclient.NewDescribeCollectionOption(collectionName)) + if err != nil { + log.Fatal(err.Error()) + } + log.Println(collection.Name) + for _, field := range collection.Schema.Fields { + log.Println("=== Field: ", field.Name, field.DataType, field.AutoID) + } + } } diff --git a/client/example/playground/main.go b/client/example/playground/main.go index e5984648cf71c..43ae57915cfd2 100644 --- a/client/example/playground/main.go +++ b/client/example/playground/main.go @@ -18,6 +18,7 @@ const ( helloMilvusCmd = `hello_milvus` partitionsCmd = `partitions` indexCmd = `indexes` + countCmd = `count` milvusAddr = `localhost:19530` nEntities, dim = 3000, 128 @@ -38,9 +39,109 @@ func main() { Partitions() case indexCmd: Indexes() + case countCmd: + Count() } } +func Count() { + ctx := context.Background() + + collectionName := "hello_count_inverted" + + c, err := milvusclient.New(ctx, &milvusclient.ClientConfig{ + Address: "127.0.0.1:19530", + }) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + + schema := entity.NewSchema().WithName(collectionName). + WithField(entity.NewField().WithName("id").WithDataType(entity.FieldTypeInt64).WithIsAutoID(true).WithIsPrimaryKey(true)). 
+ WithField(entity.NewField().WithName("vector").WithDataType(entity.FieldTypeFloatVector).WithDim(128)) + + err = c.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(collectionName, schema)) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + + indexTask, err := c.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "id", index.NewGenericIndex("inverted", map[string]string{}))) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + + indexTask.Await(ctx) + + indexTask, err = c.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "vector", index.NewHNSWIndex(entity.L2, 16, 32))) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + + indexTask.Await(ctx) + + loadTask, err := c.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(collectionName)) + if err != nil { + log.Fatal("faied to load collection, err: ", err.Error()) + } + loadTask.Await(ctx) + + for i := 0; i < 100; i++ { + // randomData := make([]int64, 0, nEntities) + vectorData := make([][]float32, 0, nEntities) + // generate data + for i := 0; i < nEntities; i++ { + // randomData = append(randomData, rand.Int63n(1000)) + vec := make([]float32, 0, dim) + for j := 0; j < dim; j++ { + vec = append(vec, rand.Float32()) + } + vectorData = append(vectorData, vec) + } + + _, err = c.Insert(ctx, milvusclient.NewColumnBasedInsertOption(collectionName).WithFloatVectorColumn("vector", dim, vectorData)) + if err != nil { + log.Fatal("failed to insert data") + } + + log.Println("start flush collection") + flushTask, err := c.Flush(ctx, milvusclient.NewFlushOption(collectionName)) + if err != nil { + log.Fatal("failed to flush", err.Error()) + } + start := time.Now() + err = flushTask.Await(ctx) + if err != nil { + log.Fatal("failed to flush", err.Error()) + } + log.Println("flush done, elapsed", time.Since(start)) + + result, err := c.Query(ctx, milvusclient.NewQueryOption(collectionName). + WithOutputFields([]string{"count(*)"}). + WithConsistencyLevel(entity.ClStrong)) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + for _, rs := range result.Fields { + log.Println(rs) + } + result, err = c.Query(ctx, milvusclient.NewQueryOption(collectionName). + WithOutputFields([]string{"count(*)"}). + WithFilter("id > 0"). 
+ WithConsistencyLevel(entity.ClStrong)) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + for _, rs := range result.Fields { + log.Println(rs) + } + } + + // err = c.DropCollection(ctx, milvusclient.NewDropCollectionOption(collectionName)) + // if err != nil { + // log.Fatal("=== Failed to drop collection", err.Error()) + // } +} + func HelloMilvus() { ctx := context.Background() @@ -92,7 +193,7 @@ func HelloMilvus() { vectorData = append(vectorData, vec) } - err = c.Insert(ctx, milvusclient.NewColumnBasedInsertOption(collectionName).WithFloatVectorColumn("vector", dim, vectorData)) + _, err = c.Insert(ctx, milvusclient.NewColumnBasedInsertOption(collectionName).WithFloatVectorColumn("vector", dim, vectorData)) if err != nil { log.Fatal("failed to insert data") } @@ -107,22 +208,7 @@ func HelloMilvus() { if err != nil { log.Fatal("failed to flush", err.Error()) } - log.Println("flush done, elasped", time.Since(start)) - - indexTask, err := c.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "vector", index.NewHNSWIndex(entity.L2, 16, 100))) - if err != nil { - log.Fatal("failed to create index, err: ", err.Error()) - } - err = indexTask.Await(ctx) - if err != nil { - log.Fatal("failed to wait index construction complete") - } - - loadTask, err := c.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(collectionName)) - if err != nil { - log.Fatal("failed to load collection", err.Error()) - } - loadTask.Await(ctx) + log.Println("flush done, elapsed", time.Since(start)) vec2search := []entity.Vector{ entity.FloatVector(vectorData[len(vectorData)-2]), diff --git a/client/go.mod b/client/go.mod index c0f6882c3d768..79dce6b878164 100644 --- a/client/go.mod +++ b/client/go.mod @@ -10,6 +10,7 @@ require ( github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240430035521-259ae1d10016 github.com/milvus-io/milvus/pkg v0.0.2-0.20240317152703-17b4938985f3 + github.com/quasilyte/go-ruleguard/dsl v0.3.22 github.com/samber/lo v1.27.0 github.com/stretchr/testify v1.8.4 github.com/tidwall/gjson v1.17.1 diff --git a/client/go.sum b/client/go.sum index 1efeee2111774..44e4615201642 100644 --- a/client/go.sum +++ b/client/go.sum @@ -476,6 +476,8 @@ github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1 github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/quasilyte/go-ruleguard/dsl v0.3.22 h1:wd8zkOhSNr+I+8Qeciml08ivDt1pSXe60+5DqOpCjPE= +github.com/quasilyte/go-ruleguard/dsl v0.3.22/go.mod h1:KeCP03KrjuSO0H1kTuZQCWlQPulDV6YMIXmpQss17rU= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= diff --git a/client/index.go b/client/index.go index 79dd57ed3e9c6..79320484632e7 100644 --- a/client/index.go +++ b/client/index.go @@ -21,12 +21,13 @@ import ( "fmt" "time" + "google.golang.org/grpc" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/client/v2/entity" "github.com/milvus-io/milvus/client/v2/index" 
"github.com/milvus-io/milvus/pkg/util/merr" - "google.golang.org/grpc" ) type CreateIndexTask struct { diff --git a/client/index_test.go b/client/index_test.go index ac9f5e40699e5..920457f9a2160 100644 --- a/client/index_test.go +++ b/client/index_test.go @@ -22,14 +22,15 @@ import ( "testing" "time" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "go.uber.org/atomic" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/client/v2/entity" "github.com/milvus-io/milvus/client/v2/index" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" - "go.uber.org/atomic" ) type IndexSuite struct { diff --git a/client/interceptors.go b/client/interceptors.go index 16396c4aed7f9..6756a74895825 100644 --- a/client/interceptors.go +++ b/client/interceptors.go @@ -20,12 +20,12 @@ import ( "context" "time" + grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" - grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" ) diff --git a/client/interceptors_test.go b/client/interceptors_test.go index e3bcb34fcea66..648575dbd42ed 100644 --- a/client/interceptors_test.go +++ b/client/interceptors_test.go @@ -28,9 +28,11 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" ) -var mockInvokerError error -var mockInvokerReply interface{} -var mockInvokeTimes = 0 +var ( + mockInvokerError error + mockInvokerReply interface{} + mockInvokeTimes = 0 +) var mockInvoker grpc.UnaryInvoker = func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, opts ...grpc.CallOption) error { mockInvokeTimes++ diff --git a/client/maintenance_test.go b/client/maintenance_test.go index 333146f8ca4c9..0efcd449dfc41 100644 --- a/client/maintenance_test.go +++ b/client/maintenance_test.go @@ -22,13 +22,14 @@ import ( "testing" "time" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "go.uber.org/atomic" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" - "go.uber.org/atomic" ) type MaintenanceSuite struct { diff --git a/client/partition.go b/client/partition.go index 93036b2300dc8..18483687175b4 100644 --- a/client/partition.go +++ b/client/partition.go @@ -19,9 +19,10 @@ package client import ( "context" + "google.golang.org/grpc" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/pkg/util/merr" - "google.golang.org/grpc" ) // CreatePartition is the API for creating a partition for a collection. 
diff --git a/client/partition_test.go b/client/partition_test.go index 2c6c4e2ed82c4..7bd7cd74360b0 100644 --- a/client/partition_test.go +++ b/client/partition_test.go @@ -21,11 +21,12 @@ import ( "fmt" "testing" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" ) type PartitionSuite struct { diff --git a/client/read.go b/client/read.go index 3aeaff769d31b..1907ed8e07fa4 100644 --- a/client/read.go +++ b/client/read.go @@ -19,9 +19,9 @@ package client import ( "context" + "github.com/cockroachdb/errors" "google.golang.org/grpc" - "github.com/cockroachdb/errors" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/column" diff --git a/client/read_options.go b/client/read_options.go index a1f563bfc0642..2bdaf78a553eb 100644 --- a/client/read_options.go +++ b/client/read_options.go @@ -21,6 +21,7 @@ import ( "strconv" "github.com/golang/protobuf/proto" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/client/v2/entity" diff --git a/client/read_test.go b/client/read_test.go index 6606226d1bb76..0e815a0563382 100644 --- a/client/read_test.go +++ b/client/read_test.go @@ -6,13 +6,14 @@ import ( "math/rand" "testing" + "github.com/samber/lo" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/entity" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/samber/lo" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" ) type ReadSuite struct { diff --git a/client/ruleguard/rules.go b/client/ruleguard/rules.go new file mode 100644 index 0000000000000..5bc3422c9b450 --- /dev/null +++ b/client/ruleguard/rules.go @@ -0,0 +1,409 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package gorules + +import ( + "github.com/quasilyte/go-ruleguard/dsl" +) + +// This is a collection of rules for ruleguard: https://github.com/quasilyte/go-ruleguard + +// Remove extra conversions: mdempsky/unconvert +func unconvert(m dsl.Matcher) { + m.Match("int($x)").Where(m["x"].Type.Is("int") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + + m.Match("float32($x)").Where(m["x"].Type.Is("float32") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("float64($x)").Where(m["x"].Type.Is("float64") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + + // m.Match("byte($x)").Where(m["x"].Type.Is("byte")).Report("unnecessary conversion").Suggest("$x") + // m.Match("rune($x)").Where(m["x"].Type.Is("rune")).Report("unnecessary conversion").Suggest("$x") + m.Match("bool($x)").Where(m["x"].Type.Is("bool") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + + m.Match("int8($x)").Where(m["x"].Type.Is("int8") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("int16($x)").Where(m["x"].Type.Is("int16") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("int32($x)").Where(m["x"].Type.Is("int32") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("int64($x)").Where(m["x"].Type.Is("int64") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + + m.Match("uint8($x)").Where(m["x"].Type.Is("uint8") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("uint16($x)").Where(m["x"].Type.Is("uint16") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("uint32($x)").Where(m["x"].Type.Is("uint32") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("uint64($x)").Where(m["x"].Type.Is("uint64") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + + m.Match("time.Duration($x)").Where(m["x"].Type.Is("time.Duration") && !m["x"].Text.Matches("^[0-9]*$")).Report("unnecessary conversion").Suggest("$x") +} + +// Don't use == or != with time.Time +// https://github.com/dominikh/go-tools/issues/47 : Wontfix +func timeeq(m dsl.Matcher) { + m.Match("$t0 == $t1").Where(m["t0"].Type.Is("time.Time")).Report("using == with time.Time") + m.Match("$t0 != $t1").Where(m["t0"].Type.Is("time.Time")).Report("using != with time.Time") + m.Match(`map[$k]$v`).Where(m["k"].Type.Is("time.Time")).Report("map with time.Time keys are easy to misuse") +} + +// err but no an error +func errnoterror(m dsl.Matcher) { + // Would be easier to check for all err identifiers instead, but then how do we get the type from m[] ? + + m.Match( + "if $*_, err := $x; $err != nil { $*_ } else if $_ { $*_ }", + "if $*_, err := $x; $err != nil { $*_ } else { $*_ }", + "if $*_, err := $x; $err != nil { $*_ }", + + "if $*_, err = $x; $err != nil { $*_ } else if $_ { $*_ }", + "if $*_, err = $x; $err != nil { $*_ } else { $*_ }", + "if $*_, err = $x; $err != nil { $*_ }", + + "$*_, err := $x; if $err != nil { $*_ } else if $_ { $*_ }", + "$*_, err := $x; if $err != nil { $*_ } else { $*_ }", + "$*_, err := $x; if $err != nil { $*_ }", + + "$*_, err = $x; if $err != nil { $*_ } else if $_ { $*_ }", + "$*_, err = $x; if $err != nil { $*_ } else { $*_ }", + "$*_, err = $x; if $err != nil { $*_ }", + ). + Where(m["err"].Text == "err" && !m["err"].Type.Is("error") && m["x"].Text != "recover()"). 
+ Report("err variable not error type") +} + +// Identical if and else bodies +func ifbodythenbody(m dsl.Matcher) { + m.Match("if $*_ { $body } else { $body }"). + Report("identical if and else bodies") + + // Lots of false positives. + // m.Match("if $*_ { $body } else if $*_ { $body }"). + // Report("identical if and else bodies") +} + +// Odd inequality: A - B < 0 instead of != +// Too many false positives. +/* +func subtractnoteq(m dsl.Matcher) { + m.Match("$a - $b < 0").Report("consider $a != $b") + m.Match("$a - $b > 0").Report("consider $a != $b") + m.Match("0 < $a - $b").Report("consider $a != $b") + m.Match("0 > $a - $b").Report("consider $a != $b") +} +*/ + +// Self-assignment +func selfassign(m dsl.Matcher) { + m.Match("$x = $x").Report("useless self-assignment") +} + +// Odd nested ifs +func oddnestedif(m dsl.Matcher) { + m.Match("if $x { if $x { $*_ }; $*_ }", + "if $x == $y { if $x != $y {$*_ }; $*_ }", + "if $x != $y { if $x == $y {$*_ }; $*_ }", + "if $x { if !$x { $*_ }; $*_ }", + "if !$x { if $x { $*_ }; $*_ }"). + Report("odd nested ifs") + + m.Match("for $x { if $x { $*_ }; $*_ }", + "for $x == $y { if $x != $y {$*_ }; $*_ }", + "for $x != $y { if $x == $y {$*_ }; $*_ }", + "for $x { if !$x { $*_ }; $*_ }", + "for !$x { if $x { $*_ }; $*_ }"). + Report("odd nested for/ifs") +} + +// odd bitwise expressions +func oddbitwise(m dsl.Matcher) { + m.Match("$x | $x", + "$x | ^$x", + "^$x | $x"). + Report("odd bitwise OR") + + m.Match("$x & $x", + "$x & ^$x", + "^$x & $x"). + Report("odd bitwise AND") + + m.Match("$x &^ $x"). + Report("odd bitwise AND-NOT") +} + +// odd sequence of if tests with return +func ifreturn(m dsl.Matcher) { + m.Match("if $x { return $*_ }; if $x {$*_ }").Report("odd sequence of if test") + m.Match("if $x { return $*_ }; if !$x {$*_ }").Report("odd sequence of if test") + m.Match("if !$x { return $*_ }; if $x {$*_ }").Report("odd sequence of if test") + m.Match("if $x == $y { return $*_ }; if $x != $y {$*_ }").Report("odd sequence of if test") + m.Match("if $x != $y { return $*_ }; if $x == $y {$*_ }").Report("odd sequence of if test") +} + +func oddifsequence(m dsl.Matcher) { + /* + m.Match("if $x { $*_ }; if $x {$*_ }").Report("odd sequence of if test") + + m.Match("if $x == $y { $*_ }; if $y == $x {$*_ }").Report("odd sequence of if tests") + m.Match("if $x != $y { $*_ }; if $y != $x {$*_ }").Report("odd sequence of if tests") + + m.Match("if $x < $y { $*_ }; if $y > $x {$*_ }").Report("odd sequence of if tests") + m.Match("if $x <= $y { $*_ }; if $y >= $x {$*_ }").Report("odd sequence of if tests") + + m.Match("if $x > $y { $*_ }; if $y < $x {$*_ }").Report("odd sequence of if tests") + m.Match("if $x >= $y { $*_ }; if $y <= $x {$*_ }").Report("odd sequence of if tests") + */ +} + +// odd sequence of nested if tests +func nestedifsequence(m dsl.Matcher) { + /* + m.Match("if $x < $y { if $x >= $y {$*_ }; $*_ }").Report("odd sequence of nested if tests") + m.Match("if $x <= $y { if $x > $y {$*_ }; $*_ }").Report("odd sequence of nested if tests") + m.Match("if $x > $y { if $x <= $y {$*_ }; $*_ }").Report("odd sequence of nested if tests") + m.Match("if $x >= $y { if $x < $y {$*_ }; $*_ }").Report("odd sequence of nested if tests") + */ +} + +// odd sequence of assignments +func identicalassignments(m dsl.Matcher) { + m.Match("$x = $y; $y = $x").Report("odd sequence of assignments") +} + +func oddcompoundop(m dsl.Matcher) { + m.Match("$x += $x + $_", + "$x += $x - $_"). + Report("odd += expression") + + m.Match("$x -= $x + $_", + "$x -= $x - $_"). 
+ Report("odd -= expression") +} + +func constswitch(m dsl.Matcher) { + m.Match("switch $x { $*_ }", "switch $*_; $x { $*_ }"). + Where(m["x"].Const && !m["x"].Text.Matches(`^runtime\.`)). + Report("constant switch") +} + +func oddcomparisons(m dsl.Matcher) { + m.Match( + "$x - $y == 0", + "$x - $y != 0", + "$x - $y < 0", + "$x - $y <= 0", + "$x - $y > 0", + "$x - $y >= 0", + "$x ^ $y == 0", + "$x ^ $y != 0", + ).Report("odd comparison") +} + +func oddmathbits(m dsl.Matcher) { + m.Match( + "64 - bits.LeadingZeros64($x)", + "32 - bits.LeadingZeros32($x)", + "16 - bits.LeadingZeros16($x)", + "8 - bits.LeadingZeros8($x)", + ).Report("odd math/bits expression: use bits.Len*() instead?") +} + +// func floateq(m dsl.Matcher) { +// m.Match( +// "$x == $y", +// "$x != $y", +// ). +// Where(m["x"].Type.Is("float32") && !m["x"].Const && !m["y"].Text.Matches("0(.0+)?") && !m.File().Name.Matches("floating_comparision.go")). +// Report("floating point tested for equality") + +// m.Match( +// "$x == $y", +// "$x != $y", +// ). +// Where(m["x"].Type.Is("float64") && !m["x"].Const && !m["y"].Text.Matches("0(.0+)?") && !m.File().Name.Matches("floating_comparision.go")). +// Report("floating point tested for equality") + +// m.Match("switch $x { $*_ }", "switch $*_; $x { $*_ }"). +// Where(m["x"].Type.Is("float32")). +// Report("floating point as switch expression") + +// m.Match("switch $x { $*_ }", "switch $*_; $x { $*_ }"). +// Where(m["x"].Type.Is("float64")). +// Report("floating point as switch expression") + +// } + +func badexponent(m dsl.Matcher) { + m.Match( + "2 ^ $x", + "10 ^ $x", + ). + Report("caret (^) is not exponentiation") +} + +func floatloop(m dsl.Matcher) { + m.Match( + "for $i := $x; $i < $y; $i += $z { $*_ }", + "for $i = $x; $i < $y; $i += $z { $*_ }", + ). + Where(m["i"].Type.Is("float64")). + Report("floating point for loop counter") + + m.Match( + "for $i := $x; $i < $y; $i += $z { $*_ }", + "for $i = $x; $i < $y; $i += $z { $*_ }", + ). + Where(m["i"].Type.Is("float32")). + Report("floating point for loop counter") +} + +func urlredacted(m dsl.Matcher) { + m.Match( + "log.Println($x, $*_)", + "log.Println($*_, $x, $*_)", + "log.Println($*_, $x)", + "log.Printf($*_, $x, $*_)", + "log.Printf($*_, $x)", + + "log.Println($x, $*_)", + "log.Println($*_, $x, $*_)", + "log.Println($*_, $x)", + "log.Printf($*_, $x, $*_)", + "log.Printf($*_, $x)", + ). + Where(m["x"].Type.Is("*url.URL")). + Report("consider $x.Redacted() when outputting URLs") +} + +func sprinterr(m dsl.Matcher) { + m.Match(`fmt.Sprint($err)`, + `fmt.Sprintf("%s", $err)`, + `fmt.Sprintf("%v", $err)`, + ). + Where(m["err"].Type.Is("error")). + Report("maybe call $err.Error() instead of fmt.Sprint()?") +} + +// disable this check, because it can not apply to generic type +//func largeloopcopy(m dsl.Matcher) { +// m.Match( +// `for $_, $v := range $_ { $*_ }`, +// ). +// Where(m["v"].Type.Size > 1024). +// Report(`loop copies large value each iteration`) +//} + +func joinpath(m dsl.Matcher) { + m.Match( + `strings.Join($_, "/")`, + `strings.Join($_, "\\")`, + "strings.Join($_, `\\`)", + ). 
+ Report(`did you mean path.Join() or filepath.Join() ?`) +} + +func readfull(m dsl.Matcher) { + m.Match(`$n, $err := io.ReadFull($_, $slice) + if $err != nil || $n != len($slice) { + $*_ + }`, + `$n, $err := io.ReadFull($_, $slice) + if $n != len($slice) || $err != nil { + $*_ + }`, + `$n, $err = io.ReadFull($_, $slice) + if $err != nil || $n != len($slice) { + $*_ + }`, + `$n, $err = io.ReadFull($_, $slice) + if $n != len($slice) || $err != nil { + $*_ + }`, + `if $n, $err := io.ReadFull($_, $slice); $n != len($slice) || $err != nil { + $*_ + }`, + `if $n, $err := io.ReadFull($_, $slice); $err != nil || $n != len($slice) { + $*_ + }`, + `if $n, $err = io.ReadFull($_, $slice); $n != len($slice) || $err != nil { + $*_ + }`, + `if $n, $err = io.ReadFull($_, $slice); $err != nil || $n != len($slice) { + $*_ + }`, + ).Report("io.ReadFull() returns err == nil iff n == len(slice)") +} + +func nilerr(m dsl.Matcher) { + m.Match( + `if err == nil { return err }`, + `if err == nil { return $*_, err }`, + ). + Report(`return nil error instead of nil value`) +} + +func mailaddress(m dsl.Matcher) { + m.Match( + "fmt.Sprintf(`\"%s\" <%s>`, $NAME, $EMAIL)", + "fmt.Sprintf(`\"%s\"<%s>`, $NAME, $EMAIL)", + "fmt.Sprintf(`%s <%s>`, $NAME, $EMAIL)", + "fmt.Sprintf(`%s<%s>`, $NAME, $EMAIL)", + `fmt.Sprintf("\"%s\"<%s>", $NAME, $EMAIL)`, + `fmt.Sprintf("\"%s\" <%s>", $NAME, $EMAIL)`, + `fmt.Sprintf("%s<%s>", $NAME, $EMAIL)`, + `fmt.Sprintf("%s <%s>", $NAME, $EMAIL)`, + ). + Report("use net/mail Address.String() instead of fmt.Sprintf()"). + Suggest("(&mail.Address{Name:$NAME, Address:$EMAIL}).String()") +} + +func errnetclosed(m dsl.Matcher) { + m.Match( + `strings.Contains($err.Error(), $text)`, + ). + Where(m["text"].Text.Matches("\".*closed network connection.*\"")). + Report(`String matching against error texts is fragile; use net.ErrClosed instead`). + Suggest(`errors.Is($err, net.ErrClosed)`) +} + +func httpheaderadd(m dsl.Matcher) { + m.Match( + `$H.Add($KEY, $VALUE)`, + ). + Where(m["H"].Type.Is("http.Header")). + Report("use http.Header.Set method instead of Add to overwrite all existing header values"). + Suggest(`$H.Set($KEY, $VALUE)`) +} + +func hmacnew(m dsl.Matcher) { + m.Match("hmac.New(func() hash.Hash { return $x }, $_)", + `$f := func() hash.Hash { return $x } + $*_ + hmac.New($f, $_)`, + ).Where(m["x"].Pure). + Report("invalid hash passed to hmac.New()") +} + +func writestring(m dsl.Matcher) { + m.Match(`io.WriteString($w, string($b))`). + Where(m["b"].Type.Is("[]byte")). + Suggest("$w.Write($b)") +} + +func badlock(m dsl.Matcher) { + // Shouldn't give many false positives without type filter + // as Lock+Unlock pairs in combination with defer gives us pretty + // a good chance to guess correctly. If we constrain the type to sync.Mutex + // then it'll be harder to match embedded locks and custom methods + // that may forward the call to the sync.Mutex (or other synchronization primitive). 
+ + m.Match(`$mu.Lock(); defer $mu.RUnlock()`).Report(`maybe $mu.RLock() was intended?`) + m.Match(`$mu.RLock(); defer $mu.Unlock()`).Report(`maybe $mu.Lock() was intended?`) +} diff --git a/client/write_test.go b/client/write_test.go index 3fdb9ece0f615..a87957e615c0a 100644 --- a/client/write_test.go +++ b/client/write_test.go @@ -22,13 +22,14 @@ import ( "math/rand" "testing" + "github.com/samber/lo" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/entity" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/samber/lo" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" ) type WriteSuite struct { From d27db996973f8ef5c4080f6d66173e43b5c7355a Mon Sep 17 00:00:00 2001 From: shaoting-huang <167743503+shaoting-huang@users.noreply.github.com> Date: Mon, 20 May 2024 21:11:39 +0800 Subject: [PATCH 003/126] enhance: upgrade amazonlinux2023 builder image go version to 1.21 (#33176) Signed-off-by: shaoting-huang [shaoting-huang@zilliz.com] issue: https://github.com/milvus-io/milvus/issues/32982 Go 1.21 introduces several improvements and changes over Go 1.20, which is quite stable now. This PR is mainly for upgrading images Golang version from 1.20 to 1.21. Signed-off-by: shaoting-huang --- build/docker/builder/cpu/amazonlinux2023/Dockerfile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/build/docker/builder/cpu/amazonlinux2023/Dockerfile b/build/docker/builder/cpu/amazonlinux2023/Dockerfile index d5516fd46ab0f..0e0502d0ad621 100644 --- a/build/docker/builder/cpu/amazonlinux2023/Dockerfile +++ b/build/docker/builder/cpu/amazonlinux2023/Dockerfile @@ -14,10 +14,19 @@ FROM amazonlinux:2023 ARG TARGETARCH RUN dnf install -y wget g++ gcc gdb libatomic libstdc++-static ninja-build git make zip unzip tar which \ - autoconf automake golang python3 python3-pip perl-FindBin texinfo \ + autoconf automake python3 python3-pip perl-FindBin texinfo \ pkg-config libuuid-devel libaio perl-IPC-Cmd libasan openblas-devel && \ rm -rf /var/cache/yum/* +ENV GOPATH /go +ENV GOROOT /usr/local/go +ENV GO111MODULE on +ENV PATH $GOPATH/bin:$GOROOT/bin:$PATH +RUN mkdir -p /usr/local/go && wget -qO- "https://go.dev/dl/go1.21.10.linux-$TARGETARCH.tar.gz" | tar --strip-components=1 -xz -C /usr/local/go && \ + mkdir -p "$GOPATH/src" "$GOPATH/bin" && \ + go clean --modcache && \ + chmod -R 777 "$GOPATH" && chmod -R a+w $(go env GOTOOLDIR) + RUN pip3 install conan==1.61.0 RUN echo "target arch $TARGETARCH" From 0d99db23b865827cb5d740dbefb1872f77b79398 Mon Sep 17 00:00:00 2001 From: jaime Date: Mon, 20 May 2024 22:03:39 +0800 Subject: [PATCH 004/126] fix: metrics leak on the coord nodes (#33075) issue: #32980 Signed-off-by: jaime --- internal/datacoord/meta.go | 17 ++-------- .../querycoordv2/meta/collection_manager.go | 1 + internal/querycoordv2/task/scheduler.go | 6 +++- pkg/metrics/datacoord_metrics.go | 34 +++++++++++-------- pkg/metrics/querycoord_metrics.go | 10 +++++- 5 files changed, 38 insertions(+), 30 deletions(-) diff --git a/internal/datacoord/meta.go b/internal/datacoord/meta.go index 892166fc4b46a..ea8cbc4dd647f 100644 --- a/internal/datacoord/meta.go +++ b/internal/datacoord/meta.go @@ -176,21 +176,10 @@ func (m *meta) AddCollection(collection *collectionInfo) { // DropCollection drop a collection from meta func (m *meta) DropCollection(collectionID int64) { log.Info("meta update: 
drop collection", zap.Int64("collectionID", collectionID)) - segments := m.SelectSegments(WithCollection(collectionID)) m.Lock() defer m.Unlock() - coll, ok := m.collections[collectionID] - if ok { - metrics.CleanupDataCoordNumStoredRows(coll.DatabaseName, collectionID) - metrics.CleanupDataCoordBulkInsertVectors(coll.DatabaseName, collectionID) - for _, seg := range segments { - metrics.CleanupDataCoordSegmentMetrics(coll.DatabaseName, collectionID, seg.ID) - } - } else { - log.Warn("not found database name", zap.Int64("collectionID", collectionID)) - } - delete(m.collections, collectionID) + metrics.CleanupDataCoordWithCollectionID(collectionID) metrics.DataCoordNumCollections.WithLabelValues().Set(float64(len(m.collections))) log.Info("meta update: drop collection - complete", zap.Int64("collectionID", collectionID)) } @@ -318,13 +307,13 @@ func (m *meta) GetCollectionBinlogSize() (int64, map[UniqueID]int64, map[UniqueI collectionRowsNum[segment.GetCollectionID()][segment.GetState()] += segment.GetNumOfRows() } } + + metrics.DataCoordNumStoredRows.Reset() for collectionID, statesRows := range collectionRowsNum { for state, rows := range statesRows { coll, ok := m.collections[collectionID] if ok { metrics.DataCoordNumStoredRows.WithLabelValues(coll.DatabaseName, fmt.Sprint(collectionID), state.String()).Set(float64(rows)) - } else { - log.Warn("not found database name", zap.Int64("collectionID", collectionID)) } } } diff --git a/internal/querycoordv2/meta/collection_manager.go b/internal/querycoordv2/meta/collection_manager.go index 766e59b66f0d8..4871459812c01 100644 --- a/internal/querycoordv2/meta/collection_manager.go +++ b/internal/querycoordv2/meta/collection_manager.go @@ -555,6 +555,7 @@ func (m *CollectionManager) RemoveCollection(collectionID typeutil.UniqueID) err } delete(m.collectionPartitions, collectionID) } + metrics.CleanQueryCoordMetricsWithCollectionID(collectionID) return nil } diff --git a/internal/querycoordv2/task/scheduler.go b/internal/querycoordv2/task/scheduler.go index 055e88a31e024..ed0f04c4d0a3c 100644 --- a/internal/querycoordv2/task/scheduler.go +++ b/internal/querycoordv2/task/scheduler.go @@ -798,7 +798,11 @@ func (scheduler *taskScheduler) remove(task Task) { scheduler.updateTaskMetrics() log.Info("task removed") - metrics.QueryCoordTaskLatency.WithLabelValues(scheduler.getTaskMetricsLabel(task), task.Shard()).Observe(float64(task.GetTaskLatency())) + + if scheduler.meta.Exist(task.CollectionID()) { + metrics.QueryCoordTaskLatency.WithLabelValues(fmt.Sprint(task.CollectionID()), + scheduler.getTaskMetricsLabel(task), task.Shard()).Observe(float64(task.GetTaskLatency())) + } } func (scheduler *taskScheduler) getTaskMetricsLabel(task Task) string { diff --git a/pkg/metrics/datacoord_metrics.go b/pkg/metrics/datacoord_metrics.go index 0fb96d9cff305..35c27256effe9 100644 --- a/pkg/metrics/datacoord_metrics.go +++ b/pkg/metrics/datacoord_metrics.go @@ -21,7 +21,6 @@ import ( "github.com/prometheus/client_golang/prometheus" - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -341,19 +340,26 @@ func CleanupDataCoordSegmentMetrics(dbName string, collectionID int64, segmentID }) } -func CleanupDataCoordNumStoredRows(dbName string, collectionID int64) { - for _, state := range commonpb.SegmentState_name { - DataCoordNumStoredRows.Delete(prometheus.Labels{ - databaseLabelName: dbName, - collectionIDLabelName: fmt.Sprint(collectionID), - segmentStateLabelName: fmt.Sprint(state), - }) - } -} - -func 
CleanupDataCoordBulkInsertVectors(dbName string, collectionID int64) { - DataCoordBulkVectors.Delete(prometheus.Labels{ - databaseLabelName: dbName, +func CleanupDataCoordWithCollectionID(collectionID int64) { + IndexTaskNum.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + DataCoordNumStoredRows.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + DataCoordBulkVectors.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + DataCoordSegmentBinLogFileCount.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + DataCoordStoredBinlogSize.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + DataCoordStoredIndexFilesSize.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + DataCoordSizeStoredL0Segment.Delete(prometheus.Labels{ collectionIDLabelName: fmt.Sprint(collectionID), }) } diff --git a/pkg/metrics/querycoord_metrics.go b/pkg/metrics/querycoord_metrics.go index 0bbc196a95a5a..b8a1301a09478 100644 --- a/pkg/metrics/querycoord_metrics.go +++ b/pkg/metrics/querycoord_metrics.go @@ -17,6 +17,8 @@ package metrics import ( + "fmt" + "github.com/prometheus/client_golang/prometheus" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -129,7 +131,7 @@ var ( Name: "task_latency", Help: "latency of all kind of task in query coord scheduler scheduler", Buckets: longTaskBuckets, - }, []string{taskTypeLabel, channelNameLabelName}) + }, []string{collectionIDLabelName, taskTypeLabel, channelNameLabelName}) ) // RegisterQueryCoord registers QueryCoord metrics @@ -145,3 +147,9 @@ func RegisterQueryCoord(registry *prometheus.Registry) { registry.MustRegister(QueryCoordCurrentTargetCheckpointUnixSeconds) registry.MustRegister(QueryCoordTaskLatency) } + +func CleanQueryCoordMetricsWithCollectionID(collectionID int64) { + QueryCoordTaskLatency.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) +} From 89ad3eb0caeae0e74b14ea92a202a06b3fc4febf Mon Sep 17 00:00:00 2001 From: smellthemoon <64083300+smellthemoon@users.noreply.github.com> Date: Mon, 20 May 2024 22:25:38 +0800 Subject: [PATCH 005/126] enhance: reduce memory when read field (#33195) Signed-off-by: lixinguo Co-authored-by: lixinguo --- .../util/importutilv2/parquet/field_reader.go | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/internal/util/importutilv2/parquet/field_reader.go b/internal/util/importutilv2/parquet/field_reader.go index 090a5e2a638fe..282366fff993d 100644 --- a/internal/util/importutilv2/parquet/field_reader.go +++ b/internal/util/importutilv2/parquet/field_reader.go @@ -130,15 +130,13 @@ func ReadBoolData(pcr *FieldReader, count int64) (any, error) { data := make([]bool, 0, count) for _, chunk := range chunked.Chunks() { dataNums := chunk.Data().Len() - chunkData := make([]bool, dataNums) boolReader, ok := chunk.(*array.Boolean) if !ok { return nil, WrapTypeErr("bool", chunk.DataType().Name(), pcr.field) } for i := 0; i < dataNums; i++ { - chunkData[i] = boolReader.Value(i) + data = append(data, boolReader.Value(i)) } - data = append(data, chunkData...) 
} if len(data) == 0 { return nil, nil @@ -154,42 +152,40 @@ func ReadIntegerOrFloatData[T constraints.Integer | constraints.Float](pcr *Fiel data := make([]T, 0, count) for _, chunk := range chunked.Chunks() { dataNums := chunk.Data().Len() - chunkData := make([]T, dataNums) switch chunk.DataType().ID() { case arrow.INT8: int8Reader := chunk.(*array.Int8) for i := 0; i < dataNums; i++ { - chunkData[i] = T(int8Reader.Value(i)) + data = append(data, T(int8Reader.Value(i))) } case arrow.INT16: int16Reader := chunk.(*array.Int16) for i := 0; i < dataNums; i++ { - chunkData[i] = T(int16Reader.Value(i)) + data = append(data, T(int16Reader.Value(i))) } case arrow.INT32: int32Reader := chunk.(*array.Int32) for i := 0; i < dataNums; i++ { - chunkData[i] = T(int32Reader.Value(i)) + data = append(data, T(int32Reader.Value(i))) } case arrow.INT64: int64Reader := chunk.(*array.Int64) for i := 0; i < dataNums; i++ { - chunkData[i] = T(int64Reader.Value(i)) + data = append(data, T(int64Reader.Value(i))) } case arrow.FLOAT32: float32Reader := chunk.(*array.Float32) for i := 0; i < dataNums; i++ { - chunkData[i] = T(float32Reader.Value(i)) + data = append(data, T(float32Reader.Value(i))) } case arrow.FLOAT64: float64Reader := chunk.(*array.Float64) for i := 0; i < dataNums; i++ { - chunkData[i] = T(float64Reader.Value(i)) + data = append(data, T(float64Reader.Value(i))) } default: return nil, WrapTypeErr("integer|float", chunk.DataType().Name(), pcr.field) } - data = append(data, chunkData...) } if len(data) == 0 { return nil, nil @@ -205,15 +201,13 @@ func ReadStringData(pcr *FieldReader, count int64) (any, error) { data := make([]string, 0, count) for _, chunk := range chunked.Chunks() { dataNums := chunk.Data().Len() - chunkData := make([]string, dataNums) stringReader, ok := chunk.(*array.String) if !ok { return nil, WrapTypeErr("string", chunk.DataType().Name(), pcr.field) } for i := 0; i < dataNums; i++ { - chunkData[i] = stringReader.Value(i) + data = append(data, stringReader.Value(i)) } - data = append(data, chunkData...) 
} if len(data) == 0 { return nil, nil From f8929cc36a0186de577ec2f2c03a40f254170ead Mon Sep 17 00:00:00 2001 From: aoiasd <45024769+aoiasd@users.noreply.github.com> Date: Tue, 21 May 2024 10:39:39 +0800 Subject: [PATCH 006/126] fix: can't generate traceID when use noop exporter (#33191) relate: https://github.com/milvus-io/milvus/issues/33190 Signed-off-by: aoiasd --- pkg/tracer/tracer.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pkg/tracer/tracer.go b/pkg/tracer/tracer.go index bb675f6f48a87..7f18634064df4 100644 --- a/pkg/tracer/tracer.go +++ b/pkg/tracer/tracer.go @@ -29,7 +29,6 @@ import ( "go.opentelemetry.io/otel/sdk/resource" sdk "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.4.0" - "go.opentelemetry.io/otel/trace" "go.uber.org/zap" "github.com/milvus-io/milvus/pkg/log" @@ -63,11 +62,6 @@ func CloseTracerProvider(ctx context.Context) error { } func SetTracerProvider(exp sdk.SpanExporter, traceIDRatio float64) { - if exp == nil { - otel.SetTracerProvider(trace.NewNoopTracerProvider()) - return - } - tp := sdk.NewTracerProvider( sdk.WithBatcher(exp), sdk.WithResource(resource.NewWithAttributes( From b3bcc107bbd274d9c88eddc8bbaa32fb287b6582 Mon Sep 17 00:00:00 2001 From: XuanYang-cn Date: Tue, 21 May 2024 11:35:38 +0800 Subject: [PATCH 007/126] fix: Remove L0 compactor in completedCompactor (#33169) See also: #33168 Signed-off-by: yangxuan --- internal/datanode/broker/mock_broker.go | 30 ++++++------- internal/datanode/compaction_executor.go | 3 +- internal/datanode/compaction_executor_test.go | 42 +++++++++++++++++++ 3 files changed, 59 insertions(+), 16 deletions(-) diff --git a/internal/datanode/broker/mock_broker.go b/internal/datanode/broker/mock_broker.go index f8b731c80e281..ae735bff96dbe 100644 --- a/internal/datanode/broker/mock_broker.go +++ b/internal/datanode/broker/mock_broker.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.30.1. DO NOT EDIT. +// Code generated by mockery v2.32.4. DO NOT EDIT. 
package broker @@ -63,8 +63,8 @@ type MockBroker_AssignSegmentID_Call struct { } // AssignSegmentID is a helper method to define mock.On call -// - ctx context.Context -// - reqs ...*datapb.SegmentIDRequest +// - ctx context.Context +// - reqs ...*datapb.SegmentIDRequest func (_e *MockBroker_Expecter) AssignSegmentID(ctx interface{}, reqs ...interface{}) *MockBroker_AssignSegmentID_Call { return &MockBroker_AssignSegmentID_Call{Call: _e.mock.On("AssignSegmentID", append([]interface{}{ctx}, reqs...)...)} @@ -125,8 +125,8 @@ type MockBroker_DropVirtualChannel_Call struct { } // DropVirtualChannel is a helper method to define mock.On call -// - ctx context.Context -// - req *datapb.DropVirtualChannelRequest +// - ctx context.Context +// - req *datapb.DropVirtualChannelRequest func (_e *MockBroker_Expecter) DropVirtualChannel(ctx interface{}, req interface{}) *MockBroker_DropVirtualChannel_Call { return &MockBroker_DropVirtualChannel_Call{Call: _e.mock.On("DropVirtualChannel", ctx, req)} } @@ -180,8 +180,8 @@ type MockBroker_GetSegmentInfo_Call struct { } // GetSegmentInfo is a helper method to define mock.On call -// - ctx context.Context -// - segmentIDs []int64 +// - ctx context.Context +// - segmentIDs []int64 func (_e *MockBroker_Expecter) GetSegmentInfo(ctx interface{}, segmentIDs interface{}) *MockBroker_GetSegmentInfo_Call { return &MockBroker_GetSegmentInfo_Call{Call: _e.mock.On("GetSegmentInfo", ctx, segmentIDs)} } @@ -223,8 +223,8 @@ type MockBroker_ReportTimeTick_Call struct { } // ReportTimeTick is a helper method to define mock.On call -// - ctx context.Context -// - msgs []*msgpb.DataNodeTtMsg +// - ctx context.Context +// - msgs []*msgpb.DataNodeTtMsg func (_e *MockBroker_Expecter) ReportTimeTick(ctx interface{}, msgs interface{}) *MockBroker_ReportTimeTick_Call { return &MockBroker_ReportTimeTick_Call{Call: _e.mock.On("ReportTimeTick", ctx, msgs)} } @@ -266,8 +266,8 @@ type MockBroker_SaveBinlogPaths_Call struct { } // SaveBinlogPaths is a helper method to define mock.On call -// - ctx context.Context -// - req *datapb.SaveBinlogPathsRequest +// - ctx context.Context +// - req *datapb.SaveBinlogPathsRequest func (_e *MockBroker_Expecter) SaveBinlogPaths(ctx interface{}, req interface{}) *MockBroker_SaveBinlogPaths_Call { return &MockBroker_SaveBinlogPaths_Call{Call: _e.mock.On("SaveBinlogPaths", ctx, req)} } @@ -309,8 +309,8 @@ type MockBroker_UpdateChannelCheckpoint_Call struct { } // UpdateChannelCheckpoint is a helper method to define mock.On call -// - ctx context.Context -// - channelCPs []*msgpb.MsgPosition +// - ctx context.Context +// - channelCPs []*msgpb.MsgPosition func (_e *MockBroker_Expecter) UpdateChannelCheckpoint(ctx interface{}, channelCPs interface{}) *MockBroker_UpdateChannelCheckpoint_Call { return &MockBroker_UpdateChannelCheckpoint_Call{Call: _e.mock.On("UpdateChannelCheckpoint", ctx, channelCPs)} } @@ -352,8 +352,8 @@ type MockBroker_UpdateSegmentStatistics_Call struct { } // UpdateSegmentStatistics is a helper method to define mock.On call -// - ctx context.Context -// - req *datapb.UpdateSegmentStatisticsRequest +// - ctx context.Context +// - req *datapb.UpdateSegmentStatisticsRequest func (_e *MockBroker_Expecter) UpdateSegmentStatistics(ctx interface{}, req interface{}) *MockBroker_UpdateSegmentStatistics_Call { return &MockBroker_UpdateSegmentStatistics_Call{Call: _e.mock.On("UpdateSegmentStatistics", ctx, req)} } diff --git a/internal/datanode/compaction_executor.go b/internal/datanode/compaction_executor.go index 11a8a93ab4a78..bbcfbbb8279d6 
100644 --- a/internal/datanode/compaction_executor.go +++ b/internal/datanode/compaction_executor.go @@ -190,9 +190,10 @@ func (c *compactionExecutor) getAllCompactionResults() []*datapb.CompactionPlanR return true }) - // remote level zero results + // remove level zero results lo.ForEach(completedLevelZero, func(planID int64, _ int) { c.completed.Remove(planID) + c.completedCompactor.Remove(planID) }) if len(results) > 0 { diff --git a/internal/datanode/compaction_executor_test.go b/internal/datanode/compaction_executor_test.go index d56cb5e2bc39a..68eb61c531e57 100644 --- a/internal/datanode/compaction_executor_test.go +++ b/internal/datanode/compaction_executor_test.go @@ -21,7 +21,9 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus/internal/proto/datapb" ) @@ -115,6 +117,46 @@ func TestCompactionExecutor(t *testing.T) { t.FailNow() } }) + + t.Run("test getAllCompactionResults", func(t *testing.T) { + ex := newCompactionExecutor() + + mockC := newMockCompactor(true) + ex.executing.Insert(int64(1), mockC) + + ex.completedCompactor.Insert(int64(2), mockC) + ex.completed.Insert(int64(2), &datapb.CompactionPlanResult{ + PlanID: 2, + State: commonpb.CompactionState_Completed, + Type: datapb.CompactionType_MixCompaction, + }) + + ex.completedCompactor.Insert(int64(3), mockC) + ex.completed.Insert(int64(3), &datapb.CompactionPlanResult{ + PlanID: 3, + State: commonpb.CompactionState_Completed, + Type: datapb.CompactionType_Level0DeleteCompaction, + }) + + require.Equal(t, 2, ex.completed.Len()) + require.Equal(t, 2, ex.completedCompactor.Len()) + require.Equal(t, 1, ex.executing.Len()) + + result := ex.getAllCompactionResults() + assert.Equal(t, 3, len(result)) + + for _, res := range result { + if res.PlanID == int64(1) { + assert.Equal(t, res.GetState(), commonpb.CompactionState_Executing) + } else { + assert.Equal(t, res.GetState(), commonpb.CompactionState_Completed) + } + } + + assert.Equal(t, 1, ex.completed.Len()) + require.Equal(t, 1, ex.completedCompactor.Len()) + require.Equal(t, 1, ex.executing.Len()) + }) } func newMockCompactor(isvalid bool) *mockCompactor { From 0f8c6f49ff19624d6cd3c4aa788bb184baa051aa Mon Sep 17 00:00:00 2001 From: Bingyi Sun Date: Tue, 21 May 2024 11:53:39 +0800 Subject: [PATCH 008/126] enhance: mmap load raw data if scalar index does not have raw data (#33175) Signed-off-by: sunby --- internal/querynodev2/segments/reduce_test.go | 2 +- internal/querynodev2/segments/retrieve_test.go | 2 +- internal/querynodev2/segments/search_test.go | 2 +- internal/querynodev2/segments/segment.go | 4 ++-- internal/querynodev2/segments/segment_loader.go | 6 +++--- internal/querynodev2/segments/segment_test.go | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/internal/querynodev2/segments/reduce_test.go b/internal/querynodev2/segments/reduce_test.go index 21defdd64c122..9693dc2f717ad 100644 --- a/internal/querynodev2/segments/reduce_test.go +++ b/internal/querynodev2/segments/reduce_test.go @@ -101,7 +101,7 @@ func (suite *ReduceSuite) SetupTest() { ) suite.Require().NoError(err) for _, binlog := range binlogs { - err = suite.segment.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog) + err = suite.segment.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog, false) suite.Require().NoError(err) } } diff --git a/internal/querynodev2/segments/retrieve_test.go 
b/internal/querynodev2/segments/retrieve_test.go index aa2562b1e866c..ea58f2802b92e 100644 --- a/internal/querynodev2/segments/retrieve_test.go +++ b/internal/querynodev2/segments/retrieve_test.go @@ -109,7 +109,7 @@ func (suite *RetrieveSuite) SetupTest() { ) suite.Require().NoError(err) for _, binlog := range binlogs { - err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog) + err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog, false) suite.Require().NoError(err) } diff --git a/internal/querynodev2/segments/search_test.go b/internal/querynodev2/segments/search_test.go index 81475b14c27db..415ad28ccee98 100644 --- a/internal/querynodev2/segments/search_test.go +++ b/internal/querynodev2/segments/search_test.go @@ -100,7 +100,7 @@ func (suite *SearchSuite) SetupTest() { ) suite.Require().NoError(err) for _, binlog := range binlogs { - err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog) + err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog, false) suite.Require().NoError(err) } diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index c9a0df822efad..68f914d73302c 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -967,7 +967,7 @@ func (s *LocalSegment) LoadMultiFieldData(ctx context.Context) error { return nil } -func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCount int64, field *datapb.FieldBinlog) error { +func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCount int64, field *datapb.FieldBinlog, useMmap bool) error { if !s.ptrLock.RLockIf(state.IsNotReleased) { return merr.WrapErrSegmentNotLoaded(s.ID(), "segment released") } @@ -1006,7 +1006,7 @@ func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCoun } collection := s.collection - mmapEnabled := common.IsFieldMmapEnabled(collection.Schema(), fieldID) || + mmapEnabled := useMmap || common.IsFieldMmapEnabled(collection.Schema(), fieldID) || (!common.FieldHasMmapKey(collection.Schema(), fieldID) && params.Params.QueryNodeCfg.MmapEnabled.GetAsBool()) loadFieldDataInfo.appendMMapDirPath(paramtable.Get().QueryNodeCfg.MmapDirPath.GetValue()) loadFieldDataInfo.enableMmap(fieldID, mmapEnabled) diff --git a/internal/querynodev2/segments/segment_loader.go b/internal/querynodev2/segments/segment_loader.go index 7c6f83dda7938..2db43f4ede218 100644 --- a/internal/querynodev2/segments/segment_loader.go +++ b/internal/querynodev2/segments/segment_loader.go @@ -508,7 +508,7 @@ func (loader *segmentLoaderV2) loadSealedSegmentFields(ctx context.Context, segm runningGroup, _ := errgroup.WithContext(ctx) fields.Range(func(fieldID int64, field *schemapb.FieldSchema) bool { runningGroup.Go(func() error { - return segment.LoadFieldData(ctx, fieldID, rowCount, nil) + return segment.LoadFieldData(ctx, fieldID, rowCount, nil, false) }) return true }) @@ -1058,7 +1058,7 @@ func (loader *segmentLoader) loadSealedSegment(ctx context.Context, loadInfo *qu zap.String("index", info.IndexInfo.GetIndexName()), ) // for scalar index's raw data, only load to mmap not memory - if err = segment.LoadFieldData(ctx, fieldID, loadInfo.GetNumOfRows(), info.FieldBinlog); err != nil { + if err = segment.LoadFieldData(ctx, fieldID, loadInfo.GetNumOfRows(), info.FieldBinlog, true); err != nil { log.Warn("load raw data failed", 
zap.Int64("fieldID", fieldID), zap.Error(err)) return err } @@ -1212,7 +1212,7 @@ func loadSealedSegmentFields(ctx context.Context, collection *Collection, segmen fieldID, rowCount, fieldBinLog, - ) + false) }) } err := runningGroup.Wait() diff --git a/internal/querynodev2/segments/segment_test.go b/internal/querynodev2/segments/segment_test.go index c05de4c83d100..d4f1855ab422f 100644 --- a/internal/querynodev2/segments/segment_test.go +++ b/internal/querynodev2/segments/segment_test.go @@ -100,7 +100,7 @@ func (suite *SegmentSuite) SetupTest() { g, err := suite.sealed.(*LocalSegment).StartLoadData() suite.Require().NoError(err) for _, binlog := range binlogs { - err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog) + err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog, false) suite.Require().NoError(err) } g.Done(nil) From 9f81290c6329739d2df50ef92f0af412eb74eab3 Mon Sep 17 00:00:00 2001 From: Jiquan Long Date: Tue, 21 May 2024 11:57:51 +0800 Subject: [PATCH 009/126] fix: try best to get enough query results (#33178) issue: https://github.com/milvus-io/milvus/issues/33137 Signed-off-by: longjiquan --- internal/proxy/task_query.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/proxy/task_query.go b/internal/proxy/task_query.go index 618805a4f9bc1..18abb6ed0531b 100644 --- a/internal/proxy/task_query.go +++ b/internal/proxy/task_query.go @@ -633,7 +633,7 @@ func reduceRetrieveResults(ctx context.Context, retrieveResults []*internalpb.Re var retSize int64 maxOutputSize := paramtable.Get().QuotaConfig.MaxOutputSize.GetAsInt64() - for j := 0; j < loopEnd; j++ { + for j := 0; j < loopEnd; { sel, drainOneResult := typeutil.SelectMinPK(retrieveLimit, validRetrieveResults, cursors) if sel == -1 || (reduceStopForBest && drainOneResult) { break @@ -643,6 +643,7 @@ func reduceRetrieveResults(ctx context.Context, retrieveResults []*internalpb.Re if _, ok := idSet[pk]; !ok { retSize += typeutil.AppendFieldData(ret.FieldsData, validRetrieveResults[sel].GetFieldsData(), cursors[sel]) idSet[pk] = struct{}{} + j++ } else { // primary keys duplicate skipDupCnt++ From f31a20faadc131daac0a2cebafd1b1c017776ba1 Mon Sep 17 00:00:00 2001 From: congqixia Date: Tue, 21 May 2024 11:59:39 +0800 Subject: [PATCH 010/126] fix: [Backport] Mark channel checkpoint dropped prevent cp lag metrics leakage (#32454) (#33198) Cherry-pick from 2.3 pr: #32454 See also #31506 #31508 --------- Signed-off-by: Congqi Xia --- internal/datacoord/meta.go | 22 ++++++++++++++++++++++ internal/datacoord/services.go | 2 ++ 2 files changed, 24 insertions(+) diff --git a/internal/datacoord/meta.go b/internal/datacoord/meta.go index ea8cbc4dd647f..065802a6343f6 100644 --- a/internal/datacoord/meta.go +++ b/internal/datacoord/meta.go @@ -1363,6 +1363,28 @@ func (m *meta) UpdateChannelCheckpoint(vChannel string, pos *msgpb.MsgPosition) return nil } +// MarkChannelCheckpointDropped set channel checkpoint to MaxUint64 preventing future update +// and remove the metrics for channel checkpoint lag. 
+func (m *meta) MarkChannelCheckpointDropped(ctx context.Context, channel string) error { + m.channelCPs.Lock() + defer m.channelCPs.Unlock() + + cp := &msgpb.MsgPosition{ + ChannelName: channel, + Timestamp: math.MaxUint64, + } + + err := m.catalog.SaveChannelCheckpoints(ctx, []*msgpb.MsgPosition{cp}) + if err != nil { + return err + } + + m.channelCPs.checkpoints[channel] = cp + + metrics.DataCoordCheckpointUnixSeconds.DeleteLabelValues(fmt.Sprint(paramtable.GetNodeID()), channel) + return nil +} + // UpdateChannelCheckpoints updates and saves channel checkpoints. func (m *meta) UpdateChannelCheckpoints(positions []*msgpb.MsgPosition) error { m.channelCPs.Lock() diff --git a/internal/datacoord/services.go b/internal/datacoord/services.go index 49ee83fda364f..6a00585e8fc9e 100644 --- a/internal/datacoord/services.go +++ b/internal/datacoord/services.go @@ -618,6 +618,8 @@ func (s *Server) DropVirtualChannel(ctx context.Context, req *datapb.DropVirtual s.segmentManager.DropSegmentsOfChannel(ctx, channel) s.compactionHandler.removeTasksByChannel(channel) metrics.DataCoordCheckpointUnixSeconds.DeleteLabelValues(fmt.Sprint(paramtable.GetNodeID()), channel) + s.meta.MarkChannelCheckpointDropped(ctx, channel) + // no compaction triggered in Drop procedure return resp, nil } From f681c4b03484a1e13869991c7ea036878ac05567 Mon Sep 17 00:00:00 2001 From: Xiaofan <83447078+xiaofan-luan@users.noreply.github.com> Date: Tue, 21 May 2024 14:11:39 +0800 Subject: [PATCH 011/126] enhance: remove describe index in rootcoord broker (#33206) fix #33205 remove the dependency between datacoord and rootcoord Signed-off-by: xiaofanluan --- internal/rootcoord/broker.go | 10 +--------- internal/rootcoord/mock_test.go | 5 ----- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/internal/rootcoord/broker.go b/internal/rootcoord/broker.go index c1fa30d0ac037..8701a5c6f2068 100644 --- a/internal/rootcoord/broker.go +++ b/internal/rootcoord/broker.go @@ -58,9 +58,7 @@ type Broker interface { GcConfirm(ctx context.Context, collectionID, partitionID UniqueID) bool DropCollectionIndex(ctx context.Context, collID UniqueID, partIDs []UniqueID) error - GetSegmentIndexState(ctx context.Context, collID UniqueID, indexName string, segIDs []UniqueID) ([]*indexpb.SegmentIndexState, error) - DescribeIndex(ctx context.Context, colID UniqueID) (*indexpb.DescribeIndexResponse, error) - + // notify observer to clean their meta cache BroadcastAlteredCollection(ctx context.Context, req *milvuspb.AlterCollectionRequest) error } @@ -270,12 +268,6 @@ func (b *ServerBroker) BroadcastAlteredCollection(ctx context.Context, req *milv return nil } -func (b *ServerBroker) DescribeIndex(ctx context.Context, colID UniqueID) (*indexpb.DescribeIndexResponse, error) { - return b.s.dataCoord.DescribeIndex(ctx, &indexpb.DescribeIndexRequest{ - CollectionID: colID, - }) -} - func (b *ServerBroker) GcConfirm(ctx context.Context, collectionID, partitionID UniqueID) bool { log := log.Ctx(ctx).With(zap.Int64("collection", collectionID), zap.Int64("partition", partitionID)) diff --git a/internal/rootcoord/mock_test.go b/internal/rootcoord/mock_test.go index f17ff9d1e27eb..fe0940928d6a7 100644 --- a/internal/rootcoord/mock_test.go +++ b/internal/rootcoord/mock_test.go @@ -899,7 +899,6 @@ type mockBroker struct { FlushFunc func(ctx context.Context, cID int64, segIDs []int64) error DropCollectionIndexFunc func(ctx context.Context, collID UniqueID, partIDs []UniqueID) error - DescribeIndexFunc func(ctx context.Context, colID UniqueID) 
(*indexpb.DescribeIndexResponse, error) GetSegmentIndexStateFunc func(ctx context.Context, collID UniqueID, indexName string, segIDs []UniqueID) ([]*indexpb.SegmentIndexState, error) BroadcastAlteredCollectionFunc func(ctx context.Context, req *milvuspb.AlterCollectionRequest) error @@ -935,10 +934,6 @@ func (b mockBroker) DropCollectionIndex(ctx context.Context, collID UniqueID, pa return b.DropCollectionIndexFunc(ctx, collID, partIDs) } -func (b mockBroker) DescribeIndex(ctx context.Context, colID UniqueID) (*indexpb.DescribeIndexResponse, error) { - return b.DescribeIndexFunc(ctx, colID) -} - func (b mockBroker) GetSegmentIndexState(ctx context.Context, collID UniqueID, indexName string, segIDs []UniqueID) ([]*indexpb.SegmentIndexState, error) { return b.GetSegmentIndexStateFunc(ctx, collID, indexName, segIDs) } From 2013d972439a06febeb6c18fab2fe8261b4a4f77 Mon Sep 17 00:00:00 2001 From: wei liu Date: Tue, 21 May 2024 14:29:39 +0800 Subject: [PATCH 012/126] enhance: Enable to dynamic update balancer policy in querycoord (#33037) issue: #33036 This PR enable to dynamic update balancer policy without restart querycoord. --------- Signed-off-by: Wei Liu --- .../querycoordv2/checkers/balance_checker.go | 8 +-- .../checkers/balance_checker_test.go | 2 +- .../querycoordv2/checkers/channel_checker.go | 16 +++--- .../checkers/channel_checker_test.go | 2 +- internal/querycoordv2/checkers/controller.go | 10 ++-- .../checkers/controller_base_test.go | 3 +- .../querycoordv2/checkers/controller_test.go | 2 +- .../querycoordv2/checkers/segment_checker.go | 16 +++--- .../checkers/segment_checker_test.go | 2 +- internal/querycoordv2/handlers.go | 4 +- internal/querycoordv2/ops_service_test.go | 4 +- internal/querycoordv2/server.go | 55 ++++++++++++------- internal/querycoordv2/server_test.go | 2 +- internal/querycoordv2/services_test.go | 2 +- 14 files changed, 73 insertions(+), 55 deletions(-) diff --git a/internal/querycoordv2/checkers/balance_checker.go b/internal/querycoordv2/checkers/balance_checker.go index f611bdef1887f..81c7c96271637 100644 --- a/internal/querycoordv2/checkers/balance_checker.go +++ b/internal/querycoordv2/checkers/balance_checker.go @@ -39,28 +39,28 @@ import ( // BalanceChecker checks the cluster distribution and generates balance tasks. type BalanceChecker struct { *checkerActivation - balance.Balance meta *meta.Meta nodeManager *session.NodeManager normalBalanceCollectionsCurrentRound typeutil.UniqueSet scheduler task.Scheduler targetMgr *meta.TargetManager + getBalancerFunc GetBalancerFunc } func NewBalanceChecker(meta *meta.Meta, targetMgr *meta.TargetManager, - balancer balance.Balance, nodeMgr *session.NodeManager, scheduler task.Scheduler, + getBalancerFunc GetBalancerFunc, ) *BalanceChecker { return &BalanceChecker{ checkerActivation: newCheckerActivation(), - Balance: balancer, meta: meta, targetMgr: targetMgr, nodeManager: nodeMgr, normalBalanceCollectionsCurrentRound: typeutil.NewUniqueSet(), scheduler: scheduler, + getBalancerFunc: getBalancerFunc, } } @@ -155,7 +155,7 @@ func (b *BalanceChecker) balanceReplicas(replicaIDs []int64) ([]balance.SegmentA if replica == nil { continue } - sPlans, cPlans := b.Balance.BalanceReplica(replica) + sPlans, cPlans := b.getBalancerFunc().BalanceReplica(replica) segmentPlans = append(segmentPlans, sPlans...) channelPlans = append(channelPlans, cPlans...) 
if len(segmentPlans) != 0 || len(channelPlans) != 0 { diff --git a/internal/querycoordv2/checkers/balance_checker_test.go b/internal/querycoordv2/checkers/balance_checker_test.go index 6cc52b58145d4..e389ab64f9370 100644 --- a/internal/querycoordv2/checkers/balance_checker_test.go +++ b/internal/querycoordv2/checkers/balance_checker_test.go @@ -78,7 +78,7 @@ func (suite *BalanceCheckerTestSuite) SetupTest() { suite.targetMgr = meta.NewTargetManager(suite.broker, suite.meta) suite.balancer = balance.NewMockBalancer(suite.T()) - suite.checker = NewBalanceChecker(suite.meta, suite.targetMgr, suite.balancer, suite.nodeMgr, suite.scheduler) + suite.checker = NewBalanceChecker(suite.meta, suite.targetMgr, suite.nodeMgr, suite.scheduler, func() balance.Balance { return suite.balancer }) } func (suite *BalanceCheckerTestSuite) TearDownTest() { diff --git a/internal/querycoordv2/checkers/channel_checker.go b/internal/querycoordv2/checkers/channel_checker.go index 9ba0761107b2f..d00ea8cb46d7f 100644 --- a/internal/querycoordv2/checkers/channel_checker.go +++ b/internal/querycoordv2/checkers/channel_checker.go @@ -36,27 +36,27 @@ import ( // TODO(sunby): have too much similar codes with SegmentChecker type ChannelChecker struct { *checkerActivation - meta *meta.Meta - dist *meta.DistributionManager - targetMgr *meta.TargetManager - nodeMgr *session.NodeManager - balancer balance.Balance + meta *meta.Meta + dist *meta.DistributionManager + targetMgr *meta.TargetManager + nodeMgr *session.NodeManager + getBalancerFunc GetBalancerFunc } func NewChannelChecker( meta *meta.Meta, dist *meta.DistributionManager, targetMgr *meta.TargetManager, - balancer balance.Balance, nodeMgr *session.NodeManager, + getBalancerFunc GetBalancerFunc, ) *ChannelChecker { return &ChannelChecker{ checkerActivation: newCheckerActivation(), meta: meta, dist: dist, targetMgr: targetMgr, - balancer: balancer, nodeMgr: nodeMgr, + getBalancerFunc: getBalancerFunc, } } @@ -215,7 +215,7 @@ func (c *ChannelChecker) createChannelLoadTask(ctx context.Context, channels []* if len(rwNodes) == 0 { rwNodes = replica.GetRWNodes() } - plan := c.balancer.AssignChannel([]*meta.DmChannel{ch}, rwNodes, false) + plan := c.getBalancerFunc().AssignChannel([]*meta.DmChannel{ch}, rwNodes, false) plans = append(plans, plan...) 
} diff --git a/internal/querycoordv2/checkers/channel_checker_test.go b/internal/querycoordv2/checkers/channel_checker_test.go index 6aa9c062887fe..149123194d997 100644 --- a/internal/querycoordv2/checkers/channel_checker_test.go +++ b/internal/querycoordv2/checkers/channel_checker_test.go @@ -77,7 +77,7 @@ func (suite *ChannelCheckerTestSuite) SetupTest() { distManager := meta.NewDistributionManager() balancer := suite.createMockBalancer() - suite.checker = NewChannelChecker(suite.meta, distManager, targetManager, balancer, suite.nodeMgr) + suite.checker = NewChannelChecker(suite.meta, distManager, targetManager, suite.nodeMgr, func() balance.Balance { return balancer }) suite.broker.EXPECT().GetPartitions(mock.Anything, int64(1)).Return([]int64{1}, nil).Maybe() } diff --git a/internal/querycoordv2/checkers/controller.go b/internal/querycoordv2/checkers/controller.go index 133a5abf18202..efc8b05faf4e9 100644 --- a/internal/querycoordv2/checkers/controller.go +++ b/internal/querycoordv2/checkers/controller.go @@ -35,6 +35,8 @@ import ( var errTypeNotFound = errors.New("checker type not found") +type GetBalancerFunc = func() balance.Balance + type CheckerController struct { cancel context.CancelFunc manualCheckChs map[utils.CheckerType]chan struct{} @@ -55,17 +57,17 @@ func NewCheckerController( meta *meta.Meta, dist *meta.DistributionManager, targetMgr *meta.TargetManager, - balancer balance.Balance, nodeMgr *session.NodeManager, scheduler task.Scheduler, broker meta.Broker, + getBalancerFunc GetBalancerFunc, ) *CheckerController { // CheckerController runs checkers with the order, // the former checker has higher priority checkers := map[utils.CheckerType]Checker{ - utils.ChannelChecker: NewChannelChecker(meta, dist, targetMgr, balancer, nodeMgr), - utils.SegmentChecker: NewSegmentChecker(meta, dist, targetMgr, balancer, nodeMgr), - utils.BalanceChecker: NewBalanceChecker(meta, targetMgr, balancer, nodeMgr, scheduler), + utils.ChannelChecker: NewChannelChecker(meta, dist, targetMgr, nodeMgr, getBalancerFunc), + utils.SegmentChecker: NewSegmentChecker(meta, dist, targetMgr, nodeMgr, getBalancerFunc), + utils.BalanceChecker: NewBalanceChecker(meta, targetMgr, nodeMgr, scheduler, getBalancerFunc), utils.IndexChecker: NewIndexChecker(meta, dist, broker, nodeMgr), utils.LeaderChecker: NewLeaderChecker(meta, dist, targetMgr, nodeMgr), } diff --git a/internal/querycoordv2/checkers/controller_base_test.go b/internal/querycoordv2/checkers/controller_base_test.go index 9f5b233defa74..762a8a2bde590 100644 --- a/internal/querycoordv2/checkers/controller_base_test.go +++ b/internal/querycoordv2/checkers/controller_base_test.go @@ -77,7 +77,8 @@ func (suite *ControllerBaseTestSuite) SetupTest() { suite.balancer = balance.NewMockBalancer(suite.T()) suite.scheduler = task.NewMockScheduler(suite.T()) - suite.controller = NewCheckerController(suite.meta, suite.dist, suite.targetManager, suite.balancer, suite.nodeMgr, suite.scheduler, suite.broker) + + suite.controller = NewCheckerController(suite.meta, suite.dist, suite.targetManager, suite.nodeMgr, suite.scheduler, suite.broker, func() balance.Balance { return suite.balancer }) } func (s *ControllerBaseTestSuite) TestActivation() { diff --git a/internal/querycoordv2/checkers/controller_test.go b/internal/querycoordv2/checkers/controller_test.go index b69ab9c10f620..c04f4ecaea179 100644 --- a/internal/querycoordv2/checkers/controller_test.go +++ b/internal/querycoordv2/checkers/controller_test.go @@ -81,7 +81,7 @@ func (suite *CheckerControllerSuite) 
SetupTest() { suite.balancer = balance.NewMockBalancer(suite.T()) suite.scheduler = task.NewMockScheduler(suite.T()) - suite.controller = NewCheckerController(suite.meta, suite.dist, suite.targetManager, suite.balancer, suite.nodeMgr, suite.scheduler, suite.broker) + suite.controller = NewCheckerController(suite.meta, suite.dist, suite.targetManager, suite.nodeMgr, suite.scheduler, suite.broker, func() balance.Balance { return suite.balancer }) } func (suite *CheckerControllerSuite) TestBasic() { diff --git a/internal/querycoordv2/checkers/segment_checker.go b/internal/querycoordv2/checkers/segment_checker.go index 1c85aef177df3..bcdfdb3f45cf4 100644 --- a/internal/querycoordv2/checkers/segment_checker.go +++ b/internal/querycoordv2/checkers/segment_checker.go @@ -41,27 +41,27 @@ const initialTargetVersion = int64(0) type SegmentChecker struct { *checkerActivation - meta *meta.Meta - dist *meta.DistributionManager - targetMgr *meta.TargetManager - balancer balance.Balance - nodeMgr *session.NodeManager + meta *meta.Meta + dist *meta.DistributionManager + targetMgr *meta.TargetManager + nodeMgr *session.NodeManager + getBalancerFunc GetBalancerFunc } func NewSegmentChecker( meta *meta.Meta, dist *meta.DistributionManager, targetMgr *meta.TargetManager, - balancer balance.Balance, nodeMgr *session.NodeManager, + getBalancerFunc GetBalancerFunc, ) *SegmentChecker { return &SegmentChecker{ checkerActivation: newCheckerActivation(), meta: meta, dist: dist, targetMgr: targetMgr, - balancer: balancer, nodeMgr: nodeMgr, + getBalancerFunc: getBalancerFunc, } } @@ -403,7 +403,7 @@ func (c *SegmentChecker) createSegmentLoadTasks(ctx context.Context, segments [] SegmentInfo: s, } }) - shardPlans := c.balancer.AssignSegment(replica.GetCollectionID(), segmentInfos, rwNodes, false) + shardPlans := c.getBalancerFunc().AssignSegment(replica.GetCollectionID(), segmentInfos, rwNodes, false) for i := range shardPlans { shardPlans[i].Replica = replica } diff --git a/internal/querycoordv2/checkers/segment_checker_test.go b/internal/querycoordv2/checkers/segment_checker_test.go index 88861f6d060a8..c6fdd03440398 100644 --- a/internal/querycoordv2/checkers/segment_checker_test.go +++ b/internal/querycoordv2/checkers/segment_checker_test.go @@ -77,7 +77,7 @@ func (suite *SegmentCheckerTestSuite) SetupTest() { targetManager := meta.NewTargetManager(suite.broker, suite.meta) balancer := suite.createMockBalancer() - suite.checker = NewSegmentChecker(suite.meta, distManager, targetManager, balancer, suite.nodeMgr) + suite.checker = NewSegmentChecker(suite.meta, distManager, targetManager, suite.nodeMgr, func() balance.Balance { return balancer }) suite.broker.EXPECT().GetPartitions(mock.Anything, int64(1)).Return([]int64{1}, nil).Maybe() } diff --git a/internal/querycoordv2/handlers.go b/internal/querycoordv2/handlers.go index e3387ae6b785a..13fa55008d0a1 100644 --- a/internal/querycoordv2/handlers.go +++ b/internal/querycoordv2/handlers.go @@ -99,7 +99,7 @@ func (s *Server) balanceSegments(ctx context.Context, copyMode bool, ) error { log := log.Ctx(ctx).With(zap.Int64("collectionID", collectionID), zap.Int64("srcNode", srcNode)) - plans := s.balancer.AssignSegment(collectionID, segments, dstNodes, true) + plans := s.getBalancerFunc().AssignSegment(collectionID, segments, dstNodes, true) for i := range plans { plans[i].From = srcNode plans[i].Replica = replica @@ -175,7 +175,7 @@ func (s *Server) balanceChannels(ctx context.Context, ) error { log := log.Ctx(ctx).With(zap.Int64("collectionID", collectionID)) - plans 
:= s.balancer.AssignChannel(channels, dstNodes, true) + plans := s.getBalancerFunc().AssignChannel(channels, dstNodes, true) for i := range plans { plans[i].From = srcNode plans[i].Replica = replica diff --git a/internal/querycoordv2/ops_service_test.go b/internal/querycoordv2/ops_service_test.go index 509ba091e9865..c9d062d631a30 100644 --- a/internal/querycoordv2/ops_service_test.go +++ b/internal/querycoordv2/ops_service_test.go @@ -121,7 +121,7 @@ func (suite *OpsServiceSuite) SetupTest() { suite.distController = dist.NewMockController(suite.T()) suite.checkerController = checkers.NewCheckerController(suite.meta, suite.distMgr, - suite.targetMgr, suite.balancer, suite.nodeMgr, suite.taskScheduler, suite.broker) + suite.targetMgr, suite.nodeMgr, suite.taskScheduler, suite.broker, func() balance.Balance { return suite.balancer }) suite.server = &Server{ kv: suite.kv, @@ -137,7 +137,7 @@ func (suite *OpsServiceSuite) SetupTest() { cluster: suite.cluster, jobScheduler: suite.jobScheduler, taskScheduler: suite.taskScheduler, - balancer: suite.balancer, + getBalancerFunc: func() balance.Balance { return suite.balancer }, distController: suite.distController, ctx: context.Background(), checkerController: suite.checkerController, diff --git a/internal/querycoordv2/server.go b/internal/querycoordv2/server.go index d115c4ceb7cf7..0504c5f6682db 100644 --- a/internal/querycoordv2/server.go +++ b/internal/querycoordv2/server.go @@ -115,7 +115,9 @@ type Server struct { resourceObserver *observers.ResourceObserver leaderCacheObserver *observers.LeaderCacheObserver - balancer balance.Balance + getBalancerFunc checkers.GetBalancerFunc + balancerMap map[string]balance.Balance + balancerLock sync.RWMutex // Active-standby enableActiveStandBy bool @@ -137,6 +139,7 @@ func NewQueryCoord(ctx context.Context) (*Server, error) { cancel: cancel, nodeUpEventChan: make(chan int64, 10240), notifyNodeUp: make(chan struct{}), + balancerMap: make(map[string]balance.Balance), } server.UpdateStateCode(commonpb.StateCode_Abnormal) server.queryNodeCreator = session.DefaultQueryNodeCreator @@ -287,34 +290,46 @@ func (s *Server) initQueryCoord() error { s.taskScheduler, ) - // Init balancer map and balancer - log.Info("init balancer") - switch params.Params.QueryCoordCfg.Balancer.GetValue() { - case meta.RoundRobinBalancerName: - s.balancer = balance.NewRoundRobinBalancer(s.taskScheduler, s.nodeMgr) - case meta.RowCountBasedBalancerName: - s.balancer = balance.NewRowCountBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) - case meta.ScoreBasedBalancerName: - s.balancer = balance.NewScoreBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) - case meta.MultiTargetBalancerName: - s.balancer = balance.NewMultiTargetBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) - case meta.ChannelLevelScoreBalancerName: - s.balancer = balance.NewChannelLevelScoreBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) - default: - log.Info(fmt.Sprintf("default to use %s", meta.ScoreBasedBalancerName)) - s.balancer = balance.NewScoreBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) - } - // Init checker controller log.Info("init checker controller") + s.getBalancerFunc = func() balance.Balance { + balanceKey := paramtable.Get().QueryCoordCfg.Balancer.GetValue() + s.balancerLock.Lock() + defer s.balancerLock.Unlock() + + balancer, ok := s.balancerMap[balanceKey] + if ok { + return balancer + } + + log.Info("switch to new balancer", zap.String("name", 
balanceKey)) + switch balanceKey { + case meta.RoundRobinBalancerName: + balancer = balance.NewRoundRobinBalancer(s.taskScheduler, s.nodeMgr) + case meta.RowCountBasedBalancerName: + balancer = balance.NewRowCountBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) + case meta.ScoreBasedBalancerName: + balancer = balance.NewScoreBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) + case meta.MultiTargetBalancerName: + balancer = balance.NewMultiTargetBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) + case meta.ChannelLevelScoreBalancerName: + balancer = balance.NewChannelLevelScoreBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) + default: + log.Info(fmt.Sprintf("default to use %s", meta.ScoreBasedBalancerName)) + balancer = balance.NewScoreBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) + } + + s.balancerMap[balanceKey] = balancer + return balancer + } s.checkerController = checkers.NewCheckerController( s.meta, s.dist, s.targetMgr, - s.balancer, s.nodeMgr, s.taskScheduler, s.broker, + s.getBalancerFunc, ) // Init observers diff --git a/internal/querycoordv2/server_test.go b/internal/querycoordv2/server_test.go index c3be55e29f1b8..f71172fd89394 100644 --- a/internal/querycoordv2/server_test.go +++ b/internal/querycoordv2/server_test.go @@ -567,10 +567,10 @@ func (suite *ServerSuite) hackServer() { suite.server.meta, suite.server.dist, suite.server.targetMgr, - suite.server.balancer, suite.server.nodeMgr, suite.server.taskScheduler, suite.server.broker, + suite.server.getBalancerFunc, ) suite.server.targetObserver = observers.NewTargetObserver( suite.server.meta, diff --git a/internal/querycoordv2/services_test.go b/internal/querycoordv2/services_test.go index 9c486a26b9c81..744004fd8f074 100644 --- a/internal/querycoordv2/services_test.go +++ b/internal/querycoordv2/services_test.go @@ -201,7 +201,7 @@ func (suite *ServiceSuite) SetupTest() { cluster: suite.cluster, jobScheduler: suite.jobScheduler, taskScheduler: suite.taskScheduler, - balancer: suite.balancer, + getBalancerFunc: func() balance.Balance { return suite.balancer }, distController: suite.distController, ctx: context.Background(), } From f336b2d67224dd1d3d60257348b795df3664f6de Mon Sep 17 00:00:00 2001 From: congqixia Date: Tue, 21 May 2024 14:33:39 +0800 Subject: [PATCH 013/126] fix: Check schema without vector field in proxy (#33211) Related to #33199 Signed-off-by: Congqi Xia --- internal/proxy/task.go | 7 ++++++- internal/proxy/task_test.go | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/internal/proxy/task.go b/internal/proxy/task.go index 4167a04ce3fce..7aa1457fd1151 100644 --- a/internal/proxy/task.go +++ b/internal/proxy/task.go @@ -293,10 +293,15 @@ func (t *createCollectionTask) PreExecute(ctx context.Context) error { return fmt.Errorf("maximum field's number should be limited to %d", Params.ProxyCfg.MaxFieldNum.GetAsInt()) } - if len(typeutil.GetVectorFieldSchemas(t.schema)) > Params.ProxyCfg.MaxVectorFieldNum.GetAsInt() { + vectorFields := len(typeutil.GetVectorFieldSchemas(t.schema)) + if vectorFields > Params.ProxyCfg.MaxVectorFieldNum.GetAsInt() { return fmt.Errorf("maximum vector field's number should be limited to %d", Params.ProxyCfg.MaxVectorFieldNum.GetAsInt()) } + if vectorFields == 0 { + return merr.WrapErrParameterInvalidMsg("schema does not contain vector field") + } + // validate collection name if err := validateCollectionName(t.schema.Name); err != nil { return err 
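For illustration, a minimal self-contained sketch of the rule the task.go hunk above enforces; the helper name validateVectorField and the standalone form are assumptions of this sketch, only typeutil.GetVectorFieldSchemas and merr.WrapErrParameterInvalidMsg come from the patch itself:

// validateVectorField mirrors the new check in createCollectionTask.PreExecute:
// a collection schema must declare at least one vector field, otherwise the
// request is rejected at the proxy before reaching RootCoord.
func validateVectorField(schema *schemapb.CollectionSchema) error {
	if len(typeutil.GetVectorFieldSchemas(schema)) == 0 {
		return merr.WrapErrParameterInvalidMsg("schema does not contain vector field")
	}
	return nil
}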
diff --git a/internal/proxy/task_test.go b/internal/proxy/task_test.go index 161f5b1bf9f11..33fb1bfaed225 100644 --- a/internal/proxy/task_test.go +++ b/internal/proxy/task_test.go @@ -754,6 +754,25 @@ func TestCreateCollectionTask(t *testing.T) { err = task.PreExecute(ctx) assert.Error(t, err) + // without vector field + schema = &schemapb.CollectionSchema{ + Name: collectionName, + Description: "", + AutoID: false, + Fields: []*schemapb.FieldSchema{ + { + Name: "id", + DataType: schemapb.DataType_Int64, + IsPrimaryKey: true, + }, + }, + } + noVectorSchema, err := proto.Marshal(schema) + assert.NoError(t, err) + task.CreateCollectionRequest.Schema = noVectorSchema + err = task.PreExecute(ctx) + assert.Error(t, err) + task.CreateCollectionRequest = reqBackup // validateCollectionName From 2d6f12d48b998104fdf1f411d8a71e3442dd3182 Mon Sep 17 00:00:00 2001 From: XuanYang-cn Date: Tue, 21 May 2024 14:35:39 +0800 Subject: [PATCH 014/126] fix: channel manager's goroutine run order (#33118) See also: #33117 --------- Signed-off-by: yangxuan --- internal/datanode/channel_manager.go | 47 +++++++++++--------- internal/datanode/channel_manager_test.go | 52 ++++++++++++++--------- 2 files changed, 58 insertions(+), 41 deletions(-) diff --git a/internal/datanode/channel_manager.go b/internal/datanode/channel_manager.go index 97ae15e714656..1fb3e4d4a01eb 100644 --- a/internal/datanode/channel_manager.go +++ b/internal/datanode/channel_manager.go @@ -32,7 +32,10 @@ import ( "github.com/milvus-io/milvus/pkg/util/typeutil" ) -type releaseFunc func(channel string) +type ( + releaseFunc func(channel string) + watchFunc func(ctx context.Context, dn *DataNode, info *datapb.ChannelWatchInfo, tickler *tickler) (*dataSyncService, error) +) type ChannelManager interface { Submit(info *datapb.ChannelWatchInfo) error @@ -206,7 +209,7 @@ func (m *ChannelManagerImpl) handleOpState(opState *opState) { } func (m *ChannelManagerImpl) getOrCreateRunner(channel string) *opRunner { - runner, loaded := m.opRunners.GetOrInsert(channel, NewOpRunner(channel, m.dn, m.releaseFunc, m.communicateCh)) + runner, loaded := m.opRunners.GetOrInsert(channel, NewOpRunner(channel, m.dn, m.releaseFunc, executeWatch, m.communicateCh)) if !loaded { runner.Start() } @@ -228,6 +231,7 @@ type opRunner struct { channel string dn *DataNode releaseFunc releaseFunc + watchFunc watchFunc guard sync.RWMutex allOps map[UniqueID]*opInfo // opID -> tickler @@ -238,11 +242,12 @@ type opRunner struct { closeWg sync.WaitGroup } -func NewOpRunner(channel string, dn *DataNode, f releaseFunc, resultCh chan *opState) *opRunner { +func NewOpRunner(channel string, dn *DataNode, releaseF releaseFunc, watchF watchFunc, resultCh chan *opState) *opRunner { return &opRunner{ channel: channel, dn: dn, - releaseFunc: f, + releaseFunc: releaseF, + watchFunc: watchF, opsInQueue: make(chan *datapb.ChannelWatchInfo, 10), allOps: make(map[UniqueID]*opInfo), resultCh: resultCh, @@ -333,16 +338,16 @@ func (r *opRunner) watchWithTimer(info *datapb.ChannelWatchInfo) *opState { opInfo.tickler = tickler var ( - successSig = make(chan struct{}, 1) - waiter sync.WaitGroup + successSig = make(chan struct{}, 1) + finishWaiter sync.WaitGroup ) watchTimeout := Params.DataCoordCfg.WatchTimeoutInterval.GetAsDuration(time.Second) ctx, cancel := context.WithTimeout(context.Background(), watchTimeout) defer cancel() - startTimer := func(wg *sync.WaitGroup) { - defer wg.Done() + startTimer := func(finishWg *sync.WaitGroup) { + defer finishWg.Done() timer := time.NewTimer(watchTimeout) defer 
timer.Stop() @@ -377,11 +382,12 @@ func (r *opRunner) watchWithTimer(info *datapb.ChannelWatchInfo) *opState { } } - waiter.Add(2) - go startTimer(&waiter) + finishWaiter.Add(2) + go startTimer(&finishWaiter) + go func() { - defer waiter.Done() - fg, err := executeWatch(ctx, r.dn, info, tickler) + defer finishWaiter.Done() + fg, err := r.watchFunc(ctx, r.dn, info, tickler) if err != nil { opState.state = datapb.ChannelWatchState_WatchFailure } else { @@ -391,7 +397,7 @@ func (r *opRunner) watchWithTimer(info *datapb.ChannelWatchInfo) *opState { } }() - waiter.Wait() + finishWaiter.Wait() return opState } @@ -402,13 +408,14 @@ func (r *opRunner) releaseWithTimer(releaseFunc releaseFunc, channel string, opI opID: opID, } var ( - successSig = make(chan struct{}, 1) - waiter sync.WaitGroup + successSig = make(chan struct{}, 1) + finishWaiter sync.WaitGroup ) log := log.With(zap.Int64("opID", opID), zap.String("channel", channel)) - startTimer := func(wg *sync.WaitGroup) { - defer wg.Done() + startTimer := func(finishWaiter *sync.WaitGroup) { + defer finishWaiter.Done() + releaseTimeout := Params.DataCoordCfg.WatchTimeoutInterval.GetAsDuration(time.Second) timer := time.NewTimer(releaseTimeout) defer timer.Stop() @@ -435,8 +442,8 @@ func (r *opRunner) releaseWithTimer(releaseFunc releaseFunc, channel string, opI } } - waiter.Add(1) - go startTimer(&waiter) + finishWaiter.Add(1) + go startTimer(&finishWaiter) go func() { // TODO: failure should panic this DN, but we're not sure how // to recover when releaseFunc stuck. @@ -450,7 +457,7 @@ func (r *opRunner) releaseWithTimer(releaseFunc releaseFunc, channel string, opI successSig <- struct{}{} }() - waiter.Wait() + finishWaiter.Wait() return opState } diff --git a/internal/datanode/channel_manager_test.go b/internal/datanode/channel_manager_test.go index 85c13d7fe9be4..0dad91c14c786 100644 --- a/internal/datanode/channel_manager_test.go +++ b/internal/datanode/channel_manager_test.go @@ -20,6 +20,7 @@ import ( "context" "testing" + "github.com/cockroachdb/errors" "github.com/stretchr/testify/suite" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" @@ -56,7 +57,7 @@ func (s *OpRunnerSuite) TestWatchWithTimer() { mockReleaseFunc := func(channel string) { log.Info("mock release func") } - runner := NewOpRunner(channel, s.node, mockReleaseFunc, commuCh) + runner := NewOpRunner(channel, s.node, mockReleaseFunc, executeWatch, commuCh) err := runner.Enqueue(info) s.Require().NoError(err) @@ -67,6 +68,35 @@ func (s *OpRunnerSuite) TestWatchWithTimer() { runner.FinishOp(100) } +func (s *OpRunnerSuite) TestWatchTimeout() { + channel := "by-dev-rootcoord-dml-1000" + paramtable.Get().Save(Params.DataCoordCfg.WatchTimeoutInterval.Key, "0.000001") + defer paramtable.Get().Reset(Params.DataCoordCfg.WatchTimeoutInterval.Key) + info := getWatchInfoByOpID(100, channel, datapb.ChannelWatchState_ToWatch) + + sig := make(chan struct{}) + commuCh := make(chan *opState) + + mockReleaseFunc := func(channel string) { log.Info("mock release func") } + mockWatchFunc := func(ctx context.Context, dn *DataNode, info *datapb.ChannelWatchInfo, tickler *tickler) (*dataSyncService, error) { + <-ctx.Done() + sig <- struct{}{} + return nil, errors.New("timeout") + } + + runner := NewOpRunner(channel, s.node, mockReleaseFunc, mockWatchFunc, commuCh) + runner.Start() + defer runner.Close() + err := runner.Enqueue(info) + s.Require().NoError(err) + + <-sig + opState := <-commuCh + s.Require().NotNil(opState) + s.Equal(info.GetOpID(), opState.opID) + 
s.Equal(datapb.ChannelWatchState_WatchFailure, opState.state) +} + type OpRunnerSuite struct { suite.Suite node *DataNode @@ -126,26 +156,6 @@ func (s *ChannelManagerSuite) TearDownTest() { } } -func (s *ChannelManagerSuite) TestWatchFail() { - channel := "by-dev-rootcoord-dml-2" - paramtable.Get().Save(Params.DataCoordCfg.WatchTimeoutInterval.Key, "0.000001") - defer paramtable.Get().Reset(Params.DataCoordCfg.WatchTimeoutInterval.Key) - info := getWatchInfoByOpID(100, channel, datapb.ChannelWatchState_ToWatch) - s.Require().Equal(0, s.manager.opRunners.Len()) - err := s.manager.Submit(info) - s.Require().NoError(err) - - opState := <-s.manager.communicateCh - s.Require().NotNil(opState) - s.Equal(info.GetOpID(), opState.opID) - s.Equal(datapb.ChannelWatchState_WatchFailure, opState.state) - - s.manager.handleOpState(opState) - - resp := s.manager.GetProgress(info) - s.Equal(datapb.ChannelWatchState_WatchFailure, resp.GetState()) -} - func (s *ChannelManagerSuite) TestReleaseStuck() { var ( channel = "by-dev-rootcoord-dml-2" From cb480d17c88e00ae869f4316487e37694369439b Mon Sep 17 00:00:00 2001 From: Cai Yudong Date: Tue, 21 May 2024 15:09:39 +0800 Subject: [PATCH 015/126] fix: Fix SparseFloatVector data parse error for parquet (#33187) Issue: #22837 Signed-off-by: Cai Yudong --- .../util/importutilv2/parquet/field_reader.go | 2 +- pkg/util/typeutil/schema.go | 33 +++- pkg/util/typeutil/schema_test.go | 144 +++++++++++++++++- 3 files changed, 165 insertions(+), 14 deletions(-) diff --git a/internal/util/importutilv2/parquet/field_reader.go b/internal/util/importutilv2/parquet/field_reader.go index 282366fff993d..707bdade50c1e 100644 --- a/internal/util/importutilv2/parquet/field_reader.go +++ b/internal/util/importutilv2/parquet/field_reader.go @@ -291,7 +291,7 @@ func ReadSparseFloatVectorData(pcr *FieldReader, count int64) (any, error) { for _, str := range data.([]string) { rowVec, err := typeutil.CreateSparseFloatRowFromJSON([]byte(str)) if err != nil { - return nil, merr.WrapErrImportFailed(fmt.Sprintf("Invalid JSON string for SparseFloatVector: '%s'", str)) + return nil, merr.WrapErrImportFailed(fmt.Sprintf("Invalid JSON string for SparseFloatVector: '%s', err = %v", str, err)) } byteArr = append(byteArr, rowVec) elemCount := len(rowVec) / 8 diff --git a/pkg/util/typeutil/schema.go b/pkg/util/typeutil/schema.go index 8277ccbe438a1..272447981a5ec 100644 --- a/pkg/util/typeutil/schema.go +++ b/pkg/util/typeutil/schema.go @@ -1550,12 +1550,31 @@ func CreateSparseFloatRowFromMap(input map[string]interface{}) ([]byte, error) { return nil, fmt.Errorf("empty JSON input") } - // try format1 - indices, ok1 := input["indices"].([]uint32) - values, ok2 := input["values"].([]float32) - - // try format2 - if !ok1 && !ok2 { + jsonIndices, ok1 := input["indices"].([]interface{}) + jsonValues, ok2 := input["values"].([]interface{}) + + if ok1 && ok2 { + // try format1 + for _, v1 := range jsonIndices { + if num1, suc1 := v1.(int); suc1 { + indices = append(indices, uint32(num1)) + } else { + if num2, suc2 := v1.(float64); suc2 && num2 == float64(int(num2)) { + indices = append(indices, uint32(num2)) + } else { + return nil, fmt.Errorf("invalid index type: %v(%s)", v1, reflect.TypeOf(v1)) + } + } + } + for _, v2 := range jsonValues { + if num, ok := v2.(float64); ok { + values = append(values, float32(num)) + } else { + return nil, fmt.Errorf("invalid value type: %s", reflect.TypeOf(v2)) + } + } + } else if !ok1 && !ok2 { + // try format2 for k, v := range input { idx, err := strconv.ParseUint(k, 0, 
32) if err != nil { @@ -1578,7 +1597,7 @@ func CreateSparseFloatRowFromMap(input map[string]interface{}) ([]byte, error) { indices = append(indices, uint32(idx)) values = append(values, float32(val)) } - } else if ok1 != ok2 { + } else { return nil, fmt.Errorf("invalid JSON input") } diff --git a/pkg/util/typeutil/schema_test.go b/pkg/util/typeutil/schema_test.go index 67601a719d9e9..b1e5ec4b835f1 100644 --- a/pkg/util/typeutil/schema_test.go +++ b/pkg/util/typeutil/schema_test.go @@ -2120,39 +2120,45 @@ func TestValidateSparseFloatRows(t *testing.T) { func TestParseJsonSparseFloatRow(t *testing.T) { t.Run("valid row 1", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{1, 3, 5}, "values": []float32{1.0, 2.0, 3.0}} + row := map[string]interface{}{"indices": []interface{}{1, 3, 5}, "values": []interface{}{1.0, 2.0, 3.0}} res, err := CreateSparseFloatRowFromMap(row) assert.NoError(t, err) assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res) }) t.Run("valid row 2", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{3, 1, 5}, "values": []float32{1.0, 2.0, 3.0}} + row := map[string]interface{}{"indices": []interface{}{3, 1, 5}, "values": []interface{}{1.0, 2.0, 3.0}} res, err := CreateSparseFloatRowFromMap(row) assert.NoError(t, err) assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{2.0, 1.0, 3.0}), res) }) t.Run("invalid row 1", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{1, 3, 5}, "values": []float32{1.0, 2.0}} + row := map[string]interface{}{"indices": []interface{}{1, 3, 5}, "values": []interface{}{1.0, 2.0}} _, err := CreateSparseFloatRowFromMap(row) assert.Error(t, err) }) t.Run("invalid row 2", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{1}, "values": []float32{1.0, 2.0}} + row := map[string]interface{}{"indices": []interface{}{1}, "values": []interface{}{1.0, 2.0}} _, err := CreateSparseFloatRowFromMap(row) assert.Error(t, err) }) t.Run("invalid row 3", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{}, "values": []float32{}} + row := map[string]interface{}{"indices": []interface{}{}, "values": []interface{}{}} _, err := CreateSparseFloatRowFromMap(row) assert.Error(t, err) }) t.Run("invalid row 4", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{3}, "values": []float32{-0.2}} + row := map[string]interface{}{"indices": []interface{}{3}, "values": []interface{}{-0.2}} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid row 5", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{3.1}, "values": []interface{}{0.2}} _, err := CreateSparseFloatRowFromMap(row) assert.Error(t, err) }) @@ -2206,4 +2212,130 @@ func TestParseJsonSparseFloatRow(t *testing.T) { _, err := CreateSparseFloatRowFromMap(row) assert.Error(t, err) }) + + t.Run("invalid dict row 7", func(t *testing.T) { + row := map[string]interface{}{"1.1": 1.0, "3": 2.0, "5": 3.0} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) +} + +func TestParseJsonSparseFloatRowBytes(t *testing.T) { + t.Run("valid row 1", func(t *testing.T) { + row := []byte(`{"indices":[1,3,5],"values":[1.0,2.0,3.0]}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res) + }) + + t.Run("valid row 2", func(t *testing.T) { + row := 
[]byte(`{"indices":[3,1,5],"values":[1.0,2.0,3.0]}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{2.0, 1.0, 3.0}), res) + }) + + t.Run("invalid row 1", func(t *testing.T) { + row := []byte(`{"indices":[1,3,5],"values":[1.0,2.0,3.0`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid row 2", func(t *testing.T) { + row := []byte(`{"indices":[1,3,5],"values":[1.0,2.0]`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid row 3", func(t *testing.T) { + row := []byte(`{"indices":[1],"values":[1.0,2.0]`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid row 4", func(t *testing.T) { + row := []byte(`{"indices":[],"values":[]`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid row 5", func(t *testing.T) { + row := []byte(`{"indices":[-3],"values":[0.2]`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid row 6", func(t *testing.T) { + row := []byte(`{"indices":[3],"values":[-0.2]`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid row 7", func(t *testing.T) { + row := []byte(`{"indices": []interface{}{3.1}, "values": []interface{}{0.2}}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("valid dict row 1", func(t *testing.T) { + row := []byte(`{"1": 1.0, "3": 2.0, "5": 3.0}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res) + }) + + t.Run("valid dict row 2", func(t *testing.T) { + row := []byte(`{"3": 1.0, "1": 2.0, "5": 3.0}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{2.0, 1.0, 3.0}), res) + }) + + t.Run("invalid dict row 1", func(t *testing.T) { + row := []byte(`{"a": 1.0, "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 2", func(t *testing.T) { + row := []byte(`{"1": "a", "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 3", func(t *testing.T) { + row := []byte(`{"1": "1.0", "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 4", func(t *testing.T) { + row := []byte(`{"1": 1.0, "3": 2.0, "5": }`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 5", func(t *testing.T) { + row := []byte(`{"-1": 1.0, "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 6", func(t *testing.T) { + row := []byte(`{"1": -1.0, "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 7", func(t *testing.T) { + row := []byte(`{}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 8", func(t *testing.T) { + row := []byte(`{"1.1": 1.0, "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) } From 33bd6eed28b4f6157d408c25e3c9eef41c00318d Mon Sep 17 00:00:00 2001 From: wei liu Date: Tue, 21 May 2024 15:41:39 +0800 Subject: [PATCH 016/126] fix: 
Clean offline node from replica after qc recover (#33213) issue: #33200 #33207 pr#33104 remove this logic by mistake, which cause the offline node will be kept in replica after qc recover, and request send to offline qn will go a NodeNotFound error. Signed-off-by: Wei Liu --- internal/querycoordv2/server.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/internal/querycoordv2/server.go b/internal/querycoordv2/server.go index 0504c5f6682db..0f54d5e2f8da2 100644 --- a/internal/querycoordv2/server.go +++ b/internal/querycoordv2/server.go @@ -456,6 +456,7 @@ func (s *Server) startQueryCoord() error { s.nodeMgr.Stopping(node.ServerID) } } + s.checkReplicas() for _, node := range sessions { s.handleNodeUp(node.ServerID) } @@ -777,6 +778,33 @@ func (s *Server) handleNodeDown(node int64) { s.meta.ResourceManager.HandleNodeDown(node) } +// checkReplicas checks whether replica contains offline node, and remove those nodes +func (s *Server) checkReplicas() { + for _, collection := range s.meta.CollectionManager.GetAll() { + log := log.With(zap.Int64("collectionID", collection)) + replicas := s.meta.ReplicaManager.GetByCollection(collection) + for _, replica := range replicas { + toRemove := make([]int64, 0) + for _, node := range replica.GetNodes() { + if s.nodeMgr.Get(node) == nil { + toRemove = append(toRemove, node) + } + } + + if len(toRemove) > 0 { + log := log.With( + zap.Int64("replicaID", replica.GetID()), + zap.Int64s("offlineNodes", toRemove), + ) + log.Info("some nodes are offline, remove them from replica", zap.Any("toRemove", toRemove)) + if err := s.meta.ReplicaManager.RemoveNode(replica.GetID(), toRemove...); err != nil { + log.Warn("failed to remove offline nodes from replica") + } + } + } + } +} + func (s *Server) updateBalanceConfigLoop(ctx context.Context) { success := s.updateBalanceConfig() if success { From 017fd7bc25de61734bb2af39e4edecf35e522d82 Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Tue, 21 May 2024 16:13:39 +0800 Subject: [PATCH 017/126] enhance: Select L2 segments in L0Compaction as well (#32991) /kind improvement Signed-off-by: bigsheeper --- internal/datacoord/compaction.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/datacoord/compaction.go b/internal/datacoord/compaction.go index d4d7a0ef82751..87b3fdbe2cd7c 100644 --- a/internal/datacoord/compaction.go +++ b/internal/datacoord/compaction.go @@ -322,7 +322,6 @@ func (c *compactionPlanHandler) RefreshPlan(task *compactionTask) error { // Select sealed L1 segments for LevelZero compaction that meets the condition: // dmlPos < triggerInfo.pos - // TODO: select L2 segments too sealedSegments := c.meta.SelectSegments(WithCollection(task.triggerInfo.collectionID), SegmentFilterFunc(func(info *SegmentInfo) bool { return (task.triggerInfo.partitionID == -1 || info.GetPartitionID() == task.triggerInfo.partitionID) && info.GetInsertChannel() == plan.GetChannel() && @@ -339,7 +338,7 @@ func (c *compactionPlanHandler) RefreshPlan(task *compactionTask) error { sealedSegBinlogs := lo.Map(sealedSegments, func(info *SegmentInfo, _ int) *datapb.CompactionSegmentBinlogs { return &datapb.CompactionSegmentBinlogs{ SegmentID: info.GetID(), - Level: datapb.SegmentLevel_L1, + Level: info.GetLevel(), CollectionID: info.GetCollectionID(), PartitionID: info.GetPartitionID(), } From 7ab7e3a0048a087cccc7b7ace6f1af831968b934 Mon Sep 17 00:00:00 2001 From: "sammy.huang" Date: Tue, 21 May 2024 16:54:38 +0800 Subject: [PATCH 018/126] feat: support arm-based image build and pull 
request (#33219) Signed-off-by: Liang Huang --- ci/jenkins/PR-Arm.groovy | 324 +++++++++++++++++++++ ci/jenkins/PublishArmBasedGPUImages.groovy | 14 +- ci/jenkins/PublishArmBasedImages.groovy | 93 ++++++ ci/jenkins/pod/rte-arm.yaml | 66 +++++ tests/scripts/values/ci/pr-arm.yaml | 202 +++++++++++++ 5 files changed, 696 insertions(+), 3 deletions(-) create mode 100644 ci/jenkins/PR-Arm.groovy create mode 100644 ci/jenkins/PublishArmBasedImages.groovy create mode 100644 ci/jenkins/pod/rte-arm.yaml create mode 100644 tests/scripts/values/ci/pr-arm.yaml diff --git a/ci/jenkins/PR-Arm.groovy b/ci/jenkins/PR-Arm.groovy new file mode 100644 index 0000000000000..cdf50e1678b27 --- /dev/null +++ b/ci/jenkins/PR-Arm.groovy @@ -0,0 +1,324 @@ +#!/usr/bin/env groovy + +int total_timeout_minutes = 60 * 5 +int e2e_timeout_seconds = 120 * 60 +def imageTag='' +int case_timeout_seconds = 20 * 60 +def chart_version='4.1.28' +pipeline { + options { + timestamps() + timeout(time: total_timeout_minutes, unit: 'MINUTES') + buildDiscarder logRotator(artifactDaysToKeepStr: '30') + parallelsAlwaysFailFast() + preserveStashes(buildCount: 5) + disableConcurrentBuilds(abortPrevious: true) + + } + agent { + kubernetes { + cloud '4am' + defaultContainer 'main' + yamlFile 'ci/jenkins/pod/rte-arm.yaml' + customWorkspace '/home/jenkins/agent/workspace' + } + } + environment { + PROJECT_NAME = 'milvus' + SEMVER = "${BRANCH_NAME.contains('/') ? BRANCH_NAME.substring(BRANCH_NAME.lastIndexOf('/') + 1) : BRANCH_NAME}" + DOCKER_BUILDKIT = 1 + ARTIFACTS = "${env.WORKSPACE}/_artifacts" + CI_DOCKER_CREDENTIAL_ID = "harbor-milvus-io-registry" + MILVUS_HELM_NAMESPACE = "milvus-ci" + DISABLE_KIND = true + HUB = 'harbor.milvus.io/milvus' + JENKINS_BUILD_ID = "${env.BUILD_ID}" + CI_MODE="pr" + SHOW_MILVUS_CONFIGMAP= true + + DOCKER_CREDENTIALS_ID = "dockerhub" + TARGET_REPO = "milvusdb" + HARBOR_REPO = "harbor.milvus.io" + } + + stages { + stage ('Build'){ + steps { + container('main') { + script { + sh 'printenv' + def date = sh(returnStdout: true, script: 'date +%Y%m%d').trim() + sh 'git config --global --add safe.directory /home/jenkins/agent/workspace' + def gitShortCommit = sh(returnStdout: true, script: 'git rev-parse --short HEAD').trim() + imageTag="${env.BRANCH_NAME}-${date}-${gitShortCommit}" + + + sh """ + echo "Building image with tag: ${imageTag}" + + set -a # automatically export all variables from .env + . .env + set +a # stop automatically + + + docker run --net=host -v /root/.conan:/root/.conan -v \$(pwd):/root/milvus -w /root/milvus milvusdb/milvus-env:ubuntu20.04-\${DATE_VERSION} sh -c "make clean && make install" + """ + + withCredentials([usernamePassword(credentialsId: "${env.CI_DOCKER_CREDENTIAL_ID}", usernameVariable: 'CI_REGISTRY_USERNAME', passwordVariable: 'CI_REGISTRY_PASSWORD')]){ + sh "docker login ${env.HARBOR_REPO} -u '${CI_REGISTRY_USERNAME}' -p '${CI_REGISTRY_PASSWORD}'" + sh """ + export MILVUS_HARBOR_IMAGE_REPO="${env.HARBOR_REPO}/milvus/milvus" + export MILVUS_IMAGE_TAG="${imageTag}" + + docker build --build-arg TARGETARCH=arm64 -f "./build/docker/milvus/ubuntu20.04/Dockerfile" -t \${MILVUS_HARBOR_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} . 
+ + docker push \${MILVUS_HARBOR_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} + docker logout + """ + } + + // stash imageTag info for rebuild install & E2E Test only + sh "echo ${imageTag} > imageTag.txt" + stash includes: 'imageTag.txt', name: 'imageTag' + + } + } + } + } + + + stage('Install & E2E Test') { + matrix { + axes { + axis { + name 'MILVUS_SERVER_TYPE' + values 'standalone' + } + axis { + name 'MILVUS_CLIENT' + values 'pymilvus' + } + } + + stages { + stage('Install') { + agent { + kubernetes { + cloud '4am' + inheritFrom 'milvus-e2e-4am' + defaultContainer 'main' + yamlFile 'ci/jenkins/pod/rte-build.yaml' + customWorkspace '/home/jenkins/agent/workspace' + } + } + steps { + container('main') { + stash includes: 'tests/**', name: 'testCode', useDefaultExcludes: false + dir ('tests/scripts') { + script { + sh 'printenv' + def clusterEnabled = "false" + def valuesFile = "pr-arm.yaml" + + if ("${MILVUS_SERVER_TYPE}" == "standalone-one-pod") { + valuesFile = "nightly-one-pod.yaml" + } + + if ("${MILVUS_CLIENT}" == "pymilvus") { + if ("${imageTag}"==''){ + dir ("imageTag"){ + try{ + unstash 'imageTag' + imageTag=sh(returnStdout: true, script: 'cat imageTag.txt | tr -d \'\n\r\'') + }catch(e){ + print "No Image Tag info remained ,please rerun build to build new image." + exit 1 + } + } + } + // modify values file to enable kafka + if ("${MILVUS_SERVER_TYPE}".contains("kafka")) { + sh ''' + apt-get update + apt-get install wget -y + wget https://github.com/mikefarah/yq/releases/download/v4.34.1/yq_linux_amd64 -O /usr/bin/yq + chmod +x /usr/bin/yq + ''' + sh """ + cp values/ci/pr-4am.yaml values/ci/pr_kafka.yaml + yq -i '.pulsar.enabled=false' values/ci/pr_kafka.yaml + yq -i '.kafka.enabled=true' values/ci/pr_kafka.yaml + yq -i '.kafka.metrics.kafka.enabled=true' values/ci/pr_kafka.yaml + yq -i '.kafka.metrics.jmx.enabled=true' values/ci/pr_kafka.yaml + yq -i '.kafka.metrics.serviceMonitor.enabled=true' values/ci/pr_kafka.yaml + """ + } + withCredentials([usernamePassword(credentialsId: "${env.CI_DOCKER_CREDENTIAL_ID}", usernameVariable: 'CI_REGISTRY_USERNAME', passwordVariable: 'CI_REGISTRY_PASSWORD')]){ + if ("${MILVUS_SERVER_TYPE}" == "standalone-one-pod") { + try { + sh """ + MILVUS_CLUSTER_ENABLED=${clusterEnabled} \ + MILVUS_HELM_REPO="https://nexus-ci.zilliz.cc/repository/milvus-proxy" \ + TAG=${imageTag}\ + ./e2e-k8s.sh \ + --skip-export-logs \ + --skip-cleanup \ + --skip-setup \ + --skip-test \ + --skip-build \ + --skip-build-image \ + --install-extra-arg " + --set etcd.metrics.enabled=true \ + --set etcd.metrics.podMonitor.enabled=true \ + --set indexCoordinator.gc.interval=1 \ + --set indexNode.disk.enabled=true \ + --set queryNode.disk.enabled=true \ + --set standalone.disk.enabled=true \ + --version ${chart_version} \ + -f values/ci/${valuesFile}" + """ + } catch (Exception e) { + echo "Tests failed, but the build will not be marked as failed." 
+ } + + }else{ + sh """ + MILVUS_CLUSTER_ENABLED=${clusterEnabled} \ + MILVUS_HELM_REPO="https://nexus-ci.zilliz.cc/repository/milvus-proxy" \ + TAG=${imageTag}\ + ./e2e-k8s.sh \ + --skip-export-logs \ + --skip-cleanup \ + --skip-setup \ + --skip-test \ + --skip-build \ + --skip-build-image \ + --install-extra-arg " + --set etcd.metrics.enabled=true \ + --set etcd.metrics.podMonitor.enabled=true \ + --set indexCoordinator.gc.interval=1 \ + --set indexNode.disk.enabled=true \ + --set queryNode.disk.enabled=true \ + --set standalone.disk.enabled=true \ + --version ${chart_version} \ + -f values/ci/${valuesFile}" + """ + } + } + } else { + error "Error: Unsupported Milvus client: ${MILVUS_CLIENT}" + } + } + } + } + + } + } + stage('E2E Test'){ + options { + skipDefaultCheckout() + } + agent { + kubernetes { + cloud '4am' + inheritFrom 'default' + defaultContainer 'main' + yamlFile 'ci/jenkins/pod/e2e.yaml' + customWorkspace '/home/jenkins/agent/workspace' + } + } + steps { + container('pytest') { + unstash('testCode') + script { + sh 'ls -lah' + } + dir ('tests/scripts') { + script { + def release_name=sh(returnStdout: true, script: './get_release_name.sh') + def clusterEnabled = 'false' + if ("${MILVUS_SERVER_TYPE}".contains("distributed")) { + clusterEnabled = "true" + } + if ("${MILVUS_CLIENT}" == "pymilvus") { + if ("${MILVUS_SERVER_TYPE}" == "standalone-one-pod") { + try { + sh """ + MILVUS_HELM_RELEASE_NAME="${release_name}" \ + MILVUS_HELM_NAMESPACE="milvus-ci" \ + MILVUS_CLUSTER_ENABLED="${clusterEnabled}" \ + TEST_TIMEOUT="${e2e_timeout_seconds}" \ + ./ci_e2e_4am.sh "-n 6 -x --tags L0 L1 --timeout ${case_timeout_seconds}" + """ + } catch (Exception e) { + echo "Tests failed, but the build will not be marked as failed." + } + }else{ + sh """ + MILVUS_HELM_RELEASE_NAME="${release_name}" \ + MILVUS_HELM_NAMESPACE="milvus-ci" \ + MILVUS_CLUSTER_ENABLED="${clusterEnabled}" \ + TEST_TIMEOUT="${e2e_timeout_seconds}" \ + ./ci_e2e_4am.sh "-n 6 -x --tags L0 L1 --timeout ${case_timeout_seconds}" + """ + } + } else { + error "Error: Unsupported Milvus client: ${MILVUS_CLIENT}" + } + } + } + } + } + post{ + always { + container('pytest'){ + dir("${env.ARTIFACTS}") { + sh "tar -zcvf ${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${MILVUS_CLIENT}-pytest-logs.tar.gz /tmp/ci_logs/test --remove-files || true" + archiveArtifacts artifacts: "${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${MILVUS_CLIENT}-pytest-logs.tar.gz ", allowEmptyArchive: true + } + } + } + + } + } + } + post{ + always { + container('main') { + dir ('tests/scripts') { + script { + def release_name=sh(returnStdout: true, script: './get_release_name.sh') + sh "kubectl get pods -n ${MILVUS_HELM_NAMESPACE} | grep ${release_name} " + sh "./uninstall_milvus.sh --release-name ${release_name}" + sh "./ci_logs.sh --log-dir /ci-logs --artifacts-name ${env.ARTIFACTS}/artifacts-${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${SEMVER}-${env.BUILD_NUMBER}-${MILVUS_CLIENT}-e2e-logs \ + --release-name ${release_name}" + dir("${env.ARTIFACTS}") { + archiveArtifacts artifacts: "artifacts-${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${SEMVER}-${env.BUILD_NUMBER}-${MILVUS_CLIENT}-e2e-logs.tar.gz", allowEmptyArchive: true + } + } + } + } + } + } + + } + + } + } + post{ + unsuccessful { + container('jnlp') { + dir ('tests/scripts') { + script { + def authorEmail = sh(returnStdout: true, script: './get_author_email.sh ') + emailext subject: '$DEFAULT_SUBJECT', + body: '$DEFAULT_CONTENT', + recipientProviders: [developers(), culprits()], + replyTo: '$DEFAULT_REPLYTO', + to: 
"${authorEmail},devops@zilliz.com" + } + } + } + } + } +} diff --git a/ci/jenkins/PublishArmBasedGPUImages.groovy b/ci/jenkins/PublishArmBasedGPUImages.groovy index 3ed8affb075f4..16245b8661b8c 100644 --- a/ci/jenkins/PublishArmBasedGPUImages.groovy +++ b/ci/jenkins/PublishArmBasedGPUImages.groovy @@ -2,7 +2,15 @@ pipeline { agent { - label 'arm' + kubernetes { + cloud '4am' + defaultContainer 'main' + yamlFile "ci/jenkins/pod/rte-arm.yaml" + customWorkspace '/home/jenkins/agent/workspace' + // We allow this pod to remain active for a while, later jobs can + // reuse cache in previous created nodes. + // idleMinutes 120 + } } options { @@ -27,10 +35,10 @@ pipeline { script { sh """ set -a # automatically export all variables from .env - . ${WORKSPACE}/.env + . .env set +a # stop automatically - docker run -v \$(pwd):/root/milvus -v \$(pwd)/.docker/.conan:/root/.conan -w /root/milvus milvusdb/milvus-env:gpu-ubuntu22.04-\${GPU_DATE_VERSION} sh -c "make clean && make gpu-install" + docker run --net=host -v \$(pwd):/root/milvus -v /root/.conan:/root/.conan -w /root/milvus milvusdb/milvus-env:gpu-ubuntu22.04-\${GPU_DATE_VERSION} sh -c "make clean && make gpu-install" """ def date = sh(returnStdout: true, script: 'date +%Y%m%d').trim() diff --git a/ci/jenkins/PublishArmBasedImages.groovy b/ci/jenkins/PublishArmBasedImages.groovy new file mode 100644 index 0000000000000..0fd77c7da7666 --- /dev/null +++ b/ci/jenkins/PublishArmBasedImages.groovy @@ -0,0 +1,93 @@ +#!/usr/bin/env groovy + +pipeline { + agent { + kubernetes { + cloud '4am' + defaultContainer 'main' + yamlFile "ci/jenkins/pod/rte-arm.yaml" + customWorkspace '/home/jenkins/agent/workspace' + // We allow this pod to remain active for a while, later jobs can + // reuse cache in previous created nodes. + // idleMinutes 120 + } + } + parameters { + string(name: 'image-tag', defaultValue: '', description: 'the image tag to be pushed to image registry') + } + + options { + timestamps() + timeout(time: 300, unit: 'MINUTES') + // parallelsAlwaysFailFast() + disableConcurrentBuilds() + } + + environment { + DOCKER_CREDENTIALS_ID = "dockerhub" + DOCKER_BUILDKIT = 1 + TARGET_REPO = "milvusdb" + CI_DOCKER_CREDENTIAL_ID = "harbor-milvus-io-registry" + HARBOR_REPO = "harbor.milvus.io" + } + + stages { + stage('Publish Milvus cpu Images'){ + + steps { + script { + sh """ + git config --global --add safe.directory /home/jenkins/agent/workspace + """ + + def tag = "" + if (params['image-tag'] == '') { + def date = sh(returnStdout: true, script: 'date +%Y%m%d').trim() + def gitShortCommit = sh(returnStdout: true, script: 'git rev-parse --short HEAD').trim() + tag = "${env.BRANCH_NAME}-${date}-${gitShortCommit}-arm" + }else{ + tag = params['image-tag'] + } + + sh """ + echo "Building image with tag: ${tag}" + + set -a # automatically export all variables from .env + . 
.env + set +a # stop automatically + + + docker run --net=host -v /root/.conan:/root/.conan -v \$(pwd):/root/milvus -w /root/milvus milvusdb/milvus-env:ubuntu20.04-\${DATE_VERSION} sh -c "make clean && make install" + """ + + + withCredentials([usernamePassword(credentialsId: "${env.DOCKER_CREDENTIALS_ID}", usernameVariable: 'DOCKER_USERNAME', passwordVariable: 'DOCKER_PASSWORD')]) { + sh 'docker login -u ${DOCKER_USERNAME} -p ${DOCKER_PASSWORD}' + sh """ + export MILVUS_IMAGE_REPO="${env.TARGET_REPO}/milvus" + export MILVUS_HARBOR_IMAGE_REPO="${env.HARBOR_REPO}/milvus/milvus" + export MILVUS_IMAGE_TAG="${tag}" + + docker build --build-arg TARGETARCH=arm64 -f "./build/docker/milvus/ubuntu20.04/Dockerfile" -t \${MILVUS_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} . + + docker push \${MILVUS_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} + docker tag \${MILVUS_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} \${MILVUS_HARBOR_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} + docker logout + """ + } + + withCredentials([usernamePassword(credentialsId: "${env.CI_DOCKER_CREDENTIAL_ID}", usernameVariable: 'CI_REGISTRY_USERNAME', passwordVariable: 'CI_REGISTRY_PASSWORD')]){ + sh "docker login ${env.HARBOR_REPO} -u '${CI_REGISTRY_USERNAME}' -p '${CI_REGISTRY_PASSWORD}'" + sh """ + export MILVUS_HARBOR_IMAGE_REPO="${env.HARBOR_REPO}/milvus/milvus" + export MILVUS_IMAGE_TAG="${tag}" + docker push \${MILVUS_HARBOR_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} + docker logout + """ + } + } + } + } + } + +} diff --git a/ci/jenkins/pod/rte-arm.yaml b/ci/jenkins/pod/rte-arm.yaml new file mode 100644 index 0000000000000..7d10349f40275 --- /dev/null +++ b/ci/jenkins/pod/rte-arm.yaml @@ -0,0 +1,66 @@ +apiVersion: v1 +kind: Pod +metadata: + labels: + app: milvus-e2e + namespace: milvus-ci +spec: + hostNetwork: true + securityContext: # Optional: Restrict capabilities for some security hardening + privileged: true + tolerations: + - key: "node-role.kubernetes.io/arm" + operator: "Exists" + effect: "NoSchedule" + nodeSelector: + "kubernetes.io/arch": "arm64" + enableServiceLinks: false + containers: + - name: main + image: docker:latest + args: ["sleep", "36000"] + # workingDir: /home/jenkins/agent/workspace + securityContext: + privileged: true + resources: + limits: + cpu: "6" + memory: 12Gi + requests: + cpu: "0.5" + memory: 5Gi + volumeMounts: + - mountPath: /var/run + name: docker-root + - mountPath: /root/.conan + name: build-cache + # - mountPath: /ci-logs + # name: ci-logs + - name: dind + image: docker:dind + securityContext: + privileged: true + args: ["dockerd","--host=unix:///var/run/docker.sock","--registry-mirror=https://docker-nexus-ci.zilliz.cc"] + resources: + limits: + cpu: "6" + memory: 12Gi + requests: + cpu: "0.5" + memory: 5Gi + volumeMounts: + - mountPath: /var/run + name: docker-root + - mountPath: /root/.conan + name: build-cache + volumes: + - emptyDir: {} + name: docker-root + - hostPath: + path: /root/.conan + type: DirectoryOrCreate + name: build-cache + # - name: ci-logs + # nfs: + # path: /ci-logs + # server: 172.16.70.249 diff --git a/tests/scripts/values/ci/pr-arm.yaml b/tests/scripts/values/ci/pr-arm.yaml new file mode 100644 index 0000000000000..9327ae611322c --- /dev/null +++ b/tests/scripts/values/ci/pr-arm.yaml @@ -0,0 +1,202 @@ +metrics: + serviceMonitor: + enabled: true +log: + level: debug + +nodeSelector: + "kubernetes.io/arch": "arm64" +tolerations: + - key: "node-role.kubernetes.io/arm" + operator: "Exists" + effect: "NoSchedule" + +proxy: + resources: + requests: + cpu: "0.3" + memory: "256Mi" + limits: + cpu: "1" +rootCoordinator: + 
resources: + requests: + cpu: "0.2" + memory: "256Mi" + limits: + cpu: "1" +queryCoordinator: + resources: + requests: + cpu: "0.2" + memory: "100Mi" + limits: + cpu: "1" +queryNode: + resources: + requests: + cpu: "0.5" + memory: "500Mi" + limits: + cpu: "2" +indexCoordinator: + resources: + requests: + cpu: "0.1" + memory: "50Mi" + limits: + cpu: "1" +indexNode: + resources: + requests: + cpu: "0.5" + memory: "500Mi" + limits: + cpu: "2" +dataCoordinator: + resources: + requests: + cpu: "0.1" + memory: "50Mi" + limits: + cpu: "1" +dataNode: + resources: + requests: + cpu: "0.5" + memory: "500Mi" + limits: + cpu: "2" + +pulsar: + components: + autorecovery: false + proxy: + configData: + PULSAR_MEM: > + -Xms1024m -Xmx1024m + PULSAR_GC: > + -XX:MaxDirectMemorySize=2048m + httpNumThreads: "50" + resources: + requests: + cpu: "0.5" + memory: "1Gi" + # Resources for the websocket proxy + wsResources: + requests: + memory: "100Mi" + cpu: "0.1" + broker: + resources: + requests: + cpu: "0.5" + memory: "4Gi" + configData: + PULSAR_MEM: > + -Xms4096m + -Xmx4096m + -XX:MaxDirectMemorySize=8192m + PULSAR_GC: > + -Dio.netty.leakDetectionLevel=disabled + -Dio.netty.recycler.linkCapacity=1024 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:ParallelGCThreads=32 + -XX:ConcGCThreads=32 + -XX:G1NewSizePercent=50 + -XX:+DisableExplicitGC + -XX:-ResizePLAB + -XX:+ExitOnOutOfMemoryError + maxMessageSize: "104857600" + defaultRetentionTimeInMinutes: "10080" + defaultRetentionSizeInMB: "8192" + backlogQuotaDefaultLimitGB: "8" + backlogQuotaDefaultRetentionPolicy: producer_exception + + bookkeeper: + configData: + PULSAR_MEM: > + -Xms4096m + -Xmx4096m + -XX:MaxDirectMemorySize=8192m + PULSAR_GC: > + -Dio.netty.leakDetectionLevel=disabled + -Dio.netty.recycler.linkCapacity=1024 + -XX:+UseG1GC -XX:MaxGCPauseMillis=10 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:ParallelGCThreads=32 + -XX:ConcGCThreads=32 + -XX:G1NewSizePercent=50 + -XX:+DisableExplicitGC + -XX:-ResizePLAB + -XX:+ExitOnOutOfMemoryError + -XX:+PerfDisableSharedMem + -XX:+PrintGCDetails + nettyMaxFrameSizeBytes: "104867840" + resources: + requests: + cpu: "0.5" + memory: "4Gi" + + zookeeper: + + replicaCount: 1 + configData: + PULSAR_MEM: > + -Xms1024m + -Xmx1024m + PULSAR_GC: > + -Dcom.sun.management.jmxremote + -Djute.maxbuffer=10485760 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:+DisableExplicitGC + -XX:+PerfDisableSharedMem + -Dzookeeper.forceSync=no + resources: + requests: + cpu: "0.3" + memory: "512Mi" +kafka: + + resources: + requests: + cpu: "0.5" + memory: "1Gi" + zookeeper: + + replicaCount: 1 + resources: + requests: + cpu: "0.3" + memory: "512Mi" +etcd: + + + replicaCount: 1 + resources: + requests: + cpu: "0.3" + memory: "100Mi" +minio: + + resources: + requests: + cpu: "0.3" + memory: "512Mi" +standalone: + persistence: + persistentVolumeClaim: + storageClass: local-path + resources: + requests: + cpu: "1" + memory: "3.5Gi" + limits: + cpu: "4" + From 12e8c6c583bf430346fc869ac1ee535a108b743d Mon Sep 17 00:00:00 2001 From: congqixia Date: Tue, 21 May 2024 16:59:39 +0800 Subject: [PATCH 019/126] enhance: Try LatestMessageID when checkpoint unmarshal fails (#33158) See also #33122 Signed-off-by: Congqi Xia --- pkg/mq/msgstream/mq_msgstream.go | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/pkg/mq/msgstream/mq_msgstream.go 
b/pkg/mq/msgstream/mq_msgstream.go index 41f6b260debc6..86ad3f7dfe578 100644 --- a/pkg/mq/msgstream/mq_msgstream.go +++ b/pkg/mq/msgstream/mq_msgstream.go @@ -482,10 +482,15 @@ func (ms *mqMsgStream) Seek(ctx context.Context, msgPositions []*msgpb.MsgPositi messageID, err := ms.client.BytesToMsgID(mp.MsgID) if err != nil { if paramtable.Get().MQCfg.IgnoreBadPosition.GetAsBool() { - log.Ctx(ctx).Warn("Ignoring bad message id", zap.Error(err)) - continue + // try to use latest message ID first + messageID, err = consumer.GetLatestMsgID() + if err != nil { + log.Ctx(ctx).Warn("Ignoring bad message id", zap.Error(err)) + continue + } + } else { + return err } - return err } log.Info("MsgStream seek begin", zap.String("channel", mp.ChannelName), zap.Any("MessageID", mp.MsgID)) @@ -853,11 +858,17 @@ func (ms *MqTtMsgStream) Seek(ctx context.Context, msgPositions []*msgpb.MsgPosi seekMsgID, err := ms.client.BytesToMsgID(mp.MsgID) if err != nil { if paramtable.Get().MQCfg.IgnoreBadPosition.GetAsBool() { - log.Ctx(ctx).Warn("Ignoring bad message id", zap.Error(err)) - return false, nil + // try to use latest message ID first + seekMsgID, err = consumer.GetLatestMsgID() + if err != nil { + log.Ctx(ctx).Warn("Ignoring bad message id", zap.Error(err)) + return false, nil + } + } else { + return false, err } - return false, err } + log.Info("MsgStream begin to seek start msg: ", zap.String("channel", mp.ChannelName), zap.Any("MessageID", mp.MsgID)) err = consumer.Seek(seekMsgID, true) if err != nil { From ed39a38953b2587a2b4caa091cb30f5c4d59b690 Mon Sep 17 00:00:00 2001 From: "cai.zhang" Date: Tue, 21 May 2024 19:27:39 +0800 Subject: [PATCH 020/126] enhance: Reduce the frequency of logs describing indexing failures (#33212) issue: #33001 #33102 Signed-off-by: Cai Zhang --- internal/datacoord/index_service.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/datacoord/index_service.go b/internal/datacoord/index_service.go index d1e166d8fb291..b0c68a33e3f95 100644 --- a/internal/datacoord/index_service.go +++ b/internal/datacoord/index_service.go @@ -690,7 +690,7 @@ func (s *Server) DescribeIndex(ctx context.Context, req *indexpb.DescribeIndexRe indexes := s.meta.indexMeta.GetIndexesForCollection(req.GetCollectionID(), req.GetIndexName()) if len(indexes) == 0 { err := merr.WrapErrIndexNotFound(req.GetIndexName()) - log.Warn("DescribeIndex fail", zap.Error(err)) + log.RatedWarn(60, "DescribeIndex fail", zap.Error(err)) return &indexpb.DescribeIndexResponse{ Status: merr.Status(err), }, nil From e18d5aceb6f21d9b32065ac02edc8ef439b51c47 Mon Sep 17 00:00:00 2001 From: SimFG Date: Tue, 21 May 2024 22:39:46 +0800 Subject: [PATCH 021/126] enhance: add config to control whether to init public role permissions (#33165) issue: #33164 Signed-off-by: SimFG --- internal/rootcoord/root_coord.go | 19 +++++++++--- internal/rootcoord/root_coord_test.go | 42 ++++++++++++++++++++++++++ pkg/util/paramtable/component_param.go | 9 ++++++ 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/internal/rootcoord/root_coord.go b/internal/rootcoord/root_coord.go index cc330fa3c9a8e..4e8fc35ee1700 100644 --- a/internal/rootcoord/root_coord.go +++ b/internal/rootcoord/root_coord.go @@ -545,15 +545,29 @@ func (c *Core) initRbac() error { } } + if Params.ProxyCfg.EnablePublicPrivilege.GetAsBool() { + err = c.initPublicRolePrivilege() + if err != nil { + return err + } + } + + if Params.RoleCfg.Enabled.GetAsBool() { + return c.initBuiltinRoles() + } + return nil +} + +func (c *Core) 
initPublicRolePrivilege() error { // grant privileges for the public role globalPrivileges := []string{ commonpb.ObjectPrivilege_PrivilegeDescribeCollection.String(), - commonpb.ObjectPrivilege_PrivilegeShowCollections.String(), } collectionPrivileges := []string{ commonpb.ObjectPrivilege_PrivilegeIndexDetail.String(), } + var err error for _, globalPrivilege := range globalPrivileges { err = c.meta.OperatePrivilege(util.DefaultTenant, &milvuspb.GrantEntity{ Role: &milvuspb.RoleEntity{Name: util.RolePublic}, @@ -584,9 +598,6 @@ func (c *Core) initRbac() error { return errors.Wrap(err, "failed to grant collection privilege") } } - if Params.RoleCfg.Enabled.GetAsBool() { - return c.initBuiltinRoles() - } return nil } diff --git a/internal/rootcoord/root_coord_test.go b/internal/rootcoord/root_coord_test.go index 832526d61bef9..bbdb896b52d17 100644 --- a/internal/rootcoord/root_coord_test.go +++ b/internal/rootcoord/root_coord_test.go @@ -1807,6 +1807,48 @@ func TestCore_Stop(t *testing.T) { }) } +func TestCore_InitRBAC(t *testing.T) { + paramtable.Init() + t.Run("init default role and public role privilege", func(t *testing.T) { + meta := mockrootcoord.NewIMetaTable(t) + c := newTestCore(withHealthyCode(), withMeta(meta)) + meta.EXPECT().CreateRole(mock.Anything, mock.Anything).Return(nil).Twice() + meta.EXPECT().OperatePrivilege(mock.Anything, mock.Anything, mock.Anything).Return(nil).Twice() + + Params.Save(Params.RoleCfg.Enabled.Key, "false") + Params.Save(Params.ProxyCfg.EnablePublicPrivilege.Key, "true") + + defer func() { + Params.Reset(Params.RoleCfg.Enabled.Key) + Params.Reset(Params.ProxyCfg.EnablePublicPrivilege.Key) + }() + + err := c.initRbac() + assert.NoError(t, err) + }) + + t.Run("not init public role privilege and init default privilege", func(t *testing.T) { + builtinRoles := `{"db_admin": {"privileges": [{"object_type": "Global", "object_name": "*", "privilege": "CreateCollection", "db_name": "*"}]}}` + meta := mockrootcoord.NewIMetaTable(t) + c := newTestCore(withHealthyCode(), withMeta(meta)) + meta.EXPECT().CreateRole(mock.Anything, mock.Anything).Return(nil).Times(3) + meta.EXPECT().OperatePrivilege(mock.Anything, mock.Anything, mock.Anything).Return(nil).Once() + + Params.Save(Params.RoleCfg.Enabled.Key, "true") + Params.Save(Params.RoleCfg.Roles.Key, builtinRoles) + Params.Save(Params.ProxyCfg.EnablePublicPrivilege.Key, "false") + + defer func() { + Params.Reset(Params.RoleCfg.Enabled.Key) + Params.Reset(Params.RoleCfg.Roles.Key) + Params.Reset(Params.ProxyCfg.EnablePublicPrivilege.Key) + }() + + err := c.initRbac() + assert.NoError(t, err) + }) +} + type RootCoordSuite struct { suite.Suite } diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 117757815a39b..8804240da4ef9 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -1034,6 +1034,7 @@ type proxyConfig struct { MustUsePartitionKey ParamItem `refreshable:"true"` SkipAutoIDCheck ParamItem `refreshable:"true"` SkipPartitionKeyCheck ParamItem `refreshable:"true"` + EnablePublicPrivilege ParamItem `refreshable:"false"` AccessLog AccessLogConfig @@ -1394,6 +1395,14 @@ please adjust in embedded Milvus: false`, } p.SkipPartitionKeyCheck.Init(base.mgr) + p.EnablePublicPrivilege = ParamItem{ + Key: "proxy.enablePublicPrivilege", + Version: "2.4.1", + DefaultValue: "true", + Doc: "switch for whether proxy shall enable public privilege", + } + p.EnablePublicPrivilege.Init(base.mgr) + p.GracefulStopTimeout = ParamItem{ Key: 
"proxy.gracefulStopTimeout", Version: "2.3.7", From 3d105fcb4d6727ea9618c6b8b06f15a0e4a75ad0 Mon Sep 17 00:00:00 2001 From: Xiaofan <83447078+xiaofan-luan@users.noreply.github.com> Date: Tue, 21 May 2024 22:53:40 +0800 Subject: [PATCH 022/126] enhance: Remove l0 delete cache (#32990) fix #32979 remove l0 cache and build delete pk and ts everytime. this reduce the memory and also increase the code readability Signed-off-by: xiaofanluan --- internal/querynodev2/delegator/delegator.go | 12 +- .../querynodev2/delegator/delegator_data.go | 114 +++++------------- .../delegator/delegator_data_test.go | 68 +++++++---- 3 files changed, 81 insertions(+), 113 deletions(-) diff --git a/internal/querynodev2/delegator/delegator.go b/internal/querynodev2/delegator/delegator.go index 7fca3c6acdb63..7f3cc38d38f6b 100644 --- a/internal/querynodev2/delegator/delegator.go +++ b/internal/querynodev2/delegator/delegator.go @@ -106,12 +106,11 @@ type shardDelegator struct { lifetime lifetime.Lifetime[lifetime.State] - distribution *distribution - segmentManager segments.SegmentManager - tsafeManager tsafe.Manager - pkOracle pkoracle.PkOracle - level0Mut sync.RWMutex - level0Deletions map[int64]*storage.DeleteData // partitionID -> deletions + distribution *distribution + segmentManager segments.SegmentManager + tsafeManager tsafe.Manager + pkOracle pkoracle.PkOracle + level0Mut sync.RWMutex // stream delete buffer deleteMut sync.RWMutex deleteBuffer deletebuffer.DeleteBuffer[*deletebuffer.Item] @@ -876,7 +875,6 @@ func NewShardDelegator(ctx context.Context, collectionID UniqueID, replicaID Uni workerManager: workerManager, lifetime: lifetime.NewLifetime(lifetime.Initializing), distribution: NewDistribution(), - level0Deletions: make(map[int64]*storage.DeleteData), deleteBuffer: deletebuffer.NewListDeleteBuffer[*deletebuffer.Item](startTs, sizePerBlock), pkOracle: pkoracle.NewPkOracle(), tsafeManager: tsafeManager, diff --git a/internal/querynodev2/delegator/delegator_data.go b/internal/querynodev2/delegator/delegator_data.go index be4870a34c571..6055d8ec731ea 100644 --- a/internal/querynodev2/delegator/delegator_data.go +++ b/internal/querynodev2/delegator/delegator_data.go @@ -365,7 +365,7 @@ func (sd *shardDelegator) LoadGrowing(ctx context.Context, infos []*querypb.Segm log := log.With( zap.Int64("segmentID", segment.ID()), ) - deletedPks, deletedTss := sd.GetLevel0Deletions(segment.Partition()) + deletedPks, deletedTss := sd.GetLevel0Deletions(segment.Partition(), pkoracle.NewCandidateKey(segment.ID(), segment.Partition(), segments.SegmentTypeGrowing)) if len(deletedPks) == 0 { continue } @@ -488,7 +488,7 @@ func (sd *shardDelegator) LoadSegments(ctx context.Context, req *querypb.LoadSeg } }) if req.GetInfos()[0].GetLevel() == datapb.SegmentLevel_L0 { - sd.GenerateLevel0DeletionCache() + sd.RefreshLevel0DeletionStats() } else { log.Debug("load delete...") err = sd.loadStreamDelete(ctx, candidates, infos, req.GetDeltaPositions(), targetNodeID, worker, entries) @@ -512,94 +512,51 @@ func (sd *shardDelegator) LoadSegments(ctx context.Context, req *querypb.LoadSeg return nil } -func (sd *shardDelegator) GetLevel0Deletions(partitionID int64) ([]storage.PrimaryKey, []storage.Timestamp) { - sd.level0Mut.RLock() - deleteData, ok1 := sd.level0Deletions[partitionID] - allPartitionsDeleteData, ok2 := sd.level0Deletions[common.AllPartitionsID] - sd.level0Mut.RUnlock() - // we may need to merge the specified partition deletions and the all partitions deletions, - // so release the mutex as early as possible. 
- - if ok1 && ok2 { - pks := make([]storage.PrimaryKey, 0, deleteData.RowCount+allPartitionsDeleteData.RowCount) - tss := make([]storage.Timestamp, 0, deleteData.RowCount+allPartitionsDeleteData.RowCount) - - i := 0 - j := 0 - for i < int(deleteData.RowCount) || j < int(allPartitionsDeleteData.RowCount) { - if i == int(deleteData.RowCount) { - pks = append(pks, allPartitionsDeleteData.Pks[j]) - tss = append(tss, allPartitionsDeleteData.Tss[j]) - j++ - } else if j == int(allPartitionsDeleteData.RowCount) { - pks = append(pks, deleteData.Pks[i]) - tss = append(tss, deleteData.Tss[i]) - i++ - } else if deleteData.Tss[i] < allPartitionsDeleteData.Tss[j] { - pks = append(pks, deleteData.Pks[i]) - tss = append(tss, deleteData.Tss[i]) - i++ - } else { - pks = append(pks, allPartitionsDeleteData.Pks[j]) - tss = append(tss, allPartitionsDeleteData.Tss[j]) - j++ - } - } - - return pks, tss - } else if ok1 { - return deleteData.Pks, deleteData.Tss - } else if ok2 { - return allPartitionsDeleteData.Pks, allPartitionsDeleteData.Tss - } - - return nil, nil -} +func (sd *shardDelegator) GetLevel0Deletions(partitionID int64, candidate pkoracle.Candidate) ([]storage.PrimaryKey, []storage.Timestamp) { + sd.level0Mut.Lock() + defer sd.level0Mut.Unlock() -func (sd *shardDelegator) GenerateLevel0DeletionCache() { + // TODO: this could be large, host all L0 delete on delegator might be a dangerous, consider mmap it on local segment and stream processing it level0Segments := sd.segmentManager.GetBy(segments.WithLevel(datapb.SegmentLevel_L0), segments.WithChannel(sd.vchannelName)) - deletions := make(map[int64]*storage.DeleteData) + pks := make([]storage.PrimaryKey, 0) + tss := make([]storage.Timestamp, 0) + for _, segment := range level0Segments { segment := segment.(*segments.L0Segment) - pks, tss := segment.DeleteRecords() - deleteData, ok := deletions[segment.Partition()] - if !ok { - deleteData = storage.NewDeleteData(pks, tss) - } else { - deleteData.AppendBatch(pks, tss) + if segment.Partition() == partitionID || segment.Partition() == common.AllPartitionsID { + segmentPks, segmentTss := segment.DeleteRecords() + for i, pk := range segmentPks { + if candidate.MayPkExist(pk) { + pks = append(pks, pk) + tss = append(tss, segmentTss[i]) + } + } } - deletions[segment.Partition()] = deleteData } - type DeletePair struct { - Pk storage.PrimaryKey - Ts storage.Timestamp - } - for _, deleteData := range deletions { - pairs := make([]DeletePair, deleteData.RowCount) - for i := range deleteData.Pks { - pairs[i] = DeletePair{deleteData.Pks[i], deleteData.Tss[i]} - } - sort.Slice(pairs, func(i, j int) bool { - return pairs[i].Ts < pairs[j].Ts - }) - for i := range pairs { - deleteData.Pks[i], deleteData.Tss[i] = pairs[i].Pk, pairs[i].Ts - } - } + sort.Slice(pks, func(i, j int) bool { + return tss[i] < tss[j] + }) + return pks, tss +} + +func (sd *shardDelegator) RefreshLevel0DeletionStats() { sd.level0Mut.Lock() defer sd.level0Mut.Unlock() + level0Segments := sd.segmentManager.GetBy(segments.WithLevel(datapb.SegmentLevel_L0), segments.WithChannel(sd.vchannelName)) totalSize := int64(0) - for _, delete := range deletions { - totalSize += delete.Size() + for _, segment := range level0Segments { + segment := segment.(*segments.L0Segment) + pks, tss := segment.DeleteRecords() + totalSize += lo.SumBy(pks, func(pk storage.PrimaryKey) int64 { return pk.Size() }) + int64(len(tss)*8) } + metrics.QueryNodeLevelZeroSize.WithLabelValues( fmt.Sprint(paramtable.GetNodeID()), fmt.Sprint(sd.collectionID), sd.vchannelName, 
).Set(float64(totalSize)) - sd.level0Deletions = deletions } func (sd *shardDelegator) loadStreamDelete(ctx context.Context, @@ -635,14 +592,9 @@ func (sd *shardDelegator) loadStreamDelete(ctx context.Context, position = deltaPositions[0] } - deletedPks, deletedTss := sd.GetLevel0Deletions(candidate.Partition()) + deletedPks, deletedTss := sd.GetLevel0Deletions(candidate.Partition(), candidate) deleteData := &storage.DeleteData{} - for i, pk := range deletedPks { - if candidate.MayPkExist(pk) { - deleteData.Append(pk, deletedTss[i]) - } - } - + deleteData.AppendBatch(deletedPks, deletedTss) if deleteData.RowCount > 0 { log.Info("forward L0 delete to worker...", zap.Int64("deleteRowNum", deleteData.RowCount), @@ -900,7 +852,7 @@ func (sd *shardDelegator) ReleaseSegments(ctx context.Context, req *querypb.Rele } if hasLevel0 { - sd.GenerateLevel0DeletionCache() + sd.RefreshLevel0DeletionStats() } partitionsToReload := make([]UniqueID, 0) lo.ForEach(req.GetSegmentIDs(), func(segmentID int64, _ int) { diff --git a/internal/querynodev2/delegator/delegator_data_test.go b/internal/querynodev2/delegator/delegator_data_test.go index 6d2ae22411b66..47a284afd4c89 100644 --- a/internal/querynodev2/delegator/delegator_data_test.go +++ b/internal/querynodev2/delegator/delegator_data_test.go @@ -1110,44 +1110,62 @@ func (s *DelegatorDataSuite) TestLevel0Deletions() { partitionID := int64(10) partitionDeleteData := storage.NewDeleteData([]storage.PrimaryKey{storage.NewInt64PrimaryKey(1)}, []storage.Timestamp{100}) allPartitionDeleteData := storage.NewDeleteData([]storage.PrimaryKey{storage.NewInt64PrimaryKey(2)}, []storage.Timestamp{101}) - delegator.level0Deletions[partitionID] = partitionDeleteData - pks, _ := delegator.GetLevel0Deletions(partitionID) + schema := segments.GenTestCollectionSchema("test_stop", schemapb.DataType_Int64, true) + collection := segments.NewCollection(1, schema, nil, &querypb.LoadMetaInfo{ + LoadType: querypb.LoadType_LoadCollection, + }) + + l0, _ := segments.NewL0Segment(collection, segments.SegmentTypeSealed, 1, &querypb.SegmentLoadInfo{ + CollectionID: 1, + SegmentID: 2, + PartitionID: partitionID, + InsertChannel: delegator.vchannelName, + Level: datapb.SegmentLevel_L0, + NumOfRows: 1, + }) + l0.LoadDeltaData(context.TODO(), partitionDeleteData) + delegator.segmentManager.Put(context.TODO(), segments.SegmentTypeSealed, l0) + + l0Global, _ := segments.NewL0Segment(collection, segments.SegmentTypeSealed, 2, &querypb.SegmentLoadInfo{ + CollectionID: 1, + SegmentID: 3, + PartitionID: common.AllPartitionsID, + InsertChannel: delegator.vchannelName, + Level: datapb.SegmentLevel_L0, + NumOfRows: int64(1), + }) + l0Global.LoadDeltaData(context.TODO(), allPartitionDeleteData) + + pks, _ := delegator.GetLevel0Deletions(partitionID, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) s.True(pks[0].EQ(partitionDeleteData.Pks[0])) - pks, _ = delegator.GetLevel0Deletions(partitionID + 1) + pks, _ = delegator.GetLevel0Deletions(partitionID+1, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) s.Empty(pks) - delegator.level0Deletions[common.AllPartitionsID] = allPartitionDeleteData - pks, _ = delegator.GetLevel0Deletions(partitionID) - s.Len(pks, 2) + delegator.segmentManager.Put(context.TODO(), segments.SegmentTypeSealed, l0Global) + pks, _ = delegator.GetLevel0Deletions(partitionID, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) s.True(pks[0].EQ(partitionDeleteData.Pks[0])) 
s.True(pks[1].EQ(allPartitionDeleteData.Pks[0])) - delete(delegator.level0Deletions, partitionID) - pks, _ = delegator.GetLevel0Deletions(partitionID) - s.True(pks[0].EQ(allPartitionDeleteData.Pks[0])) - - // exchange the order - delegator.level0Deletions = make(map[int64]*storage.DeleteData) - partitionDeleteData, allPartitionDeleteData = allPartitionDeleteData, partitionDeleteData - delegator.level0Deletions[partitionID] = partitionDeleteData + bfs := pkoracle.NewBloomFilterSet(3, l0.Partition(), commonpb.SegmentState_Sealed) + bfs.UpdateBloomFilter(allPartitionDeleteData.Pks) - pks, _ = delegator.GetLevel0Deletions(partitionID) - s.True(pks[0].EQ(partitionDeleteData.Pks[0])) - - pks, _ = delegator.GetLevel0Deletions(partitionID + 1) - s.Empty(pks) + pks, _ = delegator.GetLevel0Deletions(partitionID, bfs) + // bf filtered segment + s.Equal(len(pks), 1) + s.True(pks[0].EQ(allPartitionDeleteData.Pks[0])) - delegator.level0Deletions[common.AllPartitionsID] = allPartitionDeleteData - pks, _ = delegator.GetLevel0Deletions(partitionID) - s.Len(pks, 2) + delegator.segmentManager.Remove(context.TODO(), l0.ID(), querypb.DataScope_All) + pks, _ = delegator.GetLevel0Deletions(partitionID, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) s.True(pks[0].EQ(allPartitionDeleteData.Pks[0])) - s.True(pks[1].EQ(partitionDeleteData.Pks[0])) - delete(delegator.level0Deletions, partitionID) - pks, _ = delegator.GetLevel0Deletions(partitionID) + pks, _ = delegator.GetLevel0Deletions(partitionID+1, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) s.True(pks[0].EQ(allPartitionDeleteData.Pks[0])) + + delegator.segmentManager.Remove(context.TODO(), l0Global.ID(), querypb.DataScope_All) + pks, _ = delegator.GetLevel0Deletions(partitionID+1, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) + s.Empty(pks) } func (s *DelegatorDataSuite) TestReadDeleteFromMsgstream() { From 303470fc3516f08890e7623f8720182867c593b9 Mon Sep 17 00:00:00 2001 From: wei liu Date: Wed, 22 May 2024 10:03:40 +0800 Subject: [PATCH 023/126] fix: Clean offline node from resource group after qc restart (#33232) issue: #33200 #33207 pr#33104 causes the offline node will be kept in resource group after qc recover, and offline node will be assign to new replica as rwNode, then request send to those node will fail by NodeNotFound. 
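In rough terms, the cleanup this patch performs on QueryCoord recovery can be sketched as below (identifiers are taken from the diff that follows; treat this as an illustrative outline of the change, not the exact code):

    // On restart, walk every resource group and forget members whose
    // session is gone, so a stale node can no longer be handed to a
    // replica as an rwNode and surface as NodeNotFound on requests.
    for _, rgName := range s.meta.ListResourceGroups() {
        rg := s.meta.ResourceManager.GetResourceGroup(rgName)
        for _, node := range rg.GetNodes() {
            info := s.nodeMgr.Get(node)
            if info == nil {
                s.meta.ResourceManager.HandleNodeDown(node)
            } else if info.IsStoppingState() {
                s.meta.ResourceManager.HandleNodeStopping(node)
            }
        }
    }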
Signed-off-by: Wei Liu --- internal/querycoordv2/server.go | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/internal/querycoordv2/server.go b/internal/querycoordv2/server.go index 0f54d5e2f8da2..da16b65fb4406 100644 --- a/internal/querycoordv2/server.go +++ b/internal/querycoordv2/server.go @@ -456,7 +456,7 @@ func (s *Server) startQueryCoord() error { s.nodeMgr.Stopping(node.ServerID) } } - s.checkReplicas() + s.checkNodeStateInRG() for _, node := range sessions { s.handleNodeUp(node.ServerID) } @@ -778,28 +778,15 @@ func (s *Server) handleNodeDown(node int64) { s.meta.ResourceManager.HandleNodeDown(node) } -// checkReplicas checks whether replica contains offline node, and remove those nodes -func (s *Server) checkReplicas() { - for _, collection := range s.meta.CollectionManager.GetAll() { - log := log.With(zap.Int64("collectionID", collection)) - replicas := s.meta.ReplicaManager.GetByCollection(collection) - for _, replica := range replicas { - toRemove := make([]int64, 0) - for _, node := range replica.GetNodes() { - if s.nodeMgr.Get(node) == nil { - toRemove = append(toRemove, node) - } - } - - if len(toRemove) > 0 { - log := log.With( - zap.Int64("replicaID", replica.GetID()), - zap.Int64s("offlineNodes", toRemove), - ) - log.Info("some nodes are offline, remove them from replica", zap.Any("toRemove", toRemove)) - if err := s.meta.ReplicaManager.RemoveNode(replica.GetID(), toRemove...); err != nil { - log.Warn("failed to remove offline nodes from replica") - } +func (s *Server) checkNodeStateInRG() { + for _, rgName := range s.meta.ListResourceGroups() { + rg := s.meta.ResourceManager.GetResourceGroup(rgName) + for _, node := range rg.GetNodes() { + info := s.nodeMgr.Get(node) + if info == nil { + s.meta.ResourceManager.HandleNodeDown(node) + } else if info.IsStoppingState() { + s.meta.ResourceManager.HandleNodeStopping(node) } } } From f20becb7255494e74f0f735cdd2912a3e5ba51b7 Mon Sep 17 00:00:00 2001 From: Alexander Guzhva Date: Tue, 21 May 2024 23:15:39 -0400 Subject: [PATCH 024/126] fix: Download and install cmake for the current platform, not x86_64 only (#32548) issue #32476 tested on x86_64 and aarch64. I'm not sure what needs to be done on some exotic architectures. Signed-off-by: Alexandr Guzhva --- scripts/install_deps.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install_deps.sh b/scripts/install_deps.sh index 8ae371aa2a075..38f1e1f294be6 100755 --- a/scripts/install_deps.sh +++ b/scripts/install_deps.sh @@ -48,7 +48,7 @@ function install_linux_deps() { cmake_version=$(echo "$(cmake --version | head -1)" | grep -o '[0-9][\.][0-9]*') if [ ! $cmake_version ] || [ `expr $cmake_version \>= 3.26` -eq 0 ]; then echo "cmake version $cmake_version is less than 3.26, wait to installing ..." 
- wget -qO- "https://cmake.org/files/v3.26/cmake-3.26.5-linux-x86_64.tar.gz" | sudo tar --strip-components=1 -xz -C /usr/local + wget -qO- "https://cmake.org/files/v3.26/cmake-3.26.5-linux-$(uname -m).tar.gz" | sudo tar --strip-components=1 -xz -C /usr/local else echo "cmake version is $cmake_version" fi From 819a624753b0026445f0d03373b68f30ee6ec577 Mon Sep 17 00:00:00 2001 From: XuanYang-cn Date: Wed, 22 May 2024 11:17:38 +0800 Subject: [PATCH 025/126] fix: Return error when startup Delete/AddNode fail (#33193) See also: #33151, #33149 --------- Signed-off-by: yangxuan --- internal/datacoord/channel_manager_v2.go | 25 ++++--- internal/datacoord/channel_manager_v2_test.go | 68 +++++++++++++++++++ 2 files changed, 84 insertions(+), 9 deletions(-) diff --git a/internal/datacoord/channel_manager_v2.go b/internal/datacoord/channel_manager_v2.go index 6243761ce70cd..ccaed65ea9ce7 100644 --- a/internal/datacoord/channel_manager_v2.go +++ b/internal/datacoord/channel_manager_v2.go @@ -31,6 +31,7 @@ import ( "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/conc" "github.com/milvus-io/milvus/pkg/util/lock" + "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -131,16 +132,19 @@ func (m *ChannelManagerImplV2) Startup(ctx context.Context, legacyNodes, allNode oNodes := m.store.GetNodes() m.mu.Unlock() - // Add new online nodes to the cluster. offLines, newOnLines := lo.Difference(oNodes, allNodes) - lo.ForEach(newOnLines, func(nodeID int64, _ int) { - m.AddNode(nodeID) - }) - // Delete offlines from the cluster - lo.ForEach(offLines, func(nodeID int64, _ int) { - m.DeleteNode(nodeID) - }) + for _, nodeID := range offLines { + if err := m.DeleteNode(nodeID); err != nil { + return err + } + } + // Add new online nodes to the cluster. 
+ for _, nodeID := range newOnLines { + if err := m.AddNode(nodeID); err != nil { + return err + } + } m.mu.Lock() nodeChannels := m.store.GetNodeChannelsBy( @@ -654,7 +658,10 @@ func (m *ChannelManagerImplV2) Check(ctx context.Context, nodeID int64, info *da ) resp, err := m.subCluster.CheckChannelOperationProgress(ctx, nodeID, info) if err != nil { - log.Warn("Fail to check channel operation progress") + log.Warn("Fail to check channel operation progress", zap.Error(err)) + if errors.Is(err, merr.ErrNodeNotFound) { + return false, true + } return false, false } log.Info("Got channel operation progress", diff --git a/internal/datacoord/channel_manager_v2_test.go b/internal/datacoord/channel_manager_v2_test.go index 4bacd11399b0d..b2093b9881b0f 100644 --- a/internal/datacoord/channel_manager_v2_test.go +++ b/internal/datacoord/channel_manager_v2_test.go @@ -21,6 +21,7 @@ import ( "fmt" "testing" + "github.com/cockroachdb/errors" "github.com/golang/protobuf/proto" "github.com/samber/lo" "github.com/stretchr/testify/mock" @@ -31,6 +32,7 @@ import ( "github.com/milvus-io/milvus/internal/kv/predicates" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" ) @@ -446,6 +448,29 @@ func (s *ChannelManagerSuite) TestAdvanceChannelState() { s.checkAssignment(m, 1, "ch1", Watching) s.checkAssignment(m, 1, "ch2", Watching) }) + s.Run("advance watching channels check ErrNodeNotFound", func() { + chNodes := map[string]int64{ + "ch1": 1, + "ch2": 1, + } + s.prepareMeta(chNodes, datapb.ChannelWatchState_ToWatch) + s.mockCluster.EXPECT().NotifyChannelOperation(mock.Anything, mock.Anything, mock.Anything).Return(nil).Twice() + m, err := NewChannelManagerV2(s.mockKv, s.mockHandler, s.mockCluster, s.mockAlloc) + s.Require().NoError(err) + s.checkAssignment(m, 1, "ch1", ToWatch) + s.checkAssignment(m, 1, "ch2", ToWatch) + + m.AdvanceChannelState(ctx) + s.checkAssignment(m, 1, "ch1", Watching) + s.checkAssignment(m, 1, "ch2", Watching) + + s.mockCluster.EXPECT().CheckChannelOperationProgress(mock.Anything, mock.Anything, mock.Anything). + Return(nil, merr.WrapErrNodeNotFound(1)).Twice() + m.AdvanceChannelState(ctx) + s.checkAssignment(m, 1, "ch1", Standby) + s.checkAssignment(m, 1, "ch2", Standby) + }) + s.Run("advance watching channels check watch success", func() { chNodes := map[string]int64{ "ch1": 1, @@ -517,6 +542,28 @@ func (s *ChannelManagerSuite) TestAdvanceChannelState() { s.checkAssignment(m, 1, "ch1", Releasing) s.checkAssignment(m, 1, "ch2", Releasing) }) + s.Run("advance releasing channels check ErrNodeNotFound", func() { + chNodes := map[string]int64{ + "ch1": 1, + "ch2": 1, + } + s.prepareMeta(chNodes, datapb.ChannelWatchState_ToRelease) + s.mockCluster.EXPECT().NotifyChannelOperation(mock.Anything, mock.Anything, mock.Anything).Return(nil).Twice() + m, err := NewChannelManagerV2(s.mockKv, s.mockHandler, s.mockCluster, s.mockAlloc) + s.Require().NoError(err) + s.checkAssignment(m, 1, "ch1", ToRelease) + s.checkAssignment(m, 1, "ch2", ToRelease) + + m.AdvanceChannelState(ctx) + s.checkAssignment(m, 1, "ch1", Releasing) + s.checkAssignment(m, 1, "ch2", Releasing) + + s.mockCluster.EXPECT().CheckChannelOperationProgress(mock.Anything, mock.Anything, mock.Anything). 
+ Return(nil, merr.WrapErrNodeNotFound(1)).Twice() + m.AdvanceChannelState(ctx) + s.checkAssignment(m, 1, "ch1", Standby) + s.checkAssignment(m, 1, "ch2", Standby) + }) s.Run("advance releasing channels check release success", func() { chNodes := map[string]int64{ "ch1": 1, @@ -659,5 +706,26 @@ func (s *ChannelManagerSuite) TestStartup() { s.checkAssignment(m, 2, "ch3", ToWatch) } +func (s *ChannelManagerSuite) TestStartupRootCoordFailed() { + chNodes := map[string]int64{ + "ch1": 1, + "ch2": 1, + "ch3": 1, + "ch4": bufferID, + } + s.prepareMeta(chNodes, datapb.ChannelWatchState_ToWatch) + + s.mockAlloc = NewNMockAllocator(s.T()) + s.mockAlloc.EXPECT().allocID(mock.Anything).Return(0, errors.New("mock rootcoord failure")) + m, err := NewChannelManagerV2(s.mockKv, s.mockHandler, s.mockCluster, s.mockAlloc) + s.Require().NoError(err) + + err = m.Startup(context.TODO(), nil, []int64{2}) + s.Error(err) + + err = m.Startup(context.TODO(), nil, []int64{1, 2}) + s.Error(err) +} + func (s *ChannelManagerSuite) TestCheckLoop() {} func (s *ChannelManagerSuite) TestGet() {} From 648d5661ca8771fadc664f427ac330b083b8734e Mon Sep 17 00:00:00 2001 From: Alexander Guzhva Date: Tue, 21 May 2024 23:37:40 -0400 Subject: [PATCH 026/126] enhance: Upgrade bitset for ARM SVE (#32718) issue: #32826 improve ARM SVE performance for `internal/core/src/bitset` Baseline timings for gcc 11.4 + Graviton 3 + manually enabled SVE: https://gist.github.com/alexanderguzhva/a974b50134c8bb9255fb15f144e5ac83 Candidate timings for gcc 11.4 + Graviton 3 + manually enabled SVE: https://gist.github.com/alexanderguzhva/19fc88f4ad3757e05e0f7feaf563b3d3 Signed-off-by: Alexandr Guzhva --- .../src/bitset/detail/platform/arm/sve-impl.h | 621 +++++++----------- 1 file changed, 254 insertions(+), 367 deletions(-) diff --git a/internal/core/src/bitset/detail/platform/arm/sve-impl.h b/internal/core/src/bitset/detail/platform/arm/sve-impl.h index 18433402d04d9..dfc84f2824d8a 100644 --- a/internal/core/src/bitset/detail/platform/arm/sve-impl.h +++ b/internal/core/src/bitset/detail/platform/arm/sve-impl.h @@ -42,63 +42,6 @@ namespace { // constexpr size_t MAX_SVE_WIDTH = 2048; -constexpr uint8_t SVE_LANES_8[MAX_SVE_WIDTH / 8] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, - 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, - 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, - 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, - 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, - 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, - - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, - 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, - 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, - 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, - 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, - 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, - - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, - 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, - 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 0xA0, - 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, - 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, - 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, - - 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, - 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, - 0xD6, 
0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, - 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, - 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, - 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF}; - -constexpr uint16_t SVE_LANES_16[MAX_SVE_WIDTH / 16] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, - 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, - 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, - 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, - 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, - 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, - - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, - 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, - 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, - 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, - 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, - 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F}; - -constexpr uint32_t SVE_LANES_32[MAX_SVE_WIDTH / 32] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, - 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, - 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, - 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, - 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, - 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F}; - -constexpr uint64_t SVE_LANES_64[MAX_SVE_WIDTH / 64] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, - 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, - 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F}; - /* // debugging facilities @@ -131,179 +74,28 @@ void print_svuint8_t(const svuint8_t value) { /////////////////////////////////////////////////////////////////////////// -// todo: replace with pext whenever available - -// generate 16-bit bitmask from 8 serialized 16-bit svbool_t values -void -write_bitmask_16_8x(uint8_t* const __restrict res_u8, - const svbool_t pred_op, - const svbool_t pred_write, - const uint8_t* const __restrict pred_buf) { - // perform parallel pext - // 2048b -> 32 bytes mask -> 256 bytes total, 128 uint16_t values - // 512b -> 8 bytes mask -> 64 bytes total, 32 uint16_t values - // 256b -> 4 bytes mask -> 32 bytes total, 16 uint16_t values - // 128b -> 2 bytes mask -> 16 bytes total, 8 uint16_t values - - // this code does reduction of 16-bit 0b0A0B0C0D0E0F0G0H words into - // uint8_t values 0bABCDEFGH, then writes ones to the memory - - // we need to operate in uint8_t - const svuint8_t mask_8b = svld1_u8(pred_op, pred_buf); - - const svuint8_t mask_04_8b = svand_n_u8_z(pred_op, mask_8b, 0x01); - const svuint8_t mask_15_8b = svand_n_u8_z(pred_op, mask_8b, 0x04); - const svuint8_t mask_15s_8b = svlsr_n_u8_z(pred_op, mask_15_8b, 1); - const svuint8_t mask_26_8b = svand_n_u8_z(pred_op, mask_8b, 0x10); - const svuint8_t mask_26s_8b = svlsr_n_u8_z(pred_op, mask_26_8b, 2); - const svuint8_t mask_37_8b = svand_n_u8_z(pred_op, mask_8b, 0x40); - const svuint8_t mask_37s_8b = svlsr_n_u8_z(pred_op, mask_37_8b, 3); - - const svuint8_t mask_0347_8b = svorr_u8_z(pred_op, mask_04_8b, mask_37s_8b); - const svuint8_t mask_1256_8b = - svorr_u8_z(pred_op, mask_15s_8b, mask_26s_8b); - const svuint8_t mask_cmb_8b = - svorr_u8_z(pred_op, mask_0347_8b, mask_1256_8b); - - // - const svuint16_t 
shifts_16b = svdup_u16(0x0400UL); - const svuint8_t shifts_8b = svreinterpret_u8_u16(shifts_16b); - const svuint8_t shifted_8b_m0 = svlsl_u8_z(pred_op, mask_cmb_8b, shifts_8b); - - const svuint8_t zero_8b = svdup_n_u8(0); - - const svuint8_t shifted_8b_m3 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m0, zero_8b), - svuzp2_u8(shifted_8b_m0, zero_8b)); - - // write a finished bitmask - svst1_u8(pred_write, res_u8, shifted_8b_m3); -} - -// generate 32-bit bitmask from 8 serialized 32-bit svbool_t values -void -write_bitmask_32_8x(uint8_t* const __restrict res_u8, - const svbool_t pred_op, - const svbool_t pred_write, - const uint8_t* const __restrict pred_buf) { - // perform parallel pext - // 2048b -> 32 bytes mask -> 256 bytes total, 64 uint32_t values - // 512b -> 8 bytes mask -> 64 bytes total, 16 uint32_t values - // 256b -> 4 bytes mask -> 32 bytes total, 8 uint32_t values - // 128b -> 2 bytes mask -> 16 bytes total, 4 uint32_t values - - // this code does reduction of 32-bit 0b000A000B000C000D... dwords into - // uint8_t values 0bABCDEFGH, then writes ones to the memory - - // we need to operate in uint8_t - const svuint8_t mask_8b = svld1_u8(pred_op, pred_buf); - - const svuint8_t mask_024_8b = svand_n_u8_z(pred_op, mask_8b, 0x01); - const svuint8_t mask_135s_8b = svlsr_n_u8_z(pred_op, mask_8b, 3); - const svuint8_t mask_cmb_8b = - svorr_u8_z(pred_op, mask_024_8b, mask_135s_8b); - - // - const svuint32_t shifts_32b = svdup_u32(0x06040200UL); - const svuint8_t shifts_8b = svreinterpret_u8_u32(shifts_32b); - const svuint8_t shifted_8b_m0 = svlsl_u8_z(pred_op, mask_cmb_8b, shifts_8b); - - const svuint8_t zero_8b = svdup_n_u8(0); - - const svuint8_t shifted_8b_m2 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m0, zero_8b), - svuzp2_u8(shifted_8b_m0, zero_8b)); - const svuint8_t shifted_8b_m3 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m2, zero_8b), - svuzp2_u8(shifted_8b_m2, zero_8b)); - - // write a finished bitmask - svst1_u8(pred_write, res_u8, shifted_8b_m3); -} - -// generate 64-bit bitmask from 8 serialized 64-bit svbool_t values -void -write_bitmask_64_8x(uint8_t* const __restrict res_u8, - const svbool_t pred_op, - const svbool_t pred_write, - const uint8_t* const __restrict pred_buf) { - // perform parallel pext - // 2048b -> 32 bytes mask -> 256 bytes total, 32 uint64_t values - // 512b -> 8 bytes mask -> 64 bytes total, 4 uint64_t values - // 256b -> 4 bytes mask -> 32 bytes total, 2 uint64_t values - // 128b -> 2 bytes mask -> 16 bytes total, 1 uint64_t values - - // this code does reduction of 64-bit 0b0000000A0000000B... 
qwords into - // uint8_t values 0bABCDEFGH, then writes ones to the memory - - // we need to operate in uint8_t - const svuint8_t mask_8b = svld1_u8(pred_op, pred_buf); - const svuint64_t shifts_64b = svdup_u64(0x706050403020100ULL); - const svuint8_t shifts_8b = svreinterpret_u8_u64(shifts_64b); - const svuint8_t shifted_8b_m0 = svlsl_u8_z(pred_op, mask_8b, shifts_8b); - - const svuint8_t zero_8b = svdup_n_u8(0); - - const svuint8_t shifted_8b_m1 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m0, zero_8b), - svuzp2_u8(shifted_8b_m0, zero_8b)); - const svuint8_t shifted_8b_m2 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m1, zero_8b), - svuzp2_u8(shifted_8b_m1, zero_8b)); - const svuint8_t shifted_8b_m3 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m2, zero_8b), - svuzp2_u8(shifted_8b_m2, zero_8b)); - - // write a finished bitmask - svst1_u8(pred_write, res_u8, shifted_8b_m3); -} - -/////////////////////////////////////////////////////////////////////////// - // inline svbool_t get_pred_op_8(const size_t n_elements) { - const svbool_t pred_all_8 = svptrue_b8(); - const svuint8_t lanes_8 = svld1_u8(pred_all_8, SVE_LANES_8); - const svuint8_t leftovers_op = svdup_n_u8(n_elements); - const svbool_t pred_op = svcmpgt_u8(pred_all_8, leftovers_op, lanes_8); - return pred_op; + return svwhilelt_b8(uint32_t(0), uint32_t(n_elements)); } // inline svbool_t get_pred_op_16(const size_t n_elements) { - const svbool_t pred_all_16 = svptrue_b16(); - const svuint16_t lanes_16 = svld1_u16(pred_all_16, SVE_LANES_16); - const svuint16_t leftovers_op = svdup_n_u16(n_elements); - const svbool_t pred_op = svcmpgt_u16(pred_all_16, leftovers_op, lanes_16); - return pred_op; + return svwhilelt_b16(uint32_t(0), uint32_t(n_elements)); } // inline svbool_t get_pred_op_32(const size_t n_elements) { - const svbool_t pred_all_32 = svptrue_b32(); - const svuint32_t lanes_32 = svld1_u32(pred_all_32, SVE_LANES_32); - const svuint32_t leftovers_op = svdup_n_u32(n_elements); - const svbool_t pred_op = svcmpgt_u32(pred_all_32, leftovers_op, lanes_32); - return pred_op; + return svwhilelt_b32(uint32_t(0), uint32_t(n_elements)); } // inline svbool_t get_pred_op_64(const size_t n_elements) { - const svbool_t pred_all_64 = svptrue_b64(); - const svuint64_t lanes_64 = svld1_u64(pred_all_64, SVE_LANES_64); - const svuint64_t leftovers_op = svdup_n_u64(n_elements); - const svbool_t pred_op = svcmpgt_u64(pred_all_64, leftovers_op, lanes_64); - return pred_op; + return svwhilelt_b64(uint32_t(0), uint32_t(n_elements)); } // @@ -579,7 +371,7 @@ struct SVEVector { using sve_type = svint8_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline uint64_t width() { return svcntb(); } @@ -606,7 +398,7 @@ struct SVEVector { using sve_type = svint16_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline uint64_t width() { return svcnth(); } @@ -633,7 +425,7 @@ struct SVEVector { using sve_type = svint32_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline uint64_t width() { return svcntw(); } @@ -660,7 +452,7 @@ struct SVEVector { using sve_type = svint64_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline uint64_t width() { return svcntd(); } @@ -687,7 +479,7 @@ struct SVEVector { using sve_type = svfloat32_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline 
uint64_t width() { return svcntw(); } @@ -714,7 +506,7 @@ struct SVEVector { using sve_type = svfloat64_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline uint64_t width() { return svcntd(); } @@ -737,159 +529,262 @@ struct SVEVector { /////////////////////////////////////////////////////////////////////////// -// an interesting discussion here: -// https://stackoverflow.com/questions/77834169/what-is-a-fast-fallback-algorithm-which-emulates-pdep-and-pext-in-software - -// SVE2 has bitperm, which contains the implementation of pext - -// todo: replace with pext whenever available - -// +// NBYTES is the size of the underlying datatype in bytes. +// So, for example, for i8/u8 use 1, for i64/u64/f64 use 8/ template struct MaskHelper {}; template <> struct MaskHelper<1> { static inline void - write(uint8_t* const __restrict bitmask, - const size_t size, - const svbool_t pred0, - const svbool_t pred1, - const svbool_t pred2, - const svbool_t pred3, - const svbool_t pred4, - const svbool_t pred5, - const svbool_t pred6, - const svbool_t pred7) { - const size_t sve_width = svcntb(); - if (sve_width == 8 * sve_width) { - // perform a full write - *((svbool_t*)(bitmask + 0 * sve_width / 8)) = pred0; - *((svbool_t*)(bitmask + 1 * sve_width / 8)) = pred1; - *((svbool_t*)(bitmask + 2 * sve_width / 8)) = pred2; - *((svbool_t*)(bitmask + 3 * sve_width / 8)) = pred3; - *((svbool_t*)(bitmask + 4 * sve_width / 8)) = pred4; - *((svbool_t*)(bitmask + 5 * sve_width / 8)) = pred5; - *((svbool_t*)(bitmask + 6 * sve_width / 8)) = pred6; - *((svbool_t*)(bitmask + 7 * sve_width / 8)) = pred7; - } else { - // perform a partial write - - // this is the buffer for the maximum possible case of 2048 bits - uint8_t pred_buf[MAX_SVE_WIDTH / 8]; - *((volatile svbool_t*)(pred_buf + 0 * sve_width / 8)) = pred0; - *((volatile svbool_t*)(pred_buf + 1 * sve_width / 8)) = pred1; - *((volatile svbool_t*)(pred_buf + 2 * sve_width / 8)) = pred2; - *((volatile svbool_t*)(pred_buf + 3 * sve_width / 8)) = pred3; - *((volatile svbool_t*)(pred_buf + 4 * sve_width / 8)) = pred4; - *((volatile svbool_t*)(pred_buf + 5 * sve_width / 8)) = pred5; - *((volatile svbool_t*)(pred_buf + 6 * sve_width / 8)) = pred6; - *((volatile svbool_t*)(pred_buf + 7 * sve_width / 8)) = pred7; - - // make the write mask - const svbool_t pred_write = get_pred_op_8(size / 8); - - // load the buffer - const svuint8_t mask_u8 = svld1_u8(pred_write, pred_buf); - // write it to the bitmask - svst1_u8(pred_write, bitmask, mask_u8); - } + write_full(uint8_t* const __restrict bitmask, + const svbool_t pred0, + const svbool_t pred1, + const svbool_t pred2, + const svbool_t pred3, + const svbool_t pred4, + const svbool_t pred5, + const svbool_t pred6, + const svbool_t pred7) { + const uint64_t sve_width = svcntb(); + + // perform a full write + *((svbool_t*)(bitmask + 0 * sve_width / 8)) = pred0; + *((svbool_t*)(bitmask + 1 * sve_width / 8)) = pred1; + *((svbool_t*)(bitmask + 2 * sve_width / 8)) = pred2; + *((svbool_t*)(bitmask + 3 * sve_width / 8)) = pred3; + *((svbool_t*)(bitmask + 4 * sve_width / 8)) = pred4; + *((svbool_t*)(bitmask + 5 * sve_width / 8)) = pred5; + *((svbool_t*)(bitmask + 6 * sve_width / 8)) = pred6; + *((svbool_t*)(bitmask + 7 * sve_width / 8)) = pred7; + } + + static inline void + write_partial(uint8_t* const __restrict bitmask, + const size_t size, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t 
pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + const uint64_t sve_width = svcntb(); + + // perform a partial write + + // this is a temporary buffer for the maximum possible case of 2048 bits + uint8_t pred_buf[MAX_SVE_WIDTH / 8]; + // write to the temporary buffer + *((volatile svbool_t*)(pred_buf + 0 * sve_width / 8)) = pred_0; + *((volatile svbool_t*)(pred_buf + 1 * sve_width / 8)) = pred_1; + *((volatile svbool_t*)(pred_buf + 2 * sve_width / 8)) = pred_2; + *((volatile svbool_t*)(pred_buf + 3 * sve_width / 8)) = pred_3; + *((volatile svbool_t*)(pred_buf + 4 * sve_width / 8)) = pred_4; + *((volatile svbool_t*)(pred_buf + 5 * sve_width / 8)) = pred_5; + *((volatile svbool_t*)(pred_buf + 6 * sve_width / 8)) = pred_6; + *((volatile svbool_t*)(pred_buf + 7 * sve_width / 8)) = pred_7; + + // make the write mask. (size % 8) == 0 is guaranteed by the caller. + const svbool_t pred_write = + svwhilelt_b8(uint32_t(0), uint32_t(size / 8)); + + // load the buffer + const svuint8_t mask_u8 = svld1_u8(pred_write, pred_buf); + // write it to the bitmask + svst1_u8(pred_write, bitmask, mask_u8); } }; template <> struct MaskHelper<2> { static inline void - write(uint8_t* const __restrict bitmask, - const size_t size, - const svbool_t pred0, - const svbool_t pred1, - const svbool_t pred2, - const svbool_t pred3, - const svbool_t pred4, - const svbool_t pred5, - const svbool_t pred6, - const svbool_t pred7) { - const size_t sve_width = svcnth(); - - // this is the buffer for the maximum possible case of 2048 bits - uint8_t pred_buf[MAX_SVE_WIDTH / 8]; - *((volatile svbool_t*)(pred_buf + 0 * sve_width / 4)) = pred0; - *((volatile svbool_t*)(pred_buf + 1 * sve_width / 4)) = pred1; - *((volatile svbool_t*)(pred_buf + 2 * sve_width / 4)) = pred2; - *((volatile svbool_t*)(pred_buf + 3 * sve_width / 4)) = pred3; - *((volatile svbool_t*)(pred_buf + 4 * sve_width / 4)) = pred4; - *((volatile svbool_t*)(pred_buf + 5 * sve_width / 4)) = pred5; - *((volatile svbool_t*)(pred_buf + 6 * sve_width / 4)) = pred6; - *((volatile svbool_t*)(pred_buf + 7 * sve_width / 4)) = pred7; - - const svbool_t pred_op_8 = get_pred_op_8(size / 4); - const svbool_t pred_write_8 = get_pred_op_8(size / 8); - write_bitmask_16_8x(bitmask, pred_op_8, pred_write_8, pred_buf); + write_full(uint8_t* const __restrict bitmask, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + const uint64_t sve_width = svcntb(); + + // compact predicates + const svbool_t pred_01 = svuzp1_b8(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b8(pred_2, pred_3); + const svbool_t pred_45 = svuzp1_b8(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b8(pred_6, pred_7); + + // perform a full write + *((svbool_t*)(bitmask + 0 * sve_width / 8)) = pred_01; + *((svbool_t*)(bitmask + 1 * sve_width / 8)) = pred_23; + *((svbool_t*)(bitmask + 2 * sve_width / 8)) = pred_45; + *((svbool_t*)(bitmask + 3 * sve_width / 8)) = pred_67; + } + + static inline void + write_partial(uint8_t* const __restrict bitmask, + const size_t size, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + const uint64_t sve_width = svcntb(); + + // compact predicates + const svbool_t pred_01 = svuzp1_b8(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b8(pred_2, pred_3); + const svbool_t 
pred_45 = svuzp1_b8(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b8(pred_6, pred_7); + + // this is a temporary buffer for the maximum possible case of 1024 bits + uint8_t pred_buf[MAX_SVE_WIDTH / 16]; + // write to the temporary buffer + *((volatile svbool_t*)(pred_buf + 0 * sve_width / 8)) = pred_01; + *((volatile svbool_t*)(pred_buf + 1 * sve_width / 8)) = pred_23; + *((volatile svbool_t*)(pred_buf + 2 * sve_width / 8)) = pred_45; + *((volatile svbool_t*)(pred_buf + 3 * sve_width / 8)) = pred_67; + + // make the write mask. (size % 8) == 0 is guaranteed by the caller. + const svbool_t pred_write = + svwhilelt_b8(uint32_t(0), uint32_t(size / 8)); + + // load the buffer + const svuint8_t mask_u8 = svld1_u8(pred_write, pred_buf); + // write it to the bitmask + svst1_u8(pred_write, bitmask, mask_u8); } }; template <> struct MaskHelper<4> { static inline void - write(uint8_t* const __restrict bitmask, - const size_t size, - const svbool_t pred0, - const svbool_t pred1, - const svbool_t pred2, - const svbool_t pred3, - const svbool_t pred4, - const svbool_t pred5, - const svbool_t pred6, - const svbool_t pred7) { - const size_t sve_width = svcntw(); - - // this is the buffer for the maximum possible case of 2048 bits - uint8_t pred_buf[MAX_SVE_WIDTH / 8]; - *((volatile svbool_t*)(pred_buf + 0 * sve_width / 2)) = pred0; - *((volatile svbool_t*)(pred_buf + 1 * sve_width / 2)) = pred1; - *((volatile svbool_t*)(pred_buf + 2 * sve_width / 2)) = pred2; - *((volatile svbool_t*)(pred_buf + 3 * sve_width / 2)) = pred3; - *((volatile svbool_t*)(pred_buf + 4 * sve_width / 2)) = pred4; - *((volatile svbool_t*)(pred_buf + 5 * sve_width / 2)) = pred5; - *((volatile svbool_t*)(pred_buf + 6 * sve_width / 2)) = pred6; - *((volatile svbool_t*)(pred_buf + 7 * sve_width / 2)) = pred7; - - const svbool_t pred_op_8 = get_pred_op_8(size / 2); - const svbool_t pred_write_8 = get_pred_op_8(size / 8); - write_bitmask_32_8x(bitmask, pred_op_8, pred_write_8, pred_buf); + write_full(uint8_t* const __restrict bitmask, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + const uint64_t sve_width = svcntb(); + + // compact predicates + const svbool_t pred_01 = svuzp1_b16(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b16(pred_2, pred_3); + const svbool_t pred_45 = svuzp1_b16(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b16(pred_6, pred_7); + const svbool_t pred_0123 = svuzp1_b8(pred_01, pred_23); + const svbool_t pred_4567 = svuzp1_b8(pred_45, pred_67); + + // perform a full write + *((svbool_t*)(bitmask + 0 * sve_width / 8)) = pred_0123; + *((svbool_t*)(bitmask + 1 * sve_width / 8)) = pred_4567; + } + + static inline void + write_partial(uint8_t* const __restrict bitmask, + const size_t size, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + const uint64_t sve_width = svcntb(); + + // compact predicates + const svbool_t pred_01 = svuzp1_b16(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b16(pred_2, pred_3); + const svbool_t pred_45 = svuzp1_b16(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b16(pred_6, pred_7); + const svbool_t pred_0123 = svuzp1_b8(pred_01, pred_23); + const svbool_t pred_4567 = svuzp1_b8(pred_45, pred_67); + + // this is a temporary buffer for the maximum possible case of 512 bits 
+ uint8_t pred_buf[MAX_SVE_WIDTH / 32]; + // write to the temporary buffer + *((volatile svbool_t*)(pred_buf + 0 * sve_width / 8)) = pred_0123; + *((volatile svbool_t*)(pred_buf + 1 * sve_width / 8)) = pred_4567; + + // make the write mask. (size % 8) == 0 is guaranteed by the caller. + const svbool_t pred_write = + svwhilelt_b8(uint32_t(0), uint32_t(size / 8)); + + // load the buffer + const svuint8_t mask_u8 = svld1_u8(pred_write, pred_buf); + // write it to the bitmask + svst1_u8(pred_write, bitmask, mask_u8); } }; template <> struct MaskHelper<8> { static inline void - write(uint8_t* const __restrict bitmask, - const size_t size, - const svbool_t pred0, - const svbool_t pred1, - const svbool_t pred2, - const svbool_t pred3, - const svbool_t pred4, - const svbool_t pred5, - const svbool_t pred6, - const svbool_t pred7) { - const size_t sve_width = svcntd(); - - // this is the buffer for the maximum possible case of 2048 bits - uint8_t pred_buf[MAX_SVE_WIDTH / 8]; - *((volatile svbool_t*)(pred_buf + 0 * sve_width)) = pred0; - *((volatile svbool_t*)(pred_buf + 1 * sve_width)) = pred1; - *((volatile svbool_t*)(pred_buf + 2 * sve_width)) = pred2; - *((volatile svbool_t*)(pred_buf + 3 * sve_width)) = pred3; - *((volatile svbool_t*)(pred_buf + 4 * sve_width)) = pred4; - *((volatile svbool_t*)(pred_buf + 5 * sve_width)) = pred5; - *((volatile svbool_t*)(pred_buf + 6 * sve_width)) = pred6; - *((volatile svbool_t*)(pred_buf + 7 * sve_width)) = pred7; - - const svbool_t pred_op_8 = get_pred_op_8(size / 1); - const svbool_t pred_write_8 = get_pred_op_8(size / 8); - write_bitmask_64_8x(bitmask, pred_op_8, pred_write_8, pred_buf); + write_full(uint8_t* const __restrict bitmask, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + // compact predicates + const svbool_t pred_01 = svuzp1_b32(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b32(pred_2, pred_3); + const svbool_t pred_45 = svuzp1_b32(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b32(pred_6, pred_7); + const svbool_t pred_0123 = svuzp1_b16(pred_01, pred_23); + const svbool_t pred_4567 = svuzp1_b16(pred_45, pred_67); + const svbool_t pred_01234567 = svuzp1_b8(pred_0123, pred_4567); + + // perform a full write + *((svbool_t*)bitmask) = pred_01234567; + } + + static inline void + write_partial(uint8_t* const __restrict bitmask, + const size_t size, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + // compact predicates + const svbool_t pred_01 = svuzp1_b32(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b32(pred_2, pred_3); + const svbool_t pred_45 = svuzp1_b32(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b32(pred_6, pred_7); + const svbool_t pred_0123 = svuzp1_b16(pred_01, pred_23); + const svbool_t pred_4567 = svuzp1_b16(pred_45, pred_67); + const svbool_t pred_01234567 = svuzp1_b8(pred_0123, pred_4567); + + // this is a temporary buffer for the maximum possible case of 256 bits + uint8_t pred_buf[MAX_SVE_WIDTH / 64]; + // write to the temporary buffer + *((volatile svbool_t*)(pred_buf)) = pred_01234567; + + // make the write mask. (size % 8) == 0 is guaranteed by the caller. 
+ const svbool_t pred_write = + svwhilelt_b8(uint32_t(0), uint32_t(size / 8)); + + // load the buffer + const svuint8_t mask_u8 = svld1_u8(pred_write, pred_buf); + // write it to the bitmask + svst1_u8(pred_write, bitmask, mask_u8); } }; @@ -924,16 +819,8 @@ op_mask_helper(uint8_t* const __restrict res_u8, const size_t size, Func func) { const svbool_t cmp6 = func(pred_all, i + 6 * sve_width); const svbool_t cmp7 = func(pred_all, i + 7 * sve_width); - MaskHelper::write(res_u8 + i / 8, - sve_width * 8, - cmp0, - cmp1, - cmp2, - cmp3, - cmp4, - cmp5, - cmp6, - cmp7); + MaskHelper::write_full( + res_u8 + i / 8, cmp0, cmp1, cmp2, cmp3, cmp4, cmp5, cmp6, cmp7); } } @@ -985,16 +872,16 @@ op_mask_helper(uint8_t* const __restrict res_u8, const size_t size, Func func) { cmp7 = func(get_partial_pred(7), size_sve8 + 7 * sve_width); } - MaskHelper::write(res_u8 + size_sve8 / 8, - size - size_sve8, - cmp0, - cmp1, - cmp2, - cmp3, - cmp4, - cmp5, - cmp6, - cmp7); + MaskHelper::write_partial(res_u8 + size_sve8 / 8, + size - size_sve8, + cmp0, + cmp1, + cmp2, + cmp3, + cmp4, + cmp5, + cmp6, + cmp7); } return true; From de7901121fad13662edc3d021d848bb393c80fed Mon Sep 17 00:00:00 2001 From: shaoting-huang <167743503+shaoting-huang@users.noreply.github.com> Date: Wed, 22 May 2024 13:21:39 +0800 Subject: [PATCH 027/126] Upgrade go from 1.20 to 1.21 (#33047) Signed-off-by: shaoting-huang [shaoting-huang@zilliz.com] issue: https://github.com/milvus-io/milvus/issues/32982 # Background Go 1.21 introduces several improvements and changes over Go 1.20, which is quite stable now. According to [Go 1.21 Release Notes](https://tip.golang.org/doc/go1.21), the big difference of Go 1.21 is enabling Profile-Guided Optimization by default, which can improve performance by around 2-14%. Here are the summary steps of PGO: 1. Build Initial Binary (Without PGO) 2. Deploying the Production Environment 3. Run the program and collect Performance Analysis Data (CPU pprof) 4. Analyze the Collected Data and Select a Performance Profile for PGO 5. Place the Performance Analysis File in the Main Package Directory and Name It default.pgo 6. go build Detects the default.pgo File and Enables PGO 7. Build and Release the Updated Binary (With PGO) 8. Iterate and Repeat the Above Steps Screenshot 2024-05-14 at 15 57 01 # What does this PR do There are three experiments, search benchmark by Zilliz test platform, search benchmark by open-source [VectorDBBench](https://github.com/zilliztech/VectorDBBench?tab=readme-ov-file), and search benchmark with PGO. We do both search benchmarks by Zilliz test platform and by VectorDBBench to reduce reliance on a single experimental result. Besides, we validate the performance enhancement with PGO. ## Search Benchmark Report by Zilliz Test Platform An upgrade to Go 1.21 was conducted on a Milvus Standalone server, equipped with 16 CPUs and 64GB of memory. The search performance was evaluated using a 1 million entry local dataset with an L2 metric type in a 768-dimensional space. The system was tested for concurrent searches with 50 concurrent tasks for 1 hour, each with a 20-second interval. The reason for using one server rather than two servers to compare is to guarantee the same data source and same segment state after compaction. Test Sequence: 1. Go 1.20 Initial Run: Insert data, build index, load index, and search. 2. Go 1.20 Rebuild: Rebuild the index with the same dataset, load index, and search. 3. Go 1.21 Load: Upload to Go 1.21 within the server. 
Then load the index from the second run, and search. 4. Go 1.21 Rebuild: Rebuild the index with the same dataset, load index, and search. Search Metrics: | Metric | Go 1.20 | Go 1.20 Rebuild Index | Go 1.21 | Go 1.21 Rebuild Index | |----------------------------|------------------|-----------------|------------------|-----------------| | `search requests` | 10,942,683 | 16,131,726 | 16,200,887 | 16,331,052 | | `search fails` | 0 | 0 | 0 | 0 | | `search RT_avg` (ms) | 16.44 | 11.15 | 11.11 | 11.02 | | `search RT_min` (ms) | 1.30 | 1.28 | 1.31 | 1.26 | | `search RT_max` (ms) | 446.61 | 233.22 | 235.90 | 147.93 | | `search TP50` (ms) | 11.74 | 10.46 | 10.43 | 10.35 | | `search TP99` (ms) | 92.30 | 25.76 | 25.36 | 25.23 | | `search RPS` | 3,039 | 4,481 | 4,500 | 4,536 | ### Key Findings The benchmark tests reveal that the index build time with Go 1.20 at 340.39 ms and Go 1.21 at 337.60 ms demonstrated negligible performance variance in index construction. However, Go 1.21 offers slightly better performance in search operations compared to Go 1.20, with improvements in handling concurrent tasks and reducing response times. ## Search Benchmark Report By VectorDb Bench Follow [VectorDBBench](https://github.com/zilliztech/VectorDBBench?tab=readme-ov-file) to create a VectorDb Bench test for Go 1.20 and Go 1.21. We test the search performance with Go 1.20 and Go 1.21 (without PGO) on the Milvus Standalone system. The tests were conducted using the Cohere dataset with 1 million entries in a 768-dimensional space, utilizing the COSINE metric type. Search Metrics: Metric | Go 1.20 | Go 1.21 without PGO -- | -- | -- Load Duration (seconds) | 1195.95 | 976.37 Queries Per Second (QPS) | 841.62 | 875.89 99th Percentile Serial Latency (seconds) | 0.0047 | 0.0076 Recall | 0.9487 | 0.9489 ### Key Findings Go 1.21 indicates faster index loading times and larger search QPS handling. ## PGO Performance Test Milvus has already added [net/http/pprof](https://pkg.go.dev/net/http/pprof) in the metrics. So we can curl the CPU profile directly by running `curl -o default.pgo "http://${MILVUS_SERVER_IP}:${MILVUS_SERVER_PORT}/debug/pprof/profile?seconds=${TIME_SECOND}"` to collect the profile as the default.pgo during the first search. Then I build Milvus with PGO and use the same index to run the search again. The result is as below: Search Metrics | Metric | Go 1.21 Without PGO | Go 1.21 With PGO | Change (%) | |---------------------------------------------|------------------|-----------------|------------| | `search Requests` | 2,644,583 | 2,837,726 | +7.30% | | `search Fails` | 0 | 0 | N/A | | `search RT_avg` (ms) | 11.34 | 10.57 | -6.78% | | `search RT_min` (ms) | 1.39 | 1.32 | -5.18% | | `search RT_max` (ms) | 349.72 | 143.72 | -58.91% | | `search TP50` (ms) | 10.57 | 9.93 | -6.05% | | `search TP99` (ms) | 26.14 | 24.16 | -7.56% | | `search RPS` | 4,407 | 4,729 | +7.30% | ### Key Findings PGO led to a notable enhancement in search performance, particularly in reducing the maximum response time by 58% and increasing the search QPS by 7.3%. ### Further Analysis Generate a diff flame graphs between two CPU profiles by running `go tool pprof -http=:8000 -diff_base nopgo.pgo pgo.pgo -normalize` goprofiling Further insight of HnswIndexNode and Milvus Search Handler hnsw search_handler After applying PGO to the Milvus server, the CPU utilization of the faiss::fvec_L2 function has decreased. 
This optimization significantly enhances the performance of the [HnswIndexNode::Search::searchKnn](https://github.com/zilliztech/knowhere/blob/e0c9c41aa22d8f6e6761a0a54460e4573de15bfe/src/index/hnsw/hnsw.cc#L203) method, which is frequently invoked by Knowhere during high-concurrency searches. As the explanation from Go release notes, the function might be more aggressively inlined by Go compiler during the second build with the CPU profiling collected from the first run. As a result, the search handler efficiency within Milvus DataNode has improved, allowing the server to process a higher number of search queries per second (QPS). # Conclusion The combination of Go 1.21 and PGO has led to substantial enhancements in search performance for Milvus server, particularly in terms of search QPS and response times, making it more efficient for handling high-concurrency search operations. Signed-off-by: shaoting-huang --- .golangci.yml | 2 +- DEVELOPMENT.md | 12 ++++++------ Makefile | 15 ++++++++------- README.md | 6 +++--- README_CN.md | 2 +- configs/pgo/default.pgo | 0 go.mod | 2 +- scripts/README.md | 2 +- 8 files changed, 21 insertions(+), 20 deletions(-) create mode 100644 configs/pgo/default.pgo diff --git a/.golangci.yml b/.golangci.yml index 09779daf2548c..91895ce0cc115 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,5 +1,5 @@ run: - go: "1.20" + go: "1.21" skip-dirs: - build - configs diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index cb6493a9fa12f..246cb63810221 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -104,7 +104,7 @@ You can use Vscode to integrate C++ and Go together. Please replace user.setting Linux systems (Recommend Ubuntu 20.04 or later): ```bash -go: >= 1.20 +go: >= 1.21 cmake: >= 3.18 gcc: 7.5 conan: 1.61 @@ -113,7 +113,7 @@ conan: 1.61 MacOS systems with x86_64 (Big Sur 11.5 or later recommended): ```bash -go: >= 1.20 +go: >= 1.21 cmake: >= 3.18 llvm: >= 15 conan: 1.61 @@ -122,7 +122,7 @@ conan: 1.61 MacOS systems with Apple Silicon (Monterey 12.0.1 or later recommended): ```bash -go: >= 1.20 (Arch=ARM64) +go: >= 1.21 (Arch=ARM64) cmake: >= 3.18 llvm: >= 15 conan: 1.61 @@ -178,7 +178,7 @@ Confirm that your `GOPATH` and `GOBIN` environment variables are correctly set a ```shell $ go version ``` -Note: go >= 1.20 is required to build Milvus. +Note: go >= 1.21 is required to build Milvus. #### Docker & Docker Compose @@ -245,8 +245,8 @@ pip3 install conan==1.61.0 #### Install GO 1.80 ```bash -wget https://go.dev/dl/go1.18.10.linux-arm64.tar.gz -tar zxf go1.18.10.linux-arm64.tar.gz +wget https://go.dev/dl/go1.21.10.linux-arm64.tar.gz +tar zxf go1.21.10.linux-arm64.tar.gz mv ./go /usr/local vi /etc/profile export PATH=$PATH:/usr/local/go/bin diff --git a/Makefile b/Makefile index 6ed5b46d19680..9abc97fa6c4b1 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ OBJPREFIX := "github.com/milvus-io/milvus/cmd/milvus" INSTALL_PATH := $(PWD)/bin LIBRARY_PATH := $(PWD)/lib +PGO_PATH := $(PWD)/configs/pgo OS := $(shell uname -s) mode = Release @@ -72,14 +73,14 @@ milvus: build-cpp print-build-info @echo "Building Milvus ..." 
@source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ - GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ + GO111MODULE=on $(GO) build -pgo=$(PGO_PATH)/default.pgo -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ -tags dynamic -o $(INSTALL_PATH)/milvus $(PWD)/cmd/main.go 1>/dev/null milvus-gpu: build-cpp-gpu print-gpu-build-info @echo "Building Milvus-gpu ..." @source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ - GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS_GPU)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ + GO111MODULE=on $(GO) build -pgo=$(PGO_PATH)/default.pgo -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS_GPU)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ -tags dynamic -o $(INSTALL_PATH)/milvus $(PWD)/cmd/main.go 1>/dev/null get-build-deps: @@ -106,7 +107,7 @@ getdeps: tools/bin/revive: tools/check/go.mod cd tools/check; \ - $(GO) build -o ../bin/revive github.com/mgechev/revive + $(GO) build -pgo=$(PGO_PATH)/default.pgo -o ../bin/revive github.com/mgechev/revive cppcheck: @#(env bash ${PWD}/scripts/core_build.sh -l) @@ -169,14 +170,14 @@ binlog: @echo "Building binlog ..." @source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ - GO111MODULE=on $(GO) build -ldflags="-r $${RPATH}" -o $(INSTALL_PATH)/binlog $(PWD)/cmd/tools/binlog/main.go 1>/dev/null + GO111MODULE=on $(GO) build -pgo=$(PGO_PATH)/default.pgo -ldflags="-r $${RPATH}" -o $(INSTALL_PATH)/binlog $(PWD)/cmd/tools/binlog/main.go 1>/dev/null MIGRATION_PATH = $(PWD)/cmd/tools/migration meta-migration: @echo "Building migration tool ..." @source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ - GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ + GO111MODULE=on $(GO) build -pgo=$(PGO_PATH)/default.pgo -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ -tags dynamic -o $(INSTALL_PATH)/meta-migration $(MIGRATION_PATH)/main.go 1>/dev/null INTERATION_PATH = $(PWD)/tests/integration @@ -371,7 +372,7 @@ clean: milvus-tools: print-build-info @echo "Building tools ..." @mkdir -p $(INSTALL_PATH)/tools && go env -w CGO_ENABLED="1" && GO111MODULE=on $(GO) build \ - -ldflags="-X 'main.BuildTags=$(BUILD_TAGS)' -X 'main.BuildTime=$(BUILD_TIME)' -X 'main.GitCommit=$(GIT_COMMIT)' -X 'main.GoVersion=$(GO_VERSION)'" \ + -pgo=$(PGO_PATH)/default.pgo -ldflags="-X 'main.BuildTags=$(BUILD_TAGS)' -X 'main.BuildTime=$(BUILD_TIME)' -X 'main.GitCommit=$(GIT_COMMIT)' -X 'main.GoVersion=$(GO_VERSION)'" \ -o $(INSTALL_PATH)/tools $(PWD)/cmd/tools/* 1>/dev/null rpm-setup: @@ -520,5 +521,5 @@ mmap-migration: @echo "Building migration tool ..." 
@source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ - GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ + GO111MODULE=on $(GO) build -pgo=$(PGO_PATH)/default.pgo -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ -tags dynamic -o $(INSTALL_PATH)/mmap-migration $(MMAP_MIGRATION_PATH)/main.go 1>/dev/null \ No newline at end of file diff --git a/README.md b/README.md index d1bcc8413b7a0..9e0bf7607554a 100644 --- a/README.md +++ b/README.md @@ -72,21 +72,21 @@ Check the requirements first. Linux systems (Ubuntu 20.04 or later recommended): ```bash -go: >= 1.20 +go: >= 1.21 cmake: >= 3.26.4 gcc: 7.5 ``` MacOS systems with x86_64 (Big Sur 11.5 or later recommended): ```bash -go: >= 1.20 +go: >= 1.21 cmake: >= 3.26.4 llvm: >= 15 ``` MacOS systems with Apple Silicon (Monterey 12.0.1 or later recommended): ```bash -go: >= 1.20 (Arch=ARM64) +go: >= 1.21 (Arch=ARM64) cmake: >= 3.26.4 llvm: >= 15 ``` diff --git a/README_CN.md b/README_CN.md index 2b97a7138535b..26207c0f21fbb 100644 --- a/README_CN.md +++ b/README_CN.md @@ -68,7 +68,7 @@ Milvus 基于 [Apache 2.0 License](https://github.com/milvus-io/milvus/blob/mast 请先安装相关依赖。 ``` -go: 1.20 +go: 1.21 cmake: >=3.18 gcc: 7.5 protobuf: >=3.7 diff --git a/configs/pgo/default.pgo b/configs/pgo/default.pgo new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/go.mod b/go.mod index f50cfb447b574..27e4c5bc06899 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/milvus-io/milvus -go 1.20 +go 1.21 require ( github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0 diff --git a/scripts/README.md b/scripts/README.md index 838db6fd5e6db..8cb64fbca7dc4 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -4,7 +4,7 @@ ``` OS: Ubuntu 20.04 -go:1.20 +go:1.21 cmake: >=3.18 gcc: 7.5 ``` From 13fdaea9f0dcbfe55aba05bfe1f8833e5581898e Mon Sep 17 00:00:00 2001 From: aoiasd <45024769+aoiasd@users.noreply.github.com> Date: Wed, 22 May 2024 14:55:39 +0800 Subject: [PATCH 028/126] fix: accesslog writer cache close cause deadlock (#33261) relate: https://github.com/milvus-io/milvus/issues/33260 Signed-off-by: aoiasd --- internal/proxy/accesslog/writer.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/internal/proxy/accesslog/writer.go b/internal/proxy/accesslog/writer.go index b0784bdc2f3e1..5aad0acd6df3d 100644 --- a/internal/proxy/accesslog/writer.go +++ b/internal/proxy/accesslog/writer.go @@ -113,12 +113,15 @@ func (l *CacheWriter) Start() { } func (l *CacheWriter) Close() { - l.mu.Lock() - defer l.mu.Unlock() l.closeOnce.Do(func() { - l.closed = true + // close auto flush close(l.closeCh) l.closeWg.Wait() + + l.mu.Lock() + defer l.mu.Unlock() + l.closed = true + // flush remaining bytes l.writer.Flush() From 3c4df81261ee5e21e8c2f680eaea17fec996a66d Mon Sep 17 00:00:00 2001 From: congqixia Date: Wed, 22 May 2024 15:11:39 +0800 Subject: [PATCH 029/126] enhance: Assert insert data length not overflow int (#33248) When InsertData is too large for cpp proto unmarshalling, the error message is confusing since the length is overflowed This PR adds assertion for insert data length. 
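
As a minimal standalone sketch of the failure mode (the ~3 GiB payload size below is hypothetical, and the exact wrapped value is implementation-defined), narrowing the uint64 insert length into a 32-bit int is what made the original proto-parse error look confusing:

```cpp
#include <cstdint>
#include <iostream>
#include <limits>

int main() {
    // hypothetical oversized insert payload, ~3 GiB
    const uint64_t data_info_len = 3ULL * 1024 * 1024 * 1024;

    // narrowing to a 32-bit int overflows; on common platforms the value
    // wraps to a negative number, so any size derived from it is misleading
    const int narrowed = static_cast<int>(data_info_len);

    std::cout << "uint64 length: " << data_info_len
              << ", narrowed int: " << narrowed
              << ", INT_MAX: " << std::numeric_limits<int>::max() << std::endl;

    // with this patch the length is checked against INT_MAX up front,
    // so the caller gets an explicit "exceeds max int" assertion instead
    return 0;
}
```
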
Signed-off-by: Congqi Xia --- internal/core/src/segcore/segment_c.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index df3b8fda7accf..06643ea3f7a34 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -12,6 +12,7 @@ #include "segcore/segment_c.h" #include +#include #include "common/FieldData.h" #include "common/LoadInfo.h" @@ -239,6 +240,9 @@ Insert(CSegmentInterface c_segment, const uint8_t* data_info, const uint64_t data_info_len) { try { + AssertInfo(data_info_len < std::numeric_limits::max(), + "insert data length ({}) exceeds max int", + data_info_len); auto segment = static_cast(c_segment); auto insert_record_proto = std::make_unique(); From dd0c6d6980a7005363ad20938ade4579cce7502a Mon Sep 17 00:00:00 2001 From: SimFG Date: Wed, 22 May 2024 15:57:39 +0800 Subject: [PATCH 030/126] fix: the panic when db isn't existed in the rate limit interceptor (#33244) issue: #33243 Signed-off-by: SimFG --- internal/proxy/meta_cache_adapter.go | 3 --- internal/proxy/rate_limit_interceptor.go | 14 +++++++---- internal/proxy/rate_limit_interceptor_test.go | 8 +++++- internal/proxy/simple_rate_limiter.go | 9 ++++++- internal/proxy/simple_rate_limiter_test.go | 2 ++ internal/rootcoord/quota_center.go | 25 +++++++++++++++++-- pkg/util/constant.go | 1 + 7 files changed, 50 insertions(+), 12 deletions(-) diff --git a/internal/proxy/meta_cache_adapter.go b/internal/proxy/meta_cache_adapter.go index c72665066f72b..da63272e74a2f 100644 --- a/internal/proxy/meta_cache_adapter.go +++ b/internal/proxy/meta_cache_adapter.go @@ -23,9 +23,7 @@ import ( "github.com/casbin/casbin/v2/model" jsonadapter "github.com/casbin/json-adapter/v2" - "go.uber.org/zap" - "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/merr" ) @@ -51,7 +49,6 @@ func (a *MetaCacheCasbinAdapter) LoadPolicy(model model.Model) error { policyInfo := strings.Join(cache.GetPrivilegeInfo(context.Background()), ",") policy := fmt.Sprintf("[%s]", policyInfo) - log.Ctx(context.Background()).Info("LoddPolicy update policyinfo", zap.String("policyInfo", policy)) byteSource := []byte(policy) jAdapter := jsonadapter.NewAdapter(&byteSource) return jAdapter.LoadPolicy(model) diff --git a/internal/proxy/rate_limit_interceptor.go b/internal/proxy/rate_limit_interceptor.go index 14ac320334495..61fe932e78bd9 100644 --- a/internal/proxy/rate_limit_interceptor.go +++ b/internal/proxy/rate_limit_interceptor.go @@ -31,6 +31,7 @@ import ( "github.com/milvus-io/milvus/internal/types" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/requestutil" @@ -119,6 +120,9 @@ func getCollectionAndPartitionIDs(ctx context.Context, r reqPartNames) (int64, m func getCollectionID(r reqCollName) (int64, map[int64][]int64) { db, _ := globalMetaCache.GetDatabaseInfo(context.TODO(), r.GetDbName()) + if db == nil { + return util.InvalidDBID, map[int64][]int64{} + } collectionID, _ := globalMetaCache.GetCollectionID(context.TODO(), r.GetDbName(), r.GetCollectionName()) return db.dbID, map[int64][]int64{collectionID: {}} } @@ -177,14 +181,14 @@ func getRequestInfo(ctx context.Context, req interface{}) (int64, map[int64][]in case *milvuspb.FlushRequest: db, err := globalMetaCache.GetDatabaseInfo(ctx, r.GetDbName()) if err != nil 
{ - return 0, map[int64][]int64{}, 0, 0, err + return util.InvalidDBID, map[int64][]int64{}, 0, 0, err } collToPartIDs := make(map[int64][]int64, 0) for _, collectionName := range r.GetCollectionNames() { collectionID, err := globalMetaCache.GetCollectionID(ctx, r.GetDbName(), collectionName) if err != nil { - return 0, map[int64][]int64{}, 0, 0, err + return util.InvalidDBID, map[int64][]int64{}, 0, 0, err } collToPartIDs[collectionID] = []int64{} } @@ -193,16 +197,16 @@ func getRequestInfo(ctx context.Context, req interface{}) (int64, map[int64][]in dbName := GetCurDBNameFromContextOrDefault(ctx) dbInfo, err := globalMetaCache.GetDatabaseInfo(ctx, dbName) if err != nil { - return 0, map[int64][]int64{}, 0, 0, err + return util.InvalidDBID, map[int64][]int64{}, 0, 0, err } return dbInfo.dbID, map[int64][]int64{ r.GetCollectionID(): {}, }, internalpb.RateType_DDLCompaction, 1, nil default: // TODO: support more request if req == nil { - return 0, map[int64][]int64{}, 0, 0, fmt.Errorf("null request") + return util.InvalidDBID, map[int64][]int64{}, 0, 0, fmt.Errorf("null request") } - return 0, map[int64][]int64{}, 0, 0, fmt.Errorf("unsupported request type %s", reflect.TypeOf(req).Name()) + return util.InvalidDBID, map[int64][]int64{}, 0, 0, fmt.Errorf("unsupported request type %s", reflect.TypeOf(req).Name()) } } diff --git a/internal/proxy/rate_limit_interceptor_test.go b/internal/proxy/rate_limit_interceptor_test.go index cfea05d30b458..5440123da630a 100644 --- a/internal/proxy/rate_limit_interceptor_test.go +++ b/internal/proxy/rate_limit_interceptor_test.go @@ -29,6 +29,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/internal/proto/internalpb" + "github.com/milvus-io/milvus/pkg/util" "github.com/milvus-io/milvus/pkg/util/merr" ) @@ -367,7 +368,7 @@ func TestGetInfo(t *testing.T) { }() t.Run("fail to get database", func(t *testing.T) { - mockCache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(nil, errors.New("mock error: get database info")).Times(4) + mockCache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(nil, errors.New("mock error: get database info")).Times(5) { _, _, err := getCollectionAndPartitionID(ctx, &milvuspb.InsertRequest{ DbName: "foo", @@ -394,6 +395,11 @@ func TestGetInfo(t *testing.T) { _, _, _, _, err := getRequestInfo(ctx, &milvuspb.ManualCompactionRequest{}) assert.Error(t, err) } + { + dbID, collectionIDInfos := getCollectionID(&milvuspb.CreateCollectionRequest{}) + assert.Equal(t, util.InvalidDBID, dbID) + assert.Equal(t, 0, len(collectionIDInfos)) + } }) t.Run("fail to get collection", func(t *testing.T) { diff --git a/internal/proxy/simple_rate_limiter.go b/internal/proxy/simple_rate_limiter.go index b6652e62791ed..0a6b721c46b94 100644 --- a/internal/proxy/simple_rate_limiter.go +++ b/internal/proxy/simple_rate_limiter.go @@ -32,6 +32,7 @@ import ( rlinternal "github.com/milvus-io/milvus/internal/util/ratelimitutil" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/ratelimitutil" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -79,7 +80,7 @@ func (m *SimpleLimiter) Check(dbID int64, collectionIDToPartIDs map[int64][]int6 } // 2. 
check database level rate limits - if ret == nil { + if ret == nil && dbID != util.InvalidDBID { dbRateLimiters := m.rateLimiter.GetOrCreateDatabaseLimiters(dbID, newDatabaseLimiter) ret = dbRateLimiters.Check(rt, n) if ret != nil { @@ -92,6 +93,9 @@ func (m *SimpleLimiter) Check(dbID int64, collectionIDToPartIDs map[int64][]int6 // 3. check collection level rate limits if ret == nil && len(collectionIDToPartIDs) > 0 && !isNotCollectionLevelLimitRequest(rt) { for collectionID := range collectionIDToPartIDs { + if collectionID == 0 || dbID == util.InvalidDBID { + continue + } // only dml and dql have collection level rate limits collectionRateLimiters := m.rateLimiter.GetOrCreateCollectionLimiters(dbID, collectionID, newDatabaseLimiter, newCollectionLimiters) @@ -108,6 +112,9 @@ func (m *SimpleLimiter) Check(dbID int64, collectionIDToPartIDs map[int64][]int6 if ret == nil && len(collectionIDToPartIDs) > 0 { for collectionID, partitionIDs := range collectionIDToPartIDs { for _, partID := range partitionIDs { + if collectionID == 0 || partID == 0 || dbID == util.InvalidDBID { + continue + } partitionRateLimiters := m.rateLimiter.GetOrCreatePartitionLimiters(dbID, collectionID, partID, newDatabaseLimiter, newCollectionLimiters, newPartitionLimiters) ret = partitionRateLimiters.Check(rt, n) diff --git a/internal/proxy/simple_rate_limiter_test.go b/internal/proxy/simple_rate_limiter_test.go index 178c536beec51..c19253c3dbc40 100644 --- a/internal/proxy/simple_rate_limiter_test.go +++ b/internal/proxy/simple_rate_limiter_test.go @@ -87,9 +87,11 @@ func TestSimpleRateLimiter(t *testing.T) { clusterRateLimiters := simpleLimiter.rateLimiter.GetRootLimiters() collectionIDToPartIDs := map[int64][]int64{ + 0: {}, 1: {}, 2: {}, 3: {}, + 4: {0}, } for i := 1; i <= 3; i++ { diff --git a/internal/rootcoord/quota_center.go b/internal/rootcoord/quota_center.go index 539125a8cba24..ae12835d29e90 100644 --- a/internal/rootcoord/quota_center.go +++ b/internal/rootcoord/quota_center.go @@ -433,9 +433,15 @@ func (q *QuotaCenter) collectMetrics() error { } } + datacoordQuotaCollections := make([]int64, 0) q.diskMu.Lock() if dataCoordTopology.Cluster.Self.QuotaMetrics != nil { q.dataCoordMetrics = dataCoordTopology.Cluster.Self.QuotaMetrics + for _, metricCollections := range q.dataCoordMetrics.PartitionsBinlogSize { + for metricCollection := range metricCollections { + datacoordQuotaCollections = append(datacoordQuotaCollections, metricCollection) + } + } } q.diskMu.Unlock() @@ -447,7 +453,6 @@ func (q *QuotaCenter) collectMetrics() error { } var rangeErr error collections.Range(func(collectionID int64) bool { - var coll *model.Collection coll, getErr := q.meta.GetCollectionByIDWithMaxTs(context.TODO(), collectionID) if getErr != nil { rangeErr = getErr @@ -482,7 +487,23 @@ func (q *QuotaCenter) collectMetrics() error { } return true }) - return rangeErr + if rangeErr != nil { + return rangeErr + } + for _, collectionID := range datacoordQuotaCollections { + _, ok := q.collectionIDToDBID.Get(collectionID) + if ok { + continue + } + coll, getErr := q.meta.GetCollectionByIDWithMaxTs(context.TODO(), collectionID) + if getErr != nil { + return getErr + } + q.collectionIDToDBID.Insert(collectionID, coll.DBID) + q.collections.Insert(FormatCollectionKey(coll.DBID, coll.Name), collectionID) + } + + return nil }) // get Proxies metrics group.Go(func() error { diff --git a/pkg/util/constant.go b/pkg/util/constant.go index 36e52d83dce45..a416affa97564 100644 --- a/pkg/util/constant.go +++ b/pkg/util/constant.go @@ -56,6 
+56,7 @@ const ( DefaultDBName = "default" DefaultDBID = int64(1) NonDBID = int64(0) + InvalidDBID = int64(-1) PrivilegeWord = "Privilege" AnyWord = "*" From b9b6343c8879f446ac2581a364c14f40a02783c4 Mon Sep 17 00:00:00 2001 From: SimFG Date: Wed, 22 May 2024 16:03:40 +0800 Subject: [PATCH 031/126] enhance: check the auth in some rest v2 api (#33256) /kind improvement link master proto: https://github.com/milvus-io/milvus-proto/blob/master/proto/milvus.proto Signed-off-by: SimFG --- .../distributed/proxy/httpserver/handler_v2.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/internal/distributed/proxy/httpserver/handler_v2.go b/internal/distributed/proxy/httpserver/handler_v2.go index 773aa32b984cc..2badb5957d89d 100644 --- a/internal/distributed/proxy/httpserver/handler_v2.go +++ b/internal/distributed/proxy/httpserver/handler_v2.go @@ -340,7 +340,7 @@ func (h *HandlersV2) getCollectionDetails(ctx context.Context, c *gin.Context, a DbName: dbName, CollectionName: collectionName, } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (any, error) { + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (any, error) { return h.proxy.DescribeCollection(reqCtx, req.(*milvuspb.DescribeCollectionRequest)) }) if err != nil { @@ -1601,7 +1601,7 @@ func (h *HandlersV2) listIndexes(ctx context.Context, c *gin.Context, anyReq any DbName: dbName, CollectionName: collectionGetter.GetCollectionName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (any, error) { + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (any, error) { resp, err := h.proxy.DescribeIndex(reqCtx, req.(*milvuspb.DescribeIndexRequest)) if errors.Is(err, merr.ErrIndexNotFound) { return &milvuspb.DescribeIndexResponse{ @@ -1633,7 +1633,7 @@ func (h *HandlersV2) describeIndex(ctx context.Context, c *gin.Context, anyReq a CollectionName: collectionGetter.GetCollectionName(), IndexName: indexGetter.GetIndexName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DescribeIndex(reqCtx, req.(*milvuspb.DescribeIndexRequest)) }) if err == nil { @@ -1681,7 +1681,7 @@ func (h *HandlersV2) createIndex(ctx context.Context, c *gin.Context, anyReq any for key, value := range indexParam.Params { req.ExtraParams = append(req.ExtraParams, &commonpb.KeyValuePair{Key: key, Value: fmt.Sprintf("%v", value)}) } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.CreateIndex(reqCtx, req.(*milvuspb.CreateIndexRequest)) }) if err != nil { @@ -1700,7 +1700,7 @@ func (h *HandlersV2) dropIndex(ctx context.Context, c *gin.Context, anyReq any, CollectionName: collGetter.GetCollectionName(), IndexName: indexGetter.GetIndexName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DropIndex(reqCtx, req.(*milvuspb.DropIndexRequest)) }) if err == nil { @@ 
-1752,7 +1752,7 @@ func (h *HandlersV2) createAlias(ctx context.Context, c *gin.Context, anyReq any CollectionName: collectionGetter.GetCollectionName(), Alias: aliasGetter.GetAliasName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.CreateAlias(reqCtx, req.(*milvuspb.CreateAliasRequest)) }) if err == nil { @@ -1767,7 +1767,7 @@ func (h *HandlersV2) dropAlias(ctx context.Context, c *gin.Context, anyReq any, DbName: dbName, Alias: getter.GetAliasName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DropAlias(reqCtx, req.(*milvuspb.DropAliasRequest)) }) if err == nil { @@ -1784,7 +1784,7 @@ func (h *HandlersV2) alterAlias(ctx context.Context, c *gin.Context, anyReq any, CollectionName: collectionGetter.GetCollectionName(), Alias: aliasGetter.GetAliasName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.AlterAlias(reqCtx, req.(*milvuspb.AlterAliasRequest)) }) if err == nil { From aef33351b63cada54af9a00b1408596f30fb0c5e Mon Sep 17 00:00:00 2001 From: sre-ci-robot <56469371+sre-ci-robot@users.noreply.github.com> Date: Wed, 22 May 2024 16:33:48 +0800 Subject: [PATCH 032/126] [automated] Update gpu Builder image changes (#33192) Update gpu Builder image changes See changes: https://github.com/milvus-io/milvus/commit/c35eaaa3581b2a4e00e49a8517e846d4f2479daa Signed-off-by: sre-ci-robot sre-ci-robot@users.noreply.github.com Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- .env | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.env b/.env index 96cd6e27ed183..33b1d77e8567e 100644 --- a/.env +++ b/.env @@ -9,8 +9,8 @@ DATE_VERSION=20240429-6289f3a LATEST_DATE_VERSION=20240429-6289f3a # for services.gpubuilder.image in docker-compose.yml -GPU_DATE_VERSION=20240409-08bfb43 -LATEST_GPU_DATE_VERSION=20240409-08bfb43 +GPU_DATE_VERSION=20240520-c35eaaa +LATEST_GPU_DATE_VERSION=20240520-c35eaaa # for other services in docker-compose.yml MINIO_ADDRESS=minio:9000 From fc765c6a727fed50082d0d23577adfdb450791d9 Mon Sep 17 00:00:00 2001 From: sre-ci-robot <56469371+sre-ci-robot@users.noreply.github.com> Date: Wed, 22 May 2024 16:35:39 +0800 Subject: [PATCH 033/126] [automated] Update cpu Builder image changes (#33202) Update cpu Builder image changes See changes: https://github.com/milvus-io/milvus/commit/d27db996973f8ef5c4080f6d66173e43b5c7355a Signed-off-by: sre-ci-robot sre-ci-robot@users.noreply.github.com Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- .env | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.env b/.env index 33b1d77e8567e..6beb24525c5e1 100644 --- a/.env +++ b/.env @@ -5,8 +5,8 @@ IMAGE_ARCH=amd64 OS_NAME=ubuntu20.04 # for services.builder.image in docker-compose.yml -DATE_VERSION=20240429-6289f3a -LATEST_DATE_VERSION=20240429-6289f3a +DATE_VERSION=20240520-d27db99 +LATEST_DATE_VERSION=20240520-d27db99 # for services.gpubuilder.image in 
docker-compose.yml GPU_DATE_VERSION=20240520-c35eaaa From 310bfe71c23eefa7f5565c70a0c06812a226919f Mon Sep 17 00:00:00 2001 From: "sammy.huang" Date: Wed, 22 May 2024 17:08:29 +0800 Subject: [PATCH 034/126] fix: arm-based gpu image (#33275) Signed-off-by: Liang Huang --- ci/jenkins/PublishArmBasedGPUImages.groovy | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ci/jenkins/PublishArmBasedGPUImages.groovy b/ci/jenkins/PublishArmBasedGPUImages.groovy index 16245b8661b8c..4540c6f45c3e9 100644 --- a/ci/jenkins/PublishArmBasedGPUImages.groovy +++ b/ci/jenkins/PublishArmBasedGPUImages.groovy @@ -33,6 +33,13 @@ pipeline { steps { script { + sh """ + git config --global --add safe.directory /home/jenkins/agent/workspace + """ + + def date = sh(returnStdout: true, script: 'date +%Y%m%d').trim() + def gitShortCommit = sh(returnStdout: true, script: 'git rev-parse --short HEAD').trim() + sh """ set -a # automatically export all variables from .env . .env @@ -41,9 +48,6 @@ pipeline { docker run --net=host -v \$(pwd):/root/milvus -v /root/.conan:/root/.conan -w /root/milvus milvusdb/milvus-env:gpu-ubuntu22.04-\${GPU_DATE_VERSION} sh -c "make clean && make gpu-install" """ - def date = sh(returnStdout: true, script: 'date +%Y%m%d').trim() - def gitShortCommit = sh(returnStdout: true, script: 'git rev-parse --short HEAD').trim() - withCredentials([usernamePassword(credentialsId: "${env.DOCKER_CREDENTIALS_ID}", usernameVariable: 'DOCKER_USERNAME', passwordVariable: 'DOCKER_PASSWORD')]) { sh 'docker login -u ${DOCKER_USERNAME} -p ${DOCKER_PASSWORD}' sh """ From 39f56678a02bcf4485334224b3375663d32925b8 Mon Sep 17 00:00:00 2001 From: wei liu Date: Wed, 22 May 2024 19:11:40 +0800 Subject: [PATCH 035/126] enhance: Reduce bloom filter lock contention between insert and delete in query coord (#32643) issue: #32530 cause ProcessDelete need to check whether pk exist in bloom filter, and ProcessInsert need to update pk to bloom filter, when execute ProcessInsert and ProcessDelete in parallel, it will cause race condition in segment's bloom filter This PR execute ProcessInsert and ProcessDelete in serial to avoid block each other Signed-off-by: Wei Liu --- internal/querynodev2/server.go | 2 +- internal/util/pipeline/node.go | 59 +------------------ internal/util/pipeline/pipeline.go | 37 +++++++++--- internal/util/pipeline/pipeline_test.go | 32 +++++++--- internal/util/pipeline/stream_pipeline.go | 9 ++- .../util/pipeline/stream_pipeline_test.go | 4 +- 6 files changed, 67 insertions(+), 76 deletions(-) diff --git a/internal/querynodev2/server.go b/internal/querynodev2/server.go index 326da3ef8daff..d142d72f2b5af 100644 --- a/internal/querynodev2/server.go +++ b/internal/querynodev2/server.go @@ -461,7 +461,7 @@ func (node *QueryNode) Stop() error { case <-time.After(time.Second): metrics.StoppingBalanceSegmentNum.WithLabelValues(fmt.Sprint(node.GetNodeID())).Set(float64(len(sealedSegments))) metrics.StoppingBalanceChannelNum.WithLabelValues(fmt.Sprint(node.GetNodeID())).Set(float64(channelNum)) - log.Info("migrate data...", zap.Int64("ServerID", paramtable.GetNodeID()), + log.Info("migrate data...", zap.Int64("ServerID", node.GetNodeID()), zap.Int64s("sealedSegments", lo.Map(sealedSegments, func(s segments.Segment, i int) int64 { return s.ID() })), diff --git a/internal/util/pipeline/node.go b/internal/util/pipeline/node.go index ad42e6318fe51..fe16397dceabf 100644 --- a/internal/util/pipeline/node.go +++ b/internal/util/pipeline/node.go @@ -17,12 +17,6 @@ package pipeline import ( - "fmt" 
- "sync" - - "go.uber.org/zap" - - "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/timerecord" ) @@ -35,63 +29,16 @@ type Node interface { } type nodeCtx struct { - node Node - + node Node inputChannel chan Msg - - next *nodeCtx - checker *timerecord.GroupChecker - - closeCh chan struct{} // notify work to exit - closeWg sync.WaitGroup -} - -func (c *nodeCtx) Start() { - c.closeWg.Add(1) - c.node.Start() - go c.work() -} - -func (c *nodeCtx) Close() { - close(c.closeCh) - c.closeWg.Wait() -} - -func (c *nodeCtx) work() { - defer c.closeWg.Done() - name := fmt.Sprintf("nodeCtxTtChecker-%s", c.node.Name()) - if c.checker != nil { - c.checker.Check(name) - defer c.checker.Remove(name) - } - - for { - select { - // close - case <-c.closeCh: - c.node.Close() - close(c.inputChannel) - log.Debug("pipeline node closed", zap.String("nodeName", c.node.Name())) - return - case input := <-c.inputChannel: - var output Msg - output = c.node.Operate(input) - if c.checker != nil { - c.checker.Check(name) - } - if c.next != nil && output != nil { - c.next.inputChannel <- output - } - } - } + next *nodeCtx + checker *timerecord.GroupChecker } func newNodeCtx(node Node) *nodeCtx { return &nodeCtx{ node: node, inputChannel: make(chan Msg, node.MaxQueueLength()), - closeCh: make(chan struct{}), - closeWg: sync.WaitGroup{}, } } diff --git a/internal/util/pipeline/pipeline.go b/internal/util/pipeline/pipeline.go index 047bf65f48714..61212f4581992 100644 --- a/internal/util/pipeline/pipeline.go +++ b/internal/util/pipeline/pipeline.go @@ -17,6 +17,7 @@ package pipeline import ( + "fmt" "time" "go.uber.org/zap" @@ -36,6 +37,8 @@ type pipeline struct { inputChannel chan Msg nodeTtInterval time.Duration enableTtChecker bool + + checkerNames map[string]string } func (p *pipeline) Add(nodes ...Node) { @@ -50,6 +53,10 @@ func (p *pipeline) addNode(node Node) { nodeCtx.checker = timerecord.GetGroupChecker("fgNode", p.nodeTtInterval, func(list []string) { log.Warn("some node(s) haven't received input", zap.Strings("list", list), zap.Duration("duration ", p.nodeTtInterval)) }) + if p.checkerNames == nil { + p.checkerNames = make(map[string]string) + } + p.checkerNames[nodeCtx.node.Name()] = fmt.Sprintf("nodeCtxTtChecker-%s", nodeCtx.node.Name()) } if len(p.nodes) != 0 { @@ -62,17 +69,31 @@ func (p *pipeline) addNode(node Node) { } func (p *pipeline) Start() error { - if len(p.nodes) == 0 { - return ErrEmptyPipeline - } - for _, node := range p.nodes { - node.Start() - } return nil } func (p *pipeline) Close() { - for _, node := range p.nodes { - node.Close() +} + +func (p *pipeline) process() { + if len(p.nodes) == 0 { + return + } + + curNode := p.nodes[0] + for curNode != nil { + if len(curNode.inputChannel) == 0 { + break + } + + input := <-curNode.inputChannel + output := curNode.node.Operate(input) + if _, ok := p.checkerNames[curNode.node.Name()]; ok { + curNode.checker.Check(p.checkerNames[curNode.node.Name()]) + } + if curNode.next != nil && output != nil { + curNode.next.inputChannel <- output + } + curNode = curNode.next } } diff --git a/internal/util/pipeline/pipeline_test.go b/internal/util/pipeline/pipeline_test.go index 8ddeb9c35534a..909893d458961 100644 --- a/internal/util/pipeline/pipeline_test.go +++ b/internal/util/pipeline/pipeline_test.go @@ -31,8 +31,9 @@ type testNode struct { func (t *testNode) Operate(in Msg) Msg { msg := in.(*msgstream.MsgPack) - msg.BeginTs++ - t.outChannel <- msg.BeginTs + if t.outChannel != nil { + t.outChannel <- msg.BeginTs + } return msg } @@ 
-43,7 +44,7 @@ type PipelineSuite struct { } func (suite *PipelineSuite) SetupTest() { - suite.outChannel = make(chan msgstream.Timestamp) + suite.outChannel = make(chan msgstream.Timestamp, 1) suite.pipeline = &pipeline{ nodes: []*nodeCtx{}, nodeTtInterval: 0, @@ -52,7 +53,21 @@ func (suite *PipelineSuite) SetupTest() { suite.pipeline.addNode(&testNode{ BaseNode: &BaseNode{ - name: "test-node", + name: "test-node1", + maxQueueLength: 8, + }, + }) + + suite.pipeline.addNode(&testNode{ + BaseNode: &BaseNode{ + name: "test-node2", + maxQueueLength: 8, + }, + }) + + suite.pipeline.addNode(&testNode{ + BaseNode: &BaseNode{ + name: "test-node3", maxQueueLength: 8, }, outChannel: suite.outChannel, @@ -62,10 +77,13 @@ func (suite *PipelineSuite) SetupTest() { func (suite *PipelineSuite) TestBasic() { suite.pipeline.Start() defer suite.pipeline.Close() - suite.pipeline.inputChannel <- &msgstream.MsgPack{} - output := <-suite.outChannel - suite.Equal(msgstream.Timestamp(1), output) + for i := 0; i < 100; i++ { + suite.pipeline.inputChannel <- &msgstream.MsgPack{BeginTs: msgstream.Timestamp(i)} + suite.pipeline.process() + output := <-suite.outChannel + suite.Equal(i, int(output)) + } } func TestPipeline(t *testing.T) { diff --git a/internal/util/pipeline/stream_pipeline.go b/internal/util/pipeline/stream_pipeline.go index 6cb6b6900e04e..3c22c6e99fec6 100644 --- a/internal/util/pipeline/stream_pipeline.go +++ b/internal/util/pipeline/stream_pipeline.go @@ -37,7 +37,7 @@ type StreamPipeline interface { } type streamPipeline struct { - *pipeline + pipeline *pipeline input <-chan *msgstream.MsgPack dispatcher msgdispatcher.Client startOnce sync.Once @@ -57,7 +57,8 @@ func (p *streamPipeline) work() { return case msg := <-p.input: log.RatedDebug(10, "stream pipeline fetch msg", zap.Int("sum", len(msg.Msgs))) - p.nodes[0].inputChannel <- msg + p.pipeline.inputChannel <- msg + p.pipeline.process() } } } @@ -86,6 +87,10 @@ func (p *streamPipeline) ConsumeMsgStream(position *msgpb.MsgPosition) error { return nil } +func (p *streamPipeline) Add(nodes ...Node) { + p.pipeline.Add(nodes...) 
+} + func (p *streamPipeline) Start() error { var err error p.startOnce.Do(func() { diff --git a/internal/util/pipeline/stream_pipeline_test.go b/internal/util/pipeline/stream_pipeline_test.go index 7bf28a5a0c351..a731a18ff34b8 100644 --- a/internal/util/pipeline/stream_pipeline_test.go +++ b/internal/util/pipeline/stream_pipeline_test.go @@ -68,11 +68,11 @@ func (suite *StreamPipelineSuite) TestBasic() { suite.pipeline.Start() defer suite.pipeline.Close() - suite.inChannel <- &msgstream.MsgPack{} + suite.inChannel <- &msgstream.MsgPack{BeginTs: 1001} for i := 1; i <= suite.length; i++ { output := <-suite.outChannel - suite.Equal(msgstream.Timestamp(i), output) + suite.Equal(int64(1001), int64(output)) } } From 33144a43d44432073344586bda19ddf08536de44 Mon Sep 17 00:00:00 2001 From: congqixia Date: Wed, 22 May 2024 19:15:40 +0800 Subject: [PATCH 036/126] enhance: Support Row-based insert for milvusclient (#33270) See also #31293 Signed-off-by: Congqi Xia --- client/collection.go | 6 +- client/collection_options.go | 4 + client/entity/schema.go | 14 ++ client/entity/sparse.go | 2 +- client/row/data.go | 332 +++++++++++++++++++++++++++++++++++ client/row/data_test.go | 174 ++++++++++++++++++ client/row/schema.go | 185 +++++++++++++++++++ client/row/schema_test.go | 213 ++++++++++++++++++++++ client/write_options.go | 57 +++++- 9 files changed, 977 insertions(+), 10 deletions(-) create mode 100644 client/row/data.go create mode 100644 client/row/data_test.go create mode 100644 client/row/schema.go create mode 100644 client/row/schema_test.go diff --git a/client/collection.go b/client/collection.go index 039ff2460d64c..4031c687d9993 100644 --- a/client/collection.go +++ b/client/collection.go @@ -62,10 +62,6 @@ func (c *Client) CreateCollection(ctx context.Context, option CreateCollectionOp return nil } -type ListCollectionOption interface { - Request() *milvuspb.ShowCollectionsRequest -} - func (c *Client) ListCollections(ctx context.Context, option ListCollectionOption, callOptions ...grpc.CallOption) (collectionNames []string, err error) { req := option.Request() err = c.callService(func(milvusService milvuspb.MilvusServiceClient) error { @@ -82,7 +78,7 @@ func (c *Client) ListCollections(ctx context.Context, option ListCollectionOptio return collectionNames, err } -func (c *Client) DescribeCollection(ctx context.Context, option *describeCollectionOption, callOptions ...grpc.CallOption) (collection *entity.Collection, err error) { +func (c *Client) DescribeCollection(ctx context.Context, option DescribeCollectionOption, callOptions ...grpc.CallOption) (collection *entity.Collection, err error) { req := option.Request() err = c.callService(func(milvusService milvuspb.MilvusServiceClient) error { resp, err := milvusService.DescribeCollection(ctx, req, callOptions...) 
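[editor's note, not part of the patch] The pipeline change above removes the per-node goroutine loop (the old nodeCtx.Start/work/Close with closeCh) and drives the node chain with a synchronous process() walk triggered by the stream pipeline. The following is a minimal, self-contained sketch of that pattern only; the node struct, the process function, and the int payload are illustrative stand-ins, not the actual nodeCtx/Msg types from the patch.

package main

import "fmt"

// node is a simplified stand-in for nodeCtx: each node owns a buffered
// input channel and points at the next node in the chain.
type node struct {
	input chan int
	next  *node
	op    func(int) int
}

// process walks the chain synchronously, mirroring the pipeline.process()
// added above: no per-node goroutine, the caller drains one hop at a time
// and stops as soon as a node has nothing buffered.
func process(head *node) {
	cur := head
	for cur != nil {
		if len(cur.input) == 0 {
			break
		}
		in := <-cur.input
		out := cur.op(in)
		if cur.next != nil {
			cur.next.input <- out
		}
		cur = cur.next
	}
}

func main() {
	sink := &node{input: make(chan int, 8), op: func(v int) int { fmt.Println("sink got", v); return v }}
	src := &node{input: make(chan int, 8), next: sink, op: func(v int) int { return v + 1 }}

	src.input <- 41
	process(src) // prints "sink got 42"
}

In the actual patch the same role is played by streamPipeline.work(), which pushes each fetched MsgPack into pipeline.inputChannel and then calls process(), so ordering is preserved without one goroutine per node.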
diff --git a/client/collection_options.go b/client/collection_options.go index adb59e37b5145..696fe702273a2 100644 --- a/client/collection_options.go +++ b/client/collection_options.go @@ -159,6 +159,10 @@ func NewCreateCollectionOption(name string, collectionSchema *entity.Schema) *cr } } +type ListCollectionOption interface { + Request() *milvuspb.ShowCollectionsRequest +} + type listCollectionOption struct{} func (opt *listCollectionOption) Request() *milvuspb.ShowCollectionsRequest { diff --git a/client/entity/schema.go b/client/entity/schema.go index ce30b53f51483..8225ba6c2fd3c 100644 --- a/client/entity/schema.go +++ b/client/entity/schema.go @@ -19,6 +19,8 @@ package entity import ( "strconv" + "github.com/cockroachdb/errors" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" ) @@ -293,6 +295,18 @@ func (f *Field) WithDim(dim int64) *Field { return f } +func (f *Field) GetDim() (int64, error) { + dimStr, has := f.TypeParams[TypeParamDim] + if !has { + return -1, errors.New("field with no dim") + } + dim, err := strconv.ParseInt(dimStr, 10, 64) + if err != nil { + return -1, errors.Newf("field with bad format dim: %s", err.Error()) + } + return dim, nil +} + func (f *Field) WithMaxLength(maxLen int64) *Field { if f.TypeParams == nil { f.TypeParams = make(map[string]string) diff --git a/client/entity/sparse.go b/client/entity/sparse.go index 2bded8f6e8f2b..56ca5f4dca265 100644 --- a/client/entity/sparse.go +++ b/client/entity/sparse.go @@ -88,7 +88,7 @@ func deserializeSliceSparceEmbedding(bs []byte) (sliceSparseEmbedding, error) { return sliceSparseEmbedding{}, errors.New("not valid sparse embedding bytes") } - length = length / 8 + length /= 8 result := sliceSparseEmbedding{ positions: make([]uint32, length), diff --git a/client/row/data.go b/client/row/data.go new file mode 100644 index 0000000000000..292661ade29be --- /dev/null +++ b/client/row/data.go @@ -0,0 +1,332 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package row + +import ( + "encoding/json" + "fmt" + "reflect" + "strconv" + + "github.com/cockroachdb/errors" + + "github.com/milvus-io/milvus/client/v2/column" + "github.com/milvus-io/milvus/client/v2/entity" +) + +const ( + // MilvusTag struct tag const for milvus row based struct + MilvusTag = `milvus` + + // MilvusSkipTagValue struct tag const for skip this field. 
+ MilvusSkipTagValue = `-` + + // MilvusTagSep struct tag const for attribute separator + MilvusTagSep = `;` + + // MilvusTagName struct tag const for field name + MilvusTagName = `NAME` + + // VectorDimTag struct tag const for vector dimension + VectorDimTag = `DIM` + + // VectorTypeTag struct tag const for binary vector type + VectorTypeTag = `VECTOR_TYPE` + + // MilvusPrimaryKey struct tag const for primary key indicator + MilvusPrimaryKey = `PRIMARY_KEY` + + // MilvusAutoID struct tag const for auto id indicator + MilvusAutoID = `AUTO_ID` + + // DimMax dimension max value + DimMax = 65535 +) + +func AnyToColumns(rows []interface{}, schemas ...*entity.Schema) ([]column.Column, error) { + rowsLen := len(rows) + if rowsLen == 0 { + return []column.Column{}, errors.New("0 length column") + } + + var sch *entity.Schema + var err error + // if schema not provided, try to parse from row + if len(schemas) == 0 { + sch, err = ParseSchema(rows[0]) + if err != nil { + return []column.Column{}, err + } + } else { + // use first schema provided + sch = schemas[0] + } + + isDynamic := sch.EnableDynamicField + var dynamicCol *column.ColumnJSONBytes + + nameColumns := make(map[string]column.Column) + for _, field := range sch.Fields { + // skip auto id pk field + if field.PrimaryKey && field.AutoID { + continue + } + switch field.DataType { + case entity.FieldTypeBool: + data := make([]bool, 0, rowsLen) + col := column.NewColumnBool(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeInt8: + data := make([]int8, 0, rowsLen) + col := column.NewColumnInt8(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeInt16: + data := make([]int16, 0, rowsLen) + col := column.NewColumnInt16(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeInt32: + data := make([]int32, 0, rowsLen) + col := column.NewColumnInt32(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeInt64: + data := make([]int64, 0, rowsLen) + col := column.NewColumnInt64(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeFloat: + data := make([]float32, 0, rowsLen) + col := column.NewColumnFloat(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeDouble: + data := make([]float64, 0, rowsLen) + col := column.NewColumnDouble(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeString, entity.FieldTypeVarChar: + data := make([]string, 0, rowsLen) + col := column.NewColumnString(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeJSON: + data := make([][]byte, 0, rowsLen) + col := column.NewColumnJSONBytes(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeArray: + col := NewArrayColumn(field) + if col == nil { + return nil, errors.Newf("unsupported element type %s for Array", field.ElementType.String()) + } + nameColumns[field.Name] = col + case entity.FieldTypeFloatVector: + data := make([][]float32, 0, rowsLen) + dimStr, has := field.TypeParams[entity.TypeParamDim] + if !has { + return []column.Column{}, errors.New("vector field with no dim") + } + dim, err := strconv.ParseInt(dimStr, 10, 64) + if err != nil { + return []column.Column{}, fmt.Errorf("vector field with bad format dim: %s", err.Error()) + } + col := column.NewColumnFloatVector(field.Name, int(dim), data) + nameColumns[field.Name] = col + case entity.FieldTypeBinaryVector: + data := make([][]byte, 0, rowsLen) + dim, err := field.GetDim() + if err != nil { + return []column.Column{}, err + } + col := 
column.NewColumnBinaryVector(field.Name, int(dim), data) + nameColumns[field.Name] = col + case entity.FieldTypeFloat16Vector: + data := make([][]byte, 0, rowsLen) + dim, err := field.GetDim() + if err != nil { + return []column.Column{}, err + } + col := column.NewColumnFloat16Vector(field.Name, int(dim), data) + nameColumns[field.Name] = col + case entity.FieldTypeBFloat16Vector: + data := make([][]byte, 0, rowsLen) + dim, err := field.GetDim() + if err != nil { + return []column.Column{}, err + } + col := column.NewColumnBFloat16Vector(field.Name, int(dim), data) + nameColumns[field.Name] = col + case entity.FieldTypeSparseVector: + data := make([]entity.SparseEmbedding, 0, rowsLen) + col := column.NewColumnSparseVectors(field.Name, data) + nameColumns[field.Name] = col + } + } + + if isDynamic { + dynamicCol = column.NewColumnJSONBytes("", make([][]byte, 0, rowsLen)).WithIsDynamic(true) + } + + for _, row := range rows { + // collection schema name need not to be same, since receiver could has other names + v := reflect.ValueOf(row) + set, err := reflectValueCandi(v) + if err != nil { + return nil, err + } + + for idx, field := range sch.Fields { + // skip dynamic field if visible + if isDynamic && field.IsDynamic { + continue + } + // skip auto id pk field + if field.PrimaryKey && field.AutoID { + // remove pk field from candidates set, avoid adding it into dynamic column + delete(set, field.Name) + continue + } + column, ok := nameColumns[field.Name] + if !ok { + return nil, fmt.Errorf("expected unhandled field %s", field.Name) + } + + candi, ok := set[field.Name] + if !ok { + return nil, fmt.Errorf("row %d does not has field %s", idx, field.Name) + } + err := column.AppendValue(candi.v.Interface()) + if err != nil { + return nil, err + } + delete(set, field.Name) + } + + if isDynamic { + m := make(map[string]interface{}) + for name, candi := range set { + m[name] = candi.v.Interface() + } + bs, err := json.Marshal(m) + if err != nil { + return nil, fmt.Errorf("failed to marshal dynamic field %w", err) + } + err = dynamicCol.AppendValue(bs) + if err != nil { + return nil, fmt.Errorf("failed to append value to dynamic field %w", err) + } + } + } + columns := make([]column.Column, 0, len(nameColumns)) + for _, column := range nameColumns { + columns = append(columns, column) + } + if isDynamic { + columns = append(columns, dynamicCol) + } + return columns, nil +} + +func NewArrayColumn(f *entity.Field) column.Column { + switch f.ElementType { + case entity.FieldTypeBool: + return column.NewColumnBoolArray(f.Name, nil) + + case entity.FieldTypeInt8: + return column.NewColumnInt8Array(f.Name, nil) + + case entity.FieldTypeInt16: + return column.NewColumnInt16Array(f.Name, nil) + + case entity.FieldTypeInt32: + return column.NewColumnInt32Array(f.Name, nil) + + case entity.FieldTypeInt64: + return column.NewColumnInt64Array(f.Name, nil) + + case entity.FieldTypeFloat: + return column.NewColumnFloatArray(f.Name, nil) + + case entity.FieldTypeDouble: + return column.NewColumnDoubleArray(f.Name, nil) + + case entity.FieldTypeVarChar: + return column.NewColumnVarCharArray(f.Name, nil) + + default: + return nil + } +} + +type fieldCandi struct { + name string + v reflect.Value + options map[string]string +} + +func reflectValueCandi(v reflect.Value) (map[string]fieldCandi, error) { + if v.Kind() == reflect.Ptr { + v = v.Elem() + } + + result := make(map[string]fieldCandi) + switch v.Kind() { + case reflect.Map: // map[string]any + iter := v.MapRange() + for iter.Next() { + key := 
iter.Key().String() + result[key] = fieldCandi{ + name: key, + v: iter.Value(), + } + } + return result, nil + case reflect.Struct: + for i := 0; i < v.NumField(); i++ { + ft := v.Type().Field(i) + name := ft.Name + tag, ok := ft.Tag.Lookup(MilvusTag) + + settings := make(map[string]string) + if ok { + if tag == MilvusSkipTagValue { + continue + } + settings = ParseTagSetting(tag, MilvusTagSep) + fn, has := settings[MilvusTagName] + if has { + // overwrite column to tag name + name = fn + } + } + _, ok = result[name] + // duplicated + if ok { + return nil, fmt.Errorf("column has duplicated name: %s when parsing field: %s", name, ft.Name) + } + + v := v.Field(i) + if v.Kind() == reflect.Array { + v = v.Slice(0, v.Len()) + } + + result[name] = fieldCandi{ + name: name, + v: v, + options: settings, + } + } + + return result, nil + default: + return nil, fmt.Errorf("unsupport row type: %s", v.Kind().String()) + } +} diff --git a/client/row/data_test.go b/client/row/data_test.go new file mode 100644 index 0000000000000..9e8b7fb216fbc --- /dev/null +++ b/client/row/data_test.go @@ -0,0 +1,174 @@ +package row + +import ( + "reflect" + "testing" + + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus/client/v2/entity" +) + +type ValidStruct struct { + ID int64 `milvus:"primary_key"` + Attr1 int8 + Attr2 int16 + Attr3 int32 + Attr4 float32 + Attr5 float64 + Attr6 string + Attr7 bool + Vector []float32 `milvus:"dim:16"` + Vector2 []byte `milvus:"dim:32"` +} + +type ValidStruct2 struct { + ID int64 `milvus:"primary_key"` + Vector [16]float32 + Vector2 [4]byte + Ignored bool `milvus:"-"` +} + +type ValidStructWithNamedTag struct { + ID int64 `milvus:"primary_key;name:id"` + Vector [16]float32 `milvus:"name:vector"` +} + +type RowsSuite struct { + suite.Suite +} + +func (s *RowsSuite) TestRowsToColumns() { + s.Run("valid_cases", func() { + columns, err := AnyToColumns([]any{&ValidStruct{}}) + s.Nil(err) + s.Equal(10, len(columns)) + + columns, err = AnyToColumns([]any{&ValidStruct2{}}) + s.Nil(err) + s.Equal(3, len(columns)) + }) + + s.Run("auto_id_pk", func() { + type AutoPK struct { + ID int64 `milvus:"primary_key;auto_id"` + Vector []float32 `milvus:"dim:32"` + } + columns, err := AnyToColumns([]any{&AutoPK{}}) + s.Nil(err) + s.Require().Equal(1, len(columns)) + s.Equal("Vector", columns[0].Name()) + }) + + s.Run("fp16", func() { + type BF16Struct struct { + ID int64 `milvus:"primary_key;auto_id"` + Vector []byte `milvus:"dim:16;vector_type:bf16"` + } + columns, err := AnyToColumns([]any{&BF16Struct{}}) + s.Nil(err) + s.Require().Equal(1, len(columns)) + s.Equal("Vector", columns[0].Name()) + s.Equal(entity.FieldTypeBFloat16Vector, columns[0].Type()) + }) + + s.Run("fp16", func() { + type FP16Struct struct { + ID int64 `milvus:"primary_key;auto_id"` + Vector []byte `milvus:"dim:16;vector_type:fp16"` + } + columns, err := AnyToColumns([]any{&FP16Struct{}}) + s.Nil(err) + s.Require().Equal(1, len(columns)) + s.Equal("Vector", columns[0].Name()) + s.Equal(entity.FieldTypeFloat16Vector, columns[0].Type()) + }) + + s.Run("invalid_cases", func() { + // empty input + _, err := AnyToColumns([]any{}) + s.NotNil(err) + + // incompatible rows + _, err = AnyToColumns([]any{&ValidStruct{}, &ValidStruct2{}}) + s.NotNil(err) + + // schema & row not compatible + _, err = AnyToColumns([]any{&ValidStruct{}}, &entity.Schema{ + Fields: []*entity.Field{ + { + Name: "int64", + DataType: entity.FieldTypeInt64, + }, + }, + }) + s.NotNil(err) + }) +} + +func (s *RowsSuite) TestDynamicSchema() { + 
s.Run("all_fallback_dynamic", func() { + columns, err := AnyToColumns([]any{&ValidStruct{}}, + entity.NewSchema().WithDynamicFieldEnabled(true), + ) + s.NoError(err) + s.Equal(1, len(columns)) + }) + + s.Run("dynamic_not_found", func() { + _, err := AnyToColumns([]any{&ValidStruct{}}, + entity.NewSchema().WithField( + entity.NewField().WithName("ID").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true), + ).WithDynamicFieldEnabled(true), + ) + s.NoError(err) + }) +} + +func (s *RowsSuite) TestReflectValueCandi() { + cases := []struct { + tag string + v reflect.Value + expect map[string]fieldCandi + expectErr bool + }{ + { + tag: "MapRow", + v: reflect.ValueOf(map[string]interface{}{ + "A": "abd", "B": int64(8), + }), + expect: map[string]fieldCandi{ + "A": { + name: "A", + v: reflect.ValueOf("abd"), + }, + "B": { + name: "B", + v: reflect.ValueOf(int64(8)), + }, + }, + expectErr: false, + }, + } + + for _, c := range cases { + s.Run(c.tag, func() { + r, err := reflectValueCandi(c.v) + if c.expectErr { + s.Error(err) + return + } + s.NoError(err) + s.Equal(len(c.expect), len(r)) + for k, v := range c.expect { + rv, has := r[k] + s.Require().True(has) + s.Equal(v.name, rv.name) + } + }) + } +} + +func TestRows(t *testing.T) { + suite.Run(t, new(RowsSuite)) +} diff --git a/client/row/schema.go b/client/row/schema.go new file mode 100644 index 0000000000000..6022275653f17 --- /dev/null +++ b/client/row/schema.go @@ -0,0 +1,185 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package row + +import ( + "fmt" + "go/ast" + "reflect" + "strconv" + "strings" + + "github.com/cockroachdb/errors" + + "github.com/milvus-io/milvus/client/v2/entity" +) + +// ParseSchema parses schema from interface{}. 
+func ParseSchema(r interface{}) (*entity.Schema, error) { + sch := &entity.Schema{} + t := reflect.TypeOf(r) + if t.Kind() == reflect.Array || t.Kind() == reflect.Slice || t.Kind() == reflect.Ptr { + t = t.Elem() + } + + // MapRow is not supported for schema definition + // TODO add PrimaryKey() interface later + if t.Kind() == reflect.Map { + return nil, fmt.Errorf("map row is not supported for schema definition") + } + + if t.Kind() != reflect.Struct { + return nil, fmt.Errorf("unsupported data type: %+v", r) + } + + // Collection method not overwrited, try use Row type name + if sch.CollectionName == "" { + sch.CollectionName = t.Name() + if sch.CollectionName == "" { + return nil, errors.New("collection name not provided") + } + } + sch.Fields = make([]*entity.Field, 0, t.NumField()) + for i := 0; i < t.NumField(); i++ { + f := t.Field(i) + // ignore anonymous field for now + if f.Anonymous || !ast.IsExported(f.Name) { + continue + } + + field := &entity.Field{ + Name: f.Name, + } + ft := f.Type + if f.Type.Kind() == reflect.Ptr { + ft = ft.Elem() + } + fv := reflect.New(ft) + tag := f.Tag.Get(MilvusTag) + if tag == MilvusSkipTagValue { + continue + } + tagSettings := ParseTagSetting(tag, MilvusTagSep) + if _, has := tagSettings[MilvusPrimaryKey]; has { + field.PrimaryKey = true + } + if _, has := tagSettings[MilvusAutoID]; has { + field.AutoID = true + } + if name, has := tagSettings[MilvusTagName]; has { + field.Name = name + } + switch reflect.Indirect(fv).Kind() { + case reflect.Bool: + field.DataType = entity.FieldTypeBool + case reflect.Int8: + field.DataType = entity.FieldTypeInt8 + case reflect.Int16: + field.DataType = entity.FieldTypeInt16 + case reflect.Int32: + field.DataType = entity.FieldTypeInt32 + case reflect.Int64: + field.DataType = entity.FieldTypeInt64 + case reflect.Float32: + field.DataType = entity.FieldTypeFloat + case reflect.Float64: + field.DataType = entity.FieldTypeDouble + case reflect.String: + field.DataType = entity.FieldTypeString + case reflect.Array: + arrayLen := ft.Len() + elemType := ft.Elem() + switch elemType.Kind() { + case reflect.Uint8: + field.WithDataType(entity.FieldTypeBinaryVector) + field.WithDim(int64(arrayLen) * 8) + case reflect.Float32: + field.WithDataType(entity.FieldTypeFloatVector) + field.WithDim(int64(arrayLen)) + default: + return nil, fmt.Errorf("field %s is array of %v, which is not supported", f.Name, elemType) + } + case reflect.Slice: + dimStr, has := tagSettings[VectorDimTag] + if !has { + return nil, fmt.Errorf("field %s is slice but dim not provided", f.Name) + } + dim, err := strconv.ParseInt(dimStr, 10, 64) + if err != nil { + return nil, fmt.Errorf("dim value %s is not valid", dimStr) + } + if dim < 1 || dim > DimMax { + return nil, fmt.Errorf("dim value %d is out of range", dim) + } + field.WithDim(dim) + + elemType := ft.Elem() + switch elemType.Kind() { + case reflect.Uint8: // []byte, could be BinaryVector, fp16, bf 6 + switch tagSettings[VectorTypeTag] { + case "fp16": + field.DataType = entity.FieldTypeFloat16Vector + case "bf16": + field.DataType = entity.FieldTypeBFloat16Vector + default: + field.DataType = entity.FieldTypeBinaryVector + } + case reflect.Float32: + field.DataType = entity.FieldTypeFloatVector + default: + return nil, fmt.Errorf("field %s is slice of %v, which is not supported", f.Name, elemType) + } + default: + return nil, fmt.Errorf("field %s is %v, which is not supported", field.Name, ft) + } + sch.Fields = append(sch.Fields, field) + } + + return sch, nil +} + +// ParseTagSetting 
parses struct tag into map settings +func ParseTagSetting(str string, sep string) map[string]string { + settings := map[string]string{} + names := strings.Split(str, sep) + + for i := 0; i < len(names); i++ { + j := i + if len(names[j]) > 0 { + for { + if names[j][len(names[j])-1] == '\\' { + i++ + names[j] = names[j][0:len(names[j])-1] + sep + names[i] + names[i] = "" + } else { + break + } + } + } + + values := strings.Split(names[j], ":") + k := strings.TrimSpace(strings.ToUpper(values[0])) + + if len(values) >= 2 { + settings[k] = strings.Join(values[1:], ":") + } else if k != "" { + settings[k] = k + } + } + + return settings +} diff --git a/client/row/schema_test.go b/client/row/schema_test.go new file mode 100644 index 0000000000000..fbfdc19f27058 --- /dev/null +++ b/client/row/schema_test.go @@ -0,0 +1,213 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package row + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/milvus-io/milvus/client/v2/entity" +) + +// ArrayRow test case type +type ArrayRow [16]float32 + +func (ar *ArrayRow) Collection() string { return "" } +func (ar *ArrayRow) Partition() string { return "" } +func (ar *ArrayRow) Description() string { return "" } + +type Uint8Struct struct { + Attr uint8 +} + +type StringArrayStruct struct { + Vector [8]string +} + +type StringSliceStruct struct { + Vector []string `milvus:"dim:8"` +} + +type SliceNoDimStruct struct { + Vector []float32 `milvus:""` +} + +type SliceBadDimStruct struct { + Vector []float32 `milvus:"dim:str"` +} + +type SliceBadDimStruct2 struct { + Vector []float32 `milvus:"dim:0"` +} + +func TestParseSchema(t *testing.T) { + t.Run("invalid cases", func(t *testing.T) { + // anonymous struct with default collection name ("") will cause error + anonymusStruct := struct{}{} + sch, err := ParseSchema(anonymusStruct) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // non struct + arrayRow := ArrayRow([16]float32{}) + sch, err = ParseSchema(&arrayRow) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // uint8 not supported + sch, err = ParseSchema(&Uint8Struct{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // string array not supported + sch, err = ParseSchema(&StringArrayStruct{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // string slice not supported + sch, err = ParseSchema(&StringSliceStruct{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // slice vector with no dim + sch, err = ParseSchema(&SliceNoDimStruct{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // slice vector with bad format dim + sch, err = ParseSchema(&SliceBadDimStruct{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // slice vector with bad format dim 2 + sch, err = ParseSchema(&SliceBadDimStruct2{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + }) + 
+ t.Run("valid cases", func(t *testing.T) { + getVectorField := func(schema *entity.Schema) *entity.Field { + for _, field := range schema.Fields { + if field.DataType == entity.FieldTypeFloatVector || + field.DataType == entity.FieldTypeBinaryVector || + field.DataType == entity.FieldTypeBFloat16Vector || + field.DataType == entity.FieldTypeFloat16Vector { + return field + } + } + return nil + } + + type ValidStruct struct { + ID int64 `milvus:"primary_key"` + Attr1 int8 + Attr2 int16 + Attr3 int32 + Attr4 float32 + Attr5 float64 + Attr6 string + Vector []float32 `milvus:"dim:128"` + } + vs := &ValidStruct{} + sch, err := ParseSchema(vs) + assert.Nil(t, err) + assert.NotNil(t, sch) + assert.Equal(t, "ValidStruct", sch.CollectionName) + + type ValidFp16Struct struct { + ID int64 `milvus:"primary_key"` + Attr1 int8 + Attr2 int16 + Attr3 int32 + Attr4 float32 + Attr5 float64 + Attr6 string + Vector []byte `milvus:"dim:128;vector_type:fp16"` + } + fp16Vs := &ValidFp16Struct{} + sch, err = ParseSchema(fp16Vs) + assert.Nil(t, err) + assert.NotNil(t, sch) + assert.Equal(t, "ValidFp16Struct", sch.CollectionName) + vectorField := getVectorField(sch) + assert.Equal(t, entity.FieldTypeFloat16Vector, vectorField.DataType) + + type ValidBf16Struct struct { + ID int64 `milvus:"primary_key"` + Attr1 int8 + Attr2 int16 + Attr3 int32 + Attr4 float32 + Attr5 float64 + Attr6 string + Vector []byte `milvus:"dim:128;vector_type:bf16"` + } + bf16Vs := &ValidBf16Struct{} + sch, err = ParseSchema(bf16Vs) + assert.Nil(t, err) + assert.NotNil(t, sch) + assert.Equal(t, "ValidBf16Struct", sch.CollectionName) + vectorField = getVectorField(sch) + assert.Equal(t, entity.FieldTypeBFloat16Vector, vectorField.DataType) + + type ValidByteStruct struct { + ID int64 `milvus:"primary_key"` + Vector []byte `milvus:"dim:128"` + } + vs2 := &ValidByteStruct{} + sch, err = ParseSchema(vs2) + assert.Nil(t, err) + assert.NotNil(t, sch) + + type ValidArrayStruct struct { + ID int64 `milvus:"primary_key"` + Vector [64]float32 + } + vs3 := &ValidArrayStruct{} + sch, err = ParseSchema(vs3) + assert.Nil(t, err) + assert.NotNil(t, sch) + + type ValidArrayStructByte struct { + ID int64 `milvus:"primary_key;auto_id"` + Data *string `milvus:"extra:test\\;false"` + Vector [64]byte + } + vs4 := &ValidArrayStructByte{} + sch, err = ParseSchema(vs4) + assert.Nil(t, err) + assert.NotNil(t, sch) + + vs5 := &ValidStructWithNamedTag{} + sch, err = ParseSchema(vs5) + assert.Nil(t, err) + assert.NotNil(t, sch) + i64f, vecf := false, false + for _, field := range sch.Fields { + if field.Name == "id" { + i64f = true + } + if field.Name == "vector" { + vecf = true + } + } + + assert.True(t, i64f) + assert.True(t, vecf) + }) +} diff --git a/client/write_options.go b/client/write_options.go index 54139ef0b21fa..612cc7fe2d995 100644 --- a/client/write_options.go +++ b/client/write_options.go @@ -28,6 +28,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/column" "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/client/v2/row" ) type InsertOption interface { @@ -71,10 +72,8 @@ func (opt *columnBasedDataOption) processInsertColumns(colSchema *entity.Schema, l := col.Len() if rowSize == 0 { rowSize = l - } else { - if rowSize != l { - return nil, 0, errors.New("column size not match") - } + } else if rowSize != l { + return nil, 0, errors.New("column size not match") } field, has := mNameField[col.Name()] if !has { @@ -247,6 +246,56 @@ func NewColumnBasedInsertOption(collName 
string, columns ...column.Column) *colu } } +type rowBasedDataOption struct { + *columnBasedDataOption + rows []any +} + +func NewRowBasedInsertOption(collName string, rows ...any) *rowBasedDataOption { + return &rowBasedDataOption{ + columnBasedDataOption: &columnBasedDataOption{ + collName: collName, + }, + rows: rows, + } +} + +func (opt *rowBasedDataOption) InsertRequest(coll *entity.Collection) (*milvuspb.InsertRequest, error) { + columns, err := row.AnyToColumns(opt.rows, coll.Schema) + if err != nil { + return nil, err + } + opt.columnBasedDataOption.columns = columns + fieldsData, rowNum, err := opt.processInsertColumns(coll.Schema, opt.columns...) + if err != nil { + return nil, err + } + return &milvuspb.InsertRequest{ + CollectionName: opt.collName, + PartitionName: opt.partitionName, + FieldsData: fieldsData, + NumRows: uint32(rowNum), + }, nil +} + +func (opt *rowBasedDataOption) UpsertRequest(coll *entity.Collection) (*milvuspb.UpsertRequest, error) { + columns, err := row.AnyToColumns(opt.rows, coll.Schema) + if err != nil { + return nil, err + } + opt.columnBasedDataOption.columns = columns + fieldsData, rowNum, err := opt.processInsertColumns(coll.Schema, opt.columns...) + if err != nil { + return nil, err + } + return &milvuspb.UpsertRequest{ + CollectionName: opt.collName, + PartitionName: opt.partitionName, + FieldsData: fieldsData, + NumRows: uint32(rowNum), + }, nil +} + type DeleteOption interface { Request() *milvuspb.DeleteRequest } From e1bafd7105340da22af22e17c1b34bfb6fb31048 Mon Sep 17 00:00:00 2001 From: congqixia Date: Wed, 22 May 2024 21:11:40 +0800 Subject: [PATCH 037/126] enhance: Use pre-built logger for write buffer frequent ops (#33273) See also #33266 Each `WriteBuffer` shall have same channel/collection id attribute, so use same logger will do and reduce logger allocation & frequent name composition Signed-off-by: Congqi Xia --- .../datanode/writebuffer/l0_write_buffer.go | 3 +- internal/datanode/writebuffer/write_buffer.go | 28 +++++++++++-------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/internal/datanode/writebuffer/l0_write_buffer.go b/internal/datanode/writebuffer/l0_write_buffer.go index ebb1f9184aaec..c4ed68fb1147a 100644 --- a/internal/datanode/writebuffer/l0_write_buffer.go +++ b/internal/datanode/writebuffer/l0_write_buffer.go @@ -14,7 +14,6 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/retry" @@ -143,6 +142,7 @@ func (wb *l0WriteBuffer) BufferData(insertMsgs []*msgstream.InsertMsg, deleteMsg } func (wb *l0WriteBuffer) getL0SegmentID(partitionID int64, startPos *msgpb.MsgPosition) int64 { + log := wb.logger segmentID, ok := wb.l0Segments[partitionID] if !ok { err := retry.Do(context.Background(), func() error { @@ -168,7 +168,6 @@ func (wb *l0WriteBuffer) getL0SegmentID(partitionID int64, startPos *msgpb.MsgPo log.Info("Add a new level zero segment", zap.Int64("segmentID", segmentID), zap.String("level", datapb.SegmentLevel_L0.String()), - zap.String("channel", wb.channelName), zap.Any("start position", startPos), ) } diff --git a/internal/datanode/writebuffer/write_buffer.go b/internal/datanode/writebuffer/write_buffer.go index 6c64060ccac8e..8456fb7ac2957 100644 --- a/internal/datanode/writebuffer/write_buffer.go +++ 
b/internal/datanode/writebuffer/write_buffer.go @@ -96,6 +96,10 @@ type writeBufferBase struct { flushTimestamp *atomic.Uint64 storagev2Cache *metacache.StorageV2Cache + + // pre build logger + logger *log.MLogger + cpRatedLogger *log.MLogger } func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (*writeBufferBase, error) { @@ -127,7 +131,7 @@ func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2 return nil, err } - return &writeBufferBase{ + wb := &writeBufferBase{ channelName: channel, collectionID: metacache.Collection(), collSchema: schema, @@ -140,7 +144,13 @@ func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2 syncPolicies: option.syncPolicies, flushTimestamp: flushTs, storagev2Cache: storageV2Cache, - }, nil + } + + wb.logger = log.With(zap.Int64("collectionID", wb.collectionID), + zap.String("channel", wb.channelName)) + wb.cpRatedLogger = wb.logger.WithRateGroup(fmt.Sprintf("writebuffer_cp_%s", wb.channelName), 1, 60) + + return wb, nil } func (wb *writeBufferBase) HasSegment(segmentID int64) bool { @@ -178,13 +188,10 @@ func (wb *writeBufferBase) MemorySize() int64 { } func (wb *writeBufferBase) EvictBuffer(policies ...SyncPolicy) { + log := wb.logger wb.mut.Lock() defer wb.mut.Unlock() - log := log.Ctx(context.Background()).With( - zap.Int64("collectionID", wb.collectionID), - zap.String("channel", wb.channelName), - ) // need valid checkpoint before triggering syncing if wb.checkpoint == nil { log.Warn("evict buffer before buffering data") @@ -201,9 +208,7 @@ func (wb *writeBufferBase) EvictBuffer(policies ...SyncPolicy) { } func (wb *writeBufferBase) GetCheckpoint() *msgpb.MsgPosition { - log := log.Ctx(context.Background()). - With(zap.String("channel", wb.channelName)). - WithRateGroup(fmt.Sprintf("writebuffer_cp_%s", wb.channelName), 1, 60) + log := wb.cpRatedLogger wb.mut.RLock() defer wb.mut.RUnlock() @@ -556,6 +561,7 @@ func (wb *writeBufferBase) getEstBatchSize() uint { } func (wb *writeBufferBase) Close(drop bool) { + log := wb.logger // sink all data and call Drop for meta writer wb.mut.Lock() defer wb.mut.Unlock() @@ -583,13 +589,13 @@ func (wb *writeBufferBase) Close(drop bool) { err := conc.AwaitAll(futures...) if err != nil { - log.Error("failed to sink write buffer data", zap.String("channel", wb.channelName), zap.Error(err)) + log.Error("failed to sink write buffer data", zap.Error(err)) // TODO change to remove channel in the future panic(err) } err = wb.metaWriter.DropChannel(wb.channelName) if err != nil { - log.Error("failed to drop channel", zap.String("channel", wb.channelName), zap.Error(err)) + log.Error("failed to drop channel", zap.Error(err)) // TODO change to remove channel in the future panic(err) } From b9d7145049a77ffddb1e7fef834563e851118477 Mon Sep 17 00:00:00 2001 From: PowderLi <135960789+PowderLi@users.noreply.github.com> Date: Thu, 23 May 2024 09:51:45 +0800 Subject: [PATCH 038/126] fix: [restful v2]role operations need dbName (#33283) issue: #33220 use dbName as part of privilege entity, so 1. grant / revoke a privilege need dbName 2. 
we can describe the privileges of the role which belong to one special database Signed-off-by: PowderLi --- internal/distributed/proxy/httpserver/handler_v2.go | 2 +- internal/distributed/proxy/httpserver/request_v2.go | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/distributed/proxy/httpserver/handler_v2.go b/internal/distributed/proxy/httpserver/handler_v2.go index 2badb5957d89d..ba97bc5fa41f7 100644 --- a/internal/distributed/proxy/httpserver/handler_v2.go +++ b/internal/distributed/proxy/httpserver/handler_v2.go @@ -1514,7 +1514,7 @@ func (h *HandlersV2) listRoles(ctx context.Context, c *gin.Context, anyReq any, func (h *HandlersV2) describeRole(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) { getter, _ := anyReq.(RoleNameGetter) req := &milvuspb.SelectGrantRequest{ - Entity: &milvuspb.GrantEntity{Role: &milvuspb.RoleEntity{Name: getter.GetRoleName()}}, + Entity: &milvuspb.GrantEntity{Role: &milvuspb.RoleEntity{Name: getter.GetRoleName()}, DbName: dbName}, } resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.SelectGrant(reqCtx, req.(*milvuspb.SelectGrantRequest)) diff --git a/internal/distributed/proxy/httpserver/request_v2.go b/internal/distributed/proxy/httpserver/request_v2.go index aa32cfcb6c29e..01ecf511600f0 100644 --- a/internal/distributed/proxy/httpserver/request_v2.go +++ b/internal/distributed/proxy/httpserver/request_v2.go @@ -248,9 +248,12 @@ type UserRoleReq struct { } type RoleReq struct { + DbName string `json:"dbName"` RoleName string `json:"roleName" binding:"required"` } +func (req *RoleReq) GetDbName() string { return req.DbName } + func (req *RoleReq) GetRoleName() string { return req.RoleName } @@ -263,6 +266,8 @@ type GrantReq struct { DbName string `json:"dbName"` } +func (req *GrantReq) GetDbName() string { return req.DbName } + type IndexParam struct { FieldName string `json:"fieldName" binding:"required"` IndexName string `json:"indexName" binding:"required"` From 22bddde5ffd396529482d4646be1931220f0ca60 Mon Sep 17 00:00:00 2001 From: XuanYang-cn Date: Thu, 23 May 2024 09:53:40 +0800 Subject: [PATCH 039/126] enhance: Tidy compactor and remove dup codes (#32198) See also: #32451 Signed-off-by: yangxuan --- Makefile | 3 +- internal/datacoord/mock_cluster.go | 48 +- internal/datacoord/mock_session_manager.go | 62 +- internal/datanode/binlog_io.go | 249 ---- internal/datanode/binlog_io_test.go | 404 ------ internal/datanode/compaction/compactor.go | 32 + internal/datanode/compaction/mix_compactor.go | 568 ++++++++ .../datanode/compaction/mix_compactor_test.go | 803 +++++++++++ .../datanode/compaction/mock_compactor.go | 307 ++++ .../datanode/compaction/segment_writer.go | 165 +++ internal/datanode/compaction_executor.go | 55 +- internal/datanode/compaction_executor_test.go | 137 +- internal/datanode/compactor.go | 827 ----------- internal/datanode/compactor_test.go | 1246 ----------------- internal/datanode/io/binlog_io.go | 1 - internal/datanode/l0_compactor.go | 25 +- internal/datanode/l0_compactor_test.go | 8 +- internal/datanode/mock_test.go | 51 - internal/datanode/services.go | 8 +- internal/datanode/services_test.go | 20 +- .../datanode/writebuffer/insert_buffer.go | 11 +- .../datanode/writebuffer/segment_buffer.go | 15 + internal/metastore/kv/binlog/binlog.go | 4 +- internal/mocks/mock_datanode.go | 86 +- internal/mocks/mock_datanode_client.go | 114 +- 25 files changed, 2165 insertions(+), 3084 
deletions(-) delete mode 100644 internal/datanode/binlog_io.go delete mode 100644 internal/datanode/binlog_io_test.go create mode 100644 internal/datanode/compaction/compactor.go create mode 100644 internal/datanode/compaction/mix_compactor.go create mode 100644 internal/datanode/compaction/mix_compactor_test.go create mode 100644 internal/datanode/compaction/mock_compactor.go create mode 100644 internal/datanode/compaction/segment_writer.go delete mode 100644 internal/datanode/compactor.go delete mode 100644 internal/datanode/compactor_test.go diff --git a/Makefile b/Makefile index 9abc97fa6c4b1..9ccfe22604b0e 100644 --- a/Makefile +++ b/Makefile @@ -480,6 +480,7 @@ generate-mockery-datanode: getdeps $(INSTALL_PATH)/mockery --name=BinlogIO --dir=$(PWD)/internal/datanode/io --output=$(PWD)/internal/datanode/io --filename=mock_binlogio.go --with-expecter --structname=MockBinlogIO --outpkg=io --inpackage $(INSTALL_PATH)/mockery --name=FlowgraphManager --dir=$(PWD)/internal/datanode --output=$(PWD)/internal/datanode --filename=mock_fgmanager.go --with-expecter --structname=MockFlowgraphManager --outpkg=datanode --inpackage $(INSTALL_PATH)/mockery --name=ChannelManager --dir=$(PWD)/internal/datanode --output=$(PWD)/internal/datanode --filename=mock_channelmanager.go --with-expecter --structname=MockChannelManager --outpkg=datanode --inpackage + $(INSTALL_PATH)/mockery --name=Compactor --dir=$(PWD)/internal/datanode/compaction --output=$(PWD)/internal/datanode/compaction --filename=mock_compactor.go --with-expecter --structname=MockCompactor --outpkg=compaction --inpackage generate-mockery-metastore: getdeps $(INSTALL_PATH)/mockery --name=RootCoordCatalog --dir=$(PWD)/internal/metastore --output=$(PWD)/internal/metastore/mocks --filename=mock_rootcoord_catalog.go --with-expecter --structname=RootCoordCatalog --outpkg=mocks @@ -522,4 +523,4 @@ mmap-migration: @source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ GO111MODULE=on $(GO) build -pgo=$(PGO_PATH)/default.pgo -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ - -tags dynamic -o $(INSTALL_PATH)/mmap-migration $(MMAP_MIGRATION_PATH)/main.go 1>/dev/null \ No newline at end of file + -tags dynamic -o $(INSTALL_PATH)/mmap-migration $(MMAP_MIGRATION_PATH)/main.go 1>/dev/null diff --git a/internal/datacoord/mock_cluster.go b/internal/datacoord/mock_cluster.go index e35f1e1fee0ab..e92ae8ecb3c28 100644 --- a/internal/datacoord/mock_cluster.go +++ b/internal/datacoord/mock_cluster.go @@ -74,8 +74,8 @@ type MockCluster_DropImport_Call struct { } // DropImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.DropImportRequest +// - nodeID int64 +// - in *datapb.DropImportRequest func (_e *MockCluster_Expecter) DropImport(nodeID interface{}, in interface{}) *MockCluster_DropImport_Call { return &MockCluster_DropImport_Call{Call: _e.mock.On("DropImport", nodeID, in)} } @@ -117,10 +117,10 @@ type MockCluster_Flush_Call struct { } // Flush is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - channel string -// - segments []*datapb.SegmentInfo +// - ctx context.Context +// - nodeID int64 +// - channel string +// - segments []*datapb.SegmentInfo func (_e *MockCluster_Expecter) Flush(ctx interface{}, nodeID interface{}, channel interface{}, segments interface{}) *MockCluster_Flush_Call { return 
&MockCluster_Flush_Call{Call: _e.mock.On("Flush", ctx, nodeID, channel, segments)} } @@ -162,10 +162,10 @@ type MockCluster_FlushChannels_Call struct { } // FlushChannels is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - flushTs uint64 -// - channels []string +// - ctx context.Context +// - nodeID int64 +// - flushTs uint64 +// - channels []string func (_e *MockCluster_Expecter) FlushChannels(ctx interface{}, nodeID interface{}, flushTs interface{}, channels interface{}) *MockCluster_FlushChannels_Call { return &MockCluster_FlushChannels_Call{Call: _e.mock.On("FlushChannels", ctx, nodeID, flushTs, channels)} } @@ -250,8 +250,8 @@ type MockCluster_ImportV2_Call struct { } // ImportV2 is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.ImportRequest +// - nodeID int64 +// - in *datapb.ImportRequest func (_e *MockCluster_Expecter) ImportV2(nodeID interface{}, in interface{}) *MockCluster_ImportV2_Call { return &MockCluster_ImportV2_Call{Call: _e.mock.On("ImportV2", nodeID, in)} } @@ -293,8 +293,8 @@ type MockCluster_PreImport_Call struct { } // PreImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.PreImportRequest +// - nodeID int64 +// - in *datapb.PreImportRequest func (_e *MockCluster_Expecter) PreImport(nodeID interface{}, in interface{}) *MockCluster_PreImport_Call { return &MockCluster_PreImport_Call{Call: _e.mock.On("PreImport", nodeID, in)} } @@ -348,8 +348,8 @@ type MockCluster_QueryImport_Call struct { } // QueryImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.QueryImportRequest +// - nodeID int64 +// - in *datapb.QueryImportRequest func (_e *MockCluster_Expecter) QueryImport(nodeID interface{}, in interface{}) *MockCluster_QueryImport_Call { return &MockCluster_QueryImport_Call{Call: _e.mock.On("QueryImport", nodeID, in)} } @@ -403,8 +403,8 @@ type MockCluster_QueryPreImport_Call struct { } // QueryPreImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.QueryPreImportRequest +// - nodeID int64 +// - in *datapb.QueryPreImportRequest func (_e *MockCluster_Expecter) QueryPreImport(nodeID interface{}, in interface{}) *MockCluster_QueryPreImport_Call { return &MockCluster_QueryPreImport_Call{Call: _e.mock.On("QueryPreImport", nodeID, in)} } @@ -489,7 +489,7 @@ type MockCluster_Register_Call struct { } // Register is a helper method to define mock.On call -// - node *NodeInfo +// - node *NodeInfo func (_e *MockCluster_Expecter) Register(node interface{}) *MockCluster_Register_Call { return &MockCluster_Register_Call{Call: _e.mock.On("Register", node)} } @@ -531,8 +531,8 @@ type MockCluster_Startup_Call struct { } // Startup is a helper method to define mock.On call -// - ctx context.Context -// - nodes []*NodeInfo +// - ctx context.Context +// - nodes []*NodeInfo func (_e *MockCluster_Expecter) Startup(ctx interface{}, nodes interface{}) *MockCluster_Startup_Call { return &MockCluster_Startup_Call{Call: _e.mock.On("Startup", ctx, nodes)} } @@ -574,7 +574,7 @@ type MockCluster_UnRegister_Call struct { } // UnRegister is a helper method to define mock.On call -// - node *NodeInfo +// - node *NodeInfo func (_e *MockCluster_Expecter) UnRegister(node interface{}) *MockCluster_UnRegister_Call { return &MockCluster_UnRegister_Call{Call: _e.mock.On("UnRegister", node)} } @@ -616,8 +616,8 @@ type MockCluster_Watch_Call struct { } // Watch is a helper method to define mock.On call -// - ctx context.Context -// - ch RWChannel +// 
- ctx context.Context +// - ch RWChannel func (_e *MockCluster_Expecter) Watch(ctx interface{}, ch interface{}) *MockCluster_Watch_Call { return &MockCluster_Watch_Call{Call: _e.mock.On("Watch", ctx, ch)} } diff --git a/internal/datacoord/mock_session_manager.go b/internal/datacoord/mock_session_manager.go index a7d8e7f679c59..aea14b219ce03 100644 --- a/internal/datacoord/mock_session_manager.go +++ b/internal/datacoord/mock_session_manager.go @@ -35,7 +35,7 @@ type MockSessionManager_AddSession_Call struct { } // AddSession is a helper method to define mock.On call -// - node *NodeInfo +// - node *NodeInfo func (_e *MockSessionManager_Expecter) AddSession(node interface{}) *MockSessionManager_AddSession_Call { return &MockSessionManager_AddSession_Call{Call: _e.mock.On("AddSession", node)} } @@ -89,9 +89,9 @@ type MockSessionManager_CheckChannelOperationProgress_Call struct { } // CheckChannelOperationProgress is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - info *datapb.ChannelWatchInfo +// - ctx context.Context +// - nodeID int64 +// - info *datapb.ChannelWatchInfo func (_e *MockSessionManager_Expecter) CheckChannelOperationProgress(ctx interface{}, nodeID interface{}, info interface{}) *MockSessionManager_CheckChannelOperationProgress_Call { return &MockSessionManager_CheckChannelOperationProgress_Call{Call: _e.mock.On("CheckChannelOperationProgress", ctx, nodeID, info)} } @@ -133,7 +133,7 @@ type MockSessionManager_CheckHealth_Call struct { } // CheckHealth is a helper method to define mock.On call -// - ctx context.Context +// - ctx context.Context func (_e *MockSessionManager_Expecter) CheckHealth(ctx interface{}) *MockSessionManager_CheckHealth_Call { return &MockSessionManager_CheckHealth_Call{Call: _e.mock.On("CheckHealth", ctx)} } @@ -207,9 +207,9 @@ type MockSessionManager_Compaction_Call struct { } // Compaction is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - plan *datapb.CompactionPlan +// - ctx context.Context +// - nodeID int64 +// - plan *datapb.CompactionPlan func (_e *MockSessionManager_Expecter) Compaction(ctx interface{}, nodeID interface{}, plan interface{}) *MockSessionManager_Compaction_Call { return &MockSessionManager_Compaction_Call{Call: _e.mock.On("Compaction", ctx, nodeID, plan)} } @@ -242,7 +242,7 @@ type MockSessionManager_DeleteSession_Call struct { } // DeleteSession is a helper method to define mock.On call -// - node *NodeInfo +// - node *NodeInfo func (_e *MockSessionManager_Expecter) DeleteSession(node interface{}) *MockSessionManager_DeleteSession_Call { return &MockSessionManager_DeleteSession_Call{Call: _e.mock.On("DeleteSession", node)} } @@ -284,8 +284,8 @@ type MockSessionManager_DropImport_Call struct { } // DropImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.DropImportRequest +// - nodeID int64 +// - in *datapb.DropImportRequest func (_e *MockSessionManager_Expecter) DropImport(nodeID interface{}, in interface{}) *MockSessionManager_DropImport_Call { return &MockSessionManager_DropImport_Call{Call: _e.mock.On("DropImport", nodeID, in)} } @@ -318,9 +318,9 @@ type MockSessionManager_Flush_Call struct { } // Flush is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - req *datapb.FlushSegmentsRequest +// - ctx context.Context +// - nodeID int64 +// - req *datapb.FlushSegmentsRequest func (_e *MockSessionManager_Expecter) Flush(ctx interface{}, nodeID interface{}, req interface{}) 
*MockSessionManager_Flush_Call { return &MockSessionManager_Flush_Call{Call: _e.mock.On("Flush", ctx, nodeID, req)} } @@ -362,9 +362,9 @@ type MockSessionManager_FlushChannels_Call struct { } // FlushChannels is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - req *datapb.FlushChannelsRequest +// - ctx context.Context +// - nodeID int64 +// - req *datapb.FlushChannelsRequest func (_e *MockSessionManager_Expecter) FlushChannels(ctx interface{}, nodeID interface{}, req interface{}) *MockSessionManager_FlushChannels_Call { return &MockSessionManager_FlushChannels_Call{Call: _e.mock.On("FlushChannels", ctx, nodeID, req)} } @@ -545,8 +545,8 @@ type MockSessionManager_ImportV2_Call struct { } // ImportV2 is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.ImportRequest +// - nodeID int64 +// - in *datapb.ImportRequest func (_e *MockSessionManager_Expecter) ImportV2(nodeID interface{}, in interface{}) *MockSessionManager_ImportV2_Call { return &MockSessionManager_ImportV2_Call{Call: _e.mock.On("ImportV2", nodeID, in)} } @@ -588,9 +588,9 @@ type MockSessionManager_NotifyChannelOperation_Call struct { } // NotifyChannelOperation is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - req *datapb.ChannelOperationsRequest +// - ctx context.Context +// - nodeID int64 +// - req *datapb.ChannelOperationsRequest func (_e *MockSessionManager_Expecter) NotifyChannelOperation(ctx interface{}, nodeID interface{}, req interface{}) *MockSessionManager_NotifyChannelOperation_Call { return &MockSessionManager_NotifyChannelOperation_Call{Call: _e.mock.On("NotifyChannelOperation", ctx, nodeID, req)} } @@ -632,8 +632,8 @@ type MockSessionManager_PreImport_Call struct { } // PreImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.PreImportRequest +// - nodeID int64 +// - in *datapb.PreImportRequest func (_e *MockSessionManager_Expecter) PreImport(nodeID interface{}, in interface{}) *MockSessionManager_PreImport_Call { return &MockSessionManager_PreImport_Call{Call: _e.mock.On("PreImport", nodeID, in)} } @@ -687,8 +687,8 @@ type MockSessionManager_QueryImport_Call struct { } // QueryImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.QueryImportRequest +// - nodeID int64 +// - in *datapb.QueryImportRequest func (_e *MockSessionManager_Expecter) QueryImport(nodeID interface{}, in interface{}) *MockSessionManager_QueryImport_Call { return &MockSessionManager_QueryImport_Call{Call: _e.mock.On("QueryImport", nodeID, in)} } @@ -742,8 +742,8 @@ type MockSessionManager_QueryPreImport_Call struct { } // QueryPreImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.QueryPreImportRequest +// - nodeID int64 +// - in *datapb.QueryPreImportRequest func (_e *MockSessionManager_Expecter) QueryPreImport(nodeID interface{}, in interface{}) *MockSessionManager_QueryPreImport_Call { return &MockSessionManager_QueryPreImport_Call{Call: _e.mock.On("QueryPreImport", nodeID, in)} } @@ -797,7 +797,7 @@ type MockSessionManager_QuerySlot_Call struct { } // QuerySlot is a helper method to define mock.On call -// - nodeID int64 +// - nodeID int64 func (_e *MockSessionManager_Expecter) QuerySlot(nodeID interface{}) *MockSessionManager_QuerySlot_Call { return &MockSessionManager_QuerySlot_Call{Call: _e.mock.On("QuerySlot", nodeID)} } @@ -839,8 +839,8 @@ type MockSessionManager_SyncSegments_Call struct { } // SyncSegments is a helper method to define 
mock.On call -// - nodeID int64 -// - req *datapb.SyncSegmentsRequest +// - nodeID int64 +// - req *datapb.SyncSegmentsRequest func (_e *MockSessionManager_Expecter) SyncSegments(nodeID interface{}, req interface{}) *MockSessionManager_SyncSegments_Call { return &MockSessionManager_SyncSegments_Call{Call: _e.mock.On("SyncSegments", nodeID, req)} } diff --git a/internal/datanode/binlog_io.go b/internal/datanode/binlog_io.go deleted file mode 100644 index 506c614a1c9b9..0000000000000 --- a/internal/datanode/binlog_io.go +++ /dev/null @@ -1,249 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package datanode - -import ( - "context" - "strconv" - - "github.com/cockroachdb/errors" - "go.opentelemetry.io/otel" - "go.uber.org/zap" - - "github.com/milvus-io/milvus/internal/datanode/allocator" - "github.com/milvus-io/milvus/internal/datanode/io" - "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/log" - "github.com/milvus-io/milvus/pkg/util/metautil" - "github.com/milvus-io/milvus/pkg/util/typeutil" -) - -var ( - errUploadToBlobStorage = errors.New("upload to blob storage wrong") - errDownloadFromBlobStorage = errors.New("download from blob storage wrong") - // errStart used for retry start - errStart = errors.New("start") -) - -func downloadBlobs(ctx context.Context, b io.BinlogIO, paths []string) ([]*Blob, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "downloadBlobs") - defer span.End() - log.Debug("down load", zap.Strings("path", paths)) - bytes, err := b.Download(ctx, paths) - if err != nil { - log.Warn("ctx done when downloading kvs from blob storage", zap.Strings("paths", paths)) - return nil, errDownloadFromBlobStorage - } - resp := make([]*Blob, len(paths)) - if len(paths) == 0 { - return resp, nil - } - for i := range bytes { - resp[i] = &Blob{Key: paths[i], Value: bytes[i]} - } - return resp, nil -} - -// genDeltaBlobs returns key, value -func genDeltaBlobs(b io.BinlogIO, allocator allocator.Allocator, data *DeleteData, collID, partID, segID UniqueID) (string, []byte, error) { - dCodec := storage.NewDeleteCodec() - - blob, err := dCodec.Serialize(collID, partID, segID, data) - if err != nil { - return "", nil, err - } - - idx, err := allocator.AllocOne() - if err != nil { - return "", nil, err - } - k := metautil.JoinIDPath(collID, partID, segID, idx) - key := b.JoinFullPath(common.SegmentDeltaLogPath, k) - - return key, blob.GetValue(), nil -} - -// genInsertBlobs returns insert-paths and save blob to kvs -func genInsertBlobs(b io.BinlogIO, allocator allocator.Allocator, data []*Blob, collectionID, partID, segID UniqueID, kvs map[string][]byte, -) (map[UniqueID]*datapb.FieldBinlog, error) { 
- inpaths := make(map[UniqueID]*datapb.FieldBinlog) - notifyGenIdx := make(chan struct{}) - defer close(notifyGenIdx) - - generator, err := allocator.GetGenerator(len(data), notifyGenIdx) - if err != nil { - return nil, err - } - - for _, blob := range data { - // Blob Key is generated by Serialize from int64 fieldID in collection schema, which won't raise error in ParseInt - fID, _ := strconv.ParseInt(blob.GetKey(), 10, 64) - k := metautil.JoinIDPath(collectionID, partID, segID, fID, <-generator) - key := b.JoinFullPath(common.SegmentInsertLogPath, k) - value := blob.GetValue() - fileLen := len(value) - - kvs[key] = value - inpaths[fID] = &datapb.FieldBinlog{ - FieldID: fID, - Binlogs: []*datapb.Binlog{{LogSize: int64(fileLen), LogPath: key, EntriesNum: blob.RowNum, MemorySize: blob.GetMemorySize()}}, - } - } - - return inpaths, nil -} - -// genStatBlobs return stats log paths and save blob to kvs -func genStatBlobs(b io.BinlogIO, allocator allocator.Allocator, stats *storage.PrimaryKeyStats, collectionID, partID, segID UniqueID, iCodec *storage.InsertCodec, kvs map[string][]byte, totRows int64) (map[UniqueID]*datapb.FieldBinlog, error) { - statBlob, err := iCodec.SerializePkStats(stats, totRows) - if err != nil { - return nil, err - } - statPaths := make(map[UniqueID]*datapb.FieldBinlog) - - idx, err := allocator.AllocOne() - if err != nil { - return nil, err - } - fID, _ := strconv.ParseInt(statBlob.GetKey(), 10, 64) - k := metautil.JoinIDPath(collectionID, partID, segID, fID, idx) - key := b.JoinFullPath(common.SegmentStatslogPath, k) - value := statBlob.GetValue() - fileLen := len(value) - - kvs[key] = value - - statPaths[fID] = &datapb.FieldBinlog{ - FieldID: fID, - Binlogs: []*datapb.Binlog{{LogSize: int64(fileLen), LogPath: key, EntriesNum: totRows, MemorySize: int64(fileLen)}}, - } - return statPaths, nil -} - -// update stats log -// also update with insert data if not nil -func uploadStatsLog( - ctx context.Context, - b io.BinlogIO, - allocator allocator.Allocator, - collectionID UniqueID, - partID UniqueID, - segID UniqueID, - stats *storage.PrimaryKeyStats, - totRows int64, - iCodec *storage.InsertCodec, -) (map[UniqueID]*datapb.FieldBinlog, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "UploadStatslog") - defer span.End() - kvs := make(map[string][]byte) - - statPaths, err := genStatBlobs(b, allocator, stats, collectionID, partID, segID, iCodec, kvs, totRows) - if err != nil { - return nil, err - } - - err = b.Upload(ctx, kvs) - if err != nil { - return nil, err - } - - return statPaths, nil -} - -func uploadInsertLog( - ctx context.Context, - b io.BinlogIO, - allocator allocator.Allocator, - collectionID UniqueID, - partID UniqueID, - segID UniqueID, - data []*Blob, -) (map[UniqueID]*datapb.FieldBinlog, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "UploadInsertLog") - defer span.End() - kvs := make(map[string][]byte) - - if len(data) <= 0 || data[0].RowNum <= 0 { - log.Warn("binlog io uploading empty insert data", - zap.Int64("segmentID", segID), - zap.Int64("collectionID", collectionID), - ) - return nil, nil - } - - inpaths, err := genInsertBlobs(b, allocator, data, collectionID, partID, segID, kvs) - if err != nil { - return nil, err - } - - err = b.Upload(ctx, kvs) - if err != nil { - return nil, err - } - - return inpaths, nil -} - -func uploadDeltaLog( - ctx context.Context, - b io.BinlogIO, - allocator allocator.Allocator, - collectionID UniqueID, - partID UniqueID, - segID UniqueID, - dData *DeleteData, -) 
([]*datapb.FieldBinlog, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "UploadDeltaLog") - defer span.End() - var ( - deltaInfo = make([]*datapb.FieldBinlog, 0) - kvs = make(map[string][]byte) - ) - - if dData.RowCount > 0 { - k, v, err := genDeltaBlobs(b, allocator, dData, collectionID, partID, segID) - if err != nil { - log.Warn("generate delta blobs wrong", - zap.Int64("collectionID", collectionID), - zap.Int64("segmentID", segID), - zap.Error(err)) - return nil, err - } - - kvs[k] = v - deltaInfo = append(deltaInfo, &datapb.FieldBinlog{ - FieldID: 0, // TODO: Not useful on deltalogs, FieldID shall be ID of primary key field - Binlogs: []*datapb.Binlog{{ - EntriesNum: dData.RowCount, - LogPath: k, - LogSize: int64(len(v)), - MemorySize: dData.Size(), - }}, - }) - } else { - return nil, nil - } - - err := b.Upload(ctx, kvs) - if err != nil { - return nil, err - } - - return deltaInfo, nil -} diff --git a/internal/datanode/binlog_io_test.go b/internal/datanode/binlog_io_test.go deleted file mode 100644 index 038978ac0464c..0000000000000 --- a/internal/datanode/binlog_io_test.go +++ /dev/null @@ -1,404 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package datanode - -import ( - "context" - "fmt" - "path" - "testing" - "time" - - "github.com/cockroachdb/errors" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - "go.uber.org/zap" - - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/internal/datanode/allocator" - "github.com/milvus-io/milvus/internal/datanode/io" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/log" -) - -var binlogTestDir = "/tmp/milvus_test/test_binlog_io" - -var validGeneratorFn = func(count int, done <-chan struct{}) <-chan UniqueID { - ret := make(chan UniqueID, count) - for i := 0; i < count; i++ { - ret <- int64(100 + i) - } - return ret -} - -func TestBinlogIOInterfaceMethods(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(binlogTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - - t.Run("Test download", func(t *testing.T) { - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - tests := []struct { - isvalid bool - ks []string // for preparation - - inctx context.Context - - description string - }{ - {true, []string{"a", "b", "c"}, context.TODO(), "valid input"}, - {false, nil, context.Background(), "cancel by context"}, - } - - for _, test := range tests { - t.Run(test.description, func(t *testing.T) { - if test.isvalid { - inkeys := []string{} - for _, k := range test.ks { - blob, key, err := prepareBlob(cm, k) - require.NoError(t, err) - assert.NotEmpty(t, blob) - inkeys = append(inkeys, key) - - loaded, err := downloadBlobs(test.inctx, binlogIO, []string{key}) - assert.NoError(t, err) - assert.ElementsMatch(t, blob, loaded[0].GetValue()) - } - - loaded, err := downloadBlobs(test.inctx, binlogIO, inkeys) - assert.NoError(t, err) - assert.Equal(t, len(test.ks), len(loaded)) - } else { - ctx, cancel := context.WithCancel(test.inctx) - cancel() - - _, err := downloadBlobs(ctx, binlogIO, []string{"test"}) - assert.EqualError(t, err, errDownloadFromBlobStorage.Error()) - } - }) - } - }) - - t.Run("Test download twice", func(t *testing.T) { - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - - ctx, cancel := context.WithTimeout(context.TODO(), time.Millisecond*20) - blobs, err := downloadBlobs(ctx, binlogIO, []string{"a"}) - assert.Error(t, err) - assert.Empty(t, blobs) - cancel() - }) - - t.Run("Test upload stats log err", func(t *testing.T) { - f := &MetaFactory{} - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_blobs", schemapb.DataType_Int64) - - t.Run("gen insert blob failed", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Call.Return(int64(0), fmt.Errorf("mock AllocOne error")) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - _, err := uploadStatsLog(context.Background(), binlogIO, alloc, meta.GetID(), 10, 1, genTestStat(meta), 10, iCodec) - assert.Error(t, err) - }) - }) - - t.Run("Test upload insert log err", func(t *testing.T) { - f := &MetaFactory{} - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_blobs", schemapb.DataType_Int64) - - t.Run("gen insert blob failed", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 10 - var segId int64 = 1 - iData := 
genInsertData(2) - blobs, err := iCodec.Serialize(10, 1, iData) - assert.NoError(t, err) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(nil, fmt.Errorf("mock err")) - _, err = uploadInsertLog(context.Background(), binlogIO, alloc, meta.GetID(), partId, segId, blobs) - assert.Error(t, err) - }) - - t.Run("upload failed", func(t *testing.T) { - mkc := &mockCm{errRead: true, errSave: true} - alloc := allocator.NewMockAllocator(t) - binlogIO := io.NewBinlogIO(mkc, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 1 - var segId int64 = 10 - iData := genInsertData(2) - blobs, err := iCodec.Serialize(10, 1, iData) - assert.NoError(t, err) - - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - - ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) - defer cancel() - - _, err = uploadInsertLog(ctx, binlogIO, alloc, meta.GetID(), partId, segId, blobs) - assert.Error(t, err) - }) - }) -} - -func prepareBlob(cm storage.ChunkManager, key string) ([]byte, string, error) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - k := path.Join(cm.RootPath(), "test_prepare_blob", key) - blob := []byte{1, 2, 3, 255, 188} - - err := cm.Write(ctx, k, blob[:]) - if err != nil { - return nil, "", err - } - return blob, k, nil -} - -func TestBinlogIOInnerMethods(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(binlogTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - - t.Run("Test genDeltaBlobs", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Call.Return(int64(11111), nil) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - f := &MetaFactory{} - meta := f.GetCollectionMeta(UniqueID(10002), "test_gen_blobs", schemapb.DataType_Int64) - - tests := []struct { - isvalid bool - deletepk storage.PrimaryKey - ts uint64 - - description string - }{ - {true, storage.NewInt64PrimaryKey(1), 1111111, "valid input"}, - } - - for _, test := range tests { - t.Run(test.description, func(t *testing.T) { - if test.isvalid { - k, v, err := genDeltaBlobs(binlogIO, alloc, &DeleteData{ - Pks: []storage.PrimaryKey{test.deletepk}, - Tss: []uint64{test.ts}, - }, meta.GetID(), 10, 1) - - assert.NoError(t, err) - assert.NotEmpty(t, k) - assert.NotEmpty(t, v) - - log.Debug("genDeltaBlobs returns", zap.String("key", k)) - } - }) - } - }) - - t.Run("Test genDeltaBlobs error", func(t *testing.T) { - pk := storage.NewInt64PrimaryKey(1) - - t.Run("Test serialize error", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - k, v, err := genDeltaBlobs(binlogIO, alloc, &DeleteData{Pks: []storage.PrimaryKey{pk}, Tss: []uint64{}}, 1, 1, 1) - assert.Error(t, err) - assert.Empty(t, k) - assert.Empty(t, v) - }) - - t.Run("Test AllocOne error", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Call.Return(int64(0), fmt.Errorf("mock AllocOne error")) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - k, v, err := genDeltaBlobs(binlogIO, alloc, &DeleteData{Pks: []storage.PrimaryKey{pk}, Tss: []uint64{1}}, 1, 1, 1) - assert.Error(t, err) - assert.Empty(t, k) - assert.Empty(t, v) - }) - }) - - t.Run("Test genInsertBlobs", func(t *testing.T) { - f := &MetaFactory{} - alloc := allocator.NewMockAllocator(t) - 
alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - - tests := []struct { - pkType schemapb.DataType - description string - expectError bool - }{ - {schemapb.DataType_Int64, "int64PrimaryField", false}, - {schemapb.DataType_VarChar, "varCharPrimaryField", false}, - } - - for _, test := range tests { - t.Run(test.description, func(t *testing.T) { - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_blobs", test.pkType) - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 10 - var segId int64 = 1 - iData := genInsertData(2) - blobs, err := iCodec.Serialize(10, 1, iData) - assert.NoError(t, err) - kvs := make(map[string][]byte) - pin, err := genInsertBlobs(binlogIO, alloc, blobs, meta.GetID(), partId, segId, kvs) - - assert.NoError(t, err) - assert.Equal(t, 12, len(pin)) - assert.Equal(t, 12, len(kvs)) - - log.Debug("test paths", - zap.Int("kvs no.", len(kvs)), - zap.String("insert paths field0", pin[common.TimeStampField].GetBinlogs()[0].GetLogPath())) - }) - } - }) - - t.Run("Test genInsertBlobs error", func(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(binlogTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - - t.Run("GetGenerator error", func(t *testing.T) { - f := &MetaFactory{} - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_blobs", schemapb.DataType_Int64) - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 10 - var segId int64 = 1 - iData := genInsertData(2) - blobs, err := iCodec.Serialize(partId, segId, iData) - assert.NoError(t, err) - - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Return(nil, fmt.Errorf("mock GetGenerator error")) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - kvs := make(map[string][]byte) - - pin, err := genInsertBlobs(binlogIO, alloc, blobs, meta.GetID(), partId, segId, kvs) - - assert.Error(t, err) - assert.Empty(t, kvs) - assert.Empty(t, pin) - }) - }) - - t.Run("Test genStatsBlob", func(t *testing.T) { - f := &MetaFactory{} - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Return(0, nil) - - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - - tests := []struct { - pkType schemapb.DataType - description string - expectError bool - }{ - {schemapb.DataType_Int64, "int64PrimaryField", false}, - {schemapb.DataType_VarChar, "varCharPrimaryField", false}, - } - - for _, test := range tests { - t.Run(test.description, func(t *testing.T) { - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_stat_blobs", test.pkType) - iCodec := storage.NewInsertCodecWithSchema(meta) - - kvs := make(map[string][]byte) - stat, err := genStatBlobs(binlogIO, alloc, genTestStat(meta), meta.GetID(), 10, 1, iCodec, kvs, 0) - - assert.NoError(t, err) - assert.Equal(t, 1, len(stat)) - assert.Equal(t, 1, len(kvs)) - }) - } - }) - - t.Run("Test genStatsBlob error", func(t *testing.T) { - f := &MetaFactory{} - alloc := allocator.NewMockAllocator(t) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - - t.Run("serialize error", func(t *testing.T) { - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_stat_blobs_error", schemapb.DataType_Int64) - iCodec := storage.NewInsertCodecWithSchema(meta) - - kvs := make(map[string][]byte) - _, err := genStatBlobs(binlogIO, alloc, nil, meta.GetID(), 10, 1, iCodec, kvs, 0) - assert.Error(t, err) - }) - }) -} - 
-type mockCm struct { - storage.ChunkManager - errRead bool - errSave bool - MultiReadReturn [][]byte - ReadReturn []byte -} - -var _ storage.ChunkManager = (*mockCm)(nil) - -func (mk *mockCm) RootPath() string { - return "mock_test" -} - -func (mk *mockCm) Write(ctx context.Context, filePath string, content []byte) error { - if mk.errSave { - return errors.New("mockKv save error") - } - return nil -} - -func (mk *mockCm) MultiWrite(ctx context.Context, contents map[string][]byte) error { - if mk.errSave { - return errors.New("mockKv save error") - } - return nil -} - -func (mk *mockCm) Read(ctx context.Context, filePath string) ([]byte, error) { - if mk.errRead { - return nil, errors.New("mockKv read error") - } - return mk.ReadReturn, nil -} - -func (mk *mockCm) MultiRead(ctx context.Context, filePaths []string) ([][]byte, error) { - if mk.MultiReadReturn != nil { - return mk.MultiReadReturn, nil - } - return [][]byte{[]byte("a")}, nil -} - -func (mk *mockCm) ReadWithPrefix(ctx context.Context, prefix string) ([]string, [][]byte, error) { - return nil, nil, nil -} - -func (mk *mockCm) Remove(ctx context.Context, key string) error { return nil } -func (mk *mockCm) MultiRemove(ctx context.Context, keys []string) error { return nil } -func (mk *mockCm) RemoveWithPrefix(ctx context.Context, key string) error { return nil } -func (mk *mockCm) Close() {} diff --git a/internal/datanode/compaction/compactor.go b/internal/datanode/compaction/compactor.go new file mode 100644 index 0000000000000..da57562d93e28 --- /dev/null +++ b/internal/datanode/compaction/compactor.go @@ -0,0 +1,32 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compaction + +import ( + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +type Compactor interface { + Complete() + Compact() (*datapb.CompactionPlanResult, error) + InjectDone() + Stop() + GetPlanID() typeutil.UniqueID + GetCollection() typeutil.UniqueID + GetChannelName() string +} diff --git a/internal/datanode/compaction/mix_compactor.go b/internal/datanode/compaction/mix_compactor.go new file mode 100644 index 0000000000000..da18de0f82fa8 --- /dev/null +++ b/internal/datanode/compaction/mix_compactor.go @@ -0,0 +1,568 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compaction + +import ( + "context" + "fmt" + sio "io" + "strconv" + "sync" + "time" + + "github.com/cockroachdb/errors" + "github.com/samber/lo" + "go.opentelemetry.io/otel" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus/internal/datanode/allocator" + "github.com/milvus-io/milvus/internal/datanode/io" + iter "github.com/milvus-io/milvus/internal/datanode/iterators" + "github.com/milvus-io/milvus/internal/datanode/metacache" + "github.com/milvus-io/milvus/internal/datanode/syncmgr" + "github.com/milvus-io/milvus/internal/metastore/kv/binlog" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/timerecord" + "github.com/milvus-io/milvus/pkg/util/tsoutil" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +// for MixCompaction only +type mixCompactionTask struct { + binlogIO io.BinlogIO + Compactor + metaCache metacache.MetaCache + syncMgr syncmgr.SyncManager + allocator.Allocator + currentTs typeutil.Timestamp + + plan *datapb.CompactionPlan + + ctx context.Context + cancel context.CancelFunc + + injectDoneOnce sync.Once + done chan struct{} + tr *timerecord.TimeRecorder +} + +// make sure compactionTask implements compactor interface +var _ Compactor = (*mixCompactionTask)(nil) + +func NewMixCompactionTask( + ctx context.Context, + binlogIO io.BinlogIO, + metaCache metacache.MetaCache, + syncMgr syncmgr.SyncManager, + alloc allocator.Allocator, + plan *datapb.CompactionPlan, +) *mixCompactionTask { + ctx1, cancel := context.WithCancel(ctx) + return &mixCompactionTask{ + ctx: ctx1, + cancel: cancel, + binlogIO: binlogIO, + syncMgr: syncMgr, + metaCache: metaCache, + Allocator: alloc, + plan: plan, + tr: timerecord.NewTimeRecorder("mix compaction"), + currentTs: tsoutil.GetCurrentTime(), + done: make(chan struct{}, 1), + } +} + +func (t *mixCompactionTask) Complete() { + t.done <- struct{}{} +} + +func (t *mixCompactionTask) Stop() { + t.cancel() + <-t.done + t.InjectDone() +} + +func (t *mixCompactionTask) GetPlanID() typeutil.UniqueID { + return t.plan.GetPlanID() +} + +func (t *mixCompactionTask) GetChannelName() string { + return t.plan.GetChannel() +} + +// return num rows of all segment compaction from +func (t *mixCompactionTask) getNumRows() (int64, error) { + numRows := int64(0) + for _, binlog := range t.plan.SegmentBinlogs { + seg, ok := t.metaCache.GetSegmentByID(binlog.GetSegmentID()) + if !ok { + return 0, merr.WrapErrSegmentNotFound(binlog.GetSegmentID(), "get compaction segments num rows failed") + } + + numRows += seg.NumOfRows() + } + + return numRows, nil +} + +func (t *mixCompactionTask) mergeDeltalogs(ctx context.Context, dpaths map[typeutil.UniqueID][]string) (map[interface{}]typeutil.Timestamp, error) { + t.tr.RecordSpan() + ctx, span := 
otel.Tracer(typeutil.DataNodeRole).Start(ctx, "mergeDeltalogs") + defer span.End() + + log := log.With(zap.Int64("planID", t.GetPlanID())) + pk2ts := make(map[interface{}]typeutil.Timestamp) + + if len(dpaths) == 0 { + log.Info("compact with no deltalogs, skip merge deltalogs") + return pk2ts, nil + } + + allIters := make([]*iter.DeltalogIterator, 0) + for segID, paths := range dpaths { + if len(paths) == 0 { + continue + } + blobs, err := t.binlogIO.Download(ctx, paths) + if err != nil { + log.Warn("compact wrong, fail to download deltalogs", + zap.Int64("segment", segID), + zap.Strings("path", paths), + zap.Error(err)) + return nil, err + } + + allIters = append(allIters, iter.NewDeltalogIterator(blobs, nil)) + } + + for _, deltaIter := range allIters { + for deltaIter.HasNext() { + labeled, _ := deltaIter.Next() + ts := labeled.GetTimestamp() + if lastTs, ok := pk2ts[labeled.GetPk().GetValue()]; ok && lastTs > ts { + ts = lastTs + } + pk2ts[labeled.GetPk().GetValue()] = ts + } + } + + log.Info("compact mergeDeltalogs end", + zap.Int("deleted pk counts", len(pk2ts)), + zap.Duration("elapse", t.tr.RecordSpan())) + + return pk2ts, nil +} + +func (t *mixCompactionTask) statSerializeWrite(ctx context.Context, writer *SegmentWriter, finalRowCount int64) (*datapb.FieldBinlog, error) { + ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "statslog serializeWrite") + defer span.End() + sblob, err := writer.Finish(finalRowCount) + if err != nil { + return nil, err + } + + logID, err := t.AllocOne() + if err != nil { + return nil, err + } + + key, _ := binlog.BuildLogPath(storage.StatsBinlog, writer.GetCollectionID(), writer.GetPartitionID(), writer.GetSegmentID(), writer.GetPkID(), logID) + kvs := map[string][]byte{key: sblob.GetValue()} + statFieldLog := &datapb.FieldBinlog{ + FieldID: writer.GetPkID(), + Binlogs: []*datapb.Binlog{ + { + LogSize: int64(len(sblob.GetValue())), + MemorySize: int64(len(sblob.GetValue())), + LogPath: key, + EntriesNum: finalRowCount, + }, + }, + } + if err := t.binlogIO.Upload(ctx, kvs); err != nil { + log.Warn("failed to upload insert log", zap.Error(err)) + return nil, err + } + + return statFieldLog, nil +} + +func (t *mixCompactionTask) serializeWrite(ctx context.Context, writer *SegmentWriter) (kvs map[string][]byte, fieldBinlogs map[int64]*datapb.FieldBinlog, err error) { + _, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "serializeWrite") + defer span.End() + + blobs, tr, err := writer.SerializeYield() + startID, _, err := t.Alloc(uint32(len(blobs))) + if err != nil { + return nil, nil, err + } + + kvs = make(map[string][]byte) + fieldBinlogs = make(map[int64]*datapb.FieldBinlog) + for i := range blobs { + // Blob Key is generated by Serialize from int64 fieldID in collection schema, which won't raise error in ParseInt + fID, _ := strconv.ParseInt(blobs[i].GetKey(), 10, 64) + key, _ := binlog.BuildLogPath(storage.InsertBinlog, writer.GetCollectionID(), writer.GetPartitionID(), writer.GetSegmentID(), fID, startID+int64(i)) + + kvs[key] = blobs[i].GetValue() + fieldBinlogs[fID] = &datapb.FieldBinlog{ + FieldID: fID, + Binlogs: []*datapb.Binlog{ + { + LogSize: int64(len(blobs[i].GetValue())), + MemorySize: blobs[i].GetMemorySize(), + LogPath: key, + EntriesNum: blobs[i].RowNum, + TimestampFrom: tr.GetMinTimestamp(), + TimestampTo: tr.GetMaxTimestamp(), + }, + }, + } + } + + return +} + +func (t *mixCompactionTask) merge( + ctx context.Context, + binlogPaths [][]string, + delta map[interface{}]typeutil.Timestamp, + writer *SegmentWriter, +) 
(*datapb.CompactionSegment, error) { + _ = t.tr.RecordSpan() + + ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "CompactMerge") + defer span.End() + + log := log.With(zap.Int64("planID", t.GetPlanID()), zap.Int64("compactTo segment", writer.GetSegmentID())) + + var ( + syncBatchCount int // binlog batch count + remainingRowCount int64 // the number of remaining entities + expiredRowCount int64 // the number of expired entities + unflushedRowCount int64 = 0 + + // All binlog meta of a segment + allBinlogs = make(map[typeutil.UniqueID]*datapb.FieldBinlog) + ) + + isValueDeleted := func(v *storage.Value) bool { + ts, ok := delta[v.PK.GetValue()] + // insert task and delete task has the same ts when upsert + // here should be < instead of <= + // to avoid the upsert data to be deleted after compact + if ok && uint64(v.Timestamp) < ts { + return true + } + return false + } + + downloadTimeCost := time.Duration(0) + serWriteTimeCost := time.Duration(0) + uploadTimeCost := time.Duration(0) + + for _, paths := range binlogPaths { + log := log.With(zap.Strings("paths", paths)) + downloadStart := time.Now() + allValues, err := t.binlogIO.Download(ctx, paths) + if err != nil { + log.Warn("compact wrong, fail to download insertLogs", zap.Error(err)) + } + downloadTimeCost += time.Since(downloadStart) + + blobs := lo.Map(allValues, func(v []byte, i int) *storage.Blob { + return &storage.Blob{Key: paths[i], Value: v} + }) + + iter, err := storage.NewBinlogDeserializeReader(blobs, writer.GetPkID()) + if err != nil { + log.Warn("compact wrong, failed to new insert binlogs reader", zap.Error(err)) + return nil, err + } + + for { + err := iter.Next() + if err != nil { + if err == sio.EOF { + break + } else { + log.Warn("compact wrong, failed to iter through data", zap.Error(err)) + return nil, err + } + } + v := iter.Value() + if isValueDeleted(v) { + continue + } + + // Filtering expired entity + if t.isExpiredEntity(typeutil.Timestamp(v.Timestamp)) { + expiredRowCount++ + continue + } + + err = writer.Write(v) + if err != nil { + log.Warn("compact wrong, failed to writer row", zap.Error(err)) + return nil, err + } + unflushedRowCount++ + remainingRowCount++ + + if (unflushedRowCount+1)%100 == 0 && writer.IsFull() { + serWriteStart := time.Now() + kvs, partialBinlogs, err := t.serializeWrite(ctx, writer) + if err != nil { + log.Warn("compact wrong, failed to serialize writer", zap.Error(err)) + return nil, err + } + serWriteTimeCost += time.Since(serWriteStart) + + uploadStart := time.Now() + if err := t.binlogIO.Upload(ctx, kvs); err != nil { + log.Warn("compact wrong, failed to upload kvs", zap.Error(err)) + } + uploadTimeCost += time.Since(uploadStart) + mergeFieldBinlogs(allBinlogs, partialBinlogs) + syncBatchCount++ + unflushedRowCount = 0 + } + } + } + + if !writer.IsEmpty() { + serWriteStart := time.Now() + kvs, partialBinlogs, err := t.serializeWrite(ctx, writer) + if err != nil { + log.Warn("compact wrong, failed to serialize writer", zap.Error(err)) + return nil, err + } + serWriteTimeCost += time.Since(serWriteStart) + + uploadStart := time.Now() + if err := t.binlogIO.Upload(ctx, kvs); err != nil { + log.Warn("compact wrong, failed to upload kvs", zap.Error(err)) + } + uploadTimeCost += time.Since(uploadStart) + + mergeFieldBinlogs(allBinlogs, partialBinlogs) + syncBatchCount++ + } + + serWriteStart := time.Now() + sPath, err := t.statSerializeWrite(ctx, writer, remainingRowCount) + if err != nil { + log.Warn("compact wrong, failed to serialize write segment stats", + 
zap.Int64("remaining row count", remainingRowCount), zap.Error(err)) + return nil, err + } + serWriteTimeCost += time.Since(serWriteStart) + + pack := &datapb.CompactionSegment{ + SegmentID: writer.GetSegmentID(), + InsertLogs: lo.Values(allBinlogs), + Field2StatslogPaths: []*datapb.FieldBinlog{sPath}, + NumOfRows: remainingRowCount, + Channel: t.plan.GetChannel(), + } + + totalElapse := t.tr.RecordSpan() + + log.Info("compact merge end", + zap.Int64("remaining row count", remainingRowCount), + zap.Int64("expired entities", expiredRowCount), + zap.Int("binlog batch count", syncBatchCount), + zap.Duration("download binlogs elapse", downloadTimeCost), + zap.Duration("upload binlogs elapse", uploadTimeCost), + zap.Duration("serWrite elapse", serWriteTimeCost), + zap.Duration("deRead elapse", totalElapse-serWriteTimeCost-downloadTimeCost-uploadTimeCost), + zap.Duration("total elapse", totalElapse)) + + return pack, nil +} + +func mergeFieldBinlogs(base, paths map[typeutil.UniqueID]*datapb.FieldBinlog) { + for fID, fpath := range paths { + if _, ok := base[fID]; !ok { + base[fID] = &datapb.FieldBinlog{FieldID: fID, Binlogs: make([]*datapb.Binlog, 0)} + } + base[fID].Binlogs = append(base[fID].Binlogs, fpath.GetBinlogs()...) + } +} + +func (t *mixCompactionTask) Compact() (*datapb.CompactionPlanResult, error) { + durInQueue := t.tr.RecordSpan() + compactStart := time.Now() + ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(t.ctx, fmt.Sprintf("MixCompact-%d", t.GetPlanID())) + defer span.End() + + log := log.Ctx(ctx).With(zap.Int64("planID", t.plan.GetPlanID()), zap.Int32("timeout in seconds", t.plan.GetTimeoutInSeconds())) + if ok := funcutil.CheckCtxValid(ctx); !ok { + log.Warn("compact wrong, task context done or timeout") + return nil, ctx.Err() + } + + ctxTimeout, cancelAll := context.WithTimeout(ctx, time.Duration(t.plan.GetTimeoutInSeconds())*time.Second) + defer cancelAll() + + log.Info("compact start") + if len(t.plan.GetSegmentBinlogs()) < 1 { + log.Warn("compact wrong, there's no segments in segment binlogs") + return nil, errors.New("compaction plan is illegal") + } + + targetSegID, err := t.AllocOne() + if err != nil { + log.Warn("compact wrong, unable to allocate segmentID", zap.Error(err)) + return nil, err + } + + previousRowCount, err := t.getNumRows() + if err != nil { + log.Warn("compact wrong, unable to get previous numRows", zap.Error(err)) + return nil, err + } + + partID := t.plan.GetSegmentBinlogs()[0].GetPartitionID() + + writer, err := NewSegmentWriter(t.metaCache.Schema(), previousRowCount, targetSegID, partID, t.metaCache.Collection()) + if err != nil { + log.Warn("compact wrong, unable to init segment writer", zap.Error(err)) + return nil, err + } + + segIDs := lo.Map(t.plan.GetSegmentBinlogs(), func(binlogs *datapb.CompactionSegmentBinlogs, _ int) int64 { + return binlogs.GetSegmentID() + }) + // Inject to stop flush + // when compaction failed, these segments need to be Unblocked by injectDone in compaction_executor + // when compaction succeeded, these segments will be Unblocked by SyncSegments from DataCoord. 
+ for _, segID := range segIDs { + t.syncMgr.Block(segID) + } + + if err := binlog.DecompressCompactionBinlogs(t.plan.GetSegmentBinlogs()); err != nil { + log.Warn("compact wrong, fail to decompress compaction binlogs", zap.Error(err)) + return nil, err + } + + deltaPaths := make(map[typeutil.UniqueID][]string) // segmentID to deltalog paths + allPath := make([][]string, 0) // group by binlog batch + for _, s := range t.plan.GetSegmentBinlogs() { + // Get the batch count of field binlog files from non-empty segment + // each segment might contain different batches + var binlogBatchCount int + for _, b := range s.GetFieldBinlogs() { + if b != nil { + binlogBatchCount = len(b.GetBinlogs()) + break + } + } + if binlogBatchCount == 0 { + log.Warn("compacting empty segment", zap.Int64("segmentID", s.GetSegmentID())) + continue + } + + for idx := 0; idx < binlogBatchCount; idx++ { + var batchPaths []string + for _, f := range s.GetFieldBinlogs() { + batchPaths = append(batchPaths, f.GetBinlogs()[idx].GetLogPath()) + } + allPath = append(allPath, batchPaths) + } + + deltaPaths[s.GetSegmentID()] = []string{} + for _, d := range s.GetDeltalogs() { + for _, l := range d.GetBinlogs() { + deltaPaths[s.GetSegmentID()] = append(deltaPaths[s.GetSegmentID()], l.GetLogPath()) + } + } + } + + // Unable to deal with all empty segments cases, so return error + if len(allPath) == 0 { + log.Warn("compact wrong, all segments' binlogs are empty") + return nil, errors.New("illegal compaction plan") + } + + deltaPk2Ts, err := t.mergeDeltalogs(ctxTimeout, deltaPaths) + if err != nil { + log.Warn("compact wrong, fail to merge deltalogs", zap.Error(err)) + return nil, err + } + + compactToSeg, err := t.merge(ctxTimeout, allPath, deltaPk2Ts, writer) + if err != nil { + log.Warn("compact wrong, fail to merge", zap.Error(err)) + return nil, err + } + + log.Info("compact done", + zap.Int64("compact to segment", targetSegID), + zap.Int64s("compact from segments", segIDs), + zap.Int("num of binlog paths", len(compactToSeg.GetInsertLogs())), + zap.Int("num of stats paths", 1), + zap.Int("num of delta paths", len(compactToSeg.GetDeltalogs())), + zap.Duration("compact elapse", time.Since(compactStart)), + ) + + metrics.DataNodeCompactionLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), t.plan.GetType().String()).Observe(float64(t.tr.ElapseSpan().Milliseconds())) + metrics.DataNodeCompactionLatencyInQueue.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(durInQueue.Milliseconds())) + + planResult := &datapb.CompactionPlanResult{ + State: commonpb.CompactionState_Completed, + PlanID: t.GetPlanID(), + Channel: t.GetChannelName(), + Segments: []*datapb.CompactionSegment{compactToSeg}, + Type: t.plan.GetType(), + } + + return planResult, nil +} + +func (t *mixCompactionTask) InjectDone() { + t.injectDoneOnce.Do(func() { + for _, binlog := range t.plan.SegmentBinlogs { + t.syncMgr.Unblock(binlog.SegmentID) + } + }) +} + +func (t *mixCompactionTask) GetCollection() typeutil.UniqueID { + return t.metaCache.Collection() +} + +func (t *mixCompactionTask) isExpiredEntity(ts typeutil.Timestamp) bool { + now := t.currentTs + + // entity expire is not enabled if duration <= 0 + if t.plan.GetCollectionTtl() <= 0 { + return false + } + + entityT, _ := tsoutil.ParseTS(ts) + nowT, _ := tsoutil.ParseTS(now) + + return entityT.Add(time.Duration(t.plan.GetCollectionTtl())).Before(nowT) +} diff --git a/internal/datanode/compaction/mix_compactor_test.go b/internal/datanode/compaction/mix_compactor_test.go new file mode 
100644 index 0000000000000..6ca701bedd91e --- /dev/null +++ b/internal/datanode/compaction/mix_compactor_test.go @@ -0,0 +1,803 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compaction + +import ( + "context" + "math" + "testing" + "time" + + "github.com/cockroachdb/errors" + "github.com/samber/lo" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/datanode/allocator" + "github.com/milvus-io/milvus/internal/datanode/io" + "github.com/milvus-io/milvus/internal/datanode/metacache" + "github.com/milvus-io/milvus/internal/datanode/syncmgr" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/proto/etcdpb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/tsoutil" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +var compactTestDir = "/tmp/milvus_test/compact" + +func TestMixCompactionTaskSuite(t *testing.T) { + suite.Run(t, new(MixCompactionTaskSuite)) +} + +type MixCompactionTaskSuite struct { + suite.Suite + + mockBinlogIO *io.MockBinlogIO + mockAlloc *allocator.MockAllocator + mockMeta *metacache.MockMetaCache + mockSyncMgr *syncmgr.MockSyncManager + + meta *etcdpb.CollectionMeta + segWriter *SegmentWriter + + task *mixCompactionTask + plan *datapb.CompactionPlan +} + +func (s *MixCompactionTaskSuite) SetupSuite() { + paramtable.Get().Init(paramtable.NewBaseTable()) +} + +func (s *MixCompactionTaskSuite) SetupTest() { + s.mockBinlogIO = io.NewMockBinlogIO(s.T()) + s.mockAlloc = allocator.NewMockAllocator(s.T()) + s.mockMeta = metacache.NewMockMetaCache(s.T()) + s.mockSyncMgr = syncmgr.NewMockSyncManager(s.T()) + + s.task = NewMixCompactionTask(context.Background(), s.mockBinlogIO, s.mockMeta, s.mockSyncMgr, s.mockAlloc, nil) + + s.meta = genTestCollectionMeta() + + paramtable.Get().Save(paramtable.Get().CommonCfg.EntityExpirationTTL.Key, "0") + + s.plan = &datapb.CompactionPlan{ + PlanID: 999, + SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{{ + SegmentID: 100, + FieldBinlogs: nil, + Field2StatslogPaths: nil, + Deltalogs: nil, + }}, + TimeoutInSeconds: 10, + Type: datapb.CompactionType_MixCompaction, + } + s.task.plan = s.plan +} + +func (s *MixCompactionTaskSuite) SetupSubTest() { + s.SetupTest() +} + +func (s *MixCompactionTaskSuite) TearDownTest() { + paramtable.Get().Reset(paramtable.Get().CommonCfg.EntityExpirationTTL.Key) +} + +func getMilvusBirthday() time.Time { + return time.Date(2019, time.Month(5), 30, 0, 0, 0, 0, time.UTC) 
+} + +func (s *MixCompactionTaskSuite) TestInjectDone() { + segmentIDs := []int64{100, 200, 300} + s.task.plan.SegmentBinlogs = lo.Map(segmentIDs, func(id int64, _ int) *datapb.CompactionSegmentBinlogs { + return &datapb.CompactionSegmentBinlogs{SegmentID: id} + }) + + for _, segmentID := range segmentIDs { + s.mockSyncMgr.EXPECT().Unblock(segmentID).Return().Once() + } + + s.task.InjectDone() + s.task.InjectDone() +} + +func (s *MixCompactionTaskSuite) TestCompactDupPK() { + // Test merge compactions, two segments with the same pk, one deletion pk=1 + // The merged segment 19530 should remain 3 pk without pk=100 + s.mockAlloc.EXPECT().AllocOne().Return(int64(19530), nil).Twice() + s.mockMeta.EXPECT().Schema().Return(s.meta.GetSchema()).Once() + s.mockMeta.EXPECT().Collection().Return(CollectionID).Once() + segments := []int64{7, 8, 9} + dblobs, err := getInt64DeltaBlobs( + 1, + []int64{100}, + []uint64{tsoutil.ComposeTSByTime(getMilvusBirthday().Add(time.Second), 0)}, + ) + s.Require().NoError(err) + + s.mockBinlogIO.EXPECT().Download(mock.Anything, []string{"1"}). + Return([][]byte{dblobs.GetValue()}, nil).Times(3) + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(7777777, 8888888, nil) + s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil) + + // clear origial segments + s.task.plan.SegmentBinlogs = make([]*datapb.CompactionSegmentBinlogs, 0) + for _, segID := range segments { + s.initSegBuffer(segID) + row := getRow(100) + v := &storage.Value{ + PK: storage.NewInt64PrimaryKey(100), + Timestamp: int64(tsoutil.ComposeTSByTime(getMilvusBirthday(), 0)), + Value: row, + } + err := s.segWriter.Write(v) + s.segWriter.writer.Flush() + s.Require().NoError(err) + + statistic := &storage.PkStatistics{ + PkFilter: s.segWriter.pkstats.BF, + MinPK: s.segWriter.pkstats.MinPk, + MaxPK: s.segWriter.pkstats.MaxPk, + } + bfs := metacache.NewBloomFilterSet(statistic) + + kvs, fBinlogs, err := s.task.serializeWrite(context.TODO(), s.segWriter) + s.Require().NoError(err) + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.MatchedBy(func(keys []string) bool { + left, right := lo.Difference(keys, lo.Keys(kvs)) + return len(left) == 0 && len(right) == 0 + })).Return(lo.Values(kvs), nil).Once() + + seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ + CollectionID: CollectionID, + PartitionID: PartitionID, + ID: segID, + NumOfRows: 1, + }, bfs) + + s.mockMeta.EXPECT().GetSegmentByID(segID).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { + return seg, true + }) + s.mockSyncMgr.EXPECT().Block(segID).Return().Once() + + s.plan.SegmentBinlogs = append(s.plan.SegmentBinlogs, &datapb.CompactionSegmentBinlogs{ + SegmentID: segID, + FieldBinlogs: lo.Values(fBinlogs), + Deltalogs: []*datapb.FieldBinlog{ + {Binlogs: []*datapb.Binlog{{LogID: 1, LogPath: "1"}}}, + }, + }) + } + result, err := s.task.Compact() + s.NoError(err) + s.NotNil(result) + + s.Equal(s.task.plan.GetPlanID(), result.GetPlanID()) + s.Equal(1, len(result.GetSegments())) + + segment := result.GetSegments()[0] + s.EqualValues(19530, segment.GetSegmentID()) + s.EqualValues(3, segment.GetNumOfRows()) + s.NotEmpty(segment.InsertLogs) + s.NotEmpty(segment.Field2StatslogPaths) + s.Empty(segment.Deltalogs) +} + +func (s *MixCompactionTaskSuite) TestCompactTwoToOne() { + s.mockAlloc.EXPECT().AllocOne().Return(int64(19530), nil).Twice() + s.mockMeta.EXPECT().Schema().Return(s.meta.GetSchema()).Once() + s.mockMeta.EXPECT().Collection().Return(CollectionID).Once() + + segments := []int64{5, 
6, 7} + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(7777777, 8888888, nil) + s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil) + s.task.plan.SegmentBinlogs = make([]*datapb.CompactionSegmentBinlogs, 0) + for _, segID := range segments { + s.initSegBuffer(segID) + statistic := &storage.PkStatistics{ + PkFilter: s.segWriter.pkstats.BF, + MinPK: s.segWriter.pkstats.MinPk, + MaxPK: s.segWriter.pkstats.MaxPk, + } + bfs := metacache.NewBloomFilterSet(statistic) + kvs, fBinlogs, err := s.task.serializeWrite(context.TODO(), s.segWriter) + s.Require().NoError(err) + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.MatchedBy(func(keys []string) bool { + left, right := lo.Difference(keys, lo.Keys(kvs)) + return len(left) == 0 && len(right) == 0 + })).Return(lo.Values(kvs), nil).Once() + + seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ + CollectionID: CollectionID, + PartitionID: PartitionID, + ID: segID, + NumOfRows: 1, + }, bfs) + + s.mockMeta.EXPECT().GetSegmentByID(segID).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { + return seg, true + }) + s.mockSyncMgr.EXPECT().Block(segID).Return().Once() + + s.plan.SegmentBinlogs = append(s.plan.SegmentBinlogs, &datapb.CompactionSegmentBinlogs{ + SegmentID: segID, + FieldBinlogs: lo.Values(fBinlogs), + }) + } + + // append an empty segment + seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ + CollectionID: CollectionID, + PartitionID: PartitionID, + ID: 99999, + NumOfRows: 0, + }, metacache.NewBloomFilterSet()) + s.mockMeta.EXPECT().GetSegmentByID(seg.SegmentID()).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { + return seg, true + }) + s.mockSyncMgr.EXPECT().Block(seg.SegmentID()).Return().Once() + s.plan.SegmentBinlogs = append(s.plan.SegmentBinlogs, &datapb.CompactionSegmentBinlogs{ + SegmentID: seg.SegmentID(), + }) + + result, err := s.task.Compact() + s.NoError(err) + s.NotNil(result) + + s.Equal(s.task.plan.GetPlanID(), result.GetPlanID()) + s.Equal(1, len(result.GetSegments())) + + segment := result.GetSegments()[0] + s.EqualValues(19530, segment.GetSegmentID()) + s.EqualValues(3, segment.GetNumOfRows()) + s.NotEmpty(segment.InsertLogs) + s.NotEmpty(segment.Field2StatslogPaths) + s.Empty(segment.Deltalogs) +} + +func (s *MixCompactionTaskSuite) TestMergeBufferFull() { + paramtable.Get().Save(paramtable.Get().DataNodeCfg.BinLogMaxSize.Key, "1") + defer paramtable.Get().Reset(paramtable.Get().DataNodeCfg.BinLogMaxSize.Key) + + s.initSegBuffer(5) + v := storage.Value{ + PK: storage.NewInt64PrimaryKey(100), + Timestamp: int64(tsoutil.ComposeTSByTime(getMilvusBirthday(), 0)), + Value: getRow(100), + } + err := s.segWriter.Write(&v) + s.Require().NoError(err) + + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(888888, 999999, nil).Times(2) + kvs, _, err := s.task.serializeWrite(context.TODO(), s.segWriter) + s.Require().NoError(err) + + s.mockAlloc.EXPECT().AllocOne().Return(888888, nil) + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).RunAndReturn( + func(ctx context.Context, paths []string) ([][]byte, error) { + s.Require().Equal(len(paths), len(kvs)) + return lo.Values(kvs), nil + }) + s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil).Maybe() + + segWriter, err := NewSegmentWriter(s.meta.GetSchema(), 100, 19530, PartitionID, CollectionID) + s.Require().NoError(err) + + compactionSegment, err := s.task.merge(s.task.ctx, [][]string{lo.Keys(kvs)}, nil, segWriter) + 
s.NoError(err) + s.NotNil(compactionSegment) + s.EqualValues(2, compactionSegment.GetNumOfRows()) +} + +func (s *MixCompactionTaskSuite) TestMergeEntityExpired() { + s.initSegBuffer(3) + // entityTs == tsoutil.ComposeTSByTime(milvusBirthday, 0) + collTTL := 864000 // 10 days + currTs := tsoutil.ComposeTSByTime(getMilvusBirthday().Add(time.Second*(time.Duration(collTTL)+1)), 0) + s.task.currentTs = currTs + s.task.plan.CollectionTtl = int64(collTTL) + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(888888, 999999, nil) + + kvs, _, err := s.task.serializeWrite(context.TODO(), s.segWriter) + s.Require().NoError(err) + s.mockAlloc.EXPECT().AllocOne().Return(888888, nil) + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).RunAndReturn( + func(ctx context.Context, paths []string) ([][]byte, error) { + s.Require().Equal(len(paths), len(kvs)) + return lo.Values(kvs), nil + }) + s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil).Maybe() + + segWriter, err := NewSegmentWriter(s.meta.GetSchema(), 100, 19530, PartitionID, CollectionID) + s.Require().NoError(err) + + compactionSegment, err := s.task.merge(s.task.ctx, [][]string{lo.Keys(kvs)}, nil, segWriter) + s.NoError(err) + s.NotNil(compactionSegment) + s.EqualValues(0, compactionSegment.GetNumOfRows()) +} + +func (s *MixCompactionTaskSuite) TestMergeNoExpiration() { + s.initSegBuffer(4) + deleteTs := tsoutil.ComposeTSByTime(getMilvusBirthday().Add(10*time.Second), 0) + tests := []struct { + description string + deletions map[interface{}]uint64 + expectedRowCount int + }{ + {"no deletion", nil, 1}, + {"mismatch deletion", map[interface{}]uint64{int64(1): deleteTs}, 1}, + {"deleted pk=4", map[interface{}]uint64{int64(4): deleteTs}, 0}, + } + + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(888888, 999999, nil) + kvs, _, err := s.task.serializeWrite(context.TODO(), s.segWriter) + s.Require().NoError(err) + for _, test := range tests { + s.Run(test.description, func() { + if test.expectedRowCount > 0 { + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(77777, 99999, nil).Once() + } + s.mockAlloc.EXPECT().AllocOne().Return(888888, nil) + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).RunAndReturn( + func(ctx context.Context, paths []string) ([][]byte, error) { + s.Require().Equal(len(paths), len(kvs)) + return lo.Values(kvs), nil + }) + s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil).Maybe() + + segWriter, err := NewSegmentWriter(s.meta.GetSchema(), 100, 19530, PartitionID, CollectionID) + s.Require().NoError(err) + + compactionSegment, err := s.task.merge(s.task.ctx, [][]string{lo.Keys(kvs)}, test.deletions, segWriter) + s.NoError(err) + s.NotNil(compactionSegment) + s.EqualValues(test.expectedRowCount, compactionSegment.GetNumOfRows()) + }) + } +} + +func (s *MixCompactionTaskSuite) TestMergeDeltalogsMultiSegment() { + tests := []struct { + segIDA int64 + dataApk []int64 + dataAts []uint64 + + segIDB int64 + dataBpk []int64 + dataBts []uint64 + + segIDC int64 + dataCpk []int64 + dataCts []uint64 + + expectedpk2ts map[int64]uint64 + description string + }{ + { + 0, nil, nil, + 100, + []int64{1, 2, 3}, + []uint64{20000, 30000, 20005}, + 200, + []int64{4, 5, 6}, + []uint64{50000, 50001, 50002}, + map[int64]uint64{ + 1: 20000, + 2: 30000, + 3: 20005, + 4: 50000, + 5: 50001, + 6: 50002, + }, + "2 segments", + }, + { + 300, + []int64{10, 20}, + []uint64{20001, 40001}, + 100, + []int64{1, 2, 3}, + []uint64{20000, 30000, 20005}, + 200, + []int64{4, 5, 6}, + []uint64{50000, 50001, 
50002}, + map[int64]uint64{ + 10: 20001, + 20: 40001, + 1: 20000, + 2: 30000, + 3: 20005, + 4: 50000, + 5: 50001, + 6: 50002, + }, + "3 segments", + }, + } + + for _, test := range tests { + s.Run(test.description, func() { + dValues := make([][]byte, 0) + if test.dataApk != nil { + d, err := getInt64DeltaBlobs(test.segIDA, test.dataApk, test.dataAts) + s.Require().NoError(err) + dValues = append(dValues, d.GetValue()) + } + if test.dataBpk != nil { + d, err := getInt64DeltaBlobs(test.segIDB, test.dataBpk, test.dataBts) + s.Require().NoError(err) + dValues = append(dValues, d.GetValue()) + } + if test.dataCpk != nil { + d, err := getInt64DeltaBlobs(test.segIDC, test.dataCpk, test.dataCts) + s.Require().NoError(err) + dValues = append(dValues, d.GetValue()) + } + + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything). + Return(dValues, nil) + + got, err := s.task.mergeDeltalogs(s.task.ctx, map[int64][]string{100: {"random"}}) + s.NoError(err) + + s.Equal(len(test.expectedpk2ts), len(got)) + gotKeys := lo.Map(lo.Keys(got), func(k interface{}, _ int) int64 { + res, ok := k.(int64) + s.Require().True(ok) + return res + }) + s.ElementsMatch(gotKeys, lo.Keys(test.expectedpk2ts)) + s.ElementsMatch(lo.Values(got), lo.Values(test.expectedpk2ts)) + }) + } +} + +func (s *MixCompactionTaskSuite) TestMergeDeltalogsOneSegment() { + blob, err := getInt64DeltaBlobs( + 100, + []int64{1, 2, 3, 4, 5, 1, 2}, + []uint64{20000, 20001, 20002, 30000, 50000, 50000, 10000}, + ) + s.Require().NoError(err) + + expectedMap := map[int64]uint64{1: 50000, 2: 20001, 3: 20002, 4: 30000, 5: 50000} + + s.mockBinlogIO.EXPECT().Download(mock.Anything, []string{"a"}). + Return([][]byte{blob.GetValue()}, nil).Once() + s.mockBinlogIO.EXPECT().Download(mock.Anything, []string{"mock_error"}). 
+ Return(nil, errors.New("mock_error")).Once() + + invalidPaths := map[int64][]string{2000: {"mock_error"}} + got, err := s.task.mergeDeltalogs(s.task.ctx, invalidPaths) + s.Error(err) + s.Nil(got) + + dpaths := map[int64][]string{1000: {"a"}} + got, err = s.task.mergeDeltalogs(s.task.ctx, dpaths) + s.NoError(err) + s.NotNil(got) + s.Equal(len(expectedMap), len(got)) + + gotKeys := lo.Map(lo.Keys(got), func(k interface{}, _ int) int64 { + res, ok := k.(int64) + s.Require().True(ok) + return res + }) + s.ElementsMatch(gotKeys, lo.Keys(expectedMap)) + s.ElementsMatch(lo.Values(got), lo.Values(expectedMap)) +} + +func (s *MixCompactionTaskSuite) TestCompactFail() { + s.Run("mock ctx done", func() { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + s.task.ctx = ctx + s.task.cancel = cancel + _, err := s.task.Compact() + s.Error(err) + s.ErrorIs(err, context.Canceled) + }) + + s.Run("Test compact invalid empty segment binlogs", func() { + s.plan.SegmentBinlogs = nil + + _, err := s.task.Compact() + s.Error(err) + }) + + s.Run("Test compact AllocOnce failed", func() { + s.mockAlloc.EXPECT().AllocOne().Return(0, errors.New("mock alloc one error")).Once() + _, err := s.task.Compact() + s.Error(err) + }) + + s.Run("Test getNumRows error", func() { + s.mockAlloc.EXPECT().AllocOne().Return(19530, nil).Once() + s.mockMeta.EXPECT().GetSegmentByID(mock.Anything).Return(nil, false) + + _, err := s.task.Compact() + s.Error(err) + s.ErrorIs(err, merr.ErrSegmentNotFound) + }) +} + +func (s *MixCompactionTaskSuite) TestIsExpiredEntity() { + milvusBirthdayTs := tsoutil.ComposeTSByTime(getMilvusBirthday(), 0) + + tests := []struct { + description string + collTTL int64 + nowTs uint64 + entityTs uint64 + + expect bool + }{ + {"ttl=maxInt64, nowTs-entityTs=ttl", math.MaxInt64, math.MaxInt64, 0, true}, + {"ttl=maxInt64, nowTs-entityTs < 0", math.MaxInt64, milvusBirthdayTs, 0, false}, + {"ttl=maxInt64, 0ttl v2", math.MaxInt64, math.MaxInt64, milvusBirthdayTs, true}, + // entityTs==currTs will never happen + // {"ttl=maxInt64, curTs-entityTs=0", math.MaxInt64, milvusBirthdayTs, milvusBirthdayTs, true}, + {"ttl=0, nowTs>entityTs", 0, milvusBirthdayTs + 1, milvusBirthdayTs, false}, + {"ttl=0, nowTs==entityTs", 0, milvusBirthdayTs, milvusBirthdayTs, false}, + {"ttl=0, nowTs10days", 864000, milvusBirthdayTs + 864001, milvusBirthdayTs, true}, + {"ttl=10days, nowTs-entityTs==10days", 864000, milvusBirthdayTs + 864000, milvusBirthdayTs, true}, + {"ttl=10days, nowTs-entityTs<10days", 864000, milvusBirthdayTs + 10, milvusBirthdayTs, false}, + } + for _, test := range tests { + s.Run(test.description, func() { + t := &mixCompactionTask{ + plan: &datapb.CompactionPlan{ + CollectionTtl: test.collTTL, + }, + currentTs: test.nowTs, + } + got := t.isExpiredEntity(test.entityTs) + s.Equal(test.expect, got) + }) + } +} + +func getRow(magic int64) map[int64]interface{} { + ts := tsoutil.ComposeTSByTime(getMilvusBirthday(), 0) + return map[int64]interface{}{ + common.RowIDField: magic, + common.TimeStampField: int64(ts), // should be int64 here + BoolField: true, + Int8Field: int8(magic), + Int16Field: int16(magic), + Int32Field: int32(magic), + Int64Field: magic, + FloatField: float32(magic), + DoubleField: float64(magic), + StringField: "str", + VarCharField: "varchar", + BinaryVectorField: []byte{0}, + FloatVectorField: []float32{4, 5, 6, 7}, + Float16VectorField: []byte{0, 0, 0, 0, 255, 255, 255, 255}, + BFloat16VectorField: []byte{0, 0, 0, 0, 255, 255, 255, 255}, + SparseFloatVectorField: 
typeutil.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{4, 5, 6}), + ArrayField: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{1, 2, 3}}, + }, + }, + JSONField: []byte(`{"batch":ok}`), + } +} + +func (s *MixCompactionTaskSuite) initSegBuffer(magic int64) { + segWriter, err := NewSegmentWriter(s.meta.GetSchema(), 100, magic, PartitionID, CollectionID) + s.Require().NoError(err) + + v := storage.Value{ + PK: storage.NewInt64PrimaryKey(magic), + Timestamp: int64(tsoutil.ComposeTSByTime(getMilvusBirthday(), 0)), + Value: getRow(magic), + } + err = segWriter.Write(&v) + s.Require().NoError(err) + segWriter.writer.Flush() + + s.segWriter = segWriter +} + +const ( + CollectionID = 1 + PartitionID = 1 + SegmentID = 1 + BoolField = 100 + Int8Field = 101 + Int16Field = 102 + Int32Field = 103 + Int64Field = 104 + FloatField = 105 + DoubleField = 106 + StringField = 107 + BinaryVectorField = 108 + FloatVectorField = 109 + ArrayField = 110 + JSONField = 111 + Float16VectorField = 112 + BFloat16VectorField = 113 + SparseFloatVectorField = 114 + VarCharField = 115 +) + +func getInt64DeltaBlobs(segID int64, pks []int64, tss []uint64) (*storage.Blob, error) { + primaryKeys := make([]storage.PrimaryKey, len(pks)) + for index, v := range pks { + primaryKeys[index] = storage.NewInt64PrimaryKey(v) + } + deltaData := storage.NewDeleteData(primaryKeys, tss) + + dCodec := storage.NewDeleteCodec() + blob, err := dCodec.Serialize(1, 10, segID, deltaData) + return blob, err +} + +func genTestCollectionMeta() *etcdpb.CollectionMeta { + return &etcdpb.CollectionMeta{ + ID: CollectionID, + PartitionTags: []string{"partition_0", "partition_1"}, + Schema: &schemapb.CollectionSchema{ + Name: "schema", + Description: "schema", + AutoID: true, + Fields: []*schemapb.FieldSchema{ + { + FieldID: common.RowIDField, + Name: "row_id", + DataType: schemapb.DataType_Int64, + }, + { + FieldID: common.TimeStampField, + Name: "Timestamp", + DataType: schemapb.DataType_Int64, + }, + { + FieldID: BoolField, + Name: "field_bool", + DataType: schemapb.DataType_Bool, + }, + { + FieldID: Int8Field, + Name: "field_int8", + DataType: schemapb.DataType_Int8, + }, + { + FieldID: Int16Field, + Name: "field_int16", + DataType: schemapb.DataType_Int16, + }, + { + FieldID: Int32Field, + Name: "field_int32", + DataType: schemapb.DataType_Int32, + }, + { + FieldID: Int64Field, + Name: "field_int64", + IsPrimaryKey: true, + DataType: schemapb.DataType_Int64, + }, + { + FieldID: FloatField, + Name: "field_float", + DataType: schemapb.DataType_Float, + }, + { + FieldID: DoubleField, + Name: "field_double", + DataType: schemapb.DataType_Double, + }, + { + FieldID: StringField, + Name: "field_string", + DataType: schemapb.DataType_String, + }, + { + FieldID: VarCharField, + Name: "field_varchar", + DataType: schemapb.DataType_VarChar, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.MaxLengthKey, + Value: "128", + }, + }, + }, + { + FieldID: ArrayField, + Name: "field_int32_array", + Description: "int32 array", + DataType: schemapb.DataType_Array, + ElementType: schemapb.DataType_Int32, + }, + { + FieldID: JSONField, + Name: "field_json", + Description: "json", + DataType: schemapb.DataType_JSON, + }, + { + FieldID: BinaryVectorField, + Name: "field_binary_vector", + Description: "binary_vector", + DataType: schemapb.DataType_BinaryVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "8", + }, + }, + }, + { + FieldID: FloatVectorField, + Name: 
"field_float_vector", + Description: "float_vector", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "4", + }, + }, + }, + { + FieldID: Float16VectorField, + Name: "field_float16_vector", + Description: "float16_vector", + DataType: schemapb.DataType_Float16Vector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "4", + }, + }, + }, + { + FieldID: BFloat16VectorField, + Name: "field_bfloat16_vector", + Description: "bfloat16_vector", + DataType: schemapb.DataType_BFloat16Vector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "4", + }, + }, + }, + { + FieldID: SparseFloatVectorField, + Name: "field_sparse_float_vector", + Description: "sparse_float_vector", + DataType: schemapb.DataType_SparseFloatVector, + TypeParams: []*commonpb.KeyValuePair{}, + }, + }, + }, + } +} diff --git a/internal/datanode/compaction/mock_compactor.go b/internal/datanode/compaction/mock_compactor.go new file mode 100644 index 0000000000000..99dccea0aa54c --- /dev/null +++ b/internal/datanode/compaction/mock_compactor.go @@ -0,0 +1,307 @@ +// Code generated by mockery v2.32.4. DO NOT EDIT. + +package compaction + +import ( + datapb "github.com/milvus-io/milvus/internal/proto/datapb" + mock "github.com/stretchr/testify/mock" +) + +// MockCompactor is an autogenerated mock type for the Compactor type +type MockCompactor struct { + mock.Mock +} + +type MockCompactor_Expecter struct { + mock *mock.Mock +} + +func (_m *MockCompactor) EXPECT() *MockCompactor_Expecter { + return &MockCompactor_Expecter{mock: &_m.Mock} +} + +// Compact provides a mock function with given fields: +func (_m *MockCompactor) Compact() (*datapb.CompactionPlanResult, error) { + ret := _m.Called() + + var r0 *datapb.CompactionPlanResult + var r1 error + if rf, ok := ret.Get(0).(func() (*datapb.CompactionPlanResult, error)); ok { + return rf() + } + if rf, ok := ret.Get(0).(func() *datapb.CompactionPlanResult); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*datapb.CompactionPlanResult) + } + } + + if rf, ok := ret.Get(1).(func() error); ok { + r1 = rf() + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockCompactor_Compact_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Compact' +type MockCompactor_Compact_Call struct { + *mock.Call +} + +// Compact is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) Compact() *MockCompactor_Compact_Call { + return &MockCompactor_Compact_Call{Call: _e.mock.On("Compact")} +} + +func (_c *MockCompactor_Compact_Call) Run(run func()) *MockCompactor_Compact_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_Compact_Call) Return(_a0 *datapb.CompactionPlanResult, _a1 error) *MockCompactor_Compact_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *MockCompactor_Compact_Call) RunAndReturn(run func() (*datapb.CompactionPlanResult, error)) *MockCompactor_Compact_Call { + _c.Call.Return(run) + return _c +} + +// Complete provides a mock function with given fields: +func (_m *MockCompactor) Complete() { + _m.Called() +} + +// MockCompactor_Complete_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Complete' +type MockCompactor_Complete_Call struct { + *mock.Call +} + +// Complete is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) Complete() *MockCompactor_Complete_Call { 
+ return &MockCompactor_Complete_Call{Call: _e.mock.On("Complete")} +} + +func (_c *MockCompactor_Complete_Call) Run(run func()) *MockCompactor_Complete_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_Complete_Call) Return() *MockCompactor_Complete_Call { + _c.Call.Return() + return _c +} + +func (_c *MockCompactor_Complete_Call) RunAndReturn(run func()) *MockCompactor_Complete_Call { + _c.Call.Return(run) + return _c +} + +// GetChannelName provides a mock function with given fields: +func (_m *MockCompactor) GetChannelName() string { + ret := _m.Called() + + var r0 string + if rf, ok := ret.Get(0).(func() string); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(string) + } + + return r0 +} + +// MockCompactor_GetChannelName_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetChannelName' +type MockCompactor_GetChannelName_Call struct { + *mock.Call +} + +// GetChannelName is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) GetChannelName() *MockCompactor_GetChannelName_Call { + return &MockCompactor_GetChannelName_Call{Call: _e.mock.On("GetChannelName")} +} + +func (_c *MockCompactor_GetChannelName_Call) Run(run func()) *MockCompactor_GetChannelName_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_GetChannelName_Call) Return(_a0 string) *MockCompactor_GetChannelName_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockCompactor_GetChannelName_Call) RunAndReturn(run func() string) *MockCompactor_GetChannelName_Call { + _c.Call.Return(run) + return _c +} + +// GetCollection provides a mock function with given fields: +func (_m *MockCompactor) GetCollection() int64 { + ret := _m.Called() + + var r0 int64 + if rf, ok := ret.Get(0).(func() int64); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(int64) + } + + return r0 +} + +// MockCompactor_GetCollection_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetCollection' +type MockCompactor_GetCollection_Call struct { + *mock.Call +} + +// GetCollection is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) GetCollection() *MockCompactor_GetCollection_Call { + return &MockCompactor_GetCollection_Call{Call: _e.mock.On("GetCollection")} +} + +func (_c *MockCompactor_GetCollection_Call) Run(run func()) *MockCompactor_GetCollection_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_GetCollection_Call) Return(_a0 int64) *MockCompactor_GetCollection_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockCompactor_GetCollection_Call) RunAndReturn(run func() int64) *MockCompactor_GetCollection_Call { + _c.Call.Return(run) + return _c +} + +// GetPlanID provides a mock function with given fields: +func (_m *MockCompactor) GetPlanID() int64 { + ret := _m.Called() + + var r0 int64 + if rf, ok := ret.Get(0).(func() int64); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(int64) + } + + return r0 +} + +// MockCompactor_GetPlanID_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetPlanID' +type MockCompactor_GetPlanID_Call struct { + *mock.Call +} + +// GetPlanID is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) GetPlanID() *MockCompactor_GetPlanID_Call { + return &MockCompactor_GetPlanID_Call{Call: _e.mock.On("GetPlanID")} +} + +func (_c *MockCompactor_GetPlanID_Call) Run(run func()) 
*MockCompactor_GetPlanID_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_GetPlanID_Call) Return(_a0 int64) *MockCompactor_GetPlanID_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockCompactor_GetPlanID_Call) RunAndReturn(run func() int64) *MockCompactor_GetPlanID_Call { + _c.Call.Return(run) + return _c +} + +// InjectDone provides a mock function with given fields: +func (_m *MockCompactor) InjectDone() { + _m.Called() +} + +// MockCompactor_InjectDone_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'InjectDone' +type MockCompactor_InjectDone_Call struct { + *mock.Call +} + +// InjectDone is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) InjectDone() *MockCompactor_InjectDone_Call { + return &MockCompactor_InjectDone_Call{Call: _e.mock.On("InjectDone")} +} + +func (_c *MockCompactor_InjectDone_Call) Run(run func()) *MockCompactor_InjectDone_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_InjectDone_Call) Return() *MockCompactor_InjectDone_Call { + _c.Call.Return() + return _c +} + +func (_c *MockCompactor_InjectDone_Call) RunAndReturn(run func()) *MockCompactor_InjectDone_Call { + _c.Call.Return(run) + return _c +} + +// Stop provides a mock function with given fields: +func (_m *MockCompactor) Stop() { + _m.Called() +} + +// MockCompactor_Stop_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Stop' +type MockCompactor_Stop_Call struct { + *mock.Call +} + +// Stop is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) Stop() *MockCompactor_Stop_Call { + return &MockCompactor_Stop_Call{Call: _e.mock.On("Stop")} +} + +func (_c *MockCompactor_Stop_Call) Run(run func()) *MockCompactor_Stop_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_Stop_Call) Return() *MockCompactor_Stop_Call { + _c.Call.Return() + return _c +} + +func (_c *MockCompactor_Stop_Call) RunAndReturn(run func()) *MockCompactor_Stop_Call { + _c.Call.Return(run) + return _c +} + +// NewMockCompactor creates a new instance of MockCompactor. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. 
+func NewMockCompactor(t interface { + mock.TestingT + Cleanup(func()) +}) *MockCompactor { + mock := &MockCompactor{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/internal/datanode/compaction/segment_writer.go b/internal/datanode/compaction/segment_writer.go new file mode 100644 index 0000000000000..3d458aad9974e --- /dev/null +++ b/internal/datanode/compaction/segment_writer.go @@ -0,0 +1,165 @@ +// SegmentInsertBuffer can be reused to buffer all insert data of one segment +// buffer.Serialize will serialize the InsertBuffer and clear it +// pkstats keeps tracking pkstats of the segment until Finish + +package compaction + +import ( + "fmt" + "math" + + "go.uber.org/atomic" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/datanode/writebuffer" + "github.com/milvus-io/milvus/internal/proto/etcdpb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +type SegmentWriter struct { + writer *storage.SerializeWriter[*storage.Value] + closers []func() (*storage.Blob, error) + tsFrom typeutil.Timestamp + tsTo typeutil.Timestamp + + pkstats *storage.PrimaryKeyStats + segmentID int64 + partitionID int64 + collectionID int64 + sch *schemapb.CollectionSchema + rowCount *atomic.Int64 +} + +func (w *SegmentWriter) GetRowNum() int64 { + return w.rowCount.Load() +} + +func (w *SegmentWriter) GetCollectionID() int64 { + return w.collectionID +} + +func (w *SegmentWriter) GetPartitionID() int64 { + return w.partitionID +} + +func (w *SegmentWriter) GetSegmentID() int64 { + return w.segmentID +} + +func (w *SegmentWriter) GetPkID() int64 { + return w.pkstats.FieldID +} + +func (w *SegmentWriter) Write(v *storage.Value) error { + ts := typeutil.Timestamp(v.Timestamp) + if ts < w.tsFrom { + w.tsFrom = ts + } + if ts > w.tsTo { + w.tsTo = ts + } + + w.pkstats.Update(v.PK) + w.rowCount.Inc() + return w.writer.Write(v) +} + +func (w *SegmentWriter) Finish(actualRowCount int64) (*storage.Blob, error) { + w.writer.Flush() + codec := storage.NewInsertCodecWithSchema(&etcdpb.CollectionMeta{ID: w.collectionID, Schema: w.sch}) + return codec.SerializePkStats(w.pkstats, actualRowCount) +} + +func (w *SegmentWriter) IsFull() bool { + w.writer.Flush() + return w.writer.WrittenMemorySize() > paramtable.Get().DataNodeCfg.BinLogMaxSize.GetAsUint64() +} + +func (w *SegmentWriter) IsEmpty() bool { + w.writer.Flush() + return w.writer.WrittenMemorySize() == 0 +} + +func (w *SegmentWriter) GetTimeRange() *writebuffer.TimeRange { + return writebuffer.NewTimeRange(w.tsFrom, w.tsTo) +} + +func (w *SegmentWriter) SerializeYield() ([]*storage.Blob, *writebuffer.TimeRange, error) { + w.writer.Flush() + w.writer.Close() + + fieldData := make([]*storage.Blob, len(w.closers)) + for i, f := range w.closers { + blob, err := f() + if err != nil { + return nil, nil, err + } + fieldData[i] = blob + } + + tr := w.GetTimeRange() + w.clear() + + return fieldData, tr, nil +} + +func (w *SegmentWriter) clear() { + writer, closers, _ := newBinlogWriter(w.collectionID, w.partitionID, w.segmentID, w.sch) + w.writer = writer + w.closers = closers + w.tsFrom = math.MaxUint64 + w.tsTo = 0 +} + +func NewSegmentWriter(sch *schemapb.CollectionSchema, maxCount int64, segID, partID, collID int64) (*SegmentWriter, error) { + writer, closers, err := newBinlogWriter(collID, partID, segID, sch) + if err != nil { 
+ return nil, err + } + + var pkField *schemapb.FieldSchema + for _, fs := range sch.GetFields() { + if fs.GetIsPrimaryKey() && fs.GetFieldID() >= 100 && typeutil.IsPrimaryFieldType(fs.GetDataType()) { + pkField = fs + } + } + if pkField == nil { + log.Warn("failed to get pk field from schema") + return nil, fmt.Errorf("no pk field in schema") + } + + stats, err := storage.NewPrimaryKeyStats(pkField.GetFieldID(), int64(pkField.GetDataType()), maxCount) + if err != nil { + return nil, err + } + + segWriter := SegmentWriter{ + writer: writer, + closers: closers, + tsFrom: math.MaxUint64, + tsTo: 0, + + pkstats: stats, + sch: sch, + segmentID: segID, + partitionID: partID, + collectionID: collID, + rowCount: atomic.NewInt64(0), + } + + return &segWriter, nil +} + +func newBinlogWriter(collID, partID, segID int64, schema *schemapb.CollectionSchema, +) (writer *storage.SerializeWriter[*storage.Value], closers []func() (*storage.Blob, error), err error) { + fieldWriters := storage.NewBinlogStreamWriters(collID, partID, segID, schema.Fields) + closers = make([]func() (*storage.Blob, error), 0, len(fieldWriters)) + for _, w := range fieldWriters { + closers = append(closers, w.Finalize) + } + writer, err = storage.NewBinlogSerializeWriter(schema, partID, segID, fieldWriters, 1024) + return +} diff --git a/internal/datanode/compaction_executor.go b/internal/datanode/compaction_executor.go index bbcfbbb8279d6..938d1b5db0fd7 100644 --- a/internal/datanode/compaction_executor.go +++ b/internal/datanode/compaction_executor.go @@ -24,6 +24,7 @@ import ( "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -34,10 +35,10 @@ const ( ) type compactionExecutor struct { - executing *typeutil.ConcurrentMap[int64, compactor] // planID to compactor - completedCompactor *typeutil.ConcurrentMap[int64, compactor] // planID to compactor + executing *typeutil.ConcurrentMap[int64, compaction.Compactor] // planID to compactor + completedCompactor *typeutil.ConcurrentMap[int64, compaction.Compactor] // planID to compactor completed *typeutil.ConcurrentMap[int64, *datapb.CompactionPlanResult] // planID to CompactionPlanResult - taskCh chan compactor + taskCh chan compaction.Compactor dropped *typeutil.ConcurrentSet[string] // vchannel dropped // To prevent concurrency of release channel and compaction get results @@ -47,39 +48,39 @@ type compactionExecutor struct { func newCompactionExecutor() *compactionExecutor { return &compactionExecutor{ - executing: typeutil.NewConcurrentMap[int64, compactor](), - completedCompactor: typeutil.NewConcurrentMap[int64, compactor](), + executing: typeutil.NewConcurrentMap[int64, compaction.Compactor](), + completedCompactor: typeutil.NewConcurrentMap[int64, compaction.Compactor](), completed: typeutil.NewConcurrentMap[int64, *datapb.CompactionPlanResult](), - taskCh: make(chan compactor, maxTaskNum), + taskCh: make(chan compaction.Compactor, maxTaskNum), dropped: typeutil.NewConcurrentSet[string](), } } -func (c *compactionExecutor) execute(task compactor) { +func (c *compactionExecutor) execute(task compaction.Compactor) { c.taskCh <- task c.toExecutingState(task) } -func (c *compactionExecutor) toExecutingState(task compactor) { - c.executing.Insert(task.getPlanID(), task) +func (c *compactionExecutor) toExecutingState(task compaction.Compactor) { + 
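The SegmentWriter added above is the unit the mix compactor presumably drives while merging: it keeps writing surviving rows, spills a batch of binlogs whenever the buffered size crosses BinLogMaxSize (see IsFull), and produces the primary-key stats blob at the end. A minimal sketch of that loop follows; writeAll and uploadBinlogs are illustrative names, not part of this patch:

package compaction

import "github.com/milvus-io/milvus/internal/storage"

// writeAll drains rows into w, spilling serialized binlogs through
// uploadBinlogs whenever the writer reports it is full, and finally
// returns the pk stats blob for the merged segment.
// Sketch only: uploadBinlogs stands in for whatever upload helper the caller uses.
func writeAll(w *SegmentWriter, rows []*storage.Value, uploadBinlogs func([]*storage.Blob) error) (*storage.Blob, error) {
	spill := func() error {
		blobs, _, err := w.SerializeYield() // flushes, closes and resets the underlying writer
		if err != nil {
			return err
		}
		return uploadBinlogs(blobs)
	}
	for _, v := range rows {
		if err := w.Write(v); err != nil {
			return nil, err
		}
		if w.IsFull() {
			if err := spill(); err != nil {
				return nil, err
			}
		}
	}
	if !w.IsEmpty() {
		if err := spill(); err != nil {
			return nil, err
		}
	}
	return w.Finish(w.GetRowNum()) // serialized PrimaryKeyStats for the new segment
}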
c.executing.Insert(task.GetPlanID(), task) } -func (c *compactionExecutor) toCompleteState(task compactor) { - task.complete() - c.executing.GetAndRemove(task.getPlanID()) +func (c *compactionExecutor) toCompleteState(task compaction.Compactor) { + task.Complete() + c.executing.GetAndRemove(task.GetPlanID()) } func (c *compactionExecutor) injectDone(planID UniqueID) { c.completed.GetAndRemove(planID) task, loaded := c.completedCompactor.GetAndRemove(planID) if loaded { - log.Info("Compaction task inject done", zap.Int64("planID", planID), zap.String("channel", task.getChannelName())) - task.injectDone() + log.Info("Compaction task inject done", zap.Int64("planID", planID), zap.String("channel", task.GetChannelName())) + task.InjectDone() } } // These two func are bounded for waitGroup -func (c *compactionExecutor) executeWithState(task compactor) { +func (c *compactionExecutor) executeWithState(task compaction.Compactor) { go c.executeTask(task) } @@ -94,11 +95,11 @@ func (c *compactionExecutor) start(ctx context.Context) { } } -func (c *compactionExecutor) executeTask(task compactor) { +func (c *compactionExecutor) executeTask(task compaction.Compactor) { log := log.With( - zap.Int64("planID", task.getPlanID()), - zap.Int64("Collection", task.getCollection()), - zap.String("channel", task.getChannelName()), + zap.Int64("planID", task.GetPlanID()), + zap.Int64("Collection", task.GetCollection()), + zap.String("channel", task.GetChannelName()), ) defer func() { @@ -107,23 +108,23 @@ func (c *compactionExecutor) executeTask(task compactor) { log.Info("start to execute compaction") - result, err := task.compact() + result, err := task.Compact() if err != nil { - task.injectDone() + task.InjectDone() log.Warn("compaction task failed", zap.Error(err)) } else { c.completed.Insert(result.GetPlanID(), result) c.completedCompactor.Insert(result.GetPlanID(), task) } - log.Info("end to execute compaction", zap.Int64("planID", task.getPlanID())) + log.Info("end to execute compaction") } func (c *compactionExecutor) stopTask(planID UniqueID) { task, loaded := c.executing.GetAndRemove(planID) if loaded { - log.Warn("compaction executor stop task", zap.Int64("planID", planID), zap.String("vChannelName", task.getChannelName())) - task.stop() + log.Warn("compaction executor stop task", zap.Int64("planID", planID), zap.String("vChannelName", task.GetChannelName())) + task.Stop() } } @@ -141,8 +142,8 @@ func (c *compactionExecutor) discardPlan(channel string) { c.resultGuard.Lock() defer c.resultGuard.Unlock() - c.executing.Range(func(planID int64, task compactor) bool { - if task.getChannelName() == channel { + c.executing.Range(func(planID int64, task compaction.Compactor) bool { + if task.GetChannelName() == channel { c.stopTask(planID) } return true @@ -170,7 +171,7 @@ func (c *compactionExecutor) getAllCompactionResults() []*datapb.CompactionPlanR ) results := make([]*datapb.CompactionPlanResult, 0) // get executing results - c.executing.Range(func(planID int64, task compactor) bool { + c.executing.Range(func(planID int64, task compaction.Compactor) bool { executing = append(executing, planID) results = append(results, &datapb.CompactionPlanResult{ State: commonpb.CompactionState_Executing, diff --git a/internal/datanode/compaction_executor_test.go b/internal/datanode/compaction_executor_test.go index 68eb61c531e57..fd6fba2e6f0a7 100644 --- a/internal/datanode/compaction_executor_test.go +++ b/internal/datanode/compaction_executor_test.go @@ -20,28 +20,29 @@ import ( "context" "testing" + 
"github.com/cockroachdb/errors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/proto/datapb" ) func TestCompactionExecutor(t *testing.T) { t.Run("Test execute", func(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - ex := newCompactionExecutor() - go ex.start(ctx) - ex.execute(newMockCompactor(true)) - - cancel() - }) - - t.Run("Test stopTask", func(t *testing.T) { - ex := newCompactionExecutor() - mc := newMockCompactor(true) - ex.executeWithState(mc) - ex.stopTask(UniqueID(1)) + planID := int64(1) + mockC := compaction.NewMockCompactor(t) + mockC.EXPECT().GetPlanID().Return(planID).Once() + mockC.EXPECT().GetChannelName().Return("ch1").Once() + executor := newCompactionExecutor() + executor.execute(mockC) + + assert.EqualValues(t, 1, len(executor.taskCh)) + assert.EqualValues(t, 1, executor.executing.Len()) + + mockC.EXPECT().Stop().Return().Once() + executor.stopTask(planID) }) t.Run("Test start", func(t *testing.T) { @@ -57,19 +58,36 @@ func TestCompactionExecutor(t *testing.T) { description string }{ - {true, "compact return nil"}, + {true, "compact success"}, {false, "compact return error"}, } ex := newCompactionExecutor() for _, test := range tests { t.Run(test.description, func(t *testing.T) { + mockC := compaction.NewMockCompactor(t) + mockC.EXPECT().GetPlanID().Return(int64(1)) + mockC.EXPECT().GetCollection().Return(int64(1)) + mockC.EXPECT().GetChannelName().Return("ch1") + mockC.EXPECT().Complete().Return().Maybe() + signal := make(chan struct{}) if test.isvalid { - validTask := newMockCompactor(true) - ex.executeWithState(validTask) + mockC.EXPECT().Compact().RunAndReturn( + func() (*datapb.CompactionPlanResult, error) { + signal <- struct{}{} + return &datapb.CompactionPlanResult{PlanID: 1}, nil + }).Once() + ex.executeWithState(mockC) + <-signal } else { - invalidTask := newMockCompactor(false) - ex.executeWithState(invalidTask) + mockC.EXPECT().InjectDone().Return().Maybe() + mockC.EXPECT().Compact().RunAndReturn( + func() (*datapb.CompactionPlanResult, error) { + signal <- struct{}{} + return nil, errors.New("mock error") + }).Once() + ex.executeWithState(mockC) + <-signal } }) } @@ -95,33 +113,25 @@ func TestCompactionExecutor(t *testing.T) { t.Run("test stop vchannel tasks", func(t *testing.T) { ex := newCompactionExecutor() - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - go ex.start(ctx) - mc := newMockCompactor(true) - mc.alwaysWorking = true + mc := compaction.NewMockCompactor(t) + mc.EXPECT().GetPlanID().Return(int64(1)) + mc.EXPECT().GetChannelName().Return("mock") + mc.EXPECT().Compact().Return(&datapb.CompactionPlanResult{PlanID: 1}, nil).Maybe() + mc.EXPECT().Stop().Return().Once() ex.execute(mc) - // wait for task enqueued - found := false - for !found { - found = ex.executing.Contain(mc.getPlanID()) - } + require.True(t, ex.executing.Contain(int64(1))) ex.discardByDroppedChannel("mock") - - select { - case <-mc.ctx.Done(): - default: - t.FailNow() - } + assert.True(t, ex.dropped.Contain("mock")) + assert.False(t, ex.executing.Contain(int64(1))) }) t.Run("test getAllCompactionResults", func(t *testing.T) { ex := newCompactionExecutor() - mockC := newMockCompactor(true) + mockC := compaction.NewMockCompactor(t) ex.executing.Insert(int64(1), mockC) ex.completedCompactor.Insert(int64(2), mockC) @@ -158,60 +168,3 @@ func 
TestCompactionExecutor(t *testing.T) { require.Equal(t, 1, ex.executing.Len()) }) } - -func newMockCompactor(isvalid bool) *mockCompactor { - ctx, cancel := context.WithCancel(context.TODO()) - return &mockCompactor{ - ctx: ctx, - cancel: cancel, - isvalid: isvalid, - done: make(chan struct{}, 1), - } -} - -type mockCompactor struct { - ctx context.Context - cancel context.CancelFunc - isvalid bool - alwaysWorking bool - - done chan struct{} -} - -var _ compactor = (*mockCompactor)(nil) - -func (mc *mockCompactor) complete() { - mc.done <- struct{}{} -} - -func (mc *mockCompactor) injectDone() {} - -func (mc *mockCompactor) compact() (*datapb.CompactionPlanResult, error) { - if !mc.isvalid { - return nil, errStart - } - if mc.alwaysWorking { - <-mc.ctx.Done() - return nil, mc.ctx.Err() - } - return nil, nil -} - -func (mc *mockCompactor) getPlanID() UniqueID { - return 1 -} - -func (mc *mockCompactor) stop() { - if mc.cancel != nil { - mc.cancel() - <-mc.done - } -} - -func (mc *mockCompactor) getCollection() UniqueID { - return 1 -} - -func (mc *mockCompactor) getChannelName() string { - return "mock" -} diff --git a/internal/datanode/compactor.go b/internal/datanode/compactor.go deleted file mode 100644 index e99642316e6f4..0000000000000 --- a/internal/datanode/compactor.go +++ /dev/null @@ -1,827 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
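For context, the hand-rolled compactor interface deleted below has been replaced by an exported Compactor interface in the new internal/datanode/compaction package. Its exact definition is not part of this hunk; a minimal sketch inferred from the generated MockCompactor above and from the renamed calls in compaction_executor.go would look like this:

package compaction

import "github.com/milvus-io/milvus/internal/proto/datapb"

// Compactor is the exported replacement for the old datanode-private
// compactor interface (sketch; the real definition lives in this package).
type Compactor interface {
	Compact() (*datapb.CompactionPlanResult, error)
	Complete()
	InjectDone()
	Stop()
	GetPlanID() int64
	GetCollection() int64
	GetChannelName() string
}

With the methods exported, compaction_executor.go can depend on compaction.Compactor directly, and its tests can use the mockery-generated expecter API instead of the ad-hoc mockCompactor removed in this patch.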
- -package datanode - -import ( - "context" - "fmt" - sio "io" - "sync" - "time" - - "github.com/cockroachdb/errors" - "github.com/samber/lo" - "go.opentelemetry.io/otel" - "go.uber.org/zap" - - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/internal/datanode/allocator" - "github.com/milvus-io/milvus/internal/datanode/io" - "github.com/milvus-io/milvus/internal/datanode/metacache" - "github.com/milvus-io/milvus/internal/datanode/syncmgr" - "github.com/milvus-io/milvus/internal/metastore/kv/binlog" - "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/proto/etcdpb" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/log" - "github.com/milvus-io/milvus/pkg/metrics" - "github.com/milvus-io/milvus/pkg/util/funcutil" - "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/milvus-io/milvus/pkg/util/paramtable" - "github.com/milvus-io/milvus/pkg/util/timerecord" - "github.com/milvus-io/milvus/pkg/util/tsoutil" - "github.com/milvus-io/milvus/pkg/util/typeutil" -) - -var ( - errIllegalCompactionPlan = errors.New("compaction plan illegal") - errTransferType = errors.New("transfer intferface to type wrong") - errUnknownDataType = errors.New("unknown shema DataType") - errContext = errors.New("context done or timeout") -) - -type compactor interface { - complete() - compact() (*datapb.CompactionPlanResult, error) - injectDone() - stop() - getPlanID() UniqueID - getCollection() UniqueID - getChannelName() string -} - -// make sure compactionTask implements compactor interface -var _ compactor = (*compactionTask)(nil) - -// for MixCompaction only -type compactionTask struct { - binlogIO io.BinlogIO - compactor - metaCache metacache.MetaCache - syncMgr syncmgr.SyncManager - allocator.Allocator - - plan *datapb.CompactionPlan - - ctx context.Context - cancel context.CancelFunc - - injectDoneOnce sync.Once - done chan struct{} - tr *timerecord.TimeRecorder -} - -func newCompactionTask( - ctx context.Context, - binlogIO io.BinlogIO, - metaCache metacache.MetaCache, - syncMgr syncmgr.SyncManager, - alloc allocator.Allocator, - plan *datapb.CompactionPlan, -) *compactionTask { - ctx1, cancel := context.WithCancel(ctx) - return &compactionTask{ - ctx: ctx1, - cancel: cancel, - binlogIO: binlogIO, - syncMgr: syncMgr, - metaCache: metaCache, - Allocator: alloc, - plan: plan, - tr: timerecord.NewTimeRecorder("levelone compaction"), - done: make(chan struct{}, 1), - } -} - -func (t *compactionTask) complete() { - t.done <- struct{}{} -} - -func (t *compactionTask) stop() { - t.cancel() - <-t.done - t.injectDone() -} - -func (t *compactionTask) getPlanID() UniqueID { - return t.plan.GetPlanID() -} - -func (t *compactionTask) getChannelName() string { - return t.plan.GetChannel() -} - -// return num rows of all segment compaction from -func (t *compactionTask) getNumRows() (int64, error) { - numRows := int64(0) - for _, binlog := range t.plan.SegmentBinlogs { - seg, ok := t.metaCache.GetSegmentByID(binlog.GetSegmentID()) - if !ok { - return 0, merr.WrapErrSegmentNotFound(binlog.GetSegmentID(), "get compaction segments num rows failed") - } - - numRows += seg.NumOfRows() - } - - return numRows, nil -} - -func (t *compactionTask) mergeDeltalogs(dBlobs map[UniqueID][]*Blob) (map[interface{}]Timestamp, error) { - log := log.With(zap.Int64("planID", t.getPlanID())) - mergeStart := time.Now() - dCodec := storage.NewDeleteCodec() - - pk2ts := 
make(map[interface{}]Timestamp) - - for _, blobs := range dBlobs { - _, _, dData, err := dCodec.Deserialize(blobs) - if err != nil { - log.Warn("merge deltalogs wrong", zap.Error(err)) - return nil, err - } - - for i := int64(0); i < dData.RowCount; i++ { - pk := dData.Pks[i] - ts := dData.Tss[i] - if lastTS, ok := pk2ts[pk.GetValue()]; ok && lastTS > ts { - ts = lastTS - } - pk2ts[pk.GetValue()] = ts - } - } - - log.Info("mergeDeltalogs end", - zap.Int("number of deleted pks to compact in insert logs", len(pk2ts)), - zap.Duration("elapse", time.Since(mergeStart))) - - return pk2ts, nil -} - -func newBinlogWriter(collectionId, partitionId, segmentId UniqueID, schema *schemapb.CollectionSchema, -) (writer *storage.SerializeWriter[*storage.Value], closers []func() (*Blob, error), err error) { - fieldWriters := storage.NewBinlogStreamWriters(collectionId, partitionId, segmentId, schema.Fields) - closers = make([]func() (*Blob, error), 0, len(fieldWriters)) - for _, w := range fieldWriters { - closers = append(closers, w.Finalize) - } - writer, err = storage.NewBinlogSerializeWriter(schema, partitionId, segmentId, fieldWriters, 1024) - return -} - -func (t *compactionTask) merge( - ctx context.Context, - unMergedInsertlogs [][]string, - targetSegID UniqueID, - partID UniqueID, - meta *etcdpb.CollectionMeta, - delta map[interface{}]Timestamp, -) ([]*datapb.FieldBinlog, []*datapb.FieldBinlog, int64, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, fmt.Sprintf("CompactMerge-%d", t.getPlanID())) - defer span.End() - log := log.With(zap.Int64("planID", t.getPlanID())) - mergeStart := time.Now() - - writer, finalizers, err := newBinlogWriter(meta.GetID(), partID, targetSegID, meta.GetSchema()) - if err != nil { - return nil, nil, 0, err - } - - var ( - numBinlogs int // binlog number - numRows uint64 // the number of rows uploaded - expired int64 // the number of expired entity - - insertField2Path = make(map[UniqueID]*datapb.FieldBinlog) - insertPaths = make([]*datapb.FieldBinlog, 0) - - statField2Path = make(map[UniqueID]*datapb.FieldBinlog) - statPaths = make([]*datapb.FieldBinlog, 0) - ) - - isDeletedValue := func(v *storage.Value) bool { - ts, ok := delta[v.PK.GetValue()] - // insert task and delete task has the same ts when upsert - // here should be < instead of <= - // to avoid the upsert data to be deleted after compact - if ok && uint64(v.Timestamp) < ts { - return true - } - return false - } - - addInsertFieldPath := func(inPaths map[UniqueID]*datapb.FieldBinlog, timestampFrom, timestampTo int64) { - for fID, path := range inPaths { - for _, binlog := range path.GetBinlogs() { - binlog.TimestampTo = uint64(timestampTo) - binlog.TimestampFrom = uint64(timestampFrom) - } - tmpBinlog, ok := insertField2Path[fID] - if !ok { - tmpBinlog = path - } else { - tmpBinlog.Binlogs = append(tmpBinlog.Binlogs, path.GetBinlogs()...) - } - insertField2Path[fID] = tmpBinlog - } - } - - addStatFieldPath := func(statPaths map[UniqueID]*datapb.FieldBinlog) { - for fID, path := range statPaths { - tmpBinlog, ok := statField2Path[fID] - if !ok { - tmpBinlog = path - } else { - tmpBinlog.Binlogs = append(tmpBinlog.Binlogs, path.GetBinlogs()...) 
- } - statField2Path[fID] = tmpBinlog - } - } - - // get pkID, pkType, dim - var pkField *schemapb.FieldSchema - for _, fs := range meta.GetSchema().GetFields() { - if fs.GetIsPrimaryKey() && fs.GetFieldID() >= 100 && typeutil.IsPrimaryFieldType(fs.GetDataType()) { - pkField = fs - } - } - - if pkField == nil { - log.Warn("failed to get pk field from schema") - return nil, nil, 0, fmt.Errorf("no pk field in schema") - } - - pkID := pkField.GetFieldID() - pkType := pkField.GetDataType() - - expired = 0 - numRows = 0 - numBinlogs = 0 - currentTs := t.GetCurrentTime() - unflushedRows := 0 - downloadTimeCost := time.Duration(0) - uploadInsertTimeCost := time.Duration(0) - - oldRowNums, err := t.getNumRows() - if err != nil { - return nil, nil, 0, err - } - - stats, err := storage.NewPrimaryKeyStats(pkID, int64(pkType), oldRowNums) - if err != nil { - return nil, nil, 0, err - } - // initial timestampFrom, timestampTo = -1, -1 is an illegal value, only to mark initial state - var ( - timestampTo int64 = -1 - timestampFrom int64 = -1 - ) - - flush := func() error { - uploadInsertStart := time.Now() - writer.Close() - fieldData := make([]*Blob, len(finalizers)) - - for i, f := range finalizers { - blob, err := f() - if err != nil { - return err - } - fieldData[i] = blob - } - inPaths, err := uploadInsertLog(ctx, t.binlogIO, t.Allocator, meta.ID, partID, targetSegID, fieldData) - if err != nil { - log.Warn("failed to upload single insert log", zap.Error(err)) - return err - } - numBinlogs += len(inPaths) - uploadInsertTimeCost += time.Since(uploadInsertStart) - addInsertFieldPath(inPaths, timestampFrom, timestampTo) - unflushedRows = 0 - return nil - } - - for _, path := range unMergedInsertlogs { - downloadStart := time.Now() - data, err := downloadBlobs(ctx, t.binlogIO, path) - if err != nil { - log.Warn("download insertlogs wrong", zap.Strings("path", path), zap.Error(err)) - return nil, nil, 0, err - } - downloadTimeCost += time.Since(downloadStart) - - iter, err := storage.NewBinlogDeserializeReader(data, pkID) - if err != nil { - log.Warn("new insert binlogs reader wrong", zap.Strings("path", path), zap.Error(err)) - return nil, nil, 0, err - } - - for { - err := iter.Next() - if err != nil { - if err == sio.EOF { - break - } else { - log.Warn("transfer interface to Value wrong", zap.Strings("path", path)) - return nil, nil, 0, errors.New("unexpected error") - } - } - v := iter.Value() - if isDeletedValue(v) { - continue - } - - ts := Timestamp(v.Timestamp) - // Filtering expired entity - if t.isExpiredEntity(ts, currentTs) { - expired++ - continue - } - - // Update timestampFrom, timestampTo - if v.Timestamp < timestampFrom || timestampFrom == -1 { - timestampFrom = v.Timestamp - } - if v.Timestamp > timestampTo || timestampFrom == -1 { - timestampTo = v.Timestamp - } - - err = writer.Write(v) - if err != nil { - return nil, nil, 0, err - } - numRows++ - unflushedRows++ - - stats.Update(v.PK) - - // check size every 100 rows in case of too many `GetMemorySize` call - if (unflushedRows+1)%100 == 0 { - writer.Flush() // Flush to update memory size - - if writer.WrittenMemorySize() > paramtable.Get().DataNodeCfg.BinLogMaxSize.GetAsUint64() { - if err := flush(); err != nil { - return nil, nil, 0, err - } - timestampFrom = -1 - timestampTo = -1 - - writer, finalizers, err = newBinlogWriter(meta.ID, targetSegID, partID, meta.Schema) - if err != nil { - return nil, nil, 0, err - } - } - } - } - } - - // final flush if there is unflushed rows - if unflushedRows > 0 { - if err := flush(); err != 
nil { - return nil, nil, 0, err - } - } - - // upload stats log - if numRows > 0 { - iCodec := storage.NewInsertCodecWithSchema(meta) - statsPaths, err := uploadStatsLog(ctx, t.binlogIO, t.Allocator, meta.GetID(), partID, targetSegID, stats, int64(numRows), iCodec) - if err != nil { - return nil, nil, 0, err - } - addStatFieldPath(statsPaths) - } - - for _, path := range insertField2Path { - insertPaths = append(insertPaths, path) - } - - for _, path := range statField2Path { - statPaths = append(statPaths, path) - } - - log.Info("compact merge end", - zap.Uint64("remaining insert numRows", numRows), - zap.Int64("expired entities", expired), - zap.Int("binlog file number", numBinlogs), - zap.Duration("download insert log elapse", downloadTimeCost), - zap.Duration("upload insert log elapse", uploadInsertTimeCost), - zap.Duration("merge elapse", time.Since(mergeStart))) - - return insertPaths, statPaths, int64(numRows), nil -} - -func (t *compactionTask) compact() (*datapb.CompactionPlanResult, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(t.ctx, fmt.Sprintf("Compact-%d", t.getPlanID())) - defer span.End() - - log := log.Ctx(ctx).With(zap.Int64("planID", t.plan.GetPlanID()), zap.Int32("timeout in seconds", t.plan.GetTimeoutInSeconds())) - if ok := funcutil.CheckCtxValid(ctx); !ok { - log.Warn("compact wrong, task context done or timeout") - return nil, errContext - } - - ctxTimeout, cancelAll := context.WithTimeout(ctx, time.Duration(t.plan.GetTimeoutInSeconds())*time.Second) - defer cancelAll() - - compactStart := time.Now() - durInQueue := t.tr.RecordSpan() - log.Info("compact start") - if len(t.plan.GetSegmentBinlogs()) < 1 { - log.Warn("compact wrong, there's no segments in segment binlogs") - return nil, errIllegalCompactionPlan - } - - targetSegID, err := t.AllocOne() - if err != nil { - log.Warn("compact wrong, unable to allocate segmentID", zap.Error(err)) - return nil, err - } - - segIDs := lo.Map(t.plan.GetSegmentBinlogs(), func(binlogs *datapb.CompactionSegmentBinlogs, _ int) int64 { - return binlogs.GetSegmentID() - }) - - // Inject to stop flush - // when compaction failed, these segments need to be Unblocked by injectDone in compaction_executor - // when compaction succeeded, these segments will be Unblocked by SyncSegments from DataCoord. 
- for _, segID := range segIDs { - t.syncMgr.Block(segID) - } - log.Info("compact finsh injection", zap.Duration("elapse", t.tr.RecordSpan())) - - if err := binlog.DecompressCompactionBinlogs(t.plan.GetSegmentBinlogs()); err != nil { - log.Warn("compact wrong, fail to decompress compaction binlogs", zap.Error(err)) - return nil, err - } - - dblobs := make(map[UniqueID][]*Blob) - allPath := make([][]string, 0) - - for _, s := range t.plan.GetSegmentBinlogs() { - log := log.With(zap.Int64("segmentID", s.GetSegmentID())) - // Get the batch count of field binlog files - var binlogBatch int - for _, b := range s.GetFieldBinlogs() { - if b != nil { - binlogBatch = len(b.GetBinlogs()) - break - } - } - if binlogBatch == 0 { - log.Warn("compacting empty segment") - continue - } - - for idx := 0; idx < binlogBatch; idx++ { - var ps []string - for _, f := range s.GetFieldBinlogs() { - ps = append(ps, f.GetBinlogs()[idx].GetLogPath()) - } - allPath = append(allPath, ps) - } - - paths := make([]string, 0) - for _, d := range s.GetDeltalogs() { - for _, l := range d.GetBinlogs() { - path := l.GetLogPath() - paths = append(paths, path) - } - } - - if len(paths) != 0 { - bs, err := downloadBlobs(ctxTimeout, t.binlogIO, paths) - if err != nil { - log.Warn("compact wrong, fail to download deltalogs", zap.Strings("path", paths), zap.Error(err)) - return nil, err - } - dblobs[s.GetSegmentID()] = append(dblobs[s.GetSegmentID()], bs...) - } - } - - // Unable to deal with all empty segments cases, so return error - if len(allPath) == 0 { - log.Warn("compact wrong, all segments are empty") - return nil, errIllegalCompactionPlan - } - - log.Info("compact download deltalogs elapse", zap.Duration("elapse", t.tr.RecordSpan())) - - if err != nil { - log.Warn("compact IO wrong", zap.Error(err)) - return nil, err - } - - deltaPk2Ts, err := t.mergeDeltalogs(dblobs) - if err != nil { - log.Warn("compact wrong, fail to merge deltalogs", zap.Error(err)) - return nil, err - } - - segmentBinlog := t.plan.GetSegmentBinlogs()[0] - partID := segmentBinlog.GetPartitionID() - meta := &etcdpb.CollectionMeta{ID: t.metaCache.Collection(), Schema: t.metaCache.Schema()} - - inPaths, statsPaths, numRows, err := t.merge(ctxTimeout, allPath, targetSegID, partID, meta, deltaPk2Ts) - if err != nil { - log.Warn("compact wrong, fail to merge", zap.Error(err)) - return nil, err - } - - pack := &datapb.CompactionSegment{ - SegmentID: targetSegID, - InsertLogs: inPaths, - Field2StatslogPaths: statsPaths, - NumOfRows: numRows, - Channel: t.plan.GetChannel(), - } - - log.Info("compact done", - zap.Int64("targetSegmentID", targetSegID), - zap.Int64s("compactedFrom", segIDs), - zap.Int("num of binlog paths", len(inPaths)), - zap.Int("num of stats paths", len(statsPaths)), - zap.Int("num of delta paths", len(pack.GetDeltalogs())), - zap.Duration("elapse", time.Since(compactStart)), - ) - - metrics.DataNodeCompactionLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), t.plan.GetType().String()).Observe(float64(t.tr.ElapseSpan().Milliseconds())) - metrics.DataNodeCompactionLatencyInQueue.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(durInQueue.Milliseconds())) - - planResult := &datapb.CompactionPlanResult{ - State: commonpb.CompactionState_Completed, - PlanID: t.getPlanID(), - Channel: t.plan.GetChannel(), - Segments: []*datapb.CompactionSegment{pack}, - Type: t.plan.GetType(), - } - - return planResult, nil -} - -func (t *compactionTask) injectDone() { - t.injectDoneOnce.Do(func() { - for _, binlog := range 
t.plan.SegmentBinlogs { - t.syncMgr.Unblock(binlog.SegmentID) - } - }) -} - -// TODO copy maybe expensive, but this seems to be the only convinent way. -func interface2FieldData(schemaDataType schemapb.DataType, content []interface{}, numRows int64) (storage.FieldData, error) { - var rst storage.FieldData - switch schemaDataType { - case schemapb.DataType_Bool: - data := &storage.BoolFieldData{ - Data: make([]bool, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(bool) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Int8: - data := &storage.Int8FieldData{ - Data: make([]int8, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(int8) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Int16: - data := &storage.Int16FieldData{ - Data: make([]int16, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(int16) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Int32: - data := &storage.Int32FieldData{ - Data: make([]int32, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(int32) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Int64: - data := &storage.Int64FieldData{ - Data: make([]int64, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(int64) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Float: - data := &storage.FloatFieldData{ - Data: make([]float32, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(float32) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Double: - data := &storage.DoubleFieldData{ - Data: make([]float64, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(float64) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_String, schemapb.DataType_VarChar: - data := &storage.StringFieldData{ - Data: make([]string, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(string) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_JSON: - data := &storage.JSONFieldData{ - Data: make([][]byte, 0, len(content)), - } - - for _, c := range content { - r, ok := c.([]byte) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Array: - data := &storage.ArrayFieldData{ - Data: make([]*schemapb.ScalarField, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(*schemapb.ScalarField) - if !ok { - return nil, errTransferType - } - data.ElementType = r.GetArrayData().GetElementType() - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_FloatVector: - data := &storage.FloatVectorFieldData{ - Data: []float32{}, - } - - for _, c := range content { - r, ok := c.([]float32) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r...) 
- } - - data.Dim = len(data.Data) / int(numRows) - rst = data - - case schemapb.DataType_Float16Vector: - data := &storage.Float16VectorFieldData{ - Data: []byte{}, - } - - for _, c := range content { - r, ok := c.([]byte) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r...) - } - - data.Dim = len(data.Data) / 2 / int(numRows) - rst = data - - case schemapb.DataType_BFloat16Vector: - data := &storage.BFloat16VectorFieldData{ - Data: []byte{}, - } - - for _, c := range content { - r, ok := c.([]byte) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r...) - } - - data.Dim = len(data.Data) / 2 / int(numRows) - rst = data - - case schemapb.DataType_BinaryVector: - data := &storage.BinaryVectorFieldData{ - Data: []byte{}, - } - - for _, c := range content { - r, ok := c.([]byte) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r...) - } - - data.Dim = len(data.Data) * 8 / int(numRows) - rst = data - - case schemapb.DataType_SparseFloatVector: - data := &storage.SparseFloatVectorFieldData{} - for _, c := range content { - if err := data.AppendRow(c); err != nil { - return nil, fmt.Errorf("failed to append row: %v, %w", err, errTransferType) - } - } - rst = data - - default: - return nil, errUnknownDataType - } - - return rst, nil -} - -func (t *compactionTask) getCollection() UniqueID { - return t.metaCache.Collection() -} - -func (t *compactionTask) GetCurrentTime() typeutil.Timestamp { - return tsoutil.GetCurrentTime() -} - -func (t *compactionTask) isExpiredEntity(ts, now Timestamp) bool { - // entity expire is not enabled if duration <= 0 - if t.plan.GetCollectionTtl() <= 0 { - return false - } - - pts, _ := tsoutil.ParseTS(ts) - pnow, _ := tsoutil.ParseTS(now) - expireTime := pts.Add(time.Duration(t.plan.GetCollectionTtl())) - return expireTime.Before(pnow) -} diff --git a/internal/datanode/compactor_test.go b/internal/datanode/compactor_test.go deleted file mode 100644 index efea77b55fbbb..0000000000000 --- a/internal/datanode/compactor_test.go +++ /dev/null @@ -1,1246 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package datanode - -import ( - "context" - "fmt" - "math" - "testing" - - "github.com/cockroachdb/errors" - "github.com/samber/lo" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/internal/datanode/allocator" - "github.com/milvus-io/milvus/internal/datanode/io" - "github.com/milvus-io/milvus/internal/datanode/metacache" - "github.com/milvus-io/milvus/internal/datanode/syncmgr" - memkv "github.com/milvus-io/milvus/internal/kv/mem" - "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/proto/etcdpb" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/util/paramtable" - "github.com/milvus-io/milvus/pkg/util/timerecord" - "github.com/milvus-io/milvus/pkg/util/typeutil" -) - -var compactTestDir = "/tmp/milvus_test/compact" - -func TestCompactionTaskInnerMethods(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(compactTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - t.Run("Test.interface2FieldData", func(t *testing.T) { - tests := []struct { - isvalid bool - - tp schemapb.DataType - content []interface{} - - description string - }{ - {true, schemapb.DataType_Bool, []interface{}{true, false}, "valid bool"}, - {true, schemapb.DataType_Int8, []interface{}{int8(1), int8(2)}, "valid int8"}, - {true, schemapb.DataType_Int16, []interface{}{int16(1), int16(2)}, "valid int16"}, - {true, schemapb.DataType_Int32, []interface{}{int32(1), int32(2)}, "valid int32"}, - {true, schemapb.DataType_Int64, []interface{}{int64(1), int64(2)}, "valid int64"}, - {true, schemapb.DataType_Float, []interface{}{float32(1), float32(2)}, "valid float32"}, - {true, schemapb.DataType_Double, []interface{}{float64(1), float64(2)}, "valid float64"}, - {true, schemapb.DataType_VarChar, []interface{}{"test1", "test2"}, "valid varChar"}, - {true, schemapb.DataType_JSON, []interface{}{[]byte("{\"key\":\"value\"}"), []byte("{\"hello\":\"world\"}")}, "valid json"}, - {true, schemapb.DataType_Array, []interface{}{ - &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: []int32{1, 2}, - }, - }, - }, - &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: []int32{3, 4}, - }, - }, - }, - }, "valid array"}, - {true, schemapb.DataType_FloatVector, []interface{}{[]float32{1.0, 2.0}}, "valid floatvector"}, - {true, schemapb.DataType_BinaryVector, []interface{}{[]byte{255}}, "valid binaryvector"}, - {true, schemapb.DataType_Float16Vector, []interface{}{[]byte{255, 255, 255, 255}}, "valid float16vector"}, - {true, schemapb.DataType_BFloat16Vector, []interface{}{[]byte{255, 255, 255, 255}}, "valid bfloat16vector"}, - - {false, schemapb.DataType_Bool, []interface{}{1, 2}, "invalid bool"}, - {false, schemapb.DataType_Int8, []interface{}{nil, nil}, "invalid int8"}, - {false, schemapb.DataType_Int16, []interface{}{nil, nil}, "invalid int16"}, - {false, schemapb.DataType_Int32, []interface{}{nil, nil}, "invalid int32"}, - {false, schemapb.DataType_Int64, []interface{}{nil, nil}, "invalid int64"}, - {false, schemapb.DataType_Float, []interface{}{nil, nil}, "invalid float32"}, - {false, schemapb.DataType_Double, []interface{}{nil, 
nil}, "invalid float64"}, - {false, schemapb.DataType_VarChar, []interface{}{nil, nil}, "invalid varChar"}, - {false, schemapb.DataType_JSON, []interface{}{nil, nil}, "invalid json"}, - {false, schemapb.DataType_FloatVector, []interface{}{nil, nil}, "invalid floatvector"}, - {false, schemapb.DataType_BinaryVector, []interface{}{nil, nil}, "invalid binaryvector"}, - {false, schemapb.DataType_Float16Vector, []interface{}{nil, nil}, "invalid float16vector"}, - {false, schemapb.DataType_BFloat16Vector, []interface{}{nil, nil}, "invalid bfloat16vector"}, - - {false, schemapb.DataType_SparseFloatVector, []interface{}{nil, nil}, "invalid sparsefloatvector"}, - {false, schemapb.DataType_SparseFloatVector, []interface{}{[]byte{255}, []byte{15}}, "invalid sparsefloatvector"}, - {true, schemapb.DataType_SparseFloatVector, []interface{}{ - typeutil.CreateSparseFloatRow([]uint32{1, 2}, []float32{1.0, 2.0}), - typeutil.CreateSparseFloatRow([]uint32{3, 4}, []float32{1.0, 2.0}), - }, "valid sparsefloatvector"}, - } - - // make sure all new data types missed to handle would throw unexpected error - for typeName, typeValue := range schemapb.DataType_value { - tests = append(tests, struct { - isvalid bool - - tp schemapb.DataType - content []interface{} - - description string - }{false, schemapb.DataType(typeValue), []interface{}{nil, nil}, "invalid " + typeName}) - } - - for _, test := range tests { - t.Run(test.description, func(t *testing.T) { - if test.isvalid { - fd, err := interface2FieldData(test.tp, test.content, 2) - assert.NoError(t, err) - assert.Equal(t, 2, fd.RowNum()) - } else { - fd, err := interface2FieldData(test.tp, test.content, 2) - assert.True(t, errors.Is(err, errTransferType) || errors.Is(err, errUnknownDataType)) - assert.Nil(t, fd) - } - }) - } - }) - - t.Run("Test mergeDeltalogs", func(t *testing.T) { - t.Run("One segment", func(t *testing.T) { - invalidBlobs := map[UniqueID][]*Blob{ - 1: {}, - } - - blobs, err := getInt64DeltaBlobs( - 100, - []UniqueID{ - 1, - 2, - 3, - 4, - 5, - 1, - 2, - }, - []Timestamp{ - 20000, - 20001, - 20002, - 30000, - 50000, - 50000, - 10000, - }) - require.NoError(t, err) - - validBlobs := map[UniqueID][]*Blob{ - 100: blobs, - } - - tests := []struct { - isvalid bool - - dBlobs map[UniqueID][]*Blob - - description string - }{ - {false, invalidBlobs, "invalid dBlobs"}, - {true, validBlobs, "valid blobs"}, - } - - for _, test := range tests { - task := &compactionTask{ - done: make(chan struct{}, 1), - } - t.Run(test.description, func(t *testing.T) { - pk2ts, err := task.mergeDeltalogs(test.dBlobs) - if test.isvalid { - assert.NoError(t, err) - assert.Equal(t, 5, len(pk2ts)) - assert.EqualValues(t, 20001, pk2ts[UniqueID(2)]) - } else { - assert.Error(t, err) - assert.Nil(t, pk2ts) - } - }) - } - }) - - t.Run("Multiple segments", func(t *testing.T) { - tests := []struct { - segIDA UniqueID - dataApk []UniqueID - dataAts []Timestamp - - segIDB UniqueID - dataBpk []UniqueID - dataBts []Timestamp - - segIDC UniqueID - dataCpk []UniqueID - dataCts []Timestamp - - expectedpk2ts int - description string - }{ - { - 0, nil, nil, - 100, - []UniqueID{1, 2, 3}, - []Timestamp{20000, 30000, 20005}, - 200, - []UniqueID{4, 5, 6}, - []Timestamp{50000, 50001, 50002}, - 6, "2 segments", - }, - { - 300, - []UniqueID{10, 20}, - []Timestamp{20001, 40001}, - 100, - []UniqueID{1, 2, 3}, - []Timestamp{20000, 30000, 20005}, - 200, - []UniqueID{4, 5, 6}, - []Timestamp{50000, 50001, 50002}, - 8, "3 segments", - }, - } - - for _, test := range tests { - t.Run(test.description, func(t 
*testing.T) { - dBlobs := make(map[UniqueID][]*Blob) - if test.segIDA != UniqueID(0) { - d, err := getInt64DeltaBlobs(test.segIDA, test.dataApk, test.dataAts) - require.NoError(t, err) - dBlobs[test.segIDA] = d - } - if test.segIDB != UniqueID(0) { - d, err := getInt64DeltaBlobs(test.segIDB, test.dataBpk, test.dataBts) - require.NoError(t, err) - dBlobs[test.segIDB] = d - } - if test.segIDC != UniqueID(0) { - d, err := getInt64DeltaBlobs(test.segIDC, test.dataCpk, test.dataCts) - require.NoError(t, err) - dBlobs[test.segIDC] = d - } - - task := &compactionTask{ - done: make(chan struct{}, 1), - } - pk2ts, err := task.mergeDeltalogs(dBlobs) - assert.NoError(t, err) - assert.Equal(t, test.expectedpk2ts, len(pk2ts)) - }) - } - }) - }) - - t.Run("Test merge", func(t *testing.T) { - collectionID := int64(1) - meta := NewMetaFactory().GetCollectionMeta(collectionID, "test", schemapb.DataType_Int64) - - metaCache := metacache.NewMockMetaCache(t) - metaCache.EXPECT().Schema().Return(meta.GetSchema()).Maybe() - metaCache.EXPECT().GetSegmentByID(mock.Anything).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - segment := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: 1, - PartitionID: 0, - ID: id, - NumOfRows: 10, - }, nil) - return segment, true - }) - - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - alloc.EXPECT().AllocOne().Return(0, nil) - t.Run("Merge without expiration", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - iData := genInsertDataWithExpiredTS() - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - inPaths, statsPaths, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm) - assert.NoError(t, err) - assert.Equal(t, int64(2), numOfRow) - assert.Equal(t, 1, len(inPaths[0].GetBinlogs())) - assert.Equal(t, 1, len(statsPaths)) - assert.NotEqual(t, -1, inPaths[0].GetBinlogs()[0].GetTimestampFrom()) - assert.NotEqual(t, -1, inPaths[0].GetBinlogs()[0].GetTimestampTo()) - }) - t.Run("Merge without expiration2", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iData := genInsertDataWithExpiredTS() - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - BinLogMaxSize := Params.DataNodeCfg.BinLogMaxSize.GetValue() - defer func() { - 
Params.Save(Params.DataNodeCfg.BinLogMaxSize.Key, BinLogMaxSize) - }() - paramtable.Get().Save(Params.DataNodeCfg.BinLogMaxSize.Key, "64") - meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{} - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - inPaths, statsPaths, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm) - assert.NoError(t, err) - assert.Equal(t, int64(2), numOfRow) - assert.Equal(t, 1, len(inPaths[0].GetBinlogs())) - assert.Equal(t, 1, len(statsPaths)) - assert.Equal(t, 1, len(statsPaths[0].GetBinlogs())) - assert.NotEqual(t, -1, inPaths[0].GetBinlogs()[0].GetTimestampFrom()) - assert.NotEqual(t, -1, inPaths[0].GetBinlogs()[0].GetTimestampTo()) - }) - // set Params.DataNodeCfg.BinLogMaxSize.Key = 1 to generate multi binlogs, each has only one row - t.Run("merge_with_more_than_100rows", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - BinLogMaxSize := Params.DataNodeCfg.BinLogMaxSize.GetAsInt() - defer func() { - paramtable.Get().Save(Params.DataNodeCfg.BinLogMaxSize.Key, fmt.Sprintf("%d", BinLogMaxSize)) - }() - paramtable.Get().Save(Params.DataNodeCfg.BinLogMaxSize.Key, "1") - iData := genInsertData(101) - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, segmentId, iData) - assert.NoError(t, err) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - inPaths, statsPaths, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm) - assert.NoError(t, err) - assert.Equal(t, int64(101), numOfRow) - assert.Equal(t, 2, len(inPaths[0].GetBinlogs())) - assert.Equal(t, 1, len(statsPaths)) - for _, inpath := range inPaths { - assert.NotEqual(t, -1, inpath.GetBinlogs()[0].GetTimestampFrom()) - assert.NotEqual(t, -1, inpath.GetBinlogs()[0].GetTimestampTo()) - } - }) - - t.Run("Merge with expiration", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - iData := genInsertDataWithExpiredTS() - var 
partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - // 10 days in seconds - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - plan: &datapb.CompactionPlan{ - CollectionTtl: 864000, - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - done: make(chan struct{}, 1), - } - inPaths, statsPaths, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm) - assert.NoError(t, err) - assert.Equal(t, int64(0), numOfRow) - assert.Equal(t, 0, len(inPaths)) - assert.Equal(t, 0, len(statsPaths)) - }) - - t.Run("merge_with_rownum_zero", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iData := genInsertDataWithExpiredTS() - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64) - metaCache := metacache.NewMockMetaCache(t) - metaCache.EXPECT().Schema().Return(meta.GetSchema()).Maybe() - metaCache.EXPECT().GetSegmentByID(mock.Anything).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - segment := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: 1, - PartitionID: 0, - ID: id, - NumOfRows: 0, - }, nil) - return segment, true - }) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - _, _, _, err = ct.merge(context.Background(), allPaths, 2, 0, &etcdpb.CollectionMeta{ - Schema: meta.GetSchema(), - }, dm) - assert.Error(t, err) - }) - - t.Run("Merge with meta error", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - iData := genInsertDataWithExpiredTS() - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, 
alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - _, _, _, err = ct.merge(context.Background(), allPaths, 2, 0, &etcdpb.CollectionMeta{ - Schema: &schemapb.CollectionSchema{Fields: []*schemapb.FieldSchema{ - {DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ - {Key: common.DimKey, Value: "64"}, - }}, - }}, - }, dm) - assert.Error(t, err) - }) - - t.Run("Merge with meta type param error", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - iData := genInsertDataWithExpiredTS() - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - } - - _, _, _, err = ct.merge(context.Background(), allPaths, 2, 0, &etcdpb.CollectionMeta{ - Schema: &schemapb.CollectionSchema{Fields: []*schemapb.FieldSchema{ - {DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ - {Key: common.DimKey, Value: "bad_dim"}, - }}, - }}, - }, dm) - assert.Error(t, err) - }) - }) - t.Run("Test isExpiredEntity", func(t *testing.T) { - t.Run("When CompactionEntityExpiration is set math.MaxInt64", func(t *testing.T) { - ct := &compactionTask{ - plan: &datapb.CompactionPlan{ - CollectionTtl: math.MaxInt64, - }, - done: make(chan struct{}, 1), - } - - res := ct.isExpiredEntity(0, genTimestamp()) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, genTimestamp()) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(0, math.MaxInt64) - assert.Equal(t, true, res) - - res = ct.isExpiredEntity(math.MaxInt64, math.MaxInt64) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, 0) - assert.Equal(t, false, res) - }) - t.Run("When CompactionEntityExpiration is set MAX_ENTITY_EXPIRATION = 0", func(t *testing.T) { - // 0 means expiration is not enabled - ct := &compactionTask{ - plan: &datapb.CompactionPlan{ - CollectionTtl: 0, - }, - done: make(chan struct{}, 1), - } - res := ct.isExpiredEntity(0, genTimestamp()) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, genTimestamp()) - 
assert.Equal(t, false, res) - - res = ct.isExpiredEntity(0, math.MaxInt64) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, math.MaxInt64) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, 0) - assert.Equal(t, false, res) - }) - t.Run("When CompactionEntityExpiration is set 10 days", func(t *testing.T) { - // 10 days in seconds - ct := &compactionTask{ - plan: &datapb.CompactionPlan{ - CollectionTtl: 864000, - }, - done: make(chan struct{}, 1), - } - res := ct.isExpiredEntity(0, genTimestamp()) - assert.Equal(t, true, res) - - res = ct.isExpiredEntity(math.MaxInt64, genTimestamp()) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(0, math.MaxInt64) - assert.Equal(t, true, res) - - res = ct.isExpiredEntity(math.MaxInt64, math.MaxInt64) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, 0) - assert.Equal(t, false, res) - }) - }) - - t.Run("Test getNumRows error", func(t *testing.T) { - metaCache := metacache.NewMockMetaCache(t) - metaCache.EXPECT().GetSegmentByID(mock.Anything).Return(nil, false) - ct := &compactionTask{ - metaCache: metaCache, - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - { - SegmentID: 1, - }, - }, - }, - done: make(chan struct{}, 1), - } - - _, err := ct.getNumRows() - assert.Error(t, err, "segment not found") - }) -} - -func getInt64DeltaBlobs(segID UniqueID, pks []UniqueID, tss []Timestamp) ([]*Blob, error) { - primaryKeys := make([]storage.PrimaryKey, len(pks)) - for index, v := range pks { - primaryKeys[index] = storage.NewInt64PrimaryKey(v) - } - deltaData := &DeleteData{ - Pks: primaryKeys, - Tss: tss, - RowCount: int64(len(pks)), - } - - dCodec := storage.NewDeleteCodec() - blob, err := dCodec.Serialize(1, 10, segID, deltaData) - return []*Blob{blob}, err -} - -func TestCompactorInterfaceMethods(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(compactTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - notEmptySegmentBinlogs := []*datapb.CompactionSegmentBinlogs{{ - SegmentID: 100, - FieldBinlogs: nil, - Field2StatslogPaths: nil, - Deltalogs: nil, - }} - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") // Turn off auto expiration - t.Run("Test compact with all segment empty", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Call.Return(int64(11111), nil) - ctx, cancel := context.WithCancel(context.TODO()) - - mockSyncmgr := syncmgr.NewMockSyncManager(t) - mockSyncmgr.EXPECT().Block(mock.Anything).Return() - task := &compactionTask{ - ctx: ctx, - cancel: cancel, - Allocator: alloc, - done: make(chan struct{}, 1), - tr: timerecord.NewTimeRecorder("test"), - syncMgr: mockSyncmgr, - plan: &datapb.CompactionPlan{ - PlanID: 999, - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{{SegmentID: 100}}, - TimeoutInSeconds: 10, - Type: datapb.CompactionType_MixCompaction, - }, - } - - _, err := task.compact() - assert.ErrorIs(t, errIllegalCompactionPlan, err) - }) - - t.Run("Test compact invalid empty segment binlogs", func(t *testing.T) { - plan := &datapb.CompactionPlan{ - PlanID: 999, - SegmentBinlogs: nil, - TimeoutInSeconds: 10, - Type: datapb.CompactionType_MixCompaction, - } - ctx, cancel := context.WithCancel(context.Background()) - emptyTask := &compactionTask{ - ctx: ctx, - cancel: cancel, - tr: timerecord.NewTimeRecorder("test"), - - done: make(chan struct{}, 1), - plan: plan, - } - 
- _, err := emptyTask.compact() - assert.Error(t, err) - assert.ErrorIs(t, err, errIllegalCompactionPlan) - - emptyTask.complete() - emptyTask.stop() - }) - - t.Run("Test compact invalid AllocOnce failed", func(t *testing.T) { - mockAlloc := allocator.NewMockAllocator(t) - mockAlloc.EXPECT().AllocOne().Call.Return(int64(0), errors.New("mock allocone error")).Once() - plan := &datapb.CompactionPlan{ - PlanID: 999, - SegmentBinlogs: notEmptySegmentBinlogs, - TimeoutInSeconds: 10, - Type: datapb.CompactionType_MixCompaction, - } - task := &compactionTask{ - ctx: context.Background(), - tr: timerecord.NewTimeRecorder("test"), - Allocator: mockAlloc, - plan: plan, - } - - _, err := task.compact() - assert.Error(t, err) - }) - - t.Run("Test typeII compact valid", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - alloc.EXPECT().AllocOne().Call.Return(int64(19530), nil) - type testCase struct { - pkType schemapb.DataType - iData1 storage.FieldData - iData2 storage.FieldData - pks1 [2]storage.PrimaryKey - pks2 [2]storage.PrimaryKey - colID UniqueID - parID UniqueID - segID1 UniqueID - segID2 UniqueID - } - cases := []testCase{ - { - pkType: schemapb.DataType_Int64, - iData1: &storage.Int64FieldData{Data: []UniqueID{1}}, - iData2: &storage.Int64FieldData{Data: []UniqueID{9}}, - pks1: [2]storage.PrimaryKey{storage.NewInt64PrimaryKey(1), storage.NewInt64PrimaryKey(2)}, - pks2: [2]storage.PrimaryKey{storage.NewInt64PrimaryKey(9), storage.NewInt64PrimaryKey(10)}, - colID: 1, - parID: 10, - segID1: 100, - segID2: 101, - }, - { - pkType: schemapb.DataType_VarChar, - iData1: &storage.StringFieldData{Data: []string{"aaaa"}}, - iData2: &storage.StringFieldData{Data: []string{"milvus"}}, - pks1: [2]storage.PrimaryKey{storage.NewVarCharPrimaryKey("aaaa"), storage.NewVarCharPrimaryKey("bbbb")}, - pks2: [2]storage.PrimaryKey{storage.NewVarCharPrimaryKey("milvus"), storage.NewVarCharPrimaryKey("mmmm")}, - colID: 2, - parID: 11, - segID1: 102, - segID2: 103, - }, - } - - for _, c := range cases { - collName := "test_compact_coll_name" - meta := NewMetaFactory().GetCollectionMeta(c.colID, collName, c.pkType) - - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - mockKv := memkv.NewMemoryKV() - metaCache := metacache.NewMockMetaCache(t) - metaCache.EXPECT().Collection().Return(c.colID) - metaCache.EXPECT().Schema().Return(meta.GetSchema()) - syncMgr := syncmgr.NewMockSyncManager(t) - syncMgr.EXPECT().Block(mock.Anything).Return() - - bfs := metacache.NewBloomFilterSet() - bfs.UpdatePKRange(c.iData1) - seg1 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: c.colID, - PartitionID: c.parID, - ID: c.segID1, - NumOfRows: 2, - }, bfs) - bfs = metacache.NewBloomFilterSet() - bfs.UpdatePKRange(c.iData2) - seg2 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: c.colID, - PartitionID: c.parID, - ID: c.segID2, - NumOfRows: 2, - }, bfs) - - bfs = metacache.NewBloomFilterSet() - seg3 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: c.colID, - PartitionID: c.parID, - ID: 99999, - }, bfs) - - metaCache.EXPECT().GetSegmentByID(c.segID1).Return(seg1, true) - metaCache.EXPECT().GetSegmentByID(c.segID2).Return(seg2, true) - metaCache.EXPECT().GetSegmentByID(seg3.SegmentID()).Return(seg3, true) - metaCache.EXPECT().GetSegmentByID(mock.Anything).Return(nil, false) - - iData1 := genInsertDataWithPKs(c.pks1, c.pkType) - iblobs1, err 
:= iCodec.Serialize(c.parID, 0, iData1) - assert.NoError(t, err) - dData1 := &DeleteData{ - Pks: []storage.PrimaryKey{c.pks1[0]}, - Tss: []Timestamp{20000}, - RowCount: 1, - } - iData2 := genInsertDataWithPKs(c.pks2, c.pkType) - iblobs2, err := iCodec.Serialize(c.parID, 3, iData2) - assert.NoError(t, err) - dData2 := &DeleteData{ - Pks: []storage.PrimaryKey{c.pks2[0]}, - Tss: []Timestamp{30000}, - RowCount: 1, - } - - stats1, err := storage.NewPrimaryKeyStats(1, int64(c.pkType), 1) - require.NoError(t, err) - iPaths1, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID1, iblobs1) - require.NoError(t, err) - sPaths1, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID1, stats1, 2, iCodec) - require.NoError(t, err) - dPaths1, err := uploadDeltaLog(context.TODO(), mockbIO, alloc, meta.GetID(), c.parID, c.segID1, dData1) - require.NoError(t, err) - require.Equal(t, 12, len(iPaths1)) - - stats2, err := storage.NewPrimaryKeyStats(1, int64(c.pkType), 1) - require.NoError(t, err) - iPaths2, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID2, iblobs2) - require.NoError(t, err) - sPaths2, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID2, stats2, 2, iCodec) - require.NoError(t, err) - dPaths2, err := uploadDeltaLog(context.TODO(), mockbIO, alloc, meta.GetID(), c.parID, c.segID2, dData2) - require.NoError(t, err) - require.Equal(t, 12, len(iPaths2)) - - plan := &datapb.CompactionPlan{ - PlanID: 10080, - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - { - SegmentID: c.segID1, - FieldBinlogs: lo.Values(iPaths1), - Field2StatslogPaths: lo.Values(sPaths1), - Deltalogs: dPaths1, - }, - { - SegmentID: c.segID2, - FieldBinlogs: lo.Values(iPaths2), - Field2StatslogPaths: lo.Values(sPaths2), - Deltalogs: dPaths2, - }, - { - SegmentID: seg3.SegmentID(), // empty segment - }, - }, - StartTime: 0, - TimeoutInSeconds: 10, - Type: datapb.CompactionType_MergeCompaction, - Channel: "channelname", - } - - task := newCompactionTask(context.TODO(), mockbIO, metaCache, syncMgr, alloc, plan) - result, err := task.compact() - assert.NoError(t, err) - assert.NotNil(t, result) - - assert.Equal(t, plan.GetPlanID(), result.GetPlanID()) - assert.Equal(t, 1, len(result.GetSegments())) - - segment := result.GetSegments()[0] - assert.EqualValues(t, 19530, segment.GetSegmentID()) - assert.EqualValues(t, 2, segment.GetNumOfRows()) - assert.NotEmpty(t, segment.InsertLogs) - assert.NotEmpty(t, segment.Field2StatslogPaths) - - // New test, remove all the binlogs in memkv - err = mockKv.RemoveWithPrefix("/") - require.NoError(t, err) - plan.PlanID++ - - result, err = task.compact() - assert.NoError(t, err) - assert.NotNil(t, result) - - assert.Equal(t, plan.GetPlanID(), result.GetPlanID()) - assert.Equal(t, 1, len(result.GetSegments())) - - segment = result.GetSegments()[0] - assert.EqualValues(t, 19530, segment.GetSegmentID()) - assert.EqualValues(t, 2, segment.GetNumOfRows()) - assert.NotEmpty(t, segment.InsertLogs) - assert.NotEmpty(t, segment.Field2StatslogPaths) - } - }) - - t.Run("Test typeII compact 2 segments with the same pk", func(t *testing.T) { - // Test merge compactions, two segments with the same pk, one deletion pk=1 - // The merged segment 19530 should only contain 2 rows and both pk=2 - // Both pk = 1 rows of the two segments are compacted. 
- var collID, partID, segID1, segID2 UniqueID = 1, 10, 200, 201 - - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Call.Return(int64(19530), nil) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - - meta := NewMetaFactory().GetCollectionMeta(collID, "test_compact_coll_name", schemapb.DataType_Int64) - - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - - metaCache := metacache.NewMockMetaCache(t) - metaCache.EXPECT().Collection().Return(collID) - metaCache.EXPECT().Schema().Return(meta.GetSchema()) - syncMgr := syncmgr.NewMockSyncManager(t) - syncMgr.EXPECT().Block(mock.Anything).Return() - - bfs := metacache.NewBloomFilterSet() - bfs.UpdatePKRange(&storage.Int64FieldData{Data: []UniqueID{1}}) - seg1 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: collID, - PartitionID: partID, - ID: segID1, - NumOfRows: 2, - }, bfs) - bfs = metacache.NewBloomFilterSet() - bfs.UpdatePKRange(&storage.Int64FieldData{Data: []UniqueID{1}}) - seg2 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: collID, - PartitionID: partID, - ID: segID2, - NumOfRows: 2, - }, bfs) - - metaCache.EXPECT().GetSegmentByID(mock.Anything).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - switch id { - case segID1: - return seg1, true - case segID2: - return seg2, true - default: - return nil, false - } - }) - - // the same pk for segmentI and segmentII - pks := [2]storage.PrimaryKey{storage.NewInt64PrimaryKey(1), storage.NewInt64PrimaryKey(2)} - iData1 := genInsertDataWithPKs(pks, schemapb.DataType_Int64) - iblobs1, err := iCodec.Serialize(partID, 0, iData1) - assert.NoError(t, err) - iData2 := genInsertDataWithPKs(pks, schemapb.DataType_Int64) - iblobs2, err := iCodec.Serialize(partID, 1, iData2) - assert.NoError(t, err) - - pk1 := storage.NewInt64PrimaryKey(1) - dData1 := &DeleteData{ - Pks: []storage.PrimaryKey{pk1}, - Tss: []Timestamp{20000}, - RowCount: 1, - } - // empty dData2 - dData2 := &DeleteData{ - Pks: []storage.PrimaryKey{}, - Tss: []Timestamp{}, - RowCount: 0, - } - - stats1, err := storage.NewPrimaryKeyStats(1, int64(schemapb.DataType_Int64), 1) - require.NoError(t, err) - iPaths1, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID1, iblobs1) - require.NoError(t, err) - sPaths1, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID1, stats1, 1, iCodec) - require.NoError(t, err) - dPaths1, err := uploadDeltaLog(context.TODO(), mockbIO, alloc, meta.GetID(), partID, segID1, dData1) - require.NoError(t, err) - require.Equal(t, 12, len(iPaths1)) - - stats2, err := storage.NewPrimaryKeyStats(1, int64(schemapb.DataType_Int64), 1) - require.NoError(t, err) - iPaths2, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID2, iblobs2) - require.NoError(t, err) - sPaths2, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID2, stats2, 1, iCodec) - require.NoError(t, err) - dPaths2, err := uploadDeltaLog(context.TODO(), mockbIO, alloc, meta.GetID(), partID, segID2, dData2) - require.NoError(t, err) - require.Equal(t, 12, len(iPaths2)) - - plan := &datapb.CompactionPlan{ - PlanID: 20080, - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - { - SegmentID: segID1, - FieldBinlogs: lo.Values(iPaths1), - Field2StatslogPaths: lo.Values(sPaths1), - Deltalogs: dPaths1, - }, - { - SegmentID: segID2, - 
FieldBinlogs: lo.Values(iPaths2), - Field2StatslogPaths: lo.Values(sPaths2), - Deltalogs: dPaths2, - }, - }, - StartTime: 0, - TimeoutInSeconds: 10, - Type: datapb.CompactionType_MergeCompaction, - Channel: "channelname", - } - - task := newCompactionTask(context.TODO(), mockbIO, metaCache, syncMgr, alloc, plan) - result, err := task.compact() - assert.NoError(t, err) - assert.NotNil(t, result) - - assert.Equal(t, plan.GetPlanID(), result.GetPlanID()) - assert.Equal(t, 1, len(result.GetSegments())) - - segment := result.GetSegments()[0] - assert.EqualValues(t, 19530, segment.GetSegmentID()) - assert.EqualValues(t, 2, segment.GetNumOfRows()) - assert.NotEmpty(t, segment.InsertLogs) - assert.NotEmpty(t, segment.Field2StatslogPaths) - }) -} - -func TestInjectDone(t *testing.T) { - syncMgr := syncmgr.NewMockSyncManager(t) - - segmentIDs := []int64{100, 200, 300} - task := &compactionTask{ - plan: &datapb.CompactionPlan{ - SegmentBinlogs: lo.Map(segmentIDs, func(id int64, _ int) *datapb.CompactionSegmentBinlogs { - return &datapb.CompactionSegmentBinlogs{SegmentID: id} - }), - }, - syncMgr: syncMgr, - } - - for _, segmentID := range segmentIDs { - syncMgr.EXPECT().Unblock(segmentID).Return().Once() - } - - task.injectDone() - task.injectDone() -} - -func BenchmarkCompaction(b *testing.B) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(compactTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - - collectionID := int64(1) - meta := NewMetaFactory().GetCollectionMeta(collectionID, "test", schemapb.DataType_Int64) - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - iData := genInsertDataWithExpiredTS() - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(b, err) - var allPaths [][]string - alloc := allocator.NewMockAllocator(b) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - alloc.EXPECT().AllocOne().Call.Return(int64(19530), nil) - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(b, err) - assert.Equal(b, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(b, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - metaCache := metacache.NewMockMetaCache(b) - metaCache.EXPECT().Schema().Return(meta.GetSchema()).Maybe() - metaCache.EXPECT().GetSegmentByID(mock.Anything).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - segment := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: 1, - PartitionID: 0, - ID: id, - NumOfRows: 10, - }, nil) - return segment, true - }) - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - inPaths, statsPaths, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm) - assert.NoError(b, err) - assert.Equal(b, int64(2), numOfRow) - 
assert.Equal(b, 1, len(inPaths[0].GetBinlogs())) - assert.Equal(b, 1, len(statsPaths)) - assert.NotEqual(b, -1, inPaths[0].GetBinlogs()[0].GetTimestampFrom()) - assert.NotEqual(b, -1, inPaths[0].GetBinlogs()[0].GetTimestampTo()) - } -} diff --git a/internal/datanode/io/binlog_io.go b/internal/datanode/io/binlog_io.go index c60af8e992dda..317f267978132 100644 --- a/internal/datanode/io/binlog_io.go +++ b/internal/datanode/io/binlog_io.go @@ -101,7 +101,6 @@ func (b *BinlogIoImpl) Upload(ctx context.Context, kvs map[string][]byte) error }) return struct{}{}, err }) - futures = append(futures, future) } diff --git a/internal/datanode/l0_compactor.go b/internal/datanode/l0_compactor.go index 75bde780323c7..89157c407834b 100644 --- a/internal/datanode/l0_compactor.go +++ b/internal/datanode/l0_compactor.go @@ -23,12 +23,14 @@ import ( "sync" "time" + "github.com/cockroachdb/errors" "github.com/samber/lo" "go.opentelemetry.io/otel" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus/internal/datanode/allocator" + "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/datanode/io" iter "github.com/milvus-io/milvus/internal/datanode/iterators" "github.com/milvus-io/milvus/internal/datanode/metacache" @@ -50,7 +52,7 @@ import ( ) type levelZeroCompactionTask struct { - compactor + compaction.Compactor io.BinlogIO allocator allocator.Allocator @@ -67,6 +69,9 @@ type levelZeroCompactionTask struct { tr *timerecord.TimeRecorder } +// make sure compactionTask implements compactor interface +var _ compaction.Compactor = (*levelZeroCompactionTask)(nil) + func newLevelZeroCompactionTask( ctx context.Context, binlogIO io.BinlogIO, @@ -92,31 +97,31 @@ func newLevelZeroCompactionTask( } } -func (t *levelZeroCompactionTask) complete() { +func (t *levelZeroCompactionTask) Complete() { t.done <- struct{}{} } -func (t *levelZeroCompactionTask) stop() { +func (t *levelZeroCompactionTask) Stop() { t.cancel() <-t.done } -func (t *levelZeroCompactionTask) getPlanID() UniqueID { +func (t *levelZeroCompactionTask) GetPlanID() UniqueID { return t.plan.GetPlanID() } -func (t *levelZeroCompactionTask) getChannelName() string { +func (t *levelZeroCompactionTask) GetChannelName() string { return t.plan.GetChannel() } -func (t *levelZeroCompactionTask) getCollection() int64 { +func (t *levelZeroCompactionTask) GetCollection() int64 { return t.metacache.Collection() } // Do nothing for levelzero compaction -func (t *levelZeroCompactionTask) injectDone() {} +func (t *levelZeroCompactionTask) InjectDone() {} -func (t *levelZeroCompactionTask) compact() (*datapb.CompactionPlanResult, error) { +func (t *levelZeroCompactionTask) Compact() (*datapb.CompactionPlanResult, error) { ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(t.ctx, "L0Compact") defer span.End() log := log.Ctx(t.ctx).With(zap.Int64("planID", t.plan.GetPlanID()), zap.String("type", t.plan.GetType().String())) @@ -124,7 +129,7 @@ func (t *levelZeroCompactionTask) compact() (*datapb.CompactionPlanResult, error if !funcutil.CheckCtxValid(ctx) { log.Warn("compact wrong, task context done or timeout") - return nil, errContext + return nil, ctx.Err() } ctxTimeout, cancelAll := context.WithTimeout(ctx, time.Duration(t.plan.GetTimeoutInSeconds())*time.Second) @@ -139,7 +144,7 @@ func (t *levelZeroCompactionTask) compact() (*datapb.CompactionPlanResult, error }) if len(targetSegments) == 0 { log.Warn("compact wrong, not target sealed segments") - return nil, 
errIllegalCompactionPlan + return nil, errors.New("illegal compaction plan with empty target segments") } err := binlog.DecompressCompactionBinlogs(l0Segments) if err != nil { diff --git a/internal/datanode/l0_compactor_test.go b/internal/datanode/l0_compactor_test.go index 59b66086d67fa..08df575433ace 100644 --- a/internal/datanode/l0_compactor_test.go +++ b/internal/datanode/l0_compactor_test.go @@ -274,9 +274,9 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactLinear() { }).Times(2) s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil).Once() - s.Require().Equal(plan.GetPlanID(), s.task.getPlanID()) - s.Require().Equal(plan.GetChannel(), s.task.getChannelName()) - s.Require().EqualValues(1, s.task.getCollection()) + s.Require().Equal(plan.GetPlanID(), s.task.GetPlanID()) + s.Require().Equal(plan.GetChannel(), s.task.GetChannelName()) + s.Require().EqualValues(1, s.task.GetCollection()) l0Segments := lo.Filter(s.task.plan.GetSegmentBinlogs(), func(s *datapb.CompactionSegmentBinlogs, _ int) bool { return s.Level == datapb.SegmentLevel_L0 @@ -757,7 +757,7 @@ func (s *LevelZeroCompactionTaskSuite) TestFailed() { s.task.plan = plan - _, err := s.task.compact() + _, err := s.task.Compact() s.Error(err) }) } diff --git a/internal/datanode/mock_test.go b/internal/datanode/mock_test.go index ab9a99ad8f2f3..92286c4bbfcb0 100644 --- a/internal/datanode/mock_test.go +++ b/internal/datanode/mock_test.go @@ -1188,57 +1188,6 @@ func genEmptyInsertData() *InsertData { } } -func genInsertDataWithExpiredTS() *InsertData { - return &InsertData{ - Data: map[int64]storage.FieldData{ - 0: &storage.Int64FieldData{ - Data: []int64{11, 22}, - }, - 1: &storage.Int64FieldData{ - Data: []int64{329749364736000000, 329500223078400000}, // 2009-11-10 23:00:00 +0000 UTC, 2009-10-31 23:00:00 +0000 UTC - }, - 100: &storage.FloatVectorFieldData{ - Data: []float32{1.0, 6.0, 7.0, 8.0}, - Dim: 2, - }, - 101: &storage.BinaryVectorFieldData{ - Data: []byte{0, 255, 255, 255, 128, 128, 128, 0}, - Dim: 32, - }, - 102: &storage.BoolFieldData{ - Data: []bool{true, false}, - }, - 103: &storage.Int8FieldData{ - Data: []int8{5, 6}, - }, - 104: &storage.Int16FieldData{ - Data: []int16{7, 8}, - }, - 105: &storage.Int32FieldData{ - Data: []int32{9, 10}, - }, - 106: &storage.Int64FieldData{ - Data: []int64{1, 2}, - }, - 107: &storage.FloatFieldData{ - Data: []float32{2.333, 2.334}, - }, - 108: &storage.DoubleFieldData{ - Data: []float64{3.333, 3.334}, - }, - 109: &storage.StringFieldData{ - Data: []string{"test1", "test2"}, - }, - }, - } -} - -func genTimestamp() typeutil.Timestamp { - // Generate birthday of Golang - gb := time.Date(2009, time.Month(11), 10, 23, 0, 0, 0, time.UTC) - return tsoutil.ComposeTSByTime(gb, 0) -} - func genTestTickler() *etcdTickler { return newEtcdTickler(0, "", nil, nil, 0) } diff --git a/internal/datanode/services.go b/internal/datanode/services.go index ad8cb3039e7ec..a3ce9e8df8d1d 100644 --- a/internal/datanode/services.go +++ b/internal/datanode/services.go @@ -27,6 +27,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/datanode/importv2" "github.com/milvus-io/milvus/internal/datanode/io" "github.com/milvus-io/milvus/internal/datanode/metacache" @@ -235,10 +236,10 @@ func (node *DataNode) Compaction(ctx context.Context, req *datapb.CompactionPlan taskCtx := trace.ContextWithSpanContext(node.ctx, spanCtx)*/ taskCtx 
:= tracer.Propagate(ctx, node.ctx) - var task compactor + var task compaction.Compactor + binlogIO := io.NewBinlogIO(node.chunkManager, getOrCreateIOPool()) switch req.GetType() { case datapb.CompactionType_Level0DeleteCompaction: - binlogIO := io.NewBinlogIO(node.chunkManager, getOrCreateIOPool()) task = newLevelZeroCompactionTask( taskCtx, binlogIO, @@ -249,8 +250,7 @@ func (node *DataNode) Compaction(ctx context.Context, req *datapb.CompactionPlan req, ) case datapb.CompactionType_MixCompaction: - binlogIO := io.NewBinlogIO(node.chunkManager, getOrCreateIOPool()) - task = newCompactionTask( + task = compaction.NewMixCompactionTask( taskCtx, binlogIO, ds.metacache, diff --git a/internal/datanode/services_test.go b/internal/datanode/services_test.go index 94eed7f5193e5..a834b1907d849 100644 --- a/internal/datanode/services_test.go +++ b/internal/datanode/services_test.go @@ -34,6 +34,7 @@ import ( allocator2 "github.com/milvus-io/milvus/internal/allocator" "github.com/milvus-io/milvus/internal/datanode/allocator" "github.com/milvus-io/milvus/internal/datanode/broker" + "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/datanode/metacache" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/internalpb" @@ -159,8 +160,12 @@ func (s *DataNodeServicesSuite) TestGetComponentStates() { func (s *DataNodeServicesSuite) TestGetCompactionState() { s.Run("success", func() { - s.node.compactionExecutor.executing.Insert(int64(3), newMockCompactor(true)) - s.node.compactionExecutor.executing.Insert(int64(2), newMockCompactor(true)) + mockC := compaction.NewMockCompactor(s.T()) + s.node.compactionExecutor.executing.Insert(int64(3), mockC) + + mockC2 := compaction.NewMockCompactor(s.T()) + s.node.compactionExecutor.executing.Insert(int64(2), mockC2) + s.node.compactionExecutor.completed.Insert(int64(1), &datapb.CompactionPlanResult{ PlanID: 1, State: commonpb.CompactionState_Completed, @@ -168,9 +173,16 @@ func (s *DataNodeServicesSuite) TestGetCompactionState() { {SegmentID: 10}, }, }) + + s.node.compactionExecutor.completed.Insert(int64(4), &datapb.CompactionPlanResult{ + PlanID: 4, + Type: datapb.CompactionType_Level0DeleteCompaction, + State: commonpb.CompactionState_Completed, + }) + stat, err := s.node.GetCompactionState(s.ctx, nil) s.Assert().NoError(err) - s.Assert().Equal(3, len(stat.GetResults())) + s.Assert().Equal(4, len(stat.GetResults())) var mu sync.RWMutex cnt := 0 @@ -182,7 +194,7 @@ func (s *DataNodeServicesSuite) TestGetCompactionState() { } } mu.Lock() - s.Assert().Equal(1, cnt) + s.Assert().Equal(2, cnt) mu.Unlock() s.Assert().Equal(1, s.node.compactionExecutor.completed.Len()) diff --git a/internal/datanode/writebuffer/insert_buffer.go b/internal/datanode/writebuffer/insert_buffer.go index adc052d0013b2..417c258b34b44 100644 --- a/internal/datanode/writebuffer/insert_buffer.go +++ b/internal/datanode/writebuffer/insert_buffer.go @@ -67,10 +67,7 @@ func (b *BufferBase) MinTimestamp() typeutil.Timestamp { } func (b *BufferBase) GetTimeRange() *TimeRange { - return &TimeRange{ - timestampMin: b.TimestampFrom, - timestampMax: b.TimestampTo, - } + return NewTimeRange(b.TimestampFrom, b.TimestampTo) } type InsertBuffer struct { @@ -117,16 +114,16 @@ func (ib *InsertBuffer) Yield() *storage.InsertData { } func (ib *InsertBuffer) Buffer(inData *inData, startPos, endPos *msgpb.MsgPosition) int64 { - totalMemSize := int64(0) + bufferedSize := int64(0) for idx, data := range inData.data { 
storage.MergeInsertData(ib.buffer, data) tsData := inData.tsField[idx] // update buffer size ib.UpdateStatistics(int64(data.GetRowNum()), int64(data.GetMemorySize()), ib.getTimestampRange(tsData), startPos, endPos) - totalMemSize += int64(data.GetMemorySize()) + bufferedSize += int64(data.GetMemorySize()) } - return totalMemSize + return bufferedSize } func (ib *InsertBuffer) getTimestampRange(tsData *storage.Int64FieldData) TimeRange { diff --git a/internal/datanode/writebuffer/segment_buffer.go b/internal/datanode/writebuffer/segment_buffer.go index 8e14c3f4f869d..58ec2b4afda61 100644 --- a/internal/datanode/writebuffer/segment_buffer.go +++ b/internal/datanode/writebuffer/segment_buffer.go @@ -76,6 +76,21 @@ type TimeRange struct { timestampMax typeutil.Timestamp } +func NewTimeRange(min, max typeutil.Timestamp) *TimeRange { + return &TimeRange{ + timestampMin: min, + timestampMax: max, + } +} + +func (tr *TimeRange) GetMinTimestamp() typeutil.Timestamp { + return tr.timestampMin +} + +func (tr *TimeRange) GetMaxTimestamp() typeutil.Timestamp { + return tr.timestampMax +} + func (tr *TimeRange) Merge(other *TimeRange) { if other.timestampMin < tr.timestampMin { tr.timestampMin = other.timestampMin diff --git a/internal/metastore/kv/binlog/binlog.go b/internal/metastore/kv/binlog/binlog.go index 94e0c09cc73e6..8b1d47d0e5970 100644 --- a/internal/metastore/kv/binlog/binlog.go +++ b/internal/metastore/kv/binlog/binlog.go @@ -148,7 +148,7 @@ func DecompressBinLog(binlogType storage.BinlogType, collectionID, partitionID, for _, fieldBinlog := range fieldBinlogs { for _, binlog := range fieldBinlog.Binlogs { if binlog.GetLogPath() == "" { - path, err := buildLogPath(binlogType, collectionID, partitionID, + path, err := BuildLogPath(binlogType, collectionID, partitionID, segmentID, fieldBinlog.GetFieldID(), binlog.GetLogID()) if err != nil { return err @@ -161,7 +161,7 @@ func DecompressBinLog(binlogType storage.BinlogType, collectionID, partitionID, } // build a binlog path on the storage by metadata -func buildLogPath(binlogType storage.BinlogType, collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID) (string, error) { +func BuildLogPath(binlogType storage.BinlogType, collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID) (string, error) { chunkManagerRootPath := paramtable.Get().MinioCfg.RootPath.GetValue() if paramtable.Get().CommonCfg.StorageType.GetValue() == "local" { chunkManagerRootPath = paramtable.Get().LocalStorageCfg.Path.GetValue() diff --git a/internal/mocks/mock_datanode.go b/internal/mocks/mock_datanode.go index 3392028c1bd48..b6dc02ae27de5 100644 --- a/internal/mocks/mock_datanode.go +++ b/internal/mocks/mock_datanode.go @@ -64,8 +64,8 @@ type MockDataNode_CheckChannelOperationProgress_Call struct { } // CheckChannelOperationProgress is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.ChannelWatchInfo +// - _a0 context.Context +// - _a1 *datapb.ChannelWatchInfo func (_e *MockDataNode_Expecter) CheckChannelOperationProgress(_a0 interface{}, _a1 interface{}) *MockDataNode_CheckChannelOperationProgress_Call { return &MockDataNode_CheckChannelOperationProgress_Call{Call: _e.mock.On("CheckChannelOperationProgress", _a0, _a1)} } @@ -119,8 +119,8 @@ type MockDataNode_Compaction_Call struct { } // Compaction is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.CompactionPlan +// - _a0 context.Context +// - _a1 *datapb.CompactionPlan func (_e *MockDataNode_Expecter) Compaction(_a0 
interface{}, _a1 interface{}) *MockDataNode_Compaction_Call { return &MockDataNode_Compaction_Call{Call: _e.mock.On("Compaction", _a0, _a1)} } @@ -174,8 +174,8 @@ type MockDataNode_DropImport_Call struct { } // DropImport is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.DropImportRequest +// - _a0 context.Context +// - _a1 *datapb.DropImportRequest func (_e *MockDataNode_Expecter) DropImport(_a0 interface{}, _a1 interface{}) *MockDataNode_DropImport_Call { return &MockDataNode_DropImport_Call{Call: _e.mock.On("DropImport", _a0, _a1)} } @@ -229,8 +229,8 @@ type MockDataNode_FlushChannels_Call struct { } // FlushChannels is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.FlushChannelsRequest +// - _a0 context.Context +// - _a1 *datapb.FlushChannelsRequest func (_e *MockDataNode_Expecter) FlushChannels(_a0 interface{}, _a1 interface{}) *MockDataNode_FlushChannels_Call { return &MockDataNode_FlushChannels_Call{Call: _e.mock.On("FlushChannels", _a0, _a1)} } @@ -284,8 +284,8 @@ type MockDataNode_FlushSegments_Call struct { } // FlushSegments is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.FlushSegmentsRequest +// - _a0 context.Context +// - _a1 *datapb.FlushSegmentsRequest func (_e *MockDataNode_Expecter) FlushSegments(_a0 interface{}, _a1 interface{}) *MockDataNode_FlushSegments_Call { return &MockDataNode_FlushSegments_Call{Call: _e.mock.On("FlushSegments", _a0, _a1)} } @@ -380,8 +380,8 @@ type MockDataNode_GetCompactionState_Call struct { } // GetCompactionState is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.CompactionStateRequest +// - _a0 context.Context +// - _a1 *datapb.CompactionStateRequest func (_e *MockDataNode_Expecter) GetCompactionState(_a0 interface{}, _a1 interface{}) *MockDataNode_GetCompactionState_Call { return &MockDataNode_GetCompactionState_Call{Call: _e.mock.On("GetCompactionState", _a0, _a1)} } @@ -435,8 +435,8 @@ type MockDataNode_GetComponentStates_Call struct { } // GetComponentStates is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *milvuspb.GetComponentStatesRequest +// - _a0 context.Context +// - _a1 *milvuspb.GetComponentStatesRequest func (_e *MockDataNode_Expecter) GetComponentStates(_a0 interface{}, _a1 interface{}) *MockDataNode_GetComponentStates_Call { return &MockDataNode_GetComponentStates_Call{Call: _e.mock.On("GetComponentStates", _a0, _a1)} } @@ -490,8 +490,8 @@ type MockDataNode_GetMetrics_Call struct { } // GetMetrics is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *milvuspb.GetMetricsRequest +// - _a0 context.Context +// - _a1 *milvuspb.GetMetricsRequest func (_e *MockDataNode_Expecter) GetMetrics(_a0 interface{}, _a1 interface{}) *MockDataNode_GetMetrics_Call { return &MockDataNode_GetMetrics_Call{Call: _e.mock.On("GetMetrics", _a0, _a1)} } @@ -627,8 +627,8 @@ type MockDataNode_GetStatisticsChannel_Call struct { } // GetStatisticsChannel is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *internalpb.GetStatisticsChannelRequest +// - _a0 context.Context +// - _a1 *internalpb.GetStatisticsChannelRequest func (_e *MockDataNode_Expecter) GetStatisticsChannel(_a0 interface{}, _a1 interface{}) *MockDataNode_GetStatisticsChannel_Call { return &MockDataNode_GetStatisticsChannel_Call{Call: _e.mock.On("GetStatisticsChannel", _a0, _a1)} } @@ -682,8 +682,8 @@ type MockDataNode_ImportV2_Call struct { } // ImportV2 is a helper method 
to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.ImportRequest +// - _a0 context.Context +// - _a1 *datapb.ImportRequest func (_e *MockDataNode_Expecter) ImportV2(_a0 interface{}, _a1 interface{}) *MockDataNode_ImportV2_Call { return &MockDataNode_ImportV2_Call{Call: _e.mock.On("ImportV2", _a0, _a1)} } @@ -778,8 +778,8 @@ type MockDataNode_NotifyChannelOperation_Call struct { } // NotifyChannelOperation is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.ChannelOperationsRequest +// - _a0 context.Context +// - _a1 *datapb.ChannelOperationsRequest func (_e *MockDataNode_Expecter) NotifyChannelOperation(_a0 interface{}, _a1 interface{}) *MockDataNode_NotifyChannelOperation_Call { return &MockDataNode_NotifyChannelOperation_Call{Call: _e.mock.On("NotifyChannelOperation", _a0, _a1)} } @@ -833,8 +833,8 @@ type MockDataNode_PreImport_Call struct { } // PreImport is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.PreImportRequest +// - _a0 context.Context +// - _a1 *datapb.PreImportRequest func (_e *MockDataNode_Expecter) PreImport(_a0 interface{}, _a1 interface{}) *MockDataNode_PreImport_Call { return &MockDataNode_PreImport_Call{Call: _e.mock.On("PreImport", _a0, _a1)} } @@ -888,8 +888,8 @@ type MockDataNode_QueryImport_Call struct { } // QueryImport is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.QueryImportRequest +// - _a0 context.Context +// - _a1 *datapb.QueryImportRequest func (_e *MockDataNode_Expecter) QueryImport(_a0 interface{}, _a1 interface{}) *MockDataNode_QueryImport_Call { return &MockDataNode_QueryImport_Call{Call: _e.mock.On("QueryImport", _a0, _a1)} } @@ -943,8 +943,8 @@ type MockDataNode_QueryPreImport_Call struct { } // QueryPreImport is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.QueryPreImportRequest +// - _a0 context.Context +// - _a1 *datapb.QueryPreImportRequest func (_e *MockDataNode_Expecter) QueryPreImport(_a0 interface{}, _a1 interface{}) *MockDataNode_QueryPreImport_Call { return &MockDataNode_QueryPreImport_Call{Call: _e.mock.On("QueryPreImport", _a0, _a1)} } @@ -998,8 +998,8 @@ type MockDataNode_QuerySlot_Call struct { } // QuerySlot is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.QuerySlotRequest +// - _a0 context.Context +// - _a1 *datapb.QuerySlotRequest func (_e *MockDataNode_Expecter) QuerySlot(_a0 interface{}, _a1 interface{}) *MockDataNode_QuerySlot_Call { return &MockDataNode_QuerySlot_Call{Call: _e.mock.On("QuerySlot", _a0, _a1)} } @@ -1094,8 +1094,8 @@ type MockDataNode_ResendSegmentStats_Call struct { } // ResendSegmentStats is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.ResendSegmentStatsRequest +// - _a0 context.Context +// - _a1 *datapb.ResendSegmentStatsRequest func (_e *MockDataNode_Expecter) ResendSegmentStats(_a0 interface{}, _a1 interface{}) *MockDataNode_ResendSegmentStats_Call { return &MockDataNode_ResendSegmentStats_Call{Call: _e.mock.On("ResendSegmentStats", _a0, _a1)} } @@ -1128,7 +1128,7 @@ type MockDataNode_SetAddress_Call struct { } // SetAddress is a helper method to define mock.On call -// - address string +// - address string func (_e *MockDataNode_Expecter) SetAddress(address interface{}) *MockDataNode_SetAddress_Call { return &MockDataNode_SetAddress_Call{Call: _e.mock.On("SetAddress", address)} } @@ -1170,7 +1170,7 @@ type MockDataNode_SetDataCoordClient_Call struct { } // SetDataCoordClient 
is a helper method to define mock.On call -// - dataCoord types.DataCoordClient +// - dataCoord types.DataCoordClient func (_e *MockDataNode_Expecter) SetDataCoordClient(dataCoord interface{}) *MockDataNode_SetDataCoordClient_Call { return &MockDataNode_SetDataCoordClient_Call{Call: _e.mock.On("SetDataCoordClient", dataCoord)} } @@ -1203,7 +1203,7 @@ type MockDataNode_SetEtcdClient_Call struct { } // SetEtcdClient is a helper method to define mock.On call -// - etcdClient *clientv3.Client +// - etcdClient *clientv3.Client func (_e *MockDataNode_Expecter) SetEtcdClient(etcdClient interface{}) *MockDataNode_SetEtcdClient_Call { return &MockDataNode_SetEtcdClient_Call{Call: _e.mock.On("SetEtcdClient", etcdClient)} } @@ -1245,7 +1245,7 @@ type MockDataNode_SetRootCoordClient_Call struct { } // SetRootCoordClient is a helper method to define mock.On call -// - rootCoord types.RootCoordClient +// - rootCoord types.RootCoordClient func (_e *MockDataNode_Expecter) SetRootCoordClient(rootCoord interface{}) *MockDataNode_SetRootCoordClient_Call { return &MockDataNode_SetRootCoordClient_Call{Call: _e.mock.On("SetRootCoordClient", rootCoord)} } @@ -1299,8 +1299,8 @@ type MockDataNode_ShowConfigurations_Call struct { } // ShowConfigurations is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *internalpb.ShowConfigurationsRequest +// - _a0 context.Context +// - _a1 *internalpb.ShowConfigurationsRequest func (_e *MockDataNode_Expecter) ShowConfigurations(_a0 interface{}, _a1 interface{}) *MockDataNode_ShowConfigurations_Call { return &MockDataNode_ShowConfigurations_Call{Call: _e.mock.On("ShowConfigurations", _a0, _a1)} } @@ -1436,8 +1436,8 @@ type MockDataNode_SyncSegments_Call struct { } // SyncSegments is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.SyncSegmentsRequest +// - _a0 context.Context +// - _a1 *datapb.SyncSegmentsRequest func (_e *MockDataNode_Expecter) SyncSegments(_a0 interface{}, _a1 interface{}) *MockDataNode_SyncSegments_Call { return &MockDataNode_SyncSegments_Call{Call: _e.mock.On("SyncSegments", _a0, _a1)} } @@ -1470,7 +1470,7 @@ type MockDataNode_UpdateStateCode_Call struct { } // UpdateStateCode is a helper method to define mock.On call -// - stateCode commonpb.StateCode +// - stateCode commonpb.StateCode func (_e *MockDataNode_Expecter) UpdateStateCode(stateCode interface{}) *MockDataNode_UpdateStateCode_Call { return &MockDataNode_UpdateStateCode_Call{Call: _e.mock.On("UpdateStateCode", stateCode)} } @@ -1524,8 +1524,8 @@ type MockDataNode_WatchDmChannels_Call struct { } // WatchDmChannels is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.WatchDmChannelsRequest +// - _a0 context.Context +// - _a1 *datapb.WatchDmChannelsRequest func (_e *MockDataNode_Expecter) WatchDmChannels(_a0 interface{}, _a1 interface{}) *MockDataNode_WatchDmChannels_Call { return &MockDataNode_WatchDmChannels_Call{Call: _e.mock.On("WatchDmChannels", _a0, _a1)} } diff --git a/internal/mocks/mock_datanode_client.go b/internal/mocks/mock_datanode_client.go index 78f7aeec32131..f16ff8d1705bb 100644 --- a/internal/mocks/mock_datanode_client.go +++ b/internal/mocks/mock_datanode_client.go @@ -70,9 +70,9 @@ type MockDataNodeClient_CheckChannelOperationProgress_Call struct { } // CheckChannelOperationProgress is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.ChannelWatchInfo -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.ChannelWatchInfo +// - opts 
...grpc.CallOption func (_e *MockDataNodeClient_Expecter) CheckChannelOperationProgress(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_CheckChannelOperationProgress_Call { return &MockDataNodeClient_CheckChannelOperationProgress_Call{Call: _e.mock.On("CheckChannelOperationProgress", append([]interface{}{ctx, in}, opts...)...)} @@ -181,9 +181,9 @@ type MockDataNodeClient_Compaction_Call struct { } // Compaction is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.CompactionPlan -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.CompactionPlan +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) Compaction(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_Compaction_Call { return &MockDataNodeClient_Compaction_Call{Call: _e.mock.On("Compaction", append([]interface{}{ctx, in}, opts...)...)} @@ -251,9 +251,9 @@ type MockDataNodeClient_DropImport_Call struct { } // DropImport is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.DropImportRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.DropImportRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) DropImport(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_DropImport_Call { return &MockDataNodeClient_DropImport_Call{Call: _e.mock.On("DropImport", append([]interface{}{ctx, in}, opts...)...)} @@ -321,9 +321,9 @@ type MockDataNodeClient_FlushChannels_Call struct { } // FlushChannels is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.FlushChannelsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.FlushChannelsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) FlushChannels(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_FlushChannels_Call { return &MockDataNodeClient_FlushChannels_Call{Call: _e.mock.On("FlushChannels", append([]interface{}{ctx, in}, opts...)...)} @@ -391,9 +391,9 @@ type MockDataNodeClient_FlushSegments_Call struct { } // FlushSegments is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.FlushSegmentsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.FlushSegmentsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) FlushSegments(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_FlushSegments_Call { return &MockDataNodeClient_FlushSegments_Call{Call: _e.mock.On("FlushSegments", append([]interface{}{ctx, in}, opts...)...)} @@ -461,9 +461,9 @@ type MockDataNodeClient_GetCompactionState_Call struct { } // GetCompactionState is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.CompactionStateRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.CompactionStateRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) GetCompactionState(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_GetCompactionState_Call { return &MockDataNodeClient_GetCompactionState_Call{Call: _e.mock.On("GetCompactionState", append([]interface{}{ctx, in}, opts...)...)} @@ -531,9 +531,9 @@ type MockDataNodeClient_GetComponentStates_Call struct { } // GetComponentStates is a helper method to define mock.On call -// - ctx context.Context -// - in *milvuspb.GetComponentStatesRequest -// - opts ...grpc.CallOption +// 
- ctx context.Context +// - in *milvuspb.GetComponentStatesRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) GetComponentStates(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_GetComponentStates_Call { return &MockDataNodeClient_GetComponentStates_Call{Call: _e.mock.On("GetComponentStates", append([]interface{}{ctx, in}, opts...)...)} @@ -601,9 +601,9 @@ type MockDataNodeClient_GetMetrics_Call struct { } // GetMetrics is a helper method to define mock.On call -// - ctx context.Context -// - in *milvuspb.GetMetricsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *milvuspb.GetMetricsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) GetMetrics(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_GetMetrics_Call { return &MockDataNodeClient_GetMetrics_Call{Call: _e.mock.On("GetMetrics", append([]interface{}{ctx, in}, opts...)...)} @@ -671,9 +671,9 @@ type MockDataNodeClient_GetStatisticsChannel_Call struct { } // GetStatisticsChannel is a helper method to define mock.On call -// - ctx context.Context -// - in *internalpb.GetStatisticsChannelRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *internalpb.GetStatisticsChannelRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) GetStatisticsChannel(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_GetStatisticsChannel_Call { return &MockDataNodeClient_GetStatisticsChannel_Call{Call: _e.mock.On("GetStatisticsChannel", append([]interface{}{ctx, in}, opts...)...)} @@ -741,9 +741,9 @@ type MockDataNodeClient_ImportV2_Call struct { } // ImportV2 is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.ImportRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.ImportRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) ImportV2(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_ImportV2_Call { return &MockDataNodeClient_ImportV2_Call{Call: _e.mock.On("ImportV2", append([]interface{}{ctx, in}, opts...)...)} @@ -811,9 +811,9 @@ type MockDataNodeClient_NotifyChannelOperation_Call struct { } // NotifyChannelOperation is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.ChannelOperationsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.ChannelOperationsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) NotifyChannelOperation(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_NotifyChannelOperation_Call { return &MockDataNodeClient_NotifyChannelOperation_Call{Call: _e.mock.On("NotifyChannelOperation", append([]interface{}{ctx, in}, opts...)...)} @@ -881,9 +881,9 @@ type MockDataNodeClient_PreImport_Call struct { } // PreImport is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.PreImportRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.PreImportRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) PreImport(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_PreImport_Call { return &MockDataNodeClient_PreImport_Call{Call: _e.mock.On("PreImport", append([]interface{}{ctx, in}, opts...)...)} @@ -951,9 +951,9 @@ type MockDataNodeClient_QueryImport_Call struct { } // QueryImport is a helper method to define mock.On call -// - ctx context.Context -// - in 
*datapb.QueryImportRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.QueryImportRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) QueryImport(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_QueryImport_Call { return &MockDataNodeClient_QueryImport_Call{Call: _e.mock.On("QueryImport", append([]interface{}{ctx, in}, opts...)...)} @@ -1021,9 +1021,9 @@ type MockDataNodeClient_QueryPreImport_Call struct { } // QueryPreImport is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.QueryPreImportRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.QueryPreImportRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) QueryPreImport(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_QueryPreImport_Call { return &MockDataNodeClient_QueryPreImport_Call{Call: _e.mock.On("QueryPreImport", append([]interface{}{ctx, in}, opts...)...)} @@ -1091,9 +1091,9 @@ type MockDataNodeClient_QuerySlot_Call struct { } // QuerySlot is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.QuerySlotRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.QuerySlotRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) QuerySlot(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_QuerySlot_Call { return &MockDataNodeClient_QuerySlot_Call{Call: _e.mock.On("QuerySlot", append([]interface{}{ctx, in}, opts...)...)} @@ -1161,9 +1161,9 @@ type MockDataNodeClient_ResendSegmentStats_Call struct { } // ResendSegmentStats is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.ResendSegmentStatsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.ResendSegmentStatsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) ResendSegmentStats(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_ResendSegmentStats_Call { return &MockDataNodeClient_ResendSegmentStats_Call{Call: _e.mock.On("ResendSegmentStats", append([]interface{}{ctx, in}, opts...)...)} @@ -1231,9 +1231,9 @@ type MockDataNodeClient_ShowConfigurations_Call struct { } // ShowConfigurations is a helper method to define mock.On call -// - ctx context.Context -// - in *internalpb.ShowConfigurationsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *internalpb.ShowConfigurationsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) ShowConfigurations(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_ShowConfigurations_Call { return &MockDataNodeClient_ShowConfigurations_Call{Call: _e.mock.On("ShowConfigurations", append([]interface{}{ctx, in}, opts...)...)} @@ -1301,9 +1301,9 @@ type MockDataNodeClient_SyncSegments_Call struct { } // SyncSegments is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.SyncSegmentsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.SyncSegmentsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) SyncSegments(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_SyncSegments_Call { return &MockDataNodeClient_SyncSegments_Call{Call: _e.mock.On("SyncSegments", append([]interface{}{ctx, in}, opts...)...)} @@ -1371,9 +1371,9 @@ type MockDataNodeClient_WatchDmChannels_Call struct { } // WatchDmChannels is a 
helper method to define mock.On call -// - ctx context.Context -// - in *datapb.WatchDmChannelsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.WatchDmChannelsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) WatchDmChannels(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_WatchDmChannels_Call { return &MockDataNodeClient_WatchDmChannels_Call{Call: _e.mock.On("WatchDmChannels", append([]interface{}{ctx, in}, opts...)...)} From be77ceba84580f6f0d0bd47a523592d5de5892da Mon Sep 17 00:00:00 2001 From: "cai.zhang" Date: Thu, 23 May 2024 10:31:40 +0800 Subject: [PATCH 040/126] enhance: Use proto for passing info in cgo (#33184) issue: #33183 --------- Signed-off-by: Cai Zhang --- internal/core/src/indexbuilder/index_c.cpp | 182 +++++++++---- internal/core/src/indexbuilder/index_c.h | 8 +- internal/core/unittest/test_index_wrapper.cpp | 2 +- internal/datacoord/index_builder.go | 40 +-- internal/datacoord/index_builder_test.go | 75 ++++- internal/indexnode/indexnode_service.go | 2 + internal/indexnode/task.go | 256 +++++++++--------- internal/indexnode/task_test.go | 14 +- internal/indexnode/util.go | 12 + internal/indexnode/util_test.go | 41 +++ internal/proto/index_cgo_msg.proto | 50 ++++ internal/proto/index_coord.proto | 4 + internal/util/indexcgowrapper/index.go | 25 +- 13 files changed, 501 insertions(+), 210 deletions(-) create mode 100644 internal/indexnode/util_test.go diff --git a/internal/core/src/indexbuilder/index_c.cpp b/internal/core/src/indexbuilder/index_c.cpp index 28a629052cad7..ae319cc26d61f 100644 --- a/internal/core/src/indexbuilder/index_c.cpp +++ b/internal/core/src/indexbuilder/index_c.cpp @@ -84,29 +84,95 @@ CreateIndexV0(enum CDataType dtype, return status; } +milvus::storage::StorageConfig +get_storage_config(const milvus::proto::indexcgo::StorageConfig& config) { + auto storage_config = milvus::storage::StorageConfig(); + storage_config.address = std::string(config.address()); + storage_config.bucket_name = std::string(config.bucket_name()); + storage_config.access_key_id = std::string(config.access_keyid()); + storage_config.access_key_value = std::string(config.secret_access_key()); + storage_config.root_path = std::string(config.root_path()); + storage_config.storage_type = std::string(config.storage_type()); + storage_config.cloud_provider = std::string(config.cloud_provider()); + storage_config.iam_endpoint = std::string(config.iamendpoint()); + storage_config.cloud_provider = std::string(config.cloud_provider()); + storage_config.useSSL = config.usessl(); + storage_config.sslCACert = config.sslcacert(); + storage_config.useIAM = config.useiam(); + storage_config.region = config.region(); + storage_config.useVirtualHost = config.use_virtual_host(); + storage_config.requestTimeoutMs = config.request_timeout_ms(); + return storage_config; +} + +milvus::OptFieldT +get_opt_field(const ::google::protobuf::RepeatedPtrField< + milvus::proto::indexcgo::OptionalFieldInfo>& field_infos) { + milvus::OptFieldT opt_fields_map; + for (const auto& field_info : field_infos) { + auto field_id = field_info.fieldid(); + if (opt_fields_map.find(field_id) == opt_fields_map.end()) { + opt_fields_map[field_id] = { + field_info.field_name(), + static_cast(field_info.field_type()), + {}}; + } + for (const auto& str : field_info.data_paths()) { + std::get<2>(opt_fields_map[field_id]).emplace_back(str); + } + } + + return opt_fields_map; +} + +milvus::Config +get_config(std::unique_ptr& info) { + 
milvus::Config config; + for (auto i = 0; i < info->index_params().size(); ++i) { + const auto& param = info->index_params(i); + config[param.key()] = param.value(); + } + + for (auto i = 0; i < info->type_params().size(); ++i) { + const auto& param = info->type_params(i); + config[param.key()] = param.value(); + } + + config["insert_files"] = info->insert_files(); + if (info->opt_fields().size()) { + config["opt_fields"] = get_opt_field(info->opt_fields()); + } + + return config; +} + CStatus -CreateIndex(CIndex* res_index, CBuildIndexInfo c_build_index_info) { +CreateIndex(CIndex* res_index, + const uint8_t* serialized_build_index_info, + const uint64_t len) { try { - auto build_index_info = (BuildIndexInfo*)c_build_index_info; - auto field_type = build_index_info->field_type; + auto build_index_info = + std::make_unique(); + auto res = + build_index_info->ParseFromArray(serialized_build_index_info, len); + AssertInfo(res, "Unmarshall build index info failed"); - milvus::index::CreateIndexInfo index_info; - index_info.field_type = build_index_info->field_type; + auto field_type = + static_cast(build_index_info->field_schema().data_type()); - auto& config = build_index_info->config; - config["insert_files"] = build_index_info->insert_files; - if (build_index_info->opt_fields.size()) { - config["opt_fields"] = build_index_info->opt_fields; - } + milvus::index::CreateIndexInfo index_info; + index_info.field_type = field_type; + auto storage_config = + get_storage_config(build_index_info->storage_config()); + auto config = get_config(build_index_info); // get index type auto index_type = milvus::index::GetValueFromConfig( config, "index_type"); AssertInfo(index_type.has_value(), "index type is empty"); index_info.index_type = index_type.value(); - auto engine_version = build_index_info->index_engine_version; - + auto engine_version = build_index_info->current_index_version(); index_info.index_engine_version = engine_version; config[milvus::index::INDEX_ENGINE_VERSION] = std::to_string(engine_version); @@ -121,24 +187,30 @@ CreateIndex(CIndex* res_index, CBuildIndexInfo c_build_index_info) { // init file manager milvus::storage::FieldDataMeta field_meta{ - build_index_info->collection_id, - build_index_info->partition_id, - build_index_info->segment_id, - build_index_info->field_id}; - - milvus::storage::IndexMeta index_meta{build_index_info->segment_id, - build_index_info->field_id, - build_index_info->index_build_id, - build_index_info->index_version}; - auto chunk_manager = milvus::storage::CreateChunkManager( - build_index_info->storage_config); + build_index_info->collectionid(), + build_index_info->partitionid(), + build_index_info->segmentid(), + build_index_info->field_schema().fieldid()}; + + milvus::storage::IndexMeta index_meta{ + build_index_info->segmentid(), + build_index_info->field_schema().fieldid(), + build_index_info->buildid(), + build_index_info->index_version(), + "", + build_index_info->field_schema().name(), + field_type, + build_index_info->dim(), + }; + auto chunk_manager = + milvus::storage::CreateChunkManager(storage_config); milvus::storage::FileManagerContext fileManagerContext( field_meta, index_meta, chunk_manager); auto index = milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex( - build_index_info->field_type, config, fileManagerContext); + field_type, config, fileManagerContext); index->Build(); *res_index = index.release(); auto status = CStatus(); @@ -159,22 +231,32 @@ CreateIndex(CIndex* res_index, CBuildIndexInfo c_build_index_info) { } CStatus 
-CreateIndexV2(CIndex* res_index, CBuildIndexInfo c_build_index_info) { +CreateIndexV2(CIndex* res_index, + const uint8_t* serialized_build_index_info, + const uint64_t len) { try { - auto build_index_info = (BuildIndexInfo*)c_build_index_info; - auto field_type = build_index_info->field_type; + auto build_index_info = + std::make_unique(); + auto res = + build_index_info->ParseFromArray(serialized_build_index_info, len); + AssertInfo(res, "Unmarshall build index info failed"); + auto field_type = + static_cast(build_index_info->field_schema().data_type()); + milvus::index::CreateIndexInfo index_info; - index_info.field_type = build_index_info->field_type; - index_info.dim = build_index_info->dim; + index_info.field_type = field_type; + index_info.dim = build_index_info->dim(); - auto& config = build_index_info->config; + auto storage_config = + get_storage_config(build_index_info->storage_config()); + auto config = get_config(build_index_info); // get index type auto index_type = milvus::index::GetValueFromConfig( config, "index_type"); AssertInfo(index_type.has_value(), "index type is empty"); index_info.index_type = index_type.value(); - auto engine_version = build_index_info->index_engine_version; + auto engine_version = build_index_info->current_index_version(); index_info.index_engine_version = engine_version; config[milvus::index::INDEX_ENGINE_VERSION] = std::to_string(engine_version); @@ -188,39 +270,39 @@ CreateIndexV2(CIndex* res_index, CBuildIndexInfo c_build_index_info) { } milvus::storage::FieldDataMeta field_meta{ - build_index_info->collection_id, - build_index_info->partition_id, - build_index_info->segment_id, - build_index_info->field_id}; + build_index_info->collectionid(), + build_index_info->partitionid(), + build_index_info->segmentid(), + build_index_info->field_schema().fieldid()}; milvus::storage::IndexMeta index_meta{ - build_index_info->segment_id, - build_index_info->field_id, - build_index_info->index_build_id, - build_index_info->index_version, - build_index_info->field_name, + build_index_info->segmentid(), + build_index_info->field_schema().fieldid(), + build_index_info->buildid(), + build_index_info->index_version(), "", - build_index_info->field_type, - build_index_info->dim, + build_index_info->field_schema().name(), + field_type, + build_index_info->dim(), }; auto store_space = milvus_storage::Space::Open( - build_index_info->data_store_path, + build_index_info->store_path(), milvus_storage::Options{nullptr, - build_index_info->data_store_version}); + build_index_info->store_version()}); AssertInfo(store_space.ok() && store_space.has_value(), "create space failed: {}", store_space.status().ToString()); auto index_space = milvus_storage::Space::Open( - build_index_info->index_store_path, + build_index_info->index_store_path(), milvus_storage::Options{.schema = store_space.value()->schema()}); AssertInfo(index_space.ok() && index_space.has_value(), "create space failed: {}", index_space.status().ToString()); LOG_INFO("init space success"); - auto chunk_manager = milvus::storage::CreateChunkManager( - build_index_info->storage_config); + auto chunk_manager = + milvus::storage::CreateChunkManager(storage_config); milvus::storage::FileManagerContext fileManagerContext( field_meta, index_meta, @@ -229,9 +311,9 @@ CreateIndexV2(CIndex* res_index, CBuildIndexInfo c_build_index_info) { auto index = milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex( - build_index_info->field_type, - build_index_info->field_name, - build_index_info->dim, + field_type, + 
build_index_info->field_schema().name(), + build_index_info->dim(), config, fileManagerContext, std::move(store_space.value())); diff --git a/internal/core/src/indexbuilder/index_c.h b/internal/core/src/indexbuilder/index_c.h index 16cd76e4531ce..53ce5552fef0a 100644 --- a/internal/core/src/indexbuilder/index_c.h +++ b/internal/core/src/indexbuilder/index_c.h @@ -28,7 +28,9 @@ CreateIndexV0(enum CDataType dtype, CIndex* res_index); CStatus -CreateIndex(CIndex* res_index, CBuildIndexInfo c_build_index_info); +CreateIndex(CIndex* res_index, + const uint8_t* serialized_build_index_info, + const uint64_t len); CStatus DeleteIndex(CIndex index); @@ -130,7 +132,9 @@ CStatus SerializeIndexAndUpLoadV2(CIndex index, CBinarySet* c_binary_set); CStatus -CreateIndexV2(CIndex* res_index, CBuildIndexInfo c_build_index_info); +CreateIndexV2(CIndex* res_index, + const uint8_t* serialized_build_index_info, + const uint64_t len); CStatus AppendIndexStorageInfo(CBuildIndexInfo c_build_index_info, diff --git a/internal/core/unittest/test_index_wrapper.cpp b/internal/core/unittest/test_index_wrapper.cpp index 39f6841957dc4..79581bc96947b 100644 --- a/internal/core/unittest/test_index_wrapper.cpp +++ b/internal/core/unittest/test_index_wrapper.cpp @@ -23,7 +23,7 @@ using namespace milvus; using namespace milvus::segcore; -using namespace milvus::proto::indexcgo; +using namespace milvus::proto; using Param = std::pair; diff --git a/internal/datacoord/index_builder.go b/internal/datacoord/index_builder.go index c56554561371c..9a83f2384cc98 100644 --- a/internal/datacoord/index_builder.go +++ b/internal/datacoord/index_builder.go @@ -348,28 +348,29 @@ func (ib *indexBuilder) process(buildID UniqueID) bool { } } var req *indexpb.CreateJobRequest - if Params.CommonCfg.EnableStorageV2.GetAsBool() { - collectionInfo, err := ib.handler.GetCollection(ib.ctx, segment.GetCollectionID()) - if err != nil { - log.Info("index builder get collection info failed", zap.Int64("collectionID", segment.GetCollectionID()), zap.Error(err)) - return false - } + collectionInfo, err := ib.handler.GetCollection(ib.ctx, segment.GetCollectionID()) + if err != nil { + log.Ctx(ib.ctx).Info("index builder get collection info failed", zap.Int64("collectionID", segment.GetCollectionID()), zap.Error(err)) + return false + } - schema := collectionInfo.Schema - var field *schemapb.FieldSchema + schema := collectionInfo.Schema + var field *schemapb.FieldSchema - for _, f := range schema.Fields { - if f.FieldID == fieldID { - field = f - break - } - } - - dim, err := storage.GetDimFromParams(field.TypeParams) - if err != nil { - return false + for _, f := range schema.Fields { + if f.FieldID == fieldID { + field = f + break } + } + dim, err := storage.GetDimFromParams(field.TypeParams) + if err != nil { + log.Ctx(ib.ctx).Warn("failed to get dim from field type params", + zap.String("field type", field.GetDataType().String()), zap.Error(err)) + // don't return, maybe field is scalar field or sparseFloatVector + } + if Params.CommonCfg.EnableStorageV2.GetAsBool() { storePath, err := itypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue(), segment.GetID()) if err != nil { log.Ctx(ib.ctx).Warn("failed to get storage uri", zap.Error(err)) @@ -403,6 +404,7 @@ func (ib *indexBuilder) process(buildID UniqueID) bool { CurrentIndexVersion: ib.indexEngineVersionManager.GetCurrentIndexEngineVersion(), DataIds: binlogIDs, OptionalScalarFields: optionalFields, + Field: field, } } else { req = 
&indexpb.CreateJobRequest{ @@ -421,6 +423,8 @@ func (ib *indexBuilder) process(buildID UniqueID) bool { SegmentID: segment.GetID(), FieldID: fieldID, OptionalScalarFields: optionalFields, + Dim: int64(dim), + Field: field, } } diff --git a/internal/datacoord/index_builder_test.go b/internal/datacoord/index_builder_test.go index 46d8c7fe3f43e..9488c70f5e818 100644 --- a/internal/datacoord/index_builder_test.go +++ b/internal/datacoord/index_builder_test.go @@ -675,7 +675,30 @@ func TestIndexBuilder(t *testing.T) { chunkManager := &mocks.ChunkManager{} chunkManager.EXPECT().RootPath().Return("root") - ib := newIndexBuilder(ctx, mt, nodeManager, chunkManager, newIndexEngineVersionManager(), nil) + handler := NewNMockHandler(t) + handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{ + ID: collID, + Schema: &schemapb.CollectionSchema{ + Name: "coll", + Fields: []*schemapb.FieldSchema{ + { + FieldID: fieldID, + Name: "vec", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: "dim", + Value: "128", + }, + }, + }, + }, + EnableDynamicField: false, + Properties: nil, + }, + }, nil) + + ib := newIndexBuilder(ctx, mt, nodeManager, chunkManager, newIndexEngineVersionManager(), handler) assert.Equal(t, 6, len(ib.tasks)) assert.Equal(t, indexTaskInit, ib.tasks[buildID]) @@ -741,6 +764,30 @@ func TestIndexBuilder_Error(t *testing.T) { chunkManager := &mocks.ChunkManager{} chunkManager.EXPECT().RootPath().Return("root") + + handler := NewNMockHandler(t) + handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{ + ID: collID, + Schema: &schemapb.CollectionSchema{ + Name: "coll", + Fields: []*schemapb.FieldSchema{ + { + FieldID: fieldID, + Name: "vec", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: "dim", + Value: "128", + }, + }, + }, + }, + EnableDynamicField: false, + Properties: nil, + }, + }, nil) + ib := &indexBuilder{ ctx: context.Background(), tasks: map[int64]indexTaskState{ @@ -749,6 +796,7 @@ func TestIndexBuilder_Error(t *testing.T) { meta: createMetaTable(ec), chunkManager: chunkManager, indexEngineVersionManager: newIndexEngineVersionManager(), + handler: handler, } t.Run("meta not exist", func(t *testing.T) { @@ -1414,9 +1462,32 @@ func TestVecIndexWithOptionalScalarField(t *testing.T) { mt.collections[collID].Schema.Fields[1].DataType = schemapb.DataType_VarChar } + handler := NewNMockHandler(t) + handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{ + ID: collID, + Schema: &schemapb.CollectionSchema{ + Name: "coll", + Fields: []*schemapb.FieldSchema{ + { + FieldID: fieldID, + Name: "vec", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: "dim", + Value: "128", + }, + }, + }, + }, + EnableDynamicField: false, + Properties: nil, + }, + }, nil) + paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true") defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false") - ib := newIndexBuilder(ctx, &mt, nodeManager, cm, newIndexEngineVersionManager(), nil) + ib := newIndexBuilder(ctx, &mt, nodeManager, cm, newIndexEngineVersionManager(), handler) t.Run("success to get opt field on startup", func(t *testing.T) { ic.EXPECT().CreateJob(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn( diff --git a/internal/indexnode/indexnode_service.go b/internal/indexnode/indexnode_service.go index a690e35e4a10a..fb9d5a0cc19a1 100644 --- 
a/internal/indexnode/indexnode_service.go +++ b/internal/indexnode/indexnode_service.go @@ -55,6 +55,8 @@ func (i *IndexNode) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest defer i.lifetime.Done() log.Info("IndexNode building index ...", zap.Int64("collectionID", req.GetCollectionID()), + zap.Int64("partitionID", req.GetPartitionID()), + zap.Int64("segmentID", req.GetSegmentID()), zap.Int64("indexID", req.GetIndexID()), zap.String("indexName", req.GetIndexName()), zap.String("indexFilePrefix", req.GetIndexFilePrefix()), diff --git a/internal/indexnode/task.go b/internal/indexnode/task.go index b14343900d99c..54c8b3fe45a66 100644 --- a/internal/indexnode/task.go +++ b/internal/indexnode/task.go @@ -18,7 +18,6 @@ package indexnode import ( "context" - "encoding/json" "fmt" "runtime/debug" "strconv" @@ -30,6 +29,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/indexcgopb" "github.com/milvus-io/milvus/internal/proto/indexpb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/util/indexcgowrapper" @@ -84,12 +84,21 @@ type indexBuildTaskV2 struct { } func (it *indexBuildTaskV2) parseParams(ctx context.Context) error { - it.collectionID = it.req.CollectionID - it.partitionID = it.req.PartitionID - it.segmentID = it.req.SegmentID - it.fieldType = it.req.FieldType - it.fieldID = it.req.FieldID - it.fieldName = it.req.FieldName + it.collectionID = it.req.GetCollectionID() + it.partitionID = it.req.GetPartitionID() + it.segmentID = it.req.GetSegmentID() + it.fieldType = it.req.GetFieldType() + if it.fieldType == schemapb.DataType_None { + it.fieldType = it.req.GetField().GetDataType() + } + it.fieldID = it.req.GetFieldID() + if it.fieldID == 0 { + it.fieldID = it.req.GetField().GetFieldID() + } + it.fieldName = it.req.GetFieldName() + if it.fieldName == "" { + it.fieldName = it.req.GetField().GetName() + } return nil } @@ -138,61 +147,66 @@ func (it *indexBuildTaskV2) BuildIndex(ctx context.Context) error { } } - var buildIndexInfo *indexcgowrapper.BuildIndexInfo - buildIndexInfo, err = indexcgowrapper.NewBuildIndexInfo(it.req.GetStorageConfig()) - defer indexcgowrapper.DeleteBuildIndexInfo(buildIndexInfo) - if err != nil { - log.Ctx(ctx).Warn("create build index info failed", zap.Error(err)) - return err - } - err = buildIndexInfo.AppendFieldMetaInfoV2(it.collectionID, it.partitionID, it.segmentID, it.fieldID, it.fieldType, it.fieldName, it.req.Dim) - if err != nil { - log.Ctx(ctx).Warn("append field meta failed", zap.Error(err)) - return err - } - - err = buildIndexInfo.AppendIndexMetaInfo(it.req.IndexID, it.req.BuildID, it.req.IndexVersion) - if err != nil { - log.Ctx(ctx).Warn("append index meta failed", zap.Error(err)) - return err - } - - err = buildIndexInfo.AppendBuildIndexParam(it.newIndexParams) - if err != nil { - log.Ctx(ctx).Warn("append index params failed", zap.Error(err)) - return err - } - - err = buildIndexInfo.AppendIndexStorageInfo(it.req.StorePath, it.req.IndexStorePath, it.req.StoreVersion) - if err != nil { - log.Ctx(ctx).Warn("append storage info failed", zap.Error(err)) - return err - } - - jsonIndexParams, err := json.Marshal(it.newIndexParams) - if err != nil { - log.Ctx(ctx).Error("failed to json marshal index params", zap.Error(err)) - return err - } - - log.Ctx(ctx).Info("index params are ready", - zap.Int64("buildID", it.BuildID), - zap.String("index params", string(jsonIndexParams))) - - err = 
buildIndexInfo.AppendBuildTypeParam(it.newTypeParams) - if err != nil { - log.Ctx(ctx).Warn("append type params failed", zap.Error(err)) - return err + storageConfig := &indexcgopb.StorageConfig{ + Address: it.req.GetStorageConfig().GetAddress(), + AccessKeyID: it.req.GetStorageConfig().GetAccessKeyID(), + SecretAccessKey: it.req.GetStorageConfig().GetSecretAccessKey(), + UseSSL: it.req.GetStorageConfig().GetUseSSL(), + BucketName: it.req.GetStorageConfig().GetBucketName(), + RootPath: it.req.GetStorageConfig().GetRootPath(), + UseIAM: it.req.GetStorageConfig().GetUseIAM(), + IAMEndpoint: it.req.GetStorageConfig().GetIAMEndpoint(), + StorageType: it.req.GetStorageConfig().GetStorageType(), + UseVirtualHost: it.req.GetStorageConfig().GetUseVirtualHost(), + Region: it.req.GetStorageConfig().GetRegion(), + CloudProvider: it.req.GetStorageConfig().GetCloudProvider(), + RequestTimeoutMs: it.req.GetStorageConfig().GetRequestTimeoutMs(), + SslCACert: it.req.GetStorageConfig().GetSslCACert(), + } + + optFields := make([]*indexcgopb.OptionalFieldInfo, 0, len(it.req.GetOptionalScalarFields())) + for _, optField := range it.req.GetOptionalScalarFields() { + optFields = append(optFields, &indexcgopb.OptionalFieldInfo{ + FieldID: optField.GetFieldID(), + FieldName: optField.GetFieldName(), + FieldType: optField.GetFieldType(), + DataPaths: optField.GetDataPaths(), + }) } - for _, optField := range it.req.GetOptionalScalarFields() { - if err := buildIndexInfo.AppendOptionalField(optField); err != nil { - log.Ctx(ctx).Warn("append optional field failed", zap.Error(err)) - return err + it.currentIndexVersion = getCurrentIndexVersion(it.req.GetCurrentIndexVersion()) + field := it.req.GetField() + if field == nil || field.GetDataType() == schemapb.DataType_None { + field = &schemapb.FieldSchema{ + FieldID: it.fieldID, + Name: it.fieldName, + DataType: it.fieldType, } } - it.index, err = indexcgowrapper.CreateIndexV2(ctx, buildIndexInfo) + buildIndexParams := &indexcgopb.BuildIndexInfo{ + ClusterID: it.ClusterID, + BuildID: it.BuildID, + CollectionID: it.collectionID, + PartitionID: it.partitionID, + SegmentID: it.segmentID, + IndexVersion: it.req.GetIndexVersion(), + CurrentIndexVersion: it.currentIndexVersion, + NumRows: it.req.GetNumRows(), + Dim: it.req.GetDim(), + IndexFilePrefix: it.req.GetIndexFilePrefix(), + InsertFiles: it.req.GetDataPaths(), + FieldSchema: field, + StorageConfig: storageConfig, + IndexParams: mapToKVPairs(it.newIndexParams), + TypeParams: mapToKVPairs(it.newTypeParams), + StorePath: it.req.GetStorePath(), + StoreVersion: it.req.GetStoreVersion(), + IndexStorePath: it.req.GetIndexStorePath(), + OptFields: optFields, + } + + it.index, err = indexcgowrapper.CreateIndexV2(ctx, buildIndexParams) if err != nil { if it.index != nil && it.index.CleanLocalData() != nil { log.Ctx(ctx).Error("failed to clean cached data on disk after build index failed", @@ -328,7 +342,7 @@ func (it *indexBuildTask) Prepare(ctx context.Context) error { if len(it.req.DataPaths) == 0 { for _, id := range it.req.GetDataIds() { - path := metautil.BuildInsertLogPath(it.req.GetStorageConfig().RootPath, it.req.GetCollectionID(), it.req.GetPartitionID(), it.req.GetSegmentID(), it.req.GetFieldID(), id) + path := metautil.BuildInsertLogPath(it.req.GetStorageConfig().RootPath, it.req.GetCollectionID(), it.req.GetPartitionID(), it.req.GetSegmentID(), it.req.GetField().GetFieldID(), id) it.req.DataPaths = append(it.req.DataPaths, path) } } @@ -362,16 +376,10 @@ func (it *indexBuildTask) Prepare(ctx context.Context) error 
{ } it.newTypeParams = typeParams it.newIndexParams = indexParams + it.statistic.IndexParams = it.req.GetIndexParams() - // ugly codes to get dimension - if dimStr, ok := typeParams[common.DimKey]; ok { - var err error - it.statistic.Dim, err = strconv.ParseInt(dimStr, 10, 64) - if err != nil { - log.Ctx(ctx).Error("parse dimesion failed", zap.Error(err)) - // ignore error - } - } + it.statistic.Dim = it.req.GetDim() + log.Ctx(ctx).Info("Successfully prepare indexBuildTask", zap.Int64("buildID", it.BuildID), zap.Int64("Collection", it.collectionID), zap.Int64("SegmentID", it.segmentID)) return nil @@ -482,69 +490,65 @@ func (it *indexBuildTask) BuildIndex(ctx context.Context) error { } } - var buildIndexInfo *indexcgowrapper.BuildIndexInfo - buildIndexInfo, err = indexcgowrapper.NewBuildIndexInfo(it.req.GetStorageConfig()) - defer indexcgowrapper.DeleteBuildIndexInfo(buildIndexInfo) - if err != nil { - log.Ctx(ctx).Warn("create build index info failed", zap.Error(err)) - return err - } - err = buildIndexInfo.AppendFieldMetaInfo(it.collectionID, it.partitionID, it.segmentID, it.fieldID, it.fieldType) - if err != nil { - log.Ctx(ctx).Warn("append field meta failed", zap.Error(err)) - return err - } - - err = buildIndexInfo.AppendIndexMetaInfo(it.req.IndexID, it.req.BuildID, it.req.IndexVersion) - if err != nil { - log.Ctx(ctx).Warn("append index meta failed", zap.Error(err)) - return err - } - - err = buildIndexInfo.AppendBuildIndexParam(it.newIndexParams) - if err != nil { - log.Ctx(ctx).Warn("append index params failed", zap.Error(err)) - return err - } - - jsonIndexParams, err := json.Marshal(it.newIndexParams) - if err != nil { - log.Ctx(ctx).Error("failed to json marshal index params", zap.Error(err)) - return err - } - - log.Ctx(ctx).Info("index params are ready", - zap.Int64("buildID", it.BuildID), - zap.String("index params", string(jsonIndexParams))) - - err = buildIndexInfo.AppendBuildTypeParam(it.newTypeParams) - if err != nil { - log.Ctx(ctx).Warn("append type params failed", zap.Error(err)) - return err - } - - for _, path := range it.req.GetDataPaths() { - err = buildIndexInfo.AppendInsertFile(path) - if err != nil { - log.Ctx(ctx).Warn("append insert binlog path failed", zap.Error(err)) - return err - } + storageConfig := &indexcgopb.StorageConfig{ + Address: it.req.GetStorageConfig().GetAddress(), + AccessKeyID: it.req.GetStorageConfig().GetAccessKeyID(), + SecretAccessKey: it.req.GetStorageConfig().GetSecretAccessKey(), + UseSSL: it.req.GetStorageConfig().GetUseSSL(), + BucketName: it.req.GetStorageConfig().GetBucketName(), + RootPath: it.req.GetStorageConfig().GetRootPath(), + UseIAM: it.req.GetStorageConfig().GetUseIAM(), + IAMEndpoint: it.req.GetStorageConfig().GetIAMEndpoint(), + StorageType: it.req.GetStorageConfig().GetStorageType(), + UseVirtualHost: it.req.GetStorageConfig().GetUseVirtualHost(), + Region: it.req.GetStorageConfig().GetRegion(), + CloudProvider: it.req.GetStorageConfig().GetCloudProvider(), + RequestTimeoutMs: it.req.GetStorageConfig().GetRequestTimeoutMs(), + SslCACert: it.req.GetStorageConfig().GetSslCACert(), + } + + optFields := make([]*indexcgopb.OptionalFieldInfo, 0, len(it.req.GetOptionalScalarFields())) + for _, optField := range it.req.GetOptionalScalarFields() { + optFields = append(optFields, &indexcgopb.OptionalFieldInfo{ + FieldID: optField.GetFieldID(), + FieldName: optField.GetFieldName(), + FieldType: optField.GetFieldType(), + DataPaths: optField.GetDataPaths(), + }) } it.currentIndexVersion = 
getCurrentIndexVersion(it.req.GetCurrentIndexVersion()) - if err := buildIndexInfo.AppendIndexEngineVersion(it.currentIndexVersion); err != nil { - log.Ctx(ctx).Warn("append index engine version failed", zap.Error(err)) - return err - } - - for _, optField := range it.req.GetOptionalScalarFields() { - if err := buildIndexInfo.AppendOptionalField(optField); err != nil { - log.Ctx(ctx).Warn("append optional field failed", zap.Error(err)) - return err + field := it.req.GetField() + if field == nil || field.GetDataType() == schemapb.DataType_None { + field = &schemapb.FieldSchema{ + FieldID: it.fieldID, + Name: it.fieldName, + DataType: it.fieldType, } } - - it.index, err = indexcgowrapper.CreateIndex(ctx, buildIndexInfo) + buildIndexParams := &indexcgopb.BuildIndexInfo{ + ClusterID: it.ClusterID, + BuildID: it.BuildID, + CollectionID: it.collectionID, + PartitionID: it.partitionID, + SegmentID: it.segmentID, + IndexVersion: it.req.GetIndexVersion(), + CurrentIndexVersion: it.currentIndexVersion, + NumRows: it.req.GetNumRows(), + Dim: it.req.GetDim(), + IndexFilePrefix: it.req.GetIndexFilePrefix(), + InsertFiles: it.req.GetDataPaths(), + FieldSchema: field, + StorageConfig: storageConfig, + IndexParams: mapToKVPairs(it.newIndexParams), + TypeParams: mapToKVPairs(it.newTypeParams), + StorePath: it.req.GetStorePath(), + StoreVersion: it.req.GetStoreVersion(), + IndexStorePath: it.req.GetIndexStorePath(), + OptFields: optFields, + } + + it.index, err = indexcgowrapper.CreateIndex(ctx, buildIndexParams) if err != nil { if it.index != nil && it.index.CleanLocalData() != nil { log.Ctx(ctx).Error("failed to clean cached data on disk after build index failed", @@ -653,8 +657,6 @@ func (it *indexBuildTask) decodeBlobs(ctx context.Context, blobs []*storage.Blob deserializeDur := it.tr.RecordSpan() log.Ctx(ctx).Info("IndexNode deserialize data success", - zap.Int64("index id", it.req.IndexID), - zap.String("index name", it.req.IndexName), zap.Int64("collectionID", it.collectionID), zap.Int64("partitionID", it.partitionID), zap.Int64("segmentID", it.segmentID), diff --git a/internal/indexnode/task_test.go b/internal/indexnode/task_test.go index dc30abd800eec..6450c3e504a71 100644 --- a/internal/indexnode/task_test.go +++ b/internal/indexnode/task_test.go @@ -283,12 +283,14 @@ func (suite *IndexBuildTaskV2Suite) TestBuildIndex() { RootPath: "/tmp/milvus/data", StorageType: "local", }, - CollectionID: 1, - PartitionID: 1, - SegmentID: 1, - FieldID: 3, - FieldName: "vec", - FieldType: schemapb.DataType_FloatVector, + CollectionID: 1, + PartitionID: 1, + SegmentID: 1, + Field: &schemapb.FieldSchema{ + FieldID: 3, + Name: "vec", + DataType: schemapb.DataType_FloatVector, + }, StorePath: "file://" + suite.space.Path(), StoreVersion: suite.space.GetCurrentVersion(), IndexStorePath: "file://" + suite.space.Path(), diff --git a/internal/indexnode/util.go b/internal/indexnode/util.go index 9186f9855a81b..8aaa92910503f 100644 --- a/internal/indexnode/util.go +++ b/internal/indexnode/util.go @@ -19,6 +19,7 @@ package indexnode import ( "github.com/cockroachdb/errors" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" ) @@ -36,3 +37,14 @@ func estimateFieldDataSize(dim int64, numRows int64, dataType schemapb.DataType) return 0, nil } } + +func mapToKVPairs(m map[string]string) []*commonpb.KeyValuePair { + kvs := make([]*commonpb.KeyValuePair, 0, len(m)) + for k, v := range m { + kvs = append(kvs, &commonpb.KeyValuePair{ + Key: k, + Value: v, + }) + } + 
return kvs +} diff --git a/internal/indexnode/util_test.go b/internal/indexnode/util_test.go new file mode 100644 index 0000000000000..6d7d98e823240 --- /dev/null +++ b/internal/indexnode/util_test.go @@ -0,0 +1,41 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package indexnode + +import ( + "testing" + + "github.com/stretchr/testify/suite" +) + +type utilSuite struct { + suite.Suite +} + +func (s *utilSuite) Test_mapToKVPairs() { + indexParams := map[string]string{ + "index_type": "IVF_FLAT", + "dim": "128", + "nlist": "1024", + } + + s.Equal(3, len(mapToKVPairs(indexParams))) +} + +func Test_utilSuite(t *testing.T) { + suite.Run(t, new(utilSuite)) +} diff --git a/internal/proto/index_cgo_msg.proto b/internal/proto/index_cgo_msg.proto index 50b1ea5dde5a5..688f871f55aed 100644 --- a/internal/proto/index_cgo_msg.proto +++ b/internal/proto/index_cgo_msg.proto @@ -4,6 +4,7 @@ package milvus.proto.indexcgo; option go_package="github.com/milvus-io/milvus/internal/proto/indexcgopb"; import "common.proto"; +import "schema.proto"; message TypeParams { repeated common.KeyValuePair params = 1; @@ -30,3 +31,52 @@ message Binary { message BinarySet { repeated Binary datas = 1; } + +// Synchronously modify StorageConfig in index_coord.proto file +message StorageConfig { + string address = 1; + string access_keyID = 2; + string secret_access_key = 3; + bool useSSL = 4; + string bucket_name = 5; + string root_path = 6; + bool useIAM = 7; + string IAMEndpoint = 8; + string storage_type = 9; + bool use_virtual_host = 10; + string region = 11; + string cloud_provider = 12; + int64 request_timeout_ms = 13; + string sslCACert = 14; +} + +// Synchronously modify OptionalFieldInfo in index_coord.proto file +message OptionalFieldInfo { + int64 fieldID = 1; + string field_name = 2; + int32 field_type = 3; + repeated string data_paths = 4; +} + +message BuildIndexInfo { + string clusterID = 1; + int64 buildID = 2; + int64 collectionID = 3; + int64 partitionID = 4; + int64 segmentID = 5; + int64 index_version = 6; + int32 current_index_version = 7; + int64 num_rows = 8; + int64 dim = 9; + string index_file_prefix = 10; + repeated string insert_files = 11; +// repeated int64 data_ids = 12; + schema.FieldSchema field_schema = 12; + StorageConfig storage_config = 13; + repeated common.KeyValuePair index_params = 14; + repeated common.KeyValuePair type_params = 15; + string store_path = 16; + int64 store_version = 17; + string index_store_path = 18; + repeated OptionalFieldInfo opt_fields = 19; +} diff --git a/internal/proto/index_coord.proto b/internal/proto/index_coord.proto index d59452b17d2de..9204d7da2a9c7 100644 --- a/internal/proto/index_coord.proto +++ b/internal/proto/index_coord.proto @@ -8,6 +8,7 @@ import "common.proto"; import "internal.proto"; import 
"milvus.proto"; import "schema.proto"; +import "index_cgo_msg.proto"; service IndexCoord { rpc GetComponentStates(milvus.GetComponentStatesRequest) returns (milvus.ComponentStates) {} @@ -226,6 +227,7 @@ message GetIndexBuildProgressResponse { int64 pending_index_rows = 4; } +// Synchronously modify StorageConfig in index_cgo_msg.proto file message StorageConfig { string address = 1; string access_keyID = 2; @@ -243,6 +245,7 @@ message StorageConfig { string sslCACert = 14; } +// Synchronously modify OptionalFieldInfo in index_cgo_msg.proto file message OptionalFieldInfo { int64 fieldID = 1; string field_name = 2; @@ -276,6 +279,7 @@ message CreateJobRequest { int64 dim = 22; repeated int64 data_ids = 23; repeated OptionalFieldInfo optional_scalar_fields = 24; + schema.FieldSchema field = 25; } message QueryJobsRequest { diff --git a/internal/util/indexcgowrapper/index.go b/internal/util/indexcgowrapper/index.go index f0850b3b916de..a7cc7d0e9b21c 100644 --- a/internal/util/indexcgowrapper/index.go +++ b/internal/util/indexcgowrapper/index.go @@ -16,6 +16,7 @@ import ( "unsafe" "github.com/golang/protobuf/proto" + "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" @@ -94,9 +95,17 @@ func NewCgoIndex(dtype schemapb.DataType, typeParams, indexParams map[string]str return index, nil } -func CreateIndex(ctx context.Context, buildIndexInfo *BuildIndexInfo) (CodecIndex, error) { +func CreateIndex(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo) (CodecIndex, error) { + buildIndexInfoBlob, err := proto.Marshal(buildIndexInfo) + if err != nil { + log.Ctx(ctx).Warn("marshal buildIndexInfo failed", + zap.String("clusterID", buildIndexInfo.GetClusterID()), + zap.Int64("buildID", buildIndexInfo.GetBuildID()), + zap.Error(err)) + return nil, err + } var indexPtr C.CIndex - status := C.CreateIndex(&indexPtr, buildIndexInfo.cBuildIndexInfo) + status := C.CreateIndex(&indexPtr, (*C.uint8_t)(unsafe.Pointer(&buildIndexInfoBlob[0])), (C.uint64_t)(len(buildIndexInfoBlob))) if err := HandleCStatus(&status, "failed to create index"); err != nil { return nil, err } @@ -109,9 +118,17 @@ func CreateIndex(ctx context.Context, buildIndexInfo *BuildIndexInfo) (CodecInde return index, nil } -func CreateIndexV2(ctx context.Context, buildIndexInfo *BuildIndexInfo) (CodecIndex, error) { +func CreateIndexV2(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo) (CodecIndex, error) { + buildIndexInfoBlob, err := proto.Marshal(buildIndexInfo) + if err != nil { + log.Ctx(ctx).Warn("marshal buildIndexInfo failed", + zap.String("clusterID", buildIndexInfo.GetClusterID()), + zap.Int64("buildID", buildIndexInfo.GetBuildID()), + zap.Error(err)) + return nil, err + } var indexPtr C.CIndex - status := C.CreateIndexV2(&indexPtr, buildIndexInfo.cBuildIndexInfo) + status := C.CreateIndexV2(&indexPtr, (*C.uint8_t)(unsafe.Pointer(&buildIndexInfoBlob[0])), (C.uint64_t)(len(buildIndexInfoBlob))) if err := HandleCStatus(&status, "failed to create index"); err != nil { return nil, err } From 9ff023ee35ec28afc0005f3251c7300e04ce1d64 Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Thu, 23 May 2024 11:13:40 +0800 Subject: [PATCH 041/126] fix: Fix filtering by partition key fails for importing data (#33274) Before executing the import, partition IDs should be reordered according to partition names. Otherwise, the data might be hashed to the wrong partition during import. This PR corrects this error. 
issue: https://github.com/milvus-io/milvus/issues/33237 --------- Signed-off-by: bigsheeper --- internal/proxy/impl.go | 6 +- internal/proxy/msg_pack.go | 2 +- internal/proxy/task_test.go | 2 +- internal/proxy/util.go | 28 +-- .../integration/import/partition_key_test.go | 215 ++++++++++++++++++ tests/integration/import/util_test.go | 17 +- 6 files changed, 234 insertions(+), 36 deletions(-) create mode 100644 tests/integration/import/partition_key_test.go diff --git a/internal/proxy/impl.go b/internal/proxy/impl.go index c0f520097b801..3947bd510aa95 100644 --- a/internal/proxy/impl.go +++ b/internal/proxy/impl.go @@ -6100,7 +6100,11 @@ func (node *Proxy) ImportV2(ctx context.Context, req *internalpb.ImportRequest) resp.Status = merr.Status(err) return resp, nil } - partitionIDs = lo.Values(partitions) + _, partitionIDs, err = typeutil.RearrangePartitionsForPartitionKey(partitions) + if err != nil { + resp.Status = merr.Status(err) + return resp, nil + } } else { if req.GetPartitionName() == "" { req.PartitionName = Params.CommonCfg.DefaultPartitionName.GetValue() diff --git a/internal/proxy/msg_pack.go b/internal/proxy/msg_pack.go index 7d1d58b213698..1177bd8adc1dd 100644 --- a/internal/proxy/msg_pack.go +++ b/internal/proxy/msg_pack.go @@ -231,7 +231,7 @@ func repackInsertDataWithPartitionKey(ctx context.Context, } channel2RowOffsets := assignChannelsByPK(result.IDs, channelNames, insertMsg) - partitionNames, err := getDefaultPartitionNames(ctx, insertMsg.GetDbName(), insertMsg.CollectionName) + partitionNames, err := getDefaultPartitionsInPartitionKeyMode(ctx, insertMsg.GetDbName(), insertMsg.CollectionName) if err != nil { log.Warn("get default partition names failed in partition key mode", zap.String("collectionName", insertMsg.CollectionName), diff --git a/internal/proxy/task_test.go b/internal/proxy/task_test.go index 33fb1bfaed225..31e44e8b469ff 100644 --- a/internal/proxy/task_test.go +++ b/internal/proxy/task_test.go @@ -3135,7 +3135,7 @@ func TestCreateCollectionTaskWithPartitionKey(t *testing.T) { // check default partitions err = InitMetaCache(ctx, rc, nil, nil) assert.NoError(t, err) - partitionNames, err := getDefaultPartitionNames(ctx, "", task.CollectionName) + partitionNames, err := getDefaultPartitionsInPartitionKeyMode(ctx, "", task.CollectionName) assert.NoError(t, err) assert.Equal(t, task.GetNumPartitions(), int64(len(partitionNames))) diff --git a/internal/proxy/util.go b/internal/proxy/util.go index 42bc37f03b3b2..0e6b373f27dc4 100644 --- a/internal/proxy/util.go +++ b/internal/proxy/util.go @@ -1395,7 +1395,7 @@ func hasParitionKeyModeField(schema *schemapb.CollectionSchema) bool { return false } -// getDefaultPartitionNames only used in partition key mode +// getDefaultPartitionsInPartitionKeyMode only used in partition key mode func getDefaultPartitionsInPartitionKeyMode(ctx context.Context, dbName string, collectionName string) ([]string, error) { partitions, err := globalMetaCache.GetPartitions(ctx, dbName, collectionName) if err != nil { @@ -1411,32 +1411,6 @@ func getDefaultPartitionsInPartitionKeyMode(ctx context.Context, dbName string, return partitionNames, nil } -// getDefaultPartitionNames only used in partition key mode -func getDefaultPartitionNames(ctx context.Context, dbName string, collectionName string) ([]string, error) { - partitions, err := globalMetaCache.GetPartitions(ctx, dbName, collectionName) - if err != nil { - return nil, err - } - - // Make sure the order of the partition names got every time is the same - partitionNames := 
make([]string, len(partitions)) - for partitionName := range partitions { - splits := strings.Split(partitionName, "_") - if len(splits) < 2 { - err = fmt.Errorf("bad default partion name in partition ket mode: %s", partitionName) - return nil, err - } - index, err := strconv.ParseInt(splits[len(splits)-1], 10, 64) - if err != nil { - return nil, err - } - - partitionNames[index] = partitionName - } - - return partitionNames, nil -} - func assignChannelsByPK(pks *schemapb.IDs, channelNames []string, insertMsg *msgstream.InsertMsg) map[string][]int { insertMsg.HashValues = typeutil.HashPK2Channels(pks, channelNames) diff --git a/tests/integration/import/partition_key_test.go b/tests/integration/import/partition_key_test.go new file mode 100644 index 0000000000000..b9cba86c84b50 --- /dev/null +++ b/tests/integration/import/partition_key_test.go @@ -0,0 +1,215 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package importv2 + +import ( + "context" + "fmt" + "math/rand" + "os" + "strings" + "time" + + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/internalpb" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/metric" + "github.com/milvus-io/milvus/tests/integration" +) + +func (s *BulkInsertSuite) TestImportWithPartitionKey() { + const ( + rowCount = 10000 + ) + + c := s.Cluster + ctx, cancel := context.WithTimeout(c.GetContext(), 60*time.Second) + defer cancel() + + collectionName := "TestBulkInsert_WithPartitionKey_" + funcutil.GenRandomStr() + + schema := integration.ConstructSchema(collectionName, dim, true, &schemapb.FieldSchema{ + FieldID: 100, + Name: integration.Int64Field, + IsPrimaryKey: true, + DataType: schemapb.DataType_Int64, + AutoID: true, + }, &schemapb.FieldSchema{ + FieldID: 101, + Name: integration.FloatVecField, + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: fmt.Sprintf("%d", dim), + }, + }, + }, &schemapb.FieldSchema{ + FieldID: 102, + Name: integration.VarCharField, + DataType: schemapb.DataType_VarChar, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.MaxLengthKey, + Value: fmt.Sprintf("%d", 256), + }, + }, + IsPartitionKey: true, + }) + marshaledSchema, err := proto.Marshal(schema) + s.NoError(err) + + createCollectionStatus, err := c.Proxy.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{ + DbName: "", + CollectionName: 
collectionName, + Schema: marshaledSchema, + ShardsNum: common.DefaultShardsNum, + }) + s.NoError(err) + s.Equal(int32(0), createCollectionStatus.GetCode()) + + // create index + createIndexStatus, err := c.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{ + CollectionName: collectionName, + FieldName: integration.FloatVecField, + IndexName: "_default", + ExtraParams: integration.ConstructIndexParam(dim, integration.IndexFaissIvfFlat, metric.L2), + }) + s.NoError(err) + s.Equal(int32(0), createIndexStatus.GetCode()) + + s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField) + + // import + var files []*internalpb.ImportFile + err = os.MkdirAll(c.ChunkManager.RootPath(), os.ModePerm) + s.NoError(err) + + filePath := fmt.Sprintf("/tmp/test_%d.parquet", rand.Int()) + insertData, err := GenerateParquetFileAndReturnInsertData(filePath, schema, rowCount) + s.NoError(err) + defer os.Remove(filePath) + files = []*internalpb.ImportFile{ + { + Paths: []string{ + filePath, + }, + }, + } + + importResp, err := c.Proxy.ImportV2(ctx, &internalpb.ImportRequest{ + CollectionName: collectionName, + Files: files, + }) + s.NoError(err) + s.Equal(int32(0), importResp.GetStatus().GetCode()) + log.Info("Import result", zap.Any("importResp", importResp)) + + jobID := importResp.GetJobID() + err = WaitForImportDone(ctx, c, jobID) + s.NoError(err) + + // load + loadStatus, err := c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + CollectionName: collectionName, + }) + s.NoError(err) + s.Equal(commonpb.ErrorCode_Success, loadStatus.GetErrorCode()) + s.WaitForLoad(ctx, collectionName) + + segments, err := c.MetaWatcher.ShowSegments() + s.NoError(err) + s.NotEmpty(segments) + log.Info("Show segments", zap.Any("segments", segments)) + + // load refresh + loadStatus, err = c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + CollectionName: collectionName, + Refresh: true, + }) + s.NoError(err) + s.Equal(commonpb.ErrorCode_Success, loadStatus.GetErrorCode()) + s.WaitForLoadRefresh(ctx, "", collectionName) + + // query partition key, TermExpr + queryNum := 10 + partitionKeyData := insertData.Data[int64(102)].GetRows().([]string) + queryData := partitionKeyData[:queryNum] + strs := lo.Map(queryData, func(str string, _ int) string { + return fmt.Sprintf("\"%s\"", str) + }) + str := strings.Join(strs, `,`) + expr := fmt.Sprintf("%s in [%v]", integration.VarCharField, str) + queryResult, err := c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + CollectionName: collectionName, + Expr: expr, + OutputFields: []string{integration.VarCharField}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + for _, data := range queryResult.GetFieldsData() { + if data.GetType() == schemapb.DataType_VarChar { + resData := data.GetScalars().GetStringData().GetData() + s.Equal(queryNum, len(resData)) + s.ElementsMatch(resData, queryData) + } + } + + // query partition key, CmpOp 1 + expr = fmt.Sprintf("%s >= 0", integration.Int64Field) + queryResult, err = c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + CollectionName: collectionName, + Expr: expr, + OutputFields: []string{integration.VarCharField}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + for _, data := range queryResult.GetFieldsData() { + if data.GetType() == schemapb.DataType_VarChar { + resData := data.GetScalars().GetStringData().GetData() + s.Equal(rowCount, len(resData)) + s.ElementsMatch(resData, partitionKeyData) + } + } + + // query partition key, CmpOp 2 + target := partitionKeyData[rand.Intn(rowCount)] + expr 
= fmt.Sprintf("%s == \"%s\"", integration.VarCharField, target) + queryResult, err = c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + CollectionName: collectionName, + Expr: expr, + OutputFields: []string{integration.VarCharField}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + for _, data := range queryResult.GetFieldsData() { + if data.GetType() == schemapb.DataType_VarChar { + resData := data.GetScalars().GetStringData().GetData() + s.Equal(1, len(resData)) + s.Equal(resData[0], target) + } + } +} diff --git a/tests/integration/import/util_test.go b/tests/integration/import/util_test.go index 237a705ec2474..d55168db0a789 100644 --- a/tests/integration/import/util_test.go +++ b/tests/integration/import/util_test.go @@ -46,33 +46,38 @@ import ( const dim = 128 func GenerateParquetFile(filePath string, schema *schemapb.CollectionSchema, numRows int) error { + _, err := GenerateParquetFileAndReturnInsertData(filePath, schema, numRows) + return err +} + +func GenerateParquetFileAndReturnInsertData(filePath string, schema *schemapb.CollectionSchema, numRows int) (*storage.InsertData, error) { w, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE, 0o666) if err != nil { - return err + return nil, err } pqSchema, err := pq.ConvertToArrowSchema(schema) if err != nil { - return err + return nil, err } fw, err := pqarrow.NewFileWriter(pqSchema, w, parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(int64(numRows))), pqarrow.DefaultWriterProps()) if err != nil { - return err + return nil, err } defer fw.Close() insertData, err := testutil.CreateInsertData(schema, numRows) if err != nil { - return err + return nil, err } columns, err := testutil.BuildArrayData(schema, insertData) if err != nil { - return err + return nil, err } recordBatch := array.NewRecord(pqSchema, columns, int64(numRows)) - return fw.Write(recordBatch) + return insertData, fw.Write(recordBatch) } func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSchema, rowCount int) (*internalpb.ImportFile, error) { From 895799ec61dfe06a3dd1daf6af3ff59122beaeb3 Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Thu, 23 May 2024 11:29:41 +0800 Subject: [PATCH 042/126] enhance: Abstract Execute interface for import/preimport task (#33234) Abstract Execute interface for import/preimport task, simplify import scheduler. 
issue: https://github.com/milvus-io/milvus/issues/33157 Signed-off-by: bigsheeper --- internal/datanode/data_node.go | 2 +- internal/datanode/importv2/pool.go | 41 +++ internal/datanode/importv2/scheduler.go | 248 +------------------ internal/datanode/importv2/scheduler_test.go | 28 +-- internal/datanode/importv2/task.go | 193 +-------------- internal/datanode/importv2/task_import.go | 248 +++++++++++++++++++ internal/datanode/importv2/task_preimport.go | 212 ++++++++++++++++ internal/datanode/services.go | 4 +- 8 files changed, 533 insertions(+), 443 deletions(-) create mode 100644 internal/datanode/importv2/pool.go create mode 100644 internal/datanode/importv2/task_import.go create mode 100644 internal/datanode/importv2/task_preimport.go diff --git a/internal/datanode/data_node.go b/internal/datanode/data_node.go index 628ab9198e7a3..c926aab7c4d18 100644 --- a/internal/datanode/data_node.go +++ b/internal/datanode/data_node.go @@ -288,7 +288,7 @@ func (node *DataNode) Init() error { node.writeBufferManager = writebuffer.NewManager(syncMgr) node.importTaskMgr = importv2.NewTaskManager() - node.importScheduler = importv2.NewScheduler(node.importTaskMgr, node.syncMgr, node.chunkManager) + node.importScheduler = importv2.NewScheduler(node.importTaskMgr) node.channelCheckpointUpdater = newChannelCheckpointUpdater(node.broker) node.flowgraphManager = newFlowgraphManager() diff --git a/internal/datanode/importv2/pool.go b/internal/datanode/importv2/pool.go new file mode 100644 index 0000000000000..3558477773f1f --- /dev/null +++ b/internal/datanode/importv2/pool.go @@ -0,0 +1,41 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package importv2 + +import ( + "sync" + + "github.com/milvus-io/milvus/pkg/util/conc" + "github.com/milvus-io/milvus/pkg/util/paramtable" +) + +var ( + execPool *conc.Pool[any] + execPoolInitOnce sync.Once +) + +func initExecPool() { + execPool = conc.NewPool[any]( + paramtable.Get().DataNodeCfg.MaxConcurrentImportTaskNum.GetAsInt(), + conc.WithPreAlloc(true), + ) +} + +func GetExecPool() *conc.Pool[any] { + execPoolInitOnce.Do(initExecPool) + return execPool +} diff --git a/internal/datanode/importv2/scheduler.go b/internal/datanode/importv2/scheduler.go index 37884d87d863a..d1d58e8df0655 100644 --- a/internal/datanode/importv2/scheduler.go +++ b/internal/datanode/importv2/scheduler.go @@ -17,20 +17,13 @@ package importv2 import ( - "fmt" - "io" "sync" "time" - "github.com/cockroachdb/errors" "github.com/samber/lo" "go.uber.org/zap" - "github.com/milvus-io/milvus/internal/datanode/syncmgr" "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/proto/internalpb" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/internal/util/importutilv2" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/conc" "github.com/milvus-io/milvus/pkg/util/paramtable" @@ -44,25 +37,14 @@ type Scheduler interface { type scheduler struct { manager TaskManager - syncMgr syncmgr.SyncManager - cm storage.ChunkManager - - pool *conc.Pool[any] closeOnce sync.Once closeChan chan struct{} } -func NewScheduler(manager TaskManager, syncMgr syncmgr.SyncManager, cm storage.ChunkManager) Scheduler { - pool := conc.NewPool[any]( - paramtable.Get().DataNodeCfg.MaxConcurrentImportTaskNum.GetAsInt(), - conc.WithPreAlloc(true), - ) +func NewScheduler(manager TaskManager) Scheduler { return &scheduler{ manager: manager, - syncMgr: syncMgr, - cm: cm, - pool: pool, closeChan: make(chan struct{}), } } @@ -84,16 +66,9 @@ func (s *scheduler) Start() { tasks := s.manager.GetBy(WithStates(datapb.ImportTaskStateV2_Pending)) futures := make(map[int64][]*conc.Future[any]) for _, task := range tasks { - switch task.GetType() { - case PreImportTaskType: - fs := s.PreImport(task) - futures[task.GetTaskID()] = fs - tryFreeFutures(futures) - case ImportTaskType: - fs := s.Import(task) - futures[task.GetTaskID()] = fs - tryFreeFutures(futures) - } + fs := task.Execute() + futures[task.GetTaskID()] = fs + tryFreeFutures(futures) } for taskID, fs := range futures { err := conc.AwaitAll(fs...) @@ -120,17 +95,6 @@ func (s *scheduler) Close() { }) } -func WrapLogFields(task Task, fields ...zap.Field) []zap.Field { - res := []zap.Field{ - zap.Int64("taskID", task.GetTaskID()), - zap.Int64("jobID", task.GetJobID()), - zap.Int64("collectionID", task.GetCollectionID()), - zap.String("type", task.GetType().String()), - } - res = append(res, fields...) - return res -} - func tryFreeFutures(futures map[int64][]*conc.Future[any]) { for k, fs := range futures { fs = lo.Filter(fs, func(f *conc.Future[any], _ int) bool { @@ -143,207 +107,3 @@ func tryFreeFutures(futures map[int64][]*conc.Future[any]) { futures[k] = fs } } - -func (s *scheduler) handleErr(task Task, err error, msg string) { - log.Warn(msg, WrapLogFields(task, zap.Error(err))...) 
- s.manager.Update(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed), UpdateReason(err.Error())) -} - -func (s *scheduler) PreImport(task Task) []*conc.Future[any] { - bufferSize := paramtable.Get().DataNodeCfg.ReadBufferSizeInMB.GetAsInt() * 1024 * 1024 - log.Info("start to preimport", WrapLogFields(task, - zap.Int("bufferSize", bufferSize), - zap.Any("schema", task.GetSchema()))...) - s.manager.Update(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress)) - files := lo.Map(task.(*PreImportTask).GetFileStats(), - func(fileStat *datapb.ImportFileStats, _ int) *internalpb.ImportFile { - return fileStat.GetImportFile() - }) - - fn := func(i int, file *internalpb.ImportFile) error { - reader, err := importutilv2.NewReader(task.GetCtx(), s.cm, task.GetSchema(), file, task.GetOptions(), bufferSize) - if err != nil { - s.handleErr(task, err, "new reader failed") - return err - } - defer reader.Close() - start := time.Now() - err = s.readFileStat(reader, task, i) - if err != nil { - s.handleErr(task, err, "preimport failed") - return err - } - log.Info("read file stat done", WrapLogFields(task, zap.Strings("files", file.GetPaths()), - zap.Duration("dur", time.Since(start)))...) - return nil - } - - futures := make([]*conc.Future[any], 0, len(files)) - for i, file := range files { - i := i - file := file - f := s.pool.Submit(func() (any, error) { - err := fn(i, file) - return err, err - }) - futures = append(futures, f) - } - return futures -} - -func (s *scheduler) readFileStat(reader importutilv2.Reader, task Task, fileIdx int) error { - fileSize, err := reader.Size() - if err != nil { - return err - } - maxSize := paramtable.Get().DataNodeCfg.MaxImportFileSizeInGB.GetAsFloat() * 1024 * 1024 * 1024 - if fileSize > int64(maxSize) { - return errors.New(fmt.Sprintf( - "The import file size has reached the maximum limit allowed for importing, "+ - "fileSize=%d, maxSize=%d", fileSize, int64(maxSize))) - } - - totalRows := 0 - totalSize := 0 - hashedStats := make(map[string]*datapb.PartitionImportStats) - for { - data, err := reader.Read() - if err != nil { - if errors.Is(err, io.EOF) { - break - } - return err - } - err = CheckRowsEqual(task.GetSchema(), data) - if err != nil { - return err - } - rowsCount, err := GetRowsStats(task, data) - if err != nil { - return err - } - MergeHashedStats(rowsCount, hashedStats) - rows := data.GetRowNum() - size := data.GetMemorySize() - totalRows += rows - totalSize += size - log.Info("reading file stat...", WrapLogFields(task, zap.Int("readRows", rows), zap.Int("readSize", size))...) - } - - stat := &datapb.ImportFileStats{ - FileSize: fileSize, - TotalRows: int64(totalRows), - TotalMemorySize: int64(totalSize), - HashedStats: hashedStats, - } - s.manager.Update(task.GetTaskID(), UpdateFileStat(fileIdx, stat)) - return nil -} - -func (s *scheduler) Import(task Task) []*conc.Future[any] { - bufferSize := paramtable.Get().DataNodeCfg.ReadBufferSizeInMB.GetAsInt() * 1024 * 1024 - log.Info("start to import", WrapLogFields(task, - zap.Int("bufferSize", bufferSize), - zap.Any("schema", task.GetSchema()))...) 
- s.manager.Update(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress)) - - req := task.(*ImportTask).req - - fn := func(file *internalpb.ImportFile) error { - reader, err := importutilv2.NewReader(task.GetCtx(), s.cm, task.GetSchema(), file, task.GetOptions(), bufferSize) - if err != nil { - s.handleErr(task, err, fmt.Sprintf("new reader failed, file: %s", file.String())) - return err - } - defer reader.Close() - start := time.Now() - err = s.importFile(reader, task) - if err != nil { - s.handleErr(task, err, fmt.Sprintf("do import failed, file: %s", file.String())) - return err - } - log.Info("import file done", WrapLogFields(task, zap.Strings("files", file.GetPaths()), - zap.Duration("dur", time.Since(start)))...) - return nil - } - - futures := make([]*conc.Future[any], 0, len(req.GetFiles())) - for _, file := range req.GetFiles() { - file := file - f := s.pool.Submit(func() (any, error) { - err := fn(file) - return err, err - }) - futures = append(futures, f) - } - return futures -} - -func (s *scheduler) importFile(reader importutilv2.Reader, task Task) error { - iTask := task.(*ImportTask) - syncFutures := make([]*conc.Future[struct{}], 0) - syncTasks := make([]syncmgr.Task, 0) - for { - data, err := reader.Read() - if err != nil { - if errors.Is(err, io.EOF) { - break - } - return err - } - err = AppendSystemFieldsData(iTask, data) - if err != nil { - return err - } - hashedData, err := HashData(iTask, data) - if err != nil { - return err - } - fs, sts, err := s.Sync(iTask, hashedData) - if err != nil { - return err - } - syncFutures = append(syncFutures, fs...) - syncTasks = append(syncTasks, sts...) - } - err := conc.AwaitAll(syncFutures...) - if err != nil { - return err - } - for _, syncTask := range syncTasks { - segmentInfo, err := NewImportSegmentInfo(syncTask, iTask) - if err != nil { - return err - } - s.manager.Update(task.GetTaskID(), UpdateSegmentInfo(segmentInfo)) - log.Info("sync import data done", WrapLogFields(task, zap.Any("segmentInfo", segmentInfo))...) - } - return nil -} - -func (s *scheduler) Sync(task *ImportTask, hashedData HashedData) ([]*conc.Future[struct{}], []syncmgr.Task, error) { - log.Info("start to sync import data", WrapLogFields(task)...) 
- futures := make([]*conc.Future[struct{}], 0) - syncTasks := make([]syncmgr.Task, 0) - segmentImportedSizes := make(map[int64]int) - for channelIdx, datas := range hashedData { - channel := task.GetVchannels()[channelIdx] - for partitionIdx, data := range datas { - if data.GetRowNum() == 0 { - continue - } - partitionID := task.GetPartitionIDs()[partitionIdx] - size := data.GetMemorySize() - segmentID := PickSegment(task, segmentImportedSizes, channel, partitionID, size) - syncTask, err := NewSyncTask(task.GetCtx(), task, segmentID, partitionID, channel, data) - if err != nil { - return nil, nil, err - } - segmentImportedSizes[segmentID] += size - future := s.syncMgr.SyncData(task.GetCtx(), syncTask) - futures = append(futures, future) - syncTasks = append(syncTasks, syncTask) - } - } - return futures, syncTasks, nil -} diff --git a/internal/datanode/importv2/scheduler_test.go b/internal/datanode/importv2/scheduler_test.go index bdb876f75063e..a49e232992167 100644 --- a/internal/datanode/importv2/scheduler_test.go +++ b/internal/datanode/importv2/scheduler_test.go @@ -116,7 +116,7 @@ func (s *SchedulerSuite) SetupTest() { s.manager = NewTaskManager() s.syncMgr = syncmgr.NewMockSyncManager(s.T()) - s.scheduler = NewScheduler(s.manager, s.syncMgr, nil).(*scheduler) + s.scheduler = NewScheduler(s.manager).(*scheduler) } func createInsertData(t *testing.T, schema *schemapb.CollectionSchema, rowCount int) *storage.InsertData { @@ -236,7 +236,7 @@ func (s *SchedulerSuite) TestScheduler_Slots() { Schema: s.schema, ImportFiles: []*internalpb.ImportFile{{Paths: []string{"dummy.json"}}}, } - preimportTask := NewPreImportTask(preimportReq) + preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm) s.manager.Add(preimportTask) slots := s.scheduler.Slots() @@ -262,7 +262,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Preimport() { ioReader := strings.NewReader(string(bytes)) cm.EXPECT().Size(mock.Anything, mock.Anything).Return(1024, nil) cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil) - s.scheduler.cm = cm + s.cm = cm preimportReq := &datapb.PreImportRequest{ JobID: 1, @@ -273,7 +273,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Preimport() { Schema: s.schema, ImportFiles: []*internalpb.ImportFile{{Paths: []string{"dummy.json"}}}, } - preimportTask := NewPreImportTask(preimportReq) + preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm) s.manager.Add(preimportTask) go s.scheduler.Start() @@ -316,7 +316,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Preimport_Failed() { ioReader := strings.NewReader(string(bytes)) cm.EXPECT().Size(mock.Anything, mock.Anything).Return(1024, nil) cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil) - s.scheduler.cm = cm + s.cm = cm preimportReq := &datapb.PreImportRequest{ JobID: 1, @@ -327,7 +327,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Preimport_Failed() { Schema: s.schema, ImportFiles: []*internalpb.ImportFile{{Paths: []string{"dummy.json"}}}, } - preimportTask := NewPreImportTask(preimportReq) + preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm) s.manager.Add(preimportTask) go s.scheduler.Start() @@ -355,7 +355,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Import() { cm := mocks.NewChunkManager(s.T()) ioReader := strings.NewReader(string(bytes)) cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil) - s.scheduler.cm = cm + s.cm = cm s.syncMgr.EXPECT().SyncData(mock.Anything, 
mock.Anything).RunAndReturn(func(ctx context.Context, task syncmgr.Task) *conc.Future[struct{}] { future := conc.Go(func() (struct{}, error) { @@ -388,7 +388,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Import() { }, }, } - importTask := NewImportTask(importReq) + importTask := NewImportTask(importReq, s.manager, s.syncMgr, s.cm) s.manager.Add(importTask) go s.scheduler.Start() @@ -416,7 +416,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Import_Failed() { cm := mocks.NewChunkManager(s.T()) ioReader := strings.NewReader(string(bytes)) cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil) - s.scheduler.cm = cm + s.cm = cm s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, task syncmgr.Task) *conc.Future[struct{}] { future := conc.Go(func() (struct{}, error) { @@ -449,7 +449,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Import_Failed() { }, }, } - importTask := NewImportTask(importReq) + importTask := NewImportTask(importReq, s.manager, s.syncMgr, s.cm) s.manager.Add(importTask) go s.scheduler.Start() @@ -487,9 +487,9 @@ func (s *SchedulerSuite) TestScheduler_ReadFileStat() { Schema: s.schema, ImportFiles: []*internalpb.ImportFile{importFile}, } - preimportTask := NewPreImportTask(preimportReq) + preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm) s.manager.Add(preimportTask) - err := s.scheduler.readFileStat(s.reader, preimportTask, 0) + err := preimportTask.(*PreImportTask).readFileStat(s.reader, preimportTask, 0) s.NoError(err) } @@ -538,9 +538,9 @@ func (s *SchedulerSuite) TestScheduler_ImportFile() { }, }, } - importTask := NewImportTask(importReq) + importTask := NewImportTask(importReq, s.manager, s.syncMgr, s.cm) s.manager.Add(importTask) - err := s.scheduler.importFile(s.reader, importTask) + err := importTask.(*ImportTask).importFile(s.reader, importTask) s.NoError(err) } diff --git a/internal/datanode/importv2/task.go b/internal/datanode/importv2/task.go index a13f421f552d6..d349bf833bb03 100644 --- a/internal/datanode/importv2/task.go +++ b/internal/datanode/importv2/task.go @@ -17,18 +17,12 @@ package importv2 import ( - "context" - - "github.com/golang/protobuf/proto" "github.com/samber/lo" + "go.uber.org/zap" - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/internal/datanode/metacache" "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/proto/internalpb" - "github.com/milvus-io/milvus/internal/util/importutilv2" - "github.com/milvus-io/milvus/pkg/util/typeutil" + "github.com/milvus-io/milvus/pkg/util/conc" ) type TaskType int @@ -130,6 +124,7 @@ func UpdateSegmentInfo(info *datapb.ImportSegmentInfo) UpdateAction { } type Task interface { + Execute() []*conc.Future[any] GetJobID() int64 GetTaskID() int64 GetCollectionID() int64 @@ -139,183 +134,17 @@ type Task interface { GetState() datapb.ImportTaskStateV2 GetReason() string GetSchema() *schemapb.CollectionSchema - GetCtx() context.Context - GetOptions() []*commonpb.KeyValuePair Cancel() Clone() Task } -type PreImportTask struct { - *datapb.PreImportTask - ctx context.Context - cancel context.CancelFunc - partitionIDs []int64 - vchannels []string - schema *schemapb.CollectionSchema - options []*commonpb.KeyValuePair -} - -func NewPreImportTask(req *datapb.PreImportRequest) Task { - fileStats := lo.Map(req.GetImportFiles(), func(file *internalpb.ImportFile, _ int) *datapb.ImportFileStats { 
- return &datapb.ImportFileStats{ - ImportFile: file, - } - }) - ctx, cancel := context.WithCancel(context.Background()) - // During binlog import, even if the primary key's autoID is set to true, - // the primary key from the binlog should be used instead of being reassigned. - if importutilv2.IsBackup(req.GetOptions()) { - UnsetAutoID(req.GetSchema()) - } - return &PreImportTask{ - PreImportTask: &datapb.PreImportTask{ - JobID: req.GetJobID(), - TaskID: req.GetTaskID(), - CollectionID: req.GetCollectionID(), - State: datapb.ImportTaskStateV2_Pending, - FileStats: fileStats, - }, - ctx: ctx, - cancel: cancel, - partitionIDs: req.GetPartitionIDs(), - vchannels: req.GetVchannels(), - schema: req.GetSchema(), - options: req.GetOptions(), - } -} - -func (p *PreImportTask) GetPartitionIDs() []int64 { - return p.partitionIDs -} - -func (p *PreImportTask) GetVchannels() []string { - return p.vchannels -} - -func (p *PreImportTask) GetType() TaskType { - return PreImportTaskType -} - -func (p *PreImportTask) GetSchema() *schemapb.CollectionSchema { - return p.schema -} - -func (p *PreImportTask) GetOptions() []*commonpb.KeyValuePair { - return p.options -} - -func (p *PreImportTask) GetCtx() context.Context { - return p.ctx -} - -func (p *PreImportTask) Cancel() { - p.cancel() -} - -func (p *PreImportTask) Clone() Task { - ctx, cancel := context.WithCancel(p.GetCtx()) - return &PreImportTask{ - PreImportTask: proto.Clone(p.PreImportTask).(*datapb.PreImportTask), - ctx: ctx, - cancel: cancel, - partitionIDs: p.GetPartitionIDs(), - vchannels: p.GetVchannels(), - schema: p.GetSchema(), - options: p.GetOptions(), - } -} - -type ImportTask struct { - *datapb.ImportTaskV2 - ctx context.Context - cancel context.CancelFunc - segmentsInfo map[int64]*datapb.ImportSegmentInfo - req *datapb.ImportRequest - metaCaches map[string]metacache.MetaCache -} - -func NewImportTask(req *datapb.ImportRequest) Task { - ctx, cancel := context.WithCancel(context.Background()) - // During binlog import, even if the primary key's autoID is set to true, - // the primary key from the binlog should be used instead of being reassigned. 
- if importutilv2.IsBackup(req.GetOptions()) { - UnsetAutoID(req.GetSchema()) - } - task := &ImportTask{ - ImportTaskV2: &datapb.ImportTaskV2{ - JobID: req.GetJobID(), - TaskID: req.GetTaskID(), - CollectionID: req.GetCollectionID(), - State: datapb.ImportTaskStateV2_Pending, - }, - ctx: ctx, - cancel: cancel, - segmentsInfo: make(map[int64]*datapb.ImportSegmentInfo), - req: req, - } - task.initMetaCaches(req) - return task -} - -func (t *ImportTask) initMetaCaches(req *datapb.ImportRequest) { - metaCaches := make(map[string]metacache.MetaCache) - schema := typeutil.AppendSystemFields(req.GetSchema()) - for _, channel := range req.GetVchannels() { - info := &datapb.ChannelWatchInfo{ - Vchan: &datapb.VchannelInfo{ - CollectionID: req.GetCollectionID(), - ChannelName: channel, - }, - Schema: schema, - } - metaCache := metacache.NewMetaCache(info, func(segment *datapb.SegmentInfo) *metacache.BloomFilterSet { - return metacache.NewBloomFilterSet() - }) - metaCaches[channel] = metaCache - } - t.metaCaches = metaCaches -} - -func (t *ImportTask) GetType() TaskType { - return ImportTaskType -} - -func (t *ImportTask) GetPartitionIDs() []int64 { - return t.req.GetPartitionIDs() -} - -func (t *ImportTask) GetVchannels() []string { - return t.req.GetVchannels() -} - -func (t *ImportTask) GetSchema() *schemapb.CollectionSchema { - return t.req.GetSchema() -} - -func (t *ImportTask) GetOptions() []*commonpb.KeyValuePair { - return t.req.GetOptions() -} - -func (t *ImportTask) GetCtx() context.Context { - return t.ctx -} - -func (t *ImportTask) Cancel() { - t.cancel() -} - -func (t *ImportTask) GetSegmentsInfo() []*datapb.ImportSegmentInfo { - return lo.Values(t.segmentsInfo) -} - -func (t *ImportTask) Clone() Task { - ctx, cancel := context.WithCancel(t.GetCtx()) - return &ImportTask{ - ImportTaskV2: proto.Clone(t.ImportTaskV2).(*datapb.ImportTaskV2), - ctx: ctx, - cancel: cancel, - segmentsInfo: t.segmentsInfo, - req: t.req, - metaCaches: t.metaCaches, +func WrapLogFields(task Task, fields ...zap.Field) []zap.Field { + res := []zap.Field{ + zap.Int64("taskID", task.GetTaskID()), + zap.Int64("jobID", task.GetJobID()), + zap.Int64("collectionID", task.GetCollectionID()), + zap.String("type", task.GetType().String()), } + res = append(res, fields...) + return res } diff --git a/internal/datanode/importv2/task_import.go b/internal/datanode/importv2/task_import.go new file mode 100644 index 0000000000000..0b99348843bbb --- /dev/null +++ b/internal/datanode/importv2/task_import.go @@ -0,0 +1,248 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package importv2 + +import ( + "context" + "io" + "time" + + "github.com/cockroachdb/errors" + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/datanode/metacache" + "github.com/milvus-io/milvus/internal/datanode/syncmgr" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/proto/internalpb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/internal/util/importutilv2" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/conc" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +type ImportTask struct { + *datapb.ImportTaskV2 + ctx context.Context + cancel context.CancelFunc + segmentsInfo map[int64]*datapb.ImportSegmentInfo + req *datapb.ImportRequest + + manager TaskManager + syncMgr syncmgr.SyncManager + cm storage.ChunkManager + metaCaches map[string]metacache.MetaCache +} + +func NewImportTask(req *datapb.ImportRequest, + manager TaskManager, + syncMgr syncmgr.SyncManager, + cm storage.ChunkManager, +) Task { + ctx, cancel := context.WithCancel(context.Background()) + // During binlog import, even if the primary key's autoID is set to true, + // the primary key from the binlog should be used instead of being reassigned. + if importutilv2.IsBackup(req.GetOptions()) { + UnsetAutoID(req.GetSchema()) + } + task := &ImportTask{ + ImportTaskV2: &datapb.ImportTaskV2{ + JobID: req.GetJobID(), + TaskID: req.GetTaskID(), + CollectionID: req.GetCollectionID(), + State: datapb.ImportTaskStateV2_Pending, + }, + ctx: ctx, + cancel: cancel, + segmentsInfo: make(map[int64]*datapb.ImportSegmentInfo), + req: req, + manager: manager, + syncMgr: syncMgr, + cm: cm, + } + task.initMetaCaches(req) + return task +} + +func (t *ImportTask) initMetaCaches(req *datapb.ImportRequest) { + metaCaches := make(map[string]metacache.MetaCache) + schema := typeutil.AppendSystemFields(req.GetSchema()) + for _, channel := range req.GetVchannels() { + info := &datapb.ChannelWatchInfo{ + Vchan: &datapb.VchannelInfo{ + CollectionID: req.GetCollectionID(), + ChannelName: channel, + }, + Schema: schema, + } + metaCache := metacache.NewMetaCache(info, func(segment *datapb.SegmentInfo) *metacache.BloomFilterSet { + return metacache.NewBloomFilterSet() + }) + metaCaches[channel] = metaCache + } + t.metaCaches = metaCaches +} + +func (t *ImportTask) GetType() TaskType { + return ImportTaskType +} + +func (t *ImportTask) GetPartitionIDs() []int64 { + return t.req.GetPartitionIDs() +} + +func (t *ImportTask) GetVchannels() []string { + return t.req.GetVchannels() +} + +func (t *ImportTask) GetSchema() *schemapb.CollectionSchema { + return t.req.GetSchema() +} + +func (t *ImportTask) Cancel() { + t.cancel() +} + +func (t *ImportTask) GetSegmentsInfo() []*datapb.ImportSegmentInfo { + return lo.Values(t.segmentsInfo) +} + +func (t *ImportTask) Clone() Task { + ctx, cancel := context.WithCancel(t.ctx) + return &ImportTask{ + ImportTaskV2: proto.Clone(t.ImportTaskV2).(*datapb.ImportTaskV2), + ctx: ctx, + cancel: cancel, + segmentsInfo: t.segmentsInfo, + req: t.req, + metaCaches: t.metaCaches, + } +} + +func (t *ImportTask) Execute() []*conc.Future[any] { + bufferSize := paramtable.Get().DataNodeCfg.ReadBufferSizeInMB.GetAsInt() * 1024 * 1024 + log.Info("start to import", WrapLogFields(t, + zap.Int("bufferSize", bufferSize), + zap.Any("schema", 
t.GetSchema()))...) + t.manager.Update(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress)) + + req := t.req + + fn := func(file *internalpb.ImportFile) error { + reader, err := importutilv2.NewReader(t.ctx, t.cm, t.GetSchema(), file, t.req.GetOptions(), bufferSize) + if err != nil { + log.Warn("new reader failed", WrapLogFields(t, zap.String("file", file.String()), zap.Error(err))...) + t.manager.Update(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed), UpdateReason(err.Error())) + return err + } + defer reader.Close() + start := time.Now() + err = t.importFile(reader, t) + if err != nil { + log.Warn("do import failed", WrapLogFields(t, zap.String("file", file.String()), zap.Error(err))...) + t.manager.Update(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed), UpdateReason(err.Error())) + return err + } + log.Info("import file done", WrapLogFields(t, zap.Strings("files", file.GetPaths()), + zap.Duration("dur", time.Since(start)))...) + return nil + } + + futures := make([]*conc.Future[any], 0, len(req.GetFiles())) + for _, file := range req.GetFiles() { + file := file + f := GetExecPool().Submit(func() (any, error) { + err := fn(file) + return err, err + }) + futures = append(futures, f) + } + return futures +} + +func (t *ImportTask) importFile(reader importutilv2.Reader, task Task) error { + iTask := task.(*ImportTask) + syncFutures := make([]*conc.Future[struct{}], 0) + syncTasks := make([]syncmgr.Task, 0) + for { + data, err := reader.Read() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } + err = AppendSystemFieldsData(iTask, data) + if err != nil { + return err + } + hashedData, err := HashData(iTask, data) + if err != nil { + return err + } + fs, sts, err := t.sync(iTask, hashedData) + if err != nil { + return err + } + syncFutures = append(syncFutures, fs...) + syncTasks = append(syncTasks, sts...) + } + err := conc.AwaitAll(syncFutures...) + if err != nil { + return err + } + for _, syncTask := range syncTasks { + segmentInfo, err := NewImportSegmentInfo(syncTask, iTask) + if err != nil { + return err + } + t.manager.Update(task.GetTaskID(), UpdateSegmentInfo(segmentInfo)) + log.Info("sync import data done", WrapLogFields(task, zap.Any("segmentInfo", segmentInfo))...) + } + return nil +} + +func (t *ImportTask) sync(task *ImportTask, hashedData HashedData) ([]*conc.Future[struct{}], []syncmgr.Task, error) { + log.Info("start to sync import data", WrapLogFields(task)...) 
+ futures := make([]*conc.Future[struct{}], 0) + syncTasks := make([]syncmgr.Task, 0) + segmentImportedSizes := make(map[int64]int) + for channelIdx, datas := range hashedData { + channel := task.GetVchannels()[channelIdx] + for partitionIdx, data := range datas { + if data.GetRowNum() == 0 { + continue + } + partitionID := task.GetPartitionIDs()[partitionIdx] + size := data.GetMemorySize() + segmentID := PickSegment(task, segmentImportedSizes, channel, partitionID, size) + syncTask, err := NewSyncTask(task.ctx, task, segmentID, partitionID, channel, data) + if err != nil { + return nil, nil, err + } + segmentImportedSizes[segmentID] += size + future := t.syncMgr.SyncData(task.ctx, syncTask) + futures = append(futures, future) + syncTasks = append(syncTasks, syncTask) + } + } + return futures, syncTasks, nil +} diff --git a/internal/datanode/importv2/task_preimport.go b/internal/datanode/importv2/task_preimport.go new file mode 100644 index 0000000000000..4d2ce93de7eab --- /dev/null +++ b/internal/datanode/importv2/task_preimport.go @@ -0,0 +1,212 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package importv2 + +import ( + "context" + "fmt" + "io" + "time" + + "github.com/cockroachdb/errors" + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/proto/internalpb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/internal/util/importutilv2" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/conc" + "github.com/milvus-io/milvus/pkg/util/paramtable" +) + +type PreImportTask struct { + *datapb.PreImportTask + ctx context.Context + cancel context.CancelFunc + partitionIDs []int64 + vchannels []string + schema *schemapb.CollectionSchema + options []*commonpb.KeyValuePair + + manager TaskManager + cm storage.ChunkManager +} + +func NewPreImportTask(req *datapb.PreImportRequest, + manager TaskManager, + cm storage.ChunkManager, +) Task { + fileStats := lo.Map(req.GetImportFiles(), func(file *internalpb.ImportFile, _ int) *datapb.ImportFileStats { + return &datapb.ImportFileStats{ + ImportFile: file, + } + }) + ctx, cancel := context.WithCancel(context.Background()) + // During binlog import, even if the primary key's autoID is set to true, + // the primary key from the binlog should be used instead of being reassigned. 
+ if importutilv2.IsBackup(req.GetOptions()) { + UnsetAutoID(req.GetSchema()) + } + return &PreImportTask{ + PreImportTask: &datapb.PreImportTask{ + JobID: req.GetJobID(), + TaskID: req.GetTaskID(), + CollectionID: req.GetCollectionID(), + State: datapb.ImportTaskStateV2_Pending, + FileStats: fileStats, + }, + ctx: ctx, + cancel: cancel, + partitionIDs: req.GetPartitionIDs(), + vchannels: req.GetVchannels(), + schema: req.GetSchema(), + options: req.GetOptions(), + manager: manager, + cm: cm, + } +} + +func (p *PreImportTask) GetPartitionIDs() []int64 { + return p.partitionIDs +} + +func (p *PreImportTask) GetVchannels() []string { + return p.vchannels +} + +func (p *PreImportTask) GetType() TaskType { + return PreImportTaskType +} + +func (p *PreImportTask) GetSchema() *schemapb.CollectionSchema { + return p.schema +} + +func (p *PreImportTask) Cancel() { + p.cancel() +} + +func (p *PreImportTask) Clone() Task { + ctx, cancel := context.WithCancel(p.ctx) + return &PreImportTask{ + PreImportTask: proto.Clone(p.PreImportTask).(*datapb.PreImportTask), + ctx: ctx, + cancel: cancel, + partitionIDs: p.GetPartitionIDs(), + vchannels: p.GetVchannels(), + schema: p.GetSchema(), + options: p.options, + } +} + +func (p *PreImportTask) Execute() []*conc.Future[any] { + bufferSize := paramtable.Get().DataNodeCfg.ReadBufferSizeInMB.GetAsInt() * 1024 * 1024 + log.Info("start to preimport", WrapLogFields(p, + zap.Int("bufferSize", bufferSize), + zap.Any("schema", p.GetSchema()))...) + p.manager.Update(p.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress)) + files := lo.Map(p.GetFileStats(), + func(fileStat *datapb.ImportFileStats, _ int) *internalpb.ImportFile { + return fileStat.GetImportFile() + }) + + fn := func(i int, file *internalpb.ImportFile) error { + reader, err := importutilv2.NewReader(p.ctx, p.cm, p.GetSchema(), file, p.options, bufferSize) + if err != nil { + log.Warn("new reader failed", WrapLogFields(p, zap.String("file", file.String()), zap.Error(err))...) + p.manager.Update(p.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed), UpdateReason(err.Error())) + return err + } + defer reader.Close() + start := time.Now() + err = p.readFileStat(reader, p, i) + if err != nil { + log.Warn("preimport failed", WrapLogFields(p, zap.String("file", file.String()), zap.Error(err))...) + p.manager.Update(p.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed), UpdateReason(err.Error())) + return err + } + log.Info("read file stat done", WrapLogFields(p, zap.Strings("files", file.GetPaths()), + zap.Duration("dur", time.Since(start)))...) 
+ return nil + } + + futures := make([]*conc.Future[any], 0, len(files)) + for i, file := range files { + i := i + file := file + f := GetExecPool().Submit(func() (any, error) { + err := fn(i, file) + return err, err + }) + futures = append(futures, f) + } + return futures +} + +func (p *PreImportTask) readFileStat(reader importutilv2.Reader, task Task, fileIdx int) error { + fileSize, err := reader.Size() + if err != nil { + return err + } + maxSize := paramtable.Get().DataNodeCfg.MaxImportFileSizeInGB.GetAsFloat() * 1024 * 1024 * 1024 + if fileSize > int64(maxSize) { + return errors.New(fmt.Sprintf( + "The import file size has reached the maximum limit allowed for importing, "+ + "fileSize=%d, maxSize=%d", fileSize, int64(maxSize))) + } + + totalRows := 0 + totalSize := 0 + hashedStats := make(map[string]*datapb.PartitionImportStats) + for { + data, err := reader.Read() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } + err = CheckRowsEqual(task.GetSchema(), data) + if err != nil { + return err + } + rowsCount, err := GetRowsStats(task, data) + if err != nil { + return err + } + MergeHashedStats(rowsCount, hashedStats) + rows := data.GetRowNum() + size := data.GetMemorySize() + totalRows += rows + totalSize += size + log.Info("reading file stat...", WrapLogFields(task, zap.Int("readRows", rows), zap.Int("readSize", size))...) + } + + stat := &datapb.ImportFileStats{ + FileSize: fileSize, + TotalRows: int64(totalRows), + TotalMemorySize: int64(totalSize), + HashedStats: hashedStats, + } + p.manager.Update(task.GetTaskID(), UpdateFileStat(fileIdx, stat)) + return nil +} diff --git a/internal/datanode/services.go b/internal/datanode/services.go index a3ce9e8df8d1d..4f172c966272b 100644 --- a/internal/datanode/services.go +++ b/internal/datanode/services.go @@ -419,7 +419,7 @@ func (node *DataNode) PreImport(ctx context.Context, req *datapb.PreImportReques return merr.Status(err), nil } - task := importv2.NewPreImportTask(req) + task := importv2.NewPreImportTask(req, node.importTaskMgr, node.chunkManager) node.importTaskMgr.Add(task) log.Info("datanode added preimport task") @@ -438,7 +438,7 @@ func (node *DataNode) ImportV2(ctx context.Context, req *datapb.ImportRequest) ( if err := merr.CheckHealthy(node.GetStateCode()); err != nil { return merr.Status(err), nil } - task := importv2.NewImportTask(req) + task := importv2.NewImportTask(req, node.importTaskMgr, node.syncMgr, node.chunkManager) node.importTaskMgr.Add(task) log.Info("datanode added import task") From 32d3e22d7dc44613cd3adf05f3555bbebed8b4f8 Mon Sep 17 00:00:00 2001 From: "cai.zhang" Date: Thu, 23 May 2024 11:47:40 +0800 Subject: [PATCH 043/126] fix: Throw an exception after all the threads in thread pool finished (#32810) issue: #32487 Signed-off-by: Cai Zhang --- internal/core/src/storage/Util.cpp | 49 ++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp index 0e714f0a97362..33df073cef5cd 100644 --- a/internal/core/src/storage/Util.cpp +++ b/internal/core/src/storage/Util.cpp @@ -575,11 +575,22 @@ GetObjectData(std::shared_ptr space, } std::vector datas; - for (int i = 0; i < futures.size(); ++i) { - auto res = futures[i].get(); - datas.emplace_back(res->GetFieldData()); + std::exception_ptr first_exception = nullptr; + for (auto& future : futures) { + try { + auto res = future.get(); + datas.emplace_back(res->GetFieldData()); + } catch (...) 
{ + if (!first_exception) { + first_exception = std::current_exception(); + } + } } ReleaseArrowUnused(); + if (first_exception) { + std::rethrow_exception(first_exception); + } + return datas; } @@ -612,12 +623,22 @@ PutIndexData(ChunkManager* remote_chunk_manager, } std::map remote_paths_to_size; + std::exception_ptr first_exception = nullptr; for (auto& future : futures) { - auto res = future.get(); - remote_paths_to_size[res.first] = res.second; + try { + auto res = future.get(); + remote_paths_to_size[res.first] = res.second; + } catch (...) { + if (!first_exception) { + first_exception = std::current_exception(); + } + } } - ReleaseArrowUnused(); + if (first_exception) { + std::rethrow_exception(first_exception); + } + return remote_paths_to_size; } @@ -650,12 +671,22 @@ PutIndexData(std::shared_ptr space, } std::map remote_paths_to_size; + std::exception_ptr first_exception = nullptr; for (auto& future : futures) { - auto res = future.get(); - remote_paths_to_size[res.first] = res.second; + try { + auto res = future.get(); + remote_paths_to_size[res.first] = res.second; + } catch (...) { + if (!first_exception) { + first_exception = std::current_exception(); + } + } } - ReleaseArrowUnused(); + if (first_exception) { + std::rethrow_exception(first_exception); + } + return remote_paths_to_size; } From 229a6b942b0f7b1a74a04060ae8e7babffa82d0c Mon Sep 17 00:00:00 2001 From: zhuwenxing Date: Thu, 23 May 2024 14:03:40 +0800 Subject: [PATCH 044/126] test: add check for partition key for import test (#33253) see https://github.com/milvus-io/milvus/issues/33237 Signed-off-by: zhuwenxing --- .../testcases/test_bulk_insert.py | 45 ++++++++++++++----- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/tests/python_client/testcases/test_bulk_insert.py b/tests/python_client/testcases/test_bulk_insert.py index 9e1a4ae0bc5e3..e21b6a17206e8 100644 --- a/tests/python_client/testcases/test_bulk_insert.py +++ b/tests/python_client/testcases/test_bulk_insert.py @@ -828,7 +828,8 @@ def test_with_all_field_json(self, auto_id, dim, entities, enable_dynamic_field) @pytest.mark.parametrize("dim", [128]) # 128 @pytest.mark.parametrize("entities", [1000]) # 1000 @pytest.mark.parametrize("enable_dynamic_field", [True]) - def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities, enable_dynamic_field): + @pytest.mark.parametrize("enable_partition_key", [True, False]) + def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key): """ collection schema 1: [pk, int64, float64, string float_vector] data file: vectors.npy and uid.npy, @@ -841,7 +842,7 @@ def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), cf.gen_int64_field(name=df.int_field), cf.gen_float_field(name=df.float_field), - cf.gen_string_field(name=df.string_field), + cf.gen_string_field(name=df.string_field, is_partition_key=enable_partition_key), cf.gen_json_field(name=df.json_field), cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64), cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT), @@ -945,16 +946,23 @@ def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities if enable_dynamic_field: assert "name" in fields_from_search assert "address" in fields_from_search - - + # query data + res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field]) + 
assert len(res) == entities + query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)] + res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field]) + assert len(res) == len(query_data) + if enable_partition_key: + assert len(self.collection_wrap.partitions) > 1 @pytest.mark.tags(CaseLabel.L3) @pytest.mark.parametrize("auto_id", [True, False]) @pytest.mark.parametrize("dim", [128]) # 128 @pytest.mark.parametrize("entities", [1000]) # 1000 @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("enable_partition_key", [True, False]) @pytest.mark.parametrize("include_meta", [True, False]) - def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field, include_meta): + def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key, include_meta): """ collection schema 1: [pk, int64, float64, string float_vector] data file: vectors.npy and uid.npy, @@ -970,7 +978,7 @@ def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_d cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), cf.gen_int64_field(name=df.int_field), cf.gen_float_field(name=df.float_field), - cf.gen_string_field(name=df.string_field), + cf.gen_string_field(name=df.string_field, is_partition_key=enable_partition_key), cf.gen_json_field(name=df.json_field), cf.gen_float_vec_field(name=df.float_vec_field, dim=dim), # cf.gen_float_vec_field(name=df.image_float_vec_field, dim=dim), @@ -1072,14 +1080,25 @@ def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_d if enable_dynamic_field and include_meta: assert "name" in fields_from_search assert "address" in fields_from_search + # query data + res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field]) + assert len(res) == entities + query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)] + res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field]) + assert len(res) == len(query_data) + if enable_partition_key: + assert len(self.collection_wrap.partitions) > 1 + + @pytest.mark.tags(CaseLabel.L3) @pytest.mark.parametrize("auto_id", [True, False]) @pytest.mark.parametrize("dim", [128]) # 128 @pytest.mark.parametrize("entities", [1000]) # 1000 @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("enable_partition_key", [True, False]) @pytest.mark.parametrize("include_meta", [True, False]) - def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, include_meta): + def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key, include_meta): """ collection schema 1: [pk, int64, float64, string float_vector] data file: vectors.parquet and uid.parquet, @@ -1094,15 +1113,13 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), cf.gen_int64_field(name=df.int_field), cf.gen_float_field(name=df.float_field), - cf.gen_string_field(name=df.string_field), + cf.gen_string_field(name=df.string_field, is_partition_key=enable_partition_key), cf.gen_json_field(name=df.json_field), cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64), 
cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT), cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100), cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL), cf.gen_float_vec_field(name=df.float_vec_field, dim=dim), - # cf.gen_float_vec_field(name=df.image_float_vec_field, dim=dim), - # cf.gen_float_vec_field(name=df.text_float_vec_field, dim=dim), cf.gen_binary_vec_field(name=df.binary_vec_field, dim=dim), cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=dim), cf.gen_float16_vec_field(name=df.fp16_vec_field, dim=dim) @@ -1199,6 +1216,14 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable if enable_dynamic_field and include_meta: assert "name" in fields_from_search assert "address" in fields_from_search + # query data + res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field]) + assert len(res) == entities + query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)] + res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field]) + assert len(res) == len(query_data) + if enable_partition_key: + assert len(self.collection_wrap.partitions) > 1 @pytest.mark.tags(CaseLabel.L3) @pytest.mark.parametrize("auto_id", [True]) From c5918ffbdb131e6f5b9bf6fca54f98769eac7824 Mon Sep 17 00:00:00 2001 From: Buqian Zheng Date: Thu, 23 May 2024 14:11:42 +0800 Subject: [PATCH 045/126] enhance: mark sparse inverted index as mmap-able (#33281) issue: #29419 Signed-off-by: Buqian Zheng --- internal/core/src/index/Index.h | 5 ++++- internal/core/unittest/test_indexing.cpp | 2 +- internal/core/unittest/test_utils/DataGen.h | 12 ++++++++++++ pkg/util/indexparamcheck/index_type.go | 4 +++- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/internal/core/src/index/Index.h b/internal/core/src/index/Index.h index 9381ee74ada26..2f3da4be14bbf 100644 --- a/internal/core/src/index/Index.h +++ b/internal/core/src/index/Index.h @@ -81,7 +81,10 @@ class IndexBase { index_type_ == knowhere::IndexEnum::INDEX_FAISS_IVFSQ8 || index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT || index_type_ == knowhere::IndexEnum::INDEX_FAISS_IDMAP || - index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP; + index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP || + index_type_ == + knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX || + index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND; } const IndexType& diff --git a/internal/core/unittest/test_indexing.cpp b/internal/core/unittest/test_indexing.cpp index 4cdf94420bd19..c02f427736095 100644 --- a/internal/core/unittest/test_indexing.cpp +++ b/internal/core/unittest/test_indexing.cpp @@ -570,7 +570,7 @@ TEST_P(IndexTest, Mmap) { load_conf["mmap_filepath"] = "mmap/test_index_mmap_" + index_type; vec_index->Load(milvus::tracer::TraceContext{}, load_conf); EXPECT_EQ(vec_index->Count(), NB); - EXPECT_EQ(vec_index->GetDim(), DIM); + EXPECT_EQ(vec_index->GetDim(), is_sparse ? 
kTestSparseDim : DIM); milvus::SearchInfo search_info; search_info.topk_ = K; diff --git a/internal/core/unittest/test_utils/DataGen.h b/internal/core/unittest/test_utils/DataGen.h index 7566c63757eda..283ccbec3c7db 100644 --- a/internal/core/unittest/test_utils/DataGen.h +++ b/internal/core/unittest/test_utils/DataGen.h @@ -259,6 +259,18 @@ GenerateRandomSparseFloatVector(size_t rows, std::vector> data(rows); + // ensure the actual dim of the entire generated dataset is cols. + data[0][cols - 1] = real_distrib(rng); + --num_elements; + + // Ensure each row has at least one non-zero value + for (size_t i = 0; i < rows; ++i) { + auto col = col_distrib(rng); + float val = real_distrib(rng); + data[i][col] = val; + } + num_elements -= rows; + for (int32_t i = 0; i < num_elements; ++i) { auto row = row_distrib(rng); while (data[row].size() == (size_t)cols) { diff --git a/pkg/util/indexparamcheck/index_type.go b/pkg/util/indexparamcheck/index_type.go index e752057ea4e85..a20db560bfdb0 100644 --- a/pkg/util/indexparamcheck/index_type.go +++ b/pkg/util/indexparamcheck/index_type.go @@ -57,7 +57,9 @@ func IsMmapSupported(indexType IndexType) bool { indexType == IndexFaissBinIDMap || indexType == IndexFaissBinIvfFlat || indexType == IndexHNSW || - indexType == IndexScaNN + indexType == IndexScaNN || + indexType == IndexSparseInverted || + indexType == IndexSparseWand } func IsDiskIndex(indexType IndexType) bool { From fd53cdb103c6628a93becc9294d6283d76de1fc2 Mon Sep 17 00:00:00 2001 From: Cai Yudong Date: Thu, 23 May 2024 14:17:40 +0800 Subject: [PATCH 046/126] fix: Fix SparseFloatVector data parse error for json (#33259) Issue: #33162 Signed-off-by: Cai Yudong --- pkg/util/typeutil/schema.go | 34 +++++++++++++++++++++----------- pkg/util/typeutil/schema_test.go | 28 ++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/pkg/util/typeutil/schema.go b/pkg/util/typeutil/schema.go index 272447981a5ec..5a696be7431b4 100644 --- a/pkg/util/typeutil/schema.go +++ b/pkg/util/typeutil/schema.go @@ -1555,22 +1555,34 @@ func CreateSparseFloatRowFromMap(input map[string]interface{}) ([]byte, error) { if ok1 && ok2 { // try format1 - for _, v1 := range jsonIndices { - if num1, suc1 := v1.(int); suc1 { - indices = append(indices, uint32(num1)) - } else { - if num2, suc2 := v1.(float64); suc2 && num2 == float64(int(num2)) { - indices = append(indices, uint32(num2)) + for _, idx := range jsonIndices { + if i1, s1 := idx.(int); s1 { + indices = append(indices, uint32(i1)) + } else if i2, s2 := idx.(float64); s2 && i2 == float64(int(i2)) { + indices = append(indices, uint32(i2)) + } else if i3, s3 := idx.(json.Number); s3 { + if num, err := strconv.ParseUint(i3.String(), 0, 32); err == nil { + indices = append(indices, uint32(num)) } else { - return nil, fmt.Errorf("invalid index type: %v(%s)", v1, reflect.TypeOf(v1)) + return nil, err } + } else { + return nil, fmt.Errorf("invalid indicies type: %v(%s)", idx, reflect.TypeOf(idx)) } } - for _, v2 := range jsonValues { - if num, ok := v2.(float64); ok { - values = append(values, float32(num)) + for _, val := range jsonValues { + if v1, s1 := val.(int); s1 { + values = append(values, float32(v1)) + } else if v2, s2 := val.(float64); s2 { + values = append(values, float32(v2)) + } else if v3, s3 := val.(json.Number); s3 { + if num, err := strconv.ParseFloat(v3.String(), 32); err == nil { + values = append(values, float32(num)) + } else { + return nil, err + } } else { - return nil, fmt.Errorf("invalid value type: %s", 
reflect.TypeOf(v2)) + return nil, fmt.Errorf("invalid values type: %v(%s)", val, reflect.TypeOf(val)) } } } else if !ok1 && !ok2 { diff --git a/pkg/util/typeutil/schema_test.go b/pkg/util/typeutil/schema_test.go index b1e5ec4b835f1..f487336b94c17 100644 --- a/pkg/util/typeutil/schema_test.go +++ b/pkg/util/typeutil/schema_test.go @@ -2133,6 +2133,20 @@ func TestParseJsonSparseFloatRow(t *testing.T) { assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{2.0, 1.0, 3.0}), res) }) + t.Run("valid row 3", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{1, 3, 5}, "values": []interface{}{1, 2, 3}} + res, err := CreateSparseFloatRowFromMap(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res) + }) + + t.Run("valid row 3", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{math.MaxInt32 + 1}, "values": []interface{}{1.0}} + res, err := CreateSparseFloatRowFromMap(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{math.MaxInt32 + 1}, []float32{1.0}), res) + }) + t.Run("invalid row 1", func(t *testing.T) { row := map[string]interface{}{"indices": []interface{}{1, 3, 5}, "values": []interface{}{1.0, 2.0}} _, err := CreateSparseFloatRowFromMap(row) @@ -2235,6 +2249,20 @@ func TestParseJsonSparseFloatRowBytes(t *testing.T) { assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{2.0, 1.0, 3.0}), res) }) + t.Run("valid row 3", func(t *testing.T) { + row := []byte(`{"indices":[1, 3, 5], "values":[1, 2, 3]}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res) + }) + + t.Run("valid row 3", func(t *testing.T) { + row := []byte(`{"indices":[2147483648], "values":[1.0]}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{math.MaxInt32 + 1}, []float32{1.0}), res) + }) + t.Run("invalid row 1", func(t *testing.T) { row := []byte(`{"indices":[1,3,5],"values":[1.0,2.0,3.0`) _, err := CreateSparseFloatRowFromJSON(row) From 155cb40fc911cf8c19a0f0967dde55896b6daeb6 Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Thu, 23 May 2024 14:19:40 +0800 Subject: [PATCH 047/126] fix: Fix printing type of request (#33289) issue: https://github.com/milvus-io/milvus/issues/31705 Signed-off-by: bigsheeper --- internal/proxy/rate_limit_interceptor.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/proxy/rate_limit_interceptor.go b/internal/proxy/rate_limit_interceptor.go index 61fe932e78bd9..49e3c11b803bb 100644 --- a/internal/proxy/rate_limit_interceptor.go +++ b/internal/proxy/rate_limit_interceptor.go @@ -19,7 +19,6 @@ package proxy import ( "context" "fmt" - "reflect" "strconv" "github.com/golang/protobuf/proto" @@ -206,7 +205,7 @@ func getRequestInfo(ctx context.Context, req interface{}) (int64, map[int64][]in if req == nil { return util.InvalidDBID, map[int64][]int64{}, 0, 0, fmt.Errorf("null request") } - return util.InvalidDBID, map[int64][]int64{}, 0, 0, fmt.Errorf("unsupported request type %s", reflect.TypeOf(req).Name()) + return util.InvalidDBID, map[int64][]int64{}, 0, 0, fmt.Errorf("unsupported request type %T", req) } } From 00cd88e1640e003efefe592338f64eafb93bf813 Mon Sep 17 00:00:00 2001 From: yanliang567 <82361606+yanliang567@users.noreply.github.com> Date: Thu, 23 May 2024 14:29:40 +0800 Subject: [PATCH 048/126] test: Add index names 
tests for mulitple vectors support (#33250) Related issue: https://github.com/milvus-io/milvus/issues/32653 1. Update index name tests 2. remove some time.sleep --------- Signed-off-by: yanliang567 --- .../python_client/base/collection_wrapper.py | 2 - .../testcases/test_collection.py | 59 ++- tests/python_client/testcases/test_index.py | 393 ++++-------------- tests/python_client/testcases/test_issues.py | 50 +-- tests/python_client/testcases/test_search.py | 3 - 5 files changed, 141 insertions(+), 366 deletions(-) diff --git a/tests/python_client/base/collection_wrapper.py b/tests/python_client/base/collection_wrapper.py index 39650957af4b4..43dc117191952 100644 --- a/tests/python_client/base/collection_wrapper.py +++ b/tests/python_client/base/collection_wrapper.py @@ -226,7 +226,6 @@ def hybrid_search(self, reqs, rerank, limit, partition_names=None, output_fields @trace() def query(self, expr, output_fields=None, partition_names=None, timeout=None, check_task=None, check_items=None, **kwargs): - # time.sleep(5) timeout = TIMEOUT if timeout is None else timeout func_name = sys._getframe().f_code.co_name @@ -240,7 +239,6 @@ def query(self, expr, output_fields=None, partition_names=None, timeout=None, ch @trace() def query_iterator(self, batch_size=1000, limit=-1, expr=None, output_fields=None, partition_names=None, timeout=None, check_task=None, check_items=None, **kwargs): - # time.sleep(5) timeout = TIMEOUT if timeout is None else timeout func_name = sys._getframe().f_code.co_name diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py index 63c96bfff3b50..71084a07910d5 100644 --- a/tests/python_client/testcases/test_collection.py +++ b/tests/python_client/testcases/test_collection.py @@ -106,9 +106,11 @@ def test_collection_invalid_name(self, name): expected: raise exception """ self._connect() - error = {ct.err_code: 1, ct.err_msg: "Invalid collection name: {}".format(name)} - if name is not None and name.strip() == "": - error = {ct.err_code: 1, ct.err_msg: "collection name should not be empty"} + error = {ct.err_code: 999, ct.err_msg: f"Invalid collection name: {name}"} + if name in [None, ""]: + error = {ct.err_code: 999, ct.err_msg: f"`collection_name` value {name} is illegal"} + if name in [" "]: + error = {ct.err_code: 999, ct.err_msg: f"collection name should not be empty"} self.collection_wrap.init_collection(name, schema=default_schema, check_task=CheckTasks.err_res, check_items=error) @@ -161,8 +163,8 @@ def test_collection_dup_name_new_schema(self): check_items={exp_name: c_name, exp_schema: default_schema}) fields = [cf.gen_int64_field(is_primary=True)] schema = cf.gen_collection_schema(fields=fields) - error = {ct.err_code: 0, ct.err_msg: "The collection already exist, but the schema is not the same as the " - "schema passed in."} + error = {ct.err_code: 999, ct.err_msg: "The collection already exist, but the schema is not the same as the " + "schema passed in."} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -382,7 +384,7 @@ def test_collection_without_vectors(self): self._connect() c_name = cf.gen_unique_str(prefix) schema = cf.gen_collection_schema([cf.gen_int64_field(is_primary=True)]) - error = {ct.err_code: 0, ct.err_msg: "No vector field is found."} + error = {ct.err_code: 999, ct.err_msg: "No vector field is found."} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, 
check_items=error) @pytest.mark.tags(CaseLabel.L1) @@ -428,7 +430,7 @@ def test_collection_invalid_is_primary(self, is_primary): check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("primary_field", ["12-s", "12 s", "(mn)", "中文", "%$#", "a".join("a" for i in range(256))]) + @pytest.mark.parametrize("primary_field", ["12-s", "non_existing", "(mn)", "中文", None]) def test_collection_invalid_primary_field(self, primary_field): """ target: test collection with invalid primary_field @@ -437,12 +439,12 @@ def test_collection_invalid_primary_field(self, primary_field): """ self._connect() fields = [cf.gen_int64_field(), cf.gen_float_vec_field()] - error = {ct.err_code: 1, ct.err_msg: "Schema must have a primary key field."} + error = {ct.err_code: 999, ct.err_msg: "Schema must have a primary key field"} self.collection_schema_wrap.init_collection_schema(fields=fields, primary_field=primary_field, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("primary_field", [[], 1, [1, "2", 3], (1,), {1: 1}, None]) + @pytest.mark.parametrize("primary_field", [[], 1, [1, "2", 3], (1,), {1: 1}]) def test_collection_non_string_primary_field(self, primary_field): """ target: test collection with non-string primary_field @@ -451,25 +453,10 @@ def test_collection_non_string_primary_field(self, primary_field): """ self._connect() fields = [cf.gen_int64_field(), cf.gen_float_vec_field()] - error = {ct.err_code: 1, ct.err_msg: "Param primary_field must be str type."} + error = {ct.err_code: 999, ct.err_msg: "Param primary_field must be int or str type"} self.collection_schema_wrap.init_collection_schema(fields, primary_field=primary_field, check_task=CheckTasks.err_res, check_items=error) - @pytest.mark.tags(CaseLabel.L2) - def test_collection_not_existed_primary_field(self): - """ - target: test collection with not exist primary field - method: specify not existed field as primary_field - expected: raise exception - """ - self._connect() - fake_field = cf.gen_unique_str() - fields = [cf.gen_int64_field(), cf.gen_float_vec_field()] - error = {ct.err_code: 1, ct.err_msg: "Schema must have a primary key field."} - - self.collection_schema_wrap.init_collection_schema(fields, primary_field=fake_field, - check_task=CheckTasks.err_res, check_items=error) - @pytest.mark.tags(CaseLabel.L0) def test_collection_primary_in_schema(self): """ @@ -506,7 +493,7 @@ def test_collection_unsupported_primary_field(self, get_unsupported_primary_fiel self._connect() field = get_unsupported_primary_field vec_field = cf.gen_float_vec_field(name="vec") - error = {ct.err_code: 1, ct.err_msg: "Primary key type must be DataType.INT64 or DataType.VARCHAR."} + error = {ct.err_code: 999, ct.err_msg: "Primary key type must be DataType.INT64 or DataType.VARCHAR."} self.collection_schema_wrap.init_collection_schema(fields=[field, vec_field], primary_field=field.name, check_task=CheckTasks.err_res, check_items=error) @@ -520,7 +507,7 @@ def test_collection_multi_primary_fields(self): self._connect() int_field_one = cf.gen_int64_field(is_primary=True) int_field_two = cf.gen_int64_field(name="int2", is_primary=True) - error = {ct.err_code: 0, ct.err_msg: "Expected only one primary key field"} + error = {ct.err_code: 999, ct.err_msg: "Expected only one primary key field"} self.collection_schema_wrap.init_collection_schema( fields=[int_field_one, int_field_two, cf.gen_float_vec_field()], check_task=CheckTasks.err_res, check_items=error) @@ 
-536,7 +523,7 @@ def test_collection_primary_inconsistent(self): int_field_one = cf.gen_int64_field(is_primary=True) int_field_two = cf.gen_int64_field(name="int2") fields = [int_field_one, int_field_two, cf.gen_float_vec_field()] - error = {ct.err_code: 1, ct.err_msg: "Expected only one primary key field"} + error = {ct.err_code: 999, ct.err_msg: "Expected only one primary key field"} self.collection_schema_wrap.init_collection_schema(fields, primary_field=int_field_two.name, check_task=CheckTasks.err_res, check_items=error) @@ -597,7 +584,7 @@ def test_collection_auto_id_non_primary_field(self): expected: raise exception """ self._connect() - error = {ct.err_code: 0, ct.err_msg: "auto_id can only be specified on the primary key field"} + error = {ct.err_code: 999, ct.err_msg: "auto_id can only be specified on the primary key field"} self.field_schema_wrap.init_field_schema(name=ct.default_int64_field_name, dtype=DataType.INT64, auto_id=True, check_task=CheckTasks.err_res, check_items=error) @@ -616,19 +603,21 @@ def test_collection_auto_id_false_non_primary(self): assert not schema.auto_id @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.xfail(reason="issue 24578") - def test_collection_auto_id_inconsistent(self): + @pytest.mark.xfail(reason="pymilvus issue, should use fieldschema as top priority") + @pytest.mark.parametrize("auto_id", [True, False]) + def test_collection_auto_id_inconsistent(self, auto_id): """ target: test collection auto_id with both collection schema and field schema method: 1.set primary field auto_id=True in field schema 2.set auto_id=False in collection schema expected: raise exception """ self._connect() - int_field = cf.gen_int64_field(is_primary=True, auto_id=True) + int_field = cf.gen_int64_field(is_primary=True, auto_id=auto_id) vec_field = cf.gen_float_vec_field(name='vec') + schema, _ = self.collection_schema_wrap.init_collection_schema([int_field, vec_field], auto_id=not auto_id) + collection_w = self.collection_wrap.init_collection(cf.gen_unique_str(prefix), schema=schema)[0] - schema, _ = self.collection_schema_wrap.init_collection_schema([int_field, vec_field], auto_id=False) - assert schema.auto_id + assert collection_w.schema.auto_id is auto_id @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("auto_id", [True, False]) @@ -718,7 +707,7 @@ def test_collection_vector_invalid_dim(self, get_invalid_dim): self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("dim", [-1, 0, 32769]) + @pytest.mark.parametrize("dim", [ct.min_dim-1, ct.max_dim+1]) def test_collection_vector_out_bounds_dim(self, dim): """ target: test collection with out of bounds dim diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index a91cd115c1b85..017ab6ff034db 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -210,12 +210,15 @@ def test_index_create_indexes_for_different_fields(self): """ target: Test create indexes for different fields method: create two different indexes with default index name - expected: create successfully + expected: create successfully, and the default index name equals to field name """ - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] - default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} + collection_w = self.init_collection_general(prefix, True, nb=200, 
is_index=False)[0] + default_index = ct.default_index collection_w.create_index(default_field_name, default_index) collection_w.create_index(ct.default_int64_field_name, {}) + assert len(collection_w.indexes) == 2 + for index in collection_w.indexes: + assert index.field_name == index.index_name @pytest.mark.tags(CaseLabel.L1) def test_index_create_on_scalar_field(self): @@ -224,7 +227,7 @@ def test_index_create_on_scalar_field(self): method: create index on scalar field and load expected: raise exception """ - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] + collection_w = self.init_collection_general(prefix, True, nb=200, is_index=False)[0] collection_w.create_index(ct.default_int64_field_name, {}) collection_w.load(check_task=CheckTasks.err_res, check_items={ct.err_code: 65535, @@ -256,7 +259,6 @@ def test_index_collection_empty(self): c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap(name=c_name) index, _ = self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params) - # TODO: assert index cf.assert_equal_index(index, collection_w.collection.indexes[0]) @pytest.mark.tags(CaseLabel.L1) @@ -273,7 +275,6 @@ def test_index_params(self, index_param): collection_w.insert(data=data) index_params = index_param index, _ = self.index_wrap.init_index(collection_w.collection, default_field_name, index_params) - # TODO: assert index cf.assert_equal_index(index, collection_w.collection.indexes[0]) @pytest.mark.tags(CaseLabel.L1) @@ -294,78 +295,76 @@ def test_index_params_flush(self): cf.assert_equal_index(index, collection_w.collection.indexes[0]) assert collection_w.num_entities == ct.default_nb - # TODO: not support @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason='not supported') def test_index_name_dup(self): """ target: test index with duplicate index name - method: create index with existed index name create by `collection.create_index` + method: create index with existed index name and different index params + expected: raise exception + create index with the same index name and same index params expected: no exception raised """ c_name = cf.gen_unique_str(prefix) index_name = ct.default_index_name collection_w = self.init_collection_wrap(name=c_name) - collection_w.collection.create_index(default_field_name, default_index_params, index_name=index_name) - self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params, + params = cf.get_index_params_params("HNSW") + index_params = {"index_type": "HNSW", "metric_type": "L2", "params": params} + params2 = cf.get_index_params_params("HNSW") + params2.update({"M": 16, "efConstruction": 200}) + index_params2 = {"index_type": "HNSW", "metric_type": "L2", "params": params2} + collection_w.collection.create_index(default_field_name, index_params, index_name=index_name) + + # create index with the same index name and different index params + error = {ct.err_code: 999, ct.err_msg: "at most one distinct index is allowed per field"} + self.index_wrap.init_index(collection_w.collection, default_field_name, index_params2, index_name=index_name, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, ct.err_msg: ""}) + check_items=error) + # create index with the same index name and same index params + self.index_wrap.init_index(collection_w.collection, default_field_name, index_params) - # TODO: server not supported @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason='not supported') - def 
test_index_field_names(self): + def test_index_same_name_on_diff_fields(self): """ - target: test index on one field, with two indexes - method: create index with two different indexes - expected: no exception raised + target: verify index with the same name on different fields is not supported + method: create index with index name A on fieldA, create index with index name A on fieldB + expected: raise exception """ - pass + # collection_w, _ = self.init_collection_general(prefix, dim=64, insert_data=False, is_index=False, + # multiple_dim_array=[32]) + id_field = cf.gen_int64_field(name="id", is_primary=True) + vec_field = cf.gen_float_vec_field(name="vec_field", dim=64) + vec_field2 = cf.gen_float_vec_field(name="vec_field2", dim=32) + str_field = cf.gen_string_field(name="str_field") + str_field2 = cf.gen_string_field(name="str_field2") + schema, _ = self.collection_schema_wrap.init_collection_schema([id_field, vec_field, vec_field2, str_field, str_field2]) + collection_w = self.init_collection_wrap(schema=schema) + vec_index = ct.default_index + vec_index_name = "my_index" - # TODO: server not supported - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason='not supported') - def test_index_fields(self): - """ - target: test index on two fields, with the same name - method: create the same index name with two different fields - expected: exception raised - """ - pass + # create same index name on different vector fields + error = {ct.err_code: 999, ct.err_msg: "at most one distinct index is allowed per field"} + collection_w.create_index(vec_field.name, vec_index, index_name=vec_index_name) + collection_w.create_index(vec_field2.name, vec_index, index_name=vec_index_name, + check_task=CheckTasks.err_res, + check_items=error) - # TODO: server not supported - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason='not supported') - def test_index_fields_B(self): - """ - target: test index on two fields, with the different name - method: create the different index with two different fields - expected: no exception raised - """ - pass + # create same index name on different scalar fields + collection_w.create_index(str_field.name, index_name=vec_index_name, + check_task=CheckTasks.err_res, + check_items=error) - # TODO: server not supported - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason='not supported') - def test_index_field_names_eq_maximum(self): - """ - target: test index on one field, with the different names, num of the names equal to the maximum num supported - method: create the different indexes - expected: no exception raised - """ - pass + # create same scalar index name on different scalar fields + index_name = "scalar_index" + collection_w.create_index(str_field.name, index_name=index_name) + collection_w.create_index(str_field2.name, index_name=index_name, + check_task=CheckTasks.err_res, + check_items=error) + all_indexes = collection_w.indexes + assert len(all_indexes) == 2 + assert all_indexes[0].index_name != all_indexes[1].index_name + for index in all_indexes: + assert index.index_name in [vec_index_name, index_name] - # TODO: server not supported - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason='not supported') - def test_index_field_names_more_maximum(self): - """ - target: test index on one field, with the different names, num of the names more than the maximum num supported - method: create the different indexes - expected: exception raised - """ - pass - @pytest.mark.tags(CaseLabel.L1) def test_index_drop_index(self): """ @@
-381,7 +380,6 @@ def test_index_drop_index(self): assert len(collection_w.indexes) == 0 @pytest.mark.tags(CaseLabel.L1) - # TODO #7372 def test_index_drop_repeatedly(self): """ target: test index.drop @@ -417,52 +415,6 @@ def test_index_drop_multi_collections(self): assert cf.assert_equal_index(index_2, cw2.collection.indexes[0]) assert len(cw.collection.indexes) == 0 - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason='TODO') - def test_index_drop_during_inserting(self): - """ - target: test index.drop during inserting - method: create indexes by `index`, and then drop it during inserting entities, make sure async insert - expected: no exception raised, insert success - """ - pass - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason='TODO') - def test_index_drop_during_searching(self): - """ - target: test index.drop during searching - method: create indexes by `index`, and then drop it during searching, make sure async search - expected: no exception raised, search success - """ - pass - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason='TODO') - def test_index_recovery_after_restart(self): - """ - target: test index still existed after server restart - method: create index by `index`, and then restart server, assert index existed - expected: index in collection.indexes - """ - pass - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason='TODO') - def test_index_building_after_restart(self): - """ - target: index can still build if not finished before server restart - method: create index by `index`, and then restart server, assert server is indexing - expected: index build finished after server restart - """ - pass - - """ - ****************************************************************** - The following classes are copied from pymilvus test - ****************************************************************** - """ - @pytest.mark.tags(CaseLabel.GPU) class TestNewIndexBase(TestcaseBase): @@ -532,22 +484,10 @@ def test_create_index_non_existed_field(self): collection_w.create_index(ct.default_int8_field_name, default_index_params, index_name=ct.default_index_name, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, + check_items={ct.err_code: 999, ct.err_msg: "cannot create index on non-existed field: int8"} ) - @pytest.mark.tags(CaseLabel.L1) - def test_create_index_no_vectors(self): - """ - target: test create index interface - method: create collection and add entities in it, create index - expected: return success - """ - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(name=c_name) - collection_w.create_index(ct.default_float_vec_field_name, default_index_params, - index_name=ct.default_index_name) - @pytest.mark.tags(CaseLabel.L1) def test_create_index_partition(self): """ @@ -597,7 +537,7 @@ def test_create_index_without_connect(self): assert ct.default_alias not in res_list collection_w.create_index(ct.default_float_vec_field_name, ct.default_all_indexes_params, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, ct.err_msg: "should create connect first"}) + check_items={ct.err_code: 999, ct.err_msg: "should create connection first"}) @pytest.mark.tags(CaseLabel.L1) def test_create_index_search_with_query_vectors(self): @@ -686,7 +626,7 @@ def test_create_index_different_name(self): collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="a") collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="b", 
check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, + check_items={ct.err_code: 999, ct.err_msg: "CreateIndex failed: creating multiple indexes on same field is not supported"}) @pytest.mark.tags(CaseLabel.L1) @@ -722,18 +662,6 @@ def test_create_index_ip(self): collection_w.insert(data=data) collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params) - @pytest.mark.tags(CaseLabel.L1) - def test_create_index_no_vectors_ip(self): - """ - target: test create index interface - method: create collection and add entities in it, create index - expected: return success - """ - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(name=c_name) - collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params, - index_name=ct.default_index_name) - @pytest.mark.tags(CaseLabel.L1) def test_create_index_partition_ip(self): """ @@ -750,7 +678,7 @@ def test_create_index_partition_ip(self): assert len(ins_res.primary_keys) == len(data[0]) collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params) - @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.tags(CaseLabel.L2) def test_create_index_partition_flush_ip(self): """ target: test create index @@ -810,7 +738,7 @@ def build(collection_w): for t in threads: t.join() - @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.tags(CaseLabel.L2) def test_create_index_no_vectors_insert_ip(self): """ target: test create index interface when there is no vectors in collection, @@ -841,23 +769,6 @@ def test_create_same_index_repeatedly_ip(self): collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params) assert len(collection_w.indexes) == 1 - @pytest.mark.tags(CaseLabel.L2) - def test_create_index_different_name_ip(self): - """ - target: check if index can be created repeatedly, with the same create_index params - method: create index after index have been built - expected: raise error - """ - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(name=c_name) - data = cf.gen_default_list_data(default_nb) - collection_w.insert(data=data) - collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params, index_name="a") - collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params, index_name="b", - check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, - ct.err_msg: "CreateIndex failed: creating multiple indexes on same field is not supported"}) - @pytest.mark.tags(CaseLabel.L0) def test_create_different_index_repeatedly_ip(self): """ @@ -903,7 +814,6 @@ def test_drop_index(self, get_simple_index): assert len(collection_w.indexes) == 0 @pytest.mark.tags(CaseLabel.L2) - # TODO #7372 def test_drop_index_repeatedly(self, get_simple_index): """ target: test drop index repeatedly @@ -935,7 +845,7 @@ def test_drop_index_without_connect(self): index_name=ct.default_index_name) self.connection_wrap.remove_connection(ct.default_alias) collection_w.drop_index(index_name=ct.default_index_name, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, ct.err_msg: "should create connect first."}) + check_items={ct.err_code: 999, ct.err_msg: "should create connection first."}) @pytest.mark.tags(CaseLabel.L2) def test_create_drop_index_repeatedly(self, get_simple_index): @@ -954,76 +864,6 @@ def test_create_drop_index_repeatedly(self, get_simple_index): collection_w.drop_index(index_name=ct.default_index_name) assert len(collection_w.indexes) == 0 - @pytest.mark.tags(CaseLabel.L2) - def 
test_drop_index_ip(self, get_simple_index): - """ - target: test drop index interface - method: create collection and add entities in it, create index, call drop index - expected: return code 0, and default index param - """ - get_simple_index["metric_type"] = "IP" - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(name=c_name) - if get_simple_index["index_type"] != "FLAT": - collection_w.create_index(ct.default_float_vec_field_name, get_simple_index, - index_name=ct.default_index_name) - assert len(collection_w.indexes) == 1 - collection_w.drop_index(index_name=ct.default_index_name) - assert len(collection_w.indexes) == 0 - - @pytest.mark.tags(CaseLabel.L2) - def test_drop_index_repeatedly_ip(self, get_simple_index): - """ - target: test drop index repeatedly - method: create index, call drop index, and drop again - expected: return code 0 - """ - get_simple_index["metric_type"] = "IP" - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(name=c_name) - if get_simple_index["index_type"] != "FLAT": - collection_w.create_index(ct.default_float_vec_field_name, get_simple_index, - index_name=ct.default_index_name) - assert len(collection_w.indexes) == 1 - collection_w.drop_index(index_name=ct.default_index_name) - assert len(collection_w.indexes) == 0 - collection_w.drop_index(index_name=ct.default_index_name) - assert len(collection_w.indexes) == 0 - - @pytest.mark.tags(CaseLabel.L2) - def test_drop_index_without_connect_ip(self): - """ - target: test drop index without connection - method: drop index, and check if drop successfully - expected: raise exception - """ - - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(c_name) - collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params, - index_name=ct.default_index_name) - self.connection_wrap.remove_connection(ct.default_alias) - collection_w.drop_index(index_name=ct.default_index_name, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, ct.err_msg: "should create connect first."}) - - @pytest.mark.tags(CaseLabel.L2) - def test_create_drop_index_repeatedly_ip(self, get_simple_index): - """ - target: test create / drop index repeatedly, use the same index params - method: create index, drop index, four times - expected: return code 0 - """ - get_simple_index["metric_type"] = "IP" - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(c_name) - if get_simple_index["index_type"] != "FLAT": - for i in range(4): - collection_w.create_index(ct.default_float_vec_field_name, get_simple_index, - index_name=ct.default_index_name) - assert len(collection_w.indexes) == 1 - collection_w.drop_index(index_name=ct.default_index_name) - assert len(collection_w.indexes) == 0 - @pytest.mark.tags(CaseLabel.L0) def test_create_PQ_without_nbits(self): """ @@ -1059,8 +899,8 @@ def test_index_collection_with_after_load(self): expected: load and search successfully """ collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix)) - nums = 20 - tmp_nb = 5000 + nums = 5 + tmp_nb = 1000 for i in range(nums): df = cf.gen_default_dataframe_data(nb=tmp_nb, start=i * tmp_nb) insert_res, _ = collection_w.insert(df) @@ -1179,10 +1019,6 @@ def test_rebuild_mmap_index(self): @pytest.mark.tags(CaseLabel.GPU) class TestNewIndexBinary(TestcaseBase): - def get_simple_index(self, request): - log.info(request.param) - return copy.deepcopy(request.param) - """ ****************************************************************** The 
following cases are used to test `create_index` function @@ -1190,7 +1026,6 @@ def get_simple_index(self, request): """ @pytest.mark.tags(CaseLabel.L2) - # @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_binary_index_on_scalar_field(self): """ target: test create index interface @@ -1202,7 +1037,6 @@ def test_create_binary_index_on_scalar_field(self): assert collection_w.has_index(index_name=binary_field_name)[0] is True @pytest.mark.tags(CaseLabel.L0) - # @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index_partition(self): """ target: test create index interface @@ -1219,11 +1053,10 @@ def test_create_index_partition(self): assert len(ins_res.primary_keys) == len(df) collection_w.create_index(default_binary_vec_field_name, default_binary_index_params, index_name=binary_field_name) - assert collection_w.has_index(index_name=binary_field_name)[0] == True + assert collection_w.has_index(index_name=binary_field_name)[0] is True assert len(collection_w.indexes) == 1 @pytest.mark.tags(CaseLabel.L0) - # @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index_search_with_query_vectors(self): """ target: test create index interface, search with more query vectors @@ -1242,7 +1075,6 @@ def test_create_index_search_with_query_vectors(self): default_search_binary_params, default_limit, default_search_exp) - # @pytest.mark.timeout(BUILD_TIMEOUT) @pytest.mark.tags(CaseLabel.L2) def test_create_index_invalid_metric_type_binary(self): """ @@ -1352,52 +1184,29 @@ def scalar_index(self, request): def vector_data_type(self, request): yield request.param - @pytest.fixture( - scope="function", - params=gen_invalid_strs() - ) - def get_collection_name(self, request): + @pytest.fixture(scope="function", params=ct.invalid_resource_names) + def invalid_index_name(self, request): + if request.param in [None, "", " "]: + pytest.skip("None and empty is valid for there is a default index name") yield request.param @pytest.mark.tags(CaseLabel.L0) - def test_create_index_with_invalid_collection_name(self, connect, get_collection_name): + def test_index_with_invalid_index_name(self, connect, invalid_index_name): """ target: test create index interface for invalid scenario - method: create index with invalid collection name - expected: raise exception - """ - collection_name = get_collection_name - with pytest.raises(Exception) as e: - connect.create_index(collection_name, field_name, default_ivf_flat_index) - - @pytest.mark.tags(CaseLabel.L2) - def test_drop_index_with_invalid_collection_name(self, connect, get_collection_name): - """ - target: test drop index interface for invalid scenario - method: drop index with invalid collection name + method: + 1. create index with invalid collection name expected: raise exception + 2. 
drop index with an invalid index name + expected: succeed """ - collection_name = get_collection_name - with pytest.raises(Exception) as e: - connect.drop_index(collection_name) - - @pytest.fixture( - scope="function", - params=gen_invalid_index() - ) - def get_index(self, request): - yield request.param + collection_w = self.init_collection_wrap() + error = {ct.err_code: 999, ct.err_msg: f"Invalid index name: {invalid_index_name}"} + collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name=invalid_index_name, + check_task=CheckTasks.err_res, check_items=error) - @pytest.mark.tags(CaseLabel.L2) - def test_create_index_with_invalid_index_params(self, connect, collection, get_index): - """ - target: test create index interface for invalid scenario - method: create index with invalid index params - expected: raise exception - """ - log.info(get_index) - with pytest.raises(Exception) as e: - connect.create_index(collection, field_name, get_index) + # drop index with an invalid index name + collection_w.drop_index(index_name=invalid_index_name) @pytest.mark.tags(CaseLabel.L1) def test_drop_index_without_release(self): @@ -1407,12 +1216,11 @@ def test_drop_index_without_release(self): 2. drop the index expected: raise exception """ - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] - default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} - collection_w.create_index("float_vector", default_index) + collection_w = self.init_collection_general(prefix, True, nb=100, is_index=False)[0] + collection_w.create_index(ct.default_float_vec_field_name, ct.default_index) collection_w.load() collection_w.drop_index(check_task=CheckTasks.err_res, - check_items={"err_code": 1, + check_items={"err_code": 999, "err_msg": "index cannot be dropped, collection is " "loaded, please release it first"}) @@ -1425,7 +1233,7 @@ def test_annoy_index_with_invalid_params(self, n_trees): 2. 
set annoy index param n_trees type invalid(not int) expected: raise exception """ - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] + collection_w = self.init_collection_general(prefix, True, nb=100, is_index=False)[0] index_annoy = {"index_type": "ANNOY", "params": {"n_trees": n_trees}, "metric_type": "L2"} collection_w.create_index("float_vector", index_annoy, check_task=CheckTasks.err_res, @@ -1439,10 +1247,9 @@ def test_create_index_json(self): method: 1.create collection, and create index expected: create index raise an error """ - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - dim=ct.default_dim, is_index=False)[0:4] + collection_w = self.init_collection_general(prefix, True, nb=100, is_index=False)[0] # create index on JSON/Array field is not supported - collection_w.create_index(ct.default_json_field_name, index_params=ct.default_flat_index, + collection_w.create_index(ct.default_json_field_name, check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: "create index on JSON field is not supported"}) @@ -1454,9 +1261,8 @@ def test_create_scalar_index_on_vector_field(self, scalar_index, vector_data_typ method: 1.create collection, and create index expected: Raise exception """ - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - dim=ct.default_dim, is_index=False, - vector_data_type=vector_data_type)[0:4] + collection_w = self.init_collection_general(prefix, True, nb=100, + is_index=False, vector_data_type=vector_data_type)[0] scalar_index_params = {"index_type": scalar_index} collection_w.create_index(ct.default_float_vec_field_name, index_params=scalar_index_params, check_task=CheckTasks.err_res, @@ -2191,22 +1997,6 @@ def build(collection_w): for t in threads: t.join() - @pytest.mark.skip(reason="diskann dim range is set to be [1, 32768)") - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("dim", [2, 4, 8]) - def test_create_index_with_small_dim(self, dim): - """ - target: test create index with diskann - method: 1.create collection, when the dim of the vector Less than 8 - 2.create diskann index - expected: create index raise an error - """ - collection_w = self.init_collection_general(prefix, False, dim=dim, is_index=False)[0] - collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index, - check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, - ct.err_msg: "dim out of range: [8, 32768]"}) - @pytest.mark.tags(CaseLabel.L2) def test_diskann_enable_mmap(self): """ @@ -2227,7 +2017,6 @@ def test_diskann_enable_mmap(self): check_items={ct.err_code: 104, ct.err_msg: f"index type DISKANN does not support mmap"}) - @pytest.mark.tags(CaseLabel.GPU) class TestAutoIndex(TestcaseBase): """ Test case of Auto index """ diff --git a/tests/python_client/testcases/test_issues.py b/tests/python_client/testcases/test_issues.py index b579f10cadd8c..1dad8133ff23f 100644 --- a/tests/python_client/testcases/test_issues.py +++ b/tests/python_client/testcases/test_issues.py @@ -11,9 +11,8 @@ class TestIssues(TestcaseBase): @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name]) - @pytest.mark.parametrize("index_on_par_key_field", [True]) @pytest.mark.parametrize("use_upsert", [True, False]) - def test_issue_30607(self, par_key_field, index_on_par_key_field, use_upsert): + def test_issue_30607(self, par_key_field, use_upsert): """ Method: 1. 
create a collection with partition key on collection schema with customized num_partitions @@ -50,27 +49,30 @@ def test_issue_30607(self, par_key_field, use_upsert): num_entities = collection_w.num_entities # build index collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index) - if index_on_par_key_field: - collection_w.create_index(field_name=par_key_field, index_params={}) - # load - collection_w.load() - # verify the partition key values are hashed correctly - seeds = 200 - rand_ids = random.sample(range(0, num_entities), seeds) - rand_ids = [str(rand_ids[i]) for i in range(len(rand_ids))] - res = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field]) - # verify every random id exists - assert len(res) == len(rand_ids) + for index_on_par_key_field in [False, True]: + collection_w.release() + if index_on_par_key_field: + collection_w.create_index(field_name=par_key_field, index_params={}) + # load + collection_w.load() - dirty_count = 0 - for i in range(len(res)): - pk = res[i].get("pk") - parkey_value = res[i].get(par_key_field) - res_parkey = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'", - output_fields=["pk", par_key_field]) - if len(res_parkey) != 1: - log.info(f"dirty data found: pk {pk} with parkey {parkey_value}") - dirty_count += 1 - assert dirty_count == 0 - log.info(f"check randomly {seeds}/{num_entities}, dirty count={dirty_count}") \ No newline at end of file + # verify the partition key values are hashed correctly + seeds = 200 + rand_ids = random.sample(range(0, num_entities), seeds) + rand_ids = [str(rand_ids[i]) for i in range(len(rand_ids))] + res = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field]) + # verify every random id exists + assert len(res) == len(rand_ids) + + dirty_count = 0 + for i in range(len(res)): + pk = res[i].get("pk") + parkey_value = res[i].get(par_key_field) + res_parkey = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'", + output_fields=["pk", par_key_field]) + if len(res_parkey) != 1: + log.info(f"dirty data found: pk {pk} with parkey {parkey_value}") + dirty_count += 1 + assert dirty_count == 0 + log.info(f"check randomly {seeds}/{num_entities}, dirty count={dirty_count}") \ No newline at end of file diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index 3fb595afc1c58..829f3b7f86396 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -10097,7 +10097,6 @@ def test_search_group_by_default(self, index_type, metric, vector_data_type): collection_w.flush() collection_w.create_index(ct.default_float_vec_field_name, index_params=_index_params) - # time.sleep(10) collection_w.load() search_params = {"metric_type": metric, "params": {"ef": 128}} @@ -10214,7 +10213,6 @@ def test_search_group_by_with_field_indexed(self, grpby_field): collection_w.flush() collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) collection_w.create_index(grpby_field) - time.sleep(30) collection_w.load() search_params = {"metric_type": metric, "params": {"ef": 128}} @@ -10507,7 +10505,6 @@ def test_range_search_not_support_group_by(self): collection_w.flush() collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) - time.sleep(10) collection_w.load() nq = 1 From 5452376e904a5978243603923d68e3f4e065efc4 Mon Sep 17 00:00:00 2001 From: congqixia
Date: Thu, 23 May 2024 14:33:40 +0800 Subject: [PATCH 049/126] fix: Remove task from syncmgr after task done (#33302) See also #33247 Introduced in PR #32865 Remove task after task done to keep checkpoint sound and safe Signed-off-by: Congqi Xia --- internal/datanode/syncmgr/sync_manager.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/internal/datanode/syncmgr/sync_manager.go b/internal/datanode/syncmgr/sync_manager.go index 1f4534c52f447..190a8e7655be2 100644 --- a/internal/datanode/syncmgr/sync_manager.go +++ b/internal/datanode/syncmgr/sync_manager.go @@ -121,7 +121,6 @@ func (mgr *syncManager) SyncData(ctx context.Context, task Task) *conc.Future[st func (mgr *syncManager) safeSubmitTask(task Task) *conc.Future[struct{}] { taskKey := fmt.Sprintf("%d-%d", task.SegmentID(), task.Checkpoint().GetTimestamp()) mgr.tasks.Insert(taskKey, task) - defer mgr.tasks.Remove(taskKey) key, err := task.CalcTargetSegment() if err != nil { @@ -133,6 +132,7 @@ func (mgr *syncManager) safeSubmitTask(task Task) *conc.Future[struct{}] { } func (mgr *syncManager) submit(key int64, task Task) *conc.Future[struct{}] { + taskKey := fmt.Sprintf("%d-%d", task.SegmentID(), task.Checkpoint().GetTimestamp()) handler := func(err error) error { if err == nil { return nil @@ -161,7 +161,10 @@ func (mgr *syncManager) submit(key int64, task Task) *conc.Future[struct{}] { return mgr.submit(targetID, task).Err() } log.Info("sync mgr sumbit task with key", zap.Int64("key", key)) - return mgr.Submit(key, task, handler) + return mgr.Submit(key, task, handler, func(err error) error { + mgr.tasks.Remove(taskKey) + return err + }) } func (mgr *syncManager) GetEarliestPosition(channel string) (int64, *msgpb.MsgPosition) { From 3bec2c4928f56c8875f0e18d2c84a6fc32016dac Mon Sep 17 00:00:00 2001 From: foxspy Date: Thu, 23 May 2024 16:57:41 +0800 Subject: [PATCH 050/126] enhance: Update Knowhere version (#33309) /kind branch-feature Signed-off-by: xianliang.li --- internal/core/thirdparty/knowhere/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/core/thirdparty/knowhere/CMakeLists.txt b/internal/core/thirdparty/knowhere/CMakeLists.txt index 9a146ffe273d1..cd14699c819f0 100644 --- a/internal/core/thirdparty/knowhere/CMakeLists.txt +++ b/internal/core/thirdparty/knowhere/CMakeLists.txt @@ -12,7 +12,7 @@ #------------------------------------------------------------------------------- # Update KNOWHERE_VERSION for the first occurrence -set( KNOWHERE_VERSION 89657b08 ) +set( KNOWHERE_VERSION abd4087 ) set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git") message(STATUS "Knowhere repo: ${GIT_REPOSITORY}") message(STATUS "Knowhere version: ${KNOWHERE_VERSION}") From 6c186112bd9a88ff1dbb46c10223e6191a875fe3 Mon Sep 17 00:00:00 2001 From: zhuwenxing Date: Thu, 23 May 2024 17:57:40 +0800 Subject: [PATCH 051/126] test: add sparse vector datatype for import test (#33166) Signed-off-by: zhuwenxing --- .../python_client/common/bulk_insert_data.py | 38 ++- .../testcases/test_bulk_insert.py | 242 ++++++++++++++++++ 2 files changed, 272 insertions(+), 8 deletions(-) diff --git a/tests/python_client/common/bulk_insert_data.py b/tests/python_client/common/bulk_insert_data.py index 7a98a6c9f8213..a064efde97034 100644 --- a/tests/python_client/common/bulk_insert_data.py +++ b/tests/python_client/common/bulk_insert_data.py @@ -23,6 +23,7 @@ class DataField: pk_field = "uid" vec_field = "vectors" float_vec_field = "float_vectors" + sparse_vec_field = "sparse_vectors" 
image_float_vec_field = "image_float_vec_field" text_float_vec_field = "text_float_vec_field" binary_vec_field = "binary_vec_field" @@ -473,7 +474,22 @@ def gen_vectors(float_vector, rows, dim): return vectors -def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128, array_length=None): +def gen_sparse_vectors(rows, sparse_format="dok"): + # default sparse format is dok, dict of keys + # another option is coo, coordinate List + + rng = np.random.default_rng() + vectors = [{ + d: rng.random() for d in random.sample(range(1000), random.randint(20, 30)) + } for _ in range(rows)] + if sparse_format == "coo": + vectors = [ + {"indices": list(x.keys()), "values": list(x.values())} for x in vectors + ] + return vectors + + +def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128, array_length=None, sparse_format="dok"): if array_length is None: array_length = random.randint(0, 10) @@ -483,6 +499,9 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128 if "float" in data_field: data = gen_vectors(float_vector=True, rows=rows, dim=dim) data = pd.Series([np.array(x, dtype=np.dtype("float32")) for x in data]) + elif "sparse" in data_field: + data = gen_sparse_vectors(rows, sparse_format=sparse_format) + data = pd.Series([json.dumps(x) for x in data], dtype=np.dtype("str")) elif "fp16" in data_field: data = gen_fp16_vectors(rows, dim)[1] data = pd.Series([np.array(x, dtype=np.dtype("uint8")) for x in data]) @@ -596,7 +615,7 @@ def gen_json_files(is_row_based, rows, dim, auto_id, str_pk, return files -def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, dim=128, array_length=None, enable_dynamic_field=False): +def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, dim=128, array_length=None, enable_dynamic_field=False, **kwargs): data = [] for r in range(rows): d = {} @@ -605,6 +624,9 @@ def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, d if "float" in data_field: float_vector = True d[data_field] = gen_vectors(float_vector=float_vector, rows=1, dim=dim)[0] + if "sparse" in data_field: + sparse_format = kwargs.get("sparse_format", "dok") + d[data_field] = gen_sparse_vectors(1, sparse_format=sparse_format)[0] if "binary" in data_field: float_vector = False d[data_field] = gen_vectors(float_vector=float_vector, rows=1, dim=dim)[0] @@ -647,7 +669,7 @@ def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, d return data -def gen_new_json_files(float_vector, rows, dim, data_fields, file_nums=1, array_length=None, file_size=None, err_type="", enable_dynamic_field=False): +def gen_new_json_files(float_vector, rows, dim, data_fields, file_nums=1, array_length=None, file_size=None, err_type="", enable_dynamic_field=False, **kwargs): files = [] if file_size is not None: rows = 5000 @@ -655,7 +677,7 @@ def gen_new_json_files(float_vector, rows, dim, data_fields, file_nums=1, array_ for i in range(file_nums): file_name = f"data-fields-{len(data_fields)}-rows-{rows}-dim-{dim}-file-num-{i}-{int(time.time())}.json" file = f"{data_source}/{file_name}" - data = gen_dict_data_by_data_field(data_fields=data_fields, rows=rows, start=start_uid, float_vector=float_vector, dim=dim, array_length=array_length, enable_dynamic_field=enable_dynamic_field) + data = gen_dict_data_by_data_field(data_fields=data_fields, rows=rows, start=start_uid, float_vector=float_vector, dim=dim, array_length=array_length, enable_dynamic_field=enable_dynamic_field, 
**kwargs) # log.info(f"data: {data}") with open(file, "w") as f: json.dump(data, f) @@ -762,7 +784,7 @@ def gen_dynamic_field_data_in_parquet_file(rows, start=0): return data -def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_group_size=None, file_nums=1, array_length=None, err_type="", enable_dynamic_field=False, include_meta=True): +def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_group_size=None, file_nums=1, array_length=None, err_type="", enable_dynamic_field=False, include_meta=True, sparse_format="doc"): # gen numpy files if err_type == "": err_type = "none" @@ -775,7 +797,7 @@ def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_ all_field_data = {} for data_field in data_fields: data = gen_data_by_data_field(data_field=data_field, rows=rows, start=0, - float_vector=float_vector, dim=dim, array_length=array_length) + float_vector=float_vector, dim=dim, array_length=array_length, sparse_format=sparse_format) all_field_data[data_field] = data if enable_dynamic_field and include_meta: all_field_data["$meta"] = gen_dynamic_field_data_in_parquet_file(rows=rows, start=0) @@ -948,7 +970,7 @@ def prepare_bulk_insert_numpy_files(minio_endpoint="", bucket_name="milvus-bucke def prepare_bulk_insert_parquet_files(minio_endpoint="", bucket_name="milvus-bucket", rows=100, dim=128, array_length=None, file_size=None, row_group_size=None, - enable_dynamic_field=False, data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False, include_meta=True): + enable_dynamic_field=False, data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False, include_meta=True, sparse_format="doc"): """ Generate column based files based on params in parquet format and copy them to the minio Note: each field in data_fields would be generated one parquet file. 
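A minimal sketch of the two sparse-row layouts selected by the sparse_format parameter above (the 1000-dimension bound mirrors gen_sparse_vectors; the sample indices and values are purely illustrative):

    import json
    import random

    import numpy as np

    rng = np.random.default_rng()

    # "dok" (dict of keys): {dimension_index: value, ...}
    dok_row = {d: float(rng.random()) for d in random.sample(range(1000), 3)}
    # e.g. {17: 0.42, 356: 0.91, 802: 0.05}

    # "coo" (coordinate list): parallel index/value arrays
    coo_row = {"indices": list(dok_row.keys()), "values": list(dok_row.values())}
    # e.g. {"indices": [17, 356, 802], "values": [0.42, 0.91, 0.05]}

    # either shape is JSON-serialized before being written into the generated data files
    print(json.dumps(dok_row), json.dumps(coo_row))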
@@ -980,7 +1002,7 @@ def prepare_bulk_insert_parquet_files(minio_endpoint="", bucket_name="milvus-buc """ files = gen_parquet_files(rows=rows, dim=dim, float_vector=float_vector, enable_dynamic_field=enable_dynamic_field, data_fields=data_fields, array_length=array_length, file_size=file_size, row_group_size=row_group_size, - file_nums=file_nums, include_meta=include_meta) + file_nums=file_nums, include_meta=include_meta, sparse_format=sparse_format) copy_files_to_minio(host=minio_endpoint, r_source=data_source, files=files, bucket_name=bucket_name, force=force) return files diff --git a/tests/python_client/testcases/test_bulk_insert.py b/tests/python_client/testcases/test_bulk_insert.py index e21b6a17206e8..1270efb82282f 100644 --- a/tests/python_client/testcases/test_bulk_insert.py +++ b/tests/python_client/testcases/test_bulk_insert.py @@ -1224,6 +1224,248 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable assert len(res) == len(query_data) if enable_partition_key: assert len(self.collection_wrap.partitions) > 1 + + @pytest.mark.tags(CaseLabel.L3) + @pytest.mark.parametrize("auto_id", [True, False]) + @pytest.mark.parametrize("dim", [128]) # 128 + @pytest.mark.parametrize("entities", [1000]) # 1000 + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("include_meta", [True, False]) + @pytest.mark.parametrize("sparse_format", ["doc", "coo"]) + def test_bulk_insert_sparse_vector_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, include_meta, sparse_format): + """ + collection schema 1: [pk, int64, float64, string float_vector] + data file: vectors.parquet and uid.parquet, + Steps: + 1. create collection + 2. import data + 3. verify + """ + if enable_dynamic_field is False and include_meta is True: + pytest.skip("include_meta only works with enable_dynamic_field") + fields = [ + cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), + cf.gen_int64_field(name=df.int_field), + cf.gen_float_field(name=df.float_field), + cf.gen_string_field(name=df.string_field), + cf.gen_json_field(name=df.json_field), + cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64), + cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT), + cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100), + cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL), + cf.gen_float_vec_field(name=df.float_vec_field, dim=dim), + cf.gen_sparse_vec_field(name=df.sparse_vec_field), + ] + data_fields = [f.name for f in fields if not f.to_dict().get("auto_id", False)] + files = prepare_bulk_insert_parquet_files( + minio_endpoint=self.minio_endpoint, + bucket_name=self.bucket_name, + rows=entities, + dim=dim, + data_fields=data_fields, + enable_dynamic_field=enable_dynamic_field, + force=True, + include_meta=include_meta, + sparse_format=sparse_format + ) + self._connect() + c_name = cf.gen_unique_str("bulk_insert") + schema = cf.gen_collection_schema(fields=fields, auto_id=auto_id, enable_dynamic_field=enable_dynamic_field) + self.collection_wrap.init_collection(c_name, schema=schema) + + # import data + t0 = time.time() + task_id, _ = self.utility_wrap.do_bulk_insert( + collection_name=c_name, files=files + ) + logging.info(f"bulk insert task ids:{task_id}") + success, states = self.utility_wrap.wait_for_bulk_insert_tasks_completed( + task_ids=[task_id], timeout=300 + ) + tt = time.time() - t0 + log.info(f"bulk insert state:{success} in 
{tt} with states:{states}") + assert success + num_entities = self.collection_wrap.num_entities + log.info(f" collection entities: {num_entities}") + assert num_entities == entities + # verify imported data is available for search + index_params = ct.default_index + float_vec_fields = [f.name for f in fields if "vec" in f.name and "float" in f.name] + sparse_vec_fields = [f.name for f in fields if "vec" in f.name and "sparse" in f.name] + for f in float_vec_fields: + self.collection_wrap.create_index( + field_name=f, index_params=index_params + ) + for f in sparse_vec_fields: + self.collection_wrap.create_index( + field_name=f, index_params=ct.default_sparse_inverted_index + ) + self.collection_wrap.load() + log.info(f"wait for load finished and be ready for search") + time.sleep(2) + # log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}") + search_data = cf.gen_vectors(1, dim) + search_params = ct.default_search_params + for field_name in float_vec_fields: + res, _ = self.collection_wrap.search( + search_data, + field_name, + param=search_params, + limit=1, + output_fields=["*"], + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, "limit": 1}, + ) + for hit in res: + for r in hit: + fields_from_search = r.fields.keys() + for f in fields: + assert f.name in fields_from_search + if enable_dynamic_field and include_meta: + assert "name" in fields_from_search + assert "address" in fields_from_search + search_data = cf.gen_sparse_vectors(1, dim) + search_params = ct.default_sparse_search_params + for field_name in sparse_vec_fields: + res, _ = self.collection_wrap.search( + search_data, + field_name, + param=search_params, + limit=1, + output_fields=["*"], + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, "limit": 1}, + ) + for hit in res: + for r in hit: + fields_from_search = r.fields.keys() + for f in fields: + assert f.name in fields_from_search + if enable_dynamic_field and include_meta: + assert "name" in fields_from_search + assert "address" in fields_from_search + + + @pytest.mark.tags(CaseLabel.L3) + @pytest.mark.parametrize("auto_id", [True, False]) + @pytest.mark.parametrize("dim", [128]) # 128 + @pytest.mark.parametrize("entities", [1000]) # 1000 + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("include_meta", [True, False]) + @pytest.mark.parametrize("sparse_format", ["doc", "coo"]) + def test_bulk_insert_sparse_vector_with_json(self, auto_id, dim, entities, enable_dynamic_field, include_meta, sparse_format): + """ + collection schema 1: [pk, int64, float64, string float_vector] + data file: vectors.parquet and uid.parquet, + Steps: + 1. create collection + 2. import data + 3. 
verify + """ + if enable_dynamic_field is False and include_meta is True: + pytest.skip("include_meta only works with enable_dynamic_field") + fields = [ + cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), + cf.gen_int64_field(name=df.int_field), + cf.gen_float_field(name=df.float_field), + cf.gen_string_field(name=df.string_field), + cf.gen_json_field(name=df.json_field), + cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64), + cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT), + cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100), + cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL), + cf.gen_float_vec_field(name=df.float_vec_field, dim=dim), + cf.gen_sparse_vec_field(name=df.sparse_vec_field), + ] + data_fields = [f.name for f in fields if not f.to_dict().get("auto_id", False)] + files = prepare_bulk_insert_new_json_files( + minio_endpoint=self.minio_endpoint, + bucket_name=self.bucket_name, + rows=entities, + dim=dim, + data_fields=data_fields, + enable_dynamic_field=enable_dynamic_field, + force=True, + include_meta=include_meta, + sparse_format=sparse_format + ) + self._connect() + c_name = cf.gen_unique_str("bulk_insert") + schema = cf.gen_collection_schema(fields=fields, auto_id=auto_id, enable_dynamic_field=enable_dynamic_field) + self.collection_wrap.init_collection(c_name, schema=schema) + + # import data + t0 = time.time() + task_id, _ = self.utility_wrap.do_bulk_insert( + collection_name=c_name, files=files + ) + logging.info(f"bulk insert task ids:{task_id}") + success, states = self.utility_wrap.wait_for_bulk_insert_tasks_completed( + task_ids=[task_id], timeout=300 + ) + tt = time.time() - t0 + log.info(f"bulk insert state:{success} in {tt} with states:{states}") + assert success + num_entities = self.collection_wrap.num_entities + log.info(f" collection entities: {num_entities}") + assert num_entities == entities + # verify imported data is available for search + index_params = ct.default_index + float_vec_fields = [f.name for f in fields if "vec" in f.name and "float" in f.name] + sparse_vec_fields = [f.name for f in fields if "vec" in f.name and "sparse" in f.name] + for f in float_vec_fields: + self.collection_wrap.create_index( + field_name=f, index_params=index_params + ) + for f in sparse_vec_fields: + self.collection_wrap.create_index( + field_name=f, index_params=ct.default_sparse_inverted_index + ) + self.collection_wrap.load() + log.info(f"wait for load finished and be ready for search") + time.sleep(2) + # log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}") + search_data = cf.gen_vectors(1, dim) + search_params = ct.default_search_params + for field_name in float_vec_fields: + res, _ = self.collection_wrap.search( + search_data, + field_name, + param=search_params, + limit=1, + output_fields=["*"], + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, "limit": 1}, + ) + for hit in res: + for r in hit: + fields_from_search = r.fields.keys() + for f in fields: + assert f.name in fields_from_search + if enable_dynamic_field and include_meta: + assert "name" in fields_from_search + assert "address" in fields_from_search + search_data = cf.gen_sparse_vectors(1, dim) + search_params = ct.default_sparse_search_params + for field_name in sparse_vec_fields: + res, _ = self.collection_wrap.search( + search_data, + field_name, + param=search_params, + limit=1, + output_fields=["*"], + 
check_task=CheckTasks.check_search_results, + check_items={"nq": 1, "limit": 1}, + ) + for hit in res: + for r in hit: + fields_from_search = r.fields.keys() + for f in fields: + assert f.name in fields_from_search + if enable_dynamic_field and include_meta: + assert "name" in fields_from_search + assert "address" in fields_from_search + @pytest.mark.tags(CaseLabel.L3) @pytest.mark.parametrize("auto_id", [True]) From c7be2ce33ae012c3af05167fddc3b09227ff9ea1 Mon Sep 17 00:00:00 2001 From: wei liu Date: Thu, 23 May 2024 18:15:41 +0800 Subject: [PATCH 052/126] enhance: Decrease bloom filter fp rate to reduce delete impact (#33301) When Milvus processes a delete record, it needs to find the record's corresponding segment by bloom filter, and a higher bloom filter fp rate will cause delete records to be forwarded to the wrong segments. This PR decreases the bloom filter's default fp rate to 0.001. Signed-off-by: Wei Liu --- configs/milvus.yaml | 2 +- pkg/util/paramtable/component_param.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 6cf78cc058e63..5410ecce75b15 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -612,7 +612,7 @@ common: ttMsgEnabled: true # Whether the instance disable sending ts messages traceLogMode: 0 # trace request info bloomFilterSize: 100000 # bloom filter initial size - maxBloomFalsePositive: 0.05 # max false positive rate for bloom filter + maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter # QuotaConfig, configurations of Milvus quota and limits. # By default, we enable: diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 8804240da4ef9..735987f994159 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -728,7 +728,7 @@ like the old password verification when updating the credential`, p.MaxBloomFalsePositive = ParamItem{ Key: "common.maxBloomFalsePositive", Version: "2.3.2", - DefaultValue: "0.05", + DefaultValue: "0.001", Doc: "max false positive rate for bloom filter", Export: true, } From 1b4e28b97f693ef13a2eb13d55e7e96ee1285883 Mon Sep 17 00:00:00 2001 From: aoiasd <45024769+aoiasd@users.noreply.github.com> Date: Thu, 23 May 2024 20:03:40 +0800 Subject: [PATCH 053/126] enhance: Check by proxy rate limiter when delete gets data by query. (#30891) relate: https://github.com/milvus-io/milvus/issues/30927 --------- Signed-off-by: aoiasd --- configs/milvus.yaml | 13 +-- internal/proxy/impl.go | 7 ++ internal/proxy/impl_test.go | 23 +++--- internal/proxy/proxy.go | 6 +- internal/proxy/proxy_test.go | 2 +- internal/proxy/rate_limit_interceptor_test.go | 4 + internal/proxy/simple_rate_limiter.go | 17 +++- internal/proxy/simple_rate_limiter_test.go | 18 ++--- internal/proxy/task_delete.go | 22 ++++- internal/proxy/task_delete_test.go | 80 +++++++++++++++++++ internal/types/types.go | 1 + pkg/util/paramtable/quota_param.go | 30 +++++++ 12 files changed, 192 insertions(+), 31 deletions(-) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 5410ecce75b15..b526ad733703e 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -631,6 +631,14 @@ quotaAndLimits: # collects metrics from Proxies, Query cluster and Data cluster.
# seconds, (0 ~ 65536) quotaCenterCollectInterval: 3 + limits: + allocRetryTimes: 15 # retry times when delete alloc forward data from rate limit failed + allocWaitInterval: 1000 # retry wait duration when delete alloc forward data rate failed, in millisecond + complexDeleteLimitEnable: false # whether complex delete check forward data by limiter + maxCollectionNum: 65536 + maxCollectionNumPerDB: 65536 + maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit + maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes ddl: enabled: false collectionRate: -1 # qps, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection @@ -711,11 +719,6 @@ quotaAndLimits: max: -1 # qps, default no limit partition: max: -1 # qps, default no limit - limits: - maxCollectionNum: 65536 - maxCollectionNumPerDB: 65536 - maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit - maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes limitWriting: # forceDeny false means dml requests are allowed (except for some # specific conditions, such as memory of nodes to water marker), true means always reject all dml requests. diff --git a/internal/proxy/impl.go b/internal/proxy/impl.go index 3947bd510aa95..340b21fa33300 100644 --- a/internal/proxy/impl.go +++ b/internal/proxy/impl.go @@ -42,6 +42,7 @@ import ( "github.com/milvus-io/milvus/internal/proto/proxypb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/proxy/connection" + "github.com/milvus-io/milvus/internal/types" "github.com/milvus-io/milvus/internal/util/hookutil" "github.com/milvus-io/milvus/internal/util/importutilv2" "github.com/milvus-io/milvus/pkg/common" @@ -2642,6 +2643,11 @@ func (node *Proxy) Delete(ctx context.Context, request *milvuspb.DeleteRequest) metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method, metrics.TotalLabel, request.GetDbName(), request.GetCollectionName()).Inc() + var limiter types.Limiter + if node.enableComplexDeleteLimit { + limiter, _ = node.GetRateLimiter() + } + dr := &deleteRunner{ req: request, idAllocator: node.rowIDAllocator, @@ -2650,6 +2656,7 @@ func (node *Proxy) Delete(ctx context.Context, request *milvuspb.DeleteRequest) chTicker: node.chTicker, queue: node.sched.dmQueue, lb: node.lbPolicy, + limiter: limiter, } log.Debug("init delete runner in Proxy") diff --git a/internal/proxy/impl_test.go b/internal/proxy/impl_test.go index 9577ee9b6d80d..883e44136d778 100644 --- a/internal/proxy/impl_test.go +++ b/internal/proxy/impl_test.go @@ -80,7 +80,7 @@ func TestProxy_InvalidateCollectionMetaCache_remove_stream(t *testing.T) { func TestProxy_CheckHealth(t *testing.T) { t.Run("not healthy", func(t *testing.T) { node := &Proxy{session: &sessionutil.Session{SessionRaw: sessionutil.SessionRaw{ServerID: 1}}} - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Abnormal) ctx := context.Background() resp, err := node.CheckHealth(ctx, &milvuspb.CheckHealthRequest{}) @@ -98,7 +98,7 @@ func TestProxy_CheckHealth(t *testing.T) { dataCoord: NewDataCoordMock(), session: &sessionutil.Session{SessionRaw: sessionutil.SessionRaw{ServerID: 1}}, } - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Healthy) ctx := context.Background() resp, err := 
node.CheckHealth(ctx, &milvuspb.CheckHealthRequest{})
@@ -131,7 +131,7 @@ func TestProxy_CheckHealth(t *testing.T) {
			queryCoord: qc,
			dataCoord:  dataCoordMock,
		}
-		node.simpleLimiter = NewSimpleLimiter()
+		node.simpleLimiter = NewSimpleLimiter(0, 0)
		node.UpdateStateCode(commonpb.StateCode_Healthy)
		ctx := context.Background()
		resp, err := node.CheckHealth(ctx, &milvuspb.CheckHealthRequest{})
@@ -148,7 +148,7 @@ func TestProxy_CheckHealth(t *testing.T) {
			dataCoord:  NewDataCoordMock(),
			queryCoord: qc,
		}
-		node.simpleLimiter = NewSimpleLimiter()
+		node.simpleLimiter = NewSimpleLimiter(0, 0)
		node.UpdateStateCode(commonpb.StateCode_Healthy)
		resp, err := node.CheckHealth(context.Background(), &milvuspb.CheckHealthRequest{})
		assert.NoError(t, err)
@@ -243,7 +243,7 @@ func TestProxy_ResourceGroup(t *testing.T) {
	node, err := NewProxy(ctx, factory)
	assert.NoError(t, err)
-	node.simpleLimiter = NewSimpleLimiter()
+	node.simpleLimiter = NewSimpleLimiter(0, 0)
	node.UpdateStateCode(commonpb.StateCode_Healthy)
	qc := mocks.NewMockQueryCoordClient(t)
@@ -335,7 +335,7 @@ func TestProxy_InvalidResourceGroupName(t *testing.T) {
	node, err := NewProxy(ctx, factory)
	assert.NoError(t, err)
-	node.simpleLimiter = NewSimpleLimiter()
+	node.simpleLimiter = NewSimpleLimiter(0, 0)
	node.UpdateStateCode(commonpb.StateCode_Healthy)
	qc := mocks.NewMockQueryCoordClient(t)
@@ -936,7 +936,7 @@ func TestProxyCreateDatabase(t *testing.T) {
	node.tsoAllocator = &timestampAllocator{
		tso: newMockTimestampAllocatorInterface(),
	}
-	node.simpleLimiter = NewSimpleLimiter()
+	node.simpleLimiter = NewSimpleLimiter(0, 0)
	node.UpdateStateCode(commonpb.StateCode_Healthy)
	node.sched, err = newTaskScheduler(ctx, node.tsoAllocator, node.factory)
	node.sched.ddQueue.setMaxTaskNum(10)
@@ -996,7 +996,7 @@ func TestProxyDropDatabase(t *testing.T) {
	node.tsoAllocator = &timestampAllocator{
		tso: newMockTimestampAllocatorInterface(),
	}
-	node.simpleLimiter = NewSimpleLimiter()
+	node.simpleLimiter = NewSimpleLimiter(0, 0)
	node.UpdateStateCode(commonpb.StateCode_Healthy)
	node.sched, err = newTaskScheduler(ctx, node.tsoAllocator, node.factory)
	node.sched.ddQueue.setMaxTaskNum(10)
@@ -1055,7 +1055,7 @@ func TestProxyListDatabase(t *testing.T) {
	node.tsoAllocator = &timestampAllocator{
		tso: newMockTimestampAllocatorInterface(),
	}
-	node.simpleLimiter = NewSimpleLimiter()
+	node.simpleLimiter = NewSimpleLimiter(0, 0)
	node.UpdateStateCode(commonpb.StateCode_Healthy)
	node.sched, err = newTaskScheduler(ctx, node.tsoAllocator, node.factory)
	node.sched.ddQueue.setMaxTaskNum(10)
@@ -1111,7 +1111,7 @@ func TestProxyAlterDatabase(t *testing.T) {
	node.tsoAllocator = &timestampAllocator{
		tso: newMockTimestampAllocatorInterface(),
	}
-	node.simpleLimiter = NewSimpleLimiter()
+	node.simpleLimiter = NewSimpleLimiter(0, 0)
	node.UpdateStateCode(commonpb.StateCode_Healthy)
	node.sched, err = newTaskScheduler(ctx, node.tsoAllocator, node.factory)
	node.sched.ddQueue.setMaxTaskNum(10)
@@ -1164,7 +1164,7 @@ func TestProxyDescribeDatabase(t *testing.T) {
	node.tsoAllocator = &timestampAllocator{
		tso: newMockTimestampAllocatorInterface(),
	}
-	node.simpleLimiter = NewSimpleLimiter()
+	node.simpleLimiter = NewSimpleLimiter(0, 0)
	node.UpdateStateCode(commonpb.StateCode_Healthy)
	node.sched, err = newTaskScheduler(ctx, node.tsoAllocator, node.factory)
	node.sched.ddQueue.setMaxTaskNum(10)
@@ -1287,6 +1287,7 @@ func TestProxy_Delete(t *testing.T) {
		Expr: "pk in [1, 2, 3]",
	}
	cache := NewMockCache(t)
+	cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil)
cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), diff --git a/internal/proxy/proxy.go b/internal/proxy/proxy.go index 22d3dfbb9bcd8..c0af10850aaa8 100644 --- a/internal/proxy/proxy.go +++ b/internal/proxy/proxy.go @@ -128,6 +128,9 @@ type Proxy struct { // materialized view enableMaterializedView bool + + // delete rate limiter + enableComplexDeleteLimit bool } // NewProxy returns a Proxy struct. @@ -146,7 +149,7 @@ func NewProxy(ctx context.Context, factory dependency.Factory) (*Proxy, error) { factory: factory, searchResultCh: make(chan *internalpb.SearchResults, n), shardMgr: mgr, - simpleLimiter: NewSimpleLimiter(), + simpleLimiter: NewSimpleLimiter(Params.QuotaConfig.AllocWaitInterval.GetAsDuration(time.Millisecond), Params.QuotaConfig.AllocRetryTimes.GetAsUint()), lbPolicy: lbPolicy, resourceManager: resourceManager, replicateStreamManager: replicateStreamManager, @@ -287,6 +290,7 @@ func (node *Proxy) Init() error { node.chTicker = newChannelsTimeTicker(node.ctx, Params.ProxyCfg.TimeTickInterval.GetAsDuration(time.Millisecond)/2, []string{}, node.sched.getPChanStatistics, tsoAllocator) log.Debug("create channels time ticker done", zap.String("role", typeutil.ProxyRole), zap.Duration("syncTimeTickInterval", syncTimeTickInterval)) + node.enableComplexDeleteLimit = Params.QuotaConfig.ComplexDeleteLimitEnable.GetAsBool() node.metricsCacheManager = metricsinfo.NewMetricsCacheManager() log.Debug("create metrics cache manager done", zap.String("role", typeutil.ProxyRole)) diff --git a/internal/proxy/proxy_test.go b/internal/proxy/proxy_test.go index 9877d1243e3ac..298abede0c7da 100644 --- a/internal/proxy/proxy_test.go +++ b/internal/proxy/proxy_test.go @@ -299,7 +299,7 @@ func (s *proxyTestServer) startGrpc(ctx context.Context, wg *sync.WaitGroup, p * ctx, cancel := context.WithCancel(ctx) defer cancel() - s.simpleLimiter = NewSimpleLimiter() + s.simpleLimiter = NewSimpleLimiter(0, 0) opts := tracer.GetInterceptorOpts() s.grpcServer = grpc.NewServer( diff --git a/internal/proxy/rate_limit_interceptor_test.go b/internal/proxy/rate_limit_interceptor_test.go index 5440123da630a..53db9ede78c99 100644 --- a/internal/proxy/rate_limit_interceptor_test.go +++ b/internal/proxy/rate_limit_interceptor_test.go @@ -50,6 +50,10 @@ func (l *limiterMock) Check(dbID int64, collectionIDToPartIDs map[int64][]int64, return nil } +func (l *limiterMock) Alloc(ctx context.Context, dbID int64, collectionIDToPartIDs map[int64][]int64, rt internalpb.RateType, n int) error { + return l.Check(dbID, collectionIDToPartIDs, rt, n) +} + func TestRateLimitInterceptor(t *testing.T) { t.Run("test getRequestInfo", func(t *testing.T) { mockCache := NewMockCache(t) diff --git a/internal/proxy/simple_rate_limiter.go b/internal/proxy/simple_rate_limiter.go index 0a6b721c46b94..e5979242064fb 100644 --- a/internal/proxy/simple_rate_limiter.go +++ b/internal/proxy/simple_rate_limiter.go @@ -21,6 +21,7 @@ import ( "fmt" "strconv" "sync" + "time" "go.uber.org/zap" @@ -35,6 +36,7 @@ import ( "github.com/milvus-io/milvus/pkg/util" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/ratelimitutil" + "github.com/milvus-io/milvus/pkg/util/retry" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -42,15 +44,26 @@ import ( type SimpleLimiter struct { quotaStatesMu sync.RWMutex rateLimiter *rlinternal.RateLimiterTree + + // for alloc + allocWaitInterval time.Duration + allocRetryTimes uint } // NewSimpleLimiter returns a new SimpleLimiter. 
-func NewSimpleLimiter() *SimpleLimiter { +func NewSimpleLimiter(allocWaitInterval time.Duration, allocRetryTimes uint) *SimpleLimiter { rootRateLimiter := newClusterLimiter() - m := &SimpleLimiter{rateLimiter: rlinternal.NewRateLimiterTree(rootRateLimiter)} + m := &SimpleLimiter{rateLimiter: rlinternal.NewRateLimiterTree(rootRateLimiter), allocWaitInterval: allocWaitInterval, allocRetryTimes: allocRetryTimes} return m } +// Alloc will retry till check pass or out of times. +func (m *SimpleLimiter) Alloc(ctx context.Context, dbID int64, collectionIDToPartIDs map[int64][]int64, rt internalpb.RateType, n int) error { + return retry.Do(ctx, func() error { + return m.Check(dbID, collectionIDToPartIDs, rt, n) + }, retry.Sleep(m.allocWaitInterval), retry.Attempts(m.allocRetryTimes)) +} + // Check checks if request would be limited or denied. func (m *SimpleLimiter) Check(dbID int64, collectionIDToPartIDs map[int64][]int64, rt internalpb.RateType, n int) error { if !Params.QuotaConfig.QuotaAndLimitsEnabled.GetAsBool() { diff --git a/internal/proxy/simple_rate_limiter_test.go b/internal/proxy/simple_rate_limiter_test.go index c19253c3dbc40..d9f555b4a87ce 100644 --- a/internal/proxy/simple_rate_limiter_test.go +++ b/internal/proxy/simple_rate_limiter_test.go @@ -40,7 +40,7 @@ func TestSimpleRateLimiter(t *testing.T) { bak := Params.QuotaConfig.QuotaAndLimitsEnabled.GetValue() paramtable.Get().Save(Params.QuotaConfig.QuotaAndLimitsEnabled.Key, "true") - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) clusterRateLimiters := simpleLimiter.rateLimiter.GetRootLimiters() simpleLimiter.rateLimiter.GetOrCreateCollectionLimiters(0, collectionID, newDatabaseLimiter, @@ -83,7 +83,7 @@ func TestSimpleRateLimiter(t *testing.T) { t.Run("test global static limit", func(t *testing.T) { bak := Params.QuotaConfig.QuotaAndLimitsEnabled.GetValue() paramtable.Get().Save(Params.QuotaConfig.QuotaAndLimitsEnabled.Key, "true") - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) clusterRateLimiters := simpleLimiter.rateLimiter.GetRootLimiters() collectionIDToPartIDs := map[int64][]int64{ @@ -136,7 +136,7 @@ func TestSimpleRateLimiter(t *testing.T) { }) t.Run("not enable quotaAndLimit", func(t *testing.T) { - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) bak := Params.QuotaConfig.QuotaAndLimitsEnabled.GetValue() paramtable.Get().Save(Params.QuotaConfig.QuotaAndLimitsEnabled.Key, "false") for _, rt := range internalpb.RateType_value { @@ -150,7 +150,7 @@ func TestSimpleRateLimiter(t *testing.T) { run := func(insertRate float64) { bakInsertRate := Params.QuotaConfig.DMLMaxInsertRate.GetValue() paramtable.Get().Save(Params.QuotaConfig.DMLMaxInsertRate.Key, fmt.Sprintf("%f", insertRate)) - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) bak := Params.QuotaConfig.QuotaAndLimitsEnabled.GetValue() paramtable.Get().Save(Params.QuotaConfig.QuotaAndLimitsEnabled.Key, "true") err := simpleLimiter.Check(0, nil, internalpb.RateType_DMLInsert, 1*1024*1024) @@ -166,7 +166,7 @@ func TestSimpleRateLimiter(t *testing.T) { }) t.Run("test set rates", func(t *testing.T) { - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) zeroRates := getZeroCollectionRates() err := simpleLimiter.SetRates(newCollectionLimiterNode(map[int64]*proxypb.LimiterNode{ @@ -188,7 +188,7 @@ func TestSimpleRateLimiter(t *testing.T) { }) t.Run("test quota states", func(t *testing.T) { - simpleLimiter := NewSimpleLimiter() + 
simpleLimiter := NewSimpleLimiter(0, 0) err := simpleLimiter.SetRates(newCollectionLimiterNode(map[int64]*proxypb.LimiterNode{ 1: { // collection limiter @@ -257,7 +257,7 @@ func newCollectionLimiterNode(collectionLimiterNodes map[int64]*proxypb.LimiterN func TestRateLimiter(t *testing.T) { t.Run("test limit", func(t *testing.T) { - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) rootLimiters := simpleLimiter.rateLimiter.GetRootLimiters() for _, rt := range internalpb.RateType_value { rootLimiters.GetLimiters().Insert(internalpb.RateType(rt), ratelimitutil.NewLimiter(ratelimitutil.Limit(1000), 1)) @@ -273,7 +273,7 @@ func TestRateLimiter(t *testing.T) { }) t.Run("test setRates", func(t *testing.T) { - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) collectionRateLimiters := simpleLimiter.rateLimiter.GetOrCreateCollectionLimiters(0, int64(1), newDatabaseLimiter, func() *rlinternal.RateLimiterNode { @@ -336,7 +336,7 @@ func TestRateLimiter(t *testing.T) { }) t.Run("test get error code", func(t *testing.T) { - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) collectionRateLimiters := simpleLimiter.rateLimiter.GetOrCreateCollectionLimiters(0, int64(1), newDatabaseLimiter, func() *rlinternal.RateLimiterNode { diff --git a/internal/proxy/task_delete.go b/internal/proxy/task_delete.go index 8822410576dd2..7beaffadcbbea 100644 --- a/internal/proxy/task_delete.go +++ b/internal/proxy/task_delete.go @@ -231,9 +231,11 @@ type deleteRunner struct { idAllocator allocator.Interface tsoAllocatorIns tsoAllocator + limiter types.Limiter // delete info schema *schemaInfo + dbID UniqueID collectionID UniqueID partitionID UniqueID partitionKeyMode bool @@ -259,6 +261,13 @@ func (dr *deleteRunner) Init(ctx context.Context) error { if err := validateCollectionName(collName); err != nil { return ErrWithLog(log, "Invalid collection name", err) } + + db, err := globalMetaCache.GetDatabaseInfo(ctx, dr.req.GetDbName()) + if err != nil { + return err + } + dr.dbID = db.dbID + dr.collectionID, err = globalMetaCache.GetCollectionID(ctx, dr.req.GetDbName(), collName) if err != nil { return ErrWithLog(log, "Failed to get collection id", err) @@ -428,7 +437,7 @@ func (dr *deleteRunner) getStreamingQueryAndDelteFunc(plan *planpb.PlanNode) exe } taskCh := make(chan *deleteTask, 256) - go dr.receiveQueryResult(ctx, client, taskCh) + go dr.receiveQueryResult(ctx, client, taskCh, partitionIDs) var allQueryCnt int64 // wait all task finish for task := range taskCh { @@ -449,7 +458,7 @@ func (dr *deleteRunner) getStreamingQueryAndDelteFunc(plan *planpb.PlanNode) exe } } -func (dr *deleteRunner) receiveQueryResult(ctx context.Context, client querypb.QueryNode_QueryStreamClient, taskCh chan *deleteTask) { +func (dr *deleteRunner) receiveQueryResult(ctx context.Context, client querypb.QueryNode_QueryStreamClient, taskCh chan *deleteTask, partitionIDs []int64) { defer func() { close(taskCh) }() @@ -472,6 +481,15 @@ func (dr *deleteRunner) receiveQueryResult(ctx context.Context, client querypb.Q return } + if dr.limiter != nil { + err := dr.limiter.Alloc(ctx, dr.dbID, map[int64][]int64{dr.collectionID: partitionIDs}, internalpb.RateType_DMLDelete, proto.Size(result.GetIds())) + if err != nil { + dr.err = err + log.Warn("query stream for delete failed because rate limiter", zap.Int64("msgID", dr.msgID), zap.Error(err)) + return + } + } + task, err := dr.produce(ctx, result.GetIds()) if err != nil { dr.err = err diff --git 
a/internal/proxy/task_delete_test.go b/internal/proxy/task_delete_test.go index 4c973b803c476..657029001952f 100644 --- a/internal/proxy/task_delete_test.go +++ b/internal/proxy/task_delete_test.go @@ -118,6 +118,7 @@ func TestDeleteTask_GetChannels(t *testing.T) { mock.AnythingOfType("string"), mock.AnythingOfType("string"), ).Return(collectionID, nil) + globalMetaCache = cache chMgr := NewMockChannelsMgr(t) chMgr.EXPECT().getChannels(mock.Anything).Return(channels, nil) @@ -265,6 +266,19 @@ func TestDeleteRunner_Init(t *testing.T) { assert.Error(t, dr.Init(context.Background())) }) + t.Run("fail to get database info", func(t *testing.T) { + dr := deleteRunner{ + req: &milvuspb.DeleteRequest{ + CollectionName: collectionName, + }, + } + cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(nil, fmt.Errorf("mock error")) + globalMetaCache = cache + + assert.Error(t, dr.Init(context.Background())) + }) + t.Run("fail to get collection id", func(t *testing.T) { dr := deleteRunner{ req: &milvuspb.DeleteRequest{ @@ -272,11 +286,13 @@ func TestDeleteRunner_Init(t *testing.T) { }, } cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), mock.AnythingOfType("string"), ).Return(int64(0), errors.New("mock GetCollectionID err")) + globalMetaCache = cache assert.Error(t, dr.Init(context.Background())) }) @@ -287,6 +303,7 @@ func TestDeleteRunner_Init(t *testing.T) { DbName: dbName, }} cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), @@ -309,6 +326,7 @@ func TestDeleteRunner_Init(t *testing.T) { PartitionName: partitionName, }} cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), @@ -347,6 +365,7 @@ func TestDeleteRunner_Init(t *testing.T) { }, } cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), @@ -372,6 +391,7 @@ func TestDeleteRunner_Init(t *testing.T) { }, } cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), @@ -405,6 +425,7 @@ func TestDeleteRunner_Init(t *testing.T) { chMgr: chMgr, } cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), @@ -656,6 +677,65 @@ func TestDeleteRunner_Run(t *testing.T) { assert.Error(t, dr.Run(ctx)) }) + t.Run("complex delete rate limit check failed", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + mockMgr := NewMockChannelsMgr(t) + qn := mocks.NewMockQueryNodeClient(t) + lb := NewMockLBPolicy(t) + + dr := deleteRunner{ + chMgr: mockMgr, + queue: queue.dmQueue, + schema: schema, + collectionID: collectionID, + partitionID: partitionID, + vChannels: channels, + idAllocator: idAllocator, + tsoAllocatorIns: 
tsoAllocator, + lb: lb, + limiter: &limiterMock{}, + result: &milvuspb.MutationResult{ + Status: merr.Success(), + IDs: &schemapb.IDs{ + IdField: nil, + }, + }, + req: &milvuspb.DeleteRequest{ + CollectionName: collectionName, + PartitionName: partitionName, + DbName: dbName, + Expr: "pk < 3", + }, + } + lb.EXPECT().Execute(mock.Anything, mock.Anything).Call.Return(func(ctx context.Context, workload CollectionWorkLoad) error { + return workload.exec(ctx, 1, qn, "") + }) + + qn.EXPECT().QueryStream(mock.Anything, mock.Anything).Call.Return( + func(ctx context.Context, in *querypb.QueryRequest, opts ...grpc.CallOption) querypb.QueryNode_QueryStreamClient { + client := streamrpc.NewLocalQueryClient(ctx) + server := client.CreateServer() + + server.Send(&internalpb.RetrieveResults{ + Status: merr.Success(), + Ids: &schemapb.IDs{ + IdField: &schemapb.IDs_IntId{ + IntId: &schemapb.LongArray{ + Data: []int64{0, 1, 2}, + }, + }, + }, + }) + server.FinishSend(nil) + return client + }, nil) + + assert.Error(t, dr.Run(ctx)) + assert.Equal(t, int64(0), dr.result.DeleteCnt) + }) + t.Run("complex delete produce failed", func(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/internal/types/types.go b/internal/types/types.go index 27acc7cac3d1b..93c85dc9e79ef 100644 --- a/internal/types/types.go +++ b/internal/types/types.go @@ -38,6 +38,7 @@ import ( // Otherwise, the request will pass. Limit also returns limit of limiter. type Limiter interface { Check(dbID int64, collectionIDToPartIDs map[int64][]int64, rt internalpb.RateType, n int) error + Alloc(ctx context.Context, dbID int64, collectionIDToPartIDs map[int64][]int64, rt internalpb.RateType, n int) error } // Component is the interface all services implement diff --git a/pkg/util/paramtable/quota_param.go b/pkg/util/paramtable/quota_param.go index 33996a7b77dc7..3cc550e9e6eee 100644 --- a/pkg/util/paramtable/quota_param.go +++ b/pkg/util/paramtable/quota_param.go @@ -45,6 +45,9 @@ const ( type quotaConfig struct { QuotaAndLimitsEnabled ParamItem `refreshable:"false"` QuotaCenterCollectInterval ParamItem `refreshable:"false"` + AllocRetryTimes ParamItem `refreshable:"false"` + AllocWaitInterval ParamItem `refreshable:"false"` + ComplexDeleteLimitEnable ParamItem `refreshable:"false"` // ddl DDLLimitEnabled ParamItem `refreshable:"true"` @@ -2021,6 +2024,33 @@ MB/s, default no limit`, Export: true, } p.CoolOffSpeed.Init(base.mgr) + + p.AllocRetryTimes = ParamItem{ + Key: "quotaAndLimits.limits.allocRetryTimes", + Version: "2.4.0", + DefaultValue: "15", + Doc: `retry times when delete alloc forward data from rate limit failed`, + Export: true, + } + p.AllocRetryTimes.Init(base.mgr) + + p.AllocWaitInterval = ParamItem{ + Key: "quotaAndLimits.limits.allocWaitInterval", + Version: "2.4.0", + DefaultValue: "1000", + Doc: `retry wait duration when delete alloc forward data rate failed, in millisecond`, + Export: true, + } + p.AllocWaitInterval.Init(base.mgr) + + p.ComplexDeleteLimitEnable = ParamItem{ + Key: "quotaAndLimits.limits.complexDeleteLimitEnable", + Version: "2.4.0", + DefaultValue: "false", + Doc: `whether complex delete check forward data by limiter`, + Export: true, + } + p.ComplexDeleteLimitEnable.Init(base.mgr) } func megaBytes2Bytes(f float64) float64 { From b391781a2e49b46b0192cbdce1297e5f7c0caa1a Mon Sep 17 00:00:00 2001 From: sre-ci-robot <56469371+sre-ci-robot@users.noreply.github.com> Date: Fri, 24 May 2024 01:43:40 +0800 Subject: [PATCH 054/126] [automated] Update Knowhere Commit 
 (#33340)

Update Knowhere Commit

Signed-off-by: sre-ci-robot sre-ci-robot@users.noreply.github.com

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 internal/core/thirdparty/knowhere/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/core/thirdparty/knowhere/CMakeLists.txt b/internal/core/thirdparty/knowhere/CMakeLists.txt
index cd14699c819f0..1b0bdc0911500 100644
--- a/internal/core/thirdparty/knowhere/CMakeLists.txt
+++ b/internal/core/thirdparty/knowhere/CMakeLists.txt
@@ -12,7 +12,7 @@
 #-------------------------------------------------------------------------------
 # Update KNOWHERE_VERSION for the first occurrence
-set( KNOWHERE_VERSION abd4087 )
+set( KNOWHERE_VERSION 1f51ea4e )
 set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git")
 message(STATUS "Knowhere repo: ${GIT_REPOSITORY}")
 message(STATUS "Knowhere version: ${KNOWHERE_VERSION}")

From 592d701617f361a25c26377c0a495f576856d0a8 Mon Sep 17 00:00:00 2001
From: "yihao.dai"
Date: Fri, 24 May 2024 09:05:47 +0800
Subject: [PATCH 055/126] fix: Fix global rate limit is not working (#33335)

If the request is limited by the rate limiter, the limiter should not
"Cancel". This is because, if the request is limited, tokens are not
deducted; instead, the "Cancel" operation would increase the token count.

issue: https://github.com/milvus-io/milvus/issues/31705

Signed-off-by: bigsheeper
---
 internal/proxy/simple_rate_limiter.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/internal/proxy/simple_rate_limiter.go b/internal/proxy/simple_rate_limiter.go
index e5979242064fb..1803de81e6a14 100644
--- a/internal/proxy/simple_rate_limiter.go
+++ b/internal/proxy/simple_rate_limiter.go
@@ -78,7 +78,6 @@ func (m *SimpleLimiter) Check(dbID int64, collectionIDToPartIDs map[int64][]int6
	ret := clusterRateLimiters.Check(rt, n)
	if ret != nil {
-		clusterRateLimiters.Cancel(rt, n)
		return ret
	}

From 7730b910b9f46be6ce2c7b881ccedd34db440cfc Mon Sep 17 00:00:00 2001
From: "yihao.dai"
Date: Fri, 24 May 2024 09:07:41 +0800
Subject: [PATCH 056/126] enhance: Decouple compaction from shard (#33138)

Decouple compaction from shard, remove dependencies on shards (e.g.
SyncSegments, injection).
issue: https://github.com/milvus-io/milvus/issues/32809 --------- Signed-off-by: bigsheeper --- go.sum | 2 + internal/datacoord/compaction.go | 48 ++-- internal/datacoord/compaction_scheduler.go | 95 ++++--- .../datacoord/compaction_scheduler_test.go | 50 ++-- internal/datacoord/compaction_test.go | 41 +-- internal/datacoord/compaction_trigger.go | 25 +- internal/datacoord/compaction_trigger_test.go | 41 +-- internal/datacoord/compaction_trigger_v2.go | 35 ++- .../datacoord/compaction_trigger_v2_test.go | 16 +- .../datacoord/mock_compaction_plan_context.go | 33 +-- internal/datacoord/server.go | 2 +- internal/datanode/compaction/compactor.go | 2 +- internal/datanode/compaction/mix_compactor.go | 76 ++---- .../datanode/compaction/mix_compactor_test.go | 99 ++------ internal/datanode/compaction_executor.go | 14 +- internal/datanode/l0_compactor.go | 37 ++- internal/datanode/l0_compactor_test.go | 209 ++++++++------- internal/datanode/services.go | 65 +---- internal/datanode/services_test.go | 200 +-------------- .../datanode/syncmgr/mock_sync_manager.go | 74 +----- internal/datanode/syncmgr/sync_manager.go | 18 +- .../datanode/syncmgr/sync_manager_test.go | 46 ---- internal/proto/data_coord.proto | 1 + .../integration/compaction/compaction_test.go | 47 ++++ .../compaction/l0_compaction_test.go | 238 ++++++++++++++++++ .../compaction/mix_compaction_test.go | 205 +++++++++++++++ 26 files changed, 843 insertions(+), 876 deletions(-) create mode 100644 tests/integration/compaction/compaction_test.go create mode 100644 tests/integration/compaction/l0_compaction_test.go create mode 100644 tests/integration/compaction/mix_compaction_test.go diff --git a/go.sum b/go.sum index 20a4faf084194..25be847bbe9f7 100644 --- a/go.sum +++ b/go.sum @@ -290,6 +290,7 @@ github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2C github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -516,6 +517,7 @@ github.com/kataras/iris/v12 v12.1.8/go.mod h1:LMYy4VlP67TQ3Zgriz8RE2h2kMZV2SgMYb github.com/kataras/neffos v0.0.14/go.mod h1:8lqADm8PnbeFfL7CLXh1WHw53dG27MC3pgi2R1rmoTE= github.com/kataras/pio v0.0.2/go.mod h1:hAoW0t9UmXi4R5Oyq5Z4irTbaTsOemSrDGUtaTl7Dro= github.com/kataras/sitemap v0.0.5/go.mod h1:KY2eugMKiPwsJgx7+U103YZehfvNGOXURubcGyk0Bz8= +github.com/keybase/go-keychain v0.0.0-20190712205309-48d3d31d256d/go.mod h1:JJNrCn9otv/2QP4D7SMJBgaleKpOf66PnW6F5WGNRIc= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= diff --git a/internal/datacoord/compaction.go b/internal/datacoord/compaction.go index 87b3fdbe2cd7c..c9919d7b821ea 100644 --- a/internal/datacoord/compaction.go +++ b/internal/datacoord/compaction.go @@ -45,11 +45,12 @@ const ( tsTimeout = uint64(1) ) +//go:generate mockery --name=compactionPlanContext 
--structname=MockCompactionPlanContext --output=./ --filename=mock_compaction_plan_context.go --with-expecter --inpackage type compactionPlanContext interface { start() stop() // execCompactionPlan start to execute plan and return immediately - execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) error + execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) // getCompaction return compaction task. If planId does not exist, return nil. getCompaction(planID int64) *compactionTask // updateCompaction set the compaction state to timeout or completed @@ -277,14 +278,8 @@ func (c *compactionPlanHandler) updateTask(planID int64, opts ...compactionTaskO } } -func (c *compactionPlanHandler) enqueuePlan(signal *compactionSignal, plan *datapb.CompactionPlan) error { - nodeID, err := c.chManager.FindWatcher(plan.GetChannel()) - if err != nil { - log.Error("failed to find watcher", zap.Int64("planID", plan.GetPlanID()), zap.Error(err)) - return err - } - - log := log.With(zap.Int64("planID", plan.GetPlanID()), zap.Int64("nodeID", nodeID)) +func (c *compactionPlanHandler) enqueuePlan(signal *compactionSignal, plan *datapb.CompactionPlan) { + log := log.With(zap.Int64("planID", plan.GetPlanID())) c.setSegmentsCompacting(plan, true) _, span := otel.Tracer(typeutil.DataCoordRole).Start(context.Background(), fmt.Sprintf("Compaction-%s", plan.GetType())) @@ -293,7 +288,6 @@ func (c *compactionPlanHandler) enqueuePlan(signal *compactionSignal, plan *data triggerInfo: signal, plan: plan, state: pipelining, - dataNodeID: nodeID, span: span, } c.mu.Lock() @@ -301,8 +295,7 @@ func (c *compactionPlanHandler) enqueuePlan(signal *compactionSignal, plan *data c.mu.Unlock() c.scheduler.Submit(task) - log.Info("Compaction plan submited") - return nil + log.Info("Compaction plan submitted") } func (c *compactionPlanHandler) RefreshPlan(task *compactionTask) error { @@ -337,10 +330,14 @@ func (c *compactionPlanHandler) RefreshPlan(task *compactionTask) error { sealedSegBinlogs := lo.Map(sealedSegments, func(info *SegmentInfo, _ int) *datapb.CompactionSegmentBinlogs { return &datapb.CompactionSegmentBinlogs{ - SegmentID: info.GetID(), - Level: info.GetLevel(), - CollectionID: info.GetCollectionID(), - PartitionID: info.GetPartitionID(), + SegmentID: info.GetID(), + FieldBinlogs: nil, + Field2StatslogPaths: info.GetStatslogs(), + Deltalogs: nil, + InsertChannel: info.GetInsertChannel(), + Level: info.GetLevel(), + CollectionID: info.GetCollectionID(), + PartitionID: info.GetPartitionID(), } }) @@ -407,8 +404,8 @@ func (c *compactionPlanHandler) notifyTasks(tasks []*compactionTask) { } // execCompactionPlan start to execute plan and return immediately -func (c *compactionPlanHandler) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) error { - return c.enqueuePlan(signal, plan) +func (c *compactionPlanHandler) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) { + c.enqueuePlan(signal, plan) } func (c *compactionPlanHandler) setSegmentsCompacting(plan *datapb.CompactionPlan, compacting bool) { @@ -483,25 +480,17 @@ func (c *compactionPlanHandler) handleMergeCompactionResult(plan *datapb.Compact log.Info("meta has already been changed, skip meta change and retry sync segments") } else { // Also prepare metric updates. 
- newSegments, metricMutation, err := c.meta.CompleteCompactionMutation(plan, result) + _, metricMutation, err := c.meta.CompleteCompactionMutation(plan, result) if err != nil { return err } // Apply metrics after successful meta update. metricMutation.commit() - newSegmentInfo = newSegments[0] } nodeID := c.plans[plan.GetPlanID()].dataNodeID req := &datapb.SyncSegmentsRequest{ - PlanID: plan.PlanID, - CompactedTo: newSegmentInfo.GetID(), - CompactedFrom: newSegmentInfo.GetCompactionFrom(), - NumOfRows: newSegmentInfo.GetNumOfRows(), - StatsLogs: newSegmentInfo.GetStatslogs(), - ChannelName: plan.GetChannel(), - PartitionId: newSegmentInfo.GetPartitionID(), - CollectionId: newSegmentInfo.GetCollectionID(), + PlanID: plan.PlanID, } log.Info("handleCompactionResult: syncing segments with node", zap.Int64("nodeID", nodeID)) @@ -633,8 +622,7 @@ func (c *compactionPlanHandler) updateCompaction(ts Timestamp) error { // without changing the meta log.Info("compaction syncing unknown plan with node") if err := c.sessions.SyncSegments(nodeID, &datapb.SyncSegmentsRequest{ - PlanID: planID, - ChannelName: plan.GetChannel(), + PlanID: planID, }); err != nil { log.Warn("compaction failed to sync segments with node", zap.Error(err)) return err diff --git a/internal/datacoord/compaction_scheduler.go b/internal/datacoord/compaction_scheduler.go index 5e592d5e3033f..745a9d40ff7f6 100644 --- a/internal/datacoord/compaction_scheduler.go +++ b/internal/datacoord/compaction_scheduler.go @@ -64,75 +64,64 @@ func (s *CompactionScheduler) Submit(tasks ...*compactionTask) { // Schedule pick 1 or 0 tasks for 1 node func (s *CompactionScheduler) Schedule() []*compactionTask { - s.taskGuard.Lock() - nodeTasks := lo.GroupBy(s.queuingTasks, func(t *compactionTask) int64 { - return t.dataNodeID - }) - s.taskGuard.Unlock() - if len(nodeTasks) == 0 { + s.taskGuard.RLock() + if len(s.queuingTasks) == 0 { + s.taskGuard.RUnlock() return nil // To mitigate the need for frequent slot querying } + s.taskGuard.RUnlock() nodeSlots := s.cluster.QuerySlots() - executable := make(map[int64]*compactionTask) + l0ChannelExcludes := typeutil.NewSet[string]() + mixChannelExcludes := typeutil.NewSet[string]() - pickPriorPolicy := func(tasks []*compactionTask, exclusiveChannels []string, executing []string) *compactionTask { - for _, task := range tasks { - // TODO: sheep, replace pickShardNode with pickAnyNode - if nodeID := s.pickShardNode(task.dataNodeID, nodeSlots); nodeID == NullNodeID { - log.Warn("cannot find datanode for compaction task", zap.Int64("planID", task.plan.PlanID), zap.String("vchannel", task.plan.Channel)) - continue + for _, tasks := range s.parallelTasks { + for _, t := range tasks { + switch t.plan.GetType() { + case datapb.CompactionType_Level0DeleteCompaction: + l0ChannelExcludes.Insert(t.plan.GetChannel()) + case datapb.CompactionType_MixCompaction: + mixChannelExcludes.Insert(t.plan.GetChannel()) } - - if lo.Contains(exclusiveChannels, task.plan.GetChannel()) { - continue - } - - if task.plan.GetType() == datapb.CompactionType_Level0DeleteCompaction { - // Channel of LevelZeroCompaction task with no executing compactions - if !lo.Contains(executing, task.plan.GetChannel()) { - return task - } - - // Don't schedule any tasks for channel with LevelZeroCompaction task - // when there're executing compactions - exclusiveChannels = append(exclusiveChannels, task.plan.GetChannel()) - continue - } - - return task } - - return nil } s.taskGuard.Lock() defer s.taskGuard.Unlock() - // pick 1 or 0 task for 1 node - for 
node, tasks := range nodeTasks { - parallel := s.parallelTasks[node] - - var ( - executing = typeutil.NewSet[string]() - channelsExecPrior = typeutil.NewSet[string]() - ) - for _, t := range parallel { - executing.Insert(t.plan.GetChannel()) - if t.plan.GetType() == datapb.CompactionType_Level0DeleteCompaction { - channelsExecPrior.Insert(t.plan.GetChannel()) - } - } - picked := pickPriorPolicy(tasks, channelsExecPrior.Collect(), executing.Collect()) - if picked != nil { - executable[node] = picked - nodeSlots[node]-- + picked := make([]*compactionTask, 0) + for _, t := range s.queuingTasks { + nodeID := s.pickAnyNode(nodeSlots) + if nodeID == NullNodeID { + log.Warn("cannot find datanode for compaction task", + zap.Int64("planID", t.plan.PlanID), zap.String("vchannel", t.plan.Channel)) + continue + } + switch t.plan.GetType() { + case datapb.CompactionType_Level0DeleteCompaction: + if l0ChannelExcludes.Contain(t.plan.GetChannel()) || + mixChannelExcludes.Contain(t.plan.GetChannel()) { + continue + } + t.dataNodeID = nodeID + picked = append(picked, t) + l0ChannelExcludes.Insert(t.plan.GetChannel()) + nodeSlots[nodeID]-- + case datapb.CompactionType_MixCompaction: + if l0ChannelExcludes.Contain(t.plan.GetChannel()) { + continue + } + t.dataNodeID = nodeID + picked = append(picked, t) + mixChannelExcludes.Insert(t.plan.GetChannel()) + nodeSlots[nodeID]-- } } var pickPlans []int64 - for node, task := range executable { + for _, task := range picked { + node := task.dataNodeID pickPlans = append(pickPlans, task.plan.PlanID) if _, ok := s.parallelTasks[node]; !ok { s.parallelTasks[node] = []*compactionTask{task} @@ -156,7 +145,7 @@ func (s *CompactionScheduler) Schedule() []*compactionTask { } } - return lo.Values(executable) + return picked } func (s *CompactionScheduler) Finish(nodeID UniqueID, plan *datapb.CompactionPlan) { diff --git a/internal/datacoord/compaction_scheduler_test.go b/internal/datacoord/compaction_scheduler_test.go index 37f64f740b2f7..a9e30ec996a17 100644 --- a/internal/datacoord/compaction_scheduler_test.go +++ b/internal/datacoord/compaction_scheduler_test.go @@ -60,11 +60,11 @@ func (s *SchedulerSuite) TestScheduleParallelTaskFull() { }{ {"with L0 tasks", []*compactionTask{ {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-10", Type: datapb.CompactionType_Level0DeleteCompaction}}, - {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, + {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, }, []UniqueID{}}, {"without L0 tasks", []*compactionTask{ - {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-10", Type: datapb.CompactionType_MinorCompaction}}, - {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, + {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-10", Type: datapb.CompactionType_MixCompaction}}, + {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, }, []UniqueID{}}, {"empty tasks", []*compactionTask{}, []UniqueID{}}, } @@ -101,16 +101,16 @@ func (s *SchedulerSuite) TestScheduleNodeWith1ParallelTask() { }{ {"with L0 tasks diff channel", []*compactionTask{ {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-10", Type: datapb.CompactionType_Level0DeleteCompaction}}, - {dataNodeID: 101, plan: 
&datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, - }, []UniqueID{10}}, + {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, + }, []UniqueID{10, 11}}, {"with L0 tasks same channel", []*compactionTask{ {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-2", Type: datapb.CompactionType_Level0DeleteCompaction}}, - {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, + {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, }, []UniqueID{11}}, {"without L0 tasks", []*compactionTask{ - {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 14, Channel: "ch-2", Type: datapb.CompactionType_MinorCompaction}}, - {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 13, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, - }, []UniqueID{14}}, + {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 14, Channel: "ch-2", Type: datapb.CompactionType_MixCompaction}}, + {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 13, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, + }, []UniqueID{14, 13}}, {"empty tasks", []*compactionTask{}, []UniqueID{}}, } @@ -134,15 +134,6 @@ func (s *SchedulerSuite) TestScheduleNodeWith1ParallelTask() { return t.plan.PlanID })) - // the second schedule returns empty for no slot - if len(test.tasks) > 0 { - cluster := NewMockCluster(s.T()) - cluster.EXPECT().QuerySlots().Return(map[int64]int64{101: 0}) - s.scheduler.cluster = cluster - } - gotTasks = s.scheduler.Schedule() - s.Empty(gotTasks) - s.Equal(4+len(test.tasks), s.scheduler.GetTaskCount()) }) } @@ -158,16 +149,16 @@ func (s *SchedulerSuite) TestScheduleNodeWithL0Executing() { }{ {"with L0 tasks diff channel", []*compactionTask{ {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-10", Type: datapb.CompactionType_Level0DeleteCompaction}}, - {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, - }, []UniqueID{10}}, + {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, + }, []UniqueID{10, 11}}, {"with L0 tasks same channel", []*compactionTask{ {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-3", Type: datapb.CompactionType_Level0DeleteCompaction}}, - {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, - {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 13, Channel: "ch-3", Type: datapb.CompactionType_MinorCompaction}}, + {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, + {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 13, Channel: "ch-3", Type: datapb.CompactionType_MixCompaction}}, }, []UniqueID{11}}, {"without L0 tasks", []*compactionTask{ - {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 14, Channel: "ch-3", Type: datapb.CompactionType_MinorCompaction}}, - {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 13, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, + {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 14, Channel: "ch-3", Type: datapb.CompactionType_MixCompaction}}, + {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 13, Channel: 
"ch-11", Type: datapb.CompactionType_MixCompaction}}, }, []UniqueID{13}}, {"empty tasks", []*compactionTask{}, []UniqueID{}}, } @@ -192,17 +183,6 @@ func (s *SchedulerSuite) TestScheduleNodeWithL0Executing() { return t.plan.PlanID })) - // the second schedule returns empty for no slot - if len(test.tasks) > 0 { - cluster := NewMockCluster(s.T()) - cluster.EXPECT().QuerySlots().Return(map[int64]int64{101: 0}) - s.scheduler.cluster = cluster - } - if len(gotTasks) > 0 { - gotTasks = s.scheduler.Schedule() - s.Empty(gotTasks) - } - s.Equal(4+len(test.tasks), s.scheduler.GetTaskCount()) }) } diff --git a/internal/datacoord/compaction_test.go b/internal/datacoord/compaction_test.go index 0936e7f8adf24..879dfdbbbb9a9 100644 --- a/internal/datacoord/compaction_test.go +++ b/internal/datacoord/compaction_test.go @@ -431,43 +431,22 @@ func (s *CompactionPlanHandlerSuite) TestRefreshPlanMixCompaction() { } func (s *CompactionPlanHandlerSuite) TestExecCompactionPlan() { - s.mockCm.EXPECT().FindWatcher(mock.Anything).RunAndReturn(func(channel string) (int64, error) { - if channel == "ch-1" { - return 0, errors.Errorf("mock error for ch-1") - } - - return 1, nil - }).Twice() s.mockSch.EXPECT().Submit(mock.Anything).Return().Once() - tests := []struct { - description string - channel string - hasError bool - }{ - {"channel with error", "ch-1", true}, - {"channel with no error", "ch-2", false}, - } - handler := newCompactionPlanHandler(nil, s.mockSessMgr, s.mockCm, s.mockMeta, s.mockAlloc) handler.scheduler = s.mockSch - for idx, test := range tests { - sig := &compactionSignal{id: int64(idx)} - plan := &datapb.CompactionPlan{ - PlanID: int64(idx), - } - s.Run(test.description, func() { - plan.Channel = test.channel - - err := handler.execCompactionPlan(sig, plan) - if test.hasError { - s.Error(err) - } else { - s.NoError(err) - } - }) + sig := &compactionSignal{id: int64(1)} + plan := &datapb.CompactionPlan{ + PlanID: int64(1), } + plan.Channel = "ch-1" + + handler.execCompactionPlan(sig, plan) + handler.mu.RLock() + defer handler.mu.RUnlock() + _, ok := handler.plans[int64(1)] + s.True(ok) } func (s *CompactionPlanHandlerSuite) TestHandleMergeCompactionResult() { diff --git a/internal/datacoord/compaction_trigger.go b/internal/datacoord/compaction_trigger.go index b6ff595887fa5..f890f6f850bd3 100644 --- a/internal/datacoord/compaction_trigger.go +++ b/internal/datacoord/compaction_trigger.go @@ -430,23 +430,14 @@ func (t *compactionTrigger) handleGlobalSignal(signal *compactionSignal) error { break } start := time.Now() - if err := fillOriginPlan(t.allocator, plan); err != nil { + if err := fillOriginPlan(coll.Schema, t.allocator, plan); err != nil { log.Warn("failed to fill plan", zap.Int64("collectionID", signal.collectionID), zap.Int64s("segmentIDs", segIDs), zap.Error(err)) continue } - err := t.compactionHandler.execCompactionPlan(signal, plan) - if err != nil { - log.Warn("failed to execute compaction plan", - zap.Int64("collectionID", signal.collectionID), - zap.Int64("planID", plan.PlanID), - zap.Int64s("segmentIDs", segIDs), - zap.Error(err)) - continue - } - + t.compactionHandler.execCompactionPlan(signal, plan) log.Info("time cost of generating global compaction", zap.Int64("planID", plan.PlanID), zap.Int64("time cost", time.Since(start).Milliseconds()), @@ -530,18 +521,11 @@ func (t *compactionTrigger) handleSignal(signal *compactionSignal) { break } start := time.Now() - if err := fillOriginPlan(t.allocator, plan); err != nil { + if err := fillOriginPlan(coll.Schema, t.allocator, plan); 
err != nil { log.Warn("failed to fill plan", zap.Error(err)) continue } - if err := t.compactionHandler.execCompactionPlan(signal, plan); err != nil { - log.Warn("failed to execute compaction plan", - zap.Int64("collection", signal.collectionID), - zap.Int64("planID", plan.PlanID), - zap.Int64s("segmentIDs", fetchSegIDs(plan.GetSegmentBinlogs())), - zap.Error(err)) - continue - } + t.compactionHandler.execCompactionPlan(signal, plan) log.Info("time cost of generating compaction", zap.Int64("planID", plan.PlanID), zap.Int64("time cost", time.Since(start).Milliseconds()), @@ -713,6 +697,7 @@ func segmentsToPlan(segments []*SegmentInfo, compactTime *compactTime) *datapb.C } log.Info("generate a plan for priority candidates", zap.Any("plan", plan), + zap.Int("len(segments)", len(plan.GetSegmentBinlogs())), zap.Int64("target segment row", plan.TotalRows), zap.Int64("target segment size", size)) return plan } diff --git a/internal/datacoord/compaction_trigger_test.go b/internal/datacoord/compaction_trigger_test.go index 19d4146a65e14..56710ed80efee 100644 --- a/internal/datacoord/compaction_trigger_test.go +++ b/internal/datacoord/compaction_trigger_test.go @@ -51,9 +51,8 @@ var _ compactionPlanContext = (*spyCompactionHandler)(nil) func (h *spyCompactionHandler) removeTasksByChannel(channel string) {} // execCompactionPlan start to execute plan and return immediately -func (h *spyCompactionHandler) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) error { +func (h *spyCompactionHandler) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) { h.spyChan <- plan - return nil } // completeCompaction record the result of a compaction @@ -106,6 +105,22 @@ func Test_compactionTrigger_force(t *testing.T) { vecFieldID := int64(201) indexID := int64(1001) + + schema := &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + FieldID: vecFieldID, + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "128", + }, + }, + }, + }, + } + tests := []struct { name string fields fields @@ -292,21 +307,8 @@ func Test_compactionTrigger_force(t *testing.T) { }, collections: map[int64]*collectionInfo{ 2: { - ID: 2, - Schema: &schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - FieldID: vecFieldID, - DataType: schemapb.DataType_FloatVector, - TypeParams: []*commonpb.KeyValuePair{ - { - Key: common.DimKey, - Value: "128", - }, - }, - }, - }, - }, + ID: 2, + Schema: schema, Properties: map[string]string{ common.CollectionTTLConfigKey: "0", }, @@ -469,6 +471,7 @@ func Test_compactionTrigger_force(t *testing.T) { Type: datapb.CompactionType_MixCompaction, Channel: "ch1", TotalRows: 200, + Schema: schema, }, }, }, @@ -2386,7 +2389,7 @@ func (s *CompactionTriggerSuite) TestHandleSignal() { }, }, }, nil) - s.compactionHandler.EXPECT().execCompactionPlan(mock.Anything, mock.Anything).Return(nil) + s.compactionHandler.EXPECT().execCompactionPlan(mock.Anything, mock.Anything).Return() tr.handleSignal(&compactionSignal{ segmentID: 1, collectionID: s.collectionID, @@ -2517,7 +2520,7 @@ func (s *CompactionTriggerSuite) TestHandleGlobalSignal() { common.CollectionAutoCompactionKey: "false", }, }, nil) - s.compactionHandler.EXPECT().execCompactionPlan(mock.Anything, mock.Anything).Return(nil) + s.compactionHandler.EXPECT().execCompactionPlan(mock.Anything, mock.Anything).Return() tr.handleGlobalSignal(&compactionSignal{ segmentID: 1, collectionID: s.collectionID, diff --git 
a/internal/datacoord/compaction_trigger_v2.go b/internal/datacoord/compaction_trigger_v2.go index 1ba9c1d9ef4aa..e1678e9f97e66 100644 --- a/internal/datacoord/compaction_trigger_v2.go +++ b/internal/datacoord/compaction_trigger_v2.go @@ -2,10 +2,12 @@ package datacoord import ( "context" + "time" "github.com/samber/lo" "go.uber.org/zap" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/log" ) @@ -33,16 +35,18 @@ type TriggerManager interface { // 2. SystemIDLE & schedulerIDLE // 3. Manual Compaction type CompactionTriggerManager struct { - scheduler Scheduler - handler compactionPlanContext // TODO replace with scheduler + scheduler Scheduler + handler Handler + compactionHandler compactionPlanContext // TODO replace with scheduler allocator allocator } -func NewCompactionTriggerManager(alloc allocator, handler compactionPlanContext) *CompactionTriggerManager { +func NewCompactionTriggerManager(alloc allocator, handler Handler, compactionHandler compactionPlanContext) *CompactionTriggerManager { m := &CompactionTriggerManager{ - allocator: alloc, - handler: handler, + allocator: alloc, + handler: handler, + compactionHandler: compactionHandler, } return m @@ -51,7 +55,7 @@ func NewCompactionTriggerManager(alloc allocator, handler compactionPlanContext) func (m *CompactionTriggerManager) Notify(taskID UniqueID, eventType CompactionTriggerType, views []CompactionView) { log := log.With(zap.Int64("taskID", taskID)) for _, view := range views { - if m.handler.isFull() { + if m.compactionHandler.isFull() { log.RatedInfo(1.0, "Skip trigger compaction for scheduler is full") return } @@ -103,7 +107,7 @@ func (m *CompactionTriggerManager) SubmitL0ViewToScheduler(taskID int64, outView // TODO, remove handler, use scheduler // m.scheduler.Submit(plan) - m.handler.execCompactionPlan(signal, plan) + m.compactionHandler.execCompactionPlan(signal, plan) log.Info("Finish to submit a LevelZeroCompaction plan", zap.Int64("taskID", taskID), zap.Int64("planID", plan.GetPlanID()), @@ -130,7 +134,14 @@ func (m *CompactionTriggerManager) buildL0CompactionPlan(view CompactionView) *d Channel: view.GetGroupLabel().Channel, } - if err := fillOriginPlan(m.allocator, plan); err != nil { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + collection, err := m.handler.GetCollection(ctx, view.GetGroupLabel().CollectionID) + if err != nil { + return nil + } + + if err := fillOriginPlan(collection.Schema, m.allocator, plan); err != nil { return nil } @@ -145,14 +156,16 @@ type chanPartSegments struct { segments []*SegmentInfo } -func fillOriginPlan(alloc allocator, plan *datapb.CompactionPlan) error { - // TODO context - id, err := alloc.allocID(context.TODO()) +func fillOriginPlan(schema *schemapb.CollectionSchema, alloc allocator, plan *datapb.CompactionPlan) error { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + id, err := alloc.allocID(ctx) if err != nil { return err } plan.PlanID = id plan.TimeoutInSeconds = Params.DataCoordCfg.CompactionTimeoutInSeconds.GetAsInt32() + plan.Schema = schema return nil } diff --git a/internal/datacoord/compaction_trigger_v2_test.go b/internal/datacoord/compaction_trigger_v2_test.go index 3176e04a0bd08..3b66ac944c822 100644 --- a/internal/datacoord/compaction_trigger_v2_test.go +++ b/internal/datacoord/compaction_trigger_v2_test.go @@ -20,6 +20,7 @@ type CompactionTriggerManagerSuite struct { suite.Suite 
mockAlloc *NMockAllocator + handler Handler mockPlanContext *MockCompactionPlanContext testLabel *CompactionGroupLabel meta *meta @@ -29,6 +30,7 @@ type CompactionTriggerManagerSuite struct { func (s *CompactionTriggerManagerSuite) SetupTest() { s.mockAlloc = NewNMockAllocator(s.T()) + s.handler = NewNMockHandler(s.T()) s.mockPlanContext = NewMockCompactionPlanContext(s.T()) s.testLabel = &CompactionGroupLabel{ @@ -42,7 +44,7 @@ func (s *CompactionTriggerManagerSuite) SetupTest() { s.meta.segments.SetSegment(id, segment) } - s.m = NewCompactionTriggerManager(s.mockAlloc, s.mockPlanContext) + s.m = NewCompactionTriggerManager(s.mockAlloc, s.handler, s.mockPlanContext) } func (s *CompactionTriggerManagerSuite) TestNotifyToFullScheduler() { @@ -73,6 +75,10 @@ func (s *CompactionTriggerManagerSuite) TestNotifyToFullScheduler() { } func (s *CompactionTriggerManagerSuite) TestNotifyByViewIDLE() { + handler := NewNMockHandler(s.T()) + handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{}, nil) + s.m.handler = handler + viewManager := NewCompactionViewManager(s.meta, s.m, s.m.allocator) collSegs := s.meta.GetCompactableSegmentGroupByCollection() @@ -120,12 +126,16 @@ func (s *CompactionTriggerManagerSuite) TestNotifyByViewIDLE() { s.ElementsMatch(expectedSegs, gotSegs) log.Info("generated plan", zap.Any("plan", plan)) - }).Return(nil).Once() + }).Return().Once() s.m.Notify(19530, TriggerTypeLevelZeroViewIDLE, levelZeroView) } func (s *CompactionTriggerManagerSuite) TestNotifyByViewChange() { + handler := NewNMockHandler(s.T()) + handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{}, nil) + s.m.handler = handler + viewManager := NewCompactionViewManager(s.meta, s.m, s.m.allocator) collSegs := s.meta.GetCompactableSegmentGroupByCollection() @@ -168,7 +178,7 @@ func (s *CompactionTriggerManagerSuite) TestNotifyByViewChange() { s.ElementsMatch(expectedSegs, gotSegs) log.Info("generated plan", zap.Any("plan", plan)) - }).Return(nil).Once() + }).Return().Once() s.m.Notify(19530, TriggerTypeLevelZeroViewChange, levelZeroView) } diff --git a/internal/datacoord/mock_compaction_plan_context.go b/internal/datacoord/mock_compaction_plan_context.go index b22041fb7f169..3b399474afe23 100644 --- a/internal/datacoord/mock_compaction_plan_context.go +++ b/internal/datacoord/mock_compaction_plan_context.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.32.4. DO NOT EDIT. +// Code generated by mockery v2.30.1. DO NOT EDIT. 
package datacoord @@ -21,17 +21,8 @@ func (_m *MockCompactionPlanContext) EXPECT() *MockCompactionPlanContext_Expecte } // execCompactionPlan provides a mock function with given fields: signal, plan -func (_m *MockCompactionPlanContext) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) error { - ret := _m.Called(signal, plan) - - var r0 error - if rf, ok := ret.Get(0).(func(*compactionSignal, *datapb.CompactionPlan) error); ok { - r0 = rf(signal, plan) - } else { - r0 = ret.Error(0) - } - - return r0 +func (_m *MockCompactionPlanContext) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) { + _m.Called(signal, plan) } // MockCompactionPlanContext_execCompactionPlan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'execCompactionPlan' @@ -40,8 +31,8 @@ type MockCompactionPlanContext_execCompactionPlan_Call struct { } // execCompactionPlan is a helper method to define mock.On call -// - signal *compactionSignal -// - plan *datapb.CompactionPlan +// - signal *compactionSignal +// - plan *datapb.CompactionPlan func (_e *MockCompactionPlanContext_Expecter) execCompactionPlan(signal interface{}, plan interface{}) *MockCompactionPlanContext_execCompactionPlan_Call { return &MockCompactionPlanContext_execCompactionPlan_Call{Call: _e.mock.On("execCompactionPlan", signal, plan)} } @@ -53,12 +44,12 @@ func (_c *MockCompactionPlanContext_execCompactionPlan_Call) Run(run func(signal return _c } -func (_c *MockCompactionPlanContext_execCompactionPlan_Call) Return(_a0 error) *MockCompactionPlanContext_execCompactionPlan_Call { - _c.Call.Return(_a0) +func (_c *MockCompactionPlanContext_execCompactionPlan_Call) Return() *MockCompactionPlanContext_execCompactionPlan_Call { + _c.Call.Return() return _c } -func (_c *MockCompactionPlanContext_execCompactionPlan_Call) RunAndReturn(run func(*compactionSignal, *datapb.CompactionPlan) error) *MockCompactionPlanContext_execCompactionPlan_Call { +func (_c *MockCompactionPlanContext_execCompactionPlan_Call) RunAndReturn(run func(*compactionSignal, *datapb.CompactionPlan)) *MockCompactionPlanContext_execCompactionPlan_Call { _c.Call.Return(run) return _c } @@ -85,7 +76,7 @@ type MockCompactionPlanContext_getCompaction_Call struct { } // getCompaction is a helper method to define mock.On call -// - planID int64 +// - planID int64 func (_e *MockCompactionPlanContext_Expecter) getCompaction(planID interface{}) *MockCompactionPlanContext_getCompaction_Call { return &MockCompactionPlanContext_getCompaction_Call{Call: _e.mock.On("getCompaction", planID)} } @@ -129,7 +120,7 @@ type MockCompactionPlanContext_getCompactionTasksBySignalID_Call struct { } // getCompactionTasksBySignalID is a helper method to define mock.On call -// - signalID int64 +// - signalID int64 func (_e *MockCompactionPlanContext_Expecter) getCompactionTasksBySignalID(signalID interface{}) *MockCompactionPlanContext_getCompactionTasksBySignalID_Call { return &MockCompactionPlanContext_getCompactionTasksBySignalID_Call{Call: _e.mock.On("getCompactionTasksBySignalID", signalID)} } @@ -203,7 +194,7 @@ type MockCompactionPlanContext_removeTasksByChannel_Call struct { } // removeTasksByChannel is a helper method to define mock.On call -// - channel string +// - channel string func (_e *MockCompactionPlanContext_Expecter) removeTasksByChannel(channel interface{}) *MockCompactionPlanContext_removeTasksByChannel_Call { return &MockCompactionPlanContext_removeTasksByChannel_Call{Call: _e.mock.On("removeTasksByChannel", 
channel)} } @@ -309,7 +300,7 @@ type MockCompactionPlanContext_updateCompaction_Call struct { } // updateCompaction is a helper method to define mock.On call -// - ts uint64 +// - ts uint64 func (_e *MockCompactionPlanContext_Expecter) updateCompaction(ts interface{}) *MockCompactionPlanContext_updateCompaction_Call { return &MockCompactionPlanContext_updateCompaction_Call{Call: _e.mock.On("updateCompaction", ts)} } diff --git a/internal/datacoord/server.go b/internal/datacoord/server.go index 50ccc8d37ca58..85c6535637d41 100644 --- a/internal/datacoord/server.go +++ b/internal/datacoord/server.go @@ -524,7 +524,7 @@ func (s *Server) SetIndexNodeCreator(f func(context.Context, string, int64) (typ func (s *Server) createCompactionHandler() { s.compactionHandler = newCompactionPlanHandler(s.cluster, s.sessionManager, s.channelManager, s.meta, s.allocator) - triggerv2 := NewCompactionTriggerManager(s.allocator, s.compactionHandler) + triggerv2 := NewCompactionTriggerManager(s.allocator, s.handler, s.compactionHandler) s.compactionViewManager = NewCompactionViewManager(s.meta, triggerv2, s.allocator) } diff --git a/internal/datanode/compaction/compactor.go b/internal/datanode/compaction/compactor.go index da57562d93e28..825723a98fd52 100644 --- a/internal/datanode/compaction/compactor.go +++ b/internal/datanode/compaction/compactor.go @@ -21,10 +21,10 @@ import ( "github.com/milvus-io/milvus/pkg/util/typeutil" ) +//go:generate mockery --name=Compactor --structname=MockCompactor --output=./ --filename=mock_compactor.go --with-expecter --inpackage type Compactor interface { Complete() Compact() (*datapb.CompactionPlanResult, error) - InjectDone() Stop() GetPlanID() typeutil.UniqueID GetCollection() typeutil.UniqueID diff --git a/internal/datanode/compaction/mix_compactor.go b/internal/datanode/compaction/mix_compactor.go index da18de0f82fa8..928fff81ed248 100644 --- a/internal/datanode/compaction/mix_compactor.go +++ b/internal/datanode/compaction/mix_compactor.go @@ -21,7 +21,6 @@ import ( "fmt" sio "io" "strconv" - "sync" "time" "github.com/cockroachdb/errors" @@ -33,15 +32,12 @@ import ( "github.com/milvus-io/milvus/internal/datanode/allocator" "github.com/milvus-io/milvus/internal/datanode/io" iter "github.com/milvus-io/milvus/internal/datanode/iterators" - "github.com/milvus-io/milvus/internal/datanode/metacache" - "github.com/milvus-io/milvus/internal/datanode/syncmgr" "github.com/milvus-io/milvus/internal/metastore/kv/binlog" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/util/funcutil" - "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/timerecord" "github.com/milvus-io/milvus/pkg/util/tsoutil" @@ -51,9 +47,6 @@ import ( // for MixCompaction only type mixCompactionTask struct { binlogIO io.BinlogIO - Compactor - metaCache metacache.MetaCache - syncMgr syncmgr.SyncManager allocator.Allocator currentTs typeutil.Timestamp @@ -62,9 +55,8 @@ type mixCompactionTask struct { ctx context.Context cancel context.CancelFunc - injectDoneOnce sync.Once - done chan struct{} - tr *timerecord.TimeRecorder + done chan struct{} + tr *timerecord.TimeRecorder } // make sure compactionTask implements compactor interface @@ -73,8 +65,6 @@ var _ Compactor = (*mixCompactionTask)(nil) func NewMixCompactionTask( ctx context.Context, binlogIO io.BinlogIO, - metaCache 
metacache.MetaCache, - syncMgr syncmgr.SyncManager, alloc allocator.Allocator, plan *datapb.CompactionPlan, ) *mixCompactionTask { @@ -83,8 +73,6 @@ func NewMixCompactionTask( ctx: ctx1, cancel: cancel, binlogIO: binlogIO, - syncMgr: syncMgr, - metaCache: metaCache, Allocator: alloc, plan: plan, tr: timerecord.NewTimeRecorder("mix compaction"), @@ -100,7 +88,6 @@ func (t *mixCompactionTask) Complete() { func (t *mixCompactionTask) Stop() { t.cancel() <-t.done - t.InjectDone() } func (t *mixCompactionTask) GetPlanID() typeutil.UniqueID { @@ -112,18 +99,16 @@ func (t *mixCompactionTask) GetChannelName() string { } // return num rows of all segment compaction from -func (t *mixCompactionTask) getNumRows() (int64, error) { +func (t *mixCompactionTask) getNumRows() int64 { numRows := int64(0) for _, binlog := range t.plan.SegmentBinlogs { - seg, ok := t.metaCache.GetSegmentByID(binlog.GetSegmentID()) - if !ok { - return 0, merr.WrapErrSegmentNotFound(binlog.GetSegmentID(), "get compaction segments num rows failed") + if len(binlog.GetFieldBinlogs()) > 0 { + for _, ct := range binlog.GetFieldBinlogs()[0].GetBinlogs() { + numRows += ct.GetEntriesNum() + } } - - numRows += seg.NumOfRows() } - - return numRows, nil + return numRows } func (t *mixCompactionTask) mergeDeltalogs(ctx context.Context, dpaths map[typeutil.UniqueID][]string) (map[interface{}]typeutil.Timestamp, error) { @@ -417,7 +402,19 @@ func (t *mixCompactionTask) Compact() (*datapb.CompactionPlanResult, error) { ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(t.ctx, fmt.Sprintf("MixCompact-%d", t.GetPlanID())) defer span.End() - log := log.Ctx(ctx).With(zap.Int64("planID", t.plan.GetPlanID()), zap.Int32("timeout in seconds", t.plan.GetTimeoutInSeconds())) + if len(t.plan.GetSegmentBinlogs()) < 1 { + log.Warn("compact wrong, there's no segments in segment binlogs", zap.Int64("planID", t.plan.GetPlanID())) + return nil, errors.New("compaction plan is illegal") + } + + collectionID := t.plan.GetSegmentBinlogs()[0].GetCollectionID() + partitionID := t.plan.GetSegmentBinlogs()[0].GetPartitionID() + + log := log.Ctx(ctx).With(zap.Int64("planID", t.plan.GetPlanID()), + zap.Int64("collectionID", collectionID), + zap.Int64("partitionID", partitionID), + zap.Int32("timeout in seconds", t.plan.GetTimeoutInSeconds())) + if ok := funcutil.CheckCtxValid(ctx); !ok { log.Warn("compact wrong, task context done or timeout") return nil, ctx.Err() @@ -427,10 +424,6 @@ func (t *mixCompactionTask) Compact() (*datapb.CompactionPlanResult, error) { defer cancelAll() log.Info("compact start") - if len(t.plan.GetSegmentBinlogs()) < 1 { - log.Warn("compact wrong, there's no segments in segment binlogs") - return nil, errors.New("compaction plan is illegal") - } targetSegID, err := t.AllocOne() if err != nil { @@ -438,15 +431,9 @@ func (t *mixCompactionTask) Compact() (*datapb.CompactionPlanResult, error) { return nil, err } - previousRowCount, err := t.getNumRows() - if err != nil { - log.Warn("compact wrong, unable to get previous numRows", zap.Error(err)) - return nil, err - } - - partID := t.plan.GetSegmentBinlogs()[0].GetPartitionID() + previousRowCount := t.getNumRows() - writer, err := NewSegmentWriter(t.metaCache.Schema(), previousRowCount, targetSegID, partID, t.metaCache.Collection()) + writer, err := NewSegmentWriter(t.plan.GetSchema(), previousRowCount, targetSegID, partitionID, collectionID) if err != nil { log.Warn("compact wrong, unable to init segment writer", zap.Error(err)) return nil, err @@ -455,12 +442,6 @@ func (t *mixCompactionTask) 
Compact() (*datapb.CompactionPlanResult, error) { segIDs := lo.Map(t.plan.GetSegmentBinlogs(), func(binlogs *datapb.CompactionSegmentBinlogs, _ int) int64 { return binlogs.GetSegmentID() }) - // Inject to stop flush - // when compaction failed, these segments need to be Unblocked by injectDone in compaction_executor - // when compaction succeeded, these segments will be Unblocked by SyncSegments from DataCoord. - for _, segID := range segIDs { - t.syncMgr.Block(segID) - } if err := binlog.DecompressCompactionBinlogs(t.plan.GetSegmentBinlogs()); err != nil { log.Warn("compact wrong, fail to decompress compaction binlogs", zap.Error(err)) @@ -541,16 +522,9 @@ func (t *mixCompactionTask) Compact() (*datapb.CompactionPlanResult, error) { return planResult, nil } -func (t *mixCompactionTask) InjectDone() { - t.injectDoneOnce.Do(func() { - for _, binlog := range t.plan.SegmentBinlogs { - t.syncMgr.Unblock(binlog.SegmentID) - } - }) -} - func (t *mixCompactionTask) GetCollection() typeutil.UniqueID { - return t.metaCache.Collection() + // The length of SegmentBinlogs is checked before task enqueueing. + return t.plan.GetSegmentBinlogs()[0].GetCollectionID() } func (t *mixCompactionTask) isExpiredEntity(ts typeutil.Timestamp) bool { diff --git a/internal/datanode/compaction/mix_compactor_test.go b/internal/datanode/compaction/mix_compactor_test.go index 6ca701bedd91e..cea2c0b6fe16a 100644 --- a/internal/datanode/compaction/mix_compactor_test.go +++ b/internal/datanode/compaction/mix_compactor_test.go @@ -32,12 +32,10 @@ import ( "github.com/milvus-io/milvus/internal/datanode/allocator" "github.com/milvus-io/milvus/internal/datanode/io" "github.com/milvus-io/milvus/internal/datanode/metacache" - "github.com/milvus-io/milvus/internal/datanode/syncmgr" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/etcdpb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/tsoutil" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -54,8 +52,6 @@ type MixCompactionTaskSuite struct { mockBinlogIO *io.MockBinlogIO mockAlloc *allocator.MockAllocator - mockMeta *metacache.MockMetaCache - mockSyncMgr *syncmgr.MockSyncManager meta *etcdpb.CollectionMeta segWriter *SegmentWriter @@ -71,10 +67,8 @@ func (s *MixCompactionTaskSuite) SetupSuite() { func (s *MixCompactionTaskSuite) SetupTest() { s.mockBinlogIO = io.NewMockBinlogIO(s.T()) s.mockAlloc = allocator.NewMockAllocator(s.T()) - s.mockMeta = metacache.NewMockMetaCache(s.T()) - s.mockSyncMgr = syncmgr.NewMockSyncManager(s.T()) - s.task = NewMixCompactionTask(context.Background(), s.mockBinlogIO, s.mockMeta, s.mockSyncMgr, s.mockAlloc, nil) + s.task = NewMixCompactionTask(context.Background(), s.mockBinlogIO, s.mockAlloc, nil) s.meta = genTestCollectionMeta() @@ -90,6 +84,7 @@ func (s *MixCompactionTaskSuite) SetupTest() { }}, TimeoutInSeconds: 10, Type: datapb.CompactionType_MixCompaction, + Schema: s.meta.GetSchema(), } s.task.plan = s.plan } @@ -106,26 +101,10 @@ func getMilvusBirthday() time.Time { return time.Date(2019, time.Month(5), 30, 0, 0, 0, 0, time.UTC) } -func (s *MixCompactionTaskSuite) TestInjectDone() { - segmentIDs := []int64{100, 200, 300} - s.task.plan.SegmentBinlogs = lo.Map(segmentIDs, func(id int64, _ int) *datapb.CompactionSegmentBinlogs { - return &datapb.CompactionSegmentBinlogs{SegmentID: id} - }) - - for _, segmentID := range 
segmentIDs { - s.mockSyncMgr.EXPECT().Unblock(segmentID).Return().Once() - } - - s.task.InjectDone() - s.task.InjectDone() -} - func (s *MixCompactionTaskSuite) TestCompactDupPK() { // Test merge compactions, two segments with the same pk, one deletion pk=1 // The merged segment 19530 should remain 3 pk without pk=100 s.mockAlloc.EXPECT().AllocOne().Return(int64(19530), nil).Twice() - s.mockMeta.EXPECT().Schema().Return(s.meta.GetSchema()).Once() - s.mockMeta.EXPECT().Collection().Return(CollectionID).Once() segments := []int64{7, 8, 9} dblobs, err := getInt64DeltaBlobs( 1, @@ -153,12 +132,12 @@ func (s *MixCompactionTaskSuite) TestCompactDupPK() { s.segWriter.writer.Flush() s.Require().NoError(err) - statistic := &storage.PkStatistics{ - PkFilter: s.segWriter.pkstats.BF, - MinPK: s.segWriter.pkstats.MinPk, - MaxPK: s.segWriter.pkstats.MaxPk, - } - bfs := metacache.NewBloomFilterSet(statistic) + //statistic := &storage.PkStatistics{ + // PkFilter: s.segWriter.pkstats.BF, + // MinPK: s.segWriter.pkstats.MinPk, + // MaxPK: s.segWriter.pkstats.MaxPk, + //} + //bfs := metacache.NewBloomFilterSet(statistic) kvs, fBinlogs, err := s.task.serializeWrite(context.TODO(), s.segWriter) s.Require().NoError(err) @@ -167,17 +146,12 @@ func (s *MixCompactionTaskSuite) TestCompactDupPK() { return len(left) == 0 && len(right) == 0 })).Return(lo.Values(kvs), nil).Once() - seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: CollectionID, - PartitionID: PartitionID, - ID: segID, - NumOfRows: 1, - }, bfs) - - s.mockMeta.EXPECT().GetSegmentByID(segID).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - return seg, true - }) - s.mockSyncMgr.EXPECT().Block(segID).Return().Once() + //seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ + // CollectionID: CollectionID, + // PartitionID: PartitionID, + // ID: segID, + // NumOfRows: 1, + //}, bfs) s.plan.SegmentBinlogs = append(s.plan.SegmentBinlogs, &datapb.CompactionSegmentBinlogs{ SegmentID: segID, @@ -204,8 +178,6 @@ func (s *MixCompactionTaskSuite) TestCompactDupPK() { func (s *MixCompactionTaskSuite) TestCompactTwoToOne() { s.mockAlloc.EXPECT().AllocOne().Return(int64(19530), nil).Twice() - s.mockMeta.EXPECT().Schema().Return(s.meta.GetSchema()).Once() - s.mockMeta.EXPECT().Collection().Return(CollectionID).Once() segments := []int64{5, 6, 7} s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(7777777, 8888888, nil) @@ -213,12 +185,12 @@ func (s *MixCompactionTaskSuite) TestCompactTwoToOne() { s.task.plan.SegmentBinlogs = make([]*datapb.CompactionSegmentBinlogs, 0) for _, segID := range segments { s.initSegBuffer(segID) - statistic := &storage.PkStatistics{ - PkFilter: s.segWriter.pkstats.BF, - MinPK: s.segWriter.pkstats.MinPk, - MaxPK: s.segWriter.pkstats.MaxPk, - } - bfs := metacache.NewBloomFilterSet(statistic) + //statistic := &storage.PkStatistics{ + // PkFilter: s.segWriter.pkstats.BF, + // MinPK: s.segWriter.pkstats.MinPk, + // MaxPK: s.segWriter.pkstats.MaxPk, + //} + //bfs := metacache.NewBloomFilterSet(statistic) kvs, fBinlogs, err := s.task.serializeWrite(context.TODO(), s.segWriter) s.Require().NoError(err) s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.MatchedBy(func(keys []string) bool { @@ -226,17 +198,12 @@ func (s *MixCompactionTaskSuite) TestCompactTwoToOne() { return len(left) == 0 && len(right) == 0 })).Return(lo.Values(kvs), nil).Once() - seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: CollectionID, - PartitionID: PartitionID, - ID: segID, - NumOfRows: 1, 
- }, bfs) - - s.mockMeta.EXPECT().GetSegmentByID(segID).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - return seg, true - }) - s.mockSyncMgr.EXPECT().Block(segID).Return().Once() + //seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ + // CollectionID: CollectionID, + // PartitionID: PartitionID, + // ID: segID, + // NumOfRows: 1, + //}, bfs) s.plan.SegmentBinlogs = append(s.plan.SegmentBinlogs, &datapb.CompactionSegmentBinlogs{ SegmentID: segID, @@ -251,10 +218,7 @@ func (s *MixCompactionTaskSuite) TestCompactTwoToOne() { ID: 99999, NumOfRows: 0, }, metacache.NewBloomFilterSet()) - s.mockMeta.EXPECT().GetSegmentByID(seg.SegmentID()).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - return seg, true - }) - s.mockSyncMgr.EXPECT().Block(seg.SegmentID()).Return().Once() + s.plan.SegmentBinlogs = append(s.plan.SegmentBinlogs, &datapb.CompactionSegmentBinlogs{ SegmentID: seg.SegmentID(), }) @@ -531,15 +495,6 @@ func (s *MixCompactionTaskSuite) TestCompactFail() { _, err := s.task.Compact() s.Error(err) }) - - s.Run("Test getNumRows error", func() { - s.mockAlloc.EXPECT().AllocOne().Return(19530, nil).Once() - s.mockMeta.EXPECT().GetSegmentByID(mock.Anything).Return(nil, false) - - _, err := s.task.Compact() - s.Error(err) - s.ErrorIs(err, merr.ErrSegmentNotFound) - }) } func (s *MixCompactionTaskSuite) TestIsExpiredEntity() { diff --git a/internal/datanode/compaction_executor.go b/internal/datanode/compaction_executor.go index 938d1b5db0fd7..d8ac9fa423f83 100644 --- a/internal/datanode/compaction_executor.go +++ b/internal/datanode/compaction_executor.go @@ -70,12 +70,11 @@ func (c *compactionExecutor) toCompleteState(task compaction.Compactor) { c.executing.GetAndRemove(task.GetPlanID()) } -func (c *compactionExecutor) injectDone(planID UniqueID) { +func (c *compactionExecutor) removeTask(planID UniqueID) { c.completed.GetAndRemove(planID) task, loaded := c.completedCompactor.GetAndRemove(planID) if loaded { - log.Info("Compaction task inject done", zap.Int64("planID", planID), zap.String("channel", task.GetChannelName())) - task.InjectDone() + log.Info("Compaction task removed", zap.Int64("planID", planID), zap.String("channel", task.GetChannelName())) } } @@ -110,12 +109,11 @@ func (c *compactionExecutor) executeTask(task compaction.Compactor) { result, err := task.Compact() if err != nil { - task.InjectDone() log.Warn("compaction task failed", zap.Error(err)) - } else { - c.completed.Insert(result.GetPlanID(), result) - c.completedCompactor.Insert(result.GetPlanID(), task) + return } + c.completed.Insert(result.GetPlanID(), result) + c.completedCompactor.Insert(result.GetPlanID(), task) log.Info("end to execute compaction") } @@ -152,7 +150,7 @@ func (c *compactionExecutor) discardPlan(channel string) { // remove all completed plans of channel c.completed.Range(func(planID int64, result *datapb.CompactionPlanResult) bool { if result.GetChannel() == channel { - c.injectDone(planID) + c.removeTask(planID) log.Info("remove compaction plan and results", zap.String("channel", channel), zap.Int64("planID", planID)) diff --git a/internal/datanode/l0_compactor.go b/internal/datanode/l0_compactor.go index 89157c407834b..f04cc280c5c41 100644 --- a/internal/datanode/l0_compactor.go +++ b/internal/datanode/l0_compactor.go @@ -34,7 +34,6 @@ import ( "github.com/milvus-io/milvus/internal/datanode/io" iter "github.com/milvus-io/milvus/internal/datanode/iterators" 
"github.com/milvus-io/milvus/internal/datanode/metacache" - "github.com/milvus-io/milvus/internal/datanode/syncmgr" "github.com/milvus-io/milvus/internal/metastore/kv/binlog" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/storage" @@ -52,12 +51,8 @@ import ( ) type levelZeroCompactionTask struct { - compaction.Compactor io.BinlogIO - allocator allocator.Allocator - metacache metacache.MetaCache - syncmgr syncmgr.SyncManager cm storage.ChunkManager plan *datapb.CompactionPlan @@ -76,8 +71,6 @@ func newLevelZeroCompactionTask( ctx context.Context, binlogIO io.BinlogIO, alloc allocator.Allocator, - metaCache metacache.MetaCache, - syncmgr syncmgr.SyncManager, cm storage.ChunkManager, plan *datapb.CompactionPlan, ) *levelZeroCompactionTask { @@ -88,8 +81,6 @@ func newLevelZeroCompactionTask( BinlogIO: binlogIO, allocator: alloc, - metacache: metaCache, - syncmgr: syncmgr, cm: cm, plan: plan, tr: timerecord.NewTimeRecorder("levelzero compaction"), @@ -115,12 +106,10 @@ func (t *levelZeroCompactionTask) GetChannelName() string { } func (t *levelZeroCompactionTask) GetCollection() int64 { - return t.metacache.Collection() + // The length of SegmentBinlogs is checked before task enqueueing. + return t.plan.GetSegmentBinlogs()[0].GetCollectionID() } -// Do nothing for levelzero compaction -func (t *levelZeroCompactionTask) InjectDone() {} - func (t *levelZeroCompactionTask) Compact() (*datapb.CompactionPlanResult, error) { ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(t.ctx, "L0Compact") defer span.End() @@ -338,16 +327,20 @@ func (t *levelZeroCompactionTask) splitDelta( } func (t *levelZeroCompactionTask) composeDeltalog(segmentID int64, dData *storage.DeleteData) (map[string][]byte, *datapb.Binlog, error) { + segment, ok := lo.Find(t.plan.GetSegmentBinlogs(), func(segment *datapb.CompactionSegmentBinlogs) bool { + return segment.GetSegmentID() == segmentID + }) + if !ok { + return nil, nil, merr.WrapErrSegmentNotFound(segmentID, "cannot find segment in compaction plan") + } + var ( - collID = t.metacache.Collection() - uploadKv = make(map[string][]byte) + collectionID = segment.GetCollectionID() + partitionID = segment.GetPartitionID() + uploadKv = make(map[string][]byte) ) - seg, ok := t.metacache.GetSegmentByID(segmentID) - if !ok { - return nil, nil, merr.WrapErrSegmentLack(segmentID) - } - blob, err := storage.NewDeleteCodec().Serialize(collID, seg.PartitionID(), segmentID, dData) + blob, err := storage.NewDeleteCodec().Serialize(collectionID, partitionID, segmentID, dData) if err != nil { return nil, nil, err } @@ -357,7 +350,7 @@ func (t *levelZeroCompactionTask) composeDeltalog(segmentID int64, dData *storag return nil, nil, err } - blobKey := metautil.JoinIDPath(collID, seg.PartitionID(), segmentID, logID) + blobKey := metautil.JoinIDPath(collectionID, partitionID, segmentID, logID) blobPath := t.BinlogIO.JoinFullPath(common.SegmentDeltaLogPath, blobKey) uploadKv[blobPath] = blob.GetValue() @@ -447,7 +440,7 @@ func (t *levelZeroCompactionTask) loadBF(targetSegments []*datapb.CompactionSegm _ = binlog.DecompressBinLog(storage.StatsBinlog, segment.GetCollectionID(), segment.GetPartitionID(), segment.GetSegmentID(), segment.GetField2StatslogPaths()) pks, err := loadStats(t.ctx, t.cm, - t.metacache.Schema(), segment.GetSegmentID(), segment.GetField2StatslogPaths()) + t.plan.GetSchema(), segment.GetSegmentID(), segment.GetField2StatslogPaths()) if err != nil { log.Warn("failed to load segment stats log", zap.Error(err)) return err, err diff 
--git a/internal/datanode/l0_compactor_test.go b/internal/datanode/l0_compactor_test.go index 08df575433ace..9aad1fb685443 100644 --- a/internal/datanode/l0_compactor_test.go +++ b/internal/datanode/l0_compactor_test.go @@ -51,7 +51,6 @@ type LevelZeroCompactionTaskSuite struct { mockBinlogIO *io.MockBinlogIO mockAlloc *allocator.MockAllocator - mockMeta *metacache.MockMetaCache task *levelZeroCompactionTask dData *storage.DeleteData @@ -61,9 +60,8 @@ type LevelZeroCompactionTaskSuite struct { func (s *LevelZeroCompactionTaskSuite) SetupTest() { s.mockAlloc = allocator.NewMockAllocator(s.T()) s.mockBinlogIO = io.NewMockBinlogIO(s.T()) - s.mockMeta = metacache.NewMockMetaCache(s.T()) // plan of the task is unset - s.task = newLevelZeroCompactionTask(context.Background(), s.mockBinlogIO, s.mockAlloc, s.mockMeta, nil, nil, nil) + s.task = newLevelZeroCompactionTask(context.Background(), s.mockBinlogIO, s.mockAlloc, nil, nil) pk2ts := map[int64]uint64{ 1: 20000, @@ -101,20 +99,19 @@ func (s *LevelZeroCompactionTaskSuite) TestLinearBatchLoadDeltaFail() { }, {SegmentID: 200, Level: datapb.SegmentLevel_L1}, }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: true, + }, + }, + }, } s.task.plan = plan s.task.tr = timerecord.NewTimeRecorder("test") s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).Return(nil, errors.New("mock download fail")).Twice() - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: true, - }, - }, - }) - targetSegments := lo.Filter(plan.SegmentBinlogs, func(s *datapb.CompactionSegmentBinlogs, _ int) bool { return s.Level == datapb.SegmentLevel_L1 }) @@ -154,6 +151,13 @@ func (s *LevelZeroCompactionTaskSuite) TestLinearBatchUploadByCheckFail() { }, }}, }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: true, + }, + }, + }, } s.task.plan = plan @@ -170,15 +174,9 @@ func (s *LevelZeroCompactionTaskSuite) TestLinearBatchUploadByCheckFail() { s.task.cm = cm s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).Return([][]byte{s.dBlob}, nil).Times(2) - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID(mock.Anything).Return(nil, false).Twice() - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: true, - }, - }, - }) + mockAlloc := allocator.NewMockAllocator(s.T()) + mockAlloc.EXPECT().AllocOne().Return(0, errors.New("mock alloc err")) + s.task.allocator = mockAlloc targetSegments := lo.Filter(plan.SegmentBinlogs, func(s *datapb.CompactionSegmentBinlogs, _ int) bool { return s.Level == datapb.SegmentLevel_L1 @@ -200,7 +198,8 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactLinear() { Type: datapb.CompactionType_Level0DeleteCompaction, SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ { - SegmentID: 100, Level: datapb.SegmentLevel_L0, Deltalogs: []*datapb.FieldBinlog{ + CollectionID: 1, + SegmentID: 100, Level: datapb.SegmentLevel_L0, Deltalogs: []*datapb.FieldBinlog{ { Binlogs: []*datapb.Binlog{ {LogPath: "a/b/c1", LogSize: 100}, @@ -212,7 +211,8 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactLinear() { }, }, { - SegmentID: 101, Level: datapb.SegmentLevel_L0, Deltalogs: []*datapb.FieldBinlog{ + CollectionID: 1, + SegmentID: 101, Level: datapb.SegmentLevel_L0, Deltalogs: []*datapb.FieldBinlog{ { Binlogs: []*datapb.Binlog{ {LogPath: "a/d/c1", LogSize: 100}, @@ -223,20 +223,33 @@ func (s 
*LevelZeroCompactionTaskSuite) TestCompactLinear() { }, }, }, - {SegmentID: 200, Level: datapb.SegmentLevel_L1, Field2StatslogPaths: []*datapb.FieldBinlog{ - { - Binlogs: []*datapb.Binlog{ - {LogID: 9999, LogSize: 100}, + { + CollectionID: 1, + SegmentID: 200, Level: datapb.SegmentLevel_L1, Field2StatslogPaths: []*datapb.FieldBinlog{ + { + Binlogs: []*datapb.Binlog{ + {LogID: 9999, LogSize: 100}, + }, }, }, - }}, - {SegmentID: 201, Level: datapb.SegmentLevel_L1, Field2StatslogPaths: []*datapb.FieldBinlog{ - { - Binlogs: []*datapb.Binlog{ - {LogID: 9999, LogSize: 100}, + }, + { + CollectionID: 1, + SegmentID: 201, Level: datapb.SegmentLevel_L1, Field2StatslogPaths: []*datapb.FieldBinlog{ + { + Binlogs: []*datapb.Binlog{ + {LogID: 9999, LogSize: 100}, + }, }, }, - }}, + }, + }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: true, + }, + }, }, } @@ -254,18 +267,6 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactLinear() { s.task.cm = cm s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).Return([][]byte{s.dBlob}, nil).Times(2) - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID(mock.Anything, mock.Anything). - RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - return metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: id, PartitionID: 10}, nil), true - }) - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: true, - }, - }, - }) s.mockAlloc.EXPECT().AllocOne().Return(19530, nil).Times(2) s.mockBinlogIO.EXPECT().JoinFullPath(mock.Anything, mock.Anything). @@ -357,6 +358,13 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactBatch() { }, }}, }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: true, + }, + }, + }, } s.task.plan = plan @@ -373,18 +381,6 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactBatch() { s.task.cm = cm s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).Return([][]byte{s.dBlob}, nil).Once() - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID(mock.Anything, mock.Anything). - RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - return metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: id, PartitionID: 10}, nil), true - }) - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: true, - }, - }, - }) s.mockAlloc.EXPECT().AllocOne().Return(19530, nil).Times(2) s.mockBinlogIO.EXPECT().JoinFullPath(mock.Anything, mock.Anything). 
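// With metacache and syncmgr removed from the level-zero compactor, everything
// the task needs now rides on the CompactionPlan itself: collection/partition
// IDs on each CompactionSegmentBinlogs entry and the collection schema on
// plan.Schema. A minimal sketch of the kind of plan the reworked tests below
// construct (field values are illustrative only, not taken from the patch):
//
//	plan := &datapb.CompactionPlan{
//		PlanID: 19530,
//		Type:   datapb.CompactionType_Level0DeleteCompaction,
//		SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{
//			{CollectionID: 1, PartitionID: 10, SegmentID: 100, Level: datapb.SegmentLevel_L0},
//		},
//		Schema: &schemapb.CollectionSchema{
//			Fields: []*schemapb.FieldSchema{{IsPrimaryKey: true}},
//		},
//	}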
@@ -430,11 +426,21 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactBatch() { func (s *LevelZeroCompactionTaskSuite) TestUploadByCheck() { ctx := context.Background() + + plan := &datapb.CompactionPlan{ + SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ + { + SegmentID: 100, + }, + }, + } + s.Run("uploadByCheck directly composeDeltalog failed", func() { s.SetupTest() - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID(mock.Anything).Return(nil, false).Once() - + s.task.plan = plan + mockAlloc := allocator.NewMockAllocator(s.T()) + mockAlloc.EXPECT().AllocOne().Return(0, errors.New("mock alloc err")) + s.task.allocator = mockAlloc segments := map[int64]*storage.DeleteData{100: s.dData} results := make(map[int64]*datapb.CompactionSegment) err := s.task.uploadByCheck(ctx, false, segments, results) @@ -444,13 +450,8 @@ func (s *LevelZeroCompactionTaskSuite) TestUploadByCheck() { s.Run("uploadByCheck directly Upload failed", func() { s.SetupTest() + s.task.plan = plan s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(errors.New("mock upload failed")) - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID( - mock.MatchedBy(func(ID int64) bool { - return ID == 100 - }), mock.Anything). - Return(metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 100, PartitionID: 10}, nil), true) s.mockAlloc.EXPECT().AllocOne().Return(19530, nil) blobKey := metautil.JoinIDPath(1, 10, 100, 19530) @@ -466,13 +467,8 @@ func (s *LevelZeroCompactionTaskSuite) TestUploadByCheck() { s.Run("upload directly", func() { s.SetupTest() + s.task.plan = plan s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil) - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID( - mock.MatchedBy(func(ID int64) bool { - return ID == 100 - }), mock.Anything). - Return(metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 100, PartitionID: 10}, nil), true) s.mockAlloc.EXPECT().AllocOne().Return(19530, nil) blobKey := metautil.JoinIDPath(1, 10, 100, 19530) @@ -503,16 +499,11 @@ func (s *LevelZeroCompactionTaskSuite) TestUploadByCheck() { }) s.Run("check with upload", func() { + s.task.plan = plan blobKey := metautil.JoinIDPath(1, 10, 100, 19530) blobPath := path.Join(common.SegmentDeltaLogPath, blobKey) s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil) - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID( - mock.MatchedBy(func(ID int64) bool { - return ID == 100 - }), mock.Anything). - Return(metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 100, PartitionID: 10}, nil), true) s.mockAlloc.EXPECT().AllocOne().Return(19530, nil) s.mockBinlogIO.EXPECT().JoinFullPath(mock.Anything, mock.Anything).Return(blobPath) @@ -539,20 +530,17 @@ func (s *LevelZeroCompactionTaskSuite) TestUploadByCheck() { } func (s *LevelZeroCompactionTaskSuite) TestComposeDeltalog() { - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT(). - GetSegmentByID( - mock.MatchedBy(func(ID int64) bool { - return ID == 100 - }), mock.Anything). - Return(metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 100, PartitionID: 10}, nil), true) - - s.mockMeta.EXPECT(). - GetSegmentByID( - mock.MatchedBy(func(ID int64) bool { - return ID == 101 - }), mock.Anything). 
- Return(nil, false) + plan := &datapb.CompactionPlan{ + SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ + { + SegmentID: 100, + }, + { + SegmentID: 101, + }, + }, + } + s.task.plan = plan s.mockAlloc.EXPECT().AllocOne().Return(19530, nil) @@ -568,8 +556,13 @@ func (s *LevelZeroCompactionTaskSuite) TestComposeDeltalog() { s.NotNil(v) s.Equal(blobPath, binlog.LogPath) - _, _, err = s.task.composeDeltalog(101, s.dData) - s.Error(err) + kvs, _, err = s.task.composeDeltalog(101, s.dData) + s.NoError(err) + s.Equal(1, len(kvs)) + v, ok = kvs[blobPath] + s.True(ok) + s.NotNil(v) + s.Equal(blobPath, binlog.LogPath) } func (s *LevelZeroCompactionTaskSuite) TestSplitDelta() { @@ -684,6 +677,13 @@ func (s *LevelZeroCompactionTaskSuite) TestLoadBF() { }, }}, }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: true, + }, + }, + }, } s.task.plan = plan @@ -698,14 +698,6 @@ func (s *LevelZeroCompactionTaskSuite) TestLoadBF() { cm.EXPECT().MultiRead(mock.Anything, mock.Anything).Return([][]byte{sw.GetBuffer()}, nil) s.task.cm = cm - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: true, - }, - }, - }) - bfs, err := s.task.loadBF(plan.SegmentBinlogs) s.NoError(err) @@ -730,18 +722,17 @@ func (s *LevelZeroCompactionTaskSuite) TestFailed() { }, }}, }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: false, + }, + }, + }, } s.task.plan = plan - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: false, - }, - }, - }) - _, err := s.task.loadBF(plan.SegmentBinlogs) s.Error(err) }) diff --git a/internal/datanode/services.go b/internal/datanode/services.go index 4f172c966272b..d18f10ada6cd6 100644 --- a/internal/datanode/services.go +++ b/internal/datanode/services.go @@ -30,11 +30,8 @@ import ( "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/datanode/importv2" "github.com/milvus-io/milvus/internal/datanode/io" - "github.com/milvus-io/milvus/internal/datanode/metacache" - "github.com/milvus-io/milvus/internal/metastore/kv/binlog" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/internalpb" - "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" @@ -205,29 +202,9 @@ func (node *DataNode) Compaction(ctx context.Context, req *datapb.CompactionPlan return merr.Status(err), nil } - ds, ok := node.flowgraphManager.GetFlowgraphService(req.GetChannel()) - if !ok { - log.Warn("illegel compaction plan, channel not in this DataNode", zap.String("channelName", req.GetChannel())) - return merr.Status(merr.WrapErrChannelNotFound(req.GetChannel(), "illegel compaction plan")), nil - } - - if !node.compactionExecutor.isValidChannel(req.GetChannel()) { - log.Warn("channel of compaction is marked invalid in compaction executor", zap.String("channelName", req.GetChannel())) - return merr.Status(merr.WrapErrChannelNotFound(req.GetChannel(), "channel is dropping")), nil - } - - meta := ds.metacache - for _, segment := range req.GetSegmentBinlogs() { - if segment.GetLevel() == datapb.SegmentLevel_L0 { - continue - } - _, ok := meta.GetSegmentByID(segment.GetSegmentID(), metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - if !ok { - log.Warn("compaction plan contains segment which is not 
flushed", - zap.Int64("segmentID", segment.GetSegmentID()), - ) - return merr.Status(merr.WrapErrSegmentNotFound(segment.GetSegmentID(), "segment with flushed state not found")), nil - } + if len(req.GetSegmentBinlogs()) == 0 { + log.Info("no segments to compact") + return merr.Success(), nil } /* @@ -244,8 +221,6 @@ func (node *DataNode) Compaction(ctx context.Context, req *datapb.CompactionPlan taskCtx, binlogIO, node.allocator, - ds.metacache, - node.syncMgr, node.chunkManager, req, ) @@ -253,8 +228,6 @@ func (node *DataNode) Compaction(ctx context.Context, req *datapb.CompactionPlan task = compaction.NewMixCompactionTask( taskCtx, binlogIO, - ds.metacache, - node.syncMgr, node.allocator, req, ) @@ -288,10 +261,6 @@ func (node *DataNode) SyncSegments(ctx context.Context, req *datapb.SyncSegments log := log.Ctx(ctx).With( zap.Int64("planID", req.GetPlanID()), zap.Int64("nodeID", node.GetNodeID()), - zap.Int64("target segmentID", req.GetCompactedTo()), - zap.Int64s("compacted from", req.GetCompactedFrom()), - zap.Int64("numOfRows", req.GetNumOfRows()), - zap.String("channelName", req.GetChannelName()), ) log.Info("DataNode receives SyncSegments") @@ -301,32 +270,8 @@ func (node *DataNode) SyncSegments(ctx context.Context, req *datapb.SyncSegments return merr.Status(err), nil } - if len(req.GetCompactedFrom()) <= 0 { - log.Info("SyncSegments with empty compactedFrom, clearing the plan") - node.compactionExecutor.injectDone(req.GetPlanID()) - return merr.Success(), nil - } - - ds, ok := node.flowgraphManager.GetFlowgraphService(req.GetChannelName()) - if !ok { - node.compactionExecutor.discardPlan(req.GetChannelName()) - err := merr.WrapErrChannelNotFound(req.GetChannelName()) - log.Warn("failed to sync segments", zap.Error(err)) - return merr.Status(err), nil - } - err := binlog.DecompressBinLog(storage.StatsBinlog, req.GetCollectionId(), req.GetPartitionId(), req.GetCompactedTo(), req.GetStatsLogs()) - if err != nil { - log.Warn("failed to DecompressBinLog", zap.Error(err)) - return merr.Status(err), nil - } - pks, err := loadStats(ctx, node.chunkManager, ds.metacache.Schema(), req.GetCompactedTo(), req.GetStatsLogs()) - if err != nil { - log.Warn("failed to load segment statslog", zap.Error(err)) - return merr.Status(err), nil - } - bfs := metacache.NewBloomFilterSet(pks...) - ds.metacache.CompactSegments(req.GetCompactedTo(), req.GetPartitionId(), req.GetNumOfRows(), bfs, req.GetCompactedFrom()...) 
- node.compactionExecutor.injectDone(req.GetPlanID()) + // TODO: sheep, add a new DropCompaction interface, deprecate SyncSegments + node.compactionExecutor.removeTask(req.GetPlanID()) return merr.Success(), nil } diff --git a/internal/datanode/services_test.go b/internal/datanode/services_test.go index a834b1907d849..b90ef427f1a44 100644 --- a/internal/datanode/services_test.go +++ b/internal/datanode/services_test.go @@ -210,50 +210,7 @@ func (s *DataNodeServicesSuite) TestGetCompactionState() { func (s *DataNodeServicesSuite) TestCompaction() { dmChannelName := "by-dev-rootcoord-dml_0_100v0" - schema := &schemapb.CollectionSchema{ - Name: "test_collection", - Fields: []*schemapb.FieldSchema{ - {FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64}, - {FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64}, - {FieldID: common.StartOfUserFieldID, DataType: schemapb.DataType_Int64, IsPrimaryKey: true, Name: "pk"}, - {FieldID: common.StartOfUserFieldID + 1, DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ - {Key: common.DimKey, Value: "128"}, - }}, - }, - } - flushedSegmentID := int64(100) - growingSegmentID := int64(101) - - vchan := &datapb.VchannelInfo{ - CollectionID: 1, - ChannelName: dmChannelName, - UnflushedSegmentIds: []int64{}, - FlushedSegmentIds: []int64{}, - } - - err := s.node.flowgraphManager.AddandStartWithEtcdTickler(s.node, vchan, schema, genTestTickler()) - s.Require().NoError(err) - - fgservice, ok := s.node.flowgraphManager.GetFlowgraphService(dmChannelName) - s.Require().True(ok) - - metaCache := metacache.NewMockMetaCache(s.T()) - metaCache.EXPECT().Collection().Return(1).Maybe() - metaCache.EXPECT().Schema().Return(schema).Maybe() - s.node.writeBufferManager.Register(dmChannelName, metaCache, nil) - fgservice.metacache.AddSegment(&datapb.SegmentInfo{ - ID: flushedSegmentID, - CollectionID: 1, - PartitionID: 2, - StartPosition: &msgpb.MsgPosition{}, - }, func(_ *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }) - fgservice.metacache.AddSegment(&datapb.SegmentInfo{ - ID: growingSegmentID, - CollectionID: 1, - PartitionID: 2, - StartPosition: &msgpb.MsgPosition{}, - }, func(_ *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }) s.Run("service_not_ready", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -269,40 +226,7 @@ func (s *DataNodeServicesSuite) TestCompaction() { s.False(merr.Ok(resp)) }) - s.Run("channel_not_match", func() { - node := s.node - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - req := &datapb.CompactionPlan{ - PlanID: 1000, - Channel: dmChannelName + "other", - } - - resp, err := node.Compaction(ctx, req) - s.NoError(err) - s.False(merr.Ok(resp)) - }) - - s.Run("channel_dropped", func() { - node := s.node - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - node.compactionExecutor.dropped.Insert(dmChannelName) - defer node.compactionExecutor.dropped.Remove(dmChannelName) - - req := &datapb.CompactionPlan{ - PlanID: 1000, - Channel: dmChannelName, - } - - resp, err := node.Compaction(ctx, req) - s.NoError(err) - s.False(merr.Ok(resp)) - }) - - s.Run("compact_growing_segment", func() { + s.Run("unknown CompactionType", func() { node := s.node ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -312,7 +236,7 @@ func (s *DataNodeServicesSuite) TestCompaction() 
{ Channel: dmChannelName, SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ {SegmentID: 102, Level: datapb.SegmentLevel_L0}, - {SegmentID: growingSegmentID, Level: datapb.SegmentLevel_L1}, + {SegmentID: 103, Level: datapb.SegmentLevel_L1}, }, } @@ -506,126 +430,6 @@ func (s *DataNodeServicesSuite) TestGetMetrics() { zap.String("response", resp.Response)) } -func (s *DataNodeServicesSuite) TestSyncSegments() { - chanName := "fake-by-dev-rootcoord-dml-test-syncsegments-1" - schema := &schemapb.CollectionSchema{ - Name: "test_collection", - Fields: []*schemapb.FieldSchema{ - {FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64}, - {FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64}, - {FieldID: common.StartOfUserFieldID, DataType: schemapb.DataType_Int64, IsPrimaryKey: true, Name: "pk"}, - {FieldID: common.StartOfUserFieldID + 1, DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ - {Key: common.DimKey, Value: "128"}, - }}, - }, - } - - err := s.node.flowgraphManager.AddandStartWithEtcdTickler(s.node, &datapb.VchannelInfo{ - CollectionID: 1, - ChannelName: chanName, - UnflushedSegmentIds: []int64{}, - FlushedSegmentIds: []int64{100, 200, 300}, - }, schema, genTestTickler()) - s.Require().NoError(err) - fg, ok := s.node.flowgraphManager.GetFlowgraphService(chanName) - s.Assert().True(ok) - - fg.metacache.AddSegment(&datapb.SegmentInfo{ID: 100, CollectionID: 1, State: commonpb.SegmentState_Flushed}, EmptyBfsFactory) - fg.metacache.AddSegment(&datapb.SegmentInfo{ID: 101, CollectionID: 1, State: commonpb.SegmentState_Flushed}, EmptyBfsFactory) - fg.metacache.AddSegment(&datapb.SegmentInfo{ID: 200, CollectionID: 1, State: commonpb.SegmentState_Flushed}, EmptyBfsFactory) - fg.metacache.AddSegment(&datapb.SegmentInfo{ID: 201, CollectionID: 1, State: commonpb.SegmentState_Flushed}, EmptyBfsFactory) - fg.metacache.AddSegment(&datapb.SegmentInfo{ID: 300, CollectionID: 1, State: commonpb.SegmentState_Flushed}, EmptyBfsFactory) - - s.Run("empty compactedFrom", func() { - req := &datapb.SyncSegmentsRequest{ - CompactedTo: 400, - NumOfRows: 100, - } - - req.CompactedFrom = []UniqueID{} - status, err := s.node.SyncSegments(s.ctx, req) - s.Assert().NoError(err) - s.Assert().True(merr.Ok(status)) - }) - - s.Run("invalid compacted from", func() { - req := &datapb.SyncSegmentsRequest{ - CompactedTo: 400, - NumOfRows: 100, - CompactedFrom: []UniqueID{101, 201}, - } - - req.CompactedFrom = []UniqueID{101, 201} - status, err := s.node.SyncSegments(s.ctx, req) - s.Assert().NoError(err) - s.Assert().False(merr.Ok(status)) - }) - - s.Run("valid request numRows>0", func() { - req := &datapb.SyncSegmentsRequest{ - CompactedFrom: []UniqueID{100, 200, 101, 201}, - CompactedTo: 102, - NumOfRows: 100, - ChannelName: chanName, - CollectionId: 1, - } - status, err := s.node.SyncSegments(s.ctx, req) - s.Assert().NoError(err) - s.Assert().True(merr.Ok(status)) - - _, result := fg.metacache.GetSegmentByID(req.GetCompactedTo(), metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - s.True(result) - for _, compactFrom := range req.GetCompactedFrom() { - seg, result := fg.metacache.GetSegmentByID(compactFrom, metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - s.True(result) - s.Equal(req.CompactedTo, seg.CompactTo()) - } - - status, err = s.node.SyncSegments(s.ctx, req) - s.Assert().NoError(err) - s.Assert().True(merr.Ok(status)) - }) - - s.Run("without_channel_meta", func() { - 
fg.metacache.UpdateSegments(metacache.UpdateState(commonpb.SegmentState_Flushed), - metacache.WithSegmentIDs(100, 200, 300)) - - req := &datapb.SyncSegmentsRequest{ - CompactedFrom: []int64{100, 200}, - CompactedTo: 101, - NumOfRows: 0, - } - status, err := s.node.SyncSegments(s.ctx, req) - s.Assert().NoError(err) - s.Assert().False(merr.Ok(status)) - }) - - s.Run("valid_request_with_meta_num=0", func() { - fg.metacache.UpdateSegments(metacache.UpdateState(commonpb.SegmentState_Flushed), - metacache.WithSegmentIDs(100, 200, 300)) - - req := &datapb.SyncSegmentsRequest{ - CompactedFrom: []int64{100, 200}, - CompactedTo: 301, - NumOfRows: 0, - ChannelName: chanName, - CollectionId: 1, - } - status, err := s.node.SyncSegments(s.ctx, req) - s.Assert().NoError(err) - s.Assert().True(merr.Ok(status)) - - seg, result := fg.metacache.GetSegmentByID(100, metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - s.True(result) - s.Equal(metacache.NullSegment, seg.CompactTo()) - seg, result = fg.metacache.GetSegmentByID(200, metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - s.True(result) - s.Equal(metacache.NullSegment, seg.CompactTo()) - _, result = fg.metacache.GetSegmentByID(301, metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - s.False(result) - }) -} - func (s *DataNodeServicesSuite) TestResendSegmentStats() { req := &datapb.ResendSegmentStatsRequest{ Base: &commonpb.MsgBase{}, diff --git a/internal/datanode/syncmgr/mock_sync_manager.go b/internal/datanode/syncmgr/mock_sync_manager.go index 34c69ac6b011d..259d09b2da542 100644 --- a/internal/datanode/syncmgr/mock_sync_manager.go +++ b/internal/datanode/syncmgr/mock_sync_manager.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.32.4. DO NOT EDIT. +// Code generated by mockery v2.30.1. DO NOT EDIT. 
package syncmgr @@ -25,39 +25,6 @@ func (_m *MockSyncManager) EXPECT() *MockSyncManager_Expecter { return &MockSyncManager_Expecter{mock: &_m.Mock} } -// Block provides a mock function with given fields: segmentID -func (_m *MockSyncManager) Block(segmentID int64) { - _m.Called(segmentID) -} - -// MockSyncManager_Block_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Block' -type MockSyncManager_Block_Call struct { - *mock.Call -} - -// Block is a helper method to define mock.On call -// - segmentID int64 -func (_e *MockSyncManager_Expecter) Block(segmentID interface{}) *MockSyncManager_Block_Call { - return &MockSyncManager_Block_Call{Call: _e.mock.On("Block", segmentID)} -} - -func (_c *MockSyncManager_Block_Call) Run(run func(segmentID int64)) *MockSyncManager_Block_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(int64)) - }) - return _c -} - -func (_c *MockSyncManager_Block_Call) Return() *MockSyncManager_Block_Call { - _c.Call.Return() - return _c -} - -func (_c *MockSyncManager_Block_Call) RunAndReturn(run func(int64)) *MockSyncManager_Block_Call { - _c.Call.Return(run) - return _c -} - // GetEarliestPosition provides a mock function with given fields: channel func (_m *MockSyncManager) GetEarliestPosition(channel string) (int64, *msgpb.MsgPosition) { ret := _m.Called(channel) @@ -90,7 +57,7 @@ type MockSyncManager_GetEarliestPosition_Call struct { } // GetEarliestPosition is a helper method to define mock.On call -// - channel string +// - channel string func (_e *MockSyncManager_Expecter) GetEarliestPosition(channel interface{}) *MockSyncManager_GetEarliestPosition_Call { return &MockSyncManager_GetEarliestPosition_Call{Call: _e.mock.On("GetEarliestPosition", channel)} } @@ -134,8 +101,8 @@ type MockSyncManager_SyncData_Call struct { } // SyncData is a helper method to define mock.On call -// - ctx context.Context -// - task Task +// - ctx context.Context +// - task Task func (_e *MockSyncManager_Expecter) SyncData(ctx interface{}, task interface{}) *MockSyncManager_SyncData_Call { return &MockSyncManager_SyncData_Call{Call: _e.mock.On("SyncData", ctx, task)} } @@ -157,39 +124,6 @@ func (_c *MockSyncManager_SyncData_Call) RunAndReturn(run func(context.Context, return _c } -// Unblock provides a mock function with given fields: segmentID -func (_m *MockSyncManager) Unblock(segmentID int64) { - _m.Called(segmentID) -} - -// MockSyncManager_Unblock_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Unblock' -type MockSyncManager_Unblock_Call struct { - *mock.Call -} - -// Unblock is a helper method to define mock.On call -// - segmentID int64 -func (_e *MockSyncManager_Expecter) Unblock(segmentID interface{}) *MockSyncManager_Unblock_Call { - return &MockSyncManager_Unblock_Call{Call: _e.mock.On("Unblock", segmentID)} -} - -func (_c *MockSyncManager_Unblock_Call) Run(run func(segmentID int64)) *MockSyncManager_Unblock_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(int64)) - }) - return _c -} - -func (_c *MockSyncManager_Unblock_Call) Return() *MockSyncManager_Unblock_Call { - _c.Call.Return() - return _c -} - -func (_c *MockSyncManager_Unblock_Call) RunAndReturn(run func(int64)) *MockSyncManager_Unblock_Call { - _c.Call.Return(run) - return _c -} - // NewMockSyncManager creates a new instance of MockSyncManager. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 
// The first argument is typically a *testing.T value. func NewMockSyncManager(t interface { diff --git a/internal/datanode/syncmgr/sync_manager.go b/internal/datanode/syncmgr/sync_manager.go index 190a8e7655be2..6a564c01345a4 100644 --- a/internal/datanode/syncmgr/sync_manager.go +++ b/internal/datanode/syncmgr/sync_manager.go @@ -40,19 +40,15 @@ type SyncMeta struct { metacache metacache.MetaCache } -// SyncMangger is the interface for sync manager. +// SyncManager is the interface for sync manager. // it processes the sync tasks inside and changes the meta. +// +//go:generate mockery --name=SyncManager --structname=MockSyncManager --output=./ --filename=mock_sync_manager.go --with-expecter --inpackage type SyncManager interface { // SyncData is the method to submit sync task. SyncData(ctx context.Context, task Task) *conc.Future[struct{}] // GetEarliestPosition returns the earliest position (normally start position) of the processing sync task of provided channel. GetEarliestPosition(channel string) (int64, *msgpb.MsgPosition) - // Block allows caller to block tasks of provided segment id. - // normally used by compaction task. - // if levelzero delta policy is enabled, this shall be an empty operation. - Block(segmentID int64) - // Unblock is the reverse method for `Block`. - Unblock(segmentID int64) } type syncManager struct { @@ -184,11 +180,3 @@ func (mgr *syncManager) GetEarliestPosition(channel string) (int64, *msgpb.MsgPo }) return segmentID, cp } - -func (mgr *syncManager) Block(segmentID int64) { - mgr.keyLock.Lock(segmentID) -} - -func (mgr *syncManager) Unblock(segmentID int64) { - mgr.keyLock.Unlock(segmentID) -} diff --git a/internal/datanode/syncmgr/sync_manager_test.go b/internal/datanode/syncmgr/sync_manager_test.go index 6f12a98df4d15..515e1266479d3 100644 --- a/internal/datanode/syncmgr/sync_manager_test.go +++ b/internal/datanode/syncmgr/sync_manager_test.go @@ -208,52 +208,6 @@ func (s *SyncManagerSuite) TestCompacted() { s.EqualValues(1001, segmentID.Load()) } -func (s *SyncManagerSuite) TestBlock() { - sig := make(chan struct{}) - counter := atomic.NewInt32(0) - s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil) - bfs := metacache.NewBloomFilterSet() - seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - ID: s.segmentID, - }, bfs) - metacache.UpdateNumOfRows(1000)(seg) - s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything). 
- RunAndReturn(func(...metacache.SegmentFilter) []*metacache.SegmentInfo { - return []*metacache.SegmentInfo{seg} - }) - s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Run(func(_ metacache.SegmentAction, filters ...metacache.SegmentFilter) { - if counter.Inc() == 2 { - close(sig) - } - }) - - manager, err := NewSyncManager(s.chunkManager, s.allocator) - s.NoError(err) - - // block - manager.Block(s.segmentID) - - task := s.getSuiteSyncTask() - task.WithMetaWriter(BrokerMetaWriter(s.broker, 1)) - task.WithTimeRange(50, 100) - task.WithCheckpoint(&msgpb.MsgPosition{ - ChannelName: s.channelName, - MsgID: []byte{1, 2, 3, 4}, - Timestamp: 100, - }) - go manager.SyncData(context.Background(), task) - - select { - case <-sig: - s.FailNow("sync task done during block") - case <-time.After(time.Second): - } - - manager.Unblock(s.segmentID) - <-sig -} - func (s *SyncManagerSuite) TestResizePool() { manager, err := NewSyncManager(s.chunkManager, s.allocator) s.NoError(err) diff --git a/internal/proto/data_coord.proto b/internal/proto/data_coord.proto index ecb29e3be162c..38781e958fa8d 100644 --- a/internal/proto/data_coord.proto +++ b/internal/proto/data_coord.proto @@ -528,6 +528,7 @@ message CompactionPlan { string channel = 7; int64 collection_ttl = 8; int64 total_rows = 9; + schema.CollectionSchema schema = 10; } message CompactionSegment { diff --git a/tests/integration/compaction/compaction_test.go b/tests/integration/compaction/compaction_test.go new file mode 100644 index 0000000000000..2e738e00fb6c8 --- /dev/null +++ b/tests/integration/compaction/compaction_test.go @@ -0,0 +1,47 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compaction + +import ( + "testing" + + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/tests/integration" +) + +type CompactionSuite struct { + integration.MiniClusterSuite +} + +func (s *CompactionSuite) SetupSuite() { + s.MiniClusterSuite.SetupSuite() + + paramtable.Init() + paramtable.Get().Save(paramtable.Get().DataCoordCfg.GlobalCompactionInterval.Key, "1") +} + +func (s *CompactionSuite) TearDownSuite() { + s.MiniClusterSuite.TearDownSuite() + + paramtable.Get().Reset(paramtable.Get().DataCoordCfg.GlobalCompactionInterval.Key) +} + +func TestCompaction(t *testing.T) { + suite.Run(t, new(CompactionSuite)) +} diff --git a/tests/integration/compaction/l0_compaction_test.go b/tests/integration/compaction/l0_compaction_test.go new file mode 100644 index 0000000000000..984e8eb3ce5e5 --- /dev/null +++ b/tests/integration/compaction/l0_compaction_test.go @@ -0,0 +1,238 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compaction + +import ( + "context" + "fmt" + "time" + + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/metric" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/tests/integration" +) + +func (s *CompactionSuite) TestL0Compaction() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*10) + defer cancel() + c := s.Cluster + + const ( + dim = 128 + dbName = "" + rowNum = 100000 + deleteCnt = 50000 + + indexType = integration.IndexFaissIvfFlat + metricType = metric.L2 + vecType = schemapb.DataType_FloatVector + ) + + paramtable.Get().Save(paramtable.Get().DataCoordCfg.LevelZeroCompactionTriggerDeltalogMinNum.Key, "1") + defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.LevelZeroCompactionTriggerDeltalogMinNum.Key) + + collectionName := "TestCompaction_" + funcutil.GenRandomStr() + + schema := integration.ConstructSchemaOfVecDataType(collectionName, dim, false, vecType) + marshaledSchema, err := proto.Marshal(schema) + s.NoError(err) + + // create collection + createCollectionStatus, err := c.Proxy.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + Schema: marshaledSchema, + ShardsNum: common.DefaultShardsNum, + ConsistencyLevel: commonpb.ConsistencyLevel_Strong, + }) + err = merr.CheckRPCCall(createCollectionStatus, err) + s.NoError(err) + log.Info("CreateCollection result", zap.Any("createCollectionStatus", createCollectionStatus)) + + // show collection + showCollectionsResp, err := c.Proxy.ShowCollections(ctx, &milvuspb.ShowCollectionsRequest{}) + err = merr.CheckRPCCall(showCollectionsResp, err) + s.NoError(err) + log.Info("ShowCollections result", zap.Any("showCollectionsResp", showCollectionsResp)) + + // insert + pkColumn := integration.NewInt64FieldData(integration.Int64Field, rowNum) + fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, rowNum, dim) + hashKeys := integration.GenerateHashKeys(rowNum) + insertResult, err := c.Proxy.Insert(ctx, &milvuspb.InsertRequest{ + DbName: dbName, + CollectionName: collectionName, + FieldsData: []*schemapb.FieldData{pkColumn, fVecColumn}, + HashKeys: hashKeys, + NumRows: uint32(rowNum), + }) + err = merr.CheckRPCCall(insertResult, err) + s.NoError(err) + s.Equal(int64(rowNum), insertResult.GetInsertCnt()) + + // flush + flushResp, err := 
c.Proxy.Flush(ctx, &milvuspb.FlushRequest{ + DbName: dbName, + CollectionNames: []string{collectionName}, + }) + err = merr.CheckRPCCall(flushResp, err) + s.NoError(err) + segmentIDs, has := flushResp.GetCollSegIDs()[collectionName] + ids := segmentIDs.GetData() + s.Require().NotEmpty(segmentIDs) + s.Require().True(has) + flushTs, has := flushResp.GetCollFlushTs()[collectionName] + s.True(has) + s.WaitForFlush(ctx, ids, flushTs, dbName, collectionName) + + // create index + createIndexStatus, err := c.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{ + CollectionName: collectionName, + FieldName: integration.FloatVecField, + IndexName: "_default", + ExtraParams: integration.ConstructIndexParam(dim, indexType, metricType), + }) + err = merr.CheckRPCCall(createIndexStatus, err) + s.NoError(err) + s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField) + + segments, err := c.MetaWatcher.ShowSegments() + s.NoError(err) + s.NotEmpty(segments) + s.Equal(1, len(segments)) + s.Equal(int64(rowNum), segments[0].GetNumOfRows()) + + // load + loadStatus, err := c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(loadStatus, err) + s.NoError(err) + s.WaitForLoad(ctx, collectionName) + + // delete + deleteResult, err := c.Proxy.Delete(ctx, &milvuspb.DeleteRequest{ + DbName: dbName, + CollectionName: collectionName, + Expr: fmt.Sprintf("%s < %d", integration.Int64Field, deleteCnt), + }) + err = merr.CheckRPCCall(deleteResult, err) + s.NoError(err) + + // flush l0 + flushResp, err = c.Proxy.Flush(ctx, &milvuspb.FlushRequest{ + DbName: dbName, + CollectionNames: []string{collectionName}, + }) + err = merr.CheckRPCCall(flushResp, err) + s.NoError(err) + flushTs, has = flushResp.GetCollFlushTs()[collectionName] + s.True(has) + s.WaitForFlush(ctx, ids, flushTs, dbName, collectionName) + + // query + queryResult, err := c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + DbName: dbName, + CollectionName: collectionName, + Expr: "", + OutputFields: []string{"count(*)"}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + s.Equal(int64(rowNum-deleteCnt), queryResult.GetFieldsData()[0].GetScalars().GetLongData().GetData()[0]) + + // wait for l0 compaction completed + showSegments := func() bool { + segments, err = c.MetaWatcher.ShowSegments() + s.NoError(err) + s.NotEmpty(segments) + log.Info("ShowSegments result", zap.Any("segments", segments)) + flushed := lo.Filter(segments, func(segment *datapb.SegmentInfo, _ int) bool { + return segment.GetState() == commonpb.SegmentState_Flushed + }) + if len(flushed) == 1 && + flushed[0].GetLevel() == datapb.SegmentLevel_L1 && + flushed[0].GetNumOfRows() == rowNum { + log.Info("l0 compaction done, wait for single compaction") + } + return len(flushed) == 1 && + flushed[0].GetLevel() == datapb.SegmentLevel_L1 && + flushed[0].GetNumOfRows() == rowNum-deleteCnt + } + for !showSegments() { + select { + case <-ctx.Done(): + s.Fail("waiting for compaction timeout") + return + case <-time.After(1 * time.Second): + } + } + + // search + expr := fmt.Sprintf("%s > 0", integration.Int64Field) + nq := 10 + topk := 10 + roundDecimal := -1 + params := integration.GetSearchParams(indexType, metricType) + searchReq := integration.ConstructSearchRequest("", collectionName, expr, + integration.FloatVecField, vecType, nil, metricType, params, nq, dim, topk, roundDecimal) + + searchResult, err := c.Proxy.Search(ctx, searchReq) + err = merr.CheckRPCCall(searchResult, err) + 
s.NoError(err) + s.Equal(nq*topk, len(searchResult.GetResults().GetScores())) + + // query + queryResult, err = c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + DbName: dbName, + CollectionName: collectionName, + Expr: "", + OutputFields: []string{"count(*)"}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + s.Equal(int64(rowNum-deleteCnt), queryResult.GetFieldsData()[0].GetScalars().GetLongData().GetData()[0]) + + // release collection + status, err := c.Proxy.ReleaseCollection(ctx, &milvuspb.ReleaseCollectionRequest{ + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(status, err) + s.NoError(err) + + // drop collection + status, err = c.Proxy.DropCollection(ctx, &milvuspb.DropCollectionRequest{ + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(status, err) + s.NoError(err) + + log.Info("Test compaction succeed") +} diff --git a/tests/integration/compaction/mix_compaction_test.go b/tests/integration/compaction/mix_compaction_test.go new file mode 100644 index 0000000000000..b51636be5fd1e --- /dev/null +++ b/tests/integration/compaction/mix_compaction_test.go @@ -0,0 +1,205 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package compaction + +import ( + "context" + "fmt" + "time" + + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/metric" + "github.com/milvus-io/milvus/tests/integration" +) + +func (s *CompactionSuite) TestMixCompaction() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*10) + defer cancel() + c := s.Cluster + + const ( + dim = 128 + dbName = "" + rowNum = 10000 + batch = 1000 + + indexType = integration.IndexFaissIvfFlat + metricType = metric.L2 + vecType = schemapb.DataType_FloatVector + ) + + collectionName := "TestCompaction_" + funcutil.GenRandomStr() + + schema := integration.ConstructSchemaOfVecDataType(collectionName, dim, true, vecType) + marshaledSchema, err := proto.Marshal(schema) + s.NoError(err) + + // create collection + createCollectionStatus, err := c.Proxy.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + Schema: marshaledSchema, + ShardsNum: common.DefaultShardsNum, + ConsistencyLevel: commonpb.ConsistencyLevel_Strong, + }) + err = merr.CheckRPCCall(createCollectionStatus, err) + s.NoError(err) + log.Info("CreateCollection result", zap.Any("createCollectionStatus", createCollectionStatus)) + + // show collection + showCollectionsResp, err := c.Proxy.ShowCollections(ctx, &milvuspb.ShowCollectionsRequest{}) + err = merr.CheckRPCCall(showCollectionsResp, err) + s.NoError(err) + log.Info("ShowCollections result", zap.Any("showCollectionsResp", showCollectionsResp)) + + for i := 0; i < rowNum/batch; i++ { + // insert + fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, batch, dim) + hashKeys := integration.GenerateHashKeys(batch) + insertResult, err := c.Proxy.Insert(ctx, &milvuspb.InsertRequest{ + DbName: dbName, + CollectionName: collectionName, + FieldsData: []*schemapb.FieldData{fVecColumn}, + HashKeys: hashKeys, + NumRows: uint32(batch), + }) + err = merr.CheckRPCCall(insertResult, err) + s.NoError(err) + s.Equal(int64(batch), insertResult.GetInsertCnt()) + + // flush + flushResp, err := c.Proxy.Flush(ctx, &milvuspb.FlushRequest{ + DbName: dbName, + CollectionNames: []string{collectionName}, + }) + err = merr.CheckRPCCall(flushResp, err) + s.NoError(err) + segmentIDs, has := flushResp.GetCollSegIDs()[collectionName] + ids := segmentIDs.GetData() + s.Require().NotEmpty(segmentIDs) + s.Require().True(has) + flushTs, has := flushResp.GetCollFlushTs()[collectionName] + s.True(has) + s.WaitForFlush(ctx, ids, flushTs, dbName, collectionName) + + log.Info("insert done", zap.Int("i", i)) + } + + // create index + createIndexStatus, err := c.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{ + CollectionName: collectionName, + FieldName: integration.FloatVecField, + IndexName: "_default", + ExtraParams: integration.ConstructIndexParam(dim, indexType, metricType), + }) + err = merr.CheckRPCCall(createIndexStatus, err) + s.NoError(err) + s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField) + + segments, err := c.MetaWatcher.ShowSegments() + s.NoError(err) + s.NotEmpty(segments) + 
s.Equal(rowNum/batch, len(segments)) + for _, segment := range segments { + log.Info("show segment result", zap.String("segment", segment.String())) + } + + // wait for compaction completed + showSegments := func() bool { + segments, err = c.MetaWatcher.ShowSegments() + s.NoError(err) + s.NotEmpty(segments) + compactFromSegments := lo.Filter(segments, func(segment *datapb.SegmentInfo, _ int) bool { + return segment.GetState() == commonpb.SegmentState_Dropped + }) + compactToSegments := lo.Filter(segments, func(segment *datapb.SegmentInfo, _ int) bool { + return segment.GetState() == commonpb.SegmentState_Flushed + }) + log.Info("ShowSegments result", zap.Int("len(compactFromSegments)", len(compactFromSegments)), + zap.Int("len(compactToSegments)", len(compactToSegments))) + return len(compactToSegments) == 1 + } + for !showSegments() { + select { + case <-ctx.Done(): + s.Fail("waiting for compaction timeout") + return + case <-time.After(1 * time.Second): + } + } + + // load + loadStatus, err := c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(loadStatus, err) + s.NoError(err) + s.WaitForLoad(ctx, collectionName) + + // search + expr := fmt.Sprintf("%s > 0", integration.Int64Field) + nq := 10 + topk := 10 + roundDecimal := -1 + params := integration.GetSearchParams(indexType, metricType) + searchReq := integration.ConstructSearchRequest("", collectionName, expr, + integration.FloatVecField, vecType, nil, metricType, params, nq, dim, topk, roundDecimal) + + searchResult, err := c.Proxy.Search(ctx, searchReq) + err = merr.CheckRPCCall(searchResult, err) + s.NoError(err) + s.Equal(nq*topk, len(searchResult.GetResults().GetScores())) + + // query + queryResult, err := c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + DbName: dbName, + CollectionName: collectionName, + Expr: "", + OutputFields: []string{"count(*)"}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + s.Equal(int64(rowNum), queryResult.GetFieldsData()[0].GetScalars().GetLongData().GetData()[0]) + + // release collection + status, err := c.Proxy.ReleaseCollection(ctx, &milvuspb.ReleaseCollectionRequest{ + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(status, err) + s.NoError(err) + + // drop collection + status, err = c.Proxy.DropCollection(ctx, &milvuspb.DropCollectionRequest{ + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(status, err) + s.NoError(err) + + log.Info("Test compaction succeed") +} From 4004e4c5451f0c1aaabb987b713ca14368a439a1 Mon Sep 17 00:00:00 2001 From: Cai Yudong Date: Fri, 24 May 2024 10:23:41 +0800 Subject: [PATCH 057/126] enhance: Optimize bulk insert unittest (#33224) Issue: #22837 Signed-off-by: Cai Yudong --- internal/storage/insert_data.go | 4 +- .../util/importutilv2/json/reader_test.go | 59 +------- .../util/importutilv2/numpy/reader_test.go | 129 ++++++++-------- internal/util/testutil/test_util.go | 63 ++++++++ tests/integration/import/util_test.go | 140 +++++------------- 5 files changed, 167 insertions(+), 228 deletions(-) diff --git a/internal/storage/insert_data.go b/internal/storage/insert_data.go index 3722965279a2f..23e10e529066d 100644 --- a/internal/storage/insert_data.go +++ b/internal/storage/insert_data.go @@ -84,7 +84,9 @@ func (i *InsertData) GetRowNum() int { var rowNum int for _, data := range i.Data { rowNum = data.RowNum() - break + if rowNum > 0 { + break + } } return rowNum } diff --git a/internal/util/importutilv2/json/reader_test.go 
b/internal/util/importutilv2/json/reader_test.go index c46954ead8418..38dc64d86ed93 100644 --- a/internal/util/importutilv2/json/reader_test.go +++ b/internal/util/importutilv2/json/reader_test.go @@ -24,7 +24,6 @@ import ( "strings" "testing" - "github.com/samber/lo" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" "golang.org/x/exp/slices" @@ -98,64 +97,16 @@ func (suite *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.Data }, }, } + insertData, err := testutil.CreateInsertData(schema, suite.numRows) suite.NoError(err) - rows := make([]map[string]any, 0, suite.numRows) - fieldIDToField := lo.KeyBy(schema.GetFields(), func(field *schemapb.FieldSchema) int64 { - return field.GetFieldID() - }) - for i := 0; i < insertData.GetRowNum(); i++ { - data := make(map[int64]interface{}) - for fieldID, v := range insertData.Data { - field := fieldIDToField[fieldID] - dataType := field.GetDataType() - elemType := field.GetElementType() - switch dataType { - case schemapb.DataType_Array: - switch elemType { - case schemapb.DataType_Bool: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetBoolData().GetData() - case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetIntData().GetData() - case schemapb.DataType_Int64: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetLongData().GetData() - case schemapb.DataType_Float: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetFloatData().GetData() - case schemapb.DataType_Double: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetDoubleData().GetData() - case schemapb.DataType_String: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetStringData().GetData() - } - case schemapb.DataType_JSON: - data[fieldID] = string(v.GetRow(i).([]byte)) - case schemapb.DataType_BinaryVector: - bytes := v.GetRow(i).([]byte) - ints := make([]int, 0, len(bytes)) - for _, b := range bytes { - ints = append(ints, int(b)) - } - data[fieldID] = ints - case schemapb.DataType_Float16Vector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.Float16BytesToFloat32Vector(bytes) - case schemapb.DataType_BFloat16Vector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.BFloat16BytesToFloat32Vector(bytes) - case schemapb.DataType_SparseFloatVector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.SparseFloatBytesToMap(bytes) - default: - data[fieldID] = v.GetRow(i) - } - } - row := lo.MapKeys(data, func(_ any, fieldID int64) string { - return fieldIDToField[fieldID].GetName() - }) - rows = append(rows, row) - } + + rows, err := testutil.CreateInsertDataRowsForJSON(schema, insertData) + suite.NoError(err) jsonBytes, err := json.Marshal(rows) suite.NoError(err) + type mockReader struct { io.Reader io.Closer diff --git a/internal/util/importutilv2/numpy/reader_test.go b/internal/util/importutilv2/numpy/reader_test.go index f94abb6b1a9ef..3b96e6553d214 100644 --- a/internal/util/importutilv2/numpy/reader_test.go +++ b/internal/util/importutilv2/numpy/reader_test.go @@ -128,60 +128,54 @@ func (suite *ReaderSuite) run(dt schemapb.DataType) { io.ReaderAt io.Seeker } + + var data interface{} for fieldID, fieldData := range insertData.Data { dataType := fieldIDToField[fieldID].GetDataType() + rowNum := fieldData.RowNum() switch dataType { case schemapb.DataType_JSON: - jsonStrs := make([]string, 0, fieldData.RowNum()) - for i := 0; i < fieldData.RowNum(); i++ { + jsonStrs := make([]string, 0, rowNum) + for i := 0; i < rowNum; 
i++ { row := fieldData.GetRow(i) jsonStrs = append(jsonStrs, string(row.([]byte))) } - reader, err := CreateReader(jsonStrs) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + data = jsonStrs case schemapb.DataType_BinaryVector: - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]byte), dim/8) - chunkedRows := make([][dim / 8]byte, len(chunked)) + rows := fieldData.GetRows().([]byte) + const rowBytes = dim / 8 + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + data = chunkedRows case schemapb.DataType_FloatVector: - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]float32), dim) + rows := fieldData.GetRows().([]float32) + chunked := lo.Chunk(rows, dim) chunkedRows := make([][dim]float32, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + data = chunkedRows case schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector: - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]byte), dim*2) - chunkedRows := make([][dim * 2]byte, len(chunked)) + rows := fieldData.GetRows().([]byte) + const rowBytes = dim * 2 + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + data = chunkedRows default: - reader, err := CreateReader(insertData.Data[fieldID].GetRows()) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + data = fieldData.GetRows() } + + reader, err := CreateReader(data) + suite.NoError(err) + cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ + Reader: reader, + }, nil) } reader, err := NewReader(context.Background(), cm, schema, lo.Values(files), math.MaxInt) @@ -268,59 +262,54 @@ func (suite *ReaderSuite) failRun(dt schemapb.DataType, isDynamic bool) { io.ReaderAt io.Seeker } + + var data interface{} for fieldID, fieldData := range insertData.Data { dataType := fieldIDToField[fieldID].GetDataType() - if dataType == schemapb.DataType_JSON { - jsonStrs := make([]string, 0, fieldData.RowNum()) - for i := 0; i < fieldData.RowNum(); i++ { + rowNum := fieldData.RowNum() + switch dataType { + case schemapb.DataType_JSON: + jsonStrs := make([]string, 0, rowNum) + for i := 0; i < rowNum; i++ { row := fieldData.GetRow(i) jsonStrs = append(jsonStrs, string(row.([]byte))) } - reader, err := CreateReader(jsonStrs) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) - } else if dataType == schemapb.DataType_FloatVector { - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]float32), dim) - chunkedRows := make([][dim]float32, len(chunked)) + data = jsonStrs + case schemapb.DataType_BinaryVector: + rows := fieldData.GetRows().([]byte) + const rowBytes = dim / 8 + chunked := 
lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) - } else if dataType == schemapb.DataType_Float16Vector || dataType == schemapb.DataType_BFloat16Vector { - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]byte), dim*2) - chunkedRows := make([][dim * 2]byte, len(chunked)) + data = chunkedRows + case schemapb.DataType_FloatVector: + rows := fieldData.GetRows().([]float32) + chunked := lo.Chunk(rows, dim) + chunkedRows := make([][dim]float32, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) - } else if dataType == schemapb.DataType_BinaryVector { - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]byte), dim/8) - chunkedRows := make([][dim / 8]byte, len(chunked)) + data = chunkedRows + case schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector: + rows := fieldData.GetRows().([]byte) + const rowBytes = dim * 2 + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) - } else { - reader, err := CreateReader(insertData.Data[fieldID].GetRows()) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + data = chunkedRows + default: + data = fieldData.GetRows() } + + reader, err := CreateReader(data) + suite.NoError(err) + cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ + Reader: reader, + }, nil) } reader, err := NewReader(context.Background(), cm, schema, lo.Values(files), math.MaxInt) diff --git a/internal/util/testutil/test_util.go b/internal/util/testutil/test_util.go index d84affd043f7d..4548f0e77ff31 100644 --- a/internal/util/testutil/test_util.go +++ b/internal/util/testutil/test_util.go @@ -484,3 +484,66 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser } return columns, nil } + +func CreateInsertDataRowsForJSON(schema *schemapb.CollectionSchema, insertData *storage.InsertData) ([]map[string]any, error) { + fieldIDToField := lo.KeyBy(schema.GetFields(), func(field *schemapb.FieldSchema) int64 { + return field.GetFieldID() + }) + + rowNum := insertData.GetRowNum() + rows := make([]map[string]any, 0, rowNum) + for i := 0; i < rowNum; i++ { + data := make(map[int64]interface{}) + for fieldID, v := range insertData.Data { + field := fieldIDToField[fieldID] + dataType := field.GetDataType() + elemType := field.GetElementType() + if field.GetAutoID() { + continue + } + switch dataType { + case schemapb.DataType_Array: + switch elemType { + case schemapb.DataType_Bool: + data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetBoolData().GetData() + case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32: + data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetIntData().GetData() + case schemapb.DataType_Int64: + data[fieldID] = 
v.GetRow(i).(*schemapb.ScalarField).GetLongData().GetData() + case schemapb.DataType_Float: + data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetFloatData().GetData() + case schemapb.DataType_Double: + data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetDoubleData().GetData() + case schemapb.DataType_String: + data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetStringData().GetData() + } + case schemapb.DataType_JSON: + data[fieldID] = string(v.GetRow(i).([]byte)) + case schemapb.DataType_BinaryVector: + bytes := v.GetRow(i).([]byte) + ints := make([]int, 0, len(bytes)) + for _, b := range bytes { + ints = append(ints, int(b)) + } + data[fieldID] = ints + case schemapb.DataType_Float16Vector: + bytes := v.GetRow(i).([]byte) + data[fieldID] = typeutil.Float16BytesToFloat32Vector(bytes) + case schemapb.DataType_BFloat16Vector: + bytes := v.GetRow(i).([]byte) + data[fieldID] = typeutil.BFloat16BytesToFloat32Vector(bytes) + case schemapb.DataType_SparseFloatVector: + bytes := v.GetRow(i).([]byte) + data[fieldID] = typeutil.SparseFloatBytesToMap(bytes) + default: + data[fieldID] = v.GetRow(i) + } + } + row := lo.MapKeys(data, func(_ any, fieldID int64) string { + return fieldIDToField[fieldID].GetName() + }) + rows = append(rows, row) + } + + return rows, nil +} diff --git a/tests/integration/import/util_test.go b/tests/integration/import/util_test.go index d55168db0a789..6987ffc355253 100644 --- a/tests/integration/import/util_test.go +++ b/tests/integration/import/util_test.go @@ -39,7 +39,6 @@ import ( "github.com/milvus-io/milvus/internal/util/testutil" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/milvus-io/milvus/pkg/util/typeutil" "github.com/milvus-io/milvus/tests/integration" ) @@ -110,87 +109,60 @@ func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSche path := fmt.Sprintf("%s/%s.npy", cm.RootPath(), field.GetName()) fieldID := field.GetFieldID() + fieldData := insertData.Data[fieldID] dType := field.GetDataType() switch dType { - case schemapb.DataType_Bool: - data = insertData.Data[fieldID].(*storage.BoolFieldData).Data - case schemapb.DataType_Int8: - data = insertData.Data[fieldID].(*storage.Int8FieldData).Data - case schemapb.DataType_Int16: - data = insertData.Data[fieldID].(*storage.Int16FieldData).Data - case schemapb.DataType_Int32: - data = insertData.Data[fieldID].(*storage.Int32FieldData).Data - case schemapb.DataType_Int64: - data = insertData.Data[fieldID].(*storage.Int64FieldData).Data - case schemapb.DataType_Float: - data = insertData.Data[fieldID].(*storage.FloatFieldData).Data - case schemapb.DataType_Double: - data = insertData.Data[fieldID].(*storage.DoubleFieldData).Data - case schemapb.DataType_String, schemapb.DataType_VarChar: - data = insertData.Data[fieldID].(*storage.StringFieldData).Data case schemapb.DataType_BinaryVector: - vecData := insertData.Data[fieldID].(*storage.BinaryVectorFieldData).Data - if dim != insertData.Data[fieldID].(*storage.BinaryVectorFieldData).Dim { - panic(fmt.Sprintf("dim mis-match: %d, %d", dim, insertData.Data[fieldID].(*storage.BinaryVectorFieldData).Dim)) + rows := fieldData.GetRows().([]byte) + if dim != fieldData.(*storage.BinaryVectorFieldData).Dim { + panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.BinaryVectorFieldData).Dim)) } const rowBytes = dim / 8 - rows := len(vecData) / rowBytes - binVecData := make([][rowBytes]byte, 0, rows) - for i := 0; i < rows; i++ { - rowVec := [rowBytes]byte{} - copy(rowVec[:], 
vecData[i*rowBytes:(i+1)*rowBytes]) - binVecData = append(binVecData, rowVec) + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) + for i, innerSlice := range chunked { + copy(chunkedRows[i][:], innerSlice[:]) } - data = binVecData + data = chunkedRows case schemapb.DataType_FloatVector: - vecData := insertData.Data[fieldID].(*storage.FloatVectorFieldData).Data - if dim != insertData.Data[fieldID].(*storage.FloatVectorFieldData).Dim { - panic(fmt.Sprintf("dim mis-match: %d, %d", dim, insertData.Data[fieldID].(*storage.FloatVectorFieldData).Dim)) + rows := fieldData.GetRows().([]float32) + if dim != fieldData.(*storage.FloatVectorFieldData).Dim { + panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.FloatVectorFieldData).Dim)) } - rows := len(vecData) / dim - floatVecData := make([][dim]float32, 0, rows) - for i := 0; i < rows; i++ { - rowVec := [dim]float32{} - copy(rowVec[:], vecData[i*dim:(i+1)*dim]) - floatVecData = append(floatVecData, rowVec) + chunked := lo.Chunk(rows, dim) + chunkedRows := make([][dim]float32, len(chunked)) + for i, innerSlice := range chunked { + copy(chunkedRows[i][:], innerSlice[:]) } - data = floatVecData + data = chunkedRows case schemapb.DataType_Float16Vector: - vecData := insertData.Data[fieldID].(*storage.Float16VectorFieldData).Data - if dim != insertData.Data[fieldID].(*storage.Float16VectorFieldData).Dim { - panic(fmt.Sprintf("dim mis-match: %d, %d", dim, insertData.Data[fieldID].(*storage.Float16VectorFieldData).Dim)) + rows := insertData.Data[fieldID].GetRows().([]byte) + if dim != fieldData.(*storage.Float16VectorFieldData).Dim { + panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.Float16VectorFieldData).Dim)) } const rowBytes = dim * 2 - rows := len(vecData) / rowBytes - float16VecData := make([][rowBytes]byte, 0, rows) - for i := 0; i < rows; i++ { - rowVec := [rowBytes]byte{} - copy(rowVec[:], vecData[i*rowBytes:(i+1)*rowBytes]) - float16VecData = append(float16VecData, rowVec) + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) + for i, innerSlice := range chunked { + copy(chunkedRows[i][:], innerSlice[:]) } - data = float16VecData + data = chunkedRows case schemapb.DataType_BFloat16Vector: - vecData := insertData.Data[fieldID].(*storage.BFloat16VectorFieldData).Data - if dim != insertData.Data[fieldID].(*storage.BFloat16VectorFieldData).Dim { - panic(fmt.Sprintf("dim mis-match: %d, %d", dim, insertData.Data[fieldID].(*storage.BFloat16VectorFieldData).Dim)) + rows := insertData.Data[fieldID].GetRows().([]byte) + if dim != fieldData.(*storage.BFloat16VectorFieldData).Dim { + panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.BFloat16VectorFieldData).Dim)) } const rowBytes = dim * 2 - rows := len(vecData) / rowBytes - bfloat16VecData := make([][rowBytes]byte, 0, rows) - for i := 0; i < rows; i++ { - rowVec := [rowBytes]byte{} - copy(rowVec[:], vecData[i*rowBytes:(i+1)*rowBytes]) - bfloat16VecData = append(bfloat16VecData, rowVec) + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) + for i, innerSlice := range chunked { + copy(chunkedRows[i][:], innerSlice[:]) } - data = bfloat16VecData + data = chunkedRows case schemapb.DataType_SparseFloatVector: data = insertData.Data[fieldID].(*storage.SparseFloatVectorFieldData).GetContents() - case schemapb.DataType_JSON: - data = insertData.Data[fieldID].(*storage.JSONFieldData).Data - case schemapb.DataType_Array: - data = 
insertData.Data[fieldID].(*storage.ArrayFieldData).Data default: - panic(fmt.Sprintf("unsupported data type: %s", dType.String())) + data = insertData.Data[fieldID].GetRows() } err := writeFn(path, data) @@ -207,47 +179,9 @@ func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSche func GenerateJSONFile(t *testing.T, filePath string, schema *schemapb.CollectionSchema, count int) { insertData, err := testutil.CreateInsertData(schema, count) assert.NoError(t, err) - rows := make([]map[string]any, 0, count) - fieldIDToField := lo.KeyBy(schema.GetFields(), func(field *schemapb.FieldSchema) int64 { - return field.GetFieldID() - }) - for i := 0; i < count; i++ { - data := make(map[int64]interface{}) - for fieldID, v := range insertData.Data { - dataType := fieldIDToField[fieldID].GetDataType() - if fieldIDToField[fieldID].GetAutoID() { - continue - } - switch dataType { - case schemapb.DataType_Array: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetIntData().GetData() - case schemapb.DataType_JSON: - data[fieldID] = string(v.GetRow(i).([]byte)) - case schemapb.DataType_BinaryVector: - bytes := v.GetRow(i).([]byte) - ints := make([]int, 0, len(bytes)) - for _, b := range bytes { - ints = append(ints, int(b)) - } - data[fieldID] = ints - case schemapb.DataType_Float16Vector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.Float16BytesToFloat32Vector(bytes) - case schemapb.DataType_BFloat16Vector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.BFloat16BytesToFloat32Vector(bytes) - case schemapb.DataType_SparseFloatVector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.SparseFloatBytesToMap(bytes) - default: - data[fieldID] = v.GetRow(i) - } - } - row := lo.MapKeys(data, func(_ any, fieldID int64) string { - return fieldIDToField[fieldID].GetName() - }) - rows = append(rows, row) - } + + rows, err := testutil.CreateInsertDataRowsForJSON(schema, insertData) + assert.NoError(t, err) jsonBytes, err := json.Marshal(rows) assert.NoError(t, err) From a8bd9bea39ac3f52c9e7c017fed6a81a322ddeda Mon Sep 17 00:00:00 2001 From: Ted Xu Date: Fri, 24 May 2024 10:33:40 +0800 Subject: [PATCH 058/126] fix: adding blob memory size in binlog serde (#33324) See: #33280 Signed-off-by: Ted Xu --- internal/storage/serde.go | 8 +++++--- internal/storage/serde_test.go | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/internal/storage/serde.go b/internal/storage/serde.go index 636e505b83c76..ba5256c6d13f4 100644 --- a/internal/storage/serde.go +++ b/internal/storage/serde.go @@ -925,9 +925,10 @@ func (bsw *BinlogStreamWriter) Finalize() (*Blob, error) { return nil, err } return &Blob{ - Key: strconv.Itoa(int(bsw.fieldSchema.FieldID)), - Value: b.Bytes(), - RowNum: int64(bsw.rw.numRows), + Key: strconv.Itoa(int(bsw.fieldSchema.FieldID)), + Value: b.Bytes(), + RowNum: int64(bsw.rw.numRows), + MemorySize: int64(bsw.memorySize), }, nil } @@ -1016,6 +1017,7 @@ func NewBinlogSerializeWriter(schema *schemapb.CollectionSchema, partitionID, se if !ok { return nil, 0, errors.New(fmt.Sprintf("serialize error on type %s", types[fid])) } + writers[fid].memorySize += int(typeEntry.sizeof(e)) memorySize += typeEntry.sizeof(e) } } diff --git a/internal/storage/serde_test.go b/internal/storage/serde_test.go index 21a871cb5e606..0d9306069d775 100644 --- a/internal/storage/serde_test.go +++ b/internal/storage/serde_test.go @@ -160,6 +160,7 @@ func TestBinlogSerializeWriter(t *testing.T) { blob, err := w.Finalize() assert.NoError(t, err) assert.NotNil(t, blob) + 
assert.True(t, blob.MemorySize > 0) newblobs[i] = blob i++ } From 5cdc6ae489f6fbe1f864e976f46fced80bcf3a0e Mon Sep 17 00:00:00 2001 From: congqixia Date: Fri, 24 May 2024 10:41:40 +0800 Subject: [PATCH 059/126] enhance: Sync `deleteBufBytes` config value to default config (#33320) The delete buffer size is set to 64MB in milvus.yaml but the default set up shall be 16MB Signed-off-by: Congqi Xia --- configs/milvus.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index b526ad733703e..d1a3b7d8552a0 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -494,7 +494,7 @@ dataNode: coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds segment: insertBufSize: 16777216 # Max buffer size to flush for a single segment. - deleteBufBytes: 67108864 # Max buffer size in bytes to flush del for a single channel, default as 16MB + deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB syncPeriod: 600 # The period to sync segments if buffer is not empty. memory: forceSyncEnable: true # Set true to force sync if memory usage is too high From e895cfed8405fb8bd7e46a2af5b239785670f4c2 Mon Sep 17 00:00:00 2001 From: yiwangdr <80064917+yiwangdr@users.noreply.github.com> Date: Thu, 23 May 2024 21:47:40 -0700 Subject: [PATCH 060/126] fix: reduce redundant map operations in datacoord (#33343) More refactories will be added. issue: #33342 Signed-off-by: yiwangdr --- internal/datacoord/channel_manager.go | 23 ++----------- internal/datacoord/channel_manager_v2.go | 24 +------------ internal/datacoord/channel_store.go | 23 +++++++++++++ internal/datacoord/channel_store_v2.go | 25 +++++++++++--- internal/datacoord/mock_channel_store.go | 44 ++++++++++++++++++++++++ internal/datacoord/segment_info.go | 6 ++-- 6 files changed, 95 insertions(+), 50 deletions(-) diff --git a/internal/datacoord/channel_manager.go b/internal/datacoord/channel_manager.go index f7ce6ea9490e9..4fa1927660195 100644 --- a/internal/datacoord/channel_manager.go +++ b/internal/datacoord/channel_manager.go @@ -494,17 +494,9 @@ func (c *ChannelManagerImpl) GetBufferChannels() *NodeChannelInfo { // GetNodeChannelsByCollectionID gets all node channels map of the collection func (c *ChannelManagerImpl) GetNodeChannelsByCollectionID(collectionID UniqueID) map[UniqueID][]string { - nodeChs := make(map[UniqueID][]string) - for _, nodeChannels := range c.GetAssignedChannels() { - var channelNames []string - for name, ch := range nodeChannels.Channels { - if ch.GetCollectionID() == collectionID { - channelNames = append(channelNames, name) - } - } - nodeChs[nodeChannels.NodeID] = channelNames - } - return nodeChs + c.mu.RLock() + defer c.mu.RUnlock() + return c.store.GetNodeChannelsByCollectionID(collectionID) } // Get all channels belong to the collection @@ -891,15 +883,6 @@ func (c *ChannelManagerImpl) GetCollectionIDByChannel(channelName string) (bool, return false, 0 } -func (c *ChannelManagerImpl) GetNodeIDByChannelName(channelName string) (UniqueID, bool) { - for _, nodeChannel := range c.GetAssignedChannels() { - if _, ok := nodeChannel.Channels[channelName]; ok { - return nodeChannel.NodeID, true - } - } - return 0, false -} - func (c *ChannelManagerImpl) GetChannel(nodeID int64, channelName string) (RWChannel, bool) { c.mu.RLock() defer c.mu.RUnlock() diff --git a/internal/datacoord/channel_manager_v2.go b/internal/datacoord/channel_manager_v2.go index ccaed65ea9ce7..4bea2daf96e41 100644 --- 
a/internal/datacoord/channel_manager_v2.go +++ b/internal/datacoord/channel_manager_v2.go @@ -48,7 +48,6 @@ type ChannelManager interface { FindWatcher(channel string) (UniqueID, error) GetChannel(nodeID int64, channel string) (RWChannel, bool) - GetNodeIDByChannelName(channel string) (int64, bool) GetNodeChannelsByCollectionID(collectionID int64) map[int64][]string GetChannelsByCollectionID(collectionID int64) []RWChannel GetChannelNamesByCollectionID(collectionID int64) []string @@ -351,31 +350,10 @@ func (m *ChannelManagerImplV2) GetChannel(nodeID int64, channelName string) (RWC return nil, false } -func (m *ChannelManagerImplV2) GetNodeIDByChannelName(channel string) (int64, bool) { - m.mu.RLock() - defer m.mu.RUnlock() - nodeChannels := m.store.GetNodeChannelsBy( - WithoutBufferNode(), - WithChannelName(channel)) - - if len(nodeChannels) > 0 { - return nodeChannels[0].NodeID, true - } - - return 0, false -} - func (m *ChannelManagerImplV2) GetNodeChannelsByCollectionID(collectionID int64) map[int64][]string { m.mu.RLock() defer m.mu.RUnlock() - nodeChs := make(map[UniqueID][]string) - nodeChannels := m.store.GetNodeChannelsBy( - WithoutBufferNode(), - WithCollectionIDV2(collectionID)) - lo.ForEach(nodeChannels, func(info *NodeChannelInfo, _ int) { - nodeChs[info.NodeID] = lo.Keys(info.Channels) - }) - return nodeChs + return m.store.GetNodeChannelsByCollectionID(collectionID) } func (m *ChannelManagerImplV2) GetChannelsByCollectionID(collectionID int64) []RWChannel { diff --git a/internal/datacoord/channel_store.go b/internal/datacoord/channel_store.go index c59e626a6f846..76524df0a9022 100644 --- a/internal/datacoord/channel_store.go +++ b/internal/datacoord/channel_store.go @@ -37,6 +37,8 @@ import ( ) // ROChannelStore is a read only channel store for channels and nodes. +// +//go:generate mockery --name=ROChannelStore --structname=ROChannelStore --output=./ --filename=mock_ro_channel_store.go --with-expecter type ROChannelStore interface { // GetNode returns the channel info of a specific node. // Returns nil if the node doesn't belong to the cluster @@ -52,12 +54,16 @@ type ROChannelStore interface { GetNodes() []int64 // GetNodeChannelCount GetNodeChannelCount(nodeID int64) int + // GetNodeChannels for given collection + GetNodeChannelsByCollectionID(collectionID UniqueID) map[UniqueID][]string // GetNodeChannelsBy used by channel_store_v2 and channel_manager_v2 only GetNodeChannelsBy(nodeSelector NodeSelector, channelSelectors ...ChannelSelector) []*NodeChannelInfo } // RWChannelStore is the read write channel store for channels and nodes. +// +//go:generate mockery --name=RWChannelStore --structname=RWChannelStore --output=./ --filename=mock_channel_store.go --with-expecter type RWChannelStore interface { ROChannelStore // Reload restores the buffer channels and node-channels mapping form kv. @@ -458,6 +464,23 @@ func (c *ChannelStore) GetNodesChannels() []*NodeChannelInfo { return ret } +func (c *ChannelStore) GetNodeChannelsByCollectionID(collectionID UniqueID) map[UniqueID][]string { + nodeChs := make(map[UniqueID][]string) + for id, info := range c.channelsInfo { + if id == bufferID { + continue + } + var channelNames []string + for name, ch := range info.Channels { + if ch.GetCollectionID() == collectionID { + channelNames = append(channelNames, name) + } + } + nodeChs[id] = channelNames + } + return nodeChs +} + // GetBufferChannelInfo returns all unassigned channels. 
func (c *ChannelStore) GetBufferChannelInfo() *NodeChannelInfo { if info, ok := c.channelsInfo[bufferID]; ok { diff --git a/internal/datacoord/channel_store_v2.go b/internal/datacoord/channel_store_v2.go index 82f0d14e9e922..dcbeef28630b2 100644 --- a/internal/datacoord/channel_store_v2.go +++ b/internal/datacoord/channel_store_v2.go @@ -366,7 +366,7 @@ func WithChannelStates(states ...ChannelState) ChannelSelector { } func (c *StateChannelStore) GetNodeChannelsBy(nodeSelector NodeSelector, channelSelectors ...ChannelSelector) []*NodeChannelInfo { - nodeChannels := make(map[int64]*NodeChannelInfo) + var nodeChannels []*NodeChannelInfo for nodeID, cInfo := range c.channelsInfo { if nodeSelector(nodeID) { selected := make(map[string]RWChannel) @@ -382,13 +382,13 @@ func (c *StateChannelStore) GetNodeChannelsBy(nodeSelector NodeSelector, channel selected[chName] = channel } } - nodeChannels[nodeID] = &NodeChannelInfo{ + nodeChannels = append(nodeChannels, &NodeChannelInfo{ NodeID: nodeID, Channels: selected, - } + }) } } - return lo.Values(nodeChannels) + return nodeChannels } func (c *StateChannelStore) GetNodesChannels() []*NodeChannelInfo { @@ -401,6 +401,23 @@ func (c *StateChannelStore) GetNodesChannels() []*NodeChannelInfo { return ret } +func (c *StateChannelStore) GetNodeChannelsByCollectionID(collectionID UniqueID) map[UniqueID][]string { + nodeChs := make(map[UniqueID][]string) + for id, info := range c.channelsInfo { + if id == bufferID { + continue + } + var channelNames []string + for name, ch := range info.Channels { + if ch.GetCollectionID() == collectionID { + channelNames = append(channelNames, name) + } + } + nodeChs[id] = channelNames + } + return nodeChs +} + func (c *StateChannelStore) GetBufferChannelInfo() *NodeChannelInfo { return c.GetNode(bufferID) } diff --git a/internal/datacoord/mock_channel_store.go b/internal/datacoord/mock_channel_store.go index e0e469fba733c..fc7cb51ef3e92 100644 --- a/internal/datacoord/mock_channel_store.go +++ b/internal/datacoord/mock_channel_store.go @@ -179,6 +179,50 @@ func (_c *MockRWChannelStore_GetNodeChannelCount_Call) RunAndReturn(run func(int return _c } +// GetNodeChannelsByCollectionID provides a mock function with given fields: collectionID +func (_m *MockRWChannelStore) GetNodeChannelsByCollectionID(collectionID int64) map[int64][]string { + ret := _m.Called(collectionID) + + var r0 map[int64][]string + if rf, ok := ret.Get(0).(func(int64) map[int64][]string); ok { + r0 = rf(collectionID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(map[int64][]string) + } + } + + return r0 +} + +// MockRWChannelStore_GetNodeChannelsByCollectionID_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetNodeChannelsByCollectionID' +type MockRWChannelStore_GetNodeChannelsByCollectionID_Call struct { + *mock.Call +} + +// GetNodeChannelsByCollectionID is a helper method to define mock.On call +// - collectionID int64 +func (_e *MockRWChannelStore_Expecter) GetNodeChannelsByCollectionID(collectionID interface{}) *MockRWChannelStore_GetNodeChannelsByCollectionID_Call { + return &MockRWChannelStore_GetNodeChannelsByCollectionID_Call{Call: _e.mock.On("GetNodeChannelsByCollectionID", collectionID)} +} + +func (_c *MockRWChannelStore_GetNodeChannelsByCollectionID_Call) Run(run func(collectionID int64)) *MockRWChannelStore_GetNodeChannelsByCollectionID_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(int64)) + }) + return _c +} + +func (_c 
*MockRWChannelStore_GetNodeChannelsByCollectionID_Call) Return(_a0 map[int64][]string) *MockRWChannelStore_GetNodeChannelsByCollectionID_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockRWChannelStore_GetNodeChannelsByCollectionID_Call) RunAndReturn(run func(int64) map[int64][]string) *MockRWChannelStore_GetNodeChannelsByCollectionID_Call { + _c.Call.Return(run) + return _c +} + // GetNodeChannelsBy provides a mock function with given fields: nodeSelector, channelSelectors func (_m *MockRWChannelStore) GetNodeChannelsBy(nodeSelector NodeSelector, channelSelectors ...ChannelSelector) []*NodeChannelInfo { _va := make([]interface{}, len(channelSelectors)) diff --git a/internal/datacoord/segment_info.go b/internal/datacoord/segment_info.go index 5946934e8828a..40f0d46fe6cb9 100644 --- a/internal/datacoord/segment_info.go +++ b/internal/datacoord/segment_info.go @@ -102,7 +102,7 @@ func (s *SegmentsInfo) GetSegmentsBySelector(filters ...SegmentFilter) []*Segmen filter.AddFilter(criterion) } var result []*SegmentInfo - var candidates []*SegmentInfo + var candidates map[int64]*SegmentInfo // apply criterion switch { case criterion.collectionID > 0: @@ -110,9 +110,9 @@ func (s *SegmentsInfo) GetSegmentsBySelector(filters ...SegmentFilter) []*Segmen if !ok { return nil } - candidates = lo.Values(collSegments.segments) + candidates = collSegments.segments default: - candidates = lo.Values(s.segments) + candidates = s.segments } for _, segment := range candidates { if criterion.Match(segment) { From 2964f60edc3c027512ea42b24aae55a2238ab1d8 Mon Sep 17 00:00:00 2001 From: SimFG Date: Fri, 24 May 2024 14:19:41 +0800 Subject: [PATCH 061/126] enhance: the proxy metric in the query request (#33307) /kind improvement issue: #33306 Signed-off-by: SimFG --- internal/proxy/impl.go | 197 ++++++++++++++++++++---------------- internal/proxy/util.go | 19 ++++ internal/proxy/util_test.go | 21 ++++ 3 files changed, 148 insertions(+), 89 deletions(-) diff --git a/internal/proxy/impl.go b/internal/proxy/impl.go index 340b21fa33300..91fe1244b80fc 100644 --- a/internal/proxy/impl.go +++ b/internal/proxy/impl.go @@ -3415,21 +3415,8 @@ func (node *Proxy) Flush(ctx context.Context, request *milvuspb.FlushRequest) (* // Query get the records by primary keys. 
func (node *Proxy) query(ctx context.Context, qt *queryTask) (*milvuspb.QueryResults, error) { request := qt.request - receiveSize := proto.Size(request) - metrics.ProxyReceiveBytes.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - metrics.QueryLabel, - request.GetCollectionName(), - ).Add(float64(receiveSize)) - - metrics.ProxyReceivedNQ.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - metrics.SearchLabel, - request.GetCollectionName(), - ).Add(float64(1)) - - subLabel := GetCollectionRateSubLabel(request) - rateCol.Add(internalpb.RateType_DQLQuery.String(), 1, subLabel) + method := "Query" + isProxyRequest := GetRequestLabelFromContext(ctx) if err := merr.CheckHealthy(node.GetStateCode()); err != nil { return &milvuspb.QueryResults{ @@ -3437,20 +3424,6 @@ func (node *Proxy) query(ctx context.Context, qt *queryTask) (*milvuspb.QueryRes }, nil } - ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Query") - defer sp.End() - tr := timerecord.NewTimeRecorder("Query") - - method := "Query" - - metrics.ProxyFunctionCall.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - method, - metrics.TotalLabel, - request.GetDbName(), - request.GetCollectionName(), - ).Inc() - log := log.Ctx(ctx).With( zap.String("role", typeutil.ProxyRole), zap.String("db", request.DbName), @@ -3458,6 +3431,16 @@ func (node *Proxy) query(ctx context.Context, qt *queryTask) (*milvuspb.QueryRes zap.Strings("partitions", request.PartitionNames), ) + log.Debug( + rpcReceived(method), + zap.String("expr", request.Expr), + zap.Strings("OutputFields", request.OutputFields), + zap.Uint64("travel_timestamp", request.TravelTimestamp), + zap.Uint64("guarantee_timestamp", request.GuaranteeTimestamp), + ) + + tr := timerecord.NewTimeRecorder(method) + defer func() { span := tr.ElapseSpan() if span >= paramtable.Get().ProxyCfg.SlowQuerySpanInSeconds.GetAsDuration(time.Second) { @@ -3475,27 +3458,21 @@ func (node *Proxy) query(ctx context.Context, qt *queryTask) (*milvuspb.QueryRes } }() - log.Debug( - rpcReceived(method), - zap.String("expr", request.Expr), - zap.Strings("OutputFields", request.OutputFields), - zap.Uint64("travel_timestamp", request.TravelTimestamp), - zap.Uint64("guarantee_timestamp", request.GuaranteeTimestamp), - ) - if err := node.sched.dqQueue.Enqueue(qt); err != nil { log.Warn( rpcFailedToEnqueue(method), zap.Error(err), ) - metrics.ProxyFunctionCall.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - method, - metrics.AbandonLabel, - request.GetDbName(), - request.GetCollectionName(), - ).Inc() + if isProxyRequest { + metrics.ProxyFunctionCall.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + method, + metrics.AbandonLabel, + request.GetDbName(), + request.GetCollectionName(), + ).Inc() + } return &milvuspb.QueryResults{ Status: merr.Status(err), @@ -3510,45 +3487,36 @@ func (node *Proxy) query(ctx context.Context, qt *queryTask) (*milvuspb.QueryRes rpcFailedToWaitToFinish(method), zap.Error(err)) - metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method, - metrics.FailLabel, request.GetDbName(), request.GetCollectionName()).Inc() + if isProxyRequest { + metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method, + metrics.FailLabel, request.GetDbName(), request.GetCollectionName()).Inc() + } return &milvuspb.QueryResults{ Status: merr.Status(err), }, nil } - span := tr.CtxRecord(ctx, "wait query result") - 
metrics.ProxyWaitForSearchResultLatency.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - metrics.QueryLabel, - ).Observe(float64(span.Milliseconds())) - log.Debug(rpcDone(method)) - - metrics.ProxyFunctionCall.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - method, - metrics.SuccessLabel, - request.GetDbName(), - request.GetCollectionName(), - ).Inc() - - metrics.ProxySQLatency.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - metrics.QueryLabel, - request.GetDbName(), - request.GetCollectionName(), - ).Observe(float64(tr.ElapseSpan().Milliseconds())) + if isProxyRequest { + span := tr.CtxRecord(ctx, "wait query result") + metrics.ProxyWaitForSearchResultLatency.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + metrics.QueryLabel, + ).Observe(float64(span.Milliseconds())) - metrics.ProxyCollectionSQLatency.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - metrics.QueryLabel, - request.CollectionName, - ).Observe(float64(tr.ElapseSpan().Milliseconds())) + metrics.ProxySQLatency.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + metrics.QueryLabel, + request.GetDbName(), + request.GetCollectionName(), + ).Observe(float64(tr.ElapseSpan().Milliseconds())) - sentSize := proto.Size(qt.result) - rateCol.Add(metricsinfo.ReadResultThroughput, float64(sentSize), subLabel) - metrics.ProxyReadReqSendBytes.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Add(float64(sentSize)) + metrics.ProxyCollectionSQLatency.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + metrics.QueryLabel, + request.CollectionName, + ).Observe(float64(tr.ElapseSpan().Milliseconds())) + } return qt.result, nil } @@ -3570,22 +3538,73 @@ func (node *Proxy) Query(ctx context.Context, request *milvuspb.QueryRequest) (* lb: node.lbPolicy, mustUsePartitionKey: Params.ProxyCfg.MustUsePartitionKey.GetAsBool(), } + + subLabel := GetCollectionRateSubLabel(request) + receiveSize := proto.Size(request) + metrics.ProxyReceiveBytes.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + metrics.QueryLabel, + request.GetCollectionName(), + ).Add(float64(receiveSize)) + metrics.ProxyReceivedNQ.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + metrics.SearchLabel, + request.GetCollectionName(), + ).Add(float64(1)) + + rateCol.Add(internalpb.RateType_DQLQuery.String(), 1, subLabel) + + if err := merr.CheckHealthy(node.GetStateCode()); err != nil { + return &milvuspb.QueryResults{ + Status: merr.Status(err), + }, nil + } + + ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Query") + defer sp.End() + method := "Query" + + metrics.ProxyFunctionCall.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + method, + metrics.TotalLabel, + request.GetDbName(), + request.GetCollectionName(), + ).Inc() + + ctx = SetRequestLabelForContext(ctx) res, err := node.query(ctx, qt) - if merr.Ok(res.Status) && err == nil { - username := GetCurUserFromContextOrDefault(ctx) - nodeID := paramtable.GetStringNodeID() - v := Extension.Report(map[string]any{ - hookutil.OpTypeKey: hookutil.OpTypeQuery, - hookutil.DatabaseKey: request.DbName, - hookutil.UsernameKey: username, - hookutil.ResultDataSizeKey: proto.Size(res), - hookutil.RelatedDataSizeKey: qt.totalRelatedDataSize, - hookutil.RelatedCntKey: qt.allQueryCnt, - }) - SetReportValue(res.Status, v) - metrics.ProxyReportValue.WithLabelValues(nodeID, hookutil.OpTypeQuery, request.DbName, username).Add(float64(v)) + if err != nil 
|| !merr.Ok(res.Status) { + return res, err } - return res, err + + log.Debug(rpcDone(method)) + + metrics.ProxyFunctionCall.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + method, + metrics.SuccessLabel, + request.GetDbName(), + request.GetCollectionName(), + ).Inc() + + sentSize := proto.Size(qt.result) + rateCol.Add(metricsinfo.ReadResultThroughput, float64(sentSize), subLabel) + metrics.ProxyReadReqSendBytes.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Add(float64(sentSize)) + + username := GetCurUserFromContextOrDefault(ctx) + nodeID := paramtable.GetStringNodeID() + v := Extension.Report(map[string]any{ + hookutil.OpTypeKey: hookutil.OpTypeQuery, + hookutil.DatabaseKey: request.DbName, + hookutil.UsernameKey: username, + hookutil.ResultDataSizeKey: proto.Size(res), + hookutil.RelatedDataSizeKey: qt.totalRelatedDataSize, + hookutil.RelatedCntKey: qt.allQueryCnt, + }) + SetReportValue(res.Status, v) + metrics.ProxyReportValue.WithLabelValues(nodeID, hookutil.OpTypeQuery, request.DbName, username).Add(float64(v)) + return res, nil } // CreateAlias create alias for collection, then you can search the collection with alias. diff --git a/internal/proxy/util.go b/internal/proxy/util.go index 0e6b373f27dc4..8982a17800aca 100644 --- a/internal/proxy/util.go +++ b/internal/proxy/util.go @@ -1620,3 +1620,22 @@ func GetCostValue(status *commonpb.Status) int { } return value } + +type isProxyRequestKeyType struct{} + +var ctxProxyRequestKey = isProxyRequestKeyType{} + +func SetRequestLabelForContext(ctx context.Context) context.Context { + return context.WithValue(ctx, ctxProxyRequestKey, true) +} + +func GetRequestLabelFromContext(ctx context.Context) bool { + if ctx == nil { + return false + } + v := ctx.Value(ctxProxyRequestKey) + if v == nil { + return false + } + return v.(bool) +} diff --git a/internal/proxy/util_test.go b/internal/proxy/util_test.go index 2d066d0f99add..46b3189351a0b 100644 --- a/internal/proxy/util_test.go +++ b/internal/proxy/util_test.go @@ -2294,3 +2294,24 @@ func TestGetCostValue(t *testing.T) { assert.Equal(t, 100, cost) }) } + +func TestRequestLabelWithContext(t *testing.T) { + ctx := context.Background() + + { + label := GetRequestLabelFromContext(ctx) + assert.False(t, label) + } + + ctx = SetRequestLabelForContext(ctx) + { + label := GetRequestLabelFromContext(ctx) + assert.True(t, label) + } + + { + // nolint + label := GetRequestLabelFromContext(nil) + assert.False(t, label) + } +} From 3c7d0209d44e4763955eefecf9a1092ac4e38f69 Mon Sep 17 00:00:00 2001 From: shaoting-huang <167743503+shaoting-huang@users.noreply.github.com> Date: Fri, 24 May 2024 14:23:40 +0800 Subject: [PATCH 062/126] enhance: update checker for go version (#33351) Signed-off-by: shaoting-huang [shaoting-huang@zilliz.com] issue: https://github.com/milvus-io/milvus/issues/32982 Signed-off-by: shaoting-huang --- .github/workflows/mac.yaml | 2 +- client/.golangci.yml | 2 +- client/go.mod | 2 +- pkg/go.mod | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/mac.yaml b/.github/workflows/mac.yaml index d17125b9d7c86..ccb21ebaab5af 100644 --- a/.github/workflows/mac.yaml +++ b/.github/workflows/mac.yaml @@ -56,7 +56,7 @@ jobs: - name: Setup Go environment uses: actions/setup-go@v2.2.0 with: - go-version: '~1.20.7' + go-version: '~1.21.10' - name: Mac Cache Go Mod Volumes uses: actions/cache@v3 with: diff --git a/client/.golangci.yml b/client/.golangci.yml index 5c90b6d694fc3..8b90a9f55a473 100644 --- a/client/.golangci.yml +++ 
b/client/.golangci.yml @@ -1,5 +1,5 @@ run: - go: "1.20" + go: "1.21" skip-dirs: - build - configs diff --git a/client/go.mod b/client/go.mod index 79dce6b878164..af0f2721f0df9 100644 --- a/client/go.mod +++ b/client/go.mod @@ -1,6 +1,6 @@ module github.com/milvus-io/milvus/client/v2 -go 1.20 +go 1.21 require ( github.com/blang/semver/v4 v4.0.0 diff --git a/pkg/go.mod b/pkg/go.mod index 1096ab575a204..8dc08d622c50f 100644 --- a/pkg/go.mod +++ b/pkg/go.mod @@ -1,6 +1,6 @@ module github.com/milvus-io/milvus/pkg -go 1.20 +go 1.21 require ( github.com/apache/pulsar-client-go v0.6.1-0.20210728062540-29414db801a7 From 370562b4ec3078e58a574c33e1cf7e4012af7cb3 Mon Sep 17 00:00:00 2001 From: Bingyi Sun Date: Fri, 24 May 2024 15:31:42 +0800 Subject: [PATCH 063/126] fix: fix partition loaded num metric (#33316) issue: https://github.com/milvus-io/milvus/issues/32108 Signed-off-by: sunby --- internal/querynodev2/segments/manager.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/internal/querynodev2/segments/manager.go b/internal/querynodev2/segments/manager.go index c0b890257f903..4a8cd52baed55 100644 --- a/internal/querynodev2/segments/manager.go +++ b/internal/querynodev2/segments/manager.go @@ -36,6 +36,7 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/querynodev2/segments/metricsutil" + "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/eventlog" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" @@ -723,11 +724,15 @@ func (mgr *segmentManager) updateMetric() { collections, partiations := make(typeutil.Set[int64]), make(typeutil.Set[int64]) for _, seg := range mgr.growingSegments { collections.Insert(seg.Collection()) - partiations.Insert(seg.Partition()) + if seg.Partition() != common.AllPartitionsID { + partiations.Insert(seg.Partition()) + } } for _, seg := range mgr.sealedSegments { collections.Insert(seg.Collection()) - partiations.Insert(seg.Partition()) + if seg.Partition() != common.AllPartitionsID { + partiations.Insert(seg.Partition()) + } } metrics.QueryNodeNumCollections.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Set(float64(collections.Len())) metrics.QueryNodeNumPartitions.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Set(float64(partiations.Len())) From 970bf18a49ca3d46762dc12ea013aff201b7003f Mon Sep 17 00:00:00 2001 From: congqixia Date: Fri, 24 May 2024 18:07:41 +0800 Subject: [PATCH 064/126] fix: Allocate new slice for each batch in streaming reader (#33359) Related to #33268 Signed-off-by: Congqi Xia --- internal/storage/serde.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/internal/storage/serde.go b/internal/storage/serde.go index ba5256c6d13f4..c75bf5aabc0b6 100644 --- a/internal/storage/serde.go +++ b/internal/storage/serde.go @@ -588,9 +588,8 @@ func (deser *DeserializeReader[T]) Next() error { deser.pos = 0 deser.rec = deser.rr.Record() - if deser.values == nil { - deser.values = make([]T, deser.rec.Len()) - } + // allocate new slice preventing overwrite previous batch + deser.values = make([]T, deser.rec.Len()) if err := deser.deserializer(deser.rec, deser.values); err != nil { return err } From ed883b39d7ccd3799f5d941f40c4587e939114aa Mon Sep 17 00:00:00 2001 From: zhuwenxing Date: Fri, 24 May 2024 18:47:41 +0800 Subject: [PATCH 065/126] test: use ml-dtypes lib to produce bf16 datatype (#33354) Signed-off-by: zhuwenxing --- 
tests/python_client/common/bulk_insert_data.py | 6 +++--- tests/python_client/common/common_func.py | 11 ++++------- tests/python_client/requirements.txt | 4 +--- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/tests/python_client/common/bulk_insert_data.py b/tests/python_client/common/bulk_insert_data.py index a064efde97034..ce80d9cbf0c23 100644 --- a/tests/python_client/common/bulk_insert_data.py +++ b/tests/python_client/common/bulk_insert_data.py @@ -4,7 +4,7 @@ import time import numpy as np -import jax.numpy as jnp +from ml_dtypes import bfloat16 import pandas as pd import random from faker import Faker @@ -128,9 +128,9 @@ def gen_bf16_vectors(num, dim, for_json=False): raw_vector = [random.random() for _ in range(dim)] raw_vectors.append(raw_vector) if for_json: - bf16_vector = np.array(jnp.array(raw_vector, dtype=jnp.bfloat16)).tolist() + bf16_vector = np.array(raw_vector, dtype=bfloat16).tolist() else: - bf16_vector = np.array(jnp.array(raw_vector, dtype=jnp.bfloat16)).view(np.uint8).tolist() + bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist() bf16_vectors.append(bf16_vector) return raw_vectors, bf16_vectors diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index 4a4c15d322c7a..71d3328ced8cd 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -8,7 +8,7 @@ from functools import singledispatch import numpy as np import pandas as pd -import jax.numpy as jnp +from ml_dtypes import bfloat16 from sklearn import preprocessing from npy_append_array import NpyAppendArray from faker import Faker @@ -20,7 +20,6 @@ from utils.util_log import test_log as log from customize.milvus_operator import MilvusOperator import pickle -import tensorflow as tf fake = Faker() """" Methods of processing data """ @@ -1070,14 +1069,12 @@ def gen_data_by_collection_field(field, nb=None, start=None): dim = field.params['dim'] if nb is None: raw_vector = [random.random() for _ in range(dim)] - bf16_vector = jnp.array(raw_vector, dtype=jnp.bfloat16) - bf16_vector = np.array(bf16_vector).view(np.uint8).tolist() + bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist() return bytes(bf16_vector) bf16_vectors = [] for i in range(nb): raw_vector = [random.random() for _ in range(dim)] - bf16_vector = jnp.array(raw_vector, dtype=jnp.bfloat16) - bf16_vector = np.array(bf16_vector).view(np.uint8).tolist() + bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist() bf16_vectors.append(bytes(bf16_vector)) return bf16_vectors if data_type == DataType.FLOAT16_VECTOR: @@ -2077,7 +2074,7 @@ def gen_bf16_vectors(num, dim): for _ in range(num): raw_vector = [random.random() for _ in range(dim)] raw_vectors.append(raw_vector) - bf16_vector = tf.cast(raw_vector, dtype=tf.bfloat16).numpy() + bf16_vector = np.array(raw_vector, dtype=bfloat16) bf16_vectors.append(bf16_vector) return raw_vectors, bf16_vectors diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt index 177e44cd3692f..99ad4f62c97d5 100644 --- a/tests/python_client/requirements.txt +++ b/tests/python_client/requirements.txt @@ -56,7 +56,5 @@ pyarrow==14.0.1 fastparquet==2023.7.0 # for bf16 datatype -jax==0.4.13 -jaxlib==0.4.13 -tensorflow==2.13.1 +ml-dtypes==0.2.0 From 36cbce4defaf9ef741f5adf79ee1f0092323e847 Mon Sep 17 00:00:00 2001 From: Xiaofan <83447078+xiaofan-luan@users.noreply.github.com> Date: Sat, 25 May 2024 04:43:41 +0800 Subject: [PATCH 066/126] 
enhance: optimize datanode cpu usage under large collection number (#33267) fix #33266 try to improve cpu usage by refactoring the ttchecker logic and caching string Signed-off-by: xiaofanluan --- internal/datanode/flow_graph_dd_node.go | 19 +++--- internal/datanode/timetick_sender.go | 3 +- internal/datanode/writebuffer/write_buffer.go | 4 +- internal/util/flowgraph/input_node.go | 22 ++++--- internal/util/flowgraph/node.go | 10 +-- internal/util/pipeline/node.go | 19 ++---- internal/util/pipeline/pipeline.go | 30 ++++----- pkg/util/timerecord/group_checker.go | 63 +++++++++++++------ pkg/util/timerecord/group_checker_test.go | 19 +++--- 9 files changed, 106 insertions(+), 83 deletions(-) diff --git a/internal/datanode/flow_graph_dd_node.go b/internal/datanode/flow_graph_dd_node.go index ea7d2e815db31..db78853f95074 100644 --- a/internal/datanode/flow_graph_dd_node.go +++ b/internal/datanode/flow_graph_dd_node.go @@ -91,10 +91,9 @@ func (ddn *ddNode) IsValidInMsg(in []Msg) bool { // Operate handles input messages, implementing flowgrpah.Node func (ddn *ddNode) Operate(in []Msg) []Msg { - log := log.With(zap.String("channel", ddn.vChannelName)) msMsg, ok := in[0].(*MsgStreamMsg) if !ok { - log.Warn("type assertion failed for MsgStreamMsg", zap.String("name", reflect.TypeOf(in[0]).Name())) + log.Warn("type assertion failed for MsgStreamMsg", zap.String("channel", ddn.vChannelName), zap.String("name", reflect.TypeOf(in[0]).Name())) return []Msg{} } @@ -110,12 +109,12 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { endPositions: msMsg.EndPositions(), dropCollection: false, } - log.Warn("MsgStream closed", zap.Any("ddNode node", ddn.Name()), zap.Int64("collection", ddn.collectionID)) + log.Warn("MsgStream closed", zap.Any("ddNode node", ddn.Name()), zap.String("channel", ddn.vChannelName), zap.Int64("collection", ddn.collectionID)) return []Msg{&fgMsg} } if load := ddn.dropMode.Load(); load != nil && load.(bool) { - log.RatedInfo(1.0, "ddNode in dropMode") + log.RatedInfo(1.0, "ddNode in dropMode", zap.String("channel", ddn.vChannelName)) return []Msg{} } @@ -146,10 +145,10 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { switch msg.Type() { case commonpb.MsgType_DropCollection: if msg.(*msgstream.DropCollectionMsg).GetCollectionID() == ddn.collectionID { - log.Info("Receiving DropCollection msg") + log.Info("Receiving DropCollection msg", zap.String("channel", ddn.vChannelName)) ddn.dropMode.Store(true) - log.Info("Stop compaction for dropped channel") + log.Info("Stop compaction for dropped channel", zap.String("channel", ddn.vChannelName)) ddn.compactionExecutor.discardByDroppedChannel(ddn.vChannelName) fgMsg.dropCollection = true } @@ -157,7 +156,7 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { case commonpb.MsgType_DropPartition: dpMsg := msg.(*msgstream.DropPartitionMsg) if dpMsg.GetCollectionID() == ddn.collectionID { - log.Info("drop partition msg received", zap.Int64("partitionID", dpMsg.GetPartitionID())) + log.Info("drop partition msg received", zap.String("channel", ddn.vChannelName), zap.Int64("partitionID", dpMsg.GetPartitionID())) fgMsg.dropPartitions = append(fgMsg.dropPartitions, dpMsg.PartitionID) } @@ -166,6 +165,7 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { if imsg.CollectionID != ddn.collectionID { log.Warn("filter invalid insert message, collection mis-match", zap.Int64("Get collID", imsg.CollectionID), + zap.String("channel", ddn.vChannelName), zap.Int64("Expected collID", ddn.collectionID)) continue } @@ -173,6 +173,7 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { 
if ddn.tryToFilterSegmentInsertMessages(imsg) { log.Debug("filter insert messages", zap.Int64("filter segmentID", imsg.GetSegmentID()), + zap.String("channel", ddn.vChannelName), zap.Uint64("message timestamp", msg.EndTs()), ) continue @@ -194,6 +195,7 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { log.Debug("DDNode receive insert messages", zap.Int64("segmentID", imsg.GetSegmentID()), + zap.String("channel", ddn.vChannelName), zap.Int("numRows", len(imsg.GetRowIDs()))) fgMsg.insertMessages = append(fgMsg.insertMessages, imsg) @@ -203,11 +205,12 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { if dmsg.CollectionID != ddn.collectionID { log.Warn("filter invalid DeleteMsg, collection mis-match", zap.Int64("Get collID", dmsg.CollectionID), + zap.String("channel", ddn.vChannelName), zap.Int64("Expected collID", ddn.collectionID)) continue } - log.Debug("DDNode receive delete messages", zap.Int64("numRows", dmsg.NumRows)) + log.Debug("DDNode receive delete messages", zap.String("channel", ddn.vChannelName), zap.Int64("numRows", dmsg.NumRows)) rateCol.Add(metricsinfo.DeleteConsumeThroughput, float64(proto.Size(&dmsg.DeleteRequest))) metrics.DataNodeConsumeBytesCount. diff --git a/internal/datanode/timetick_sender.go b/internal/datanode/timetick_sender.go index 145e60aec8cb8..ecce410c05501 100644 --- a/internal/datanode/timetick_sender.go +++ b/internal/datanode/timetick_sender.go @@ -148,7 +148,6 @@ func (m *timeTickSender) cleanStatesCache(lastSentTss map[string]uint64) { m.mu.Lock() defer m.mu.Unlock() sizeBeforeClean := len(m.statsCache) - log := log.With(zap.Any("lastSentTss", lastSentTss), zap.Int("sizeBeforeClean", sizeBeforeClean)) for channelName, lastSentTs := range lastSentTss { _, ok := m.statsCache[channelName] if ok { @@ -162,7 +161,7 @@ func (m *timeTickSender) cleanStatesCache(lastSentTss map[string]uint64) { delete(m.statsCache, channelName) } } - log.RatedDebug(30, "timeTickSender stats", zap.Int("sizeAfterClean", len(m.statsCache))) + log.RatedDebug(30, "timeTickSender stats", zap.Any("lastSentTss", lastSentTss), zap.Int("sizeBeforeClean", sizeBeforeClean), zap.Int("sizeAfterClean", len(m.statsCache))) } func (m *timeTickSender) sendReport(ctx context.Context) error { diff --git a/internal/datanode/writebuffer/write_buffer.go b/internal/datanode/writebuffer/write_buffer.go index 8456fb7ac2957..3dbd8df5ec7d7 100644 --- a/internal/datanode/writebuffer/write_buffer.go +++ b/internal/datanode/writebuffer/write_buffer.go @@ -240,7 +240,7 @@ func (wb *writeBufferBase) GetCheckpoint() *msgpb.MsgPosition { switch { case bufferCandidate == nil && syncCandidate == nil: // all buffer are empty - log.RatedInfo(60, "checkpoint from latest consumed msg") + log.RatedDebug(60, "checkpoint from latest consumed msg") return wb.checkpoint case bufferCandidate == nil && syncCandidate != nil: checkpoint = syncCandidate @@ -260,7 +260,7 @@ func (wb *writeBufferBase) GetCheckpoint() *msgpb.MsgPosition { cpSource = "syncManager" } - log.RatedInfo(20, "checkpoint evaluated", + log.RatedDebug(20, "checkpoint evaluated", zap.String("cpSource", cpSource), zap.Int64("segmentID", segmentID), zap.Uint64("cpTimestamp", checkpoint.GetTimestamp())) diff --git a/internal/util/flowgraph/input_node.go b/internal/util/flowgraph/input_node.go index 24eeff9b4e248..eed9850025639 100644 --- a/internal/util/flowgraph/input_node.go +++ b/internal/util/flowgraph/input_node.go @@ -43,13 +43,15 @@ const ( // InputNode is the entry point of flowgragh type InputNode struct { BaseNode - input <-chan *msgstream.MsgPack - 
lastMsg *msgstream.MsgPack - name string - role string - nodeID int64 - collectionID int64 - dataType string + input <-chan *msgstream.MsgPack + lastMsg *msgstream.MsgPack + name string + role string + nodeID int64 + nodeIDStr string + collectionID int64 + collectionIDStr string + dataType string closeGracefully *atomic.Bool @@ -117,11 +119,11 @@ func (inNode *InputNode) Operate(in []Msg) []Msg { sub := tsoutil.SubByNow(msgPack.EndTs) if inNode.role == typeutil.DataNodeRole { metrics.DataNodeConsumeMsgCount. - WithLabelValues(fmt.Sprint(inNode.nodeID), inNode.dataType, fmt.Sprint(inNode.collectionID)). + WithLabelValues(inNode.nodeIDStr, inNode.dataType, inNode.collectionIDStr). Inc() metrics.DataNodeConsumeTimeTickLag. - WithLabelValues(fmt.Sprint(inNode.nodeID), inNode.dataType, fmt.Sprint(inNode.collectionID)). + WithLabelValues(inNode.nodeIDStr, inNode.dataType, inNode.collectionIDStr). Set(float64(sub)) } @@ -192,7 +194,9 @@ func NewInputNode(input <-chan *msgstream.MsgPack, nodeName string, maxQueueLeng name: nodeName, role: role, nodeID: nodeID, + nodeIDStr: fmt.Sprint(nodeID), collectionID: collectionID, + collectionIDStr: fmt.Sprint(collectionID), dataType: dataType, closeGracefully: atomic.NewBool(CloseImmediately), skipCount: 0, diff --git a/internal/util/flowgraph/node.go b/internal/util/flowgraph/node.go index 0ae56f955efe2..f38a65aea4891 100644 --- a/internal/util/flowgraph/node.go +++ b/internal/util/flowgraph/node.go @@ -83,16 +83,16 @@ func (nodeCtxManager *nodeCtxManager) workNodeStart() { inputNode := nodeCtxManager.inputNodeCtx curNode := inputNode // tt checker start - var checker *timerecord.GroupChecker + var checker *timerecord.Checker if enableTtChecker { - checker = timerecord.GetGroupChecker("fgNode", nodeCtxTtInterval, func(list []string) { + manager := timerecord.GetCheckerManger("fgNode", nodeCtxTtInterval, func(list []string) { log.Warn("some node(s) haven't received input", zap.Strings("list", list), zap.Duration("duration ", nodeCtxTtInterval)) }) for curNode != nil { name := fmt.Sprintf("nodeCtxTtChecker-%s", curNode.node.Name()) - checker.Check(name) + checker = timerecord.NewChecker(name, manager) curNode = curNode.downstream - defer checker.Remove(name) + defer checker.Close() } } @@ -138,7 +138,7 @@ func (nodeCtxManager *nodeCtxManager) workNodeStart() { curNode.downstream.inputChannel <- output } if enableTtChecker { - checker.Check(fmt.Sprintf("nodeCtxTtChecker-%s", curNode.node.Name())) + checker.Check() } curNode = curNode.downstream } diff --git a/internal/util/pipeline/node.go b/internal/util/pipeline/node.go index fe16397dceabf..def0331794bd0 100644 --- a/internal/util/pipeline/node.go +++ b/internal/util/pipeline/node.go @@ -24,21 +24,20 @@ type Node interface { Name() string MaxQueueLength() int32 Operate(in Msg) Msg - Start() - Close() } type nodeCtx struct { node Node - inputChannel chan Msg - next *nodeCtx - checker *timerecord.GroupChecker + InputChannel chan Msg + + Next *nodeCtx + Checker *timerecord.Checker } -func newNodeCtx(node Node) *nodeCtx { +func NewNodeCtx(node Node) *nodeCtx { return &nodeCtx{ node: node, - inputChannel: make(chan Msg, node.MaxQueueLength()), + InputChannel: make(chan Msg, node.MaxQueueLength()), } } @@ -57,12 +56,6 @@ func (node *BaseNode) MaxQueueLength() int32 { return node.maxQueueLength } -// Start implementing Node, base node does nothing when starts -func (node *BaseNode) Start() {} - -// Close implementing Node, base node does nothing when stops -func (node *BaseNode) Close() {} - func NewBaseNode(name 
string, maxQueryLength int32) *BaseNode { return &BaseNode{ name: name, diff --git a/internal/util/pipeline/pipeline.go b/internal/util/pipeline/pipeline.go index 61212f4581992..cfc0db3e59832 100644 --- a/internal/util/pipeline/pipeline.go +++ b/internal/util/pipeline/pipeline.go @@ -37,8 +37,6 @@ type pipeline struct { inputChannel chan Msg nodeTtInterval time.Duration enableTtChecker bool - - checkerNames map[string]string } func (p *pipeline) Add(nodes ...Node) { @@ -48,21 +46,19 @@ func (p *pipeline) Add(nodes ...Node) { } func (p *pipeline) addNode(node Node) { - nodeCtx := newNodeCtx(node) + nodeCtx := NewNodeCtx(node) if p.enableTtChecker { - nodeCtx.checker = timerecord.GetGroupChecker("fgNode", p.nodeTtInterval, func(list []string) { + manager := timerecord.GetCheckerManger("fgNode", p.nodeTtInterval, func(list []string) { log.Warn("some node(s) haven't received input", zap.Strings("list", list), zap.Duration("duration ", p.nodeTtInterval)) }) - if p.checkerNames == nil { - p.checkerNames = make(map[string]string) - } - p.checkerNames[nodeCtx.node.Name()] = fmt.Sprintf("nodeCtxTtChecker-%s", nodeCtx.node.Name()) + name := fmt.Sprintf("nodeCtxTtChecker-%s", node.Name()) + nodeCtx.Checker = timerecord.NewChecker(name, manager) } if len(p.nodes) != 0 { - p.nodes[len(p.nodes)-1].next = nodeCtx + p.nodes[len(p.nodes)-1].Next = nodeCtx } else { - p.inputChannel = nodeCtx.inputChannel + p.inputChannel = nodeCtx.InputChannel } p.nodes = append(p.nodes, nodeCtx) @@ -82,18 +78,18 @@ func (p *pipeline) process() { curNode := p.nodes[0] for curNode != nil { - if len(curNode.inputChannel) == 0 { + if len(curNode.InputChannel) == 0 { break } - input := <-curNode.inputChannel + input := <-curNode.InputChannel output := curNode.node.Operate(input) - if _, ok := p.checkerNames[curNode.node.Name()]; ok { - curNode.checker.Check(p.checkerNames[curNode.node.Name()]) + if curNode.Checker != nil { + curNode.Checker.Check() } - if curNode.next != nil && output != nil { - curNode.next.inputChannel <- output + if curNode.Next != nil && output != nil { + curNode.Next.InputChannel <- output } - curNode = curNode.next + curNode = curNode.Next } } diff --git a/pkg/util/timerecord/group_checker.go b/pkg/util/timerecord/group_checker.go index d8502884d7938..c06dcd5ddeb9a 100644 --- a/pkg/util/timerecord/group_checker.go +++ b/pkg/util/timerecord/group_checker.go @@ -18,23 +18,47 @@ package timerecord import ( "sync" + "sync/atomic" "time" "github.com/milvus-io/milvus/pkg/util/typeutil" ) // groups maintains string to GroupChecker -var groups = typeutil.NewConcurrentMap[string, *GroupChecker]() +var groups = typeutil.NewConcurrentMap[string, *CheckerManager]() -// GroupChecker checks members in same group silent for certain period of time +type Checker struct { + name string + manager *CheckerManager + lastChecked atomic.Value +} + +func NewChecker(name string, manager *CheckerManager) *Checker { + checker := &Checker{} + checker.name = name + checker.manager = manager + checker.lastChecked.Store(time.Now()) + manager.Register(name, checker) + return checker +} + +func (checker *Checker) Check() { + checker.lastChecked.Store(time.Now()) +} + +func (checker *Checker) Close() { + checker.manager.Remove(checker.name) +} + +// CheckerManager checks members in same group silent for certain period of time // print warning msg if there are item(s) that not reported -type GroupChecker struct { +type CheckerManager struct { groupName string - d time.Duration // check duration - t *time.Ticker // internal ticker - ch chan 
struct{} // closing signal - lastest *typeutil.ConcurrentMap[string, time.Time] // map member name => lastest report time + d time.Duration // check duration + t *time.Ticker // internal ticker + ch chan struct{} // closing signal + checkers *typeutil.ConcurrentMap[string, *Checker] // map member name => checker initOnce sync.Once stopOnce sync.Once @@ -43,7 +67,7 @@ type GroupChecker struct { // init start worker goroutine // protected by initOnce -func (gc *GroupChecker) init() { +func (gc *CheckerManager) init() { gc.initOnce.Do(func() { gc.ch = make(chan struct{}) go gc.work() @@ -51,7 +75,7 @@ func (gc *GroupChecker) init() { } // work is the main procedure logic -func (gc *GroupChecker) work() { +func (gc *CheckerManager) work() { gc.t = time.NewTicker(gc.d) defer gc.t.Stop() @@ -63,8 +87,8 @@ func (gc *GroupChecker) work() { } var list []string - gc.lastest.Range(func(name string, ts time.Time) bool { - if time.Since(ts) > gc.d { + gc.checkers.Range(func(name string, checker *Checker) bool { + if time.Since(checker.lastChecked.Load().(time.Time)) > gc.d { list = append(list, name) } return true @@ -75,18 +99,17 @@ func (gc *GroupChecker) work() { } } -// Check updates the latest timestamp for provided name -func (gc *GroupChecker) Check(name string) { - gc.lastest.Insert(name, time.Now()) +func (gc *CheckerManager) Register(name string, checker *Checker) { + gc.checkers.Insert(name, checker) } // Remove deletes name from watch list -func (gc *GroupChecker) Remove(name string) { - gc.lastest.GetAndRemove(name) +func (gc *CheckerManager) Remove(name string) { + gc.checkers.GetAndRemove(name) } // Stop closes the GroupChecker -func (gc *GroupChecker) Stop() { +func (gc *CheckerManager) Stop() { gc.stopOnce.Do(func() { close(gc.ch) groups.GetAndRemove(gc.groupName) @@ -96,12 +119,12 @@ func (gc *GroupChecker) Stop() { // GetGroupChecker returns the GroupChecker with related group name // if no exist GroupChecker has the provided name, a new instance will be created with provided params // otherwise the params will be ignored -func GetGroupChecker(groupName string, duration time.Duration, fn func([]string)) *GroupChecker { - gc := &GroupChecker{ +func GetCheckerManger(groupName string, duration time.Duration, fn func([]string)) *CheckerManager { + gc := &CheckerManager{ groupName: groupName, d: duration, fn: fn, - lastest: typeutil.NewConcurrentMap[string, time.Time](), + checkers: typeutil.NewConcurrentMap[string, *Checker](), } gc, loaded := groups.GetOrInsert(groupName, gc) if !loaded { diff --git a/pkg/util/timerecord/group_checker_test.go b/pkg/util/timerecord/group_checker_test.go index 4d3d84b58f2b5..cef4521abb328 100644 --- a/pkg/util/timerecord/group_checker_test.go +++ b/pkg/util/timerecord/group_checker_test.go @@ -23,20 +23,24 @@ import ( "github.com/stretchr/testify/assert" ) -func TestGroupChecker(t *testing.T) { +func TestChecker(t *testing.T) { groupName := `test_group` signal := make(chan []string, 1) // 10ms period which set before is too short // change 10ms to 500ms to ensure the group checker schedule after the second value stored duration := 500 * time.Millisecond - gc1 := GetGroupChecker(groupName, duration, func(list []string) { + gc1 := GetCheckerManger(groupName, duration, func(list []string) { signal <- list }) - gc1.Check("1") - gc2 := GetGroupChecker(groupName, time.Second, func(list []string) { + + checker1 := NewChecker("1", gc1) + checker1.Check() + + gc2 := GetCheckerManger(groupName, time.Second, func(list []string) { t.FailNow() }) - gc2.Check("2") + 
checker2 := NewChecker("2", gc2) + checker2.Check() assert.Equal(t, duration, gc2.d) @@ -45,11 +49,12 @@ func TestGroupChecker(t *testing.T) { return len(list) == 2 }, duration*3, duration) - gc2.Remove("2") - + checker2.Close() list := <-signal assert.ElementsMatch(t, []string{"1"}, list) + checker1.Close() + assert.NotPanics(t, func() { gc1.Stop() gc2.Stop() From 1b67cecd6595322f7eae0304be5bddc6c2343e87 Mon Sep 17 00:00:00 2001 From: Buqian Zheng Date: Mon, 27 May 2024 00:47:40 +0800 Subject: [PATCH 067/126] enhance: add sparse float vector support to restful v2 (#33231) issue: #29419 also re-enabled an e2e test using restful api, which is previously disabled due to https://github.com/milvus-io/milvus/issues/32214. In restful api, the accepted json formats of sparse float vector are: * `{"indices": [1, 100, 1000], "values": [0.1, 0.2, 0.3]}` * {"1": 0.1, "100": 0.2, "1000": 0.3} for accepted indice and value range, see https://milvus.io/docs/sparse_vector.md#FAQ Signed-off-by: Buqian Zheng --- .../proxy/httpserver/handler_v2.go | 5 +- .../proxy/httpserver/handler_v2_test.go | 39 +++++- .../distributed/proxy/httpserver/utils.go | 51 +++++++ .../proxy/httpserver/utils_test.go | 130 +++++++++++------- .../proxy/httpserver/wrap_request.go | 35 +++++ .../proxy/httpserver/wrap_request_test.go | 95 +++++++++++++ pkg/util/typeutil/schema.go | 99 ++++++++----- pkg/util/typeutil/schema_test.go | 41 +++++- .../testcases/test_vector_operations.py | 35 ++--- 9 files changed, 422 insertions(+), 108 deletions(-) diff --git a/internal/distributed/proxy/httpserver/handler_v2.go b/internal/distributed/proxy/httpserver/handler_v2.go index ba97bc5fa41f7..dc457e9b47e88 100644 --- a/internal/distributed/proxy/httpserver/handler_v2.go +++ b/internal/distributed/proxy/httpserver/handler_v2.go @@ -848,7 +848,10 @@ func generatePlaceholderGroup(ctx context.Context, body string, collSchema *sche if vectorField == nil { return nil, errors.New("cannot find a vector field named: " + fieldName) } - dim, _ := getDim(vectorField) + dim := int64(0) + if !typeutil.IsSparseFloatVectorType(vectorField.DataType) { + dim, _ = getDim(vectorField) + } phv, err := convertVectors2Placeholder(body, vectorField.DataType, dim) if err != nil { return nil, err diff --git a/internal/distributed/proxy/httpserver/handler_v2_test.go b/internal/distributed/proxy/httpserver/handler_v2_test.go index 13cc65cee5634..ab6f71315e559 100644 --- a/internal/distributed/proxy/httpserver/handler_v2_test.go +++ b/internal/distributed/proxy/httpserver/handler_v2_test.go @@ -471,11 +471,11 @@ func TestDatabaseWrapper(t *testing.T) { func TestCreateCollection(t *testing.T) { postTestCases := []requestBodyTestCase{} mp := mocks.NewMockProxy(t) - mp.EXPECT().CreateCollection(mock.Anything, mock.Anything).Return(commonSuccessStatus, nil).Times(11) + mp.EXPECT().CreateCollection(mock.Anything, mock.Anything).Return(commonSuccessStatus, nil).Times(12) mp.EXPECT().CreateIndex(mock.Anything, mock.Anything).Return(commonSuccessStatus, nil).Times(6) mp.EXPECT().LoadCollection(mock.Anything, mock.Anything).Return(commonSuccessStatus, nil).Times(6) mp.EXPECT().CreateIndex(mock.Anything, mock.Anything).Return(commonErrorStatus, nil).Twice() - mp.EXPECT().CreateCollection(mock.Anything, mock.Anything).Return(commonErrorStatus, nil).Once() + mp.EXPECT().CreateCollection(mock.Anything, mock.Anything).Return(commonErrorStatus, nil).Twice() testEngine := initHTTPServerV2(mp, false) path := versionalV2(CollectionCategory, CreateAction) // quickly create collection @@ 
-564,6 +564,18 @@ func TestCreateCollection(t *testing.T) { ] }}`), }) + // dim should not be specified for SparseFloatVector field + postTestCases = append(postTestCases, requestBodyTestCase{ + path: path, + requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": true, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "isPartitionKey": false, "elementTypeParams": {}}, + {"fieldName": "partition_field", "dataType": "VarChar", "isPartitionKey": true, "elementTypeParams": {"max_length": 256}}, + {"fieldName": "book_intro", "dataType": "SparseFloatVector", "elementTypeParams": {}} + ] + }, "params": {"partitionsNum": "32"}}`), + }) postTestCases = append(postTestCases, requestBodyTestCase{ path: path, requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { @@ -612,6 +624,18 @@ func TestCreateCollection(t *testing.T) { errMsg: "", errCode: 65535, }) + postTestCases = append(postTestCases, requestBodyTestCase{ + path: path, + requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": true, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_intro", "dataType": "SparseFloatVector", "elementTypeParams": {"dim": 2}} + ] + }, "indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]}`), + errMsg: "", + errCode: 65535, + }) for _, testcase := range postTestCases { t.Run("post"+testcase.path, func(t *testing.T) { @@ -1240,16 +1264,19 @@ func TestSearchV2(t *testing.T) { float16VectorField.Name = "float16Vector" bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector) bfloat16VectorField.Name = "bfloat16Vector" + sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector) + sparseFloatVectorField.Name = "sparseFloatVector" collSchema.Fields = append(collSchema.Fields, &binaryVectorField) collSchema.Fields = append(collSchema.Fields, &float16VectorField) collSchema.Fields = append(collSchema.Fields, &bfloat16VectorField) + collSchema.Fields = append(collSchema.Fields, &sparseFloatVectorField) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, Schema: collSchema, ShardsNum: ShardNumDefault, Status: &StatusSuccess, - }, nil).Times(9) - mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Twice() + }, nil).Times(10) + mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3) testEngine := initHTTPServerV2(mp, false) queryTestCases := []requestBodyTestCase{} queryTestCases = append(queryTestCases, requestBodyTestCase{ @@ -1377,6 +1404,10 @@ func TestSearchV2(t *testing.T) { errMsg: "can only accept json format request, error: dimension: 2, bytesLen: 4, but length of []byte: 3: invalid parameter[expected=BFloat16Vector][actual=\x01\x02\x03]", errCode: 1801, }) + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: SearchAction, + requestBody: []byte(`{"collectionName": "book", "data": [{"1": 0.1}], "annsField": "sparseFloatVector", "filter": 
"book_id in [2, 4, 6, 8]", "limit": 4, "outputFields": ["word_count"]}`), + }) for _, testcase := range queryTestCases { t.Run("search", func(t *testing.T) { diff --git a/internal/distributed/proxy/httpserver/utils.go b/internal/distributed/proxy/httpserver/utils.go index c57e98d4eba85..a42c165cf52d0 100644 --- a/internal/distributed/proxy/httpserver/utils.go +++ b/internal/distributed/proxy/httpserver/utils.go @@ -248,6 +248,15 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray } reallyData[fieldName] = vectorArray + case schemapb.DataType_SparseFloatVector: + if dataString == "" { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray + } + sparseVec, err := typeutil.CreateSparseFloatRowFromJSON([]byte(dataString)) + if err != nil { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + } + reallyData[fieldName] = sparseVec case schemapb.DataType_Float16Vector: if dataString == "" { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray @@ -638,6 +647,9 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) data = make([][]byte, 0, rowsLen) dim, _ := getDim(field) nameDims[field.Name] = dim + case schemapb.DataType_SparseFloatVector: + data = make([][]byte, 0, rowsLen) + nameDims[field.Name] = int64(0) default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", field.DataType, field.Name) } @@ -704,6 +716,13 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) case schemapb.DataType_BFloat16Vector: nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) + case schemapb.DataType_SparseFloatVector: + content := candi.v.Interface().([]byte) + rowSparseDim := typeutil.SparseFloatRowDim(content) + if rowSparseDim > nameDims[field.Name] { + nameDims[field.Name] = rowSparseDim + } + nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), content) default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", field.DataType, field.Name) } @@ -895,6 +914,18 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) }, }, } + case schemapb.DataType_SparseFloatVector: + colData.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: nameDims[name], + Data: &schemapb.VectorField_SparseFloatVector{ + SparseFloatVector: &schemapb.SparseFloatArray{ + Dim: nameDims[name], + Contents: column.([][]byte), + }, + }, + }, + } default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", colData.Type, name) } @@ -963,6 +994,19 @@ func serializeByteVectors(vectorStr string, dataType schemapb.DataType, dimensio return values, nil } +func serializeSparseFloatVectors(vectors []gjson.Result, dataType schemapb.DataType) ([][]byte, error) { + values := make([][]byte, 0) + for _, vector := range vectors { + vectorBytes := []byte(vector.String()) + sparseVector, err := typeutil.CreateSparseFloatRowFromJSON(vectorBytes) + if err != nil { + return nil, 
merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), err.Error()) + } + values = append(values, sparseVector) + } + return values, nil +} + func convertVectors2Placeholder(body string, dataType schemapb.DataType, dimension int64) (*commonpb.PlaceholderValue, error) { var valueType commonpb.PlaceholderType var values [][]byte @@ -980,6 +1024,9 @@ func convertVectors2Placeholder(body string, dataType schemapb.DataType, dimensi case schemapb.DataType_BFloat16Vector: valueType = commonpb.PlaceholderType_BFloat16Vector values, err = serializeByteVectors(gjson.Get(body, HTTPRequestData).Raw, dataType, dimension, dimension*2) + case schemapb.DataType_SparseFloatVector: + valueType = commonpb.PlaceholderType_SparseFloatVector + values, err = serializeSparseFloatVectors(gjson.Get(body, HTTPRequestData).Array(), dataType) } if err != nil { return nil, err @@ -1070,6 +1117,8 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap rowsNum = int64(len(fieldDataList[0].GetVectors().GetFloat16Vector())/2) / fieldDataList[0].GetVectors().GetDim() case schemapb.DataType_BFloat16Vector: rowsNum = int64(len(fieldDataList[0].GetVectors().GetBfloat16Vector())/2) / fieldDataList[0].GetVectors().GetDim() + case schemapb.DataType_SparseFloatVector: + rowsNum = int64(len(fieldDataList[0].GetVectors().GetSparseFloatVector().Contents)) default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", fieldDataList[0].Type, fieldDataList[0].FieldName) } @@ -1125,6 +1174,8 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetFloat16Vector()[i*(fieldDataList[j].GetVectors().GetDim()*2) : (i+1)*(fieldDataList[j].GetVectors().GetDim()*2)] case schemapb.DataType_BFloat16Vector: row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetBfloat16Vector()[i*(fieldDataList[j].GetVectors().GetDim()*2) : (i+1)*(fieldDataList[j].GetVectors().GetDim()*2)] + case schemapb.DataType_SparseFloatVector: + row[fieldDataList[j].FieldName] = typeutil.SparseFloatBytesToMap(fieldDataList[j].GetVectors().GetSparseFloatVector().Contents[i]) case schemapb.DataType_Array: row[fieldDataList[j].FieldName] = fieldDataList[j].GetScalars().GetArrayData().Data[i] case schemapb.DataType_JSON: diff --git a/internal/distributed/proxy/httpserver/utils_test.go b/internal/distributed/proxy/httpserver/utils_test.go index 945783c335cb7..f860bb37fb125 100644 --- a/internal/distributed/proxy/httpserver/utils_test.go +++ b/internal/distributed/proxy/httpserver/utils_test.go @@ -16,6 +16,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/util/typeutil" ) const ( @@ -178,21 +179,45 @@ func generateVectorFieldData(vectorType schemapb.DataType) schemapb.FieldData { }, IsDynamic: false, } - } - return schemapb.FieldData{ - Type: schemapb.DataType_FloatVector, - FieldName: FieldBookIntro, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: 2, - Data: &schemapb.VectorField_FloatVector{ - FloatVector: &schemapb.FloatArray{ - Data: []float32{0.1, 0.11, 0.2, 0.22, 0.3, 0.33}, + case schemapb.DataType_FloatVector: + return schemapb.FieldData{ + Type: schemapb.DataType_FloatVector, + FieldName: FieldBookIntro, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: 2, + Data: 
&schemapb.VectorField_FloatVector{ + FloatVector: &schemapb.FloatArray{ + Data: []float32{0.1, 0.11, 0.2, 0.22, 0.3, 0.33}, + }, }, }, }, - }, - IsDynamic: false, + IsDynamic: false, + } + case schemapb.DataType_SparseFloatVector: + contents := make([][]byte, 0, 3) + contents = append(contents, typeutil.CreateSparseFloatRow([]uint32{1, 2, 3}, []float32{0.1, 0.11, 0.2})) + contents = append(contents, typeutil.CreateSparseFloatRow([]uint32{100, 200, 300}, []float32{10.1, 20.11, 30.2})) + contents = append(contents, typeutil.CreateSparseFloatRow([]uint32{1000, 2000, 3000}, []float32{5000.1, 7000.11, 9000.2})) + return schemapb.FieldData{ + Type: schemapb.DataType_SparseFloatVector, + FieldName: FieldBookIntro, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(3001), + Data: &schemapb.VectorField_SparseFloatVector{ + SparseFloatVector: &schemapb.SparseFloatArray{ + Dim: int64(3001), + Contents: contents, + }, + }, + }, + }, + IsDynamic: false, + } + default: + panic("unsupported vector type") } } @@ -1005,7 +1030,7 @@ func newFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schemapb.Data switch firstFieldType { case schemapb.DataType_None: - break + return fieldDatas case schemapb.DataType_Bool: return []*schemapb.FieldData{&fieldData1} case schemapb.DataType_Int8: @@ -1038,6 +1063,9 @@ func newFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schemapb.Data return []*schemapb.FieldData{&fieldData10} case schemapb.DataType_JSON: return []*schemapb.FieldData{&fieldData9} + case schemapb.DataType_SparseFloatVector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} default: return []*schemapb.FieldData{ { @@ -1046,8 +1074,6 @@ func newFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schemapb.Data }, } } - - return fieldDatas } func newSearchResult(results []map[string]interface{}) []map[string]interface{} { @@ -1225,26 +1251,30 @@ func TestVector(t *testing.T) { binaryVector := "vector-binary" float16Vector := "vector-float16" bfloat16Vector := "vector-bfloat16" + sparseFloatVector := "vector-sparse-float" row1 := map[string]interface{}{ - FieldBookID: int64(1), - floatVector: []float32{0.1, 0.11}, - binaryVector: []byte{1}, - float16Vector: []byte{1, 1, 11, 11}, - bfloat16Vector: []byte{1, 1, 11, 11}, + FieldBookID: int64(1), + floatVector: []float32{0.1, 0.11}, + binaryVector: []byte{1}, + float16Vector: []byte{1, 1, 11, 11}, + bfloat16Vector: []byte{1, 1, 11, 11}, + sparseFloatVector: map[uint32]float32{0: 0.1, 1: 0.11}, } row2 := map[string]interface{}{ - FieldBookID: int64(2), - floatVector: []float32{0.2, 0.22}, - binaryVector: []byte{2}, - float16Vector: []byte{2, 2, 22, 22}, - bfloat16Vector: []byte{2, 2, 22, 22}, + FieldBookID: int64(2), + floatVector: []float32{0.2, 0.22}, + binaryVector: []byte{2}, + float16Vector: []byte{2, 2, 22, 22}, + bfloat16Vector: []byte{2, 2, 22, 22}, + sparseFloatVector: map[uint32]float32{1000: 0.3, 200: 0.44}, } row3 := map[string]interface{}{ - FieldBookID: int64(3), - floatVector: []float32{0.3, 0.33}, - binaryVector: []byte{3}, - float16Vector: []byte{3, 3, 33, 33}, - bfloat16Vector: []byte{3, 3, 33, 33}, + FieldBookID: int64(3), + floatVector: []float32{0.3, 0.33}, + binaryVector: []byte{3}, + float16Vector: []byte{3, 3, 33, 33}, + bfloat16Vector: []byte{3, 3, 33, 33}, + sparseFloatVector: map[uint32]float32{987621: 32190.31, 32189: 0.0001}, } body, _ := wrapRequestBody([]map[string]interface{}{row1, row2, row3}) primaryField := 
generatePrimaryField(schemapb.DataType_Int64) @@ -1256,12 +1286,14 @@ func TestVector(t *testing.T) { float16VectorField.Name = float16Vector bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector) bfloat16VectorField.Name = bfloat16Vector + sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector) + sparseFloatVectorField.Name = sparseFloatVector collectionSchema := &schemapb.CollectionSchema{ Name: DefaultCollectionName, Description: "", AutoID: false, Fields: []*schemapb.FieldSchema{ - &primaryField, &floatVectorField, &binaryVectorField, &float16VectorField, &bfloat16VectorField, + &primaryField, &floatVectorField, &binaryVectorField, &float16VectorField, &bfloat16VectorField, &sparseFloatVectorField, }, EnableDynamicField: true, } @@ -1271,27 +1303,29 @@ func TestVector(t *testing.T) { assert.Equal(t, 1, len(row[binaryVector].([]byte))) assert.Equal(t, 4, len(row[float16Vector].([]byte))) assert.Equal(t, 4, len(row[bfloat16Vector].([]byte))) + // all test sparse rows have 2 elements, each should be of 8 bytes + assert.Equal(t, 16, len(row[sparseFloatVector].([]byte))) } data, err := anyToColumns(rows, collectionSchema) assert.Equal(t, nil, err) assert.Equal(t, len(collectionSchema.Fields)+1, len(data)) - row1[bfloat16Vector] = []int64{99999999, -99999999} - body, _ = wrapRequestBody([]map[string]interface{}{row1}) - err, _ = checkAndSetData(string(body), collectionSchema) - assert.Error(t, err) - row1[float16Vector] = []int64{99999999, -99999999} - body, _ = wrapRequestBody([]map[string]interface{}{row1}) - err, _ = checkAndSetData(string(body), collectionSchema) - assert.Error(t, err) - row1[binaryVector] = []int64{99999999, -99999999} - body, _ = wrapRequestBody([]map[string]interface{}{row1}) - err, _ = checkAndSetData(string(body), collectionSchema) - assert.Error(t, err) - row1[floatVector] = []float64{math.MaxFloat64, 0} - body, _ = wrapRequestBody([]map[string]interface{}{row1}) - err, _ = checkAndSetData(string(body), collectionSchema) - assert.Error(t, err) + assertError := func(field string, value interface{}) { + row := make(map[string]interface{}) + for k, v := range row1 { + row[k] = v + } + row[field] = value + body, _ = wrapRequestBody([]map[string]interface{}{row}) + err, _ = checkAndSetData(string(body), collectionSchema) + assert.Error(t, err) + } + + assertError(bfloat16Vector, []int64{99999999, -99999999}) + assertError(float16Vector, []int64{99999999, -99999999}) + assertError(binaryVector, []int64{99999999, -99999999}) + assertError(floatVector, []float64{math.MaxFloat64, 0}) + assertError(sparseFloatVector, map[uint32]float32{0: -0.1, 1: 0.11, 2: 0.12}) } func TestBuildQueryResps(t *testing.T) { @@ -1305,7 +1339,7 @@ func TestBuildQueryResps(t *testing.T) { } dataTypes := []schemapb.DataType{ - schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, + schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, schemapb.DataType_SparseFloatVector, schemapb.DataType_Bool, schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32, schemapb.DataType_Float, schemapb.DataType_Double, schemapb.DataType_String, schemapb.DataType_VarChar, diff --git a/internal/distributed/proxy/httpserver/wrap_request.go b/internal/distributed/proxy/httpserver/wrap_request.go index a8f5eec8b98e1..79d2f0dfa80c4 100644 --- 
a/internal/distributed/proxy/httpserver/wrap_request.go +++ b/internal/distributed/proxy/httpserver/wrap_request.go @@ -12,6 +12,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/util/typeutil" ) // We wrap original protobuf structure for 2 reasons: @@ -212,6 +213,40 @@ func (f *FieldData) AsSchemapb() (*schemapb.FieldData, error) { }, }, } + case schemapb.DataType_SparseFloatVector: + var wrappedData []map[string]interface{} + err := json.Unmarshal(raw, &wrappedData) + if err != nil { + return nil, newFieldDataError(f.FieldName, err) + } + if len(wrappedData) < 1 { + return nil, errors.New("at least one row for insert") + } + data := make([][]byte, len(wrappedData)) + dim := int64(0) + for _, row := range wrappedData { + rowData, err := typeutil.CreateSparseFloatRowFromMap(row) + if err != nil { + return nil, newFieldDataError(f.FieldName, err) + } + data = append(data, rowData) + rowDim := typeutil.SparseFloatRowDim(rowData) + if rowDim > dim { + dim = rowDim + } + } + + ret.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: dim, + Data: &schemapb.VectorField_SparseFloatVector{ + SparseFloatVector: &schemapb.SparseFloatArray{ + Dim: dim, + Contents: data, + }, + }, + }, + } default: return nil, errors.New("unsupported data type") } diff --git a/internal/distributed/proxy/httpserver/wrap_request_test.go b/internal/distributed/proxy/httpserver/wrap_request_test.go index defddf831a2c7..4d673fb6bd0ff 100644 --- a/internal/distributed/proxy/httpserver/wrap_request_test.go +++ b/internal/distributed/proxy/httpserver/wrap_request_test.go @@ -219,6 +219,101 @@ func TestFieldData_AsSchemapb(t *testing.T) { _, err := fieldData.AsSchemapb() assert.Error(t, err) }) + + t.Run("sparsefloatvector_ok_1", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"1": 0.1, "2": 0.2}, + {"3": 0.1, "5": 0.2}, + {"4": 0.1, "6": 0.2} + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.NoError(t, err) + }) + + t.Run("sparsefloatvector_ok_2", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"indices": [1, 2], "values": [0.1, 0.2]}, + {"indices": [3, 5], "values": [0.1, 0.2]}, + {"indices": [4, 6], "values": [0.1, 0.2]} + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.NoError(t, err) + }) + + t.Run("sparsefloatvector_ok_3", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"indices": [1, 2], "values": [0.1, 0.2]}, + {"3": 0.1, "5": 0.2}, + {"indices": [4, 6], "values": [0.1, 0.2]} + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.NoError(t, err) + }) + + t.Run("sparsefloatvector_empty_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + + t.Run("sparsefloatvector_invalid_json_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"3": 0.1, : 0.2} + ]`), + } 
+ raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + + t.Run("sparsefloatvector_invalid_row_1_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"indices": [1, 2], "values": [-0.1, 0.2]}, + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + + t.Run("sparsefloatvector_invalid_row_2_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"indices": [1, -2], "values": [0.1, 0.2]}, + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) } func Test_vector2Bytes(t *testing.T) { diff --git a/pkg/util/typeutil/schema.go b/pkg/util/typeutil/schema.go index 5a696be7431b4..fd29f632f7c89 100644 --- a/pkg/util/typeutil/schema.go +++ b/pkg/util/typeutil/schema.go @@ -1550,40 +1550,75 @@ func CreateSparseFloatRowFromMap(input map[string]interface{}) ([]byte, error) { return nil, fmt.Errorf("empty JSON input") } + getValue := func(key interface{}) (float32, error) { + var val float64 + switch v := key.(type) { + case int: + val = float64(v) + case float64: + val = v + case json.Number: + if num, err := strconv.ParseFloat(v.String(), 64); err == nil { + val = num + } else { + return 0, fmt.Errorf("invalid value type in JSON: %s", reflect.TypeOf(v)) + } + default: + return 0, fmt.Errorf("invalid value type in JSON: %s", reflect.TypeOf(key)) + } + if VerifyFloat(val) != nil { + return 0, fmt.Errorf("invalid value in JSON: %v", val) + } + if val > math.MaxFloat32 { + return 0, fmt.Errorf("value too large in JSON: %v", val) + } + return float32(val), nil + } + + getIndex := func(key interface{}) (uint32, error) { + var idx int64 + switch v := key.(type) { + case int: + idx = int64(v) + case float64: + // check if the float64 is actually an integer + if v != float64(int64(v)) { + return 0, fmt.Errorf("invalid index in JSON: %v", v) + } + idx = int64(v) + case json.Number: + if num, err := strconv.ParseInt(v.String(), 0, 64); err == nil { + idx = num + } else { + return 0, err + } + default: + return 0, fmt.Errorf("invalid index type in JSON: %s", reflect.TypeOf(key)) + } + if idx >= math.MaxUint32 { + return 0, fmt.Errorf("index too large in JSON: %v", idx) + } + return uint32(idx), nil + } + jsonIndices, ok1 := input["indices"].([]interface{}) jsonValues, ok2 := input["values"].([]interface{}) if ok1 && ok2 { // try format1 for _, idx := range jsonIndices { - if i1, s1 := idx.(int); s1 { - indices = append(indices, uint32(i1)) - } else if i2, s2 := idx.(float64); s2 && i2 == float64(int(i2)) { - indices = append(indices, uint32(i2)) - } else if i3, s3 := idx.(json.Number); s3 { - if num, err := strconv.ParseUint(i3.String(), 0, 32); err == nil { - indices = append(indices, uint32(num)) - } else { - return nil, err - } - } else { - return nil, fmt.Errorf("invalid indicies type: %v(%s)", idx, reflect.TypeOf(idx)) + index, err := getIndex(idx) + if err != nil { + return nil, err } + indices = append(indices, index) } for _, val := range jsonValues { - if v1, s1 := val.(int); s1 { - values = append(values, float32(v1)) - } else if v2, s2 := val.(float64); s2 { - values = append(values, float32(v2)) - } else if v3, s3 := val.(json.Number); s3 { - if num, err := strconv.ParseFloat(v3.String(), 32); err == nil { - values = 
append(values, float32(num)) - } else { - return nil, err - } - } else { - return nil, fmt.Errorf("invalid values type: %v(%s)", val, reflect.TypeOf(val)) + value, err := getValue(val) + if err != nil { + return nil, err } + values = append(values, value) } } else if !ok1 && !ok2 { // try format2 @@ -1593,21 +1628,13 @@ func CreateSparseFloatRowFromMap(input map[string]interface{}) ([]byte, error) { return nil, err } - var val float64 - val, ok := v.(float64) - if !ok { - num, ok := v.(json.Number) - if !ok { - return nil, fmt.Errorf("invalid value type in JSON: %s", reflect.TypeOf(v)) - } - val, err = strconv.ParseFloat(num.String(), 32) - if err != nil { - return nil, err - } + val, err := getValue(v) + if err != nil { + return nil, err } indices = append(indices, uint32(idx)) - values = append(values, float32(val)) + values = append(values, val) } } else { return nil, fmt.Errorf("invalid JSON input") diff --git a/pkg/util/typeutil/schema_test.go b/pkg/util/typeutil/schema_test.go index f487336b94c17..6e6a6ec698776 100644 --- a/pkg/util/typeutil/schema_test.go +++ b/pkg/util/typeutil/schema_test.go @@ -18,6 +18,7 @@ package typeutil import ( "encoding/binary" + "fmt" "math" "reflect" "testing" @@ -2140,7 +2141,7 @@ func TestParseJsonSparseFloatRow(t *testing.T) { assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res) }) - t.Run("valid row 3", func(t *testing.T) { + t.Run("valid row 4", func(t *testing.T) { row := map[string]interface{}{"indices": []interface{}{math.MaxInt32 + 1}, "values": []interface{}{1.0}} res, err := CreateSparseFloatRowFromMap(row) assert.NoError(t, err) @@ -2177,6 +2178,30 @@ func TestParseJsonSparseFloatRow(t *testing.T) { assert.Error(t, err) }) + t.Run("invalid row 6", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{-1}, "values": []interface{}{0.2}} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid row 7", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{math.MaxUint32}, "values": []interface{}{1.0}} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid row 8", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{math.MaxUint32 + 10}, "values": []interface{}{1.0}} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid row 9", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{10}, "values": []interface{}{float64(math.MaxFloat32) * 2}} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + t.Run("valid dict row 1", func(t *testing.T) { row := map[string]interface{}{"1": 1.0, "3": 2.0, "5": 3.0} res, err := CreateSparseFloatRowFromMap(row) @@ -2228,7 +2253,19 @@ func TestParseJsonSparseFloatRow(t *testing.T) { }) t.Run("invalid dict row 7", func(t *testing.T) { - row := map[string]interface{}{"1.1": 1.0, "3": 2.0, "5": 3.0} + row := map[string]interface{}{fmt.Sprint(math.MaxUint32): 1.0, "3": 2.0, "5": 3.0} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 8", func(t *testing.T) { + row := map[string]interface{}{fmt.Sprint(math.MaxUint32 + 10): 1.0, "3": 2.0, "5": 3.0} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 8", func(t *testing.T) { + row := map[string]interface{}{fmt.Sprint(math.MaxUint32 + 10): 1.0, "3": 2.0, "5": float64(math.MaxFloat32) * 2} _, err := 
CreateSparseFloatRowFromMap(row) assert.Error(t, err) }) diff --git a/tests/restful_client_v2/testcases/test_vector_operations.py b/tests/restful_client_v2/testcases/test_vector_operations.py index d2b74552ff331..5878946ff65f5 100644 --- a/tests/restful_client_v2/testcases/test_vector_operations.py +++ b/tests/restful_client_v2/testcases/test_vector_operations.py @@ -830,15 +830,15 @@ def test_search_vector_with_float_vector_datatype(self, nb, dim, insert_round, a assert len(rsp['data']) == 100 - @pytest.mark.parametrize("insert_round", [1]) - @pytest.mark.parametrize("auto_id", [True]) - @pytest.mark.parametrize("is_partition_key", [True]) + @pytest.mark.parametrize("insert_round", [1, 10]) + @pytest.mark.parametrize("auto_id", [True, False]) + @pytest.mark.parametrize("is_partition_key", [True, False]) @pytest.mark.parametrize("enable_dynamic_schema", [True]) @pytest.mark.parametrize("nb", [3000]) @pytest.mark.parametrize("dim", [128]) - @pytest.mark.xfail(reason="issue https://github.com/milvus-io/milvus/issues/32214") + @pytest.mark.parametrize("groupingField", ['user_id', None]) def test_search_vector_with_sparse_float_vector_datatype(self, nb, dim, insert_round, auto_id, - is_partition_key, enable_dynamic_schema): + is_partition_key, enable_dynamic_schema, groupingField): """ Insert a vector with a simple payload """ @@ -860,7 +860,7 @@ def test_search_vector_with_sparse_float_vector_datatype(self, nb, dim, insert_r }, "indexParams": [ {"fieldName": "sparse_float_vector", "indexName": "sparse_float_vector", "metricType": "IP", - "indexConfig": {"index_type": "SPARSE_INVERTED_INDEX", "drop_ratio_build": "0.2"}} + "params": {"index_type": "SPARSE_INVERTED_INDEX", "drop_ratio_build": "0.2"}} ] } rsp = self.collection_client.collection_create(payload) @@ -871,20 +871,21 @@ def test_search_vector_with_sparse_float_vector_datatype(self, nb, dim, insert_r # insert data for i in range(insert_round): data = [] - for i in range(nb): + for j in range(nb): + idx = i * nb + j if auto_id: tmp = { - "user_id": i%100, - "word_count": i, - "book_describe": f"book_{i}", + "user_id": idx%100, + "word_count": j, + "book_describe": f"book_{idx}", "sparse_float_vector": gen_vector(datatype="SparseFloatVector", dim=dim), } else: tmp = { - "book_id": i, - "user_id": i%100, - "word_count": i, - "book_describe": f"book_{i}", + "book_id": idx, + "user_id": idx%100, + "word_count": j, + "book_describe": f"book_{idx}", "sparse_float_vector": gen_vector(datatype="SparseFloatVector", dim=dim), } if enable_dynamic_schema: @@ -902,7 +903,6 @@ def test_search_vector_with_sparse_float_vector_datatype(self, nb, dim, insert_r "collectionName": name, "data": [gen_vector(datatype="SparseFloatVector", dim=dim)], "filter": "word_count > 100", - "groupingField": "user_id", "outputFields": ["*"], "searchParams": { "metricType": "IP", @@ -910,11 +910,12 @@ def test_search_vector_with_sparse_float_vector_datatype(self, nb, dim, insert_r "drop_ratio_search": "0.2", } }, - "limit": 100, + "limit": 500, } + if groupingField: + payload["groupingField"] = groupingField rsp = self.vector_client.vector_search(payload) assert rsp['code'] == 200 - assert len(rsp['data']) == 100 From 760223f80a1d9400cd22a8a71f31b7a8ec6b28d7 Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Mon, 27 May 2024 01:25:40 +0800 Subject: [PATCH 068/126] fix: use seperate warmup pool and disable warmup by default (#33348) 1. use a small warmup pool to reduce the impact of warmup 2. change the warmup pool to nonblocking mode 3. disable warmup by default 4. 
remove the maximum size limit of 16 for the load pool issue: https://github.com/milvus-io/milvus/issues/32772 --------- Signed-off-by: bigsheeper Co-authored-by: xiaofanluan --- configs/milvus.yaml | 2 +- internal/querynodev2/segments/pool.go | 47 +++++++++++++++++---- internal/querynodev2/segments/pool_test.go | 21 +++++++++ internal/querynodev2/segments/segment.go | 4 +- pkg/util/paramtable/component_param.go | 2 +- pkg/util/paramtable/component_param_test.go | 2 +- 6 files changed, 64 insertions(+), 14 deletions(-) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index d1a3b7d8552a0..91cfcc554a86b 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -335,7 +335,7 @@ queryNode: # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage; # 2. If set to "off," original vector data will only be loaded into the chunk cache during search/query. - warmup: async + warmup: off mmap: mmapEnabled: false # Enable mmap for loading data lazyload: diff --git a/internal/querynodev2/segments/pool.go b/internal/querynodev2/segments/pool.go index bbf21b91a746e..cb025d8c749a3 100644 --- a/internal/querynodev2/segments/pool.go +++ b/internal/querynodev2/segments/pool.go @@ -37,12 +37,14 @@ var ( // and other operations (insert/delete/statistics/etc.) // since in concurrent situation, there operation may block each other in high payload - sqp atomic.Pointer[conc.Pool[any]] - sqOnce sync.Once - dp atomic.Pointer[conc.Pool[any]] - dynOnce sync.Once - loadPool atomic.Pointer[conc.Pool[any]] - loadOnce sync.Once + sqp atomic.Pointer[conc.Pool[any]] + sqOnce sync.Once + dp atomic.Pointer[conc.Pool[any]] + dynOnce sync.Once + loadPool atomic.Pointer[conc.Pool[any]] + loadOnce sync.Once + warmupPool atomic.Pointer[conc.Pool[any]] + warmupOnce sync.Once ) // initSQPool initialize @@ -80,9 +82,6 @@ func initLoadPool() { loadOnce.Do(func() { pt := paramtable.Get() poolSize := hardware.GetCPUNum() * pt.CommonCfg.MiddlePriorityThreadCoreCoefficient.GetAsInt() - if poolSize > 16 { - poolSize = 16 - } pool := conc.NewPool[any]( poolSize, conc.WithPreAlloc(false), @@ -96,6 +95,23 @@ func initLoadPool() { }) } +func initWarmupPool() { + warmupOnce.Do(func() { + pt := paramtable.Get() + poolSize := hardware.GetCPUNum() * pt.CommonCfg.LowPriorityThreadCoreCoefficient.GetAsInt() + pool := conc.NewPool[any]( + poolSize, + conc.WithPreAlloc(false), + conc.WithDisablePurge(false), + conc.WithPreHandler(runtime.LockOSThread), // lock os thread for cgo thread disposal + conc.WithNonBlocking(true), // make warming up non blocking + ) + + warmupPool.Store(pool) + pt.Watch(pt.CommonCfg.LowPriorityThreadCoreCoefficient.Key, config.NewHandler("qn.warmpool.lowpriority", ResizeWarmupPool)) + }) +} + // GetSQPool returns the singleton pool instance for search/query operations. 
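// Editorial sketch (not part of the original patch): initWarmupPool above sizes the
// pool from LowPriorityThreadCoreCoefficient and builds it with conc.WithNonBlocking(true),
// so warmup submissions are assumed not to block callers once the pool is saturated.
// A minimal, hypothetical caller mirroring the sync/async usage this patch adds in
// segment.go (helper name and signature are illustrative only):
//
//	func submitWarmup(work func() (any, error), sync bool) {
//	    future := GetWarmupPool().Submit(work) // hand-off; assumed non-blocking when the pool is full
//	    if sync {
//	        future.Await() // "sync" warmup: wait for the task before returning
//	    }
//	    // "async" warmup: return immediately and let the task run in the background
//	}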
func GetSQPool() *conc.Pool[any] { initSQPool() @@ -113,6 +129,11 @@ func GetLoadPool() *conc.Pool[any] { return loadPool.Load() } +func GetWarmupPool() *conc.Pool[any] { + initWarmupPool() + return warmupPool.Load() +} + func ResizeSQPool(evt *config.Event) { if evt.HasUpdated { pt := paramtable.Get() @@ -131,6 +152,14 @@ func ResizeLoadPool(evt *config.Event) { } } +func ResizeWarmupPool(evt *config.Event) { + if evt.HasUpdated { + pt := paramtable.Get() + newSize := hardware.GetCPUNum() * pt.CommonCfg.LowPriorityThreadCoreCoefficient.GetAsInt() + resizePool(GetWarmupPool(), newSize, "WarmupPool") + } +} + func resizePool(pool *conc.Pool[any], newSize int, tag string) { log := log.Ctx(context.Background()). With( diff --git a/internal/querynodev2/segments/pool_test.go b/internal/querynodev2/segments/pool_test.go index 6c817bdb1eb9a..868bce4186236 100644 --- a/internal/querynodev2/segments/pool_test.go +++ b/internal/querynodev2/segments/pool_test.go @@ -82,6 +82,27 @@ func TestResizePools(t *testing.T) { assert.Equal(t, expectedCap, GetLoadPool().Cap()) }) + t.Run("WarmupPool", func(t *testing.T) { + expectedCap := hardware.GetCPUNum() * pt.CommonCfg.LowPriorityThreadCoreCoefficient.GetAsInt() + + ResizeWarmupPool(&config.Event{ + HasUpdated: true, + }) + assert.Equal(t, expectedCap, GetWarmupPool().Cap()) + + pt.Save(pt.CommonCfg.LowPriorityThreadCoreCoefficient.Key, strconv.FormatFloat(pt.CommonCfg.LowPriorityThreadCoreCoefficient.GetAsFloat()*2, 'f', 10, 64)) + ResizeWarmupPool(&config.Event{ + HasUpdated: true, + }) + assert.Equal(t, expectedCap, GetWarmupPool().Cap()) + + pt.Save(pt.CommonCfg.LowPriorityThreadCoreCoefficient.Key, "0") + ResizeWarmupPool(&config.Event{ + HasUpdated: true, + }) + assert.Equal(t, expectedCap, GetWarmupPool().Cap()) + }) + t.Run("error_pool", func(*testing.T) { pool := conc.NewDefaultPool[any]() c := pool.Cap() diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 68f914d73302c..e4ad589e41e2e 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -1386,7 +1386,7 @@ func (s *LocalSegment) WarmupChunkCache(ctx context.Context, fieldID int64) { warmingUp := strings.ToLower(paramtable.Get().QueryNodeCfg.ChunkCacheWarmingUp.GetValue()) switch warmingUp { case "sync": - GetLoadPool().Submit(func() (any, error) { + GetWarmupPool().Submit(func() (any, error) { cFieldID := C.int64_t(fieldID) status = C.WarmupChunkCache(s.ptr, cFieldID) if err := HandleCStatus(ctx, &status, "warming up chunk cache failed"); err != nil { @@ -1397,7 +1397,7 @@ func (s *LocalSegment) WarmupChunkCache(ctx context.Context, fieldID int64) { return nil, nil }).Await() case "async": - GetLoadPool().Submit(func() (any, error) { + GetWarmupPool().Submit(func() (any, error) { if !s.ptrLock.RLockIf(state.IsNotReleased) { return nil, nil } diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 735987f994159..317ad937f8b1d 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -2357,7 +2357,7 @@ func (p *queryNodeConfig) init(base *BaseTable) { p.ChunkCacheWarmingUp = ParamItem{ Key: "queryNode.cache.warmup", Version: "2.3.6", - DefaultValue: "async", + DefaultValue: "off", Doc: `options: async, sync, off. Specifies the necessity for warming up the chunk cache. 1. 
If set to "sync" or "async," the original vector data will be synchronously/asynchronously loaded into the diff --git a/pkg/util/paramtable/component_param_test.go b/pkg/util/paramtable/component_param_test.go index d3918c9d432e3..cba13b38931dc 100644 --- a/pkg/util/paramtable/component_param_test.go +++ b/pkg/util/paramtable/component_param_test.go @@ -339,7 +339,7 @@ func TestComponentParam(t *testing.T) { // chunk cache assert.Equal(t, "willneed", Params.ReadAheadPolicy.GetValue()) - assert.Equal(t, "async", Params.ChunkCacheWarmingUp.GetValue()) + assert.Equal(t, "false", Params.ChunkCacheWarmingUp.GetValue()) // test small indexNlist/NProbe default params.Remove("queryNode.segcore.smallIndex.nlist") From e708974578f03de17d76423617190799dee3c6da Mon Sep 17 00:00:00 2001 From: "cai.zhang" Date: Mon, 27 May 2024 01:37:28 +0800 Subject: [PATCH 069/126] enhance: Remove unused proto importing (#33321) Signed-off-by: Cai Zhang --- internal/proto/index_coord.proto | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/proto/index_coord.proto b/internal/proto/index_coord.proto index 9204d7da2a9c7..0c0cea0361100 100644 --- a/internal/proto/index_coord.proto +++ b/internal/proto/index_coord.proto @@ -8,7 +8,6 @@ import "common.proto"; import "internal.proto"; import "milvus.proto"; import "schema.proto"; -import "index_cgo_msg.proto"; service IndexCoord { rpc GetComponentStates(milvus.GetComponentStatesRequest) returns (milvus.ComponentStates) {} From 58ee613feac481da53da7674aaa5736631a15307 Mon Sep 17 00:00:00 2001 From: jaime Date: Mon, 27 May 2024 01:49:41 +0800 Subject: [PATCH 070/126] enhance: remove repeated stats of loaded entity (#33255) Signed-off-by: jaime --- internal/querynodev2/segments/segment.go | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index e4ad589e41e2e..614f3102213c1 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -30,7 +30,6 @@ import ( "fmt" "io" "runtime" - "strconv" "strings" "unsafe" @@ -440,15 +439,6 @@ func (s *LocalSegment) initializeSegment() error { // Update the insert count when initialize the segment and update the metrics. 
s.insertCount.Store(loadInfo.GetNumOfRows()) - metrics.QueryNodeNumEntities.WithLabelValues( - s.DatabaseName(), - fmt.Sprint(paramtable.GetNodeID()), - fmt.Sprint(s.Collection()), - fmt.Sprint(s.Partition()), - s.Type().String(), - strconv.FormatInt(int64(len(s.Indexes())), 10), - ).Add(float64(loadInfo.GetNumOfRows())) - return nil } @@ -808,15 +798,6 @@ func (s *LocalSegment) Insert(ctx context.Context, rowIDs []int64, timestamps [] } s.insertCount.Add(int64(numOfRow)) - metrics.QueryNodeNumEntities.WithLabelValues( - s.DatabaseName(), - fmt.Sprint(paramtable.GetNodeID()), - fmt.Sprint(s.Collection()), - fmt.Sprint(s.Partition()), - s.Type().String(), - strconv.FormatInt(int64(len(s.Indexes())), 10), - ).Add(float64(numOfRow)) - s.rowNum.Store(-1) s.memSize.Store(-1) return nil From cb99e3db34d7e22362df08a76bc7178b9a78d242 Mon Sep 17 00:00:00 2001 From: SimFG Date: Mon, 27 May 2024 10:31:41 +0800 Subject: [PATCH 071/126] enhance: add the includeCurrentMsg param for the Seek method (#33326) /kind improvement - issue: #33325 Signed-off-by: SimFG --- .../flow_graph_dmstream_input_node_test.go | 2 +- .../mqwrapper/rmq/rocksmq_msgstream_test.go | 8 ++-- internal/proxy/mock_test.go | 2 +- .../querynodev2/delegator/delegator_data.go | 2 +- .../delegator/delegator_data_test.go | 4 +- internal/querynodev2/services_test.go | 6 +-- internal/rootcoord/dml_channels_test.go | 6 ++- pkg/mq/msgdispatcher/dispatcher.go | 2 +- pkg/mq/msgstream/factory_stream_test.go | 2 +- pkg/mq/msgstream/mock_msgstream.go | 47 ++++++++++--------- pkg/mq/msgstream/mq_kafka_msgstream_test.go | 4 +- pkg/mq/msgstream/mq_msgstream.go | 8 ++-- pkg/mq/msgstream/mq_msgstream_test.go | 16 +++---- pkg/mq/msgstream/msgstream.go | 4 +- 14 files changed, 59 insertions(+), 54 deletions(-) diff --git a/internal/datanode/flow_graph_dmstream_input_node_test.go b/internal/datanode/flow_graph_dmstream_input_node_test.go index 75df57af0b49c..ae804fe19e5d2 100644 --- a/internal/datanode/flow_graph_dmstream_input_node_test.go +++ b/internal/datanode/flow_graph_dmstream_input_node_test.go @@ -91,7 +91,7 @@ func (mtm *mockTtMsgStream) Broadcast(*msgstream.MsgPack) (map[string][]msgstrea return nil, nil } -func (mtm *mockTtMsgStream) Seek(ctx context.Context, offset []*msgpb.MsgPosition) error { +func (mtm *mockTtMsgStream) Seek(ctx context.Context, msgPositions []*msgstream.MsgPosition, includeCurrentMsg bool) error { return nil } diff --git a/internal/mq/msgstream/mqwrapper/rmq/rocksmq_msgstream_test.go b/internal/mq/msgstream/mqwrapper/rmq/rocksmq_msgstream_test.go index e29171d1437a5..96be3628c9056 100644 --- a/internal/mq/msgstream/mqwrapper/rmq/rocksmq_msgstream_test.go +++ b/internal/mq/msgstream/mqwrapper/rmq/rocksmq_msgstream_test.go @@ -240,7 +240,7 @@ func TestMqMsgStream_SeekNotSubscribed(t *testing.T) { ChannelName: "b", }, } - err = m.Seek(context.Background(), p) + err = m.Seek(context.Background(), p, false) assert.Error(t, err) } @@ -403,7 +403,7 @@ func TestStream_RmqTtMsgStream_DuplicatedIDs(t *testing.T) { outputStream, _ = msgstream.NewMqTtMsgStream(context.Background(), 100, 100, rmqClient, factory.NewUnmarshalDispatcher()) consumerSubName = funcutil.RandomString(8) outputStream.AsConsumer(ctx, consumerChannels, consumerSubName, mqwrapper.SubscriptionPositionUnknown) - outputStream.Seek(ctx, receivedMsg.StartPositions) + outputStream.Seek(ctx, receivedMsg.StartPositions, false) seekMsg := consumer(ctx, outputStream) assert.Equal(t, len(seekMsg.Msgs), 1+2) assert.EqualValues(t, seekMsg.Msgs[0].BeginTs(), 1) @@ -506,7 
+506,7 @@ func TestStream_RmqTtMsgStream_Seek(t *testing.T) { consumerSubName = funcutil.RandomString(8) outputStream.AsConsumer(ctx, consumerChannels, consumerSubName, mqwrapper.SubscriptionPositionUnknown) - outputStream.Seek(ctx, receivedMsg3.StartPositions) + outputStream.Seek(ctx, receivedMsg3.StartPositions, false) seekMsg := consumer(ctx, outputStream) assert.Equal(t, len(seekMsg.Msgs), 3) result := []uint64{14, 12, 13} @@ -565,7 +565,7 @@ func TestStream_RMqMsgStream_SeekInvalidMessage(t *testing.T) { }, } - err = outputStream2.Seek(ctx, p) + err = outputStream2.Seek(ctx, p, false) assert.NoError(t, err) for i := 10; i < 20; i++ { diff --git a/internal/proxy/mock_test.go b/internal/proxy/mock_test.go index 5675b100fa54d..96ee30669a0fe 100644 --- a/internal/proxy/mock_test.go +++ b/internal/proxy/mock_test.go @@ -298,7 +298,7 @@ func (ms *simpleMockMsgStream) GetProduceChannels() []string { return nil } -func (ms *simpleMockMsgStream) Seek(ctx context.Context, offset []*msgstream.MsgPosition) error { +func (ms *simpleMockMsgStream) Seek(ctx context.Context, msgPositions []*msgstream.MsgPosition, includeCurrentMsg bool) error { return nil } diff --git a/internal/querynodev2/delegator/delegator_data.go b/internal/querynodev2/delegator/delegator_data.go index 6055d8ec731ea..3990e63ba14b1 100644 --- a/internal/querynodev2/delegator/delegator_data.go +++ b/internal/querynodev2/delegator/delegator_data.go @@ -698,7 +698,7 @@ func (sd *shardDelegator) readDeleteFromMsgstream(ctx context.Context, position } ts = time.Now() - err = stream.Seek(context.TODO(), []*msgpb.MsgPosition{position}) + err = stream.Seek(context.TODO(), []*msgpb.MsgPosition{position}, false) if err != nil { return nil, err } diff --git a/internal/querynodev2/delegator/delegator_data_test.go b/internal/querynodev2/delegator/delegator_data_test.go index 47a284afd4c89..50665425aa8af 100644 --- a/internal/querynodev2/delegator/delegator_data_test.go +++ b/internal/querynodev2/delegator/delegator_data_test.go @@ -725,7 +725,7 @@ func (s *DelegatorDataSuite) TestLoadSegments() { }, 10) s.mq.EXPECT().AsConsumer(mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) - s.mq.EXPECT().Seek(mock.Anything, mock.Anything).Return(nil) + s.mq.EXPECT().Seek(mock.Anything, mock.Anything, mock.Anything).Return(nil) s.mq.EXPECT().Close() ch := make(chan *msgstream.MsgPack, 10) close(ch) @@ -1173,7 +1173,7 @@ func (s *DelegatorDataSuite) TestReadDeleteFromMsgstream() { defer cancel() s.mq.EXPECT().AsConsumer(mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) - s.mq.EXPECT().Seek(mock.Anything, mock.Anything).Return(nil) + s.mq.EXPECT().Seek(mock.Anything, mock.Anything, mock.Anything).Return(nil) s.mq.EXPECT().Close() ch := make(chan *msgstream.MsgPack, 10) s.mq.EXPECT().Chan().Return(ch) diff --git a/internal/querynodev2/services_test.go b/internal/querynodev2/services_test.go index d636c0dd43863..dcaf420b8cbe5 100644 --- a/internal/querynodev2/services_test.go +++ b/internal/querynodev2/services_test.go @@ -306,7 +306,7 @@ func (suite *ServiceSuite) TestWatchDmChannelsInt64() { // mocks suite.factory.EXPECT().NewTtMsgStream(mock.Anything).Return(suite.msgStream, nil) suite.msgStream.EXPECT().AsConsumer(mock.Anything, []string{suite.pchannel}, mock.Anything, mock.Anything).Return(nil) - suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything).Return(nil) + suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything, mock.Anything).Return(nil) suite.msgStream.EXPECT().Chan().Return(suite.msgChan) 
suite.msgStream.EXPECT().Close() @@ -358,7 +358,7 @@ func (suite *ServiceSuite) TestWatchDmChannelsVarchar() { // mocks suite.factory.EXPECT().NewTtMsgStream(mock.Anything).Return(suite.msgStream, nil) suite.msgStream.EXPECT().AsConsumer(mock.Anything, []string{suite.pchannel}, mock.Anything, mock.Anything).Return(nil) - suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything).Return(nil) + suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything, mock.Anything).Return(nil) suite.msgStream.EXPECT().Chan().Return(suite.msgChan) suite.msgStream.EXPECT().Close() @@ -432,7 +432,7 @@ func (suite *ServiceSuite) TestWatchDmChannels_Failed() { suite.factory.EXPECT().NewTtMsgStream(mock.Anything).Return(suite.msgStream, nil) suite.msgStream.EXPECT().AsConsumer(mock.Anything, []string{suite.pchannel}, mock.Anything, mock.Anything).Return(nil) suite.msgStream.EXPECT().Close().Return() - suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything).Return(errors.New("mock error")).Once() + suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything, mock.Anything).Return(errors.New("mock error")).Once() status, err = suite.node.WatchDmChannels(ctx, req) suite.NoError(err) diff --git a/internal/rootcoord/dml_channels_test.go b/internal/rootcoord/dml_channels_test.go index db61ff1327db9..e27117b0268aa 100644 --- a/internal/rootcoord/dml_channels_test.go +++ b/internal/rootcoord/dml_channels_test.go @@ -293,8 +293,10 @@ func (ms *FailMsgStream) Broadcast(*msgstream.MsgPack) (map[string][]msgstream.M } return nil, nil } -func (ms *FailMsgStream) Consume() *msgstream.MsgPack { return nil } -func (ms *FailMsgStream) Seek(ctx context.Context, offset []*msgstream.MsgPosition) error { return nil } +func (ms *FailMsgStream) Consume() *msgstream.MsgPack { return nil } +func (ms *FailMsgStream) Seek(ctx context.Context, msgPositions []*msgstream.MsgPosition, includeCurrentMsg bool) error { + return nil +} func (ms *FailMsgStream) GetLatestMsgID(channel string) (msgstream.MessageID, error) { return nil, nil diff --git a/pkg/mq/msgdispatcher/dispatcher.go b/pkg/mq/msgdispatcher/dispatcher.go index ee552046ddc08..4d0ab3e2c606e 100644 --- a/pkg/mq/msgdispatcher/dispatcher.go +++ b/pkg/mq/msgdispatcher/dispatcher.go @@ -103,7 +103,7 @@ func NewDispatcher(ctx context.Context, return nil, err } - err = stream.Seek(ctx, []*Pos{position}) + err = stream.Seek(ctx, []*Pos{position}, false) if err != nil { stream.Close() log.Error("seek failed", zap.Error(err)) diff --git a/pkg/mq/msgstream/factory_stream_test.go b/pkg/mq/msgstream/factory_stream_test.go index cb7ff8702cd08..d07e74cdfc0f1 100644 --- a/pkg/mq/msgstream/factory_stream_test.go +++ b/pkg/mq/msgstream/factory_stream_test.go @@ -766,7 +766,7 @@ func createAndSeekConsumer(ctx context.Context, t *testing.T, newer streamNewer, consumer, err := newer(ctx) assert.NoError(t, err) consumer.AsConsumer(context.Background(), channels, funcutil.RandomString(8), mqwrapper.SubscriptionPositionUnknown) - err = consumer.Seek(context.Background(), seekPositions) + err = consumer.Seek(context.Background(), seekPositions, false) assert.NoError(t, err) return consumer } diff --git a/pkg/mq/msgstream/mock_msgstream.go b/pkg/mq/msgstream/mock_msgstream.go index e97b0e30d91a5..adbf233246bf3 100644 --- a/pkg/mq/msgstream/mock_msgstream.go +++ b/pkg/mq/msgstream/mock_msgstream.go @@ -44,10 +44,10 @@ type MockMsgStream_AsConsumer_Call struct { } // AsConsumer is a helper method to define mock.On call -// - ctx context.Context -// - channels []string -// - subName string -// - position 
mqwrapper.SubscriptionInitialPosition +// - ctx context.Context +// - channels []string +// - subName string +// - position mqwrapper.SubscriptionInitialPosition func (_e *MockMsgStream_Expecter) AsConsumer(ctx interface{}, channels interface{}, subName interface{}, position interface{}) *MockMsgStream_AsConsumer_Call { return &MockMsgStream_AsConsumer_Call{Call: _e.mock.On("AsConsumer", ctx, channels, subName, position)} } @@ -80,7 +80,7 @@ type MockMsgStream_AsProducer_Call struct { } // AsProducer is a helper method to define mock.On call -// - channels []string +// - channels []string func (_e *MockMsgStream_Expecter) AsProducer(channels interface{}) *MockMsgStream_AsProducer_Call { return &MockMsgStream_AsProducer_Call{Call: _e.mock.On("AsProducer", channels)} } @@ -134,7 +134,7 @@ type MockMsgStream_Broadcast_Call struct { } // Broadcast is a helper method to define mock.On call -// - _a0 *MsgPack +// - _a0 *MsgPack func (_e *MockMsgStream_Expecter) Broadcast(_a0 interface{}) *MockMsgStream_Broadcast_Call { return &MockMsgStream_Broadcast_Call{Call: _e.mock.On("Broadcast", _a0)} } @@ -219,7 +219,7 @@ type MockMsgStream_CheckTopicValid_Call struct { } // CheckTopicValid is a helper method to define mock.On call -// - channel string +// - channel string func (_e *MockMsgStream_Expecter) CheckTopicValid(channel interface{}) *MockMsgStream_CheckTopicValid_Call { return &MockMsgStream_CheckTopicValid_Call{Call: _e.mock.On("CheckTopicValid", channel)} } @@ -284,7 +284,7 @@ type MockMsgStream_EnableProduce_Call struct { } // EnableProduce is a helper method to define mock.On call -// - can bool +// - can bool func (_e *MockMsgStream_Expecter) EnableProduce(can interface{}) *MockMsgStream_EnableProduce_Call { return &MockMsgStream_EnableProduce_Call{Call: _e.mock.On("EnableProduce", can)} } @@ -338,7 +338,7 @@ type MockMsgStream_GetLatestMsgID_Call struct { } // GetLatestMsgID is a helper method to define mock.On call -// - channel string +// - channel string func (_e *MockMsgStream_Expecter) GetLatestMsgID(channel interface{}) *MockMsgStream_GetLatestMsgID_Call { return &MockMsgStream_GetLatestMsgID_Call{Call: _e.mock.On("GetLatestMsgID", channel)} } @@ -423,7 +423,7 @@ type MockMsgStream_Produce_Call struct { } // Produce is a helper method to define mock.On call -// - _a0 *MsgPack +// - _a0 *MsgPack func (_e *MockMsgStream_Expecter) Produce(_a0 interface{}) *MockMsgStream_Produce_Call { return &MockMsgStream_Produce_Call{Call: _e.mock.On("Produce", _a0)} } @@ -445,13 +445,13 @@ func (_c *MockMsgStream_Produce_Call) RunAndReturn(run func(*MsgPack) error) *Mo return _c } -// Seek provides a mock function with given fields: ctx, offset -func (_m *MockMsgStream) Seek(ctx context.Context, offset []*msgpb.MsgPosition) error { - ret := _m.Called(ctx, offset) +// Seek provides a mock function with given fields: ctx, msgPositions, includeCurrentMsg +func (_m *MockMsgStream) Seek(ctx context.Context, msgPositions []*msgpb.MsgPosition, includeCurrentMsg bool) error { + ret := _m.Called(ctx, msgPositions, includeCurrentMsg) var r0 error - if rf, ok := ret.Get(0).(func(context.Context, []*msgpb.MsgPosition) error); ok { - r0 = rf(ctx, offset) + if rf, ok := ret.Get(0).(func(context.Context, []*msgpb.MsgPosition, bool) error); ok { + r0 = rf(ctx, msgPositions, includeCurrentMsg) } else { r0 = ret.Error(0) } @@ -465,15 +465,16 @@ type MockMsgStream_Seek_Call struct { } // Seek is a helper method to define mock.On call -// - ctx context.Context -// - offset []*msgpb.MsgPosition -func (_e 
*MockMsgStream_Expecter) Seek(ctx interface{}, offset interface{}) *MockMsgStream_Seek_Call { - return &MockMsgStream_Seek_Call{Call: _e.mock.On("Seek", ctx, offset)} +// - ctx context.Context +// - msgPositions []*msgpb.MsgPosition +// - includeCurrentMsg bool +func (_e *MockMsgStream_Expecter) Seek(ctx interface{}, msgPositions interface{}, includeCurrentMsg interface{}) *MockMsgStream_Seek_Call { + return &MockMsgStream_Seek_Call{Call: _e.mock.On("Seek", ctx, msgPositions, includeCurrentMsg)} } -func (_c *MockMsgStream_Seek_Call) Run(run func(ctx context.Context, offset []*msgpb.MsgPosition)) *MockMsgStream_Seek_Call { +func (_c *MockMsgStream_Seek_Call) Run(run func(ctx context.Context, msgPositions []*msgpb.MsgPosition, includeCurrentMsg bool)) *MockMsgStream_Seek_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].([]*msgpb.MsgPosition)) + run(args[0].(context.Context), args[1].([]*msgpb.MsgPosition), args[2].(bool)) }) return _c } @@ -483,7 +484,7 @@ func (_c *MockMsgStream_Seek_Call) Return(_a0 error) *MockMsgStream_Seek_Call { return _c } -func (_c *MockMsgStream_Seek_Call) RunAndReturn(run func(context.Context, []*msgpb.MsgPosition) error) *MockMsgStream_Seek_Call { +func (_c *MockMsgStream_Seek_Call) RunAndReturn(run func(context.Context, []*msgpb.MsgPosition, bool) error) *MockMsgStream_Seek_Call { _c.Call.Return(run) return _c } @@ -499,7 +500,7 @@ type MockMsgStream_SetRepackFunc_Call struct { } // SetRepackFunc is a helper method to define mock.On call -// - repackFunc RepackFunc +// - repackFunc RepackFunc func (_e *MockMsgStream_Expecter) SetRepackFunc(repackFunc interface{}) *MockMsgStream_SetRepackFunc_Call { return &MockMsgStream_SetRepackFunc_Call{Call: _e.mock.On("SetRepackFunc", repackFunc)} } diff --git a/pkg/mq/msgstream/mq_kafka_msgstream_test.go b/pkg/mq/msgstream/mq_kafka_msgstream_test.go index 468d4e054a96f..fe39f8f082e2d 100644 --- a/pkg/mq/msgstream/mq_kafka_msgstream_test.go +++ b/pkg/mq/msgstream/mq_kafka_msgstream_test.go @@ -145,7 +145,7 @@ func TestStream_KafkaMsgStream_SeekToLast(t *testing.T) { defer outputStream2.Close() assert.NoError(t, err) - err = outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}) + err = outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}, false) assert.NoError(t, err) cnt := 0 @@ -482,6 +482,6 @@ func getKafkaTtOutputStreamAndSeek(ctx context.Context, kafkaAddress string, pos consumerName = append(consumerName, c.ChannelName) } outputStream.AsConsumer(context.Background(), consumerName, funcutil.RandomString(8), mqwrapper.SubscriptionPositionUnknown) - outputStream.Seek(context.Background(), positions) + outputStream.Seek(context.Background(), positions, false) return outputStream } diff --git a/pkg/mq/msgstream/mq_msgstream.go b/pkg/mq/msgstream/mq_msgstream.go index 86ad3f7dfe578..a93c9962f414d 100644 --- a/pkg/mq/msgstream/mq_msgstream.go +++ b/pkg/mq/msgstream/mq_msgstream.go @@ -473,7 +473,7 @@ func (ms *mqMsgStream) Chan() <-chan *MsgPack { // Seek reset the subscription associated with this consumer to a specific position, the seek position is exclusive // User has to ensure mq_msgstream is not closed before seek, and the seek position is already written. 
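// Editorial sketch (not part of the original patch): a hypothetical caller of the new
// three-argument Seek, assuming a stream already subscribed via AsConsumer. Within
// Milvus, includeCurrentMsg is always passed as false, so consumption resumes at the
// message after the given position (helper name below is illustrative only):
//
//	func resumeAfterPosition(ctx context.Context, ms MsgStream, pos *MsgPosition) error {
//	    // exclusive seek: the message stored at pos is skipped, later messages are consumed
//	    return ms.Seek(ctx, []*MsgPosition{pos}, false)
//	}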
-func (ms *mqMsgStream) Seek(ctx context.Context, msgPositions []*msgpb.MsgPosition) error { +func (ms *mqMsgStream) Seek(ctx context.Context, msgPositions []*MsgPosition, includeCurrentMsg bool) error { for _, mp := range msgPositions { consumer, ok := ms.consumers[mp.ChannelName] if !ok { @@ -493,8 +493,8 @@ func (ms *mqMsgStream) Seek(ctx context.Context, msgPositions []*msgpb.MsgPositi } } - log.Info("MsgStream seek begin", zap.String("channel", mp.ChannelName), zap.Any("MessageID", mp.MsgID)) - err = consumer.Seek(messageID, false) + log.Info("MsgStream seek begin", zap.String("channel", mp.ChannelName), zap.Any("MessageID", mp.MsgID), zap.Bool("includeCurrentMsg", includeCurrentMsg)) + err = consumer.Seek(messageID, includeCurrentMsg) if err != nil { log.Warn("Failed to seek", zap.String("channel", mp.ChannelName), zap.Error(err)) return err @@ -840,7 +840,7 @@ func (ms *MqTtMsgStream) allChanReachSameTtMsg(chanTtMsgSync map[mqwrapper.Consu } // Seek to the specified position -func (ms *MqTtMsgStream) Seek(ctx context.Context, msgPositions []*msgpb.MsgPosition) error { +func (ms *MqTtMsgStream) Seek(ctx context.Context, msgPositions []*MsgPosition, includeCurrentMsg bool) error { var consumer mqwrapper.Consumer var mp *MsgPosition var err error diff --git a/pkg/mq/msgstream/mq_msgstream_test.go b/pkg/mq/msgstream/mq_msgstream_test.go index 8705eddf13499..ee4a5d57ffacc 100644 --- a/pkg/mq/msgstream/mq_msgstream_test.go +++ b/pkg/mq/msgstream/mq_msgstream_test.go @@ -517,7 +517,7 @@ func TestStream_PulsarMsgStream_SeekToLast(t *testing.T) { defer outputStream2.Close() assert.NoError(t, err) - err = outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}) + err = outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}, false) assert.NoError(t, err) cnt := 0 @@ -946,7 +946,7 @@ func TestStream_MqMsgStream_Seek(t *testing.T) { pulsarClient, _ := pulsarwrapper.NewClient(DefaultPulsarTenant, DefaultPulsarNamespace, pulsar.ClientOptions{URL: pulsarAddress}) outputStream2, _ := NewMqMsgStream(ctx, 100, 100, pulsarClient, factory.NewUnmarshalDispatcher()) outputStream2.AsConsumer(ctx, consumerChannels, consumerSubName, mqwrapper.SubscriptionPositionEarliest) - outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}) + outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}, false) for i := 6; i < 10; i++ { result := consumer(ctx, outputStream2) @@ -1001,7 +1001,7 @@ func TestStream_MqMsgStream_SeekInvalidMessage(t *testing.T) { }, } - err = outputStream2.Seek(ctx, p) + err = outputStream2.Seek(ctx, p, false) assert.NoError(t, err) for i := 10; i < 20; i++ { @@ -1070,15 +1070,15 @@ func TestSTream_MqMsgStream_SeekBadMessageID(t *testing.T) { } paramtable.Get().Save(paramtable.Get().MQCfg.IgnoreBadPosition.Key, "false") - err = outputStream2.Seek(ctx, p) + err = outputStream2.Seek(ctx, p, false) assert.Error(t, err) - err = outputStream3.Seek(ctx, p) + err = outputStream3.Seek(ctx, p, false) assert.Error(t, err) paramtable.Get().Save(paramtable.Get().MQCfg.IgnoreBadPosition.Key, "true") - err = outputStream2.Seek(ctx, p) + err = outputStream2.Seek(ctx, p, false) assert.NoError(t, err) - err = outputStream3.Seek(ctx, p) + err = outputStream3.Seek(ctx, p, false) assert.NoError(t, err) } @@ -1466,7 +1466,7 @@ func getPulsarTtOutputStreamAndSeek(ctx context.Context, pulsarAddress string, p consumerName = append(consumerName, c.ChannelName) } outputStream.AsConsumer(context.Background(), consumerName, funcutil.RandomString(8), mqwrapper.SubscriptionPositionUnknown) - 
outputStream.Seek(context.Background(), positions) + outputStream.Seek(context.Background(), positions, false) return outputStream } diff --git a/pkg/mq/msgstream/msgstream.go b/pkg/mq/msgstream/msgstream.go index 184d44967d098..62f8c8737e026 100644 --- a/pkg/mq/msgstream/msgstream.go +++ b/pkg/mq/msgstream/msgstream.go @@ -63,7 +63,9 @@ type MsgStream interface { AsConsumer(ctx context.Context, channels []string, subName string, position mqwrapper.SubscriptionInitialPosition) error Chan() <-chan *MsgPack - Seek(ctx context.Context, offset []*MsgPosition) error + // Seek consume message from the specified position + // includeCurrentMsg indicates whether to consume the current message, and in the milvus system, it should be always false + Seek(ctx context.Context, msgPositions []*MsgPosition, includeCurrentMsg bool) error GetLatestMsgID(channel string) (MessageID, error) CheckTopicValid(channel string) error From ee73e622025ac258d62f2164609e8bca743a0e89 Mon Sep 17 00:00:00 2001 From: PowderLi <135960789+PowderLi@users.noreply.github.com> Date: Mon, 27 May 2024 14:25:41 +0800 Subject: [PATCH 072/126] fix: [restful v2] search result be cut while nq > 1 (#33339) issue: #33099 #32837 #32419 1. len(search result) may be nq * topk, we need return all rather than topk 2. the in restful response payload keep the same with milvus error code Signed-off-by: PowderLi --- .../proxy/httpserver/handler_v2.go | 44 ++--- .../proxy/httpserver/handler_v2_test.go | 126 +++++++++--- .../proxy/httpserver/request_v2.go | 15 +- .../distributed/proxy/httpserver/utils.go | 2 +- tests/restful_client_v2/api/milvus.py | 4 +- tests/restful_client_v2/base/testbase.py | 6 +- .../testcases/test_alias_operation.py | 2 +- .../testcases/test_collection_operations.py | 88 ++++----- .../testcases/test_index_operation.py | 14 +- .../testcases/test_jobs_operation.py | 20 +- .../testcases/test_partition_operation.py | 28 +-- .../test_restful_sdk_mix_use_scenario.py | 12 +- .../testcases/test_role_operation.py | 4 +- .../testcases/test_user_operation.py | 6 +- .../testcases/test_vector_operations.py | 186 +++++++++--------- 15 files changed, 316 insertions(+), 241 deletions(-) diff --git a/internal/distributed/proxy/httpserver/handler_v2.go b/internal/distributed/proxy/httpserver/handler_v2.go index dc457e9b47e88..294db27a9195d 100644 --- a/internal/distributed/proxy/httpserver/handler_v2.go +++ b/internal/distributed/proxy/httpserver/handler_v2.go @@ -408,7 +408,7 @@ func (h *HandlersV2) getCollectionDetails(ctx context.Context, c *gin.Context, a if coll.Properties == nil { coll.Properties = []*commonpb.KeyValuePair{} } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{ HTTPCollectionName: coll.CollectionName, HTTPCollectionID: coll.CollectionID, HTTPReturnDescription: coll.Schema.Description, @@ -458,7 +458,7 @@ func (h *HandlersV2) getCollectionLoadState(ctx context.Context, c *gin.Context, c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return resp, err } else if resp.(*milvuspb.GetLoadStateResponse).State == commonpb.LoadState_LoadStateNotLoad { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{ HTTPReturnLoadState: resp.(*milvuspb.GetLoadStateResponse).State.String(), }}) return resp, err @@ -483,7 +483,7 @@ func (h 
*HandlersV2) getCollectionLoadState(ctx context.Context, c *gin.Context, if progress >= 100 { state = commonpb.LoadState_LoadStateLoaded.String() } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{ HTTPReturnLoadState: state, HTTPReturnLoadProgress: progress, }, HTTPReturnMessage: errMessage}) @@ -587,7 +587,7 @@ func (h *HandlersV2) query(ctx context.Context, c *gin.Context, anyReq any, dbNa }) } else { c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: outputData, HTTPReturnCost: proxy.GetCostValue(queryResp.GetStatus()), }) @@ -634,7 +634,7 @@ func (h *HandlersV2) get(ctx context.Context, c *gin.Context, anyReq any, dbName }) } else { c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: outputData, HTTPReturnCost: proxy.GetCostValue(queryResp.GetStatus()), }) @@ -721,20 +721,20 @@ func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbN allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) if allowJS { c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}, HTTPReturnCost: cost, }) } else { c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": formatInt64(insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}, HTTPReturnCost: cost, }) } case *schemapb.IDs_StrId: c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}, HTTPReturnCost: cost, }) @@ -796,20 +796,20 @@ func (h *HandlersV2) upsert(ctx context.Context, c *gin.Context, anyReq any, dbN allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) if allowJS { c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}, HTTPReturnCost: cost, }) } else { c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": formatInt64(upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}, HTTPReturnCost: cost, }) } case *schemapb.IDs_StrId: c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}, HTTPReturnCost: cost, }) @@ -935,10 +935,10 @@ func (h *HandlersV2) search(ctx context.Context, c *gin.Context, anyReq any, dbN searchResp := resp.(*milvuspb.SearchResults) cost := proxy.GetCostValue(searchResp.GetStatus()) if searchResp.Results.TopK == int64(0) { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, 
HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) } else { allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) - outputData, err := buildQueryResp(searchResp.Results.TopK, searchResp.Results.OutputFields, searchResp.Results.FieldsData, searchResp.Results.Ids, searchResp.Results.Scores, allowJS) + outputData, err := buildQueryResp(0, searchResp.Results.OutputFields, searchResp.Results.FieldsData, searchResp.Results.Ids, searchResp.Results.Scores, allowJS) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with search result", zap.Any("result", searchResp.Results), zap.Error(err)) c.JSON(http.StatusOK, gin.H{ @@ -946,7 +946,7 @@ func (h *HandlersV2) search(ctx context.Context, c *gin.Context, anyReq any, dbN HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData, HTTPReturnCost: cost}) + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: outputData, HTTPReturnCost: cost}) } } } @@ -1013,7 +1013,7 @@ func (h *HandlersV2) advancedSearch(ctx context.Context, c *gin.Context, anyReq searchResp := resp.(*milvuspb.SearchResults) cost := proxy.GetCostValue(searchResp.GetStatus()) if searchResp.Results.TopK == int64(0) { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) } else { allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) outputData, err := buildQueryResp(0, searchResp.Results.OutputFields, searchResp.Results.FieldsData, searchResp.Results.Ids, searchResp.Results.Scores, allowJS) @@ -1024,7 +1024,7 @@ func (h *HandlersV2) advancedSearch(ctx context.Context, c *gin.Context, anyReq HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData, HTTPReturnCost: cost}) + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: outputData, HTTPReturnCost: cost}) } } } @@ -1534,7 +1534,7 @@ func (h *HandlersV2) describeRole(ctx context.Context, c *gin.Context, anyReq an } privileges = append(privileges, privilege) } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: privileges}) + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: privileges}) } return resp, err } @@ -1664,7 +1664,7 @@ func (h *HandlersV2) describeIndex(ctx context.Context, c *gin.Context, anyReq a } indexInfos = append(indexInfos, indexInfo) } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: indexInfos}) + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: indexInfos}) } return resp, err } @@ -1738,7 +1738,7 @@ func (h *HandlersV2) describeAlias(ctx context.Context, c *gin.Context, anyReq a }) if err == nil { response := resp.(*milvuspb.DescribeAliasResponse) - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{ HTTPDbName: response.DbName, HTTPCollectionName: response.Collection, HTTPAliasName: response.Alias, @@ -1834,7 +1834,7 @@ func (h *HandlersV2) listImportJob(ctx context.Context, c 
*gin.Context, anyReq a records = append(records, jobDetail) } returnData["records"] = records - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: returnData}) + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: returnData}) } return resp, err } @@ -1871,7 +1871,7 @@ func (h *HandlersV2) createImportJob(ctx context.Context, c *gin.Context, anyReq if err == nil { returnData := make(map[string]interface{}) returnData["jobId"] = resp.(*internalpb.ImportResponse).GetJobID() - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: returnData}) + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: returnData}) } return resp, err } @@ -1927,7 +1927,7 @@ func (h *HandlersV2) getImportJobProcess(ctx context.Context, c *gin.Context, an } returnData["fileSize"] = totalFileSize returnData["details"] = details - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: returnData}) + c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: returnData}) } return resp, err } diff --git a/internal/distributed/proxy/httpserver/handler_v2_test.go b/internal/distributed/proxy/httpserver/handler_v2_test.go index ab6f71315e559..5a57e536b963c 100644 --- a/internal/distributed/proxy/httpserver/handler_v2_test.go +++ b/internal/distributed/proxy/httpserver/handler_v2_test.go @@ -647,11 +647,9 @@ func TestCreateCollection(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, int32(200), returnBody.Code) } }) } @@ -742,7 +740,15 @@ func TestMethodGet(t *testing.T) { }, nil).Twice() mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{Status: commonErrorStatus}, nil).Once() mp.EXPECT().GetLoadState(mock.Anything, mock.Anything).Return(&milvuspb.GetLoadStateResponse{Status: commonErrorStatus}, nil).Once() - mp.EXPECT().GetLoadState(mock.Anything, mock.Anything).Return(&DefaultLoadStateResp, nil).Times(3) + mp.EXPECT().GetLoadState(mock.Anything, mock.Anything).Return(&DefaultLoadStateResp, nil).Times(4) + mp.EXPECT().GetLoadState(mock.Anything, mock.Anything).Return(&milvuspb.GetLoadStateResponse{ + Status: &StatusSuccess, + State: commonpb.LoadState_LoadStateNotExist, + }, nil).Once() + mp.EXPECT().GetLoadState(mock.Anything, mock.Anything).Return(&milvuspb.GetLoadStateResponse{ + Status: &StatusSuccess, + State: commonpb.LoadState_LoadStateNotLoad, + }, nil).Once() mp.EXPECT().DescribeIndex(mock.Anything, mock.Anything).Return(&milvuspb.DescribeIndexResponse{Status: commonErrorStatus}, nil).Once() mp.EXPECT().DescribeIndex(mock.Anything, mock.Anything).Return(&DefaultDescIndexesReqp, nil).Times(3) mp.EXPECT().DescribeIndex(mock.Anything, mock.Anything).Return(nil, merr.WrapErrIndexNotFoundForCollection(DefaultCollectionName)).Once() @@ -765,6 +771,10 @@ func TestMethodGet(t *testing.T) { Status: commonSuccessStatus, Progress: int64(77), }, nil).Once() + mp.EXPECT().GetLoadingProgress(mock.Anything, mock.Anything).Return(&milvuspb.GetLoadingProgressResponse{ + Status: commonSuccessStatus, + Progress: int64(100), + }, nil).Once() mp.EXPECT().GetLoadingProgress(mock.Anything, 
mock.Anything).Return(&milvuspb.GetLoadingProgressResponse{Status: commonErrorStatus}, nil).Once() mp.EXPECT().ShowPartitions(mock.Anything, mock.Anything).Return(&milvuspb.ShowPartitionsResponse{ Status: &StatusSuccess, @@ -865,6 +875,17 @@ func TestMethodGet(t *testing.T) { queryTestCases = append(queryTestCases, rawTestCase{ path: versionalV2(CollectionCategory, LoadStateAction), }) + queryTestCases = append(queryTestCases, rawTestCase{ + path: versionalV2(CollectionCategory, LoadStateAction), + }) + queryTestCases = append(queryTestCases, rawTestCase{ + path: versionalV2(CollectionCategory, LoadStateAction), + errCode: 100, + errMsg: "collection not found[collection=book]", + }) + queryTestCases = append(queryTestCases, rawTestCase{ + path: versionalV2(CollectionCategory, LoadStateAction), + }) queryTestCases = append(queryTestCases, rawTestCase{ path: versionalV2(PartitionCategory, ListAction), }) @@ -906,7 +927,7 @@ func TestMethodGet(t *testing.T) { }) for _, testcase := range queryTestCases { - t.Run("query", func(t *testing.T) { + t.Run(testcase.path, func(t *testing.T) { bodyReader := bytes.NewReader([]byte(`{` + `"collectionName": "` + DefaultCollectionName + `",` + `"partitionName": "` + DefaultPartitionName + `",` + @@ -922,11 +943,9 @@ func TestMethodGet(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, int32(http.StatusOK), returnBody.Code) } fmt.Println(w.Body.String()) }) @@ -973,7 +992,7 @@ func TestMethodDelete(t *testing.T) { path: versionalV2(AliasCategory, DropAction), }) for _, testcase := range queryTestCases { - t.Run("query", func(t *testing.T) { + t.Run(testcase.path, func(t *testing.T) { bodyReader := bytes.NewReader([]byte(`{"collectionName": "` + DefaultCollectionName + `", "partitionName": "` + DefaultPartitionName + `", "userName": "` + util.UserRoot + `", "roleName": "` + util.RoleAdmin + `", "indexName": "` + DefaultIndexName + `", "aliasName": "` + DefaultAliasName + `"}`)) req := httptest.NewRequest(http.MethodPost, testcase.path, bodyReader) @@ -983,11 +1002,9 @@ func TestMethodDelete(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, int32(http.StatusOK), returnBody.Code) } fmt.Println(w.Body.String()) }) @@ -1104,7 +1121,7 @@ func TestMethodPost(t *testing.T) { }) for _, testcase := range queryTestCases { - t.Run("query", func(t *testing.T) { + t.Run(testcase.path, func(t *testing.T) { bodyReader := bytes.NewReader([]byte(`{` + `"collectionName": "` + DefaultCollectionName + `", "newCollectionName": "test", "newDbName": "",` + `"partitionName": "` + DefaultPartitionName + `", "partitionNames": ["` + DefaultPartitionName + `"],` + @@ -1123,11 +1140,9 @@ func TestMethodPost(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, 
int32(http.StatusOK), returnBody.Code) } fmt.Println(w.Body.String()) }) @@ -1222,7 +1237,7 @@ func TestDML(t *testing.T) { }) for _, testcase := range queryTestCases { - t.Run("query", func(t *testing.T) { + t.Run(testcase.path, func(t *testing.T) { bodyReader := bytes.NewReader(testcase.requestBody) req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader) w := httptest.NewRecorder() @@ -1231,11 +1246,51 @@ func TestDML(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) + if testcase.errCode != 0 { + assert.Equal(t, testcase.errMsg, returnBody.Message) + } + fmt.Println(w.Body.String()) + }) + } +} + +func TestAllowInt64(t *testing.T) { + paramtable.Init() + mp := mocks.NewMockProxy(t) + testEngine := initHTTPServerV2(mp, false) + queryTestCases := []requestBodyTestCase{} + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: InsertAction, + requestBody: []byte(`{"collectionName": "book", "data": [{"book_id": 0, "word_count": 0, "book_intro": [0.11825, 0.6]}]}`), + }) + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: UpsertAction, + requestBody: []byte(`{"collectionName": "book", "data": [{"book_id": 0, "word_count": 0, "book_intro": [0.11825, 0.6]}]}`), + }) + mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ + CollectionName: DefaultCollectionName, + Schema: generateCollectionSchema(schemapb.DataType_Int64), + ShardsNum: ShardNumDefault, + Status: &StatusSuccess, + }, nil).Twice() + mp.EXPECT().Insert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, InsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once() + mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, UpsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once() + + for _, testcase := range queryTestCases { + t.Run(testcase.path, func(t *testing.T) { + bodyReader := bytes.NewReader(testcase.requestBody) + req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader) + req.Header.Set(HTTPHeaderAllowInt64, "true") + w := httptest.NewRecorder() + testEngine.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) + returnBody := &ReturnErrMsg{} + err := json.Unmarshal(w.Body.Bytes(), returnBody) + assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, int32(http.StatusOK), returnBody.Code) } fmt.Println(w.Body.String()) }) @@ -1244,18 +1299,33 @@ func TestDML(t *testing.T) { func TestSearchV2(t *testing.T) { paramtable.Init() + outputFields := []string{FieldBookID, FieldWordCount, "author", "date"} mp := mocks.NewMockProxy(t) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, Schema: generateCollectionSchema(schemapb.DataType_Int64), ShardsNum: ShardNumDefault, Status: &StatusSuccess, - }, nil).Times(10) + }, nil).Times(12) + mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: 
&schemapb.SearchResultData{ + TopK: int64(3), + OutputFields: outputFields, + FieldsData: generateFieldData(), + Ids: generateIDs(schemapb.DataType_Int64, 3), + Scores: DefaultScores, + }}, nil).Once() mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3) mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: &commonpb.Status{ ErrorCode: 1700, // ErrFieldNotFound Reason: "groupBy field not found in schema: field not found[field=test]", }}, nil).Once() + mp.EXPECT().HybridSearch(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{ + TopK: int64(3), + OutputFields: outputFields, + FieldsData: generateFieldData(), + Ids: generateIDs(schemapb.DataType_Int64, 3), + Scores: DefaultScores, + }}, nil).Once() mp.EXPECT().HybridSearch(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3) collSchema := generateCollectionSchema(schemapb.DataType_Int64) binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector) @@ -1283,6 +1353,10 @@ func TestSearchV2(t *testing.T) { path: SearchAction, requestBody: []byte(`{"collectionName": "book", "data": [[0.1, 0.2]], "filter": "book_id in [2, 4, 6, 8]", "limit": 4, "outputFields": ["word_count"]}`), }) + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: SearchAction, + requestBody: []byte(`{"collectionName": "book", "data": [[0.1, 0.2]], "filter": "book_id in [2, 4, 6, 8]", "limit": 4, "outputFields": ["word_count"]}`), + }) queryTestCases = append(queryTestCases, requestBodyTestCase{ path: SearchAction, requestBody: []byte(`{"collectionName": "book", "data": [[0.1, 0.2]], "filter": "book_id in [2, 4, 6, 8]", "limit": 4, "outputFields": ["word_count"], "params": {"radius":0.9}}`), @@ -1313,6 +1387,10 @@ func TestSearchV2(t *testing.T) { path: AdvancedSearchAction, requestBody: []byte(`{"collectionName": "hello_milvus", "search": [{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}, {"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), }) + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: AdvancedSearchAction, + requestBody: []byte(`{"collectionName": "hello_milvus", "search": [{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}, {"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), + }) queryTestCases = append(queryTestCases, requestBodyTestCase{ path: HybridSearchAction, requestBody: []byte(`{"collectionName": "hello_milvus", "search": [{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}, {"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), @@ -1410,7 +1488,7 @@ func TestSearchV2(t *testing.T) { }) for _, testcase := range queryTestCases { - t.Run("search", func(t *testing.T) { + t.Run(testcase.path, func(t *testing.T) { bodyReader := bytes.NewReader(testcase.requestBody) req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader) w := 
httptest.NewRecorder() @@ -1419,11 +1497,9 @@ func TestSearchV2(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, int32(http.StatusOK), returnBody.Code) } fmt.Println(w.Body.String()) }) diff --git a/internal/distributed/proxy/httpserver/request_v2.go b/internal/distributed/proxy/httpserver/request_v2.go index 01ecf511600f0..f5fe86a69e4d3 100644 --- a/internal/distributed/proxy/httpserver/request_v2.go +++ b/internal/distributed/proxy/httpserver/request_v2.go @@ -1,7 +1,6 @@ package httpserver import ( - "net/http" "strconv" "github.com/gin-gonic/gin" @@ -357,14 +356,14 @@ func (req *AliasCollectionReq) GetAliasName() string { } func wrapperReturnHas(has bool) gin.H { - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{HTTPReturnHas: has}} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{HTTPReturnHas: has}} } func wrapperReturnList(names []string) gin.H { if names == nil { - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: []string{}} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: []string{}} } - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: names} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: names} } func wrapperReturnRowCount(pairs []*commonpb.KeyValuePair) gin.H { @@ -376,15 +375,15 @@ func wrapperReturnRowCount(pairs []*commonpb.KeyValuePair) gin.H { } rowCount, err := strconv.ParseInt(rowCountValue, 10, 64) if err != nil { - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{HTTPReturnRowCount: rowCountValue}} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{HTTPReturnRowCount: rowCountValue}} } - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{HTTPReturnRowCount: rowCount}} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{HTTPReturnRowCount: rowCount}} } func wrapperReturnDefault() gin.H { - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{}} } func wrapperReturnDefaultWithCost(cost int) gin.H { - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}, HTTPReturnCost: cost} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{}, HTTPReturnCost: cost} } diff --git a/internal/distributed/proxy/httpserver/utils.go b/internal/distributed/proxy/httpserver/utils.go index a42c165cf52d0..3053b23449960 100644 --- a/internal/distributed/proxy/httpserver/utils.go +++ b/internal/distributed/proxy/httpserver/utils.go @@ -1084,7 +1084,7 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap var queryResp []map[string]interface{} columnNum := len(fieldDataList) - if rowsNum == int64(0) { + if rowsNum == int64(0) { // always if columnNum > 0 { switch fieldDataList[0].Type { case schemapb.DataType_Bool: diff --git a/tests/restful_client_v2/api/milvus.py b/tests/restful_client_v2/api/milvus.py index 5d9d9f72e79ea..76807a4d36b37 100644 --- a/tests/restful_client_v2/api/milvus.py +++ b/tests/restful_client_v2/api/milvus.py @@ -15,7 +15,7 @@ def logger_request_response(response, url, tt, headers, data, str_data, str_resp data = 
data[:1000] + "..." + data[-1000:] try: if response.status_code == 200: - if ('code' in response.json() and response.json()["code"] == 200) or ( + if ('code' in response.json() and response.json()["code"] == 0) or ( 'Code' in response.json() and response.json()["Code"] == 0): logger.debug( f"\nmethod: {method}, \nurl: {url}, \ncost time: {tt}, \nheader: {headers}, \npayload: {str_data}, \nresponse: {str_response}") @@ -612,7 +612,7 @@ def role_create(self, payload): url = f'{self.endpoint}/v2/vectordb/roles/create' response = self.post(url, headers=self.update_headers(), data=payload) res = response.json() - if res["code"] == 200: + if res["code"] == 0: self.role_names.append(payload["roleName"]) return res diff --git a/tests/restful_client_v2/base/testbase.py b/tests/restful_client_v2/base/testbase.py index 7452058d61890..a47239ae96102 100644 --- a/tests/restful_client_v2/base/testbase.py +++ b/tests/restful_client_v2/base/testbase.py @@ -80,7 +80,7 @@ def init_collection(self, collection_name, pk_field="id", metric_type="L2", dim= "vectorField": "vector", } rsp = self.collection_client.collection_create(schema_payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 self.wait_collection_load_completed(collection_name) batch_size = batch_size batch = nb // batch_size @@ -97,7 +97,7 @@ def init_collection(self, collection_name, pk_field="id", metric_type="L2", dim= body_size = sys.getsizeof(json.dumps(payload)) logger.debug(f"body size: {body_size / 1024 / 1024} MB") rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 if return_insert_id: insert_ids.extend(rsp['data']['insertIds']) # insert remainder data @@ -109,7 +109,7 @@ def init_collection(self, collection_name, pk_field="id", metric_type="L2", dim= "data": data } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 if return_insert_id: insert_ids.extend(rsp['data']['insertIds']) if return_insert_id: diff --git a/tests/restful_client_v2/testcases/test_alias_operation.py b/tests/restful_client_v2/testcases/test_alias_operation.py index 75b47ef498a8f..3919defa499f7 100644 --- a/tests/restful_client_v2/testcases/test_alias_operation.py +++ b/tests/restful_client_v2/testcases/test_alias_operation.py @@ -38,7 +38,7 @@ def test_alias_e2e(self): "aliasName": alias_name } rsp = self.alias_client.create_alias(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # list alias after create rsp = self.alias_client.list_alias() assert alias_name in rsp['data'] diff --git a/tests/restful_client_v2/testcases/test_collection_operations.py b/tests/restful_client_v2/testcases/test_collection_operations.py index 8314bd40fe81c..a8f96808a89eb 100644 --- a/tests/restful_client_v2/testcases/test_collection_operations.py +++ b/tests/restful_client_v2/testcases/test_collection_operations.py @@ -33,14 +33,14 @@ def test_create_collections_quick_setup(self, dim): } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert rsp['data']['autoId'] is False assert rsp['data']['enableDynamicField'] is True @@ -68,10 +68,10 @@ def test_create_collection_quick_setup_with_custom(self, vector_field, 
primary_f if id_type == "VarChar": collection_payload["params"] = {"max_length": "256"} rsp = self.collection_client.collection_create(collection_payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name fields = [f["name"] for f in rsp['data']['fields']] assert primary_field in fields @@ -113,7 +113,7 @@ def test_create_collections_without_params(self, enable_dynamic_field, request_s logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -131,7 +131,7 @@ def test_create_collections_without_params(self, enable_dynamic_field, request_s for d in rsp["data"]["properties"]: if d["key"] == "collection.ttl.seconds": ttl_seconds_actual = int(d["value"]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['enableDynamicField'] == False assert rsp['data']['collectionName'] == name assert rsp['data']['shardsNum'] == num_shards @@ -178,7 +178,7 @@ def test_create_collections_with_all_params(self): logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -196,7 +196,7 @@ def test_create_collections_with_all_params(self): for d in rsp["data"]["properties"]: if d["key"] == "collection.ttl.seconds": ttl_seconds_actual = int(d["value"]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert rsp['data']['shardsNum'] == num_shards assert rsp['data']['partitionsNum'] == num_partitions @@ -235,7 +235,7 @@ def test_create_collections_custom_without_index(self, dim, auto_id, enable_dyna } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -244,7 +244,7 @@ def test_create_collections_custom_without_index(self, dim, auto_id, enable_dyna logger.info(f"schema: {c.schema}") # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert rsp['data']['autoId'] == auto_id assert c.schema.auto_id == auto_id @@ -288,7 +288,7 @@ def test_create_collections_one_float_vector_with_index(self, dim, metric_type): } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -296,7 +296,7 @@ def test_create_collections_one_float_vector_with_index(self, dim, metric_type): # describe collection time.sleep(10) rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name # assert index created indexes = rsp['data']['indexes'] @@ -339,7 +339,7 @@ def test_create_collections_multi_float_vector_with_one_index(self, dim, metric_ # describe collection time.sleep(10) rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name # assert index created indexes = 
rsp['data']['indexes'] @@ -375,7 +375,7 @@ def test_create_collections_multi_float_vector_with_all_index(self, dim, metric_ } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -383,7 +383,7 @@ def test_create_collections_multi_float_vector_with_all_index(self, dim, metric_ # describe collection time.sleep(10) rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name # assert index created indexes = rsp['data']['indexes'] @@ -426,7 +426,7 @@ def test_create_collections_float16_vector_datatype(self, dim, auto_id, enable_d } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -435,7 +435,7 @@ def test_create_collections_float16_vector_datatype(self, dim, auto_id, enable_d logger.info(f"schema: {c.schema}") # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert len(rsp['data']['fields']) == len(c.schema.fields) @@ -472,7 +472,7 @@ def test_create_collections_binary_vector_datatype(self, dim, auto_id, enable_dy } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -481,7 +481,7 @@ def test_create_collections_binary_vector_datatype(self, dim, auto_id, enable_dy logger.info(f"schema: {c.schema}") # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert len(rsp['data']['fields']) == len(c.schema.fields) @@ -518,7 +518,7 @@ def create_collection(c_name, vector_dim, c_metric_type): time.sleep(10) success_cnt = 0 for rsp in concurrent_rsp: - if rsp["code"] == 200: + if rsp['code'] == 0: success_cnt += 1 logger.info(concurrent_rsp) assert success_cnt == 10 @@ -527,7 +527,7 @@ def create_collection(c_name, vector_dim, c_metric_type): assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name def test_create_collections_concurrent_with_different_param(self): @@ -565,7 +565,7 @@ def create_collection(c_name, vector_dim, c_metric_type): time.sleep(10) success_cnt = 0 for rsp in concurrent_rsp: - if rsp["code"] == 200: + if rsp['code'] == 0: success_cnt += 1 logger.info(concurrent_rsp) assert success_cnt == 1 @@ -574,7 +574,7 @@ def create_collection(c_name, vector_dim, c_metric_type): assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name @@ -659,7 +659,7 @@ def test_has_collections_default(self): } time.sleep(1) rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 name_list.append(name) rsp = client.collection_list() all_collections = rsp['data'] @@ -706,11 +706,11 @@ def test_get_collections_stats(self): } time.sleep(1) rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert 
rsp['code'] == 0 # describe collection client.collection_describe(collection_name=name) rsp = client.collection_stats(collection_name=name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['rowCount'] == 0 # insert data nb = 3000 @@ -746,7 +746,7 @@ def test_load_and_release_collection(self): } } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # create index before load index_params = [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}] payload = { @@ -762,7 +762,7 @@ def test_load_and_release_collection(self): # describe collection client.collection_describe(collection_name=name) rsp = client.collection_load(collection_name=name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_load_state(collection_name=name) assert rsp['data']['loadState'] in ["LoadStateLoaded", "LoadStateLoading"] time.sleep(5) @@ -793,11 +793,11 @@ def test_get_collection_load_state(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # describe collection client.collection_describe(collection_name=name) rsp = client.collection_load_state(collection_name=name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['loadState'] in ["LoadStateNotLoad", "LoadStateLoading"] # insert data nb = 3000 @@ -835,7 +835,7 @@ def test_list_collections_default(self): } time.sleep(1) rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 name_list.append(name) rsp = client.collection_list() all_collections = rsp['data'] @@ -863,7 +863,7 @@ def test_list_collections_with_invalid_api_key(self): } time.sleep(1) rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 name_list.append(name) client = self.collection_client client.api_key = "illegal_api_key" @@ -889,13 +889,13 @@ def test_describe_collections_default(self): "metricType": "L2" } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert rsp['data']['autoId'] is False assert rsp['data']['enableDynamicField'] is True @@ -936,7 +936,7 @@ def test_describe_collections_custom(self): assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name for field in rsp['data']['fields']: @@ -964,7 +964,7 @@ def test_describe_collections_with_invalid_api_key(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections @@ -987,7 +987,7 @@ def test_describe_collections_with_invalid_collection_name(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections @@ -1017,7 +1017,7 @@ def test_drop_collections_default(self): "metricType": "L2" } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 clo_list.append(name) rsp = 
self.collection_client.collection_list() all_collections = rsp['data'] @@ -1029,7 +1029,7 @@ def test_drop_collections_default(self): "collectionName": name, } rsp = self.collection_client.collection_drop(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_list() all_collections = rsp['data'] for name in clo_list: @@ -1052,7 +1052,7 @@ def test_drop_collections_with_invalid_api_key(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections @@ -1081,7 +1081,7 @@ def test_drop_collections_with_invalid_collection_name(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections @@ -1091,7 +1091,7 @@ def test_drop_collections_with_invalid_collection_name(self): "collectionName": invalid_name, } rsp = client.collection_drop(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 @pytest.mark.L0 @@ -1112,7 +1112,7 @@ def test_rename_collection(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections @@ -1122,7 +1122,7 @@ def test_rename_collection(self): "newCollectionName": new_name, } rsp = client.collection_rename(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert new_name in all_collections diff --git a/tests/restful_client_v2/testcases/test_index_operation.py b/tests/restful_client_v2/testcases/test_index_operation.py index fbccc84250efb..534684c9bfbdf 100644 --- a/tests/restful_client_v2/testcases/test_index_operation.py +++ b/tests/restful_client_v2/testcases/test_index_operation.py @@ -76,13 +76,13 @@ def test_index_e2e(self, dim, metric_type, index_type): if index_type == "AUTOINDEX": payload["indexParams"][0]["params"] = {"index_type": "AUTOINDEX"} rsp = self.index_client.index_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 time.sleep(10) # list index, expect not empty rsp = self.index_client.index_list(collection_name=name) # describe index rsp = self.index_client.index_describe(collection_name=name, index_name="book_intro_vector") - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == len(payload['indexParams']) expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName']) actual_index = sorted(rsp['data'], key=lambda x: x['fieldName']) @@ -99,7 +99,7 @@ def test_index_e2e(self, dim, metric_type, index_type): "indexName": actual_index[i]['indexName'] } rsp = self.index_client.index_drop(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # list index, expect empty rsp = self.index_client.index_list(collection_name=name) assert rsp['data'] == [] @@ -156,13 +156,13 @@ def test_index_for_scalar_field(self, dim, index_type): "params": {"index_type": "INVERTED"}}] } rsp = self.index_client.index_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 time.sleep(10) # list index, expect not empty rsp = self.index_client.index_list(collection_name=name) # describe index rsp = self.index_client.index_describe(collection_name=name, index_name="word_count_vector") - assert rsp['code'] == 200 + assert rsp['code'] == 0 
assert len(rsp['data']) == len(payload['indexParams']) expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName']) actual_index = sorted(rsp['data'], key=lambda x: x['fieldName']) @@ -226,13 +226,13 @@ def test_index_for_binary_vector_field(self, dim, metric_type, index_type): if index_type == "BIN_IVF_FLAT": payload["indexParams"][0]["params"]["nlist"] = "16384" rsp = self.index_client.index_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 time.sleep(10) # list index, expect not empty rsp = self.index_client.index_list(collection_name=name) # describe index rsp = self.index_client.index_describe(collection_name=name, index_name=index_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == len(payload['indexParams']) expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName']) actual_index = sorted(rsp['data'], key=lambda x: x['fieldName']) diff --git a/tests/restful_client_v2/testcases/test_jobs_operation.py b/tests/restful_client_v2/testcases/test_jobs_operation.py index 3e0afbd5ec774..c651463efaab1 100644 --- a/tests/restful_client_v2/testcases/test_jobs_operation.py +++ b/tests/restful_client_v2/testcases/test_jobs_operation.py @@ -114,7 +114,7 @@ def test_job_e2e(self, insert_num, import_task_num, auto_id, is_partition_key, e "outputFields": ["*"], } rsp = self.vector_client.vector_query(payload) - assert rsp["code"] == 200 + assert rsp['code'] == 0 @pytest.mark.parametrize("insert_num", [5000]) @pytest.mark.parametrize("import_task_num", [1]) @@ -205,7 +205,7 @@ def test_import_job_with_db(self, insert_num, import_task_num, auto_id, is_parti "outputFields": ["*"], } rsp = self.vector_client.vector_query(payload) - assert rsp["code"] == 200 + assert rsp['code'] == 0 @pytest.mark.parametrize("insert_num", [5000]) @pytest.mark.parametrize("import_task_num", [1]) @@ -306,7 +306,7 @@ def test_import_job_with_partition(self, insert_num, import_task_num, auto_id, i "outputFields": ["*"], } rsp = self.vector_client.vector_query(payload) - assert rsp["code"] == 200 + assert rsp['code'] == 0 def test_job_import_multi_json_file(self): # create collection @@ -720,15 +720,15 @@ def test_job_import_binlog_file_type(self, nb, dim, insert_round, auto_id, ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # create restore collection restore_collection_name = f"{name}_restore" payload["collectionName"] = restore_collection_name rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -772,7 +772,7 @@ def test_job_import_binlog_file_type(self, nb, dim, insert_round, auto_id, "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # flush data to generate binlog file c = Collection(name) @@ -781,7 +781,7 @@ def test_job_import_binlog_file_type(self, nb, dim, insert_round, auto_id, # query data to make sure the data is inserted rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50}) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 50 # get collection id c = Collection(name) @@ -802,7 +802,7 @@ def test_job_import_binlog_file_type(self, nb, 
dim, insert_round, auto_id, if is_partition_key: payload["partitionName"] = "_default_0" rsp = self.import_job_client.create_import_jobs(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # list import job payload = { "collectionName": restore_collection_name, @@ -1528,7 +1528,7 @@ def test_get_job_progress_with_mismatch_db_name(self, insert_num, import_task_nu "outputFields": ["*"], } rsp = self.vector_client.vector_query(payload) - assert rsp["code"] == 200 + assert rsp['code'] == 0 @pytest.mark.L1 diff --git a/tests/restful_client_v2/testcases/test_partition_operation.py b/tests/restful_client_v2/testcases/test_partition_operation.py index 13022895541d1..44717b5686c3b 100644 --- a/tests/restful_client_v2/testcases/test_partition_operation.py +++ b/tests/restful_client_v2/testcases/test_partition_operation.py @@ -36,13 +36,13 @@ def test_partition_e2e(self): {"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"}] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name # insert data to default partition data = [] @@ -59,11 +59,11 @@ def test_partition_e2e(self): "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # create partition partition_name = "test_partition" rsp = self.partition_client.partition_create(collection_name=name, partition_name=partition_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data to partition data = [] for j in range(3000, 6000): @@ -80,45 +80,45 @@ def test_partition_e2e(self): "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # create partition again rsp = self.partition_client.partition_create(collection_name=name, partition_name=partition_name) # list partitions rsp = self.partition_client.partition_list(collection_name=name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert partition_name in rsp['data'] # has partition rsp = self.partition_client.partition_has(collection_name=name, partition_name=partition_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']["has"] is True # flush and get partition statistics c = Collection(name=name) c.flush() rsp = self.partition_client.partition_stats(collection_name=name, partition_name=partition_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['rowCount'] == 3000 # release partition rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # release partition again rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # load partition rsp = self.partition_client.partition_load(collection_name=name, partition_names=[partition_name]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # load partition again rsp = self.partition_client.partition_load(collection_name=name, partition_names=[partition_name]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # drop partition when it is loaded rsp = 
self.partition_client.partition_drop(collection_name=name, partition_name=partition_name) assert rsp['code'] == 65535 # drop partition after release rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name]) rsp = self.partition_client.partition_drop(collection_name=name, partition_name=partition_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # has partition rsp = self.partition_client.partition_has(collection_name=name, partition_name=partition_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']["has"] is False diff --git a/tests/restful_client_v2/testcases/test_restful_sdk_mix_use_scenario.py b/tests/restful_client_v2/testcases/test_restful_sdk_mix_use_scenario.py index 97a862248a551..ab7e5a28b7bab 100644 --- a/tests/restful_client_v2/testcases/test_restful_sdk_mix_use_scenario.py +++ b/tests/restful_client_v2/testcases/test_restful_sdk_mix_use_scenario.py @@ -37,7 +37,7 @@ def test_collection_created_by_sdk_describe_by_restful(self, dim, enable_dynamic all_collections = rsp['data'] assert name in all_collections rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert rsp['data']['enableDynamicField'] == enable_dynamic assert rsp['data']['load'] == "LoadStateNotLoad" @@ -57,7 +57,7 @@ def test_collection_created_by_restful_describe_by_sdk(self, dim, metric_type): "metricType": metric_type, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 collection = Collection(name=name) logger.info(collection.schema) field_names = [field.name for field in collection.schema.fields] @@ -89,7 +89,7 @@ def test_collection_created_index_by_sdk_describe_by_restful(self, metric_type): all_collections = rsp['data'] assert name in all_collections rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert len(rsp['data']['indexes']) == 1 and rsp['data']['indexes'][0]['metricType'] == metric_type @@ -160,7 +160,7 @@ def test_collection_create_by_sdk_insert_vector_by_restful(self): "data": data, } rsp = client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb assert len(rsp['data']["insertIds"]) == nb @@ -196,7 +196,7 @@ def test_collection_create_by_sdk_search_vector_by_restful(self): } # search data by restful rsp = client.vector_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 10 def test_collection_create_by_sdk_query_vector_by_restful(self): @@ -230,7 +230,7 @@ def test_collection_create_by_sdk_query_vector_by_restful(self): } # query data by restful rsp = client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 10 def test_collection_create_by_restful_search_vector_by_sdk(self): diff --git a/tests/restful_client_v2/testcases/test_role_operation.py b/tests/restful_client_v2/testcases/test_role_operation.py index 63b4acae2da42..9ad8049a65ce7 100644 --- a/tests/restful_client_v2/testcases/test_role_operation.py +++ b/tests/restful_client_v2/testcases/test_role_operation.py @@ -43,7 +43,7 @@ def test_role_e2e(self): assert role_name in rsp['data'] # describe role rsp = self.role_client.role_describe(role_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # grant privilege to role payload = { "roleName": role_name, @@ -52,7 +52,7 @@ def 
test_role_e2e(self): "privilege": "CreateCollection" } rsp = self.role_client.role_grant(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # describe role after grant rsp = self.role_client.role_describe(role_name) privileges = [] diff --git a/tests/restful_client_v2/testcases/test_user_operation.py b/tests/restful_client_v2/testcases/test_user_operation.py index 4491c2aec7db3..b3cc0e5b76ca0 100644 --- a/tests/restful_client_v2/testcases/test_user_operation.py +++ b/tests/restful_client_v2/testcases/test_user_operation.py @@ -56,7 +56,7 @@ def test_user_e2e(self): "newPassword": new_password } rsp = self.user_client.user_password_update(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # drop user payload = { "userName": user_name @@ -124,7 +124,7 @@ def test_user_binding_role(self): } self.collection_client.api_key = f"{user_name}:{password}" rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 @pytest.mark.L1 @@ -158,7 +158,7 @@ def test_create_user_twice(self): for i in range(2): rsp = self.user_client.user_create(payload) if i == 0: - assert rsp['code'] == 200 + assert rsp['code'] == 0 else: assert rsp['code'] == 65535 assert "user already exists" in rsp['message'] diff --git a/tests/restful_client_v2/testcases/test_vector_operations.py b/tests/restful_client_v2/testcases/test_vector_operations.py index 5878946ff65f5..7d497718ba24f 100644 --- a/tests/restful_client_v2/testcases/test_vector_operations.py +++ b/tests/restful_client_v2/testcases/test_vector_operations.py @@ -33,10 +33,10 @@ def test_insert_entities_with_simple_payload(self, nb, dim, insert_round): "metricType": "L2" } rsp = self.collection_client.collection_create(collection_payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = get_data_by_payload(collection_payload, nb) @@ -47,7 +47,7 @@ def test_insert_entities_with_simple_payload(self, nb, dim, insert_round): body_size = sys.getsizeof(json.dumps(payload)) logger.info(f"body size: {body_size / 1024 / 1024} MB") rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb @pytest.mark.parametrize("insert_round", [1]) @@ -92,10 +92,10 @@ def test_insert_entities_with_all_scalar_datatype(self, nb, dim, insert_round, a ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -139,11 +139,11 @@ def test_insert_entities_with_all_scalar_datatype(self, nb, dim, insert_round, a "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # query data to make sure the data is inserted rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50}) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 50 @pytest.mark.parametrize("insert_round", [1]) @@ -187,10 +187,10 @@ def test_insert_entities_with_all_vector_datatype(self, nb, dim, insert_round, a ] } rsp = self.collection_client.collection_create(payload) - assert 
rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -224,7 +224,7 @@ def test_insert_entities_with_all_vector_datatype(self, nb, dim, insert_round, a "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb c = Collection(name) res = c.query( @@ -235,7 +235,7 @@ def test_insert_entities_with_all_vector_datatype(self, nb, dim, insert_round, a logger.info(f"res: {res}") # query data to make sure the data is inserted rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50}) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 50 @pytest.mark.parametrize("insert_round", [1]) @@ -280,10 +280,10 @@ def test_insert_entities_with_all_json_datatype(self, nb, dim, insert_round, aut ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 json_value = [ 1, 1.0, @@ -336,11 +336,11 @@ def test_insert_entities_with_all_json_datatype(self, nb, dim, insert_round, aut "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # query data to make sure the data is inserted rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50}) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 50 @@ -359,9 +359,9 @@ def test_insert_vector_with_invalid_api_key(self): "dimension": dim, } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data nb = 10 data = [ @@ -393,9 +393,9 @@ def test_insert_vector_with_invalid_collection_name(self): "dimension": dim, } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data nb = 100 data = get_data_by_payload(payload, nb) @@ -421,9 +421,9 @@ def test_insert_vector_with_invalid_database_name(self): "dimension": dim, } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data nb = 10 data = get_data_by_payload(payload, nb) @@ -449,9 +449,9 @@ def test_insert_vector_with_mismatch_dim(self): "dimension": dim, } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data nb = 1 data = [ @@ -493,10 +493,10 @@ def test_upsert_vector_default(self, nb, dim, insert_round, id_type): "indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = 
self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -516,7 +516,7 @@ def test_upsert_vector_default(self, nb, dim, insert_round, id_type): body_size = sys.getsizeof(json.dumps(payload)) logger.info(f"body size: {body_size / 1024 / 1024} MB") rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb c = Collection(name) c.flush() @@ -575,10 +575,10 @@ def test_upsert_vector_pk_auto_id(self, nb, dim, insert_round, id_type): "indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 ids = [] # insert data for i in range(insert_round): @@ -599,7 +599,7 @@ def test_upsert_vector_pk_auto_id(self, nb, dim, insert_round, id_type): body_size = sys.getsizeof(json.dumps(payload)) logger.info(f"body size: {body_size / 1024 / 1024} MB") rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb ids.extend(rsp['data']['insertIds']) c = Collection(name) @@ -682,10 +682,10 @@ def test_search_vector_with_all_vector_datatype(self, nb, dim, insert_round, aut ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -719,7 +719,7 @@ def test_search_vector_with_all_vector_datatype(self, nb, dim, insert_round, aut "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # search data payload = { @@ -739,7 +739,7 @@ def test_search_vector_with_all_vector_datatype(self, nb, dim, insert_round, aut "limit": 100, } rsp = self.vector_client.vector_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # assert no dup user_id user_ids = [r["user_id"]for r in rsp['data']] assert len(user_ids) == len(set(user_ids)) @@ -776,10 +776,10 @@ def test_search_vector_with_float_vector_datatype(self, nb, dim, insert_round, a ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -807,7 +807,7 @@ def test_search_vector_with_float_vector_datatype(self, nb, dim, insert_round, a "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # search data payload = { @@ -826,7 +826,7 @@ def test_search_vector_with_float_vector_datatype(self, nb, dim, insert_round, a "limit": 100, } rsp = self.vector_client.vector_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 100 @@ -864,10 +864,10 @@ def test_search_vector_with_sparse_float_vector_datatype(self, nb, dim, insert_r ] } rsp = 
self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -896,7 +896,7 @@ def test_search_vector_with_sparse_float_vector_datatype(self, nb, dim, insert_r "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # search data payload = { @@ -915,7 +915,7 @@ def test_search_vector_with_sparse_float_vector_datatype(self, nb, dim, insert_r if groupingField: payload["groupingField"] = groupingField rsp = self.vector_client.vector_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 @@ -952,10 +952,10 @@ def test_search_vector_with_binary_vector_datatype(self, nb, dim, insert_round, ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -983,7 +983,7 @@ def test_search_vector_with_binary_vector_datatype(self, nb, dim, insert_round, "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # flush data c = Collection(name) @@ -1008,7 +1008,7 @@ def test_search_vector_with_binary_vector_datatype(self, nb, dim, insert_round, "limit": 100, } rsp = self.vector_client.vector_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 100 @pytest.mark.parametrize("metric_type", ["IP", "L2", "COSINE"]) @@ -1028,7 +1028,7 @@ def test_search_vector_with_simple_payload(self, metric_type): "data": [vector_to_search], } rsp = self.vector_client.vector_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") limit = int(payload.get("limit", 100)) @@ -1068,7 +1068,7 @@ def test_search_vector_with_exceed_sum_limit_offset(self, sum_limit_offset): if sum_limit_offset > max_search_sum_limit_offset: assert rsp['code'] == 65535 return - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") limit = int(payload.get("limit", 100)) @@ -1109,7 +1109,7 @@ def test_search_vector_with_complex_payload(self, limit, offset, metric_type): if offset + limit > constant.MAX_SUM_OFFSET_AND_LIMIT: assert rsp['code'] == 90126 return - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") assert len(res) == limit @@ -1142,7 +1142,7 @@ def test_search_vector_with_complex_int_filter(self, filter_expr): "offset": 0, } rsp = self.vector_client.vector_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") assert len(res) <= limit @@ -1183,7 +1183,7 @@ def test_search_vector_with_complex_varchar_filter(self, filter_expr): "offset": 0, } rsp = self.vector_client.vector_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") assert len(res) <= limit @@ -1230,7 +1230,7 @@ def test_search_vector_with_complex_int64_varchar_and_filter(self, filter_expr): "offset": 0, } rsp = self.vector_client.vector_search(payload) - assert 
rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") assert len(res) <= limit @@ -1355,10 +1355,10 @@ def test_advanced_search_vector_with_multi_float32_vector_datatype(self, nb, dim ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -1389,7 +1389,7 @@ def test_advanced_search_vector_with_multi_float32_vector_datatype(self, nb, dim "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # advanced search data @@ -1420,7 +1420,7 @@ def test_advanced_search_vector_with_multi_float32_vector_datatype(self, nb, dim } rsp = self.vector_client.vector_advanced_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 10 @@ -1463,10 +1463,10 @@ def test_hybrid_search_vector_with_multi_float32_vector_datatype(self, nb, dim, ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -1497,7 +1497,7 @@ def test_hybrid_search_vector_with_multi_float32_vector_datatype(self, nb, dim, "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # advanced search data @@ -1528,7 +1528,7 @@ def test_hybrid_search_vector_with_multi_float32_vector_datatype(self, nb, dim, } rsp = self.vector_client.vector_hybrid_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 10 @@ -1579,10 +1579,10 @@ def test_query_entities_with_all_scalar_datatype(self, nb, dim, insert_round, au ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -1626,7 +1626,7 @@ def test_query_entities_with_all_scalar_datatype(self, nb, dim, insert_round, au "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # query data to make sure the data is inserted # 1. query for int64 @@ -1637,7 +1637,7 @@ def test_query_entities_with_all_scalar_datatype(self, nb, dim, insert_round, au "outputFields": ["*"] } rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 50 # 2. query for varchar @@ -1648,7 +1648,7 @@ def test_query_entities_with_all_scalar_datatype(self, nb, dim, insert_round, au "outputFields": ["*"] } rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 50 # 3. 
query for json @@ -1712,10 +1712,10 @@ def test_query_entities_with_all_vector_datatype(self, nb, dim, insert_round, au ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -1749,7 +1749,7 @@ def test_query_entities_with_all_vector_datatype(self, nb, dim, insert_round, au "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb c = Collection(name) res = c.query( @@ -1760,7 +1760,7 @@ def test_query_entities_with_all_vector_datatype(self, nb, dim, insert_round, au logger.info(f"res: {res}") # query data to make sure the data is inserted rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50}) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 50 @pytest.mark.parametrize("expr", ["10+20 <= uid < 20+30", "uid in [1,2,3,4]", @@ -1797,7 +1797,7 @@ def test_query_vector_with_int64_filter(self, expr, include_output_fields, parti output_fields.remove("vector") time.sleep(5) rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") for r in res: @@ -1821,7 +1821,7 @@ def test_query_vector_with_count(self): "outputFields": ["count(*)"] } rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data'][0]['count(*)'] == 3000 @pytest.mark.xfail(reason="query by id is not supported") @@ -1837,7 +1837,7 @@ def test_query_vector_by_id(self): "id": insert_ids, } rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 @pytest.mark.parametrize("filter_expr", ["name > \"placeholder\"", "name like \"placeholder%\""]) @pytest.mark.parametrize("include_output_fields", [True, False]) @@ -1872,7 +1872,7 @@ def test_query_vector_with_varchar_filter(self, filter_expr, include_output_fiel if not include_output_fields: payload.pop("outputFields") rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") assert len(res) <= limit @@ -1920,7 +1920,7 @@ def test_query_vector_with_large_sum_of_limit_offset(self, sum_of_limit_offset): if sum_of_limit_offset > max_sum_of_limit_offset: assert rsp['code'] == 1 return - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") assert len(res) <= limit @@ -1975,7 +1975,7 @@ def test_get_vector_with_simple_payload(self): "data": [vector_to_search], } rsp = self.vector_client.vector_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") limit = int(payload.get("limit", 100)) @@ -1988,7 +1988,7 @@ def test_get_vector_with_simple_payload(self): "id": ids[0], } rsp = self.vector_client.vector_get(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {res}") logger.info(f"res: {len(res)}") @@ -2015,7 +2015,7 @@ def test_get_vector_complex(self, id_field_type, include_output_fields, include_ "filter": f"uid in {uids}", } rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = 
rsp['data'] logger.info(f"res: {len(res)}") ids = [] @@ -2039,7 +2039,7 @@ def test_get_vector_complex(self, id_field_type, include_output_fields, include_ "id": id_to_get } rsp = self.vector_client.vector_get(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] if isinstance(id_to_get, list): if include_invalid_id: @@ -2077,7 +2077,7 @@ def test_delete_vector_by_id(self): "id": insert_ids, } rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 @pytest.mark.parametrize("id_field_type", ["list", "one"]) def test_delete_vector_by_pk_field_ids(self, id_field_type): @@ -2103,7 +2103,7 @@ def test_delete_vector_by_pk_field_ids(self, id_field_type): "filter": f"id == {id_to_delete}" } rsp = self.vector_client.vector_delete(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # verify data deleted by get payload = { "collectionName": name, @@ -2130,7 +2130,7 @@ def test_delete_vector_by_filter_pk_field(self, id_field_type): "filter": f"uid in {uids}", } rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") ids = [] @@ -2158,7 +2158,7 @@ def test_delete_vector_by_filter_pk_field(self, id_field_type): } rsp = self.vector_client.vector_delete(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 logger.info(f"delete res: {rsp}") # verify data deleted @@ -2170,7 +2170,7 @@ def test_delete_vector_by_filter_pk_field(self, id_field_type): } time.sleep(5) rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 0 def test_delete_vector_by_custom_pk_field(self): @@ -2192,10 +2192,10 @@ def test_delete_vector_by_custom_pk_field(self): "indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 pk_values = [] # insert data for i in range(insert_round): @@ -2217,7 +2217,7 @@ def test_delete_vector_by_custom_pk_field(self): body_size = sys.getsizeof(json.dumps(payload)) logger.info(f"body size: {body_size / 1024 / 1024} MB") rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # query data before delete c = Collection(name) @@ -2255,10 +2255,10 @@ def test_delete_vector_by_filter_custom_field(self): "indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -2277,7 +2277,7 @@ def test_delete_vector_by_filter_custom_field(self): body_size = sys.getsizeof(json.dumps(payload)) logger.info(f"body size: {body_size / 1024 / 1024} MB") rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # query data before delete c = Collection(name) @@ -2313,7 +2313,7 @@ def test_delete_vector_with_non_primary_key(self): "outputFields": ["id", "uid"] } rsp = 
self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") id_list = [r['uid'] for r in res] @@ -2327,7 +2327,7 @@ def test_delete_vector_with_non_primary_key(self): "outputFields": ["id", "uid"] } rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] num_before_delete = len(res) logger.info(f"res: {len(res)}") @@ -2371,7 +2371,7 @@ def test_delete_vector_with_invalid_api_key(self): "filter": f"uid in {uids}", } rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") ids = [] @@ -2408,7 +2408,7 @@ def test_delete_vector_with_invalid_collection_name(self): "outputFields": ["id", "uid"] } rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") id_list = [r['id'] for r in res] @@ -2422,7 +2422,7 @@ def test_delete_vector_with_invalid_collection_name(self): "outputFields": ["id", "uid"] } rsp = self.vector_client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 res = rsp['data'] logger.info(f"res: {len(res)}") # delete data From af71116499982eddee1f0244678e97570b0814c6 Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Mon, 27 May 2024 14:27:41 +0800 Subject: [PATCH 073/126] fix: Fix frequent 'failed to get request info' log (#33334) issue: https://github.com/milvus-io/milvus/issues/33333 --------- Signed-off-by: bigsheeper --- internal/proxy/rate_limit_interceptor.go | 4 ++-- internal/proxy/rate_limit_interceptor_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/proxy/rate_limit_interceptor.go b/internal/proxy/rate_limit_interceptor.go index 49e3c11b803bb..01030fb8f190b 100644 --- a/internal/proxy/rate_limit_interceptor.go +++ b/internal/proxy/rate_limit_interceptor.go @@ -41,7 +41,7 @@ func RateLimitInterceptor(limiter types.Limiter) grpc.UnaryServerInterceptor { return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { dbID, collectionIDToPartIDs, rt, n, err := getRequestInfo(ctx, req) if err != nil { - log.RatedWarn(10, "failed to get request info", zap.Error(err)) + log.Warn("failed to get request info", zap.Error(err)) return handler(ctx, req) } @@ -205,7 +205,7 @@ func getRequestInfo(ctx context.Context, req interface{}) (int64, map[int64][]in if req == nil { return util.InvalidDBID, map[int64][]int64{}, 0, 0, fmt.Errorf("null request") } - return util.InvalidDBID, map[int64][]int64{}, 0, 0, fmt.Errorf("unsupported request type %T", req) + return util.InvalidDBID, map[int64][]int64{}, 0, 0, nil } } diff --git a/internal/proxy/rate_limit_interceptor_test.go b/internal/proxy/rate_limit_interceptor_test.go index 53db9ede78c99..3f9fbac8435a2 100644 --- a/internal/proxy/rate_limit_interceptor_test.go +++ b/internal/proxy/rate_limit_interceptor_test.go @@ -261,7 +261,7 @@ func TestRateLimitInterceptor(t *testing.T) { assert.Error(t, err) _, _, _, _, err = getRequestInfo(context.Background(), &milvuspb.CalcDistanceRequest{}) - assert.Error(t, err) + assert.NoError(t, err) }) t.Run("test getFailedResponse", func(t *testing.T) { From 5cf41613943f90cdad5eaffc99de63ca29c2fa86 Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Mon, 27 May 2024 14:33:41 +0800 Subject: [PATCH 074/126] fix: Fix exception info is missing (#33393) Replace based 
std::exception to prevent "object slicing" issue: https://github.com/milvus-io/milvus/issues/33392 Signed-off-by: bigsheeper --- internal/core/src/common/Channel.h | 5 +++-- internal/core/src/common/Exception.h | 16 ++++++++++++++++ internal/core/src/segcore/Utils.cpp | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/internal/core/src/common/Channel.h b/internal/core/src/common/Channel.h index f042945432935..1dead8324791c 100644 --- a/internal/core/src/common/Channel.h +++ b/internal/core/src/common/Channel.h @@ -14,6 +14,7 @@ #include #include #include +#include "Exception.h" namespace milvus { template @@ -55,7 +56,7 @@ class Channel { } void - close(std::optional ex = std::nullopt) { + close(std::optional ex = std::nullopt) { if (ex.has_value()) { ex_ = std::move(ex); } @@ -64,6 +65,6 @@ class Channel { private: oneapi::tbb::concurrent_bounded_queue> inner_{}; - std::optional ex_{}; + std::optional ex_{}; }; } // namespace milvus diff --git a/internal/core/src/common/Exception.h b/internal/core/src/common/Exception.h index 68941ba56716c..d4f3863b9df25 100644 --- a/internal/core/src/common/Exception.h +++ b/internal/core/src/common/Exception.h @@ -20,6 +20,22 @@ namespace milvus { +class MilvusException : public std::exception { + public: + explicit MilvusException(const std::string& msg) + : std::exception(), exception_message_(msg) { + } + const char* + what() const noexcept { + return exception_message_.c_str(); + } + virtual ~MilvusException() { + } + + private: + std::string exception_message_; +}; + class NotImplementedException : public std::exception { public: explicit NotImplementedException(const std::string& msg) diff --git a/internal/core/src/segcore/Utils.cpp b/internal/core/src/segcore/Utils.cpp index cee4a04c92ac2..6349ad847ac9d 100644 --- a/internal/core/src/segcore/Utils.cpp +++ b/internal/core/src/segcore/Utils.cpp @@ -820,7 +820,7 @@ LoadFieldDatasFromRemote(const std::vector& remote_files, channel->close(); } catch (std::exception& e) { LOG_INFO("failed to load data from remote: {}", e.what()); - channel->close(std::move(e)); + channel->close(MilvusException(e.what())); } } From 9f9dff33f12b22a24c5fba03f41f2e23a28a46a8 Mon Sep 17 00:00:00 2001 From: Alexander Guzhva Date: Mon, 27 May 2024 02:52:53 -0400 Subject: [PATCH 075/126] fix: make milvus compilable using gcc-13 (#30149) add a missing header Signed-off-by: Alexandr Guzhva --- internal/core/src/config/ConfigKnowhere.h | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/core/src/config/ConfigKnowhere.h b/internal/core/src/config/ConfigKnowhere.h index eff8be76f9c28..57a0713014d6b 100644 --- a/internal/core/src/config/ConfigKnowhere.h +++ b/internal/core/src/config/ConfigKnowhere.h @@ -15,6 +15,7 @@ // limitations under the License. 
#pragma once +#include #include namespace milvus::config { From 5b862264658befae894ac0e5894e961dd3c7a2e1 Mon Sep 17 00:00:00 2001 From: nico <109071306+NicoYuan1986@users.noreply.github.com> Date: Mon, 27 May 2024 14:59:42 +0800 Subject: [PATCH 076/126] test: update test cases about connection (#33257) Signed-off-by: nico --- .../python_client/testcases/test_connection.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/python_client/testcases/test_connection.py b/tests/python_client/testcases/test_connection.py index 7e8a5c196ef87..74a46cad11fba 100644 --- a/tests/python_client/testcases/test_connection.py +++ b/tests/python_client/testcases/test_connection.py @@ -824,7 +824,7 @@ def test_close_repeatedly(self, host, port, connect_name): self.connection_wrap.disconnect(alias=connect_name) @pytest.mark.tags(ct.CaseLabel.L2) - @pytest.mark.parametrize("protocol", ["http", "ftp", "tcp"]) + @pytest.mark.parametrize("protocol", ["http", "tcp"]) @pytest.mark.parametrize("connect_name", [DefaultConfig.DEFAULT_USING]) def test_parameters_with_uri_connection(self, host, port, connect_name, protocol): """ @@ -836,6 +836,21 @@ def test_parameters_with_uri_connection(self, host, port, connect_name, protocol uri = "{}://{}:{}".format(protocol, host, port) self.connection_wrap.connect(alias=connect_name, uri=uri, check_task=ct.CheckTasks.ccr) + @pytest.mark.tags(ct.CaseLabel.L2) + @pytest.mark.parametrize("protocol", ["ftp"]) + @pytest.mark.parametrize("connect_name", [DefaultConfig.DEFAULT_USING]) + def test_parameters_with_invalid_uri_connection(self, host, port, connect_name, protocol): + """ + target: test the uri parameter to get a normal connection + method: get a connection with the uri parameter + expected: connected is True + """ + + uri = "{}://{}:{}".format(protocol, host, port) + self.connection_wrap.connect(alias=connect_name, uri=uri, check_task=ct.CheckTasks.err_res, + check_items={ct.err_code: 999, + ct.err_msg: "Open local milvus failed, dir: ftp: not exists"}) + @pytest.mark.tags(ct.CaseLabel.L2) @pytest.mark.parametrize("connect_name", [DefaultConfig.DEFAULT_USING]) def test_parameters_with_address_connection(self, host, port, connect_name): From 6275c750135621ea8a0894c60fdbebe9a49fa11c Mon Sep 17 00:00:00 2001 From: wei liu Date: Mon, 27 May 2024 15:01:42 +0800 Subject: [PATCH 077/126] fix: Watch channel task may stuck forever until qn become offline (#33394) issue: #32901 pr #32814 introduce the compatible issue, when upgrade to milvus latest, the query coord may skip update dist due to the lastModifyTs doesn't changes. but for old version querynode, the lastModifyTs in GetDataDistritbuionResponse is always 0, which makes qc skip update dist. then qc will keep retry the task to watch channel again and again. this PR add compatible with old version querynode, when lastModifyTs is 0, qc will update it's data distribution. 
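For illustration, the compatibility rule described above can be reduced to a small sketch (hypothetical helper and package name, not the actual project code; the real one-line change is in the diff below). The underlying assumption is that proto3 int64 fields default to 0 when unset, so a pre-upgrade QueryNode that never populates lastModifyTs always reports 0 and must never be skipped.

// Sketch only: a hypothetical helper capturing the skip condition described above.
// The real check lives in distHandler.handleDistResp (see the diff below).
package dist

func shouldSkipDistUpdate(lastModifyTs, lastUpdateTs int64) bool {
	if lastModifyTs == 0 {
		// Old QueryNodes never set this field, so the proto3 default 0 is reported;
		// always accept their distribution instead of skipping the update.
		return false
	}
	// Newer QueryNodes bump lastModifyTs on every change; skip only when nothing changed
	// since the last accepted distribution.
	return lastModifyTs <= lastUpdateTs
}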
Signed-off-by: Wei Liu --- internal/querycoordv2/dist/dist_handler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/querycoordv2/dist/dist_handler.go b/internal/querycoordv2/dist/dist_handler.go index 1d396a415008d..b88f34177f87b 100644 --- a/internal/querycoordv2/dist/dist_handler.go +++ b/internal/querycoordv2/dist/dist_handler.go @@ -98,7 +98,7 @@ func (dh *distHandler) handleDistResp(resp *querypb.GetDataDistributionResponse) node.SetLastHeartbeat(time.Now()) // skip update dist if no distribution change happens in query node - if resp.GetLastModifyTs() <= dh.lastUpdateTs { + if resp.GetLastModifyTs() != 0 && resp.GetLastModifyTs() <= dh.lastUpdateTs { log.RatedInfo(30, "skip update dist due to no distribution change", zap.Int64("lastModifyTs", resp.GetLastModifyTs()), zap.Int64("lastUpdateTs", dh.lastUpdateTs)) } else { dh.lastUpdateTs = resp.GetLastModifyTs() From 066c8ea17591e775f7f734defff8e3514abafc39 Mon Sep 17 00:00:00 2001 From: Ted Xu Date: Mon, 27 May 2024 16:27:42 +0800 Subject: [PATCH 078/126] feat: stream reader/writer to support nulls (#33080) See: #31728 --------- Signed-off-by: Ted Xu --- internal/storage/serde.go | 130 ++++++++++++++++++++++++++++++--- internal/storage/serde_test.go | 100 +++++++++++++++++++++---- 2 files changed, 206 insertions(+), 24 deletions(-) diff --git a/internal/storage/serde.go b/internal/storage/serde.go index c75bf5aabc0b6..6ec7b38c92c9f 100644 --- a/internal/storage/serde.go +++ b/internal/storage/serde.go @@ -199,10 +199,16 @@ func (crr *compositeRecordReader) Close() { } type serdeEntry struct { - arrowType func(int) arrow.DataType + // arrowType returns the arrow type for the given dimension + arrowType func(int) arrow.DataType + // deserialize deserializes the i-th element in the array, returns the value and ok. + // null is deserialized to nil without checking the type nullability. deserialize func(arrow.Array, int) (any, bool) - serialize func(array.Builder, any) bool - sizeof func(any) uint64 + // serialize serializes the value to the builder, returns ok. + // nil is serialized to null without checking the type nullability. 
+ serialize func(array.Builder, any) bool + // sizeof returns the size in bytes of the value + sizeof func(any) uint64 } var serdeMap = func() map[schemapb.DataType]serdeEntry { @@ -212,12 +218,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.FixedWidthTypes.Boolean }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Boolean); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.BooleanBuilder); ok { if v, ok := v.(bool); ok { builder.Append(v) @@ -235,12 +248,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Int8 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Int8); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Int8Builder); ok { if v, ok := v.(int8); ok { builder.Append(v) @@ -258,12 +278,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Int16 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Int16); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Int16Builder); ok { if v, ok := v.(int16); ok { builder.Append(v) @@ -281,12 +308,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Int32 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Int32); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Int32Builder); ok { if v, ok := v.(int32); ok { builder.Append(v) @@ -304,12 +338,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Int64 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Int64); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Int64Builder); ok { if v, ok := v.(int64); ok { builder.Append(v) @@ -327,12 +368,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Float32 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Float32); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Float32Builder); ok { if v, ok := v.(float32); ok { builder.Append(v) @@ -350,12 +398,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Float64 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Float64); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Float64Builder); ok { if v, ok := v.(float64); 
ok { builder.Append(v) @@ -373,12 +428,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.BinaryTypes.String }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.String); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.StringBuilder); ok { if v, ok := v.(string); ok { builder.Append(v) @@ -388,6 +450,9 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return false }, func(v any) uint64 { + if v == nil { + return 8 + } return uint64(len(v.(string))) }, } @@ -399,6 +464,9 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.BinaryTypes.Binary }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Binary); ok && i < arr.Len() { v := &schemapb.ScalarField{} if err := proto.Unmarshal(arr.Value(i), v); err == nil { @@ -408,6 +476,10 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.BinaryBuilder); ok { if vv, ok := v.(*schemapb.ScalarField); ok { if bytes, err := proto.Marshal(vv); err == nil { @@ -419,11 +491,17 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return false }, func(v any) uint64 { + if v == nil { + return 8 + } return uint64(v.(*schemapb.ScalarField).XXX_Size()) }, } sizeOfBytes := func(v any) uint64 { + if v == nil { + return 8 + } return uint64(len(v.([]byte))) } @@ -432,12 +510,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.BinaryTypes.Binary }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Binary); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.BinaryBuilder); ok { if v, ok := v.([]byte); ok { builder.Append(v) @@ -452,12 +537,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { m[schemapb.DataType_JSON] = byteEntry fixedSizeDeserializer := func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.FixedSizeBinary); ok && i < arr.Len() { return arr.Value(i), true } return nil, false } fixedSizeSerializer := func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.FixedSizeBinaryBuilder); ok { if v, ok := v.([]byte); ok { builder.Append(v) @@ -496,12 +588,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return &arrow.FixedSizeBinaryType{ByteWidth: i * 4} }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.FixedSizeBinary); ok && i < arr.Len() { return arrow.Float32Traits.CastFromBytes(arr.Value(i)), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.FixedSizeBinaryBuilder); ok { if vv, ok := v.([]float32); ok { dim := len(vv) @@ -518,6 +617,9 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return false }, func(v any) uint64 { + if v == nil { + return 8 + } return uint64(len(v.([]float32)) * 4) }, } @@ -639,11 +741,15 @@ func NewBinlogDeserializeReader(blobs []*Blob, PKfieldID UniqueID) (*Deserialize m := 
value.Value.(map[FieldID]interface{}) for j, dt := range r.Schema() { - d, ok := serdeMap[dt].deserialize(r.Column(j), i) - if ok { - m[j] = d // TODO: avoid memory copy here. + if r.Column(j).IsNull(i) { + m[j] = nil } else { - return errors.New(fmt.Sprintf("unexpected type %s", dt)) + d, ok := serdeMap[dt].deserialize(r.Column(j), i) + if ok { + m[j] = d // TODO: avoid memory copy here. + } else { + return errors.New(fmt.Sprintf("unexpected type %s", dt)) + } } } @@ -900,8 +1006,9 @@ func (bsw *BinlogStreamWriter) GetRecordWriter() (RecordWriter, error) { fid := bsw.fieldSchema.FieldID dim, _ := typeutil.GetDim(bsw.fieldSchema) rw, err := newSingleFieldRecordWriter(fid, arrow.Field{ - Name: strconv.Itoa(int(fid)), - Type: serdeMap[bsw.fieldSchema.DataType].arrowType(int(dim)), + Name: strconv.Itoa(int(fid)), + Type: serdeMap[bsw.fieldSchema.DataType].arrowType(int(dim)), + Nullable: true, // No nullable check here. }, &bsw.buf) if err != nil { return nil, err @@ -1028,8 +1135,9 @@ func NewBinlogSerializeWriter(schema *schemapb.CollectionSchema, partitionID, se arrays[i] = builder.NewArray() builder.Release() fields[i] = arrow.Field{ - Name: strconv.Itoa(int(fid)), - Type: arrays[i].DataType(), + Name: strconv.Itoa(int(fid)), + Type: arrays[i].DataType(), + Nullable: true, // No nullable check here. } field2Col[fid] = i i++ diff --git a/internal/storage/serde_test.go b/internal/storage/serde_test.go index 0d9306069d775..8cdf15b847532 100644 --- a/internal/storage/serde_test.go +++ b/internal/storage/serde_test.go @@ -178,6 +178,68 @@ func TestBinlogSerializeWriter(t *testing.T) { }) } +func TestNull(t *testing.T) { + t.Run("test null", func(t *testing.T) { + schema := generateTestSchema() + // Copy write the generated data + writers := NewBinlogStreamWriters(0, 0, 0, schema.Fields) + writer, err := NewBinlogSerializeWriter(schema, 0, 0, writers, 1024) + assert.NoError(t, err) + + m := make(map[FieldID]any) + m[common.RowIDField] = int64(0) + m[common.TimeStampField] = int64(0) + m[10] = nil + m[11] = nil + m[12] = nil + m[13] = nil + m[14] = nil + m[15] = nil + m[16] = nil + m[17] = nil + m[18] = nil + m[19] = nil + m[101] = nil + m[102] = nil + m[103] = nil + m[104] = nil + m[105] = nil + m[106] = nil + pk, err := GenPrimaryKeyByRawData(m[common.RowIDField], schemapb.DataType_Int64) + assert.NoError(t, err) + + value := &Value{ + ID: 0, + PK: pk, + Timestamp: 0, + IsDeleted: false, + Value: m, + } + writer.Write(value) + err = writer.Close() + assert.NoError(t, err) + + // Read from the written data + blobs := make([]*Blob, len(writers)) + i := 0 + for _, w := range writers { + blob, err := w.Finalize() + assert.NoError(t, err) + assert.NotNil(t, blob) + blobs[i] = blob + i++ + } + reader, err := NewBinlogDeserializeReader(blobs, common.RowIDField) + assert.NoError(t, err) + defer reader.Close() + err = reader.Next() + assert.NoError(t, err) + + readValue := reader.Value() + assert.Equal(t, value, readValue) + }) +} + func TestSerDe(t *testing.T) { type args struct { dt schemapb.DataType @@ -190,33 +252,45 @@ func TestSerDe(t *testing.T) { want1 bool }{ {"test bool", args{dt: schemapb.DataType_Bool, v: true}, true, true}, - {"test bool negative", args{dt: schemapb.DataType_Bool, v: nil}, nil, false}, + {"test bool null", args{dt: schemapb.DataType_Bool, v: nil}, nil, true}, + {"test bool negative", args{dt: schemapb.DataType_Bool, v: -1}, nil, false}, {"test int8", args{dt: schemapb.DataType_Int8, v: int8(1)}, int8(1), true}, - {"test int8 negative", args{dt: schemapb.DataType_Int8, v: 
nil}, nil, false}, + {"test int8 null", args{dt: schemapb.DataType_Int8, v: nil}, nil, true}, + {"test int8 negative", args{dt: schemapb.DataType_Int8, v: true}, nil, false}, {"test int16", args{dt: schemapb.DataType_Int16, v: int16(1)}, int16(1), true}, - {"test int16 negative", args{dt: schemapb.DataType_Int16, v: nil}, nil, false}, + {"test int16 null", args{dt: schemapb.DataType_Int16, v: nil}, nil, true}, + {"test int16 negative", args{dt: schemapb.DataType_Int16, v: true}, nil, false}, {"test int32", args{dt: schemapb.DataType_Int32, v: int32(1)}, int32(1), true}, - {"test int32 negative", args{dt: schemapb.DataType_Int32, v: nil}, nil, false}, + {"test int32 null", args{dt: schemapb.DataType_Int32, v: nil}, nil, true}, + {"test int32 negative", args{dt: schemapb.DataType_Int32, v: true}, nil, false}, {"test int64", args{dt: schemapb.DataType_Int64, v: int64(1)}, int64(1), true}, - {"test int64 negative", args{dt: schemapb.DataType_Int64, v: nil}, nil, false}, + {"test int64 null", args{dt: schemapb.DataType_Int64, v: nil}, nil, true}, + {"test int64 negative", args{dt: schemapb.DataType_Int64, v: true}, nil, false}, {"test float32", args{dt: schemapb.DataType_Float, v: float32(1)}, float32(1), true}, - {"test float32 negative", args{dt: schemapb.DataType_Float, v: nil}, nil, false}, + {"test float32 null", args{dt: schemapb.DataType_Float, v: nil}, nil, true}, + {"test float32 negative", args{dt: schemapb.DataType_Float, v: -1}, nil, false}, {"test float64", args{dt: schemapb.DataType_Double, v: float64(1)}, float64(1), true}, - {"test float64 negative", args{dt: schemapb.DataType_Double, v: nil}, nil, false}, + {"test float64 null", args{dt: schemapb.DataType_Double, v: nil}, nil, true}, + {"test float64 negative", args{dt: schemapb.DataType_Double, v: -1}, nil, false}, {"test string", args{dt: schemapb.DataType_String, v: "test"}, "test", true}, - {"test string negative", args{dt: schemapb.DataType_String, v: nil}, nil, false}, + {"test string null", args{dt: schemapb.DataType_String, v: nil}, nil, true}, + {"test string negative", args{dt: schemapb.DataType_String, v: -1}, nil, false}, {"test varchar", args{dt: schemapb.DataType_VarChar, v: "test"}, "test", true}, - {"test varchar negative", args{dt: schemapb.DataType_VarChar, v: nil}, nil, false}, + {"test varchar null", args{dt: schemapb.DataType_VarChar, v: nil}, nil, true}, + {"test varchar negative", args{dt: schemapb.DataType_VarChar, v: -1}, nil, false}, {"test array negative", args{dt: schemapb.DataType_Array, v: "{}"}, nil, false}, - {"test array negative null", args{dt: schemapb.DataType_Array, v: nil}, nil, false}, + {"test array null", args{dt: schemapb.DataType_Array, v: nil}, nil, true}, {"test json", args{dt: schemapb.DataType_JSON, v: []byte("{}")}, []byte("{}"), true}, - {"test json negative", args{dt: schemapb.DataType_JSON, v: nil}, nil, false}, + {"test json null", args{dt: schemapb.DataType_JSON, v: nil}, nil, true}, + {"test json negative", args{dt: schemapb.DataType_JSON, v: -1}, nil, false}, {"test float vector", args{dt: schemapb.DataType_FloatVector, v: []float32{1.0}}, []float32{1.0}, true}, - {"test float vector negative", args{dt: schemapb.DataType_FloatVector, v: nil}, nil, false}, + {"test float vector null", args{dt: schemapb.DataType_FloatVector, v: nil}, nil, true}, + {"test float vector negative", args{dt: schemapb.DataType_FloatVector, v: []int{1}}, nil, false}, {"test bool vector", args{dt: schemapb.DataType_BinaryVector, v: []byte{0xff}}, []byte{0xff}, true}, {"test float16 vector", args{dt: 
schemapb.DataType_Float16Vector, v: []byte{0xff, 0xff}}, []byte{0xff, 0xff}, true}, {"test bfloat16 vector", args{dt: schemapb.DataType_BFloat16Vector, v: []byte{0xff, 0xff}}, []byte{0xff, 0xff}, true}, - {"test bfloat16 vector negative", args{dt: schemapb.DataType_BFloat16Vector, v: nil}, nil, false}, + {"test bfloat16 vector null", args{dt: schemapb.DataType_BFloat16Vector, v: nil}, nil, true}, + {"test bfloat16 vector negative", args{dt: schemapb.DataType_BFloat16Vector, v: -1}, nil, false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { From 59a7a46904300a7f251fcc646f1074cb6fd29cdf Mon Sep 17 00:00:00 2001 From: aoiasd <45024769+aoiasd@users.noreply.github.com> Date: Mon, 27 May 2024 18:15:43 +0800 Subject: [PATCH 079/126] enhance: Merge query stream result for reduce delete task (#32855) relate: https://github.com/milvus-io/milvus/issues/32854 --------- Signed-off-by: aoiasd --- configs/milvus.yaml | 1 + internal/querynodev2/handlers.go | 2 +- internal/querynodev2/server.go | 4 +- .../querynodev2/tasks/query_stream_task.go | 8 +- internal/util/streamrpc/streamer.go | 119 ++++++++++++++++++ internal/util/streamrpc/streamer_test.go | 84 +++++++++++++ pkg/util/paramtable/component_param.go | 10 ++ 7 files changed, 225 insertions(+), 3 deletions(-) create mode 100644 internal/util/streamrpc/streamer_test.go diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 91cfcc554a86b..5d630ebc0ef98 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -377,6 +377,7 @@ queryNode: maxQueueLength: 16 # Maximum length of task queue in flowgraph maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph enableSegmentPrune: false # use partition prune function on shard delegator + queryStreamBatchSize: 4194304 # return batch size of stream query ip: # if not specified, use the first unicastable address port: 21123 grpc: diff --git a/internal/querynodev2/handlers.go b/internal/querynodev2/handlers.go index 1b25fd7c254af..170af4c39e6a1 100644 --- a/internal/querynodev2/handlers.go +++ b/internal/querynodev2/handlers.go @@ -317,7 +317,7 @@ func (node *QueryNode) queryStreamSegments(ctx context.Context, req *querypb.Que } // Send task to scheduler and wait until it finished. 
- task := tasks.NewQueryStreamTask(ctx, collection, node.manager, req, srv) + task := tasks.NewQueryStreamTask(ctx, collection, node.manager, req, srv, node.streamBatchSzie) if err := node.scheduler.Add(task); err != nil { log.Warn("failed to add query task into scheduler", zap.Error(err)) return err diff --git a/internal/querynodev2/server.go b/internal/querynodev2/server.go index d142d72f2b5af..c9a3d5cf42355 100644 --- a/internal/querynodev2/server.go +++ b/internal/querynodev2/server.go @@ -111,7 +111,8 @@ type QueryNode struct { loader segments.Loader // Search/Query - scheduler tasks.Scheduler + scheduler tasks.Scheduler + streamBatchSzie int // etcd client etcdCli *clientv3.Client @@ -328,6 +329,7 @@ func (node *QueryNode) Init() error { node.scheduler = tasks.NewScheduler( schedulePolicy, ) + node.streamBatchSzie = paramtable.Get().QueryNodeCfg.QueryStreamBatchSize.GetAsInt() log.Info("queryNode init scheduler", zap.String("policy", schedulePolicy)) node.clusterManager = cluster.NewWorkerManager(func(ctx context.Context, nodeID int64) (cluster.Worker, error) { diff --git a/internal/querynodev2/tasks/query_stream_task.go b/internal/querynodev2/tasks/query_stream_task.go index 5840efa6c1ce7..6c85535bbe0a9 100644 --- a/internal/querynodev2/tasks/query_stream_task.go +++ b/internal/querynodev2/tasks/query_stream_task.go @@ -16,6 +16,7 @@ func NewQueryStreamTask(ctx context.Context, manager *segments.Manager, req *querypb.QueryRequest, srv streamrpc.QueryStreamServer, + streamBatchSize int, ) *QueryStreamTask { return &QueryStreamTask{ ctx: ctx, @@ -23,6 +24,7 @@ func NewQueryStreamTask(ctx context.Context, segmentManager: manager, req: req, srv: srv, + batchSize: streamBatchSize, notifier: make(chan error, 1), } } @@ -33,6 +35,7 @@ type QueryStreamTask struct { segmentManager *segments.Manager req *querypb.QueryRequest srv streamrpc.QueryStreamServer + batchSize int notifier chan error } @@ -64,7 +67,10 @@ func (t *QueryStreamTask) Execute() error { } defer retrievePlan.Delete() - segments, err := segments.RetrieveStream(t.ctx, t.segmentManager, retrievePlan, t.req, t.srv) + srv := streamrpc.NewResultCacheServer(t.srv, t.batchSize) + defer srv.Flush() + + segments, err := segments.RetrieveStream(t.ctx, t.segmentManager, retrievePlan, t.req, srv) defer t.segmentManager.Segment.Unpin(segments) if err != nil { return err diff --git a/internal/util/streamrpc/streamer.go b/internal/util/streamrpc/streamer.go index 53571672eeb8c..79f47c8bc3c52 100644 --- a/internal/util/streamrpc/streamer.go +++ b/internal/util/streamrpc/streamer.go @@ -5,8 +5,10 @@ import ( "io" "sync" + "github.com/golang/protobuf/proto" "google.golang.org/grpc" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/proto/internalpb" ) @@ -42,6 +44,123 @@ func NewConcurrentQueryStreamServer(srv QueryStreamServer) *ConcurrentQueryStrea } } +type RetrieveResultCache struct { + result *internalpb.RetrieveResults + size int + cap int +} + +func (c *RetrieveResultCache) Put(result *internalpb.RetrieveResults) { + if c.result == nil { + c.result = result + c.size = proto.Size(result) + return + } + + c.merge(result) +} + +func (c *RetrieveResultCache) Flush() *internalpb.RetrieveResults { + result := c.result + c.result = nil + c.size = 0 + return result +} + +func (c *RetrieveResultCache) Alloc(result *internalpb.RetrieveResults) bool { + return proto.Size(result)+c.size <= c.cap +} + +func (c *RetrieveResultCache) IsFull() bool { + return c.size > c.cap +} + +func (c *RetrieveResultCache) 
IsEmpty() bool { + return c.size == 0 +} + +func (c *RetrieveResultCache) merge(result *internalpb.RetrieveResults) { + switch result.GetIds().GetIdField().(type) { + case *schemapb.IDs_IntId: + c.result.GetIds().GetIntId().Data = append(c.result.GetIds().GetIntId().GetData(), result.GetIds().GetIntId().GetData()...) + case *schemapb.IDs_StrId: + c.result.GetIds().GetStrId().Data = append(c.result.GetIds().GetStrId().GetData(), result.GetIds().GetStrId().GetData()...) + } + c.result.AllRetrieveCount = c.result.AllRetrieveCount + result.AllRetrieveCount + c.result.CostAggregation = mergeCostAggregation(c.result.GetCostAggregation(), result.GetCostAggregation()) + c.size = proto.Size(c.result) +} + +func mergeCostAggregation(a *internalpb.CostAggregation, b *internalpb.CostAggregation) *internalpb.CostAggregation { + if a == nil { + return b + } + if b == nil { + return a + } + + return &internalpb.CostAggregation{ + ResponseTime: a.GetResponseTime() + b.GetResponseTime(), + ServiceTime: a.GetServiceTime() + b.GetServiceTime(), + TotalNQ: a.GetTotalNQ() + b.GetTotalNQ(), + TotalRelatedDataSize: a.GetTotalRelatedDataSize() + b.GetTotalRelatedDataSize(), + } +} + +// Merge result by size and time. +type ResultCacheServer struct { + srv QueryStreamServer + cache *RetrieveResultCache + mu sync.Mutex +} + +func NewResultCacheServer(srv QueryStreamServer, cap int) *ResultCacheServer { + return &ResultCacheServer{ + srv: srv, + cache: &RetrieveResultCache{cap: cap}, + } +} + +func (s *ResultCacheServer) Send(result *internalpb.RetrieveResults) error { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.cache.Alloc(result) && !s.cache.IsEmpty() { + result := s.cache.Flush() + if err := s.srv.Send(result); err != nil { + return err + } + } + + s.cache.Put(result) + if s.cache.IsFull() { + result := s.cache.Flush() + if err := s.srv.Send(result); err != nil { + return err + } + } + return nil +} + +func (s *ResultCacheServer) Flush() error { + s.mu.Lock() + defer s.mu.Unlock() + + result := s.cache.Flush() + if result == nil { + return nil + } + + if err := s.srv.Send(result); err != nil { + return err + } + return nil +} + +func (s *ResultCacheServer) Context() context.Context { + return s.srv.Context() +} + // TODO LOCAL SERVER AND CLIENT FOR STANDALONE // ONLY FOR TEST type LocalQueryServer struct { diff --git a/internal/util/streamrpc/streamer_test.go b/internal/util/streamrpc/streamer_test.go new file mode 100644 index 0000000000000..de1482adb9c12 --- /dev/null +++ b/internal/util/streamrpc/streamer_test.go @@ -0,0 +1,84 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package streamrpc + +import ( + "context" + "testing" + + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/internalpb" +) + +type ResultCacheServerSuite struct { + suite.Suite +} + +func (s *ResultCacheServerSuite) TestSend() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + client := NewLocalQueryClient(ctx) + srv := client.CreateServer() + cacheSrv := NewResultCacheServer(srv, 1024) + + err := cacheSrv.Send(&internalpb.RetrieveResults{ + Ids: &schemapb.IDs{ + IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{1, 2, 3}}}, + }, + }) + s.NoError(err) + s.False(cacheSrv.cache.IsEmpty()) + + err = cacheSrv.Send(&internalpb.RetrieveResults{ + Ids: &schemapb.IDs{ + IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{4, 5, 6}}}, + }, + }) + s.NoError(err) + + err = cacheSrv.Flush() + s.NoError(err) + s.True(cacheSrv.cache.IsEmpty()) + + msg, err := client.Recv() + s.NoError(err) + // Data: []int64{1,2,3,4,5,6} + s.Equal(6, len(msg.GetIds().GetIntId().GetData())) +} + +func (s *ResultCacheServerSuite) TestMerge() { + s.Nil(mergeCostAggregation(nil, nil)) + + cost := &internalpb.CostAggregation{} + s.Equal(cost, mergeCostAggregation(nil, cost)) + s.Equal(cost, mergeCostAggregation(cost, nil)) + + a := &internalpb.CostAggregation{ResponseTime: 1, ServiceTime: 1, TotalNQ: 1, TotalRelatedDataSize: 1} + b := &internalpb.CostAggregation{ResponseTime: 2, ServiceTime: 2, TotalNQ: 2, TotalRelatedDataSize: 2} + c := mergeCostAggregation(a, b) + s.Equal(int64(3), c.ResponseTime) + s.Equal(int64(3), c.ServiceTime) + s.Equal(int64(3), c.TotalNQ) + s.Equal(int64(3), c.TotalRelatedDataSize) +} + +func TestResultCacheServerSuite(t *testing.T) { + suite.Run(t, new(ResultCacheServerSuite)) +} diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 317ad937f8b1d..c74bbefd352e6 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -2100,6 +2100,7 @@ type queryNodeConfig struct { EnableSegmentPrune ParamItem `refreshable:"false"` DefaultSegmentFilterRatio ParamItem `refreshable:"false"` UseStreamComputing ParamItem `refreshable:"false"` + QueryStreamBatchSize ParamItem `refreshable:"false"` } func (p *queryNodeConfig) init(base *BaseTable) { @@ -2683,6 +2684,15 @@ user-task-polling: Doc: "use stream search mode when searching or querying", } p.UseStreamComputing.Init(base.mgr) + + p.QueryStreamBatchSize = ParamItem{ + Key: "queryNode.queryStreamBatchSize", + Version: "2.4.1", + DefaultValue: "4194304", + Doc: "return batch size of stream query", + Export: true, + } + p.QueryStreamBatchSize.Init(base.mgr) } // ///////////////////////////////////////////////////////////////////////////// From 1973ebbc420042592159a5c085516d10aba5583e Mon Sep 17 00:00:00 2001 From: SimFG Date: Mon, 27 May 2024 18:17:42 +0800 Subject: [PATCH 080/126] fix: quotaCenter collect metrics failed because the wrong param (#33398) - issue: #33397 Signed-off-by: SimFG --- internal/rootcoord/quota_center.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/internal/rootcoord/quota_center.go b/internal/rootcoord/quota_center.go index ae12835d29e90..0e099438933cf 100644 --- a/internal/rootcoord/quota_center.go +++ b/internal/rootcoord/quota_center.go @@ -437,10 +437,8 @@ func (q *QuotaCenter) collectMetrics() error { q.diskMu.Lock() if dataCoordTopology.Cluster.Self.QuotaMetrics != 
nil { q.dataCoordMetrics = dataCoordTopology.Cluster.Self.QuotaMetrics - for _, metricCollections := range q.dataCoordMetrics.PartitionsBinlogSize { - for metricCollection := range metricCollections { - datacoordQuotaCollections = append(datacoordQuotaCollections, metricCollection) - } + for metricCollection := range q.dataCoordMetrics.PartitionsBinlogSize { + datacoordQuotaCollections = append(datacoordQuotaCollections, metricCollection) } } q.diskMu.Unlock() From 0d3272ed6d25f96c6f1c2ccef6a706195f8e0177 Mon Sep 17 00:00:00 2001 From: jaime Date: Mon, 27 May 2024 19:06:11 +0800 Subject: [PATCH 081/126] enhance: refine logs of cgo pool (#33373) Signed-off-by: jaime --- internal/querynodev2/segments/pool.go | 3 +++ internal/querynodev2/segments/segment.go | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/querynodev2/segments/pool.go b/internal/querynodev2/segments/pool.go index cb025d8c749a3..5a0685082e4e1 100644 --- a/internal/querynodev2/segments/pool.go +++ b/internal/querynodev2/segments/pool.go @@ -62,6 +62,7 @@ func initSQPool() { pt.Watch(pt.QueryNodeCfg.MaxReadConcurrency.Key, config.NewHandler("qn.sqpool.maxconc", ResizeSQPool)) pt.Watch(pt.QueryNodeCfg.CGOPoolSizeRatio.Key, config.NewHandler("qn.sqpool.cgopoolratio", ResizeSQPool)) + log.Info("init SQPool done", zap.Int("size", initPoolSize)) }) } @@ -75,6 +76,7 @@ func initDynamicPool() { ) dp.Store(pool) + log.Info("init dynamicPool done", zap.Int("size", hardware.GetCPUNum())) }) } @@ -92,6 +94,7 @@ func initLoadPool() { loadPool.Store(pool) pt.Watch(pt.CommonCfg.MiddlePriorityThreadCoreCoefficient.Key, config.NewHandler("qn.loadpool.middlepriority", ResizeLoadPool)) + log.Info("init loadPool done", zap.Int("size", poolSize)) }) } diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 614f3102213c1..13ae49c91aa11 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -1308,7 +1308,7 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn zap.Duration("newLoadIndexInfoSpan", newLoadIndexInfoSpan), zap.Duration("appendLoadIndexInfoSpan", appendLoadIndexInfoSpan), zap.Duration("updateIndexInfoSpan", updateIndexInfoSpan), - zap.Duration("updateIndexInfoSpan", warmupChunkCacheSpan), + zap.Duration("warmupChunkCacheSpan", warmupChunkCacheSpan), ) return nil } From a59ed65c48a41fde97670bdc4cde6a2bc7d3230a Mon Sep 17 00:00:00 2001 From: ArenaSu <704427617@qq.com> Date: Mon, 27 May 2024 20:51:42 +0800 Subject: [PATCH 082/126] doc: [skip-e2e] add comments for task queue (#33388) Add comments for task queue in internal/proxy/task_scheduler.go. 
Signed-off-by: ArenaSu <704427617@qq.com> --- internal/proxy/task_scheduler.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/proxy/task_scheduler.go b/internal/proxy/task_scheduler.go index 9d8b899f81188..6c3adbfda34de 100644 --- a/internal/proxy/task_scheduler.go +++ b/internal/proxy/task_scheduler.go @@ -219,6 +219,7 @@ func newBaseTaskQueue(tsoAllocatorIns tsoAllocator) *baseTaskQueue { } } +// ddTaskQueue represents queue for DDL task such as createCollection/createPartition/dropCollection/dropPartition/hasCollection/hasPartition type ddTaskQueue struct { *baseTaskQueue lock sync.Mutex @@ -229,6 +230,7 @@ type pChanStatInfo struct { tsSet map[Timestamp]struct{} } +// dmTaskQueue represents queue for DML task such as insert/delete/upsert type dmTaskQueue struct { *baseTaskQueue @@ -351,6 +353,7 @@ func (queue *dmTaskQueue) getPChanStatsInfo() (map[pChan]*pChanStatistics, error return ret, nil } +// dqTaskQueue represents queue for DQL task such as search/query type dqTaskQueue struct { *baseTaskQueue } From 036b68e710d10428e3d6c24c6ed6005768a4088d Mon Sep 17 00:00:00 2001 From: sre-ci-robot Date: Tue, 28 May 2024 00:01:18 +0000 Subject: [PATCH 083/126] Update all contributors Signed-off-by: sre-ci-robot --- README.md | 3 ++- README_CN.md | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9e0bf7607554a..b5420708f36e0 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut ### All contributors
-
+
@@ -179,6 +179,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut + diff --git a/README_CN.md b/README_CN.md index 26207c0f21fbb..4688953871b4c 100644 --- a/README_CN.md +++ b/README_CN.md @@ -154,7 +154,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 ### All contributors
-
+
@@ -164,6 +164,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 + From 2453181218b25ae6281b3267032c4eafc7384431 Mon Sep 17 00:00:00 2001 From: SimFG Date: Tue, 28 May 2024 10:09:48 +0800 Subject: [PATCH 084/126] fix: not found database name in the datacoord meta object (#33411) - issue: #33410 Signed-off-by: SimFG --- internal/datacoord/meta.go | 38 +++++++++++++ internal/datacoord/meta_test.go | 88 +++++++++++++++++++++++++++++ internal/datacoord/mock_test.go | 4 +- internal/datacoord/server.go | 8 +++ internal/datacoord/server_test.go | 14 +++-- internal/datacoord/services_test.go | 2 + 6 files changed, 149 insertions(+), 5 deletions(-) diff --git a/internal/datacoord/meta.go b/internal/datacoord/meta.go index 065802a6343f6..434800bcb54af 100644 --- a/internal/datacoord/meta.go +++ b/internal/datacoord/meta.go @@ -32,6 +32,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/datacoord/broker" "github.com/milvus-io/milvus/internal/metastore" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/storage" @@ -39,6 +40,7 @@ import ( "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util/funcutil" "github.com/milvus-io/milvus/pkg/util/lock" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/metautil" @@ -162,6 +164,42 @@ func (m *meta) reloadFromKV() error { return nil } +func (m *meta) reloadCollectionsFromRootcoord(ctx context.Context, broker broker.Broker) error { + resp, err := broker.ListDatabases(ctx) + if err != nil { + return err + } + for _, dbName := range resp.GetDbNames() { + resp, err := broker.ShowCollections(ctx, dbName) + if err != nil { + return err + } + for _, collectionID := range resp.GetCollectionIds() { + resp, err := broker.DescribeCollectionInternal(ctx, collectionID) + if err != nil { + return err + } + partitionIDs, err := broker.ShowPartitionsInternal(ctx, collectionID) + if err != nil { + return err + } + collection := &collectionInfo{ + ID: collectionID, + Schema: resp.GetSchema(), + Partitions: partitionIDs, + StartPositions: resp.GetStartPositions(), + Properties: funcutil.KeyValuePair2Map(resp.GetProperties()), + CreatedAt: resp.GetCreatedTimestamp(), + DatabaseName: resp.GetDbName(), + DatabaseID: resp.GetDbId(), + VChannelNames: resp.GetVirtualChannelNames(), + } + m.AddCollection(collection) + } + } + return nil +} + // AddCollection adds a collection into meta // Note that collection info is just for caching and will not be set into etcd from datacoord func (m *meta) AddCollection(collection *collectionInfo) { diff --git a/internal/datacoord/meta_test.go b/internal/datacoord/meta_test.go index 5d3bab7aef66d..dd0471eebcddf 100644 --- a/internal/datacoord/meta_test.go +++ b/internal/datacoord/meta_test.go @@ -27,7 +27,9 @@ import ( "github.com/stretchr/testify/suite" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" + "github.com/milvus-io/milvus/internal/datacoord/broker" "github.com/milvus-io/milvus/internal/kv" mockkv "github.com/milvus-io/milvus/internal/kv/mocks" "github.com/milvus-io/milvus/internal/metastore/kv/datacoord" @@ -1129,3 +1131,89 @@ func Test_meta_GcConfirm(t *testing.T) { 
assert.False(t, m.GcConfirm(context.TODO(), 100, 10000)) } + +func Test_meta_ReloadCollectionsFromRootcoords(t *testing.T) { + t.Run("fail to list database", func(t *testing.T) { + m := &meta{ + collections: make(map[UniqueID]*collectionInfo), + } + mockBroker := broker.NewMockBroker(t) + mockBroker.EXPECT().ListDatabases(mock.Anything).Return(nil, errors.New("list database failed, mocked")) + err := m.reloadCollectionsFromRootcoord(context.TODO(), mockBroker) + assert.Error(t, err) + }) + + t.Run("fail to show collections", func(t *testing.T) { + m := &meta{ + collections: make(map[UniqueID]*collectionInfo), + } + mockBroker := broker.NewMockBroker(t) + + mockBroker.EXPECT().ListDatabases(mock.Anything).Return(&milvuspb.ListDatabasesResponse{ + DbNames: []string{"db1"}, + }, nil) + mockBroker.EXPECT().ShowCollections(mock.Anything, mock.Anything).Return(nil, errors.New("show collections failed, mocked")) + err := m.reloadCollectionsFromRootcoord(context.TODO(), mockBroker) + assert.Error(t, err) + }) + + t.Run("fail to describe collection", func(t *testing.T) { + m := &meta{ + collections: make(map[UniqueID]*collectionInfo), + } + mockBroker := broker.NewMockBroker(t) + + mockBroker.EXPECT().ListDatabases(mock.Anything).Return(&milvuspb.ListDatabasesResponse{ + DbNames: []string{"db1"}, + }, nil) + mockBroker.EXPECT().ShowCollections(mock.Anything, mock.Anything).Return(&milvuspb.ShowCollectionsResponse{ + CollectionNames: []string{"coll1"}, + CollectionIds: []int64{1000}, + }, nil) + mockBroker.EXPECT().DescribeCollectionInternal(mock.Anything, mock.Anything).Return(nil, errors.New("describe collection failed, mocked")) + err := m.reloadCollectionsFromRootcoord(context.TODO(), mockBroker) + assert.Error(t, err) + }) + + t.Run("fail to show partitions", func(t *testing.T) { + m := &meta{ + collections: make(map[UniqueID]*collectionInfo), + } + mockBroker := broker.NewMockBroker(t) + + mockBroker.EXPECT().ListDatabases(mock.Anything).Return(&milvuspb.ListDatabasesResponse{ + DbNames: []string{"db1"}, + }, nil) + mockBroker.EXPECT().ShowCollections(mock.Anything, mock.Anything).Return(&milvuspb.ShowCollectionsResponse{ + CollectionNames: []string{"coll1"}, + CollectionIds: []int64{1000}, + }, nil) + mockBroker.EXPECT().DescribeCollectionInternal(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{}, nil) + mockBroker.EXPECT().ShowPartitionsInternal(mock.Anything, mock.Anything).Return(nil, errors.New("show partitions failed, mocked")) + err := m.reloadCollectionsFromRootcoord(context.TODO(), mockBroker) + assert.Error(t, err) + }) + + t.Run("success", func(t *testing.T) { + m := &meta{ + collections: make(map[UniqueID]*collectionInfo), + } + mockBroker := broker.NewMockBroker(t) + + mockBroker.EXPECT().ListDatabases(mock.Anything).Return(&milvuspb.ListDatabasesResponse{ + DbNames: []string{"db1"}, + }, nil) + mockBroker.EXPECT().ShowCollections(mock.Anything, mock.Anything).Return(&milvuspb.ShowCollectionsResponse{ + CollectionNames: []string{"coll1"}, + CollectionIds: []int64{1000}, + }, nil) + mockBroker.EXPECT().DescribeCollectionInternal(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ + CollectionID: 1000, + }, nil) + mockBroker.EXPECT().ShowPartitionsInternal(mock.Anything, mock.Anything).Return([]int64{2000}, nil) + err := m.reloadCollectionsFromRootcoord(context.TODO(), mockBroker) + assert.NoError(t, err) + c := m.GetCollection(UniqueID(1000)) + assert.NotNil(t, c) + }) +} diff --git a/internal/datacoord/mock_test.go 
b/internal/datacoord/mock_test.go index bac8735fd394a..78d4ca4a78b58 100644 --- a/internal/datacoord/mock_test.go +++ b/internal/datacoord/mock_test.go @@ -461,7 +461,9 @@ func (m *mockRootCoordClient) DropDatabase(ctx context.Context, in *milvuspb.Dro } func (m *mockRootCoordClient) ListDatabases(ctx context.Context, in *milvuspb.ListDatabasesRequest, opts ...grpc.CallOption) (*milvuspb.ListDatabasesResponse, error) { - panic("not implemented") // TODO: Implement + return &milvuspb.ListDatabasesResponse{ + Status: merr.Success(), + }, nil } func (m *mockRootCoordClient) AlterDatabase(ctx context.Context, in *rootcoordpb.AlterDatabaseRequest, opts ...grpc.CallOption) (*commonpb.Status, error) { diff --git a/internal/datacoord/server.go b/internal/datacoord/server.go index 85c6535637d41..94c416e4b8e98 100644 --- a/internal/datacoord/server.go +++ b/internal/datacoord/server.go @@ -674,6 +674,14 @@ func (s *Server) initMeta(chunkManager storage.ChunkManager) error { if err != nil { return err } + + // Load collection information asynchronously + // HINT: please make sure this is the last step in the `reloadEtcdFn` function !!! + go func() { + _ = retry.Do(s.ctx, func() error { + return s.meta.reloadCollectionsFromRootcoord(s.ctx, s.broker) + }, retry.Sleep(time.Second), retry.Attempts(connMetaMaxRetryTime)) + }() return nil } return retry.Do(s.ctx, reloadEtcdFn, retry.Attempts(connMetaMaxRetryTime)) diff --git a/internal/datacoord/server_test.go b/internal/datacoord/server_test.go index c32dc623033df..feb3d0eabedef 100644 --- a/internal/datacoord/server_test.go +++ b/internal/datacoord/server_test.go @@ -42,6 +42,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/datacoord/broker" + etcdkv "github.com/milvus-io/milvus/internal/kv/etcd" "github.com/milvus-io/milvus/internal/metastore/model" "github.com/milvus-io/milvus/internal/mocks" "github.com/milvus-io/milvus/internal/proto/datapb" @@ -3018,6 +3019,12 @@ var globalTestTikv = tikv.SetupLocalTxn() func WithMeta(meta *meta) Option { return func(svr *Server) { svr.meta = meta + + svr.watchClient = etcdkv.NewEtcdKV(svr.etcdCli, Params.EtcdCfg.MetaRootPath.GetValue(), + etcdkv.WithRequestTimeout(paramtable.Get().ServiceParam.EtcdCfg.RequestTimeout.GetAsDuration(time.Millisecond))) + metaRootPath := Params.EtcdCfg.MetaRootPath.GetValue() + svr.kv = etcdkv.NewEtcdKV(svr.etcdCli, metaRootPath, + etcdkv.WithRequestTimeout(paramtable.Get().ServiceParam.EtcdCfg.RequestTimeout.GetAsDuration(time.Millisecond))) } } @@ -3049,6 +3056,9 @@ func newTestServer(t *testing.T, opts ...Option) *Server { svr.rootCoordClientCreator = func(ctx context.Context) (types.RootCoordClient, error) { return newMockRootCoordClient(), nil } + for _, opt := range opts { + opt(svr) + } err = svr.Init() assert.NoError(t, err) @@ -3072,10 +3082,6 @@ func newTestServer(t *testing.T, opts ...Option) *Server { close(signal) } - for _, opt := range opts { - opt(svr) - } - err = svr.Register() assert.NoError(t, err) <-signal diff --git a/internal/datacoord/services_test.go b/internal/datacoord/services_test.go index 4db96c9a05f6b..ddb813acfa922 100644 --- a/internal/datacoord/services_test.go +++ b/internal/datacoord/services_test.go @@ -44,7 +44,9 @@ type ServerSuite struct { func WithChannelManager(cm ChannelManager) Option { return func(svr *Server) { + svr.sessionManager = NewSessionManagerImpl(withSessionCreator(svr.dataNodeCreator)) svr.channelManager = cm + svr.cluster 
= NewClusterImpl(svr.sessionManager, svr.channelManager) } } From 5e39aa9272ee43471a319e192c58bf25bf40d7f1 Mon Sep 17 00:00:00 2001 From: XuanYang-cn Date: Tue, 28 May 2024 12:33:42 +0800 Subject: [PATCH 085/126] enhance: Make channel meta able to writer 200k plus segments (#33279) See also: #33125 --------- Signed-off-by: yangxuan --- internal/datacoord/channel_manager_test.go | 6 +++- internal/datacoord/channel_manager_v2.go | 20 +++++++++-- internal/datacoord/channel_store.go | 7 +++- internal/datacoord/channel_store_test.go | 4 +++ internal/datacoord/channel_store_v2.go | 3 +- internal/datacoord/channel_store_v2_test.go | 38 ++++++++++++++++++++- internal/datacoord/policy_test.go | 2 +- 7 files changed, 73 insertions(+), 7 deletions(-) diff --git a/internal/datacoord/channel_manager_test.go b/internal/datacoord/channel_manager_test.go index d255e64ac9ef8..5866d638bc83a 100644 --- a/internal/datacoord/channel_manager_test.go +++ b/internal/datacoord/channel_manager_test.go @@ -34,6 +34,7 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/util/dependency" "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/util/paramtable" ) // waitAndStore simulates DataNode's action @@ -401,8 +402,11 @@ func TestChannelManager(t *testing.T) { }() Params.Save(Params.DataCoordCfg.AutoBalance.Key, "true") - prefix := Params.CommonCfg.DataCoordWatchSubPath.GetValue() + + enableRPCK := paramtable.Get().DataCoordCfg.EnableBalanceChannelWithRPC.Key + paramtable.Get().Save(enableRPCK, "false") + defer paramtable.Get().Reset(enableRPCK) t.Run("test AddNode with avalible node", func(t *testing.T) { // Note: this test is based on the default registerPolicy defer watchkv.RemoveWithPrefix("") diff --git a/internal/datacoord/channel_manager_v2.go b/internal/datacoord/channel_manager_v2.go index 4bea2daf96e41..4695395eee75b 100644 --- a/internal/datacoord/channel_manager_v2.go +++ b/internal/datacoord/channel_manager_v2.go @@ -23,6 +23,7 @@ import ( "time" "github.com/cockroachdb/errors" + "github.com/golang/protobuf/proto" "github.com/samber/lo" "go.uber.org/zap" @@ -522,9 +523,11 @@ func (m *ChannelManagerImplV2) advanceToNotifies(ctx context.Context, toNotifies ) for _, ch := range nodeAssign.Channels { innerCh := ch + tmpWatchInfo := proto.Clone(innerCh.GetWatchInfo()).(*datapb.ChannelWatchInfo) + tmpWatchInfo.Vchan = m.h.GetDataVChanPositions(innerCh, allPartitionID) future := getOrCreateIOPool().Submit(func() (any, error) { - err := m.Notify(ctx, nodeAssign.NodeID, innerCh.GetWatchInfo()) + err := m.Notify(ctx, nodeAssign.NodeID, tmpWatchInfo) return innerCh, err }) futures = append(futures, future) @@ -694,7 +697,7 @@ func (m *ChannelManagerImplV2) fillChannelWatchInfo(op *ChannelOp) error { } info := &datapb.ChannelWatchInfo{ - Vchan: vcInfo, + Vchan: reduceVChanSize(vcInfo), StartTs: startTs, State: inferStateByOpType(op.Type), Schema: ch.GetSchema(), @@ -715,3 +718,16 @@ func inferStateByOpType(opType ChannelOpType) datapb.ChannelWatchState { return datapb.ChannelWatchState_ToWatch } } + +// Clear segmentID in vChannelInfo to reduce meta size. 
+// About 200k segments will exceed default meta size limit, +// clear it would make meta size way smaller and support infinite segments count +// +// NOTE: all the meta and in-mem watchInfo contains partial VChanInfo that dones't include segmentIDs +// Need to recalulate and fill-in segmentIDs before notify to DataNode +func reduceVChanSize(vChan *datapb.VchannelInfo) *datapb.VchannelInfo { + vChan.DroppedSegmentIds = nil + vChan.FlushedSegmentIds = nil + vChan.UnflushedSegmentIds = nil + return vChan +} diff --git a/internal/datacoord/channel_store.go b/internal/datacoord/channel_store.go index 76524df0a9022..a8b2bb7fc8d95 100644 --- a/internal/datacoord/channel_store.go +++ b/internal/datacoord/channel_store.go @@ -32,6 +32,7 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/timerecord" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -152,7 +153,11 @@ func (op *ChannelOp) BuildKV() (map[string]string, []string, error) { k := buildNodeChannelKey(op.NodeID, ch.GetName()) switch op.Type { case Add, Watch, Release: - info, err := proto.Marshal(ch.GetWatchInfo()) + tmpWatchInfo := proto.Clone(ch.GetWatchInfo()).(*datapb.ChannelWatchInfo) + if paramtable.Get().DataCoordCfg.EnableBalanceChannelWithRPC.GetAsBool() { + tmpWatchInfo.Vchan = reduceVChanSize(tmpWatchInfo.GetVchan()) + } + info, err := proto.Marshal(tmpWatchInfo) if err != nil { return saves, removals, err } diff --git a/internal/datacoord/channel_store_test.go b/internal/datacoord/channel_store_test.go index 235bd5103c616..0790fc6a2ea15 100644 --- a/internal/datacoord/channel_store_test.go +++ b/internal/datacoord/channel_store_test.go @@ -31,6 +31,7 @@ import ( "github.com/milvus-io/milvus/internal/kv/predicates" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/testutils" ) @@ -58,6 +59,9 @@ func genChannelOperationsV1(from, to int64, num int) *ChannelOpSet { } func TestChannelStore_Update(t *testing.T) { + enableRPCK := paramtable.Get().DataCoordCfg.EnableBalanceChannelWithRPC.Key + paramtable.Get().Save(enableRPCK, "false") + defer paramtable.Get().Reset(enableRPCK) txnKv := mocks.NewTxnKV(t) txnKv.EXPECT().MultiSaveAndRemove(mock.Anything, mock.Anything).Run(func(saves map[string]string, removals []string, preds ...predicates.Predicate) { assert.False(t, len(saves)+len(removals) > 64, "too many operations") diff --git a/internal/datacoord/channel_store_v2.go b/internal/datacoord/channel_store_v2.go index dcbeef28630b2..b6e23f4fd8681 100644 --- a/internal/datacoord/channel_store_v2.go +++ b/internal/datacoord/channel_store_v2.go @@ -164,6 +164,7 @@ func (c *StateChannelStore) addAssignment(nodeID int64, channel RWChannel) { // DELETE + WATCH ---> from bufferID to nodeID // DELETE + WATCH ---> from lagecyID to nodeID // DELETE + WATCH ---> from deletedNode to nodeID/bufferID +// DELETE + WATCH ---> from releasedNode to nodeID/bufferID // RELEASE ---> release from nodeID // WATCH ---> watch to a new channel // DELETE ---> remove the channel @@ -223,7 +224,7 @@ func (c *StateChannelStore) getChannel(nodeID int64, channelName string) *StateC if storedChannel, ok := cInfo.Channels[channelName]; ok { return storedChannel.(*StateChannel) } - log.Error("Channel doesn't exist in Node", zap.String("channel", 
channelName), zap.Int64("nodeID", nodeID)) + log.Debug("Channel doesn't exist in Node", zap.String("channel", channelName), zap.Int64("nodeID", nodeID)) } else { log.Error("Node doesn't exist", zap.Int64("NodeID", nodeID)) } diff --git a/internal/datacoord/channel_store_v2_test.go b/internal/datacoord/channel_store_v2_test.go index d2f9a22f58314..2228ff971499d 100644 --- a/internal/datacoord/channel_store_v2_test.go +++ b/internal/datacoord/channel_store_v2_test.go @@ -257,6 +257,42 @@ func (s *StateChannelStoreSuite) TestUpdateWithTxnLimit() { } } +func (s *StateChannelStoreSuite) TestUpdateMeta2000kSegs() { + ch := getChannel("ch1", 1) + info := ch.GetWatchInfo() + // way larger than limit=2097152 + seg2000k := make([]int64, 2000000) + for i := range seg2000k { + seg2000k[i] = int64(i) + } + info.Vchan.FlushedSegmentIds = seg2000k + ch.UpdateWatchInfo(info) + + opSet := NewChannelOpSet( + NewChannelOp(bufferID, Delete, ch), + NewChannelOp(100, Watch, ch), + ) + s.SetupTest() + s.mockTxn.EXPECT().MultiSaveAndRemove(mock.Anything, mock.Anything). + Run(func(saves map[string]string, removals []string, preds ...predicates.Predicate) { + }).Return(nil).Once() + + store := NewStateChannelStore(s.mockTxn) + store.AddNode(100) + s.Require().Equal(0, store.GetNodeChannelCount(100)) + store.addAssignment(bufferID, ch) + s.Require().Equal(1, store.GetNodeChannelCount(bufferID)) + + err := store.updateMeta(opSet) + s.NoError(err) + + got := store.GetNodeChannelsBy(WithNodeIDs(100)) + s.NotNil(got) + s.Require().Equal(1, len(got)) + gotInfo := got[0] + s.ElementsMatch([]string{"ch1"}, lo.Keys(gotInfo.Channels)) +} + func (s *StateChannelStoreSuite) TestUpdateMeta() { tests := []struct { description string @@ -474,7 +510,7 @@ func genChannelOperations(nodeID int64, opType ChannelOpType, num int) *ChannelO for i := 0; i < num; i++ { name := fmt.Sprintf("ch%d", i) channel := NewStateChannel(getChannel(name, 1)) - channel.Info = &datapb.ChannelWatchInfo{} + channel.Info = generateWatchInfo(name, datapb.ChannelWatchState_ToWatch) channels = append(channels, channel) } diff --git a/internal/datacoord/policy_test.go b/internal/datacoord/policy_test.go index 5d9d254d5f31d..15e85f204dc0d 100644 --- a/internal/datacoord/policy_test.go +++ b/internal/datacoord/policy_test.go @@ -37,7 +37,7 @@ func getChannel(name string, collID int64) *StateChannel { return &StateChannel{ Name: name, CollectionID: collID, - Info: &datapb.ChannelWatchInfo{}, + Info: &datapb.ChannelWatchInfo{Vchan: &datapb.VchannelInfo{}}, } } From e71b7c7cc9895c5f830566314e987201a13ac1c9 Mon Sep 17 00:00:00 2001 From: congqixia Date: Tue, 28 May 2024 14:19:42 +0800 Subject: [PATCH 086/126] enhance: Reduce datanode metacache frequent scan range (#33400) See also #32165 There were some frequent scan in metacache: - List all segments whose start positions not synced - List compacted segments Those scan shall cause lots of CPU time when flushed segment number is large meanwhile `Flushed` segments can be skipped in those two scenarios This PR make: - Add segment state shortcut in metacache - List start positions state before `Flushed` - Make compacted segments state to be `Dropped` and use `Dropped` state while scanning them --------- Signed-off-by: Congqi Xia --- internal/datanode/data_sync_service_test.go | 2 + internal/datanode/metacache/actions.go | 64 +++++++---- internal/datanode/metacache/meta_cache.go | 100 +++++++++++------- internal/datanode/services_test.go | 1 + internal/datanode/syncmgr/meta_writer.go | 7 +- 
internal/datanode/syncmgr/meta_writer_test.go | 8 +- .../datanode/syncmgr/mock_sync_manager.go | 6 +- .../datanode/syncmgr/sync_manager_test.go | 4 +- internal/datanode/syncmgr/task_test.go | 6 +- internal/datanode/syncmgr/taskv2_test.go | 2 +- .../datanode/writebuffer/bf_write_buffer.go | 3 +- .../writebuffer/bf_write_buffer_test.go | 14 +-- .../writebuffer/l0_write_buffer_test.go | 4 +- internal/datanode/writebuffer/write_buffer.go | 5 +- 14 files changed, 145 insertions(+), 81 deletions(-) diff --git a/internal/datanode/data_sync_service_test.go b/internal/datanode/data_sync_service_test.go index c73e3257dd73e..3847f3eebcbd3 100644 --- a/internal/datanode/data_sync_service_test.go +++ b/internal/datanode/data_sync_service_test.go @@ -433,6 +433,7 @@ func (s *DataSyncServiceSuite) TestStartStop() { CollectionID: collMeta.ID, PartitionID: 1, InsertChannel: insertChannelName, + State: commonpb.SegmentState_Flushed, }, 1: { @@ -440,6 +441,7 @@ func (s *DataSyncServiceSuite) TestStartStop() { CollectionID: collMeta.ID, PartitionID: 1, InsertChannel: insertChannelName, + State: commonpb.SegmentState_Flushed, }, } return lo.FilterMap(segmentIDs, func(id int64, _ int) (*datapb.SegmentInfo, bool) { diff --git a/internal/datanode/metacache/actions.go b/internal/datanode/metacache/actions.go index 20d18f4acd846..3a2ac3f9831f5 100644 --- a/internal/datanode/metacache/actions.go +++ b/internal/datanode/metacache/actions.go @@ -25,40 +25,75 @@ import ( "github.com/milvus-io/milvus/pkg/util/typeutil" ) +type segmentCriterion struct { + ids typeutil.Set[int64] + states typeutil.Set[commonpb.SegmentState] + others []SegmentFilter +} + +func (sc *segmentCriterion) Match(segment *SegmentInfo) bool { + for _, filter := range sc.others { + if !filter.Filter(segment) { + return false + } + } + return true +} + type SegmentFilter interface { Filter(info *SegmentInfo) bool - SegmentIDs() ([]int64, bool) + AddFilter(*segmentCriterion) } +// SegmentIDFilter segment filter with segment ids. type SegmentIDFilter struct { - segmentIDs []int64 - ids typeutil.Set[int64] + ids typeutil.Set[int64] +} + +func (f *SegmentIDFilter) Filter(info *SegmentInfo) bool { + return f.ids.Contain(info.segmentID) +} + +func (f *SegmentIDFilter) AddFilter(criterion *segmentCriterion) { + criterion.ids = f.ids } func WithSegmentIDs(segmentIDs ...int64) SegmentFilter { set := typeutil.NewSet(segmentIDs...) return &SegmentIDFilter{ - segmentIDs: segmentIDs, - ids: set, + ids: set, } } -func (f *SegmentIDFilter) Filter(info *SegmentInfo) bool { - return f.ids.Contain(info.segmentID) +// SegmentStateFilter segment filter with segment states. +type SegmentStateFilter struct { + states typeutil.Set[commonpb.SegmentState] } -func (f *SegmentIDFilter) SegmentIDs() ([]int64, bool) { - return f.segmentIDs, true +func (f *SegmentStateFilter) Filter(info *SegmentInfo) bool { + return f.states.Contain(info.State()) } +func (f *SegmentStateFilter) AddFilter(criterion *segmentCriterion) { + criterion.states = f.states +} + +func WithSegmentState(states ...commonpb.SegmentState) SegmentFilter { + set := typeutil.NewSet(states...) + return &SegmentStateFilter{ + states: set, + } +} + +// SegmentFilterFunc implements segment filter with other filters logic. 
type SegmentFilterFunc func(info *SegmentInfo) bool func (f SegmentFilterFunc) Filter(info *SegmentInfo) bool { return f(info) } -func (f SegmentFilterFunc) SegmentIDs() ([]int64, bool) { - return nil, false +func (f SegmentFilterFunc) AddFilter(criterion *segmentCriterion) { + criterion.others = append(criterion.others, f) } func WithPartitionID(partitionID int64) SegmentFilter { @@ -67,13 +102,6 @@ func WithPartitionID(partitionID int64) SegmentFilter { }) } -func WithSegmentState(states ...commonpb.SegmentState) SegmentFilter { - set := typeutil.NewSet(states...) - return SegmentFilterFunc(func(info *SegmentInfo) bool { - return set.Len() > 0 && set.Contain(info.state) - }) -} - func WithStartPosNotRecorded() SegmentFilter { return SegmentFilterFunc(func(info *SegmentInfo) bool { return !info.startPosRecorded diff --git a/internal/datanode/metacache/meta_cache.go b/internal/datanode/metacache/meta_cache.go index 61d9644e34ba8..24021c7a7a3b5 100644 --- a/internal/datanode/metacache/meta_cache.go +++ b/internal/datanode/metacache/meta_cache.go @@ -60,18 +60,32 @@ type PkStatsFactory func(vchannel *datapb.SegmentInfo) *BloomFilterSet type metaCacheImpl struct { collectionID int64 vChannelName string - segmentInfos map[int64]*SegmentInfo schema *schemapb.CollectionSchema - mu sync.RWMutex + + mu sync.RWMutex + segmentInfos map[int64]*SegmentInfo + stateSegments map[commonpb.SegmentState]map[int64]*SegmentInfo } func NewMetaCache(info *datapb.ChannelWatchInfo, factory PkStatsFactory) MetaCache { vchannel := info.GetVchan() cache := &metaCacheImpl{ - collectionID: vchannel.GetCollectionID(), - vChannelName: vchannel.GetChannelName(), - segmentInfos: make(map[int64]*SegmentInfo), - schema: info.GetSchema(), + collectionID: vchannel.GetCollectionID(), + vChannelName: vchannel.GetChannelName(), + segmentInfos: make(map[int64]*SegmentInfo), + stateSegments: make(map[commonpb.SegmentState]map[int64]*SegmentInfo), + schema: info.GetSchema(), + } + + for _, state := range []commonpb.SegmentState{ + commonpb.SegmentState_Growing, + commonpb.SegmentState_Sealed, + commonpb.SegmentState_Flushing, + commonpb.SegmentState_Flushed, + commonpb.SegmentState_Dropped, + commonpb.SegmentState_Importing, + } { + cache.stateSegments[state] = make(map[int64]*SegmentInfo) } cache.init(vchannel, factory) @@ -80,13 +94,13 @@ func NewMetaCache(info *datapb.ChannelWatchInfo, factory PkStatsFactory) MetaCac func (c *metaCacheImpl) init(vchannel *datapb.VchannelInfo, factory PkStatsFactory) { for _, seg := range vchannel.FlushedSegments { - c.segmentInfos[seg.GetID()] = NewSegmentInfo(seg, factory(seg)) + c.addSegment(NewSegmentInfo(seg, factory(seg))) } for _, seg := range vchannel.UnflushedSegments { // segment state could be sealed for growing segment if flush request processed before datanode watch seg.State = commonpb.SegmentState_Growing - c.segmentInfos[seg.GetID()] = NewSegmentInfo(seg, factory(seg)) + c.addSegment(NewSegmentInfo(seg, factory(seg))) } } @@ -110,7 +124,13 @@ func (c *metaCacheImpl) AddSegment(segInfo *datapb.SegmentInfo, factory PkStatsF c.mu.Lock() defer c.mu.Unlock() - c.segmentInfos[segInfo.GetID()] = segment + c.addSegment(segment) +} + +func (c *metaCacheImpl) addSegment(segment *SegmentInfo) { + segID := segment.SegmentID() + c.segmentInfos[segID] = segment + c.stateSegments[segment.State()][segID] = segment } func (c *metaCacheImpl) CompactSegments(newSegmentID, partitionID int64, numOfRows int64, bfs *BloomFilterSet, oldSegmentIDs ...int64) { @@ -121,7 +141,7 @@ func (c *metaCacheImpl) 
CompactSegments(newSegmentID, partitionID int64, numOfRo if numOfRows > 0 { compactTo = newSegmentID if _, ok := c.segmentInfos[newSegmentID]; !ok { - c.segmentInfos[newSegmentID] = &SegmentInfo{ + c.addSegment(&SegmentInfo{ segmentID: newSegmentID, partitionID: partitionID, state: commonpb.SegmentState_Flushed, @@ -129,7 +149,7 @@ func (c *metaCacheImpl) CompactSegments(newSegmentID, partitionID int64, numOfRo flushedRows: numOfRows, startPosRecorded: true, bfs: bfs, - } + }) } log.Info("add compactTo segment info metacache", zap.Int64("segmentID", compactTo)) } @@ -140,7 +160,10 @@ func (c *metaCacheImpl) CompactSegments(newSegmentID, partitionID int64, numOfRo oldSet.Contain(segment.compactTo) { updated := segment.Clone() updated.compactTo = compactTo + updated.state = commonpb.SegmentState_Dropped c.segmentInfos[segment.segmentID] = updated + delete(c.stateSegments[commonpb.SegmentState_Flushed], segment.segmentID) + c.stateSegments[commonpb.SegmentState_Dropped][segment.segmentID] = segment log.Info("update segment compactTo", zap.Int64("segmentID", segment.segmentID), zap.Int64("originalCompactTo", segment.compactTo), @@ -160,6 +183,7 @@ func (c *metaCacheImpl) RemoveSegments(filters ...SegmentFilter) []int64 { var result []int64 process := func(id int64, info *SegmentInfo) { delete(c.segmentInfos, id) + delete(c.stateSegments[info.State()], id) result = append(result, id) } c.rangeWithFilter(process, filters...) @@ -207,6 +231,8 @@ func (c *metaCacheImpl) UpdateSegments(action SegmentAction, filters ...SegmentF nInfo := info.Clone() action(nInfo) c.segmentInfos[id] = nInfo + delete(c.stateSegments[info.State()], info.SegmentID()) + c.stateSegments[nInfo.State()][nInfo.SegmentID()] = nInfo }, filters...) } @@ -223,38 +249,38 @@ func (c *metaCacheImpl) PredictSegments(pk storage.PrimaryKey, filters ...Segmen } func (c *metaCacheImpl) rangeWithFilter(fn func(id int64, info *SegmentInfo), filters ...SegmentFilter) { - var hasIDs bool - set := typeutil.NewSet[int64]() - filtered := make([]SegmentFilter, 0, len(filters)) + criterion := &segmentCriterion{} for _, filter := range filters { - ids, ok := filter.SegmentIDs() - if ok { - set.Insert(ids...) 
- hasIDs = true - } else { - filtered = append(filtered, filter) - } + filter.AddFilter(criterion) } - mergedFilter := func(info *SegmentInfo) bool { - for _, filter := range filtered { - if !filter.Filter(info) { - return false - } + + var candidates []map[int64]*SegmentInfo + if criterion.states != nil { + candidates = lo.Map(criterion.states.Collect(), func(state commonpb.SegmentState, _ int) map[int64]*SegmentInfo { + return c.stateSegments[state] + }) + } else { + candidates = []map[int64]*SegmentInfo{ + c.segmentInfos, } - return true } - if hasIDs { - for id := range set { - info, has := c.segmentInfos[id] - if has && mergedFilter(info) { - fn(id, info) - } + for _, candidate := range candidates { + var segments map[int64]*SegmentInfo + if criterion.ids != nil { + segments = lo.SliceToMap(lo.FilterMap(criterion.ids.Collect(), func(id int64, _ int) (*SegmentInfo, bool) { + segment, ok := candidate[id] + return segment, ok + }), func(segment *SegmentInfo) (int64, *SegmentInfo) { + return segment.SegmentID(), segment + }) + } else { + segments = candidate } - } else { - for id, info := range c.segmentInfos { - if mergedFilter(info) { - fn(id, info) + + for id, segment := range segments { + if criterion.Match(segment) { + fn(id, segment) } } } diff --git a/internal/datanode/services_test.go b/internal/datanode/services_test.go index b90ef427f1a44..117d265ea5f68 100644 --- a/internal/datanode/services_test.go +++ b/internal/datanode/services_test.go @@ -283,6 +283,7 @@ func (s *DataNodeServicesSuite) TestFlushSegments() { ID: segmentID, CollectionID: 1, PartitionID: 2, + State: commonpb.SegmentState_Growing, StartPosition: &msgpb.MsgPosition{}, }, func(_ *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }) diff --git a/internal/datanode/syncmgr/meta_writer.go b/internal/datanode/syncmgr/meta_writer.go index 0e82f6cfe66bc..9a6d864895277 100644 --- a/internal/datanode/syncmgr/meta_writer.go +++ b/internal/datanode/syncmgr/meta_writer.go @@ -7,6 +7,7 @@ import ( "github.com/samber/lo" "go.uber.org/zap" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus/internal/datanode/broker" "github.com/milvus-io/milvus/internal/datanode/metacache" "github.com/milvus-io/milvus/internal/proto/datapb" @@ -60,7 +61,8 @@ func (b *brokerMetaWriter) UpdateSync(pack *SyncTask) error { Position: pack.checkpoint, }) - startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition { + startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Sealed, commonpb.SegmentState_Flushing), + metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition { return &datapb.SegmentStartPosition{ SegmentID: info.SegmentID(), StartPosition: info.StartPosition(), @@ -150,7 +152,8 @@ func (b *brokerMetaWriter) UpdateSyncV2(pack *SyncTaskV2) error { Position: pack.checkpoint, }) - startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition { + startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Flushing), + metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition { return &datapb.SegmentStartPosition{ SegmentID: info.SegmentID(), 
StartPosition: info.StartPosition(), diff --git a/internal/datanode/syncmgr/meta_writer_test.go b/internal/datanode/syncmgr/meta_writer_test.go index fc1d921b70160..ef5c4e83d825d 100644 --- a/internal/datanode/syncmgr/meta_writer_test.go +++ b/internal/datanode/syncmgr/meta_writer_test.go @@ -39,7 +39,7 @@ func (s *MetaWriterSuite) TestNormalSave() { bfs := metacache.NewBloomFilterSet() seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) metacache.UpdateNumOfRows(1000)(seg) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() task := NewSyncTask() @@ -55,7 +55,7 @@ func (s *MetaWriterSuite) TestReturnError() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) metacache.UpdateNumOfRows(1000)(seg) s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) task := NewSyncTask() task.WithMetaCache(s.metacache) err := s.writer.UpdateSync(task) @@ -69,7 +69,7 @@ func (s *MetaWriterSuite) TestNormalSaveV2() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) metacache.UpdateNumOfRows(1000)(seg) s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) task := NewSyncTaskV2() task.WithMetaCache(s.metacache) err := s.writer.UpdateSyncV2(task) @@ -83,7 +83,7 @@ func (s *MetaWriterSuite) TestReturnErrorV2() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) metacache.UpdateNumOfRows(1000)(seg) s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) task := NewSyncTaskV2() task.WithMetaCache(s.metacache) err := s.writer.UpdateSyncV2(task) diff --git a/internal/datanode/syncmgr/mock_sync_manager.go b/internal/datanode/syncmgr/mock_sync_manager.go index 259d09b2da542..bf996d09aabff 100644 --- a/internal/datanode/syncmgr/mock_sync_manager.go +++ b/internal/datanode/syncmgr/mock_sync_manager.go @@ -57,7 +57,7 @@ type MockSyncManager_GetEarliestPosition_Call struct { } // GetEarliestPosition is a helper method to define mock.On call -// - channel string +// - channel string func (_e *MockSyncManager_Expecter) GetEarliestPosition(channel interface{}) *MockSyncManager_GetEarliestPosition_Call { return &MockSyncManager_GetEarliestPosition_Call{Call: _e.mock.On("GetEarliestPosition", channel)} } @@ -101,8 +101,8 @@ type MockSyncManager_SyncData_Call struct { } // SyncData is a helper method to define mock.On call -// - ctx context.Context -// - task Task +// - ctx context.Context +// - task Task func (_e *MockSyncManager_Expecter) SyncData(ctx interface{}, task interface{}) *MockSyncManager_SyncData_Call { return &MockSyncManager_SyncData_Call{Call: _e.mock.On("SyncData", ctx, task)} } diff --git a/internal/datanode/syncmgr/sync_manager_test.go 
b/internal/datanode/syncmgr/sync_manager_test.go index 515e1266479d3..c1ac3000505b9 100644 --- a/internal/datanode/syncmgr/sync_manager_test.go +++ b/internal/datanode/syncmgr/sync_manager_test.go @@ -155,7 +155,7 @@ func (s *SyncManagerSuite) TestSubmit() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) metacache.UpdateNumOfRows(1000)(seg) s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() manager, err := NewSyncManager(s.chunkManager, s.allocator) @@ -186,7 +186,7 @@ func (s *SyncManagerSuite) TestCompacted() { metacache.UpdateNumOfRows(1000)(seg) metacache.CompactTo(1001)(seg) s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() manager, err := NewSyncManager(s.chunkManager, s.allocator) diff --git a/internal/datanode/syncmgr/task_test.go b/internal/datanode/syncmgr/task_test.go index d03aa278d7601..62ef40ce5666b 100644 --- a/internal/datanode/syncmgr/task_test.go +++ b/internal/datanode/syncmgr/task_test.go @@ -185,7 +185,7 @@ func (s *SyncTaskSuite) TestRunNormal() { metacache.UpdateNumOfRows(1000)(seg) seg.GetBloomFilterSet().Roll() s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() s.Run("without_data", func() { @@ -268,7 +268,7 @@ func (s *SyncTaskSuite) TestRunL0Segment() { bfs := metacache.NewBloomFilterSet() seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{Level: datapb.SegmentLevel_L0}, bfs) s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() s.Run("pure_delete_l0_flush", func() { @@ -362,7 +362,7 @@ func (s *SyncTaskSuite) TestRunError() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, metacache.NewBloomFilterSet()) metacache.UpdateNumOfRows(1000)(seg) s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.Run("allocate_id_fail", func() { mockAllocator := allocator.NewMockAllocator(s.T()) diff --git a/internal/datanode/syncmgr/taskv2_test.go b/internal/datanode/syncmgr/taskv2_test.go index 9367689ed1a28..bb8b36619129c 100644 --- a/internal/datanode/syncmgr/taskv2_test.go +++ b/internal/datanode/syncmgr/taskv2_test.go @@ -216,7 +216,7 @@ func (s *SyncTaskSuiteV2) TestRunNormal() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) metacache.UpdateNumOfRows(1000)(seg) 
s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() s.Run("without_insert_delete", func() { diff --git a/internal/datanode/writebuffer/bf_write_buffer.go b/internal/datanode/writebuffer/bf_write_buffer.go index 0438396879eb9..3ddc961879729 100644 --- a/internal/datanode/writebuffer/bf_write_buffer.go +++ b/internal/datanode/writebuffer/bf_write_buffer.go @@ -98,7 +98,8 @@ func (wb *bfWriteBuffer) BufferData(insertMsgs []*msgstream.InsertMsg, deleteMsg // update pk oracle for _, inData := range groups { // segment shall always exists after buffer insert - segments := wb.metaCache.GetSegmentsBy(metacache.WithSegmentIDs(inData.segmentID)) + segments := wb.metaCache.GetSegmentsBy( + metacache.WithSegmentIDs(inData.segmentID)) for _, segment := range segments { for _, fieldData := range inData.pkField { err := segment.GetBloomFilterSet().UpdatePKRange(fieldData) diff --git a/internal/datanode/writebuffer/bf_write_buffer_test.go b/internal/datanode/writebuffer/bf_write_buffer_test.go index d1881c034be1a..c7c80fa26e42a 100644 --- a/internal/datanode/writebuffer/bf_write_buffer_test.go +++ b/internal/datanode/writebuffer/bf_write_buffer_test.go @@ -218,7 +218,7 @@ func (s *BFWriteBufferSuite) TestBufferData() { s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64) delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) })) @@ -248,7 +248,7 @@ func (s *BFWriteBufferSuite) TestBufferData() { s.metacacheVarchar.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) s.metacacheVarchar.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheVarchar.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacacheVarchar.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheVarchar.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_VarChar) delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewVarCharPrimaryKey(fmt.Sprintf("%v", id)) })) @@ -273,7 +273,7 @@ func (s *BFWriteBufferSuite) TestBufferData() { s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_VarChar) delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) 
storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) })) @@ -294,7 +294,7 @@ func (s *BFWriteBufferSuite) TestBufferData() { s.metacacheVarchar.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) s.metacacheVarchar.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheVarchar.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacacheVarchar.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheVarchar.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64) delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) })) @@ -325,7 +325,7 @@ func (s *BFWriteBufferSuite) TestAutoSync() { s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(seg, true).Once() s.metacacheInt64.EXPECT().GetSegmentByID(int64(1002)).Return(seg1, true) s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything).Return([]int64{1002}) - s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything).Return() @@ -363,7 +363,7 @@ func (s *BFWriteBufferSuite) TestBufferDataWithStorageV2() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) s.metacacheInt64.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) - s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() @@ -409,7 +409,7 @@ func (s *BFWriteBufferSuite) TestAutoSyncWithStorageV2() { s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(seg, true).Once() s.metacacheInt64.EXPECT().GetSegmentByID(int64(1002)).Return(seg1, true) s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything).Return([]int64{1002}) - s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{1003}) // mocked compacted + s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{1003}) // mocked compacted s.metacacheInt64.EXPECT().RemoveSegments(mock.Anything).Return([]int64{1003}) s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() diff --git a/internal/datanode/writebuffer/l0_write_buffer_test.go b/internal/datanode/writebuffer/l0_write_buffer_test.go index a7a1cf5261d17..29b231dc2c25e 100644 --- a/internal/datanode/writebuffer/l0_write_buffer_test.go +++ b/internal/datanode/writebuffer/l0_write_buffer_test.go @@ -186,7 +186,7 @@ func (s *L0WriteBufferSuite) TestBufferData() { s.metacache.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false).Once() s.metacache.EXPECT().AddSegment(mock.Anything, 
mock.Anything, mock.Anything).Return() s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacache.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacache.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) metrics.DataNodeFlowGraphBufferDataSize.Reset() err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200}) @@ -215,7 +215,7 @@ func (s *L0WriteBufferSuite) TestBufferData() { s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacache.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacache.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) metrics.DataNodeFlowGraphBufferDataSize.Reset() err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200}) diff --git a/internal/datanode/writebuffer/write_buffer.go b/internal/datanode/writebuffer/write_buffer.go index 3dbd8df5ec7d7..675f48fe92f06 100644 --- a/internal/datanode/writebuffer/write_buffer.go +++ b/internal/datanode/writebuffer/write_buffer.go @@ -279,7 +279,10 @@ func (wb *writeBufferBase) triggerSync() (segmentIDs []int64) { } func (wb *writeBufferBase) cleanupCompactedSegments() { - segmentIDs := wb.metaCache.GetSegmentIDsBy(metacache.WithCompacted(), metacache.WithNoSyncingTask()) + segmentIDs := wb.metaCache.GetSegmentIDsBy( + metacache.WithSegmentState(commonpb.SegmentState_Dropped), + metacache.WithCompacted(), + metacache.WithNoSyncingTask()) // remove compacted only when there is no writebuffer targetIDs := lo.Filter(segmentIDs, func(segmentID int64, _ int) bool { _, ok := wb.buffers[segmentID] From b138ae742d3af5b82f07193ed8effb15f9bb01fa Mon Sep 17 00:00:00 2001 From: Francis <455954986@qq.com> Date: Tue, 28 May 2024 14:35:42 +0800 Subject: [PATCH 087/126] fix: docs for python3 and conan (#32656) Conan 1.x is broken for Python 3.12, since imp is deprecated in favour of importlib, removed in 3.12. 
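For reference, an editorial sketch (not part of this patch; assumes bash with python3 and pip3 on the PATH) that guards the Conan 1.x install behind the version range documented below:

```bash
# Only install Conan 1.61.0 when the interpreter is inside the documented range (> 3.8 and <= 3.11).
if python3 -c 'import sys; sys.exit(0 if (3, 8) < sys.version_info[:2] <= (3, 11) else 1)'; then
    pip3 install conan==1.61.0
else
    echo "python3 must be > 3.8 and <= 3.11: Conan 1.x imports the imp module removed in Python 3.12" >&2
fi
```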
https://github.com/milvus-io/milvus/issues/32655 Signed-off-by: light-city <455954986@qq.com> --- DEVELOPMENT.md | 2 +- README.md | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 246cb63810221..99bfc0f1546ae 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -236,7 +236,7 @@ sudo apt install -y clang-format clang-tidy ninja-build gcc g++ curl zip unzip t #### Install conan ```bash -# Verify python3 version, need python3 version > 3.8 +# Verify python3 version, need python3 version > 3.8 and version <= 3.11 python3 --version # pip install conan 1.61.0 pip3 install conan==1.61.0 diff --git a/README.md b/README.md index b5420708f36e0..5353195096cad 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,7 @@ Linux systems (Ubuntu 20.04 or later recommended): go: >= 1.21 cmake: >= 3.26.4 gcc: 7.5 +python: > 3.8 and <= 3.11 ``` MacOS systems with x86_64 (Big Sur 11.5 or later recommended): @@ -82,6 +83,7 @@ MacOS systems with x86_64 (Big Sur 11.5 or later recommended): go: >= 1.21 cmake: >= 3.26.4 llvm: >= 15 +python: > 3.8 and <= 3.11 ``` MacOS systems with Apple Silicon (Monterey 12.0.1 or later recommended): @@ -89,6 +91,7 @@ MacOS systems with Apple Silicon (Monterey 12.0.1 or later recommended): go: >= 1.21 (Arch=ARM64) cmake: >= 3.26.4 llvm: >= 15 +python: > 3.8 and <= 3.11 ``` Clone Milvus repo and build. From 6b3e42f8d899c3e7b571132e896cacd34fed00a9 Mon Sep 17 00:00:00 2001 From: Bingyi Sun Date: Tue, 28 May 2024 16:49:43 +0800 Subject: [PATCH 088/126] fix: fix wrong default local storage path (#33389) issue: https://github.com/milvus-io/milvus/issues/33427 Signed-off-by: sunby --- configs/milvus.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 5d630ebc0ef98..298f152c5f3e9 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -68,7 +68,7 @@ tikv: tlsCACert: # path to your CACert file localStorage: - path: /tmp/milvus/data/ # please adjust in embedded Milvus: /tmp/milvus/data/ + path: /var/lib/milvus/data/ # please adjust in embedded Milvus: /tmp/milvus/data/ # Related configuration of MinIO/S3/GCS or any other service supports S3 API, which is responsible for data persistence for Milvus. # We refer to the storage service as MinIO/S3 in the following description for simplicity. 
From 3d29907b6e728fe0699298812c0a6ffdb4d4ba37 Mon Sep 17 00:00:00 2001 From: jaime Date: Tue, 28 May 2024 19:17:43 +0800 Subject: [PATCH 089/126] enhance: decrease cpu overhead during filter segments on datacoord (#33130) issue: #33129 Signed-off-by: jaime --- internal/datacoord/compaction_trigger_test.go | 73 ++++++++---- internal/datacoord/garbage_collector_test.go | 9 +- internal/datacoord/index_service_test.go | 7 +- internal/datacoord/meta_test.go | 92 +++++++++++++++ internal/datacoord/segment_info.go | 108 +++++++++++------- internal/datacoord/segment_operator.go | 22 +++- 6 files changed, 242 insertions(+), 69 deletions(-) diff --git a/internal/datacoord/compaction_trigger_test.go b/internal/datacoord/compaction_trigger_test.go index 56710ed80efee..78166718148a9 100644 --- a/internal/datacoord/compaction_trigger_test.go +++ b/internal/datacoord/compaction_trigger_test.go @@ -2197,34 +2197,63 @@ func (s *CompactionTriggerSuite) SetupTest() { catalog := mocks.NewDataCoordCatalog(s.T()) catalog.EXPECT().SaveChannelCheckpoint(mock.Anything, s.channel, mock.Anything).Return(nil) + seg1 := &SegmentInfo{ + SegmentInfo: s.genSeg(1, 60), + lastFlushTime: time.Now().Add(-100 * time.Minute), + } + seg2 := &SegmentInfo{ + SegmentInfo: s.genSeg(2, 60), + lastFlushTime: time.Now(), + } + seg3 := &SegmentInfo{ + SegmentInfo: s.genSeg(3, 60), + lastFlushTime: time.Now(), + } + seg4 := &SegmentInfo{ + SegmentInfo: s.genSeg(4, 60), + lastFlushTime: time.Now(), + } + seg5 := &SegmentInfo{ + SegmentInfo: s.genSeg(5, 60), + lastFlushTime: time.Now(), + } + seg6 := &SegmentInfo{ + SegmentInfo: s.genSeg(6, 60), + lastFlushTime: time.Now(), + } + s.meta = &meta{ channelCPs: newChannelCps(), catalog: catalog, segments: &SegmentsInfo{ segments: map[int64]*SegmentInfo{ - 1: { - SegmentInfo: s.genSeg(1, 60), - lastFlushTime: time.Now().Add(-100 * time.Minute), - }, - 2: { - SegmentInfo: s.genSeg(2, 60), - lastFlushTime: time.Now(), - }, - 3: { - SegmentInfo: s.genSeg(3, 60), - lastFlushTime: time.Now(), - }, - 4: { - SegmentInfo: s.genSeg(4, 60), - lastFlushTime: time.Now(), - }, - 5: { - SegmentInfo: s.genSeg(5, 26), - lastFlushTime: time.Now(), + 1: seg1, + 2: seg2, + 3: seg3, + 4: seg4, + 5: seg5, + 6: seg6, + }, + secondaryIndexes: segmentInfoIndexes{ + coll2Segments: map[UniqueID]map[UniqueID]*SegmentInfo{ + s.collectionID: { + 1: seg1, + 2: seg2, + 3: seg3, + 4: seg4, + 5: seg5, + 6: seg6, + }, }, - 6: { - SegmentInfo: s.genSeg(6, 26), - lastFlushTime: time.Now(), + channel2Segments: map[string]map[UniqueID]*SegmentInfo{ + s.channel: { + 1: seg1, + 2: seg2, + 3: seg3, + 4: seg4, + 5: seg5, + 6: seg6, + }, }, }, }, diff --git a/internal/datacoord/garbage_collector_test.go b/internal/datacoord/garbage_collector_test.go index 66f7873b4b81d..93a96f7e37429 100644 --- a/internal/datacoord/garbage_collector_test.go +++ b/internal/datacoord/garbage_collector_test.go @@ -465,7 +465,14 @@ func createMetaForRecycleUnusedSegIndexes(catalog metastore.DataCoordCatalog) *m }, }, segID + 1: { - SegmentInfo: nil, + SegmentInfo: &datapb.SegmentInfo{ + ID: segID + 1, + CollectionID: collID, + PartitionID: partID, + InsertChannel: "", + NumOfRows: 1026, + State: commonpb.SegmentState_Dropped, + }, }, } meta := &meta{ diff --git a/internal/datacoord/index_service_test.go b/internal/datacoord/index_service_test.go index cda44b2558e89..d10c8d104f1bb 100644 --- a/internal/datacoord/index_service_test.go +++ b/internal/datacoord/index_service_test.go @@ -938,7 +938,12 @@ func TestServer_GetSegmentIndexState(t *testing.T) { 
WriteHandoff: false, }) s.meta.segments.SetSegment(segID, &SegmentInfo{ - SegmentInfo: nil, + SegmentInfo: &datapb.SegmentInfo{ + ID: segID, + CollectionID: collID, + PartitionID: partID, + InsertChannel: "ch", + }, currRows: 0, allocations: nil, lastFlushTime: time.Time{}, diff --git a/internal/datacoord/meta_test.go b/internal/datacoord/meta_test.go index dd0471eebcddf..d90b1b015018a 100644 --- a/internal/datacoord/meta_test.go +++ b/internal/datacoord/meta_test.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/errors" "github.com/golang/protobuf/proto" + "github.com/samber/lo" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" @@ -982,6 +983,97 @@ func Test_meta_GetSegmentsOfCollection(t *testing.T) { assert.True(t, ok) assert.Equal(t, expected, gotInfo.GetState()) } + + got = m.GetSegmentsOfCollection(-1) + assert.Equal(t, 3, len(got)) + + got = m.GetSegmentsOfCollection(10) + assert.Equal(t, 0, len(got)) +} + +func Test_meta_GetSegmentsWithChannel(t *testing.T) { + storedSegments := NewSegmentsInfo() + for segID, segment := range map[int64]*SegmentInfo{ + 1: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 1, + CollectionID: 1, + InsertChannel: "h1", + State: commonpb.SegmentState_Flushed, + }, + }, + 2: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 2, + CollectionID: 1, + InsertChannel: "h2", + State: commonpb.SegmentState_Growing, + }, + }, + 3: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 3, + CollectionID: 2, + State: commonpb.SegmentState_Flushed, + InsertChannel: "h1", + }, + }, + } { + storedSegments.SetSegment(segID, segment) + } + m := &meta{segments: storedSegments} + got := m.GetSegmentsByChannel("h1") + assert.Equal(t, 2, len(got)) + assert.ElementsMatch(t, []int64{1, 3}, lo.Map( + got, + func(s *SegmentInfo, i int) int64 { + return s.ID + }, + )) + + got = m.GetSegmentsByChannel("h3") + assert.Equal(t, 0, len(got)) + + got = m.SelectSegments(WithCollection(1), WithChannel("h1"), SegmentFilterFunc(func(segment *SegmentInfo) bool { + return segment != nil && segment.GetState() == commonpb.SegmentState_Flushed + })) + assert.Equal(t, 1, len(got)) + assert.ElementsMatch(t, []int64{1}, lo.Map( + got, + func(s *SegmentInfo, i int) int64 { + return s.ID + }, + )) + + m.segments.DropSegment(3) + _, ok := m.segments.secondaryIndexes.coll2Segments[2] + assert.False(t, ok) + assert.Equal(t, 1, len(m.segments.secondaryIndexes.coll2Segments)) + assert.Equal(t, 2, len(m.segments.secondaryIndexes.channel2Segments)) + + segments, ok := m.segments.secondaryIndexes.channel2Segments["h1"] + assert.True(t, ok) + assert.Equal(t, 1, len(segments)) + assert.Equal(t, int64(1), segments[1].ID) + segments, ok = m.segments.secondaryIndexes.channel2Segments["h2"] + assert.True(t, ok) + assert.Equal(t, 1, len(segments)) + assert.Equal(t, int64(2), segments[2].ID) + + m.segments.DropSegment(2) + segments, ok = m.segments.secondaryIndexes.coll2Segments[1] + assert.True(t, ok) + assert.Equal(t, 1, len(segments)) + assert.Equal(t, int64(1), segments[1].ID) + assert.Equal(t, 1, len(m.segments.secondaryIndexes.coll2Segments)) + assert.Equal(t, 1, len(m.segments.secondaryIndexes.channel2Segments)) + + segments, ok = m.segments.secondaryIndexes.channel2Segments["h1"] + assert.True(t, ok) + assert.Equal(t, 1, len(segments)) + assert.Equal(t, int64(1), segments[1].ID) + _, ok = m.segments.secondaryIndexes.channel2Segments["h2"] + assert.False(t, ok) } func TestMeta_HasSegments(t *testing.T) { diff --git a/internal/datacoord/segment_info.go 
b/internal/datacoord/segment_info.go index 40f0d46fe6cb9..13e6f4aad1c33 100644 --- a/internal/datacoord/segment_info.go +++ b/internal/datacoord/segment_info.go @@ -32,12 +32,17 @@ import ( // SegmentsInfo wraps a map, which maintains ID to SegmentInfo relation type SegmentsInfo struct { - segments map[UniqueID]*SegmentInfo - collSegments map[UniqueID]*CollectionSegments - compactionTo map[UniqueID]UniqueID // map the compact relation, value is the segment which `CompactFrom` contains key. + segments map[UniqueID]*SegmentInfo + secondaryIndexes segmentInfoIndexes + compactionTo map[UniqueID]UniqueID // map the compact relation, value is the segment which `CompactFrom` contains key. // A segment can be compacted to only one segment finally in meta. } +type segmentInfoIndexes struct { + coll2Segments map[UniqueID]map[UniqueID]*SegmentInfo + channel2Segments map[string]map[UniqueID]*SegmentInfo +} + // SegmentInfo wraps datapb.SegmentInfo and patches some extra info on it type SegmentInfo struct { *datapb.SegmentInfo @@ -69,16 +74,15 @@ func NewSegmentInfo(info *datapb.SegmentInfo) *SegmentInfo { // note that no mutex is wrapped so external concurrent control is needed func NewSegmentsInfo() *SegmentsInfo { return &SegmentsInfo{ - segments: make(map[UniqueID]*SegmentInfo), - collSegments: make(map[UniqueID]*CollectionSegments), + segments: make(map[UniqueID]*SegmentInfo), + secondaryIndexes: segmentInfoIndexes{ + coll2Segments: make(map[UniqueID]map[UniqueID]*SegmentInfo), + channel2Segments: make(map[string]map[UniqueID]*SegmentInfo), + }, compactionTo: make(map[UniqueID]UniqueID), } } -type CollectionSegments struct { - segments map[int64]*SegmentInfo -} - // GetSegment returns SegmentInfo // the logPath in meta is empty func (s *SegmentsInfo) GetSegment(segmentID UniqueID) *SegmentInfo { @@ -96,24 +100,42 @@ func (s *SegmentsInfo) GetSegments() []*SegmentInfo { return lo.Values(s.segments) } +func (s *SegmentsInfo) getCandidates(criterion *segmentCriterion) map[UniqueID]*SegmentInfo { + if criterion.collectionID > 0 { + collSegments, ok := s.secondaryIndexes.coll2Segments[criterion.collectionID] + if !ok { + return nil + } + + // both collection id and channel are filters of criterion + if criterion.channel != "" { + return lo.OmitBy(collSegments, func(k UniqueID, v *SegmentInfo) bool { + return v.InsertChannel != criterion.channel + }) + } + return collSegments + } + + if criterion.channel != "" { + channelSegments, ok := s.secondaryIndexes.channel2Segments[criterion.channel] + if !ok { + return nil + } + return channelSegments + } + + return s.segments +} + func (s *SegmentsInfo) GetSegmentsBySelector(filters ...SegmentFilter) []*SegmentInfo { criterion := &segmentCriterion{} for _, filter := range filters { filter.AddFilter(criterion) } - var result []*SegmentInfo - var candidates map[int64]*SegmentInfo + // apply criterion - switch { - case criterion.collectionID > 0: - collSegments, ok := s.collSegments[criterion.collectionID] - if !ok { - return nil - } - candidates = collSegments.segments - default: - candidates = s.segments - } + candidates := s.getCandidates(criterion) + var result []*SegmentInfo for _, segment := range candidates { if criterion.Match(segment) { result = append(result, segment) @@ -144,7 +166,7 @@ func (s *SegmentsInfo) GetCompactionTo(fromSegmentID int64) (*SegmentInfo, bool) func (s *SegmentsInfo) DropSegment(segmentID UniqueID) { if segment, ok := s.segments[segmentID]; ok { s.deleteCompactTo(segment) - s.delCollection(segment) + s.removeSecondaryIndex(segment) 
delete(s.segments, segmentID) } } @@ -156,10 +178,10 @@ func (s *SegmentsInfo) SetSegment(segmentID UniqueID, segment *SegmentInfo) { if segment, ok := s.segments[segmentID]; ok { // Remove old segment compact to relation first. s.deleteCompactTo(segment) - s.delCollection(segment) + s.removeSecondaryIndex(segment) } s.segments[segmentID] = segment - s.addCollection(segment) + s.addSecondaryIndex(segment) s.addCompactTo(segment) } @@ -296,27 +318,35 @@ func (s *SegmentInfo) ShadowClone(opts ...SegmentInfoOption) *SegmentInfo { return cloned } -func (s *SegmentsInfo) addCollection(segment *SegmentInfo) { +func (s *SegmentsInfo) addSecondaryIndex(segment *SegmentInfo) { collID := segment.GetCollectionID() - collSegment, ok := s.collSegments[collID] - if !ok { - collSegment = &CollectionSegments{ - segments: make(map[UniqueID]*SegmentInfo), - } - s.collSegments[collID] = collSegment + channel := segment.GetInsertChannel() + if _, ok := s.secondaryIndexes.coll2Segments[collID]; !ok { + s.secondaryIndexes.coll2Segments[collID] = make(map[UniqueID]*SegmentInfo) + } + s.secondaryIndexes.coll2Segments[collID][segment.ID] = segment + + if _, ok := s.secondaryIndexes.channel2Segments[channel]; !ok { + s.secondaryIndexes.channel2Segments[channel] = make(map[UniqueID]*SegmentInfo) } - collSegment.segments[segment.GetID()] = segment + s.secondaryIndexes.channel2Segments[channel][segment.ID] = segment } -func (s *SegmentsInfo) delCollection(segment *SegmentInfo) { +func (s *SegmentsInfo) removeSecondaryIndex(segment *SegmentInfo) { collID := segment.GetCollectionID() - collSegment, ok := s.collSegments[collID] - if !ok { - return + channel := segment.GetInsertChannel() + if segments, ok := s.secondaryIndexes.coll2Segments[collID]; ok { + delete(segments, segment.ID) + if len(segments) == 0 { + delete(s.secondaryIndexes.coll2Segments, collID) + } } - delete(collSegment.segments, segment.GetID()) - if len(collSegment.segments) == 0 { - delete(s.collSegments, segment.GetCollectionID()) + + if segments, ok := s.secondaryIndexes.channel2Segments[channel]; ok { + delete(segments, segment.ID) + if len(segments) == 0 { + delete(s.secondaryIndexes.channel2Segments, channel) + } } } diff --git a/internal/datacoord/segment_operator.go b/internal/datacoord/segment_operator.go index 2d26f6d03d7d1..d31d1a4c3d8e2 100644 --- a/internal/datacoord/segment_operator.go +++ b/internal/datacoord/segment_operator.go @@ -31,6 +31,7 @@ func SetMaxRowCount(maxRow int64) SegmentOperator { type segmentCriterion struct { collectionID int64 + channel string others []SegmentFilter } @@ -62,6 +63,21 @@ func WithCollection(collectionID int64) SegmentFilter { return CollectionFilter(collectionID) } +type ChannelFilter string + +func (f ChannelFilter) Match(segment *SegmentInfo) bool { + return segment.GetInsertChannel() == string(f) +} + +func (f ChannelFilter) AddFilter(criterion *segmentCriterion) { + criterion.channel = string(f) +} + +// WithChannel WithCollection has a higher priority if both WithCollection and WithChannel are in condition together. 
+func WithChannel(channel string) SegmentFilter { + return ChannelFilter(channel) +} + type SegmentFilterFunc func(*SegmentInfo) bool func (f SegmentFilterFunc) Match(segment *SegmentInfo) bool { @@ -71,9 +87,3 @@ func (f SegmentFilterFunc) Match(segment *SegmentInfo) bool { func (f SegmentFilterFunc) AddFilter(criterion *segmentCriterion) { criterion.others = append(criterion.others, f) } - -func WithChannel(channel string) SegmentFilter { - return SegmentFilterFunc(func(si *SegmentInfo) bool { - return si.GetInsertChannel() == channel - }) -} From 371c2d8826650823a6488393607ed10db929f89e Mon Sep 17 00:00:00 2001 From: sre-ci-robot Date: Tue, 28 May 2024 12:00:44 +0000 Subject: [PATCH 090/126] Update all contributors Signed-off-by: sre-ci-robot --- README.md | 3 ++- README_CN.md | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5353195096cad..4b05f41585111 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut ### All contributors
-
+
@@ -224,6 +224,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut + diff --git a/README_CN.md b/README_CN.md index 4688953871b4c..7e81979848b36 100644 --- a/README_CN.md +++ b/README_CN.md @@ -154,7 +154,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 ### All contributors
-
+
@@ -206,6 +206,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 + From 73c9b80a7d86bfd3a702200af2df1c8faf0be848 Mon Sep 17 00:00:00 2001 From: congqixia Date: Wed, 29 May 2024 10:05:42 +0800 Subject: [PATCH 091/126] enhance: Store locations for largest K in `LocationCache` (#33429) See also #32642 `LocationCache` used map to store different locations for different K which may cause lots of CPU time when get locations many times. This PR change the implementation of LocationCache to store only the location for the largest K used to totally remove the map access operation. See pprof from test of @XuanYang-cn ![image](https://github.com/milvus-io/milvus/assets/84113973/ad17cff8-62ad-4d78-9bb0-f6df0512f4ea) --------- Signed-off-by: Congqi Xia --- .../datanode/metacache/bloom_filter_set.go | 2 +- .../datanode/writebuffer/bf_write_buffer.go | 2 +- .../datanode/writebuffer/l0_write_buffer.go | 2 +- internal/storage/pk_statistics.go | 25 +++++++++---------- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/internal/datanode/metacache/bloom_filter_set.go b/internal/datanode/metacache/bloom_filter_set.go index 002988f61da2b..7785e6875419e 100644 --- a/internal/datanode/metacache/bloom_filter_set.go +++ b/internal/datanode/metacache/bloom_filter_set.go @@ -55,7 +55,7 @@ func NewBloomFilterSetWithBatchSize(batchSize uint, historyEntries ...*storage.P } } -func (bfs *BloomFilterSet) PkExists(lc storage.LocationsCache) bool { +func (bfs *BloomFilterSet) PkExists(lc *storage.LocationsCache) bool { bfs.mut.RLock() defer bfs.mut.RUnlock() if bfs.current != nil && bfs.current.TestLocationCache(lc) { diff --git a/internal/datanode/writebuffer/bf_write_buffer.go b/internal/datanode/writebuffer/bf_write_buffer.go index 3ddc961879729..322c4d56920d2 100644 --- a/internal/datanode/writebuffer/bf_write_buffer.go +++ b/internal/datanode/writebuffer/bf_write_buffer.go @@ -35,7 +35,7 @@ func (wb *bfWriteBuffer) dispatchDeleteMsgs(groups []*inData, deleteMsgs []*msgs // distribute delete msg for previous data for _, delMsg := range deleteMsgs { pks := storage.ParseIDs2PrimaryKeys(delMsg.GetPrimaryKeys()) - lcs := lo.Map(pks, func(pk storage.PrimaryKey, _ int) storage.LocationsCache { return storage.NewLocationsCache(pk) }) + lcs := lo.Map(pks, func(pk storage.PrimaryKey, _ int) *storage.LocationsCache { return storage.NewLocationsCache(pk) }) segments := wb.metaCache.GetSegmentsBy(metacache.WithPartitionID(delMsg.PartitionID), metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Flushing, commonpb.SegmentState_Flushed)) for _, segment := range segments { diff --git a/internal/datanode/writebuffer/l0_write_buffer.go b/internal/datanode/writebuffer/l0_write_buffer.go index c4ed68fb1147a..019994406f779 100644 --- a/internal/datanode/writebuffer/l0_write_buffer.go +++ b/internal/datanode/writebuffer/l0_write_buffer.go @@ -51,7 +51,7 @@ func (wb *l0WriteBuffer) dispatchDeleteMsgs(groups []*inData, deleteMsgs []*msgs for _, delMsg := range deleteMsgs { l0SegmentID := wb.getL0SegmentID(delMsg.GetPartitionID(), startPos) pks := storage.ParseIDs2PrimaryKeys(delMsg.GetPrimaryKeys()) - lcs := lo.Map(pks, func(pk storage.PrimaryKey, _ int) storage.LocationsCache { return storage.NewLocationsCache(pk) }) + lcs := lo.Map(pks, func(pk storage.PrimaryKey, _ int) *storage.LocationsCache { return storage.NewLocationsCache(pk) }) segments := wb.metaCache.GetSegmentsBy(metacache.WithPartitionID(delMsg.PartitionID), metacache.WithSegmentState(commonpb.SegmentState_Growing, 
commonpb.SegmentState_Flushing, commonpb.SegmentState_Flushed))
 		for _, segment := range segments {
diff --git a/internal/storage/pk_statistics.go b/internal/storage/pk_statistics.go
index c42a8c87929dd..ae5c549f65485 100644
--- a/internal/storage/pk_statistics.go
+++ b/internal/storage/pk_statistics.go
@@ -140,7 +140,7 @@ func (st *PkStatistics) TestLocations(pk PrimaryKey, locs []uint64) bool {
 	return st.MinPK.LE(pk) && st.MaxPK.GE(pk)
 }
 
-func (st *PkStatistics) TestLocationCache(lc LocationsCache) bool {
+func (st *PkStatistics) TestLocationCache(lc *LocationsCache) bool {
 	// empty pkStatics
 	if st.MinPK == nil || st.MaxPK == nil || st.PkFilter == nil {
 		return false
@@ -159,22 +159,21 @@ func (st *PkStatistics) TestLocationCache(lc LocationsCache) bool {
 // Note that this helper is not concurrent safe and shall be used in same goroutine.
 type LocationsCache struct {
 	pk        PrimaryKey
-	locations map[uint][]uint64
+	k         uint
+	locations []uint64
 }
 
-func (lc LocationsCache) Locations(k uint) []uint64 {
-	locs, ok := lc.locations[k]
-	if ok {
-		return locs
+func (lc *LocationsCache) Locations(k uint) []uint64 {
+	if k > lc.k {
+		lc.k = k
+		lc.locations = Locations(lc.pk, lc.k)
 	}
-	locs = Locations(lc.pk, k)
-	lc.locations[k] = locs
-	return locs
+
+	return lc.locations[:k]
 }
 
-func NewLocationsCache(pk PrimaryKey) LocationsCache {
-	return LocationsCache{
-		pk:        pk,
-		locations: make(map[uint][]uint64),
+func NewLocationsCache(pk PrimaryKey) *LocationsCache {
+	return &LocationsCache{
+		pk: pk,
 	}
 }

From b13932bb5500fdef8b4d28bd5d2363ec38a223c4 Mon Sep 17 00:00:00 2001
From: wei liu
Date: Wed, 29 May 2024 10:59:43 +0800
Subject: [PATCH 092/126] enhance: Enable database level replica num and resource groups for loading collection (#33052)

issue: #30040

This PR introduces two database-level props:
1. database.replica.number
2. database.resource_groups

Users can set these two database props via the AlterDatabase API and then load
collections without specifying replica_num and resource groups; the
database-level load params will be used when loading those collections.
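For illustration, an editorial sketch (not part of this patch) of how the two new property keys are read back into load parameters. The constant and helper names follow the pkg/common additions in the diff below; the standalone wrapper and the sample values are assumptions.

```go
package main

import (
	"fmt"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"

	"github.com/milvus-io/milvus/pkg/common"
)

func main() {
	// Database properties as they would be stored after an AlterDatabase call
	// (sample values mirroring the broker unit test in this patch).
	props := []*commonpb.KeyValuePair{
		{Key: common.DatabaseReplicaNumber, Value: "3"},
		{Key: common.DatabaseResourceGroups, Value: "rg1,rg2"},
	}

	// The helpers added in pkg/common turn raw properties back into load parameters.
	replicaNum, err := common.DatabaseLevelReplicaNumber(props)
	if err != nil {
		panic(err)
	}
	resourceGroups, err := common.DatabaseLevelResourceGroups(props)
	if err != nil {
		panic(err)
	}

	// Prints: replicas=3 resourceGroups=[rg1 rg2]
	fmt.Printf("replicas=%d resourceGroups=%v\n", replicaNum, resourceGroups)
}
```

With these helpers, a load request that leaves replica number and resource groups unset can fall back to the database-level values.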
Signed-off-by: Wei Liu --- internal/proxy/task.go | 5 - .../querycoordv2/meta/coordinator_broker.go | 46 +++++ .../meta/coordinator_broker_test.go | 87 ++++++++ internal/querycoordv2/meta/mock_broker.go | 119 +++++++++++ internal/querycoordv2/server_test.go | 12 +- internal/querycoordv2/services.go | 36 ++++ internal/querycoordv2/services_test.go | 2 + pkg/common/common.go | 41 ++++ pkg/common/common_test.go | 50 +++++ tests/integration/replicas/load/load_test.go | 187 ++++++++++++++++++ 10 files changed, 575 insertions(+), 10 deletions(-) create mode 100644 tests/integration/replicas/load/load_test.go diff --git a/internal/proxy/task.go b/internal/proxy/task.go index 7aa1457fd1151..ffd79b67af0a7 100644 --- a/internal/proxy/task.go +++ b/internal/proxy/task.go @@ -1549,11 +1549,6 @@ func (t *loadCollectionTask) PreExecute(ctx context.Context) error { return err } - // To compat with LoadCollcetion before Milvus@2.1 - if t.ReplicaNumber == 0 { - t.ReplicaNumber = 1 - } - return nil } diff --git a/internal/querycoordv2/meta/coordinator_broker.go b/internal/querycoordv2/meta/coordinator_broker.go index cbcb9fced74e5..2df54688affb0 100644 --- a/internal/querycoordv2/meta/coordinator_broker.go +++ b/internal/querycoordv2/meta/coordinator_broker.go @@ -30,7 +30,9 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/indexpb" "github.com/milvus-io/milvus/internal/proto/querypb" + "github.com/milvus-io/milvus/internal/proto/rootcoordpb" "github.com/milvus-io/milvus/internal/types" + "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/commonpbutil" "github.com/milvus-io/milvus/pkg/util/merr" @@ -47,6 +49,8 @@ type Broker interface { GetSegmentInfo(ctx context.Context, segmentID ...UniqueID) (*datapb.GetSegmentInfoResponse, error) GetIndexInfo(ctx context.Context, collectionID UniqueID, segmentID UniqueID) ([]*querypb.FieldIndexInfo, error) GetRecoveryInfoV2(ctx context.Context, collectionID UniqueID, partitionIDs ...UniqueID) ([]*datapb.VchannelInfo, []*datapb.SegmentInfo, error) + DescribeDatabase(ctx context.Context, dbName string) (*rootcoordpb.DescribeDatabaseResponse, error) + GetCollectionLoadInfo(ctx context.Context, collectionID UniqueID) ([]string, int64, error) } type CoordinatorBroker struct { @@ -83,6 +87,48 @@ func (broker *CoordinatorBroker) DescribeCollection(ctx context.Context, collect return resp, nil } +func (broker *CoordinatorBroker) DescribeDatabase(ctx context.Context, dbName string) (*rootcoordpb.DescribeDatabaseResponse, error) { + ctx, cancel := context.WithTimeout(ctx, paramtable.Get().QueryCoordCfg.BrokerTimeout.GetAsDuration(time.Millisecond)) + defer cancel() + + req := &rootcoordpb.DescribeDatabaseRequest{ + Base: commonpbutil.NewMsgBase( + commonpbutil.WithMsgType(commonpb.MsgType_DescribeCollection), + ), + DbName: dbName, + } + resp, err := broker.rootCoord.DescribeDatabase(ctx, req) + if err := merr.CheckRPCCall(resp, err); err != nil { + log.Ctx(ctx).Warn("failed to describe database", zap.Error(err)) + return nil, err + } + return resp, nil +} + +// try to get database level replica_num and resource groups, return (resource_groups, replica_num, error) +func (broker *CoordinatorBroker) GetCollectionLoadInfo(ctx context.Context, collectionID UniqueID) ([]string, int64, error) { + // to do by weiliu1031: querycoord should cache mappings: collectionID->dbName + collectionInfo, err := broker.DescribeCollection(ctx, collectionID) + if err != nil { + 
return nil, 0, err + } + + dbInfo, err := broker.DescribeDatabase(ctx, collectionInfo.GetDbName()) + if err != nil { + return nil, 0, err + } + replicaNum, err := common.DatabaseLevelReplicaNumber(dbInfo.GetProperties()) + if err != nil { + return nil, 0, err + } + rgs, err := common.DatabaseLevelResourceGroups(dbInfo.GetProperties()) + if err != nil { + return nil, 0, err + } + + return rgs, replicaNum, nil +} + func (broker *CoordinatorBroker) GetPartitions(ctx context.Context, collectionID UniqueID) ([]UniqueID, error) { ctx, cancel := context.WithTimeout(ctx, paramtable.Get().QueryCoordCfg.BrokerTimeout.GetAsDuration(time.Millisecond)) defer cancel() diff --git a/internal/querycoordv2/meta/coordinator_broker_test.go b/internal/querycoordv2/meta/coordinator_broker_test.go index 476a997dd2ae9..778268f7ce66b 100644 --- a/internal/querycoordv2/meta/coordinator_broker_test.go +++ b/internal/querycoordv2/meta/coordinator_broker_test.go @@ -18,6 +18,7 @@ package meta import ( "context" + "strings" "testing" "github.com/cockroachdb/errors" @@ -32,6 +33,8 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/indexpb" "github.com/milvus-io/milvus/internal/proto/querypb" + "github.com/milvus-io/milvus/internal/proto/rootcoordpb" + "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" ) @@ -490,6 +493,90 @@ func (s *CoordinatorBrokerDataCoordSuite) TestGetIndexInfo() { }) } +func (s *CoordinatorBrokerRootCoordSuite) TestDescribeDatabase() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + s.Run("normal_case", func() { + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything). + Return(&rootcoordpb.DescribeDatabaseResponse{ + Status: merr.Success(), + }, nil) + _, err := s.broker.DescribeDatabase(ctx, "fake_db1") + s.NoError(err) + s.resetMock() + }) + + s.Run("rootcoord_return_error", func() { + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything).Return(nil, errors.New("fake error")) + _, err := s.broker.DescribeDatabase(ctx, "fake_db1") + s.Error(err) + s.resetMock() + }) + + s.Run("rootcoord_return_failure_status", func() { + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything). + Return(&rootcoordpb.DescribeDatabaseResponse{ + Status: merr.Status(errors.New("fake error")), + }, nil) + _, err := s.broker.DescribeDatabase(ctx, "fake_db1") + s.Error(err) + s.resetMock() + }) + + s.Run("rootcoord_return_unimplemented", func() { + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything).Return(nil, merr.ErrServiceUnimplemented) + _, err := s.broker.DescribeDatabase(ctx, "fake_db1") + s.Error(err) + s.resetMock() + }) +} + +func (s *CoordinatorBrokerRootCoordSuite) TestGetCollectionLoadInfo() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + s.Run("normal_case", func() { + s.rootcoord.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ + DbName: "fake_db1", + }, nil) + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything). 
+ Return(&rootcoordpb.DescribeDatabaseResponse{ + Status: merr.Success(), + Properties: []*commonpb.KeyValuePair{ + { + Key: common.DatabaseReplicaNumber, + Value: "3", + }, + { + Key: common.DatabaseResourceGroups, + Value: strings.Join([]string{"rg1", "rg2"}, ","), + }, + }, + }, nil) + rgs, replicas, err := s.broker.GetCollectionLoadInfo(ctx, 1) + s.NoError(err) + s.Equal(int64(3), replicas) + s.Contains(rgs, "rg1") + s.Contains(rgs, "rg2") + s.resetMock() + }) + + s.Run("props not set", func() { + s.rootcoord.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ + DbName: "fake_db1", + }, nil) + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything). + Return(&rootcoordpb.DescribeDatabaseResponse{ + Status: merr.Success(), + Properties: []*commonpb.KeyValuePair{}, + }, nil) + _, _, err := s.broker.GetCollectionLoadInfo(ctx, 1) + s.Error(err) + s.resetMock() + }) +} + func TestCoordinatorBroker(t *testing.T) { suite.Run(t, new(CoordinatorBrokerRootCoordSuite)) suite.Run(t, new(CoordinatorBrokerDataCoordSuite)) diff --git a/internal/querycoordv2/meta/mock_broker.go b/internal/querycoordv2/meta/mock_broker.go index ff3548985547f..a940aff58bc91 100644 --- a/internal/querycoordv2/meta/mock_broker.go +++ b/internal/querycoordv2/meta/mock_broker.go @@ -13,6 +13,8 @@ import ( mock "github.com/stretchr/testify/mock" querypb "github.com/milvus-io/milvus/internal/proto/querypb" + + rootcoordpb "github.com/milvus-io/milvus/internal/proto/rootcoordpb" ) // MockBroker is an autogenerated mock type for the Broker type @@ -83,6 +85,123 @@ func (_c *MockBroker_DescribeCollection_Call) RunAndReturn(run func(context.Cont return _c } +// DescribeDatabase provides a mock function with given fields: ctx, dbName +func (_m *MockBroker) DescribeDatabase(ctx context.Context, dbName string) (*rootcoordpb.DescribeDatabaseResponse, error) { + ret := _m.Called(ctx, dbName) + + var r0 *rootcoordpb.DescribeDatabaseResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, string) (*rootcoordpb.DescribeDatabaseResponse, error)); ok { + return rf(ctx, dbName) + } + if rf, ok := ret.Get(0).(func(context.Context, string) *rootcoordpb.DescribeDatabaseResponse); ok { + r0 = rf(ctx, dbName) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*rootcoordpb.DescribeDatabaseResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, string) error); ok { + r1 = rf(ctx, dbName) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockBroker_DescribeDatabase_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DescribeDatabase' +type MockBroker_DescribeDatabase_Call struct { + *mock.Call +} + +// DescribeDatabase is a helper method to define mock.On call +// - ctx context.Context +// - dbName string +func (_e *MockBroker_Expecter) DescribeDatabase(ctx interface{}, dbName interface{}) *MockBroker_DescribeDatabase_Call { + return &MockBroker_DescribeDatabase_Call{Call: _e.mock.On("DescribeDatabase", ctx, dbName)} +} + +func (_c *MockBroker_DescribeDatabase_Call) Run(run func(ctx context.Context, dbName string)) *MockBroker_DescribeDatabase_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(string)) + }) + return _c +} + +func (_c *MockBroker_DescribeDatabase_Call) Return(_a0 *rootcoordpb.DescribeDatabaseResponse, _a1 error) *MockBroker_DescribeDatabase_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *MockBroker_DescribeDatabase_Call) 
RunAndReturn(run func(context.Context, string) (*rootcoordpb.DescribeDatabaseResponse, error)) *MockBroker_DescribeDatabase_Call { + _c.Call.Return(run) + return _c +} + +// GetCollectionLoadInfo provides a mock function with given fields: ctx, collectionID +func (_m *MockBroker) GetCollectionLoadInfo(ctx context.Context, collectionID int64) ([]string, int64, error) { + ret := _m.Called(ctx, collectionID) + + var r0 []string + var r1 int64 + var r2 error + if rf, ok := ret.Get(0).(func(context.Context, int64) ([]string, int64, error)); ok { + return rf(ctx, collectionID) + } + if rf, ok := ret.Get(0).(func(context.Context, int64) []string); ok { + r0 = rf(ctx, collectionID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]string) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, int64) int64); ok { + r1 = rf(ctx, collectionID) + } else { + r1 = ret.Get(1).(int64) + } + + if rf, ok := ret.Get(2).(func(context.Context, int64) error); ok { + r2 = rf(ctx, collectionID) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + +// MockBroker_GetCollectionLoadInfo_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetCollectionLoadInfo' +type MockBroker_GetCollectionLoadInfo_Call struct { + *mock.Call +} + +// GetCollectionLoadInfo is a helper method to define mock.On call +// - ctx context.Context +// - collectionID int64 +func (_e *MockBroker_Expecter) GetCollectionLoadInfo(ctx interface{}, collectionID interface{}) *MockBroker_GetCollectionLoadInfo_Call { + return &MockBroker_GetCollectionLoadInfo_Call{Call: _e.mock.On("GetCollectionLoadInfo", ctx, collectionID)} +} + +func (_c *MockBroker_GetCollectionLoadInfo_Call) Run(run func(ctx context.Context, collectionID int64)) *MockBroker_GetCollectionLoadInfo_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(int64)) + }) + return _c +} + +func (_c *MockBroker_GetCollectionLoadInfo_Call) Return(_a0 []string, _a1 int64, _a2 error) *MockBroker_GetCollectionLoadInfo_Call { + _c.Call.Return(_a0, _a1, _a2) + return _c +} + +func (_c *MockBroker_GetCollectionLoadInfo_Call) RunAndReturn(run func(context.Context, int64) ([]string, int64, error)) *MockBroker_GetCollectionLoadInfo_Call { + _c.Call.Return(run) + return _c +} + // GetIndexInfo provides a mock function with given fields: ctx, collectionID, segmentID func (_m *MockBroker) GetIndexInfo(ctx context.Context, collectionID int64, segmentID int64) ([]*querypb.FieldIndexInfo, error) { ret := _m.Called(ctx, collectionID, segmentID) diff --git a/internal/querycoordv2/server_test.go b/internal/querycoordv2/server_test.go index f71172fd89394..78c2fdb89b6f1 100644 --- a/internal/querycoordv2/server_test.go +++ b/internal/querycoordv2/server_test.go @@ -436,17 +436,19 @@ func (suite *ServerSuite) loadAll() { for _, collection := range suite.collections { if suite.loadTypes[collection] == querypb.LoadType_LoadCollection { req := &querypb.LoadCollectionRequest{ - CollectionID: collection, - ReplicaNumber: suite.replicaNumber[collection], + CollectionID: collection, + ReplicaNumber: suite.replicaNumber[collection], + ResourceGroups: []string{meta.DefaultResourceGroupName}, } resp, err := suite.server.LoadCollection(ctx, req) suite.NoError(err) suite.Equal(commonpb.ErrorCode_Success, resp.ErrorCode) } else { req := &querypb.LoadPartitionsRequest{ - CollectionID: collection, - PartitionIDs: suite.partitions[collection], - ReplicaNumber: suite.replicaNumber[collection], + CollectionID: collection, + PartitionIDs: 
suite.partitions[collection], + ReplicaNumber: suite.replicaNumber[collection], + ResourceGroups: []string{meta.DefaultResourceGroupName}, } resp, err := suite.server.LoadPartitions(ctx, req) suite.NoError(err) diff --git a/internal/querycoordv2/services.go b/internal/querycoordv2/services.go index dea9817a2777f..b64f09921fea6 100644 --- a/internal/querycoordv2/services.go +++ b/internal/querycoordv2/services.go @@ -215,6 +215,24 @@ func (s *Server) LoadCollection(ctx context.Context, req *querypb.LoadCollection return merr.Status(err), nil } + if req.GetReplicaNumber() <= 0 || len(req.GetResourceGroups()) == 0 { + // when replica number or resource groups is not set, use database level config + rgs, replicas, err := s.broker.GetCollectionLoadInfo(ctx, req.GetCollectionID()) + if err != nil { + log.Warn("failed to get data base level load info", zap.Error(err)) + } + + if req.GetReplicaNumber() <= 0 { + log.Info("load collection use database level replica number", zap.Int64("databaseLevelReplicaNum", replicas)) + req.ReplicaNumber = int32(replicas) + } + + if len(req.GetResourceGroups()) == 0 { + log.Info("load collection use database level resource groups", zap.Strings("databaseLevelResourceGroups", rgs)) + req.ResourceGroups = rgs + } + } + if err := s.checkResourceGroup(req.GetCollectionID(), req.GetResourceGroups()); err != nil { msg := "failed to load collection" log.Warn(msg, zap.Error(err)) @@ -316,6 +334,24 @@ func (s *Server) LoadPartitions(ctx context.Context, req *querypb.LoadPartitions return merr.Status(err), nil } + if req.GetReplicaNumber() <= 0 || len(req.GetResourceGroups()) == 0 { + // when replica number or resource groups is not set, use database level config + rgs, replicas, err := s.broker.GetCollectionLoadInfo(ctx, req.GetCollectionID()) + if err != nil { + log.Warn("failed to get data base level load info", zap.Error(err)) + } + + if req.GetReplicaNumber() <= 0 { + log.Info("load collection use database level replica number", zap.Int64("databaseLevelReplicaNum", replicas)) + req.ReplicaNumber = int32(replicas) + } + + if len(req.GetResourceGroups()) == 0 { + log.Info("load collection use database level resource groups", zap.Strings("databaseLevelResourceGroups", rgs)) + req.ResourceGroups = rgs + } + } + if err := s.checkResourceGroup(req.GetCollectionID(), req.GetResourceGroups()); err != nil { msg := "failed to load partitions" log.Warn(msg, zap.Error(err)) diff --git a/internal/querycoordv2/services_test.go b/internal/querycoordv2/services_test.go index 744004fd8f074..e4fb877d0101f 100644 --- a/internal/querycoordv2/services_test.go +++ b/internal/querycoordv2/services_test.go @@ -207,6 +207,8 @@ func (suite *ServiceSuite) SetupTest() { } suite.server.UpdateStateCode(commonpb.StateCode_Healthy) + + suite.broker.EXPECT().GetCollectionLoadInfo(mock.Anything, mock.Anything).Return([]string{meta.DefaultResourceGroupName}, 1, nil).Maybe() } func (suite *ServiceSuite) TestShowCollections() { diff --git a/pkg/common/common.go b/pkg/common/common.go index 2b9ebc4d82084..ea148b03b7f5d 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -18,6 +18,8 @@ package common import ( "encoding/binary" + "fmt" + "strconv" "strings" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" @@ -134,6 +136,10 @@ const ( CollectionDiskQuotaKey = "collection.diskProtection.diskQuota.mb" PartitionDiskQuotaKey = "partition.diskProtection.diskQuota.mb" + + // database level properties + DatabaseReplicaNumber = "database.replica.number" + DatabaseResourceGroups = 
"database.resource_groups" ) // common properties @@ -205,3 +211,38 @@ const ( // LatestVerision is the magic number for watch latest revision LatestRevision = int64(-1) ) + +func DatabaseLevelReplicaNumber(kvs []*commonpb.KeyValuePair) (int64, error) { + for _, kv := range kvs { + if kv.Key == DatabaseReplicaNumber { + replicaNum, err := strconv.ParseInt(kv.Value, 10, 64) + if err != nil { + return 0, fmt.Errorf("invalid database property: [key=%s] [value=%s]", kv.Key, kv.Value) + } + + return replicaNum, nil + } + } + + return 0, fmt.Errorf("database property not found: %s", DatabaseReplicaNumber) +} + +func DatabaseLevelResourceGroups(kvs []*commonpb.KeyValuePair) ([]string, error) { + for _, kv := range kvs { + if kv.Key == DatabaseResourceGroups { + invalidPropValue := fmt.Errorf("invalid database property: [key=%s] [value=%s]", kv.Key, kv.Value) + if len(kv.Value) == 0 { + return nil, invalidPropValue + } + + rgs := strings.Split(kv.Value, ",") + if len(rgs) == 0 { + return nil, invalidPropValue + } + + return rgs, nil + } + } + + return nil, fmt.Errorf("database property not found: %s", DatabaseResourceGroups) +} diff --git a/pkg/common/common_test.go b/pkg/common/common_test.go index 7228b1b6ab8e8..2dc31e33fb16a 100644 --- a/pkg/common/common_test.go +++ b/pkg/common/common_test.go @@ -1,9 +1,12 @@ package common import ( + "strings" "testing" "github.com/stretchr/testify/assert" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" ) func TestIsSystemField(t *testing.T) { @@ -38,3 +41,50 @@ func TestIsSystemField(t *testing.T) { }) } } + +func TestDatabaseProperties(t *testing.T) { + props := []*commonpb.KeyValuePair{ + { + Key: DatabaseReplicaNumber, + Value: "3", + }, + { + Key: DatabaseResourceGroups, + Value: strings.Join([]string{"rg1", "rg2"}, ","), + }, + } + + replicaNum, err := DatabaseLevelReplicaNumber(props) + assert.NoError(t, err) + assert.Equal(t, int64(3), replicaNum) + + rgs, err := DatabaseLevelResourceGroups(props) + assert.NoError(t, err) + assert.Contains(t, rgs, "rg1") + assert.Contains(t, rgs, "rg2") + + // test prop not found + _, err = DatabaseLevelReplicaNumber(nil) + assert.Error(t, err) + + _, err = DatabaseLevelResourceGroups(nil) + assert.Error(t, err) + + // test invalid prop value + + props = []*commonpb.KeyValuePair{ + { + Key: DatabaseReplicaNumber, + Value: "xxxx", + }, + { + Key: DatabaseResourceGroups, + Value: "", + }, + } + _, err = DatabaseLevelReplicaNumber(props) + assert.Error(t, err) + + _, err = DatabaseLevelResourceGroups(props) + assert.Error(t, err) +} diff --git a/tests/integration/replicas/load/load_test.go b/tests/integration/replicas/load/load_test.go new file mode 100644 index 0000000000000..837a634c53799 --- /dev/null +++ b/tests/integration/replicas/load/load_test.go @@ -0,0 +1,187 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package balance + +import ( + "context" + "fmt" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus-proto/go-api/v2/rgpb" + "github.com/milvus-io/milvus/internal/proto/querypb" + "github.com/milvus-io/milvus/internal/querycoordv2/meta" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/tests/integration" +) + +const ( + dim = 128 + dbName = "" + collectionName = "test_load_collection" +) + +type LoadTestSuite struct { + integration.MiniClusterSuite +} + +func (s *LoadTestSuite) SetupSuite() { + paramtable.Init() + paramtable.Get().Save(paramtable.Get().QueryCoordCfg.BalanceCheckInterval.Key, "1000") + paramtable.Get().Save(paramtable.Get().QueryNodeCfg.GracefulStopTimeout.Key, "1") + + s.Require().NoError(s.SetupEmbedEtcd()) +} + +func (s *LoadTestSuite) loadCollection(collectionName string, replica int, rgs []string) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // load + loadStatus, err := s.Cluster.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + ReplicaNumber: int32(replica), + ResourceGroups: rgs, + }) + s.NoError(err) + s.True(merr.Ok(loadStatus)) + s.WaitForLoad(ctx, collectionName) +} + +func (s *LoadTestSuite) releaseCollection(collectionName string) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // load + status, err := s.Cluster.Proxy.ReleaseCollection(ctx, &milvuspb.ReleaseCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + }) + s.NoError(err) + s.True(merr.Ok(status)) +} + +func (s *LoadTestSuite) TestLoadWithDatabaseLevelConfig() { + ctx := context.Background() + s.CreateCollectionWithConfiguration(ctx, &integration.CreateCollectionConfig{ + DBName: dbName, + Dim: dim, + CollectionName: collectionName, + ChannelNum: 1, + SegmentNum: 3, + RowNumPerSegment: 2000, + }) + + // prepare resource groups + rgNum := 3 + rgs := make([]string, 0) + for i := 0; i < rgNum; i++ { + rgs = append(rgs, fmt.Sprintf("rg_%d", i)) + s.Cluster.QueryCoord.CreateResourceGroup(ctx, &milvuspb.CreateResourceGroupRequest{ + ResourceGroup: rgs[i], + Config: &rgpb.ResourceGroupConfig{ + Requests: &rgpb.ResourceGroupLimit{ + NodeNum: 1, + }, + Limits: &rgpb.ResourceGroupLimit{ + NodeNum: 1, + }, + + TransferFrom: []*rgpb.ResourceGroupTransfer{ + { + ResourceGroup: meta.DefaultResourceGroupName, + }, + }, + TransferTo: []*rgpb.ResourceGroupTransfer{ + { + ResourceGroup: meta.DefaultResourceGroupName, + }, + }, + }, + }) + } + + resp, err := s.Cluster.QueryCoord.ListResourceGroups(ctx, &milvuspb.ListResourceGroupsRequest{}) + s.NoError(err) + s.True(merr.Ok(resp.GetStatus())) + s.Len(resp.GetResourceGroups(), rgNum+1) + + for i := 1; i < rgNum; i++ { + s.Cluster.AddQueryNode() + } + + s.Eventually(func() bool { + matchCounter := 0 + for _, rg := range rgs { + resp1, err := s.Cluster.QueryCoord.DescribeResourceGroup(ctx, &querypb.DescribeResourceGroupRequest{ + ResourceGroup: rg, + }) + s.NoError(err) + s.True(merr.Ok(resp.GetStatus())) + if len(resp1.ResourceGroup.Nodes) == 1 { + matchCounter += 1 + } + } + return matchCounter == rgNum + }, 30*time.Second, 
time.Second) + + status, err := s.Cluster.Proxy.AlterDatabase(ctx, &milvuspb.AlterDatabaseRequest{ + DbName: "default", + Properties: []*commonpb.KeyValuePair{ + { + Key: common.DatabaseReplicaNumber, + Value: "3", + }, + { + Key: common.DatabaseResourceGroups, + Value: strings.Join(rgs, ","), + }, + }, + }) + s.NoError(err) + s.True(merr.Ok(status)) + + resp1, err := s.Cluster.Proxy.DescribeDatabase(ctx, &milvuspb.DescribeDatabaseRequest{ + DbName: "default", + }) + s.NoError(err) + s.True(merr.Ok(resp1.Status)) + s.Len(resp1.GetProperties(), 2) + + // load collection without specified replica and rgs + s.loadCollection(collectionName, 0, nil) + resp2, err := s.Cluster.Proxy.GetReplicas(ctx, &milvuspb.GetReplicasRequest{ + DbName: dbName, + CollectionName: collectionName, + }) + s.NoError(err) + s.True(merr.Ok(resp2.Status)) + s.Len(resp2.GetReplicas(), 3) + s.releaseCollection(collectionName) +} + +func TestReplicas(t *testing.T) { + suite.Run(t, new(LoadTestSuite)) +} From bbb69980acd41ef153c5f7cbf5e8fb0d972dbf52 Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Wed, 29 May 2024 12:17:43 +0800 Subject: [PATCH 093/126] enhance: Replace 'off' with 'disable' (#33433) YAML will automatically parse "off" as a boolean variable. We should avoid using "off" in the future. issue: https://github.com/milvus-io/milvus/issues/32772 --------- Signed-off-by: bigsheeper --- configs/milvus.yaml | 8 ++++---- pkg/util/paramtable/component_param.go | 8 ++++---- pkg/util/paramtable/component_param_test.go | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 298f152c5f3e9..1f23cd014da20 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -329,13 +329,13 @@ queryNode: enabled: true memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed` - # options: async, sync, off. + # options: async, sync, disable. # Specifies the necessity for warming up the chunk cache. - # 1. If set to "sync" or "async," the original vector data will be synchronously/asynchronously loaded into the + # 1. If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage; - # 2. If set to "off," original vector data will only be loaded into the chunk cache during search/query. - warmup: off + # 2. If set to "disable" original vector data will only be loaded into the chunk cache during search/query. + warmup: disable mmap: mmapEnabled: false # Enable mmap for loading data lazyload: diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index c74bbefd352e6..9d3da19affc3f 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -2358,13 +2358,13 @@ func (p *queryNodeConfig) init(base *BaseTable) { p.ChunkCacheWarmingUp = ParamItem{ Key: "queryNode.cache.warmup", Version: "2.3.6", - DefaultValue: "off", - Doc: `options: async, sync, off. + DefaultValue: "disable", + Doc: `options: async, sync, disable. Specifies the necessity for warming up the chunk cache. -1. If set to "sync" or "async," the original vector data will be synchronously/asynchronously loaded into the +1. 
If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the chunk cache during the load process. This approach has the potential to substantially reduce query/search latency for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage; -2. If set to "off," original vector data will only be loaded into the chunk cache during search/query.`, +2. If set to "disable" original vector data will only be loaded into the chunk cache during search/query.`, Export: true, } p.ChunkCacheWarmingUp.Init(base.mgr) diff --git a/pkg/util/paramtable/component_param_test.go b/pkg/util/paramtable/component_param_test.go index cba13b38931dc..1b4719efe3a8b 100644 --- a/pkg/util/paramtable/component_param_test.go +++ b/pkg/util/paramtable/component_param_test.go @@ -339,7 +339,7 @@ func TestComponentParam(t *testing.T) { // chunk cache assert.Equal(t, "willneed", Params.ReadAheadPolicy.GetValue()) - assert.Equal(t, "false", Params.ChunkCacheWarmingUp.GetValue()) + assert.Equal(t, "disable", Params.ChunkCacheWarmingUp.GetValue()) // test small indexNlist/NProbe default params.Remove("queryNode.segcore.smallIndex.nlist") From a26d6cdf23c99e7e2841e587541188b44e9fdf00 Mon Sep 17 00:00:00 2001 From: congqixia Date: Wed, 29 May 2024 14:07:44 +0800 Subject: [PATCH 094/126] fix: Remove group checker when closing qn pipeline (#33443) See also #33442 This fix shall prevent group checker keep printing "some node(s) haven't received input" err message after collection released Signed-off-by: Congqi Xia --- internal/util/pipeline/pipeline.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/internal/util/pipeline/pipeline.go b/internal/util/pipeline/pipeline.go index cfc0db3e59832..6e85f2d9989e6 100644 --- a/internal/util/pipeline/pipeline.go +++ b/internal/util/pipeline/pipeline.go @@ -69,6 +69,11 @@ func (p *pipeline) Start() error { } func (p *pipeline) Close() { + for _, node := range p.nodes { + if node.Checker != nil { + node.Checker.Close() + } + } } func (p *pipeline) process() { From 08b94ea81da8d57232e906cd7ece75558fb7eec8 Mon Sep 17 00:00:00 2001 From: smellthemoon <64083300+smellthemoon@users.noreply.github.com> Date: Wed, 29 May 2024 18:35:44 +0800 Subject: [PATCH 095/126] enhance:change wrong log (#33447) Signed-off-by: lixinguo Co-authored-by: lixinguo --- internal/datanode/broker/datacoord.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/datanode/broker/datacoord.go b/internal/datanode/broker/datacoord.go index e81afa8d0eba3..dc7a4f2febc5b 100644 --- a/internal/datanode/broker/datacoord.go +++ b/internal/datanode/broker/datacoord.go @@ -128,7 +128,7 @@ func (dc *dataCoordBroker) DropVirtualChannel(ctx context.Context, req *datapb.D resp, err := dc.client.DropVirtualChannel(ctx, req) if err := merr.CheckRPCCall(resp, err); err != nil { - log.Warn("failed to SaveBinlogPaths", zap.Error(err)) + log.Warn("failed to DropVirtualChannel", zap.Error(err)) return resp, err } From 54797b42860f5917458996b8b5fa734b1f9b15bf Mon Sep 17 00:00:00 2001 From: congqixia Date: Wed, 29 May 2024 19:15:43 +0800 Subject: [PATCH 096/126] enhance: Refine frequent log in datacoord (#33449) This PR changes: - Frequent `ListIndexes` success log to debug level - Aggregate collection missing log after collection dropped in `meta.GetCollectionIndexFilesSize` Signed-off-by: Congqi Xia Signed-off-by: Congqi Xia --- internal/datacoord/index_service.go | 2 +- internal/datacoord/meta.go | 8 +++++++- 2 files changed, 8 insertions(+), 2 
deletions(-) diff --git a/internal/datacoord/index_service.go b/internal/datacoord/index_service.go index b0c68a33e3f95..1db44438afd30 100644 --- a/internal/datacoord/index_service.go +++ b/internal/datacoord/index_service.go @@ -917,7 +917,7 @@ func (s *Server) ListIndexes(ctx context.Context, req *indexpb.ListIndexesReques UserIndexParams: index.UserIndexParams, } }) - log.Info("List index success") + log.Debug("List index success") return &indexpb.ListIndexesResponse{ Status: merr.Success(), IndexInfos: indexInfos, diff --git a/internal/datacoord/meta.go b/internal/datacoord/meta.go index 434800bcb54af..9b13593592c6f 100644 --- a/internal/datacoord/meta.go +++ b/internal/datacoord/meta.go @@ -47,6 +47,7 @@ import ( "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/timerecord" "github.com/milvus-io/milvus/pkg/util/tsoutil" + "github.com/milvus-io/milvus/pkg/util/typeutil" ) type meta struct { @@ -363,6 +364,8 @@ func (m *meta) GetCollectionIndexFilesSize() uint64 { m.RLock() defer m.RUnlock() var total uint64 + + missingCollections := make(typeutil.Set[int64]) for _, segmentIdx := range m.indexMeta.GetAllSegIndexes() { coll, ok := m.collections[segmentIdx.CollectionID] if ok { @@ -370,9 +373,12 @@ func (m *meta) GetCollectionIndexFilesSize() uint64 { fmt.Sprint(segmentIdx.CollectionID), fmt.Sprint(segmentIdx.SegmentID)).Set(float64(segmentIdx.IndexSize)) total += segmentIdx.IndexSize } else { - log.Warn("not found database name", zap.Int64("collectionID", segmentIdx.CollectionID)) + missingCollections.Insert(segmentIdx.CollectionID) } } + if missingCollections.Len() > 0 { + log.Warn("collection info not found when calculating index file sizes", zap.Int64s("collectionIDs", missingCollections.Collect())) + } return total } From 8f46a2095762785ee3cde295ebdd42609d2644fd Mon Sep 17 00:00:00 2001 From: SimFG Date: Wed, 29 May 2024 20:51:44 +0800 Subject: [PATCH 097/126] fix: show empty collection when has granted the all privilege (#33445) - issue: #33382 Signed-off-by: SimFG --- internal/rootcoord/show_collection_task.go | 2 +- internal/rootcoord/show_collection_task_test.go | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/rootcoord/show_collection_task.go b/internal/rootcoord/show_collection_task.go index 247a171af321e..090d4ada5b561 100644 --- a/internal/rootcoord/show_collection_task.go +++ b/internal/rootcoord/show_collection_task.go @@ -89,7 +89,7 @@ func (t *showCollectionTask) Execute(ctx context.Context) error { for _, entity := range entities { objectType := entity.GetObject().GetName() if objectType == commonpb.ObjectType_Global.String() && - entity.GetGrantor().GetPrivilege().GetName() == commonpb.ObjectPrivilege_PrivilegeAll.String() { + entity.GetGrantor().GetPrivilege().GetName() == util.PrivilegeNameForAPI(commonpb.ObjectPrivilege_PrivilegeAll.String()) { privilegeColls.Insert(util.AnyWord) return privilegeColls, nil } diff --git a/internal/rootcoord/show_collection_task_test.go b/internal/rootcoord/show_collection_task_test.go index 8d82e4aa20275..52cea062cbda0 100644 --- a/internal/rootcoord/show_collection_task_test.go +++ b/internal/rootcoord/show_collection_task_test.go @@ -298,7 +298,9 @@ func TestShowCollectionsAuth(t *testing.T) { { Object: &milvuspb.ObjectEntity{Name: commonpb.ObjectType_Global.String()}, Grantor: &milvuspb.GrantorEntity{ - Privilege: &milvuspb.PrivilegeEntity{Name: commonpb.ObjectPrivilege_PrivilegeAll.String()}, + Privilege: &milvuspb.PrivilegeEntity{ + Name: 
util.PrivilegeNameForAPI(commonpb.ObjectPrivilege_PrivilegeAll.String()), + }, }, }, }, nil).Once() From 589d4dfd82b4539961b61b275216964f2eaaca2c Mon Sep 17 00:00:00 2001 From: zhagnlu <1542303831@qq.com> Date: Thu, 30 May 2024 13:09:43 +0800 Subject: [PATCH 098/126] enhance: optimize bitmap index (#33358) #32900 Signed-off-by: luzhang Co-authored-by: luzhang --- internal/core/src/common/Consts.h | 2 + internal/core/src/index/BitmapIndex.cpp | 366 ++++++++++++---- internal/core/src/index/BitmapIndex.h | 66 ++- internal/core/src/index/CMakeLists.txt | 1 + internal/core/src/index/HybridScalarIndex.cpp | 402 ++++++++++++++++++ internal/core/src/index/HybridScalarIndex.h | 166 ++++++++ internal/core/src/index/Index.h | 1 + internal/core/src/index/IndexFactory.cpp | 14 +- .../core/src/index/InvertedIndexTantivy.cpp | 30 +- internal/core/src/index/Meta.h | 2 + internal/core/src/index/ScalarIndex.h | 10 + internal/core/src/index/ScalarIndexSort.cpp | 51 ++- internal/core/src/index/ScalarIndexSort.h | 17 +- internal/core/src/index/StringIndexMarisa.cpp | 7 + internal/core/src/index/StringIndexMarisa.h | 17 +- internal/core/src/index/Utils.cpp | 9 + internal/core/src/index/Utils.h | 3 + internal/core/unittest/CMakeLists.txt | 2 +- internal/core/unittest/test_expr.cpp | 26 +- ...bitmap_index.cpp => test_hybrid_index.cpp} | 160 ++++++- internal/core/unittest/test_scalar_index.cpp | 267 +++++++++++- internal/proxy/task_index.go | 7 + pkg/common/common.go | 2 + .../indexparamcheck/bitmap_checker_test.go | 4 +- .../indexparamcheck/bitmap_index_checker.go | 6 +- pkg/util/paramtable/component_param.go | 24 +- 26 files changed, 1507 insertions(+), 155 deletions(-) create mode 100644 internal/core/src/index/HybridScalarIndex.cpp create mode 100644 internal/core/src/index/HybridScalarIndex.h rename internal/core/unittest/{test_bitmap_index.cpp => test_hybrid_index.cpp} (63%) diff --git a/internal/core/src/common/Consts.h b/internal/core/src/common/Consts.h index 65e6795b16e66..44d7d5559ca81 100644 --- a/internal/core/src/common/Consts.h +++ b/internal/core/src/common/Consts.h @@ -61,3 +61,5 @@ constexpr const char* RANGE_FILTER = knowhere::meta::RANGE_FILTER; const int64_t DEFAULT_MAX_OUTPUT_SIZE = 67108864; // bytes, 64MB const int64_t DEFAULT_CHUNK_MANAGER_REQUEST_TIMEOUT_MS = 10000; + +const int64_t DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND = 500; diff --git a/internal/core/src/index/BitmapIndex.cpp b/internal/core/src/index/BitmapIndex.cpp index 5d0a4aabec3cd..3e63763dd2b51 100644 --- a/internal/core/src/index/BitmapIndex.cpp +++ b/internal/core/src/index/BitmapIndex.cpp @@ -15,10 +15,12 @@ // limitations under the License. 
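A rough sizing sketch may help explain the new DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND above: one dense bitset per distinct value costs about one bit per row per value, so dense bitsets only pay off below a small cardinality, and roaring bitmaps are kept above it. Only the 500 bound comes from the patch; the row count, the main() wrapper and the printed estimate are illustrative assumptions.

// Illustration only: footprint of keeping one dense bitset per distinct value.
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t num_rows = 1000000;   // example segment size, not from the patch
    const int64_t cardinality = 500;    // DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND
    const int64_t dense_bytes = cardinality * num_rows / 8;  // one bit per row per value
    std::printf("dense bitsets at the bound: ~%lld MB\n",
                static_cast<long long>(dense_bytes / (1024 * 1024)));
    // Roaring containers instead grow with the number of set rows, so their
    // total size stays near num_rows entries regardless of cardinality.
    return 0;
}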
#include +#include #include "index/BitmapIndex.h" #include "common/Slice.h" +#include "common/Common.h" #include "index/Meta.h" #include "index/ScalarIndex.h" #include "index/Utils.h" @@ -105,8 +107,13 @@ BitmapIndex::Build(size_t n, const T* data) { } total_num_rows_ = n; - for (auto it = data_.begin(); it != data_.end(); ++it) { - bitsets_[it->first] = ConvertRoaringToBitset(it->second); + if (data_.size() < DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) { + for (auto it = data_.begin(); it != data_.end(); ++it) { + bitsets_[it->first] = ConvertRoaringToBitset(it->second); + } + build_mode_ = BitmapIndexBuildMode::BITSET; + } else { + build_mode_ = BitmapIndexBuildMode::ROARING; } is_built_ = true; @@ -134,6 +141,13 @@ BitmapIndex::BuildV2(const Config& config) { field_datas.push_back(field_data); } + BuildWithFieldData(field_datas); +} + +template +void +BitmapIndex::BuildWithFieldData( + const std::vector& field_datas) { int total_num_rows = 0; for (auto& field_data : field_datas) { total_num_rows += field_data->get_num_rows(); @@ -142,7 +156,6 @@ BitmapIndex::BuildV2(const Config& config) { throw SegcoreError(DataIsEmpty, "scalar bitmap index can not build null values"); } - total_num_rows_ = total_num_rows; int64_t offset = 0; @@ -154,6 +167,7 @@ BitmapIndex::BuildV2(const Config& config) { offset++; } } + is_built_ = true; } @@ -190,6 +204,22 @@ BitmapIndex::SerializeIndexData(uint8_t* data_ptr) { } } +template +std::pair, size_t> +BitmapIndex::SerializeIndexMeta() { + YAML::Node node; + node[BITMAP_INDEX_LENGTH] = data_.size(); + node[BITMAP_INDEX_NUM_ROWS] = total_num_rows_; + + std::stringstream ss; + ss << node; + auto json_string = ss.str(); + auto str_size = json_string.size(); + std::shared_ptr res(new uint8_t[str_size]); + memcpy(res.get(), json_string.data(), str_size); + return std::make_pair(res, str_size); +} + template <> void BitmapIndex::SerializeIndexData(uint8_t* data_ptr) { @@ -217,21 +247,17 @@ BitmapIndex::Serialize(const Config& config) { uint8_t* data_ptr = index_data.get(); SerializeIndexData(data_ptr); - std::shared_ptr index_length(new uint8_t[sizeof(size_t)]); - auto index_size = data_.size(); - memcpy(index_length.get(), &index_size, sizeof(size_t)); - - std::shared_ptr num_rows(new uint8_t[sizeof(size_t)]); - memcpy(num_rows.get(), &total_num_rows_, sizeof(size_t)); + auto index_meta = SerializeIndexMeta(); BinarySet ret_set; ret_set.Append(BITMAP_INDEX_DATA, index_data, index_data_size); - ret_set.Append(BITMAP_INDEX_LENGTH, index_length, sizeof(size_t)); - ret_set.Append(BITMAP_INDEX_NUM_ROWS, num_rows, sizeof(size_t)); + ret_set.Append(BITMAP_INDEX_META, index_meta.first, index_meta.second); LOG_INFO("build bitmap index with cardinality = {}, num_rows = {}", - index_size, + Cardinality(), total_num_rows_); + + Disassemble(ret_set); return ret_set; } @@ -283,6 +309,29 @@ BitmapIndex::ConvertRoaringToBitset(const roaring::Roaring& values) { return res; } +template +std::pair +BitmapIndex::DeserializeIndexMeta(const uint8_t* data_ptr, + size_t data_size) { + YAML::Node node = YAML::Load( + std::string(reinterpret_cast(data_ptr), data_size)); + + auto index_length = node[BITMAP_INDEX_LENGTH].as(); + auto index_num_rows = node[BITMAP_INDEX_NUM_ROWS].as(); + + return std::make_pair(index_length, index_num_rows); +} + +template +void +BitmapIndex::ChooseIndexBuildMode() { + if (data_.size() <= DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) { + build_mode_ = BitmapIndexBuildMode::BITSET; + } else { + build_mode_ = BitmapIndexBuildMode::ROARING; + } +} + template void 
BitmapIndex::DeserializeIndexData(const uint8_t* data_ptr, @@ -296,7 +345,12 @@ BitmapIndex::DeserializeIndexData(const uint8_t* data_ptr, value = roaring::Roaring::read(reinterpret_cast(data_ptr)); data_ptr += value.getSizeInBytes(); - bitsets_[key] = ConvertRoaringToBitset(value); + ChooseIndexBuildMode(); + + if (build_mode_ == BitmapIndexBuildMode::BITSET) { + bitsets_[key] = ConvertRoaringToBitset(value); + data_.erase(key); + } } } @@ -324,21 +378,14 @@ template void BitmapIndex::LoadWithoutAssemble(const BinarySet& binary_set, const Config& config) { - size_t index_length; - auto index_length_buffer = binary_set.GetByName(BITMAP_INDEX_LENGTH); - memcpy(&index_length, - index_length_buffer->data.get(), - (size_t)index_length_buffer->size); - - auto num_rows_buffer = binary_set.GetByName(BITMAP_INDEX_NUM_ROWS); - memcpy(&total_num_rows_, - num_rows_buffer->data.get(), - (size_t)num_rows_buffer->size); + auto index_meta_buffer = binary_set.GetByName(BITMAP_INDEX_META); + auto index_meta = DeserializeIndexMeta(index_meta_buffer->data.get(), + index_meta_buffer->size); + auto index_length = index_meta.first; + total_num_rows_ = index_meta.second; auto index_data_buffer = binary_set.GetByName(BITMAP_INDEX_DATA); - const uint8_t* data_ptr = index_data_buffer->data.get(); - - DeserializeIndexData(data_ptr, index_length); + DeserializeIndexData(index_data_buffer->data.get(), index_length); LOG_INFO("load bitmap index with cardinality = {}, num_rows = {}", Cardinality(), @@ -416,26 +463,24 @@ BitmapIndex::In(const size_t n, const T* values) { AssertInfo(is_built_, "index has not been built"); TargetBitmap res(total_num_rows_, false); -#if 0 - roaring::Roaring result; - for (size_t i = 0; i < n; ++i) { - auto val = values[i]; - auto it = data_.find(val); - if (it != data_.end()) { - result |= it->second; + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + for (size_t i = 0; i < n; ++i) { + auto val = values[i]; + auto it = data_.find(val); + if (it != data_.end()) { + for (const auto& v : it->second) { + res.set(v); + } + } } - } - for (auto& val : result) { - res.set(val); - } -#else - for (size_t i = 0; i < n; ++i) { - auto val = values[i]; - if (bitsets_.find(val) != bitsets_.end()) { - res |= bitsets_.at(val); + } else { + for (size_t i = 0; i < n; ++i) { + auto val = values[i]; + if (bitsets_.find(val) != bitsets_.end()) { + res |= bitsets_.at(val); + } } } -#endif return res; } @@ -443,36 +488,35 @@ template const TargetBitmap BitmapIndex::NotIn(const size_t n, const T* values) { AssertInfo(is_built_, "index has not been built"); - TargetBitmap res(total_num_rows_, false); -#if 0 - roaring::Roaring result; - for (int i = 0; i < n; ++i) { - auto val = values[i]; - auto it = data_.find(val); - if (it != data_.end()) { - result |= it->second; + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + TargetBitmap res(total_num_rows_, true); + for (int i = 0; i < n; ++i) { + auto val = values[i]; + auto it = data_.find(val); + if (it != data_.end()) { + for (const auto& v : it->second) { + res.reset(v); + } + } } - } - - for (auto& val : result) { - bitset.reset(val); - } -#else - for (size_t i = 0; i < n; ++i) { - auto val = values[i]; - if (bitsets_.find(val) != bitsets_.end()) { - res |= bitsets_.at(val); + return res; + } else { + TargetBitmap res(total_num_rows_, false); + for (size_t i = 0; i < n; ++i) { + auto val = values[i]; + if (bitsets_.find(val) != bitsets_.end()) { + res |= bitsets_.at(val); + } } + res.flip(); + return res; } -#endif - res.flip(); - return res; } template 
-const TargetBitmap -BitmapIndex::Range(const T value, const OpType op) { +TargetBitmap +BitmapIndex::RangeForBitset(const T value, const OpType op) { AssertInfo(is_built_, "index has not been built"); TargetBitmap res(total_num_rows_, false); if (ShouldSkip(value, value, op)) { @@ -532,10 +576,82 @@ BitmapIndex::Range(const T value, const OpType op) { template const TargetBitmap -BitmapIndex::Range(const T lower_value, - bool lb_inclusive, - const T upper_value, - bool ub_inclusive) { +BitmapIndex::Range(const T value, OpType op) { + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + return std::move(RangeForRoaring(value, op)); + } else { + return std::move(RangeForBitset(value, op)); + } +} + +template +TargetBitmap +BitmapIndex::RangeForRoaring(const T value, const OpType op) { + AssertInfo(is_built_, "index has not been built"); + TargetBitmap res(total_num_rows_, false); + if (ShouldSkip(value, value, op)) { + return res; + } + auto lb = data_.begin(); + auto ub = data_.end(); + + switch (op) { + case OpType::LessThan: { + ub = std::lower_bound(data_.begin(), + data_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + case OpType::LessEqual: { + ub = std::upper_bound(data_.begin(), + data_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + case OpType::GreaterThan: { + lb = std::upper_bound(data_.begin(), + data_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + case OpType::GreaterEqual: { + lb = std::lower_bound(data_.begin(), + data_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + default: { + throw SegcoreError(OpTypeInvalid, + fmt::format("Invalid OperatorType: {}", op)); + } + } + + for (; lb != ub; lb++) { + for (const auto& v : lb->second) { + res.set(v); + } + } + return res; +} + +template +TargetBitmap +BitmapIndex::RangeForBitset(const T lower_value, + bool lb_inclusive, + const T upper_value, + bool ub_inclusive) { AssertInfo(is_built_, "index has not been built"); TargetBitmap res(total_num_rows_, false); if (lower_value > upper_value || @@ -587,15 +703,99 @@ BitmapIndex::Range(const T lower_value, return res; } +template +const TargetBitmap +BitmapIndex::Range(const T lower_value, + bool lb_inclusive, + const T upper_value, + bool ub_inclusive) { + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + return RangeForRoaring( + lower_value, lb_inclusive, upper_value, ub_inclusive); + } else { + return RangeForBitset( + lower_value, lb_inclusive, upper_value, ub_inclusive); + } +} + +template +TargetBitmap +BitmapIndex::RangeForRoaring(const T lower_value, + bool lb_inclusive, + const T upper_value, + bool ub_inclusive) { + AssertInfo(is_built_, "index has not been built"); + TargetBitmap res(total_num_rows_, false); + if (lower_value > upper_value || + (lower_value == upper_value && !(lb_inclusive && ub_inclusive))) { + return res; + } + if (ShouldSkip(lower_value, upper_value, OpType::Range)) { + return res; + } + + auto lb = data_.begin(); + auto ub = data_.end(); + + if (lb_inclusive) { + lb = std::lower_bound(data_.begin(), + data_.end(), + std::make_pair(lower_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } else { + lb = 
std::upper_bound(data_.begin(), + data_.end(), + std::make_pair(lower_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } + + if (ub_inclusive) { + ub = std::upper_bound(data_.begin(), + data_.end(), + std::make_pair(upper_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } else { + ub = std::lower_bound(data_.begin(), + data_.end(), + std::make_pair(upper_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } + + for (; lb != ub; lb++) { + for (const auto& v : lb->second) { + res.set(v); + } + } + return res; +} + template T BitmapIndex::Reverse_Lookup(size_t idx) const { AssertInfo(is_built_, "index has not been built"); AssertInfo(idx < total_num_rows_, "out of range of total coun"); - for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) { - if (it->second[idx]) { - return it->first; + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + for (auto it = data_.begin(); it != data_.end(); it++) { + for (const auto& v : it->second) { + if (v == idx) { + return it->first; + } + } + } + } else { + for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) { + if (it->second[idx]) { + return it->first; + } } } throw SegcoreError( @@ -610,9 +810,7 @@ bool BitmapIndex::ShouldSkip(const T lower_value, const T upper_value, const OpType op) { - if (!bitsets_.empty()) { - auto lower_bound = bitsets_.begin()->first; - auto upper_bound = bitsets_.rbegin()->first; + auto skip = [&](OpType op, T lower_bound, T upper_bound) -> bool { bool should_skip = false; switch (op) { case OpType::LessThan: { @@ -649,6 +847,22 @@ BitmapIndex::ShouldSkip(const T lower_value, op)); } return should_skip; + }; + + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + if (!data_.empty()) { + auto lower_bound = data_.begin()->first; + auto upper_bound = data_.rbegin()->first; + bool should_skip = skip(op, lower_bound, upper_bound); + return should_skip; + } + } else { + if (!bitsets_.empty()) { + auto lower_bound = bitsets_.begin()->first; + auto upper_bound = bitsets_.rbegin()->first; + bool should_skip = skip(op, lower_bound, upper_bound); + return should_skip; + } } return true; } diff --git a/internal/core/src/index/BitmapIndex.h b/internal/core/src/index/BitmapIndex.h index 38ea6004495ff..2ead42d5de545 100644 --- a/internal/core/src/index/BitmapIndex.h +++ b/internal/core/src/index/BitmapIndex.h @@ -30,6 +30,11 @@ namespace milvus { namespace index { +enum class BitmapIndexBuildMode { + ROARING, + BITSET, +}; + /* * @brief Implementation of Bitmap Index * @details This index only for scalar Integral type. 
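RangeForRoaring above turns a range predicate into a contiguous scan over the ordered value-to-bitmap map, OR-ing each qualifying posting list into the result. A minimal stand-alone sketch of the same idea, with std::set<size_t> standing in for roaring::Roaring and the name range_query chosen only for illustration (inclusive bounds assumed):

// Sketch only: the ordered map makes [lower, upper] a contiguous key range,
// so the scan touches just the qualifying distinct values.
#include <cstddef>
#include <map>
#include <set>
#include <vector>

std::vector<bool> range_query(const std::map<int, std::set<std::size_t>>& index,
                              int lower, int upper, std::size_t total_rows) {
    std::vector<bool> hits(total_rows, false);
    for (auto it = index.lower_bound(lower);
         it != index.end() && it->first <= upper; ++it) {
        for (std::size_t row : it->second) {
            hits[row] = true;  // plays the role of res.set(v) in the patch
        }
    }
    return hits;
}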
@@ -45,6 +50,17 @@ class BitmapIndex : public ScalarIndex { const storage::FileManagerContext& file_manager_context, std::shared_ptr space); + explicit BitmapIndex( + const std::shared_ptr& file_manager) + : file_manager_(file_manager) { + } + + explicit BitmapIndex( + const std::shared_ptr& file_manager, + std::shared_ptr space) + : file_manager_(file_manager), space_(space) { + } + ~BitmapIndex() override = default; BinarySet @@ -61,7 +77,7 @@ class BitmapIndex : public ScalarIndex { int64_t Count() override { - return bitsets_.begin()->second.size(); + return total_num_rows_; } void @@ -70,6 +86,9 @@ class BitmapIndex : public ScalarIndex { void Build(const Config& config = {}) override; + void + BuildWithFieldData(const std::vector& datas) override; + void BuildV2(const Config& config = {}) override; @@ -108,9 +127,17 @@ class BitmapIndex : public ScalarIndex { int64_t Cardinality() { - return bitsets_.size(); + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + return data_.size(); + } else { + return bitsets_.size(); + } } + void + LoadWithoutAssemble(const BinarySet& binary_set, + const Config& config) override; + private: size_t GetIndexDataSize(); @@ -118,24 +145,49 @@ class BitmapIndex : public ScalarIndex { void SerializeIndexData(uint8_t* index_data_ptr); + std::pair, size_t> + SerializeIndexMeta(); + + std::pair + DeserializeIndexMeta(const uint8_t* data_ptr, size_t data_size); + void DeserializeIndexData(const uint8_t* data_ptr, size_t index_length); + void + ChooseIndexBuildMode(); + bool ShouldSkip(const T lower_value, const T upper_value, const OpType op); TargetBitmap ConvertRoaringToBitset(const roaring::Roaring& values); - void - LoadWithoutAssemble(const BinarySet& binary_set, const Config& config); + TargetBitmap + RangeForRoaring(T value, OpType op); - private: - bool is_built_; + TargetBitmap + RangeForBitset(T value, OpType op); + + TargetBitmap + RangeForRoaring(T lower_bound_value, + bool lb_inclusive, + T upper_bound_value, + bool ub_inclusive); + + TargetBitmap + RangeForBitset(T lower_bound_value, + bool lb_inclusive, + T upper_bound_value, + bool ub_inclusive); + + public: + bool is_built_{false}; Config config_; + BitmapIndexBuildMode build_mode_; std::map data_; std::map bitsets_; - size_t total_num_rows_; + size_t total_num_rows_{0}; std::shared_ptr file_manager_; std::shared_ptr space_; }; diff --git a/internal/core/src/index/CMakeLists.txt b/internal/core/src/index/CMakeLists.txt index ed0f600587bd2..3256ab63a08c7 100644 --- a/internal/core/src/index/CMakeLists.txt +++ b/internal/core/src/index/CMakeLists.txt @@ -20,6 +20,7 @@ set(INDEX_FILES SkipIndex.cpp InvertedIndexTantivy.cpp BitmapIndex.cpp + HybridScalarIndex.cpp ) milvus_add_pkg_config("milvus_index") diff --git a/internal/core/src/index/HybridScalarIndex.cpp b/internal/core/src/index/HybridScalarIndex.cpp new file mode 100644 index 0000000000000..518828ea7bac7 --- /dev/null +++ b/internal/core/src/index/HybridScalarIndex.cpp @@ -0,0 +1,402 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "index/HybridScalarIndex.h" +#include "common/Slice.h" +#include "common/Common.h" +#include "index/Meta.h" +#include "index/ScalarIndex.h" +#include "index/Utils.h" +#include "storage/Util.h" +#include "storage/space.h" + +namespace milvus { +namespace index { + +template +HybridScalarIndex::HybridScalarIndex( + const storage::FileManagerContext& file_manager_context) + : is_built_(false), + bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) { + if (file_manager_context.Valid()) { + file_manager_ = + std::make_shared(file_manager_context); + AssertInfo(file_manager_ != nullptr, "create file manager failed!"); + } + internal_index_type_ = InternalIndexType::NONE; +} + +template +HybridScalarIndex::HybridScalarIndex( + const storage::FileManagerContext& file_manager_context, + std::shared_ptr space) + : is_built_(false), + bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND), + space_(space) { + if (file_manager_context.Valid()) { + file_manager_ = std::make_shared( + file_manager_context, space); + AssertInfo(file_manager_ != nullptr, "create file manager failed!"); + } + internal_index_type_ = InternalIndexType::NONE; +} + +template +InternalIndexType +HybridScalarIndex::SelectIndexBuildType(size_t n, const T* values) { + std::set distinct_vals; + for (size_t i = 0; i < n; i++) { + distinct_vals.insert(values[i]); + } + + // Decide whether to select bitmap index or stl sort + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + internal_index_type_ = InternalIndexType::STLSORT; + } else { + internal_index_type_ = InternalIndexType::BITMAP; + } + return internal_index_type_; +} + +template <> +InternalIndexType +HybridScalarIndex::SelectIndexBuildType( + size_t n, const std::string* values) { + std::set distinct_vals; + for (size_t i = 0; i < n; i++) { + distinct_vals.insert(values[i]); + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + break; + } + } + + // Decide whether to select bitmap index or marisa index + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + internal_index_type_ = InternalIndexType::MARISA; + } else { + internal_index_type_ = InternalIndexType::BITMAP; + } + return internal_index_type_; +} + +template +InternalIndexType +HybridScalarIndex::SelectIndexBuildType( + const std::vector& field_datas) { + std::set distinct_vals; + for (const auto& data : field_datas) { + auto slice_row_num = data->get_num_rows(); + for (size_t i = 0; i < slice_row_num; ++i) { + auto val = reinterpret_cast(data->RawValue(i)); + distinct_vals.insert(*val); + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + break; + } + } + } + + // Decide whether to select bitmap index or stl sort + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + internal_index_type_ = InternalIndexType::STLSORT; + } else { + internal_index_type_ = InternalIndexType::BITMAP; + } + return internal_index_type_; +} + +template <> +InternalIndexType +HybridScalarIndex::SelectIndexBuildType( + const std::vector& field_datas) { + std::set distinct_vals; + for (const auto& data : 
field_datas) { + auto slice_row_num = data->get_num_rows(); + for (size_t i = 0; i < slice_row_num; ++i) { + auto val = reinterpret_cast(data->RawValue(i)); + distinct_vals.insert(*val); + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + break; + } + } + } + + // Decide whether to select bitmap index or marisa sort + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + internal_index_type_ = InternalIndexType::MARISA; + } else { + internal_index_type_ = InternalIndexType::BITMAP; + } + return internal_index_type_; +} + +template +std::shared_ptr> +HybridScalarIndex::GetInternalIndex() { + if (internal_index_ != nullptr) { + return internal_index_; + } + if (internal_index_type_ == InternalIndexType::BITMAP) { + internal_index_ = std::make_shared>(file_manager_); + } else if (internal_index_type_ == InternalIndexType::STLSORT) { + internal_index_ = std::make_shared>(file_manager_); + } else { + PanicInfo(UnexpectedError, + "unknown index type when get internal index"); + } + return internal_index_; +} + +template <> +std::shared_ptr> +HybridScalarIndex::GetInternalIndex() { + if (internal_index_ != nullptr) { + return internal_index_; + } + + if (internal_index_type_ == InternalIndexType::BITMAP) { + internal_index_ = + std::make_shared>(file_manager_); + } else if (internal_index_type_ == InternalIndexType::MARISA) { + internal_index_ = std::make_shared(file_manager_); + } else { + PanicInfo(UnexpectedError, + "unknown index type when get internal index"); + } + return internal_index_; +} + +template +void +HybridScalarIndex::BuildInternal( + const std::vector& field_datas) { + auto index = GetInternalIndex(); + index->BuildWithFieldData(field_datas); +} + +template +void +HybridScalarIndex::Build(const Config& config) { + if (is_built_) { + return; + } + + bitmap_index_cardinality_limit_ = + GetBitmapCardinalityLimitFromConfig(config); + LOG_INFO("config bitmap cardinality limit to {}", + bitmap_index_cardinality_limit_); + + auto insert_files = + GetValueFromConfig>(config, "insert_files"); + AssertInfo(insert_files.has_value(), + "insert file paths is empty when build index"); + + auto field_datas = + file_manager_->CacheRawDataToMemory(insert_files.value()); + + SelectIndexBuildType(field_datas); + BuildInternal(field_datas); + is_built_ = true; +} + +template +void +HybridScalarIndex::BuildV2(const Config& config) { + if (is_built_) { + return; + } + bitmap_index_cardinality_limit_ = + GetBitmapCardinalityLimitFromConfig(config); + LOG_INFO("config bitmap cardinality limit to {}", + bitmap_index_cardinality_limit_); + + auto field_name = file_manager_->GetIndexMeta().field_name; + auto reader = space_->ScanData(); + std::vector field_datas; + for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) { + if (!rec.ok()) { + PanicInfo(DataFormatBroken, "failed to read data"); + } + auto data = rec.ValueUnsafe(); + auto total_num_rows = data->num_rows(); + auto col_data = data->GetColumnByName(field_name); + auto field_data = storage::CreateFieldData( + DataType(GetDType()), 0, total_num_rows); + field_data->FillFieldData(col_data); + field_datas.push_back(field_data); + } + + SelectIndexBuildType(field_datas); + BuildInternal(field_datas); + is_built_ = true; +} + +template +BinarySet +HybridScalarIndex::Serialize(const Config& config) { + AssertInfo(is_built_, "index has not been built yet"); + + auto ret_set = internal_index_->Serialize(config); + + // Add index type info to storage for future restruct index + std::shared_ptr index_type_buf(new 
uint8_t[sizeof(uint8_t)]); + index_type_buf[0] = static_cast(internal_index_type_); + ret_set.Append(INDEX_TYPE, index_type_buf, sizeof(uint8_t)); + + return ret_set; +} + +template +BinarySet +HybridScalarIndex::Upload(const Config& config) { + auto binary_set = Serialize(config); + file_manager_->AddFile(binary_set); + + auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize(); + BinarySet ret; + for (auto& file : remote_paths_to_size) { + ret.Append(file.first, nullptr, file.second); + } + + return ret; +} + +template +BinarySet +HybridScalarIndex::UploadV2(const Config& config) { + auto binary_set = Serialize(config); + file_manager_->AddFileV2(binary_set); + + auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize(); + BinarySet ret; + for (auto& file : remote_paths_to_size) { + ret.Append(file.first, nullptr, file.second); + } + + return ret; +} + +template +void +HybridScalarIndex::DeserializeIndexType(const BinarySet& binary_set) { + uint8_t index_type; + auto index_type_buffer = binary_set.GetByName(INDEX_TYPE); + memcpy(&index_type, index_type_buffer->data.get(), index_type_buffer->size); + internal_index_type_ = static_cast(index_type); +} + +template +void +HybridScalarIndex::LoadInternal(const BinarySet& binary_set, + const Config& config) { + auto index = GetInternalIndex(); + index->LoadWithoutAssemble(binary_set, config); +} + +template +void +HybridScalarIndex::Load(const BinarySet& binary_set, const Config& config) { + milvus::Assemble(const_cast(binary_set)); + DeserializeIndexType(binary_set); + + LoadInternal(binary_set, config); + is_built_ = true; +} + +template +void +HybridScalarIndex::LoadV2(const Config& config) { + auto blobs = space_->StatisticsBlobs(); + std::vector index_files; + auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2(); + for (auto& b : blobs) { + if (b.name.rfind(prefix, 0) == 0) { + index_files.push_back(b.name); + } + } + std::map index_datas{}; + for (auto& file_name : index_files) { + auto res = space_->GetBlobByteSize(file_name); + if (!res.ok()) { + PanicInfo(S3Error, "unable to read index blob"); + } + auto index_blob_data = + std::shared_ptr(new uint8_t[res.value()]); + auto status = space_->ReadBlob(file_name, index_blob_data.get()); + if (!status.ok()) { + PanicInfo(S3Error, "unable to read index blob"); + } + auto raw_index_blob = + storage::DeserializeFileData(index_blob_data, res.value()); + auto key = file_name.substr(file_name.find_last_of('/') + 1); + index_datas[key] = raw_index_blob->GetFieldData(); + } + AssembleIndexDatas(index_datas); + + BinarySet binary_set; + for (auto& [key, data] : index_datas) { + auto size = data->Size(); + auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction + auto buf = std::shared_ptr( + (uint8_t*)const_cast(data->Data()), deleter); + binary_set.Append(key, buf, size); + } + + DeserializeIndexType(binary_set); + + LoadInternal(binary_set, config); + + is_built_ = true; +} + +template +void +HybridScalarIndex::Load(milvus::tracer::TraceContext ctx, + const Config& config) { + auto index_files = + GetValueFromConfig>(config, "index_files"); + AssertInfo(index_files.has_value(), + "index file paths is empty when load bitmap index"); + auto index_datas = file_manager_->LoadIndexToMemory(index_files.value()); + AssembleIndexDatas(index_datas); + BinarySet binary_set; + for (auto& [key, data] : index_datas) { + auto size = data->Size(); + auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction + auto buf = std::shared_ptr( + 
(uint8_t*)const_cast(data->Data()), deleter); + binary_set.Append(key, buf, size); + } + + DeserializeIndexType(binary_set); + + LoadInternal(binary_set, config); + + is_built_ = true; +} + +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; + +} // namespace index +} // namespace milvus \ No newline at end of file diff --git a/internal/core/src/index/HybridScalarIndex.h b/internal/core/src/index/HybridScalarIndex.h new file mode 100644 index 0000000000000..c3c44630bf846 --- /dev/null +++ b/internal/core/src/index/HybridScalarIndex.h @@ -0,0 +1,166 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +#include "index/ScalarIndex.h" +#include "index/BitmapIndex.h" +#include "index/ScalarIndexSort.h" +#include "index/StringIndexMarisa.h" +#include "storage/FileManager.h" +#include "storage/DiskFileManagerImpl.h" +#include "storage/MemFileManagerImpl.h" +#include "storage/space.h" + +namespace milvus { +namespace index { + +enum class InternalIndexType { + NONE = 0, + BITMAP, + STLSORT, + MARISA, +}; + +/* +* @brief Implementation of hybrid index +* @details This index only for scalar type. 
+* dynamically choose bitmap/stlsort/marisa type index +* according to data distribution +*/ +template +class HybridScalarIndex : public ScalarIndex { + public: + explicit HybridScalarIndex( + const storage::FileManagerContext& file_manager_context = + storage::FileManagerContext()); + + explicit HybridScalarIndex( + const storage::FileManagerContext& file_manager_context, + std::shared_ptr space); + + ~HybridScalarIndex() override = default; + + BinarySet + Serialize(const Config& config) override; + + void + Load(const BinarySet& index_binary, const Config& config = {}) override; + + void + Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override; + + void + LoadV2(const Config& config = {}) override; + + int64_t + Count() override { + return internal_index_->Count(); + } + + void + Build(size_t n, const T* values) override { + SelectIndexBuildType(n, values); + auto index = GetInternalIndex(); + index->Build(n, values); + is_built_ = true; + } + + void + Build(const Config& config = {}) override; + + void + BuildV2(const Config& config = {}) override; + + const TargetBitmap + In(size_t n, const T* values) override { + return internal_index_->In(n, values); + } + + const TargetBitmap + NotIn(size_t n, const T* values) override { + return internal_index_->NotIn(n, values); + } + + const TargetBitmap + Range(T value, OpType op) override { + return internal_index_->Range(value, op); + } + + const TargetBitmap + Range(T lower_bound_value, + bool lb_inclusive, + T upper_bound_value, + bool ub_inclusive) override { + return internal_index_->Range( + lower_bound_value, lb_inclusive, upper_bound_value, ub_inclusive); + } + + T + Reverse_Lookup(size_t offset) const override { + return internal_index_->Reverse_Lookup(offset); + } + + int64_t + Size() override { + return internal_index_->Size(); + } + + const bool + HasRawData() const override { + return internal_index_->HasRawData(); + } + + BinarySet + Upload(const Config& config = {}) override; + + BinarySet + UploadV2(const Config& config = {}) override; + + private: + InternalIndexType + SelectIndexBuildType(const std::vector& field_datas); + + InternalIndexType + SelectIndexBuildType(size_t n, const T* values); + + void + DeserializeIndexType(const BinarySet& binary_set); + + void + BuildInternal(const std::vector& field_datas); + + void + LoadInternal(const BinarySet& binary_set, const Config& config); + + std::shared_ptr> + GetInternalIndex(); + + public: + bool is_built_{false}; + int32_t bitmap_index_cardinality_limit_; + InternalIndexType internal_index_type_; + std::shared_ptr> internal_index_{nullptr}; + std::shared_ptr file_manager_{nullptr}; + std::shared_ptr space_{nullptr}; +}; + +} // namespace index +} // namespace milvus \ No newline at end of file diff --git a/internal/core/src/index/Index.h b/internal/core/src/index/Index.h index 2f3da4be14bbf..7567bf63e3c4e 100644 --- a/internal/core/src/index/Index.h +++ b/internal/core/src/index/Index.h @@ -18,6 +18,7 @@ #include #include +#include "common/FieldData.h" #include "common/EasyAssert.h" #include "knowhere/comp/index_param.h" #include "knowhere/dataset.h" diff --git a/internal/core/src/index/IndexFactory.cpp b/internal/core/src/index/IndexFactory.cpp index 6d133adc96204..79409056d980e 100644 --- a/internal/core/src/index/IndexFactory.cpp +++ b/internal/core/src/index/IndexFactory.cpp @@ -27,7 +27,7 @@ #include "index/StringIndexMarisa.h" #include "index/BoolIndex.h" #include "index/InvertedIndexTantivy.h" -#include "index/BitmapIndex.h" +#include 
"index/HybridScalarIndex.h" namespace milvus::index { @@ -44,7 +44,7 @@ IndexFactory::CreateScalarIndex( file_manager_context); } if (index_type == BITMAP_INDEX_TYPE) { - return std::make_unique>(file_manager_context); + return std::make_unique>(file_manager_context); } return CreateScalarIndexSort(file_manager_context); } @@ -70,7 +70,8 @@ IndexFactory::CreateScalarIndex( cfg, file_manager_context); } if (index_type == BITMAP_INDEX_TYPE) { - return std::make_unique>(file_manager_context); + return std::make_unique>( + file_manager_context); } return CreateStringIndexMarisa(file_manager_context); #else @@ -92,7 +93,8 @@ IndexFactory::CreateScalarIndex( cfg, file_manager_context, space); } if (index_type == BITMAP_INDEX_TYPE) { - return std::make_unique>(file_manager_context, space); + return std::make_unique>(file_manager_context, + space); } return CreateScalarIndexSort(file_manager_context, space); } @@ -112,8 +114,8 @@ IndexFactory::CreateScalarIndex( cfg, file_manager_context, space); } if (index_type == BITMAP_INDEX_TYPE) { - return std::make_unique>(file_manager_context, - space); + return std::make_unique>( + file_manager_context, space); } return CreateStringIndexMarisa(file_manager_context, space); #else diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index 5bb8ba3b16103..2c212704aaf49 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -426,8 +426,34 @@ InvertedIndexTantivy::BuildWithRawData(size_t n, const void* values, const Config& config) { if constexpr (!std::is_same_v) { - PanicInfo(Unsupported, - "InvertedIndex.BuildWithRawData only support string"); + TantivyConfig cfg; + if constexpr (std::is_same_v) { + cfg.data_type_ = DataType::INT8; + } + if constexpr (std::is_same_v) { + cfg.data_type_ = DataType::INT16; + } + if constexpr (std::is_same_v) { + cfg.data_type_ = DataType::INT32; + } + if constexpr (std::is_same_v) { + cfg.data_type_ = DataType::INT64; + } + if constexpr (std::is_same_v) { + cfg.data_type_ = DataType::VARCHAR; + } + boost::uuids::random_generator generator; + auto uuid = generator(); + auto prefix = boost::uuids::to_string(uuid); + path_ = fmt::format("/tmp/{}", prefix); + boost::filesystem::create_directories(path_); + cfg_ = cfg; + d_type_ = cfg_.to_tantivy_data_type(); + std::string field = "test_inverted_index"; + wrapper_ = std::make_shared( + field.c_str(), d_type_, path_.c_str()); + wrapper_->add_data(static_cast(values), n); + finish(); } else { boost::uuids::random_generator generator; auto uuid = generator(); diff --git a/internal/core/src/index/Meta.h b/internal/core/src/index/Meta.h index e44eb6d87a1ea..f1a01231b8825 100644 --- a/internal/core/src/index/Meta.h +++ b/internal/core/src/index/Meta.h @@ -54,6 +54,8 @@ constexpr const char* INDEX_BUILD_ID = "index_build_id"; constexpr const char* INDEX_ID = "index_id"; constexpr const char* INDEX_VERSION = "index_version"; constexpr const char* INDEX_ENGINE_VERSION = "index_engine_version"; +constexpr const char* BITMAP_INDEX_CARDINALITY_LIMIT = + "bitmap_cardinality_limit"; // VecIndex file metas constexpr const char* DISK_ANN_PREFIX_PATH = "index_prefix"; diff --git a/internal/core/src/index/ScalarIndex.h b/internal/core/src/index/ScalarIndex.h index aacef521f5db3..97a8b63c3ed44 100644 --- a/internal/core/src/index/ScalarIndex.h +++ b/internal/core/src/index/ScalarIndex.h @@ -80,6 +80,16 @@ class ScalarIndex : public IndexBase { RegexQuery(const std::string& 
pattern) { PanicInfo(Unsupported, "regex query is not supported"); } + + virtual void + BuildWithFieldData(const std::vector& field_datas) { + PanicInfo(Unsupported, "BuildwithFieldData is not supported"); + } + + virtual void + LoadWithoutAssemble(const BinarySet& binary_set, const Config& config) { + PanicInfo(Unsupported, "LoadWithoutAssemble is not supported"); + } }; template diff --git a/internal/core/src/index/ScalarIndexSort.cpp b/internal/core/src/index/ScalarIndexSort.cpp index bcb401ea5bf09..1f494e5c5a4d1 100644 --- a/internal/core/src/index/ScalarIndexSort.cpp +++ b/internal/core/src/index/ScalarIndexSort.cpp @@ -117,6 +117,35 @@ ScalarIndexSort::Build(const Config& config) { auto field_datas = file_manager_->CacheRawDataToMemory(insert_files.value()); + BuildWithFieldData(field_datas); +} + +template +void +ScalarIndexSort::Build(size_t n, const T* values) { + if (is_built_) + return; + if (n == 0) { + throw SegcoreError(DataIsEmpty, + "ScalarIndexSort cannot build null values!"); + } + data_.reserve(n); + idx_to_offsets_.resize(n); + T* p = const_cast(values); + for (size_t i = 0; i < n; ++i) { + data_.emplace_back(IndexStructure(*p++, i)); + } + std::sort(data_.begin(), data_.end()); + for (size_t i = 0; i < data_.size(); ++i) { + idx_to_offsets_[data_[i].idx_] = i; + } + is_built_ = true; +} + +template +void +ScalarIndexSort::BuildWithFieldData( + const std::vector& field_datas) { int64_t total_num_rows = 0; for (const auto& data : field_datas) { total_num_rows += data->get_num_rows(); @@ -145,28 +174,6 @@ ScalarIndexSort::Build(const Config& config) { is_built_ = true; } -template -void -ScalarIndexSort::Build(size_t n, const T* values) { - if (is_built_) - return; - if (n == 0) { - throw SegcoreError(DataIsEmpty, - "ScalarIndexSort cannot build null values!"); - } - data_.reserve(n); - idx_to_offsets_.resize(n); - T* p = const_cast(values); - for (size_t i = 0; i < n; ++i) { - data_.emplace_back(IndexStructure(*p++, i)); - } - std::sort(data_.begin(), data_.end()); - for (size_t i = 0; i < data_.size(); ++i) { - idx_to_offsets_[data_[i].idx_] = i; - } - is_built_ = true; -} - template BinarySet ScalarIndexSort::Serialize(const Config& config) { diff --git a/internal/core/src/index/ScalarIndexSort.h b/internal/core/src/index/ScalarIndexSort.h index e938b164184d7..96402017c9cfe 100644 --- a/internal/core/src/index/ScalarIndexSort.h +++ b/internal/core/src/index/ScalarIndexSort.h @@ -41,6 +41,17 @@ class ScalarIndexSort : public ScalarIndex { const storage::FileManagerContext& file_manager_context, std::shared_ptr space); + explicit ScalarIndexSort( + const std::shared_ptr& file_manager) + : file_manager_(file_manager) { + } + + explicit ScalarIndexSort( + const std::shared_ptr& file_manager, + std::shared_ptr space) + : file_manager_(file_manager), space_(space) { + } + BinarySet Serialize(const Config& config) override; @@ -100,6 +111,9 @@ class ScalarIndexSort : public ScalarIndex { return true; } + void + BuildWithFieldData(const std::vector& datas) override; + private: bool ShouldSkip(const T lower_value, const T upper_value, const OpType op); @@ -116,7 +130,8 @@ class ScalarIndexSort : public ScalarIndex { } void - LoadWithoutAssemble(const BinarySet& binary_set, const Config& config); + LoadWithoutAssemble(const BinarySet& binary_set, + const Config& config) override; private: bool is_built_; diff --git a/internal/core/src/index/StringIndexMarisa.cpp b/internal/core/src/index/StringIndexMarisa.cpp index aa41438e2bc8d..3e4aa85c52a41 100644 --- 
a/internal/core/src/index/StringIndexMarisa.cpp +++ b/internal/core/src/index/StringIndexMarisa.cpp @@ -132,6 +132,13 @@ StringIndexMarisa::Build(const Config& config) { "insert file paths is empty when build index"); auto field_datas = file_manager_->CacheRawDataToMemory(insert_files.value()); + + BuildWithFieldData(field_datas); +} + +void +StringIndexMarisa::BuildWithFieldData( + const std::vector& field_datas) { int64_t total_num_rows = 0; // fill key set. diff --git a/internal/core/src/index/StringIndexMarisa.h b/internal/core/src/index/StringIndexMarisa.h index 7b96f061241c3..e787a7e63b404 100644 --- a/internal/core/src/index/StringIndexMarisa.h +++ b/internal/core/src/index/StringIndexMarisa.h @@ -37,6 +37,17 @@ class StringIndexMarisa : public StringIndex { const storage::FileManagerContext& file_manager_context, std::shared_ptr space); + explicit StringIndexMarisa( + const std::shared_ptr& file_manager) + : file_manager_(file_manager) { + } + + explicit StringIndexMarisa( + const std::shared_ptr& file_manager, + std::shared_ptr space) + : file_manager_(file_manager), space_(space) { + } + int64_t Size() override; @@ -63,6 +74,9 @@ class StringIndexMarisa : public StringIndex { void Build(const Config& config = {}) override; + void + BuildWithFieldData(const std::vector& field_datas) override; + void BuildV2(const Config& Config = {}) override; @@ -113,7 +127,8 @@ class StringIndexMarisa : public StringIndex { prefix_match(const std::string_view prefix); void - LoadWithoutAssemble(const BinarySet& binary_set, const Config& config); + LoadWithoutAssemble(const BinarySet& binary_set, + const Config& config) override; private: Config config_; diff --git a/internal/core/src/index/Utils.cpp b/internal/core/src/index/Utils.cpp index a9ad1cf1a0d91..d931684d91767 100644 --- a/internal/core/src/index/Utils.cpp +++ b/internal/core/src/index/Utils.cpp @@ -154,6 +154,15 @@ GetIndexEngineVersionFromConfig(const Config& config) { return (std::stoi(index_engine_version.value())); } +int32_t +GetBitmapCardinalityLimitFromConfig(const Config& config) { + auto bitmap_limit = GetValueFromConfig( + config, index::BITMAP_INDEX_CARDINALITY_LIMIT); + AssertInfo(bitmap_limit.has_value(), + "bitmap cardinality limit not exist in config"); + return (std::stoi(bitmap_limit.value())); +} + // TODO :: too ugly storage::FieldDataMeta GetFieldDataMetaFromConfig(const Config& config) { diff --git a/internal/core/src/index/Utils.h b/internal/core/src/index/Utils.h index 53670dcba215e..50c70d8d52cdd 100644 --- a/internal/core/src/index/Utils.h +++ b/internal/core/src/index/Utils.h @@ -103,6 +103,9 @@ GetIndexTypeFromConfig(const Config& config); IndexVersion GetIndexEngineVersionFromConfig(const Config& config); +int32_t +GetBitmapCardinalityLimitFromConfig(const Config& config); + storage::FieldDataMeta GetFieldDataMetaFromConfig(const Config& config); diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index be78b2b36c43b..7abde651f3187 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -32,7 +32,7 @@ set(MILVUS_TEST_FILES test_growing.cpp test_growing_index.cpp test_indexing.cpp - test_bitmap_index.cpp + test_hybrid_index.cpp test_index_c_api.cpp test_index_wrapper.cpp test_init.cpp diff --git a/internal/core/unittest/test_expr.cpp b/internal/core/unittest/test_expr.cpp index efeae58f78e4a..339c92955b909 100644 --- a/internal/core/unittest/test_expr.cpp +++ b/internal/core/unittest/test_expr.cpp @@ -10,12 +10,14 @@ // or 
implied. See the License for the specific language governing permissions and limitations under the License #include +#include #include #include #include #include #include #include +#include #include "common/Json.h" #include "common/Types.h" @@ -35,6 +37,8 @@ #include "exec/expression/Expr.h" #include "exec/Task.h" #include "expr/ITypeExpr.h" +#include "index/BitmapIndex.h" +#include "index/InvertedIndexTantivy.h" using namespace milvus; using namespace milvus::query; @@ -1271,7 +1275,7 @@ TEST(Expr, TestExprPerformance) { {DataType::DOUBLE, double_fid}}; auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -1678,7 +1682,7 @@ TEST_P(ExprTest, TestSealedSegmentGetBatchSize) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 100000; auto raw_data = DataGen(schema, N); // load field data auto fields = schema->get_fields(); @@ -1739,7 +1743,7 @@ TEST_P(ExprTest, TestGrowingSegmentGetBatchSize) { schema->set_primary_field_id(str1_fid); auto seg = CreateGrowingSegment(schema, empty_index_meta); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); seg->PreInsert(N); seg->Insert(0, @@ -1804,7 +1808,7 @@ TEST_P(ExprTest, TestConjuctExpr) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data auto fields = schema->get_fields(); @@ -1871,7 +1875,7 @@ TEST_P(ExprTest, TestUnaryBenchTest) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -1942,7 +1946,7 @@ TEST_P(ExprTest, TestBinaryRangeBenchTest) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -2022,7 +2026,7 @@ TEST_P(ExprTest, TestLogicalUnaryBenchTest) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -2096,7 +2100,7 @@ TEST_P(ExprTest, TestBinaryLogicalBenchTest) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -2180,7 +2184,7 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeBenchExpr) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -2263,7 +2267,7 @@ TEST_P(ExprTest, TestCompareExprBenchTest) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -2333,7 +2337,7 @@ TEST_P(ExprTest, TestRefactorExprs) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data diff --git a/internal/core/unittest/test_bitmap_index.cpp b/internal/core/unittest/test_hybrid_index.cpp similarity index 63% rename from internal/core/unittest/test_bitmap_index.cpp rename to internal/core/unittest/test_hybrid_index.cpp index 99d877d744587..42087199300df 100644 --- a/internal/core/unittest/test_bitmap_index.cpp +++ 
b/internal/core/unittest/test_hybrid_index.cpp @@ -17,6 +17,7 @@ #include "common/Tracer.h" #include "index/BitmapIndex.h" +#include "index/HybridScalarIndex.h" #include "storage/Util.h" #include "storage/InsertData.h" #include "indexbuilder/IndexFactory.h" @@ -60,7 +61,7 @@ GenerateData(const size_t size, const size_t cardinality) { } template -class BitmapIndexTest : public testing::Test { +class HybridIndexTestV1 : public testing::Test { protected: void Init(int64_t collection_id, @@ -88,7 +89,8 @@ class BitmapIndexTest : public testing::Test { auto serialized_bytes = insert_data.Serialize(storage::Remote); - auto log_path = fmt::format("{}/{}/{}/{}/{}", + auto log_path = fmt::format("/{}/{}/{}/{}/{}/{}", + "/tmp/test_hybrid/", collection_id, partition_id, segment_id, @@ -103,6 +105,7 @@ class BitmapIndexTest : public testing::Test { Config config; config["index_type"] = milvus::index::BITMAP_INDEX_TYPE; config["insert_files"] = std::vector{log_path}; + config["bitmap_cardinality_limit"] = "1000"; auto build_index = indexbuilder::IndexFactory::GetInstance().CreateIndex( @@ -125,10 +128,14 @@ class BitmapIndexTest : public testing::Test { index_->Load(milvus::tracer::TraceContext{}, config); } - void - SetUp() override { + virtual void + SetParam() { nb_ = 10000; cardinality_ = 30; + } + void + SetUp() override { + SetParam(); if constexpr (std::is_same_v) { type_ = DataType::INT8; @@ -162,7 +169,7 @@ class BitmapIndexTest : public testing::Test { index_version); } - virtual ~BitmapIndexTest() override { + virtual ~HybridIndexTestV1() override { boost::filesystem::remove_all(chunk_manager_->GetRootPath()); } @@ -176,7 +183,8 @@ class BitmapIndexTest : public testing::Test { test_data.push_back(data_[i]); s.insert(data_[i]); } - auto index_ptr = dynamic_cast*>(index_.get()); + auto index_ptr = + dynamic_cast*>(index_.get()); auto bitset = index_ptr->In(test_data.size(), test_data.data()); for (size_t i = 0; i < bitset.size(); i++) { ASSERT_EQ(bitset[i], s.find(data_[i]) != s.end()); @@ -192,7 +200,8 @@ class BitmapIndexTest : public testing::Test { test_data.push_back(data_[i]); s.insert(data_[i]); } - auto index_ptr = dynamic_cast*>(index_.get()); + auto index_ptr = + dynamic_cast*>(index_.get()); auto bitset = index_ptr->NotIn(test_data.size(), test_data.data()); for (size_t i = 0; i < bitset.size(); i++) { ASSERT_EQ(bitset[i], s.find(data_[i]) == s.end()); @@ -219,7 +228,7 @@ class BitmapIndexTest : public testing::Test { }; for (const auto& [test_value, op, ref] : test_cases) { auto index_ptr = - dynamic_cast*>(index_.get()); + dynamic_cast*>(index_.get()); auto bitset = index_ptr->Range(test_value, op); for (size_t i = 0; i < bitset.size(); i++) { auto ans = bitset[i]; @@ -232,8 +241,65 @@ class BitmapIndexTest : public testing::Test { } } - private: - std::shared_ptr chunk_manager_; + void + TestRangeCompareFunc() { + if constexpr (!std::is_same_v) { + using RefFunc = std::function; + struct TestParam { + int64_t lower_val; + int64_t upper_val; + bool lower_inclusive; + bool upper_inclusive; + RefFunc ref; + }; + std::vector test_cases = { + { + 10, + 30, + false, + false, + [&](int64_t i) { return 10 < data_[i] && data_[i] < 30; }, + }, + { + 10, + 30, + true, + false, + [&](int64_t i) { return 10 <= data_[i] && data_[i] < 30; }, + }, + { + 10, + 30, + true, + true, + [&](int64_t i) { return 10 <= data_[i] && data_[i] <= 30; }, + }, + { + 10, + 30, + false, + true, + [&](int64_t i) { return 10 < data_[i] && data_[i] <= 30; }, + }}; + + for (const auto& test_case : test_cases) { + 
auto index_ptr = + dynamic_cast*>(index_.get()); + auto bitset = index_ptr->Range(test_case.lower_val, + test_case.lower_inclusive, + test_case.upper_val, + test_case.upper_inclusive); + for (size_t i = 0; i < bitset.size(); i++) { + auto ans = bitset[i]; + auto should = test_case.ref(i); + ASSERT_EQ(ans, should) + << "lower:" << test_case.lower_val + << "upper:" << test_case.upper_val << ", @" << i + << ", ans: " << ans << ", ref: " << should; + } + } + } + } public: IndexBasePtr index_; @@ -241,34 +307,92 @@ class BitmapIndexTest : public testing::Test { size_t nb_; size_t cardinality_; boost::container::vector data_; + std::shared_ptr chunk_manager_; +}; + +TYPED_TEST_SUITE_P(HybridIndexTestV1); + +TYPED_TEST_P(HybridIndexTestV1, CountFuncTest) { + auto count = this->index_->Count(); + EXPECT_EQ(count, this->nb_); +} + +TYPED_TEST_P(HybridIndexTestV1, INFuncTest) { + this->TestInFunc(); +} + +TYPED_TEST_P(HybridIndexTestV1, NotINFuncTest) { + this->TestNotInFunc(); +} + +TYPED_TEST_P(HybridIndexTestV1, CompareValFuncTest) { + this->TestCompareValueFunc(); +} + +TYPED_TEST_P(HybridIndexTestV1, TestRangeCompareFuncTest) { + this->TestRangeCompareFunc(); +} + +using BitmapType = + testing::Types; + +REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestV1, + CountFuncTest, + INFuncTest, + NotINFuncTest, + CompareValFuncTest, + TestRangeCompareFuncTest); + +INSTANTIATE_TYPED_TEST_SUITE_P(HybridIndexE2ECheck_LowCardinality, + HybridIndexTestV1, + BitmapType); + +template +class HybridIndexTestV2 : public HybridIndexTestV1 { + public: + virtual void + SetParam() override { + this->nb_ = 10000; + this->cardinality_ = 2000; + } + + virtual ~HybridIndexTestV2() { + } }; -TYPED_TEST_SUITE_P(BitmapIndexTest); +TYPED_TEST_SUITE_P(HybridIndexTestV2); -TYPED_TEST_P(BitmapIndexTest, CountFuncTest) { +TYPED_TEST_P(HybridIndexTestV2, CountFuncTest) { auto count = this->index_->Count(); EXPECT_EQ(count, this->nb_); } -TYPED_TEST_P(BitmapIndexTest, INFuncTest) { +TYPED_TEST_P(HybridIndexTestV2, INFuncTest) { this->TestInFunc(); } -TYPED_TEST_P(BitmapIndexTest, NotINFuncTest) { +TYPED_TEST_P(HybridIndexTestV2, NotINFuncTest) { this->TestNotInFunc(); } -TYPED_TEST_P(BitmapIndexTest, CompareValFuncTest) { +TYPED_TEST_P(HybridIndexTestV2, CompareValFuncTest) { this->TestCompareValueFunc(); } +TYPED_TEST_P(HybridIndexTestV2, TestRangeCompareFuncTest) { + this->TestRangeCompareFunc(); +} + using BitmapType = testing::Types; -REGISTER_TYPED_TEST_SUITE_P(BitmapIndexTest, +REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestV2, CountFuncTest, INFuncTest, NotINFuncTest, - CompareValFuncTest); + CompareValFuncTest, + TestRangeCompareFuncTest); -INSTANTIATE_TYPED_TEST_SUITE_P(BitmapE2ECheck, BitmapIndexTest, BitmapType); +INSTANTIATE_TYPED_TEST_SUITE_P(HybridIndexE2ECheck_HighCardinality, + HybridIndexTestV2, + BitmapType); diff --git a/internal/core/unittest/test_scalar_index.cpp b/internal/core/unittest/test_scalar_index.cpp index 2fc943b57b505..2967523daf365 100644 --- a/internal/core/unittest/test_scalar_index.cpp +++ b/internal/core/unittest/test_scalar_index.cpp @@ -15,7 +15,11 @@ #include "gtest/gtest-typed-test.h" #include "index/IndexFactory.h" +#include "index/BitmapIndex.h" +#include "index/InvertedIndexTantivy.h" +#include "index/ScalarIndex.h" #include "common/CDataType.h" +#include "common/Types.h" #include "knowhere/comp/index_param.h" #include "test_utils/indexbuilder_test_utils.h" #include "test_utils/AssertUtils.h" @@ -373,7 +377,11 @@ TYPED_TEST_P(TypedScalarIndexTestV2, Base) { create_index_info, 
file_manager_context, space); auto scalar_index = dynamic_cast*>(index.get()); - scalar_index->BuildV2(); + milvus::Config config; + if (index_type == "BITMAP") { + config["bitmap_cardinality_limit"] = "1000"; + } + scalar_index->BuildV2(config); scalar_index->UploadV2(); auto new_index = @@ -391,3 +399,260 @@ REGISTER_TYPED_TEST_SUITE_P(TypedScalarIndexTestV2, Base); INSTANTIATE_TYPED_TEST_SUITE_P(ArithmeticCheck, TypedScalarIndexTestV2, ScalarT); + +using namespace milvus::index; +template +std::vector +GenerateRawData(int N, int cardinality) { + using std::vector; + std::default_random_engine random(60); + std::normal_distribution<> distr(0, 1); + vector data(N); + for (auto& x : data) { + x = random() % (cardinality); + } + return data; +} + +template <> +std::vector +GenerateRawData(int N, int cardinality) { + using std::vector; + std::default_random_engine random(60); + std::normal_distribution<> distr(0, 1); + vector data(N); + for (auto& x : data) { + x = std::to_string(random() % (cardinality)); + } + return data; +} + +template +IndexBasePtr +TestBuildIndex(int N, int cardinality, int index_type) { + auto raw_data = GenerateRawData(N, cardinality); + if (index_type == 0) { + auto index = std::make_unique>(); + index->Build(N, raw_data.data()); + return std::move(index); + } else if (index_type == 1) { + if constexpr (std::is_same_v) { + auto index = std::make_unique(); + index->Build(N, raw_data.data()); + return std::move(index); + } + auto index = milvus::index::CreateScalarIndexSort(); + index->Build(N, raw_data.data()); + return std::move(index); + } +} + +template +void +TestIndexSearchIn() { + // low data cardinality + { + int N = 1000; + std::vector data_cardinality = {10, 20, 100}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + std::vector terms; + for (int i = 0; i < 10; i++) { + terms.push_back(static_cast(i)); + } + auto final1 = bitmap_index_ptr->In(10, terms.data()); + auto final2 = sort_index_ptr->In(10, terms.data()); + EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->NotIn(10, terms.data()); + auto final4 = sort_index_ptr->NotIn(10, terms.data()); + EXPECT_EQ(final4.size(), final3.size()); + for (int i = 0; i < final3.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } + + // high data cardinality + { + int N = 10000; + std::vector data_cardinality = {1001, 2000}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + std::vector terms; + for (int i = 0; i < 10; i++) { + terms.push_back(static_cast(i)); + } + auto final1 = bitmap_index_ptr->In(10, terms.data()); + auto final2 = sort_index_ptr->In(10, terms.data()); + EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->NotIn(10, terms.data()); + auto final4 = sort_index_ptr->NotIn(10, terms.data()); + EXPECT_EQ(final4.size(), final3.size()); + for (int i = 0; i < final3.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } +} + +template <> +void 
+TestIndexSearchIn() { + // low data cardinality + { + int N = 1000; + std::vector data_cardinality = {10, 20, 100}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + std::vector terms; + for (int i = 0; i < 10; i++) { + terms.push_back(std::to_string(i)); + } + auto final1 = bitmap_index_ptr->In(10, terms.data()); + auto final2 = sort_index_ptr->In(10, terms.data()); + EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->NotIn(10, terms.data()); + auto final4 = sort_index_ptr->NotIn(10, terms.data()); + EXPECT_EQ(final4.size(), final3.size()); + for (int i = 0; i < final3.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } + // high data cardinality + { + int N = 10000; + std::vector data_cardinality = {1001, 2000}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + std::vector terms; + for (int i = 0; i < 10; i++) { + terms.push_back(std::to_string(i)); + } + auto final1 = bitmap_index_ptr->In(10, terms.data()); + auto final2 = sort_index_ptr->In(10, terms.data()); + EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->NotIn(10, terms.data()); + auto final4 = sort_index_ptr->NotIn(10, terms.data()); + EXPECT_EQ(final4.size(), final3.size()); + for (int i = 0; i < final3.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } +} + +TEST(ScalarTest, test_function_In) { + TestIndexSearchIn(); + TestIndexSearchIn(); + TestIndexSearchIn(); + TestIndexSearchIn(); + TestIndexSearchIn(); + TestIndexSearchIn(); + TestIndexSearchIn(); +} + +template +void +TestIndexSearchRange() { + // low data cordinality + { + int N = 1000; + std::vector data_cardinality = {10, 20, 100}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + + auto final1 = bitmap_index_ptr->Range(10, milvus::OpType::LessThan); + auto final2 = sort_index_ptr->Range(10, milvus::OpType::LessThan); + EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->Range(10, true, 100, false); + auto final4 = sort_index_ptr->Range(10, true, 100, false); + EXPECT_EQ(final3.size(), final4.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } + + // high data cordinality + { + int N = 10000; + std::vector data_cardinality = {1001, 2000}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + + auto final1 = bitmap_index_ptr->Range(10, milvus::OpType::LessThan); + auto final2 = sort_index_ptr->Range(10, milvus::OpType::LessThan); + 
EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->Range(10, true, 100, false); + auto final4 = sort_index_ptr->Range(10, true, 100, false); + EXPECT_EQ(final3.size(), final4.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } +} + +TEST(ScalarTest, test_function_range) { + TestIndexSearchRange(); + TestIndexSearchRange(); + TestIndexSearchRange(); + TestIndexSearchRange(); + TestIndexSearchRange(); + TestIndexSearchRange(); +} diff --git a/internal/proxy/task_index.go b/internal/proxy/task_index.go index 5925391c20b6e..149a13605a9ad 100644 --- a/internal/proxy/task_index.go +++ b/internal/proxy/task_index.go @@ -332,6 +332,13 @@ func fillDimension(field *schemapb.FieldSchema, indexParams map[string]string) e func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) error { indexType := indexParams[common.IndexTypeKey] + if indexType == indexparamcheck.IndexBitmap { + _, exist := indexParams[common.BitmapCardinalityLimitKey] + if !exist { + indexParams[common.BitmapCardinalityLimitKey] = paramtable.Get().CommonCfg.BitmapIndexCardinalityBound.GetValue() + } + } + checker, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(indexType) if err != nil { log.Warn("Failed to get index checker", zap.String(common.IndexTypeKey, indexType)) diff --git a/pkg/common/common.go b/pkg/common/common.go index ea148b03b7f5d..723f231718abf 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -112,6 +112,8 @@ const ( MaxCapacityKey = "max_capacity" DropRatioBuildKey = "drop_ratio_build" + + BitmapCardinalityLimitKey = "bitmap_cardinality_limit" ) // Collection properties key diff --git a/pkg/util/indexparamcheck/bitmap_checker_test.go b/pkg/util/indexparamcheck/bitmap_checker_test.go index 4b0cca2bf3309..aa1baa8963433 100644 --- a/pkg/util/indexparamcheck/bitmap_checker_test.go +++ b/pkg/util/indexparamcheck/bitmap_checker_test.go @@ -11,7 +11,7 @@ import ( func Test_BitmapIndexChecker(t *testing.T) { c := newBITMAPChecker() - assert.NoError(t, c.CheckTrain(map[string]string{})) + assert.NoError(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "100"})) assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Int64)) assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Float)) @@ -19,4 +19,6 @@ func Test_BitmapIndexChecker(t *testing.T) { assert.Error(t, c.CheckValidDataType(schemapb.DataType_JSON)) assert.Error(t, c.CheckValidDataType(schemapb.DataType_Array)) + assert.Error(t, c.CheckTrain(map[string]string{})) + assert.Error(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "0"})) } diff --git a/pkg/util/indexparamcheck/bitmap_index_checker.go b/pkg/util/indexparamcheck/bitmap_index_checker.go index da90a7d06db3a..d41267987d860 100644 --- a/pkg/util/indexparamcheck/bitmap_index_checker.go +++ b/pkg/util/indexparamcheck/bitmap_index_checker.go @@ -2,17 +2,21 @@ package indexparamcheck import ( "fmt" + "math" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/typeutil" ) -// STLSORTChecker checks if a STL_SORT index can be built. 
type BITMAPChecker struct { scalarIndexChecker } func (c *BITMAPChecker) CheckTrain(params map[string]string) error { + if !CheckIntByRange(params, common.BitmapCardinalityLimitKey, 1, math.MaxInt) { + return fmt.Errorf("failed to check bitmap cardinality limit, should be larger than 0 and smaller than math.MaxInt") + } return c.scalarIndexChecker.CheckTrain(params) } diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 9d3da19affc3f..5dbd86f009462 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -42,13 +42,14 @@ const ( DefaultSessionTTL = 30 // s DefaultSessionRetryTimes = 30 - DefaultMaxDegree = 56 - DefaultSearchListSize = 100 - DefaultPQCodeBudgetGBRatio = 0.125 - DefaultBuildNumThreadsRatio = 1.0 - DefaultSearchCacheBudgetGBRatio = 0.10 - DefaultLoadNumThreadRatio = 8.0 - DefaultBeamWidthRatio = 4.0 + DefaultMaxDegree = 56 + DefaultSearchListSize = 100 + DefaultPQCodeBudgetGBRatio = 0.125 + DefaultBuildNumThreadsRatio = 1.0 + DefaultSearchCacheBudgetGBRatio = 0.10 + DefaultLoadNumThreadRatio = 8.0 + DefaultBeamWidthRatio = 4.0 + DefaultBitmapIndexCardinalityBound = 500 ) // ComponentParam is used to quickly and easily access all components' configurations. @@ -212,6 +213,7 @@ type commonConfig struct { BeamWidthRatio ParamItem `refreshable:"true"` GracefulTime ParamItem `refreshable:"true"` GracefulStopTimeout ParamItem `refreshable:"true"` + BitmapIndexCardinalityBound ParamItem `refreshable:"false"` StorageType ParamItem `refreshable:"false"` SimdType ParamItem `refreshable:"false"` @@ -443,6 +445,14 @@ This configuration is only used by querynode and indexnode, it selects CPU instr } p.IndexSliceSize.Init(base.mgr) + p.BitmapIndexCardinalityBound = ParamItem{ + Key: "common.bitmapIndexCardinalityBound", + Version: "2.5.0", + DefaultValue: strconv.Itoa(DefaultBitmapIndexCardinalityBound), + Export: true, + } + p.BitmapIndexCardinalityBound.Init(base.mgr) + p.EnableMaterializedView = ParamItem{ Key: "common.materializedView.enabled", Version: "2.5.0", From 77637180fa47e602b14c4c33c54cdeaf77d75136 Mon Sep 17 00:00:00 2001 From: "cai.zhang" Date: Thu, 30 May 2024 13:37:44 +0800 Subject: [PATCH 099/126] enhance: Periodically synchronize segments to datanode watcher (#33420) issue: #32809 --------- Signed-off-by: Cai Zhang --- configs/milvus.yaml | 1 + internal/datacoord/compaction.go | 32 +- internal/datacoord/compaction_test.go | 14 +- internal/datacoord/meta.go | 7 + internal/datacoord/segment_operator.go | 1 + internal/datacoord/server.go | 5 + internal/datacoord/sync_segments_scheduler.go | 149 +++++++ .../datacoord/sync_segments_scheduler_test.go | 371 ++++++++++++++++++ .../datanode/compaction/mock_compactor.go | 32 -- internal/datanode/compaction_executor_test.go | 1 - internal/datanode/data_node.go | 3 + internal/datanode/metacache/meta_cache.go | 100 +++-- .../datanode/metacache/meta_cache_test.go | 65 ++- .../datanode/metacache/mock_meta_cache.go | 91 +++-- internal/datanode/services.go | 65 ++- internal/datanode/services_test.go | 36 ++ .../datanode/syncmgr/mock_sync_manager.go | 2 +- internal/proto/data_coord.proto | 14 + pkg/util/paramtable/component_param.go | 9 + 19 files changed, 827 insertions(+), 171 deletions(-) create mode 100644 internal/datacoord/sync_segments_scheduler.go create mode 100644 internal/datacoord/sync_segments_scheduler_test.go diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 1f23cd014da20..d6415e1a76208 100644 --- a/configs/milvus.yaml +++ 
b/configs/milvus.yaml @@ -482,6 +482,7 @@ dataCoord: serverMaxRecvSize: 268435456 clientMaxSendSize: 268435456 clientMaxRecvSize: 536870912 + syncSegmentsInterval: 300 dataNode: dataSync: diff --git a/internal/datacoord/compaction.go b/internal/datacoord/compaction.go index c9919d7b821ea..6a11765dc17bf 100644 --- a/internal/datacoord/compaction.go +++ b/internal/datacoord/compaction.go @@ -487,18 +487,7 @@ func (c *compactionPlanHandler) handleMergeCompactionResult(plan *datapb.Compact // Apply metrics after successful meta update. metricMutation.commit() } - - nodeID := c.plans[plan.GetPlanID()].dataNodeID - req := &datapb.SyncSegmentsRequest{ - PlanID: plan.PlanID, - } - - log.Info("handleCompactionResult: syncing segments with node", zap.Int64("nodeID", nodeID)) - if err := c.sessions.SyncSegments(nodeID, req); err != nil { - log.Warn("handleCompactionResult: fail to sync segments with node", - zap.Int64("nodeID", nodeID), zap.Error(err)) - return err - } + // TODO @xiaocai2333: drop compaction plan on datanode log.Info("handleCompactionResult: success to handle merge compaction result") return nil @@ -546,13 +535,8 @@ func (c *compactionPlanHandler) updateCompaction(ts Timestamp) error { // task.dataNodeID not match with channel // Mark this compaction as failure and skip processing the meta if !c.chManager.Match(task.dataNodeID, task.plan.GetChannel()) { - // Sync segments without CompactionFrom segmentsIDs to make sure DN clear the task - // without changing the meta + // TODO @xiaocai2333: drop compaction plan on datanode log.Warn("compaction failed for channel nodeID not match") - if err := c.sessions.SyncSegments(task.dataNodeID, &datapb.SyncSegmentsRequest{PlanID: planID}); err != nil { - log.Warn("compaction failed to sync segments with node", zap.Error(err)) - continue - } c.plans[planID] = c.plans[planID].shadowClone(setState(failed), endSpan()) c.setSegmentsCompacting(task.plan, false) c.scheduler.Finish(task.dataNodeID, task.plan) @@ -617,16 +601,8 @@ func (c *compactionPlanHandler) updateCompaction(ts Timestamp) error { if nodeUnkonwnPlan, ok := completedPlans[planID]; ok { nodeID, plan := nodeUnkonwnPlan.A, nodeUnkonwnPlan.B log := log.With(zap.Int64("planID", planID), zap.Int64("nodeID", nodeID), zap.String("channel", plan.GetChannel())) - - // Sync segments without CompactionFrom segmentsIDs to make sure DN clear the task - // without changing the meta - log.Info("compaction syncing unknown plan with node") - if err := c.sessions.SyncSegments(nodeID, &datapb.SyncSegmentsRequest{ - PlanID: planID, - }); err != nil { - log.Warn("compaction failed to sync segments with node", zap.Error(err)) - return err - } + // TODO @xiaocai2333: drop compaction plan on datanode + log.Info("drop unknown plan with node") } } diff --git a/internal/datacoord/compaction_test.go b/internal/datacoord/compaction_test.go index 879dfdbbbb9a9..f7c1c5733cbd7 100644 --- a/internal/datacoord/compaction_test.go +++ b/internal/datacoord/compaction_test.go @@ -84,7 +84,6 @@ func (s *CompactionPlanHandlerSuite) TestCheckResult() { 4: {A: 100, B: &datapb.CompactionPlanResult{PlanID: 4, State: commonpb.CompactionState_Executing}}, }, nil) - s.mockSessMgr.EXPECT().SyncSegments(int64(100), mock.Anything).Return(nil).Once() { s.mockAlloc.EXPECT().allocTimestamp(mock.Anything).Return(0, errors.New("mock")).Once() handler := newCompactionPlanHandler(nil, s.mockSessMgr, nil, nil, s.mockAlloc) @@ -475,7 +474,6 @@ func (s *CompactionPlanHandlerSuite) TestHandleMergeCompactionResult() { } return nil }).Once() - 
s.mockSessMgr.EXPECT().SyncSegments(mock.Anything, mock.Anything).Return(nil).Once() handler := newCompactionPlanHandler(nil, s.mockSessMgr, s.mockCm, s.mockMeta, s.mockAlloc) handler.plans[plan.PlanID] = &compactionTask{dataNodeID: 111, plan: plan} @@ -517,7 +515,6 @@ func (s *CompactionPlanHandlerSuite) TestHandleMergeCompactionResult() { s.mockMeta.EXPECT().CompleteCompactionMutation(mock.Anything, mock.Anything).Return( []*SegmentInfo{segment}, &segMetricMutation{}, nil).Once() - s.mockSessMgr.EXPECT().SyncSegments(mock.Anything, mock.Anything).Return(errors.New("mock error")).Once() handler := newCompactionPlanHandler(nil, s.mockSessMgr, s.mockCm, s.mockMeta, s.mockAlloc) handler.plans[plan.PlanID] = &compactionTask{dataNodeID: 111, plan: plan} @@ -529,7 +526,7 @@ func (s *CompactionPlanHandlerSuite) TestHandleMergeCompactionResult() { } err := handler.handleMergeCompactionResult(plan, compactionResult) - s.Error(err) + s.NoError(err) }) } @@ -549,7 +546,6 @@ func (s *CompactionPlanHandlerSuite) TestCompleteCompaction() { }) s.Run("test complete merge compaction task", func() { - s.mockSessMgr.EXPECT().SyncSegments(mock.Anything, mock.Anything).Return(nil).Once() // mock for handleMergeCompactionResult s.mockMeta.EXPECT().GetHealthySegment(mock.Anything).Return(nil).Once() segment := NewSegmentInfo(&datapb.SegmentInfo{ID: 100}) @@ -702,14 +698,6 @@ func (s *CompactionPlanHandlerSuite) TestUpdateCompaction() { }, } - s.mockSessMgr.EXPECT().SyncSegments(int64(222), mock.Anything).RunAndReturn(func(nodeID int64, req *datapb.SyncSegmentsRequest) error { - s.EqualValues(nodeID, 222) - s.NotNil(req) - s.Empty(req.GetCompactedFrom()) - s.EqualValues(5, req.GetPlanID()) - return nil - }).Once() - s.mockSessMgr.EXPECT().SyncSegments(int64(111), mock.Anything).Return(nil) s.mockCm.EXPECT().Match(int64(111), "ch-1").Return(true) s.mockCm.EXPECT().Match(int64(111), "ch-2").Return(false).Once() diff --git a/internal/datacoord/meta.go b/internal/datacoord/meta.go index 9b13593592c6f..f91494946af6a 100644 --- a/internal/datacoord/meta.go +++ b/internal/datacoord/meta.go @@ -1575,3 +1575,10 @@ func updateSegStateAndPrepareMetrics(segToUpdate *SegmentInfo, targetState commo metricMutation.append(segToUpdate.GetState(), targetState, segToUpdate.GetLevel(), segToUpdate.GetNumOfRows()) segToUpdate.State = targetState } + +func (m *meta) ListCollections() []int64 { + m.RLock() + defer m.RUnlock() + + return lo.Keys(m.collections) +} diff --git a/internal/datacoord/segment_operator.go b/internal/datacoord/segment_operator.go index d31d1a4c3d8e2..afd365e2dc825 100644 --- a/internal/datacoord/segment_operator.go +++ b/internal/datacoord/segment_operator.go @@ -32,6 +32,7 @@ func SetMaxRowCount(maxRow int64) SegmentOperator { type segmentCriterion struct { collectionID int64 channel string + partitionID int64 others []SegmentFilter } diff --git a/internal/datacoord/server.go b/internal/datacoord/server.go index 94c416e4b8e98..8498926b93339 100644 --- a/internal/datacoord/server.go +++ b/internal/datacoord/server.go @@ -127,6 +127,7 @@ type Server struct { compactionTrigger trigger compactionHandler compactionPlanContext compactionViewManager *CompactionViewManager + syncSegmentsScheduler *SyncSegmentsScheduler metricsCacheManager *metricsinfo.MetricsCacheManager @@ -393,6 +394,8 @@ func (s *Server) initDataCoord() error { s.importScheduler = NewImportScheduler(s.meta, s.cluster, s.allocator, s.importMeta, s.buildIndexCh) s.importChecker = NewImportChecker(s.meta, s.broker, s.cluster, s.allocator, 
s.segmentManager, s.importMeta) + s.syncSegmentsScheduler = newSyncSegmentsScheduler(s.meta, s.channelManager, s.sessionManager) + s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.ctx) log.Info("init datacoord done", zap.Int64("nodeID", paramtable.GetNodeID()), zap.String("Address", s.address)) @@ -712,6 +715,7 @@ func (s *Server) startServerLoop() { go s.importScheduler.Start() go s.importChecker.Start() s.garbageCollector.start() + s.syncSegmentsScheduler.Start() } // startDataNodeTtLoop start a goroutine to recv data node tt msg from msgstream @@ -1104,6 +1108,7 @@ func (s *Server) Stop() error { s.importScheduler.Close() s.importChecker.Close() + s.syncSegmentsScheduler.Stop() if Params.DataCoordCfg.EnableCompaction.GetAsBool() { s.stopCompactionTrigger() diff --git a/internal/datacoord/sync_segments_scheduler.go b/internal/datacoord/sync_segments_scheduler.go new file mode 100644 index 0000000000000..f5224f7110b9e --- /dev/null +++ b/internal/datacoord/sync_segments_scheduler.go @@ -0,0 +1,149 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package datacoord + +import ( + "sync" + "time" + + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/logutil" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +type SyncSegmentsScheduler struct { + quit chan struct{} + wg sync.WaitGroup + + meta *meta + channelManager ChannelManager + sessions SessionManager +} + +func newSyncSegmentsScheduler(m *meta, channelManager ChannelManager, sessions SessionManager) *SyncSegmentsScheduler { + return &SyncSegmentsScheduler{ + quit: make(chan struct{}), + wg: sync.WaitGroup{}, + meta: m, + channelManager: channelManager, + sessions: sessions, + } +} + +func (sss *SyncSegmentsScheduler) Start() { + sss.quit = make(chan struct{}) + sss.wg.Add(1) + + go func() { + defer logutil.LogPanic() + ticker := time.NewTicker(Params.DataCoordCfg.SyncSegmentsInterval.GetAsDuration(time.Second)) + defer sss.wg.Done() + + for { + select { + case <-sss.quit: + log.Info("sync segments scheduler quit") + ticker.Stop() + return + case <-ticker.C: + sss.SyncSegmentsForCollections() + } + } + }() + log.Info("SyncSegmentsScheduler started...") +} + +func (sss *SyncSegmentsScheduler) Stop() { + close(sss.quit) + sss.wg.Wait() +} + +func (sss *SyncSegmentsScheduler) SyncSegmentsForCollections() { + collIDs := sss.meta.ListCollections() + for _, collID := range collIDs { + collInfo := sss.meta.GetCollection(collID) + if collInfo == nil { + log.Warn("collection info is nil, skip it", zap.Int64("collectionID", collID)) + continue + } + pkField, err := typeutil.GetPrimaryFieldSchema(collInfo.Schema) + if err != nil { + log.Warn("get primary field from schema failed", zap.Int64("collectionID", collID), + zap.Error(err)) + continue + } + for _, channelName := range collInfo.VChannelNames { + nodeID, err := sss.channelManager.FindWatcher(channelName) + if err != nil { + log.Warn("find watcher for channel failed", zap.Int64("collectionID", collID), + zap.String("channelName", channelName), zap.Error(err)) + continue + } + for _, partitionID := range collInfo.Partitions { + if err := sss.SyncSegments(collID, partitionID, channelName, nodeID, pkField.GetFieldID()); err != nil { + log.Warn("sync segment with channel failed, retry next ticker", + zap.Int64("collectionID", collID), + zap.Int64("partitionID", partitionID), + zap.String("channel", channelName), + zap.Error(err)) + continue + } + } + } + } +} + +func (sss *SyncSegmentsScheduler) SyncSegments(collectionID, partitionID int64, channelName string, nodeID, pkFieldID int64) error { + log := log.With(zap.Int64("collectionID", collectionID), zap.Int64("partitionID", partitionID), + zap.String("channelName", channelName), zap.Int64("nodeID", nodeID)) + segments := sss.meta.SelectSegments(WithChannel(channelName), SegmentFilterFunc(func(info *SegmentInfo) bool { + return info.GetPartitionID() == partitionID && isSegmentHealthy(info) + })) + req := &datapb.SyncSegmentsRequest{ + ChannelName: channelName, + PartitionId: partitionID, + CollectionId: collectionID, + SegmentInfos: make(map[int64]*datapb.SyncSegmentInfo), + } + + for _, seg := range segments { + for _, statsLog := range seg.GetStatslogs() { + if statsLog.GetFieldID() == pkFieldID { + req.SegmentInfos[seg.ID] = &datapb.SyncSegmentInfo{ + SegmentId: seg.GetID(), + PkStatsLog: statsLog, + State: seg.GetState(), + Level: seg.GetLevel(), + NumOfRows: seg.GetNumOfRows(), + } + } + } + } + + if err := sss.sessions.SyncSegments(nodeID, req); err 
!= nil { + log.Warn("fail to sync segments with node", zap.Error(err)) + return err + } + log.Info("sync segments success", zap.Int64s("segments", lo.Map(segments, func(t *SegmentInfo, i int) int64 { + return t.GetID() + }))) + return nil +} diff --git a/internal/datacoord/sync_segments_scheduler_test.go b/internal/datacoord/sync_segments_scheduler_test.go new file mode 100644 index 0000000000000..53ea0988dd740 --- /dev/null +++ b/internal/datacoord/sync_segments_scheduler_test.go @@ -0,0 +1,371 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package datacoord + +import ( + "sync/atomic" + "testing" + + "github.com/cockroachdb/errors" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/pkg/util/lock" +) + +type SyncSegmentsSchedulerSuite struct { + suite.Suite + + m *meta + new atomic.Int64 + old atomic.Int64 +} + +func Test_SyncSegmentsSchedulerSuite(t *testing.T) { + suite.Run(t, new(SyncSegmentsSchedulerSuite)) +} + +func (s *SyncSegmentsSchedulerSuite) initParams() { + s.m = &meta{ + RWMutex: lock.RWMutex{}, + collections: map[UniqueID]*collectionInfo{ + 1: { + ID: 1, + Schema: &schemapb.CollectionSchema{ + Name: "coll1", + Fields: []*schemapb.FieldSchema{ + { + FieldID: 100, + Name: "pk", + IsPrimaryKey: true, + Description: "", + DataType: schemapb.DataType_Int64, + }, + { + FieldID: 101, + Name: "vec", + IsPrimaryKey: false, + Description: "", + DataType: schemapb.DataType_FloatVector, + }, + }, + }, + Partitions: []int64{2, 3}, + VChannelNames: []string{"channel1", "channel2"}, + }, + 2: nil, + }, + segments: &SegmentsInfo{ + secondaryIndexes: segmentInfoIndexes{ + channel2Segments: map[string]map[UniqueID]*SegmentInfo{ + "channel1": { + 5: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 5, + CollectionID: 1, + PartitionID: 2, + InsertChannel: "channel1", + NumOfRows: 3000, + State: commonpb.SegmentState_Dropped, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 1, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 2, + }, + }, + }, + }, + }, + }, + 6: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 6, + CollectionID: 1, + PartitionID: 3, + InsertChannel: "channel1", + NumOfRows: 3000, + State: commonpb.SegmentState_Dropped, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 3, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 4, + }, + }, + }, + }, + }, + }, + 9: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 9, + CollectionID: 1, + PartitionID: 2, + InsertChannel: "channel1", + 
NumOfRows: 3000, + State: commonpb.SegmentState_Flushed, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 9, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 10, + }, + }, + }, + }, + CompactionFrom: []int64{5}, + }, + }, + 10: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 10, + CollectionID: 1, + PartitionID: 3, + InsertChannel: "channel1", + NumOfRows: 3000, + State: commonpb.SegmentState_Flushed, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 7, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 8, + }, + }, + }, + }, + CompactionFrom: []int64{6}, + }, + }, + }, + "channel2": { + 7: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 7, + CollectionID: 1, + PartitionID: 2, + InsertChannel: "channel2", + NumOfRows: 3000, + State: commonpb.SegmentState_Dropped, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 5, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 6, + }, + }, + }, + }, + }, + }, + 8: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 8, + CollectionID: 1, + PartitionID: 3, + InsertChannel: "channel2", + NumOfRows: 3000, + State: commonpb.SegmentState_Dropped, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 7, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 8, + }, + }, + }, + }, + }, + }, + 11: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 11, + CollectionID: 1, + PartitionID: 2, + InsertChannel: "channel2", + NumOfRows: 3000, + State: commonpb.SegmentState_Flushed, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 5, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 6, + }, + }, + }, + }, + CompactionFrom: []int64{7}, + }, + }, + 12: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 12, + CollectionID: 1, + PartitionID: 3, + InsertChannel: "channel2", + NumOfRows: 3000, + State: commonpb.SegmentState_Flushed, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 7, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 8, + }, + }, + }, + }, + CompactionFrom: []int64{8}, + }, + }, + }, + }, + }, + }, + } +} + +func (s *SyncSegmentsSchedulerSuite) SetupTest() { + s.initParams() +} + +func (s *SyncSegmentsSchedulerSuite) Test_newSyncSegmentsScheduler() { + cm := NewMockChannelManager(s.T()) + cm.EXPECT().FindWatcher(mock.Anything).Return(100, nil) + + sm := NewMockSessionManager(s.T()) + sm.EXPECT().SyncSegments(mock.Anything, mock.Anything).RunAndReturn(func(i int64, request *datapb.SyncSegmentsRequest) error { + for _, seg := range request.GetSegmentInfos() { + if seg.GetState() == commonpb.SegmentState_Flushed { + s.new.Add(1) + } + if seg.GetState() == commonpb.SegmentState_Dropped { + s.old.Add(1) + } + } + return nil + }) + + Params.DataCoordCfg.SyncSegmentsInterval.SwapTempValue("1") + defer Params.DataCoordCfg.SyncSegmentsInterval.SwapTempValue("600") + sss := newSyncSegmentsScheduler(s.m, cm, sm) + sss.Start() + + // 2 channels, 2 partitions, 2 segments + // no longer sync dropped segments + for s.new.Load() < 4 { + } + sss.Stop() +} + +func (s *SyncSegmentsSchedulerSuite) Test_SyncSegmentsFail() { + cm := NewMockChannelManager(s.T()) + sm := NewMockSessionManager(s.T()) + + sss := newSyncSegmentsScheduler(s.m, cm, sm) + + s.Run("pk not found", 
func() { + sss.meta.collections[1].Schema.Fields[0].IsPrimaryKey = false + sss.SyncSegmentsForCollections() + sss.meta.collections[1].Schema.Fields[0].IsPrimaryKey = true + }) + + s.Run("find watcher failed", func() { + cm.EXPECT().FindWatcher(mock.Anything).Return(0, errors.New("mock error")).Twice() + sss.SyncSegmentsForCollections() + }) + + s.Run("sync segment failed", func() { + cm.EXPECT().FindWatcher(mock.Anything).Return(100, nil) + sm.EXPECT().SyncSegments(mock.Anything, mock.Anything).Return(errors.New("mock error")) + sss.SyncSegmentsForCollections() + }) +} diff --git a/internal/datanode/compaction/mock_compactor.go b/internal/datanode/compaction/mock_compactor.go index 99dccea0aa54c..19a83bf2e1b9d 100644 --- a/internal/datanode/compaction/mock_compactor.go +++ b/internal/datanode/compaction/mock_compactor.go @@ -228,38 +228,6 @@ func (_c *MockCompactor_GetPlanID_Call) RunAndReturn(run func() int64) *MockComp return _c } -// InjectDone provides a mock function with given fields: -func (_m *MockCompactor) InjectDone() { - _m.Called() -} - -// MockCompactor_InjectDone_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'InjectDone' -type MockCompactor_InjectDone_Call struct { - *mock.Call -} - -// InjectDone is a helper method to define mock.On call -func (_e *MockCompactor_Expecter) InjectDone() *MockCompactor_InjectDone_Call { - return &MockCompactor_InjectDone_Call{Call: _e.mock.On("InjectDone")} -} - -func (_c *MockCompactor_InjectDone_Call) Run(run func()) *MockCompactor_InjectDone_Call { - _c.Call.Run(func(args mock.Arguments) { - run() - }) - return _c -} - -func (_c *MockCompactor_InjectDone_Call) Return() *MockCompactor_InjectDone_Call { - _c.Call.Return() - return _c -} - -func (_c *MockCompactor_InjectDone_Call) RunAndReturn(run func()) *MockCompactor_InjectDone_Call { - _c.Call.Return(run) - return _c -} - // Stop provides a mock function with given fields: func (_m *MockCompactor) Stop() { _m.Called() diff --git a/internal/datanode/compaction_executor_test.go b/internal/datanode/compaction_executor_test.go index fd6fba2e6f0a7..5fd21070e280e 100644 --- a/internal/datanode/compaction_executor_test.go +++ b/internal/datanode/compaction_executor_test.go @@ -80,7 +80,6 @@ func TestCompactionExecutor(t *testing.T) { ex.executeWithState(mockC) <-signal } else { - mockC.EXPECT().InjectDone().Return().Maybe() mockC.EXPECT().Compact().RunAndReturn( func() (*datapb.CompactionPlanResult, error) { signal <- struct{}{} diff --git a/internal/datanode/data_node.go b/internal/datanode/data_node.go index c926aab7c4d18..135f501fe746c 100644 --- a/internal/datanode/data_node.go +++ b/internal/datanode/data_node.go @@ -48,6 +48,7 @@ import ( "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/mq/msgdispatcher" + "github.com/milvus-io/milvus/pkg/util/conc" "github.com/milvus-io/milvus/pkg/util/expr" "github.com/milvus-io/milvus/pkg/util/logutil" "github.com/milvus-io/milvus/pkg/util/metricsinfo" @@ -125,6 +126,7 @@ type DataNode struct { factory dependency.Factory reportImportRetryTimes uint // unitest set this value to 1 to save time, default is 10 + pool *conc.Pool[any] } // NewDataNode will return a DataNode with abnormal state. 
@@ -297,6 +299,7 @@ func (node *DataNode) Init() error { } else { node.eventManager = NewEventManager() } + node.pool = getOrCreateIOPool() log.Info("init datanode done", zap.String("Address", node.address)) }) diff --git a/internal/datanode/metacache/meta_cache.go b/internal/datanode/metacache/meta_cache.go index 24021c7a7a3b5..1c85946932943 100644 --- a/internal/datanode/metacache/meta_cache.go +++ b/internal/datanode/metacache/meta_cache.go @@ -27,9 +27,9 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/log" - "github.com/milvus-io/milvus/pkg/util/typeutil" ) +//go:generate mockery --name=MetaCache --structname=MockMetaCache --output=./ --filename=mock_meta_cache.go --with-expecter --inpackage type MetaCache interface { // Collection returns collection id of metacache. Collection() int64 @@ -41,8 +41,6 @@ type MetaCache interface { UpdateSegments(action SegmentAction, filters ...SegmentFilter) // RemoveSegments removes segments matches the provided filter. RemoveSegments(filters ...SegmentFilter) []int64 - // CompactSegments transfers compaction segment results inside the metacache. - CompactSegments(newSegmentID, partitionID int64, numRows int64, bfs *BloomFilterSet, oldSegmentIDs ...int64) // GetSegmentsBy returns segments statify the provided filters. GetSegmentsBy(filters ...SegmentFilter) []*SegmentInfo // GetSegmentByID returns segment with provided segment id if exists. @@ -51,6 +49,10 @@ type MetaCache interface { GetSegmentIDsBy(filters ...SegmentFilter) []int64 // PredictSegments returns the segment ids which may contain the provided primary key. PredictSegments(pk storage.PrimaryKey, filters ...SegmentFilter) ([]int64, bool) + // DetectMissingSegments returns the segment ids which is missing in datanode. + DetectMissingSegments(segments map[int64]struct{}) []int64 + // UpdateSegmentView updates the segments BF from datacoord view. + UpdateSegmentView(partitionID int64, newSegments []*datapb.SyncSegmentInfo, newSegmentsBF []*BloomFilterSet, allSegments map[int64]struct{}) } var _ MetaCache = (*metaCacheImpl)(nil) @@ -133,45 +135,6 @@ func (c *metaCacheImpl) addSegment(segment *SegmentInfo) { c.stateSegments[segment.State()][segID] = segment } -func (c *metaCacheImpl) CompactSegments(newSegmentID, partitionID int64, numOfRows int64, bfs *BloomFilterSet, oldSegmentIDs ...int64) { - c.mu.Lock() - defer c.mu.Unlock() - - compactTo := NullSegment - if numOfRows > 0 { - compactTo = newSegmentID - if _, ok := c.segmentInfos[newSegmentID]; !ok { - c.addSegment(&SegmentInfo{ - segmentID: newSegmentID, - partitionID: partitionID, - state: commonpb.SegmentState_Flushed, - level: datapb.SegmentLevel_L1, - flushedRows: numOfRows, - startPosRecorded: true, - bfs: bfs, - }) - } - log.Info("add compactTo segment info metacache", zap.Int64("segmentID", compactTo)) - } - - oldSet := typeutil.NewSet(oldSegmentIDs...) 
- for _, segment := range c.segmentInfos { - if oldSet.Contain(segment.segmentID) || - oldSet.Contain(segment.compactTo) { - updated := segment.Clone() - updated.compactTo = compactTo - updated.state = commonpb.SegmentState_Dropped - c.segmentInfos[segment.segmentID] = updated - delete(c.stateSegments[commonpb.SegmentState_Flushed], segment.segmentID) - c.stateSegments[commonpb.SegmentState_Dropped][segment.segmentID] = segment - log.Info("update segment compactTo", - zap.Int64("segmentID", segment.segmentID), - zap.Int64("originalCompactTo", segment.compactTo), - zap.Int64("compactTo", compactTo)) - } - } -} - func (c *metaCacheImpl) RemoveSegments(filters ...SegmentFilter) []int64 { if len(filters) == 0 { log.Warn("remove segment without filters is not allowed", zap.Stack("callstack")) @@ -285,3 +248,56 @@ func (c *metaCacheImpl) rangeWithFilter(fn func(id int64, info *SegmentInfo), fi } } } + +func (c *metaCacheImpl) DetectMissingSegments(segments map[int64]struct{}) []int64 { + c.mu.RLock() + defer c.mu.RUnlock() + + missingSegments := make([]int64, 0) + + for segID := range segments { + if _, ok := c.segmentInfos[segID]; !ok { + missingSegments = append(missingSegments, segID) + } + } + + return missingSegments +} + +func (c *metaCacheImpl) UpdateSegmentView(partitionID int64, + newSegments []*datapb.SyncSegmentInfo, + newSegmentsBF []*BloomFilterSet, + allSegments map[int64]struct{}, +) { + c.mu.Lock() + defer c.mu.Unlock() + + for i, info := range newSegments { + // check again + if _, ok := c.segmentInfos[info.GetSegmentId()]; !ok { + segInfo := &SegmentInfo{ + segmentID: info.GetSegmentId(), + partitionID: partitionID, + state: info.GetState(), + level: info.GetLevel(), + flushedRows: info.GetNumOfRows(), + startPosRecorded: true, + bfs: newSegmentsBF[i], + } + c.segmentInfos[info.GetSegmentId()] = segInfo + c.stateSegments[info.GetState()][info.GetSegmentId()] = segInfo + log.Info("metacache does not have segment, add it", zap.Int64("segmentID", info.GetSegmentId())) + } + } + + for segID, info := range c.segmentInfos { + if info.partitionID != partitionID { + continue + } + if _, ok := allSegments[segID]; !ok { + log.Info("remove dropped segment", zap.Int64("segmentID", segID)) + delete(c.segmentInfos, segID) + delete(c.stateSegments[info.State()], segID) + } + } +} diff --git a/internal/datanode/metacache/meta_cache_test.go b/internal/datanode/metacache/meta_cache_test.go index c3b1663761ecf..cdb5e0614d567 100644 --- a/internal/datanode/metacache/meta_cache_test.go +++ b/internal/datanode/metacache/meta_cache_test.go @@ -103,27 +103,6 @@ func (s *MetaCacheSuite) TestMetaInfo() { s.Equal(s.collSchema, s.cache.Schema()) } -func (s *MetaCacheSuite) TestCompactSegments() { - for i, seg := range s.newSegments { - // compaction from flushed[i], unflushed[i] and invalidSeg to new[i] - s.cache.CompactSegments(seg, s.partitionIDs[i], 100, NewBloomFilterSet(), s.flushedSegments[i], s.growingSegments[i], s.invaliedSeg) - } - - for i, partitionID := range s.partitionIDs { - segs := s.cache.GetSegmentsBy(WithPartitionID(partitionID)) - for _, seg := range segs { - if seg.SegmentID() == s.newSegments[i] { - s.Equal(commonpb.SegmentState_Flushed, seg.State()) - s.Equal(int64(100), seg.NumOfRows()) - s.Equal(datapb.SegmentLevel_L1, seg.Level()) - } - if seg.SegmentID() == s.flushedSegments[i] { - s.Equal(s.newSegments[i], seg.CompactTo()) - } - } - } -} - func (s *MetaCacheSuite) TestAddSegment() { testSegs := []int64{100, 101, 102} for _, segID := range testSegs { @@ -210,6 +189,50 @@ func (s 
*MetaCacheSuite) TestPredictSegments() { s.EqualValues(1, predict[0]) } +func (s *MetaCacheSuite) Test_DetectMissingSegments() { + segments := map[int64]struct{}{ + 1: {}, 2: {}, 3: {}, 4: {}, 5: {}, 6: {}, 7: {}, 8: {}, 9: {}, 10: {}, + } + + missingSegments := s.cache.DetectMissingSegments(segments) + s.ElementsMatch(missingSegments, []int64{9, 10}) +} + +func (s *MetaCacheSuite) Test_UpdateSegmentView() { + addSegments := []*datapb.SyncSegmentInfo{ + { + SegmentId: 100, + PkStatsLog: nil, + State: commonpb.SegmentState_Flushed, + Level: datapb.SegmentLevel_L1, + NumOfRows: 10240, + }, + } + addSegmentsBF := []*BloomFilterSet{ + NewBloomFilterSet(), + } + segments := map[int64]struct{}{ + 1: {}, 2: {}, 3: {}, 4: {}, 5: {}, 6: {}, 7: {}, 8: {}, 100: {}, + } + + s.cache.UpdateSegmentView(1, addSegments, addSegmentsBF, segments) + + addSegments = []*datapb.SyncSegmentInfo{ + { + SegmentId: 101, + PkStatsLog: nil, + State: commonpb.SegmentState_Flushed, + Level: datapb.SegmentLevel_L1, + NumOfRows: 10240, + }, + } + + segments = map[int64]struct{}{ + 1: {}, 2: {}, 3: {}, 4: {}, 5: {}, 6: {}, 7: {}, 8: {}, 101: {}, + } + s.cache.UpdateSegmentView(1, addSegments, addSegmentsBF, segments) +} + func TestMetaCacheSuite(t *testing.T) { suite.Run(t, new(MetaCacheSuite)) } diff --git a/internal/datanode/metacache/mock_meta_cache.go b/internal/datanode/metacache/mock_meta_cache.go index b8c7bd0035d60..0bd69c61766d7 100644 --- a/internal/datanode/metacache/mock_meta_cache.go +++ b/internal/datanode/metacache/mock_meta_cache.go @@ -114,53 +114,46 @@ func (_c *MockMetaCache_Collection_Call) RunAndReturn(run func() int64) *MockMet return _c } -// CompactSegments provides a mock function with given fields: newSegmentID, partitionID, numRows, bfs, oldSegmentIDs -func (_m *MockMetaCache) CompactSegments(newSegmentID int64, partitionID int64, numRows int64, bfs *BloomFilterSet, oldSegmentIDs ...int64) { - _va := make([]interface{}, len(oldSegmentIDs)) - for _i := range oldSegmentIDs { - _va[_i] = oldSegmentIDs[_i] +// DetectMissingSegments provides a mock function with given fields: segments +func (_m *MockMetaCache) DetectMissingSegments(segments map[int64]struct{}) []int64 { + ret := _m.Called(segments) + + var r0 []int64 + if rf, ok := ret.Get(0).(func(map[int64]struct{}) []int64); ok { + r0 = rf(segments) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]int64) + } } - var _ca []interface{} - _ca = append(_ca, newSegmentID, partitionID, numRows, bfs) - _ca = append(_ca, _va...) - _m.Called(_ca...) 
+ + return r0 } -// MockMetaCache_CompactSegments_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'CompactSegments' -type MockMetaCache_CompactSegments_Call struct { +// MockMetaCache_DetectMissingSegments_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DetectMissingSegments' +type MockMetaCache_DetectMissingSegments_Call struct { *mock.Call } -// CompactSegments is a helper method to define mock.On call -// - newSegmentID int64 -// - partitionID int64 -// - numRows int64 -// - bfs *BloomFilterSet -// - oldSegmentIDs ...int64 -func (_e *MockMetaCache_Expecter) CompactSegments(newSegmentID interface{}, partitionID interface{}, numRows interface{}, bfs interface{}, oldSegmentIDs ...interface{}) *MockMetaCache_CompactSegments_Call { - return &MockMetaCache_CompactSegments_Call{Call: _e.mock.On("CompactSegments", - append([]interface{}{newSegmentID, partitionID, numRows, bfs}, oldSegmentIDs...)...)} +// DetectMissingSegments is a helper method to define mock.On call +// - segments map[int64]struct{} +func (_e *MockMetaCache_Expecter) DetectMissingSegments(segments interface{}) *MockMetaCache_DetectMissingSegments_Call { + return &MockMetaCache_DetectMissingSegments_Call{Call: _e.mock.On("DetectMissingSegments", segments)} } -func (_c *MockMetaCache_CompactSegments_Call) Run(run func(newSegmentID int64, partitionID int64, numRows int64, bfs *BloomFilterSet, oldSegmentIDs ...int64)) *MockMetaCache_CompactSegments_Call { +func (_c *MockMetaCache_DetectMissingSegments_Call) Run(run func(segments map[int64]struct{})) *MockMetaCache_DetectMissingSegments_Call { _c.Call.Run(func(args mock.Arguments) { - variadicArgs := make([]int64, len(args)-4) - for i, a := range args[4:] { - if a != nil { - variadicArgs[i] = a.(int64) - } - } - run(args[0].(int64), args[1].(int64), args[2].(int64), args[3].(*BloomFilterSet), variadicArgs...) 
+ run(args[0].(map[int64]struct{})) }) return _c } -func (_c *MockMetaCache_CompactSegments_Call) Return() *MockMetaCache_CompactSegments_Call { - _c.Call.Return() +func (_c *MockMetaCache_DetectMissingSegments_Call) Return(_a0 []int64) *MockMetaCache_DetectMissingSegments_Call { + _c.Call.Return(_a0) return _c } -func (_c *MockMetaCache_CompactSegments_Call) RunAndReturn(run func(int64, int64, int64, *BloomFilterSet, ...int64)) *MockMetaCache_CompactSegments_Call { +func (_c *MockMetaCache_DetectMissingSegments_Call) RunAndReturn(run func(map[int64]struct{}) []int64) *MockMetaCache_DetectMissingSegments_Call { _c.Call.Return(run) return _c } @@ -517,6 +510,42 @@ func (_c *MockMetaCache_Schema_Call) RunAndReturn(run func() *schemapb.Collectio return _c } +// UpdateSegmentView provides a mock function with given fields: partitionID, newSegments, newSegmentsBF, allSegments +func (_m *MockMetaCache) UpdateSegmentView(partitionID int64, newSegments []*datapb.SyncSegmentInfo, newSegmentsBF []*BloomFilterSet, allSegments map[int64]struct{}) { + _m.Called(partitionID, newSegments, newSegmentsBF, allSegments) +} + +// MockMetaCache_UpdateSegmentView_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'UpdateSegmentView' +type MockMetaCache_UpdateSegmentView_Call struct { + *mock.Call +} + +// UpdateSegmentView is a helper method to define mock.On call +// - partitionID int64 +// - newSegments []*datapb.SyncSegmentInfo +// - newSegmentsBF []*BloomFilterSet +// - allSegments map[int64]struct{} +func (_e *MockMetaCache_Expecter) UpdateSegmentView(partitionID interface{}, newSegments interface{}, newSegmentsBF interface{}, allSegments interface{}) *MockMetaCache_UpdateSegmentView_Call { + return &MockMetaCache_UpdateSegmentView_Call{Call: _e.mock.On("UpdateSegmentView", partitionID, newSegments, newSegmentsBF, allSegments)} +} + +func (_c *MockMetaCache_UpdateSegmentView_Call) Run(run func(partitionID int64, newSegments []*datapb.SyncSegmentInfo, newSegmentsBF []*BloomFilterSet, allSegments map[int64]struct{})) *MockMetaCache_UpdateSegmentView_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(int64), args[1].([]*datapb.SyncSegmentInfo), args[2].([]*BloomFilterSet), args[3].(map[int64]struct{})) + }) + return _c +} + +func (_c *MockMetaCache_UpdateSegmentView_Call) Return() *MockMetaCache_UpdateSegmentView_Call { + _c.Call.Return() + return _c +} + +func (_c *MockMetaCache_UpdateSegmentView_Call) RunAndReturn(run func(int64, []*datapb.SyncSegmentInfo, []*BloomFilterSet, map[int64]struct{})) *MockMetaCache_UpdateSegmentView_Call { + _c.Call.Return(run) + return _c +} + // UpdateSegments provides a mock function with given fields: action, filters func (_m *MockMetaCache) UpdateSegments(action SegmentAction, filters ...SegmentFilter) { _va := make([]interface{}, len(filters)) diff --git a/internal/datanode/services.go b/internal/datanode/services.go index d18f10ada6cd6..6ffa618988b48 100644 --- a/internal/datanode/services.go +++ b/internal/datanode/services.go @@ -23,6 +23,7 @@ import ( "context" "fmt" + "github.com/samber/lo" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" @@ -30,12 +31,16 @@ import ( "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/datanode/importv2" "github.com/milvus-io/milvus/internal/datanode/io" + "github.com/milvus-io/milvus/internal/datanode/metacache" + "github.com/milvus-io/milvus/internal/metastore/kv/binlog" 
"github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/internalpb" + "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/tracer" + "github.com/milvus-io/milvus/pkg/util/conc" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/metricsinfo" "github.com/milvus-io/milvus/pkg/util/tsoutil" @@ -261,6 +266,9 @@ func (node *DataNode) SyncSegments(ctx context.Context, req *datapb.SyncSegments log := log.Ctx(ctx).With( zap.Int64("planID", req.GetPlanID()), zap.Int64("nodeID", node.GetNodeID()), + zap.Int64("collectionID", req.GetCollectionId()), + zap.Int64("partitionID", req.GetPartitionId()), + zap.String("channel", req.GetChannelName()), ) log.Info("DataNode receives SyncSegments") @@ -270,8 +278,61 @@ func (node *DataNode) SyncSegments(ctx context.Context, req *datapb.SyncSegments return merr.Status(err), nil } - // TODO: sheep, add a new DropCompaction interface, deprecate SyncSegments - node.compactionExecutor.removeTask(req.GetPlanID()) + if len(req.GetSegmentInfos()) <= 0 { + log.Info("sync segments is empty, skip it") + return merr.Success(), nil + } + + ds, ok := node.flowgraphManager.GetFlowgraphService(req.GetChannelName()) + if !ok { + node.compactionExecutor.discardPlan(req.GetChannelName()) + err := merr.WrapErrChannelNotFound(req.GetChannelName()) + log.Warn("failed to get flow graph service", zap.Error(err)) + return merr.Status(err), nil + } + + allSegments := make(map[int64]struct{}) + for segID := range req.GetSegmentInfos() { + allSegments[segID] = struct{}{} + } + + missingSegments := ds.metacache.DetectMissingSegments(allSegments) + + newSegments := make([]*datapb.SyncSegmentInfo, 0, len(missingSegments)) + futures := make([]*conc.Future[any], 0, len(missingSegments)) + + for _, segID := range missingSegments { + segID := segID + future := node.pool.Submit(func() (any, error) { + newSeg := req.GetSegmentInfos()[segID] + var val *metacache.BloomFilterSet + var err error + err = binlog.DecompressBinLog(storage.StatsBinlog, req.GetCollectionId(), req.GetPartitionId(), newSeg.GetSegmentId(), []*datapb.FieldBinlog{newSeg.GetPkStatsLog()}) + if err != nil { + log.Warn("failed to DecompressBinLog", zap.Error(err)) + return val, err + } + pks, err := loadStats(ctx, node.chunkManager, ds.metacache.Schema(), newSeg.GetSegmentId(), []*datapb.FieldBinlog{newSeg.GetPkStatsLog()}) + if err != nil { + log.Warn("failed to load segment stats log", zap.Error(err)) + return val, err + } + val = metacache.NewBloomFilterSet(pks...) + return val, nil + }) + futures = append(futures, future) + } + + err := conc.AwaitAll(futures...) 
+ if err != nil { + return merr.Status(err), nil + } + + newSegmentsBF := lo.Map(futures, func(future *conc.Future[any], _ int) *metacache.BloomFilterSet { + return future.Value().(*metacache.BloomFilterSet) + }) + + ds.metacache.UpdateSegmentView(req.GetPartitionId(), newSegments, newSegmentsBF, allSegments) return merr.Success(), nil } diff --git a/internal/datanode/services_test.go b/internal/datanode/services_test.go index 117d265ea5f68..97c5ed51c4bce 100644 --- a/internal/datanode/services_test.go +++ b/internal/datanode/services_test.go @@ -493,3 +493,39 @@ func (s *DataNodeServicesSuite) TestQuerySlot() { s.NoError(merr.Error(resp.GetStatus())) }) } + +func (s *DataNodeServicesSuite) TestSyncSegments() { + s.Run("node not healthy", func() { + s.SetupTest() + s.node.UpdateStateCode(commonpb.StateCode_Abnormal) + + ctx := context.Background() + status, err := s.node.SyncSegments(ctx, nil) + s.NoError(err) + s.False(merr.Ok(status)) + s.ErrorIs(merr.Error(status), merr.ErrServiceNotReady) + }) + + s.Run("normal case", func() { + s.SetupTest() + ctx := context.Background() + req := &datapb.SyncSegmentsRequest{ + ChannelName: "channel1", + PartitionId: 2, + CollectionId: 1, + SegmentInfos: map[int64]*datapb.SyncSegmentInfo{ + 3: { + SegmentId: 3, + PkStatsLog: nil, + State: commonpb.SegmentState_Dropped, + Level: 2, + NumOfRows: 1024, + }, + }, + } + + status, err := s.node.SyncSegments(ctx, req) + s.NoError(err) + s.False(merr.Ok(status)) + }) +} diff --git a/internal/datanode/syncmgr/mock_sync_manager.go b/internal/datanode/syncmgr/mock_sync_manager.go index bf996d09aabff..ee19d324d3943 100644 --- a/internal/datanode/syncmgr/mock_sync_manager.go +++ b/internal/datanode/syncmgr/mock_sync_manager.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.30.1. DO NOT EDIT. +// Code generated by mockery v2.32.4. DO NOT EDIT. 
package syncmgr diff --git a/internal/proto/data_coord.proto b/internal/proto/data_coord.proto index 38781e958fa8d..3c1e97a24d645 100644 --- a/internal/proto/data_coord.proto +++ b/internal/proto/data_coord.proto @@ -496,15 +496,29 @@ message CompactionStateRequest { common.MsgBase base = 1; } +message SyncSegmentInfo { + int64 segment_id = 1; + FieldBinlog pk_stats_log = 2; + common.SegmentState state = 3; + SegmentLevel level = 4; + int64 num_of_rows = 5; +} + message SyncSegmentsRequest { + // Deprecated, after v2.4.3 int64 planID = 1; + // Deprecated, after v2.4.3 int64 compacted_to = 2; + // Deprecated, after v2.4.3 int64 num_of_rows = 3; + // Deprecated, after v2.4.3 repeated int64 compacted_from = 4; + // Deprecated, after v2.4.3 repeated FieldBinlog stats_logs = 5; string channel_name = 6; int64 partition_id = 7; int64 collection_id = 8; + map segment_infos = 9; } message CompactionSegmentBinlogs { diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 5dbd86f009462..47431b907985e 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -2750,6 +2750,7 @@ type dataCoordConfig struct { SingleCompactionDeltalogMaxNum ParamItem `refreshable:"true"` GlobalCompactionInterval ParamItem `refreshable:"false"` ChannelCheckpointMaxLag ParamItem `refreshable:"true"` + SyncSegmentsInterval ParamItem `refreshable:"false"` // LevelZero Segment EnableLevelZeroSegment ParamItem `refreshable:"false"` @@ -3090,6 +3091,14 @@ During compaction, the size of segment # of rows is able to exceed segment max # } p.ChannelCheckpointMaxLag.Init(base.mgr) + p.SyncSegmentsInterval = ParamItem{ + Key: "dataCoord.sync.interval", + Version: "2.4.3", + Doc: "The time interval for regularly syncing segments", + DefaultValue: "600", // 10 * 60 seconds + } + p.SyncSegmentsInterval.Init(base.mgr) + // LevelZeroCompaction p.EnableLevelZeroSegment = ParamItem{ Key: "dataCoord.segment.enableLevelZero", From 416a2cf507ac8e132dcd5ccc3bade1fe7e896696 Mon Sep 17 00:00:00 2001 From: Chun Han <116052805+MrPresent-Han@users.noreply.github.com> Date: Thu, 30 May 2024 05:51:44 -0400 Subject: [PATCH 100/126] fix: query iterator lack results(#33137) (#33422) related: #33137 adding has_more_result_tag for various level's reduce to rectify reduce_stop_for_best Signed-off-by: MrPresent-Han --- internal/core/src/common/QueryResult.h | 1 + .../query/visitors/ExecPlanNodeVisitor.cpp | 4 +- internal/core/src/segcore/InsertRecord.h | 24 +++--- .../core/src/segcore/SegmentGrowingImpl.h | 2 +- .../core/src/segcore/SegmentInterface.cpp | 2 +- internal/core/src/segcore/SegmentInterface.h | 2 +- internal/core/src/segcore/SegmentSealedImpl.h | 2 +- .../unittest/test_offset_ordered_array.cpp | 86 ++++++++++++------- .../core/unittest/test_offset_ordered_map.cpp | 76 ++++++++++------ internal/proto/internal.proto | 1 + internal/proto/segcore.proto | 1 + internal/proxy/task_query.go | 15 ++-- internal/proxy/task_query_test.go | 15 +++- internal/querynodev2/segments/result.go | 10 ++- internal/querynodev2/segments/result_test.go | 50 +++++++++-- internal/querynodev2/tasks/query_task.go | 1 + pkg/util/typeutil/schema.go | 8 +- 17 files changed, 197 insertions(+), 103 deletions(-) diff --git a/internal/core/src/common/QueryResult.h b/internal/core/src/common/QueryResult.h index 9fd2d13d7776b..4cb7fef00e5dd 100644 --- a/internal/core/src/common/QueryResult.h +++ b/internal/core/src/common/QueryResult.h @@ -228,6 +228,7 @@ struct RetrieveResult { void* segment_; 
std::vector result_offsets_; std::vector field_data_; + bool has_more_result = true; }; using RetrieveResultPtr = std::shared_ptr; diff --git a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp index e5a7a8f1c77f3..d9e8a6c125b00 100644 --- a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp +++ b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp @@ -291,8 +291,10 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) { false_filtered_out = true; segment->timestamp_filter(bitset_holder, timestamp_); } - retrieve_result.result_offsets_ = + auto results_pair = segment->find_first(node.limit_, bitset_holder, false_filtered_out); + retrieve_result.result_offsets_ = std::move(results_pair.first); + retrieve_result.has_more_result = results_pair.second; retrieve_result_opt_ = std::move(retrieve_result); } diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h index 7e85a64c231d8..7da03c1828b36 100644 --- a/internal/core/src/segcore/InsertRecord.h +++ b/internal/core/src/segcore/InsertRecord.h @@ -60,7 +60,7 @@ class OffsetMap { using OffsetType = int64_t; // TODO: in fact, we can retrieve the pk here. Not sure which way is more efficient. - virtual std::vector + virtual std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const = 0; @@ -109,7 +109,7 @@ class OffsetOrderedMap : public OffsetMap { return map_.empty(); } - std::vector + std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const override { @@ -131,7 +131,7 @@ class OffsetOrderedMap : public OffsetMap { } private: - std::vector + std::pair, bool> find_first_by_index(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const { @@ -144,8 +144,8 @@ class OffsetOrderedMap : public OffsetMap { limit = std::min(limit, cnt); std::vector seg_offsets; seg_offsets.reserve(limit); - for (auto it = map_.begin(); hit_num < limit && it != map_.end(); - it++) { + auto it = map_.begin(); + for (; hit_num < limit && it != map_.end(); it++) { for (auto seg_offset : it->second) { if (seg_offset >= size) { // Frequently concurrent insert/query will cause this case. @@ -161,7 +161,7 @@ class OffsetOrderedMap : public OffsetMap { } } } - return seg_offsets; + return {seg_offsets, it != map_.end()}; } private: @@ -226,7 +226,7 @@ class OffsetOrderedArray : public OffsetMap { return array_.empty(); } - std::vector + std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const override { @@ -248,7 +248,7 @@ class OffsetOrderedArray : public OffsetMap { } private: - std::vector + std::pair, bool> find_first_by_index(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const { @@ -261,11 +261,11 @@ class OffsetOrderedArray : public OffsetMap { limit = std::min(limit, cnt); std::vector seg_offsets; seg_offsets.reserve(limit); - for (auto it = array_.begin(); hit_num < limit && it != array_.end(); - it++) { + auto it = array_.begin(); + for (; hit_num < limit && it != array_.end(); it++) { auto seg_offset = it->second; if (seg_offset >= size) { - // In fact, this case won't happend on sealed segments. + // In fact, this case won't happen on sealed segments. 
continue; } @@ -274,7 +274,7 @@ class OffsetOrderedArray : public OffsetMap { hit_num++; } } - return seg_offsets; + return {seg_offsets, it != array_.end()}; } void diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index 1cc308216bc82..06f9048d5ae22 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -268,7 +268,7 @@ class SegmentGrowingImpl : public SegmentGrowing { return true; } - std::vector + std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const override { diff --git a/internal/core/src/segcore/SegmentInterface.cpp b/internal/core/src/segcore/SegmentInterface.cpp index 3d79fc0b35971..0ad695ff597e1 100644 --- a/internal/core/src/segcore/SegmentInterface.cpp +++ b/internal/core/src/segcore/SegmentInterface.cpp @@ -91,6 +91,7 @@ SegmentInternalInterface::Retrieve(tracer::TraceContext* trace_ctx, query::ExecPlanNodeVisitor visitor(*this, timestamp); auto retrieve_results = visitor.get_retrieve_result(*plan->plan_node_); retrieve_results.segment_ = (void*)this; + results->set_has_more_result(retrieve_results.has_more_result); auto result_rows = retrieve_results.result_offsets_.size(); int64_t output_data_size = 0; @@ -120,7 +121,6 @@ SegmentInternalInterface::Retrieve(tracer::TraceContext* trace_ctx, retrieve_results.result_offsets_.size(), ignore_non_pk, true); - return results; } diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 2715e387c76b3..6a2dbf1485bfd 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -290,7 +290,7 @@ class SegmentInternalInterface : public SegmentInterface { * @param false_filtered_out * @return All candidates offsets. */ - virtual std::vector + virtual std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const = 0; diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index 21306616e810e..b7e8b89e2c40a 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -133,7 +133,7 @@ class SegmentSealedImpl : public SegmentSealed { const IdArray* pks, const Timestamp* timestamps) override; - std::vector + std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const override { diff --git a/internal/core/unittest/test_offset_ordered_array.cpp b/internal/core/unittest/test_offset_ordered_array.cpp index ec371c6114540..1eb2e272b0f8f 100644 --- a/internal/core/unittest/test_offset_ordered_array.cpp +++ b/internal/core/unittest/test_offset_ordered_array.cpp @@ -65,8 +65,6 @@ using TypeOfPks = testing::Types; TYPED_TEST_SUITE_P(TypedOffsetOrderedArrayTest); TYPED_TEST_P(TypedOffsetOrderedArrayTest, find_first) { - std::vector offsets; - // not sealed. ASSERT_ANY_THROW(this->map_.find_first(Unlimited, {}, true)); @@ -81,40 +79,62 @@ TYPED_TEST_P(TypedOffsetOrderedArrayTest, find_first) { this->seal(); // all is satisfied. 
- BitsetType all(num); - all.set(); - offsets = this->map_.find_first(num / 2, all, true); - ASSERT_EQ(num / 2, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); - } - offsets = this->map_.find_first(Unlimited, all, true); - ASSERT_EQ(num, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + BitsetType all(num); + all.set(); + { + auto [offsets, has_more_res] = + this->map_.find_first(num / 2, all, true); + ASSERT_EQ(num / 2, offsets.size()); + ASSERT_TRUE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } + } + { + auto [offsets, has_more_res] = + this->map_.find_first(Unlimited, all, true); + ASSERT_EQ(num, offsets.size()); + ASSERT_FALSE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } + } } - - // corner case, segment offset exceeds the size of bitset. - BitsetType all_minus_1(num - 1); - all_minus_1.set(); - offsets = this->map_.find_first(num / 2, all_minus_1, true); - ASSERT_EQ(num / 2, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + // corner case, segment offset exceeds the size of bitset. + BitsetType all_minus_1(num - 1); + all_minus_1.set(); + { + auto [offsets, has_more_res] = + this->map_.find_first(num / 2, all_minus_1, true); + ASSERT_EQ(num / 2, offsets.size()); + ASSERT_TRUE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } + } + { + auto [offsets, has_more_res] = + this->map_.find_first(Unlimited, all_minus_1, true); + ASSERT_EQ(all_minus_1.size(), offsets.size()); + ASSERT_FALSE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } + } } - offsets = this->map_.find_first(Unlimited, all_minus_1, true); - ASSERT_EQ(all_minus_1.size(), offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + // none is satisfied. + BitsetType none(num); + none.reset(); + auto result_pair = this->map_.find_first(num / 2, none, true); + ASSERT_EQ(0, result_pair.first.size()); + ASSERT_TRUE(result_pair.second); + result_pair = this->map_.find_first(NoLimit, none, true); + ASSERT_EQ(0, result_pair.first.size()); + ASSERT_TRUE(result_pair.second); } - - // none is satisfied. - BitsetType none(num); - none.reset(); - offsets = this->map_.find_first(num / 2, none, true); - ASSERT_EQ(0, offsets.size()); - offsets = this->map_.find_first(NoLimit, none, true); - ASSERT_EQ(0, offsets.size()); } REGISTER_TYPED_TEST_SUITE_P(TypedOffsetOrderedArrayTest, find_first); diff --git a/internal/core/unittest/test_offset_ordered_map.cpp b/internal/core/unittest/test_offset_ordered_map.cpp index be16aed9e0eed..36f4bafc83f7a 100644 --- a/internal/core/unittest/test_offset_ordered_map.cpp +++ b/internal/core/unittest/test_offset_ordered_map.cpp @@ -60,12 +60,13 @@ using TypeOfPks = testing::Types; TYPED_TEST_SUITE_P(TypedOffsetOrderedMapTest); TYPED_TEST_P(TypedOffsetOrderedMapTest, find_first) { - std::vector offsets; - // no data. 
- offsets = this->map_.find_first(Unlimited, {}, true); - ASSERT_EQ(0, offsets.size()); - + { + auto [offsets, has_more_res] = + this->map_.find_first(Unlimited, {}, true); + ASSERT_EQ(0, offsets.size()); + ASSERT_FALSE(has_more_res); + } // insert 10 entities. int num = 10; auto data = this->random_generate(num); @@ -76,38 +77,63 @@ TYPED_TEST_P(TypedOffsetOrderedMapTest, find_first) { // all is satisfied. BitsetType all(num); all.set(); - offsets = this->map_.find_first(num / 2, all, true); - ASSERT_EQ(num / 2, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + + { + auto [offsets, has_more_res] = + this->map_.find_first(num / 2, all, true); + ASSERT_EQ(num / 2, offsets.size()); + ASSERT_TRUE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } } - offsets = this->map_.find_first(Unlimited, all, true); - ASSERT_EQ(num, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + auto [offsets, has_more_res] = + this->map_.find_first(Unlimited, all, true); + ASSERT_EQ(num, offsets.size()); + ASSERT_FALSE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } } // corner case, segment offset exceeds the size of bitset. BitsetType all_minus_1(num - 1); all_minus_1.set(); - offsets = this->map_.find_first(num / 2, all_minus_1, true); - ASSERT_EQ(num / 2, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + auto [offsets, has_more_res] = + this->map_.find_first(num / 2, all_minus_1, true); + ASSERT_EQ(num / 2, offsets.size()); + ASSERT_TRUE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } } - offsets = this->map_.find_first(Unlimited, all_minus_1, true); - ASSERT_EQ(all_minus_1.size(), offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + auto [offsets, has_more_res] = + this->map_.find_first(Unlimited, all_minus_1, true); + ASSERT_EQ(all_minus_1.size(), offsets.size()); + ASSERT_FALSE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } } // none is satisfied. 
BitsetType none(num); none.reset(); - offsets = this->map_.find_first(num / 2, none, true); - ASSERT_EQ(0, offsets.size()); - offsets = this->map_.find_first(NoLimit, none, true); - ASSERT_EQ(0, offsets.size()); + { + auto [offsets, has_more_res] = + this->map_.find_first(num / 2, none, true); + ASSERT_TRUE(has_more_res); + ASSERT_EQ(0, offsets.size()); + } + { + auto [offsets, has_more_res] = + this->map_.find_first(NoLimit, none, true); + ASSERT_TRUE(has_more_res); + ASSERT_EQ(0, offsets.size()); + } } REGISTER_TYPED_TEST_SUITE_P(TypedOffsetOrderedMapTest, find_first); diff --git a/internal/proto/internal.proto b/internal/proto/internal.proto index 6715af58d92d2..980cf3576989c 100644 --- a/internal/proto/internal.proto +++ b/internal/proto/internal.proto @@ -198,6 +198,7 @@ message RetrieveResults { // query request cost CostAggregation costAggregation = 13; int64 all_retrieve_count = 14; + bool has_more_result = 15; } message LoadIndex { diff --git a/internal/proto/segcore.proto b/internal/proto/segcore.proto index ea7697f48c98d..aaf502bc1ec06 100644 --- a/internal/proto/segcore.proto +++ b/internal/proto/segcore.proto @@ -10,6 +10,7 @@ message RetrieveResults { repeated int64 offset = 2; repeated schema.FieldData fields_data = 3; int64 all_retrieve_count = 4; + bool has_more_result = 5; } message LoadFieldMeta { diff --git a/internal/proxy/task_query.go b/internal/proxy/task_query.go index 18abb6ed0531b..212015b440330 100644 --- a/internal/proxy/task_query.go +++ b/internal/proxy/task_query.go @@ -607,9 +607,9 @@ func reduceRetrieveResults(ctx context.Context, retrieveResults []*internalpb.Re idSet := make(map[interface{}]struct{}) cursors := make([]int64, len(validRetrieveResults)) - retrieveLimit := typeutil.Unlimited if queryParams != nil && queryParams.limit != typeutil.Unlimited { - retrieveLimit = queryParams.limit + queryParams.offset + // reduceStopForBest will try to get as many results as possible + // so loopEnd in this case will be set to the sum of all results' size if !queryParams.reduceStopForBest { loopEnd = int(queryParams.limit) } @@ -618,7 +618,7 @@ func reduceRetrieveResults(ctx context.Context, retrieveResults []*internalpb.Re // handle offset if queryParams != nil && queryParams.offset > 0 { for i := int64(0); i < queryParams.offset; i++ { - sel, drainOneResult := typeutil.SelectMinPK(retrieveLimit, validRetrieveResults, cursors) + sel, drainOneResult := typeutil.SelectMinPK(validRetrieveResults, cursors) if sel == -1 || (queryParams.reduceStopForBest && drainOneResult) { return ret, nil } @@ -626,16 +626,11 @@ func reduceRetrieveResults(ctx context.Context, retrieveResults []*internalpb.Re } } - reduceStopForBest := false - if queryParams != nil { - reduceStopForBest = queryParams.reduceStopForBest - } - var retSize int64 maxOutputSize := paramtable.Get().QuotaConfig.MaxOutputSize.GetAsInt64() for j := 0; j < loopEnd; { - sel, drainOneResult := typeutil.SelectMinPK(retrieveLimit, validRetrieveResults, cursors) - if sel == -1 || (reduceStopForBest && drainOneResult) { + sel, drainOneResult := typeutil.SelectMinPK(validRetrieveResults, cursors) + if sel == -1 || (queryParams.reduceStopForBest && drainOneResult) { break } diff --git a/internal/proxy/task_query_test.go b/internal/proxy/task_query_test.go index 5112b53ac2552..9b62b9ece5240 100644 --- a/internal/proxy/task_query_test.go +++ b/internal/proxy/task_query_test.go @@ -479,8 +479,7 @@ func TestTaskQuery_functions(t *testing.T) { }, FieldsData: fieldDataArray2, } - - result, err := 
reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{result1, result2}, nil) + result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{result1, result2}, &queryParams{limit: 2}) assert.NoError(t, err) assert.Equal(t, 2, len(result.GetFieldsData())) assert.Equal(t, Int64Array, result.GetFieldsData()[0].GetScalars().GetLongData().Data) @@ -488,7 +487,7 @@ func TestTaskQuery_functions(t *testing.T) { }) t.Run("test nil results", func(t *testing.T) { - ret, err := reduceRetrieveResults(context.Background(), nil, nil) + ret, err := reduceRetrieveResults(context.Background(), nil, &queryParams{}) assert.NoError(t, err) assert.Empty(t, ret.GetFieldsData()) }) @@ -594,6 +593,8 @@ func TestTaskQuery_functions(t *testing.T) { }) t.Run("test stop reduce for best for limit", func(t *testing.T) { + r1.HasMoreResult = true + r2.HasMoreResult = false result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{r1, r2}, &queryParams{limit: 2, reduceStopForBest: true}) @@ -605,6 +606,8 @@ func TestTaskQuery_functions(t *testing.T) { }) t.Run("test stop reduce for best for limit and offset", func(t *testing.T) { + r1.HasMoreResult = true + r2.HasMoreResult = true result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{r1, r2}, &queryParams{limit: 1, offset: 1, reduceStopForBest: true}) @@ -614,6 +617,8 @@ func TestTaskQuery_functions(t *testing.T) { }) t.Run("test stop reduce for best for limit and offset", func(t *testing.T) { + r1.HasMoreResult = false + r2.HasMoreResult = true result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{r1, r2}, &queryParams{limit: 2, offset: 1, reduceStopForBest: true}) @@ -625,6 +630,8 @@ func TestTaskQuery_functions(t *testing.T) { }) t.Run("test stop reduce for best for unlimited set", func(t *testing.T) { + r1.HasMoreResult = false + r2.HasMoreResult = false result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{r1, r2}, &queryParams{limit: typeutil.Unlimited, reduceStopForBest: true}) @@ -635,7 +642,7 @@ func TestTaskQuery_functions(t *testing.T) { assert.InDeltaSlice(t, resultFloat[0:(len)*Dim], result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10) }) - t.Run("test stop reduce for best for unlimited set amd pffset", func(t *testing.T) { + t.Run("test stop reduce for best for unlimited set amd offset", func(t *testing.T) { result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{r1, r2}, &queryParams{limit: typeutil.Unlimited, offset: 3, reduceStopForBest: true}) diff --git a/internal/querynodev2/segments/result.go b/internal/querynodev2/segments/result.go index 34a001e6e6a84..0ac61d81c9bb7 100644 --- a/internal/querynodev2/segments/result.go +++ b/internal/querynodev2/segments/result.go @@ -401,6 +401,7 @@ func MergeInternalRetrieveResult(ctx context.Context, retrieveResults []*interna validRetrieveResults := []*internalpb.RetrieveResults{} relatedDataSize := int64(0) + hasMoreResult := false for _, r := range retrieveResults { ret.AllRetrieveCount += r.GetAllRetrieveCount() relatedDataSize += r.GetCostAggregation().GetTotalRelatedDataSize() @@ -410,7 +411,9 @@ func MergeInternalRetrieveResult(ctx context.Context, retrieveResults []*interna } validRetrieveResults = append(validRetrieveResults, r) loopEnd += size + hasMoreResult = hasMoreResult || r.GetHasMoreResult() } + ret.HasMoreResult = hasMoreResult if len(validRetrieveResults) == 0 { return 
ret, nil @@ -427,7 +430,7 @@ func MergeInternalRetrieveResult(ctx context.Context, retrieveResults []*interna var retSize int64 maxOutputSize := paramtable.Get().QuotaConfig.MaxOutputSize.GetAsInt64() for j := 0; j < loopEnd; { - sel, drainOneResult := typeutil.SelectMinPK(param.limit, validRetrieveResults, cursors) + sel, drainOneResult := typeutil.SelectMinPK(validRetrieveResults, cursors) if sel == -1 || (param.mergeStopForBest && drainOneResult) { break } @@ -515,6 +518,7 @@ func MergeSegcoreRetrieveResults(ctx context.Context, retrieveResults []*segcore validSegments := make([]Segment, 0, len(segments)) selectedOffsets := make([][]int64, 0, len(retrieveResults)) selectedIndexes := make([][]int64, 0, len(retrieveResults)) + hasMoreResult := false for i, r := range retrieveResults { size := typeutil.GetSizeOfIDs(r.GetIds()) ret.AllRetrieveCount += r.GetAllRetrieveCount() @@ -529,7 +533,9 @@ func MergeSegcoreRetrieveResults(ctx context.Context, retrieveResults []*segcore selectedOffsets = append(selectedOffsets, make([]int64, 0, len(r.GetOffset()))) selectedIndexes = append(selectedIndexes, make([]int64, 0, len(r.GetOffset()))) loopEnd += size + hasMoreResult = r.GetHasMoreResult() || hasMoreResult } + ret.HasMoreResult = hasMoreResult if len(validRetrieveResults) == 0 { return ret, nil @@ -549,7 +555,7 @@ func MergeSegcoreRetrieveResults(ctx context.Context, retrieveResults []*segcore var retSize int64 maxOutputSize := paramtable.Get().QuotaConfig.MaxOutputSize.GetAsInt64() for j := 0; j < loopEnd && (limit == -1 || availableCount < limit); j++ { - sel, drainOneResult := typeutil.SelectMinPK(param.limit, validRetrieveResults, cursors) + sel, drainOneResult := typeutil.SelectMinPK(validRetrieveResults, cursors) if sel == -1 || (param.mergeStopForBest && drainOneResult) { break } diff --git a/internal/querynodev2/segments/result_test.go b/internal/querynodev2/segments/result_test.go index 79e75007d670a..6fcaf4196584a 100644 --- a/internal/querynodev2/segments/result_test.go +++ b/internal/querynodev2/segments/result_test.go @@ -513,29 +513,46 @@ func (suite *ResultSuite) TestResult_MergeStopForBestResult() { FieldsData: fieldDataArray2, } suite.Run("merge stop finite limited", func() { + result1.HasMoreResult = true + result2.HasMoreResult = true result, err := MergeSegcoreRetrieveResultsV1(context.Background(), []*segcorepb.RetrieveResults{result1, result2}, NewMergeParam(3, make([]int64, 0), nil, true)) suite.NoError(err) suite.Equal(2, len(result.GetFieldsData())) + // has more result both, stop reduce when draining one result + // here, we can only get best result from 0 to 4 without 6, because result1 has more results suite.Equal([]int64{0, 1, 2, 3, 4}, result.GetIds().GetIntId().GetData()) - // here, we can only get best result from 0 to 4 without 6, because we can never know whether there is - // one potential 5 in following result1 suite.Equal([]int64{11, 22, 11, 22, 33}, result.GetFieldsData()[0].GetScalars().GetLongData().Data) suite.InDeltaSlice([]float32{1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 11, 22, 33, 44}, result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10) }) suite.Run("merge stop unlimited", func() { + result1.HasMoreResult = false + result2.HasMoreResult = false result, err := MergeSegcoreRetrieveResultsV1(context.Background(), []*segcorepb.RetrieveResults{result1, result2}, NewMergeParam(typeutil.Unlimited, make([]int64, 0), nil, true)) suite.NoError(err) suite.Equal(2, len(result.GetFieldsData())) + // as result1 and result2 don't have better 
results neither + // we can reduce all available result into the reduced result suite.Equal([]int64{0, 1, 2, 3, 4, 6}, result.GetIds().GetIntId().GetData()) - // here, we can only get best result from 0 to 4 without 6, because we can never know whether there is - // one potential 5 in following result1 suite.Equal([]int64{11, 22, 11, 22, 33, 33}, result.GetFieldsData()[0].GetScalars().GetLongData().Data) suite.InDeltaSlice([]float32{1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 11, 22, 33, 44, 11, 22, 33, 44}, result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10) }) + suite.Run("merge stop one limited", func() { + result1.HasMoreResult = true + result2.HasMoreResult = false + result, err := MergeSegcoreRetrieveResultsV1(context.Background(), []*segcorepb.RetrieveResults{result1, result2}, + NewMergeParam(typeutil.Unlimited, make([]int64, 0), nil, true)) + suite.NoError(err) + suite.Equal(2, len(result.GetFieldsData())) + // as result1 may have better results, stop reducing when draining it + suite.Equal([]int64{0, 1, 2, 3, 4}, result.GetIds().GetIntId().GetData()) + suite.Equal([]int64{11, 22, 11, 22, 33}, result.GetFieldsData()[0].GetScalars().GetLongData().Data) + suite.InDeltaSlice([]float32{1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 11, 22, 33, 44}, + result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10) + }) }) suite.Run("test stop internal merge for best", func() { @@ -559,6 +576,8 @@ func (suite *ResultSuite) TestResult_MergeStopForBestResult() { }, FieldsData: fieldDataArray2, } + result1.HasMoreResult = true + result2.HasMoreResult = false result, err := MergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{result1, result2}, NewMergeParam(3, make([]int64, 0), nil, true)) suite.NoError(err) @@ -590,11 +609,24 @@ func (suite *ResultSuite) TestResult_MergeStopForBestResult() { }, FieldsData: fieldDataArray2, } - result, err := MergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{result1, result2}, - NewMergeParam(3, make([]int64, 0), nil, true)) - suite.NoError(err) - suite.Equal(2, len(result.GetFieldsData())) - suite.Equal([]int64{0, 2, 4, 7}, result.GetIds().GetIntId().GetData()) + suite.Run("test drain one result without more results", func() { + result1.HasMoreResult = false + result2.HasMoreResult = false + result, err := MergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{result1, result2}, + NewMergeParam(3, make([]int64, 0), nil, true)) + suite.NoError(err) + suite.Equal(2, len(result.GetFieldsData())) + suite.Equal([]int64{0, 2, 4, 7}, result.GetIds().GetIntId().GetData()) + }) + suite.Run("test drain one result with more results", func() { + result1.HasMoreResult = false + result2.HasMoreResult = true + result, err := MergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{result1, result2}, + NewMergeParam(3, make([]int64, 0), nil, true)) + suite.NoError(err) + suite.Equal(2, len(result.GetFieldsData())) + suite.Equal([]int64{0, 2}, result.GetIds().GetIntId().GetData()) + }) }) } diff --git a/internal/querynodev2/tasks/query_task.go b/internal/querynodev2/tasks/query_task.go index 831d782d34b09..d4b0ec5c8061e 100644 --- a/internal/querynodev2/tasks/query_task.go +++ b/internal/querynodev2/tasks/query_task.go @@ -160,6 +160,7 @@ func (t *QueryTask) Execute() error { TotalRelatedDataSize: relatedDataSize, }, AllRetrieveCount: reducedResult.GetAllRetrieveCount(), + HasMoreResult: reducedResult.HasMoreResult, } return nil } diff --git 
a/pkg/util/typeutil/schema.go b/pkg/util/typeutil/schema.go index fd29f632f7c89..dfa35f2109dec 100644 --- a/pkg/util/typeutil/schema.go +++ b/pkg/util/typeutil/schema.go @@ -1323,10 +1323,11 @@ func ComparePK(pkA, pkB interface{}) bool { type ResultWithID interface { GetIds() *schemapb.IDs + GetHasMoreResult() bool } // SelectMinPK select the index of the minPK in results T of the cursors. -func SelectMinPK[T ResultWithID](limit int64, results []T, cursors []int64) (int, bool) { +func SelectMinPK[T ResultWithID](results []T, cursors []int64) (int, bool) { var ( sel = -1 drainResult = false @@ -1336,8 +1337,9 @@ func SelectMinPK[T ResultWithID](limit int64, results []T, cursors []int64) (int minStrPK string ) for i, cursor := range cursors { - // if result size < limit, this means we should ignore the result from this segment - if int(cursor) >= GetSizeOfIDs(results[i].GetIds()) && (GetSizeOfIDs(results[i].GetIds()) == int(limit)) { + // if cursor has run out of all results from one result and this result has more matched results + // in this case we have tell reduce to stop because better results may be retrieved in the following iteration + if int(cursor) >= GetSizeOfIDs(results[i].GetIds()) && (results[i].GetHasMoreResult()) { drainResult = true continue } From 23dedc2cbf086e206d6cfa7febbc151e33b4f585 Mon Sep 17 00:00:00 2001 From: sre-ci-robot Date: Thu, 30 May 2024 12:01:00 +0000 Subject: [PATCH 101/126] Update all contributors Signed-off-by: sre-ci-robot --- README.md | 5 +++-- README_CN.md | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4b05f41585111..f1566e7fd2719 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut ### All contributors
@@ -216,6 +216,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut + @@ -389,7 +390,6 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut - @@ -449,6 +449,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut + diff --git a/README_CN.md b/README_CN.md index 7e81979848b36..e11333770fd7f 100644 --- a/README_CN.md +++ b/README_CN.md @@ -154,7 +154,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 ### All contributors
@@ -198,6 +198,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 + @@ -371,7 +372,6 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 - @@ -431,6 +431,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 + From 0c5d8660aa978dc97ded9ff2a7a5f3171353b7c8 Mon Sep 17 00:00:00 2001 From: Jiquan Long Date: Fri, 31 May 2024 09:47:47 +0800 Subject: [PATCH 102/126] feat: support inverted index for array (#33452) issue: https://github.com/milvus-io/milvus/issues/27704 --------- Signed-off-by: longjiquan --- .../src/exec/expression/JsonContainsExpr.cpp | 98 ++++- .../src/exec/expression/JsonContainsExpr.h | 7 + internal/core/src/expr/ITypeExpr.h | 17 +- internal/core/src/index/IndexFactory.cpp | 105 +++--- internal/core/src/index/IndexFactory.h | 16 +- .../core/src/index/InvertedIndexTantivy.cpp | 342 ++++++++---------- .../core/src/index/InvertedIndexTantivy.h | 18 +- internal/core/src/index/TantivyConfig.h | 51 --- internal/core/src/indexbuilder/IndexFactory.h | 1 + internal/core/src/indexbuilder/index_c.cpp | 3 +- internal/core/src/pb/CMakeLists.txt | 8 +- internal/core/src/segcore/Types.h | 1 + internal/core/src/segcore/load_index_c.cpp | 51 ++- internal/core/src/segcore/load_index_c.h | 5 + internal/core/src/storage/Types.h | 1 + .../core/thirdparty/tantivy/CMakeLists.txt | 6 + internal/core/thirdparty/tantivy/ffi_demo.cpp | 17 + .../tantivy-binding/include/tantivy-binding.h | 18 + .../tantivy/tantivy-binding/src/demo_c.rs | 14 + .../tantivy-binding/src/index_writer.rs | 76 +++- .../tantivy-binding/src/index_writer_c.rs | 74 ++++ .../tantivy/tantivy-binding/src/lib.rs | 1 + .../core/thirdparty/tantivy/tantivy-wrapper.h | 65 ++++ internal/core/thirdparty/tantivy/test.cpp | 74 ++++ .../core/unittest/test_inverted_index.cpp | 25 +- internal/core/unittest/test_scalar_index.cpp | 28 +- internal/proto/cgo_msg.proto | 23 ++ .../querynodev2/segments/load_index_info.go | 32 ++ internal/querynodev2/segments/segment.go | 50 ++- pkg/util/indexparamcheck/inverted_checker.go | 3 +- .../indexparamcheck/inverted_checker_test.go | 2 +- scripts/generate_proto.sh | 3 + tests/python_client/testcases/test_index.py | 5 +- 33 files changed, 875 insertions(+), 365 deletions(-) delete mode 100644 internal/core/src/index/TantivyConfig.h create mode 100644 internal/core/thirdparty/tantivy/ffi_demo.cpp create mode 100644 internal/core/thirdparty/tantivy/tantivy-binding/src/demo_c.rs create mode 100644 internal/proto/cgo_msg.proto diff --git a/internal/core/src/exec/expression/JsonContainsExpr.cpp b/internal/core/src/exec/expression/JsonContainsExpr.cpp index 72251c301fb14..bbcc852c2a8e2 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.cpp +++ b/internal/core/src/exec/expression/JsonContainsExpr.cpp @@ -23,7 +23,14 @@ namespace exec { void PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { switch (expr_->column_.data_type_) { - case DataType::ARRAY: + case DataType::ARRAY: { + if (is_index_mode_) { + result = EvalArrayContainsForIndexSegment(); + } else { + result = EvalJsonContainsForDataSegment(); + } + break; + } case DataType::JSON: { if (is_index_mode_) { PanicInfo( @@ -94,7 +101,6 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { return ExecJsonContainsWithDiffType(); } } - break; } case proto::plan::JSONContainsExpr_JSONOp_ContainsAll: { if (IsArrayDataType(data_type)) { @@ -145,7 +151,6 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { return ExecJsonContainsAllWithDiffType(); } } - break; } default: 
PanicInfo(ExprInvalid, @@ -748,5 +753,92 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() { return res_vec; } +VectorPtr +PhyJsonContainsFilterExpr::EvalArrayContainsForIndexSegment() { + switch (expr_->column_.element_type_) { + case DataType::BOOL: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::INT8: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::INT16: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::INT32: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::INT64: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::FLOAT: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::DOUBLE: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::VARCHAR: + case DataType::STRING: { + return ExecArrayContainsForIndexSegmentImpl(); + } + default: + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type for " + "ExecArrayContainsForIndexSegmentImpl: {}", + expr_->column_.element_type_)); + } +} + +template +VectorPtr +PhyJsonContainsFilterExpr::ExecArrayContainsForIndexSegmentImpl() { + typedef std::conditional_t, + std::string, + ExprValueType> + GetType; + using Index = index::ScalarIndex; + auto real_batch_size = GetNextBatchSize(); + if (real_batch_size == 0) { + return nullptr; + } + + std::unordered_set elements; + for (auto const& element : expr_->vals_) { + elements.insert(GetValueFromProto(element)); + } + boost::container::vector elems(elements.begin(), elements.end()); + auto execute_sub_batch = + [this](Index* index_ptr, + const boost::container::vector& vals) { + switch (expr_->op_) { + case proto::plan::JSONContainsExpr_JSONOp_Contains: + case proto::plan::JSONContainsExpr_JSONOp_ContainsAny: { + return index_ptr->In(vals.size(), vals.data()); + } + case proto::plan::JSONContainsExpr_JSONOp_ContainsAll: { + TargetBitmap result(index_ptr->Count()); + result.set(); + for (size_t i = 0; i < vals.size(); i++) { + auto sub = index_ptr->In(1, &vals[i]); + result &= sub; + } + return result; + } + default: + PanicInfo( + ExprInvalid, + "unsupported array contains type {}", + proto::plan::JSONContainsExpr_JSONOp_Name(expr_->op_)); + } + }; + auto res = ProcessIndexChunks(execute_sub_batch, elems); + AssertInfo(res.size() == real_batch_size, + "internal error: expr processed rows {} not equal " + "expect batch size {}", + res.size(), + real_batch_size); + return std::make_shared(std::move(res)); +} + } //namespace exec } // namespace milvus diff --git a/internal/core/src/exec/expression/JsonContainsExpr.h b/internal/core/src/exec/expression/JsonContainsExpr.h index c757dc0d3fb92..a0cfdfdea0841 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.h +++ b/internal/core/src/exec/expression/JsonContainsExpr.h @@ -80,6 +80,13 @@ class PhyJsonContainsFilterExpr : public SegmentExpr { VectorPtr ExecJsonContainsWithDiffType(); + VectorPtr + EvalArrayContainsForIndexSegment(); + + template + VectorPtr + ExecArrayContainsForIndexSegmentImpl(); + private: std::shared_ptr expr_; }; diff --git a/internal/core/src/expr/ITypeExpr.h b/internal/core/src/expr/ITypeExpr.h index 102709aa16b83..6716f8af2f66f 100644 --- a/internal/core/src/expr/ITypeExpr.h +++ b/internal/core/src/expr/ITypeExpr.h @@ -113,11 +113,13 @@ IsMaterializedViewSupported(const DataType& data_type) { struct ColumnInfo { FieldId field_id_; DataType data_type_; + DataType element_type_; std::vector nested_path_; ColumnInfo(const proto::plan::ColumnInfo& column_info) 
: field_id_(column_info.field_id()), data_type_(static_cast(column_info.data_type())), + element_type_(static_cast(column_info.element_type())), nested_path_(column_info.nested_path().begin(), column_info.nested_path().end()) { } @@ -127,6 +129,7 @@ struct ColumnInfo { std::vector nested_path = {}) : field_id_(field_id), data_type_(data_type), + element_type_(DataType::NONE), nested_path_(std::move(nested_path)) { } @@ -140,6 +143,10 @@ struct ColumnInfo { return false; } + if (element_type_ != other.element_type_) { + return false; + } + for (int i = 0; i < nested_path_.size(); ++i) { if (nested_path_[i] != other.nested_path_[i]) { return false; @@ -151,10 +158,12 @@ struct ColumnInfo { std::string ToString() const { - return fmt::format("[FieldId:{}, data_type:{}, nested_path:{}]", - std::to_string(field_id_.get()), - data_type_, - milvus::Join(nested_path_, ",")); + return fmt::format( + "[FieldId:{}, data_type:{}, element_type:{}, nested_path:{}]", + std::to_string(field_id_.get()), + data_type_, + element_type_, + milvus::Join(nested_path_, ",")); } }; diff --git a/internal/core/src/index/IndexFactory.cpp b/internal/core/src/index/IndexFactory.cpp index 79409056d980e..cc660324f1b5a 100644 --- a/internal/core/src/index/IndexFactory.cpp +++ b/internal/core/src/index/IndexFactory.cpp @@ -35,13 +35,9 @@ template ScalarIndexPtr IndexFactory::CreateScalarIndex( const IndexType& index_type, - const storage::FileManagerContext& file_manager_context, - DataType d_type) { + const storage::FileManagerContext& file_manager_context) { if (index_type == INVERTED_INDEX_TYPE) { - TantivyConfig cfg; - cfg.data_type_ = d_type; - return std::make_unique>(cfg, - file_manager_context); + return std::make_unique>(file_manager_context); } if (index_type == BITMAP_INDEX_TYPE) { return std::make_unique>(file_manager_context); @@ -60,14 +56,11 @@ template <> ScalarIndexPtr IndexFactory::CreateScalarIndex( const IndexType& index_type, - const storage::FileManagerContext& file_manager_context, - DataType d_type) { + const storage::FileManagerContext& file_manager_context) { #if defined(__linux__) || defined(__APPLE__) if (index_type == INVERTED_INDEX_TYPE) { - TantivyConfig cfg; - cfg.data_type_ = d_type; return std::make_unique>( - cfg, file_manager_context); + file_manager_context); } if (index_type == BITMAP_INDEX_TYPE) { return std::make_unique>( @@ -84,13 +77,10 @@ ScalarIndexPtr IndexFactory::CreateScalarIndex( const IndexType& index_type, const storage::FileManagerContext& file_manager_context, - std::shared_ptr space, - DataType d_type) { + std::shared_ptr space) { if (index_type == INVERTED_INDEX_TYPE) { - TantivyConfig cfg; - cfg.data_type_ = d_type; - return std::make_unique>( - cfg, file_manager_context, space); + return std::make_unique>(file_manager_context, + space); } if (index_type == BITMAP_INDEX_TYPE) { return std::make_unique>(file_manager_context, @@ -104,14 +94,11 @@ ScalarIndexPtr IndexFactory::CreateScalarIndex( const IndexType& index_type, const storage::FileManagerContext& file_manager_context, - std::shared_ptr space, - DataType d_type) { + std::shared_ptr space) { #if defined(__linux__) || defined(__APPLE__) if (index_type == INVERTED_INDEX_TYPE) { - TantivyConfig cfg; - cfg.data_type_ = d_type; return std::make_unique>( - cfg, file_manager_context, space); + file_manager_context, space); } if (index_type == BITMAP_INDEX_TYPE) { return std::make_unique>( @@ -148,41 +135,32 @@ IndexFactory::CreateIndex( } IndexBasePtr -IndexFactory::CreateScalarIndex( - const CreateIndexInfo& 
create_index_info, +IndexFactory::CreatePrimitiveScalarIndex( + DataType data_type, + IndexType index_type, const storage::FileManagerContext& file_manager_context) { - auto data_type = create_index_info.field_type; - auto index_type = create_index_info.index_type; - switch (data_type) { // create scalar index case DataType::BOOL: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::INT8: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::INT16: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::INT32: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::INT64: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::FLOAT: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::DOUBLE: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); // create string index case DataType::STRING: case DataType::VARCHAR: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, + file_manager_context); default: throw SegcoreError( DataTypeInvalid, @@ -190,6 +168,24 @@ IndexFactory::CreateScalarIndex( } } +IndexBasePtr +IndexFactory::CreateScalarIndex( + const CreateIndexInfo& create_index_info, + const storage::FileManagerContext& file_manager_context) { + switch (create_index_info.field_type) { + case DataType::ARRAY: + return CreatePrimitiveScalarIndex( + static_cast( + file_manager_context.fieldDataMeta.schema.element_type()), + create_index_info.index_type, + file_manager_context); + default: + return CreatePrimitiveScalarIndex(create_index_info.field_type, + create_index_info.index_type, + file_manager_context); + } +} + IndexBasePtr IndexFactory::CreateVectorIndex( const CreateIndexInfo& create_index_info, @@ -257,32 +253,25 @@ IndexFactory::CreateScalarIndex(const CreateIndexInfo& create_index_info, switch (data_type) { // create scalar index case DataType::BOOL: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::INT8: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::INT16: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::INT32: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::INT64: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::FLOAT: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::DOUBLE: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + 
return CreateScalarIndex(index_type, file_manager, space); // create string index case DataType::STRING: case DataType::VARCHAR: return CreateScalarIndex( - index_type, file_manager, space, data_type); + index_type, file_manager, space); default: throw SegcoreError( DataTypeInvalid, diff --git a/internal/core/src/index/IndexFactory.h b/internal/core/src/index/IndexFactory.h index 75bd090292907..47b255ab4e912 100644 --- a/internal/core/src/index/IndexFactory.h +++ b/internal/core/src/index/IndexFactory.h @@ -65,6 +65,13 @@ class IndexFactory { CreateVectorIndex(const CreateIndexInfo& create_index_info, const storage::FileManagerContext& file_manager_context); + IndexBasePtr + CreatePrimitiveScalarIndex( + DataType data_type, + IndexType index_type, + const storage::FileManagerContext& file_manager_context = + storage::FileManagerContext()); + IndexBasePtr CreateScalarIndex(const CreateIndexInfo& create_index_info, const storage::FileManagerContext& file_manager_context = @@ -89,15 +96,13 @@ class IndexFactory { ScalarIndexPtr CreateScalarIndex(const IndexType& index_type, const storage::FileManagerContext& file_manager = - storage::FileManagerContext(), - DataType d_type = DataType::NONE); + storage::FileManagerContext()); template ScalarIndexPtr CreateScalarIndex(const IndexType& index_type, const storage::FileManagerContext& file_manager, - std::shared_ptr space, - DataType d_type = DataType::NONE); + std::shared_ptr space); }; // template <> @@ -112,6 +117,5 @@ ScalarIndexPtr IndexFactory::CreateScalarIndex( const IndexType& index_type, const storage::FileManagerContext& file_manager_context, - std::shared_ptr space, - DataType d_type); + std::shared_ptr space); } // namespace milvus::index diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index 2c212704aaf49..f09297dd33269 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -23,12 +23,50 @@ #include "InvertedIndexTantivy.h" namespace milvus::index { +inline TantivyDataType +get_tantivy_data_type(proto::schema::DataType data_type) { + switch (data_type) { + case proto::schema::DataType::Bool: { + return TantivyDataType::Bool; + } + + case proto::schema::DataType::Int8: + case proto::schema::DataType::Int16: + case proto::schema::DataType::Int32: + case proto::schema::DataType::Int64: { + return TantivyDataType::I64; + } + + case proto::schema::DataType::Float: + case proto::schema::DataType::Double: { + return TantivyDataType::F64; + } + + case proto::schema::DataType::VarChar: { + return TantivyDataType::Keyword; + } + + default: + PanicInfo(ErrorCode::NotImplemented, + fmt::format("not implemented data type: {}", data_type)); + } +} + +inline TantivyDataType +get_tantivy_data_type(const proto::schema::FieldSchema& schema) { + switch (schema.data_type()) { + case proto::schema::Array: + return get_tantivy_data_type(schema.element_type()); + default: + return get_tantivy_data_type(schema.data_type()); + } +} + template InvertedIndexTantivy::InvertedIndexTantivy( - const TantivyConfig& cfg, const storage::FileManagerContext& ctx, std::shared_ptr space) - : cfg_(cfg), space_(space) { + : space_(space), schema_(ctx.fieldDataMeta.schema) { mem_file_manager_ = std::make_shared(ctx, ctx.space_); disk_file_manager_ = std::make_shared(ctx, ctx.space_); auto field = @@ -36,7 +74,7 @@ InvertedIndexTantivy::InvertedIndexTantivy( auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix(); path_ = prefix; 
boost::filesystem::create_directories(path_); - d_type_ = cfg_.to_tantivy_data_type(); + d_type_ = get_tantivy_data_type(schema_); if (tantivy_index_exist(path_.c_str())) { LOG_INFO( "index {} already exists, which should happen in loading progress", @@ -114,83 +152,7 @@ InvertedIndexTantivy::Build(const Config& config) { AssertInfo(insert_files.has_value(), "insert_files were empty"); auto field_datas = mem_file_manager_->CacheRawDataToMemory(insert_files.value()); - switch (cfg_.data_type_) { - case DataType::BOOL: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data(static_cast(data->Data()), - n); - } - break; - } - - case DataType::INT8: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT16: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT32: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT64: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::FLOAT: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::DOUBLE: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::VARCHAR: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - default: - PanicInfo(ErrorCode::NotImplemented, - fmt::format("todo: not supported, {}", cfg_.data_type_)); - } + build_index(field_datas); } template @@ -211,84 +173,7 @@ InvertedIndexTantivy::BuildV2(const Config& config) { field_data->FillFieldData(col_data); field_datas.push_back(field_data); } - - switch (cfg_.data_type_) { - case DataType::BOOL: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data(static_cast(data->Data()), - n); - } - break; - } - - case DataType::INT8: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT16: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT32: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT64: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::FLOAT: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::DOUBLE: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::VARCHAR: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - 
wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - default: - PanicInfo(ErrorCode::NotImplemented, - fmt::format("todo: not supported, {}", cfg_.data_type_)); - } + build_index(field_datas); } template @@ -333,7 +218,8 @@ InvertedIndexTantivy::In(size_t n, const T* values) { template const TargetBitmap InvertedIndexTantivy::NotIn(size_t n, const T* values) { - TargetBitmap bitset(Count(), true); + TargetBitmap bitset(Count()); + bitset.set(); for (size_t i = 0; i < n; ++i) { auto array = wrapper_->term_query(values[i]); apply_hits(bitset, array, false); @@ -425,51 +311,107 @@ void InvertedIndexTantivy::BuildWithRawData(size_t n, const void* values, const Config& config) { - if constexpr (!std::is_same_v) { - TantivyConfig cfg; - if constexpr (std::is_same_v) { - cfg.data_type_ = DataType::INT8; - } - if constexpr (std::is_same_v) { - cfg.data_type_ = DataType::INT16; + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Int8); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Int16); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Int32); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Int64); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Float); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Double); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::VarChar); + } + boost::uuids::random_generator generator; + auto uuid = generator(); + auto prefix = boost::uuids::to_string(uuid); + path_ = fmt::format("/tmp/{}", prefix); + boost::filesystem::create_directories(path_); + d_type_ = get_tantivy_data_type(schema_); + std::string field = "test_inverted_index"; + wrapper_ = std::make_shared( + field.c_str(), d_type_, path_.c_str()); + wrapper_->add_data(static_cast(values), n); + finish(); +} + +template +void +InvertedIndexTantivy::build_index( + const std::vector>& field_datas) { + switch (schema_.data_type()) { + case proto::schema::DataType::Bool: + case proto::schema::DataType::Int8: + case proto::schema::DataType::Int16: + case proto::schema::DataType::Int32: + case proto::schema::DataType::Int64: + case proto::schema::DataType::Float: + case proto::schema::DataType::Double: + case proto::schema::DataType::String: + case proto::schema::DataType::VarChar: { + for (const auto& data : field_datas) { + auto n = data->get_num_rows(); + wrapper_->add_data(static_cast(data->Data()), n); + } + break; } - if constexpr (std::is_same_v) { - cfg.data_type_ = DataType::INT32; + + case proto::schema::DataType::Array: { + build_index_for_array(field_datas); + break; } - if constexpr (std::is_same_v) { - cfg.data_type_ = DataType::INT64; + + default: + PanicInfo(ErrorCode::NotImplemented, + fmt::format("Inverted index not supported on {}", + schema_.data_type())); + } +} + +template +void +InvertedIndexTantivy::build_index_for_array( + const std::vector>& field_datas) { + for (const auto& data : field_datas) { + auto n = data->get_num_rows(); + auto array_column = static_cast(data->Data()); + for (int64_t i = 0; i < n; i++) { + assert(array_column[i].get_element_type() == + static_cast(schema_.element_type())); + wrapper_->template add_multi_data( + reinterpret_cast(array_column[i].data()), + array_column[i].length()); } - if constexpr (std::is_same_v) { - cfg.data_type_ = DataType::VARCHAR; + } +} + +template <> +void 
+InvertedIndexTantivy::build_index_for_array( + const std::vector>& field_datas) { + for (const auto& data : field_datas) { + auto n = data->get_num_rows(); + auto array_column = static_cast(data->Data()); + for (int64_t i = 0; i < n; i++) { + assert(array_column[i].get_element_type() == + static_cast(schema_.element_type())); + std::vector output; + for (int64_t j = 0; j < array_column[i].length(); j++) { + output.push_back( + array_column[i].template get_data(j)); + } + wrapper_->template add_multi_data(output.data(), output.size()); } - boost::uuids::random_generator generator; - auto uuid = generator(); - auto prefix = boost::uuids::to_string(uuid); - path_ = fmt::format("/tmp/{}", prefix); - boost::filesystem::create_directories(path_); - cfg_ = cfg; - d_type_ = cfg_.to_tantivy_data_type(); - std::string field = "test_inverted_index"; - wrapper_ = std::make_shared( - field.c_str(), d_type_, path_.c_str()); - wrapper_->add_data(static_cast(values), n); - finish(); - } else { - boost::uuids::random_generator generator; - auto uuid = generator(); - auto prefix = boost::uuids::to_string(uuid); - path_ = fmt::format("/tmp/{}", prefix); - boost::filesystem::create_directories(path_); - cfg_ = TantivyConfig{ - .data_type_ = DataType::VARCHAR, - }; - d_type_ = cfg_.to_tantivy_data_type(); - std::string field = "test_inverted_index"; - wrapper_ = std::make_shared( - field.c_str(), d_type_, path_.c_str()); - wrapper_->add_data(static_cast(values), - n); - finish(); } } diff --git a/internal/core/src/index/InvertedIndexTantivy.h b/internal/core/src/index/InvertedIndexTantivy.h index 0ea2f64d869d3..cc0178804c343 100644 --- a/internal/core/src/index/InvertedIndexTantivy.h +++ b/internal/core/src/index/InvertedIndexTantivy.h @@ -18,7 +18,6 @@ #include "tantivy-binding.h" #include "tantivy-wrapper.h" #include "index/StringIndex.h" -#include "index/TantivyConfig.h" #include "storage/space.h" namespace milvus::index { @@ -36,13 +35,11 @@ class InvertedIndexTantivy : public ScalarIndex { InvertedIndexTantivy() = default; - explicit InvertedIndexTantivy(const TantivyConfig& cfg, - const storage::FileManagerContext& ctx) - : InvertedIndexTantivy(cfg, ctx, nullptr) { + explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx) + : InvertedIndexTantivy(ctx, nullptr) { } - explicit InvertedIndexTantivy(const TantivyConfig& cfg, - const storage::FileManagerContext& ctx, + explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx, std::shared_ptr space); ~InvertedIndexTantivy(); @@ -160,11 +157,18 @@ class InvertedIndexTantivy : public ScalarIndex { void finish(); + void + build_index(const std::vector>& field_datas); + + void + build_index_for_array( + const std::vector>& field_datas); + private: std::shared_ptr wrapper_; - TantivyConfig cfg_; TantivyDataType d_type_; std::string path_; + proto::schema::FieldSchema schema_; /* * To avoid IO amplification, we use both mem file manager & disk file manager diff --git a/internal/core/src/index/TantivyConfig.h b/internal/core/src/index/TantivyConfig.h deleted file mode 100644 index 355b4c76efc9d..0000000000000 --- a/internal/core/src/index/TantivyConfig.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (C) 2019-2020 Zilliz. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software distributed under the License -// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -// or implied. See the License for the specific language governing permissions and limitations under the License - -#pragma once - -#include "storage/Types.h" -#include "tantivy-binding.h" - -namespace milvus::index { -struct TantivyConfig { - DataType data_type_; - - TantivyDataType - to_tantivy_data_type() { - switch (data_type_) { - case DataType::BOOL: { - return TantivyDataType::Bool; - } - - case DataType::INT8: - case DataType::INT16: - case DataType::INT32: - case DataType::INT64: { - return TantivyDataType::I64; - } - - case DataType::FLOAT: - case DataType::DOUBLE: { - return TantivyDataType::F64; - } - - case DataType::VARCHAR: { - return TantivyDataType::Keyword; - } - - default: - PanicInfo( - ErrorCode::NotImplemented, - fmt::format("not implemented data type: {}", data_type_)); - } - } -}; -} // namespace milvus::index \ No newline at end of file diff --git a/internal/core/src/indexbuilder/IndexFactory.h b/internal/core/src/indexbuilder/IndexFactory.h index cd361499b4065..1380a6e9817d3 100644 --- a/internal/core/src/indexbuilder/IndexFactory.h +++ b/internal/core/src/indexbuilder/IndexFactory.h @@ -60,6 +60,7 @@ class IndexFactory { case DataType::DOUBLE: case DataType::VARCHAR: case DataType::STRING: + case DataType::ARRAY: return CreateScalarIndex(type, config, context); case DataType::VECTOR_FLOAT: diff --git a/internal/core/src/indexbuilder/index_c.cpp b/internal/core/src/indexbuilder/index_c.cpp index ae319cc26d61f..7ccaf7c414a24 100644 --- a/internal/core/src/indexbuilder/index_c.cpp +++ b/internal/core/src/indexbuilder/index_c.cpp @@ -190,7 +190,8 @@ CreateIndex(CIndex* res_index, build_index_info->collectionid(), build_index_info->partitionid(), build_index_info->segmentid(), - build_index_info->field_schema().fieldid()}; + build_index_info->field_schema().fieldid(), + build_index_info->field_schema()}; milvus::storage::IndexMeta index_meta{ build_index_info->segmentid(), diff --git a/internal/core/src/pb/CMakeLists.txt b/internal/core/src/pb/CMakeLists.txt index 3c00203cf4c25..35726d9c24c65 100644 --- a/internal/core/src/pb/CMakeLists.txt +++ b/internal/core/src/pb/CMakeLists.txt @@ -11,12 +11,10 @@ find_package(Protobuf REQUIRED) +file(GLOB_RECURSE milvus_proto_srcs + "${CMAKE_CURRENT_SOURCE_DIR}/*.cc") add_library(milvus_proto STATIC - common.pb.cc - index_cgo_msg.pb.cc - plan.pb.cc - schema.pb.cc - segcore.pb.cc + ${milvus_proto_srcs} ) message(STATUS "milvus proto sources: " ${milvus_proto_srcs}) diff --git a/internal/core/src/segcore/Types.h b/internal/core/src/segcore/Types.h index 73ba7fcb188b6..106799ce2610f 100644 --- a/internal/core/src/segcore/Types.h +++ b/internal/core/src/segcore/Types.h @@ -46,6 +46,7 @@ struct LoadIndexInfo { std::string uri; int64_t index_store_version; IndexVersion index_engine_version; + proto::schema::FieldSchema schema; }; } // namespace milvus::segcore diff --git a/internal/core/src/segcore/load_index_c.cpp b/internal/core/src/segcore/load_index_c.cpp index 7f851948545d3..3df3a92879751 100644 --- a/internal/core/src/segcore/load_index_c.cpp +++ b/internal/core/src/segcore/load_index_c.cpp @@ -25,6 +25,7 @@ #include "storage/Util.h" #include "storage/RemoteChunkManagerSingleton.h" #include "storage/LocalChunkManagerSingleton.h" 
+#include "pb/cgo_msg.pb.h" bool IsLoadWithDisk(const char* index_type, int index_engine_version) { @@ -258,7 +259,8 @@ AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info) { load_index_info->collection_id, load_index_info->partition_id, load_index_info->segment_id, - load_index_info->field_id}; + load_index_info->field_id, + load_index_info->schema}; milvus::storage::IndexMeta index_meta{load_index_info->segment_id, load_index_info->field_id, load_index_info->index_build_id, @@ -484,3 +486,50 @@ AppendStorageInfo(CLoadIndexInfo c_load_index_info, load_index_info->uri = uri; load_index_info->index_store_version = version; } + +CStatus +FinishLoadIndexInfo(CLoadIndexInfo c_load_index_info, + const uint8_t* serialized_load_index_info, + const uint64_t len) { + try { + auto info_proto = std::make_unique(); + info_proto->ParseFromArray(serialized_load_index_info, len); + auto load_index_info = + static_cast(c_load_index_info); + // TODO: keep this since LoadIndexInfo is used by SegmentSealed. + { + load_index_info->collection_id = info_proto->collectionid(); + load_index_info->partition_id = info_proto->partitionid(); + load_index_info->segment_id = info_proto->segmentid(); + load_index_info->field_id = info_proto->field().fieldid(); + load_index_info->field_type = + static_cast(info_proto->field().data_type()); + load_index_info->enable_mmap = info_proto->enable_mmap(); + load_index_info->mmap_dir_path = info_proto->mmap_dir_path(); + load_index_info->index_id = info_proto->indexid(); + load_index_info->index_build_id = info_proto->index_buildid(); + load_index_info->index_version = info_proto->index_version(); + for (const auto& [k, v] : info_proto->index_params()) { + load_index_info->index_params[k] = v; + } + load_index_info->index_files.assign( + info_proto->index_files().begin(), + info_proto->index_files().end()); + load_index_info->uri = info_proto->uri(); + load_index_info->index_store_version = + info_proto->index_store_version(); + load_index_info->index_engine_version = + info_proto->index_engine_version(); + load_index_info->schema = info_proto->field(); + } + auto status = CStatus(); + status.error_code = milvus::Success; + status.error_msg = ""; + return status; + } catch (std::exception& e) { + auto status = CStatus(); + status.error_code = milvus::UnexpectedError; + status.error_msg = strdup(e.what()); + return status; + } +} diff --git a/internal/core/src/segcore/load_index_c.h b/internal/core/src/segcore/load_index_c.h index 7a3d89b797670..8755aa7396162 100644 --- a/internal/core/src/segcore/load_index_c.h +++ b/internal/core/src/segcore/load_index_c.h @@ -76,6 +76,11 @@ void AppendStorageInfo(CLoadIndexInfo c_load_index_info, const char* uri, int64_t version); + +CStatus +FinishLoadIndexInfo(CLoadIndexInfo c_load_index_info, + const uint8_t* serialized_load_index_info, + const uint64_t len); #ifdef __cplusplus } #endif diff --git a/internal/core/src/storage/Types.h b/internal/core/src/storage/Types.h index 924873dccda64..fbd72d0a59a78 100644 --- a/internal/core/src/storage/Types.h +++ b/internal/core/src/storage/Types.h @@ -64,6 +64,7 @@ struct FieldDataMeta { int64_t partition_id; int64_t segment_id; int64_t field_id; + proto::schema::FieldSchema schema; }; enum CodecType { diff --git a/internal/core/thirdparty/tantivy/CMakeLists.txt b/internal/core/thirdparty/tantivy/CMakeLists.txt index f4d928922874f..c1435a032a85e 100644 --- a/internal/core/thirdparty/tantivy/CMakeLists.txt +++ b/internal/core/thirdparty/tantivy/CMakeLists.txt @@ -71,3 +71,9 @@ 
target_link_libraries(bench_tantivy boost_filesystem dl ) + +add_executable(ffi_demo ffi_demo.cpp) +target_link_libraries(ffi_demo + tantivy_binding + dl + ) diff --git a/internal/core/thirdparty/tantivy/ffi_demo.cpp b/internal/core/thirdparty/tantivy/ffi_demo.cpp new file mode 100644 index 0000000000000..1626d655f175d --- /dev/null +++ b/internal/core/thirdparty/tantivy/ffi_demo.cpp @@ -0,0 +1,17 @@ +#include +#include + +#include "tantivy-binding.h" + +int +main(int argc, char* argv[]) { + std::vector data{"data1", "data2", "data3"}; + std::vector datas{}; + for (auto& s : data) { + datas.push_back(s.c_str()); + } + + print_vector_of_strings(datas.data(), datas.size()); + + return 0; +} diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h index 3b22018bf047e..045d4a50e6a2c 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h +++ b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h @@ -97,6 +97,24 @@ void tantivy_index_add_bools(void *ptr, const bool *array, uintptr_t len); void tantivy_index_add_keyword(void *ptr, const char *s); +void tantivy_index_add_multi_int8s(void *ptr, const int8_t *array, uintptr_t len); + +void tantivy_index_add_multi_int16s(void *ptr, const int16_t *array, uintptr_t len); + +void tantivy_index_add_multi_int32s(void *ptr, const int32_t *array, uintptr_t len); + +void tantivy_index_add_multi_int64s(void *ptr, const int64_t *array, uintptr_t len); + +void tantivy_index_add_multi_f32s(void *ptr, const float *array, uintptr_t len); + +void tantivy_index_add_multi_f64s(void *ptr, const double *array, uintptr_t len); + +void tantivy_index_add_multi_bools(void *ptr, const bool *array, uintptr_t len); + +void tantivy_index_add_multi_keywords(void *ptr, const char *const *array, uintptr_t len); + bool tantivy_index_exist(const char *path); +void print_vector_of_strings(const char *const *ptr, uintptr_t len); + } // extern "C" diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/demo_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/demo_c.rs new file mode 100644 index 0000000000000..257a41f17a891 --- /dev/null +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/demo_c.rs @@ -0,0 +1,14 @@ +use std::{ffi::{c_char, CStr}, slice}; + +#[no_mangle] +pub extern "C" fn print_vector_of_strings(ptr: *const *const c_char, len: usize) { + let arr : &[*const c_char] = unsafe { + slice::from_raw_parts(ptr, len) + }; + for element in arr { + let c_str = unsafe { + CStr::from_ptr(*element) + }; + println!("{}", c_str.to_str().unwrap()); + } +} \ No newline at end of file diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs index ce96a5b4d5a30..2c8d56bf38694 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs @@ -1,10 +1,11 @@ -use futures::executor::block_on; +use std::ffi::CStr; +use libc::c_char; use tantivy::schema::{Field, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, INDEXED}; -use tantivy::{doc, tokenizer, Index, IndexWriter, SingleSegmentIndexWriter}; +use tantivy::{doc, tokenizer, Index, SingleSegmentIndexWriter, Document}; use crate::data_type::TantivyDataType; -use crate::index_writer; + use crate::log::init_log; pub struct IndexWriterWrapper { @@ -98,7 +99,74 @@ 
impl IndexWriterWrapper { .unwrap(); } - pub fn finish(mut self) { + pub fn add_multi_i8s(&mut self, datas: &[i8]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data as i64); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_i16s(&mut self, datas: &[i16]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data as i64); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_i32s(&mut self, datas: &[i32]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data as i64); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_i64s(&mut self, datas: &[i64]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_f32s(&mut self, datas: &[f32]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data as f64); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_f64s(&mut self, datas: &[f64]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_bools(&mut self, datas: &[bool]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_keywords(&mut self, datas: &[*const c_char]) { + let mut document = Document::default(); + for element in datas { + let data = unsafe { + CStr::from_ptr(*element) + }; + document.add_field_value(self.field, data.to_str().unwrap()); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn finish(self) { self.index_writer .finalize() .expect("failed to build inverted index"); diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs index c8822781158e8..b13f550d7cb00 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs @@ -122,3 +122,77 @@ pub extern "C" fn tantivy_index_add_keyword(ptr: *mut c_void, s: *const c_char) let c_str = unsafe { CStr::from_ptr(s) }; unsafe { (*real).add_keyword(c_str.to_str().unwrap()) } } + +// --------------------------------------------- array ------------------------------------------ + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_int8s(ptr: *mut c_void, array: *const i8, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len); + (*real).add_multi_i8s(arr) + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_int16s(ptr: *mut c_void, array: *const i16, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_i16s(arr); + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_int32s(ptr: *mut c_void, array: *const i32, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_i32s(arr); + } +} + +#[no_mangle] +pub extern "C" fn 
tantivy_index_add_multi_int64s(ptr: *mut c_void, array: *const i64, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_i64s(arr); + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_f32s(ptr: *mut c_void, array: *const f32, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_f32s(arr); + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_f64s(ptr: *mut c_void, array: *const f64, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_f64s(arr); + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_bools(ptr: *mut c_void, array: *const bool, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_bools(arr); + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_keywords(ptr: *mut c_void, array: *const *const c_char, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len); + (*real).add_multi_keywords(arr) + } +} diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs index aa069cb3b32b6..c6193de3f6908 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs @@ -10,6 +10,7 @@ mod log; mod util; mod util_c; mod vec_collector; +mod demo_c; pub fn add(left: usize, right: usize) -> usize { left + right diff --git a/internal/core/thirdparty/tantivy/tantivy-wrapper.h b/internal/core/thirdparty/tantivy/tantivy-wrapper.h index 358f14ea49ed0..3076f502aee21 100644 --- a/internal/core/thirdparty/tantivy/tantivy-wrapper.h +++ b/internal/core/thirdparty/tantivy/tantivy-wrapper.h @@ -1,5 +1,7 @@ #include #include +#include +#include #include "tantivy-binding.h" namespace milvus::tantivy { @@ -49,6 +51,15 @@ struct RustArrayWrapper { std::cout << ss.str() << std::endl; } + std::set + to_set() { + std::set s; + for (int i = 0; i < array_.len; i++) { + s.insert(array_.array[i]); + } + return s; + } + RustArray array_; private: @@ -186,6 +197,60 @@ struct TantivyIndexWrapper { typeid(T).name()); } + template + void + add_multi_data(const T* array, uintptr_t len) { + assert(!finished_); + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_bools(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_int8s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_int16s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_int32s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_int64s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_f32s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_f64s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + std::vector views; + for (uintptr_t i = 0; i < len; i++) { + views.push_back(array[i].c_str()); + } + tantivy_index_add_multi_keywords(writer_, views.data(), len); + return; + } + + throw fmt::format( + "InvertedIndex.add_multi_data: unsupported data type: {}", + 
typeid(T).name()); + } + inline void finish() { if (!finished_) { diff --git a/internal/core/thirdparty/tantivy/test.cpp b/internal/core/thirdparty/tantivy/test.cpp index 1c67a69673a5c..602ea3449f0a2 100644 --- a/internal/core/thirdparty/tantivy/test.cpp +++ b/internal/core/thirdparty/tantivy/test.cpp @@ -200,6 +200,77 @@ test_32717() { } } +template +std::map> +build_inverted_index(const std::vector>& vec_of_array) { + std::map> inverted_index; + for (uint32_t i = 0; i < vec_of_array.size(); i++) { + for (const auto& term : vec_of_array[i]) { + inverted_index[term].insert(i); + } + } + return inverted_index; +} + +void +test_array_int() { + using T = int64_t; + + auto path = "/tmp/inverted-index/test-binding/"; + boost::filesystem::remove_all(path); + boost::filesystem::create_directories(path); + auto w = TantivyIndexWrapper("test_field_name", guess_data_type(), path); + + std::vector> vec_of_array{ + {10, 40, 50}, + {20, 50}, + {10, 50, 60}, + }; + + for (const auto& arr : vec_of_array) { + w.add_multi_data(arr.data(), arr.size()); + } + w.finish(); + + assert(w.count() == vec_of_array.size()); + + auto inverted_index = build_inverted_index(vec_of_array); + for (const auto& [term, posting_list] : inverted_index) { + auto hits = w.term_query(term).to_set(); + assert(posting_list == hits); + } +} + +void +test_array_string() { + using T = std::string; + + auto path = "/tmp/inverted-index/test-binding/"; + boost::filesystem::remove_all(path); + boost::filesystem::create_directories(path); + auto w = + TantivyIndexWrapper("test_field_name", TantivyDataType::Keyword, path); + + std::vector> vec_of_array{ + {"10", "40", "50"}, + {"20", "50"}, + {"10", "50", "60"}, + }; + + for (const auto& arr : vec_of_array) { + w.add_multi_data(arr.data(), arr.size()); + } + w.finish(); + + assert(w.count() == vec_of_array.size()); + + auto inverted_index = build_inverted_index(vec_of_array); + for (const auto& [term, posting_list] : inverted_index) { + auto hits = w.term_query(term).to_set(); + assert(posting_list == hits); + } +} + int main(int argc, char* argv[]) { test_32717(); @@ -216,5 +287,8 @@ main(int argc, char* argv[]) { run(); + test_array_int(); + test_array_string(); + return 0; } diff --git a/internal/core/unittest/test_inverted_index.cpp b/internal/core/unittest/test_inverted_index.cpp index eeddfe6e9d81a..d01813ab94e6a 100644 --- a/internal/core/unittest/test_inverted_index.cpp +++ b/internal/core/unittest/test_inverted_index.cpp @@ -32,13 +32,20 @@ auto gen_field_meta(int64_t collection_id = 1, int64_t partition_id = 2, int64_t segment_id = 3, - int64_t field_id = 101) -> storage::FieldDataMeta { - return storage::FieldDataMeta{ + int64_t field_id = 101, + DataType data_type = DataType::NONE, + DataType element_type = DataType::NONE) + -> storage::FieldDataMeta { + auto meta = storage::FieldDataMeta{ .collection_id = collection_id, .partition_id = partition_id, .segment_id = segment_id, .field_id = field_id, }; + meta.schema.set_data_type(static_cast(data_type)); + meta.schema.set_element_type( + static_cast(element_type)); + return meta; } auto @@ -86,7 +93,7 @@ struct ChunkManagerWrapper { }; } // namespace milvus::test -template +template void test_run() { int64_t collection_id = 1; @@ -96,8 +103,8 @@ test_run() { int64_t index_build_id = 1000; int64_t index_version = 10000; - auto field_meta = - test::gen_field_meta(collection_id, partition_id, segment_id, field_id); + auto field_meta = test::gen_field_meta( + collection_id, partition_id, segment_id, field_id, dtype, element_type); 
auto index_meta = test::gen_index_meta( segment_id, field_id, index_build_id, index_version); @@ -305,8 +312,12 @@ test_string() { int64_t index_build_id = 1000; int64_t index_version = 10000; - auto field_meta = - test::gen_field_meta(collection_id, partition_id, segment_id, field_id); + auto field_meta = test::gen_field_meta(collection_id, + partition_id, + segment_id, + field_id, + dtype, + DataType::NONE); auto index_meta = test::gen_index_meta( segment_id, field_id, index_build_id, index_version); diff --git a/internal/core/unittest/test_scalar_index.cpp b/internal/core/unittest/test_scalar_index.cpp index 2967523daf365..9a99bec26a272 100644 --- a/internal/core/unittest/test_scalar_index.cpp +++ b/internal/core/unittest/test_scalar_index.cpp @@ -53,6 +53,14 @@ TYPED_TEST_P(TypedScalarIndexTest, Dummy) { std::cout << milvus::GetDType() << std::endl; } +auto +GetTempFileManagerCtx(CDataType data_type) { + auto ctx = milvus::storage::FileManagerContext(); + ctx.fieldDataMeta.schema.set_data_type( + static_cast(data_type)); + return ctx; +} + TYPED_TEST_P(TypedScalarIndexTest, Constructor) { using T = TypeParam; auto dtype = milvus::GetDType(); @@ -63,7 +71,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Constructor) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); } } @@ -77,7 +85,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Count) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -96,7 +104,7 @@ TYPED_TEST_P(TypedScalarIndexTest, HasRawData) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -116,7 +124,7 @@ TYPED_TEST_P(TypedScalarIndexTest, In) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -135,7 +143,7 @@ TYPED_TEST_P(TypedScalarIndexTest, NotIn) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -154,7 +162,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Reverse) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -173,7 +181,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Range) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -192,7 +200,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Codec) { create_index_info.index_type = index_type; auto index = 
milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -201,7 +209,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Codec) { auto binary_set = index->Serialize(nullptr); auto copy_index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); copy_index->Load(binary_set); auto copy_scalar_index = @@ -372,6 +380,8 @@ TYPED_TEST_P(TypedScalarIndexTestV2, Base) { auto space = TestSpace(temp_path, vec_size, dataset, scalars); milvus::storage::FileManagerContext file_manager_context( {}, {.field_name = "scalar"}, chunk_manager, space); + file_manager_context.fieldDataMeta.schema.set_data_type( + static_cast(dtype)); auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info, file_manager_context, space); diff --git a/internal/proto/cgo_msg.proto b/internal/proto/cgo_msg.proto new file mode 100644 index 0000000000000..6d851e95e0550 --- /dev/null +++ b/internal/proto/cgo_msg.proto @@ -0,0 +1,23 @@ +syntax = "proto3"; + +package milvus.proto.cgo; +option go_package="github.com/milvus-io/milvus/internal/proto/cgopb"; + +import "schema.proto"; + +message LoadIndexInfo { + int64 collectionID = 1; + int64 partitionID = 2; + int64 segmentID = 3; + schema.FieldSchema field = 5; + bool enable_mmap = 6; + string mmap_dir_path = 7; + int64 indexID = 8; + int64 index_buildID = 9; + int64 index_version = 10; + map index_params = 11; + repeated string index_files = 12; + string uri = 13; + int64 index_store_version = 14; + int32 index_engine_version = 15; +} diff --git a/internal/querynodev2/segments/load_index_info.go b/internal/querynodev2/segments/load_index_info.go index c5c1572475c40..04632bed95f2d 100644 --- a/internal/querynodev2/segments/load_index_info.go +++ b/internal/querynodev2/segments/load_index_info.go @@ -29,11 +29,13 @@ import ( "runtime" "unsafe" + "github.com/golang/protobuf/proto" "github.com/pingcap/log" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/datacoord" + "github.com/milvus-io/milvus/internal/proto/cgopb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/pkg/common" @@ -245,3 +247,33 @@ func (li *LoadIndexInfo) appendIndexEngineVersion(ctx context.Context, indexEngi return HandleCStatus(ctx, &status, "AppendIndexEngineVersion failed") } + +func (li *LoadIndexInfo) finish(ctx context.Context, info *cgopb.LoadIndexInfo) error { + marshaled, err := proto.Marshal(info) + if err != nil { + return err + } + + var status C.CStatus + _, _ = GetDynamicPool().Submit(func() (any, error) { + status = C.FinishLoadIndexInfo(li.cLoadIndexInfo, (*C.uint8_t)(unsafe.Pointer(&marshaled[0])), (C.uint64_t)(len(marshaled))) + return nil, nil + }).Await() + + if err := HandleCStatus(ctx, &status, "FinishLoadIndexInfo failed"); err != nil { + return err + } + + _, _ = GetLoadPool().Submit(func() (any, error) { + if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() { + status = C.AppendIndexV3(li.cLoadIndexInfo) + } else { + traceCtx := ParseCTraceContext(ctx) + status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo) + runtime.KeepAlive(traceCtx) + } + return nil, nil + }).Await() + + return HandleCStatus(ctx, &status, "AppendIndex failed") +} diff --git 
a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 13ae49c91aa11..3382b4373a364 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -45,6 +45,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" milvus_storage "github.com/milvus-io/milvus-storage/go/storage" "github.com/milvus-io/milvus-storage/go/storage/options" + "github.com/milvus-io/milvus/internal/proto/cgopb" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/proto/segcorepb" @@ -56,6 +57,9 @@ import ( "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/indexparamcheck" + "github.com/milvus-io/milvus/pkg/util/indexparams" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/metautil" "github.com/milvus-io/milvus/pkg/util/paramtable" @@ -1266,18 +1270,58 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn return err } defer deleteLoadIndexInfo(loadIndexInfo) + + schema, err := typeutil.CreateSchemaHelper(s.GetCollection().Schema()) + if err != nil { + return err + } + fieldSchema, err := schema.GetFieldFromID(indexInfo.GetFieldID()) + if err != nil { + return err + } + + indexParams := funcutil.KeyValuePair2Map(indexInfo.IndexParams) + // as Knowhere reports error if encounter an unknown param, we need to delete it + delete(indexParams, common.MmapEnabledKey) + + // some build params also exist in indexParams, which are useless during loading process + if indexParams["index_type"] == indexparamcheck.IndexDISKANN { + if err := indexparams.SetDiskIndexLoadParams(paramtable.Get(), indexParams, indexInfo.GetNumRows()); err != nil { + return err + } + } + + if err := indexparams.AppendPrepareLoadParams(paramtable.Get(), indexParams); err != nil { + return err + } + + indexInfoProto := &cgopb.LoadIndexInfo{ + CollectionID: s.Collection(), + PartitionID: s.Partition(), + SegmentID: s.ID(), + Field: fieldSchema, + EnableMmap: isIndexMmapEnable(indexInfo), + MmapDirPath: paramtable.Get().QueryNodeCfg.MmapDirPath.GetValue(), + IndexID: indexInfo.GetIndexID(), + IndexBuildID: indexInfo.GetBuildID(), + IndexVersion: indexInfo.GetIndexVersion(), + IndexParams: indexParams, + IndexFiles: indexInfo.GetIndexFilePaths(), + IndexEngineVersion: indexInfo.GetCurrentIndexVersion(), + IndexStoreVersion: indexInfo.GetIndexStoreVersion(), + } + if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() { uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID()) if err != nil { return err } - loadIndexInfo.appendStorageInfo(uri, indexInfo.IndexStoreVersion) + indexInfoProto.Uri = uri } newLoadIndexInfoSpan := tr.RecordSpan() // 2. 
- err = loadIndexInfo.appendLoadIndexInfo(ctx, indexInfo, s.Collection(), s.Partition(), s.ID(), fieldType) - if err != nil { + if err := loadIndexInfo.finish(ctx, indexInfoProto); err != nil { if loadIndexInfo.cleanLocalData(ctx) != nil { log.Warn("failed to clean cached data on disk after append index failed", zap.Int64("buildID", indexInfo.BuildID), diff --git a/pkg/util/indexparamcheck/inverted_checker.go b/pkg/util/indexparamcheck/inverted_checker.go index b15549cd4b7a6..dfc24127d3569 100644 --- a/pkg/util/indexparamcheck/inverted_checker.go +++ b/pkg/util/indexparamcheck/inverted_checker.go @@ -17,7 +17,8 @@ func (c *INVERTEDChecker) CheckTrain(params map[string]string) error { } func (c *INVERTEDChecker) CheckValidDataType(dType schemapb.DataType) error { - if !typeutil.IsBoolType(dType) && !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) { + if !typeutil.IsBoolType(dType) && !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) && + !typeutil.IsArrayType(dType) { return fmt.Errorf("INVERTED are not supported on %s field", dType.String()) } return nil diff --git a/pkg/util/indexparamcheck/inverted_checker_test.go b/pkg/util/indexparamcheck/inverted_checker_test.go index afe41f89f1193..7a31290061490 100644 --- a/pkg/util/indexparamcheck/inverted_checker_test.go +++ b/pkg/util/indexparamcheck/inverted_checker_test.go @@ -18,8 +18,8 @@ func Test_INVERTEDIndexChecker(t *testing.T) { assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Bool)) assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Int64)) assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Float)) + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Array)) assert.Error(t, c.CheckValidDataType(schemapb.DataType_JSON)) - assert.Error(t, c.CheckValidDataType(schemapb.DataType_Array)) assert.Error(t, c.CheckValidDataType(schemapb.DataType_FloatVector)) } diff --git a/scripts/generate_proto.sh b/scripts/generate_proto.sh index 2551f586c9f9c..286570b842aa8 100755 --- a/scripts/generate_proto.sh +++ b/scripts/generate_proto.sh @@ -44,6 +44,7 @@ pushd ${PROTO_DIR} mkdir -p etcdpb mkdir -p indexcgopb +mkdir -p cgopb mkdir -p internalpb mkdir -p rootcoordpb @@ -62,6 +63,7 @@ protoc_opt="${PROTOC_BIN} --proto_path=${API_PROTO_DIR} --proto_path=." 
${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./etcdpb etcd_meta.proto || { echo 'generate etcd_meta.proto failed'; exit 1; } ${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./indexcgopb index_cgo_msg.proto || { echo 'generate index_cgo_msg failed '; exit 1; } +${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./cgopb cgo_msg.proto || { echo 'generate cgo_msg failed '; exit 1; } ${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./rootcoordpb root_coord.proto || { echo 'generate root_coord.proto failed'; exit 1; } ${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./internalpb internal.proto || { echo 'generate internal.proto failed'; exit 1; } ${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./proxypb proxy.proto|| { echo 'generate proxy.proto failed'; exit 1; } @@ -78,6 +80,7 @@ ${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb schema.proto|| { echo 'generate sche ${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb common.proto|| { echo 'generate common.proto failed'; exit 1; } ${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb segcore.proto|| { echo 'generate segcore.proto failed'; exit 1; } ${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb index_cgo_msg.proto|| { echo 'generate index_cgo_msg.proto failed'; exit 1; } +${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb cgo_msg.proto|| { echo 'generate cgo_msg.proto failed'; exit 1; } ${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb plan.proto|| { echo 'generate plan.proto failed'; exit 1; } popd diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index 017ab6ff034db..753fb28cd5b5e 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -1309,10 +1309,7 @@ def test_create_inverted_index_on_array_field(self): collection_w = self.init_collection_wrap(schema=schema) # 2. 
create index scalar_index_params = {"index_type": "INVERTED"} - collection_w.create_index(ct.default_int32_array_field_name, index_params=scalar_index_params, - check_task=CheckTasks.err_res, - check_items={ct.err_code: 1100, - ct.err_msg: "create index on Array field is not supported"}) + collection_w.create_index(ct.default_int32_array_field_name, index_params=scalar_index_params) @pytest.mark.tags(CaseLabel.L1) def test_create_inverted_index_no_vector_index(self): From 842fb02f749d0167126c85751391928347736617 Mon Sep 17 00:00:00 2001 From: congqixia Date: Fri, 31 May 2024 10:15:45 +0800 Subject: [PATCH 103/126] fix: Use localStorage path to check disk cap for indexnode (#33450) See also #30943 #30944 Signed-off-by: Congqi Xia --- pkg/util/paramtable/component_param.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 47431b907985e..85067e3bad4e1 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -2482,7 +2482,6 @@ Max read concurrency must greater than or equal to 1, and less than or equal to } diskUsage, err := disk.Usage(localStoragePath) if err != nil { - // panic(err) log.Fatal("failed to get disk usage", zap.String("localStoragePath", localStoragePath), zap.Error(err)) } return strconv.FormatUint(diskUsage.Total, 10) @@ -3799,9 +3798,16 @@ func (p *indexNodeConfig) init(base *BaseTable) { Version: "2.2.0", Formatter: func(v string) string { if len(v) == 0 { - diskUsage, err := disk.Usage("/") + // use local storage path to check correct device + localStoragePath := base.Get("localStorage.path") + if _, err := os.Stat(localStoragePath); os.IsNotExist(err) { + if err := os.MkdirAll(localStoragePath, os.ModePerm); err != nil { + log.Fatal("failed to mkdir", zap.String("localStoragePath", localStoragePath), zap.Error(err)) + } + } + diskUsage, err := disk.Usage(localStoragePath) if err != nil { - panic(err) + log.Fatal("failed to get disk usage", zap.String("localStoragePath", localStoragePath), zap.Error(err)) } return strconv.FormatUint(diskUsage.Total, 10) } From 41714142229e9a3d4e9e39f89868a53bf36a4e66 Mon Sep 17 00:00:00 2001 From: Buqian Zheng Date: Fri, 31 May 2024 10:17:50 +0800 Subject: [PATCH 104/126] enhance: update knowhere version (#33490) issue: https://github.com/milvus-io/milvus/issues/33489 update knowhere version to latest. remove usage of `seed_ef` as it be replaced by existing `ef`. 
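For the indexnode change in PATCH 103 above: a minimal, self-contained sketch of the intent, assuming gopsutil v3 (the disk.Usage call in that hunk suggests it); the helper name and the example path are illustrative and not the actual paramtable code.

package main

import (
	"fmt"
	"os"

	"github.com/shirou/gopsutil/v3/disk"
)

// localStorageCapacity creates the local-storage directory if it does not
// exist yet, then reports the total size of the device backing it, which
// mirrors what the Formatter in component_param.go now does instead of
// measuring "/".
func localStorageCapacity(path string) (uint64, error) {
	if _, err := os.Stat(path); os.IsNotExist(err) {
		if err := os.MkdirAll(path, os.ModePerm); err != nil {
			return 0, err
		}
	}
	usage, err := disk.Usage(path)
	if err != nil {
		return 0, err
	}
	return usage.Total, nil
}

func main() {
	total, err := localStorageCapacity("/var/lib/milvus/data") // illustrative path
	if err != nil {
		panic(err)
	}
	fmt.Println("total bytes on local-storage device:", total)
}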
Signed-off-by: Buqian Zheng --- internal/core/src/query/GroupByOperator.h | 4 ---- internal/core/thirdparty/knowhere/CMakeLists.txt | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/internal/core/src/query/GroupByOperator.h b/internal/core/src/query/GroupByOperator.h index 64e112253748c..21162c09bfe9b 100644 --- a/internal/core/src/query/GroupByOperator.h +++ b/internal/core/src/query/GroupByOperator.h @@ -133,10 +133,6 @@ PrepareVectorIteratorsFromIndex(const SearchInfo& search_info, if (search_info.group_by_field_id_.has_value()) { try { auto search_conf = search_info.search_params_; - if (search_conf.contains(knowhere::indexparam::EF)) { - search_conf[knowhere::indexparam::SEED_EF] = - search_conf[knowhere::indexparam::EF]; - } knowhere::expected< std::vector>> iterators_val = diff --git a/internal/core/thirdparty/knowhere/CMakeLists.txt b/internal/core/thirdparty/knowhere/CMakeLists.txt index 1b0bdc0911500..0159549cad3e4 100644 --- a/internal/core/thirdparty/knowhere/CMakeLists.txt +++ b/internal/core/thirdparty/knowhere/CMakeLists.txt @@ -12,7 +12,7 @@ #------------------------------------------------------------------------------- # Update KNOWHERE_VERSION for the first occurrence -set( KNOWHERE_VERSION 1f51ea4e ) +set( KNOWHERE_VERSION 7499791 ) set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git") message(STATUS "Knowhere repo: ${GIT_REPOSITORY}") message(STATUS "Knowhere version: ${KNOWHERE_VERSION}") From 0cf225fa75dc6c9d6502f2c2a6b6cc0b59f510ff Mon Sep 17 00:00:00 2001 From: aoiasd <45024769+aoiasd@users.noreply.github.com> Date: Fri, 31 May 2024 11:13:44 +0800 Subject: [PATCH 105/126] enhance: access log support restful api (#33155) relate: https://github.com/milvus-io/milvus/issues/31823 Signed-off-by: aoiasd --- .../distributed/proxy/httpserver/constant.go | 1 + .../proxy/httpserver/handler_v1.go | 180 +++++----- .../proxy/httpserver/handler_v2.go | 315 +++++++++++------- .../distributed/proxy/httpserver/utils.go | 16 + internal/distributed/proxy/service.go | 4 + internal/proxy/accesslog/formater_test.go | 5 +- internal/proxy/accesslog/info/grpc_info.go | 9 +- internal/proxy/accesslog/info/restful_info.go | 189 +++++++++++ .../proxy/accesslog/info/restful_info_test.go | 192 +++++++++++ internal/proxy/accesslog/util.go | 21 ++ 10 files changed, 713 insertions(+), 219 deletions(-) create mode 100644 internal/proxy/accesslog/info/restful_info.go create mode 100644 internal/proxy/accesslog/info/restful_info_test.go diff --git a/internal/distributed/proxy/httpserver/constant.go b/internal/distributed/proxy/httpserver/constant.go index f98373e905820..f106e52787312 100644 --- a/internal/distributed/proxy/httpserver/constant.go +++ b/internal/distributed/proxy/httpserver/constant.go @@ -47,6 +47,7 @@ const ( ) const ( + ContextRequest = "request" ContextUsername = "username" VectorCollectionsPath = "/vector/collections" VectorCollectionsCreatePath = "/vector/collections/create" diff --git a/internal/distributed/proxy/httpserver/handler_v1.go b/internal/distributed/proxy/httpserver/handler_v1.go index 804ed7ab788f3..0cdf7deddf380 100644 --- a/internal/distributed/proxy/httpserver/handler_v1.go +++ b/internal/distributed/proxy/httpserver/handler_v1.go @@ -32,12 +32,12 @@ var RestRequestInterceptorErr = errors.New("interceptor error placeholder") func checkAuthorization(ctx context.Context, c *gin.Context, req interface{}) error { username, ok := c.Get(ContextUsername) if !ok || username.(string) == "" { - c.JSON(http.StatusUnauthorized, 
gin.H{HTTPReturnCode: merr.Code(merr.ErrNeedAuthenticate), HTTPReturnMessage: merr.ErrNeedAuthenticate.Error()}) + HTTPReturn(c, http.StatusUnauthorized, gin.H{HTTPReturnCode: merr.Code(merr.ErrNeedAuthenticate), HTTPReturnMessage: merr.ErrNeedAuthenticate.Error()}) return RestRequestInterceptorErr } _, authErr := proxy.PrivilegeInterceptor(ctx, req) if authErr != nil { - c.JSON(http.StatusForbidden, gin.H{HTTPReturnCode: merr.Code(authErr), HTTPReturnMessage: authErr.Error()}) + HTTPReturn(c, http.StatusForbidden, gin.H{HTTPReturnCode: merr.Code(authErr), HTTPReturnMessage: authErr.Error()}) return RestRequestInterceptorErr } @@ -104,7 +104,7 @@ func (h *HandlersV1) checkDatabase(ctx context.Context, c *gin.Context, dbName s err = merr.Error(response.GetStatus()) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return RestRequestInterceptorErr } for _, db := range response.DbNames { @@ -112,7 +112,7 @@ func (h *HandlersV1) checkDatabase(ctx context.Context, c *gin.Context, dbName s return nil } } - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrDatabaseNotFound), HTTPReturnMessage: merr.ErrDatabaseNotFound.Error() + ", database: " + dbName, }) @@ -133,7 +133,7 @@ func (h *HandlersV1) describeCollection(ctx context.Context, c *gin.Context, dbN err = merr.Error(response.GetStatus()) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return nil, err } primaryField, ok := getPrimaryField(response.Schema) @@ -154,7 +154,7 @@ func (h *HandlersV1) hasCollection(ctx context.Context, c *gin.Context, dbName s err = merr.Error(response.GetStatus()) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return false, err } return response.Value, nil @@ -193,6 +193,7 @@ func (h *HandlersV1) listCollections(c *gin.Context) { req := &milvuspb.ShowCollectionsRequest{ DbName: dbName, } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) @@ -206,7 +207,7 @@ func (h *HandlersV1) listCollections(c *gin.Context) { err = merr.Error(resp.(*milvuspb.ShowCollectionsResponse).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return } response := resp.(*milvuspb.ShowCollectionsResponse) @@ -216,7 +217,7 @@ func (h *HandlersV1) listCollections(c *gin.Context) { } else { collections = []string{} } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: collections}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: collections}) } func (h *HandlersV1) createCollection(c *gin.Context) { @@ -229,7 +230,7 @@ func (h *HandlersV1) createCollection(c *gin.Context) { } if err := c.ShouldBindWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of create collection 
is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -237,12 +238,20 @@ func (h *HandlersV1) createCollection(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.Dimension == 0 { log.Warn("high level restful api, create collection require parameters: [collectionName, dimension], but miss", zap.Any("request", httpReq)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, dimension]", }) return } + req := &milvuspb.CreateCollectionRequest{ + DbName: httpReq.DbName, + CollectionName: httpReq.CollectionName, + ShardsNum: ShardNumDefault, + ConsistencyLevel: commonpb.ConsistencyLevel_Bounded, + } + c.Set(ContextRequest, req) + schema, err := proto.Marshal(&schemapb.CollectionSchema{ Name: httpReq.CollectionName, Description: httpReq.Description, @@ -272,19 +281,13 @@ func (h *HandlersV1) createCollection(c *gin.Context) { }) if err != nil { log.Warn("high level restful api, marshal collection schema fail", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMarshalCollectionSchema), HTTPReturnMessage: merr.ErrMarshalCollectionSchema.Error() + ", error: " + err.Error(), }) return } - req := &milvuspb.CreateCollectionRequest{ - DbName: httpReq.DbName, - CollectionName: httpReq.CollectionName, - Schema: schema, - ShardsNum: ShardNumDefault, - ConsistencyLevel: commonpb.ConsistencyLevel_Bounded, - } + req.Schema = schema username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -297,7 +300,7 @@ func (h *HandlersV1) createCollection(c *gin.Context) { err = merr.Error(response.(*commonpb.Status)) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return } @@ -312,7 +315,7 @@ func (h *HandlersV1) createCollection(c *gin.Context) { err = merr.Error(statusResponse) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return } statusResponse, err = h.proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ @@ -323,17 +326,17 @@ func (h *HandlersV1) createCollection(c *gin.Context) { err = merr.Error(statusResponse) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) } func (h *HandlersV1) getCollectionDetails(c *gin.Context) { collectionName 
:= c.Query(HTTPCollectionName) if collectionName == "" { log.Warn("high level restful api, desc collection require parameter: [collectionName], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName]", }) @@ -347,6 +350,7 @@ func (h *HandlersV1) getCollectionDetails(c *gin.Context) { DbName: dbName, CollectionName: collectionName, } + c.Set(ContextRequest, req) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { return h.proxy.DescribeCollection(reqCtx, req.(*milvuspb.DescribeCollectionRequest)) @@ -356,7 +360,7 @@ func (h *HandlersV1) getCollectionDetails(c *gin.Context) { err = merr.Error(response.(*milvuspb.DescribeCollectionResponse).GetStatus()) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return } coll := response.(*milvuspb.DescribeCollectionResponse) @@ -408,7 +412,7 @@ func (h *HandlersV1) getCollectionDetails(c *gin.Context) { } else { indexDesc = printIndexes(indexResp.IndexDescriptions) } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ HTTPCollectionName: coll.CollectionName, HTTPReturnDescription: coll.Schema.Description, "fields": printFields(coll.Schema.Fields), @@ -425,7 +429,7 @@ func (h *HandlersV1) dropCollection(c *gin.Context) { } if err := c.ShouldBindWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of drop collection is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -433,7 +437,7 @@ func (h *HandlersV1) dropCollection(c *gin.Context) { } if httpReq.CollectionName == "" { log.Warn("high level restful api, drop collection require parameter: [collectionName], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName]", }) @@ -443,6 +447,7 @@ func (h *HandlersV1) dropCollection(c *gin.Context) { DbName: httpReq.DbName, CollectionName: httpReq.CollectionName, } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -451,7 +456,7 @@ func (h *HandlersV1) dropCollection(c *gin.Context) { return nil, RestRequestInterceptorErr } if !has { - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCollectionNotFound), HTTPReturnMessage: merr.ErrCollectionNotFound.Error() + ", database: " + httpReq.DbName + ", collection: " + httpReq.CollectionName, }) @@ -466,9 +471,9 @@ func (h *HandlersV1) dropCollection(c *gin.Context) { err = 
merr.Error(response.(*commonpb.Status)) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) } } @@ -480,7 +485,7 @@ func (h *HandlersV1) query(c *gin.Context) { } if err := c.ShouldBindWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of query is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -488,7 +493,7 @@ func (h *HandlersV1) query(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.Filter == "" { log.Warn("high level restful api, query require parameter: [collectionName, filter], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, filter]", }) @@ -502,6 +507,7 @@ func (h *HandlersV1) query(c *gin.Context) { GuaranteeTimestamp: BoundedTimestamp, QueryParams: []*commonpb.KeyValuePair{}, } + c.Set(ContextRequest, req) if httpReq.Offset > 0 { req.QueryParams = append(req.QueryParams, &commonpb.KeyValuePair{Key: ParamOffset, Value: strconv.FormatInt(int64(httpReq.Offset), 10)}) } @@ -520,19 +526,19 @@ func (h *HandlersV1) query(c *gin.Context) { err = merr.Error(response.(*milvuspb.QueryResults).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { queryResp := response.(*milvuspb.QueryResults) allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) outputData, err := buildQueryResp(int64(0), queryResp.OutputFields, queryResp.FieldsData, nil, nil, allowJS) if err != nil { log.Warn("high level restful api, fail to deal with query result", zap.Any("response", response), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) } } } @@ -544,7 +550,7 @@ func (h *HandlersV1) get(c *gin.Context) { } if err := c.ShouldBindBodyWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of get is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -552,7 +558,7 @@ func (h *HandlersV1) get(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.ID == nil { log.Warn("high level restful api, get require 
parameter: [collectionName, id], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, id]", }) @@ -564,6 +570,7 @@ func (h *HandlersV1) get(c *gin.Context) { OutputFields: httpReq.OutputFields, GuaranteeTimestamp: BoundedTimestamp, } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -574,7 +581,7 @@ func (h *HandlersV1) get(c *gin.Context) { body, _ := c.Get(gin.BodyBytesKey) filter, err := checkGetPrimaryKey(collSchema, gjson.Get(string(body.([]byte)), DefaultPrimaryFieldName)) if err != nil { - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: " + err.Error(), }) @@ -591,19 +598,19 @@ func (h *HandlersV1) get(c *gin.Context) { err = merr.Error(response.(*milvuspb.QueryResults).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { queryResp := response.(*milvuspb.QueryResults) allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) outputData, err := buildQueryResp(int64(0), queryResp.OutputFields, queryResp.FieldsData, nil, nil, allowJS) if err != nil { log.Warn("high level restful api, fail to deal with get result", zap.Any("response", response), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) } } } @@ -614,7 +621,7 @@ func (h *HandlersV1) delete(c *gin.Context) { } if err := c.ShouldBindBodyWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of delete is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -622,7 +629,7 @@ func (h *HandlersV1) delete(c *gin.Context) { } if httpReq.CollectionName == "" || (httpReq.ID == nil && httpReq.Filter == "") { log.Warn("high level restful api, delete require parameter: [collectionName, id/filter], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, id/filter]", }) @@ -632,6 +639,7 @@ func (h *HandlersV1) delete(c *gin.Context) { DbName: httpReq.DbName, CollectionName: httpReq.CollectionName, } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := 
h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -645,7 +653,7 @@ func (h *HandlersV1) delete(c *gin.Context) { body, _ := c.Get(gin.BodyBytesKey) filter, err := checkGetPrimaryKey(collSchema, gjson.Get(string(body.([]byte)), DefaultPrimaryFieldName)) if err != nil { - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: " + err.Error(), }) @@ -662,9 +670,9 @@ func (h *HandlersV1) delete(c *gin.Context) { err = merr.Error(response.(*milvuspb.MutationResult).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) } } @@ -678,7 +686,7 @@ func (h *HandlersV1) insert(c *gin.Context) { } if err = c.ShouldBindBodyWith(&singleInsertReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of insert is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -690,7 +698,7 @@ func (h *HandlersV1) insert(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.Data == nil { log.Warn("high level restful api, insert require parameter: [collectionName, data], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, data]", }) @@ -701,6 +709,7 @@ func (h *HandlersV1) insert(c *gin.Context) { CollectionName: httpReq.CollectionName, NumRows: uint32(len(httpReq.Data)), } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -712,7 +721,7 @@ func (h *HandlersV1) insert(c *gin.Context) { err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema) if err != nil { log.Warn("high level restful api, fail to deal with insert data", zap.Any("body", body), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -722,7 +731,7 @@ func (h *HandlersV1) insert(c *gin.Context) { insertReq.FieldsData, err = anyToColumns(httpReq.Data, collSchema) if err != nil { log.Warn("high level restful api, fail to deal with insert data", zap.Any("data", httpReq.Data), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -737,21 +746,21 @@ func (h *HandlersV1) insert(c *gin.Context) { err = 
merr.Error(response.(*milvuspb.MutationResult).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { insertResp := response.(*milvuspb.MutationResult) switch insertResp.IDs.GetIdField().(type) { case *schemapb.IDs_IntId: allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) if allowJS { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}}) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": formatInt64(insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": formatInt64(insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}}) } case *schemapb.IDs_StrId: - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}}) default: - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: unsupported primary key data type", }) @@ -769,7 +778,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { } if err = c.ShouldBindBodyWith(&singleUpsertReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of upsert is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -781,7 +790,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.Data == nil { log.Warn("high level restful api, upsert require parameter: [collectionName, data], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, data]", }) @@ -792,6 +801,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { CollectionName: httpReq.CollectionName, NumRows: uint32(len(httpReq.Data)), } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -802,7 +812,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { for _, fieldSchema := range collSchema.Fields { if fieldSchema.IsPrimaryKey && fieldSchema.AutoID { err := 
merr.WrapErrParameterInvalid("autoID: false", "autoID: true", "cannot upsert an autoID collection") - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return nil, RestRequestInterceptorErr } } @@ -810,7 +820,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema) if err != nil { log.Warn("high level restful api, fail to deal with upsert data", zap.Any("body", body), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -820,7 +830,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { upsertReq.FieldsData, err = anyToColumns(httpReq.Data, collSchema) if err != nil { log.Warn("high level restful api, fail to deal with upsert data", zap.Any("data", httpReq.Data), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -835,21 +845,21 @@ func (h *HandlersV1) upsert(c *gin.Context) { err = merr.Error(response.(*milvuspb.MutationResult).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { upsertResp := response.(*milvuspb.MutationResult) switch upsertResp.IDs.GetIdField().(type) { case *schemapb.IDs_IntId: allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) if allowJS { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}}) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": formatInt64(upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": formatInt64(upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}}) } case *schemapb.IDs_StrId: - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}}) default: - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: unsupported primary key data type", }) @@ -864,7 +874,7 @@ func (h *HandlersV1) search(c *gin.Context) { } if err := c.ShouldBindWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of 
search is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -872,12 +882,24 @@ func (h *HandlersV1) search(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.Vector == nil { log.Warn("high level restful api, search require parameter: [collectionName, vector], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, vector]", }) return } + req := &milvuspb.SearchRequest{ + DbName: httpReq.DbName, + CollectionName: httpReq.CollectionName, + Dsl: httpReq.Filter, + PlaceholderGroup: vectors2PlaceholderGroupBytes([][]float32{httpReq.Vector}), + DslType: commonpb.DslType_BoolExprV1, + OutputFields: httpReq.OutputFields, + GuaranteeTimestamp: BoundedTimestamp, + Nq: int64(1), + } + c.Set(ContextRequest, req) + params := map[string]interface{}{ // auto generated mapping "level": int(commonpb.ConsistencyLevel_Bounded), } @@ -887,7 +909,7 @@ func (h *HandlersV1) search(c *gin.Context) { if rangeFilterOk { if !radiusOk { log.Warn("high level restful api, search params invalid, because only " + ParamRangeFilter) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: invalid search params", }) @@ -900,23 +922,13 @@ func (h *HandlersV1) search(c *gin.Context) { } } bs, _ := json.Marshal(params) - searchParams := []*commonpb.KeyValuePair{ + req.SearchParams = []*commonpb.KeyValuePair{ {Key: common.TopKKey, Value: strconv.FormatInt(int64(httpReq.Limit), 10)}, {Key: Params, Value: string(bs)}, {Key: ParamRoundDecimal, Value: "-1"}, {Key: ParamOffset, Value: strconv.FormatInt(int64(httpReq.Offset), 10)}, } - req := &milvuspb.SearchRequest{ - DbName: httpReq.DbName, - CollectionName: httpReq.CollectionName, - Dsl: httpReq.Filter, - PlaceholderGroup: vectors2PlaceholderGroupBytes([][]float32{httpReq.Vector}), - DslType: commonpb.DslType_BoolExprV1, - OutputFields: httpReq.OutputFields, - SearchParams: searchParams, - GuaranteeTimestamp: BoundedTimestamp, - Nq: int64(1), - } + username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -929,22 +941,22 @@ func (h *HandlersV1) search(c *gin.Context) { err = merr.Error(response.(*milvuspb.SearchResults).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { searchResp := response.(*milvuspb.SearchResults) if searchResp.Results.TopK == int64(0) { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: []interface{}{}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: []interface{}{}}) } else { allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) outputData, err := buildQueryResp(searchResp.Results.TopK, 
searchResp.Results.OutputFields, searchResp.Results.FieldsData, searchResp.Results.Ids, searchResp.Results.Scores, allowJS) if err != nil { log.Warn("high level restful api, fail to deal with search result", zap.Any("result", searchResp.Results), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) } } } diff --git a/internal/distributed/proxy/httpserver/handler_v2.go b/internal/distributed/proxy/httpserver/handler_v2.go index 294db27a9195d..e8637a02b653f 100644 --- a/internal/distributed/proxy/httpserver/handler_v2.go +++ b/internal/distributed/proxy/httpserver/handler_v2.go @@ -153,17 +153,17 @@ func wrapperPost(newReq newReqFunc, v2 handlerFuncV2) gin.HandlerFunc { log.Warn("high level restful api, read parameters from request body fail", zap.Error(err), zap.Any("url", c.Request.URL.Path), zap.Any("request", req)) if _, ok := err.(validator.ValidationErrors); ok { - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", error: " + err.Error(), }) } else if err == io.EOF { - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", the request body should be nil, however {} is valid", }) } else { - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -230,14 +230,14 @@ func checkAuthorizationV2(ctx context.Context, c *gin.Context, ignoreErr bool, r username, ok := c.Get(ContextUsername) if !ok || username.(string) == "" { if !ignoreErr { - c.JSON(http.StatusUnauthorized, gin.H{HTTPReturnCode: merr.Code(merr.ErrNeedAuthenticate), HTTPReturnMessage: merr.ErrNeedAuthenticate.Error()}) + HTTPReturn(c, http.StatusUnauthorized, gin.H{HTTPReturnCode: merr.Code(merr.ErrNeedAuthenticate), HTTPReturnMessage: merr.ErrNeedAuthenticate.Error()}) } return merr.ErrNeedAuthenticate } _, authErr := proxy.PrivilegeInterceptor(ctx, req) if authErr != nil { if !ignoreErr { - c.JSON(http.StatusForbidden, gin.H{HTTPReturnCode: merr.Code(authErr), HTTPReturnMessage: authErr.Error()}) + HTTPReturn(c, http.StatusForbidden, gin.H{HTTPReturnCode: merr.Code(authErr), HTTPReturnMessage: authErr.Error()}) } return authErr } @@ -267,7 +267,7 @@ func wrapperProxy(ctx context.Context, c *gin.Context, req any, checkAuth bool, if err != nil { log.Ctx(ctx).Warn("high level restful api, grpc call failed", zap.Error(err), zap.Any("grpcRequest", req)) if !ignoreErr { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } } return response, err @@ -290,7 +290,7 @@ func (h *HandlersV2) wrapperCheckDatabase(v2 handlerFuncV2) handlerFuncV2 { } } log.Ctx(ctx).Warn("high level restful api, non-exist database", 
zap.String("database", dbName), zap.Any("request", req)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrDatabaseNotFound), HTTPReturnMessage: merr.ErrDatabaseNotFound.Error() + ", database: " + dbName, }) @@ -316,7 +316,7 @@ func (h *HandlersV2) hasCollection(ctx context.Context, c *gin.Context, anyReq a } has = resp.(*milvuspb.BoolResponse).Value } - c.JSON(http.StatusOK, wrapperReturnHas(has)) + HTTPReturn(c, http.StatusOK, wrapperReturnHas(has)) return has, nil } @@ -324,11 +324,12 @@ func (h *HandlersV2) listCollections(ctx context.Context, c *gin.Context, anyReq req := &milvuspb.ShowCollectionsRequest{ DbName: dbName, } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ShowCollections(reqCtx, req.(*milvuspb.ShowCollectionsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnList(resp.(*milvuspb.ShowCollectionsResponse).CollectionNames)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(resp.(*milvuspb.ShowCollectionsResponse).CollectionNames)) } return resp, err } @@ -340,6 +341,7 @@ func (h *HandlersV2) getCollectionDetails(ctx context.Context, c *gin.Context, a DbName: dbName, CollectionName: collectionName, } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (any, error) { return h.proxy.DescribeCollection(reqCtx, req.(*milvuspb.DescribeCollectionRequest)) }) @@ -408,7 +410,7 @@ func (h *HandlersV2) getCollectionDetails(ctx context.Context, c *gin.Context, a if coll.Properties == nil { coll.Properties = []*commonpb.KeyValuePair{} } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{ HTTPCollectionName: coll.CollectionName, HTTPCollectionID: coll.CollectionID, HTTPReturnDescription: coll.Schema.Description, @@ -432,11 +434,12 @@ func (h *HandlersV2) getCollectionStats(ctx context.Context, c *gin.Context, any DbName: dbName, CollectionName: collectionGetter.GetCollectionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (any, error) { return h.proxy.GetCollectionStatistics(reqCtx, req.(*milvuspb.GetCollectionStatisticsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnRowCount(resp.(*milvuspb.GetCollectionStatisticsResponse).Stats)) + HTTPReturn(c, http.StatusOK, wrapperReturnRowCount(resp.(*milvuspb.GetCollectionStatisticsResponse).Stats)) } return resp, err } @@ -447,6 +450,7 @@ func (h *HandlersV2) getCollectionLoadState(ctx context.Context, c *gin.Context, DbName: dbName, CollectionName: collectionGetter.GetCollectionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (any, error) { return h.proxy.GetLoadState(reqCtx, req.(*milvuspb.GetLoadStateRequest)) }) @@ -455,10 +459,10 @@ func (h *HandlersV2) getCollectionLoadState(ctx context.Context, c *gin.Context, } if resp.(*milvuspb.GetLoadStateResponse).State == commonpb.LoadState_LoadStateNotExist { err = merr.WrapErrCollectionNotFound(req.CollectionName) - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) 
return resp, err } else if resp.(*milvuspb.GetLoadStateResponse).State == commonpb.LoadState_LoadStateNotLoad { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{ HTTPReturnLoadState: resp.(*milvuspb.GetLoadStateResponse).State.String(), }}) return resp, err @@ -483,7 +487,7 @@ func (h *HandlersV2) getCollectionLoadState(ctx context.Context, c *gin.Context, if progress >= 100 { state = commonpb.LoadState_LoadStateLoaded.String() } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{ HTTPReturnLoadState: state, HTTPReturnLoadProgress: progress, }, HTTPReturnMessage: errMessage}) @@ -496,11 +500,12 @@ func (h *HandlersV2) dropCollection(ctx context.Context, c *gin.Context, anyReq DbName: dbName, CollectionName: getter.GetCollectionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DropCollection(reqCtx, req.(*milvuspb.DropCollectionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -513,6 +518,7 @@ func (h *HandlersV2) renameCollection(ctx context.Context, c *gin.Context, anyRe NewName: httpReq.NewCollectionName, NewDBName: httpReq.NewDbName, } + c.Set(ContextRequest, req) if req.NewDBName == "" { req.NewDBName = dbName } @@ -520,7 +526,7 @@ func (h *HandlersV2) renameCollection(ctx context.Context, c *gin.Context, anyRe return h.proxy.RenameCollection(reqCtx, req.(*milvuspb.RenameCollectionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -531,11 +537,12 @@ func (h *HandlersV2) loadCollection(ctx context.Context, c *gin.Context, anyReq DbName: dbName, CollectionName: getter.GetCollectionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.LoadCollection(reqCtx, req.(*milvuspb.LoadCollectionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -546,11 +553,12 @@ func (h *HandlersV2) releaseCollection(ctx context.Context, c *gin.Context, anyR DbName: dbName, CollectionName: getter.GetCollectionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ReleaseCollection(reqCtx, req.(*milvuspb.ReleaseCollectionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -566,6 +574,7 @@ func (h *HandlersV2) query(ctx context.Context, c *gin.Context, anyReq any, dbNa QueryParams: []*commonpb.KeyValuePair{}, UseDefaultConsistency: true, } + c.Set(ContextRequest, req) if httpReq.Offset > 0 { req.QueryParams = append(req.QueryParams, &commonpb.KeyValuePair{Key: ParamOffset, Value: strconv.FormatInt(int64(httpReq.Offset), 10)}) } @@ -581,13 +590,13 @@ func (h *HandlersV2) query(ctx context.Context, c *gin.Context, anyReq any, dbNa outputData, err := buildQueryResp(int64(0), 
queryResp.OutputFields, queryResp.FieldsData, nil, nil, allowJS) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with query result", zap.Any("response", resp), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: commonpb.ErrorCode_Success, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: outputData, HTTPReturnCost: proxy.GetCostValue(queryResp.GetStatus()), }) @@ -605,7 +614,7 @@ func (h *HandlersV2) get(ctx context.Context, c *gin.Context, anyReq any, dbName body, _ := c.Get(gin.BodyBytesKey) filter, err := checkGetPrimaryKey(collSchema, gjson.Get(string(body.([]byte)), DefaultPrimaryFieldName)) if err != nil { - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: " + err.Error(), }) @@ -619,6 +628,7 @@ func (h *HandlersV2) get(ctx context.Context, c *gin.Context, anyReq any, dbName Expr: filter, UseDefaultConsistency: true, } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.Query(reqCtx, req.(*milvuspb.QueryRequest)) }) @@ -628,13 +638,13 @@ func (h *HandlersV2) get(ctx context.Context, c *gin.Context, anyReq any, dbName outputData, err := buildQueryResp(int64(0), queryResp.OutputFields, queryResp.FieldsData, nil, nil, allowJS) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with get result", zap.Any("response", resp), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: commonpb.ErrorCode_Success, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: outputData, HTTPReturnCost: proxy.GetCostValue(queryResp.GetStatus()), }) @@ -655,11 +665,12 @@ func (h *HandlersV2) delete(ctx context.Context, c *gin.Context, anyReq any, dbN PartitionName: httpReq.PartitionName, Expr: httpReq.Filter, } + c.Set(ContextRequest, req) if req.Expr == "" { body, _ := c.Get(gin.BodyBytesKey) filter, err := checkGetPrimaryKey(collSchema, gjson.Get(string(body.([]byte)), DefaultPrimaryFieldName)) if err != nil { - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: " + err.Error(), }) @@ -671,7 +682,7 @@ func (h *HandlersV2) delete(ctx context.Context, c *gin.Context, anyReq any, dbN return h.proxy.Delete(reqCtx, req.(*milvuspb.DeleteRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefaultWithCost( + HTTPReturn(c, http.StatusOK, wrapperReturnDefaultWithCost( proxy.GetCostValue(resp.(*milvuspb.MutationResult).GetStatus()), )) } @@ -680,6 +691,14 @@ func (h *HandlersV2) delete(ctx context.Context, c *gin.Context, anyReq any, dbN func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) { httpReq := anyReq.(*CollectionDataReq) + req := &milvuspb.InsertRequest{ + DbName: dbName, + CollectionName: 
httpReq.CollectionName, + PartitionName: httpReq.PartitionName, + // PartitionName: "_default", + } + c.Set(ContextRequest, req) + collSchema, err := h.GetCollectionSchema(ctx, c, dbName, httpReq.CollectionName) if err != nil { return nil, err @@ -688,23 +707,18 @@ func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbN err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with insert data", zap.Error(err), zap.String("body", string(body.([]byte)))) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) return nil, err } - req := &milvuspb.InsertRequest{ - DbName: dbName, - CollectionName: httpReq.CollectionName, - PartitionName: httpReq.PartitionName, - // PartitionName: "_default", - NumRows: uint32(len(httpReq.Data)), - } + + req.NumRows = uint32(len(httpReq.Data)) req.FieldsData, err = anyToColumns(httpReq.Data, collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with insert data", zap.Any("data", httpReq.Data), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -720,26 +734,26 @@ func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbN case *schemapb.IDs_IntId: allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) if allowJS { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: commonpb.ErrorCode_Success, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}, HTTPReturnCost: cost, }) } else { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: commonpb.ErrorCode_Success, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": formatInt64(insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}, HTTPReturnCost: cost, }) } case *schemapb.IDs_StrId: - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: commonpb.ErrorCode_Success, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}, HTTPReturnCost: cost, }) default: - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: unsupported primary key data type", }) @@ -750,36 +764,39 @@ func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbN func (h *HandlersV2) upsert(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) { httpReq := anyReq.(*CollectionDataReq) + req := &milvuspb.UpsertRequest{ + DbName: dbName, + CollectionName: httpReq.CollectionName, + PartitionName: httpReq.PartitionName, + // PartitionName: "_default", + } + c.Set(ContextRequest, req) + collSchema, err := h.GetCollectionSchema(ctx, c, dbName, httpReq.CollectionName) if err != nil { return nil, err } if collSchema.AutoID { err := merr.WrapErrParameterInvalid("autoID: false", 
"autoID: true", "cannot upsert an autoID collection") - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return nil, err } body, _ := c.Get(gin.BodyBytesKey) err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with upsert data", zap.Any("body", body), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) return nil, err } - req := &milvuspb.UpsertRequest{ - DbName: dbName, - CollectionName: httpReq.CollectionName, - PartitionName: httpReq.PartitionName, - // PartitionName: "_default", - NumRows: uint32(len(httpReq.Data)), - } + + req.NumRows = uint32(len(httpReq.Data)) req.FieldsData, err = anyToColumns(httpReq.Data, collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with upsert data", zap.Any("data", httpReq.Data), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -795,26 +812,26 @@ func (h *HandlersV2) upsert(ctx context.Context, c *gin.Context, anyReq any, dbN case *schemapb.IDs_IntId: allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) if allowJS { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: commonpb.ErrorCode_Success, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}, HTTPReturnCost: cost, }) } else { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: commonpb.ErrorCode_Success, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": formatInt64(upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}, HTTPReturnCost: cost, }) } case *schemapb.IDs_StrId: - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: commonpb.ErrorCode_Success, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}, HTTPReturnCost: cost, }) default: - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: unsupported primary key data type", }) @@ -873,7 +890,7 @@ func generateSearchParams(ctx context.Context, c *gin.Context, reqParams map[str if rangeFilterOk { if !radiusOk { log.Ctx(ctx).Warn("high level restful api, search params invalid, because only " + ParamRangeFilter) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: invalid search params", }) @@ -894,6 +911,17 @@ func generateSearchParams(ctx context.Context, c *gin.Context, reqParams map[str func (h *HandlersV2) search(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, 
error) { httpReq := anyReq.(*SearchReqV2) + req := &milvuspb.SearchRequest{ + DbName: dbName, + CollectionName: httpReq.CollectionName, + Dsl: httpReq.Filter, + DslType: commonpb.DslType_BoolExprV1, + OutputFields: httpReq.OutputFields, + PartitionNames: httpReq.PartitionNames, + UseDefaultConsistency: true, + } + c.Set(ContextRequest, req) + collSchema, err := h.GetCollectionSchema(ctx, c, dbName, httpReq.CollectionName) if err != nil { return nil, err @@ -911,23 +939,14 @@ func (h *HandlersV2) search(ctx context.Context, c *gin.Context, anyReq any, dbN placeholderGroup, err := generatePlaceholderGroup(ctx, string(body.([]byte)), collSchema, httpReq.AnnsField) if err != nil { log.Ctx(ctx).Warn("high level restful api, search with vector invalid", zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) return nil, err } - req := &milvuspb.SearchRequest{ - DbName: dbName, - CollectionName: httpReq.CollectionName, - Dsl: httpReq.Filter, - PlaceholderGroup: placeholderGroup, - DslType: commonpb.DslType_BoolExprV1, - OutputFields: httpReq.OutputFields, - PartitionNames: httpReq.PartitionNames, - SearchParams: searchParams, - UseDefaultConsistency: true, - } + req.SearchParams = searchParams + req.PlaceholderGroup = placeholderGroup resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.Search(reqCtx, req.(*milvuspb.SearchRequest)) }) @@ -935,18 +954,18 @@ func (h *HandlersV2) search(ctx context.Context, c *gin.Context, anyReq any, dbN searchResp := resp.(*milvuspb.SearchResults) cost := proxy.GetCostValue(searchResp.GetStatus()) if searchResp.Results.TopK == int64(0) { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) } else { allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) outputData, err := buildQueryResp(0, searchResp.Results.OutputFields, searchResp.Results.FieldsData, searchResp.Results.Ids, searchResp.Results.Scores, allowJS) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with search result", zap.Any("result", searchResp.Results), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: outputData, HTTPReturnCost: cost}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: outputData, HTTPReturnCost: cost}) } } } @@ -961,6 +980,8 @@ func (h *HandlersV2) advancedSearch(ctx context.Context, c *gin.Context, anyReq Requests: []*milvuspb.SearchRequest{}, OutputFields: httpReq.OutputFields, } + c.Set(ContextRequest, req) + collSchema, err := h.GetCollectionSchema(ctx, c, dbName, httpReq.CollectionName) if err != nil { return nil, err @@ -980,7 +1001,7 @@ func (h *HandlersV2) advancedSearch(ctx context.Context, c *gin.Context, anyReq placeholderGroup, err := generatePlaceholderGroup(ctx, searchArray[i].Raw, collSchema, subReq.AnnsField) if err != nil { 
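
The insert, upsert, and search hunks above all apply the same rework: the milvuspb request is constructed before the schema lookup and is registered in the gin context via c.Set(ContextRequest, req) before any early return, with the remaining fields filled in later. A condensed, illustrative sketch of the resulting handler shape (names such as ContextRequest, wrapperProxy, GetCollectionSchema, and anyToColumns come from the surrounding file; the handler itself is hypothetical and not part of the patch, and payload validation such as checkAndSetData is omitted):

// Illustrative only; mirrors the pattern in the hunks above.
func (h *HandlersV2) exampleInsertHandler(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) {
	httpReq := anyReq.(*CollectionDataReq)
	// 1. Build the proto request first with the fields known up front.
	req := &milvuspb.InsertRequest{
		DbName:         dbName,
		CollectionName: httpReq.CollectionName,
		PartitionName:  httpReq.PartitionName,
	}
	// 2. Register it in the gin context immediately, so the REST access log can
	//    still report db/collection/partition even if a later step aborts.
	c.Set(ContextRequest, req)
	// 3. Only then run the steps that can fail, filling in the rest as it
	//    becomes available.
	collSchema, err := h.GetCollectionSchema(ctx, c, dbName, httpReq.CollectionName)
	if err != nil {
		return nil, err
	}
	req.NumRows = uint32(len(httpReq.Data))
	req.FieldsData, err = anyToColumns(httpReq.Data, collSchema)
	if err != nil {
		return nil, err
	}
	return wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, r any) (interface{}, error) {
		return h.proxy.Insert(reqCtx, r.(*milvuspb.InsertRequest))
	})
}
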
log.Ctx(ctx).Warn("high level restful api, search with vector invalid", zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -1013,18 +1034,18 @@ func (h *HandlersV2) advancedSearch(ctx context.Context, c *gin.Context, anyReq searchResp := resp.(*milvuspb.SearchResults) cost := proxy.GetCostValue(searchResp.GetStatus()) if searchResp.Results.TopK == int64(0) { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) } else { allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) outputData, err := buildQueryResp(0, searchResp.Results.OutputFields, searchResp.Results.FieldsData, searchResp.Results.Ids, searchResp.Results.Scores, allowJS) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with search result", zap.Any("result", searchResp.Results), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: outputData, HTTPReturnCost: cost}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: outputData, HTTPReturnCost: cost}) } } } @@ -1033,6 +1054,13 @@ func (h *HandlersV2) advancedSearch(ctx context.Context, c *gin.Context, anyReq func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) { httpReq := anyReq.(*CollectionReq) + req := &milvuspb.CreateCollectionRequest{ + DbName: dbName, + CollectionName: httpReq.CollectionName, + Properties: []*commonpb.KeyValuePair{}, + } + c.Set(ContextRequest, req) + var schema []byte var err error fieldNames := map[string]bool{} @@ -1042,7 +1070,7 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe err := merr.WrapErrParameterInvalid("collectionName & dimension", "collectionName", "dimension is required for quickly create collection(default metric type: "+DefaultMetricType+")") log.Ctx(ctx).Warn("high level restful api, quickly create collection fail", zap.Error(err), zap.Any("request", anyReq)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error(), }) @@ -1064,7 +1092,7 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe err := merr.WrapErrParameterInvalid("Int64, Varchar", httpReq.IDType, "idType can only be [Int64, VarChar], default: Int64") log.Ctx(ctx).Warn("high level restful api, quickly create collection fail", zap.Error(err), zap.Any("request", anyReq)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error(), }) @@ -1120,7 +1148,7 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe fieldDataType, ok := schemapb.DataType_value[field.DataType] if !ok { log.Ctx(ctx).Warn("field's data type is invalid(case sensitive).", zap.Any("fieldDataType", field.DataType), 
zap.Any("field", field)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrParameterInvalid), HTTPReturnMessage: merr.ErrParameterInvalid.Error() + ", data type " + field.DataType + " is invalid(case sensitive).", }) @@ -1137,7 +1165,7 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe if dataType == schemapb.DataType_Array { if _, ok := schemapb.DataType_value[field.ElementDataType]; !ok { log.Ctx(ctx).Warn("element's data type is invalid(case sensitive).", zap.Any("elementDataType", field.ElementDataType), zap.Any("field", field)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrParameterInvalid), HTTPReturnMessage: merr.ErrParameterInvalid.Error() + ", element data type " + field.ElementDataType + " is invalid(case sensitive).", }) @@ -1166,18 +1194,22 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe } if err != nil { log.Ctx(ctx).Warn("high level restful api, marshal collection schema fail", zap.Error(err), zap.Any("request", anyReq)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMarshalCollectionSchema), HTTPReturnMessage: merr.ErrMarshalCollectionSchema.Error() + ", error: " + err.Error(), }) return nil, err } + req.Schema = schema + shardsNum := int32(ShardNumDefault) if shardsNumStr, ok := httpReq.Params["shardsNum"]; ok { if shards, err := strconv.ParseInt(fmt.Sprintf("%v", shardsNumStr), 10, 64); err == nil { shardsNum = int32(shards) } } + req.ShardsNum = shardsNum + consistencyLevel := commonpb.ConsistencyLevel_Bounded if _, ok := httpReq.Params["consistencyLevel"]; ok { if level, ok := commonpb.ConsistencyLevel_value[fmt.Sprintf("%s", httpReq.Params["consistencyLevel"])]; ok { @@ -1186,21 +1218,15 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe err := merr.WrapErrParameterInvalid("Strong, Session, Bounded, Eventually, Customized", httpReq.Params["consistencyLevel"], "consistencyLevel can only be [Strong, Session, Bounded, Eventually, Customized], default: Bounded") log.Ctx(ctx).Warn("high level restful api, create collection fail", zap.Error(err), zap.Any("request", anyReq)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error(), }) return nil, err } } - req := &milvuspb.CreateCollectionRequest{ - DbName: dbName, - CollectionName: httpReq.CollectionName, - Schema: schema, - ShardsNum: shardsNum, - ConsistencyLevel: consistencyLevel, - Properties: []*commonpb.KeyValuePair{}, - } + req.ConsistencyLevel = consistencyLevel + if partitionsNum > 0 { req.NumPartitions = partitionsNum } @@ -1235,12 +1261,12 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe } } else { if len(httpReq.IndexParams) == 0 { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) return nil, nil } for _, indexParam := range httpReq.IndexParams { if _, ok := fieldNames[indexParam.FieldName]; !ok { - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", error: `" + indexParam.FieldName + "` hasn't defined in schema", }) @@ -1272,7 +1298,7 @@ func (h 
*HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe return h.proxy.LoadCollection(ctx, req.(*milvuspb.LoadCollectionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return statusResponse, err } @@ -1283,11 +1309,13 @@ func (h *HandlersV2) listPartitions(ctx context.Context, c *gin.Context, anyReq DbName: dbName, CollectionName: collectionGetter.GetCollectionName(), } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ShowPartitions(reqCtx, req.(*milvuspb.ShowPartitionsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnList(resp.(*milvuspb.ShowPartitionsResponse).PartitionNames)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(resp.(*milvuspb.ShowPartitionsResponse).PartitionNames)) } return resp, err } @@ -1300,11 +1328,12 @@ func (h *HandlersV2) hasPartitions(ctx context.Context, c *gin.Context, anyReq a CollectionName: collectionGetter.GetCollectionName(), PartitionName: partitionGetter.GetPartitionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.HasPartition(reqCtx, req.(*milvuspb.HasPartitionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnHas(resp.(*milvuspb.BoolResponse).Value)) + HTTPReturn(c, http.StatusOK, wrapperReturnHas(resp.(*milvuspb.BoolResponse).Value)) } return resp, err } @@ -1319,11 +1348,12 @@ func (h *HandlersV2) statsPartition(ctx context.Context, c *gin.Context, anyReq CollectionName: collectionGetter.GetCollectionName(), PartitionName: partitionGetter.GetPartitionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.GetPartitionStatistics(reqCtx, req.(*milvuspb.GetPartitionStatisticsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnRowCount(resp.(*milvuspb.GetPartitionStatisticsResponse).Stats)) + HTTPReturn(c, http.StatusOK, wrapperReturnRowCount(resp.(*milvuspb.GetPartitionStatisticsResponse).Stats)) } return resp, err } @@ -1336,11 +1366,12 @@ func (h *HandlersV2) createPartition(ctx context.Context, c *gin.Context, anyReq CollectionName: collectionGetter.GetCollectionName(), PartitionName: partitionGetter.GetPartitionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.CreatePartition(reqCtx, req.(*milvuspb.CreatePartitionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1353,11 +1384,12 @@ func (h *HandlersV2) dropPartition(ctx context.Context, c *gin.Context, anyReq a CollectionName: collectionGetter.GetCollectionName(), PartitionName: partitionGetter.GetPartitionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DropPartition(reqCtx, req.(*milvuspb.DropPartitionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1369,11 +1401,12 @@ func (h *HandlersV2) loadPartitions(ctx context.Context, c *gin.Context, 
anyReq CollectionName: httpReq.CollectionName, PartitionNames: httpReq.PartitionNames, } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.LoadPartitions(reqCtx, req.(*milvuspb.LoadPartitionsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1385,22 +1418,24 @@ func (h *HandlersV2) releasePartitions(ctx context.Context, c *gin.Context, anyR CollectionName: httpReq.CollectionName, PartitionNames: httpReq.PartitionNames, } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ReleasePartitions(reqCtx, req.(*milvuspb.ReleasePartitionsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } func (h *HandlersV2) listUsers(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) { req := &milvuspb.ListCredUsersRequest{} + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ListCredUsers(reqCtx, req.(*milvuspb.ListCredUsersRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnList(resp.(*milvuspb.ListCredUsersResponse).Usernames)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(resp.(*milvuspb.ListCredUsersResponse).Usernames)) } return resp, err } @@ -1414,6 +1449,8 @@ func (h *HandlersV2) describeUser(ctx context.Context, c *gin.Context, anyReq an }, IncludeRoleInfo: true, } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.SelectUser(reqCtx, req.(*milvuspb.SelectUserRequest)) }) @@ -1426,7 +1463,7 @@ func (h *HandlersV2) describeUser(ctx context.Context, c *gin.Context, anyReq an } } } - c.JSON(http.StatusOK, wrapperReturnList(roleNames)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(roleNames)) } return resp, err } @@ -1441,7 +1478,7 @@ func (h *HandlersV2) createUser(ctx context.Context, c *gin.Context, anyReq any, return h.proxy.CreateCredential(reqCtx, req.(*milvuspb.CreateCredentialRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1457,7 +1494,7 @@ func (h *HandlersV2) updateUser(ctx context.Context, c *gin.Context, anyReq any, return h.proxy.UpdateCredential(reqCtx, req.(*milvuspb.UpdateCredentialRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1471,7 +1508,7 @@ func (h *HandlersV2) dropUser(ctx context.Context, c *gin.Context, anyReq any, d return h.proxy.DeleteCredential(reqCtx, req.(*milvuspb.DeleteCredentialRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1486,7 +1523,7 @@ func (h *HandlersV2) operateRoleToUser(ctx context.Context, c *gin.Context, user return h.proxy.OperateUserRole(reqCtx, req.(*milvuspb.OperateUserRoleRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1509,7 
+1546,7 @@ func (h *HandlersV2) listRoles(ctx context.Context, c *gin.Context, anyReq any, for _, role := range resp.(*milvuspb.SelectRoleResponse).Results { roleNames = append(roleNames, role.Role.Name) } - c.JSON(http.StatusOK, wrapperReturnList(roleNames)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(roleNames)) } return resp, err } @@ -1534,7 +1571,7 @@ func (h *HandlersV2) describeRole(ctx context.Context, c *gin.Context, anyReq an } privileges = append(privileges, privilege) } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: privileges}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: privileges}) } return resp, err } @@ -1548,7 +1585,7 @@ func (h *HandlersV2) createRole(ctx context.Context, c *gin.Context, anyReq any, return h.proxy.CreateRole(reqCtx, req.(*milvuspb.CreateRoleRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1562,7 +1599,7 @@ func (h *HandlersV2) dropRole(ctx context.Context, c *gin.Context, anyReq any, d return h.proxy.DropRole(reqCtx, req.(*milvuspb.DropRoleRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1584,7 +1621,7 @@ func (h *HandlersV2) operatePrivilegeToRole(ctx context.Context, c *gin.Context, return h.proxy.OperatePrivilege(reqCtx, req.(*milvuspb.OperatePrivilegeRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1604,6 +1641,8 @@ func (h *HandlersV2) listIndexes(ctx context.Context, c *gin.Context, anyReq any DbName: dbName, CollectionName: collectionGetter.GetCollectionName(), } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (any, error) { resp, err := h.proxy.DescribeIndex(reqCtx, req.(*milvuspb.DescribeIndexRequest)) if errors.Is(err, merr.ErrIndexNotFound) { @@ -1624,7 +1663,7 @@ func (h *HandlersV2) listIndexes(ctx context.Context, c *gin.Context, anyReq any for _, index := range resp.(*milvuspb.DescribeIndexResponse).IndexDescriptions { indexNames = append(indexNames, index.IndexName) } - c.JSON(http.StatusOK, wrapperReturnList(indexNames)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(indexNames)) return resp, err } @@ -1636,6 +1675,8 @@ func (h *HandlersV2) describeIndex(ctx context.Context, c *gin.Context, anyReq a CollectionName: collectionGetter.GetCollectionName(), IndexName: indexGetter.GetIndexName(), } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DescribeIndex(reqCtx, req.(*milvuspb.DescribeIndexRequest)) }) @@ -1664,7 +1705,7 @@ func (h *HandlersV2) describeIndex(ctx context.Context, c *gin.Context, anyReq a } indexInfos = append(indexInfos, indexInfo) } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: indexInfos}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: indexInfos}) } return resp, err } @@ -1681,6 +1722,8 @@ func (h *HandlersV2) createIndex(ctx context.Context, c *gin.Context, anyReq any {Key: common.MetricTypeKey, Value: indexParam.MetricType}, }, } + c.Set(ContextRequest, req) + for key, value := range indexParam.Params { req.ExtraParams = 
append(req.ExtraParams, &commonpb.KeyValuePair{Key: key, Value: fmt.Sprintf("%v", value)}) } @@ -1691,7 +1734,7 @@ func (h *HandlersV2) createIndex(ctx context.Context, c *gin.Context, anyReq any return resp, err } } - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) return httpReq.IndexParams, nil } @@ -1703,11 +1746,13 @@ func (h *HandlersV2) dropIndex(ctx context.Context, c *gin.Context, anyReq any, CollectionName: collGetter.GetCollectionName(), IndexName: indexGetter.GetIndexName(), } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DropIndex(reqCtx, req.(*milvuspb.DropIndexRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1718,11 +1763,13 @@ func (h *HandlersV2) listAlias(ctx context.Context, c *gin.Context, anyReq any, DbName: dbName, CollectionName: collectionGetter.GetCollectionName(), } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ListAliases(reqCtx, req.(*milvuspb.ListAliasesRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnList(resp.(*milvuspb.ListAliasesResponse).Aliases)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(resp.(*milvuspb.ListAliasesResponse).Aliases)) } return resp, err } @@ -1733,12 +1780,14 @@ func (h *HandlersV2) describeAlias(ctx context.Context, c *gin.Context, anyReq a DbName: dbName, Alias: getter.GetAliasName(), } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DescribeAlias(reqCtx, req.(*milvuspb.DescribeAliasRequest)) }) if err == nil { response := resp.(*milvuspb.DescribeAliasResponse) - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{ HTTPDbName: response.DbName, HTTPCollectionName: response.Collection, HTTPAliasName: response.Alias, @@ -1755,11 +1804,13 @@ func (h *HandlersV2) createAlias(ctx context.Context, c *gin.Context, anyReq any CollectionName: collectionGetter.GetCollectionName(), Alias: aliasGetter.GetAliasName(), } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.CreateAlias(reqCtx, req.(*milvuspb.CreateAliasRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1770,11 +1821,13 @@ func (h *HandlersV2) dropAlias(ctx context.Context, c *gin.Context, anyReq any, DbName: dbName, Alias: getter.GetAliasName(), } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DropAlias(reqCtx, req.(*milvuspb.DropAliasRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1787,11 +1840,13 @@ func (h *HandlersV2) alterAlias(ctx context.Context, c *gin.Context, anyReq any, CollectionName: collectionGetter.GetCollectionName(), Alias: aliasGetter.GetAliasName(), } + 
c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.AlterAlias(reqCtx, req.(*milvuspb.AlterAliasRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1805,6 +1860,8 @@ func (h *HandlersV2) listImportJob(ctx context.Context, c *gin.Context, anyReq a DbName: dbName, CollectionName: collectionName, } + c.Set(ContextRequest, req) + if h.checkAuth { err := checkAuthorizationV2(ctx, c, false, &milvuspb.ListImportsAuthPlaceholder{ DbName: dbName, @@ -1834,7 +1891,7 @@ func (h *HandlersV2) listImportJob(ctx context.Context, c *gin.Context, anyReq a records = append(records, jobDetail) } returnData["records"] = records - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: returnData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: returnData}) } return resp, err } @@ -1855,6 +1912,8 @@ func (h *HandlersV2) createImportJob(ctx context.Context, c *gin.Context, anyReq }), Options: funcutil.Map2KeyValuePair(optionsGetter.GetOptions()), } + c.Set(ContextRequest, req) + if h.checkAuth { err := checkAuthorizationV2(ctx, c, false, &milvuspb.ImportAuthPlaceholder{ DbName: dbName, @@ -1871,7 +1930,7 @@ func (h *HandlersV2) createImportJob(ctx context.Context, c *gin.Context, anyReq if err == nil { returnData := make(map[string]interface{}) returnData["jobId"] = resp.(*internalpb.ImportResponse).GetJobID() - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: returnData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: returnData}) } return resp, err } @@ -1882,6 +1941,8 @@ func (h *HandlersV2) getImportJobProcess(ctx context.Context, c *gin.Context, an DbName: dbName, JobID: jobIDGetter.GetJobID(), } + c.Set(ContextRequest, req) + if h.checkAuth { err := checkAuthorizationV2(ctx, c, false, &milvuspb.GetImportProgressAuthPlaceholder{ DbName: dbName, @@ -1927,7 +1988,7 @@ func (h *HandlersV2) getImportJobProcess(ctx context.Context, c *gin.Context, an } returnData["fileSize"] = totalFileSize returnData["details"] = details - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: returnData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: returnData}) } return resp, err } diff --git a/internal/distributed/proxy/httpserver/utils.go b/internal/distributed/proxy/httpserver/utils.go index 3053b23449960..7a0ce94af7a38 100644 --- a/internal/distributed/proxy/httpserver/utils.go +++ b/internal/distributed/proxy/httpserver/utils.go @@ -28,6 +28,22 @@ import ( "github.com/milvus-io/milvus/pkg/util/typeutil" ) +func HTTPReturn(c *gin.Context, code int, result gin.H) { + c.Set(HTTPReturnCode, result[HTTPReturnCode]) + if errorMsg, ok := result[HTTPReturnMessage]; ok { + c.Set(HTTPReturnMessage, errorMsg) + } + c.JSON(code, result) +} + +func HTTPAbortReturn(c *gin.Context, code int, result gin.H) { + c.Set(HTTPReturnCode, result[HTTPReturnCode]) + if errorMsg, ok := result[HTTPReturnMessage]; ok { + c.Set(HTTPReturnMessage, errorMsg) + } + c.AbortWithStatusJSON(code, result) +} + func ParseUsernamePassword(c *gin.Context) (string, string, bool) { username, password, ok := c.Request.BasicAuth() if !ok { diff --git a/internal/distributed/proxy/service.go b/internal/distributed/proxy/service.go index 
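
The two helpers added to utils.go above replace direct calls to c.JSON and c.AbortWithStatusJSON throughout handler_v2.go: besides writing the JSON body, they store the return code and message in the gin context so the REST access log can pick them up after the handler finishes. A hedged usage sketch, not part of the patch:

// Success path: HTTP status stays 200 and merr.Code(nil) marks the call
// successful both in the JSON body and in the recorded context keys.
HTTPReturn(c, http.StatusOK, gin.H{
	HTTPReturnCode: merr.Code(nil),
	HTTPReturnData: gin.H{"insertCount": 10},
})

// Failure path: HTTPAbortReturn additionally aborts the gin handler chain,
// while still recording the error code and message for the access log.
HTTPAbortReturn(c, http.StatusOK, gin.H{
	HTTPReturnCode:    merr.Code(merr.ErrInvalidInsertData),
	HTTPReturnMessage: merr.ErrInvalidInsertData.Error(),
})
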
bb902bc3280ae..faae5ed75064f 100644 --- a/internal/distributed/proxy/service.go +++ b/internal/distributed/proxy/service.go @@ -172,6 +172,8 @@ func (s *Server) registerHTTPServer() { func (s *Server) startHTTPServer(errChan chan error) { defer s.wg.Done() ginHandler := gin.New() + ginHandler.Use(accesslog.AccessLogMiddleware) + ginLogger := gin.LoggerWithConfig(gin.LoggerConfig{ SkipPaths: proxy.Params.ProxyCfg.GinLogSkipPaths.GetAsStrings(), Formatter: func(param gin.LogFormatterParams) string { @@ -182,6 +184,8 @@ func (s *Server) startHTTPServer(errChan chan error) { if !ok { traceID = "" } + + accesslog.SetHTTPParams(¶m) return fmt.Sprintf("[%v] [GIN] [%s] [traceID=%s] [code=%3d] [latency=%v] [client=%s] [method=%s] [error=%s]\n", param.TimeStamp.Format("2006/01/02 15:04:05.000 Z07:00"), param.Path, diff --git a/internal/proxy/accesslog/formater_test.go b/internal/proxy/accesslog/formater_test.go index 4a231a8eeeb98..e9e2f92d24aec 100644 --- a/internal/proxy/accesslog/formater_test.go +++ b/internal/proxy/accesslog/formater_test.go @@ -32,7 +32,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/internal/proxy/accesslog/info" - "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/tracer" "github.com/milvus-io/milvus/pkg/util" "github.com/milvus-io/milvus/pkg/util/crypto" "github.com/milvus-io/milvus/pkg/util/merr" @@ -153,16 +153,15 @@ func (s *LogFormatterSuite) TestFormatMethodInfo() { for _, req := range s.reqs { i := info.NewGrpcAccessInfo(metaContext, s.serverinfo, req) fs := formatter.Format(i) - log.Info(fs) s.True(strings.Contains(fs, s.traceID)) } + tracer.Init() traceContext, traceSpan := otel.Tracer(typeutil.ProxyRole).Start(s.ctx, "test") trueTraceID := traceSpan.SpanContext().TraceID().String() for _, req := range s.reqs { i := info.NewGrpcAccessInfo(traceContext, s.serverinfo, req) fs := formatter.Format(i) - log.Info(fs) s.True(strings.Contains(fs, trueTraceID)) } } diff --git a/internal/proxy/accesslog/info/grpc_info.go b/internal/proxy/accesslog/info/grpc_info.go index 56b737c02acb6..9d94078f72bfb 100644 --- a/internal/proxy/accesslog/info/grpc_info.go +++ b/internal/proxy/accesslog/info/grpc_info.go @@ -33,7 +33,6 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/internal/proxy/connection" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/requestutil" ) @@ -129,6 +128,10 @@ func (i *GrpcAccessInfo) TraceID() string { } traceID := trace.SpanFromContext(i.ctx).SpanContext().TraceID() + if !traceID.IsValid() { + return Unknown + } + return traceID.String() } @@ -252,10 +255,6 @@ func (i *GrpcAccessInfo) SdkVersion() string { return getSdkVersionByUserAgent(i.ctx) } -func (i *GrpcAccessInfo) ClusterPrefix() string { - return paramtable.Get().CommonCfg.ClusterPrefix.GetValue() -} - func (i *GrpcAccessInfo) OutputFields() string { fields, ok := requestutil.GetOutputFieldsFromRequest(i.req) if ok { diff --git a/internal/proxy/accesslog/info/restful_info.go b/internal/proxy/accesslog/info/restful_info.go new file mode 100644 index 0000000000000..cd7e4eba3beda --- /dev/null +++ b/internal/proxy/accesslog/info/restful_info.go @@ -0,0 +1,189 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
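
Taken together, the service.go and accesslog/util.go hunks in this patch wire REST access logging in two places: AccessLogMiddleware creates a per-request RestfulInfo and writes it out after the handler chain completes, and the gin logger formatter hands its LogFormatterParams back via SetHTTPParams so status code, latency, and the context keys set by HTTPReturn become loggable. A rough sketch of that wiring, condensed from startHTTPServer and assuming the usual gin and accesslog imports (not part of the patch):

// Illustrative sketch of the wiring added in service.go.
func newRESTHandler() *gin.Engine {
	ginHandler := gin.New()
	// Stores a RestfulInfo in the gin context for each request and, once the
	// chain finishes, writes the collected record to the access logger.
	ginHandler.Use(accesslog.AccessLogMiddleware)
	// The logger formatter passes its params back to that RestfulInfo.
	ginHandler.Use(gin.LoggerWithConfig(gin.LoggerConfig{
		Formatter: func(param gin.LogFormatterParams) string {
			accesslog.SetHTTPParams(&param)
			return fmt.Sprintf("[GIN] [%s] [code=%3d]\n", param.Path, param.StatusCode)
		},
	}))
	return ginHandler
}
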
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package info + +import ( + "fmt" + "net/http" + "sync" + "time" + + "github.com/gin-gonic/gin" + + "github.com/milvus-io/milvus/pkg/util/requestutil" +) + +const ( + ContextUsername = "username" + ContextReturnCode = "code" + ContextReturnMessage = "message" + ContextRequest = "request" +) + +type RestfulInfo struct { + params *gin.LogFormatterParams + start time.Time + req interface{} + reqInitOnce sync.Once +} + +func NewRestfulInfo() *RestfulInfo { + return &RestfulInfo{start: time.Now()} +} + +func (i *RestfulInfo) SetParams(p *gin.LogFormatterParams) { + i.params = p +} + +func (i *RestfulInfo) InitReq() { + req, ok := i.params.Keys[ContextRequest] + if !ok { + return + } + i.req = req +} + +func (i *RestfulInfo) TimeCost() string { + return fmt.Sprint(i.params.Latency) +} + +func (i *RestfulInfo) TimeNow() string { + return time.Now().Format(timeFormat) +} + +func (i *RestfulInfo) TimeStart() string { + if i.start.IsZero() { + return Unknown + } + return i.start.Format(timeFormat) +} + +func (i *RestfulInfo) TimeEnd() string { + return i.params.TimeStamp.Format(timeFormat) +} + +func (i *RestfulInfo) MethodName() string { + return i.params.Path +} + +func (i *RestfulInfo) Address() string { + return i.params.ClientIP +} + +func (i *RestfulInfo) TraceID() string { + traceID, ok := i.params.Keys["traceID"] + if !ok { + return Unknown + } + return traceID.(string) +} + +func (i *RestfulInfo) MethodStatus() string { + if i.params.StatusCode != http.StatusOK { + return fmt.Sprintf("HttpError%d", i.params.StatusCode) + } + + if code, ok := i.params.Keys[ContextReturnCode]; !ok || code.(int32) != 0 { + return "Failed" + } + + return "Successful" +} + +func (i *RestfulInfo) UserName() string { + username, ok := i.params.Keys[ContextUsername] + if !ok || username == "" { + return Unknown + } + + return username.(string) +} + +func (i *RestfulInfo) ResponseSize() string { + return fmt.Sprint(i.params.BodySize) +} + +func (i *RestfulInfo) ErrorCode() string { + code, ok := i.params.Keys[ContextReturnCode] + if !ok { + return Unknown + } + return fmt.Sprint(code) +} + +func (i *RestfulInfo) ErrorMsg() string { + message, ok := i.params.Keys[ContextReturnMessage] + if !ok { + return "" + } + return fmt.Sprint(message) +} + +func (i *RestfulInfo) SdkVersion() string { + return "Restful" +} + +func (i *RestfulInfo) DbName() string { + name, ok := requestutil.GetDbNameFromRequest(i.req) + if !ok { + return Unknown + } + return name.(string) +} + +func (i *RestfulInfo) CollectionName() string { + name, ok := requestutil.GetCollectionNameFromRequest(i.req) + if !ok { + return Unknown + } + return name.(string) +} + +func (i *RestfulInfo) PartitionName() string { + name, ok := requestutil.GetPartitionNameFromRequest(i.req) + if ok { + return name.(string) + } + + names, ok := requestutil.GetPartitionNamesFromRequest(i.req) + if ok { + return fmt.Sprint(names.([]string)) + } + + return Unknown +} + +func (i *RestfulInfo) 
Expression() string { + expr, ok := requestutil.GetExprFromRequest(i.req) + if ok { + return expr.(string) + } + + dsl, ok := requestutil.GetDSLFromRequest(i.req) + if ok { + return dsl.(string) + } + return Unknown +} + +func (i *RestfulInfo) OutputFields() string { + fields, ok := requestutil.GetOutputFieldsFromRequest(i.req) + if ok { + return fmt.Sprint(fields.([]string)) + } + return Unknown +} diff --git a/internal/proxy/accesslog/info/restful_info_test.go b/internal/proxy/accesslog/info/restful_info_test.go new file mode 100644 index 0000000000000..8a12ad1e93246 --- /dev/null +++ b/internal/proxy/accesslog/info/restful_info_test.go @@ -0,0 +1,192 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package info + +import ( + "fmt" + "net/http" + "testing" + "time" + + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/paramtable" +) + +type RestfulAccessInfoSuite struct { + suite.Suite + + username string + traceID string + info *RestfulInfo +} + +func (s *RestfulAccessInfoSuite) SetupSuite() { + paramtable.Init() +} + +func (s *RestfulAccessInfoSuite) SetupTest() { + s.username = "test-user" + s.traceID = "test-trace" + s.info = &RestfulInfo{} + s.info.SetParams( + &gin.LogFormatterParams{ + Keys: make(map[string]any), + }) +} + +func (s *RestfulAccessInfoSuite) TestTimeCost() { + s.info.params.Latency = time.Second + result := Get(s.info, "$time_cost") + s.Equal(fmt.Sprint(time.Second), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestTimeNow() { + result := Get(s.info, "$time_now") + s.NotEqual(Unknown, result[0]) +} + +func (s *RestfulAccessInfoSuite) TestTimeStart() { + result := Get(s.info, "$time_start") + s.Equal(Unknown, result[0]) + + s.info.start = time.Now() + result = Get(s.info, "$time_start") + s.Equal(s.info.start.Format(timeFormat), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestTimeEnd() { + s.info.params.TimeStamp = time.Now() + result := Get(s.info, "$time_end") + s.Equal(s.info.params.TimeStamp.Format(timeFormat), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestMethodName() { + s.info.params.Path = "/restful/test" + result := Get(s.info, "$method_name") + s.Equal(s.info.params.Path, result[0]) +} + +func (s *RestfulAccessInfoSuite) TestAddress() { + s.info.params.ClientIP = "127.0.0.1" + result := Get(s.info, "$user_addr") + s.Equal(s.info.params.ClientIP, result[0]) +} + +func (s *RestfulAccessInfoSuite) TestTraceID() { + result := Get(s.info, "$trace_id") + s.Equal(Unknown, result[0]) + + s.info.params.Keys["traceID"] = "testtrace" + result = Get(s.info, "$trace_id") + s.Equal(s.info.params.Keys["traceID"], result[0]) +} + +func (s 
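
Each getter on RestfulInfo above backs one of the $-placeholders that the access-log formatter resolves through Get(info, "$..."), as exercised by the surrounding test file. A hypothetical base-format string showing only placeholders that appear in these tests (the actual configured format is deployment-specific and not part of the patch):

// Hypothetical access-log format; each placeholder maps to a RestfulInfo getter.
const exampleRestfulLogFmt = "[$time_now] [REST] [$method_name] [traceID=$trace_id] " +
	"[status=$method_status] [code=$error_code] [db=$database_name] " +
	"[sdk=$sdk_version] [cost=$time_cost]"
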
*RestfulAccessInfoSuite) TestStatus() { + s.info.params.StatusCode = http.StatusBadRequest + result := Get(s.info, "$method_status") + s.Equal("HttpError400", result[0]) + + s.info.params.StatusCode = http.StatusOK + s.info.params.Keys[ContextReturnCode] = merr.Code(merr.ErrChannelLack) + result = Get(s.info, "$method_status") + s.Equal("Failed", result[0]) + + s.info.params.StatusCode = http.StatusOK + s.info.params.Keys[ContextReturnCode] = merr.Code(nil) + result = Get(s.info, "$method_status") + s.Equal("Successful", result[0]) +} + +func (s *RestfulAccessInfoSuite) TestErrorCode() { + result := Get(s.info, "$error_code") + s.Equal(Unknown, result[0]) + + s.info.params.Keys[ContextReturnCode] = 200 + result = Get(s.info, "$error_code") + s.Equal(fmt.Sprint(200), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestErrorMsg() { + s.info.params.Keys[ContextReturnMessage] = merr.ErrChannelLack.Error() + result := Get(s.info, "$error_msg") + s.Equal(merr.ErrChannelLack.Error(), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestDbName() { + result := Get(s.info, "$database_name") + s.Equal(Unknown, result[0]) + + req := &milvuspb.QueryRequest{ + DbName: "test", + } + s.info.req = req + result = Get(s.info, "$database_name") + s.Equal("test", result[0]) +} + +func (s *RestfulAccessInfoSuite) TestSdkInfo() { + result := Get(s.info, "$sdk_version") + s.Equal("Restful", result[0]) +} + +func (s *RestfulAccessInfoSuite) TestExpression() { + result := Get(s.info, "$method_expr") + s.Equal(Unknown, result[0]) + + testExpr := "test" + s.info.req = &milvuspb.QueryRequest{ + Expr: testExpr, + } + result = Get(s.info, "$method_expr") + s.Equal(testExpr, result[0]) + + s.info.req = &milvuspb.SearchRequest{ + Dsl: testExpr, + } + result = Get(s.info, "$method_expr") + s.Equal(testExpr, result[0]) +} + +func (s *RestfulAccessInfoSuite) TestOutputFields() { + result := Get(s.info, "$output_fields") + s.Equal(Unknown, result[0]) + + fields := []string{"pk"} + s.info.params.Keys[ContextRequest] = &milvuspb.QueryRequest{ + OutputFields: fields, + } + s.info.InitReq() + result = Get(s.info, "$output_fields") + s.Equal(fmt.Sprint(fields), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestClusterPrefix() { + cluster := "instance-test" + paramtable.Init() + ClusterPrefix.Store(cluster) + + result := Get(s.info, "$cluster_prefix") + s.Equal(cluster, result[0]) +} + +func TestRestfulAccessInfo(t *testing.T) { + suite.Run(t, new(RestfulAccessInfoSuite)) +} diff --git a/internal/proxy/accesslog/util.go b/internal/proxy/accesslog/util.go index a0f35d74c7ea1..6e8f4a656b058 100644 --- a/internal/proxy/accesslog/util.go +++ b/internal/proxy/accesslog/util.go @@ -22,6 +22,7 @@ import ( "time" "github.com/cockroachdb/errors" + "github.com/gin-gonic/gin" "google.golang.org/grpc" "github.com/milvus-io/milvus/internal/proxy/accesslog/info" @@ -29,6 +30,8 @@ import ( type AccessKey struct{} +const ContextLogKey = "accesslog" + func UnaryAccessLogInterceptor(ctx context.Context, req any, rpcInfo *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { accessInfo := info.NewGrpcAccessInfo(ctx, rpcInfo, req) newCtx := context.WithValue(ctx, AccessKey{}, accessInfo) @@ -44,6 +47,24 @@ func UnaryUpdateAccessInfoInterceptor(ctx context.Context, req any, rpcInfonfo * return handler(ctx, req) } +func AccessLogMiddleware(ctx *gin.Context) { + accessInfo := info.NewRestfulInfo() + ctx.Set(ContextLogKey, accessInfo) + ctx.Next() + accessInfo.InitReq() + _globalL.Write(accessInfo) +} + +func SetHTTPParams(p 
*gin.LogFormatterParams) { + value, ok := p.Keys[ContextLogKey] + if !ok { + return + } + + info := value.(*info.RestfulInfo) + info.SetParams(p) +} + func join(path1, path2 string) string { if strings.HasSuffix(path1, "/") { return path1 + path2 From 4159a4d5d72a8911ef0e23cca70c416651270218 Mon Sep 17 00:00:00 2001 From: zhuwenxing Date: Fri, 31 May 2024 13:54:07 +0800 Subject: [PATCH 106/126] test: update nightly ci (#33430) update nightly ci Signed-off-by: zhuwenxing --- tests/scripts/ci_e2e.sh | 20 ++++++++++---------- tests/scripts/ci_e2e_4am.sh | 23 ----------------------- 2 files changed, 10 insertions(+), 33 deletions(-) diff --git a/tests/scripts/ci_e2e.sh b/tests/scripts/ci_e2e.sh index 322bca1ee8556..7daff6b45a034 100755 --- a/tests/scripts/ci_e2e.sh +++ b/tests/scripts/ci_e2e.sh @@ -64,6 +64,16 @@ fi echo "prepare e2e test" install_pytest_requirements +if [[ "${MILVUS_HELM_RELEASE_NAME}" != *"msop"* ]]; then + if [[ -n "${TEST_TIMEOUT:-}" ]]; then + + timeout "${TEST_TIMEOUT}" pytest testcases/test_bulk_insert.py --timeout=300 --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} --minio_host ${MINIO_SERVICE_NAME} \ + --html=${CI_LOG_PATH}/report_bulk_insert.html --self-contained-html + else + pytest testcases/test_bulk_insert.py --timeout=300 --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} --minio_host ${MINIO_SERVICE_NAME} \ + --html=${CI_LOG_PATH}/report_bulk_insert.html --self-contained-html + fi +fi # Pytest is not able to have both --timeout & --workers, so do not add --timeout or --workers in the shell script if [[ -n "${TEST_TIMEOUT:-}" ]]; then @@ -74,13 +84,3 @@ else pytest --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} \ --html=${CI_LOG_PATH}/report.html --self-contained-html ${@:-} fi - -# Run bulk insert test -if [[ -n "${TEST_TIMEOUT:-}" ]]; then - - timeout "${TEST_TIMEOUT}" pytest testcases/test_bulk_insert.py --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} --minio_host ${MINIO_SERVICE_NAME} \ - --html=${CI_LOG_PATH}/report_bulk_insert.html --self-contained-html -else - pytest testcases/test_bulk_insert.py --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} --minio_host ${MINIO_SERVICE_NAME} \ - --html=${CI_LOG_PATH}/report_bulk_insert.html --self-contained-html -fi \ No newline at end of file diff --git a/tests/scripts/ci_e2e_4am.sh b/tests/scripts/ci_e2e_4am.sh index aea78b1c3fa5f..35b7d1e3e5c04 100755 --- a/tests/scripts/ci_e2e_4am.sh +++ b/tests/scripts/ci_e2e_4am.sh @@ -133,26 +133,3 @@ else pytest --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} \ --html=${CI_LOG_PATH}/report.html --self-contained-html ${@:-} fi - -# Run bulk insert test -# if MILVUS_HELM_RELEASE_NAME contains "msop", then it is one pod mode, skip the bulk insert test -if [[ "${MILVUS_HELM_RELEASE_NAME}" != *"msop"* ]]; then - if [[ -n "${TEST_TIMEOUT:-}" ]]; then - - timeout "${TEST_TIMEOUT}" pytest testcases/test_bulk_insert.py --timeout=300 --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} --minio_host ${MINIO_SERVICE_NAME} \ - --html=${CI_LOG_PATH}/report_bulk_insert.html --self-contained-html - else - pytest testcases/test_bulk_insert.py --timeout=300 --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} --minio_host ${MINIO_SERVICE_NAME} \ - --html=${CI_LOG_PATH}/report_bulk_insert.html --self-contained-html - fi -fi - -# # Run concurrent test with 5 processes -# if [[ -n "${TEST_TIMEOUT:-}" ]]; then - -# timeout "${TEST_TIMEOUT}" pytest testcases/test_concurrent.py --host ${MILVUS_SERVICE_NAME} --port 
${MILVUS_SERVICE_PORT} --count 5 -n 5 \ -# --html=${CI_LOG_PATH}/report_concurrent.html --self-contained-html -# else -# pytest testcases/test_concurrent.py --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} --count 5 -n 5 \ -# --html=${CI_LOG_PATH}/report_concurrent.html --self-contained-html -# fi From 3336b91ce6a174b1ecefd2480bc1c6e349089791 Mon Sep 17 00:00:00 2001 From: zhuwenxing Date: Fri, 31 May 2024 13:55:52 +0800 Subject: [PATCH 107/126] test: add channel exclusive balance test and resource group test (#33093) Signed-off-by: zhuwenxing --- tests/python_client/base/client_base.py | 10 +- tests/python_client/chaos/checker.py | 4 +- tests/python_client/common/milvus_sys.py | 17 + .../customize/milvus_operator.py | 58 ++ .../customize/template/default.yaml | 109 +- tests/python_client/deploy/milvus_crd.yaml | 13 +- tests/python_client/pytest.ini | 4 +- tests/python_client/requirements.txt | 1 + .../python_client/resource_group/conftest.py | 11 + .../test_channel_exclusive_balance.py | 446 +++++++++ .../resource_group/test_resource_group.py | 944 ++++++++++++++++++ tests/python_client/utils/util_birdwatcher.py | 79 ++ tests/python_client/utils/util_k8s.py | 2 + 13 files changed, 1682 insertions(+), 16 deletions(-) create mode 100644 tests/python_client/resource_group/conftest.py create mode 100644 tests/python_client/resource_group/test_channel_exclusive_balance.py create mode 100644 tests/python_client/resource_group/test_resource_group.py create mode 100644 tests/python_client/utils/util_birdwatcher.py diff --git a/tests/python_client/base/client_base.py b/tests/python_client/base/client_base.py index e5b3cfd2e6cc5..0b52845885136 100644 --- a/tests/python_client/base/client_base.py +++ b/tests/python_client/base/client_base.py @@ -1,4 +1,3 @@ -from numpy.core.fromnumeric import _partition_dispatcher import pytest import sys from pymilvus import DefaultConfig @@ -33,7 +32,7 @@ class Base: collection_object_list = [] resource_group_list = [] high_level_api_wrap = None - + skip_connection = False def setup_class(self): log.info("[setup_class] Start setup class...") @@ -128,6 +127,9 @@ class TestcaseBase(Base): def _connect(self, enable_milvus_client_api=False): """ Add a connection and create the connect """ + if self.skip_connection: + return None + if enable_milvus_client_api: if cf.param_info.param_uri: uri = cf.param_info.param_uri @@ -252,8 +254,8 @@ def init_collection_general(self, prefix="test", insert_data=False, nb=ct.defaul insert_ids = [] time_stamp = 0 # 1 create collection - default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field, - enable_dynamic_field=enable_dynamic_field, + default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field, + enable_dynamic_field=enable_dynamic_field, with_json=with_json, multiple_dim_array=multiple_dim_array, is_partition_key=is_partition_key, vector_data_type=vector_data_type) diff --git a/tests/python_client/chaos/checker.py b/tests/python_client/chaos/checker.py index 5eb256977775d..66cd25475d4d7 100644 --- a/tests/python_client/chaos/checker.py +++ b/tests/python_client/chaos/checker.py @@ -1331,10 +1331,10 @@ def keep_running(self): class DeleteChecker(Checker): """check delete operations in a dependent thread""" - def __init__(self, collection_name=None, schema=None): + def __init__(self, collection_name=None, schema=None, shards_num=2): if collection_name is None: collection_name = cf.gen_unique_str("DeleteChecker_") - 
super().__init__(collection_name=collection_name, schema=schema) + super().__init__(collection_name=collection_name, schema=schema, shards_num=shards_num) res, result = self.c_wrap.create_index(self.float_vector_field_name, constants.DEFAULT_INDEX_PARAM, timeout=timeout, diff --git a/tests/python_client/common/milvus_sys.py b/tests/python_client/common/milvus_sys.py index f8f2e3e4721a7..7db540bb72875 100644 --- a/tests/python_client/common/milvus_sys.py +++ b/tests/python_client/common/milvus_sys.py @@ -3,6 +3,7 @@ from pymilvus.grpc_gen import milvus_pb2 as milvus_types from pymilvus import connections from utils.util_log import test_log as log +from utils.util_log import test_log as log sys_info_req = ujson.dumps({"metric_type": "system_info"}) sys_statistics_req = ujson.dumps({"metric_type": "system_statistics"}) sys_logs_req = ujson.dumps({"metric_type": "system_logs"}) @@ -17,9 +18,24 @@ def __init__(self, alias='default'): # TODO: for now it only supports non_orm style API for getMetricsRequest req = milvus_types.GetMetricsRequest(request=sys_info_req) + self.sys_info = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) + # req = milvus_types.GetMetricsRequest(request=sys_statistics_req) + # self.sys_statistics = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) + # req = milvus_types.GetMetricsRequest(request=sys_logs_req) + # self.sys_logs = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) self.sys_info = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=60) log.debug(f"sys_info: {self.sys_info}") + def refresh(self): + req = milvus_types.GetMetricsRequest(request=sys_info_req) + self.sys_info = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) + # req = milvus_types.GetMetricsRequest(request=sys_statistics_req) + # self.sys_statistics = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) + # req = milvus_types.GetMetricsRequest(request=sys_logs_req) + # self.sys_logs = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) + log.debug(f"sys info response: {self.sys_info.response}") + + @property def build_version(self): """get the first node's build version as milvus build version""" @@ -84,6 +100,7 @@ def proxy_nodes(self): @property def nodes(self): """get all the nodes in Milvus deployment""" + self.refresh() all_nodes = json.loads(self.sys_info.response).get('nodes_info') online_nodes = [node for node in all_nodes if node["infos"]["has_error"] is False] return online_nodes diff --git a/tests/python_client/customize/milvus_operator.py b/tests/python_client/customize/milvus_operator.py index 1140ff08f0e1a..658cbc4334bc3 100644 --- a/tests/python_client/customize/milvus_operator.py +++ b/tests/python_client/customize/milvus_operator.py @@ -3,6 +3,7 @@ import time from benedict import benedict from utils.util_log import test_log as log +from utils.util_k8s import get_pod_ip_name_pairs from common.cus_resource_opts import CustomResourceOperations as CusResource template_yaml = os.path.join(os.path.dirname(__file__), 'template/default.yaml') @@ -81,11 +82,13 @@ def uninstall(self, release_name, namespace='default', delete_depends=True, dele if delete_depends: del_configs = {'spec.dependencies.etcd.inCluster.deletionPolicy': 'Delete', 'spec.dependencies.pulsar.inCluster.deletionPolicy': 'Delete', + 'spec.dependencies.kafka.inCluster.deletionPolicy': 'Delete', 'spec.dependencies.storage.inCluster.deletionPolicy': 'Delete' } if delete_pvc: 
del_configs.update({'spec.dependencies.etcd.inCluster.pvcDeletion': True, 'spec.dependencies.pulsar.inCluster.pvcDeletion': True, + 'spec.dependencies.kafka.inCluster.pvcDeletion': True, 'spec.dependencies.storage.inCluster.pvcDeletion': True }) if delete_depends or delete_pvc: @@ -113,6 +116,40 @@ def upgrade(self, release_name, configs, namespace='default'): version=self.version, namespace=namespace) log.debug(f"upgrade milvus with configs: {d_configs}") cus_res.patch(release_name, d_configs) + self.wait_for_healthy(release_name, namespace=namespace) + + def rolling_update(self, release_name, new_image_name, namespace='default'): + """ + Method: patch custom resource object to rolling update milvus + Params: + release_name: release name of milvus + namespace: namespace that the milvus is running in + """ + cus_res = CusResource(kind=self.plural, group=self.group, + version=self.version, namespace=namespace) + rolling_configs = {'spec.components.enableRollingUpdate': True, + 'spec.components.imageUpdateMode': "rollingUpgrade", + 'spec.components.image': new_image_name} + log.debug(f"rolling update milvus with configs: {rolling_configs}") + cus_res.patch(release_name, rolling_configs) + self.wait_for_healthy(release_name, namespace=namespace) + + def scale(self, release_name, component, replicas, namespace='default'): + """ + Method: scale milvus components by replicas + Params: + release_name: release name of milvus + replicas: the number of replicas to scale + component: the component to scale, e.g: dataNode, queryNode, indexNode, proxy + namespace: namespace that the milvus is running in + """ + cus_res = CusResource(kind=self.plural, group=self.group, + version=self.version, namespace=namespace) + component = component.replace('node', 'Node') + scale_configs = {f'spec.components.{component}.replicas': replicas} + log.info(f"scale milvus with configs: {scale_configs}") + self.upgrade(release_name, scale_configs, namespace=namespace) + self.wait_for_healthy(release_name, namespace=namespace) def wait_for_healthy(self, release_name, namespace='default', timeout=600): """ @@ -152,3 +189,24 @@ def endpoint(self, release_name, namespace='default'): endpoint = res_object['status']['endpoint'] return endpoint + + def etcd_endpoints(self, release_name, namespace='default'): + """ + Method: get etcd endpoints by name and namespace + Return: a string type etcd endpoints. 
e.g: host:port + """ + etcd_endpoints = None + cus_res = CusResource(kind=self.plural, group=self.group, + version=self.version, namespace=namespace) + res_object = cus_res.get(release_name) + try: + etcd_endpoints = res_object['spec']['dependencies']['etcd']['endpoints'] + except KeyError: + log.info("etcd endpoints not found") + # get pod ip by pod name + label_selector = f"app.kubernetes.io/instance={release_name}-etcd, app.kubernetes.io/name=etcd" + res = get_pod_ip_name_pairs(namespace, label_selector) + if res: + etcd_endpoints = [f"{pod_ip}:2379" for pod_ip in res.keys()] + return etcd_endpoints[0] + diff --git a/tests/python_client/customize/template/default.yaml b/tests/python_client/customize/template/default.yaml index 507fe56193322..d3f71a8bbe139 100644 --- a/tests/python_client/customize/template/default.yaml +++ b/tests/python_client/customize/template/default.yaml @@ -13,6 +13,7 @@ spec: simdType: avx components: {} dependencies: + msgStreamType: kafka etcd: inCluster: deletionPolicy: Delete @@ -21,6 +22,113 @@ spec: metrics: podMonitor: enabled: true + kafka: + inCluster: + deletionPolicy: Retain + pvcDeletion: false + values: + replicaCount: 3 + defaultReplicationFactor: 2 + metrics: + kafka: + enabled: true + serviceMonitor: + enabled: true + jmx: + enabled: true + pulsar: + inCluster: + deletionPolicy: Retain + pvcDeletion: false + values: + components: + autorecovery: false + functions: false + toolset: false + pulsar_manager: false + monitoring: + prometheus: false + grafana: false + node_exporter: false + alert_manager: false + proxy: + replicaCount: 1 + resources: + requests: + cpu: 0.01 + memory: 256Mi + configData: + PULSAR_MEM: > + -Xms256m -Xmx256m + PULSAR_GC: > + -XX:MaxDirectMemorySize=256m + bookkeeper: + replicaCount: 2 + resources: + requests: + cpu: 0.01 + memory: 256Mi + configData: + PULSAR_MEM: > + -Xms256m + -Xmx256m + -XX:MaxDirectMemorySize=256m + PULSAR_GC: > + -Dio.netty.leakDetectionLevel=disabled + -Dio.netty.recycler.linkCapacity=1024 + -XX:+UseG1GC -XX:MaxGCPauseMillis=10 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:ParallelGCThreads=32 + -XX:ConcGCThreads=32 + -XX:G1NewSizePercent=50 + -XX:+DisableExplicitGC + -XX:-ResizePLAB + -XX:+ExitOnOutOfMemoryError + -XX:+PerfDisableSharedMem + -XX:+PrintGCDetails + zookeeper: + replicaCount: 1 + resources: + requests: + cpu: 0.01 + memory: 256Mi + configData: + PULSAR_MEM: > + -Xms256m + -Xmx256m + PULSAR_GC: > + -Dcom.sun.management.jmxremote + -Djute.maxbuffer=10485760 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis -XX:+DisableExplicitGC + -XX:+PerfDisableSharedMem + -Dzookeeper.forceSync=no + broker: + replicaCount: 1 + resources: + requests: + cpu: 0.01 + memory: 256Mi + configData: + PULSAR_MEM: > + -Xms256m + -Xmx256m + PULSAR_GC: > + -XX:MaxDirectMemorySize=256m + -Dio.netty.leakDetectionLevel=disabled + -Dio.netty.recycler.linkCapacity=1024 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:ParallelGCThreads=32 + -XX:ConcGCThreads=32 + -XX:G1NewSizePercent=50 + -XX:+DisableExplicitGC + -XX:-ResizePLAB + -XX:+ExitOnOutOfMemoryError storage: inCluster: deletionPolicy: Delete @@ -29,4 +137,3 @@ spec: metrics: podMonitor: enabled: true - \ No newline at end of file diff --git a/tests/python_client/deploy/milvus_crd.yaml b/tests/python_client/deploy/milvus_crd.yaml index 41cab3351122b..d078b76463753 100644 --- a/tests/python_client/deploy/milvus_crd.yaml +++ 
b/tests/python_client/deploy/milvus_crd.yaml @@ -7,11 +7,11 @@ metadata: labels: app: milvus spec: - mode: standalone + mode: cluster config: dataNode: memory: - forceSyncEnable: false + forceSyncEnable: false rootCoord: enableActiveStandby: true dataCoord: @@ -29,7 +29,7 @@ spec: components: enableRollingUpdate: true imageUpdateMode: rollingUpgrade - image: milvusdb/milvus:2.2.0-20230208-2e4d64ec + image: harbor.milvus.io/milvus/milvus:master-20240426-4fb8044a-amd64 disableMetric: false dataNode: replicas: 3 @@ -45,7 +45,7 @@ spec: pvcDeletion: false values: replicaCount: 3 - kafka: + kafka: inCluster: deletionPolicy: Retain pvcDeletion: false @@ -58,13 +58,13 @@ spec: serviceMonitor: enabled: true jmx: - enabled: true + enabled: true pulsar: inCluster: deletionPolicy: Retain pvcDeletion: false values: - components: + components: autorecovery: false functions: false toolset: false @@ -158,4 +158,3 @@ spec: pvcDeletion: false values: mode: distributed - \ No newline at end of file diff --git a/tests/python_client/pytest.ini b/tests/python_client/pytest.ini index 122b5e8bf6a0f..1c90a7f2fd3c9 100644 --- a/tests/python_client/pytest.ini +++ b/tests/python_client/pytest.ini @@ -1,7 +1,7 @@ [pytest] -addopts = --host localhost --html=/tmp/ci_logs/report.html --self-contained-html -v +addopts = --host 10.104.21.154 --minio_host 10.104.21.153 --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level=INFO --capture=no # python3 -W ignore -m pytest log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s) @@ -9,4 +9,4 @@ log_date_format = %Y-%m-%d %H:%M:%S filterwarnings = - ignore::DeprecationWarning \ No newline at end of file + ignore::DeprecationWarning diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt index 99ad4f62c97d5..6b62783758641 100644 --- a/tests/python_client/requirements.txt +++ b/tests/python_client/requirements.txt @@ -46,6 +46,7 @@ loguru==0.7.0 psutil==5.9.4 pandas==1.5.3 tenacity==8.1.0 +rich==13.7.0 # for standby test etcd-sdk-python==0.0.4 deepdiff==6.7.1 diff --git a/tests/python_client/resource_group/conftest.py b/tests/python_client/resource_group/conftest.py new file mode 100644 index 0000000000000..7e56a38456b65 --- /dev/null +++ b/tests/python_client/resource_group/conftest.py @@ -0,0 +1,11 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--image_tag", action="store", default="master-20240514-89a7c34c", help="image_tag") + + +@pytest.fixture +def image_tag(request): + return request.config.getoption("--image_tag") + diff --git a/tests/python_client/resource_group/test_channel_exclusive_balance.py b/tests/python_client/resource_group/test_channel_exclusive_balance.py new file mode 100644 index 0000000000000..f916014fde0b9 --- /dev/null +++ b/tests/python_client/resource_group/test_channel_exclusive_balance.py @@ -0,0 +1,446 @@ +import pytest +import time +from pymilvus import connections, utility, Collection +from utils.util_log import test_log as log +from base.client_base import TestcaseBase +from chaos.checker import (InsertChecker, + FlushChecker, + UpsertChecker, + DeleteChecker, + Op, + ResultAnalyzer + ) +from chaos import chaos_commons as cc +from common import common_func as cf +from utils.util_k8s import get_querynode_id_pod_pairs +from utils.util_birdwatcher import BirdWatcher +from customize.milvus_operator import MilvusOperator +from common.milvus_sys import MilvusSys +from common.common_type import CaseLabel +from chaos.chaos_commons import 
assert_statistic + +namespace = 'chaos-testing' +prefix = "test_rg" + +from rich.table import Table +from rich.console import Console + + +def display_segment_distribution_info(collection_name, release_name, segment_info=None): + table = Table(title=f"{collection_name} Segment Distribution Info") + table.width = 200 + table.add_column("Segment ID", style="cyan") + table.add_column("Collection ID", style="cyan") + table.add_column("Partition ID", style="cyan") + table.add_column("Num Rows", style="cyan") + table.add_column("State", style="cyan") + table.add_column("Channel", style="cyan") + table.add_column("Node ID", style="cyan") + table.add_column("Node Name", style="cyan") + res = utility.get_query_segment_info(collection_name) + log.info(f"segment info: {res}") + label = f"app.kubernetes.io/instance={release_name}, app.kubernetes.io/component=querynode" + querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", label) + for r in res: + channel = "unknown" + if segment_info and str(r.segmentID) in segment_info: + channel = segment_info[str(r.segmentID)]["Insert Channel"] + table.add_row( + str(r.segmentID), + str(r.collectionID), + str(r.partitionID), + str(r.num_rows), + str(r.state), + str(channel), + str(r.nodeIds), + str([querynode_id_pod_pair.get(node_id) for node_id in r.nodeIds]) + ) + console = Console() + console.width = 300 + console.print(table) + + +def display_channel_on_qn_distribution_info(collection_name, release_name, segment_info=None): + """ + node id, node name, channel, segment id + 1, rg-test-613938-querynode-0, [rg-test-613938-rootcoord-dml_3_449617770820133536v0], [449617770820133655] + 2, rg-test-613938-querynode-1, [rg-test-613938-rootcoord-dml_3_449617770820133537v0], [449617770820133656] + + """ + m = {} + res = utility.get_query_segment_info(collection_name) + for r in res: + if r.nodeIds: + for node_id in r.nodeIds: + if node_id not in m: + m[node_id] = { + "node_name": "", + "channel": [], + "segment_id": [] + } + m[node_id]["segment_id"].append(r.segmentID) + # get channel info + for node_id in m.keys(): + for seg in m[node_id]["segment_id"]: + if segment_info and str(seg) in segment_info: + m[node_id]["channel"].append(segment_info[str(seg)]["Insert Channel"]) + + # get node name + label = f"app.kubernetes.io/instance={release_name}, app.kubernetes.io/component=querynode" + querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", label) + for node_id in m.keys(): + m[node_id]["node_name"] = querynode_id_pod_pair.get(node_id) + + table = Table(title=f"{collection_name} Channel Distribution Info") + table.width = 200 + table.add_column("Node ID", style="cyan") + table.add_column("Node Name", style="cyan") + table.add_column("Channel", style="cyan") + table.add_column("Segment ID", style="cyan") + for node_id, v in m.items(): + table.add_row( + str(node_id), + str(v["node_name"]), + "\n".join([str(x) for x in set(v["channel"])]), + "\n".join([str(x) for x in v["segment_id"]]) + ) + console = Console() + console.width = 300 + console.print(table) + return m + + +def _install_milvus(image_tag="master-latest"): + release_name = f"rg-test-{cf.gen_digits_by_length(6)}" + cus_configs = {'spec.mode': 'cluster', + 'spec.dependencies.msgStreamType': 'kafka', + 'spec.components.image': f'harbor.milvus.io/milvus/milvus:{image_tag}', + 'metadata.namespace': namespace, + 'metadata.name': release_name, + 'spec.components.proxy.serviceType': 'LoadBalancer', + 'spec.config.queryCoord.balancer': 'ChannelLevelScoreBalancer', + 
'spec.config.queryCoord.channelExclusiveNodeFactor': 2 + } + milvus_op = MilvusOperator() + log.info(f"install milvus with configs: {cus_configs}") + milvus_op.install(cus_configs) + healthy = milvus_op.wait_for_healthy(release_name, namespace, timeout=1200) + log.info(f"milvus healthy: {healthy}") + if healthy: + endpoint = milvus_op.endpoint(release_name, namespace).split(':') + log.info(f"milvus endpoint: {endpoint}") + host = endpoint[0] + port = endpoint[1] + return release_name, host, port + else: + return release_name, None, None + + +class TestChannelExclusiveBalance(TestcaseBase): + + def teardown_method(self, method): + log.info(("*" * 35) + " teardown " + ("*" * 35)) + log.info("[teardown_method] Start teardown test case %s..." % method.__name__) + milvus_op = MilvusOperator() + milvus_op.uninstall(self.release_name, namespace) + connections.disconnect("default") + connections.remove_connection("default") + + def init_health_checkers(self, collection_name=None, shards_num=2): + c_name = collection_name + checkers = { + Op.insert: InsertChecker(collection_name=c_name, shards_num=shards_num), + Op.flush: FlushChecker(collection_name=c_name, shards_num=shards_num), + Op.upsert: UpsertChecker(collection_name=c_name, shards_num=shards_num), + Op.delete: DeleteChecker(collection_name=c_name, shards_num=shards_num), + } + self.health_checkers = checkers + + @pytest.mark.tags(CaseLabel.L3) + def test_channel_exclusive_balance_during_qn_scale_up(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + qn_num = 1 + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + etcd_endpoint = milvus_op.etcd_endpoints(release_name, namespace) + bw = BirdWatcher(etcd_endpoints=etcd_endpoint, root_path=release_name) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + c = Collection(name=c_name) + res = c.describe() + collection_id = res["collection_id"] + cc.start_monitor_threads(self.health_checkers) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration // 10) + for k, v in self.health_checkers.items(): + v.check_result() + qn_num += min(qn_num + 1, 8) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + milvus_op.scale(release_name, 'queryNode', 8, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + time.sleep(60) + # in final state, channel exclusive balance is on, so all qn should have only one channel + for k, v in res.items(): + assert 
len(set(v["channel"])) == 1 + + + @pytest.mark.tags(CaseLabel.L3) + def test_channel_exclusive_balance_during_qn_scale_down(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + qn_num = 8 + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + etcd_endpoint = milvus_op.etcd_endpoints(release_name, namespace) + bw = BirdWatcher(etcd_endpoints=etcd_endpoint, root_path=release_name) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + c = Collection(name=c_name) + res = c.describe() + collection_id = res["collection_id"] + cc.start_monitor_threads(self.health_checkers) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration // 10) + for k, v in self.health_checkers.items(): + v.check_result() + qn_num = max(qn_num - 1, 3) + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + milvus_op.scale(release_name, 'queryNode', 1, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + time.sleep(60) + # shard num = 2, k = 2, qn_num = 3 + # in final state, channel exclusive balance is off, so all qn should have more than one channel + for k, v in res.items(): + assert len(set(v["channel"])) > 1 + + @pytest.mark.tags(CaseLabel.L3) + def test_channel_exclusive_balance_with_channel_num_is_1(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + qn_num = 1 + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + etcd_endpoint = milvus_op.etcd_endpoints(release_name, namespace) + bw = BirdWatcher(etcd_endpoints=etcd_endpoint, root_path=release_name) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name, shards_num=1) + c = Collection(name=c_name) + res = c.describe() + collection_id = res["collection_id"] + cc.start_monitor_threads(self.health_checkers) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + 
time.sleep(request_duration // 10) + for k, v in self.health_checkers.items(): + v.check_result() + qn_num = qn_num + 1 + qn_num = min(qn_num, 8) + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + for r in res: + assert len(set(r["channel"])) == 1 + milvus_op.scale(release_name, 'queryNode', 8, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + time.sleep(60) + + # since shard num is 1, so all qn should have only one channel, no matter what k is + for k, v in res.items(): + assert len(set(v["channel"])) == 1 + + @pytest.mark.tags(CaseLabel.L3) + def test_channel_exclusive_balance_after_k_increase(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + qn_num = 1 + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + etcd_endpoint = milvus_op.etcd_endpoints(release_name, namespace) + bw = BirdWatcher(etcd_endpoints=etcd_endpoint, root_path=release_name) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + c = Collection(name=c_name) + res = c.describe() + collection_id = res["collection_id"] + cc.start_monitor_threads(self.health_checkers) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration // 10) + for k, v in self.health_checkers.items(): + v.check_result() + qn_num = qn_num + 1 + qn_num = min(qn_num, 8) + if qn_num == 5: + config = { + "spec.config.queryCoord.channelExclusiveNodeFactor": 3 + } + milvus_op.upgrade(release_name, config, namespace) + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + if qn_num == 4: + # channel exclusive balance is on, so all qn should have only one channel + for r in res.values(): + assert len(set(r["channel"])) == 1 + if qn_num == 5: + # k is changed to 3 when qn_num is 5, + # channel exclusive balance is off, so all qn should have more than one channel + # wait for a while to make sure all qn have more than one channel + ready = False + t0 = time.time() + while not ready and time.time() - t0 < 180: + ready = True + for r in res.values(): + if len(set(r["channel"])) == 1: + ready = False + time.sleep(10) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + if qn_num == 6: 
+ # channel exclusive balance is on, so all qn should have only one channel + ready = False + t0 = time.time() + while not ready and time.time() - t0 < 180: + ready = True + for r in res.values(): + if len(set(r["channel"])) != 1: + ready = False + time.sleep(10) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + milvus_op.scale(release_name, 'queryNode', 8, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + time.sleep(60) + + @pytest.mark.tags(CaseLabel.L3) + def test_channel_exclusive_balance_for_search_performance(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + qn_num = 1 + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + etcd_endpoint = milvus_op.etcd_endpoints(release_name, namespace) + bw = BirdWatcher(etcd_endpoints=etcd_endpoint, root_path=release_name) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + c = Collection(name=c_name) + res = c.describe() + collection_id = res["collection_id"] + cc.start_monitor_threads(self.health_checkers) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration // 10) + for k, v in self.health_checkers.items(): + v.check_result() + qn_num = qn_num + 1 + qn_num = min(qn_num, 8) + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + milvus_op.scale(release_name, 'queryNode', 8, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + time.sleep(60) diff --git a/tests/python_client/resource_group/test_resource_group.py b/tests/python_client/resource_group/test_resource_group.py new file mode 100644 index 0000000000000..0e4e448bd25dc --- /dev/null +++ b/tests/python_client/resource_group/test_resource_group.py @@ -0,0 +1,944 @@ +import pytest +import time +from typing import Union, List +from pymilvus import connections, utility, Collection +from pymilvus.client.constants import DEFAULT_RESOURCE_GROUP +from pymilvus.client.types import ResourceGroupConfig, ResourceGroupInfo +from utils.util_log import test_log as log +from base.client_base import TestcaseBase +from 
chaos.checker import (InsertChecker, + UpsertChecker, + SearchChecker, + HybridSearchChecker, + QueryChecker, + DeleteChecker, + Op, + ResultAnalyzer + ) +from chaos import chaos_commons as cc +from common import common_func as cf +from utils.util_k8s import get_querynode_id_pod_pairs +from common import common_type as ct +from customize.milvus_operator import MilvusOperator +from common.milvus_sys import MilvusSys +from common.common_type import CaseLabel +from chaos.chaos_commons import assert_statistic +from delayed_assert import assert_expectations + +namespace = 'chaos-testing' +prefix = "test_rg" + +from rich.table import Table +from rich.console import Console + + +def display_resource_group_info(info: Union[ResourceGroupInfo, List[ResourceGroupInfo]]): + table = Table(title="Resource Group Info") + table.width = 200 + table.add_column("Name", style="cyan") + table.add_column("Capacity", style="cyan") + table.add_column("Available Node", style="cyan") + table.add_column("Loaded Replica", style="cyan") + table.add_column("Outgoing Node", style="cyan") + table.add_column("Incoming Node", style="cyan") + table.add_column("Request", style="cyan") + table.add_column("Limit", style="cyan") + table.add_column("Nodes", style="cyan") + if isinstance(info, list): + for i in info: + table.add_row( + i.name, + str(i.capacity), + str(i.num_available_node), + str(i.num_loaded_replica), + str(i.num_outgoing_node), + str(i.num_incoming_node), + str(i.config.requests.node_num), + str(i.config.limits.node_num), + "\n".join([str(node.hostname) for node in i.nodes]) + ) + else: + table.add_row( + info.name, + str(info.capacity), + str(info.num_available_node), + str(info.num_loaded_replica), + str(info.num_outgoing_node), + str(info.num_incoming_node), + str(info.config.requests.node_num), + str(info.config.limits.node_num), + "\n".join([str(node.hostname) for node in info.nodes]) + ) + + console = Console() + console.width = 300 + console.print(table) + + +def display_segment_distribution_info(collection_name, release_name): + table = Table(title=f"{collection_name} Segment Distribution Info") + table.width = 200 + table.add_column("Segment ID", style="cyan") + table.add_column("Collection ID", style="cyan") + table.add_column("Partition ID", style="cyan") + table.add_column("Num Rows", style="cyan") + table.add_column("State", style="cyan") + table.add_column("Node ID", style="cyan") + table.add_column("Node Name", style="cyan") + res = utility.get_query_segment_info(collection_name) + label = f"app.kubernetes.io/instance={release_name}, app.kubernetes.io/component=querynode" + querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", label) + + for r in res: + table.add_row( + str(r.segmentID), + str(r.collectionID), + str(r.partitionID), + str(r.num_rows), + str(r.state), + str(r.nodeIds), + str([querynode_id_pod_pair.get(node_id) for node_id in r.nodeIds]) + ) + console = Console() + console.width = 300 + console.print(table) + + +def list_all_resource_groups(): + rg_names = utility.list_resource_groups() + resource_groups = [] + for rg_name in rg_names: + resource_group = utility.describe_resource_group(rg_name) + resource_groups.append(resource_group) + display_resource_group_info(resource_groups) + + +def _install_milvus(image_tag="master-latest"): + release_name = f"rg-test-{cf.gen_digits_by_length(6)}" + cus_configs = {'spec.mode': 'cluster', + 'spec.dependencies.msgStreamType': 'kafka', + 'spec.components.image': f'harbor.milvus.io/milvus/milvus:{image_tag}', + 
'metadata.namespace': namespace, + 'metadata.name': release_name, + 'spec.components.proxy.serviceType': 'LoadBalancer', + } + milvus_op = MilvusOperator() + log.info(f"install milvus with configs: {cus_configs}") + milvus_op.install(cus_configs) + healthy = milvus_op.wait_for_healthy(release_name, namespace, timeout=1200) + log.info(f"milvus healthy: {healthy}") + if healthy: + endpoint = milvus_op.endpoint(release_name, namespace).split(':') + log.info(f"milvus endpoint: {endpoint}") + host = endpoint[0] + port = endpoint[1] + return release_name, host, port + else: + return release_name, None, None + + +class TestResourceGroup(TestcaseBase): + + def teardown_method(self, method): + log.info(("*" * 35) + " teardown " + ("*" * 35)) + log.info("[teardown_method] Start teardown test case %s..." % method.__name__) + milvus_op = MilvusOperator() + milvus_op.uninstall(self.release_name, namespace) + connections.disconnect("default") + connections.remove_connection("default") + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_scale_up(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + # scale up rg1 to 8 nodes one by one + for replicas in range(1, 8): + milvus_op.scale(release_name, 'queryNode', replicas, namespace) + time.sleep(10) + # get querynode info + qn = mil.query_nodes + log.info(f"query node info: {len(qn)}") + resource_group = self.utility.describe_resource_group(name) + log.info(f"Resource group {name} info:\n {display_resource_group_info(resource_group)}") + list_all_resource_groups() + # assert the node in rg >= 4 + resource_group = self.utility.describe_resource_group(name) + assert resource_group.num_available_node >= 4 + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_scale_down(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 8, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + # scale down rg1 from 8 to 1 node one by one + for replicas in range(8, 1, -1): + milvus_op.scale(release_name, 'queryNode', replicas, namespace) + time.sleep(10) + resource_group = self.utility.describe_resource_group(name) + log.info(f"Resource group {name} info:\n {display_resource_group_info(resource_group)}") + list_all_resource_groups() + # assert the node in rg <= 1 + resource_group = self.utility.describe_resource_group(name) + assert resource_group.num_available_node <= 1 + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_all_querynode_add_into_two_different_config_rg(self, image_tag): + """ + 
steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 8, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + rg_list = [] + # create rg1 with request node_num=4, limit node_num=6 + + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + rg_list.append(name) + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + rg_list.append(name) + # assert two rg satisfy the request node_num + list_all_resource_groups() + for rg in rg_list: + resource_group = self.utility.describe_resource_group(rg) + assert resource_group.num_available_node >= resource_group.config.requests.node_num + + # scale down rg1 from 8 to 1 node one by one + for replicas in range(8, 1, -1): + milvus_op.scale(release_name, 'queryNode', replicas, namespace) + time.sleep(10) + for name in rg_list: + resource_group = self.utility.describe_resource_group(name) + log.info(f"Resource group {name} info:\n {display_resource_group_info(resource_group)}") + list_all_resource_groups() + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_querynode_add_into_two_different_config_rg_one_by_one(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + rg_list = [] + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + rg_list.append(name) + + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + rg_list.append(name) + for replicas in range(1, 8): + milvus_op.scale(release_name, 'queryNode', replicas, namespace) + time.sleep(10) + list_all_resource_groups() + + for rg in rg_list: + resource_group = self.utility.describe_resource_group(rg) + assert resource_group.num_available_node >= resource_group.config.requests.node_num + # scale down rg1 from 8 to 1 node one by one + for replicas in range(8, 1, -1): + milvus_op.scale(release_name, 'queryNode', replicas, namespace) + time.sleep(10) + list_all_resource_groups() + for rg in rg_list: + resource_group = self.utility.describe_resource_group(rg) + assert resource_group.num_available_node >= 1 + + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_querynode_add_into_new_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + + self.release_name = release_name + milvus_op.scale(release_name, 'queryNode', 10, namespace) + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: 
{mil.build_version}") + rg_list = [] + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + rg_list.append(name) + for rg in rg_list: + resource_group = self.utility.describe_resource_group(rg) + assert resource_group.num_available_node >= resource_group.config.requests.node_num + + # create a new rg with request node_num=3, limit node_num=6 + # the querynode will be added into the new rg from default rg + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + rg_list.append(name) + list_all_resource_groups() + for rg in rg_list: + resource_group = self.utility.describe_resource_group(rg) + assert resource_group.num_available_node >= resource_group.config.requests.node_num + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_with_two_rg_link_to_each_other_when_all_not_reached_to_request(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + milvus_op.scale(release_name, 'queryNode', 8, namespace) + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 1})}) + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + rg1_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + name = cf.gen_unique_str("rg") + rg2_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + list_all_resource_groups() + log.info("update resource group") + utility.update_resource_groups( + {rg1_name: ResourceGroupConfig(requests={"node_num": 6}, + limits={"node_num": 8}, + transfer_from=[{"resource_group": rg2_name}], + transfer_to=[{"resource_group": rg2_name}], )}) + time.sleep(10) + list_all_resource_groups() + utility.update_resource_groups( + {rg2_name: ResourceGroupConfig(requests={"node_num": 6}, + limits={"node_num": 8}, + transfer_from=[{"resource_group": rg1_name}], + transfer_to=[{"resource_group": rg1_name}], )}) + time.sleep(10) + list_all_resource_groups() + # no querynode was transferred between rg1 and rg2 + resource_group = self.utility.describe_resource_group(rg1_name) + assert resource_group.num_available_node == 4 + resource_group = self.utility.describe_resource_group(rg2_name) + assert resource_group.num_available_node == 4 + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_with_rg_transfer_from_non_default_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + milvus_op.scale(release_name, 'queryNode', 15, namespace) + utility.update_resource_groups( + 
{DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 3})}) + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + rg1_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 2}, + limits={"node_num": 2}, + )) + name = cf.gen_unique_str("rg") + rg2_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 6}, + limits={"node_num": 10}, + )) + list_all_resource_groups() + rg2_available_node_before = self.utility.describe_resource_group(rg2_name).num_available_node + log.info("update resource group") + utility.update_resource_groups( + {rg1_name: ResourceGroupConfig(requests={"node_num": 4}, + limits={"node_num": 6}, + transfer_from=[{"resource_group": rg2_name}], + transfer_to=[{"resource_group": rg2_name}], )}) + time.sleep(10) + list_all_resource_groups() + # expect qn in rg 1 transfer from rg2 not the default rg + rg2_available_node_after = self.utility.describe_resource_group(rg2_name).num_available_node + assert rg2_available_node_before > rg2_available_node_after + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_with_rg_transfer_to_non_default_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + milvus_op.scale(release_name, 'queryNode', 10, namespace) + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 10})}) + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + rg1_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 2}, + limits={"node_num": 10}, + )) + name = cf.gen_unique_str("rg") + rg2_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 4}, + )) + list_all_resource_groups() + rg1_node_available_before = self.utility.describe_resource_group(rg1_name).num_available_node + log.info("update resource group") + utility.update_resource_groups( + {rg2_name: ResourceGroupConfig(requests={"node_num": 2}, + limits={"node_num": 2}, + transfer_from=[{"resource_group": rg1_name}], + transfer_to=[{"resource_group": rg1_name}], )}) + time.sleep(10) + list_all_resource_groups() + # expect qn in rg 2 transfer to rg1 not the default rg + rg1_node_available_after = self.utility.describe_resource_group(rg1_name).num_available_node + assert rg1_node_available_after > rg1_node_available_before + + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_with_rg_transfer_with_rg_list(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + milvus_op.scale(release_name, 'queryNode', 12, namespace) + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, 
limits={"node_num": 1})}) + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + source_rg = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 1}, + limits={"node_num": 1}, + )) + name = cf.gen_unique_str("rg") + small_rg = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 2}, + limits={"node_num": 4}, + )) + name = cf.gen_unique_str("rg") + big_rg = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + list_all_resource_groups() + small_rg_node_available_before = self.utility.describe_resource_group(small_rg).num_available_node + big_rg_node_available_before = self.utility.describe_resource_group(big_rg).num_available_node + log.info("update resource group") + utility.update_resource_groups( + {source_rg: ResourceGroupConfig(requests={"node_num": 6}, + limits={"node_num": 6}, + transfer_from=[{"resource_group": small_rg}, {"resource_group": big_rg}], + )}) + time.sleep(10) + list_all_resource_groups() + # expect source rg transfer from small rg and big rg + small_rg_node_available_after = self.utility.describe_resource_group(small_rg).num_available_node + big_rg_node_available_after = self.utility.describe_resource_group(big_rg).num_available_node + assert (small_rg_node_available_before + big_rg_node_available_before > small_rg_node_available_after + + big_rg_node_available_after) + + +class TestReplicasManagement(TestcaseBase): + + def teardown_method(self, method): + log.info(("*" * 35) + " teardown " + ("*" * 35)) + log.info("[teardown_method] Start teardown test case %s..." 
% method.__name__) + milvus_op = MilvusOperator() + milvus_op.uninstall(self.release_name, namespace) + connections.disconnect("default") + connections.remove_connection("default") + + @pytest.mark.tags(CaseLabel.L3) + def test_load_replicas_one_collection_multi_replicas_to_multi_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 12, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + resource_groups = [] + for i in range(4): + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 2}, + limits={"node_num": 6}, + )) + resource_groups.append(name) + list_all_resource_groups() + + # create collection and load with 2 replicase + self.skip_connection = True + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, + enable_dynamic_field=True)[0:2] + collection_w.release() + log.info(f"resource groups: {resource_groups}") + collection_w.load(replica_number=len(resource_groups), _resource_groups=resource_groups) + list_all_resource_groups() + + # list replicas + replicas = collection_w.get_replicas() + log.info(f"replicas: {replicas}") + rg_to_scale_down = resource_groups[0] + # scale down a rg to 1 node + self.utility.update_resource_groups( + {rg_to_scale_down: ResourceGroupConfig(requests={"node_num": 1}, + limits={"node_num": 1}, )} + ) + + list_all_resource_groups() + replicas = collection_w.get_replicas() + log.info(f"replicas: {replicas}") + # scale down a rg t0 0 node + self.utility.update_resource_groups( + {rg_to_scale_down: ResourceGroupConfig(requests={"node_num": 0}, + limits={"node_num": 0}, )} + ) + list_all_resource_groups() + replicas = collection_w.get_replicas() + log.info(f"replicas: {replicas}") + + @pytest.mark.tags(CaseLabel.L3) + def test_load_multi_collection_multi_replicas_to_multi_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 12, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + # create two rg with request node_num=4, limit node_num=6 + resource_groups = [] + for i in range(3): + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + resource_groups.append(name) + log.info(f"resource groups: {resource_groups}") + list_all_resource_groups() + col_list = [] + # create collection and load with multi replicase + self.skip_connection = True + for i in range(3): + prefix = cf.gen_unique_str("test_rg") + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, + enable_dynamic_field=True)[0:2] + collection_w.release() + col_list.append(collection_w) + collection_w.load(replica_number=len(resource_groups), _resource_groups=resource_groups) + list_all_resource_groups() + + # list replicas + for col in col_list: + replicas = col.get_replicas() + log.info(f"replicas: {replicas}") + + 
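The resource-group cases in this file all exercise the same small pymilvus surface: declare groups with ResourceGroupConfig, let query nodes flow between them (optionally constrained by transfer_from/transfer_to), pin collection replicas to specific groups, and move replicas with transfer_replica. The following is a minimal sketch of that workflow outside the test harness; the endpoint, group name, and collection name are placeholders, and the collection is assumed to already exist with an index so that load() can succeed.

```python
# Minimal sketch of the declarative resource-group workflow these tests drive.
# All names below (endpoint, "rg_demo", "demo_collection") are placeholders;
# the tests above obtain their endpoint from the MilvusOperator deployment.
from pymilvus import Collection, connections, utility
from pymilvus.client.constants import DEFAULT_RESOURCE_GROUP
from pymilvus.client.types import ResourceGroupConfig

connections.connect("default", host="127.0.0.1", port="19530")  # placeholder endpoint

# Declare a group that requests 2 query nodes and may grow to 4.
rg = "rg_demo"
utility.create_resource_group(rg, config=ResourceGroupConfig(
    requests={"node_num": 2},
    limits={"node_num": 4},
))

# Shrink the default group so spare query nodes can flow into rg_demo, then
# wire the two groups to exchange nodes only with each other.
utility.update_resource_groups(
    {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0},
                                                 limits={"node_num": 1})})
utility.update_resource_groups(
    {rg: ResourceGroupConfig(requests={"node_num": 2},
                             limits={"node_num": 4},
                             transfer_from=[{"resource_group": DEFAULT_RESOURCE_GROUP}],
                             transfer_to=[{"resource_group": DEFAULT_RESOURCE_GROUP}])})

# Pin one replica of a collection to the group, inspect the group, then hand
# the replica back to the default group.
col = Collection("demo_collection")  # assumes the collection exists and is indexed
col.release()
col.load(replica_number=1, _resource_groups=[rg])
info = utility.describe_resource_group(rg)
print(info.num_available_node, info.num_loaded_replica)
utility.transfer_replica(source_group=rg, target_group=DEFAULT_RESOURCE_GROUP,
                         collection_name=col.name, num_replicas=1)
print(utility.list_resource_groups())
```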
@pytest.mark.tags(CaseLabel.L3) + def test_load_multi_collection_one_replicas_to_multi_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 12, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + # create two rg with request node_num=4, limit node_num=6 + resource_groups = [] + for i in range(3): + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + resource_groups.append(name) + log.info(f"resource groups: {resource_groups}") + list_all_resource_groups() + col_list = [] + # create collection and load with multi replicase + self.skip_connection = True + for i in range(3): + prefix = cf.gen_unique_str("test_rg") + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, + enable_dynamic_field=True)[0:2] + collection_w.release() + col_list.append(collection_w) + collection_w.load(replica_number=1, _resource_groups=resource_groups) + list_all_resource_groups() + + # list replicas + for col in col_list: + replicas = col.get_replicas() + log.info(f"replicas: {replicas}") + + @pytest.mark.tags(CaseLabel.L3) + def test_transfer_replicas_to_other_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 12, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + # create two rg with request node_num=4, limit node_num=6 + resource_groups = [] + for i in range(3): + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + resource_groups.append(name) + log.info(f"resource groups: {resource_groups}") + list_all_resource_groups() + col_list = [] + # create collection and load with multi replicase + self.skip_connection = True + for i in range(3): + prefix = cf.gen_unique_str("test_rg") + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, + enable_dynamic_field=True)[0:2] + collection_w.release() + col_list.append(collection_w) + collection_w.load(replica_number=1, _resource_groups=[resource_groups[i]]) + list_all_resource_groups() + # list replicas + for col in col_list: + replicas = col.get_replicas() + log.info(f"replicas: {replicas}") + + # transfer replicas to default rg + self.utility.transfer_replica(source_group=resource_groups[0], target_group=DEFAULT_RESOURCE_GROUP, + collection_name=col_list[0].name, num_replicas=1) + + list_all_resource_groups() + # list replicas + for col in col_list: + replicas = col.get_replicas() + log.info(f"replicas: {replicas}") + + +class TestServiceAvailableDuringScale(TestcaseBase): + + def init_health_checkers(self, collection_name=None): + c_name = collection_name + shards_num = 5 + checkers = { + Op.insert: InsertChecker(collection_name=c_name, shards_num=shards_num), + Op.upsert: UpsertChecker(collection_name=c_name, shards_num=shards_num), + Op.search: 
SearchChecker(collection_name=c_name, shards_num=shards_num), + Op.hybrid_search: HybridSearchChecker(collection_name=c_name, shards_num=shards_num), + Op.query: QueryChecker(collection_name=c_name, shards_num=shards_num), + Op.delete: DeleteChecker(collection_name=c_name, shards_num=shards_num), + } + self.health_checkers = checkers + + def teardown_method(self, method): + log.info(("*" * 35) + " teardown " + ("*" * 35)) + log.info("[teardown_method] Start teardown test case %s..." % method.__name__) + milvus_op = MilvusOperator() + milvus_op.uninstall(self.release_name, namespace) + connections.disconnect("default") + connections.remove_connection("default") + + def test_service_available_during_scale_up(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 3, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 10})}) + # create rg + resource_groups = [] + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 1}, + limits={"node_num": 1}, + )) + resource_groups.append(name) + list_all_resource_groups() + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + # load collection to non default rg + self.health_checkers[Op.search].c_wrap.release() + self.health_checkers[Op.search].c_wrap.load(_resource_groups=resource_groups) + cc.start_monitor_threads(self.health_checkers) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration//10) + for k, v in self.health_checkers.items(): + v.check_result() + # scale up querynode when progress is 3/10 + if i == 3: + utility.update_resource_groups( + {name: ResourceGroupConfig(requests={"node_num": 2}, limits={"node_num": 2})}) + log.info(f"scale up querynode in rg {name} from 1 to 2") + list_all_resource_groups() + display_segment_distribution_info(c_name, release_name) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + + def test_service_available_during_scale_down(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 3, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 5})}) + # create rg + resource_groups = [] + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 2}, + limits={"node_num": 2}, + )) + resource_groups.append(name) + list_all_resource_groups() + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + # load collection to non default rg + 
self.health_checkers[Op.search].c_wrap.release() + self.health_checkers[Op.search].c_wrap.load(_resource_groups=resource_groups) + cc.start_monitor_threads(self.health_checkers) + list_all_resource_groups() + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration//10) + for k, v in self.health_checkers.items(): + v.check_result() + # scale down querynode in rg when progress is 3/10 + if i == 3: + list_all_resource_groups() + utility.update_resource_groups( + {name: ResourceGroupConfig(requests={"node_num": 1}, limits={"node_num": 1})}) + log.info(f"scale down querynode in rg {name} from 2 to 1") + list_all_resource_groups() + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + + +class TestServiceAvailableDuringTransferReplicas(TestcaseBase): + + def init_health_checkers(self, collection_name=None): + c_name = collection_name + shards_num = 5 + checkers = { + Op.insert: InsertChecker(collection_name=c_name, shards_num=shards_num), + Op.upsert: UpsertChecker(collection_name=c_name, shards_num=shards_num), + Op.search: SearchChecker(collection_name=c_name, shards_num=shards_num), + Op.hybrid_search: HybridSearchChecker(collection_name=c_name, shards_num=shards_num), + Op.query: QueryChecker(collection_name=c_name, shards_num=shards_num), + Op.delete: DeleteChecker(collection_name=c_name, shards_num=shards_num), + } + self.health_checkers = checkers + + def teardown_method(self, method): + log.info(("*" * 35) + " teardown " + ("*" * 35)) + log.info("[teardown_method] Start teardown test case %s..." % method.__name__) + milvus_op = MilvusOperator() + milvus_op.uninstall(self.release_name, namespace) + connections.disconnect("default") + connections.remove_connection("default") + + def test_service_available_during_transfer_replicas(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 5, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 10})}) + # create rg + resource_groups = [] + for i in range(2): + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 1}, + limits={"node_num": 1}, + )) + resource_groups.append(name) + list_all_resource_groups() + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + self.health_checkers[Op.search].c_wrap.release() + self.health_checkers[Op.search].c_wrap.load(_resource_groups=resource_groups[0:1]) + cc.start_monitor_threads(self.health_checkers) + list_all_resource_groups() + display_segment_distribution_info(c_name, release_name) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration//10) + for k, v in self.health_checkers.items(): + v.check_result() + # transfer replicas from default to another + if i == 3: + # transfer replicas from default rg to another rg + list_all_resource_groups() + 
display_segment_distribution_info(c_name, release_name) + self.utility.transfer_replica(source_group=resource_groups[0], target_group=resource_groups[1], + collection_name=c_name, num_replicas=1) + list_all_resource_groups() + display_segment_distribution_info(c_name, release_name) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() diff --git a/tests/python_client/utils/util_birdwatcher.py b/tests/python_client/utils/util_birdwatcher.py new file mode 100644 index 0000000000000..b7c4abe405af1 --- /dev/null +++ b/tests/python_client/utils/util_birdwatcher.py @@ -0,0 +1,79 @@ +import os +import re +from utils.util_log import test_log as log + + +def extraction_all_data(text): + # Patterns to handle the specifics of each key-value line + patterns = { + 'Segment ID': r"Segment ID:\s*(\d+)", + 'Segment State': r"Segment State:\s*(\w+)", + 'Collection ID': r"Collection ID:\s*(\d+)", + 'PartitionID': r"PartitionID:\s*(\d+)", + 'Insert Channel': r"Insert Channel:(.+)", + 'Num of Rows': r"Num of Rows:\s*(\d+)", + 'Max Row Num': r"Max Row Num:\s*(\d+)", + 'Last Expire Time': r"Last Expire Time:\s*(.+)", + 'Compact from': r"Compact from:\s*(\[\])", + 'Start Position ID': r"Start Position ID:\s*(\[[\d\s]+\])", + 'Start Position Time': r"Start Position ID:.*time:\s*(.+),", + 'Start Channel Name': r"channel name:\s*([^,\n]+)", + 'Dml Position ID': r"Dml Position ID:\s*(\[[\d\s]+\])", + 'Dml Position Time': r"Dml Position ID:.*time:\s*(.+),", + 'Dml Channel Name': r"channel name:\s*(.+)", + 'Binlog Nums': r"Binlog Nums:\s*(\d+)", + 'StatsLog Nums': r"StatsLog Nums:\s*(\d+)", + 'DeltaLog Nums': r"DeltaLog Nums:\s*(\d+)" + } + + refined_data = {} + for key, pattern in patterns.items(): + match = re.search(pattern, text) + if match: + refined_data[key] = match.group(1).strip() + + return refined_data + + +class BirdWatcher: + """ + + birdwatcher is a cli tool to get information about milvus + the command: + show segment info + """ + + def __init__(self, etcd_endpoints, root_path): + self.prefix = f"birdwatcher --olc=\"#connect --etcd {etcd_endpoints} --rootPath={root_path}," + + def parse_segment_info(self, output): + splitter = output.strip().split('\n')[0] + segments = output.strip().split(splitter) + segments = [segment for segment in segments if segment.strip()] + + # Parse all segments + parsed_segments = [extraction_all_data(segment) for segment in segments] + parsed_segments = [segment for segment in parsed_segments if segment] + return parsed_segments + + def show_segment_info(self, collection_id=None): + cmd = f"{self.prefix} show segment info --format table\"" + if collection_id: + cmd = f"{self.prefix} show segment info --collection {collection_id} --format table\"" + log.info(f"cmd: {cmd}") + output = os.popen(cmd).read() + # log.info(f"{cmd} output: {output}") + output = self.parse_segment_info(output) + for segment in output: + log.info(segment) + seg_res = {} + for segment in output: + seg_res[segment['Segment ID']] = segment + return seg_res + + +if __name__ == "__main__": + birdwatcher = BirdWatcher("10.104.18.24:2379", "rg-test-613938") + res = birdwatcher.show_segment_info() + print(res) + diff --git a/tests/python_client/utils/util_k8s.py b/tests/python_client/utils/util_k8s.py index ffaba8bcc1ff0..b514e3444c551 100644 --- a/tests/python_client/utils/util_k8s.py +++ b/tests/python_client/utils/util_k8s.py @@ -452,6 +452,8 @@ def 
record_time_when_standby_activated(namespace, release_name, coord_type, time log.info(f"Standby {coord_type} pod does not switch standby mode") + + if __name__ == '__main__': label = "app.kubernetes.io/name=milvus, component=querynode" instance_name = get_milvus_instance_name("chaos-testing", "10.96.250.111") From 6b51bae5be189eaa9916733a1811f43d3af9955d Mon Sep 17 00:00:00 2001 From: zhuwenxing Date: Fri, 31 May 2024 13:57:45 +0800 Subject: [PATCH 108/126] test: add different nq for restful v2 test (#33456) Signed-off-by: zhuwenxing --- .../restful_client_v2/testcases/test_vector_operations.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/restful_client_v2/testcases/test_vector_operations.py b/tests/restful_client_v2/testcases/test_vector_operations.py index 7d497718ba24f..73d2a3b9a9911 100644 --- a/tests/restful_client_v2/testcases/test_vector_operations.py +++ b/tests/restful_client_v2/testcases/test_vector_operations.py @@ -750,8 +750,9 @@ def test_search_vector_with_all_vector_datatype(self, nb, dim, insert_round, aut @pytest.mark.parametrize("enable_dynamic_schema", [True]) @pytest.mark.parametrize("nb", [3000]) @pytest.mark.parametrize("dim", [128]) + @pytest.mark.parametrize("nq", [1, 2]) def test_search_vector_with_float_vector_datatype(self, nb, dim, insert_round, auto_id, - is_partition_key, enable_dynamic_schema): + is_partition_key, enable_dynamic_schema, nq): """ Insert a vector with a simple payload """ @@ -812,7 +813,7 @@ def test_search_vector_with_float_vector_datatype(self, nb, dim, insert_round, a # search data payload = { "collectionName": name, - "data": [gen_vector(datatype="FloatVector", dim=dim)], + "data": [gen_vector(datatype="FloatVector", dim=dim) for _ in range(nq)], "filter": "word_count > 100", "groupingField": "user_id", "outputFields": ["*"], @@ -827,7 +828,7 @@ def test_search_vector_with_float_vector_datatype(self, nb, dim, insert_round, a } rsp = self.vector_client.vector_search(payload) assert rsp['code'] == 0 - assert len(rsp['data']) == 100 + assert len(rsp['data']) == 100 * nq @pytest.mark.parametrize("insert_round", [1, 10]) From 322a4c5b8cbdd0d34fae005c0410556b2e10dca1 Mon Sep 17 00:00:00 2001 From: wei liu Date: Fri, 31 May 2024 15:41:45 +0800 Subject: [PATCH 109/126] enhance: Remove StringPrimaryKey to reduce unnecessary copy and function call cost (#33486) issue: #33497 Signed-off-by: Wei Liu --- internal/storage/primary_key.go | 108 ++++++++++++-------------------- 1 file changed, 40 insertions(+), 68 deletions(-) diff --git a/internal/storage/primary_key.go b/internal/storage/primary_key.go index f9322f64dbd93..640ee2226a48c 100644 --- a/internal/storage/primary_key.go +++ b/internal/storage/primary_key.go @@ -158,71 +158,13 @@ func (ip *Int64PrimaryKey) Size() int64 { return 16 } -type BaseStringPrimaryKey struct { - Value string -} - -func (sp *BaseStringPrimaryKey) GT(key BaseStringPrimaryKey) bool { - return strings.Compare(sp.Value, key.Value) > 0 -} - -func (sp *BaseStringPrimaryKey) GE(key BaseStringPrimaryKey) bool { - return strings.Compare(sp.Value, key.Value) >= 0 -} - -func (sp *BaseStringPrimaryKey) LT(key BaseStringPrimaryKey) bool { - return strings.Compare(sp.Value, key.Value) < 0 -} - -func (sp *BaseStringPrimaryKey) LE(key BaseStringPrimaryKey) bool { - return strings.Compare(sp.Value, key.Value) <= 0 -} - -func (sp *BaseStringPrimaryKey) EQ(key BaseStringPrimaryKey) bool { - return strings.Compare(sp.Value, key.Value) == 0 -} - -func (sp *BaseStringPrimaryKey) MarshalJSON() ([]byte, error) { 
- ret, err := json.Marshal(sp.Value) - if err != nil { - return nil, err - } - - return ret, nil -} - -func (sp *BaseStringPrimaryKey) UnmarshalJSON(data []byte) error { - err := json.Unmarshal(data, &sp.Value) - if err != nil { - return err - } - - return nil -} - -func (sp *BaseStringPrimaryKey) SetValue(data interface{}) error { - value, ok := data.(string) - if !ok { - return fmt.Errorf("wrong type value when setValue for StringPrimaryKey") - } - - sp.Value = value - return nil -} - -func (sp *BaseStringPrimaryKey) GetValue() interface{} { - return sp.Value -} - type VarCharPrimaryKey struct { - BaseStringPrimaryKey + Value string } func NewVarCharPrimaryKey(v string) *VarCharPrimaryKey { return &VarCharPrimaryKey{ - BaseStringPrimaryKey: BaseStringPrimaryKey{ - Value: v, - }, + Value: v, } } @@ -233,7 +175,7 @@ func (vcp *VarCharPrimaryKey) GT(key PrimaryKey) bool { return false } - return vcp.BaseStringPrimaryKey.GT(pk.BaseStringPrimaryKey) + return strings.Compare(vcp.Value, pk.Value) > 0 } func (vcp *VarCharPrimaryKey) GE(key PrimaryKey) bool { @@ -243,7 +185,7 @@ func (vcp *VarCharPrimaryKey) GE(key PrimaryKey) bool { return false } - return vcp.BaseStringPrimaryKey.GE(pk.BaseStringPrimaryKey) + return strings.Compare(vcp.Value, pk.Value) >= 0 } func (vcp *VarCharPrimaryKey) LT(key PrimaryKey) bool { @@ -253,7 +195,7 @@ func (vcp *VarCharPrimaryKey) LT(key PrimaryKey) bool { return false } - return vcp.BaseStringPrimaryKey.LT(pk.BaseStringPrimaryKey) + return strings.Compare(vcp.Value, pk.Value) < 0 } func (vcp *VarCharPrimaryKey) LE(key PrimaryKey) bool { @@ -263,7 +205,7 @@ func (vcp *VarCharPrimaryKey) LE(key PrimaryKey) bool { return false } - return vcp.BaseStringPrimaryKey.LE(pk.BaseStringPrimaryKey) + return strings.Compare(vcp.Value, pk.Value) <= 0 } func (vcp *VarCharPrimaryKey) EQ(key PrimaryKey) bool { @@ -273,7 +215,39 @@ func (vcp *VarCharPrimaryKey) EQ(key PrimaryKey) bool { return false } - return vcp.BaseStringPrimaryKey.EQ(pk.BaseStringPrimaryKey) + return strings.Compare(vcp.Value, pk.Value) == 0 +} + +func (vcp *VarCharPrimaryKey) MarshalJSON() ([]byte, error) { + ret, err := json.Marshal(vcp.Value) + if err != nil { + return nil, err + } + + return ret, nil +} + +func (vcp *VarCharPrimaryKey) UnmarshalJSON(data []byte) error { + err := json.Unmarshal(data, &vcp.Value) + if err != nil { + return err + } + + return nil +} + +func (vcp *VarCharPrimaryKey) SetValue(data interface{}) error { + value, ok := data.(string) + if !ok { + return fmt.Errorf("wrong type value when setValue for VarCharPrimaryKey") + } + + vcp.Value = value + return nil +} + +func (vcp *VarCharPrimaryKey) GetValue() interface{} { + return vcp.Value } func (vcp *VarCharPrimaryKey) Type() schemapb.DataType { @@ -293,9 +267,7 @@ func GenPrimaryKeyByRawData(data interface{}, pkType schemapb.DataType) (Primary } case schemapb.DataType_VarChar: result = &VarCharPrimaryKey{ - BaseStringPrimaryKey: BaseStringPrimaryKey{ - Value: data.(string), - }, + Value: data.(string), } default: return nil, fmt.Errorf("not supported primary data type") From c6a1c49e02249446b87b227cf55ec7c3d799aa3e Mon Sep 17 00:00:00 2001 From: wei liu Date: Fri, 31 May 2024 17:49:45 +0800 Subject: [PATCH 110/126] enhance: Use Blocked Bloom Filter instead of basic bloom fitler impl. (#33405) issue: #32995 To speed up the construction and querying of Bloom filters, we chose a blocked Bloom filter instead of a basic Bloom filter implementation. 
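
For orientation, a minimal sketch (not part of the patch itself) of how the new pieces are wired together, using only calls that appear in this diff: bloomfilter.NewBloomFilterWithType, the CommonCfg.BloomFilterSize / MaxBloomFalsePositive / BloomFilterType params, storage.PkStatistics, storage.NewLocationsCache, and the reworked Candidate.MayPkExist. The names `buildStats`, `mayExistAnywhere`, `candidates`, and `pk` are placeholders for illustration, not symbols from the patch.

    // Sketch only: the write path builds PkStatistics backed by the filter type
    // selected via CommonCfg.BloomFilterType (blocked or basic); the read path
    // hashes the primary key once through a LocationsCache and probes every
    // candidate with the cached locations.
    package example

    import (
    	"github.com/milvus-io/milvus/internal/querynodev2/pkoracle"
    	"github.com/milvus-io/milvus/internal/storage"
    	"github.com/milvus-io/milvus/internal/util/bloomfilter"
    	"github.com/milvus-io/milvus/pkg/util/paramtable"
    )

    // buildStats constructs pk statistics whose bloom filter implementation is
    // chosen by configuration rather than hard-coded to the basic filter.
    func buildStats() *storage.PkStatistics {
    	bf := bloomfilter.NewBloomFilterWithType(
    		paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(),
    		paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(),
    		paramtable.Get().CommonCfg.BloomFilterType.GetValue(),
    	)
    	return &storage.PkStatistics{PkFilter: bf}
    }

    // mayExistAnywhere hashes pk once via a LocationsCache and reuses the cached
    // locations for every candidate filter set.
    func mayExistAnywhere(pk storage.PrimaryKey, candidates []pkoracle.Candidate) bool {
    	lc := storage.NewLocationsCache(pk)
    	for _, c := range candidates {
    		if c.MayPkExist(lc) {
    			return true
    		}
    	}
    	return false
    }

Reusing one LocationsCache across all candidates is what turns the per-filter hashing cost into a one-time cost, which is where the multi-filter query speedup reported below comes from.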
WARN: This PR is compatible with old version bf impl, but if fall back to old milvus version, it may causes bloom filter deserialize failed. In single Bloom filter test cases with a capacity of 1,000,000 and a false positive rate (FPR) of 0.001, the blocked Bloom filter is 5 times faster than the basic Bloom filter in both querying and construction, at the cost of a 30% increase in memory usage. - Block BF construct time {"time": "54.128131ms"} - Block BF size {"size": 3021578} - Block BF Test cost {"time": "55.407352ms"} - Basic BF construct time {"time": "210.262183ms"} - Basic BF size {"size": 2396308} - Basic BF Test cost {"time": "192.596229ms"} In multi Bloom filter test cases with a capacity of 100,000, an FPR of 0.001, and 100 Bloom filters, we reuse the primary key locations for all Bloom filters to avoid repeated hash computations. As a result, the blocked Bloom filter is also 5 times faster than the basic Bloom filter in querying. - Block BF TestLocation cost {"time": "529.97183ms"} - Basic BF TestLocation cost {"time": "3.197430181s"} --------- Signed-off-by: Wei Liu --- go.mod | 4 +- go.sum | 25 +- .../datanode/metacache/bloom_filter_set.go | 7 +- .../datanode/syncmgr/storage_serializer.go | 1 + internal/datanode/writebuffer/write_buffer.go | 7 +- .../querynodev2/delegator/delegator_data.go | 9 +- .../delegator/delegator_data_test.go | 37 ++- .../querynodev2/delegator/delegator_test.go | 4 - .../querynodev2/pkoracle/bloom_filter_set.go | 66 +--- .../pkoracle/bloom_filter_set_test.go | 41 +-- internal/querynodev2/pkoracle/candidate.go | 4 +- internal/querynodev2/pkoracle/key.go | 11 +- internal/querynodev2/pkoracle/pk_oracle.go | 39 +-- .../querynodev2/segments/bloom_filter_set.go | 101 ------ .../segments/bloom_filter_set_test.go | 91 ------ internal/querynodev2/segments/mock_segment.go | 106 +------ internal/querynodev2/segments/segment.go | 12 +- .../querynodev2/segments/segment_interface.go | 4 +- .../segments/segment_loader_test.go | 15 +- internal/querynodev2/segments/segment_test.go | 8 - internal/storage/field_stats.go | 60 ++-- internal/storage/field_stats_test.go | 8 +- internal/storage/pk_statistics.go | 46 ++- internal/storage/stats.go | 49 ++- internal/storage/stats_test.go | 34 +- internal/util/bloomfilter/bloom_filter.go | 297 ++++++++++++++++++ .../util/bloomfilter/bloom_filter_test.go | 220 +++++++++++++ pkg/util/paramtable/component_param.go | 10 + pkg/util/paramtable/component_param_test.go | 1 + .../bloomfilter/bloom_filter_test.go | 196 ++++++++++++ 30 files changed, 974 insertions(+), 539 deletions(-) delete mode 100644 internal/querynodev2/segments/bloom_filter_set.go delete mode 100644 internal/querynodev2/segments/bloom_filter_set_test.go create mode 100644 internal/util/bloomfilter/bloom_filter.go create mode 100644 internal/util/bloomfilter/bloom_filter_test.go create mode 100644 tests/integration/bloomfilter/bloom_filter_test.go diff --git a/go.mod b/go.mod index 27e4c5bc06899..0b27e21ebb4aa 100644 --- a/go.mod +++ b/go.mod @@ -65,9 +65,11 @@ require ( require github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70 require ( + github.com/greatroar/blobloom v0.0.0-00010101000000-000000000000 github.com/jolestar/go-commons-pool/v2 v2.1.2 github.com/milvus-io/milvus/pkg v0.0.0-00010101000000-000000000000 github.com/pkg/errors v0.9.1 + github.com/zeebo/xxh3 v1.0.2 gopkg.in/yaml.v3 v3.0.1 ) @@ -209,7 +211,6 @@ require ( github.com/x448/float16 v0.8.4 // indirect github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect 
github.com/yusufpapurcu/wmi v1.2.2 // indirect - github.com/zeebo/xxh3 v1.0.2 // indirect go.etcd.io/bbolt v1.3.6 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.5 // indirect go.etcd.io/etcd/client/v2 v2.305.5 // indirect @@ -250,6 +251,7 @@ replace ( github.com/bketelsen/crypt => github.com/bketelsen/crypt v0.0.4 // Fix security alert for core-os/etcd github.com/expr-lang/expr => github.com/SimFG/expr v0.0.0-20231218130003-94d085776dc5 github.com/go-kit/kit => github.com/go-kit/kit v0.1.0 + github.com/greatroar/blobloom => github.com/weiliu1031/blobloom v0.0.0-20240530105622-1e0e104a7160 // github.com/milvus-io/milvus-storage/go => ../milvus-storage/go github.com/milvus-io/milvus/pkg => ./pkg github.com/streamnative/pulsarctl => github.com/xiaofan-luan/pulsarctl v0.5.1 diff --git a/go.sum b/go.sum index 25be847bbe9f7..b5f2c76e8e79e 100644 --- a/go.sum +++ b/go.sum @@ -56,12 +56,14 @@ github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.0/go.mod h1:OQeznEEkTZ9Orh github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 h1:sXr+ck84g/ZlZUOZiNELInmMgOsuGwdjjVkEIde0OtY= github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0/go.mod h1:okt5dMMTOFjX/aovMlrjvvXoPMBVSPzk9185BT0+eZM= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.2.0 h1:Ma67P/GGprNwsslzEH6+Kb8nybI8jpDTm4Wmzu2ReK8= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.2.0/go.mod h1:c+Lifp3EDEamAkPVzMooRNOK6CZjNSdEnf1A7jsI9u4= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0 h1:nVocQV40OQne5613EeLayJiRAJuKlBGy+m22qWG+WRg= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0/go.mod h1:7QJP7dr2wznCMeqIrhMgWGf7XpAQnVrJqDm9nvV3Cu4= github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0 h1:OBhqkivkhkMqLPymWEppkm7vgPQY2XsHoEkaMQ0AdZY= github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0/go.mod h1:kgDmCTgBzIEPFElEF+FK0SdjAor06dRq2Go927dnQ6o= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v1.2.1 h1:9F2/+DoOYIOksmaJFPw1tGFy1eDnIJXg+UHjuD8lTak= +github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53/go.mod h1:+3IMCy2vIlbG1XG/0ggNQv0SvxCAIpPM5b1nCz56Xno= github.com/CloudyKit/jet/v3 v3.0.0/go.mod h1:HKQPgSJmdK8hdoAbKUUWajkHyHo4RaU5rMdUywE7VMo= @@ -169,6 +171,7 @@ github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= +github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5/go.mod h1:h6jFvWxBdQXxjopDMZyH2UVceIRfR84bdzbkoKrsWNo= github.com/cockroachdb/datadriven v1.0.2 h1:H9MtNqVoVhvd9nCBwOyDjUEdZCREqbIdCJD93PBm/jA= github.com/cockroachdb/datadriven v1.0.2/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= @@ -215,6 +218,7 @@ github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8 github.com/dimfeld/httptreemux v5.0.1+incompatible 
h1:Qj3gVcDNoOthBAqftuD596rm4wg/adLLz5xh5CmpiCA= github.com/dimfeld/httptreemux v5.0.1+incompatible/go.mod h1:rbUlSV+CCpv/SuqUTP/8Bk2O3LyUV436/yaRGkhP6Z0= github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= +github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= @@ -235,6 +239,7 @@ github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go. github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.10.1 h1:c0g45+xCJhdgFGw7a5QAfdS4byAbud7miNWJ1WwEVf8= +github.com/envoyproxy/protoc-gen-validate v0.10.1/go.mod h1:DRjgyB0I43LtJapqN6NiRwroiAU2PaFuvk/vjgh61ss= github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c h1:8ISkoahWXwZR41ois5lSJBSVw4D0OV19Ht/JSTzvSv0= github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c/go.mod h1:Yg+htXGokKKdzcwhuNDwVvN+uBxDGXJ7G/VN1d8fa64= @@ -245,6 +250,7 @@ github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4/go.mod h1:5tD+ne github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg= +github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= @@ -257,6 +263,7 @@ github.com/frankban/quicktest v1.7.2/go.mod h1:jaStnuzAqU1AJdCO0l53JDCJrVDKcS03D github.com/frankban/quicktest v1.10.0/go.mod h1:ui7WezCLWMWxVWr1GETZY3smRy0G4KWq9vcPtJmFl7Y= github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og= github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= +github.com/frankban/quicktest v1.14.5/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= @@ -290,7 +297,6 @@ github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2C github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.3.0 
h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -300,6 +306,7 @@ github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AE github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= @@ -338,6 +345,7 @@ github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGw github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= +github.com/golang/glog v1.1.0/go.mod h1:pfYeQZ3JWZoXTV5sFc986z3HTpwQs9At6P4ImfuP3NQ= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -401,6 +409,7 @@ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= @@ -517,7 +526,6 @@ github.com/kataras/iris/v12 v12.1.8/go.mod h1:LMYy4VlP67TQ3Zgriz8RE2h2kMZV2SgMYb github.com/kataras/neffos v0.0.14/go.mod h1:8lqADm8PnbeFfL7CLXh1WHw53dG27MC3pgi2R1rmoTE= github.com/kataras/pio v0.0.2/go.mod h1:hAoW0t9UmXi4R5Oyq5Z4irTbaTsOemSrDGUtaTl7Dro= github.com/kataras/sitemap v0.0.5/go.mod h1:KY2eugMKiPwsJgx7+U103YZehfvNGOXURubcGyk0Bz8= -github.com/keybase/go-keychain v0.0.0-20190712205309-48d3d31d256d/go.mod h1:JJNrCn9otv/2QP4D7SMJBgaleKpOf66PnW6F5WGNRIc= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= @@ -548,7 +556,9 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kris-nova/logger v0.0.0-20181127235838-fd0d87064b06 h1:vN4d3jSss3ExzUn2cE0WctxztfOgiKvMKnDrydBsg00= 
+github.com/kris-nova/logger v0.0.0-20181127235838-fd0d87064b06/go.mod h1:++9BgZujZd4v0ZTZCb5iPsaomXdZWyxotIAh1IiDm44= github.com/kris-nova/lolgopher v0.0.0-20180921204813-313b3abb0d9b h1:xYEM2oBUhBEhQjrV+KJ9lEWDWYZoNVZUaBF++Wyljq4= +github.com/kris-nova/lolgopher v0.0.0-20180921204813-313b3abb0d9b/go.mod h1:V0HF/ZBlN86HqewcDC/cVxMmYDiRukWjSrgKLUAn9Js= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/labstack/echo/v4 v4.5.0/go.mod h1:czIriw4a0C1dFun+ObrXp7ok03xON0N1awStJ6ArI7Y= @@ -556,6 +566,7 @@ github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= github.com/lingdor/stackerror v0.0.0-20191119040541-976d8885ed76 h1:IVlcvV0CjvfBYYod5ePe89l+3LBAl//6n9kJ9Vr2i0k= +github.com/lingdor/stackerror v0.0.0-20191119040541-976d8885ed76/go.mod h1:Iu9BHUvTh8/KpbuSoKx/CaJEdJvFxSverxIy7I+nq7s= github.com/linkedin/goavro v2.1.0+incompatible/go.mod h1:bBCwI2eGYpUI/4820s67MElg9tdeLbINjLjiM2xZFYM= github.com/linkedin/goavro/v2 v2.9.8/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= github.com/linkedin/goavro/v2 v2.10.0/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= @@ -582,6 +593,7 @@ github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27k github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.8 h1:3tS41NlGYSmhhe/8fhGRzc+z3AYCw1Fe1WAyLuujKs0= +github.com/mattn/go-runewidth v0.0.8/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/goveralls v0.0.2/go.mod h1:8d1ZMHsd7fW6IRPKQh46F2WRpyib5/X4FOpevwGNQEw= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= @@ -653,6 +665,7 @@ github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/olekukonko/tablewriter v0.0.1 h1:b3iUnf1v+ppJiOfNX4yxxqfWKMQPZR5yoh8urCTFX88= +github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= @@ -715,6 +728,7 @@ github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndr github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= +github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.3/go.mod 
h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= @@ -842,6 +856,7 @@ github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69 github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.865 h1:LcUqBlKC4j15LhT303yQDX/XxyHG4haEQqbHgZZA4SY= github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.865/go.mod h1:r5r4xbfxSaeR04b166HGsBa/R4U3SueirEUpXGuw+Q0= github.com/thoas/go-funk v0.9.1 h1:O549iLZqPpTUQ10ykd26sZhzD+rmR5pWhuElrhbC20M= +github.com/thoas/go-funk v0.9.1/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q= github.com/tiancaiamao/gp v0.0.0-20221230034425-4025bc8a4d4a h1:J/YdBZ46WKpXsxsW93SG+q0F8KI+yFrcIDT4c/RNoc4= github.com/tiancaiamao/gp v0.0.0-20221230034425-4025bc8a4d4a/go.mod h1:h4xBhSNtOeEosLJ4P7JyKXX7Cabg7AVkWCK5gV2vOrM= github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM= @@ -879,6 +894,8 @@ github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBn github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= +github.com/weiliu1031/blobloom v0.0.0-20240530105622-1e0e104a7160 h1:x7cclCOEtr9zSzSZhwB7mhz/tFNHsILh6XewGTmJKk0= +github.com/weiliu1031/blobloom v0.0.0-20240530105622-1e0e104a7160/go.mod h1:mjMJ1hh1wjGVfr93QIHJ6FfDNVrA0IELv8OvMHJxHKs= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= @@ -901,6 +918,7 @@ github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1 github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= @@ -971,6 +989,7 @@ go.uber.org/automaxprocs v1.5.3/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnw go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= +go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= @@ -1458,6 +1477,7 @@ gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v1 v1.0.0/go.mod h1:CxwszS/Xz1C49Ucd2i6Zil5UToP1EmyrFhKaMVbg1mk= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= @@ -1514,3 +1534,4 @@ sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= stathat.com/c/consistent v1.0.0 h1:ezyc51EGcRPJUxfHGSgJjWzJdj3NiMU9pNfLNGiXV0c= +stathat.com/c/consistent v1.0.0/go.mod h1:QkzMWzcbB+yQBL2AttO6sgsQS/JSTapcDISJalmCDS0= diff --git a/internal/datanode/metacache/bloom_filter_set.go b/internal/datanode/metacache/bloom_filter_set.go index 7785e6875419e..80b7bc057849c 100644 --- a/internal/datanode/metacache/bloom_filter_set.go +++ b/internal/datanode/metacache/bloom_filter_set.go @@ -19,10 +19,10 @@ package metacache import ( "sync" - "github.com/bits-and-blooms/bloom/v3" "github.com/samber/lo" "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/util/paramtable" ) @@ -76,8 +76,9 @@ func (bfs *BloomFilterSet) UpdatePKRange(ids storage.FieldData) error { if bfs.current == nil { bfs.current = &storage.PkStatistics{ - PkFilter: bloom.NewWithEstimates(bfs.batchSize, - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), + PkFilter: bloomfilter.NewBloomFilterWithType(bfs.batchSize, + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()), } } diff --git a/internal/datanode/syncmgr/storage_serializer.go b/internal/datanode/syncmgr/storage_serializer.go index 784f349940858..35c0789adf8e5 100644 --- a/internal/datanode/syncmgr/storage_serializer.go +++ b/internal/datanode/syncmgr/storage_serializer.go @@ -205,6 +205,7 @@ func (s *storageV1Serializer) serializeMergedPkStats(pack *SyncPack) (*storage.B FieldID: s.pkField.GetFieldID(), MaxPk: pks.MaxPK, MinPk: pks.MinPK, + BFType: pks.PkFilter.Type(), BF: pks.PkFilter, PkType: int64(s.pkField.GetDataType()), } diff --git a/internal/datanode/writebuffer/write_buffer.go b/internal/datanode/writebuffer/write_buffer.go index 675f48fe92f06..412ff5d95c9d8 100644 --- a/internal/datanode/writebuffer/write_buffer.go +++ b/internal/datanode/writebuffer/write_buffer.go @@ -5,7 +5,6 @@ import ( "fmt" "sync" - "github.com/bits-and-blooms/bloom/v3" "github.com/cockroachdb/errors" "github.com/samber/lo" "go.uber.org/atomic" @@ -20,6 +19,7 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/mq/msgstream" @@ -383,7 +383,10 @@ type inData struct { func (id *inData) generatePkStats() { id.batchBF = &storage.PkStatistics{ - PkFilter: bloom.NewWithEstimates(uint(id.rowNum), paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), + PkFilter: bloomfilter.NewBloomFilterWithType( + uint(id.rowNum), + 
paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()), } for _, ids := range id.pkField { diff --git a/internal/querynodev2/delegator/delegator_data.go b/internal/querynodev2/delegator/delegator_data.go index 3990e63ba14b1..02dce93ecb42a 100644 --- a/internal/querynodev2/delegator/delegator_data.go +++ b/internal/querynodev2/delegator/delegator_data.go @@ -526,7 +526,8 @@ func (sd *shardDelegator) GetLevel0Deletions(partitionID int64, candidate pkorac if segment.Partition() == partitionID || segment.Partition() == common.AllPartitionsID { segmentPks, segmentTss := segment.DeleteRecords() for i, pk := range segmentPks { - if candidate.MayPkExist(pk) { + lc := storage.NewLocationsCache(pk) + if candidate.MayPkExist(lc) { pks = append(pks, pk) tss = append(tss, segmentTss[i]) } @@ -637,7 +638,8 @@ func (sd *shardDelegator) loadStreamDelete(ctx context.Context, continue } for i, pk := range record.DeleteData.Pks { - if candidate.MayPkExist(pk) { + lc := storage.NewLocationsCache(pk) + if candidate.MayPkExist(lc) { deleteData.Append(pk, record.DeleteData.Tss[i]) } } @@ -733,7 +735,8 @@ func (sd *shardDelegator) readDeleteFromMsgstream(ctx context.Context, position } for idx, pk := range storage.ParseIDs2PrimaryKeys(dmsg.GetPrimaryKeys()) { - if candidate.MayPkExist(pk) { + lc := storage.NewLocationsCache(pk) + if candidate.MayPkExist(lc) { result.Pks = append(result.Pks, pk) result.Tss = append(result.Tss, dmsg.Timestamps[idx]) } diff --git a/internal/querynodev2/delegator/delegator_data_test.go b/internal/querynodev2/delegator/delegator_data_test.go index 50665425aa8af..1a17f41812e9b 100644 --- a/internal/querynodev2/delegator/delegator_data_test.go +++ b/internal/querynodev2/delegator/delegator_data_test.go @@ -24,7 +24,6 @@ import ( "testing" "time" - bloom "github.com/bits-and-blooms/bloom/v3" "github.com/cockroachdb/errors" "github.com/samber/lo" "github.com/stretchr/testify/mock" @@ -41,6 +40,7 @@ import ( "github.com/milvus-io/milvus/internal/querynodev2/segments" "github.com/milvus-io/milvus/internal/querynodev2/tsafe" "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/util/commonpbutil" @@ -258,12 +258,8 @@ func (s *DelegatorDataSuite) TestProcessDelete() { ms.EXPECT().Indexes().Return(nil) ms.EXPECT().RowNum().Return(info.GetNumOfRows()) ms.EXPECT().Delete(mock.Anything, mock.Anything, mock.Anything).Return(nil) - ms.EXPECT().MayPkExist(mock.Anything).Call.Return(func(pk storage.PrimaryKey) bool { - return pk.EQ(storage.NewInt64PrimaryKey(10)) - }) - ms.EXPECT().GetHashFuncNum().Return(1) - ms.EXPECT().TestLocations(mock.Anything, mock.Anything).RunAndReturn(func(pk storage.PrimaryKey, locs []uint64) bool { - return pk.EQ(storage.NewInt64PrimaryKey(10)) + ms.EXPECT().MayPkExist(mock.Anything).RunAndReturn(func(lc *storage.LocationsCache) bool { + return lc.GetPk().EQ(storage.NewInt64PrimaryKey(10)) }) return ms }) @@ -272,8 +268,9 @@ func (s *DelegatorDataSuite) TestProcessDelete() { Call.Return(func(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) []*pkoracle.BloomFilterSet { return lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) *pkoracle.BloomFilterSet { bfs := pkoracle.NewBloomFilterSet(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed) - bf := 
bloom.NewWithEstimates(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()) + bf := bloomfilter.NewBloomFilterWithType(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()) pks := &storage.PkStatistics{ PkFilter: bf, } @@ -528,8 +525,10 @@ func (s *DelegatorDataSuite) TestLoadSegments() { Call.Return(func(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) []*pkoracle.BloomFilterSet { return lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) *pkoracle.BloomFilterSet { bfs := pkoracle.NewBloomFilterSet(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed) - bf := bloom.NewWithEstimates(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()) + bf := bloomfilter.NewBloomFilterWithType( + paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()) pks := &storage.PkStatistics{ PkFilter: bf, } @@ -686,8 +685,10 @@ func (s *DelegatorDataSuite) TestLoadSegments() { Call.Return(func(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) []*pkoracle.BloomFilterSet { return lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) *pkoracle.BloomFilterSet { bfs := pkoracle.NewBloomFilterSet(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed) - bf := bloom.NewWithEstimates(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()) + bf := bloomfilter.NewBloomFilterWithType( + paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()) pks := &storage.PkStatistics{ PkFilter: bf, } @@ -880,10 +881,6 @@ func (s *DelegatorDataSuite) TestReleaseSegment() { ms.EXPECT().MayPkExist(mock.Anything).Call.Return(func(pk storage.PrimaryKey) bool { return pk.EQ(storage.NewInt64PrimaryKey(10)) }) - ms.EXPECT().GetHashFuncNum().Return(1) - ms.EXPECT().TestLocations(mock.Anything, mock.Anything).RunAndReturn(func(pk storage.PrimaryKey, locs []uint64) bool { - return pk.EQ(storage.NewInt64PrimaryKey(10)) - }) return ms }) }, nil) @@ -891,8 +888,10 @@ func (s *DelegatorDataSuite) TestReleaseSegment() { Call.Return(func(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) []*pkoracle.BloomFilterSet { return lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) *pkoracle.BloomFilterSet { bfs := pkoracle.NewBloomFilterSet(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed) - bf := bloom.NewWithEstimates(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()) + bf := bloomfilter.NewBloomFilterWithType( + paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()) pks := &storage.PkStatistics{ PkFilter: bf, } diff --git a/internal/querynodev2/delegator/delegator_test.go b/internal/querynodev2/delegator/delegator_test.go index 4d51b1145df39..2dcd9ac5e01ec 100644 --- a/internal/querynodev2/delegator/delegator_test.go +++ 
b/internal/querynodev2/delegator/delegator_test.go @@ -99,10 +99,6 @@ func (s *DelegatorSuite) SetupTest() { ms.EXPECT().Indexes().Return(nil) ms.EXPECT().RowNum().Return(info.GetNumOfRows()) ms.EXPECT().Delete(mock.Anything, mock.Anything, mock.Anything).Return(nil) - ms.EXPECT().GetHashFuncNum().Return(1) - ms.EXPECT().TestLocations(mock.Anything, mock.Anything).RunAndReturn(func(pk storage.PrimaryKey, locs []uint64) bool { - return pk.EQ(storage.NewInt64PrimaryKey(10)) - }) return ms }) }, nil) diff --git a/internal/querynodev2/pkoracle/bloom_filter_set.go b/internal/querynodev2/pkoracle/bloom_filter_set.go index 608bb656efe23..88f5602ebfe98 100644 --- a/internal/querynodev2/pkoracle/bloom_filter_set.go +++ b/internal/querynodev2/pkoracle/bloom_filter_set.go @@ -17,15 +17,14 @@ package pkoracle import ( - "context" "sync" - bloom "github.com/bits-and-blooms/bloom/v3" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/paramtable" @@ -41,68 +40,25 @@ type BloomFilterSet struct { segType commonpb.SegmentState currentStat *storage.PkStatistics historyStats []*storage.PkStatistics - - kHashFunc uint } // MayPkExist returns whether any bloom filters returns positive. -func (s *BloomFilterSet) MayPkExist(pk storage.PrimaryKey) bool { +func (s *BloomFilterSet) MayPkExist(lc *storage.LocationsCache) bool { s.statsMutex.RLock() defer s.statsMutex.RUnlock() - if s.currentStat != nil && s.currentStat.PkExist(pk) { + if s.currentStat != nil && s.currentStat.TestLocationCache(lc) { return true } // for sealed, if one of the stats shows it exist, then we have to check it for _, historyStat := range s.historyStats { - if historyStat.PkExist(pk) { + if historyStat.TestLocationCache(lc) { return true } } return false } -func (s *BloomFilterSet) TestLocations(pk storage.PrimaryKey, locs []uint64) bool { - log := log.Ctx(context.TODO()).WithRateGroup("BloomFilterSet.TestLocations", 1, 60) - s.statsMutex.RLock() - defer s.statsMutex.RUnlock() - - if s.currentStat != nil { - k := s.currentStat.PkFilter.K() - if k > uint(len(locs)) { - log.RatedWarn(30, "locations num is less than hash func num, return false positive result", - zap.Int("locationNum", len(locs)), - zap.Uint("hashFuncNum", k), - zap.Int64("segmentID", s.segmentID)) - return true - } - - if s.currentStat.TestLocations(pk, locs[:k]) { - return true - } - } - - // for sealed, if one of the stats shows it exist, then we have to check it - for _, historyStat := range s.historyStats { - k := historyStat.PkFilter.K() - if k > uint(len(locs)) { - log.RatedWarn(30, "locations num is less than hash func num, return false positive result", - zap.Int("locationNum", len(locs)), - zap.Uint("hashFuncNum", k), - zap.Int64("segmentID", s.segmentID)) - return true - } - if historyStat.TestLocations(pk, locs[:k]) { - return true - } - } - return false -} - -func (s *BloomFilterSet) GetHashFuncNum() uint { - return s.kHashFunc -} - // ID implement candidate. 
func (s *BloomFilterSet) ID() int64 { return s.segmentID @@ -124,13 +80,12 @@ func (s *BloomFilterSet) UpdateBloomFilter(pks []storage.PrimaryKey) { defer s.statsMutex.Unlock() if s.currentStat == nil { - m, k := bloom.EstimateParameters(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()) - if k > s.kHashFunc { - s.kHashFunc = k - } + bf := bloomfilter.NewBloomFilterWithType( + paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()) s.currentStat = &storage.PkStatistics{ - PkFilter: bloom.New(m, k), + PkFilter: bf, } } @@ -157,9 +112,6 @@ func (s *BloomFilterSet) AddHistoricalStats(stats *storage.PkStatistics) { s.statsMutex.Lock() defer s.statsMutex.Unlock() - if stats.PkFilter.K() > s.kHashFunc { - s.kHashFunc = stats.PkFilter.K() - } s.historyStats = append(s.historyStats, stats) } diff --git a/internal/querynodev2/pkoracle/bloom_filter_set_test.go b/internal/querynodev2/pkoracle/bloom_filter_set_test.go index 0384d3faa7ad7..9aaa8f0a08b1c 100644 --- a/internal/querynodev2/pkoracle/bloom_filter_set_test.go +++ b/internal/querynodev2/pkoracle/bloom_filter_set_test.go @@ -41,10 +41,9 @@ func TestInt64Pk(t *testing.T) { bfs.UpdateBloomFilter(pks) for i := 0; i < batchSize; i++ { - locations := storage.Locations(pks[i], bfs.GetHashFuncNum()) - ret1 := bfs.TestLocations(pks[i], locations) - ret2 := bfs.MayPkExist(pks[i]) - assert.Equal(t, ret1, ret2) + lc := storage.NewLocationsCache(pks[i]) + ret := bfs.MayPkExist(lc) + assert.True(t, ret) } assert.Equal(t, int64(1), bfs.ID()) @@ -66,10 +65,9 @@ func TestVarCharPk(t *testing.T) { bfs.UpdateBloomFilter(pks) for i := 0; i < batchSize; i++ { - locations := storage.Locations(pks[i], bfs.GetHashFuncNum()) - ret1 := bfs.TestLocations(pks[i], locations) - ret2 := bfs.MayPkExist(pks[i]) - assert.Equal(t, ret1, ret2) + lc := storage.NewLocationsCache(pks[i]) + ret := bfs.MayPkExist(lc) + assert.True(t, ret) } } @@ -91,29 +89,8 @@ func TestHistoricalStat(t *testing.T) { bfs.currentStat = nil for i := 0; i < batchSize; i++ { - locations := storage.Locations(pks[i], bfs.GetHashFuncNum()) - ret1 := bfs.TestLocations(pks[i], locations) - ret2 := bfs.MayPkExist(pks[i]) - assert.Equal(t, ret1, ret2) - } -} - -func TestHashFuncNum(t *testing.T) { - paramtable.Init() - batchSize := 100 - pks := make([]storage.PrimaryKey, 0) - for i := 0; i < batchSize; i++ { - pk := storage.NewVarCharPrimaryKey(strconv.FormatInt(int64(i), 10)) - pks = append(pks, pk) - } - - bfs := NewBloomFilterSet(1, 1, commonpb.SegmentState_Sealed) - bfs.UpdateBloomFilter(pks) - - for i := 0; i < batchSize; i++ { - // pass locations more then hash func num in bf - locations := storage.Locations(pks[i], bfs.GetHashFuncNum()+3) - ret1 := bfs.TestLocations(pks[i], locations) - assert.True(t, ret1) + lc := storage.NewLocationsCache(pks[i]) + ret := bfs.MayPkExist(lc) + assert.True(t, ret) } } diff --git a/internal/querynodev2/pkoracle/candidate.go b/internal/querynodev2/pkoracle/candidate.go index e5f051e5f1939..c115a5a0c133e 100644 --- a/internal/querynodev2/pkoracle/candidate.go +++ b/internal/querynodev2/pkoracle/candidate.go @@ -26,9 +26,7 @@ import ( // Candidate is the interface for pk oracle candidate. type Candidate interface { // MayPkExist checks whether primary key could exists in this candidate. 
- MayPkExist(pk storage.PrimaryKey) bool - TestLocations(pk storage.PrimaryKey, locs []uint64) bool - GetHashFuncNum() uint + MayPkExist(lc *storage.LocationsCache) bool ID() int64 Partition() int64 diff --git a/internal/querynodev2/pkoracle/key.go b/internal/querynodev2/pkoracle/key.go index 9845b5e065343..6600398798670 100644 --- a/internal/querynodev2/pkoracle/key.go +++ b/internal/querynodev2/pkoracle/key.go @@ -28,20 +28,11 @@ type candidateKey struct { } // MayPkExist checks whether primary key could exists in this candidate. -func (k candidateKey) MayPkExist(pk storage.PrimaryKey) bool { +func (k candidateKey) MayPkExist(lc *storage.LocationsCache) bool { // always return true to prevent miuse return true } -func (k candidateKey) TestLocations(pk storage.PrimaryKey, locs []uint64) bool { - // always return true to prevent miuse - return true -} - -func (k candidateKey) GetHashFuncNum() uint { - return 0 -} - // ID implements Candidate. func (k candidateKey) ID() int64 { return k.segmentID diff --git a/internal/querynodev2/pkoracle/pk_oracle.go b/internal/querynodev2/pkoracle/pk_oracle.go index 4d686503ec952..a700fe3066e05 100644 --- a/internal/querynodev2/pkoracle/pk_oracle.go +++ b/internal/querynodev2/pkoracle/pk_oracle.go @@ -19,10 +19,8 @@ package pkoracle import ( "fmt" - "sync" "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -43,30 +41,11 @@ var _ PkOracle = (*pkOracle)(nil) // pkOracle implementation. type pkOracle struct { candidates *typeutil.ConcurrentMap[string, candidateWithWorker] - - hashFuncNumMutex sync.RWMutex - maxHashFuncNum uint -} - -func (pko *pkOracle) GetMaxHashFuncNum() uint { - pko.hashFuncNumMutex.RLock() - defer pko.hashFuncNumMutex.RUnlock() - return pko.maxHashFuncNum -} - -func (pko *pkOracle) TryUpdateHashFuncNum(newValue uint) { - pko.hashFuncNumMutex.Lock() - defer pko.hashFuncNumMutex.Unlock() - if newValue > pko.maxHashFuncNum { - pko.maxHashFuncNum = newValue - } } // Get implements PkOracle. func (pko *pkOracle) Get(pk storage.PrimaryKey, filters ...CandidateFilter) ([]int64, error) { var result []int64 - var locations []uint64 - pko.candidates.Range(func(key string, candidate candidateWithWorker) bool { for _, filter := range filters { if !filter(candidate) { @@ -74,15 +53,8 @@ func (pko *pkOracle) Get(pk storage.PrimaryKey, filters ...CandidateFilter) ([]i } } - if locations == nil { - locations = storage.Locations(pk, pko.GetMaxHashFuncNum()) - if len(locations) == 0 { - log.Warn("pkOracle: no location found for pk") - return true - } - } - - if candidate.TestLocations(pk, locations) { + lc := storage.NewLocationsCache(pk) + if candidate.MayPkExist(lc) { result = append(result, candidate.ID()) } return true @@ -97,7 +69,6 @@ func (pko *pkOracle) candidateKey(candidate Candidate, workerID int64) string { // Register register candidate func (pko *pkOracle) Register(candidate Candidate, workerID int64) error { - pko.TryUpdateHashFuncNum(candidate.GetHashFuncNum()) pko.candidates.Insert(pko.candidateKey(candidate, workerID), candidateWithWorker{ Candidate: candidate, workerID: workerID, @@ -108,7 +79,6 @@ func (pko *pkOracle) Register(candidate Candidate, workerID int64) error { // Remove removes candidate from pko. 
func (pko *pkOracle) Remove(filters ...CandidateFilter) error { - max := uint(0) pko.candidates.Range(func(key string, candidate candidateWithWorker) bool { for _, filter := range filters { if !filter(candidate) { @@ -116,14 +86,9 @@ func (pko *pkOracle) Remove(filters ...CandidateFilter) error { } } pko.candidates.GetAndRemove(pko.candidateKey(candidate, candidate.workerID)) - if candidate.GetHashFuncNum() > max { - max = candidate.GetHashFuncNum() - } - return true }) - pko.TryUpdateHashFuncNum(max) return nil } diff --git a/internal/querynodev2/segments/bloom_filter_set.go b/internal/querynodev2/segments/bloom_filter_set.go deleted file mode 100644 index b07713961cf1b..0000000000000 --- a/internal/querynodev2/segments/bloom_filter_set.go +++ /dev/null @@ -1,101 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package segments - -import ( - "sync" - - bloom "github.com/bits-and-blooms/bloom/v3" - "go.uber.org/zap" - - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - storage "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/log" - "github.com/milvus-io/milvus/pkg/util/paramtable" -) - -type bloomFilterSet struct { - statsMutex sync.RWMutex - currentStat *storage.PkStatistics - historyStats []*storage.PkStatistics -} - -func newBloomFilterSet() *bloomFilterSet { - return &bloomFilterSet{} -} - -// MayPkExist returns whether any bloom filters returns positive. -func (s *bloomFilterSet) MayPkExist(pk storage.PrimaryKey) bool { - s.statsMutex.RLock() - defer s.statsMutex.RUnlock() - if s.currentStat != nil && s.currentStat.PkExist(pk) { - return true - } - - // for sealed, if one of the stats shows it exist, then we have to check it - for _, historyStat := range s.historyStats { - if historyStat.PkExist(pk) { - return true - } - } - return false -} - -// UpdateBloomFilter updates currentStats with provided pks. -func (s *bloomFilterSet) UpdateBloomFilter(pks []storage.PrimaryKey) { - s.statsMutex.Lock() - defer s.statsMutex.Unlock() - - if s.currentStat == nil { - s.initCurrentStat() - } - - buf := make([]byte, 8) - for _, pk := range pks { - s.currentStat.UpdateMinMax(pk) - switch pk.Type() { - case schemapb.DataType_Int64: - int64Value := pk.(*storage.Int64PrimaryKey).Value - common.Endian.PutUint64(buf, uint64(int64Value)) - s.currentStat.PkFilter.Add(buf) - case schemapb.DataType_VarChar: - stringValue := pk.(*storage.VarCharPrimaryKey).Value - s.currentStat.PkFilter.AddString(stringValue) - default: - log.Error("failed to update bloomfilter", zap.Any("PK type", pk.Type())) - panic("failed to update bloomfilter") - } - } -} - -// AddHistoricalStats add loaded historical stats. 
-func (s *bloomFilterSet) AddHistoricalStats(stats *storage.PkStatistics) { - s.statsMutex.Lock() - defer s.statsMutex.Unlock() - - s.historyStats = append(s.historyStats, stats) -} - -// initCurrentStat initialize currentStats if nil. -// Note: invoker shall acquire statsMutex lock first. -func (s *bloomFilterSet) initCurrentStat() { - s.currentStat = &storage.PkStatistics{ - PkFilter: bloom.NewWithEstimates(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), - } -} diff --git a/internal/querynodev2/segments/bloom_filter_set_test.go b/internal/querynodev2/segments/bloom_filter_set_test.go deleted file mode 100644 index 9bf95a1ff9678..0000000000000 --- a/internal/querynodev2/segments/bloom_filter_set_test.go +++ /dev/null @@ -1,91 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package segments - -import ( - "testing" - - "github.com/stretchr/testify/suite" - - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/util/paramtable" -) - -type BloomFilterSetSuite struct { - suite.Suite - - intPks []int64 - stringPks []string - set *bloomFilterSet -} - -func (suite *BloomFilterSetSuite) SetupTest() { - suite.intPks = []int64{1, 2, 3} - suite.stringPks = []string{"1", "2", "3"} - paramtable.Init() - suite.set = newBloomFilterSet() -} - -func (suite *BloomFilterSetSuite) TestInt64PkBloomFilter() { - pks, err := storage.GenInt64PrimaryKeys(suite.intPks...) - suite.NoError(err) - - suite.set.UpdateBloomFilter(pks) - for _, pk := range pks { - exist := suite.set.MayPkExist(pk) - suite.True(exist) - } -} - -func (suite *BloomFilterSetSuite) TestStringPkBloomFilter() { - pks, err := storage.GenVarcharPrimaryKeys(suite.stringPks...) - suite.NoError(err) - - suite.set.UpdateBloomFilter(pks) - for _, pk := range pks { - exist := suite.set.MayPkExist(pk) - suite.True(exist) - } -} - -func (suite *BloomFilterSetSuite) TestHistoricalBloomFilter() { - pks, err := storage.GenVarcharPrimaryKeys(suite.stringPks...) 
- suite.NoError(err) - - suite.set.UpdateBloomFilter(pks) - for _, pk := range pks { - exist := suite.set.MayPkExist(pk) - suite.True(exist) - } - - old := suite.set.currentStat - suite.set.currentStat = nil - for _, pk := range pks { - exist := suite.set.MayPkExist(pk) - suite.False(exist) - } - - suite.set.AddHistoricalStats(old) - for _, pk := range pks { - exist := suite.set.MayPkExist(pk) - suite.True(exist) - } -} - -func TestBloomFilterSet(t *testing.T) { - suite.Run(t, &BloomFilterSetSuite{}) -} diff --git a/internal/querynodev2/segments/mock_segment.go b/internal/querynodev2/segments/mock_segment.go index 3121d0ca452c6..e31d1b5181ff7 100644 --- a/internal/querynodev2/segments/mock_segment.go +++ b/internal/querynodev2/segments/mock_segment.go @@ -246,47 +246,6 @@ func (_c *MockSegment_ExistIndex_Call) RunAndReturn(run func(int64) bool) *MockS return _c } -// GetHashFuncNum provides a mock function with given fields: -func (_m *MockSegment) GetHashFuncNum() uint { - ret := _m.Called() - - var r0 uint - if rf, ok := ret.Get(0).(func() uint); ok { - r0 = rf() - } else { - r0 = ret.Get(0).(uint) - } - - return r0 -} - -// MockSegment_GetHashFuncNum_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetHashFuncNum' -type MockSegment_GetHashFuncNum_Call struct { - *mock.Call -} - -// GetHashFuncNum is a helper method to define mock.On call -func (_e *MockSegment_Expecter) GetHashFuncNum() *MockSegment_GetHashFuncNum_Call { - return &MockSegment_GetHashFuncNum_Call{Call: _e.mock.On("GetHashFuncNum")} -} - -func (_c *MockSegment_GetHashFuncNum_Call) Run(run func()) *MockSegment_GetHashFuncNum_Call { - _c.Call.Run(func(args mock.Arguments) { - run() - }) - return _c -} - -func (_c *MockSegment_GetHashFuncNum_Call) Return(_a0 uint) *MockSegment_GetHashFuncNum_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *MockSegment_GetHashFuncNum_Call) RunAndReturn(run func() uint) *MockSegment_GetHashFuncNum_Call { - _c.Call.Return(run) - return _c -} - // GetIndex provides a mock function with given fields: fieldID func (_m *MockSegment) GetIndex(fieldID int64) *IndexedFieldInfo { ret := _m.Called(fieldID) @@ -752,13 +711,13 @@ func (_c *MockSegment_LoadInfo_Call) RunAndReturn(run func() *querypb.SegmentLoa return _c } -// MayPkExist provides a mock function with given fields: pk -func (_m *MockSegment) MayPkExist(pk storage.PrimaryKey) bool { - ret := _m.Called(pk) +// MayPkExist provides a mock function with given fields: lc +func (_m *MockSegment) MayPkExist(lc *storage.LocationsCache) bool { + ret := _m.Called(lc) var r0 bool - if rf, ok := ret.Get(0).(func(storage.PrimaryKey) bool); ok { - r0 = rf(pk) + if rf, ok := ret.Get(0).(func(*storage.LocationsCache) bool); ok { + r0 = rf(lc) } else { r0 = ret.Get(0).(bool) } @@ -772,14 +731,14 @@ type MockSegment_MayPkExist_Call struct { } // MayPkExist is a helper method to define mock.On call -// - pk storage.PrimaryKey -func (_e *MockSegment_Expecter) MayPkExist(pk interface{}) *MockSegment_MayPkExist_Call { - return &MockSegment_MayPkExist_Call{Call: _e.mock.On("MayPkExist", pk)} +// - lc *storage.LocationsCache +func (_e *MockSegment_Expecter) MayPkExist(lc interface{}) *MockSegment_MayPkExist_Call { + return &MockSegment_MayPkExist_Call{Call: _e.mock.On("MayPkExist", lc)} } -func (_c *MockSegment_MayPkExist_Call) Run(run func(pk storage.PrimaryKey)) *MockSegment_MayPkExist_Call { +func (_c *MockSegment_MayPkExist_Call) Run(run func(lc *storage.LocationsCache)) *MockSegment_MayPkExist_Call { 
_c.Call.Run(func(args mock.Arguments) { - run(args[0].(storage.PrimaryKey)) + run(args[0].(*storage.LocationsCache)) }) return _c } @@ -789,7 +748,7 @@ func (_c *MockSegment_MayPkExist_Call) Return(_a0 bool) *MockSegment_MayPkExist_ return _c } -func (_c *MockSegment_MayPkExist_Call) RunAndReturn(run func(storage.PrimaryKey) bool) *MockSegment_MayPkExist_Call { +func (_c *MockSegment_MayPkExist_Call) RunAndReturn(run func(*storage.LocationsCache) bool) *MockSegment_MayPkExist_Call { _c.Call.Return(run) return _c } @@ -1453,49 +1412,6 @@ func (_c *MockSegment_StartPosition_Call) RunAndReturn(run func() *msgpb.MsgPosi return _c } -// TestLocations provides a mock function with given fields: pk, loc -func (_m *MockSegment) TestLocations(pk storage.PrimaryKey, loc []uint64) bool { - ret := _m.Called(pk, loc) - - var r0 bool - if rf, ok := ret.Get(0).(func(storage.PrimaryKey, []uint64) bool); ok { - r0 = rf(pk, loc) - } else { - r0 = ret.Get(0).(bool) - } - - return r0 -} - -// MockSegment_TestLocations_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'TestLocations' -type MockSegment_TestLocations_Call struct { - *mock.Call -} - -// TestLocations is a helper method to define mock.On call -// - pk storage.PrimaryKey -// - loc []uint64 -func (_e *MockSegment_Expecter) TestLocations(pk interface{}, loc interface{}) *MockSegment_TestLocations_Call { - return &MockSegment_TestLocations_Call{Call: _e.mock.On("TestLocations", pk, loc)} -} - -func (_c *MockSegment_TestLocations_Call) Run(run func(pk storage.PrimaryKey, loc []uint64)) *MockSegment_TestLocations_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(storage.PrimaryKey), args[1].([]uint64)) - }) - return _c -} - -func (_c *MockSegment_TestLocations_Call) Return(_a0 bool) *MockSegment_TestLocations_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *MockSegment_TestLocations_Call) RunAndReturn(run func(storage.PrimaryKey, []uint64) bool) *MockSegment_TestLocations_Call { - _c.Call.Return(run) - return _c -} - // Type provides a mock function with given fields: func (_m *MockSegment) Type() commonpb.SegmentState { ret := _m.Called() diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 3382b4373a364..b4291850bca59 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -186,16 +186,8 @@ func (s *baseSegment) UpdateBloomFilter(pks []storage.PrimaryKey) { // MayPkExist returns true if the given PK exists in the PK range and being positive through the bloom filter, // false otherwise, // may returns true even the PK doesn't exist actually -func (s *baseSegment) MayPkExist(pk storage.PrimaryKey) bool { - return s.bloomFilterSet.MayPkExist(pk) -} - -func (s *baseSegment) TestLocations(pk storage.PrimaryKey, loc []uint64) bool { - return s.bloomFilterSet.TestLocations(pk, loc) -} - -func (s *baseSegment) GetHashFuncNum() uint { - return s.bloomFilterSet.GetHashFuncNum() +func (s *baseSegment) MayPkExist(lc *storage.LocationsCache) bool { + return s.bloomFilterSet.MayPkExist(lc) } // ResourceUsageEstimate returns the estimated resource usage of the segment. 
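The hunk above collapses the old TestLocations/GetHashFuncNum pair into a single MayPkExist(*storage.LocationsCache) probe. As a minimal sketch of the intended call pattern only — the helper name and the plain []Segment parameter below are illustrative assumptions, not part of this patch — a caller can now hash a primary key once via storage.NewLocationsCache and reuse the cached locations against every candidate segment's bloom filter:

	// Sketch: probe many segments with one LocationsCache per primary key.
	// mayPkExistInAny is a hypothetical helper, not defined in this patch.
	func mayPkExistInAny(segments []Segment, pk storage.PrimaryKey) bool {
		lc := storage.NewLocationsCache(pk) // hash locations computed lazily, once per BF type
		for _, segment := range segments {
			if segment.MayPkExist(lc) { // each segment reuses the cached locations
				return true
			}
		}
		return false
	}

Note that LocationsCache is documented later in this patch as not concurrency-safe, so a sketch like this must keep the cache within a single goroutine.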
diff --git a/internal/querynodev2/segments/segment_interface.go b/internal/querynodev2/segments/segment_interface.go index 9ed9d4df90e21..f439d0f818e62 100644 --- a/internal/querynodev2/segments/segment_interface.go +++ b/internal/querynodev2/segments/segment_interface.go @@ -83,9 +83,7 @@ type Segment interface { // Bloom filter related UpdateBloomFilter(pks []storage.PrimaryKey) - MayPkExist(pk storage.PrimaryKey) bool - TestLocations(pk storage.PrimaryKey, loc []uint64) bool - GetHashFuncNum() uint + MayPkExist(lc *storage.LocationsCache) bool // Read operations Search(ctx context.Context, searchReq *SearchRequest) (*SearchResult, error) diff --git a/internal/querynodev2/segments/segment_loader_test.go b/internal/querynodev2/segments/segment_loader_test.go index 138fed79b784d..a1930159d45cb 100644 --- a/internal/querynodev2/segments/segment_loader_test.go +++ b/internal/querynodev2/segments/segment_loader_test.go @@ -226,7 +226,8 @@ func (suite *SegmentLoaderSuite) TestLoadMultipleSegments() { // Won't load bloom filter with sealed segments for _, segment := range segments { for pk := 0; pk < 100; pk++ { - exist := segment.MayPkExist(storage.NewInt64PrimaryKey(int64(pk))) + lc := storage.NewLocationsCache(storage.NewInt64PrimaryKey(int64(pk))) + exist := segment.MayPkExist(lc) suite.Require().False(exist) } } @@ -260,7 +261,8 @@ func (suite *SegmentLoaderSuite) TestLoadMultipleSegments() { // Should load bloom filter with growing segments for _, segment := range segments { for pk := 0; pk < 100; pk++ { - exist := segment.MayPkExist(storage.NewInt64PrimaryKey(int64(pk))) + lc := storage.NewLocationsCache(storage.NewInt64PrimaryKey(int64(pk))) + exist := segment.MayPkExist(lc) suite.True(exist) } } @@ -351,7 +353,8 @@ func (suite *SegmentLoaderSuite) TestLoadBloomFilter() { for _, bf := range bfs { for pk := 0; pk < 100; pk++ { - exist := bf.MayPkExist(storage.NewInt64PrimaryKey(int64(pk))) + lc := storage.NewLocationsCache(storage.NewInt64PrimaryKey(int64(pk))) + exist := bf.MayPkExist(lc) suite.Require().True(exist) } } @@ -404,7 +407,8 @@ func (suite *SegmentLoaderSuite) TestLoadDeltaLogs() { if pk == 1 || pk == 2 { continue } - exist := segment.MayPkExist(storage.NewInt64PrimaryKey(int64(pk))) + lc := storage.NewLocationsCache(storage.NewInt64PrimaryKey(int64(pk))) + exist := segment.MayPkExist(lc) suite.Require().True(exist) } } @@ -457,7 +461,8 @@ func (suite *SegmentLoaderSuite) TestLoadDupDeltaLogs() { if pk == 1 || pk == 2 { continue } - exist := segment.MayPkExist(storage.NewInt64PrimaryKey(int64(pk))) + lc := storage.NewLocationsCache(storage.NewInt64PrimaryKey(int64(pk))) + exist := segment.MayPkExist(lc) suite.Require().True(exist) } diff --git a/internal/querynodev2/segments/segment_test.go b/internal/querynodev2/segments/segment_test.go index d4f1855ab422f..464df07e7ab74 100644 --- a/internal/querynodev2/segments/segment_test.go +++ b/internal/querynodev2/segments/segment_test.go @@ -188,14 +188,6 @@ func (suite *SegmentSuite) TestHasRawData() { suite.True(has) } -func (suite *SegmentSuite) TestLocation() { - pk := storage.NewInt64PrimaryKey(100) - locations := storage.Locations(pk, suite.sealed.GetHashFuncNum()) - ret1 := suite.sealed.TestLocations(pk, locations) - ret2 := suite.sealed.MayPkExist(pk) - suite.Equal(ret1, ret2) -} - func (suite *SegmentSuite) TestCASVersion() { segment := suite.sealed diff --git a/internal/storage/field_stats.go b/internal/storage/field_stats.go index a26e8aa9e1f91..87d6e9acf7c77 100644 --- a/internal/storage/field_stats.go +++ 
b/internal/storage/field_stats.go @@ -20,10 +20,12 @@ import ( "encoding/json" "fmt" - "github.com/bits-and-blooms/bloom/v3" + "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" ) @@ -31,12 +33,13 @@ import ( // FieldStats contains statistics data for any column // todo: compatible to PrimaryKeyStats type FieldStats struct { - FieldID int64 `json:"fieldID"` - Type schemapb.DataType `json:"type"` - Max ScalarFieldValue `json:"max"` // for scalar field - Min ScalarFieldValue `json:"min"` // for scalar field - BF *bloom.BloomFilter `json:"bf"` // for scalar field - Centroids []VectorFieldValue `json:"centroids"` // for vector field + FieldID int64 `json:"fieldID"` + Type schemapb.DataType `json:"type"` + Max ScalarFieldValue `json:"max"` // for scalar field + Min ScalarFieldValue `json:"min"` // for scalar field + BFType bloomfilter.BFType `json:"bfType"` // for scalar field + BF bloomfilter.BloomFilterInterface `json:"bf"` // for scalar field + Centroids []VectorFieldValue `json:"centroids"` // for vector field } // UnmarshalJSON unmarshal bytes to FieldStats @@ -141,12 +144,22 @@ func (stats *FieldStats) UnmarshalJSON(data []byte) error { } } - if bfMessage, ok := messageMap["bf"]; ok && bfMessage != nil { - stats.BF = &bloom.BloomFilter{} - err = stats.BF.UnmarshalJSON(*bfMessage) + bfType := bloomfilter.BasicBF + if bfTypeMessage, ok := messageMap["bfType"]; ok && bfTypeMessage != nil { + err := json.Unmarshal(*bfTypeMessage, &bfType) if err != nil { return err } + stats.BFType = bfType + } + + if bfMessage, ok := messageMap["bf"]; ok && bfMessage != nil { + bf, err := bloomfilter.UnmarshalJSON(*bfMessage, bfType) + if err != nil { + log.Warn("Failed to unmarshal bloom filter, use AlwaysTrueBloomFilter instead of return err", zap.Error(err)) + bf = bloomfilter.AlwaysTrueBloomFilter + } + stats.BF = bf } } else { stats.initCentroids(data, stats.Type) @@ -161,12 +174,12 @@ func (stats *FieldStats) UnmarshalJSON(data []byte) error { func (stats *FieldStats) initCentroids(data []byte, dataType schemapb.DataType) { type FieldStatsAux struct { - FieldID int64 `json:"fieldID"` - Type schemapb.DataType `json:"type"` - Max json.RawMessage `json:"max"` - Min json.RawMessage `json:"min"` - BF *bloom.BloomFilter `json:"bf"` - Centroids []json.RawMessage `json:"centroids"` + FieldID int64 `json:"fieldID"` + Type schemapb.DataType `json:"type"` + Max json.RawMessage `json:"max"` + Min json.RawMessage `json:"min"` + BF bloomfilter.BloomFilterInterface `json:"bf"` + Centroids []json.RawMessage `json:"centroids"` } // Unmarshal JSON into the auxiliary struct var aux FieldStatsAux @@ -361,10 +374,15 @@ func NewFieldStats(fieldID int64, pkType schemapb.DataType, rowNum int64) (*Fiel Type: pkType, }, nil } + bfType := paramtable.Get().CommonCfg.BloomFilterType.GetValue() return &FieldStats{ FieldID: fieldID, Type: pkType, - BF: bloom.NewWithEstimates(uint(rowNum), paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), + BFType: bloomfilter.BFTypeFromString(bfType), + BF: bloomfilter.NewBloomFilterWithType( + uint(rowNum), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + bfType), }, nil } @@ -391,11 +409,17 @@ func (sw *FieldStatsWriter) GenerateList(stats []*FieldStats) error { // GenerateByData writes data from @msgs with @fieldID 
to @buffer func (sw *FieldStatsWriter) GenerateByData(fieldID int64, pkType schemapb.DataType, msgs ...FieldData) error { statsList := make([]*FieldStats, 0) + + bfType := paramtable.Get().CommonCfg.BloomFilterType.GetValue() for _, msg := range msgs { stats := &FieldStats{ FieldID: fieldID, Type: pkType, - BF: bloom.NewWithEstimates(uint(msg.RowNum()), paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), + BFType: bloomfilter.BFTypeFromString(bfType), + BF: bloomfilter.NewBloomFilterWithType( + uint(msg.RowNum()), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + bfType), } stats.UpdateByMsgs(msg) diff --git a/internal/storage/field_stats_test.go b/internal/storage/field_stats_test.go index e169902bf9749..f04155ac2d873 100644 --- a/internal/storage/field_stats_test.go +++ b/internal/storage/field_stats_test.go @@ -20,12 +20,13 @@ import ( "encoding/json" "testing" - "github.com/bits-and-blooms/bloom/v3" "github.com/stretchr/testify/assert" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/paramtable" ) func TestFieldStatsUpdate(t *testing.T) { @@ -373,7 +374,7 @@ func TestFieldStatsWriter_UpgradePrimaryKey(t *testing.T) { FieldID: common.RowIDField, Min: 1, Max: 9, - BF: bloom.NewWithEstimates(100000, 0.05), + BF: bloomfilter.NewBloomFilterWithType(100000, 0.05, paramtable.Get().CommonCfg.BloomFilterType.GetValue()), } b := make([]byte, 8) @@ -574,8 +575,9 @@ func TestFieldStatsUnMarshal(t *testing.T) { assert.Error(t, err) err = stats.UnmarshalJSON([]byte("{\"fieldID\":1,\"max\":10, \"maxPk\":10, \"minPk\": \"b\"}")) assert.Error(t, err) + // return AlwaysTrueBloomFilter when deserialize bloom filter failed. err = stats.UnmarshalJSON([]byte("{\"fieldID\":1,\"max\":10, \"maxPk\":10, \"minPk\": 1, \"bf\": \"2\"}")) - assert.Error(t, err) + assert.NoError(t, err) }) t.Run("succeed", func(t *testing.T) { diff --git a/internal/storage/pk_statistics.go b/internal/storage/pk_statistics.go index ae5c549f65485..7d4b21e2ef44c 100644 --- a/internal/storage/pk_statistics.go +++ b/internal/storage/pk_statistics.go @@ -19,18 +19,18 @@ package storage import ( "fmt" - "github.com/bits-and-blooms/bloom/v3" "github.com/cockroachdb/errors" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" ) // pkStatistics contains pk field statistic information type PkStatistics struct { - PkFilter *bloom.BloomFilter // bloom filter of pk inside a segment - MinPK PrimaryKey // minimal pk value, shortcut for checking whether a pk is inside this segment - MaxPK PrimaryKey // maximal pk value, same above + PkFilter bloomfilter.BloomFilterInterface // bloom filter of pk inside a segment + MinPK PrimaryKey // minimal pk value, shortcut for checking whether a pk is inside this segment + MaxPK PrimaryKey // maximal pk value, same above } // update set pk min/max value if input value is beyond former range. @@ -109,16 +109,16 @@ func (st *PkStatistics) PkExist(pk PrimaryKey) bool { } // Locations returns a list of hash locations representing a data item. 
-func Locations(pk PrimaryKey, k uint) []uint64 { +func Locations(pk PrimaryKey, k uint, bfType bloomfilter.BFType) []uint64 { switch pk.Type() { case schemapb.DataType_Int64: buf := make([]byte, 8) int64Pk := pk.(*Int64PrimaryKey) common.Endian.PutUint64(buf, uint64(int64Pk.Value)) - return bloom.Locations(buf, k) + return bloomfilter.Locations(buf, k, bfType) case schemapb.DataType_VarChar: varCharPk := pk.(*VarCharPrimaryKey) - return bloom.Locations([]byte(varCharPk.Value), k) + return bloomfilter.Locations([]byte(varCharPk.Value), k, bfType) default: // TODO:: } @@ -147,7 +147,7 @@ func (st *PkStatistics) TestLocationCache(lc *LocationsCache) bool { } // check bf first, TestLocation just do some bitset compute, cost is cheaper - if !st.PkFilter.TestLocations(lc.Locations(st.PkFilter.K())) { + if !st.PkFilter.TestLocations(lc.Locations(st.PkFilter.K(), st.PkFilter.Type())) { return false } @@ -158,18 +158,30 @@ func (st *PkStatistics) TestLocationCache(lc *LocationsCache) bool { // LocationsCache is a helper struct caching pk bloom filter locations. // Note that this helper is not concurrent safe and shall be used in same goroutine. type LocationsCache struct { - pk PrimaryKey - k uint - locations []uint64 + pk PrimaryKey + basicBFLocations []uint64 + blockBFLocations []uint64 } -func (lc *LocationsCache) Locations(k uint) []uint64 { - if k > lc.k { - lc.k = k - lc.locations = Locations(lc.pk, lc.k) - } +func (lc *LocationsCache) GetPk() PrimaryKey { + return lc.pk +} - return lc.locations[:k] +func (lc *LocationsCache) Locations(k uint, bfType bloomfilter.BFType) []uint64 { + switch bfType { + case bloomfilter.BasicBF: + if int(k) > len(lc.basicBFLocations) { + lc.basicBFLocations = Locations(lc.pk, k, bfType) + } + return lc.basicBFLocations[:k] + case bloomfilter.BlockedBF: + if int(k) > len(lc.blockBFLocations) { + lc.blockBFLocations = Locations(lc.pk, k, bfType) + } + return lc.blockBFLocations[:k] + default: + return nil + } } func NewLocationsCache(pk PrimaryKey) *LocationsCache { diff --git a/internal/storage/stats.go b/internal/storage/stats.go index 7914e04b80ef5..75da19ab5ecd6 100644 --- a/internal/storage/stats.go +++ b/internal/storage/stats.go @@ -20,9 +20,10 @@ import ( "encoding/json" "fmt" - "github.com/bits-and-blooms/bloom/v3" + "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/merr" @@ -31,13 +32,14 @@ import ( // PrimaryKeyStats contains statistics data for pk column type PrimaryKeyStats struct { - FieldID int64 `json:"fieldID"` - Max int64 `json:"max"` // useless, will delete - Min int64 `json:"min"` // useless, will delete - BF *bloom.BloomFilter `json:"bf"` - PkType int64 `json:"pkType"` - MaxPk PrimaryKey `json:"maxPk"` - MinPk PrimaryKey `json:"minPk"` + FieldID int64 `json:"fieldID"` + Max int64 `json:"max"` // useless, will delete + Min int64 `json:"min"` // useless, will delete + BFType bloomfilter.BFType `json:"bfType"` + BF bloomfilter.BloomFilterInterface `json:"bf"` + PkType int64 `json:"pkType"` + MaxPk PrimaryKey `json:"maxPk"` + MinPk PrimaryKey `json:"minPk"` } // UnmarshalJSON unmarshal bytes to PrimaryKeyStats @@ -110,12 +112,22 @@ func (stats *PrimaryKeyStats) UnmarshalJSON(data []byte) error { } } - if bfMessage, ok := messageMap["bf"]; ok && bfMessage != nil { - stats.BF = &bloom.BloomFilter{} - err = stats.BF.UnmarshalJSON(*bfMessage) + bfType := 
bloomfilter.BasicBF + if bfTypeMessage, ok := messageMap["bfType"]; ok && bfTypeMessage != nil { + err := json.Unmarshal(*bfTypeMessage, &bfType) if err != nil { return err } + stats.BFType = bfType + } + + if bfMessage, ok := messageMap["bf"]; ok && bfMessage != nil { + bf, err := bloomfilter.UnmarshalJSON(*bfMessage, bfType) + if err != nil { + log.Warn("Failed to unmarshal bloom filter, use AlwaysTrueBloomFilter instead of return err", zap.Error(err)) + bf = bloomfilter.AlwaysTrueBloomFilter + } + stats.BF = bf } return nil @@ -189,10 +201,16 @@ func NewPrimaryKeyStats(fieldID, pkType, rowNum int64) (*PrimaryKeyStats, error) if rowNum <= 0 { return nil, merr.WrapErrParameterInvalidMsg("zero or negative row num", rowNum) } + + bfType := paramtable.Get().CommonCfg.BloomFilterType.GetValue() return &PrimaryKeyStats{ FieldID: fieldID, PkType: pkType, - BF: bloom.NewWithEstimates(uint(rowNum), paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), + BFType: bloomfilter.BFTypeFromString(bfType), + BF: bloomfilter.NewBloomFilterWithType( + uint(rowNum), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + bfType), }, nil } @@ -228,10 +246,15 @@ func (sw *StatsWriter) Generate(stats *PrimaryKeyStats) error { // GenerateByData writes Int64Stats or StringStats from @msgs with @fieldID to @buffer func (sw *StatsWriter) GenerateByData(fieldID int64, pkType schemapb.DataType, msgs FieldData) error { + bfType := paramtable.Get().CommonCfg.BloomFilterType.GetValue() stats := &PrimaryKeyStats{ FieldID: fieldID, PkType: int64(pkType), - BF: bloom.NewWithEstimates(uint(msgs.RowNum()), paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), + BFType: bloomfilter.BFTypeFromString(bfType), + BF: bloomfilter.NewBloomFilterWithType( + uint(msgs.RowNum()), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + bfType), } stats.UpdateByMsgs(msgs) diff --git a/internal/storage/stats_test.go b/internal/storage/stats_test.go index 709f49697f28c..cccd3d9f9e65b 100644 --- a/internal/storage/stats_test.go +++ b/internal/storage/stats_test.go @@ -20,12 +20,13 @@ import ( "encoding/json" "testing" - "github.com/bits-and-blooms/bloom/v3" "github.com/stretchr/testify/assert" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/paramtable" ) func TestStatsWriter_Int64PrimaryKey(t *testing.T) { @@ -124,11 +125,13 @@ func TestStatsWriter_UpgradePrimaryKey(t *testing.T) { Data: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9}, } + bfType := paramtable.Get().CommonCfg.BloomFilterType.GetValue() stats := &PrimaryKeyStats{ FieldID: common.RowIDField, Min: 1, Max: 9, - BF: bloom.NewWithEstimates(100000, 0.05), + BFType: bloomfilter.BFTypeFromString(bfType), + BF: bloomfilter.NewBloomFilterWithType(100000, 0.05, bfType), } b := make([]byte, 8) @@ -174,3 +177,30 @@ func TestDeserializeEmptyStats(t *testing.T) { _, err := DeserializeStats([]*Blob{blob}) assert.NoError(t, err) } + +func TestMarshalStats(t *testing.T) { + stat, err := NewPrimaryKeyStats(1, int64(schemapb.DataType_Int64), 100000) + assert.NoError(t, err) + + for i := 0; i < 10000; i++ { + stat.Update(NewInt64PrimaryKey(int64(i))) + } + + sw := &StatsWriter{} + sw.GenerateList([]*PrimaryKeyStats{stat}) + bytes := sw.GetBuffer() + + sr := &StatsReader{} + sr.SetBuffer(bytes) + stat1, err := sr.GetPrimaryKeyStatsList() + assert.NoError(t, err) + assert.Equal(t, 
1, len(stat1)) + assert.Equal(t, stat.Min, stat1[0].Min) + assert.Equal(t, stat.Max, stat1[0].Max) + + for i := 0; i < 10000; i++ { + b := make([]byte, 8) + common.Endian.PutUint64(b, uint64(i)) + assert.True(t, stat1[0].BF.Test(b)) + } +} diff --git a/internal/util/bloomfilter/bloom_filter.go b/internal/util/bloomfilter/bloom_filter.go new file mode 100644 index 0000000000000..778597844e631 --- /dev/null +++ b/internal/util/bloomfilter/bloom_filter.go @@ -0,0 +1,297 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package bloomfilter + +import ( + "encoding/json" + + "github.com/bits-and-blooms/bloom/v3" + "github.com/cockroachdb/errors" + "github.com/greatroar/blobloom" + "github.com/pingcap/log" + "github.com/zeebo/xxh3" + "go.uber.org/zap" +) + +type BFType int + +var AlwaysTrueBloomFilter = &alwaysTrueBloomFilter{} + +const ( + UnsupportedBFName = "Unsupported BloomFilter" + BlockBFName = "BlockedBloomFilter" + BasicBFName = "BasicBloomFilter" + AlwaysTrueBFName = "AlwaysTrueBloomFilter" +) + +const ( + UnsupportedBF BFType = iota + 1 + AlwaysTrueBF // empty bloom filter + BasicBF + BlockedBF +) + +var bfNames = map[BFType]string{ + BasicBF: BlockBFName, + BlockedBF: BasicBFName, + AlwaysTrueBF: AlwaysTrueBFName, + UnsupportedBF: UnsupportedBFName, +} + +func (t BFType) String() string { + return bfNames[t] +} + +func BFTypeFromString(name string) BFType { + switch name { + case BasicBFName: + return BasicBF + case BlockBFName: + return BlockedBF + case AlwaysTrueBFName: + return AlwaysTrueBF + default: + return UnsupportedBF + } +} + +type BloomFilterInterface interface { + Type() BFType + Cap() uint + K() uint + Add(data []byte) + AddString(data string) + Test(data []byte) bool + TestString(data string) bool + TestLocations(locs []uint64) bool + MarshalJSON() ([]byte, error) + UnmarshalJSON(data []byte) error +} + +type basicBloomFilter struct { + inner *bloom.BloomFilter + k uint +} + +func newBasicBloomFilter(capacity uint, fp float64) *basicBloomFilter { + inner := bloom.NewWithEstimates(capacity, fp) + return &basicBloomFilter{ + inner: inner, + k: inner.K(), + } +} + +func (b *basicBloomFilter) Type() BFType { + return BasicBF +} + +func (b *basicBloomFilter) Cap() uint { + return b.inner.Cap() +} + +func (b *basicBloomFilter) K() uint { + return b.k +} + +func (b *basicBloomFilter) Add(data []byte) { + b.inner.Add(data) +} + +func (b *basicBloomFilter) AddString(data string) { + b.inner.AddString(data) +} + +func (b *basicBloomFilter) Test(data []byte) bool { + return b.inner.Test(data) +} + +func (b *basicBloomFilter) TestString(data string) bool { + return b.inner.TestString(data) +} + +func (b *basicBloomFilter) TestLocations(locs []uint64) bool { + return b.inner.TestLocations(locs[:b.k]) +} + +func (b basicBloomFilter) MarshalJSON() 
([]byte, error) { + return b.inner.MarshalJSON() +} + +func (b *basicBloomFilter) UnmarshalJSON(data []byte) error { + inner := &bloom.BloomFilter{} + inner.UnmarshalJSON(data) + b.inner = inner + b.k = inner.K() + return nil +} + +// impl Blocked Bloom filter with blobloom and xxh3 hash +type blockedBloomFilter struct { + inner *blobloom.Filter + k uint +} + +func newBlockedBloomFilter(capacity uint, fp float64) *blockedBloomFilter { + inner := blobloom.NewOptimized(blobloom.Config{ + Capacity: uint64(capacity), + FPRate: fp, + }) + return &blockedBloomFilter{ + inner: inner, + k: inner.K(), + } +} + +func (b *blockedBloomFilter) Type() BFType { + return BlockedBF +} + +func (b *blockedBloomFilter) Cap() uint { + return uint(b.inner.NumBits()) +} + +func (b *blockedBloomFilter) K() uint { + return b.k +} + +func (b *blockedBloomFilter) Add(data []byte) { + loc := xxh3.Hash(data) + b.inner.Add(loc) +} + +func (b *blockedBloomFilter) AddString(data string) { + h := xxh3.HashString(data) + b.inner.Add(h) +} + +func (b *blockedBloomFilter) Test(data []byte) bool { + loc := xxh3.Hash(data) + return b.inner.Has(loc) +} + +func (b *blockedBloomFilter) TestString(data string) bool { + h := xxh3.HashString(data) + return b.inner.Has(h) +} + +func (b *blockedBloomFilter) TestLocations(locs []uint64) bool { + return b.inner.TestLocations(locs) +} + +func (b blockedBloomFilter) MarshalJSON() ([]byte, error) { + return b.inner.MarshalJSON() +} + +func (b *blockedBloomFilter) UnmarshalJSON(data []byte) error { + inner := &blobloom.Filter{} + inner.UnmarshalJSON(data) + b.inner = inner + b.k = inner.K() + + return nil +} + +// always true bloom filter is used when deserialize stat log failed. +// Notice: add item to empty bloom filter is not permitted. and all Test Func will return false positive. 
+type alwaysTrueBloomFilter struct{} + +func (b *alwaysTrueBloomFilter) Type() BFType { + return AlwaysTrueBF +} + +func (b *alwaysTrueBloomFilter) Cap() uint { + return 0 +} + +func (b *alwaysTrueBloomFilter) K() uint { + return 0 +} + +func (b *alwaysTrueBloomFilter) Add(data []byte) { +} + +func (b *alwaysTrueBloomFilter) AddString(data string) { +} + +func (b *alwaysTrueBloomFilter) Test(data []byte) bool { + return true +} + +func (b *alwaysTrueBloomFilter) TestString(data string) bool { + return true +} + +func (b *alwaysTrueBloomFilter) TestLocations(locs []uint64) bool { + return true +} + +func (b *alwaysTrueBloomFilter) MarshalJSON() ([]byte, error) { + return []byte{}, nil +} + +func (b *alwaysTrueBloomFilter) UnmarshalJSON(data []byte) error { + return nil +} + +func NewBloomFilterWithType(capacity uint, fp float64, typeName string) BloomFilterInterface { + bfType := BFTypeFromString(typeName) + switch bfType { + case BlockedBF: + return newBlockedBloomFilter(capacity, fp) + case BasicBF: + return newBasicBloomFilter(capacity, fp) + default: + log.Info("unsupported bloom filter type, using block bloom filter", zap.String("type", typeName)) + return newBlockedBloomFilter(capacity, fp) + } +} + +func UnmarshalJSON(data []byte, bfType BFType) (BloomFilterInterface, error) { + switch bfType { + case BlockedBF: + bf := &blockedBloomFilter{} + err := json.Unmarshal(data, bf) + if err != nil { + return nil, errors.Wrap(err, "failed to unmarshal blocked bloom filter") + } + return bf, nil + case BasicBF: + bf := &basicBloomFilter{} + err := json.Unmarshal(data, bf) + if err != nil { + return nil, errors.Wrap(err, "failed to unmarshal blocked bloom filter") + } + return bf, nil + case AlwaysTrueBF: + return AlwaysTrueBloomFilter, nil + default: + return nil, errors.Errorf("unsupported bloom filter type: %d", bfType) + } +} + +func Locations(data []byte, k uint, bfType BFType) []uint64 { + switch bfType { + case BasicBF: + return bloom.Locations(data, k) + case BlockedBF: + return blobloom.Locations(xxh3.Hash(data), k) + case AlwaysTrueBF: + return nil + default: + log.Info("unsupported bloom filter type, using block bloom filter", zap.String("type", bfType.String())) + return nil + } +} diff --git a/internal/util/bloomfilter/bloom_filter_test.go b/internal/util/bloomfilter/bloom_filter_test.go new file mode 100644 index 0000000000000..5774d205b9853 --- /dev/null +++ b/internal/util/bloomfilter/bloom_filter_test.go @@ -0,0 +1,220 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+package bloomfilter + +import ( + "fmt" + "testing" + "time" + + "github.com/bits-and-blooms/bloom/v3" + "github.com/stretchr/testify/assert" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-storage/go/common/log" +) + +func TestPerformance(t *testing.T) { + capacity := 1000000 + fpr := 0.001 + + keys := make([][]byte, 0) + for i := 0; i < capacity; i++ { + keys = append(keys, []byte(fmt.Sprintf("key%d", i))) + } + + bf1 := newBlockedBloomFilter(uint(capacity), fpr) + start1 := time.Now() + for _, key := range keys { + bf1.Add(key) + } + log.Info("Block BF construct time", zap.Duration("time", time.Since(start1))) + data, err := bf1.MarshalJSON() + assert.NoError(t, err) + log.Info("Block BF size", zap.Int("size", len(data))) + + start2 := time.Now() + for _, key := range keys { + bf1.Test(key) + } + log.Info("Block BF Test cost", zap.Duration("time", time.Since(start2))) + + bf2 := newBasicBloomFilter(uint(capacity), fpr) + start3 := time.Now() + for _, key := range keys { + bf2.Add(key) + } + log.Info("Basic BF construct time", zap.Duration("time", time.Since(start3))) + data, err = bf2.MarshalJSON() + assert.NoError(t, err) + log.Info("Basic BF size", zap.Int("size", len(data))) + + start4 := time.Now() + for _, key := range keys { + bf2.Test(key) + } + log.Info("Basic BF Test cost", zap.Duration("time", time.Since(start4))) +} + +func TestPerformance_MultiBF(t *testing.T) { + capacity := 100000 + fpr := 0.001 + + keys := make([][]byte, 0) + for i := 0; i < capacity; i++ { + keys = append(keys, []byte(fmt.Sprintf("key%d", time.Now().UnixNano()+int64(i)))) + } + + bfNum := 100 + bfs1 := make([]*blockedBloomFilter, 0) + start1 := time.Now() + for i := 0; i < bfNum; i++ { + bf1 := newBlockedBloomFilter(uint(capacity), fpr) + for _, key := range keys { + bf1.Add(key) + } + bfs1 = append(bfs1, bf1) + } + + log.Info("Block BF construct cost", zap.Duration("time", time.Since(start1))) + + start3 := time.Now() + for _, key := range keys { + locations := Locations(key, bfs1[0].K(), BlockedBF) + for i := 0; i < bfNum; i++ { + bfs1[i].TestLocations(locations) + } + } + log.Info("Block BF TestLocation cost", zap.Duration("time", time.Since(start3))) + + bfs2 := make([]*basicBloomFilter, 0) + start1 = time.Now() + for i := 0; i < bfNum; i++ { + bf2 := newBasicBloomFilter(uint(capacity), fpr) + for _, key := range keys { + bf2.Add(key) + } + bfs2 = append(bfs2, bf2) + } + + log.Info("Basic BF construct cost", zap.Duration("time", time.Since(start1))) + + start3 = time.Now() + for _, key := range keys { + locations := Locations(key, bfs1[0].K(), BasicBF) + for i := 0; i < bfNum; i++ { + bfs2[i].TestLocations(locations) + } + } + log.Info("Basic BF TestLocation cost", zap.Duration("time", time.Since(start3))) +} + +func TestPerformance_Capacity(t *testing.T) { + fpr := 0.001 + + for _, capacity := range []int64{100, 1000, 10000, 100000, 1000000} { + keys := make([][]byte, 0) + for i := 0; i < int(capacity); i++ { + keys = append(keys, []byte(fmt.Sprintf("key%d", time.Now().UnixNano()+int64(i)))) + } + + start1 := time.Now() + bf1 := newBlockedBloomFilter(uint(capacity), fpr) + for _, key := range keys { + bf1.Add(key) + } + + log.Info("Block BF construct cost", zap.Duration("time", time.Since(start1))) + + testKeys := make([][]byte, 0) + for i := 0; i < 10000; i++ { + testKeys = append(testKeys, []byte(fmt.Sprintf("key%d", time.Now().UnixNano()+int64(i)))) + } + + start3 := time.Now() + for _, key := range testKeys { + locations := Locations(key, bf1.K(), bf1.Type()) + bf1.TestLocations(locations) 
+ } + _, k := bloom.EstimateParameters(uint(capacity), fpr) + log.Info("Block BF TestLocation cost", zap.Duration("time", time.Since(start3)), zap.Int("k", int(k)), zap.Int64("capacity", capacity)) + } +} + +func TestMarshal(t *testing.T) { + capacity := 200000 + fpr := 0.001 + + keys := make([][]byte, 0) + for i := 0; i < capacity; i++ { + keys = append(keys, []byte(fmt.Sprintf("key%d", i))) + } + + // test basic bf + basicBF := newBasicBloomFilter(uint(capacity), fpr) + for _, key := range keys { + basicBF.Add(key) + } + data, err := basicBF.MarshalJSON() + assert.NoError(t, err) + basicBF2, err := UnmarshalJSON(data, BasicBF) + assert.NoError(t, err) + assert.Equal(t, basicBF.Type(), basicBF2.Type()) + + for _, key := range keys { + assert.True(t, basicBF2.Test(key)) + } + + // test block bf + blockBF := newBlockedBloomFilter(uint(capacity), fpr) + for _, key := range keys { + blockBF.Add(key) + } + data, err = blockBF.MarshalJSON() + assert.NoError(t, err) + blockBF2, err := UnmarshalJSON(data, BlockedBF) + assert.NoError(t, err) + assert.Equal(t, blockBF.Type(), blockBF.Type()) + for _, key := range keys { + assert.True(t, blockBF2.Test(key)) + } + + // test compatible with bits-and-blooms/bloom + bf := bloom.NewWithEstimates(uint(capacity), fpr) + for _, key := range keys { + bf.Add(key) + } + data, err = bf.MarshalJSON() + assert.NoError(t, err) + bf2, err := UnmarshalJSON(data, BasicBF) + assert.NoError(t, err) + for _, key := range keys { + assert.True(t, bf2.Test(key)) + } + + // test empty bloom filter + emptyBF := AlwaysTrueBloomFilter + for _, key := range keys { + bf.Add(key) + } + data, err = emptyBF.MarshalJSON() + assert.NoError(t, err) + emptyBF2, err := UnmarshalJSON(data, AlwaysTrueBF) + assert.NoError(t, err) + for _, key := range keys { + assert.True(t, emptyBF2.Test(key)) + } +} diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 85067e3bad4e1..590c7df33d6b2 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -245,6 +245,7 @@ type commonConfig struct { TTMsgEnabled ParamItem `refreshable:"true"` TraceLogMode ParamItem `refreshable:"true"` BloomFilterSize ParamItem `refreshable:"true"` + BloomFilterType ParamItem `refreshable:"true"` MaxBloomFalsePositive ParamItem `refreshable:"true"` PanicWhenPluginFail ParamItem `refreshable:"false"` } @@ -735,6 +736,15 @@ like the old password verification when updating the credential`, } p.BloomFilterSize.Init(base.mgr) + p.BloomFilterType = ParamItem{ + Key: "common.bloomFilterType", + Version: "2.4.3", + DefaultValue: "BlockedBloomFilter", + Doc: "bloom filter type, support BasicBloomFilter and BlockedBloomFilter", + Export: true, + } + p.BloomFilterType.Init(base.mgr) + p.MaxBloomFalsePositive = ParamItem{ Key: "common.maxBloomFalsePositive", Version: "2.3.2", diff --git a/pkg/util/paramtable/component_param_test.go b/pkg/util/paramtable/component_param_test.go index 1b4719efe3a8b..34e6d409c82bf 100644 --- a/pkg/util/paramtable/component_param_test.go +++ b/pkg/util/paramtable/component_param_test.go @@ -528,6 +528,7 @@ func TestCachedParam(t *testing.T) { assert.Equal(t, uint(100000), params.CommonCfg.BloomFilterSize.GetAsUint()) assert.Equal(t, uint(100000), params.CommonCfg.BloomFilterSize.GetAsUint()) + assert.Equal(t, "BlockedBloomFilter", params.CommonCfg.BloomFilterType.GetValue()) assert.Equal(t, uint64(8388608), params.ServiceParam.MQCfg.PursuitBufferSize.GetAsUint64()) assert.Equal(t, uint64(8388608), 
params.ServiceParam.MQCfg.PursuitBufferSize.GetAsUint64()) diff --git a/tests/integration/bloomfilter/bloom_filter_test.go b/tests/integration/bloomfilter/bloom_filter_test.go new file mode 100644 index 0000000000000..595ecdd025a3c --- /dev/null +++ b/tests/integration/bloomfilter/bloom_filter_test.go @@ -0,0 +1,196 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bloomfilter + +import ( + "context" + "fmt" + "strconv" + "strings" + "testing" + + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "github.com/stretchr/testify/suite" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/metric" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/tests/integration" +) + +type BloomFilterTestSuit struct { + integration.MiniClusterSuite +} + +func (s *BloomFilterTestSuit) SetupSuite() { + paramtable.Init() + paramtable.Get().Save(paramtable.Get().QueryCoordCfg.BalanceCheckInterval.Key, "1000") + paramtable.Get().Save(paramtable.Get().QueryNodeCfg.GracefulStopTimeout.Key, "1") + + // disable compaction + paramtable.Get().Save(paramtable.Get().DataCoordCfg.EnableCompaction.Key, "false") + + s.Require().NoError(s.SetupEmbedEtcd()) +} + +func (s *BloomFilterTestSuit) TearDownSuite() { + defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.EnableCompaction.Key) + s.MiniClusterSuite.TearDownSuite() +} + +func (s *BloomFilterTestSuit) initCollection(collectionName string, replica int, channelNum int, segmentNum int, segmentRowNum int, segmentDeleteNum int) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + const ( + dim = 128 + dbName = "" + ) + + schema := integration.ConstructSchema(collectionName, dim, true) + marshaledSchema, err := proto.Marshal(schema) + s.NoError(err) + + createCollectionStatus, err := s.Cluster.Proxy.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + Schema: marshaledSchema, + ShardsNum: int32(channelNum), + }) + s.NoError(err) + s.True(merr.Ok(createCollectionStatus)) + + log.Info("CreateCollection result", zap.Any("createCollectionStatus", createCollectionStatus)) + showCollectionsResp, err := s.Cluster.Proxy.ShowCollections(ctx, &milvuspb.ShowCollectionsRequest{}) + s.NoError(err) + s.True(merr.Ok(showCollectionsResp.Status)) + log.Info("ShowCollections result", zap.Any("showCollectionsResp", showCollectionsResp)) + + for i := 0; i < segmentNum; i++ { + // change bf type in real 
time + if i%2 == 0 { + paramtable.Get().Save(paramtable.Get().CommonCfg.BloomFilterType.Key, "BasicBloomFilter") + } else { + paramtable.Get().Save(paramtable.Get().CommonCfg.BloomFilterType.Key, "BlockedBloomFilter") + } + + fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, segmentRowNum, dim) + hashKeys := integration.GenerateHashKeys(segmentRowNum) + insertResult, err := s.Cluster.Proxy.Insert(ctx, &milvuspb.InsertRequest{ + DbName: dbName, + CollectionName: collectionName, + FieldsData: []*schemapb.FieldData{fVecColumn}, + HashKeys: hashKeys, + NumRows: uint32(segmentRowNum), + }) + s.NoError(err) + s.True(merr.Ok(insertResult.Status)) + + if segmentDeleteNum > 0 { + if segmentDeleteNum > segmentRowNum { + segmentDeleteNum = segmentRowNum + } + + pks := insertResult.GetIDs().GetIntId().GetData()[:segmentDeleteNum] + log.Info("========================delete expr==================", + zap.Int("length of pk", len(pks)), + ) + + expr := fmt.Sprintf("%s in [%s]", integration.Int64Field, strings.Join(lo.Map(pks, func(pk int64, _ int) string { return strconv.FormatInt(pk, 10) }), ",")) + + deleteResp, err := s.Cluster.Proxy.Delete(ctx, &milvuspb.DeleteRequest{ + CollectionName: collectionName, + Expr: expr, + }) + s.Require().NoError(err) + s.Require().True(merr.Ok(deleteResp.GetStatus())) + s.Require().EqualValues(len(pks), deleteResp.GetDeleteCnt()) + } + + // flush + flushResp, err := s.Cluster.Proxy.Flush(ctx, &milvuspb.FlushRequest{ + DbName: dbName, + CollectionNames: []string{collectionName}, + }) + s.NoError(err) + segmentIDs, has := flushResp.GetCollSegIDs()[collectionName] + ids := segmentIDs.GetData() + s.Require().NotEmpty(segmentIDs) + s.Require().True(has) + flushTs, has := flushResp.GetCollFlushTs()[collectionName] + s.True(has) + s.WaitForFlush(ctx, ids, flushTs, dbName, collectionName) + } + + // create index + createIndexStatus, err := s.Cluster.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{ + CollectionName: collectionName, + FieldName: integration.FloatVecField, + IndexName: "_default", + ExtraParams: integration.ConstructIndexParam(dim, integration.IndexFaissIvfFlat, metric.L2), + }) + s.NoError(err) + s.True(merr.Ok(createIndexStatus)) + s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField) + + for i := 1; i < replica; i++ { + s.Cluster.AddQueryNode() + } + + // load + loadStatus, err := s.Cluster.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + ReplicaNumber: int32(replica), + }) + s.NoError(err) + s.Equal(commonpb.ErrorCode_Success, loadStatus.GetErrorCode()) + s.True(merr.Ok(loadStatus)) + s.WaitForLoad(ctx, collectionName) + log.Info("initCollection Done") +} + +func (s *BloomFilterTestSuit) TestLoadAndQuery() { + name := "test_balance_" + funcutil.GenRandomStr() + s.initCollection(name, 1, 2, 10, 2000, 500) + + ctx := context.Background() + queryResult, err := s.Cluster.Proxy.Query(ctx, &milvuspb.QueryRequest{ + DbName: "", + CollectionName: name, + Expr: "", + OutputFields: []string{"count(*)"}, + }) + if !merr.Ok(queryResult.GetStatus()) { + log.Warn("searchResult fail reason", zap.String("reason", queryResult.GetStatus().GetReason())) + } + s.NoError(err) + s.True(merr.Ok(queryResult.GetStatus())) + numEntities := queryResult.FieldsData[0].GetScalars().GetLongData().Data[0] + s.Equal(numEntities, int64(15000)) +} + +func TestBloomFilter(t *testing.T) { + suite.Run(t, new(BloomFilterTestSuit)) +} From 2c7bb0b8acb6dd0ba9de0bf6d4999783a7b85eaf Mon Sep 17 
00:00:00 2001 From: smellthemoon <64083300+smellthemoon@users.noreply.github.com> Date: Fri, 31 May 2024 18:05:45 +0800 Subject: [PATCH 111/126] fix: replace removeWithPrefix with remove to avoid delete redundantly (#33328) #33288 --------- Signed-off-by: lixinguo Co-authored-by: lixinguo --- internal/kv/kv.go | 1 + internal/kv/mock_snapshot_kv.go | 8 ++ internal/kv/mock_snapshot_kv_test.go | 16 ++++ internal/kv/mocks/snapshot_kv.go | 44 +++++++++++ internal/metastore/kv/rootcoord/kv_catalog.go | 25 +++--- .../metastore/kv/rootcoord/kv_catalog_test.go | 53 ++++++++----- .../metastore/kv/rootcoord/suffix_snapshot.go | 48 ++++++++++++ .../kv/rootcoord/suffix_snapshot_test.go | 76 +++++++++++++++++++ 8 files changed, 234 insertions(+), 37 deletions(-) diff --git a/internal/kv/kv.go b/internal/kv/kv.go index 14091cdc1e842..929febe2c8080 100644 --- a/internal/kv/kv.go +++ b/internal/kv/kv.go @@ -91,5 +91,6 @@ type SnapShotKV interface { Load(key string, ts typeutil.Timestamp) (string, error) MultiSave(kvs map[string]string, ts typeutil.Timestamp) error LoadWithPrefix(key string, ts typeutil.Timestamp) ([]string, []string, error) + MultiSaveAndRemove(saves map[string]string, removals []string, ts typeutil.Timestamp) error MultiSaveAndRemoveWithPrefix(saves map[string]string, removals []string, ts typeutil.Timestamp) error } diff --git a/internal/kv/mock_snapshot_kv.go b/internal/kv/mock_snapshot_kv.go index 35cc851853dc6..9eed834997324 100644 --- a/internal/kv/mock_snapshot_kv.go +++ b/internal/kv/mock_snapshot_kv.go @@ -11,6 +11,7 @@ type mockSnapshotKV struct { MultiSaveFunc func(kvs map[string]string, ts typeutil.Timestamp) error LoadWithPrefixFunc func(key string, ts typeutil.Timestamp) ([]string, []string, error) MultiSaveAndRemoveWithPrefixFunc func(saves map[string]string, removals []string, ts typeutil.Timestamp) error + MultiSaveAndRemoveFunc func(saves map[string]string, removals []string, ts typeutil.Timestamp) error } func NewMockSnapshotKV() *mockSnapshotKV { @@ -51,3 +52,10 @@ func (m mockSnapshotKV) MultiSaveAndRemoveWithPrefix(saves map[string]string, re } return nil } + +func (m mockSnapshotKV) MultiSaveAndRemove(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + if m.MultiSaveAndRemoveFunc != nil { + return m.MultiSaveAndRemoveFunc(saves, removals, ts) + } + return nil +} diff --git a/internal/kv/mock_snapshot_kv_test.go b/internal/kv/mock_snapshot_kv_test.go index 94e6f2136afb7..0b2df70f9173f 100644 --- a/internal/kv/mock_snapshot_kv_test.go +++ b/internal/kv/mock_snapshot_kv_test.go @@ -87,3 +87,19 @@ func Test_mockSnapshotKV_MultiSaveAndRemoveWithPrefix(t *testing.T) { assert.NoError(t, err) }) } + +func Test_mockSnapshotKV_MultiSaveAndRemove(t *testing.T) { + t.Run("func not set", func(t *testing.T) { + snapshot := NewMockSnapshotKV() + err := snapshot.MultiSaveAndRemove(nil, nil, 0) + assert.NoError(t, err) + }) + t.Run("func set", func(t *testing.T) { + snapshot := NewMockSnapshotKV() + snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + return nil + } + err := snapshot.MultiSaveAndRemove(nil, nil, 0) + assert.NoError(t, err) + }) +} diff --git a/internal/kv/mocks/snapshot_kv.go b/internal/kv/mocks/snapshot_kv.go index e1e4ef7c1c3f2..dc2de1d78379b 100644 --- a/internal/kv/mocks/snapshot_kv.go +++ b/internal/kv/mocks/snapshot_kv.go @@ -177,6 +177,50 @@ func (_c *SnapShotKV_MultiSave_Call) RunAndReturn(run func(map[string]string, ui return _c } +// MultiSaveAndRemove provides 
a mock function with given fields: saves, removals, ts +func (_m *SnapShotKV) MultiSaveAndRemove(saves map[string]string, removals []string, ts uint64) error { + ret := _m.Called(saves, removals, ts) + + var r0 error + if rf, ok := ret.Get(0).(func(map[string]string, []string, uint64) error); ok { + r0 = rf(saves, removals, ts) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// SnapShotKV_MultiSaveAndRemove_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'MultiSaveAndRemove' +type SnapShotKV_MultiSaveAndRemove_Call struct { + *mock.Call +} + +// MultiSaveAndRemove is a helper method to define mock.On call +// - saves map[string]string +// - removals []string +// - ts uint64 +func (_e *SnapShotKV_Expecter) MultiSaveAndRemove(saves interface{}, removals interface{}, ts interface{}) *SnapShotKV_MultiSaveAndRemove_Call { + return &SnapShotKV_MultiSaveAndRemove_Call{Call: _e.mock.On("MultiSaveAndRemove", saves, removals, ts)} +} + +func (_c *SnapShotKV_MultiSaveAndRemove_Call) Run(run func(saves map[string]string, removals []string, ts uint64)) *SnapShotKV_MultiSaveAndRemove_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(map[string]string), args[1].([]string), args[2].(uint64)) + }) + return _c +} + +func (_c *SnapShotKV_MultiSaveAndRemove_Call) Return(_a0 error) *SnapShotKV_MultiSaveAndRemove_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *SnapShotKV_MultiSaveAndRemove_Call) RunAndReturn(run func(map[string]string, []string, uint64) error) *SnapShotKV_MultiSaveAndRemove_Call { + _c.Call.Return(run) + return _c +} + // MultiSaveAndRemoveWithPrefix provides a mock function with given fields: saves, removals, ts func (_m *SnapShotKV) MultiSaveAndRemoveWithPrefix(saves map[string]string, removals []string, ts uint64) error { ret := _m.Called(saves, removals, ts) diff --git a/internal/metastore/kv/rootcoord/kv_catalog.go b/internal/metastore/kv/rootcoord/kv_catalog.go index 9edcfe13f6be5..916195598efc0 100644 --- a/internal/metastore/kv/rootcoord/kv_catalog.go +++ b/internal/metastore/kv/rootcoord/kv_catalog.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "sort" "github.com/cockroachdb/errors" "github.com/golang/protobuf/proto" @@ -85,7 +84,7 @@ func BuildAliasPrefixWithDB(dbID int64) string { // since SnapshotKV may save both snapshot key and the original key if the original key is newest // MaxEtcdTxnNum need to divided by 2 -func batchMultiSaveAndRemoveWithPrefix(snapshot kv.SnapShotKV, limit int, saves map[string]string, removals []string, ts typeutil.Timestamp) error { +func batchMultiSaveAndRemove(snapshot kv.SnapShotKV, limit int, saves map[string]string, removals []string, ts typeutil.Timestamp) error { saveFn := func(partialKvs map[string]string) error { return snapshot.MultiSave(partialKvs, ts) } @@ -93,14 +92,8 @@ func batchMultiSaveAndRemoveWithPrefix(snapshot kv.SnapShotKV, limit int, saves return err } - // avoid a case that the former key is the prefix of the later key. - // for example, `root-coord/fields/collection_id/1` is the prefix of `root-coord/fields/collection_id/100`. 
- sort.Slice(removals, func(i, j int) bool { - return removals[i] > removals[j] - }) - removeFn := func(partialKeys []string) error { - return snapshot.MultiSaveAndRemoveWithPrefix(nil, partialKeys, ts) + return snapshot.MultiSaveAndRemove(nil, partialKeys, ts) } return etcd.RemoveByBatchWithLimit(removals, limit, removeFn) } @@ -127,7 +120,7 @@ func (kc *Catalog) AlterDatabase(ctx context.Context, newColl *model.Database, t func (kc *Catalog) DropDatabase(ctx context.Context, dbID int64, ts typeutil.Timestamp) error { key := BuildDatabaseKey(dbID) - return kc.Snapshot.MultiSaveAndRemoveWithPrefix(nil, []string{key}, ts) + return kc.Snapshot.MultiSaveAndRemove(nil, []string{key}, ts) } func (kc *Catalog) ListDatabases(ctx context.Context, ts typeutil.Timestamp) ([]*model.Database, error) { @@ -300,7 +293,7 @@ func (kc *Catalog) CreateAlias(ctx context.Context, alias *model.Alias, ts typeu return err } kvs := map[string]string{k: string(v)} - return kc.Snapshot.MultiSaveAndRemoveWithPrefix(kvs, []string{oldKBefore210, oldKeyWithoutDb}, ts) + return kc.Snapshot.MultiSaveAndRemove(kvs, []string{oldKBefore210, oldKeyWithoutDb}, ts) } func (kc *Catalog) CreateCredential(ctx context.Context, credential *model.Credential) error { @@ -455,12 +448,12 @@ func (kc *Catalog) DropCollection(ctx context.Context, collectionInfo *model.Col // However, if we remove collection first, we cannot remove other metas. // since SnapshotKV may save both snapshot key and the original key if the original key is newest // MaxEtcdTxnNum need to divided by 2 - if err := batchMultiSaveAndRemoveWithPrefix(kc.Snapshot, util.MaxEtcdTxnNum/2, nil, delMetakeysSnap, ts); err != nil { + if err := batchMultiSaveAndRemove(kc.Snapshot, util.MaxEtcdTxnNum/2, nil, delMetakeysSnap, ts); err != nil { return err } // if we found collection dropping, we should try removing related resources. 
- return kc.Snapshot.MultiSaveAndRemoveWithPrefix(nil, collectionKeys, ts) + return kc.Snapshot.MultiSaveAndRemove(nil, collectionKeys, ts) } func (kc *Catalog) alterModifyCollection(oldColl *model.Collection, newColl *model.Collection, ts typeutil.Timestamp) error { @@ -491,7 +484,7 @@ func (kc *Catalog) alterModifyCollection(oldColl *model.Collection, newColl *mod if oldKey == newKey { return kc.Snapshot.Save(newKey, string(value), ts) } - return kc.Snapshot.MultiSaveAndRemoveWithPrefix(saves, []string{oldKey}, ts) + return kc.Snapshot.MultiSaveAndRemove(saves, []string{oldKey}, ts) } func (kc *Catalog) AlterCollection(ctx context.Context, oldColl *model.Collection, newColl *model.Collection, alterType metastore.AlterType, ts typeutil.Timestamp) error { @@ -559,7 +552,7 @@ func (kc *Catalog) DropPartition(ctx context.Context, dbID int64, collectionID t if partitionVersionAfter210(collMeta) { k := BuildPartitionKey(collectionID, partitionID) - return kc.Snapshot.MultiSaveAndRemoveWithPrefix(nil, []string{k}, ts) + return kc.Snapshot.MultiSaveAndRemove(nil, []string{k}, ts) } k := BuildCollectionKey(util.NonDBID, collectionID) @@ -601,7 +594,7 @@ func (kc *Catalog) DropAlias(ctx context.Context, dbID int64, alias string, ts t oldKBefore210 := BuildAliasKey210(alias) oldKeyWithoutDb := BuildAliasKey(alias) k := BuildAliasKeyWithDB(dbID, alias) - return kc.Snapshot.MultiSaveAndRemoveWithPrefix(nil, []string{k, oldKeyWithoutDb, oldKBefore210}, ts) + return kc.Snapshot.MultiSaveAndRemove(nil, []string{k, oldKeyWithoutDb, oldKBefore210}, ts) } func (kc *Catalog) GetCollectionByName(ctx context.Context, dbID int64, collectionName string, ts typeutil.Timestamp) (*model.Collection, error) { diff --git a/internal/metastore/kv/rootcoord/kv_catalog_test.go b/internal/metastore/kv/rootcoord/kv_catalog_test.go index 7523c821677d5..5cb3c0f293d12 100644 --- a/internal/metastore/kv/rootcoord/kv_catalog_test.go +++ b/internal/metastore/kv/rootcoord/kv_catalog_test.go @@ -495,7 +495,7 @@ func TestCatalog_CreateAliasV2(t *testing.T) { ctx := context.Background() snapshot := kv.NewMockSnapshotKV() - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return errors.New("mock") } @@ -504,7 +504,7 @@ func TestCatalog_CreateAliasV2(t *testing.T) { err := kc.CreateAlias(ctx, &model.Alias{}, 0) assert.Error(t, err) - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return nil } err = kc.CreateAlias(ctx, &model.Alias{}, 0) @@ -623,7 +623,7 @@ func TestCatalog_AlterAliasV2(t *testing.T) { ctx := context.Background() snapshot := kv.NewMockSnapshotKV() - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return errors.New("mock") } @@ -632,7 +632,7 @@ func TestCatalog_AlterAliasV2(t *testing.T) { err := kc.AlterAlias(ctx, &model.Alias{}, 0) assert.Error(t, err) - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals 
[]string, ts typeutil.Timestamp) error { return nil } err = kc.AlterAlias(ctx, &model.Alias{}, 0) @@ -706,7 +706,7 @@ func TestCatalog_DropPartitionV2(t *testing.T) { snapshot.LoadFunc = func(key string, ts typeutil.Timestamp) (string, error) { return string(value), nil } - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return errors.New("mock") } @@ -715,7 +715,7 @@ func TestCatalog_DropPartitionV2(t *testing.T) { err = kc.DropPartition(ctx, 0, 100, 101, 0) assert.Error(t, err) - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return nil } err = kc.DropPartition(ctx, 0, 100, 101, 0) @@ -758,7 +758,7 @@ func TestCatalog_DropAliasV2(t *testing.T) { ctx := context.Background() snapshot := kv.NewMockSnapshotKV() - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return errors.New("mock") } @@ -767,7 +767,7 @@ func TestCatalog_DropAliasV2(t *testing.T) { err := kc.DropAlias(ctx, testDb, "alias", 0) assert.Error(t, err) - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return nil } err = kc.DropAlias(ctx, testDb, "alias", 0) @@ -942,14 +942,14 @@ func TestCatalog_ListAliasesV2(t *testing.T) { }) } -func Test_batchMultiSaveAndRemoveWithPrefix(t *testing.T) { +func Test_batchMultiSaveAndRemove(t *testing.T) { t.Run("failed to save", func(t *testing.T) { snapshot := kv.NewMockSnapshotKV() snapshot.MultiSaveFunc = func(kvs map[string]string, ts typeutil.Timestamp) error { return errors.New("error mock MultiSave") } saves := map[string]string{"k": "v"} - err := batchMultiSaveAndRemoveWithPrefix(snapshot, util.MaxEtcdTxnNum/2, saves, []string{}, 0) + err := batchMultiSaveAndRemove(snapshot, util.MaxEtcdTxnNum/2, saves, []string{}, 0) assert.Error(t, err) }) t.Run("failed to remove", func(t *testing.T) { @@ -957,12 +957,12 @@ func Test_batchMultiSaveAndRemoveWithPrefix(t *testing.T) { snapshot.MultiSaveFunc = func(kvs map[string]string, ts typeutil.Timestamp) error { return nil } - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { - return errors.New("error mock MultiSaveAndRemoveWithPrefix") + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + return errors.New("error mock MultiSaveAndRemove") } saves := map[string]string{"k": "v"} removals := []string{"prefix1", "prefix2"} - err := batchMultiSaveAndRemoveWithPrefix(snapshot, util.MaxEtcdTxnNum/2, saves, removals, 0) + err := batchMultiSaveAndRemove(snapshot, util.MaxEtcdTxnNum/2, saves, removals, 0) assert.Error(t, err) }) t.Run("normal case", func(t *testing.T) { @@ -971,7 +971,7 @@ func Test_batchMultiSaveAndRemoveWithPrefix(t *testing.T) { log.Info("multi save", zap.Any("len", len(kvs)), zap.Any("saves", kvs)) return nil } - 
snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { log.Info("multi save and remove with prefix", zap.Any("len of saves", len(saves)), zap.Any("len of removals", len(removals)), zap.Any("saves", saves), zap.Any("removals", removals)) return nil @@ -983,7 +983,7 @@ func Test_batchMultiSaveAndRemoveWithPrefix(t *testing.T) { saves[fmt.Sprintf("k%d", i)] = fmt.Sprintf("v%d", i) removals = append(removals, fmt.Sprintf("k%d", i)) } - err := batchMultiSaveAndRemoveWithPrefix(snapshot, util.MaxEtcdTxnNum/2, saves, removals, 0) + err := batchMultiSaveAndRemove(snapshot, util.MaxEtcdTxnNum/2, saves, removals, 0) assert.NoError(t, err) }) } @@ -1040,7 +1040,7 @@ func TestCatalog_AlterCollection(t *testing.T) { t.Run("modify db name", func(t *testing.T) { var collectionID int64 = 1 snapshot := kv.NewMockSnapshotKV() - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { assert.ElementsMatch(t, []string{BuildCollectionKey(0, collectionID)}, removals) assert.Equal(t, len(saves), 1) assert.Contains(t, maps.Keys(saves), BuildCollectionKey(1, collectionID)) @@ -1149,6 +1149,17 @@ func withMockMultiSaveAndRemoveWithPrefix(err error) mockSnapshotOpt { } } +func withMockMultiSaveAndRemove(err error) mockSnapshotOpt { + return func(ss *mocks.SnapShotKV) { + ss.On( + "MultiSaveAndRemove", + mock.AnythingOfType("map[string]string"), + mock.AnythingOfType("[]string"), + mock.AnythingOfType("uint64")). + Return(err) + } +} + func TestCatalog_CreateCollection(t *testing.T) { t.Run("collection not creating", func(t *testing.T) { kc := &Catalog{} @@ -1198,7 +1209,7 @@ func TestCatalog_CreateCollection(t *testing.T) { func TestCatalog_DropCollection(t *testing.T) { t.Run("failed to remove", func(t *testing.T) { - mockSnapshot := newMockSnapshot(t, withMockMultiSaveAndRemoveWithPrefix(errors.New("error mock MultiSaveAndRemoveWithPrefix"))) + mockSnapshot := newMockSnapshot(t, withMockMultiSaveAndRemove(errors.New("error mock MultiSaveAndRemove"))) kc := &Catalog{Snapshot: mockSnapshot} ctx := context.Background() coll := &model.Collection{ @@ -1216,7 +1227,7 @@ func TestCatalog_DropCollection(t *testing.T) { removeOtherCalled := false removeCollectionCalled := false mockSnapshot.On( - "MultiSaveAndRemoveWithPrefix", + "MultiSaveAndRemove", mock.AnythingOfType("map[string]string"), mock.AnythingOfType("[]string"), mock.AnythingOfType("uint64")). @@ -1225,13 +1236,13 @@ func TestCatalog_DropCollection(t *testing.T) { return nil }).Once() mockSnapshot.On( - "MultiSaveAndRemoveWithPrefix", + "MultiSaveAndRemove", mock.AnythingOfType("map[string]string"), mock.AnythingOfType("[]string"), mock.AnythingOfType("uint64")). 
Return(func(map[string]string, []string, typeutil.Timestamp) error { removeCollectionCalled = true - return errors.New("error mock MultiSaveAndRemoveWithPrefix") + return errors.New("error mock MultiSaveAndRemove") }).Once() kc := &Catalog{Snapshot: mockSnapshot} ctx := context.Background() @@ -1248,7 +1259,7 @@ func TestCatalog_DropCollection(t *testing.T) { }) t.Run("normal case", func(t *testing.T) { - mockSnapshot := newMockSnapshot(t, withMockMultiSaveAndRemoveWithPrefix(nil)) + mockSnapshot := newMockSnapshot(t, withMockMultiSaveAndRemove(nil)) kc := &Catalog{Snapshot: mockSnapshot} ctx := context.Background() coll := &model.Collection{ diff --git a/internal/metastore/kv/rootcoord/suffix_snapshot.go b/internal/metastore/kv/rootcoord/suffix_snapshot.go index f945dc958d3b7..af443ffc7c6f7 100644 --- a/internal/metastore/kv/rootcoord/suffix_snapshot.go +++ b/internal/metastore/kv/rootcoord/suffix_snapshot.go @@ -35,6 +35,7 @@ import ( "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util" "github.com/milvus-io/milvus/pkg/util/etcd" + "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/retry" "github.com/milvus-io/milvus/pkg/util/tsoutil" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -502,6 +503,53 @@ func (ss *SuffixSnapshot) LoadWithPrefix(key string, ts typeutil.Timestamp) ([]s return resultKeys, resultValues, nil } +// MultiSaveAndRemove save muiltple kvs and remove as well +// if ts == 0, act like MetaKv +// each key-value will be treated in same logic like Save +func (ss *SuffixSnapshot) MultiSaveAndRemove(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + // if ts == 0, act like MetaKv + if ts == 0 { + return ss.MetaKv.MultiSaveAndRemove(saves, removals) + } + ss.Lock() + defer ss.Unlock() + var err error + + // process each key, checks whether is the latest + execute, updateList, err := ss.generateSaveExecute(saves, ts) + if err != nil { + return err + } + + // load each removal, change execution to adding tombstones + for _, removal := range removals { + value, err := ss.MetaKv.Load(removal) + if err != nil { + log.Warn("SuffixSnapshot MetaKv Load failed", zap.String("key", removal), zap.Error(err)) + if errors.Is(err, merr.ErrIoKeyNotFound) { + continue + } + return err + } + // add tombstone to original key and add ts entry + if IsTombstone(value) { + continue + } + execute[removal] = string(SuffixSnapshotTombstone) + execute[ss.composeTSKey(removal, ts)] = string(SuffixSnapshotTombstone) + updateList = append(updateList, removal) + } + + // multi save execute map; if succeeds, update ts in the update list + err = ss.MetaKv.MultiSave(execute) + if err == nil { + for _, key := range updateList { + ss.lastestTS[key] = ts + } + } + return err +} + // MultiSaveAndRemoveWithPrefix save muiltple kvs and remove as well // if ts == 0, act like MetaKv // each key-value will be treated in same logic like Save diff --git a/internal/metastore/kv/rootcoord/suffix_snapshot_test.go b/internal/metastore/kv/rootcoord/suffix_snapshot_test.go index 5efc00680def2..6d76e544700ac 100644 --- a/internal/metastore/kv/rootcoord/suffix_snapshot_test.go +++ b/internal/metastore/kv/rootcoord/suffix_snapshot_test.go @@ -673,6 +673,82 @@ func Test_SuffixSnapshotMultiSaveAndRemoveWithPrefix(t *testing.T) { ss.MultiSaveAndRemoveWithPrefix(map[string]string{}, []string{""}, 0) } +func Test_SuffixSnapshotMultiSaveAndRemove(t *testing.T) { + rand.Seed(time.Now().UnixNano()) + randVal := rand.Int() + + rootPath := 
fmt.Sprintf("/test/meta/%d", randVal) + sep := "_ts" + + etcdCli, err := etcd.GetEtcdClient( + Params.EtcdCfg.UseEmbedEtcd.GetAsBool(), + Params.EtcdCfg.EtcdUseSSL.GetAsBool(), + Params.EtcdCfg.Endpoints.GetAsStrings(), + Params.EtcdCfg.EtcdTLSCert.GetValue(), + Params.EtcdCfg.EtcdTLSKey.GetValue(), + Params.EtcdCfg.EtcdTLSCACert.GetValue(), + Params.EtcdCfg.EtcdTLSMinVersion.GetValue()) + require.Nil(t, err) + defer etcdCli.Close() + etcdkv := etcdkv.NewEtcdKV(etcdCli, rootPath) + require.Nil(t, err) + defer etcdkv.Close() + + var vtso typeutil.Timestamp + ftso := func() typeutil.Timestamp { + return vtso + } + + ss, err := NewSuffixSnapshot(etcdkv, sep, rootPath, snapshotPrefix) + assert.NoError(t, err) + assert.NotNil(t, ss) + defer ss.Close() + + for i := 0; i < 20; i++ { + vtso = typeutil.Timestamp(100 + i*5) + ts := ftso() + err = ss.Save(fmt.Sprintf("kd-%04d", i), fmt.Sprintf("value-%d", i), ts) + assert.NoError(t, err) + assert.Equal(t, vtso, ts) + } + for i := 20; i < 40; i++ { + sm := map[string]string{"ks": fmt.Sprintf("value-%d", i)} + dm := []string{fmt.Sprintf("kd-%04d", i-20)} + vtso = typeutil.Timestamp(100 + i*5) + ts := ftso() + err = ss.MultiSaveAndRemove(sm, dm, ts) + assert.NoError(t, err) + assert.Equal(t, vtso, ts) + } + for i := 0; i < 20; i++ { + val, err := ss.Load(fmt.Sprintf("kd-%04d", i), typeutil.Timestamp(100+i*5+2)) + assert.NoError(t, err) + assert.Equal(t, fmt.Sprintf("value-%d", i), val) + _, vals, err := ss.LoadWithPrefix("kd-", typeutil.Timestamp(100+i*5+2)) + assert.NoError(t, err) + assert.Equal(t, i+1, len(vals)) + } + for i := 20; i < 40; i++ { + val, err := ss.Load("ks", typeutil.Timestamp(100+i*5+2)) + assert.NoError(t, err) + assert.Equal(t, fmt.Sprintf("value-%d", i), val) + _, vals, err := ss.LoadWithPrefix("kd-", typeutil.Timestamp(100+i*5+2)) + assert.NoError(t, err) + assert.Equal(t, 39-i, len(vals)) + } + + // try to load + _, err = ss.Load("kd-0000", 500) + assert.Error(t, err) + _, err = ss.Load("kd-0000", 0) + assert.Error(t, err) + _, err = ss.Load("kd-0000", 1) + assert.Error(t, err) + + // cleanup + ss.MultiSaveAndRemoveWithPrefix(map[string]string{}, []string{""}, 0) +} + func TestSuffixSnapshot_LoadWithPrefix(t *testing.T) { rand.Seed(time.Now().UnixNano()) randVal := rand.Int() From 03826286684303af5c4395bed80392bfad6c3e1f Mon Sep 17 00:00:00 2001 From: XuanYang-cn Date: Mon, 3 Jun 2024 10:19:49 +0800 Subject: [PATCH 112/126] enhance: Add more tracing for l0 compactor (#33435) Signed-off-by: yangxuan --- internal/datanode/data_sync_service.go | 6 +++++- internal/datanode/l0_compactor.go | 21 +++++++++++---------- internal/datanode/l0_compactor_test.go | 4 ++-- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/internal/datanode/data_sync_service.go b/internal/datanode/data_sync_service.go index ca744d239f1bf..1a3ff514bb5c6 100644 --- a/internal/datanode/data_sync_service.go +++ b/internal/datanode/data_sync_service.go @@ -23,6 +23,7 @@ import ( "sync" "time" + "go.opentelemetry.io/otel" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" @@ -250,8 +251,11 @@ func loadStatsV2(storageCache *metacache.StorageV2Cache, segment *datapb.Segment } func loadStats(ctx context.Context, chunkManager storage.ChunkManager, schema *schemapb.CollectionSchema, segmentID int64, statsBinlogs []*datapb.FieldBinlog) ([]*storage.PkStatistics, error) { + _, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "loadStats") + defer span.End() + startTs := time.Now() - log := log.With(zap.Int64("segmentID", segmentID)) + 
log := log.Ctx(ctx).With(zap.Int64("segmentID", segmentID)) log.Info("begin to init pk bloom filter", zap.Int("statsBinLogsLen", len(statsBinlogs))) pkField, err := typeutil.GetPrimaryFieldSchema(schema) diff --git a/internal/datanode/l0_compactor.go b/internal/datanode/l0_compactor.go index f04cc280c5c41..f3367b639feb5 100644 --- a/internal/datanode/l0_compactor.go +++ b/internal/datanode/l0_compactor.go @@ -201,7 +201,7 @@ func (t *levelZeroCompactionTask) linearProcess(ctx context.Context, targetSegme alteredSegments = make(map[int64]*storage.DeleteData) ) - segmentBFs, err := t.loadBF(targetSegments) + segmentBFs, err := t.loadBF(ctx, targetSegments) if err != nil { return nil, err } @@ -254,7 +254,7 @@ func (t *levelZeroCompactionTask) batchProcess(ctx context.Context, targetSegmen return nil, err } - segmentBFs, err := t.loadBF(targetSegments) + segmentBFs, err := t.loadBF(ctx, targetSegments) if err != nil { return nil, err } @@ -420,11 +420,9 @@ func (t *levelZeroCompactionTask) uploadByCheck(ctx context.Context, requireChec return nil } -func (t *levelZeroCompactionTask) loadBF(targetSegments []*datapb.CompactionSegmentBinlogs) (map[int64]*metacache.BloomFilterSet, error) { - log := log.Ctx(t.ctx).With( - zap.Int64("planID", t.plan.GetPlanID()), - zap.String("type", t.plan.GetType().String()), - ) +func (t *levelZeroCompactionTask) loadBF(ctx context.Context, targetSegments []*datapb.CompactionSegmentBinlogs) (map[int64]*metacache.BloomFilterSet, error) { + _, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "L0Compact loadBF") + defer span.End() var ( futures = make([]*conc.Future[any], 0, len(targetSegments)) @@ -436,13 +434,16 @@ func (t *levelZeroCompactionTask) loadBF(targetSegments []*datapb.CompactionSegm for _, segment := range targetSegments { segment := segment + innerCtx := ctx future := pool.Submit(func() (any, error) { _ = binlog.DecompressBinLog(storage.StatsBinlog, segment.GetCollectionID(), segment.GetPartitionID(), segment.GetSegmentID(), segment.GetField2StatslogPaths()) - pks, err := loadStats(t.ctx, t.cm, - t.plan.GetSchema(), segment.GetSegmentID(), segment.GetField2StatslogPaths()) + pks, err := loadStats(innerCtx, t.cm, t.plan.GetSchema(), segment.GetSegmentID(), segment.GetField2StatslogPaths()) if err != nil { - log.Warn("failed to load segment stats log", zap.Error(err)) + log.Warn("failed to load segment stats log", + zap.Int64("planID", t.plan.GetPlanID()), + zap.String("type", t.plan.GetType().String()), + zap.Error(err)) return err, err } bf := metacache.NewBloomFilterSet(pks...) 
diff --git a/internal/datanode/l0_compactor_test.go b/internal/datanode/l0_compactor_test.go index 9aad1fb685443..8c833df21f69c 100644 --- a/internal/datanode/l0_compactor_test.go +++ b/internal/datanode/l0_compactor_test.go @@ -698,7 +698,7 @@ func (s *LevelZeroCompactionTaskSuite) TestLoadBF() { cm.EXPECT().MultiRead(mock.Anything, mock.Anything).Return([][]byte{sw.GetBuffer()}, nil) s.task.cm = cm - bfs, err := s.task.loadBF(plan.SegmentBinlogs) + bfs, err := s.task.loadBF(context.Background(), plan.SegmentBinlogs) s.NoError(err) s.Len(bfs, 1) @@ -733,7 +733,7 @@ func (s *LevelZeroCompactionTaskSuite) TestFailed() { s.task.plan = plan - _, err := s.task.loadBF(plan.SegmentBinlogs) + _, err := s.task.loadBF(context.Background(), plan.SegmentBinlogs) s.Error(err) }) From 7a2127b09fa978c2ecbc2fcdc623588920061a51 Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Mon, 3 Jun 2024 14:15:53 +0800 Subject: [PATCH 113/126] enhance: Avoid redundant meta operations of import (#33518) issue: https://github.com/milvus-io/milvus/issues/33513 --------- Signed-off-by: bigsheeper --- internal/datacoord/import_checker.go | 5 +++++ internal/datacoord/import_scheduler.go | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/internal/datacoord/import_checker.go b/internal/datacoord/import_checker.go index a1c86cc560ed5..fe75da1639141 100644 --- a/internal/datacoord/import_checker.go +++ b/internal/datacoord/import_checker.go @@ -341,6 +341,9 @@ func (c *importChecker) checkCollection(collectionID int64, jobs []ImportJob) { return } if !has { + jobs = lo.Filter(jobs, func(job ImportJob, _ int) bool { + return job.GetState() != internalpb.ImportJobState_Failed + }) for _, job := range jobs { err = c.imeta.UpdateJob(job.GetJobID(), UpdateJobState(internalpb.ImportJobState_Failed), UpdateJobReason(fmt.Sprintf("collection %d dropped", collectionID))) @@ -388,6 +391,8 @@ func (c *importChecker) checkGC(job ImportJob) { err := c.imeta.RemoveJob(job.GetJobID()) if err != nil { log.Warn("remove import job failed", zap.Int64("jobID", job.GetJobID()), zap.Error(err)) + return } + log.Info("import job removed", zap.Int64("jobID", job.GetJobID())) } } diff --git a/internal/datacoord/import_scheduler.go b/internal/datacoord/import_scheduler.go index f1cf30003cfd8..5f042de5db0a4 100644 --- a/internal/datacoord/import_scheduler.go +++ b/internal/datacoord/import_scheduler.go @@ -351,9 +351,11 @@ func (s *importScheduler) processFailed(task ImportTask) { return } } - err := s.imeta.UpdateTask(task.GetTaskID(), UpdateSegmentIDs(nil)) - if err != nil { - log.Warn("update import task segments failed", WrapTaskLog(task, zap.Error(err))...) + if len(segments) > 0 { + err := s.imeta.UpdateTask(task.GetTaskID(), UpdateSegmentIDs(nil)) + if err != nil { + log.Warn("update import task segments failed", WrapTaskLog(task, zap.Error(err))...) 
+ } } } err := DropImportTask(task, s.cluster, s.imeta) From 180d754158a4add24225b4f5211702a9ad0241f6 Mon Sep 17 00:00:00 2001 From: yiwangdr <80064917+yiwangdr@users.noreply.github.com> Date: Sun, 2 Jun 2024 23:47:47 -0700 Subject: [PATCH 114/126] fix: speed up segment lookup via channel name in datacoord (#33530) issue: #33342 Signed-off-by: yiwangdr --- internal/datacoord/handler.go | 8 ++------ internal/datacoord/meta.go | 6 ++++++ internal/datacoord/segment_info.go | 11 +++++++++++ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/internal/datacoord/handler.go b/internal/datacoord/handler.go index b02aa5d9abb13..03480782e28c4 100644 --- a/internal/datacoord/handler.go +++ b/internal/datacoord/handler.go @@ -57,9 +57,7 @@ func newServerHandler(s *Server) *ServerHandler { // GetDataVChanPositions gets vchannel latest positions with provided dml channel names for DataNode. func (h *ServerHandler) GetDataVChanPositions(channel RWChannel, partitionID UniqueID) *datapb.VchannelInfo { - segments := h.s.meta.SelectSegments(SegmentFilterFunc(func(s *SegmentInfo) bool { - return s.InsertChannel == channel.GetName() && !s.GetIsFake() - })) + segments := h.s.meta.GetRealSegmentsForChannel(channel.GetName()) log.Info("GetDataVChanPositions", zap.Int64("collectionID", channel.GetCollectionID()), zap.String("channel", channel.GetName()), @@ -105,9 +103,7 @@ func (h *ServerHandler) GetDataVChanPositions(channel RWChannel, partitionID Uni // the unflushed segments are actually the segments without index, even they are flushed. func (h *ServerHandler) GetQueryVChanPositions(channel RWChannel, partitionIDs ...UniqueID) *datapb.VchannelInfo { // cannot use GetSegmentsByChannel since dropped segments are needed here - segments := h.s.meta.SelectSegments(SegmentFilterFunc(func(s *SegmentInfo) bool { - return s.InsertChannel == channel.GetName() && !s.GetIsFake() - })) + segments := h.s.meta.GetRealSegmentsForChannel(channel.GetName()) segmentInfos := make(map[int64]*SegmentInfo) indexedSegments := FilterInIndexedSegments(h, h.s.meta, segments...) indexed := make(typeutil.UniqueSet) diff --git a/internal/datacoord/meta.go b/internal/datacoord/meta.go index f91494946af6a..850430539f711 100644 --- a/internal/datacoord/meta.go +++ b/internal/datacoord/meta.go @@ -1118,6 +1118,12 @@ func (m *meta) SelectSegments(filters ...SegmentFilter) []*SegmentInfo { return m.segments.GetSegmentsBySelector(filters...) } +func (m *meta) GetRealSegmentsForChannel(channel string) []*SegmentInfo { + m.RLock() + defer m.RUnlock() + return m.segments.GetRealSegmentsForChannel(channel) +} + // AddAllocation add allocation in segment func (m *meta) AddAllocation(segmentID UniqueID, allocation *Allocation) error { log.Debug("meta update: add allocation", diff --git a/internal/datacoord/segment_info.go b/internal/datacoord/segment_info.go index 13e6f4aad1c33..5f317fa70f58e 100644 --- a/internal/datacoord/segment_info.go +++ b/internal/datacoord/segment_info.go @@ -144,6 +144,17 @@ func (s *SegmentsInfo) GetSegmentsBySelector(filters ...SegmentFilter) []*Segmen return result } +func (s *SegmentsInfo) GetRealSegmentsForChannel(channel string) []*SegmentInfo { + channelSegments := s.secondaryIndexes.channel2Segments[channel] + var result []*SegmentInfo + for _, segment := range channelSegments { + if !segment.GetIsFake() { + result = append(result, segment) + } + } + return result +} + // GetCompactionTo returns the segment that the provided segment is compacted to. 
// Return (nil, false) if given segmentID can not found in the meta. // Return (nil, true) if given segmentID can be found not no compaction to. From 34c6a989ab99ec73e32c42aac62eb6864c76f387 Mon Sep 17 00:00:00 2001 From: wei liu Date: Mon, 3 Jun 2024 19:23:45 +0800 Subject: [PATCH 115/126] enhance: Avoid load bf in delegator when qn worker has no more memory (#33557) query coord sends a load request to the delegator, the delegator loads the bf first, then forwards the load request to the qn worker. but when the qn worker has no more memory, it returns a load failure immediately. then the delegator rolls back the loaded bf. query coord will retry the load request, and the delegator will load and roll back the bf again and again. this PR delays the bf loading step until the segment load succeeds in the worker. Signed-off-by: Wei Liu --- .../querynodev2/delegator/delegator_data.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/querynodev2/delegator/delegator_data.go b/internal/querynodev2/delegator/delegator_data.go index 02dce93ecb42a..6fc9d5dd6089d 100644 --- a/internal/querynodev2/delegator/delegator_data.go +++ b/internal/querynodev2/delegator/delegator_data.go @@ -424,16 +424,6 @@ func (sd *shardDelegator) LoadSegments(ctx context.Context, req *querypb.LoadSeg return err } - // load bloom filter only when candidate not exists - infos := lo.Filter(req.GetInfos(), func(info *querypb.SegmentLoadInfo, _ int) bool { - return !sd.pkOracle.Exists(pkoracle.NewCandidateKey(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed), targetNodeID) - }) - candidates, err := sd.loader.LoadBloomFilterSet(ctx, req.GetCollectionID(), req.GetVersion(), infos...) - if err != nil { - log.Warn("failed to load bloom filter set for segment", zap.Error(err)) - return err - } - req.Base.TargetID = req.GetDstNodeID() log.Debug("worker loads segments...") @@ -490,6 +480,16 @@ func (sd *shardDelegator) LoadSegments(ctx context.Context, req *querypb.LoadSeg if req.GetInfos()[0].GetLevel() == datapb.SegmentLevel_L0 { sd.RefreshLevel0DeletionStats() } else { + // load bloom filter only when candidate not exists + infos := lo.Filter(req.GetInfos(), func(info *querypb.SegmentLoadInfo, _ int) bool { + return !sd.pkOracle.Exists(pkoracle.NewCandidateKey(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed), targetNodeID) + }) + candidates, err := sd.loader.LoadBloomFilterSet(ctx, req.GetCollectionID(), req.GetVersion(), infos...)
+ if err != nil { + log.Warn("failed to load bloom filter set for segment", zap.Error(err)) + return err + } + log.Debug("load delete...") err = sd.loadStreamDelete(ctx, candidates, infos, req.GetDeltaPositions(), targetNodeID, worker, entries) if err != nil { From 2b285e5573dae58ec350c35a6c6f550a639c03cc Mon Sep 17 00:00:00 2001 From: congqixia Date: Mon, 3 Jun 2024 19:25:51 +0800 Subject: [PATCH 116/126] fix: Wrap init segcore tracing with golang timeout (#33494) See also #33483 Wrap `C.InitTrace` & `C.SetTrace` with a timeout, preventing otlp initialization from hanging forever when the endpoint is not set correctly --------- Signed-off-by: Congqi Xia --- internal/core/src/common/Tracer.cpp | 4 +-- internal/util/initcore/init_core.go | 35 ++++++++++++++++++++++-- internal/util/initcore/init_core_test.go | 16 +++++++++++ pkg/util/paramtable/component_param.go | 20 ++++++++++---- 4 files changed, 66 insertions(+), 9 deletions(-) diff --git a/internal/core/src/common/Tracer.cpp b/internal/core/src/common/Tracer.cpp index 4711ef76ae3ef..d80dd301215e9 100644 --- a/internal/core/src/common/Tracer.cpp +++ b/internal/core/src/common/Tracer.cpp @@ -55,13 +55,13 @@ initTelemetry(const TraceConfig& cfg) { opts.transport_format = jaeger::TransportFormat::kThriftHttp; opts.endpoint = cfg.jaegerURL; exporter = jaeger::JaegerExporterFactory::Create(opts); - LOG_INFO("init jaeger exporter, endpoint:", opts.endpoint); + LOG_INFO("init jaeger exporter, endpoint: {}", opts.endpoint); } else if (cfg.exporter == "otlp") { auto opts = otlp::OtlpGrpcExporterOptions{}; opts.endpoint = cfg.otlpEndpoint; opts.use_ssl_credentials = cfg.oltpSecure; exporter = otlp::OtlpGrpcExporterFactory::Create(opts); - LOG_INFO("init otlp exporter, endpoint:", opts.endpoint); + LOG_INFO("init otlp exporter, endpoint: {}", opts.endpoint); } else { LOG_INFO("Empty Trace"); enable_trace = false; diff --git a/internal/util/initcore/init_core.go b/internal/util/initcore/init_core.go index f4bb9ec9634f9..bf6a2e903aa35 100644 --- a/internal/util/initcore/init_core.go +++ b/internal/util/initcore/init_core.go @@ -29,6 +29,7 @@ import "C" import ( "fmt" + "time" "unsafe" "github.com/cockroachdb/errors" @@ -61,7 +62,13 @@ func InitTraceConfig(params *paramtable.ComponentParam) { otlpEndpoint: endpoint, nodeID: nodeID, } - C.InitTrace(&config) + // otlp grpc may hang forever, add timeout logic at go side + timeout := params.TraceCfg.InitTimeoutSeconds.GetAsDuration(time.Second) + callWithTimeout(func() { + C.InitTrace(&config) + }, func() { + panic("init segcore tracing timeout, See issue #33483") + }, timeout) } func ResetTraceConfig(params *paramtable.ComponentParam) { @@ -81,7 +88,31 @@ func ResetTraceConfig(params *paramtable.ComponentParam) { otlpEndpoint: endpoint, nodeID: nodeID, } - C.SetTrace(&config) + + // otlp grpc may hang forever, add timeout logic at go side + timeout := params.TraceCfg.InitTimeoutSeconds.GetAsDuration(time.Second) + callWithTimeout(func() { + C.SetTrace(&config) + }, func() { + panic("set segcore tracing timeout, See issue #33483") + }, timeout) +} + +func callWithTimeout(fn func(), timeoutHandler func(), timeout time.Duration) { + if timeout > 0 { + ch := make(chan struct{}) + go func() { + defer close(ch) + fn() + }() + select { + case <-ch: + case <-time.After(timeout): + timeoutHandler() + } + } else { + fn() + } } func InitRemoteChunkManager(params *paramtable.ComponentParam) error { diff --git a/internal/util/initcore/init_core_test.go b/internal/util/initcore/init_core_test.go index fadc061042a8e..15d1b089a8989 
100644 --- a/internal/util/initcore/init_core_test.go +++ b/internal/util/initcore/init_core_test.go @@ -19,6 +19,8 @@ package initcore import ( "testing" + "github.com/stretchr/testify/assert" + "github.com/milvus-io/milvus/pkg/util/paramtable" ) @@ -29,3 +31,17 @@ func TestTracer(t *testing.T) { paramtable.Get().Save(paramtable.Get().TraceCfg.Exporter.Key, "stdout") ResetTraceConfig(paramtable.Get()) } + +func TestOtlpHang(t *testing.T) { + paramtable.Init() + InitTraceConfig(paramtable.Get()) + + paramtable.Get().Save(paramtable.Get().TraceCfg.Exporter.Key, "otlp") + paramtable.Get().Save(paramtable.Get().TraceCfg.InitTimeoutSeconds.Key, "1") + defer paramtable.Get().Reset(paramtable.Get().TraceCfg.Exporter.Key) + defer paramtable.Get().Reset(paramtable.Get().TraceCfg.InitTimeoutSeconds.Key) + + assert.Panics(t, func() { + ResetTraceConfig(paramtable.Get()) + }) +} diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 590c7df33d6b2..3ab8a846d994a 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -787,11 +787,12 @@ func (t *gpuConfig) init(base *BaseTable) { } type traceConfig struct { - Exporter ParamItem `refreshable:"false"` - SampleFraction ParamItem `refreshable:"false"` - JaegerURL ParamItem `refreshable:"false"` - OtlpEndpoint ParamItem `refreshable:"false"` - OtlpSecure ParamItem `refreshable:"false"` + Exporter ParamItem `refreshable:"false"` + SampleFraction ParamItem `refreshable:"false"` + JaegerURL ParamItem `refreshable:"false"` + OtlpEndpoint ParamItem `refreshable:"false"` + OtlpSecure ParamItem `refreshable:"false"` + InitTimeoutSeconds ParamItem `refreshable:"false"` } func (t *traceConfig) init(base *BaseTable) { @@ -839,6 +840,15 @@ Fractions >= 1 will always sample. Fractions < 0 are treated as zero.`, Export: true, } t.OtlpSecure.Init(base.mgr) + + t.InitTimeoutSeconds = ParamItem{ + Key: "trace.initTimeoutSeconds", + Version: "2.4.4", + DefaultValue: "10", + Export: true, + Doc: "segcore initialization timeout in seconds, preventing otlp grpc hangs forever", + } + t.InitTimeoutSeconds.Init(base.mgr) } type logConfig struct { From d0a0eac0a4a3acc8ef987581919ff3f4773f0904 Mon Sep 17 00:00:00 2001 From: Ted Xu Date: Mon, 3 Jun 2024 19:45:46 +0800 Subject: [PATCH 117/126] enhance: adding virtual resource allocator (#33508) See #33559 --------- Signed-off-by: Ted Xu --- pkg/util/vralloc/alloc.go | 171 +++++++++++++++++++++++++++++++++ pkg/util/vralloc/alloc_test.go | 76 +++++++++++++++ 2 files changed, 247 insertions(+) create mode 100644 pkg/util/vralloc/alloc.go create mode 100644 pkg/util/vralloc/alloc_test.go diff --git a/pkg/util/vralloc/alloc.go b/pkg/util/vralloc/alloc.go new file mode 100644 index 0000000000000..ab2cc422f3772 --- /dev/null +++ b/pkg/util/vralloc/alloc.go @@ -0,0 +1,171 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package vralloc + +import ( + "maps" + "sync" + + "github.com/shirou/gopsutil/v3/disk" + + "github.com/milvus-io/milvus/pkg/util/hardware" +) + +type Resource struct { + Memory int64 // Memory occupation in bytes + CPU int64 // CPU in cycles per second + Disk int64 // Disk occpuation in bytes +} + +// Add adds r2 to r +func (r *Resource) Add(r2 *Resource) *Resource { + r.Memory += r2.Memory + r.CPU += r2.CPU + r.Disk += r2.Disk + return r +} + +// Sub subtracts r2 from r +func (r *Resource) Sub(r2 *Resource) *Resource { + r.Memory -= r2.Memory + r.CPU -= r2.CPU + r.Disk -= r2.Disk + return r +} + +func (r *Resource) Diff(r2 *Resource) *Resource { + return &Resource{ + Memory: r.Memory - r2.Memory, + CPU: r.CPU - r2.CPU, + Disk: r.Disk - r2.Disk, + } +} + +// Le tests if the resource is less than or equal to the limit +func (r Resource) Le(limit *Resource) bool { + return r.Memory <= limit.Memory && r.CPU <= limit.CPU && r.Disk <= limit.Disk +} + +type Allocator interface { + // Allocate allocates the resource, returns true if the resource is allocated. If allocation failed, returns the short resource. + // The short resource is a positive value, e.g., if there is additional 8 bytes in disk needed, returns (0, 0, 8). + Allocate(id string, r *Resource) (allocated bool, short *Resource) + // Release releases the resource + Release(id string) + // Used returns the used resource + Used() Resource + // Inspect returns the allocated resources + Inspect() map[string]*Resource +} + +type FixedSizeAllocator struct { + limit *Resource + + lock sync.RWMutex + used Resource + allocs map[string]*Resource +} + +var _ Allocator = (*FixedSizeAllocator)(nil) + +func (a *FixedSizeAllocator) Allocate(id string, r *Resource) (allocated bool, short *Resource) { + a.lock.Lock() + defer a.lock.Unlock() + if a.used.Add(r).Le(a.limit) { + _, ok := a.allocs[id] + if ok { + // Re-allocate on identical id is not allowed + return false, nil + } + a.allocs[id] = r + return true, nil + } + short = a.used.Diff(a.limit) + a.used.Sub(r) + return false, short +} + +func (a *FixedSizeAllocator) Release(id string) { + a.lock.Lock() + defer a.lock.Unlock() + r, ok := a.allocs[id] + if !ok { + return + } + delete(a.allocs, id) + a.used.Sub(r) +} + +func (a *FixedSizeAllocator) Used() Resource { + a.lock.RLock() + defer a.lock.RUnlock() + return a.used +} + +func (a *FixedSizeAllocator) Inspect() map[string]*Resource { + a.lock.RLock() + defer a.lock.RUnlock() + return maps.Clone(a.allocs) +} + +func NewFixedSizeAllocator(limit *Resource) *FixedSizeAllocator { + return &FixedSizeAllocator{ + limit: limit, + allocs: make(map[string]*Resource), + } +} + +// PhysicalAwareFixedSizeAllocator allocates resources with additional consideration of physical resource usage. 
+type PhysicalAwareFixedSizeAllocator struct { + FixedSizeAllocator + + hwLimit *Resource + dir string // watching directory for disk usage, probably got by paramtable.Get().LocalStorageCfg.Path.GetValue() +} + +var _ Allocator = (*PhysicalAwareFixedSizeAllocator)(nil) + +func (a *PhysicalAwareFixedSizeAllocator) Allocate(id string, r *Resource) (allocated bool, short *Resource) { + memoryUsage := int64(hardware.GetUsedMemoryCount()) + diskUsage := int64(0) + if usageStats, err := disk.Usage(a.dir); err != nil { + diskUsage = int64(usageStats.Used) + } + + // Check if memory usage + future request estimation will exceed the memory limit + // Note that different allocators will not coordinate with each other, so the memory limit + // may be exceeded in concurrent allocations. + expected := &Resource{ + Memory: a.Used().Memory + r.Memory + memoryUsage, + Disk: a.Used().Disk + r.Disk + diskUsage, + } + if expected.Le(a.hwLimit) { + return a.FixedSizeAllocator.Allocate(id, r) + } + return false, expected.Diff(a.hwLimit) +} + +func NewPhysicalAwareFixedSizeAllocator(limit *Resource, hwMemoryLimit, hwDiskLimit int64, dir string) *PhysicalAwareFixedSizeAllocator { + return &PhysicalAwareFixedSizeAllocator{ + FixedSizeAllocator: FixedSizeAllocator{ + limit: limit, + allocs: make(map[string]*Resource), + }, + hwLimit: &Resource{Memory: hwMemoryLimit, Disk: hwDiskLimit}, + dir: dir, + } +} diff --git a/pkg/util/vralloc/alloc_test.go b/pkg/util/vralloc/alloc_test.go new file mode 100644 index 0000000000000..0b081a702ebd6 --- /dev/null +++ b/pkg/util/vralloc/alloc_test.go @@ -0,0 +1,76 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package vralloc + +import ( + "fmt" + "sync" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/milvus-io/milvus/pkg/util/hardware" +) + +func TestFixedSizeAllocator(t *testing.T) { + a := NewFixedSizeAllocator(&Resource{100, 100, 100}) + + allocated, _ := a.Allocate("a1", &Resource{10, 10, 10}) + assert.Equal(t, true, allocated) + allocated, _ = a.Allocate("a2", &Resource{90, 90, 90}) + assert.Equal(t, true, allocated) + allocated, short := a.Allocate("a3", &Resource{10, 0, 0}) + assert.Equal(t, false, allocated) + assert.Equal(t, &Resource{10, 0, 0}, short) + a.Release("a2") + allocated, _ = a.Allocate("a3", &Resource{10, 0, 0}) + assert.Equal(t, true, allocated) + m := a.Inspect() + assert.Equal(t, 2, len(m)) + allocated, _ = a.Allocate("a1", &Resource{10, 0, 0}) + assert.Equal(t, false, allocated) +} + +func TestFixedSizeAllocatorRace(t *testing.T) { + a := NewFixedSizeAllocator(&Resource{100, 100, 100}) + wg := new(sync.WaitGroup) + for i := 0; i < 100; i++ { + wg.Add(1) + go func(index int) { + defer wg.Done() + allocated, _ := a.Allocate(fmt.Sprintf("a%d", index), &Resource{1, 1, 1}) + assert.Equal(t, true, allocated) + }(i) + } + wg.Wait() + m := a.Inspect() + assert.Equal(t, 100, len(m)) +} + +func TestPhysicalAwareFixedSizeAllocator(t *testing.T) { + hwMemoryLimit := int64(float32(hardware.GetMemoryCount()) * 0.9) + hwDiskLimit := int64(1<<63 - 1) + a := NewPhysicalAwareFixedSizeAllocator(&Resource{100, 100, 100}, hwMemoryLimit, hwDiskLimit, "/tmp") + + allocated, _ := a.Allocate("a1", &Resource{10, 10, 10}) + assert.Equal(t, true, allocated) + allocated, _ = a.Allocate("a2", &Resource{90, 90, 90}) + assert.Equal(t, true, allocated) + allocated, short := a.Allocate("a3", &Resource{10, 0, 0}) + assert.Equal(t, false, allocated) + assert.Equal(t, &Resource{10, 0, 0}, short) +} From 44d7e03e56ec9532c9568585c63bf6841ba93005 Mon Sep 17 00:00:00 2001 From: SimFG Date: Mon, 3 Jun 2024 21:09:47 +0800 Subject: [PATCH 118/126] fix: reset the RootCoordQuotaStates metric before recording this metric (#33553) - issue: #33539 Signed-off-by: SimFG --- internal/rootcoord/quota_center.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/rootcoord/quota_center.go b/internal/rootcoord/quota_center.go index 0e099438933cf..c16c5dbca9933 100644 --- a/internal/rootcoord/quota_center.go +++ b/internal/rootcoord/quota_center.go @@ -1487,6 +1487,7 @@ func (q *QuotaCenter) sendRatesToProxy() error { // recordMetrics records metrics of quota states. 
func (q *QuotaCenter) recordMetrics() { + metrics.RootCoordQuotaStates.Reset() dbIDs := make(map[int64]string, q.dbs.Len()) collectionIDs := make(map[int64]string, q.collections.Len()) q.dbs.Range(func(name string, id int64) bool { From 22a059d4dee6b02532de825522173057bbe1c8ea Mon Sep 17 00:00:00 2001 From: wei liu Date: Mon, 3 Jun 2024 21:59:46 +0800 Subject: [PATCH 119/126] enhance: update dependency for blobloom (#33565) Signed-off-by: Wei Liu --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 0b27e21ebb4aa..3412f20c408a5 100644 --- a/go.mod +++ b/go.mod @@ -251,7 +251,7 @@ replace ( github.com/bketelsen/crypt => github.com/bketelsen/crypt v0.0.4 // Fix security alert for core-os/etcd github.com/expr-lang/expr => github.com/SimFG/expr v0.0.0-20231218130003-94d085776dc5 github.com/go-kit/kit => github.com/go-kit/kit v0.1.0 - github.com/greatroar/blobloom => github.com/weiliu1031/blobloom v0.0.0-20240530105622-1e0e104a7160 + github.com/greatroar/blobloom => github.com/milvus-io/blobloom v0.0.0-20240603110411-471ae49f3b93 // github.com/milvus-io/milvus-storage/go => ../milvus-storage/go github.com/milvus-io/milvus/pkg => ./pkg github.com/streamnative/pulsarctl => github.com/xiaofan-luan/pulsarctl v0.5.1 diff --git a/go.sum b/go.sum index b5f2c76e8e79e..f0f45360fef38 100644 --- a/go.sum +++ b/go.sum @@ -603,6 +603,8 @@ github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQ github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/milvus-io/blobloom v0.0.0-20240603110411-471ae49f3b93 h1:xnIeuG1nuTEHKbbv51OwNGO82U+d6ut08ppTmZVm+VY= +github.com/milvus-io/blobloom v0.0.0-20240603110411-471ae49f3b93/go.mod h1:mjMJ1hh1wjGVfr93QIHJ6FfDNVrA0IELv8OvMHJxHKs= github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8= github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4= github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240430035521-259ae1d10016 h1:8WV4maXLeGEyJCCYIc1DmZ18H+VFAjMrwXJg5iI2nX4= @@ -894,8 +896,6 @@ github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBn github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= -github.com/weiliu1031/blobloom v0.0.0-20240530105622-1e0e104a7160 h1:x7cclCOEtr9zSzSZhwB7mhz/tFNHsILh6XewGTmJKk0= -github.com/weiliu1031/blobloom v0.0.0-20240530105622-1e0e104a7160/go.mod h1:mjMJ1hh1wjGVfr93QIHJ6FfDNVrA0IELv8OvMHJxHKs= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= From d25c7554805de5c82251c0b50cf1b90791c78d0e Mon Sep 17 00:00:00 2001 From: sre-ci-robot <56469371+sre-ci-robot@users.noreply.github.com> Date: Tue, 4 Jun 2024 01:55:46 +0800 Subject: [PATCH 120/126] [automated] Update Knowhere Commit 
(#33573) Update Knowhere Commit Signed-off-by: sre-ci-robot sre-ci-robot@users.noreply.github.com Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- internal/core/thirdparty/knowhere/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/core/thirdparty/knowhere/CMakeLists.txt b/internal/core/thirdparty/knowhere/CMakeLists.txt index 0159549cad3e4..8af49408f8131 100644 --- a/internal/core/thirdparty/knowhere/CMakeLists.txt +++ b/internal/core/thirdparty/knowhere/CMakeLists.txt @@ -12,7 +12,7 @@ #------------------------------------------------------------------------------- # Update KNOWHERE_VERSION for the first occurrence -set( KNOWHERE_VERSION 7499791 ) +set( KNOWHERE_VERSION 74997917 ) set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git") message(STATUS "Knowhere repo: ${GIT_REPOSITORY}") message(STATUS "Knowhere version: ${KNOWHERE_VERSION}") From 7f4698f4a7ac8b3c267be9ab4e003e7fb9d63911 Mon Sep 17 00:00:00 2001 From: congqixia Date: Tue, 4 Jun 2024 10:07:48 +0800 Subject: [PATCH 121/126] enhance: Use map PK to timestamp in buffer insert (#33566) Related to #27675 Store pk to minimal timestamp in `inData` instead of bloom filter to check whether some delete entry hit current insert batch Signed-off-by: Congqi Xia --- internal/datanode/writebuffer/write_buffer.go | 90 ++++++++++--------- 1 file changed, 50 insertions(+), 40 deletions(-) diff --git a/internal/datanode/writebuffer/write_buffer.go b/internal/datanode/writebuffer/write_buffer.go index 412ff5d95c9d8..cdc1abf4a2deb 100644 --- a/internal/datanode/writebuffer/write_buffer.go +++ b/internal/datanode/writebuffer/write_buffer.go @@ -19,7 +19,6 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/mq/msgstream" @@ -83,6 +82,8 @@ type writeBufferBase struct { metaWriter syncmgr.MetaWriter collSchema *schemapb.CollectionSchema + helper *typeutil.SchemaHelper + pkField *schemapb.FieldSchema estSizePerRecord int metaCache metacache.MetaCache syncMgr syncmgr.SyncManager @@ -130,11 +131,21 @@ func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2 if err != nil { return nil, err } + helper, err := typeutil.CreateSchemaHelper(schema) + if err != nil { + return nil, err + } + pkField, err := helper.GetPrimaryKeyField() + if err != nil { + return nil, err + } wb := &writeBufferBase{ channelName: channel, collectionID: metacache.Collection(), collSchema: schema, + helper: helper, + pkField: pkField, estSizePerRecord: estSize, syncMgr: syncMgr, metaWriter: option.metaWriter, @@ -378,49 +389,21 @@ type inData struct { tsField []*storage.Int64FieldData rowNum int64 - batchBF *storage.PkStatistics -} - -func (id *inData) generatePkStats() { - id.batchBF = &storage.PkStatistics{ - PkFilter: bloomfilter.NewBloomFilterWithType( - uint(id.rowNum), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), - paramtable.Get().CommonCfg.BloomFilterType.GetValue()), - } - - for _, ids := range id.pkField { - id.batchBF.UpdatePKRange(ids) - } + intPKTs map[int64]int64 + strPKTs map[string]int64 } func (id *inData) pkExists(pk storage.PrimaryKey, ts uint64) bool { - if !id.batchBF.PkExist(pk) { - return false + var ok bool + var minTs int64 + switch 
pk.Type() { + case schemapb.DataType_Int64: + minTs, ok = id.intPKTs[pk.GetValue().(int64)] + case schemapb.DataType_VarChar: + minTs, ok = id.strPKTs[pk.GetValue().(string)] } - for batchIdx, timestamps := range id.tsField { - ids := id.pkField[batchIdx] - var primaryKey storage.PrimaryKey - switch pk.Type() { - case schemapb.DataType_Int64: - primaryKey = storage.NewInt64PrimaryKey(0) - case schemapb.DataType_VarChar: - primaryKey = storage.NewVarCharPrimaryKey("") - } - for idx := 0; idx < timestamps.RowNum(); idx++ { - timestamp := timestamps.GetRow(idx).(int64) - if int64(ts) <= timestamp { - continue - } - primaryKey.SetValue(ids.GetRow(idx)) - - if pk.EQ(primaryKey) { - return true - } - } - } - return false + return ok && ts > uint64(minTs) } // prepareInsert transfers InsertMsg into organized InsertData grouped by segmentID @@ -437,6 +420,13 @@ func (wb *writeBufferBase) prepareInsert(insertMsgs []*msgstream.InsertMsg) ([]* data: make([]*storage.InsertData, 0, len(msgs)), pkField: make([]storage.FieldData, 0, len(msgs)), } + switch wb.pkField.GetDataType() { + case schemapb.DataType_Int64: + inData.intPKTs = make(map[int64]int64) + case schemapb.DataType_VarChar: + inData.strPKTs = make(map[string]int64) + } + for _, msg := range msgs { data, err := storage.InsertMsgToInsertData(msg, wb.collSchema) if err != nil { @@ -460,12 +450,32 @@ func (wb *writeBufferBase) prepareInsert(insertMsgs []*msgstream.InsertMsg) ([]* return nil, merr.WrapErrServiceInternal("timestamp column row num not match") } + timestamps := tsFieldData.GetRows().([]int64) + + switch wb.pkField.GetDataType() { + case schemapb.DataType_Int64: + pks := pkFieldData.GetRows().([]int64) + for idx, pk := range pks { + ts, ok := inData.intPKTs[pk] + if !ok || timestamps[idx] < ts { + inData.intPKTs[pk] = timestamps[idx] + } + } + case schemapb.DataType_VarChar: + pks := pkFieldData.GetRows().([]string) + for idx, pk := range pks { + ts, ok := inData.strPKTs[pk] + if !ok || timestamps[idx] < ts { + inData.strPKTs[pk] = timestamps[idx] + } + } + } + inData.data = append(inData.data, data) inData.pkField = append(inData.pkField, pkFieldData) inData.tsField = append(inData.tsField, tsFieldData) inData.rowNum += int64(data.GetRowNum()) } - inData.generatePkStats() result = append(result, inData) } From ac5e098e13485e6c4e8c6a9ba356725d46254fe3 Mon Sep 17 00:00:00 2001 From: ThreadDao Date: Tue, 4 Jun 2024 10:57:47 +0800 Subject: [PATCH 122/126] feat: Add e2e test cases for GoSDK (#33378) - Add e2e test cases for Go `milvusclient` - Fix client SparseEmbedding to vector issue: #33419 Signed-off-by: ThreadDao --- client/entity/sparse.go | 1 + tests/go_client/base/milvus_client.go | 240 ++++ tests/go_client/common/consts.go | 67 + tests/go_client/common/response_checker.go | 44 + tests/go_client/common/utils.go | 122 ++ tests/go_client/go.mod | 129 ++ tests/go_client/go.sum | 1113 +++++++++++++++++ tests/go_client/testcases/client_test.go | 92 ++ tests/go_client/testcases/collection_test.go | 950 ++++++++++++++ .../testcases/helper/collection_helper.go | 11 + .../go_client/testcases/helper/data_helper.go | 324 +++++ .../testcases/helper/field_helper.go | 299 +++++ tests/go_client/testcases/helper/helper.go | 192 +++ .../testcases/helper/index_helper.go | 35 + .../go_client/testcases/helper/read_helper.go | 55 + .../testcases/helper/schema_helper.go | 68 + tests/go_client/testcases/main_test.go | 74 ++ tests/go_client/testcases/search_test.go | 42 + 18 files changed, 3858 insertions(+) create mode 100644 
tests/go_client/base/milvus_client.go create mode 100644 tests/go_client/common/consts.go create mode 100644 tests/go_client/common/response_checker.go create mode 100644 tests/go_client/common/utils.go create mode 100644 tests/go_client/go.mod create mode 100644 tests/go_client/go.sum create mode 100644 tests/go_client/testcases/client_test.go create mode 100644 tests/go_client/testcases/collection_test.go create mode 100644 tests/go_client/testcases/helper/collection_helper.go create mode 100644 tests/go_client/testcases/helper/data_helper.go create mode 100644 tests/go_client/testcases/helper/field_helper.go create mode 100644 tests/go_client/testcases/helper/helper.go create mode 100644 tests/go_client/testcases/helper/index_helper.go create mode 100644 tests/go_client/testcases/helper/read_helper.go create mode 100644 tests/go_client/testcases/helper/schema_helper.go create mode 100644 tests/go_client/testcases/main_test.go create mode 100644 tests/go_client/testcases/search_test.go diff --git a/client/entity/sparse.go b/client/entity/sparse.go index 56ca5f4dca265..87edf58d152b2 100644 --- a/client/entity/sparse.go +++ b/client/entity/sparse.go @@ -29,6 +29,7 @@ type SparseEmbedding interface { Len() int // the actual items in this vector Get(idx int) (pos uint32, value float32, ok bool) Serialize() []byte + FieldType() FieldType } var ( diff --git a/tests/go_client/base/milvus_client.go b/tests/go_client/base/milvus_client.go new file mode 100644 index 0000000000000..8a43fc9f94f90 --- /dev/null +++ b/tests/go_client/base/milvus_client.go @@ -0,0 +1,240 @@ +package base + +import ( + "context" + "encoding/json" + "strings" + "time" + + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + + "go.uber.org/zap" + + "google.golang.org/grpc" + + clientv2 "github.com/milvus-io/milvus/client/v2" + "github.com/milvus-io/milvus/client/v2/index" +) + +func LoggingUnaryInterceptor() grpc.UnaryClientInterceptor { + return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { + maxLogLength := 300 + _method := strings.Split(method, "/") + _methodShotName := _method[len(_method)-1] + // Marshal req to json str + reqJSON, err := json.Marshal(req) + if err != nil { + log.Error("Failed to marshal request", zap.Error(err)) + reqJSON = []byte("could not marshal request") + } + reqStr := string(reqJSON) + if len(reqStr) > maxLogLength { + reqStr = reqStr[:maxLogLength] + "..." + } + + // log before + log.Info("Request", zap.String("method", _methodShotName), zap.Any("reqs", reqStr)) + + // invoker + start := time.Now() + errResp := invoker(ctx, method, req, reply, cc, opts...) + cost := time.Since(start) + + // Marshal reply to json str + respJSON, err := json.Marshal(reply) + if err != nil { + log.Error("Failed to marshal response", zap.Error(err)) + respJSON = []byte("could not marshal response") + } + respStr := string(respJSON) + if len(respStr) > maxLogLength { + respStr = respStr[:maxLogLength] + "..." 
+ } + + // log after + log.Info("Response", zap.String("method", _methodShotName), zap.Any("resp", respStr)) + log.Debug("Cost", zap.String("method", _methodShotName), zap.Duration("cost", cost)) + return errResp + } +} + +type MilvusClient struct { + mClient *clientv2.Client +} + +func NewMilvusClient(ctx context.Context, cfg *clientv2.ClientConfig) (*MilvusClient, error) { + cfg.DialOptions = append(cfg.DialOptions, grpc.WithUnaryInterceptor(LoggingUnaryInterceptor())) + mClient, err := clientv2.New(ctx, cfg) + return &MilvusClient{ + mClient, + }, err +} + +func (mc *MilvusClient) Close(ctx context.Context) error { + err := mc.mClient.Close(ctx) + return err +} + +// -- database -- + +// UsingDatabase list all database in milvus cluster. +func (mc *MilvusClient) UsingDatabase(ctx context.Context, option clientv2.UsingDatabaseOption) error { + err := mc.mClient.UsingDatabase(ctx, option) + return err +} + +// ListDatabases list all database in milvus cluster. +func (mc *MilvusClient) ListDatabases(ctx context.Context, option clientv2.ListDatabaseOption, callOptions ...grpc.CallOption) ([]string, error) { + databaseNames, err := mc.mClient.ListDatabase(ctx, option, callOptions...) + return databaseNames, err +} + +// CreateDatabase create database with the given name. +func (mc *MilvusClient) CreateDatabase(ctx context.Context, option clientv2.CreateDatabaseOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.CreateDatabase(ctx, option, callOptions...) + return err +} + +// DropDatabase drop database with the given db name. +func (mc *MilvusClient) DropDatabase(ctx context.Context, option clientv2.DropDatabaseOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.DropDatabase(ctx, option, callOptions...) + return err +} + +// -- collection -- + +// CreateCollection Create Collection +func (mc *MilvusClient) CreateCollection(ctx context.Context, option clientv2.CreateCollectionOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.CreateCollection(ctx, option, callOptions...) + return err +} + +// ListCollections Create Collection +func (mc *MilvusClient) ListCollections(ctx context.Context, option clientv2.ListCollectionOption, callOptions ...grpc.CallOption) ([]string, error) { + collectionNames, err := mc.mClient.ListCollections(ctx, option, callOptions...) + return collectionNames, err +} + +//DescribeCollection Describe collection +func (mc *MilvusClient) DescribeCollection(ctx context.Context, option clientv2.DescribeCollectionOption, callOptions ...grpc.CallOption) (*entity.Collection, error) { + collection, err := mc.mClient.DescribeCollection(ctx, option, callOptions...) + return collection, err +} + +// HasCollection Has collection +func (mc *MilvusClient) HasCollection(ctx context.Context, option clientv2.HasCollectionOption, callOptions ...grpc.CallOption) (bool, error) { + has, err := mc.mClient.HasCollection(ctx, option, callOptions...) + return has, err +} + +// DropCollection Drop Collection +func (mc *MilvusClient) DropCollection(ctx context.Context, option clientv2.DropCollectionOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.DropCollection(ctx, option, callOptions...) + return err +} + +// -- partition -- + +// CreatePartition Create Partition +func (mc *MilvusClient) CreatePartition(ctx context.Context, option clientv2.CreatePartitionOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.CreatePartition(ctx, option, callOptions...) 
+ return err +} + +// DropPartition Drop Partition +func (mc *MilvusClient) DropPartition(ctx context.Context, option clientv2.DropPartitionOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.DropPartition(ctx, option, callOptions...) + return err +} + +// HasPartition Has Partition +func (mc *MilvusClient) HasPartition(ctx context.Context, option clientv2.HasPartitionOption, callOptions ...grpc.CallOption) (bool, error) { + has, err := mc.mClient.HasPartition(ctx, option, callOptions...) + return has, err +} + +// ListPartitions List Partitions +func (mc *MilvusClient) ListPartitions(ctx context.Context, option clientv2.ListPartitionsOption, callOptions ...grpc.CallOption) ([]string, error) { + partitionNames, err := mc.mClient.ListPartitions(ctx, option, callOptions...) + return partitionNames, err +} + +// LoadPartitions Load Partitions into memory +func (mc *MilvusClient) LoadPartitions(ctx context.Context, option clientv2.LoadPartitionsOption, callOptions ...grpc.CallOption) (clientv2.LoadTask, error) { + loadTask, err := mc.mClient.LoadPartitions(ctx, option, callOptions...) + return loadTask, err +} + +// -- index -- + +// CreateIndex Create Index +func (mc *MilvusClient) CreateIndex(ctx context.Context, option clientv2.CreateIndexOption, callOptions ...grpc.CallOption) (*clientv2.CreateIndexTask, error) { + createIndexTask, err := mc.mClient.CreateIndex(ctx, option, callOptions...) + return createIndexTask, err +} + +// ListIndexes List Indexes +func (mc *MilvusClient) ListIndexes(ctx context.Context, option clientv2.ListIndexOption, callOptions ...grpc.CallOption) ([]string, error) { + indexes, err := mc.mClient.ListIndexes(ctx, option, callOptions...) + return indexes, err +} + +// DescribeIndex Describe Index +func (mc *MilvusClient) DescribeIndex(ctx context.Context, option clientv2.DescribeIndexOption, callOptions ...grpc.CallOption) (index.Index, error) { + index, err := mc.mClient.DescribeIndex(ctx, option, callOptions...) + return index, err +} + +// DropIndex Drop Index +func (mc *MilvusClient) DropIndex(ctx context.Context, option clientv2.DropIndexOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.DropIndex(ctx, option, callOptions...) + return err +} + +// -- write -- + +// Insert insert data +func (mc *MilvusClient) Insert(ctx context.Context, option clientv2.InsertOption, callOptions ...grpc.CallOption) (clientv2.InsertResult, error) { + insertRes, err := mc.mClient.Insert(ctx, option, callOptions...) + log.Info("Insert", zap.Any("result", insertRes)) + return insertRes, err +} + +// Flush flush data +func (mc *MilvusClient) Flush(ctx context.Context, option clientv2.FlushOption, callOptions ...grpc.CallOption) (*clientv2.FlushTask, error) { + flushTask, err := mc.mClient.Flush(ctx, option, callOptions...) + return flushTask, err +} + +// Delete deletes data +func (mc *MilvusClient) Delete(ctx context.Context, option clientv2.DeleteOption, callOptions ...grpc.CallOption) (clientv2.DeleteResult, error) { + deleteRes, err := mc.mClient.Delete(ctx, option, callOptions...) + return deleteRes, err +} + +// Upsert upsert data +func (mc *MilvusClient) Upsert(ctx context.Context, option clientv2.UpsertOption, callOptions ...grpc.CallOption) (clientv2.UpsertResult, error) { + upsertRes, err := mc.mClient.Upsert(ctx, option, callOptions...) 
+ return upsertRes, err
+}
+
+// -- read --
+
+// LoadCollection Load Collection
+func (mc *MilvusClient) LoadCollection(ctx context.Context, option clientv2.LoadCollectionOption, callOptions ...grpc.CallOption) (clientv2.LoadTask, error) {
+ loadTask, err := mc.mClient.LoadCollection(ctx, option, callOptions...)
+ return loadTask, err
+}
+
+// Search search from collection
+func (mc *MilvusClient) Search(ctx context.Context, option clientv2.SearchOption, callOptions ...grpc.CallOption) ([]clientv2.ResultSet, error) {
+ resultSets, err := mc.mClient.Search(ctx, option, callOptions...)
+ return resultSets, err
+}
+
+// Query query from collection
+func (mc *MilvusClient) Query(ctx context.Context, option clientv2.QueryOption, callOptions ...grpc.CallOption) (clientv2.ResultSet, error) {
+ resultSet, err := mc.mClient.Query(ctx, option, callOptions...)
+ return resultSet, err
+}
diff --git a/tests/go_client/common/consts.go b/tests/go_client/common/consts.go
new file mode 100644
index 0000000000000..46e964f1c8ea5
--- /dev/null
+++ b/tests/go_client/common/consts.go
@@ -0,0 +1,67 @@
+package common
+
+// const default field names
+const (
+ DefaultInt8FieldName = "int8"
+ DefaultInt16FieldName = "int16"
+ DefaultInt32FieldName = "int32"
+ DefaultInt64FieldName = "int64"
+ DefaultBoolFieldName = "bool"
+ DefaultFloatFieldName = "float"
+ DefaultDoubleFieldName = "double"
+ DefaultVarcharFieldName = "varchar"
+ DefaultJSONFieldName = "json"
+ DefaultArrayFieldName = "array"
+ DefaultFloatVecFieldName = "floatVec"
+ DefaultBinaryVecFieldName = "binaryVec"
+ DefaultFloat16VecFieldName = "fp16Vec"
+ DefaultBFloat16VecFieldName = "bf16Vec"
+ DefaultSparseVecFieldName = "sparseVec"
+ DefaultDynamicNumberField = "dynamicNumber"
+ DefaultDynamicStringField = "dynamicString"
+ DefaultDynamicBoolField = "dynamicBool"
+ DefaultDynamicListField = "dynamicList"
+ DefaultBoolArrayField = "boolArray"
+ DefaultInt8ArrayField = "int8Array"
+ DefaultInt16ArrayField = "int16Array"
+ DefaultInt32ArrayField = "int32Array"
+ DefaultInt64ArrayField = "int64Array"
+ DefaultFloatArrayField = "floatArray"
+ DefaultDoubleArrayField = "doubleArray"
+ DefaultVarcharArrayField = "varcharArray"
+)
+
+// const values for test cases
+const (
+ RowCount = "row_count"
+ DefaultTimeout = 120
+ DefaultDim = 128
+ DefaultShards = int32(2)
+ DefaultNb = 3000
+ DefaultNq = 5
+ DefaultLimit = 10
+ TestCapacity = 100 // default array field capacity
+ TestMaxLen = 100 // default varchar field max length
+)
+
+// const default values from milvus config
+const (
+ MaxPartitionNum = 4096
+ DefaultDynamicFieldName = "$meta"
+ QueryCountFieldName = "count(*)"
+ DefaultPartition = "_default"
+ DefaultIndexName = "_default_idx_102"
+ DefaultIndexNameBinary = "_default_idx_100"
+ DefaultRgName = "__default_resource_group"
+ DefaultDb = "default"
+ MaxDim = 32768
+ MaxLength = int64(65535)
+ MaxCollectionNameLen = 255
+ DefaultRgCapacity = 1000000
+ RetentionDuration = 40 // common.retentionDuration
+ MaxCapacity = 4096 // max array capacity
+ DefaultPartitionNum = 16 // default num_partitions
+ MaxTopK = 16384
+ MaxVectorFieldNum = 4
+ MaxShardNum = 16
+)
diff --git a/tests/go_client/common/response_checker.go b/tests/go_client/common/response_checker.go
new file mode 100644
index 0000000000000..283dd76aad4ab
--- /dev/null
+++ b/tests/go_client/common/response_checker.go
@@ -0,0 +1,44 @@
+package common
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/milvus-io/milvus/pkg/log"
+ "github.com/stretchr/testify/require"
+
+ clientv2
"github.com/milvus-io/milvus/client/v2" +) + +func CheckErr(t *testing.T, actualErr error, expErrNil bool, expErrorMsg ...string) { + if expErrNil { + require.NoError(t, actualErr) + } else { + require.Error(t, actualErr) + switch len(expErrorMsg) { + case 0: + log.Fatal("expect error message should not be empty") + case 1: + require.ErrorContains(t, actualErr, expErrorMsg[0]) + default: + contains := false + for i := 0; i < len(expErrorMsg); i++ { + if strings.Contains(actualErr.Error(), expErrorMsg[i]) { + contains = true + } + } + if !contains { + t.FailNow() + } + } + } +} + +// CheckSearchResult check search result, check nq, topk, ids, score +func CheckSearchResult(t *testing.T, actualSearchResults []clientv2.ResultSet, expNq int, expTopK int) { + require.Equal(t, len(actualSearchResults), expNq) + require.Len(t, actualSearchResults, expNq) + for _, actualSearchResult := range actualSearchResults { + require.Equal(t, actualSearchResult.ResultCount, expTopK) + } +} diff --git a/tests/go_client/common/utils.go b/tests/go_client/common/utils.go new file mode 100644 index 0000000000000..c6f8a9e44c607 --- /dev/null +++ b/tests/go_client/common/utils.go @@ -0,0 +1,122 @@ +package common + +import ( + "encoding/binary" + "fmt" + "log" + "math" + "math/rand" + "strings" + "time" + + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/x448/float16" +) + +var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") +var r *rand.Rand + +func init() { + r = rand.New(rand.NewSource(time.Now().UnixNano())) +} + +func GenRandomString(prefix string, n int) string { + b := make([]rune, n) + for i := range b { + b[i] = letterRunes[r.Intn(len(letterRunes))] + } + str := fmt.Sprintf("%s_%s", prefix, string(b)) + return str +} + +// GenLongString gen invalid long string +func GenLongString(n int) string { + var builder strings.Builder + longString := "a" + for i := 0; i < n; i++ { + builder.WriteString(longString) + } + return builder.String() +} + +func GenValidNames() []string { + return []string{ + "a", + "_", + "_name", + "_123", + "name_", + "_coll_123_", + } +} + +func GenInvalidNames() []string { + invalidNames := []string{ + "", + " ", + "12-s", + "(mn)", + "中文", + "%$#", + "1", + "[10]", + "a b", + DefaultDynamicFieldName, + GenLongString(MaxCollectionNameLen + 1), + } + return invalidNames +} + +func GenFloatVector(dim int) []float32 { + vector := make([]float32, 0, dim) + for j := 0; j < int(dim); j++ { + vector = append(vector, rand.Float32()) + } + return vector +} + +func GenFloat16Vector(dim int) []byte { + ret := make([]byte, dim*2) + for i := 0; i < int(dim); i++ { + v := float16.Fromfloat32(rand.Float32()).Bits() + binary.LittleEndian.PutUint16(ret[i*2:], v) + } + return ret +} + +func GenBFloat16Vector(dim int) []byte { + ret16 := make([]uint16, 0, dim) + for i := 0; i < int(dim); i++ { + f := rand.Float32() + bits := math.Float32bits(f) + bits >>= 16 + bits &= 0x7FFF + ret16 = append(ret16, uint16(bits)) + } + ret := make([]byte, len(ret16)*2) + for i, value := range ret16 { + binary.LittleEndian.PutUint16(ret[i*2:], value) + } + return ret +} + +func GenBinaryVector(dim int) []byte { + vector := make([]byte, dim/8) + rand.Read(vector) + return vector +} + +func GenSparseVector(maxLen int) entity.SparseEmbedding { + length := 1 + rand.Intn(1+maxLen) + positions := make([]uint32, length) + values := make([]float32, length) + for i := 0; i < length; i++ { + positions[i] = uint32(2*i + 1) + values[i] = rand.Float32() + } + vector, err := 
entity.NewSliceSparseEmbedding(positions, values) + if err != nil { + log.Fatalf("Generate vector failed %s", err) + } + return vector +} diff --git a/tests/go_client/go.mod b/tests/go_client/go.mod new file mode 100644 index 0000000000000..665fdcc11fe11 --- /dev/null +++ b/tests/go_client/go.mod @@ -0,0 +1,129 @@ +module github.com/milvus-io/milvus/tests/go_client + +go 1.20 + +require ( + github.com/milvus-io/milvus/client/v2 v2.0.0-20240521081339-017fd7bc25de + github.com/milvus-io/milvus/pkg v0.0.2-0.20240317152703-17b4938985f3 + github.com/stretchr/testify v1.9.0 + github.com/x448/float16 v0.8.4 + go.uber.org/zap v1.27.0 + google.golang.org/grpc v1.64.0 +) + +replace github.com/milvus-io/milvus/client/v2 v2.0.0-20240521081339-017fd7bc25de => ../../../milvus/client + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect + github.com/cenkalti/backoff/v4 v4.2.0 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cilium/ebpf v0.11.0 // indirect + github.com/cockroachdb/errors v1.9.1 // indirect + github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f // indirect + github.com/cockroachdb/redact v1.1.3 // indirect + github.com/containerd/cgroups/v3 v3.0.3 // indirect + github.com/coreos/go-semver v0.3.0 // indirect + github.com/coreos/go-systemd/v22 v22.3.2 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/docker/go-units v0.4.0 // indirect + github.com/dustin/go-humanize v1.0.0 // indirect + github.com/form3tech-oss/jwt-go v3.2.3+incompatible // indirect + github.com/fsnotify/fsnotify v1.4.9 // indirect + github.com/getsentry/sentry-go v0.12.0 // indirect + github.com/go-logr/logr v1.3.0 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect + github.com/godbus/dbus/v5 v5.0.4 // indirect + github.com/gogo/googleapis v1.4.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/gogo/status v1.1.0 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/btree v1.1.2 // indirect + github.com/gorilla/websocket v1.4.2 // indirect + github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect + github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect + github.com/hashicorp/hcl v1.0.0 // indirect + github.com/jonboulle/clockwork v0.2.2 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/kr/pretty v0.3.1 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect + github.com/magiconair/properties v1.8.5 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect + github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240430035521-259ae1d10016 // indirect + github.com/mitchellh/mapstructure v1.4.1 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/opencontainers/runtime-spec v1.0.2 // indirect + github.com/panjf2000/ants/v2 v2.7.2 // indirect + github.com/pelletier/go-toml v1.9.3 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect + github.com/prometheus/client_golang v1.14.0 // indirect + github.com/prometheus/client_model 
v0.3.0 // indirect + github.com/prometheus/common v0.42.0 // indirect + github.com/prometheus/procfs v0.9.0 // indirect + github.com/rogpeppe/go-internal v1.10.0 // indirect + github.com/samber/lo v1.27.0 // indirect + github.com/shirou/gopsutil/v3 v3.22.9 // indirect + github.com/sirupsen/logrus v1.9.0 // indirect + github.com/soheilhy/cmux v0.1.5 // indirect + github.com/spaolacci/murmur3 v1.1.0 // indirect + github.com/spf13/afero v1.6.0 // indirect + github.com/spf13/cast v1.3.1 // indirect + github.com/spf13/jwalterweatherman v1.1.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/spf13/viper v1.8.1 // indirect + github.com/subosito/gotenv v1.2.0 // indirect + github.com/tidwall/gjson v1.17.1 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.0 // indirect + github.com/tklauser/go-sysconf v0.3.10 // indirect + github.com/tklauser/numcpus v0.4.0 // indirect + github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect + github.com/uber/jaeger-client-go v2.30.0+incompatible // indirect + github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect + github.com/yusufpapurcu/wmi v1.2.2 // indirect + go.etcd.io/bbolt v1.3.6 // indirect + go.etcd.io/etcd/api/v3 v3.5.5 // indirect + go.etcd.io/etcd/client/pkg/v3 v3.5.5 // indirect + go.etcd.io/etcd/client/v2 v2.305.5 // indirect + go.etcd.io/etcd/client/v3 v3.5.5 // indirect + go.etcd.io/etcd/pkg/v3 v3.5.5 // indirect + go.etcd.io/etcd/raft/v3 v3.5.5 // indirect + go.etcd.io/etcd/server/v3 v3.5.5 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.38.0 // indirect + go.opentelemetry.io/otel v1.13.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.13.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.13.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.13.0 // indirect + go.opentelemetry.io/otel/metric v0.35.0 // indirect + go.opentelemetry.io/otel/sdk v1.13.0 // indirect + go.opentelemetry.io/otel/trace v1.13.0 // indirect + go.opentelemetry.io/proto/otlp v0.19.0 // indirect + go.uber.org/atomic v1.10.0 // indirect + go.uber.org/automaxprocs v1.5.2 // indirect + go.uber.org/multierr v1.10.0 // indirect + golang.org/x/crypto v0.22.0 // indirect + golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect + golang.org/x/net v0.24.0 // indirect + golang.org/x/sync v0.6.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect + golang.org/x/time v0.3.0 // indirect + google.golang.org/genproto v0.0.0-20230526161137-0005af68ea54 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect + google.golang.org/protobuf v1.33.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/ini.v1 v1.62.0 // indirect + gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/apimachinery v0.28.6 // indirect + sigs.k8s.io/yaml v1.3.0 // indirect +) diff --git a/tests/go_client/go.sum b/tests/go_client/go.sum new file mode 100644 index 0000000000000..b461c4e1c3ee7 --- /dev/null +++ b/tests/go_client/go.sum @@ -0,0 +1,1113 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 
+cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= +cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= +cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= +cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= +cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= +cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= +cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= +cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= +cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= +cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= +cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= +cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= +cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= +cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI= +cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk= +cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= +cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= +cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= +cloud.google.com/go v0.110.0 h1:Zc8gqp3+a9/Eyph2KDmcGaPtbKRIoqq4YTlL4NMD0Ys= +cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= +cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= +cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= +cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= +cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= +cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= +cloud.google.com/go/compute v1.25.1 h1:ZRpHJedLtTpKgr3RV1Fx23NuaAEN1Zfx9hw1u4aJdjU= +cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= +cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= +cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= +cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= +cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= +cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= +cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= +cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= +cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= +cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= +cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= +cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= +cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= +dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= 
+github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v1.2.1 h1:9F2/+DoOYIOksmaJFPw1tGFy1eDnIJXg+UHjuD8lTak= +github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53/go.mod h1:+3IMCy2vIlbG1XG/0ggNQv0SvxCAIpPM5b1nCz56Xno= +github.com/CloudyKit/jet/v3 v3.0.0/go.mod h1:HKQPgSJmdK8hdoAbKUUWajkHyHo4RaU5rMdUywE7VMo= +github.com/Joker/hpp v1.0.0/go.mod h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY= +github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0= +github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= +github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= +github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= +github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= +github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g= +github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= +github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= +github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= +github.com/cenkalti/backoff/v4 v4.2.0 h1:HN5dHm3WBOgndBH6E8V0q2jIYIR3s9yglV8k/+MN3u4= +github.com/cenkalti/backoff/v4 v4.2.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= 
+github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= +github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= +github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= +github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y= +github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= +github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= +github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= +github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20240318125728-8a4994d93e50 h1:DBmgJDC9dTfkVyGgipamEh2BpGYxScCH1TOF1LL1cXc= +github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5/go.mod h1:h6jFvWxBdQXxjopDMZyH2UVceIRfR84bdzbkoKrsWNo= +github.com/cockroachdb/datadriven v1.0.2 h1:H9MtNqVoVhvd9nCBwOyDjUEdZCREqbIdCJD93PBm/jA= +github.com/cockroachdb/datadriven v1.0.2/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= +github.com/cockroachdb/errors v1.2.4/go.mod h1:rQD95gz6FARkaKkQXUksEje/d9a6wBJoCr5oaCLELYA= +github.com/cockroachdb/errors v1.9.1 h1:yFVvsI0VxmRShfawbt/laCIDy/mtTqqnvoNgiy5bEV8= +github.com/cockroachdb/errors v1.9.1/go.mod h1:2sxOtL2WIc096WSZqZ5h8fa17rdDq9HZOZLBCor4mBk= +github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI= +github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f h1:6jduT9Hfc0njg5jJ1DdKCFPdMBrp/mdZfCpa5h+WM74= +github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= +github.com/cockroachdb/redact v1.1.3 h1:AKZds10rFSIj7qADf0g46UixK8NNLwWTNdCIGS5wfSQ= +github.com/cockroachdb/redact v1.1.3/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= +github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0/go.mod h1:4Zcjuz89kmFXt9morQgcfYZAYZ5n8WHjt81YYWIwtTM= +github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGDJ9kip0= 
+github.com/containerd/cgroups/v3 v3.0.3/go.mod h1:8HBe7V3aWGLFPd/k03swSIsGjZhHI2WzJmticMgVuz0= +github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= +github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= +github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= +github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI= +github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= +github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= +github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= +github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= +github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= +github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= +github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= +github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= 
+github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/envoyproxy/protoc-gen-validate v1.0.4 h1:gVPz/FMfvh57HdSJQyvBtF00j8JU4zdyUgIUNhlgg0A= +github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= +github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8= +github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= +github.com/form3tech-oss/jwt-go v3.2.3+incompatible h1:7ZaBxOI7TMoYBfyA3cQHErNNyAWIKUMIwqxEtgHOs5c= +github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= +github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc= +github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= +github.com/getsentry/sentry-go v0.12.0 h1:era7g0re5iY13bHSdN/xMkyV+5zZppjRVQhZrXCaEIk= +github.com/getsentry/sentry-go v0.12.0/go.mod h1:NSap0JBYWzHND8oMbyi0+XZhUalc1TBdRL1M71JZW2c= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/gin-contrib/sse v0.0.0-20190301062529-5545eab6dad3/go.mod h1:VJ0WA2NBN22VlZ2dKZQPAPnyWw5XTlK1KymzLKsr59s= +github.com/gin-gonic/gin v1.4.0/go.mod h1:OW2EZn3DO8Ln9oIKOvM++LBO+5UPHJJDH72/q/3rZdM= +github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= +github.com/go-errors/errors v1.0.1 h1:LUHzmkK3GUKUrL/1gfBUxAHzcev3apQlezX/+O7ma6w= +github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= +github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= +github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= +github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= +github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod 
h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= +github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= +github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= +github.com/gogo/googleapis v1.4.1 h1:1Yx4Myt7BxzvUr5ldGSbwYiZG6t9wGBZ+8/fX3Wvtq0= +github.com/gogo/googleapis v1.4.1/go.mod h1:2lpHqI5OcWCtVElxXnPt+s8oJvMpySlOyM6xDCrzib4= +github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/gogo/status v1.1.0 h1:+eIkrewn5q6b30y+g/BJINVVdi2xH7je5MPJ3ZPK3JA= +github.com/gogo/status v1.1.0/go.mod h1:BFv9nrluPLmrS0EmGVvLaPNmRosr9KapBYd5/hpY1WM= +github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= +github.com/golang/glog v1.2.0 h1:uCdmnmatrKCgMBlM4rMuJZWOkPDqdbZPnrMXDY4gI68= +github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= +github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= +github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= 
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/gomodule/redigo v1.7.1-0.20190724094224-574c33c3df38/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= +github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= +github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= +github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 
h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= +github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= +github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= +github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00 h1:l5lAOZEym3oK3SQ2HBHWsJUfbNBiTXJDeW2QDxw9AQ0= +github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= +github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= +github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod 
h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= +github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= +github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= +github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= +github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= +github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= +github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= +github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= +github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= +github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/hydrogen18/memlistener v0.0.0-20200120041712-dcc25e7acd91/go.mod h1:qEIFzExnS6016fRpRfxrExeVn2gbClQA99gQhnIcdhE= +github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA= +github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/iris-contrib/blackfriday v2.0.0+incompatible/go.mod h1:UzZ2bDEoaSGPbkg6SAB4att1aAwTmVIx/5gCVqeyUdI= +github.com/iris-contrib/go.uuid v2.0.0+incompatible/go.mod h1:iz2lgM/1UnEf1kP0L/+fafWORmlnuysV2EMP8MW+qe0= +github.com/iris-contrib/jade v1.1.3/go.mod h1:H/geBymxJhShH5kecoiOCSssPX7QWYH7UaeZTSWddIk= +github.com/iris-contrib/pongo2 v0.0.1/go.mod h1:Ssh+00+3GAZqSQb30AvBRNxBx7rf0GqwkjqxNd0u65g= +github.com/iris-contrib/schema v0.0.1/go.mod h1:urYA3uvUNG1TIIjOSCzHr9/LmbQo8LrOcOqfqxa4hXw= +github.com/jonboulle/clockwork v0.1.0/go.mod 
h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= +github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ= +github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= +github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= +github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= +github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= +github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k= +github.com/kataras/golog v0.0.10/go.mod h1:yJ8YKCmyL+nWjERB90Qwn+bdyBZsaQwU3bTVFgkFIp8= +github.com/kataras/iris/v12 v12.1.8/go.mod h1:LMYy4VlP67TQ3Zgriz8RE2h2kMZV2SgMYbq3UhfoFmE= +github.com/kataras/neffos v0.0.14/go.mod h1:8lqADm8PnbeFfL7CLXh1WHw53dG27MC3pgi2R1rmoTE= +github.com/kataras/pio v0.0.2/go.mod h1:hAoW0t9UmXi4R5Oyq5Z4irTbaTsOemSrDGUtaTl7Dro= +github.com/kataras/sitemap v0.0.5/go.mod h1:KY2eugMKiPwsJgx7+U103YZehfvNGOXURubcGyk0Bz8= +github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= +github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= +github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod 
h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/labstack/echo/v4 v4.5.0/go.mod h1:czIriw4a0C1dFun+ObrXp7ok03xON0N1awStJ6ArI7Y= +github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= +github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/magiconair/properties v1.8.5 h1:b6kJs+EmPFMYGkow9GiUyCyOvIwYetYJ3fSaWak/Gls= +github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= +github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/goveralls v0.0.2/go.mod h1:8d1ZMHsd7fW6IRPKQh46F2WRpyib5/X4FOpevwGNQEw= +github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= +github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8= +github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= +github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240430035521-259ae1d10016 h1:8WV4maXLeGEyJCCYIc1DmZ18H+VFAjMrwXJg5iI2nX4= +github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240430035521-259ae1d10016/go.mod h1:1OIl0v5PQeNxIJhCvY+K55CBUOYDZevw9g9380u1Wek= +github.com/milvus-io/milvus/pkg v0.0.2-0.20240317152703-17b4938985f3 h1:ZBpRWhBa7FTFxW4YYVv9AUESoW1Xyb3KNXTzTqfkZmw= +github.com/milvus-io/milvus/pkg v0.0.2-0.20240317152703-17b4938985f3/go.mod h1:jQ2BUZny1COsgv1Qbcv8dmbppW+V9J/c4YQZNb3EOm8= +github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= +github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= +github.com/mitchellh/gox v0.4.0/go.mod 
h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= +github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= +github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/mitchellh/mapstructure v1.4.1 h1:CpVNEelQCZBooIPDn+AR3NpivK/TIKU8bDxdASFVQag= +github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= +github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= +github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= +github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= +github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= +github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0= +github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/panjf2000/ants/v2 v2.7.2 h1:2NUt9BaZFO5kQzrieOmK/wdb/tQ/K+QHaxN8sOgD63U= +github.com/panjf2000/ants/v2 v2.7.2/go.mod h1:KIBmYG9QQX5U2qzFP/yQJaq/nSb6rahS9iEHkrCMgM8= +github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pelletier/go-toml v1.9.3 h1:zeC5b1GviRUyKYd6OJPvBU/mcVDVoL1OhT17FCt5dSQ= +github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= +github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTmyFqUwr+jcCvpVkK7sumiz+ko5H9eq4= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/pkg/errors v0.8.0/go.mod 
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= +github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= +github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= +github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= +github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= +github.com/prometheus/client_golang v1.14.0 h1:nJdhIvne2eSX/XRAFV9PcvFFRbrjbcTUj0VP62TMhnw= +github.com/prometheus/client_golang v1.14.0/go.mod h1:8vpkKitgIVNcqrRBWh1C4TIUQgYNtG/XQE4E/Zae36Y= +github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4= +github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w= +github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= +github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= +github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM= +github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc= +github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= +github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= +github.com/prometheus/procfs v0.9.0 
h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= +github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= +github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= +github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/samber/lo v1.27.0 h1:GOyDWxsblvqYobqsmUuMddPa2/mMzkKyojlXol4+LaQ= +github.com/samber/lo v1.27.0/go.mod h1:it33p9UtPMS7z72fP4gw/EIfQB2eI8ke7GR2wc6+Rhg= +github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtmuhtR2uUrrJOpYzYRvbcPAid+g= +github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= +github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/shirou/gopsutil/v3 v3.22.9 h1:yibtJhIVEMcdw+tCTbOPiF1VcsuDeTE4utJ8Dm4c5eA= +github.com/shirou/gopsutil/v3 v3.22.9/go.mod h1:bBYl1kjgEJpWpxeHmLI+dVHWtyAwfcmSBLDsp2TNT8A= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= +github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= +github.com/smartystreets/assertions v1.1.0 h1:MkTeG1DMwsrdH7QtLXy5W+fUxWq+vmb6cLmyJ7aRtF0= +github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s= +github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= +github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= +github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= +github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spaolacci/murmur3 v1.1.0 
h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= +github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/afero v1.6.0 h1:xoax2sJ2DT8S8xA2paPFjDCScCNeWsg75VG0DLRreiY= +github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= +github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng= +github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= +github.com/spf13/cobra v1.1.3/go.mod h1:pGADOWyqRD/YMrPZigI/zbliZ2wVD/23d+is3pSWzOo= +github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= +github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= +github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= +github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= +github.com/spf13/viper v1.8.1 h1:Kq1fyeebqsBfbjZj4EL7gj2IO0mMaiyjYUWcUsl2O44= +github.com/spf13/viper v1.8.1/go.mod h1:o0Pch8wJ9BVSWGQMbra6iw0oQ5oktSIBaujf1rJH9Ns= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= +github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= +github.com/thoas/go-funk v0.9.1 h1:O549iLZqPpTUQ10ykd26sZhzD+rmR5pWhuElrhbC20M= +github.com/tidwall/gjson v1.17.1 h1:wlYEnwqAHgzmhNUFfw7Xalt2JzQvsMx2Se4PcoFCT/U= +github.com/tidwall/gjson v1.17.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= 
+github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tklauser/go-sysconf v0.3.10 h1:IJ1AZGZRWbY8T5Vfk04D9WOA5WSejdflXxP03OUqALw= +github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk= +github.com/tklauser/numcpus v0.4.0 h1:E53Dm1HjH1/R2/aoCtXtPgzmElmn51aOkhCFSuZq//o= +github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ= +github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 h1:uruHq4dN7GR16kFc5fp3d1RIYzJW5onx8Ybykw2YQFA= +github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o= +github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= +github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= +github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= +github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= +github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= +github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w= +github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= +github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= +github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= +github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= +github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= +github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= +github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/yalp/jsonpath v0.0.0-20180802001716-5cc68e5049a0/go.mod h1:/LWChgwKmvncFJFHJ7Gvn9wZArjbV5/FppcK2fKk/tI= +github.com/yudai/gojsondiff v1.0.0/go.mod h1:AY32+k2cwILAkW1fbgxQ5mUmMiZFgLIV+FBNExI05xg= +github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82/go.mod h1:lgjkn3NuSvDfVJdfcVVdX+jpBxNmX4rDAzaS45IcYoM= +github.com/yudai/pp v2.0.1+incompatible/go.mod h1:PuxR/8QJ7cyCkFp/aUDS+JY727OFEZkTdatxwunjIkc= 
+github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= +github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= +go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= +go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= +go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= +go.etcd.io/etcd/api/v3 v3.5.5 h1:BX4JIbQ7hl7+jL+g+2j5UAr0o1bctCm6/Ct+ArBGkf0= +go.etcd.io/etcd/api/v3 v3.5.5/go.mod h1:KFtNaxGDw4Yx/BA4iPPwevUTAuqcsPxzyX8PHydchN8= +go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= +go.etcd.io/etcd/client/pkg/v3 v3.5.5 h1:9S0JUVvmrVl7wCF39iTQthdaaNIiAaQbmK75ogO6GU8= +go.etcd.io/etcd/client/pkg/v3 v3.5.5/go.mod h1:ggrwbk069qxpKPq8/FKkQ3Xq9y39kbFR4LnKszpRXeQ= +go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ= +go.etcd.io/etcd/client/v2 v2.305.5 h1:DktRP60//JJpnPC0VBymAN/7V71GHMdjDCBt4ZPXDjI= +go.etcd.io/etcd/client/v2 v2.305.5/go.mod h1:zQjKllfqfBVyVStbt4FaosoX2iYd8fV/GRy/PbowgP4= +go.etcd.io/etcd/client/v3 v3.5.5 h1:q++2WTJbUgpQu4B6hCuT7VkdwaTP7Qz6Daak3WzbrlI= +go.etcd.io/etcd/client/v3 v3.5.5/go.mod h1:aApjR4WGlSumpnJ2kloS75h6aHUmAyaPLjHMxpc7E7c= +go.etcd.io/etcd/pkg/v3 v3.5.5 h1:Ablg7T7OkR+AeeeU32kdVhw/AGDsitkKPl7aW73ssjU= +go.etcd.io/etcd/pkg/v3 v3.5.5/go.mod h1:6ksYFxttiUGzC2uxyqiyOEvhAiD0tuIqSZkX3TyPdaE= +go.etcd.io/etcd/raft/v3 v3.5.5 h1:Ibz6XyZ60OYyRopu73lLM/P+qco3YtlZMOhnXNS051I= +go.etcd.io/etcd/raft/v3 v3.5.5/go.mod h1:76TA48q03g1y1VpTue92jZLr9lIHKUNcYdZOOGyx8rI= +go.etcd.io/etcd/server/v3 v3.5.5 h1:jNjYm/9s+f9A9r6+SC4RvNaz6AqixpOvhrFdT0PvIj0= +go.etcd.io/etcd/server/v3 v3.5.5/go.mod h1:rZ95vDw/jrvsbj9XpTqPrTAB9/kzchVdhRirySPkUBc= +go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= +go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= +go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= +go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.25.0/go.mod h1:E5NNboN0UqSAki0Atn9kVwaN7I+l25gGxDqBueo/74E= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.38.0 h1:g/BAN5o90Pr6D8xMRezjzGOHBpc15U+4oE53nZLiae4= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.38.0/go.mod h1:+F41JBSkye7aYJELRvIMF0Z66reIwIOL0St75ZVwSJs= +go.opentelemetry.io/otel v1.0.1/go.mod h1:OPEOD4jIT2SlZPMmwT6FqZz2C0ZNdQqiWcoK6M0SNFU= +go.opentelemetry.io/otel v1.13.0 h1:1ZAKnNQKwBBxFtww/GwxNUyTf0AxkZzrukO8MeXqe4Y= +go.opentelemetry.io/otel v1.13.0/go.mod h1:FH3RtdZCzRkJYFTCsAKDy9l/XYjMdNv6QrkFFB8DvVg= 
+go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.13.0 h1:pa05sNT/P8OsIQ8mPZKTIyiBuzS/xDGLVx+DCt0y6Vs= +go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.13.0/go.mod h1:rqbht/LlhVBgn5+k3M5QK96K5Xb0DvXpMJ5SFQpY6uw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.0.1/go.mod h1:Kv8liBeVNFkkkbilbgWRpV+wWuu+H5xdOT6HAgd30iw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.13.0 h1:Any/nVxaoMq1T2w0W85d6w5COlLuCCgOYKQhJJWEMwQ= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.13.0/go.mod h1:46vAP6RWfNn7EKov73l5KBFlNxz8kYlxR1woU+bJ4ZY= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.0.1/go.mod h1:xOvWoTOrQjxjW61xtOmD/WKGRYb/P4NzRo3bs65U6Rk= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.13.0 h1:Wz7UQn7/eIqZVDJbuNEM6PmqeA71cWXrWcXekP5HZgU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.13.0/go.mod h1:OhH1xvgA5jZW2M/S4PcvtDlFE1VULRRBsibBrKuJQGI= +go.opentelemetry.io/otel/metric v0.35.0 h1:aPT5jk/w7F9zW51L7WgRqNKDElBdyRLGuBtI5MX34e8= +go.opentelemetry.io/otel/metric v0.35.0/go.mod h1:qAcbhaTRFU6uG8QM7dDo7XvFsWcugziq/5YI065TokQ= +go.opentelemetry.io/otel/sdk v1.0.1/go.mod h1:HrdXne+BiwsOHYYkBE5ysIcv2bvdZstxzmCQhxTcZkI= +go.opentelemetry.io/otel/sdk v1.13.0 h1:BHib5g8MvdqS65yo2vV1s6Le42Hm6rrw08qU6yz5JaM= +go.opentelemetry.io/otel/sdk v1.13.0/go.mod h1:YLKPx5+6Vx/o1TCUYYs+bpymtkmazOMT6zoRrC7AQ7I= +go.opentelemetry.io/otel/trace v1.0.1/go.mod h1:5g4i4fKLaX2BQpSBsxw8YYcgKpMMSW3x7ZTuYBr3sUk= +go.opentelemetry.io/otel/trace v1.13.0 h1:CBgRZ6ntv+Amuj1jDsMhZtlAPT6gbyIRdaIzFhfBSdY= +go.opentelemetry.io/otel/trace v1.13.0/go.mod h1:muCvmmO9KKpvuXSf3KKAXXB2ygNYHQ+ZfI5X08d3tds= +go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= +go.opentelemetry.io/proto/otlp v0.9.0/go.mod h1:1vKfU9rv61e9EVGthD1zNvUbiwPcimSsOPU9brfSHJg= +go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= +go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/atomic v1.10.0 h1:9qC72Qh0+3MqyJbAn8YU5xVq1frD8bn3JtD2oXtafVQ= +go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +go.uber.org/automaxprocs v1.5.2 h1:2LxUOGiR3O6tw8ui5sZa2LAaHnsviZdVOUZw4fvbnME= +go.uber.org/automaxprocs v1.5.2/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= +go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= +go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= +go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod 
h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= +golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= +golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= +golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= +golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= +golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 h1:Jvc7gsqn21cJHCmAWx0LiimpP18LZmUxkT5Mp7EZ1mI= +golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod 
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= +golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= +golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= +golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= +golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190327091125-710a502c58a2/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20211008194852-3b03d305991f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod 
h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys 
v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
+golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.0/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190327201419-c70d86f8b7cf/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= +golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= +golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= +golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE= +golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod 
h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= +golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= +google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= +google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= +google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= +google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= +google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= +google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg= +google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE= +google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= +google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= +google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= +google.golang.org/api v0.44.0/go.mod h1:EBOGZqzyhtvMDoxwS97ctnh0zUmYY6CxqXsc1AvkYD8= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= +google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.8 
h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= +google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= +google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= +google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod 
h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= +google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= +google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24= +google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= +google.golang.org/genproto v0.0.0-20230526161137-0005af68ea54 h1:9NWlQfY2ePejTmfwUH1OWwmznFa+0kKcHGPDvcPza9M= +google.golang.org/genproto v0.0.0-20230526161137-0005af68ea54/go.mod h1:zqTuNwFlFRsw5zIts5VnzLQxSRqh+CGOTVMlYbY0Eyk= +google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 h1:RFiFrvy37/mpSpdySBDrUdipW/dHwsRwh3J3+A9VgT4= +google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237/go.mod h1:Z5Iiy3jtmioajWHDGFk7CeugTyHtPvMHA4UTmUkyalE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 h1:NnYq6UN9ReLM9/Y01KWNOWyI5xQ9kbIms5GGJVwS/Yc= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= +google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= +google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= +google.golang.org/grpc 
v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8= +google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.37.1/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= +google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= +google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= +google.golang.org/grpc v1.41.0/go.mod h1:U3l9uK9J0sini8mHphKoXyaqDA/8VyGnDee1zzIUK6k= +google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 
+gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= +gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/ini.v1 v1.51.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/ini.v1 v1.51.1/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/ini.v1 v1.62.0 h1:duBzk771uxoUuOlyRLkHsygud9+5lrlGjdFBb4mSKDU= +gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= +gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= +gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= +gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20191120175047-4206685974f2/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +k8s.io/apimachinery v0.28.6 h1:RsTeR4z6S07srPg6XYrwXpTJVMXsjPXn0ODakMytSW0= +k8s.io/apimachinery v0.28.6/go.mod h1:QFNX/kCl/EMT2WTSz8k4WLCv2XnkOLMaL8GAVRMdpsA= +rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= +rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= +rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= 
+sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= +sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/tests/go_client/testcases/client_test.go b/tests/go_client/testcases/client_test.go new file mode 100644 index 0000000000000..5c203dfa7f25c --- /dev/null +++ b/tests/go_client/testcases/client_test.go @@ -0,0 +1,92 @@ +///go:build L0 + +package testcases + +import ( + "strings" + "testing" + "time" + + "github.com/milvus-io/milvus/tests/go_client/testcases/helper" + + clientv2 "github.com/milvus-io/milvus/client/v2" + "github.com/milvus-io/milvus/tests/go_client/base" + "github.com/milvus-io/milvus/tests/go_client/common" +) + +// test connect and close, connect again +func TestConnectClose(t *testing.T) { + // connect + ctx := helper.CreateContext(t, time.Second*common.DefaultTimeout) + mc, errConnect := base.NewMilvusClient(ctx, &defaultCfg) + common.CheckErr(t, errConnect, true) + + // verify that connect success + listOpt := clientv2.NewListCollectionOption() + _, errList := mc.ListCollections(ctx, listOpt) + common.CheckErr(t, errList, true) + + // close connect and verify + err := mc.Close(ctx) + common.CheckErr(t, err, true) + _, errList2 := mc.ListCollections(ctx, listOpt) + common.CheckErr(t, errList2, false, "service not ready[SDK=0]: not connected") + + // connect again + mc, errConnect2 := base.NewMilvusClient(ctx, &defaultCfg) + common.CheckErr(t, errConnect2, true) + _, errList3 := mc.ListCollections(ctx, listOpt) + common.CheckErr(t, errList3, true) +} + +func genInvalidClientConfig() []clientv2.ClientConfig { + invalidClientConfigs := []clientv2.ClientConfig{ + {Address: "aaa"}, // not exist address + {Address: strings.Split(*addr, ":")[0]}, // Address=localhost + {Address: strings.Split(*addr, ":")[1]}, // Address=19530 + {Address: *addr, Username: "aaa"}, // not exist username + {Address: *addr, Username: "root", Password: "aaa"}, // wrong password + {Address: *addr, DBName: "aaa"}, // not exist db + } + return invalidClientConfigs +} + +// test connect with timeout and invalid addr +func TestConnectInvalidAddr(t *testing.T) { + // connect + ctx := helper.CreateContext(t, time.Second*5) + for _, invalidCfg := range genInvalidClientConfig() { + _, errConnect := base.NewMilvusClient(ctx, &invalidCfg) + common.CheckErr(t, errConnect, false, "context deadline exceeded") + } +} + +// test connect repeatedly +func TestConnectRepeat(t *testing.T) { + // connect + ctx := helper.CreateContext(t, time.Second*10) + + _, errConnect := base.NewMilvusClient(ctx, &defaultCfg) + common.CheckErr(t, errConnect, true) + + // connect again + mc, errConnect2 := base.NewMilvusClient(ctx, &defaultCfg) + common.CheckErr(t, errConnect2, true) + + _, err := mc.ListCollections(ctx, clientv2.NewListCollectionOption()) + common.CheckErr(t, err, true) +} + +// test close repeatedly +func TestCloseRepeat(t *testing.T) { + // connect + ctx := helper.CreateContext(t, time.Second*10) + mc, errConnect2 := base.NewMilvusClient(ctx, &defaultCfg) + common.CheckErr(t, errConnect2, true) + + // close and again + err := mc.Close(ctx) + common.CheckErr(t, err, true) + err = mc.Close(ctx) + common.CheckErr(t, err, true) +} diff --git a/tests/go_client/testcases/collection_test.go b/tests/go_client/testcases/collection_test.go new file mode 100644 index 0000000000000..d55a9cc9ab229 --- /dev/null +++ b/tests/go_client/testcases/collection_test.go @@ -0,0 +1,950 @@ +package 
testcases + +import ( + "fmt" + "testing" + "time" + + hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper" + + "github.com/stretchr/testify/require" + + "github.com/milvus-io/milvus/pkg/log" + "go.uber.org/zap" + + clientv2 "github.com/milvus-io/milvus/client/v2" + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/tests/go_client/common" +) + +var prefix = "collection" + +// test create default floatVec and binaryVec collection +func TestCreateCollection(t *testing.T) { + t.Parallel() + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + for _, collectionFieldsType := range []hp.CollectionFieldsType{hp.Int64Vec, hp.VarcharBinary, hp.Int64VarcharSparseVec, hp.AllFields} { + fields := hp.FieldsFact.GenFieldsForCollection(collectionFieldsType, hp.TNewFieldsOption()) + schema := hp.GenSchema(hp.TNewSchemaOption().TWithFields(fields)) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(schema.CollectionName, schema)) + common.CheckErr(t, err, true) + + // has collections and verify + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) + + // list collections and verify + collections, err := mc.ListCollections(ctx, clientv2.NewListCollectionOption()) + common.CheckErr(t, err, true) + require.Contains(t, collections, schema.CollectionName) + } +} + +//func TestCreateCollection(t *testing.T) {} +func TestCreateAutoIdCollectionField(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true) + varcharField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithIsAutoID(true).WithMaxLength(common.MaxLength) + for _, pkField := range []*entity.Field{int64Field, varcharField} { + // pk field with name + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + require.True(t, coll.Schema.AutoID) + require.True(t, coll.Schema.Fields[0].AutoID) + + // insert + vecColumn := hp.GenColumnData(common.DefaultNb, vecField.DataType, *hp.TNewColumnOption()) + _, err = mc.Insert(ctx, clientv2.NewColumnBasedInsertOption(schema.CollectionName, vecColumn)) + common.CheckErr(t, err, true) + } +} + +// create collection and specify shard num +func TestCreateCollectionShards(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true) + for _, shard := range []int32{-1, 0, 2, 16} { + // pk field with name 
+ collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema).WithShardNum(shard)) + common.CheckErr(t, err, true) + + // verify field name + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + if shard < 1 { + shard = 1 + } + require.Equal(t, shard, coll.ShardNum) + } +} + +// test create auto collection with schema +func TestCreateAutoIdCollectionSchema(t *testing.T) { + t.Skip("waiting for valid AutoId from schema params") + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + for _, pkFieldType := range []entity.FieldType{entity.FieldTypeVarChar, entity.FieldTypeInt64} { + pkField := entity.NewField().WithName("pk").WithDataType(pkFieldType).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + // pk field with name + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithAutoID(true) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + log.Info("schema autoID", zap.Bool("schemaAuto", coll.Schema.AutoID)) + log.Info("field autoID", zap.Bool("fieldAuto", coll.Schema.Fields[0].AutoID)) + + // insert + vecColumn := hp.GenColumnData(common.DefaultNb, vecField.DataType, *hp.TNewColumnOption()) + _, err = mc.Insert(ctx, clientv2.NewColumnBasedInsertOption(schema.CollectionName, vecColumn)) + common.CheckErr(t, err, false, "field pk not passed") + } +} + +// test create auto collection with collection option +func TestCreateAutoIdCollection(t *testing.T) { + t.Skip("waiting for valid AutoId from collection option") + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + for _, pkFieldType := range []entity.FieldType{entity.FieldTypeVarChar, entity.FieldTypeInt64} { + pkField := entity.NewField().WithName("pk").WithDataType(pkFieldType).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + // pk field with name + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema).WithAutoID(true)) + common.CheckErr(t, err, true) + + // verify field name + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + log.Info("schema autoID", zap.Bool("schemaAuto", coll.Schema.AutoID)) + log.Info("field autoID", zap.Bool("fieldAuto", coll.Schema.Fields[0].AutoID)) + + // insert + vecColumn := hp.GenColumnData(common.DefaultNb, vecField.DataType, *hp.TNewColumnOption()) + _, err = mc.Insert(ctx, clientv2.NewColumnBasedInsertOption(schema.CollectionName, vecColumn)) + common.CheckErr(t, err, false, "field pk not passed") + } +} + +func TestCreateJsonCollection(t 
*testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + pkField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + jsonField := entity.NewField().WithName(common.DefaultJSONFieldName).WithDataType(entity.FieldTypeJSON) + + // pk field with name + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithField(jsonField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) +} + +func TestCreateArrayCollections(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + pkField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField) + + for _, eleType := range hp.GetAllArrayElementType() { + arrayField := entity.NewField().WithName(hp.GetFieldNameByElementType(eleType)).WithDataType(entity.FieldTypeArray).WithElementType(eleType).WithMaxCapacity(common.MaxCapacity) + if eleType == entity.FieldTypeVarChar { + arrayField.WithMaxLength(common.MaxLength) + } + schema.WithField(arrayField) + } + + // pk field with name + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) +} + +// test create collection with partition key not supported field type +func TestCreateCollectionPartitionKey(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + t.Parallel() + + for _, fieldType := range []entity.FieldType{entity.FieldTypeVarChar, entity.FieldTypeInt64} { + partitionKeyField := entity.NewField().WithName("par_key").WithDataType(fieldType).WithIsPartitionKey(true).WithMaxLength(common.TestMaxLen) + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField).WithField(partitionKeyField) + + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + + for _, field := range coll.Schema.Fields { + if field.Name == 
"par_key" { + require.True(t, field.IsPartitionKey) + } + } + + // verify partitions + partitions, err := mc.ListPartitions(ctx, clientv2.NewListPartitionOption(collName)) + require.Len(t, partitions, common.DefaultPartitionNum) + } +} + +// test create partition key collection WithPartitionNum +func TestCreateCollectionPartitionKeyNumPartition(t *testing.T) { + t.Skip("Waiting for WithPartitionNum") + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + partitionKeyField := entity.NewField().WithName("par_key").WithDataType(entity.FieldTypeInt64).WithIsPartitionKey(true) + t.Parallel() + + for _, numPartition := range []int64{1, 128, 64, 4096} { + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField).WithField(partitionKeyField) + + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify partitions num + partitions, err := mc.ListPartitions(ctx, clientv2.NewListPartitionOption(collName)) + require.Len(t, partitions, int(numPartition)) + } +} + +func TestCreateCollectionDynamicSchema(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + pkField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithDynamicFieldEnabled(true) + // pk field with name + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) + + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(schema.CollectionName)) + require.True(t, coll.Schema.EnableDynamicField) + + // insert dynamic + columnOption := *hp.TNewColumnOption() + varcharColumn := hp.GenColumnData(common.DefaultNb, entity.FieldTypeVarChar, columnOption) + vecColumn := hp.GenColumnData(common.DefaultNb, entity.FieldTypeFloatVector, columnOption) + dynamicData := hp.GenDynamicFieldData(0, common.DefaultNb) + _, err = mc.Insert(ctx, clientv2.NewColumnBasedInsertOption(schema.CollectionName, varcharColumn, vecColumn).WithColumns(dynamicData...)) + common.CheckErr(t, err, true) +} + +func TestCreateCollectionDynamic(t *testing.T) { + t.Skip("waiting for dynamicField alignment") + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + pkField := 
entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField) + // pk field with name + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema).WithDynamicSchema(true)) + common.CheckErr(t, err, true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) + + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(schema.CollectionName)) + log.Info("collection dynamic", zap.Bool("collectionSchema", coll.Schema.EnableDynamicField)) + //require.True(t, coll.Schema.Fields[0].IsDynamic) + + // insert dynamic + columnOption := *hp.TNewColumnOption() + varcharColumn := hp.GenColumnData(common.DefaultNb, entity.FieldTypeVarChar, columnOption) + vecColumn := hp.GenColumnData(common.DefaultNb, entity.FieldTypeFloatVector, columnOption) + dynamicData := hp.GenDynamicFieldData(0, common.DefaultNb) + _, err = mc.Insert(ctx, clientv2.NewColumnBasedInsertOption(schema.CollectionName, varcharColumn, vecColumn).WithColumns(dynamicData...)) + common.CheckErr(t, err, false, "field dynamicNumber does not exist") +} + +func TestCreateCollectionAllFields(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName) + + // gen all fields except sparse vector + fields := hp.FieldsFactory{}.GenFieldsForCollection(hp.AllFields, hp.TNewFieldsOption()) + for _, field := range fields { + schema.WithField(field) + } + + // pk field with name + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) +} + +func TestCreateCollectionSparseVector(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + sparseVecField := entity.NewField().WithName(common.DefaultSparseVecFieldName).WithDataType(entity.FieldTypeSparseVector) + pkField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(sparseVecField) + // pk field with name + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema).WithDynamicSchema(true)) + common.CheckErr(t, err, true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) +} + +func TestCreateCollectionWithValidFieldName(t *testing.T) { + t.Parallel() + // connect + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // create collection with valid field name + for _, name := range common.GenValidNames() { + collName := common.GenRandomString(prefix, 6) + + // pk field with name + pkField := entity.NewField().WithName(name).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + 
vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + require.Equal(t, name, coll.Schema.Fields[0].Name) + } +} + +func genDefaultSchema() *entity.Schema { + int64Pk := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + varchar := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithMaxLength(common.TestMaxLen) + floatVec := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + binaryVec := entity.NewField().WithName(common.DefaultBinaryVecFieldName).WithDataType(entity.FieldTypeBinaryVector).WithDim(common.DefaultDim) + + schema := entity.NewSchema().WithField(int64Pk).WithField(varchar).WithField(floatVec).WithField(binaryVec) + return schema +} + +// create collection with valid name +func TestCreateCollectionWithValidName(t *testing.T) { + t.Parallel() + // connect + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + for _, name := range common.GenValidNames() { + schema := genDefaultSchema().WithName(name) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(name, schema)) + common.CheckErr(t, err, true) + + collections, err := mc.ListCollections(ctx, clientv2.NewListCollectionOption()) + common.CheckErr(t, err, true) + require.Contains(t, collections, name) + + err = mc.DropCollection(ctx, clientv2.NewDropCollectionOption(name)) + common.CheckErr(t, err, true) + } +} + +// create collection with invalid field name +func TestCreateCollectionWithInvalidFieldName(t *testing.T) { + t.Parallel() + // connect + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // create collection with invalid field name + for _, invalidName := range common.GenInvalidNames() { + log.Debug("TestCreateCollectionWithInvalidFieldName", zap.String("fieldName", invalidName)) + pkField := entity.NewField().WithName(invalidName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + schema := entity.NewSchema().WithName("aaa").WithField(pkField) + collOpt := clientv2.NewCreateCollectionOption("aaa", schema) + + err := mc.CreateCollection(ctx, collOpt) + common.CheckErr(t, err, false, "field name should not be empty", + "The first character of a field name must be an underscore or letter", + "Field name cannot only contain numbers, letters, and underscores", + "The length of a field name must be less than 255 characters") + } +} + +// create collection with invalid collection name: invalid str, schemaName isn't equal to collectionName, schema name is empty +func TestCreateCollectionWithInvalidCollectionName(t *testing.T) { + t.Parallel() + // connect + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + collName := common.GenRandomString(prefix, 6) + + // create collection and schema no name + schema := genDefaultSchema() + err2 := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, 
schema)) + common.CheckErr(t, err2, false, "collection name should not be empty") + + // create collection with invalid schema name + for _, invalidName := range common.GenInvalidNames() { + log.Debug("TestCreateCollectionWithInvalidCollectionName", zap.String("collectionName", invalidName)) + + // schema has invalid name + schema.WithName(invalidName) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "collection name should not be empty", + "the first character of a collection name must be an underscore or letter", + "collection name can only contain numbers, letters and underscores", + "the length of a collection name must be less than 255 characters") + + // collection option has invalid name + schema.WithName(collName) + err2 := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(invalidName, schema)) + common.CheckErr(t, err2, false, "collection name matches schema name") + } + + // collection name not equal to schema name + schema.WithName(collName) + err3 := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(common.GenRandomString("pre", 4), schema)) + common.CheckErr(t, err3, false, "collection name matches schema name") +} + +// create collection missing pk field or vector field +func TestCreateCollectionInvalidFields(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + type invalidFieldsStruct struct { + fields []*entity.Field + errMsg string + } + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + pkField2 := entity.NewField().WithName("pk").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + varcharField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar) + stringField := entity.NewField().WithName("str").WithDataType(entity.FieldTypeString) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + noneField := entity.NewField().WithName("none").WithDataType(entity.FieldTypeNone) + invalidFields := []invalidFieldsStruct{ + // TODO https://github.com/milvus-io/milvus/issues/33199 + //{fields: []*entity.Field{pkField}, errMsg: "vector field not set"}, + {fields: []*entity.Field{vecField}, errMsg: "primary key is not specified"}, + {fields: []*entity.Field{pkField, pkField2, vecField}, errMsg: "there are more than one primary key"}, + {fields: []*entity.Field{pkField, vecField, noneField}, errMsg: "data type None is not valid"}, + {fields: []*entity.Field{pkField, vecField, stringField}, errMsg: "string data type not supported yet, please use VarChar type instead"}, + {fields: []*entity.Field{pkField, vecField, varcharField}, errMsg: "type param(max_length) should be specified for varChar field"}, + } + + collName := common.GenRandomString(prefix, 6) + for _, invalidField := range invalidFields { + schema := entity.NewSchema().WithName(collName) + for _, field := range invalidField.fields { + schema.WithField(field) + } + collOpt := clientv2.NewCreateCollectionOption(collName, schema) + err := mc.CreateCollection(ctx, collOpt) + common.CheckErr(t, err, false, invalidField.errMsg) + } +} + +// create autoID or not collection with non-int64 and non-varchar field +func TestCreateCollectionInvalidAutoPkField(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := 
createDefaultMilvusClient(ctx, t) + t.Parallel() + // create collection with autoID true or not + collName := common.GenRandomString(prefix, 6) + + for _, autoId := range []bool{true, false} { + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + // pk field type: non-int64 and non-varchar + for _, fieldType := range hp.GetInvalidPkFieldType() { + invalidPkField := entity.NewField().WithName("pk").WithDataType(fieldType).WithIsPrimaryKey(true) + schema := entity.NewSchema().WithName(collName).WithField(vecField).WithField(invalidPkField).WithAutoID(autoId) + errNonInt64Field := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, errNonInt64Field, false, "the data type of primary key should be Int64 or VarChar") + } + } +} + +// test create collection with duplicate field name +func TestCreateCollectionDuplicateField(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // duplicate field + pkField := entity.NewField().WithName("id").WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true) + pkField2 := entity.NewField().WithName("id").WithDataType(entity.FieldTypeVarChar) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + + // two vector fields have same name + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithField(vecField) + errDupField := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, errDupField, false, "duplicated field name") + + // two named "id" fields, one is pk field and other is scalar field + schema2 := entity.NewSchema().WithName(collName).WithField(pkField).WithField(pkField2).WithField(vecField).WithAutoID(true) + errDupField2 := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema2)) + common.CheckErr(t, errDupField2, false, "duplicated field name") +} + +// test create collection with partition key not supported field type +func TestCreateCollectionInvalidPartitionKeyType(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + collName := common.GenRandomString(prefix, 6) + + t.Parallel() + for _, fieldType := range hp.GetInvalidPartitionKeyFieldType() { + log.Debug("TestCreateCollectionInvalidPartitionKeyType", zap.Any("partitionKeyFieldType", fieldType)) + partitionKeyField := entity.NewField().WithName("parKey").WithDataType(fieldType).WithIsPartitionKey(true) + if fieldType == entity.FieldTypeArray { + partitionKeyField.WithElementType(entity.FieldTypeInt64) + } + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField).WithField(partitionKeyField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "the data type of partition key should be Int64 or VarChar") + } +} + +// partition key field cannot be primary field, and there can only be one partition key field +func 
TestCreateCollectionPartitionKeyPk(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsPartitionKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + collName := common.GenRandomString(prefix, 6) + + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "the partition key field must not be primary field") +} + +// can only be one partition key field +func TestCreateCollectionPartitionKeyNum(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + collName := common.GenRandomString(prefix, 6) + + pkField1 := entity.NewField().WithName("pk_1").WithDataType(entity.FieldTypeInt64).WithIsPartitionKey(true) + pkField2 := entity.NewField().WithName("pk_2").WithDataType(entity.FieldTypeVarChar).WithMaxLength(common.TestMaxLen).WithIsPartitionKey(true) + + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField).WithField(pkField1).WithField(pkField2) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "there are more than one partition key") +} + +func TestPartitionKeyInvalidNumPartition(t *testing.T) { + t.Skip("Waiting for num partition") + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // prepare field and schema + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + pkField1 := entity.NewField().WithName("partitionKeyField").WithDataType(entity.FieldTypeInt64).WithIsPartitionKey(true) + + // schema + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField).WithField(pkField1) + invalidNumPartitionStruct := []struct { + numPartitions int64 + errMsg string + }{ + {common.MaxPartitionNum + 1, "exceeds max configuration (4096)"}, + {-1, "the specified partitions should be greater than 0 if partition key is used"}, + } + for _, npStruct := range invalidNumPartitionStruct { + + // create collection with num partitions + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, npStruct.errMsg) + } +} + +// test create collection with multi auto id +func TestCreateCollectionMultiAutoId(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithField( + 
entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true)).WithField( + entity.NewField().WithName("dupInt").WithDataType(entity.FieldTypeInt64).WithIsAutoID(true)).WithField( + entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim), + ).WithName(collName) + errMultiAuto := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, errMultiAuto, false, "only one field can speficy AutoID with true") +} + +// test create collection with different autoId between pk field and schema +func TestCreateCollectionInconsistentAutoId(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + for _, autoId := range []bool{true, false} { + log.Debug("TestCreateCollectionInconsistentAutoId", zap.Bool("autoId", autoId)) + collName := common.GenRandomString(prefix, 6) + // field and schema have opposite autoID + schema := entity.NewSchema().WithField( + entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(autoId)).WithField( + entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim), + ).WithName(collName).WithAutoID(!autoId) + + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // describe collection + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + require.EqualValues(t, autoId, coll.Schema.AutoID) + for _, field := range coll.Schema.Fields { + if field.Name == common.DefaultInt64FieldName { + require.EqualValues(t, autoId, coll.Schema.Fields[0].AutoID) + } + } + } +} + +// create collection with field or schema description +func TestCreateCollectionDescription(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // gen field with description + pkDesc := "This is pk field" + schemaDesc := "This is schema" + collName := common.GenRandomString(prefix, 6) + + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithDescription(pkDesc) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithDescription(schemaDesc) + + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + require.EqualValues(t, schemaDesc, coll.Schema.Description) + for _, field := range coll.Schema.Fields { + if field.Name == common.DefaultInt64FieldName { + require.Equal(t, pkDesc, field.Description) + } else { + require.Empty(t, field.Description) + } + } +} + +// test invalid dim of binary field +func TestCreateBinaryCollectionInvalidDim(t *testing.T) { + t.Parallel() + type invalidDimStruct struct { + dim int64 + errMsg string + } + + invalidDims := []invalidDimStruct{ + {dim: 10, errMsg: "should be multiple of 8"}, + {dim: 0, errMsg: "should be in range 2 ~ 
32768"}, + {dim: 1, errMsg: "should be in range 2 ~ 32768"}, + {dim: common.MaxDim * 9, errMsg: "binary vector dimension should be in range 2 ~ 262144"}, + {dim: common.MaxDim*8 + 1, errMsg: "binary vector dimension should be multiple of 8"}, + } + + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + for _, invalidDim := range invalidDims { + log.Debug("TestCreateBinaryCollectionInvalidDim", zap.Int64("dim", invalidDim.dim)) + collName := common.GenRandomString(prefix, 6) + // field and schema have opposite autoID + schema := entity.NewSchema().WithField( + entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)).WithField( + entity.NewField().WithName(common.DefaultBinaryVecFieldName).WithDataType(entity.FieldTypeBinaryVector).WithDim(invalidDim.dim), + ).WithName(collName) + + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, invalidDim.errMsg) + } +} + +// test invalid dim of float vector +func TestCreateFloatCollectionInvalidDim(t *testing.T) { + t.Parallel() + type invalidDimStruct struct { + dim string + errMsg string + } + + invalidDims := []invalidDimStruct{ + {dim: "0", errMsg: "should be in range 2 ~ 32768"}, + {dim: "1", errMsg: "should be in range 2 ~ 32768"}, + {dim: "", errMsg: "invalid syntax"}, + {dim: "中文", errMsg: "invalid syntax"}, + {dim: "%$#", errMsg: "invalid syntax"}, + {dim: fmt.Sprintf("%d", common.MaxDim+1), errMsg: "float vector dimension should be in range 2 ~ 32768"}, + } + + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + for _, vecType := range []entity.FieldType{entity.FieldTypeFloatVector, entity.FieldTypeFloat16Vector, entity.FieldTypeBFloat16Vector} { + for _, invalidDim := range invalidDims { + log.Debug("TestCreateBinaryCollectionInvalidDim", zap.String("dim", invalidDim.dim)) + collName := common.GenRandomString(prefix, 6) + + schema := entity.NewSchema().WithField( + entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)).WithField( + entity.NewField().WithName("pk").WithDataType(vecType).WithTypeParams(entity.TypeParamDim, invalidDim.dim), + ).WithName(collName) + + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, invalidDim.errMsg) + } + } +} + +func TestCreateVectorWithoutDim(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + collName := common.GenRandomString(prefix, 6) + + schema := entity.NewSchema().WithField( + entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)).WithField( + entity.NewField().WithName("vec").WithDataType(entity.FieldTypeFloatVector), + ).WithName(collName) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "dimension is not defined in field type params, check type param `dim` for vector field") +} + +// specify dim for sparse vector -> error +func TestCreateCollectionSparseVectorWithDim(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + collName := common.GenRandomString(prefix, 6) + + schema := entity.NewSchema().WithField( + 
entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)).WithField( + entity.NewField().WithName("sparse").WithDataType(entity.FieldTypeSparseVector).WithDim(common.DefaultDim), + ).WithName(collName) + + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "dim should not be specified for sparse vector field sparse") +} + +func TestCreateArrayFieldInvalidCapacity(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + arrayField := entity.NewField().WithName(common.DefaultArrayFieldName).WithDataType(entity.FieldTypeArray).WithElementType(entity.FieldTypeFloat) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithField(arrayField) + + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "type param(max_capacity) should be specified for array field") + + // invalid Capacity + for _, invalidCapacity := range []int64{-1, 0, common.MaxCapacity + 1} { + arrayField.WithMaxCapacity(invalidCapacity) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "the maximum capacity specified for a Array should be in (0, 4096]") + } +} + +// test create collection varchar array with invalid max length +func TestCreateVarcharArrayInvalidLength(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + arrayVarcharField := entity.NewField().WithName(common.DefaultArrayFieldName).WithDataType(entity.FieldTypeArray).WithElementType(entity.FieldTypeVarChar).WithMaxCapacity(100) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithField(arrayVarcharField) + + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "type param(max_length) should be specified for varChar field") + + // invalid Capacity + for _, invalidLength := range []int64{-1, 0, common.MaxLength + 1} { + arrayVarcharField.WithMaxLength(invalidLength) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "the maximum length specified for a VarChar should be in (0, 65535]") + } +} + +// test create collection varchar array with invalid max length +func TestCreateVarcharInvalidLength(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + + varcharField := 
entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + + schema := entity.NewSchema().WithName(collName).WithField(varcharField).WithField(vecField) + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "type param(max_length) should be specified for varChar field") + + // invalid Capacity + for _, invalidLength := range []int64{-1, 0, common.MaxLength + 1} { + varcharField.WithMaxLength(invalidLength) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "the maximum length specified for a VarChar should be in (0, 65535]") + } +} + +func TestCreateArrayNotSupportedFieldType(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + // not supported ElementType: Array, Json, FloatVector, BinaryVector + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + for _, fieldType := range []entity.FieldType{entity.FieldTypeArray, entity.FieldTypeJSON, entity.FieldTypeBinaryVector, entity.FieldTypeFloatVector} { + field := entity.NewField().WithName("array").WithDataType(entity.FieldTypeArray).WithElementType(fieldType) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithField(field) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, fmt.Sprintf("element type %s is not supported", fieldType.Name())) + } +} + +// the num of vector fields > default limit=4 +func TestCreateMultiVectorExceed(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + schema := entity.NewSchema().WithName(collName).WithField(pkField) + for i := 0; i < common.MaxVectorFieldNum+1; i++ { + vecField := entity.NewField().WithName(fmt.Sprintf("vec_%d", i)).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + schema.WithField(vecField) + } + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, fmt.Sprintf("maximum vector field's number should be limited to %d", common.MaxVectorFieldNum)) +} + +//func TestCreateCollection(t *testing.T) {} +func TestCreateCollectionInvalidShards(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true) + for _, shard := range []int32{common.MaxShardNum + 1} { + // pk field with name + collName := common.GenRandomString(prefix, 
6) + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema).WithShardNum(shard)) + common.CheckErr(t, err, false, fmt.Sprintf("maximum shards's number should be limited to %d", common.MaxShardNum)) + } +} + +func TestCreateCollectionInvalid(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + type mSchemaErr struct { + schema *entity.Schema + errMsg string + } + mSchemaErrs := []mSchemaErr{ + {schema: nil, errMsg: "duplicated field name"}, + {schema: entity.NewSchema(), errMsg: "collection name should not be empty"}, + {schema: entity.NewSchema().WithName("aaa"), errMsg: "primary key is not specified"}, + {schema: entity.NewSchema().WithName("aaa").WithField(entity.NewField()), errMsg: "primary key is not specified"}, + {schema: entity.NewSchema().WithName("aaa").WithField(entity.NewField().WithIsPrimaryKey(true)), errMsg: "the data type of primary key should be Int64 or VarChar"}, + {schema: entity.NewSchema().WithName("aaa").WithField(entity.NewField().WithIsPrimaryKey(true).WithDataType(entity.FieldTypeVarChar)), errMsg: "field name should not be empty"}, + } + for _, mSchema := range mSchemaErrs { + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, mSchema.schema)) + common.CheckErr(t, err, false, mSchema.errMsg) + } +} diff --git a/tests/go_client/testcases/helper/collection_helper.go b/tests/go_client/testcases/helper/collection_helper.go new file mode 100644 index 0000000000000..347468142c9de --- /dev/null +++ b/tests/go_client/testcases/helper/collection_helper.go @@ -0,0 +1,11 @@ +package helper + +type CreateCollectionParams struct { + CollectionFieldsType CollectionFieldsType // collection fields type +} + +func NewCreateCollectionParams(collectionFieldsType CollectionFieldsType) *CreateCollectionParams { + return &CreateCollectionParams{ + CollectionFieldsType: collectionFieldsType, + } +} diff --git a/tests/go_client/testcases/helper/data_helper.go b/tests/go_client/testcases/helper/data_helper.go new file mode 100644 index 0000000000000..56660c8c05e31 --- /dev/null +++ b/tests/go_client/testcases/helper/data_helper.go @@ -0,0 +1,324 @@ +package helper + +import ( + "bytes" + "strconv" + + "github.com/milvus-io/milvus/client/v2/column" + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/common" + "go.uber.org/zap" +) + +// insert params +type InsertParams struct { + Schema *entity.Schema + PartitionName string + Start int + Nb int + IsRows bool +} + +func NewInsertParams(schema *entity.Schema, nb int) *InsertParams { + return &InsertParams{ + Schema: schema, + Nb: nb, + } +} + +func (opt *InsertParams) TWithPartitionName(partitionName string) *InsertParams { + opt.PartitionName = partitionName + return opt +} + +func (opt *InsertParams) TWithStart(start int) *InsertParams { + opt.Start = start + return opt +} + +func (opt *InsertParams) TWithIsRows(isRows bool) *InsertParams { + opt.IsRows = isRows + return opt +} + +// GenColumnDataOption -- create column data -- +type GenColumnOption struct { + dim int64 + maxLen int64 + start int + fieldName string + elementType entity.FieldType +} + +func (opt *GenColumnOption) TWithDim(dim int64) *GenColumnOption { + opt.dim = dim + return opt +} + +func (opt *GenColumnOption) 
TWithMaxLen(maxLen int64) *GenColumnOption { + opt.maxLen = maxLen + return opt +} + +func (opt *GenColumnOption) TWithStart(start int) *GenColumnOption { + opt.start = start + return opt +} + +func (opt *GenColumnOption) TWithFieldName(fieldName string) *GenColumnOption { + opt.fieldName = fieldName + return opt +} + +func (opt *GenColumnOption) TWithElementType(eleType entity.FieldType) *GenColumnOption { + opt.elementType = eleType + return opt +} + +func TNewColumnOption() *GenColumnOption { + return &GenColumnOption{ + dim: common.DefaultDim, + maxLen: common.TestMaxLen, + start: 0, + } +} + +func GenArrayColumnData(nb int, eleType entity.FieldType, option GenColumnOption) column.Column { + start := option.start + fieldName := option.fieldName + if option.fieldName == "" { + fieldName = GetFieldNameByElementType(eleType) + } + capacity := int(option.maxLen) + switch eleType { + case entity.FieldTypeBool: + boolValues := make([][]bool, 0, nb) + for i := start; i < start+nb; i++ { + boolArray := make([]bool, 0, capacity) + for j := 0; j < capacity; j++ { + boolArray = append(boolArray, i%2 == 0) + } + boolValues = append(boolValues, boolArray) + } + return column.NewColumnBoolArray(fieldName, boolValues) + case entity.FieldTypeInt8: + int8Values := make([][]int8, 0, nb) + for i := start; i < start+nb; i++ { + int8Array := make([]int8, 0, capacity) + for j := 0; j < capacity; j++ { + int8Array = append(int8Array, int8(i+j)) + } + int8Values = append(int8Values, int8Array) + } + return column.NewColumnInt8Array(fieldName, int8Values) + case entity.FieldTypeInt16: + int16Values := make([][]int16, 0, nb) + for i := start; i < start+nb; i++ { + int16Array := make([]int16, 0, capacity) + for j := 0; j < capacity; j++ { + int16Array = append(int16Array, int16(i+j)) + } + int16Values = append(int16Values, int16Array) + } + return column.NewColumnInt16Array(fieldName, int16Values) + case entity.FieldTypeInt32: + int32Values := make([][]int32, 0, nb) + for i := start; i < start+nb; i++ { + int32Array := make([]int32, 0, capacity) + for j := 0; j < capacity; j++ { + int32Array = append(int32Array, int32(i+j)) + } + int32Values = append(int32Values, int32Array) + } + return column.NewColumnInt32Array(fieldName, int32Values) + case entity.FieldTypeInt64: + int64Values := make([][]int64, 0, nb) + for i := start; i < start+nb; i++ { + int64Array := make([]int64, 0, capacity) + for j := 0; j < capacity; j++ { + int64Array = append(int64Array, int64(i+j)) + } + int64Values = append(int64Values, int64Array) + } + return column.NewColumnInt64Array(fieldName, int64Values) + case entity.FieldTypeFloat: + floatValues := make([][]float32, 0, nb) + for i := start; i < start+nb; i++ { + floatArray := make([]float32, 0, capacity) + for j := 0; j < capacity; j++ { + floatArray = append(floatArray, float32(i+j)) + } + floatValues = append(floatValues, floatArray) + } + return column.NewColumnFloatArray(fieldName, floatValues) + case entity.FieldTypeDouble: + doubleValues := make([][]float64, 0, nb) + for i := start; i < start+nb; i++ { + doubleArray := make([]float64, 0, capacity) + for j := 0; j < capacity; j++ { + doubleArray = append(doubleArray, float64(i+j)) + } + doubleValues = append(doubleValues, doubleArray) + } + return column.NewColumnDoubleArray(fieldName, doubleValues) + case entity.FieldTypeVarChar: + varcharValues := make([][][]byte, 0, nb) + for i := start; i < start+nb; i++ { + varcharArray := make([][]byte, 0, capacity) + for j := 0; j < capacity; j++ { + var buf bytes.Buffer + 
buf.WriteString(strconv.Itoa(i + j))
+				varcharArray = append(varcharArray, buf.Bytes())
+			}
+			varcharValues = append(varcharValues, varcharArray)
+		}
+		return column.NewColumnVarCharArray(fieldName, varcharValues)
+	default:
+		log.Fatal("GenArrayColumnData failed", zap.Any("ElementType", eleType))
+		return nil
+	}
+}
+
+// GenColumnData generates a column of test data for the given field type, following the settings in GenColumnOption.
+func GenColumnData(nb int, fieldType entity.FieldType, option GenColumnOption) column.Column {
+	dim := int(option.dim)
+	maxLen := int(option.maxLen)
+	start := option.start
+	fieldName := option.fieldName
+	if option.fieldName == "" {
+		fieldName = GetFieldNameByFieldType(fieldType, option.elementType)
+	}
+	switch fieldType {
+	case entity.FieldTypeInt64:
+		int64Values := make([]int64, 0, nb)
+		for i := start; i < start+nb; i++ {
+			int64Values = append(int64Values, int64(i))
+		}
+		return column.NewColumnInt64(fieldName, int64Values)
+
+	case entity.FieldTypeInt8:
+		int8Values := make([]int8, 0, nb)
+		for i := start; i < start+nb; i++ {
+			int8Values = append(int8Values, int8(i))
+		}
+		return column.NewColumnInt8(fieldName, int8Values)
+
+	case entity.FieldTypeInt16:
+		int16Values := make([]int16, 0, nb)
+		for i := start; i < start+nb; i++ {
+			int16Values = append(int16Values, int16(i))
+		}
+		return column.NewColumnInt16(fieldName, int16Values)
+
+	case entity.FieldTypeInt32:
+		int32Values := make([]int32, 0, nb)
+		for i := start; i < start+nb; i++ {
+			int32Values = append(int32Values, int32(i))
+		}
+		return column.NewColumnInt32(fieldName, int32Values)
+
+	case entity.FieldTypeBool:
+		boolValues := make([]bool, 0, nb)
+		for i := start; i < start+nb; i++ {
+			boolValues = append(boolValues, i%2 == 0)
+		}
+		return column.NewColumnBool(fieldName, boolValues)
+
+	case entity.FieldTypeFloat:
+		floatValues := make([]float32, 0, nb)
+		for i := start; i < start+nb; i++ {
+			floatValues = append(floatValues, float32(i))
+		}
+		return column.NewColumnFloat(fieldName, floatValues)
+
+	case entity.FieldTypeDouble:
+		floatValues := make([]float64, 0, nb)
+		for i := start; i < start+nb; i++ {
+			floatValues = append(floatValues, float64(i))
+		}
+		return column.NewColumnDouble(fieldName, floatValues)
+
+	case entity.FieldTypeVarChar:
+		varcharValues := make([]string, 0, nb)
+		for i := start; i < start+nb; i++ {
+			varcharValues = append(varcharValues, strconv.Itoa(i))
+		}
+		return column.NewColumnVarChar(fieldName, varcharValues)
+
+	case entity.FieldTypeArray:
+		return GenArrayColumnData(nb, option.elementType, option)
+
+	case entity.FieldTypeFloatVector:
+		vecFloatValues := make([][]float32, 0, nb)
+		for i := start; i < start+nb; i++ {
+			vec := common.GenFloatVector(dim)
+			vecFloatValues = append(vecFloatValues, vec)
+		}
+		return column.NewColumnFloatVector(fieldName, int(option.dim), vecFloatValues)
+	case entity.FieldTypeBinaryVector:
+		binaryVectors := make([][]byte, 0, nb)
+		for i := 0; i < nb; i++ {
+			vec := common.GenBinaryVector(dim)
+			binaryVectors = append(binaryVectors, vec)
+		}
+		return column.NewColumnBinaryVector(fieldName, dim, binaryVectors)
+	case entity.FieldTypeFloat16Vector:
+		fp16Vectors := make([][]byte, 0, nb)
+		for i := start; i < start+nb; i++ {
+			vec := common.GenFloat16Vector(dim)
+			fp16Vectors = append(fp16Vectors, vec)
+		}
+		return column.NewColumnFloat16Vector(fieldName, dim, fp16Vectors)
+	case entity.FieldTypeBFloat16Vector:
+		bf16Vectors := make([][]byte, 0, nb)
+		for i := start; i < start+nb; i++ {
+			vec := common.GenBFloat16Vector(dim)
+			bf16Vectors = append(bf16Vectors, vec)
+		}
+		return
column.NewColumnBFloat16Vector(fieldName, dim, bf16Vectors) + case entity.FieldTypeSparseVector: + vectors := make([]entity.SparseEmbedding, 0, nb) + for i := start; i < start+nb; i++ { + vec := common.GenSparseVector(maxLen) + vectors = append(vectors, vec) + } + return column.NewColumnSparseVectors(fieldName, vectors) + default: + log.Fatal("GenColumnData failed", zap.Any("FieldType", fieldType)) + return nil + } +} + +func GenDynamicFieldData(start int, nb int) []column.Column { + type ListStruct struct { + List []int64 `json:"list" milvus:"name:list"` + } + + // gen number, string bool list data column + numberValues := make([]int32, 0, nb) + stringValues := make([]string, 0, nb) + boolValues := make([]bool, 0, nb) + //listValues := make([][]byte, 0, Nb) + //m := make(map[string]interface{}) + for i := start; i < start+nb; i++ { + numberValues = append(numberValues, int32(i)) + stringValues = append(stringValues, strconv.Itoa(i)) + boolValues = append(boolValues, i%3 == 0) + //m["list"] = ListStruct{ + // List: []int64{int64(i), int64(i + 1)}, + //} + //bs, err := json.Marshal(m) + //if err != nil { + // log.Fatalf("Marshal json field failed: %s", err) + //} + //listValues = append(listValues, bs) + } + data := []column.Column{ + column.NewColumnInt32(common.DefaultDynamicNumberField, numberValues), + column.NewColumnString(common.DefaultDynamicStringField, stringValues), + column.NewColumnBool(common.DefaultDynamicBoolField, boolValues), + //entity.NewColumnJSONBytes(DefaultDynamicListField, listValues), + } + return data +} diff --git a/tests/go_client/testcases/helper/field_helper.go b/tests/go_client/testcases/helper/field_helper.go new file mode 100644 index 0000000000000..1e2a321e62e44 --- /dev/null +++ b/tests/go_client/testcases/helper/field_helper.go @@ -0,0 +1,299 @@ +package helper + +import ( + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/common" + "go.uber.org/zap" +) + +func GetFieldNameByElementType(t entity.FieldType) string { + switch t { + case entity.FieldTypeBool: + return common.DefaultBoolArrayField + case entity.FieldTypeInt8: + return common.DefaultInt8ArrayField + case entity.FieldTypeInt16: + return common.DefaultInt16ArrayField + case entity.FieldTypeInt32: + return common.DefaultInt32ArrayField + case entity.FieldTypeInt64: + return common.DefaultInt64ArrayField + case entity.FieldTypeFloat: + return common.DefaultFloatArrayField + case entity.FieldTypeDouble: + return common.DefaultDoubleArrayField + case entity.FieldTypeVarChar: + return common.DefaultVarcharArrayField + default: + return common.DefaultArrayFieldName + } +} + +func GetFieldNameByFieldType(t entity.FieldType, eleType ...entity.FieldType) string { + switch t { + case entity.FieldTypeBool: + return common.DefaultBoolFieldName + case entity.FieldTypeInt8: + return common.DefaultInt8FieldName + case entity.FieldTypeInt16: + return common.DefaultInt16FieldName + case entity.FieldTypeInt32: + return common.DefaultInt32FieldName + case entity.FieldTypeInt64: + return common.DefaultInt64FieldName + case entity.FieldTypeFloat: + return common.DefaultFloatFieldName + case entity.FieldTypeDouble: + return common.DefaultDoubleFieldName + case entity.FieldTypeVarChar: + return common.DefaultVarcharFieldName + case entity.FieldTypeJSON: + return common.DefaultJSONFieldName + case entity.FieldTypeArray: + return GetFieldNameByElementType(eleType[0]) + case entity.FieldTypeBinaryVector: + return 
common.DefaultBinaryVecFieldName + case entity.FieldTypeFloatVector: + return common.DefaultFloatVecFieldName + case entity.FieldTypeFloat16Vector: + return common.DefaultFloat16VecFieldName + case entity.FieldTypeBFloat16Vector: + return common.DefaultBFloat16VecFieldName + case entity.FieldTypeSparseVector: + return common.DefaultSparseVecFieldName + default: + return "" + } +} + +type CollectionFieldsType int32 + +const ( + // FieldTypeNone zero value place holder + Int64Vec CollectionFieldsType = 1 // int64 + floatVec + VarcharBinary CollectionFieldsType = 2 // varchar + binaryVec + Int64VecJSON CollectionFieldsType = 3 // int64 + floatVec + json + Int64VecArray CollectionFieldsType = 4 // int64 + floatVec + array + Int64VarcharSparseVec CollectionFieldsType = 5 // int64 + varchar + sparse vector + Int64MultiVec CollectionFieldsType = 6 // int64 + floatVec + binaryVec + fp16Vec + bf16vec + AllFields CollectionFieldsType = 7 // all fields excepted sparse +) + +type GenFieldsOption struct { + AutoID bool // is auto id + Dim int64 + IsDynamic bool + MaxLength int64 // varchar len or array capacity + MaxCapacity int64 + IsPartitionKey bool + ElementType entity.FieldType +} + +func TNewFieldsOption() *GenFieldsOption { + return &GenFieldsOption{ + AutoID: false, + Dim: common.DefaultDim, + MaxLength: common.TestMaxLen, + MaxCapacity: common.TestCapacity, + IsDynamic: false, + IsPartitionKey: false, + ElementType: entity.FieldTypeNone, + } +} + +func (opt *GenFieldsOption) TWithAutoID(autoID bool) *GenFieldsOption { + opt.AutoID = autoID + return opt +} + +func (opt *GenFieldsOption) TWithDim(dim int64) *GenFieldsOption { + opt.Dim = dim + return opt +} + +func (opt *GenFieldsOption) TWithIsDynamic(isDynamic bool) *GenFieldsOption { + opt.IsDynamic = isDynamic + return opt +} + +func (opt *GenFieldsOption) TWithIsPartitionKey(isPartitionKey bool) *GenFieldsOption { + opt.IsPartitionKey = isPartitionKey + return opt +} + +func (opt *GenFieldsOption) TWithElementType(elementType entity.FieldType) *GenFieldsOption { + opt.ElementType = elementType + return opt +} + +func (opt *GenFieldsOption) TWithMaxLen(maxLen int64) *GenFieldsOption { + opt.MaxLength = maxLen + return opt +} + +func (opt *GenFieldsOption) TWithMaxCapacity(maxCapacity int64) *GenFieldsOption { + opt.MaxCapacity = maxCapacity + return opt +} + +// factory +type FieldsFactory struct{} + +// product +type CollectionFields interface { + GenFields(opts GenFieldsOption) []*entity.Field +} + +type FieldsInt64Vec struct{} + +func (cf FieldsInt64Vec) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeFloatVector)).WithDataType(entity.FieldTypeFloatVector).WithDim(option.Dim) + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return []*entity.Field{pkField, vecField} +} + +type FieldsVarcharBinary struct{} + +func (cf FieldsVarcharBinary) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeVarChar)).WithDataType(entity.FieldTypeVarChar). 
+ WithIsPrimaryKey(true).WithMaxLength(option.MaxLength) + vecField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeBinaryVector)).WithDataType(entity.FieldTypeBinaryVector).WithDim(option.Dim) + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return []*entity.Field{pkField, vecField} +} + +type FieldsInt64VecJSON struct{} + +func (cf FieldsInt64VecJSON) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeFloatVector)).WithDataType(entity.FieldTypeFloatVector).WithDim(option.Dim) + jsonField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeJSON)).WithDataType(entity.FieldTypeJSON) + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return []*entity.Field{pkField, vecField, jsonField} +} + +type FieldsInt64VecArray struct{} + +func (cf FieldsInt64VecArray) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeFloatVector)).WithDataType(entity.FieldTypeFloatVector).WithDim(option.Dim) + fields := []*entity.Field{ + pkField, vecField, + } + for _, eleType := range GetAllArrayElementType() { + arrayField := entity.NewField().WithName(GetFieldNameByElementType(eleType)).WithDataType(entity.FieldTypeArray).WithElementType(eleType).WithMaxCapacity(option.MaxCapacity) + if eleType == entity.FieldTypeVarChar { + arrayField.WithMaxLength(option.MaxLength) + } + fields = append(fields, arrayField) + } + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return fields +} + +type FieldsInt64VarcharSparseVec struct{} + +func (cf FieldsInt64VarcharSparseVec) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + varcharField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeVarChar)).WithDataType(entity.FieldTypeVarChar).WithMaxLength(option.MaxLength) + sparseVecField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeSparseVector)).WithDataType(entity.FieldTypeSparseVector) + + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return []*entity.Field{pkField, varcharField, sparseVecField} +} + +type FieldsInt64MultiVec struct{} + +func (cf FieldsInt64MultiVec) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + fields := []*entity.Field{ + pkField, + } + for _, fieldType := range GetAllVectorFieldType() { + if fieldType == entity.FieldTypeSparseVector { + continue + } + vecField := entity.NewField().WithName(GetFieldNameByFieldType(fieldType)).WithDataType(fieldType).WithDim(option.Dim) + fields = append(fields, vecField) + } + + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return fields +} + +type FieldsAllFields struct{} // except sparse vector field +func (cf FieldsAllFields) GenFields(option GenFieldsOption) []*entity.Field { + pkField := 
entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + fields := []*entity.Field{ + pkField, + } + // scalar fields and array fields + for _, fieldType := range GetAllScaleFieldType() { + if fieldType == entity.FieldTypeInt64 { + continue + } else if fieldType == entity.FieldTypeArray { + for _, eleType := range GetAllArrayElementType() { + arrayField := entity.NewField().WithName(GetFieldNameByElementType(eleType)).WithDataType(entity.FieldTypeArray).WithElementType(eleType).WithMaxCapacity(option.MaxCapacity) + if eleType == entity.FieldTypeVarChar { + arrayField.WithMaxLength(option.MaxLength) + } + fields = append(fields, arrayField) + } + } else if fieldType == entity.FieldTypeVarChar { + varcharField := entity.NewField().WithName(GetFieldNameByFieldType(fieldType)).WithDataType(fieldType).WithMaxLength(option.MaxLength) + fields = append(fields, varcharField) + } else { + scalarField := entity.NewField().WithName(GetFieldNameByFieldType(fieldType)).WithDataType(fieldType) + fields = append(fields, scalarField) + } + + } + for _, fieldType := range GetAllVectorFieldType() { + if fieldType == entity.FieldTypeSparseVector { + continue + } + vecField := entity.NewField().WithName(GetFieldNameByFieldType(fieldType)).WithDataType(fieldType).WithDim(option.Dim) + fields = append(fields, vecField) + } + + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return fields +} + +func (ff FieldsFactory) GenFieldsForCollection(collectionFieldsType CollectionFieldsType, option *GenFieldsOption) []*entity.Field { + log.Info("GenFieldsForCollection", zap.Any("GenFieldsOption", option)) + switch collectionFieldsType { + case Int64Vec: + return FieldsInt64Vec{}.GenFields(*option) + case VarcharBinary: + return FieldsVarcharBinary{}.GenFields(*option) + case Int64VecJSON: + return FieldsInt64VecJSON{}.GenFields(*option) + case Int64VecArray: + return FieldsInt64VecArray{}.GenFields(*option) + case Int64VarcharSparseVec: + return FieldsInt64VarcharSparseVec{}.GenFields(*option) + case Int64MultiVec: + return FieldsInt64MultiVec{}.GenFields(*option) + case AllFields: + return FieldsAllFields{}.GenFields(*option) + default: + return FieldsInt64Vec{}.GenFields(*option) + } +} diff --git a/tests/go_client/testcases/helper/helper.go b/tests/go_client/testcases/helper/helper.go new file mode 100644 index 0000000000000..f2b93bbfdbb6e --- /dev/null +++ b/tests/go_client/testcases/helper/helper.go @@ -0,0 +1,192 @@ +package helper + +import ( + "context" + "testing" + "time" + + clientv2 "github.com/milvus-io/milvus/client/v2" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/base" + "github.com/milvus-io/milvus/tests/go_client/common" + "go.uber.org/zap" + + "github.com/milvus-io/milvus/client/v2/entity" +) + +func CreateContext(t *testing.T, timeout time.Duration) context.Context { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + t.Cleanup(func() { + cancel() + }) + return ctx +} + +//var ArrayFieldType = + +func GetAllArrayElementType() []entity.FieldType { + return []entity.FieldType{ + entity.FieldTypeBool, + entity.FieldTypeInt8, + entity.FieldTypeInt16, + entity.FieldTypeInt32, + entity.FieldTypeInt64, + entity.FieldTypeFloat, + entity.FieldTypeDouble, + entity.FieldTypeVarChar, + } +} + +func GetAllVectorFieldType() []entity.FieldType { + return []entity.FieldType{ + entity.FieldTypeBinaryVector, + entity.FieldTypeFloatVector, + 
entity.FieldTypeFloat16Vector, + entity.FieldTypeBFloat16Vector, + entity.FieldTypeSparseVector, + } +} + +func GetAllScaleFieldType() []entity.FieldType { + return []entity.FieldType{ + entity.FieldTypeBool, + entity.FieldTypeInt8, + entity.FieldTypeInt16, + entity.FieldTypeInt32, + entity.FieldTypeInt64, + entity.FieldTypeFloat, + entity.FieldTypeDouble, + entity.FieldTypeVarChar, + entity.FieldTypeArray, + entity.FieldTypeJSON, + } +} + +func GetAllFieldsType() []entity.FieldType { + allFieldType := GetAllScaleFieldType() + allFieldType = append(allFieldType, entity.FieldTypeBinaryVector, + entity.FieldTypeFloatVector, + entity.FieldTypeFloat16Vector, + entity.FieldTypeBFloat16Vector, + //entity.FieldTypeSparseVector, max vector fields num is 4 + ) + return allFieldType +} + +func GetInvalidPkFieldType() []entity.FieldType { + nonPkFieldTypes := []entity.FieldType{ + entity.FieldTypeNone, + entity.FieldTypeBool, + entity.FieldTypeInt8, + entity.FieldTypeInt16, + entity.FieldTypeInt32, + entity.FieldTypeFloat, + entity.FieldTypeDouble, + entity.FieldTypeString, + entity.FieldTypeJSON, + entity.FieldTypeArray, + } + return nonPkFieldTypes +} + +func GetInvalidPartitionKeyFieldType() []entity.FieldType { + nonPkFieldTypes := []entity.FieldType{ + entity.FieldTypeBool, + entity.FieldTypeInt8, + entity.FieldTypeInt16, + entity.FieldTypeInt32, + entity.FieldTypeFloat, + entity.FieldTypeDouble, + entity.FieldTypeJSON, + entity.FieldTypeArray, + entity.FieldTypeFloatVector, + } + return nonPkFieldTypes +} + +// ----------------- prepare data -------------------------- +type CollectionPrepare struct{} + +var CollPrepare CollectionPrepare +var FieldsFact FieldsFactory + +func (chainTask *CollectionPrepare) CreateCollection(ctx context.Context, t *testing.T, mc *base.MilvusClient, + cp *CreateCollectionParams, fieldOpt *GenFieldsOption, schemaOpt *GenSchemaOption) (*CollectionPrepare, *entity.Schema) { + + fields := FieldsFact.GenFieldsForCollection(cp.CollectionFieldsType, fieldOpt) + schemaOpt.Fields = fields + schema := GenSchema(schemaOpt) + + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(schema.CollectionName, schema)) + common.CheckErr(t, err, true) + + t.Cleanup(func() { + err := mc.DropCollection(ctx, clientv2.NewDropCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + }) + return chainTask, schema +} + +func (chainTask *CollectionPrepare) InsertData(ctx context.Context, t *testing.T, mc *base.MilvusClient, + ip *InsertParams, option *GenColumnOption) (*CollectionPrepare, clientv2.InsertResult) { + if nil == ip.Schema || ip.Schema.CollectionName == "" { + log.Fatal("[InsertData] Nil Schema is not expected") + } + fields := ip.Schema.Fields + insertOpt := clientv2.NewColumnBasedInsertOption(ip.Schema.CollectionName) + for _, field := range fields { + column := GenColumnData(ip.Nb, field.DataType, *option) + insertOpt.WithColumns(column) + } + + insertRes, err := mc.Insert(ctx, insertOpt) + common.CheckErr(t, err, true) + return chainTask, insertRes +} + +func (chainTask *CollectionPrepare) FlushData(ctx context.Context, t *testing.T, mc *base.MilvusClient, collName string) *CollectionPrepare { + flushTask, err := mc.Flush(ctx, clientv2.NewFlushOption(collName)) + common.CheckErr(t, err, true) + err = flushTask.Await(ctx) + common.CheckErr(t, err, true) + return chainTask +} + +func (chainTask *CollectionPrepare) CreateIndex(ctx context.Context, t *testing.T, mc *base.MilvusClient, ip *IndexParams) *CollectionPrepare { + if nil == ip.Schema || 
ip.Schema.CollectionName == "" { + log.Fatal("[CreateIndex] Empty collection name is not expected") + } + collName := ip.Schema.CollectionName + mFieldIndex := ip.FieldIndexMap + + for _, field := range ip.Schema.Fields { + if field.DataType >= 100 { + if idx, ok := mFieldIndex[field.Name]; ok { + log.Info("CreateIndex", zap.String("indexName", idx.Name()), zap.Any("indexType", idx.IndexType()), zap.Any("indexParams", idx.Params())) + createIndexTask, err := mc.CreateIndex(ctx, clientv2.NewCreateIndexOption(collName, field.Name, idx)) + common.CheckErr(t, err, true) + err = createIndexTask.Await(ctx) + common.CheckErr(t, err, true) + } else { + idx := GetDefaultVectorIndex(field.DataType) + log.Info("CreateIndex", zap.String("indexName", idx.Name()), zap.Any("indexType", idx.IndexType()), zap.Any("indexParams", idx.Params())) + createIndexTask, err := mc.CreateIndex(ctx, clientv2.NewCreateIndexOption(collName, field.Name, idx)) + common.CheckErr(t, err, true) + err = createIndexTask.Await(ctx) + common.CheckErr(t, err, true) + } + } + } + return chainTask +} + +func (chainTask *CollectionPrepare) Load(ctx context.Context, t *testing.T, mc *base.MilvusClient, lp *LoadParams) *CollectionPrepare { + if lp.CollectionName == "" { + log.Fatal("[Load] Empty collection name is not expected") + } + loadTask, err := mc.LoadCollection(ctx, clientv2.NewLoadCollectionOption(lp.CollectionName).WithReplica(lp.Replica)) + common.CheckErr(t, err, true) + err = loadTask.Await(ctx) + common.CheckErr(t, err, true) + return chainTask +} diff --git a/tests/go_client/testcases/helper/index_helper.go b/tests/go_client/testcases/helper/index_helper.go new file mode 100644 index 0000000000000..554fcbb3789e5 --- /dev/null +++ b/tests/go_client/testcases/helper/index_helper.go @@ -0,0 +1,35 @@ +package helper + +import ( + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/client/v2/index" +) + +func GetDefaultVectorIndex(fieldType entity.FieldType) index.Index { + switch fieldType { + case entity.FieldTypeFloatVector, entity.FieldTypeFloat16Vector, entity.FieldTypeBFloat16Vector, entity.FieldTypeSparseVector: + return index.NewHNSWIndex(entity.COSINE, 8, 200) + case entity.FieldTypeBinaryVector: + return nil + // return binary index + default: + return nil + // return auto index + } +} + +type IndexParams struct { + Schema *entity.Schema + FieldIndexMap map[string]index.Index +} + +func NewIndexParams(schema *entity.Schema) *IndexParams { + return &IndexParams{ + Schema: schema, + } +} + +func (opt *IndexParams) TWithFieldIndex(mFieldIndex map[string]index.Index) *IndexParams { + opt.FieldIndexMap = mFieldIndex + return opt +} diff --git a/tests/go_client/testcases/helper/read_helper.go b/tests/go_client/testcases/helper/read_helper.go new file mode 100644 index 0000000000000..085dfec31d255 --- /dev/null +++ b/tests/go_client/testcases/helper/read_helper.go @@ -0,0 +1,55 @@ +package helper + +import ( + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/tests/go_client/common" +) + +type LoadParams struct { + CollectionName string + Replica int +} + +func NewLoadParams(name string) *LoadParams { + return &LoadParams{ + CollectionName: name, + } +} + +func (opt *LoadParams) TWithReplica(replica int) *LoadParams { + opt.Replica = replica + return opt +} + +// GenSearchVectors gen search vectors +func GenSearchVectors(nq int, dim int, dataType entity.FieldType) []entity.Vector { + vectors := make([]entity.Vector, 0, nq) + switch dataType { + case 
entity.FieldTypeFloatVector: + for i := 0; i < nq; i++ { + vector := common.GenFloatVector(dim) + vectors = append(vectors, entity.FloatVector(vector)) + } + case entity.FieldTypeBinaryVector: + for i := 0; i < nq; i++ { + vector := common.GenBinaryVector(dim) + vectors = append(vectors, entity.BinaryVector(vector)) + } + case entity.FieldTypeFloat16Vector: + for i := 0; i < nq; i++ { + vector := common.GenFloat16Vector(dim) + vectors = append(vectors, entity.Float16Vector(vector)) + } + case entity.FieldTypeBFloat16Vector: + for i := 0; i < nq; i++ { + vector := common.GenBFloat16Vector(dim) + vectors = append(vectors, entity.BFloat16Vector(vector)) + } + case entity.FieldTypeSparseVector: + for i := 0; i < nq; i++ { + vec := common.GenSparseVector(dim) + vectors = append(vectors, vec) + } + } + return vectors +} diff --git a/tests/go_client/testcases/helper/schema_helper.go b/tests/go_client/testcases/helper/schema_helper.go new file mode 100644 index 0000000000000..d96e567a28632 --- /dev/null +++ b/tests/go_client/testcases/helper/schema_helper.go @@ -0,0 +1,68 @@ +package helper + +import ( + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/common" +) + +type GenSchemaOption struct { + CollectionName string + Description string + AutoID bool + Fields []*entity.Field + EnableDynamicField bool +} + +func TNewSchemaOption() *GenSchemaOption { + return &GenSchemaOption{} +} + +func (opt *GenSchemaOption) TWithName(collectionName string) *GenSchemaOption { + opt.CollectionName = collectionName + return opt +} + +func (opt *GenSchemaOption) TWithDescription(description string) *GenSchemaOption { + opt.Description = description + return opt +} + +func (opt *GenSchemaOption) TWithAutoID(autoID bool) *GenSchemaOption { + opt.AutoID = autoID + return opt +} + +func (opt *GenSchemaOption) TWithEnableDynamicField(enableDynamicField bool) *GenSchemaOption { + opt.EnableDynamicField = enableDynamicField + return opt +} + +func (opt *GenSchemaOption) TWithFields(fields []*entity.Field) *GenSchemaOption { + opt.Fields = fields + return opt +} + +func GenSchema(option *GenSchemaOption) *entity.Schema { + if len(option.Fields) == 0 { + log.Fatal("Require at least a primary field and a vector field") + } + if option.CollectionName == "" { + option.CollectionName = common.GenRandomString("pre", 6) + } + schema := entity.NewSchema().WithName(option.CollectionName) + for _, field := range option.Fields { + schema.WithField(field) + } + + if option.Description != "" { + schema.WithDescription(option.Description) + } + if option.AutoID { + schema.WithAutoID(option.AutoID) + } + if option.EnableDynamicField { + schema.WithDynamicFieldEnabled(option.EnableDynamicField) + } + return schema +} diff --git a/tests/go_client/testcases/main_test.go b/tests/go_client/testcases/main_test.go new file mode 100644 index 0000000000000..221f1746c857e --- /dev/null +++ b/tests/go_client/testcases/main_test.go @@ -0,0 +1,74 @@ +package testcases + +import ( + "context" + "flag" + "os" + "testing" + "time" + + "go.uber.org/zap" + + clientv2 "github.com/milvus-io/milvus/client/v2" + + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/base" + "github.com/milvus-io/milvus/tests/go_client/common" +) + +var addr = flag.String("addr", "localhost:19530", "server host and port") +var defaultCfg = clientv2.ClientConfig{Address: *addr} + +// teardown +func teardown() { + log.Info("Start to tear down all.....") + ctx, 
cancel := context.WithTimeout(context.Background(), time.Second*common.DefaultTimeout) + defer cancel() + mc, err := base.NewMilvusClient(ctx, &defaultCfg) + if err != nil { + log.Fatal("teardown failed to connect milvus with error", zap.Error(err)) + } + defer mc.Close(ctx) + + // clear dbs + dbs, _ := mc.ListDatabases(ctx, clientv2.NewListDatabaseOption()) + for _, db := range dbs { + if db != common.DefaultDb { + _ = mc.UsingDatabase(ctx, clientv2.NewUsingDatabaseOption(db)) + collections, _ := mc.ListCollections(ctx, clientv2.NewListCollectionOption()) + for _, coll := range collections { + _ = mc.DropCollection(ctx, clientv2.NewDropCollectionOption(coll)) + } + _ = mc.DropDatabase(ctx, clientv2.NewDropDatabaseOption(db)) + } + } +} + +// create connect +func createDefaultMilvusClient(ctx context.Context, t *testing.T) *base.MilvusClient { + t.Helper() + + var ( + mc *base.MilvusClient + err error + ) + mc, err = base.NewMilvusClient(ctx, &defaultCfg) + common.CheckErr(t, err, true) + + t.Cleanup(func() { + mc.Close(ctx) + }) + + return mc +} + +func TestMain(m *testing.M) { + flag.Parse() + log.Info("Parser Milvus address", zap.String("address", *addr)) + code := m.Run() + if code != 0 { + log.Error("Tests failed and exited", zap.Int("code", code)) + } + teardown() + os.Exit(code) +} diff --git a/tests/go_client/testcases/search_test.go b/tests/go_client/testcases/search_test.go new file mode 100644 index 0000000000000..f1dd0236bb5ac --- /dev/null +++ b/tests/go_client/testcases/search_test.go @@ -0,0 +1,42 @@ +package testcases + +import ( + "testing" + "time" + + clientv2 "github.com/milvus-io/milvus/client/v2" + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/common" + hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper" + "go.uber.org/zap" +) + +func TestSearch(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + cp := hp.NewCreateCollectionParams(hp.Int64Vec) + _, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, cp, hp.TNewFieldsOption(), hp.TNewSchemaOption()) + log.Info("schema", zap.Any("schema", schema)) + + insertParams := hp.NewInsertParams(schema, common.DefaultNb) + hp.CollPrepare.InsertData(ctx, t, mc, insertParams, hp.TNewColumnOption()) + + // flush -> index -> load + hp.CollPrepare.FlushData(ctx, t, mc, schema.CollectionName) + hp.CollPrepare.CreateIndex(ctx, t, mc, hp.NewIndexParams(schema)) + hp.CollPrepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName)) + + // search + vectors := hp.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeFloatVector) + resSearch, err := mc.Search(ctx, clientv2.NewSearchOption(schema.CollectionName, common.DefaultLimit, vectors).WithConsistencyLevel(entity.ClStrong)) + common.CheckErr(t, err, true) + common.CheckSearchResult(t, resSearch, common.DefaultNq, common.DefaultLimit) + + log.Info("search", zap.Any("resSearch", resSearch)) + log.Info("search", zap.Any("ids", resSearch[0].IDs)) + log.Info("search", zap.Any("scores", resSearch[0].Scores)) + id, _ := resSearch[0].IDs.GetAsInt64(0) + log.Info("search", zap.Int64("ids", id)) +} From 2422084a29410a006018d5159054143ba99a28c8 Mon Sep 17 00:00:00 2001 From: aoiasd <45024769+aoiasd@users.noreply.github.com> Date: Tue, 4 Jun 2024 11:39:46 +0800 Subject: [PATCH 123/126] fix: paramtable cache cause dynamic config non-dynamic (#33473) relate: https://github.com/milvus-io/milvus/issues/33461 
Signed-off-by: aoiasd --- cmd/roles/roles.go | 2 +- pkg/config/env_source.go | 3 +++ pkg/config/etcd_source.go | 11 +++++++++++ pkg/config/file_source.go | 11 +++++++++++ pkg/config/manager.go | 11 +++++++++++ pkg/config/manager_test.go | 3 +++ pkg/config/source.go | 5 +++++ 7 files changed, 45 insertions(+), 1 deletion(-) diff --git a/cmd/roles/roles.go b/cmd/roles/roles.go index 1105498f5f6e7..16be7ac378958 100644 --- a/cmd/roles/roles.go +++ b/cmd/roles/roles.go @@ -411,7 +411,7 @@ func (mr *MilvusRoles) Run() { } tracer.SetTracerProvider(exp, params.TraceCfg.SampleFraction.GetAsFloat()) - log.Info("Reset tracer finished", zap.String("Exporter", params.TraceCfg.Exporter.GetValue())) + log.Info("Reset tracer finished", zap.String("Exporter", params.TraceCfg.Exporter.GetValue()), zap.Float64("SampleFraction", params.TraceCfg.SampleFraction.GetAsFloat())) if paramtable.GetRole() == typeutil.QueryNodeRole || paramtable.GetRole() == typeutil.StandaloneRole { initcore.InitTraceConfig(params) diff --git a/pkg/config/env_source.go b/pkg/config/env_source.go index abef8bb821cf0..b36ee5917b176 100644 --- a/pkg/config/env_source.go +++ b/pkg/config/env_source.go @@ -78,6 +78,9 @@ func (es EnvSource) GetSourceName() string { return "EnvironmentSource" } +func (es EnvSource) SetManager(m ConfigManager) { +} + func (es EnvSource) SetEventHandler(eh EventHandler) { } diff --git a/pkg/config/etcd_source.go b/pkg/config/etcd_source.go index 9c87d0fc1c968..29f49278d76b9 100644 --- a/pkg/config/etcd_source.go +++ b/pkg/config/etcd_source.go @@ -24,6 +24,7 @@ import ( "sync" "time" + "github.com/samber/lo" clientv3 "go.etcd.io/etcd/client/v3" "go.uber.org/zap" @@ -44,6 +45,7 @@ type EtcdSource struct { updateMu sync.Mutex configRefresher *refresher + manager ConfigManager } func NewEtcdSource(etcdInfo *EtcdInfo) (*EtcdSource, error) { @@ -115,6 +117,12 @@ func (es *EtcdSource) Close() { es.configRefresher.stop() } +func (es *EtcdSource) SetManager(m ConfigManager) { + es.Lock() + defer es.Unlock() + es.manager = m +} + func (es *EtcdSource) SetEventHandler(eh EventHandler) { es.configRefresher.SetEventHandler(eh) } @@ -172,6 +180,9 @@ func (es *EtcdSource) update(configs map[string]string) error { return err } es.currentConfigs = configs + if es.manager != nil { + es.manager.EvictCacheValueByFormat(lo.Map(events, func(event *Event, _ int) string { return event.Key })...) + } es.Unlock() es.configRefresher.fireEvents(events...) diff --git a/pkg/config/file_source.go b/pkg/config/file_source.go index 6c1ba11bf18d8..9a1ab3f863e05 100644 --- a/pkg/config/file_source.go +++ b/pkg/config/file_source.go @@ -22,6 +22,7 @@ import ( "sync" "github.com/cockroachdb/errors" + "github.com/samber/lo" "github.com/spf13/cast" "github.com/spf13/viper" "go.uber.org/zap" @@ -36,6 +37,7 @@ type FileSource struct { updateMu sync.Mutex configRefresher *refresher + manager ConfigManager } func NewFileSource(fileInfo *FileInfo) *FileSource { @@ -91,6 +93,12 @@ func (fs *FileSource) Close() { fs.configRefresher.stop() } +func (fs *FileSource) SetManager(m ConfigManager) { + fs.Lock() + defer fs.Unlock() + fs.manager = m +} + func (fs *FileSource) SetEventHandler(eh EventHandler) { fs.RWMutex.Lock() defer fs.RWMutex.Unlock() @@ -173,6 +181,9 @@ func (fs *FileSource) update(configs map[string]string) error { return err } fs.configs = configs + if fs.manager != nil { + fs.manager.EvictCacheValueByFormat(lo.Map(events, func(event *Event, _ int) string { return event.Key })...) + } fs.Unlock() fs.configRefresher.fireEvents(events...) 
diff --git a/pkg/config/manager.go b/pkg/config/manager.go index b33993296b0b1..7e8c100255a1f 100644 --- a/pkg/config/manager.go +++ b/pkg/config/manager.go @@ -116,6 +116,16 @@ func (m *Manager) EvictCachedValue(key string) { m.configCache.Remove(key) } +func (m *Manager) EvictCacheValueByFormat(keys ...string) { + set := typeutil.NewSet(keys...) + m.configCache.Range(func(key string, value interface{}) bool { + if set.Contain(formatKey(key)) { + m.configCache.Remove(key) + } + return true + }) +} + func (m *Manager) GetConfig(key string) (string, error) { realKey := formatKey(key) v, ok := m.overlays.Get(realKey) @@ -210,6 +220,7 @@ func (m *Manager) AddSource(source Source) error { return err } + source.SetManager(m) m.sources.Insert(sourceName, source) err := m.pullSourceConfigs(sourceName) diff --git a/pkg/config/manager_test.go b/pkg/config/manager_test.go index ef4c2290abeed..b955071661d6e 100644 --- a/pkg/config/manager_test.go +++ b/pkg/config/manager_test.go @@ -270,6 +270,9 @@ func (ErrSource) GetPriority() int { return 2 } +func (ErrSource) SetManager(m ConfigManager) { +} + // GetSourceName implements Source func (ErrSource) GetSourceName() string { return "ErrSource" diff --git a/pkg/config/source.go b/pkg/config/source.go index 6a2cfbae0437a..61a22e320feea 100644 --- a/pkg/config/source.go +++ b/pkg/config/source.go @@ -23,12 +23,17 @@ const ( LowPriority = NormalPriority + 10 ) +type ConfigManager interface { + EvictCacheValueByFormat(keys ...string) +} + type Source interface { GetConfigurations() (map[string]string, error) GetConfigurationByKey(string) (string, error) GetPriority() int GetSourceName() string SetEventHandler(eh EventHandler) + SetManager(m ConfigManager) UpdateOptions(opt Options) Close() } From c6f8a73bb2328be5db5be95d0ac33330ed29bcdf Mon Sep 17 00:00:00 2001 From: zhagnlu <1542303831@qq.com> Date: Tue, 4 Jun 2024 14:09:47 +0800 Subject: [PATCH 124/126] enhance: optimize some cache to reduce memory usage (#33534) #33533 Signed-off-by: luzhang Co-authored-by: luzhang --- internal/core/src/mmap/Column.h | 7 +- internal/core/src/segcore/InsertRecord.h | 13 +--- .../core/src/segcore/SegmentGrowingImpl.cpp | 9 +-- internal/core/src/segcore/SegmentInterface.h | 1 + .../core/src/segcore/SegmentSealedImpl.cpp | 75 ++++--------------- 5 files changed, 24 insertions(+), 81 deletions(-) diff --git a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index bda4ca16a9edd..916bb07b0e1c8 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -459,9 +459,7 @@ class VariableColumn : public ColumnBase { std::string_view RawAt(const int i) const { - size_t len = (i == indices_.size() - 1) ? 
size_ - indices_.back() - : indices_[i + 1] - indices_[i]; - return std::string_view(data_ + indices_[i], len); + return std::string_view(views_[i]); } void @@ -502,6 +500,9 @@ class VariableColumn : public ColumnBase { } ConstructViews(); + + // Not need indices_ after + indices_.clear(); } protected: diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h index 7da03c1828b36..13a92d22e760a 100644 --- a/internal/core/src/segcore/InsertRecord.h +++ b/internal/core/src/segcore/InsertRecord.h @@ -212,7 +212,8 @@ class OffsetOrderedArray : public OffsetMap { PanicInfo(Unsupported, "OffsetOrderedArray could not insert after seal"); } - array_.push_back(std::make_pair(std::get(pk), offset)); + array_.push_back( + std::make_pair(std::get(pk), static_cast(offset))); } void @@ -285,13 +286,13 @@ class OffsetOrderedArray : public OffsetMap { private: bool is_sealed = false; - std::vector> array_; + std::vector> array_; }; template struct InsertRecord { InsertRecord(const Schema& schema, int64_t size_per_chunk) - : row_ids_(size_per_chunk), timestamps_(size_per_chunk) { + : timestamps_(size_per_chunk) { std::optional pk_field_id = schema.get_primary_field_id(); for (auto& field : schema) { @@ -590,10 +591,8 @@ struct InsertRecord { void clear() { timestamps_.clear(); - row_ids_.clear(); reserved = 0; ack_responder_.clear(); - timestamp_index_ = TimestampIndex(); pk2offset_->clear(); fields_data_.clear(); } @@ -605,15 +604,11 @@ struct InsertRecord { public: ConcurrentVector timestamps_; - ConcurrentVector row_ids_; // used for preInsert of growing segment std::atomic reserved = 0; AckResponder ack_responder_; - // used for timestamps index of sealed segment - TimestampIndex timestamp_index_; - // pks to row offset std::unique_ptr pk2offset_; diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index 3d1f277c43d89..d8cd057f28be7 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -110,7 +110,6 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset, // step 3: fill into Segment.ConcurrentVector insert_record_.timestamps_.set_data_raw( reserved_offset, timestamps_raw, num_rows); - insert_record_.row_ids_.set_data_raw(reserved_offset, row_ids, num_rows); // update the mem size of timestamps and row IDs stats_.mem_size += num_rows * (sizeof(Timestamp) + sizeof(idx_t)); @@ -224,7 +223,6 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) { } if (field_id == RowFieldID) { - insert_record_.row_ids_.set_data_raw(reserved_offset, field_data); continue; } @@ -313,7 +311,6 @@ SegmentGrowingImpl::LoadFieldDataV2(const LoadFieldDataInfo& infos) { } if (field_id == RowFieldID) { - insert_record_.row_ids_.set_data_raw(reserved_offset, field_data); continue; } @@ -766,10 +763,8 @@ SegmentGrowingImpl::bulk_subscript(SystemFieldType system_type, static_cast(output)); break; case SystemFieldType::RowId: - bulk_subscript_impl(&this->insert_record_.row_ids_, - seg_offsets, - count, - static_cast(output)); + PanicInfo(ErrorCode::Unsupported, + "RowId retrieve is not supported"); break; default: PanicInfo(DataTypeInvalid, "unknown subscript fields"); diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 6a2dbf1485bfd..663cfa20819be 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -235,6 +235,7 @@ class 
SegmentInternalInterface : public SegmentInterface { virtual int64_t num_chunk_data(FieldId field_id) const = 0; + // bitset 1 means not hit. 0 means hit. virtual void mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const = 0; diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index 36e7a6aebb184..4c06d6a3ffb5b 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include "Utils.h" #include "Types.h" @@ -348,35 +349,15 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { offset += row_count; } - TimestampIndex index; - auto min_slice_length = num_rows < 4096 ? 1 : 4096; - auto meta = GenerateFakeSlices( - timestamps.data(), num_rows, min_slice_length); - index.set_length_meta(std::move(meta)); - // todo ::opt to avoid copy timestamps from field data - index.build_with(timestamps.data(), num_rows); - - // use special index std::unique_lock lck(mutex_); AssertInfo(insert_record_.timestamps_.empty(), "already exists"); insert_record_.timestamps_.fill_chunk_data(field_data); - insert_record_.timestamp_index_ = std::move(index); AssertInfo(insert_record_.timestamps_.num_chunk() == 1, "num chunk not equal to 1 for sealed segment"); stats_.mem_size += sizeof(Timestamp) * data.row_count; } else { AssertInfo(system_field_type == SystemFieldType::RowId, "System field type of id column is not RowId"); - - auto field_data = storage::CollectFieldDataChannel(data.channel); - - // write data under lock - std::unique_lock lck(mutex_); - AssertInfo(insert_record_.row_ids_.empty(), "already exists"); - insert_record_.row_ids_.fill_chunk_data(field_data); - AssertInfo(insert_record_.row_ids_.num_chunk() == 1, - "num chunk not equal to 1 for sealed segment"); - stats_.mem_size += sizeof(idx_t) * data.row_count; } ++system_ready_count_; } else { @@ -925,9 +906,7 @@ SegmentSealedImpl::DropFieldData(const FieldId field_id) { std::unique_lock lck(mutex_); --system_ready_count_; - if (system_field_type == SystemFieldType::RowId) { - insert_record_.row_ids_.clear(); - } else if (system_field_type == SystemFieldType::Timestamp) { + if (system_field_type == SystemFieldType::Timestamp) { insert_record_.timestamps_.clear(); } lck.unlock(); @@ -1042,13 +1021,7 @@ SegmentSealedImpl::bulk_subscript(SystemFieldType system_type, static_cast(output)); break; case SystemFieldType::RowId: - AssertInfo(insert_record_.row_ids_.num_chunk() == 1, - "num chunk of rowID not equal to 1 for sealed segment"); - bulk_subscript_impl( - this->insert_record_.row_ids_.get_chunk_data(0), - seg_offsets, - count, - static_cast(output)); + PanicInfo(ErrorCode::Unsupported, "RowId retrieve not supported"); break; default: PanicInfo(DataTypeInvalid, @@ -1512,12 +1485,6 @@ SegmentSealedImpl::debug() const { void SegmentSealedImpl::LoadSegmentMeta( const proto::segcore::LoadSegmentMeta& segment_meta) { - std::unique_lock lck(mutex_); - std::vector slice_lengths; - for (auto& info : segment_meta.metas()) { - slice_lengths.push_back(info.row_count()); - } - insert_record_.timestamp_index_.set_length_meta(std::move(slice_lengths)); PanicInfo(NotImplemented, "unimplemented"); } @@ -1529,33 +1496,17 @@ SegmentSealedImpl::get_active_count(Timestamp ts) const { void SegmentSealedImpl::mask_with_timestamps(BitsetType& bitset_chunk, - Timestamp timestamp) const { - // TODO change the - AssertInfo(insert_record_.timestamps_.num_chunk() == 1, 
- "num chunk not equal to 1 for sealed segment"); - const auto& timestamps_data = insert_record_.timestamps_.get_chunk(0); - AssertInfo(timestamps_data.size() == get_row_count(), - fmt::format("Timestamp size not equal to row count: {}, {}", - timestamps_data.size(), - get_row_count())); - auto range = insert_record_.timestamp_index_.get_active_range(timestamp); - - // range == (size_, size_) and size_ is this->timestamps_.size(). - // it means these data are all useful, we don't need to update bitset_chunk. - // It can be thought of as an OR operation with another bitmask that is all 0s, but it is not necessary to do so. - if (range.first == range.second && range.first == timestamps_data.size()) { - // just skip - return; - } - // range == (0, 0). it means these data can not be used, directly set bitset_chunk to all 1s. - // It can be thought of as an OR operation with another bitmask that is all 1s. - if (range.first == range.second && range.first == 0) { - bitset_chunk.set(); - return; + Timestamp ts) const { + auto row_count = this->get_row_count(); + auto& ts_vec = this->insert_record_.timestamps_; + auto iter = std::upper_bound( + boost::make_counting_iterator(static_cast(0)), + boost::make_counting_iterator(row_count), + ts, + [&](Timestamp ts, int64_t index) { return ts < ts_vec[index]; }); + for (size_t i = *iter; i < row_count; ++i) { + bitset_chunk.set(i); } - auto mask = TimestampIndex::GenerateBitset( - timestamp, range, timestamps_data.data(), timestamps_data.size()); - bitset_chunk |= mask; } bool From d610fdf033112d6b8a59eeefcef97cb7b7c1306e Mon Sep 17 00:00:00 2001 From: Bingyi Sun Date: Tue, 4 Jun 2024 19:05:49 +0800 Subject: [PATCH 125/126] enhance: change Allocator to generic (#33581) Signed-off-by: sunby --- pkg/util/vralloc/alloc.go | 44 ++++++++++++++++------------------ pkg/util/vralloc/alloc_test.go | 6 ++--- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/pkg/util/vralloc/alloc.go b/pkg/util/vralloc/alloc.go index ab2cc422f3772..32eb5442ccd66 100644 --- a/pkg/util/vralloc/alloc.go +++ b/pkg/util/vralloc/alloc.go @@ -60,29 +60,27 @@ func (r Resource) Le(limit *Resource) bool { return r.Memory <= limit.Memory && r.CPU <= limit.CPU && r.Disk <= limit.Disk } -type Allocator interface { +type Allocator[T comparable] interface { // Allocate allocates the resource, returns true if the resource is allocated. If allocation failed, returns the short resource. // The short resource is a positive value, e.g., if there is additional 8 bytes in disk needed, returns (0, 0, 8). 
- Allocate(id string, r *Resource) (allocated bool, short *Resource) + Allocate(id T, r *Resource) (allocated bool, short *Resource) // Release releases the resource - Release(id string) + Release(id T) // Used returns the used resource Used() Resource // Inspect returns the allocated resources - Inspect() map[string]*Resource + Inspect() map[T]*Resource } -type FixedSizeAllocator struct { +type FixedSizeAllocator[T comparable] struct { limit *Resource lock sync.RWMutex used Resource - allocs map[string]*Resource + allocs map[T]*Resource } -var _ Allocator = (*FixedSizeAllocator)(nil) - -func (a *FixedSizeAllocator) Allocate(id string, r *Resource) (allocated bool, short *Resource) { +func (a *FixedSizeAllocator[T]) Allocate(id T, r *Resource) (allocated bool, short *Resource) { a.lock.Lock() defer a.lock.Unlock() if a.used.Add(r).Le(a.limit) { @@ -99,7 +97,7 @@ func (a *FixedSizeAllocator) Allocate(id string, r *Resource) (allocated bool, s return false, short } -func (a *FixedSizeAllocator) Release(id string) { +func (a *FixedSizeAllocator[T]) Release(id T) { a.lock.Lock() defer a.lock.Unlock() r, ok := a.allocs[id] @@ -110,36 +108,34 @@ func (a *FixedSizeAllocator) Release(id string) { a.used.Sub(r) } -func (a *FixedSizeAllocator) Used() Resource { +func (a *FixedSizeAllocator[T]) Used() Resource { a.lock.RLock() defer a.lock.RUnlock() return a.used } -func (a *FixedSizeAllocator) Inspect() map[string]*Resource { +func (a *FixedSizeAllocator[T]) Inspect() map[T]*Resource { a.lock.RLock() defer a.lock.RUnlock() return maps.Clone(a.allocs) } -func NewFixedSizeAllocator(limit *Resource) *FixedSizeAllocator { - return &FixedSizeAllocator{ +func NewFixedSizeAllocator[T comparable](limit *Resource) *FixedSizeAllocator[T] { + return &FixedSizeAllocator[T]{ limit: limit, - allocs: make(map[string]*Resource), + allocs: make(map[T]*Resource), } } // PhysicalAwareFixedSizeAllocator allocates resources with additional consideration of physical resource usage. 
-type PhysicalAwareFixedSizeAllocator struct { - FixedSizeAllocator +type PhysicalAwareFixedSizeAllocator[T comparable] struct { + FixedSizeAllocator[T] hwLimit *Resource dir string // watching directory for disk usage, probably got by paramtable.Get().LocalStorageCfg.Path.GetValue() } -var _ Allocator = (*PhysicalAwareFixedSizeAllocator)(nil) - -func (a *PhysicalAwareFixedSizeAllocator) Allocate(id string, r *Resource) (allocated bool, short *Resource) { +func (a *PhysicalAwareFixedSizeAllocator[T]) Allocate(id T, r *Resource) (allocated bool, short *Resource) { memoryUsage := int64(hardware.GetUsedMemoryCount()) diskUsage := int64(0) if usageStats, err := disk.Usage(a.dir); err != nil { @@ -159,11 +155,11 @@ func (a *PhysicalAwareFixedSizeAllocator) Allocate(id string, r *Resource) (allo return false, expected.Diff(a.hwLimit) } -func NewPhysicalAwareFixedSizeAllocator(limit *Resource, hwMemoryLimit, hwDiskLimit int64, dir string) *PhysicalAwareFixedSizeAllocator { - return &PhysicalAwareFixedSizeAllocator{ - FixedSizeAllocator: FixedSizeAllocator{ +func NewPhysicalAwareFixedSizeAllocator[T comparable](limit *Resource, hwMemoryLimit, hwDiskLimit int64, dir string) *PhysicalAwareFixedSizeAllocator[T] { + return &PhysicalAwareFixedSizeAllocator[T]{ + FixedSizeAllocator: FixedSizeAllocator[T]{ limit: limit, - allocs: make(map[string]*Resource), + allocs: make(map[T]*Resource), }, hwLimit: &Resource{Memory: hwMemoryLimit, Disk: hwDiskLimit}, dir: dir, diff --git a/pkg/util/vralloc/alloc_test.go b/pkg/util/vralloc/alloc_test.go index 0b081a702ebd6..6b50d39ac4420 100644 --- a/pkg/util/vralloc/alloc_test.go +++ b/pkg/util/vralloc/alloc_test.go @@ -27,7 +27,7 @@ import ( ) func TestFixedSizeAllocator(t *testing.T) { - a := NewFixedSizeAllocator(&Resource{100, 100, 100}) + a := NewFixedSizeAllocator[string](&Resource{100, 100, 100}) allocated, _ := a.Allocate("a1", &Resource{10, 10, 10}) assert.Equal(t, true, allocated) @@ -46,7 +46,7 @@ func TestFixedSizeAllocator(t *testing.T) { } func TestFixedSizeAllocatorRace(t *testing.T) { - a := NewFixedSizeAllocator(&Resource{100, 100, 100}) + a := NewFixedSizeAllocator[string](&Resource{100, 100, 100}) wg := new(sync.WaitGroup) for i := 0; i < 100; i++ { wg.Add(1) @@ -64,7 +64,7 @@ func TestFixedSizeAllocatorRace(t *testing.T) { func TestPhysicalAwareFixedSizeAllocator(t *testing.T) { hwMemoryLimit := int64(float32(hardware.GetMemoryCount()) * 0.9) hwDiskLimit := int64(1<<63 - 1) - a := NewPhysicalAwareFixedSizeAllocator(&Resource{100, 100, 100}, hwMemoryLimit, hwDiskLimit, "/tmp") + a := NewPhysicalAwareFixedSizeAllocator[string](&Resource{100, 100, 100}, hwMemoryLimit, hwDiskLimit, "/tmp") allocated, _ := a.Allocate("a1", &Resource{10, 10, 10}) assert.Equal(t, true, allocated) From 8858fcb40af256a794bb4cc7efef303d933ee6f3 Mon Sep 17 00:00:00 2001 From: jaime Date: Tue, 4 Jun 2024 20:09:54 +0800 Subject: [PATCH 126/126] fix: fix loaded entity num is inaccurate (#33521) issue: #33520 Signed-off-by: jaime --- internal/querynodev2/metrics_info.go | 8 ++++---- pkg/metrics/querynode_metrics.go | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/internal/querynodev2/metrics_info.go b/internal/querynodev2/metrics_info.go index c41d12ed7ef6d..b4c50a5d1b9fb 100644 --- a/internal/querynodev2/metrics_info.go +++ b/internal/querynodev2/metrics_info.go @@ -103,11 +103,12 @@ func getQuotaMetrics(node *QueryNode) (*metricsinfo.QueryNodeQuotaMetrics, error } minTsafeChannel, minTsafe := node.tSafeManager.Min() - collections := 
node.manager.Collection.List() - nodeID := fmt.Sprint(node.GetNodeID()) + metrics.QueryNodeNumEntities.Reset() + metrics.QueryNodeEntitiesSize.Reset() + var totalGrowingSize int64 growingSegments := node.manager.Segment.GetBy(segments.WithType(segments.SegmentTypeGrowing)) growingGroupByCollection := lo.GroupBy(growingSegments, func(seg segments.Segment) int64 { @@ -125,6 +126,7 @@ func getQuotaMetrics(node *QueryNode) (*metricsinfo.QueryNodeQuotaMetrics, error growingGroupByPartition := lo.GroupBy(growingSegments, func(seg segments.Segment) int64 { return seg.Partition() }) + for _, segs := range growingGroupByPartition { numEntities := lo.SumBy(segs, func(seg segments.Segment) int64 { return seg.RowNum() @@ -136,7 +138,6 @@ func getQuotaMetrics(node *QueryNode) (*metricsinfo.QueryNodeQuotaMetrics, error fmt.Sprint(segment.Collection()), fmt.Sprint(segment.Partition()), segments.SegmentTypeGrowing.String(), - fmt.Sprint(len(segment.Indexes())), ).Set(float64(numEntities)) } @@ -166,7 +167,6 @@ func getQuotaMetrics(node *QueryNode) (*metricsinfo.QueryNodeQuotaMetrics, error fmt.Sprint(segment.Collection()), fmt.Sprint(segment.Partition()), segments.SegmentTypeSealed.String(), - fmt.Sprint(len(segment.Indexes())), ).Set(float64(numEntities)) } diff --git a/pkg/metrics/querynode_metrics.go b/pkg/metrics/querynode_metrics.go index d64c3cbb29869..2aa8e13c8a0da 100644 --- a/pkg/metrics/querynode_metrics.go +++ b/pkg/metrics/querynode_metrics.go @@ -370,7 +370,6 @@ var ( collectionIDLabelName, partitionIDLabelName, segmentStateLabelName, - indexCountLabelName, }) QueryNodeEntitiesSize = prometheus.NewGaugeVec(