diff --git a/.env b/.env index 96cd6e27ed183..6beb24525c5e1 100644 --- a/.env +++ b/.env @@ -5,12 +5,12 @@ IMAGE_ARCH=amd64 OS_NAME=ubuntu20.04 # for services.builder.image in docker-compose.yml -DATE_VERSION=20240429-6289f3a -LATEST_DATE_VERSION=20240429-6289f3a +DATE_VERSION=20240520-d27db99 +LATEST_DATE_VERSION=20240520-d27db99 # for services.gpubuilder.image in docker-compose.yml -GPU_DATE_VERSION=20240409-08bfb43 -LATEST_GPU_DATE_VERSION=20240409-08bfb43 +GPU_DATE_VERSION=20240520-c35eaaa +LATEST_GPU_DATE_VERSION=20240520-c35eaaa # for other services in docker-compose.yml MINIO_ADDRESS=minio:9000 diff --git a/.github/workflows/mac.yaml b/.github/workflows/mac.yaml index d17125b9d7c86..ccb21ebaab5af 100644 --- a/.github/workflows/mac.yaml +++ b/.github/workflows/mac.yaml @@ -56,7 +56,7 @@ jobs: - name: Setup Go environment uses: actions/setup-go@v2.2.0 with: - go-version: '~1.20.7' + go-version: '~1.21.10' - name: Mac Cache Go Mod Volumes uses: actions/cache@v3 with: diff --git a/.golangci.yml b/.golangci.yml index 09779daf2548c..91895ce0cc115 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,5 +1,5 @@ run: - go: "1.20" + go: "1.21" skip-dirs: - build - configs diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index cb6493a9fa12f..99bfc0f1546ae 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -104,7 +104,7 @@ You can use Vscode to integrate C++ and Go together. Please replace user.setting Linux systems (Recommend Ubuntu 20.04 or later): ```bash -go: >= 1.20 +go: >= 1.21 cmake: >= 3.18 gcc: 7.5 conan: 1.61 @@ -113,7 +113,7 @@ conan: 1.61 MacOS systems with x86_64 (Big Sur 11.5 or later recommended): ```bash -go: >= 1.20 +go: >= 1.21 cmake: >= 3.18 llvm: >= 15 conan: 1.61 @@ -122,7 +122,7 @@ conan: 1.61 MacOS systems with Apple Silicon (Monterey 12.0.1 or later recommended): ```bash -go: >= 1.20 (Arch=ARM64) +go: >= 1.21 (Arch=ARM64) cmake: >= 3.18 llvm: >= 15 conan: 1.61 @@ -178,7 +178,7 @@ Confirm that your `GOPATH` and `GOBIN` environment variables are correctly set a ```shell $ go version ``` -Note: go >= 1.20 is required to build Milvus. +Note: go >= 1.21 is required to build Milvus. #### Docker & Docker Compose @@ -236,7 +236,7 @@ sudo apt install -y clang-format clang-tidy ninja-build gcc g++ curl zip unzip t #### Install conan ```bash -# Verify python3 version, need python3 version > 3.8 +# Verify python3 version, need python3 version > 3.8 and version <= 3.11 python3 --version # pip install conan 1.61.0 pip3 install conan==1.61.0 @@ -245,8 +245,8 @@ pip3 install conan==1.61.0 #### Install GO 1.80 ```bash -wget https://go.dev/dl/go1.18.10.linux-arm64.tar.gz -tar zxf go1.18.10.linux-arm64.tar.gz +wget https://go.dev/dl/go1.21.10.linux-arm64.tar.gz +tar zxf go1.21.10.linux-arm64.tar.gz mv ./go /usr/local vi /etc/profile export PATH=$PATH:/usr/local/go/bin diff --git a/Makefile b/Makefile index 24cf720f10e97..9ccfe22604b0e 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ OBJPREFIX := "github.com/milvus-io/milvus/cmd/milvus" INSTALL_PATH := $(PWD)/bin LIBRARY_PATH := $(PWD)/lib +PGO_PATH := $(PWD)/configs/pgo OS := $(shell uname -s) mode = Release @@ -72,14 +73,14 @@ milvus: build-cpp print-build-info @echo "Building Milvus ..." 
@source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ - GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ + GO111MODULE=on $(GO) build -pgo=$(PGO_PATH)/default.pgo -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ -tags dynamic -o $(INSTALL_PATH)/milvus $(PWD)/cmd/main.go 1>/dev/null milvus-gpu: build-cpp-gpu print-gpu-build-info @echo "Building Milvus-gpu ..." @source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ - GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS_GPU)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ + GO111MODULE=on $(GO) build -pgo=$(PGO_PATH)/default.pgo -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS_GPU)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ -tags dynamic -o $(INSTALL_PATH)/milvus $(PWD)/cmd/main.go 1>/dev/null get-build-deps: @@ -106,7 +107,7 @@ getdeps: tools/bin/revive: tools/check/go.mod cd tools/check; \ - $(GO) build -o ../bin/revive github.com/mgechev/revive + $(GO) build -pgo=$(PGO_PATH)/default.pgo -o ../bin/revive github.com/mgechev/revive cppcheck: @#(env bash ${PWD}/scripts/core_build.sh -l) @@ -142,20 +143,25 @@ lint-fix: getdeps @$(INSTALL_PATH)/gofumpt -l -w internal/ @$(INSTALL_PATH)/gofumpt -l -w cmd/ @$(INSTALL_PATH)/gofumpt -l -w pkg/ + @$(INSTALL_PATH)/gofumpt -l -w client/ @$(INSTALL_PATH)/gofumpt -l -w tests/integration/ @echo "Running gci fix" @$(INSTALL_PATH)/gci write cmd/ --skip-generated -s standard -s default -s "prefix(github.com/milvus-io)" --custom-order @$(INSTALL_PATH)/gci write internal/ --skip-generated -s standard -s default -s "prefix(github.com/milvus-io)" --custom-order @$(INSTALL_PATH)/gci write pkg/ --skip-generated -s standard -s default -s "prefix(github.com/milvus-io)" --custom-order + @$(INSTALL_PATH)/gci write client/ --skip-generated -s standard -s default -s "prefix(github.com/milvus-io)" --custom-order @$(INSTALL_PATH)/gci write tests/ --skip-generated -s standard -s default -s "prefix(github.com/milvus-io)" --custom-order @echo "Running golangci-lint auto-fix" - @source $(PWD)/scripts/setenv.sh && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --fix --timeout=30m --config $(PWD)/.golangci.yml; cd pkg && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --fix --timeout=30m --config $(PWD)/.golangci.yml + @source $(PWD)/scripts/setenv.sh && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --fix --timeout=30m --config $(PWD)/.golangci.yml; + @source $(PWD)/scripts/setenv.sh && cd pkg && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --fix --timeout=30m --config $(PWD)/.golangci.yml + @source $(PWD)/scripts/setenv.sh && cd client && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --fix --timeout=30m --config $(PWD)/client/.golangci.yml #TODO: Check code specifications by golangci-lint static-check: getdeps @echo "Running $@ check" @source $(PWD)/scripts/setenv.sh && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --timeout=30m --config $(PWD)/.golangci.yml @source $(PWD)/scripts/setenv.sh && cd pkg 
&& GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --timeout=30m --config $(PWD)/.golangci.yml + @source $(PWD)/scripts/setenv.sh && cd client && GO111MODULE=on $(INSTALL_PATH)/golangci-lint run --timeout=30m --config $(PWD)/client/.golangci.yml verifiers: build-cpp getdeps cppcheck fmt static-check @@ -164,14 +170,14 @@ binlog: @echo "Building binlog ..." @source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ - GO111MODULE=on $(GO) build -ldflags="-r $${RPATH}" -o $(INSTALL_PATH)/binlog $(PWD)/cmd/tools/binlog/main.go 1>/dev/null + GO111MODULE=on $(GO) build -pgo=$(PGO_PATH)/default.pgo -ldflags="-r $${RPATH}" -o $(INSTALL_PATH)/binlog $(PWD)/cmd/tools/binlog/main.go 1>/dev/null MIGRATION_PATH = $(PWD)/cmd/tools/migration meta-migration: @echo "Building migration tool ..." @source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ - GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ + GO111MODULE=on $(GO) build -pgo=$(PGO_PATH)/default.pgo -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ -tags dynamic -o $(INSTALL_PATH)/meta-migration $(MIGRATION_PATH)/main.go 1>/dev/null INTERATION_PATH = $(PWD)/tests/integration @@ -366,7 +372,7 @@ clean: milvus-tools: print-build-info @echo "Building tools ..." @mkdir -p $(INSTALL_PATH)/tools && go env -w CGO_ENABLED="1" && GO111MODULE=on $(GO) build \ - -ldflags="-X 'main.BuildTags=$(BUILD_TAGS)' -X 'main.BuildTime=$(BUILD_TIME)' -X 'main.GitCommit=$(GIT_COMMIT)' -X 'main.GoVersion=$(GO_VERSION)'" \ + -pgo=$(PGO_PATH)/default.pgo -ldflags="-X 'main.BuildTags=$(BUILD_TAGS)' -X 'main.BuildTime=$(BUILD_TIME)' -X 'main.GitCommit=$(GIT_COMMIT)' -X 'main.GoVersion=$(GO_VERSION)'" \ -o $(INSTALL_PATH)/tools $(PWD)/cmd/tools/* 1>/dev/null rpm-setup: @@ -474,6 +480,7 @@ generate-mockery-datanode: getdeps $(INSTALL_PATH)/mockery --name=BinlogIO --dir=$(PWD)/internal/datanode/io --output=$(PWD)/internal/datanode/io --filename=mock_binlogio.go --with-expecter --structname=MockBinlogIO --outpkg=io --inpackage $(INSTALL_PATH)/mockery --name=FlowgraphManager --dir=$(PWD)/internal/datanode --output=$(PWD)/internal/datanode --filename=mock_fgmanager.go --with-expecter --structname=MockFlowgraphManager --outpkg=datanode --inpackage $(INSTALL_PATH)/mockery --name=ChannelManager --dir=$(PWD)/internal/datanode --output=$(PWD)/internal/datanode --filename=mock_channelmanager.go --with-expecter --structname=MockChannelManager --outpkg=datanode --inpackage + $(INSTALL_PATH)/mockery --name=Compactor --dir=$(PWD)/internal/datanode/compaction --output=$(PWD)/internal/datanode/compaction --filename=mock_compactor.go --with-expecter --structname=MockCompactor --outpkg=compaction --inpackage generate-mockery-metastore: getdeps $(INSTALL_PATH)/mockery --name=RootCoordCatalog --dir=$(PWD)/internal/metastore --output=$(PWD)/internal/metastore/mocks --filename=mock_rootcoord_catalog.go --with-expecter --structname=RootCoordCatalog --outpkg=mocks @@ -515,5 +522,5 @@ mmap-migration: @echo "Building migration tool ..." 
@source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ - GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ - -tags dynamic -o $(INSTALL_PATH)/mmap-migration $(MMAP_MIGRATION_PATH)/main.go 1>/dev/null \ No newline at end of file + GO111MODULE=on $(GO) build -pgo=$(PGO_PATH)/default.pgo -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ + -tags dynamic -o $(INSTALL_PATH)/mmap-migration $(MMAP_MIGRATION_PATH)/main.go 1>/dev/null diff --git a/README.md b/README.md index d1bcc8413b7a0..f1566e7fd2719 100644 --- a/README.md +++ b/README.md @@ -72,23 +72,26 @@ Check the requirements first. Linux systems (Ubuntu 20.04 or later recommended): ```bash -go: >= 1.20 +go: >= 1.21 cmake: >= 3.26.4 gcc: 7.5 +python: > 3.8 and <= 3.11 ``` MacOS systems with x86_64 (Big Sur 11.5 or later recommended): ```bash -go: >= 1.20 +go: >= 1.21 cmake: >= 3.26.4 llvm: >= 15 +python: > 3.8 and <= 3.11 ``` MacOS systems with Apple Silicon (Monterey 12.0.1 or later recommended): ```bash -go: >= 1.20 (Arch=ARM64) +go: >= 1.21 (Arch=ARM64) cmake: >= 3.26.4 llvm: >= 15 +python: > 3.8 and <= 3.11 ``` Clone Milvus repo and build. @@ -169,7 +172,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut ### All contributors
-
+
@@ -179,6 +182,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut + @@ -212,6 +216,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut + @@ -220,6 +225,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut + @@ -384,7 +390,6 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut - @@ -444,6 +449,7 @@ Contributions to Milvus are welcome from everyone. See [Guidelines for Contribut + diff --git a/README_CN.md b/README_CN.md index 2b97a7138535b..e11333770fd7f 100644 --- a/README_CN.md +++ b/README_CN.md @@ -68,7 +68,7 @@ Milvus 基于 [Apache 2.0 License](https://github.com/milvus-io/milvus/blob/mast 请先安装相关依赖。 ``` -go: 1.20 +go: 1.21 cmake: >=3.18 gcc: 7.5 protobuf: >=3.7 @@ -154,7 +154,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 ### All contributors
-
+
@@ -164,6 +164,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 + @@ -197,6 +198,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 + @@ -205,6 +207,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 + @@ -369,7 +372,6 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 - @@ -429,6 +431,7 @@ Milvus [训练营](https://github.com/milvus-io/bootcamp)能够帮助你了解 + diff --git a/build/docker/builder/cpu/amazonlinux2023/Dockerfile b/build/docker/builder/cpu/amazonlinux2023/Dockerfile index d5516fd46ab0f..0e0502d0ad621 100644 --- a/build/docker/builder/cpu/amazonlinux2023/Dockerfile +++ b/build/docker/builder/cpu/amazonlinux2023/Dockerfile @@ -14,10 +14,19 @@ FROM amazonlinux:2023 ARG TARGETARCH RUN dnf install -y wget g++ gcc gdb libatomic libstdc++-static ninja-build git make zip unzip tar which \ - autoconf automake golang python3 python3-pip perl-FindBin texinfo \ + autoconf automake python3 python3-pip perl-FindBin texinfo \ pkg-config libuuid-devel libaio perl-IPC-Cmd libasan openblas-devel && \ rm -rf /var/cache/yum/* +ENV GOPATH /go +ENV GOROOT /usr/local/go +ENV GO111MODULE on +ENV PATH $GOPATH/bin:$GOROOT/bin:$PATH +RUN mkdir -p /usr/local/go && wget -qO- "https://go.dev/dl/go1.21.10.linux-$TARGETARCH.tar.gz" | tar --strip-components=1 -xz -C /usr/local/go && \ + mkdir -p "$GOPATH/src" "$GOPATH/bin" && \ + go clean --modcache && \ + chmod -R 777 "$GOPATH" && chmod -R a+w $(go env GOTOOLDIR) + RUN pip3 install conan==1.61.0 RUN echo "target arch $TARGETARCH" diff --git a/ci/jenkins/PR-Arm.groovy b/ci/jenkins/PR-Arm.groovy new file mode 100644 index 0000000000000..cdf50e1678b27 --- /dev/null +++ b/ci/jenkins/PR-Arm.groovy @@ -0,0 +1,324 @@ +#!/usr/bin/env groovy + +int total_timeout_minutes = 60 * 5 +int e2e_timeout_seconds = 120 * 60 +def imageTag='' +int case_timeout_seconds = 20 * 60 +def chart_version='4.1.28' +pipeline { + options { + timestamps() + timeout(time: total_timeout_minutes, unit: 'MINUTES') + buildDiscarder logRotator(artifactDaysToKeepStr: '30') + parallelsAlwaysFailFast() + preserveStashes(buildCount: 5) + disableConcurrentBuilds(abortPrevious: true) + + } + agent { + kubernetes { + cloud '4am' + defaultContainer 'main' + yamlFile 'ci/jenkins/pod/rte-arm.yaml' + customWorkspace '/home/jenkins/agent/workspace' + } + } + environment { + PROJECT_NAME = 'milvus' + SEMVER = "${BRANCH_NAME.contains('/') ? BRANCH_NAME.substring(BRANCH_NAME.lastIndexOf('/') + 1) : BRANCH_NAME}" + DOCKER_BUILDKIT = 1 + ARTIFACTS = "${env.WORKSPACE}/_artifacts" + CI_DOCKER_CREDENTIAL_ID = "harbor-milvus-io-registry" + MILVUS_HELM_NAMESPACE = "milvus-ci" + DISABLE_KIND = true + HUB = 'harbor.milvus.io/milvus' + JENKINS_BUILD_ID = "${env.BUILD_ID}" + CI_MODE="pr" + SHOW_MILVUS_CONFIGMAP= true + + DOCKER_CREDENTIALS_ID = "dockerhub" + TARGET_REPO = "milvusdb" + HARBOR_REPO = "harbor.milvus.io" + } + + stages { + stage ('Build'){ + steps { + container('main') { + script { + sh 'printenv' + def date = sh(returnStdout: true, script: 'date +%Y%m%d').trim() + sh 'git config --global --add safe.directory /home/jenkins/agent/workspace' + def gitShortCommit = sh(returnStdout: true, script: 'git rev-parse --short HEAD').trim() + imageTag="${env.BRANCH_NAME}-${date}-${gitShortCommit}" + + + sh """ + echo "Building image with tag: ${imageTag}" + + set -a # automatically export all variables from .env + . 
.env + set +a # stop automatically + + + docker run --net=host -v /root/.conan:/root/.conan -v \$(pwd):/root/milvus -w /root/milvus milvusdb/milvus-env:ubuntu20.04-\${DATE_VERSION} sh -c "make clean && make install" + """ + + withCredentials([usernamePassword(credentialsId: "${env.CI_DOCKER_CREDENTIAL_ID}", usernameVariable: 'CI_REGISTRY_USERNAME', passwordVariable: 'CI_REGISTRY_PASSWORD')]){ + sh "docker login ${env.HARBOR_REPO} -u '${CI_REGISTRY_USERNAME}' -p '${CI_REGISTRY_PASSWORD}'" + sh """ + export MILVUS_HARBOR_IMAGE_REPO="${env.HARBOR_REPO}/milvus/milvus" + export MILVUS_IMAGE_TAG="${imageTag}" + + docker build --build-arg TARGETARCH=arm64 -f "./build/docker/milvus/ubuntu20.04/Dockerfile" -t \${MILVUS_HARBOR_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} . + + docker push \${MILVUS_HARBOR_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} + docker logout + """ + } + + // stash imageTag info for rebuild install & E2E Test only + sh "echo ${imageTag} > imageTag.txt" + stash includes: 'imageTag.txt', name: 'imageTag' + + } + } + } + } + + + stage('Install & E2E Test') { + matrix { + axes { + axis { + name 'MILVUS_SERVER_TYPE' + values 'standalone' + } + axis { + name 'MILVUS_CLIENT' + values 'pymilvus' + } + } + + stages { + stage('Install') { + agent { + kubernetes { + cloud '4am' + inheritFrom 'milvus-e2e-4am' + defaultContainer 'main' + yamlFile 'ci/jenkins/pod/rte-build.yaml' + customWorkspace '/home/jenkins/agent/workspace' + } + } + steps { + container('main') { + stash includes: 'tests/**', name: 'testCode', useDefaultExcludes: false + dir ('tests/scripts') { + script { + sh 'printenv' + def clusterEnabled = "false" + def valuesFile = "pr-arm.yaml" + + if ("${MILVUS_SERVER_TYPE}" == "standalone-one-pod") { + valuesFile = "nightly-one-pod.yaml" + } + + if ("${MILVUS_CLIENT}" == "pymilvus") { + if ("${imageTag}"==''){ + dir ("imageTag"){ + try{ + unstash 'imageTag' + imageTag=sh(returnStdout: true, script: 'cat imageTag.txt | tr -d \'\n\r\'') + }catch(e){ + print "No Image Tag info remained ,please rerun build to build new image." 
+ exit 1 + } + } + } + // modify values file to enable kafka + if ("${MILVUS_SERVER_TYPE}".contains("kafka")) { + sh ''' + apt-get update + apt-get install wget -y + wget https://github.com/mikefarah/yq/releases/download/v4.34.1/yq_linux_amd64 -O /usr/bin/yq + chmod +x /usr/bin/yq + ''' + sh """ + cp values/ci/pr-4am.yaml values/ci/pr_kafka.yaml + yq -i '.pulsar.enabled=false' values/ci/pr_kafka.yaml + yq -i '.kafka.enabled=true' values/ci/pr_kafka.yaml + yq -i '.kafka.metrics.kafka.enabled=true' values/ci/pr_kafka.yaml + yq -i '.kafka.metrics.jmx.enabled=true' values/ci/pr_kafka.yaml + yq -i '.kafka.metrics.serviceMonitor.enabled=true' values/ci/pr_kafka.yaml + """ + } + withCredentials([usernamePassword(credentialsId: "${env.CI_DOCKER_CREDENTIAL_ID}", usernameVariable: 'CI_REGISTRY_USERNAME', passwordVariable: 'CI_REGISTRY_PASSWORD')]){ + if ("${MILVUS_SERVER_TYPE}" == "standalone-one-pod") { + try { + sh """ + MILVUS_CLUSTER_ENABLED=${clusterEnabled} \ + MILVUS_HELM_REPO="https://nexus-ci.zilliz.cc/repository/milvus-proxy" \ + TAG=${imageTag}\ + ./e2e-k8s.sh \ + --skip-export-logs \ + --skip-cleanup \ + --skip-setup \ + --skip-test \ + --skip-build \ + --skip-build-image \ + --install-extra-arg " + --set etcd.metrics.enabled=true \ + --set etcd.metrics.podMonitor.enabled=true \ + --set indexCoordinator.gc.interval=1 \ + --set indexNode.disk.enabled=true \ + --set queryNode.disk.enabled=true \ + --set standalone.disk.enabled=true \ + --version ${chart_version} \ + -f values/ci/${valuesFile}" + """ + } catch (Exception e) { + echo "Tests failed, but the build will not be marked as failed." + } + + }else{ + sh """ + MILVUS_CLUSTER_ENABLED=${clusterEnabled} \ + MILVUS_HELM_REPO="https://nexus-ci.zilliz.cc/repository/milvus-proxy" \ + TAG=${imageTag}\ + ./e2e-k8s.sh \ + --skip-export-logs \ + --skip-cleanup \ + --skip-setup \ + --skip-test \ + --skip-build \ + --skip-build-image \ + --install-extra-arg " + --set etcd.metrics.enabled=true \ + --set etcd.metrics.podMonitor.enabled=true \ + --set indexCoordinator.gc.interval=1 \ + --set indexNode.disk.enabled=true \ + --set queryNode.disk.enabled=true \ + --set standalone.disk.enabled=true \ + --version ${chart_version} \ + -f values/ci/${valuesFile}" + """ + } + } + } else { + error "Error: Unsupported Milvus client: ${MILVUS_CLIENT}" + } + } + } + } + + } + } + stage('E2E Test'){ + options { + skipDefaultCheckout() + } + agent { + kubernetes { + cloud '4am' + inheritFrom 'default' + defaultContainer 'main' + yamlFile 'ci/jenkins/pod/e2e.yaml' + customWorkspace '/home/jenkins/agent/workspace' + } + } + steps { + container('pytest') { + unstash('testCode') + script { + sh 'ls -lah' + } + dir ('tests/scripts') { + script { + def release_name=sh(returnStdout: true, script: './get_release_name.sh') + def clusterEnabled = 'false' + if ("${MILVUS_SERVER_TYPE}".contains("distributed")) { + clusterEnabled = "true" + } + if ("${MILVUS_CLIENT}" == "pymilvus") { + if ("${MILVUS_SERVER_TYPE}" == "standalone-one-pod") { + try { + sh """ + MILVUS_HELM_RELEASE_NAME="${release_name}" \ + MILVUS_HELM_NAMESPACE="milvus-ci" \ + MILVUS_CLUSTER_ENABLED="${clusterEnabled}" \ + TEST_TIMEOUT="${e2e_timeout_seconds}" \ + ./ci_e2e_4am.sh "-n 6 -x --tags L0 L1 --timeout ${case_timeout_seconds}" + """ + } catch (Exception e) { + echo "Tests failed, but the build will not be marked as failed." 
+ } + }else{ + sh """ + MILVUS_HELM_RELEASE_NAME="${release_name}" \ + MILVUS_HELM_NAMESPACE="milvus-ci" \ + MILVUS_CLUSTER_ENABLED="${clusterEnabled}" \ + TEST_TIMEOUT="${e2e_timeout_seconds}" \ + ./ci_e2e_4am.sh "-n 6 -x --tags L0 L1 --timeout ${case_timeout_seconds}" + """ + } + } else { + error "Error: Unsupported Milvus client: ${MILVUS_CLIENT}" + } + } + } + } + } + post{ + always { + container('pytest'){ + dir("${env.ARTIFACTS}") { + sh "tar -zcvf ${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${MILVUS_CLIENT}-pytest-logs.tar.gz /tmp/ci_logs/test --remove-files || true" + archiveArtifacts artifacts: "${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${MILVUS_CLIENT}-pytest-logs.tar.gz ", allowEmptyArchive: true + } + } + } + + } + } + } + post{ + always { + container('main') { + dir ('tests/scripts') { + script { + def release_name=sh(returnStdout: true, script: './get_release_name.sh') + sh "kubectl get pods -n ${MILVUS_HELM_NAMESPACE} | grep ${release_name} " + sh "./uninstall_milvus.sh --release-name ${release_name}" + sh "./ci_logs.sh --log-dir /ci-logs --artifacts-name ${env.ARTIFACTS}/artifacts-${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${SEMVER}-${env.BUILD_NUMBER}-${MILVUS_CLIENT}-e2e-logs \ + --release-name ${release_name}" + dir("${env.ARTIFACTS}") { + archiveArtifacts artifacts: "artifacts-${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${SEMVER}-${env.BUILD_NUMBER}-${MILVUS_CLIENT}-e2e-logs.tar.gz", allowEmptyArchive: true + } + } + } + } + } + } + + } + + } + } + post{ + unsuccessful { + container('jnlp') { + dir ('tests/scripts') { + script { + def authorEmail = sh(returnStdout: true, script: './get_author_email.sh ') + emailext subject: '$DEFAULT_SUBJECT', + body: '$DEFAULT_CONTENT', + recipientProviders: [developers(), culprits()], + replyTo: '$DEFAULT_REPLYTO', + to: "${authorEmail},devops@zilliz.com" + } + } + } + } + } +} diff --git a/ci/jenkins/PublishArmBasedGPUImages.groovy b/ci/jenkins/PublishArmBasedGPUImages.groovy index 3ed8affb075f4..4540c6f45c3e9 100644 --- a/ci/jenkins/PublishArmBasedGPUImages.groovy +++ b/ci/jenkins/PublishArmBasedGPUImages.groovy @@ -2,7 +2,15 @@ pipeline { agent { - label 'arm' + kubernetes { + cloud '4am' + defaultContainer 'main' + yamlFile "ci/jenkins/pod/rte-arm.yaml" + customWorkspace '/home/jenkins/agent/workspace' + // We allow this pod to remain active for a while, later jobs can + // reuse cache in previous created nodes. + // idleMinutes 120 + } } options { @@ -26,16 +34,20 @@ pipeline { steps { script { sh """ - set -a # automatically export all variables from .env - . ${WORKSPACE}/.env - set +a # stop automatically - - docker run -v \$(pwd):/root/milvus -v \$(pwd)/.docker/.conan:/root/.conan -w /root/milvus milvusdb/milvus-env:gpu-ubuntu22.04-\${GPU_DATE_VERSION} sh -c "make clean && make gpu-install" + git config --global --add safe.directory /home/jenkins/agent/workspace """ def date = sh(returnStdout: true, script: 'date +%Y%m%d').trim() def gitShortCommit = sh(returnStdout: true, script: 'git rev-parse --short HEAD').trim() + sh """ + set -a # automatically export all variables from .env + . 
.env + set +a # stop automatically + + docker run --net=host -v \$(pwd):/root/milvus -v /root/.conan:/root/.conan -w /root/milvus milvusdb/milvus-env:gpu-ubuntu22.04-\${GPU_DATE_VERSION} sh -c "make clean && make gpu-install" + """ + withCredentials([usernamePassword(credentialsId: "${env.DOCKER_CREDENTIALS_ID}", usernameVariable: 'DOCKER_USERNAME', passwordVariable: 'DOCKER_PASSWORD')]) { sh 'docker login -u ${DOCKER_USERNAME} -p ${DOCKER_PASSWORD}' sh """ diff --git a/ci/jenkins/PublishArmBasedImages.groovy b/ci/jenkins/PublishArmBasedImages.groovy new file mode 100644 index 0000000000000..0fd77c7da7666 --- /dev/null +++ b/ci/jenkins/PublishArmBasedImages.groovy @@ -0,0 +1,93 @@ +#!/usr/bin/env groovy + +pipeline { + agent { + kubernetes { + cloud '4am' + defaultContainer 'main' + yamlFile "ci/jenkins/pod/rte-arm.yaml" + customWorkspace '/home/jenkins/agent/workspace' + // We allow this pod to remain active for a while, later jobs can + // reuse cache in previous created nodes. + // idleMinutes 120 + } + } + parameters { + string(name: 'image-tag', defaultValue: '', description: 'the image tag to be pushed to image registry') + } + + options { + timestamps() + timeout(time: 300, unit: 'MINUTES') + // parallelsAlwaysFailFast() + disableConcurrentBuilds() + } + + environment { + DOCKER_CREDENTIALS_ID = "dockerhub" + DOCKER_BUILDKIT = 1 + TARGET_REPO = "milvusdb" + CI_DOCKER_CREDENTIAL_ID = "harbor-milvus-io-registry" + HARBOR_REPO = "harbor.milvus.io" + } + + stages { + stage('Publish Milvus cpu Images'){ + + steps { + script { + sh """ + git config --global --add safe.directory /home/jenkins/agent/workspace + """ + + def tag = "" + if (params['image-tag'] == '') { + def date = sh(returnStdout: true, script: 'date +%Y%m%d').trim() + def gitShortCommit = sh(returnStdout: true, script: 'git rev-parse --short HEAD').trim() + tag = "${env.BRANCH_NAME}-${date}-${gitShortCommit}-arm" + }else{ + tag = params['image-tag'] + } + + sh """ + echo "Building image with tag: ${tag}" + + set -a # automatically export all variables from .env + . .env + set +a # stop automatically + + + docker run --net=host -v /root/.conan:/root/.conan -v \$(pwd):/root/milvus -w /root/milvus milvusdb/milvus-env:ubuntu20.04-\${DATE_VERSION} sh -c "make clean && make install" + """ + + + withCredentials([usernamePassword(credentialsId: "${env.DOCKER_CREDENTIALS_ID}", usernameVariable: 'DOCKER_USERNAME', passwordVariable: 'DOCKER_PASSWORD')]) { + sh 'docker login -u ${DOCKER_USERNAME} -p ${DOCKER_PASSWORD}' + sh """ + export MILVUS_IMAGE_REPO="${env.TARGET_REPO}/milvus" + export MILVUS_HARBOR_IMAGE_REPO="${env.HARBOR_REPO}/milvus/milvus" + export MILVUS_IMAGE_TAG="${tag}" + + docker build --build-arg TARGETARCH=arm64 -f "./build/docker/milvus/ubuntu20.04/Dockerfile" -t \${MILVUS_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} . 
+ + docker push \${MILVUS_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} + docker tag \${MILVUS_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} \${MILVUS_HARBOR_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} + docker logout + """ + } + + withCredentials([usernamePassword(credentialsId: "${env.CI_DOCKER_CREDENTIAL_ID}", usernameVariable: 'CI_REGISTRY_USERNAME', passwordVariable: 'CI_REGISTRY_PASSWORD')]){ + sh "docker login ${env.HARBOR_REPO} -u '${CI_REGISTRY_USERNAME}' -p '${CI_REGISTRY_PASSWORD}'" + sh """ + export MILVUS_HARBOR_IMAGE_REPO="${env.HARBOR_REPO}/milvus/milvus" + export MILVUS_IMAGE_TAG="${tag}" + docker push \${MILVUS_HARBOR_IMAGE_REPO}:\${MILVUS_IMAGE_TAG} + docker logout + """ + } + } + } + } + } + +} diff --git a/ci/jenkins/pod/rte-arm.yaml b/ci/jenkins/pod/rte-arm.yaml new file mode 100644 index 0000000000000..7d10349f40275 --- /dev/null +++ b/ci/jenkins/pod/rte-arm.yaml @@ -0,0 +1,66 @@ +apiVersion: v1 +kind: Pod +metadata: + labels: + app: milvus-e2e + namespace: milvus-ci +spec: + hostNetwork: true + securityContext: # Optional: Restrict capabilities for some security hardening + privileged: true + tolerations: + - key: "node-role.kubernetes.io/arm" + operator: "Exists" + effect: "NoSchedule" + nodeSelector: + "kubernetes.io/arch": "arm64" + enableServiceLinks: false + containers: + - name: main + image: docker:latest + args: ["sleep", "36000"] + # workingDir: /home/jenkins/agent/workspace + securityContext: + privileged: true + resources: + limits: + cpu: "6" + memory: 12Gi + requests: + cpu: "0.5" + memory: 5Gi + volumeMounts: + - mountPath: /var/run + name: docker-root + - mountPath: /root/.conan + name: build-cache + # - mountPath: /ci-logs + # name: ci-logs + - name: dind + image: docker:dind + securityContext: + privileged: true + args: ["dockerd","--host=unix:///var/run/docker.sock","--registry-mirror=https://docker-nexus-ci.zilliz.cc"] + resources: + limits: + cpu: "6" + memory: 12Gi + requests: + cpu: "0.5" + memory: 5Gi + volumeMounts: + - mountPath: /var/run + name: docker-root + - mountPath: /root/.conan + name: build-cache + volumes: + - emptyDir: {} + name: docker-root + - hostPath: + path: /root/.conan + type: DirectoryOrCreate + name: build-cache + # - name: ci-logs + # nfs: + # path: /ci-logs + # server: 172.16.70.249 diff --git a/client/.golangci.yml b/client/.golangci.yml new file mode 100644 index 0000000000000..8b90a9f55a473 --- /dev/null +++ b/client/.golangci.yml @@ -0,0 +1,172 @@ +run: + go: "1.21" + skip-dirs: + - build + - configs + - deployments + - docs + - scripts + - internal/core + - cmake_build + skip-files: + - partial_search_test.go + +linters: + disable-all: true + enable: + - gosimple + - govet + - ineffassign + - staticcheck + - decorder + - depguard + - gofmt + - goimports + - gosec + - revive + - unconvert + - misspell + - typecheck + - durationcheck + - forbidigo + - gci + - whitespace + - gofumpt + - gocritic + +linters-settings: + gci: + sections: + - standard + - default + - prefix(github.com/milvus-io) + custom-order: true + gofumpt: + lang-version: "1.18" + module-path: github.com/milvus-io + goimports: + local-prefixes: github.com/milvus-io + revive: + rules: + - name: unused-parameter + disabled: true + - name: var-naming + severity: warning + disabled: false + arguments: + - ["ID"] # Allow list + - name: context-as-argument + severity: warning + disabled: false + arguments: + - allowTypesBefore: "*testing.T" + - name: datarace + severity: warning + disabled: false + - name: duplicated-imports + severity: warning + disabled: false + - name: waitgroup-by-value + 
severity: warning + disabled: false + - name: indent-error-flow + severity: warning + disabled: false + arguments: + - "preserveScope" + - name: range-val-in-closure + severity: warning + disabled: false + - name: range-val-address + severity: warning + disabled: false + - name: string-of-int + severity: warning + disabled: false + misspell: + locale: US + gocritic: + enabled-checks: + - ruleguard + settings: + ruleguard: + failOnError: true + rules: "ruleguard/rules.go" + depguard: + rules: + main: + deny: + - pkg: "errors" + desc: not allowed, use github.com/cockroachdb/errors + - pkg: "github.com/pkg/errors" + desc: not allowed, use github.com/cockroachdb/errors + - pkg: "github.com/pingcap/errors" + desc: not allowed, use github.com/cockroachdb/errors + - pkg: "golang.org/x/xerrors" + desc: not allowed, use github.com/cockroachdb/errors + - pkg: "github.com/go-errors/errors" + desc: not allowed, use github.com/cockroachdb/errors + - pkg: "io/ioutil" + desc: ioutil is deprecated after 1.16, 1.17, use os and io package instead + - pkg: "github.com/tikv/client-go/rawkv" + desc: not allowed, use github.com/tikv/client-go/v2/txnkv + - pkg: "github.com/tikv/client-go/v2/rawkv" + desc: not allowed, use github.com/tikv/client-go/v2/txnkv + forbidigo: + forbid: + - '^time\.Tick$' + - 'return merr\.Err[a-zA-Z]+' + - 'merr\.Wrap\w+\(\)\.Error\(\)' + - '\.(ErrorCode|Reason) = ' + - 'Reason:\s+\w+\.Error\(\)' + - 'errors.New\((.+)\.GetReason\(\)\)' + - 'commonpb\.Status\{[\s\n]*ErrorCode:[\s\n]*.+[\s\S\n]*?\}' + - 'os\.Open\(.+\)' + - 'os\.ReadFile\(.+\)' + - 'os\.WriteFile\(.+\)' + - "runtime.NumCPU" + - "runtime.GOMAXPROCS(0)" + #- 'fmt\.Print.*' WIP + +issues: + exclude-use-default: false + exclude-rules: + - path: .+_test\.go + linters: + - forbidigo + exclude: + - should have a package comment + - should have comment + - should be of the form + - should not use dot imports + - which can be annoying to use + # Binds to all network interfaces + - G102 + # Use of unsafe calls should be audited + - G103 + # Errors unhandled + - G104 + # file/folder Permission + - G301 + - G302 + # Potential file inclusion via variable + - G304 + # Deferring unsafe method like *os.File Close + - G307 + # TLS MinVersion too low + - G402 + # Use of weak random number generator math/rand + - G404 + # Unused parameters + - SA1019 + # defer return errors + - SA5001 + + # Maximum issues count per one linter. Set to 0 to disable. Default is 50. + max-issues-per-linter: 0 + # Maximum count of issues with the same text. Set to 0 to disable. Default is 3. 
+ max-same-issues: 0 + +service: + # use the fixed version to not introduce new linters unexpectedly + golangci-lint-version: 1.55.2 diff --git a/client/client_config.go b/client/client_config.go index 63a4f6d2b8565..01f82877f7967 100644 --- a/client/client_config.go +++ b/client/client_config.go @@ -10,10 +10,11 @@ import ( "time" "github.com/cockroachdb/errors" - "github.com/milvus-io/milvus/pkg/util/crypto" "google.golang.org/grpc" "google.golang.org/grpc/backoff" "google.golang.org/grpc/keepalive" + + "github.com/milvus-io/milvus/pkg/util/crypto" ) const ( diff --git a/client/client_test.go b/client/client_test.go index f23a9d9941d83..c6d0867ee8af3 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -31,7 +31,7 @@ func (s *ClientSuite) TestNewClient() { s.NotNil(c) }) - s.Run("emtpy_addr", func() { + s.Run("empty_addr", func() { _, err := New(ctx, &ClientConfig{}) s.Error(err) s.T().Log(err) diff --git a/client/collection.go b/client/collection.go index 039ff2460d64c..4031c687d9993 100644 --- a/client/collection.go +++ b/client/collection.go @@ -62,10 +62,6 @@ func (c *Client) CreateCollection(ctx context.Context, option CreateCollectionOp return nil } -type ListCollectionOption interface { - Request() *milvuspb.ShowCollectionsRequest -} - func (c *Client) ListCollections(ctx context.Context, option ListCollectionOption, callOptions ...grpc.CallOption) (collectionNames []string, err error) { req := option.Request() err = c.callService(func(milvusService milvuspb.MilvusServiceClient) error { @@ -82,7 +78,7 @@ func (c *Client) ListCollections(ctx context.Context, option ListCollectionOptio return collectionNames, err } -func (c *Client) DescribeCollection(ctx context.Context, option *describeCollectionOption, callOptions ...grpc.CallOption) (collection *entity.Collection, err error) { +func (c *Client) DescribeCollection(ctx context.Context, option DescribeCollectionOption, callOptions ...grpc.CallOption) (collection *entity.Collection, err error) { req := option.Request() err = c.callService(func(milvusService milvuspb.MilvusServiceClient) error { resp, err := milvusService.DescribeCollection(ctx, req, callOptions...) 
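Note: the `DescribeCollection` signature above now takes the exported `DescribeCollectionOption` interface (and `ListCollectionOption` moves into `collection_options.go` below). A minimal usage sketch of the option-based call pattern, based on the example code added in `client/example/database/main.go` later in this diff (the address is a placeholder, not part of the change):

```go
package main

import (
	"context"
	"log"

	milvusclient "github.com/milvus-io/milvus/client/v2"
)

func main() {
	ctx := context.Background()

	// Connect to a local Milvus instance (placeholder address).
	c, err := milvusclient.New(ctx, &milvusclient.ClientConfig{Address: "127.0.0.1:19530"})
	if err != nil {
		log.Fatal("failed to connect to milvus: ", err.Error())
	}

	// ListCollections accepts the exported ListCollectionOption interface.
	names, err := c.ListCollections(ctx, milvusclient.NewListCollectionOption())
	if err != nil {
		log.Fatal("failed to list collections: ", err.Error())
	}

	// DescribeCollection now accepts the exported DescribeCollectionOption
	// interface rather than the unexported option struct.
	for _, name := range names {
		coll, err := c.DescribeCollection(ctx, milvusclient.NewDescribeCollectionOption(name))
		if err != nil {
			log.Fatal(err.Error())
		}
		log.Println(coll.Name)
	}
}
```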
diff --git a/client/collection_options.go b/client/collection_options.go index adb59e37b5145..696fe702273a2 100644 --- a/client/collection_options.go +++ b/client/collection_options.go @@ -159,6 +159,10 @@ func NewCreateCollectionOption(name string, collectionSchema *entity.Schema) *cr } } +type ListCollectionOption interface { + Request() *milvuspb.ShowCollectionsRequest +} + type listCollectionOption struct{} func (opt *listCollectionOption) Request() *milvuspb.ShowCollectionsRequest { diff --git a/client/column/columns.go b/client/column/columns.go index 8a2a52d87941f..a30b064e15235 100644 --- a/client/column/columns.go +++ b/client/column/columns.go @@ -239,7 +239,7 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { data := x.FloatVector.GetData() dim := int(vectors.GetDim()) if end < 0 { - end = int(len(data) / dim) + end = len(data) / dim } vector := make([][]float32, 0, end-begin) // shall not have remanunt for i := begin; i < end; i++ { @@ -262,7 +262,7 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { dim := int(vectors.GetDim()) blen := dim / 8 if end < 0 { - end = int(len(data) / blen) + end = len(data) / blen } vector := make([][]byte, 0, end-begin) for i := begin; i < end; i++ { @@ -281,7 +281,7 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { data := x.Float16Vector dim := int(vectors.GetDim()) if end < 0 { - end = int(len(data) / dim) + end = len(data) / dim } vector := make([][]byte, 0, end-begin) for i := begin; i < end; i++ { @@ -300,7 +300,7 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { data := x.Bfloat16Vector dim := int(vectors.GetDim()) if end < 0 { - end = int(len(data) / dim) + end = len(data) / dim } vector := make([][]byte, 0, end-begin) // shall not have remanunt for i := begin; i < end; i++ { diff --git a/client/column/sparse.go b/client/column/sparse.go index b9d20fd616ded..cc02e3ee2ffe2 100644 --- a/client/column/sparse.go +++ b/client/column/sparse.go @@ -22,6 +22,7 @@ import ( "math" "github.com/cockroachdb/errors" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/entity" ) diff --git a/client/column/sparse_test.go b/client/column/sparse_test.go index 387df9efe7d7c..564f223ff1532 100644 --- a/client/column/sparse_test.go +++ b/client/column/sparse_test.go @@ -21,9 +21,10 @@ import ( "math/rand" "testing" - "github.com/milvus-io/milvus/client/v2/entity" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/milvus-io/milvus/client/v2/entity" ) func TestColumnSparseEmbedding(t *testing.T) { diff --git a/client/column/varchar.go b/client/column/varchar.go index 9ed1646450189..63aff96ae94c8 100644 --- a/client/column/varchar.go +++ b/client/column/varchar.go @@ -17,9 +17,10 @@ package column import ( - "errors" "fmt" + "github.com/cockroachdb/errors" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/entity" ) @@ -70,7 +71,7 @@ func (c *ColumnVarChar) FieldData() *schemapb.FieldData { } data := make([]string, 0, c.Len()) for i := 0; i < c.Len(); i++ { - data = append(data, string(c.values[i])) + data = append(data, c.values[i]) } fd.Field = &schemapb.FieldData_Scalars{ Scalars: &schemapb.ScalarField{ diff --git a/client/database_test.go b/client/database_test.go index f46a0cafb8b7b..d7555d7d5aa44 100644 --- a/client/database_test.go +++ b/client/database_test.go @@ -5,11 +5,12 @@ import ( "fmt" "testing" + 
mock "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/pkg/util/merr" - mock "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" ) type DatabaseSuite struct { diff --git a/client/entity/schema.go b/client/entity/schema.go index ce30b53f51483..8225ba6c2fd3c 100644 --- a/client/entity/schema.go +++ b/client/entity/schema.go @@ -19,6 +19,8 @@ package entity import ( "strconv" + "github.com/cockroachdb/errors" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" ) @@ -293,6 +295,18 @@ func (f *Field) WithDim(dim int64) *Field { return f } +func (f *Field) GetDim() (int64, error) { + dimStr, has := f.TypeParams[TypeParamDim] + if !has { + return -1, errors.New("field with no dim") + } + dim, err := strconv.ParseInt(dimStr, 10, 64) + if err != nil { + return -1, errors.Newf("field with bad format dim: %s", err.Error()) + } + return dim, nil +} + func (f *Field) WithMaxLength(maxLen int64) *Field { if f.TypeParams == nil { f.TypeParams = make(map[string]string) diff --git a/client/entity/sparse.go b/client/entity/sparse.go index 00f41c60d355e..87edf58d152b2 100644 --- a/client/entity/sparse.go +++ b/client/entity/sparse.go @@ -29,6 +29,7 @@ type SparseEmbedding interface { Len() int // the actual items in this vector Get(idx int) (pos uint32, value float32, ok bool) Serialize() []byte + FieldType() FieldType } var ( @@ -56,7 +57,7 @@ func (e sliceSparseEmbedding) FieldType() FieldType { } func (e sliceSparseEmbedding) Get(idx int) (uint32, float32, bool) { - if idx < 0 || idx >= int(e.len) { + if idx < 0 || idx >= e.len { return 0, 0, false } return e.positions[idx], e.values[idx], true @@ -88,7 +89,7 @@ func deserializeSliceSparceEmbedding(bs []byte) (sliceSparseEmbedding, error) { return sliceSparseEmbedding{}, errors.New("not valid sparse embedding bytes") } - length = length / 8 + length /= 8 result := sliceSparseEmbedding{ positions: make([]uint32, length), diff --git a/client/example/database/main.go b/client/example/database/main.go index 5b978b6261549..0069923d9a2c6 100644 --- a/client/example/database/main.go +++ b/client/example/database/main.go @@ -5,6 +5,7 @@ import ( "log" milvusclient "github.com/milvus-io/milvus/client/v2" + "github.com/milvus-io/milvus/client/v2/entity" ) const ( @@ -34,4 +35,55 @@ func main() { log.Fatal("failed to list databases", err.Error()) } log.Println("=== Databases: ", dbNames) + + schema := entity.NewSchema().WithName("hello_milvus"). + WithField(entity.NewField().WithName("ID").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)). 
+ WithField(entity.NewField().WithName("Vector").WithDataType(entity.FieldTypeFloatVector).WithDim(128)) + + if err := c.CreateCollection(ctx, milvusclient.NewCreateCollectionOption("hello_milvus", schema)); err != nil { + log.Fatal("failed to create collection:", err.Error()) + } + + collections, err := c.ListCollections(ctx, milvusclient.NewListCollectionOption()) + if err != nil { + log.Fatal("failed to list collections,", err.Error()) + } + + for _, collectionName := range collections { + collection, err := c.DescribeCollection(ctx, milvusclient.NewDescribeCollectionOption(collectionName)) + if err != nil { + log.Fatal(err.Error()) + } + log.Println(collection.Name) + for _, field := range collection.Schema.Fields { + log.Println("=== Field: ", field.Name, field.DataType, field.AutoID) + } + } + + c.CreateDatabase(ctx, milvusclient.NewCreateDatabaseOption("test")) + c.UsingDatabase(ctx, milvusclient.NewUsingDatabaseOption("test")) + + schema = entity.NewSchema().WithName("hello_milvus"). + WithField(entity.NewField().WithName("ID").WithDataType(entity.FieldTypeVarChar).WithMaxLength(64).WithIsPrimaryKey(true)). + WithField(entity.NewField().WithName("Vector").WithDataType(entity.FieldTypeFloatVector).WithDim(128)) + + if err := c.CreateCollection(ctx, milvusclient.NewCreateCollectionOption("hello_milvus", schema)); err != nil { + log.Fatal("failed to create collection:", err.Error()) + } + + collections, err = c.ListCollections(ctx, milvusclient.NewListCollectionOption()) + if err != nil { + log.Fatal("failed to list collections,", err.Error()) + } + + for _, collectionName := range collections { + collection, err := c.DescribeCollection(ctx, milvusclient.NewDescribeCollectionOption(collectionName)) + if err != nil { + log.Fatal(err.Error()) + } + log.Println(collection.Name) + for _, field := range collection.Schema.Fields { + log.Println("=== Field: ", field.Name, field.DataType, field.AutoID) + } + } } diff --git a/client/example/playground/main.go b/client/example/playground/main.go index e5984648cf71c..43ae57915cfd2 100644 --- a/client/example/playground/main.go +++ b/client/example/playground/main.go @@ -18,6 +18,7 @@ const ( helloMilvusCmd = `hello_milvus` partitionsCmd = `partitions` indexCmd = `indexes` + countCmd = `count` milvusAddr = `localhost:19530` nEntities, dim = 3000, 128 @@ -38,9 +39,109 @@ func main() { Partitions() case indexCmd: Indexes() + case countCmd: + Count() } } +func Count() { + ctx := context.Background() + + collectionName := "hello_count_inverted" + + c, err := milvusclient.New(ctx, &milvusclient.ClientConfig{ + Address: "127.0.0.1:19530", + }) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + + schema := entity.NewSchema().WithName(collectionName). + WithField(entity.NewField().WithName("id").WithDataType(entity.FieldTypeInt64).WithIsAutoID(true).WithIsPrimaryKey(true)). 
+ WithField(entity.NewField().WithName("vector").WithDataType(entity.FieldTypeFloatVector).WithDim(128)) + + err = c.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(collectionName, schema)) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + + indexTask, err := c.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "id", index.NewGenericIndex("inverted", map[string]string{}))) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + + indexTask.Await(ctx) + + indexTask, err = c.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "vector", index.NewHNSWIndex(entity.L2, 16, 32))) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + + indexTask.Await(ctx) + + loadTask, err := c.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(collectionName)) + if err != nil { + log.Fatal("faied to load collection, err: ", err.Error()) + } + loadTask.Await(ctx) + + for i := 0; i < 100; i++ { + // randomData := make([]int64, 0, nEntities) + vectorData := make([][]float32, 0, nEntities) + // generate data + for i := 0; i < nEntities; i++ { + // randomData = append(randomData, rand.Int63n(1000)) + vec := make([]float32, 0, dim) + for j := 0; j < dim; j++ { + vec = append(vec, rand.Float32()) + } + vectorData = append(vectorData, vec) + } + + _, err = c.Insert(ctx, milvusclient.NewColumnBasedInsertOption(collectionName).WithFloatVectorColumn("vector", dim, vectorData)) + if err != nil { + log.Fatal("failed to insert data") + } + + log.Println("start flush collection") + flushTask, err := c.Flush(ctx, milvusclient.NewFlushOption(collectionName)) + if err != nil { + log.Fatal("failed to flush", err.Error()) + } + start := time.Now() + err = flushTask.Await(ctx) + if err != nil { + log.Fatal("failed to flush", err.Error()) + } + log.Println("flush done, elapsed", time.Since(start)) + + result, err := c.Query(ctx, milvusclient.NewQueryOption(collectionName). + WithOutputFields([]string{"count(*)"}). + WithConsistencyLevel(entity.ClStrong)) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + for _, rs := range result.Fields { + log.Println(rs) + } + result, err = c.Query(ctx, milvusclient.NewQueryOption(collectionName). + WithOutputFields([]string{"count(*)"}). + WithFilter("id > 0"). 
+ WithConsistencyLevel(entity.ClStrong)) + if err != nil { + log.Fatal("failed to connect to milvus, err: ", err.Error()) + } + for _, rs := range result.Fields { + log.Println(rs) + } + } + + // err = c.DropCollection(ctx, milvusclient.NewDropCollectionOption(collectionName)) + // if err != nil { + // log.Fatal("=== Failed to drop collection", err.Error()) + // } +} + func HelloMilvus() { ctx := context.Background() @@ -92,7 +193,7 @@ func HelloMilvus() { vectorData = append(vectorData, vec) } - err = c.Insert(ctx, milvusclient.NewColumnBasedInsertOption(collectionName).WithFloatVectorColumn("vector", dim, vectorData)) + _, err = c.Insert(ctx, milvusclient.NewColumnBasedInsertOption(collectionName).WithFloatVectorColumn("vector", dim, vectorData)) if err != nil { log.Fatal("failed to insert data") } @@ -107,22 +208,7 @@ func HelloMilvus() { if err != nil { log.Fatal("failed to flush", err.Error()) } - log.Println("flush done, elasped", time.Since(start)) - - indexTask, err := c.CreateIndex(ctx, milvusclient.NewCreateIndexOption(collectionName, "vector", index.NewHNSWIndex(entity.L2, 16, 100))) - if err != nil { - log.Fatal("failed to create index, err: ", err.Error()) - } - err = indexTask.Await(ctx) - if err != nil { - log.Fatal("failed to wait index construction complete") - } - - loadTask, err := c.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(collectionName)) - if err != nil { - log.Fatal("failed to load collection", err.Error()) - } - loadTask.Await(ctx) + log.Println("flush done, elapsed", time.Since(start)) vec2search := []entity.Vector{ entity.FloatVector(vectorData[len(vectorData)-2]), diff --git a/client/go.mod b/client/go.mod index c0f6882c3d768..af0f2721f0df9 100644 --- a/client/go.mod +++ b/client/go.mod @@ -1,6 +1,6 @@ module github.com/milvus-io/milvus/client/v2 -go 1.20 +go 1.21 require ( github.com/blang/semver/v4 v4.0.0 @@ -10,6 +10,7 @@ require ( github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240430035521-259ae1d10016 github.com/milvus-io/milvus/pkg v0.0.2-0.20240317152703-17b4938985f3 + github.com/quasilyte/go-ruleguard/dsl v0.3.22 github.com/samber/lo v1.27.0 github.com/stretchr/testify v1.8.4 github.com/tidwall/gjson v1.17.1 diff --git a/client/go.sum b/client/go.sum index 1efeee2111774..44e4615201642 100644 --- a/client/go.sum +++ b/client/go.sum @@ -476,6 +476,8 @@ github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1 github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/quasilyte/go-ruleguard/dsl v0.3.22 h1:wd8zkOhSNr+I+8Qeciml08ivDt1pSXe60+5DqOpCjPE= +github.com/quasilyte/go-ruleguard/dsl v0.3.22/go.mod h1:KeCP03KrjuSO0H1kTuZQCWlQPulDV6YMIXmpQss17rU= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= diff --git a/client/index.go b/client/index.go index 79dd57ed3e9c6..79320484632e7 100644 --- a/client/index.go +++ b/client/index.go @@ -21,12 +21,13 @@ import ( "fmt" "time" + "google.golang.org/grpc" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" 
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/client/v2/entity" "github.com/milvus-io/milvus/client/v2/index" "github.com/milvus-io/milvus/pkg/util/merr" - "google.golang.org/grpc" ) type CreateIndexTask struct { diff --git a/client/index_test.go b/client/index_test.go index ac9f5e40699e5..920457f9a2160 100644 --- a/client/index_test.go +++ b/client/index_test.go @@ -22,14 +22,15 @@ import ( "testing" "time" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "go.uber.org/atomic" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/client/v2/entity" "github.com/milvus-io/milvus/client/v2/index" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" - "go.uber.org/atomic" ) type IndexSuite struct { diff --git a/client/interceptors.go b/client/interceptors.go index 16396c4aed7f9..6756a74895825 100644 --- a/client/interceptors.go +++ b/client/interceptors.go @@ -20,12 +20,12 @@ import ( "context" "time" + grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" - grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" ) diff --git a/client/interceptors_test.go b/client/interceptors_test.go index e3bcb34fcea66..648575dbd42ed 100644 --- a/client/interceptors_test.go +++ b/client/interceptors_test.go @@ -28,9 +28,11 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" ) -var mockInvokerError error -var mockInvokerReply interface{} -var mockInvokeTimes = 0 +var ( + mockInvokerError error + mockInvokerReply interface{} + mockInvokeTimes = 0 +) var mockInvoker grpc.UnaryInvoker = func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, opts ...grpc.CallOption) error { mockInvokeTimes++ diff --git a/client/maintenance_test.go b/client/maintenance_test.go index 333146f8ca4c9..0efcd449dfc41 100644 --- a/client/maintenance_test.go +++ b/client/maintenance_test.go @@ -22,13 +22,14 @@ import ( "testing" "time" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "go.uber.org/atomic" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" - "go.uber.org/atomic" ) type MaintenanceSuite struct { diff --git a/client/partition.go b/client/partition.go index 93036b2300dc8..18483687175b4 100644 --- a/client/partition.go +++ b/client/partition.go @@ -19,9 +19,10 @@ package client import ( "context" + "google.golang.org/grpc" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/pkg/util/merr" - "google.golang.org/grpc" ) // CreatePartition is the API for creating a partition for a collection. 
diff --git a/client/partition_test.go b/client/partition_test.go index 2c6c4e2ed82c4..7bd7cd74360b0 100644 --- a/client/partition_test.go +++ b/client/partition_test.go @@ -21,11 +21,12 @@ import ( "fmt" "testing" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" ) type PartitionSuite struct { diff --git a/client/read.go b/client/read.go index 3aeaff769d31b..1907ed8e07fa4 100644 --- a/client/read.go +++ b/client/read.go @@ -19,9 +19,9 @@ package client import ( "context" + "github.com/cockroachdb/errors" "google.golang.org/grpc" - "github.com/cockroachdb/errors" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/column" diff --git a/client/read_options.go b/client/read_options.go index a1f563bfc0642..2bdaf78a553eb 100644 --- a/client/read_options.go +++ b/client/read_options.go @@ -21,6 +21,7 @@ import ( "strconv" "github.com/golang/protobuf/proto" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/client/v2/entity" diff --git a/client/read_test.go b/client/read_test.go index 6606226d1bb76..0e815a0563382 100644 --- a/client/read_test.go +++ b/client/read_test.go @@ -6,13 +6,14 @@ import ( "math/rand" "testing" + "github.com/samber/lo" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/entity" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/samber/lo" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" ) type ReadSuite struct { diff --git a/client/row/data.go b/client/row/data.go new file mode 100644 index 0000000000000..292661ade29be --- /dev/null +++ b/client/row/data.go @@ -0,0 +1,332 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package row + +import ( + "encoding/json" + "fmt" + "reflect" + "strconv" + + "github.com/cockroachdb/errors" + + "github.com/milvus-io/milvus/client/v2/column" + "github.com/milvus-io/milvus/client/v2/entity" +) + +const ( + // MilvusTag struct tag const for milvus row based struct + MilvusTag = `milvus` + + // MilvusSkipTagValue struct tag const for skip this field. 
+ MilvusSkipTagValue = `-` + + // MilvusTagSep struct tag const for attribute separator + MilvusTagSep = `;` + + // MilvusTagName struct tag const for field name + MilvusTagName = `NAME` + + // VectorDimTag struct tag const for vector dimension + VectorDimTag = `DIM` + + // VectorTypeTag struct tag const for binary vector type + VectorTypeTag = `VECTOR_TYPE` + + // MilvusPrimaryKey struct tag const for primary key indicator + MilvusPrimaryKey = `PRIMARY_KEY` + + // MilvusAutoID struct tag const for auto id indicator + MilvusAutoID = `AUTO_ID` + + // DimMax dimension max value + DimMax = 65535 +) + +func AnyToColumns(rows []interface{}, schemas ...*entity.Schema) ([]column.Column, error) { + rowsLen := len(rows) + if rowsLen == 0 { + return []column.Column{}, errors.New("0 length column") + } + + var sch *entity.Schema + var err error + // if schema not provided, try to parse from row + if len(schemas) == 0 { + sch, err = ParseSchema(rows[0]) + if err != nil { + return []column.Column{}, err + } + } else { + // use first schema provided + sch = schemas[0] + } + + isDynamic := sch.EnableDynamicField + var dynamicCol *column.ColumnJSONBytes + + nameColumns := make(map[string]column.Column) + for _, field := range sch.Fields { + // skip auto id pk field + if field.PrimaryKey && field.AutoID { + continue + } + switch field.DataType { + case entity.FieldTypeBool: + data := make([]bool, 0, rowsLen) + col := column.NewColumnBool(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeInt8: + data := make([]int8, 0, rowsLen) + col := column.NewColumnInt8(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeInt16: + data := make([]int16, 0, rowsLen) + col := column.NewColumnInt16(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeInt32: + data := make([]int32, 0, rowsLen) + col := column.NewColumnInt32(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeInt64: + data := make([]int64, 0, rowsLen) + col := column.NewColumnInt64(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeFloat: + data := make([]float32, 0, rowsLen) + col := column.NewColumnFloat(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeDouble: + data := make([]float64, 0, rowsLen) + col := column.NewColumnDouble(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeString, entity.FieldTypeVarChar: + data := make([]string, 0, rowsLen) + col := column.NewColumnString(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeJSON: + data := make([][]byte, 0, rowsLen) + col := column.NewColumnJSONBytes(field.Name, data) + nameColumns[field.Name] = col + case entity.FieldTypeArray: + col := NewArrayColumn(field) + if col == nil { + return nil, errors.Newf("unsupported element type %s for Array", field.ElementType.String()) + } + nameColumns[field.Name] = col + case entity.FieldTypeFloatVector: + data := make([][]float32, 0, rowsLen) + dimStr, has := field.TypeParams[entity.TypeParamDim] + if !has { + return []column.Column{}, errors.New("vector field with no dim") + } + dim, err := strconv.ParseInt(dimStr, 10, 64) + if err != nil { + return []column.Column{}, fmt.Errorf("vector field with bad format dim: %s", err.Error()) + } + col := column.NewColumnFloatVector(field.Name, int(dim), data) + nameColumns[field.Name] = col + case entity.FieldTypeBinaryVector: + data := make([][]byte, 0, rowsLen) + dim, err := field.GetDim() + if err != nil { + return []column.Column{}, err + } + col := 
column.NewColumnBinaryVector(field.Name, int(dim), data) + nameColumns[field.Name] = col + case entity.FieldTypeFloat16Vector: + data := make([][]byte, 0, rowsLen) + dim, err := field.GetDim() + if err != nil { + return []column.Column{}, err + } + col := column.NewColumnFloat16Vector(field.Name, int(dim), data) + nameColumns[field.Name] = col + case entity.FieldTypeBFloat16Vector: + data := make([][]byte, 0, rowsLen) + dim, err := field.GetDim() + if err != nil { + return []column.Column{}, err + } + col := column.NewColumnBFloat16Vector(field.Name, int(dim), data) + nameColumns[field.Name] = col + case entity.FieldTypeSparseVector: + data := make([]entity.SparseEmbedding, 0, rowsLen) + col := column.NewColumnSparseVectors(field.Name, data) + nameColumns[field.Name] = col + } + } + + if isDynamic { + dynamicCol = column.NewColumnJSONBytes("", make([][]byte, 0, rowsLen)).WithIsDynamic(true) + } + + for _, row := range rows { + // collection schema name need not to be same, since receiver could has other names + v := reflect.ValueOf(row) + set, err := reflectValueCandi(v) + if err != nil { + return nil, err + } + + for idx, field := range sch.Fields { + // skip dynamic field if visible + if isDynamic && field.IsDynamic { + continue + } + // skip auto id pk field + if field.PrimaryKey && field.AutoID { + // remove pk field from candidates set, avoid adding it into dynamic column + delete(set, field.Name) + continue + } + column, ok := nameColumns[field.Name] + if !ok { + return nil, fmt.Errorf("expected unhandled field %s", field.Name) + } + + candi, ok := set[field.Name] + if !ok { + return nil, fmt.Errorf("row %d does not has field %s", idx, field.Name) + } + err := column.AppendValue(candi.v.Interface()) + if err != nil { + return nil, err + } + delete(set, field.Name) + } + + if isDynamic { + m := make(map[string]interface{}) + for name, candi := range set { + m[name] = candi.v.Interface() + } + bs, err := json.Marshal(m) + if err != nil { + return nil, fmt.Errorf("failed to marshal dynamic field %w", err) + } + err = dynamicCol.AppendValue(bs) + if err != nil { + return nil, fmt.Errorf("failed to append value to dynamic field %w", err) + } + } + } + columns := make([]column.Column, 0, len(nameColumns)) + for _, column := range nameColumns { + columns = append(columns, column) + } + if isDynamic { + columns = append(columns, dynamicCol) + } + return columns, nil +} + +func NewArrayColumn(f *entity.Field) column.Column { + switch f.ElementType { + case entity.FieldTypeBool: + return column.NewColumnBoolArray(f.Name, nil) + + case entity.FieldTypeInt8: + return column.NewColumnInt8Array(f.Name, nil) + + case entity.FieldTypeInt16: + return column.NewColumnInt16Array(f.Name, nil) + + case entity.FieldTypeInt32: + return column.NewColumnInt32Array(f.Name, nil) + + case entity.FieldTypeInt64: + return column.NewColumnInt64Array(f.Name, nil) + + case entity.FieldTypeFloat: + return column.NewColumnFloatArray(f.Name, nil) + + case entity.FieldTypeDouble: + return column.NewColumnDoubleArray(f.Name, nil) + + case entity.FieldTypeVarChar: + return column.NewColumnVarCharArray(f.Name, nil) + + default: + return nil + } +} + +type fieldCandi struct { + name string + v reflect.Value + options map[string]string +} + +func reflectValueCandi(v reflect.Value) (map[string]fieldCandi, error) { + if v.Kind() == reflect.Ptr { + v = v.Elem() + } + + result := make(map[string]fieldCandi) + switch v.Kind() { + case reflect.Map: // map[string]any + iter := v.MapRange() + for iter.Next() { + key := 
iter.Key().String() + result[key] = fieldCandi{ + name: key, + v: iter.Value(), + } + } + return result, nil + case reflect.Struct: + for i := 0; i < v.NumField(); i++ { + ft := v.Type().Field(i) + name := ft.Name + tag, ok := ft.Tag.Lookup(MilvusTag) + + settings := make(map[string]string) + if ok { + if tag == MilvusSkipTagValue { + continue + } + settings = ParseTagSetting(tag, MilvusTagSep) + fn, has := settings[MilvusTagName] + if has { + // overwrite column to tag name + name = fn + } + } + _, ok = result[name] + // duplicated + if ok { + return nil, fmt.Errorf("column has duplicated name: %s when parsing field: %s", name, ft.Name) + } + + v := v.Field(i) + if v.Kind() == reflect.Array { + v = v.Slice(0, v.Len()) + } + + result[name] = fieldCandi{ + name: name, + v: v, + options: settings, + } + } + + return result, nil + default: + return nil, fmt.Errorf("unsupport row type: %s", v.Kind().String()) + } +} diff --git a/client/row/data_test.go b/client/row/data_test.go new file mode 100644 index 0000000000000..9e8b7fb216fbc --- /dev/null +++ b/client/row/data_test.go @@ -0,0 +1,174 @@ +package row + +import ( + "reflect" + "testing" + + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus/client/v2/entity" +) + +type ValidStruct struct { + ID int64 `milvus:"primary_key"` + Attr1 int8 + Attr2 int16 + Attr3 int32 + Attr4 float32 + Attr5 float64 + Attr6 string + Attr7 bool + Vector []float32 `milvus:"dim:16"` + Vector2 []byte `milvus:"dim:32"` +} + +type ValidStruct2 struct { + ID int64 `milvus:"primary_key"` + Vector [16]float32 + Vector2 [4]byte + Ignored bool `milvus:"-"` +} + +type ValidStructWithNamedTag struct { + ID int64 `milvus:"primary_key;name:id"` + Vector [16]float32 `milvus:"name:vector"` +} + +type RowsSuite struct { + suite.Suite +} + +func (s *RowsSuite) TestRowsToColumns() { + s.Run("valid_cases", func() { + columns, err := AnyToColumns([]any{&ValidStruct{}}) + s.Nil(err) + s.Equal(10, len(columns)) + + columns, err = AnyToColumns([]any{&ValidStruct2{}}) + s.Nil(err) + s.Equal(3, len(columns)) + }) + + s.Run("auto_id_pk", func() { + type AutoPK struct { + ID int64 `milvus:"primary_key;auto_id"` + Vector []float32 `milvus:"dim:32"` + } + columns, err := AnyToColumns([]any{&AutoPK{}}) + s.Nil(err) + s.Require().Equal(1, len(columns)) + s.Equal("Vector", columns[0].Name()) + }) + + s.Run("fp16", func() { + type BF16Struct struct { + ID int64 `milvus:"primary_key;auto_id"` + Vector []byte `milvus:"dim:16;vector_type:bf16"` + } + columns, err := AnyToColumns([]any{&BF16Struct{}}) + s.Nil(err) + s.Require().Equal(1, len(columns)) + s.Equal("Vector", columns[0].Name()) + s.Equal(entity.FieldTypeBFloat16Vector, columns[0].Type()) + }) + + s.Run("fp16", func() { + type FP16Struct struct { + ID int64 `milvus:"primary_key;auto_id"` + Vector []byte `milvus:"dim:16;vector_type:fp16"` + } + columns, err := AnyToColumns([]any{&FP16Struct{}}) + s.Nil(err) + s.Require().Equal(1, len(columns)) + s.Equal("Vector", columns[0].Name()) + s.Equal(entity.FieldTypeFloat16Vector, columns[0].Type()) + }) + + s.Run("invalid_cases", func() { + // empty input + _, err := AnyToColumns([]any{}) + s.NotNil(err) + + // incompatible rows + _, err = AnyToColumns([]any{&ValidStruct{}, &ValidStruct2{}}) + s.NotNil(err) + + // schema & row not compatible + _, err = AnyToColumns([]any{&ValidStruct{}}, &entity.Schema{ + Fields: []*entity.Field{ + { + Name: "int64", + DataType: entity.FieldTypeInt64, + }, + }, + }) + s.NotNil(err) + }) +} + +func (s *RowsSuite) TestDynamicSchema() { + 
s.Run("all_fallback_dynamic", func() { + columns, err := AnyToColumns([]any{&ValidStruct{}}, + entity.NewSchema().WithDynamicFieldEnabled(true), + ) + s.NoError(err) + s.Equal(1, len(columns)) + }) + + s.Run("dynamic_not_found", func() { + _, err := AnyToColumns([]any{&ValidStruct{}}, + entity.NewSchema().WithField( + entity.NewField().WithName("ID").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true), + ).WithDynamicFieldEnabled(true), + ) + s.NoError(err) + }) +} + +func (s *RowsSuite) TestReflectValueCandi() { + cases := []struct { + tag string + v reflect.Value + expect map[string]fieldCandi + expectErr bool + }{ + { + tag: "MapRow", + v: reflect.ValueOf(map[string]interface{}{ + "A": "abd", "B": int64(8), + }), + expect: map[string]fieldCandi{ + "A": { + name: "A", + v: reflect.ValueOf("abd"), + }, + "B": { + name: "B", + v: reflect.ValueOf(int64(8)), + }, + }, + expectErr: false, + }, + } + + for _, c := range cases { + s.Run(c.tag, func() { + r, err := reflectValueCandi(c.v) + if c.expectErr { + s.Error(err) + return + } + s.NoError(err) + s.Equal(len(c.expect), len(r)) + for k, v := range c.expect { + rv, has := r[k] + s.Require().True(has) + s.Equal(v.name, rv.name) + } + }) + } +} + +func TestRows(t *testing.T) { + suite.Run(t, new(RowsSuite)) +} diff --git a/client/row/schema.go b/client/row/schema.go new file mode 100644 index 0000000000000..6022275653f17 --- /dev/null +++ b/client/row/schema.go @@ -0,0 +1,185 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package row + +import ( + "fmt" + "go/ast" + "reflect" + "strconv" + "strings" + + "github.com/cockroachdb/errors" + + "github.com/milvus-io/milvus/client/v2/entity" +) + +// ParseSchema parses schema from interface{}. 
+func ParseSchema(r interface{}) (*entity.Schema, error) { + sch := &entity.Schema{} + t := reflect.TypeOf(r) + if t.Kind() == reflect.Array || t.Kind() == reflect.Slice || t.Kind() == reflect.Ptr { + t = t.Elem() + } + + // MapRow is not supported for schema definition + // TODO add PrimaryKey() interface later + if t.Kind() == reflect.Map { + return nil, fmt.Errorf("map row is not supported for schema definition") + } + + if t.Kind() != reflect.Struct { + return nil, fmt.Errorf("unsupported data type: %+v", r) + } + + // Collection method not overwrited, try use Row type name + if sch.CollectionName == "" { + sch.CollectionName = t.Name() + if sch.CollectionName == "" { + return nil, errors.New("collection name not provided") + } + } + sch.Fields = make([]*entity.Field, 0, t.NumField()) + for i := 0; i < t.NumField(); i++ { + f := t.Field(i) + // ignore anonymous field for now + if f.Anonymous || !ast.IsExported(f.Name) { + continue + } + + field := &entity.Field{ + Name: f.Name, + } + ft := f.Type + if f.Type.Kind() == reflect.Ptr { + ft = ft.Elem() + } + fv := reflect.New(ft) + tag := f.Tag.Get(MilvusTag) + if tag == MilvusSkipTagValue { + continue + } + tagSettings := ParseTagSetting(tag, MilvusTagSep) + if _, has := tagSettings[MilvusPrimaryKey]; has { + field.PrimaryKey = true + } + if _, has := tagSettings[MilvusAutoID]; has { + field.AutoID = true + } + if name, has := tagSettings[MilvusTagName]; has { + field.Name = name + } + switch reflect.Indirect(fv).Kind() { + case reflect.Bool: + field.DataType = entity.FieldTypeBool + case reflect.Int8: + field.DataType = entity.FieldTypeInt8 + case reflect.Int16: + field.DataType = entity.FieldTypeInt16 + case reflect.Int32: + field.DataType = entity.FieldTypeInt32 + case reflect.Int64: + field.DataType = entity.FieldTypeInt64 + case reflect.Float32: + field.DataType = entity.FieldTypeFloat + case reflect.Float64: + field.DataType = entity.FieldTypeDouble + case reflect.String: + field.DataType = entity.FieldTypeString + case reflect.Array: + arrayLen := ft.Len() + elemType := ft.Elem() + switch elemType.Kind() { + case reflect.Uint8: + field.WithDataType(entity.FieldTypeBinaryVector) + field.WithDim(int64(arrayLen) * 8) + case reflect.Float32: + field.WithDataType(entity.FieldTypeFloatVector) + field.WithDim(int64(arrayLen)) + default: + return nil, fmt.Errorf("field %s is array of %v, which is not supported", f.Name, elemType) + } + case reflect.Slice: + dimStr, has := tagSettings[VectorDimTag] + if !has { + return nil, fmt.Errorf("field %s is slice but dim not provided", f.Name) + } + dim, err := strconv.ParseInt(dimStr, 10, 64) + if err != nil { + return nil, fmt.Errorf("dim value %s is not valid", dimStr) + } + if dim < 1 || dim > DimMax { + return nil, fmt.Errorf("dim value %d is out of range", dim) + } + field.WithDim(dim) + + elemType := ft.Elem() + switch elemType.Kind() { + case reflect.Uint8: // []byte, could be BinaryVector, fp16, bf 6 + switch tagSettings[VectorTypeTag] { + case "fp16": + field.DataType = entity.FieldTypeFloat16Vector + case "bf16": + field.DataType = entity.FieldTypeBFloat16Vector + default: + field.DataType = entity.FieldTypeBinaryVector + } + case reflect.Float32: + field.DataType = entity.FieldTypeFloatVector + default: + return nil, fmt.Errorf("field %s is slice of %v, which is not supported", f.Name, elemType) + } + default: + return nil, fmt.Errorf("field %s is %v, which is not supported", field.Name, ft) + } + sch.Fields = append(sch.Fields, field) + } + + return sch, nil +} + +// ParseTagSetting 
parses struct tag into map settings +func ParseTagSetting(str string, sep string) map[string]string { + settings := map[string]string{} + names := strings.Split(str, sep) + + for i := 0; i < len(names); i++ { + j := i + if len(names[j]) > 0 { + for { + if names[j][len(names[j])-1] == '\\' { + i++ + names[j] = names[j][0:len(names[j])-1] + sep + names[i] + names[i] = "" + } else { + break + } + } + } + + values := strings.Split(names[j], ":") + k := strings.TrimSpace(strings.ToUpper(values[0])) + + if len(values) >= 2 { + settings[k] = strings.Join(values[1:], ":") + } else if k != "" { + settings[k] = k + } + } + + return settings +} diff --git a/client/row/schema_test.go b/client/row/schema_test.go new file mode 100644 index 0000000000000..fbfdc19f27058 --- /dev/null +++ b/client/row/schema_test.go @@ -0,0 +1,213 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package row + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/milvus-io/milvus/client/v2/entity" +) + +// ArrayRow test case type +type ArrayRow [16]float32 + +func (ar *ArrayRow) Collection() string { return "" } +func (ar *ArrayRow) Partition() string { return "" } +func (ar *ArrayRow) Description() string { return "" } + +type Uint8Struct struct { + Attr uint8 +} + +type StringArrayStruct struct { + Vector [8]string +} + +type StringSliceStruct struct { + Vector []string `milvus:"dim:8"` +} + +type SliceNoDimStruct struct { + Vector []float32 `milvus:""` +} + +type SliceBadDimStruct struct { + Vector []float32 `milvus:"dim:str"` +} + +type SliceBadDimStruct2 struct { + Vector []float32 `milvus:"dim:0"` +} + +func TestParseSchema(t *testing.T) { + t.Run("invalid cases", func(t *testing.T) { + // anonymous struct with default collection name ("") will cause error + anonymusStruct := struct{}{} + sch, err := ParseSchema(anonymusStruct) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // non struct + arrayRow := ArrayRow([16]float32{}) + sch, err = ParseSchema(&arrayRow) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // uint8 not supported + sch, err = ParseSchema(&Uint8Struct{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // string array not supported + sch, err = ParseSchema(&StringArrayStruct{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // string slice not supported + sch, err = ParseSchema(&StringSliceStruct{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // slice vector with no dim + sch, err = ParseSchema(&SliceNoDimStruct{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // slice vector with bad format dim + sch, err = ParseSchema(&SliceBadDimStruct{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + + // slice vector with bad format dim 2 + sch, err = ParseSchema(&SliceBadDimStruct2{}) + assert.Nil(t, sch) + assert.NotNil(t, err) + }) + 
+ t.Run("valid cases", func(t *testing.T) { + getVectorField := func(schema *entity.Schema) *entity.Field { + for _, field := range schema.Fields { + if field.DataType == entity.FieldTypeFloatVector || + field.DataType == entity.FieldTypeBinaryVector || + field.DataType == entity.FieldTypeBFloat16Vector || + field.DataType == entity.FieldTypeFloat16Vector { + return field + } + } + return nil + } + + type ValidStruct struct { + ID int64 `milvus:"primary_key"` + Attr1 int8 + Attr2 int16 + Attr3 int32 + Attr4 float32 + Attr5 float64 + Attr6 string + Vector []float32 `milvus:"dim:128"` + } + vs := &ValidStruct{} + sch, err := ParseSchema(vs) + assert.Nil(t, err) + assert.NotNil(t, sch) + assert.Equal(t, "ValidStruct", sch.CollectionName) + + type ValidFp16Struct struct { + ID int64 `milvus:"primary_key"` + Attr1 int8 + Attr2 int16 + Attr3 int32 + Attr4 float32 + Attr5 float64 + Attr6 string + Vector []byte `milvus:"dim:128;vector_type:fp16"` + } + fp16Vs := &ValidFp16Struct{} + sch, err = ParseSchema(fp16Vs) + assert.Nil(t, err) + assert.NotNil(t, sch) + assert.Equal(t, "ValidFp16Struct", sch.CollectionName) + vectorField := getVectorField(sch) + assert.Equal(t, entity.FieldTypeFloat16Vector, vectorField.DataType) + + type ValidBf16Struct struct { + ID int64 `milvus:"primary_key"` + Attr1 int8 + Attr2 int16 + Attr3 int32 + Attr4 float32 + Attr5 float64 + Attr6 string + Vector []byte `milvus:"dim:128;vector_type:bf16"` + } + bf16Vs := &ValidBf16Struct{} + sch, err = ParseSchema(bf16Vs) + assert.Nil(t, err) + assert.NotNil(t, sch) + assert.Equal(t, "ValidBf16Struct", sch.CollectionName) + vectorField = getVectorField(sch) + assert.Equal(t, entity.FieldTypeBFloat16Vector, vectorField.DataType) + + type ValidByteStruct struct { + ID int64 `milvus:"primary_key"` + Vector []byte `milvus:"dim:128"` + } + vs2 := &ValidByteStruct{} + sch, err = ParseSchema(vs2) + assert.Nil(t, err) + assert.NotNil(t, sch) + + type ValidArrayStruct struct { + ID int64 `milvus:"primary_key"` + Vector [64]float32 + } + vs3 := &ValidArrayStruct{} + sch, err = ParseSchema(vs3) + assert.Nil(t, err) + assert.NotNil(t, sch) + + type ValidArrayStructByte struct { + ID int64 `milvus:"primary_key;auto_id"` + Data *string `milvus:"extra:test\\;false"` + Vector [64]byte + } + vs4 := &ValidArrayStructByte{} + sch, err = ParseSchema(vs4) + assert.Nil(t, err) + assert.NotNil(t, sch) + + vs5 := &ValidStructWithNamedTag{} + sch, err = ParseSchema(vs5) + assert.Nil(t, err) + assert.NotNil(t, sch) + i64f, vecf := false, false + for _, field := range sch.Fields { + if field.Name == "id" { + i64f = true + } + if field.Name == "vector" { + vecf = true + } + } + + assert.True(t, i64f) + assert.True(t, vecf) + }) +} diff --git a/client/ruleguard/rules.go b/client/ruleguard/rules.go new file mode 100644 index 0000000000000..5bc3422c9b450 --- /dev/null +++ b/client/ruleguard/rules.go @@ -0,0 +1,409 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gorules + +import ( + "github.com/quasilyte/go-ruleguard/dsl" +) + +// This is a collection of rules for ruleguard: https://github.com/quasilyte/go-ruleguard + +// Remove extra conversions: mdempsky/unconvert +func unconvert(m dsl.Matcher) { + m.Match("int($x)").Where(m["x"].Type.Is("int") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + + m.Match("float32($x)").Where(m["x"].Type.Is("float32") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("float64($x)").Where(m["x"].Type.Is("float64") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + + // m.Match("byte($x)").Where(m["x"].Type.Is("byte")).Report("unnecessary conversion").Suggest("$x") + // m.Match("rune($x)").Where(m["x"].Type.Is("rune")).Report("unnecessary conversion").Suggest("$x") + m.Match("bool($x)").Where(m["x"].Type.Is("bool") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + + m.Match("int8($x)").Where(m["x"].Type.Is("int8") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("int16($x)").Where(m["x"].Type.Is("int16") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("int32($x)").Where(m["x"].Type.Is("int32") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("int64($x)").Where(m["x"].Type.Is("int64") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + + m.Match("uint8($x)").Where(m["x"].Type.Is("uint8") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("uint16($x)").Where(m["x"].Type.Is("uint16") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("uint32($x)").Where(m["x"].Type.Is("uint32") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + m.Match("uint64($x)").Where(m["x"].Type.Is("uint64") && !m["x"].Const).Report("unnecessary conversion").Suggest("$x") + + m.Match("time.Duration($x)").Where(m["x"].Type.Is("time.Duration") && !m["x"].Text.Matches("^[0-9]*$")).Report("unnecessary conversion").Suggest("$x") +} + +// Don't use == or != with time.Time +// https://github.com/dominikh/go-tools/issues/47 : Wontfix +func timeeq(m dsl.Matcher) { + m.Match("$t0 == $t1").Where(m["t0"].Type.Is("time.Time")).Report("using == with time.Time") + m.Match("$t0 != $t1").Where(m["t0"].Type.Is("time.Time")).Report("using != with time.Time") + m.Match(`map[$k]$v`).Where(m["k"].Type.Is("time.Time")).Report("map with time.Time keys are easy to misuse") +} + +// err but no an error +func errnoterror(m dsl.Matcher) { + // Would be easier to check for all err identifiers instead, but then how do we get the type from m[] ? 
+ + m.Match( + "if $*_, err := $x; $err != nil { $*_ } else if $_ { $*_ }", + "if $*_, err := $x; $err != nil { $*_ } else { $*_ }", + "if $*_, err := $x; $err != nil { $*_ }", + + "if $*_, err = $x; $err != nil { $*_ } else if $_ { $*_ }", + "if $*_, err = $x; $err != nil { $*_ } else { $*_ }", + "if $*_, err = $x; $err != nil { $*_ }", + + "$*_, err := $x; if $err != nil { $*_ } else if $_ { $*_ }", + "$*_, err := $x; if $err != nil { $*_ } else { $*_ }", + "$*_, err := $x; if $err != nil { $*_ }", + + "$*_, err = $x; if $err != nil { $*_ } else if $_ { $*_ }", + "$*_, err = $x; if $err != nil { $*_ } else { $*_ }", + "$*_, err = $x; if $err != nil { $*_ }", + ). + Where(m["err"].Text == "err" && !m["err"].Type.Is("error") && m["x"].Text != "recover()"). + Report("err variable not error type") +} + +// Identical if and else bodies +func ifbodythenbody(m dsl.Matcher) { + m.Match("if $*_ { $body } else { $body }"). + Report("identical if and else bodies") + + // Lots of false positives. + // m.Match("if $*_ { $body } else if $*_ { $body }"). + // Report("identical if and else bodies") +} + +// Odd inequality: A - B < 0 instead of != +// Too many false positives. +/* +func subtractnoteq(m dsl.Matcher) { + m.Match("$a - $b < 0").Report("consider $a != $b") + m.Match("$a - $b > 0").Report("consider $a != $b") + m.Match("0 < $a - $b").Report("consider $a != $b") + m.Match("0 > $a - $b").Report("consider $a != $b") +} +*/ + +// Self-assignment +func selfassign(m dsl.Matcher) { + m.Match("$x = $x").Report("useless self-assignment") +} + +// Odd nested ifs +func oddnestedif(m dsl.Matcher) { + m.Match("if $x { if $x { $*_ }; $*_ }", + "if $x == $y { if $x != $y {$*_ }; $*_ }", + "if $x != $y { if $x == $y {$*_ }; $*_ }", + "if $x { if !$x { $*_ }; $*_ }", + "if !$x { if $x { $*_ }; $*_ }"). + Report("odd nested ifs") + + m.Match("for $x { if $x { $*_ }; $*_ }", + "for $x == $y { if $x != $y {$*_ }; $*_ }", + "for $x != $y { if $x == $y {$*_ }; $*_ }", + "for $x { if !$x { $*_ }; $*_ }", + "for !$x { if $x { $*_ }; $*_ }"). + Report("odd nested for/ifs") +} + +// odd bitwise expressions +func oddbitwise(m dsl.Matcher) { + m.Match("$x | $x", + "$x | ^$x", + "^$x | $x"). + Report("odd bitwise OR") + + m.Match("$x & $x", + "$x & ^$x", + "^$x & $x"). + Report("odd bitwise AND") + + m.Match("$x &^ $x"). 
+ Report("odd bitwise AND-NOT") +} + +// odd sequence of if tests with return +func ifreturn(m dsl.Matcher) { + m.Match("if $x { return $*_ }; if $x {$*_ }").Report("odd sequence of if test") + m.Match("if $x { return $*_ }; if !$x {$*_ }").Report("odd sequence of if test") + m.Match("if !$x { return $*_ }; if $x {$*_ }").Report("odd sequence of if test") + m.Match("if $x == $y { return $*_ }; if $x != $y {$*_ }").Report("odd sequence of if test") + m.Match("if $x != $y { return $*_ }; if $x == $y {$*_ }").Report("odd sequence of if test") +} + +func oddifsequence(m dsl.Matcher) { + /* + m.Match("if $x { $*_ }; if $x {$*_ }").Report("odd sequence of if test") + + m.Match("if $x == $y { $*_ }; if $y == $x {$*_ }").Report("odd sequence of if tests") + m.Match("if $x != $y { $*_ }; if $y != $x {$*_ }").Report("odd sequence of if tests") + + m.Match("if $x < $y { $*_ }; if $y > $x {$*_ }").Report("odd sequence of if tests") + m.Match("if $x <= $y { $*_ }; if $y >= $x {$*_ }").Report("odd sequence of if tests") + + m.Match("if $x > $y { $*_ }; if $y < $x {$*_ }").Report("odd sequence of if tests") + m.Match("if $x >= $y { $*_ }; if $y <= $x {$*_ }").Report("odd sequence of if tests") + */ +} + +// odd sequence of nested if tests +func nestedifsequence(m dsl.Matcher) { + /* + m.Match("if $x < $y { if $x >= $y {$*_ }; $*_ }").Report("odd sequence of nested if tests") + m.Match("if $x <= $y { if $x > $y {$*_ }; $*_ }").Report("odd sequence of nested if tests") + m.Match("if $x > $y { if $x <= $y {$*_ }; $*_ }").Report("odd sequence of nested if tests") + m.Match("if $x >= $y { if $x < $y {$*_ }; $*_ }").Report("odd sequence of nested if tests") + */ +} + +// odd sequence of assignments +func identicalassignments(m dsl.Matcher) { + m.Match("$x = $y; $y = $x").Report("odd sequence of assignments") +} + +func oddcompoundop(m dsl.Matcher) { + m.Match("$x += $x + $_", + "$x += $x - $_"). + Report("odd += expression") + + m.Match("$x -= $x + $_", + "$x -= $x - $_"). + Report("odd -= expression") +} + +func constswitch(m dsl.Matcher) { + m.Match("switch $x { $*_ }", "switch $*_; $x { $*_ }"). + Where(m["x"].Const && !m["x"].Text.Matches(`^runtime\.`)). + Report("constant switch") +} + +func oddcomparisons(m dsl.Matcher) { + m.Match( + "$x - $y == 0", + "$x - $y != 0", + "$x - $y < 0", + "$x - $y <= 0", + "$x - $y > 0", + "$x - $y >= 0", + "$x ^ $y == 0", + "$x ^ $y != 0", + ).Report("odd comparison") +} + +func oddmathbits(m dsl.Matcher) { + m.Match( + "64 - bits.LeadingZeros64($x)", + "32 - bits.LeadingZeros32($x)", + "16 - bits.LeadingZeros16($x)", + "8 - bits.LeadingZeros8($x)", + ).Report("odd math/bits expression: use bits.Len*() instead?") +} + +// func floateq(m dsl.Matcher) { +// m.Match( +// "$x == $y", +// "$x != $y", +// ). +// Where(m["x"].Type.Is("float32") && !m["x"].Const && !m["y"].Text.Matches("0(.0+)?") && !m.File().Name.Matches("floating_comparision.go")). +// Report("floating point tested for equality") + +// m.Match( +// "$x == $y", +// "$x != $y", +// ). +// Where(m["x"].Type.Is("float64") && !m["x"].Const && !m["y"].Text.Matches("0(.0+)?") && !m.File().Name.Matches("floating_comparision.go")). +// Report("floating point tested for equality") + +// m.Match("switch $x { $*_ }", "switch $*_; $x { $*_ }"). +// Where(m["x"].Type.Is("float32")). +// Report("floating point as switch expression") + +// m.Match("switch $x { $*_ }", "switch $*_; $x { $*_ }"). +// Where(m["x"].Type.Is("float64")). 
+// Report("floating point as switch expression") + +// } + +func badexponent(m dsl.Matcher) { + m.Match( + "2 ^ $x", + "10 ^ $x", + ). + Report("caret (^) is not exponentiation") +} + +func floatloop(m dsl.Matcher) { + m.Match( + "for $i := $x; $i < $y; $i += $z { $*_ }", + "for $i = $x; $i < $y; $i += $z { $*_ }", + ). + Where(m["i"].Type.Is("float64")). + Report("floating point for loop counter") + + m.Match( + "for $i := $x; $i < $y; $i += $z { $*_ }", + "for $i = $x; $i < $y; $i += $z { $*_ }", + ). + Where(m["i"].Type.Is("float32")). + Report("floating point for loop counter") +} + +func urlredacted(m dsl.Matcher) { + m.Match( + "log.Println($x, $*_)", + "log.Println($*_, $x, $*_)", + "log.Println($*_, $x)", + "log.Printf($*_, $x, $*_)", + "log.Printf($*_, $x)", + + "log.Println($x, $*_)", + "log.Println($*_, $x, $*_)", + "log.Println($*_, $x)", + "log.Printf($*_, $x, $*_)", + "log.Printf($*_, $x)", + ). + Where(m["x"].Type.Is("*url.URL")). + Report("consider $x.Redacted() when outputting URLs") +} + +func sprinterr(m dsl.Matcher) { + m.Match(`fmt.Sprint($err)`, + `fmt.Sprintf("%s", $err)`, + `fmt.Sprintf("%v", $err)`, + ). + Where(m["err"].Type.Is("error")). + Report("maybe call $err.Error() instead of fmt.Sprint()?") +} + +// disable this check, because it can not apply to generic type +//func largeloopcopy(m dsl.Matcher) { +// m.Match( +// `for $_, $v := range $_ { $*_ }`, +// ). +// Where(m["v"].Type.Size > 1024). +// Report(`loop copies large value each iteration`) +//} + +func joinpath(m dsl.Matcher) { + m.Match( + `strings.Join($_, "/")`, + `strings.Join($_, "\\")`, + "strings.Join($_, `\\`)", + ). + Report(`did you mean path.Join() or filepath.Join() ?`) +} + +func readfull(m dsl.Matcher) { + m.Match(`$n, $err := io.ReadFull($_, $slice) + if $err != nil || $n != len($slice) { + $*_ + }`, + `$n, $err := io.ReadFull($_, $slice) + if $n != len($slice) || $err != nil { + $*_ + }`, + `$n, $err = io.ReadFull($_, $slice) + if $err != nil || $n != len($slice) { + $*_ + }`, + `$n, $err = io.ReadFull($_, $slice) + if $n != len($slice) || $err != nil { + $*_ + }`, + `if $n, $err := io.ReadFull($_, $slice); $n != len($slice) || $err != nil { + $*_ + }`, + `if $n, $err := io.ReadFull($_, $slice); $err != nil || $n != len($slice) { + $*_ + }`, + `if $n, $err = io.ReadFull($_, $slice); $n != len($slice) || $err != nil { + $*_ + }`, + `if $n, $err = io.ReadFull($_, $slice); $err != nil || $n != len($slice) { + $*_ + }`, + ).Report("io.ReadFull() returns err == nil iff n == len(slice)") +} + +func nilerr(m dsl.Matcher) { + m.Match( + `if err == nil { return err }`, + `if err == nil { return $*_, err }`, + ). + Report(`return nil error instead of nil value`) +} + +func mailaddress(m dsl.Matcher) { + m.Match( + "fmt.Sprintf(`\"%s\" <%s>`, $NAME, $EMAIL)", + "fmt.Sprintf(`\"%s\"<%s>`, $NAME, $EMAIL)", + "fmt.Sprintf(`%s <%s>`, $NAME, $EMAIL)", + "fmt.Sprintf(`%s<%s>`, $NAME, $EMAIL)", + `fmt.Sprintf("\"%s\"<%s>", $NAME, $EMAIL)`, + `fmt.Sprintf("\"%s\" <%s>", $NAME, $EMAIL)`, + `fmt.Sprintf("%s<%s>", $NAME, $EMAIL)`, + `fmt.Sprintf("%s <%s>", $NAME, $EMAIL)`, + ). + Report("use net/mail Address.String() instead of fmt.Sprintf()"). + Suggest("(&mail.Address{Name:$NAME, Address:$EMAIL}).String()") +} + +func errnetclosed(m dsl.Matcher) { + m.Match( + `strings.Contains($err.Error(), $text)`, + ). + Where(m["text"].Text.Matches("\".*closed network connection.*\"")). + Report(`String matching against error texts is fragile; use net.ErrClosed instead`). 
+ Suggest(`errors.Is($err, net.ErrClosed)`) +} + +func httpheaderadd(m dsl.Matcher) { + m.Match( + `$H.Add($KEY, $VALUE)`, + ). + Where(m["H"].Type.Is("http.Header")). + Report("use http.Header.Set method instead of Add to overwrite all existing header values"). + Suggest(`$H.Set($KEY, $VALUE)`) +} + +func hmacnew(m dsl.Matcher) { + m.Match("hmac.New(func() hash.Hash { return $x }, $_)", + `$f := func() hash.Hash { return $x } + $*_ + hmac.New($f, $_)`, + ).Where(m["x"].Pure). + Report("invalid hash passed to hmac.New()") +} + +func writestring(m dsl.Matcher) { + m.Match(`io.WriteString($w, string($b))`). + Where(m["b"].Type.Is("[]byte")). + Suggest("$w.Write($b)") +} + +func badlock(m dsl.Matcher) { + // Shouldn't give many false positives without type filter + // as Lock+Unlock pairs in combination with defer gives us pretty + // a good chance to guess correctly. If we constrain the type to sync.Mutex + // then it'll be harder to match embedded locks and custom methods + // that may forward the call to the sync.Mutex (or other synchronization primitive). + + m.Match(`$mu.Lock(); defer $mu.RUnlock()`).Report(`maybe $mu.RLock() was intended?`) + m.Match(`$mu.RLock(); defer $mu.Unlock()`).Report(`maybe $mu.Lock() was intended?`) +} diff --git a/client/write_options.go b/client/write_options.go index 54139ef0b21fa..612cc7fe2d995 100644 --- a/client/write_options.go +++ b/client/write_options.go @@ -28,6 +28,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/column" "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/client/v2/row" ) type InsertOption interface { @@ -71,10 +72,8 @@ func (opt *columnBasedDataOption) processInsertColumns(colSchema *entity.Schema, l := col.Len() if rowSize == 0 { rowSize = l - } else { - if rowSize != l { - return nil, 0, errors.New("column size not match") - } + } else if rowSize != l { + return nil, 0, errors.New("column size not match") } field, has := mNameField[col.Name()] if !has { @@ -247,6 +246,56 @@ func NewColumnBasedInsertOption(collName string, columns ...column.Column) *colu } } +type rowBasedDataOption struct { + *columnBasedDataOption + rows []any +} + +func NewRowBasedInsertOption(collName string, rows ...any) *rowBasedDataOption { + return &rowBasedDataOption{ + columnBasedDataOption: &columnBasedDataOption{ + collName: collName, + }, + rows: rows, + } +} + +func (opt *rowBasedDataOption) InsertRequest(coll *entity.Collection) (*milvuspb.InsertRequest, error) { + columns, err := row.AnyToColumns(opt.rows, coll.Schema) + if err != nil { + return nil, err + } + opt.columnBasedDataOption.columns = columns + fieldsData, rowNum, err := opt.processInsertColumns(coll.Schema, opt.columns...) + if err != nil { + return nil, err + } + return &milvuspb.InsertRequest{ + CollectionName: opt.collName, + PartitionName: opt.partitionName, + FieldsData: fieldsData, + NumRows: uint32(rowNum), + }, nil +} + +func (opt *rowBasedDataOption) UpsertRequest(coll *entity.Collection) (*milvuspb.UpsertRequest, error) { + columns, err := row.AnyToColumns(opt.rows, coll.Schema) + if err != nil { + return nil, err + } + opt.columnBasedDataOption.columns = columns + fieldsData, rowNum, err := opt.processInsertColumns(coll.Schema, opt.columns...) 
+ if err != nil { + return nil, err + } + return &milvuspb.UpsertRequest{ + CollectionName: opt.collName, + PartitionName: opt.partitionName, + FieldsData: fieldsData, + NumRows: uint32(rowNum), + }, nil +} + type DeleteOption interface { Request() *milvuspb.DeleteRequest } diff --git a/client/write_test.go b/client/write_test.go index 3fdb9ece0f615..a87957e615c0a 100644 --- a/client/write_test.go +++ b/client/write_test.go @@ -22,13 +22,14 @@ import ( "math/rand" "testing" + "github.com/samber/lo" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/client/v2/entity" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/samber/lo" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/suite" ) type WriteSuite struct { diff --git a/cmd/roles/roles.go b/cmd/roles/roles.go index 1105498f5f6e7..16be7ac378958 100644 --- a/cmd/roles/roles.go +++ b/cmd/roles/roles.go @@ -411,7 +411,7 @@ func (mr *MilvusRoles) Run() { } tracer.SetTracerProvider(exp, params.TraceCfg.SampleFraction.GetAsFloat()) - log.Info("Reset tracer finished", zap.String("Exporter", params.TraceCfg.Exporter.GetValue())) + log.Info("Reset tracer finished", zap.String("Exporter", params.TraceCfg.Exporter.GetValue()), zap.Float64("SampleFraction", params.TraceCfg.SampleFraction.GetAsFloat())) if paramtable.GetRole() == typeutil.QueryNodeRole || paramtable.GetRole() == typeutil.StandaloneRole { initcore.InitTraceConfig(params) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 6cf78cc058e63..d6415e1a76208 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -68,7 +68,7 @@ tikv: tlsCACert: # path to your CACert file localStorage: - path: /tmp/milvus/data/ # please adjust in embedded Milvus: /tmp/milvus/data/ + path: /var/lib/milvus/data/ # please adjust in embedded Milvus: /tmp/milvus/data/ # Related configuration of MinIO/S3/GCS or any other service supports S3 API, which is responsible for data persistence for Milvus. # We refer to the storage service as MinIO/S3 in the following description for simplicity. @@ -329,13 +329,13 @@ queryNode: enabled: true memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed` - # options: async, sync, off. + # options: async, sync, disable. # Specifies the necessity for warming up the chunk cache. - # 1. If set to "sync" or "async," the original vector data will be synchronously/asynchronously loaded into the + # 1. If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage; - # 2. If set to "off," original vector data will only be loaded into the chunk cache during search/query. - warmup: async + # 2. If set to "disable" original vector data will only be loaded into the chunk cache during search/query. 
+ warmup: disable mmap: mmapEnabled: false # Enable mmap for loading data lazyload: @@ -377,6 +377,7 @@ queryNode: maxQueueLength: 16 # Maximum length of task queue in flowgraph maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph enableSegmentPrune: false # use partition prune function on shard delegator + queryStreamBatchSize: 4194304 # return batch size of stream query ip: # if not specified, use the first unicastable address port: 21123 grpc: @@ -481,6 +482,7 @@ dataCoord: serverMaxRecvSize: 268435456 clientMaxSendSize: 268435456 clientMaxRecvSize: 536870912 + syncSegmentsInterval: 300 dataNode: dataSync: @@ -494,7 +496,7 @@ dataNode: coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds segment: insertBufSize: 16777216 # Max buffer size to flush for a single segment. - deleteBufBytes: 67108864 # Max buffer size in bytes to flush del for a single channel, default as 16MB + deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB syncPeriod: 600 # The period to sync segments if buffer is not empty. memory: forceSyncEnable: true # Set true to force sync if memory usage is too high @@ -612,7 +614,7 @@ common: ttMsgEnabled: true # Whether the instance disable sending ts messages traceLogMode: 0 # trace request info bloomFilterSize: 100000 # bloom filter initial size - maxBloomFalsePositive: 0.05 # max false positive rate for bloom filter + maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter # QuotaConfig, configurations of Milvus quota and limits. # By default, we enable: @@ -631,6 +633,14 @@ quotaAndLimits: # collects metrics from Proxies, Query cluster and Data cluster. # seconds, (0 ~ 65536) quotaCenterCollectInterval: 3 + limits: + allocRetryTimes: 15 # retry times when delete alloc forward data from rate limit failed + allocWaitInterval: 1000 # retry wait duration when delete alloc forward data rate failed, in millisecond + complexDeleteLimitEnable: false # whether complex delete check forward data by limiter + maxCollectionNum: 65536 + maxCollectionNumPerDB: 65536 + maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit + maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes ddl: enabled: false collectionRate: -1 # qps, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection @@ -711,11 +721,6 @@ quotaAndLimits: max: -1 # qps, default no limit partition: max: -1 # qps, default no limit - limits: - maxCollectionNum: 65536 - maxCollectionNumPerDB: 65536 - maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit - maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes limitWriting: # forceDeny false means dml requests are allowed (except for some # specific conditions, such as memory of nodes to water marker), true means always reject all dml requests. 
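The client-side additions earlier in this patch (client/row/data.go, client/row/schema.go, and the rowBasedDataOption in client/write_options.go) together enable row-based writes driven by `milvus` struct tags. The sketch below is illustrative only and not part of the diff: the Film type and its field values are assumptions, while row.ParseSchema, row.AnyToColumns, and the tag keywords (primary_key, auto_id, dim, name, "-") come from the code added above.

```go
package main

import (
	"fmt"

	"github.com/milvus-io/milvus/client/v2/row"
)

// Film is a hypothetical row type; its milvus tags are parsed by row.ParseSchema
// and row.ParseTagSetting as introduced in client/row/schema.go.
type Film struct {
	ID     int64     `milvus:"primary_key;auto_id"`
	Title  string    `milvus:"name:title"`
	Vector []float32 `milvus:"dim:8"`
	Draft  bool      `milvus:"-"` // skipped via MilvusSkipTagValue
}

func main() {
	// Derive an entity.Schema from the struct definition; the collection name
	// defaults to the struct name ("Film").
	sch, err := row.ParseSchema(&Film{})
	if err != nil {
		panic(err)
	}
	fmt.Println(sch.CollectionName)

	// Convert rows to columns; the auto-id primary key is skipped, so only the
	// title and vector columns are produced here.
	cols, err := row.AnyToColumns([]any{
		&Film{Title: "doc", Vector: []float32{0, 1, 2, 3, 4, 5, 6, 7}},
	}, sch)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(cols)) // 2
}
```

NewRowBasedInsertOption(collName, rows...) wraps the same AnyToColumns call to build the InsertRequest/UpsertRequest shown in the write_options.go hunk above.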
diff --git a/configs/pgo/default.pgo b/configs/pgo/default.pgo new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/go.mod b/go.mod index f50cfb447b574..3412f20c408a5 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/milvus-io/milvus -go 1.20 +go 1.21 require ( github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0 @@ -65,9 +65,11 @@ require ( require github.com/milvus-io/milvus-storage/go v0.0.0-20231227072638-ebd0b8e56d70 require ( + github.com/greatroar/blobloom v0.0.0-00010101000000-000000000000 github.com/jolestar/go-commons-pool/v2 v2.1.2 github.com/milvus-io/milvus/pkg v0.0.0-00010101000000-000000000000 github.com/pkg/errors v0.9.1 + github.com/zeebo/xxh3 v1.0.2 gopkg.in/yaml.v3 v3.0.1 ) @@ -209,7 +211,6 @@ require ( github.com/x448/float16 v0.8.4 // indirect github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect - github.com/zeebo/xxh3 v1.0.2 // indirect go.etcd.io/bbolt v1.3.6 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.5 // indirect go.etcd.io/etcd/client/v2 v2.305.5 // indirect @@ -250,6 +251,7 @@ replace ( github.com/bketelsen/crypt => github.com/bketelsen/crypt v0.0.4 // Fix security alert for core-os/etcd github.com/expr-lang/expr => github.com/SimFG/expr v0.0.0-20231218130003-94d085776dc5 github.com/go-kit/kit => github.com/go-kit/kit v0.1.0 + github.com/greatroar/blobloom => github.com/milvus-io/blobloom v0.0.0-20240603110411-471ae49f3b93 // github.com/milvus-io/milvus-storage/go => ../milvus-storage/go github.com/milvus-io/milvus/pkg => ./pkg github.com/streamnative/pulsarctl => github.com/xiaofan-luan/pulsarctl v0.5.1 diff --git a/go.sum b/go.sum index 20a4faf084194..f0f45360fef38 100644 --- a/go.sum +++ b/go.sum @@ -56,12 +56,14 @@ github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.0/go.mod h1:OQeznEEkTZ9Orh github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 h1:sXr+ck84g/ZlZUOZiNELInmMgOsuGwdjjVkEIde0OtY= github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0/go.mod h1:okt5dMMTOFjX/aovMlrjvvXoPMBVSPzk9185BT0+eZM= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.2.0 h1:Ma67P/GGprNwsslzEH6+Kb8nybI8jpDTm4Wmzu2ReK8= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.2.0/go.mod h1:c+Lifp3EDEamAkPVzMooRNOK6CZjNSdEnf1A7jsI9u4= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0 h1:nVocQV40OQne5613EeLayJiRAJuKlBGy+m22qWG+WRg= github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0/go.mod h1:7QJP7dr2wznCMeqIrhMgWGf7XpAQnVrJqDm9nvV3Cu4= github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0 h1:OBhqkivkhkMqLPymWEppkm7vgPQY2XsHoEkaMQ0AdZY= github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0/go.mod h1:kgDmCTgBzIEPFElEF+FK0SdjAor06dRq2Go927dnQ6o= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v1.2.1 h1:9F2/+DoOYIOksmaJFPw1tGFy1eDnIJXg+UHjuD8lTak= +github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53/go.mod h1:+3IMCy2vIlbG1XG/0ggNQv0SvxCAIpPM5b1nCz56Xno= github.com/CloudyKit/jet/v3 v3.0.0/go.mod h1:HKQPgSJmdK8hdoAbKUUWajkHyHo4RaU5rMdUywE7VMo= @@ -169,6 +171,7 @@ github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod 
h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= +github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5/go.mod h1:h6jFvWxBdQXxjopDMZyH2UVceIRfR84bdzbkoKrsWNo= github.com/cockroachdb/datadriven v1.0.2 h1:H9MtNqVoVhvd9nCBwOyDjUEdZCREqbIdCJD93PBm/jA= github.com/cockroachdb/datadriven v1.0.2/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= @@ -215,6 +218,7 @@ github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8 github.com/dimfeld/httptreemux v5.0.1+incompatible h1:Qj3gVcDNoOthBAqftuD596rm4wg/adLLz5xh5CmpiCA= github.com/dimfeld/httptreemux v5.0.1+incompatible/go.mod h1:rbUlSV+CCpv/SuqUTP/8Bk2O3LyUV436/yaRGkhP6Z0= github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= +github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= @@ -235,6 +239,7 @@ github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go. github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.10.1 h1:c0g45+xCJhdgFGw7a5QAfdS4byAbud7miNWJ1WwEVf8= +github.com/envoyproxy/protoc-gen-validate v0.10.1/go.mod h1:DRjgyB0I43LtJapqN6NiRwroiAU2PaFuvk/vjgh61ss= github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c h1:8ISkoahWXwZR41ois5lSJBSVw4D0OV19Ht/JSTzvSv0= github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c/go.mod h1:Yg+htXGokKKdzcwhuNDwVvN+uBxDGXJ7G/VN1d8fa64= @@ -245,6 +250,7 @@ github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4/go.mod h1:5tD+ne github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg= +github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= @@ -257,6 +263,7 @@ github.com/frankban/quicktest v1.7.2/go.mod h1:jaStnuzAqU1AJdCO0l53JDCJrVDKcS03D github.com/frankban/quicktest v1.10.0/go.mod h1:ui7WezCLWMWxVWr1GETZY3smRy0G4KWq9vcPtJmFl7Y= github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og= github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= +github.com/frankban/quicktest v1.14.5/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify 
v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= @@ -299,6 +306,7 @@ github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AE github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= @@ -337,6 +345,7 @@ github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGw github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= +github.com/golang/glog v1.1.0/go.mod h1:pfYeQZ3JWZoXTV5sFc986z3HTpwQs9At6P4ImfuP3NQ= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -400,6 +409,7 @@ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= @@ -546,7 +556,9 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kris-nova/logger v0.0.0-20181127235838-fd0d87064b06 h1:vN4d3jSss3ExzUn2cE0WctxztfOgiKvMKnDrydBsg00= +github.com/kris-nova/logger v0.0.0-20181127235838-fd0d87064b06/go.mod h1:++9BgZujZd4v0ZTZCb5iPsaomXdZWyxotIAh1IiDm44= github.com/kris-nova/lolgopher v0.0.0-20180921204813-313b3abb0d9b h1:xYEM2oBUhBEhQjrV+KJ9lEWDWYZoNVZUaBF++Wyljq4= +github.com/kris-nova/lolgopher v0.0.0-20180921204813-313b3abb0d9b/go.mod h1:V0HF/ZBlN86HqewcDC/cVxMmYDiRukWjSrgKLUAn9Js= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/labstack/echo/v4 v4.5.0/go.mod h1:czIriw4a0C1dFun+ObrXp7ok03xON0N1awStJ6ArI7Y= @@ -554,6 +566,7 @@ 
github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= github.com/lingdor/stackerror v0.0.0-20191119040541-976d8885ed76 h1:IVlcvV0CjvfBYYod5ePe89l+3LBAl//6n9kJ9Vr2i0k= +github.com/lingdor/stackerror v0.0.0-20191119040541-976d8885ed76/go.mod h1:Iu9BHUvTh8/KpbuSoKx/CaJEdJvFxSverxIy7I+nq7s= github.com/linkedin/goavro v2.1.0+incompatible/go.mod h1:bBCwI2eGYpUI/4820s67MElg9tdeLbINjLjiM2xZFYM= github.com/linkedin/goavro/v2 v2.9.8/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= github.com/linkedin/goavro/v2 v2.10.0/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= @@ -580,6 +593,7 @@ github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27k github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.8 h1:3tS41NlGYSmhhe/8fhGRzc+z3AYCw1Fe1WAyLuujKs0= +github.com/mattn/go-runewidth v0.0.8/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/goveralls v0.0.2/go.mod h1:8d1ZMHsd7fW6IRPKQh46F2WRpyib5/X4FOpevwGNQEw= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= @@ -589,6 +603,8 @@ github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQ github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/milvus-io/blobloom v0.0.0-20240603110411-471ae49f3b93 h1:xnIeuG1nuTEHKbbv51OwNGO82U+d6ut08ppTmZVm+VY= +github.com/milvus-io/blobloom v0.0.0-20240603110411-471ae49f3b93/go.mod h1:mjMJ1hh1wjGVfr93QIHJ6FfDNVrA0IELv8OvMHJxHKs= github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8= github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4= github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240430035521-259ae1d10016 h1:8WV4maXLeGEyJCCYIc1DmZ18H+VFAjMrwXJg5iI2nX4= @@ -651,6 +667,7 @@ github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/olekukonko/tablewriter v0.0.1 h1:b3iUnf1v+ppJiOfNX4yxxqfWKMQPZR5yoh8urCTFX88= +github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= @@ -713,6 +730,7 @@ github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndr github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod 
h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= +github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= @@ -840,6 +858,7 @@ github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69 github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.865 h1:LcUqBlKC4j15LhT303yQDX/XxyHG4haEQqbHgZZA4SY= github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.865/go.mod h1:r5r4xbfxSaeR04b166HGsBa/R4U3SueirEUpXGuw+Q0= github.com/thoas/go-funk v0.9.1 h1:O549iLZqPpTUQ10ykd26sZhzD+rmR5pWhuElrhbC20M= +github.com/thoas/go-funk v0.9.1/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q= github.com/tiancaiamao/gp v0.0.0-20221230034425-4025bc8a4d4a h1:J/YdBZ46WKpXsxsW93SG+q0F8KI+yFrcIDT4c/RNoc4= github.com/tiancaiamao/gp v0.0.0-20221230034425-4025bc8a4d4a/go.mod h1:h4xBhSNtOeEosLJ4P7JyKXX7Cabg7AVkWCK5gV2vOrM= github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM= @@ -899,6 +918,7 @@ github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1 github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= @@ -969,6 +989,7 @@ go.uber.org/automaxprocs v1.5.3/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnw go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= +go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= @@ -1456,6 +1477,7 @@ gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v1 v1.0.0/go.mod h1:CxwszS/Xz1C49Ucd2i6Zil5UToP1EmyrFhKaMVbg1mk= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= @@ -1512,3 +1534,4 @@ sigs.k8s.io/yaml v1.2.0/go.mod 
h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= stathat.com/c/consistent v1.0.0 h1:ezyc51EGcRPJUxfHGSgJjWzJdj3NiMU9pNfLNGiXV0c= +stathat.com/c/consistent v1.0.0/go.mod h1:QkzMWzcbB+yQBL2AttO6sgsQS/JSTapcDISJalmCDS0= diff --git a/internal/core/src/bitset/detail/platform/arm/sve-impl.h b/internal/core/src/bitset/detail/platform/arm/sve-impl.h index 18433402d04d9..dfc84f2824d8a 100644 --- a/internal/core/src/bitset/detail/platform/arm/sve-impl.h +++ b/internal/core/src/bitset/detail/platform/arm/sve-impl.h @@ -42,63 +42,6 @@ namespace { // constexpr size_t MAX_SVE_WIDTH = 2048; -constexpr uint8_t SVE_LANES_8[MAX_SVE_WIDTH / 8] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, - 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, - 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, - 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, - 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, - 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, - - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, - 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, - 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, - 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, - 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, - 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, - - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, - 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, - 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, 0xA0, - 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, - 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, - 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, - - 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, - 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, - 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, - 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, - 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, - 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF}; - -constexpr uint16_t SVE_LANES_16[MAX_SVE_WIDTH / 16] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, - 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, - 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, - 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, - 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, - 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, - - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, - 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, - 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, - 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, - 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, - 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F}; - -constexpr uint32_t SVE_LANES_32[MAX_SVE_WIDTH / 32] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, - 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, - 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, - 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 
0x2A, 0x2B, - 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, - 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F}; - -constexpr uint64_t SVE_LANES_64[MAX_SVE_WIDTH / 64] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, - 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, - 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F}; - /* // debugging facilities @@ -131,179 +74,28 @@ void print_svuint8_t(const svuint8_t value) { /////////////////////////////////////////////////////////////////////////// -// todo: replace with pext whenever available - -// generate 16-bit bitmask from 8 serialized 16-bit svbool_t values -void -write_bitmask_16_8x(uint8_t* const __restrict res_u8, - const svbool_t pred_op, - const svbool_t pred_write, - const uint8_t* const __restrict pred_buf) { - // perform parallel pext - // 2048b -> 32 bytes mask -> 256 bytes total, 128 uint16_t values - // 512b -> 8 bytes mask -> 64 bytes total, 32 uint16_t values - // 256b -> 4 bytes mask -> 32 bytes total, 16 uint16_t values - // 128b -> 2 bytes mask -> 16 bytes total, 8 uint16_t values - - // this code does reduction of 16-bit 0b0A0B0C0D0E0F0G0H words into - // uint8_t values 0bABCDEFGH, then writes ones to the memory - - // we need to operate in uint8_t - const svuint8_t mask_8b = svld1_u8(pred_op, pred_buf); - - const svuint8_t mask_04_8b = svand_n_u8_z(pred_op, mask_8b, 0x01); - const svuint8_t mask_15_8b = svand_n_u8_z(pred_op, mask_8b, 0x04); - const svuint8_t mask_15s_8b = svlsr_n_u8_z(pred_op, mask_15_8b, 1); - const svuint8_t mask_26_8b = svand_n_u8_z(pred_op, mask_8b, 0x10); - const svuint8_t mask_26s_8b = svlsr_n_u8_z(pred_op, mask_26_8b, 2); - const svuint8_t mask_37_8b = svand_n_u8_z(pred_op, mask_8b, 0x40); - const svuint8_t mask_37s_8b = svlsr_n_u8_z(pred_op, mask_37_8b, 3); - - const svuint8_t mask_0347_8b = svorr_u8_z(pred_op, mask_04_8b, mask_37s_8b); - const svuint8_t mask_1256_8b = - svorr_u8_z(pred_op, mask_15s_8b, mask_26s_8b); - const svuint8_t mask_cmb_8b = - svorr_u8_z(pred_op, mask_0347_8b, mask_1256_8b); - - // - const svuint16_t shifts_16b = svdup_u16(0x0400UL); - const svuint8_t shifts_8b = svreinterpret_u8_u16(shifts_16b); - const svuint8_t shifted_8b_m0 = svlsl_u8_z(pred_op, mask_cmb_8b, shifts_8b); - - const svuint8_t zero_8b = svdup_n_u8(0); - - const svuint8_t shifted_8b_m3 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m0, zero_8b), - svuzp2_u8(shifted_8b_m0, zero_8b)); - - // write a finished bitmask - svst1_u8(pred_write, res_u8, shifted_8b_m3); -} - -// generate 32-bit bitmask from 8 serialized 32-bit svbool_t values -void -write_bitmask_32_8x(uint8_t* const __restrict res_u8, - const svbool_t pred_op, - const svbool_t pred_write, - const uint8_t* const __restrict pred_buf) { - // perform parallel pext - // 2048b -> 32 bytes mask -> 256 bytes total, 64 uint32_t values - // 512b -> 8 bytes mask -> 64 bytes total, 16 uint32_t values - // 256b -> 4 bytes mask -> 32 bytes total, 8 uint32_t values - // 128b -> 2 bytes mask -> 16 bytes total, 4 uint32_t values - - // this code does reduction of 32-bit 0b000A000B000C000D... 
dwords into - // uint8_t values 0bABCDEFGH, then writes ones to the memory - - // we need to operate in uint8_t - const svuint8_t mask_8b = svld1_u8(pred_op, pred_buf); - - const svuint8_t mask_024_8b = svand_n_u8_z(pred_op, mask_8b, 0x01); - const svuint8_t mask_135s_8b = svlsr_n_u8_z(pred_op, mask_8b, 3); - const svuint8_t mask_cmb_8b = - svorr_u8_z(pred_op, mask_024_8b, mask_135s_8b); - - // - const svuint32_t shifts_32b = svdup_u32(0x06040200UL); - const svuint8_t shifts_8b = svreinterpret_u8_u32(shifts_32b); - const svuint8_t shifted_8b_m0 = svlsl_u8_z(pred_op, mask_cmb_8b, shifts_8b); - - const svuint8_t zero_8b = svdup_n_u8(0); - - const svuint8_t shifted_8b_m2 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m0, zero_8b), - svuzp2_u8(shifted_8b_m0, zero_8b)); - const svuint8_t shifted_8b_m3 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m2, zero_8b), - svuzp2_u8(shifted_8b_m2, zero_8b)); - - // write a finished bitmask - svst1_u8(pred_write, res_u8, shifted_8b_m3); -} - -// generate 64-bit bitmask from 8 serialized 64-bit svbool_t values -void -write_bitmask_64_8x(uint8_t* const __restrict res_u8, - const svbool_t pred_op, - const svbool_t pred_write, - const uint8_t* const __restrict pred_buf) { - // perform parallel pext - // 2048b -> 32 bytes mask -> 256 bytes total, 32 uint64_t values - // 512b -> 8 bytes mask -> 64 bytes total, 4 uint64_t values - // 256b -> 4 bytes mask -> 32 bytes total, 2 uint64_t values - // 128b -> 2 bytes mask -> 16 bytes total, 1 uint64_t values - - // this code does reduction of 64-bit 0b0000000A0000000B... qwords into - // uint8_t values 0bABCDEFGH, then writes ones to the memory - - // we need to operate in uint8_t - const svuint8_t mask_8b = svld1_u8(pred_op, pred_buf); - const svuint64_t shifts_64b = svdup_u64(0x706050403020100ULL); - const svuint8_t shifts_8b = svreinterpret_u8_u64(shifts_64b); - const svuint8_t shifted_8b_m0 = svlsl_u8_z(pred_op, mask_8b, shifts_8b); - - const svuint8_t zero_8b = svdup_n_u8(0); - - const svuint8_t shifted_8b_m1 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m0, zero_8b), - svuzp2_u8(shifted_8b_m0, zero_8b)); - const svuint8_t shifted_8b_m2 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m1, zero_8b), - svuzp2_u8(shifted_8b_m1, zero_8b)); - const svuint8_t shifted_8b_m3 = - svorr_u8_z(pred_op, - svuzp1_u8(shifted_8b_m2, zero_8b), - svuzp2_u8(shifted_8b_m2, zero_8b)); - - // write a finished bitmask - svst1_u8(pred_write, res_u8, shifted_8b_m3); -} - -/////////////////////////////////////////////////////////////////////////// - // inline svbool_t get_pred_op_8(const size_t n_elements) { - const svbool_t pred_all_8 = svptrue_b8(); - const svuint8_t lanes_8 = svld1_u8(pred_all_8, SVE_LANES_8); - const svuint8_t leftovers_op = svdup_n_u8(n_elements); - const svbool_t pred_op = svcmpgt_u8(pred_all_8, leftovers_op, lanes_8); - return pred_op; + return svwhilelt_b8(uint32_t(0), uint32_t(n_elements)); } // inline svbool_t get_pred_op_16(const size_t n_elements) { - const svbool_t pred_all_16 = svptrue_b16(); - const svuint16_t lanes_16 = svld1_u16(pred_all_16, SVE_LANES_16); - const svuint16_t leftovers_op = svdup_n_u16(n_elements); - const svbool_t pred_op = svcmpgt_u16(pred_all_16, leftovers_op, lanes_16); - return pred_op; + return svwhilelt_b16(uint32_t(0), uint32_t(n_elements)); } // inline svbool_t get_pred_op_32(const size_t n_elements) { - const svbool_t pred_all_32 = svptrue_b32(); - const svuint32_t lanes_32 = svld1_u32(pred_all_32, SVE_LANES_32); - const svuint32_t leftovers_op = svdup_n_u32(n_elements); - const 
svbool_t pred_op = svcmpgt_u32(pred_all_32, leftovers_op, lanes_32); - return pred_op; + return svwhilelt_b32(uint32_t(0), uint32_t(n_elements)); } // inline svbool_t get_pred_op_64(const size_t n_elements) { - const svbool_t pred_all_64 = svptrue_b64(); - const svuint64_t lanes_64 = svld1_u64(pred_all_64, SVE_LANES_64); - const svuint64_t leftovers_op = svdup_n_u64(n_elements); - const svbool_t pred_op = svcmpgt_u64(pred_all_64, leftovers_op, lanes_64); - return pred_op; + return svwhilelt_b64(uint32_t(0), uint32_t(n_elements)); } // @@ -579,7 +371,7 @@ struct SVEVector { using sve_type = svint8_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline uint64_t width() { return svcntb(); } @@ -606,7 +398,7 @@ struct SVEVector { using sve_type = svint16_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline uint64_t width() { return svcnth(); } @@ -633,7 +425,7 @@ struct SVEVector { using sve_type = svint32_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline uint64_t width() { return svcntw(); } @@ -660,7 +452,7 @@ struct SVEVector { using sve_type = svint64_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline uint64_t width() { return svcntd(); } @@ -687,7 +479,7 @@ struct SVEVector { using sve_type = svfloat32_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline uint64_t width() { return svcntw(); } @@ -714,7 +506,7 @@ struct SVEVector { using sve_type = svfloat64_t; // measured in the number of elements that an SVE register can hold - static inline size_t + static inline uint64_t width() { return svcntd(); } @@ -737,159 +529,262 @@ struct SVEVector { /////////////////////////////////////////////////////////////////////////// -// an interesting discussion here: -// https://stackoverflow.com/questions/77834169/what-is-a-fast-fallback-algorithm-which-emulates-pdep-and-pext-in-software - -// SVE2 has bitperm, which contains the implementation of pext - -// todo: replace with pext whenever available - -// +// NBYTES is the size of the underlying datatype in bytes. 
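+// NBYTES also fixes the number of svuzp1 compaction rounds below: log2(NBYTES) rounds bring the element-sized predicates down to byte granularity.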
+// So, for example, for i8/u8 use 1, for i64/u64/f64 use 8/ template struct MaskHelper {}; template <> struct MaskHelper<1> { static inline void - write(uint8_t* const __restrict bitmask, - const size_t size, - const svbool_t pred0, - const svbool_t pred1, - const svbool_t pred2, - const svbool_t pred3, - const svbool_t pred4, - const svbool_t pred5, - const svbool_t pred6, - const svbool_t pred7) { - const size_t sve_width = svcntb(); - if (sve_width == 8 * sve_width) { - // perform a full write - *((svbool_t*)(bitmask + 0 * sve_width / 8)) = pred0; - *((svbool_t*)(bitmask + 1 * sve_width / 8)) = pred1; - *((svbool_t*)(bitmask + 2 * sve_width / 8)) = pred2; - *((svbool_t*)(bitmask + 3 * sve_width / 8)) = pred3; - *((svbool_t*)(bitmask + 4 * sve_width / 8)) = pred4; - *((svbool_t*)(bitmask + 5 * sve_width / 8)) = pred5; - *((svbool_t*)(bitmask + 6 * sve_width / 8)) = pred6; - *((svbool_t*)(bitmask + 7 * sve_width / 8)) = pred7; - } else { - // perform a partial write - - // this is the buffer for the maximum possible case of 2048 bits - uint8_t pred_buf[MAX_SVE_WIDTH / 8]; - *((volatile svbool_t*)(pred_buf + 0 * sve_width / 8)) = pred0; - *((volatile svbool_t*)(pred_buf + 1 * sve_width / 8)) = pred1; - *((volatile svbool_t*)(pred_buf + 2 * sve_width / 8)) = pred2; - *((volatile svbool_t*)(pred_buf + 3 * sve_width / 8)) = pred3; - *((volatile svbool_t*)(pred_buf + 4 * sve_width / 8)) = pred4; - *((volatile svbool_t*)(pred_buf + 5 * sve_width / 8)) = pred5; - *((volatile svbool_t*)(pred_buf + 6 * sve_width / 8)) = pred6; - *((volatile svbool_t*)(pred_buf + 7 * sve_width / 8)) = pred7; - - // make the write mask - const svbool_t pred_write = get_pred_op_8(size / 8); - - // load the buffer - const svuint8_t mask_u8 = svld1_u8(pred_write, pred_buf); - // write it to the bitmask - svst1_u8(pred_write, bitmask, mask_u8); - } + write_full(uint8_t* const __restrict bitmask, + const svbool_t pred0, + const svbool_t pred1, + const svbool_t pred2, + const svbool_t pred3, + const svbool_t pred4, + const svbool_t pred5, + const svbool_t pred6, + const svbool_t pred7) { + const uint64_t sve_width = svcntb(); + + // perform a full write + *((svbool_t*)(bitmask + 0 * sve_width / 8)) = pred0; + *((svbool_t*)(bitmask + 1 * sve_width / 8)) = pred1; + *((svbool_t*)(bitmask + 2 * sve_width / 8)) = pred2; + *((svbool_t*)(bitmask + 3 * sve_width / 8)) = pred3; + *((svbool_t*)(bitmask + 4 * sve_width / 8)) = pred4; + *((svbool_t*)(bitmask + 5 * sve_width / 8)) = pred5; + *((svbool_t*)(bitmask + 6 * sve_width / 8)) = pred6; + *((svbool_t*)(bitmask + 7 * sve_width / 8)) = pred7; + } + + static inline void + write_partial(uint8_t* const __restrict bitmask, + const size_t size, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + const uint64_t sve_width = svcntb(); + + // perform a partial write + + // this is a temporary buffer for the maximum possible case of 2048 bits + uint8_t pred_buf[MAX_SVE_WIDTH / 8]; + // write to the temporary buffer + *((volatile svbool_t*)(pred_buf + 0 * sve_width / 8)) = pred_0; + *((volatile svbool_t*)(pred_buf + 1 * sve_width / 8)) = pred_1; + *((volatile svbool_t*)(pred_buf + 2 * sve_width / 8)) = pred_2; + *((volatile svbool_t*)(pred_buf + 3 * sve_width / 8)) = pred_3; + *((volatile svbool_t*)(pred_buf + 4 * sve_width / 8)) = pred_4; + *((volatile svbool_t*)(pred_buf + 5 * sve_width / 8)) = pred_5; + *((volatile svbool_t*)(pred_buf + 
6 * sve_width / 8)) = pred_6; + *((volatile svbool_t*)(pred_buf + 7 * sve_width / 8)) = pred_7; + + // make the write mask. (size % 8) == 0 is guaranteed by the caller. + const svbool_t pred_write = + svwhilelt_b8(uint32_t(0), uint32_t(size / 8)); + + // load the buffer + const svuint8_t mask_u8 = svld1_u8(pred_write, pred_buf); + // write it to the bitmask + svst1_u8(pred_write, bitmask, mask_u8); } }; template <> struct MaskHelper<2> { static inline void - write(uint8_t* const __restrict bitmask, - const size_t size, - const svbool_t pred0, - const svbool_t pred1, - const svbool_t pred2, - const svbool_t pred3, - const svbool_t pred4, - const svbool_t pred5, - const svbool_t pred6, - const svbool_t pred7) { - const size_t sve_width = svcnth(); - - // this is the buffer for the maximum possible case of 2048 bits - uint8_t pred_buf[MAX_SVE_WIDTH / 8]; - *((volatile svbool_t*)(pred_buf + 0 * sve_width / 4)) = pred0; - *((volatile svbool_t*)(pred_buf + 1 * sve_width / 4)) = pred1; - *((volatile svbool_t*)(pred_buf + 2 * sve_width / 4)) = pred2; - *((volatile svbool_t*)(pred_buf + 3 * sve_width / 4)) = pred3; - *((volatile svbool_t*)(pred_buf + 4 * sve_width / 4)) = pred4; - *((volatile svbool_t*)(pred_buf + 5 * sve_width / 4)) = pred5; - *((volatile svbool_t*)(pred_buf + 6 * sve_width / 4)) = pred6; - *((volatile svbool_t*)(pred_buf + 7 * sve_width / 4)) = pred7; - - const svbool_t pred_op_8 = get_pred_op_8(size / 4); - const svbool_t pred_write_8 = get_pred_op_8(size / 8); - write_bitmask_16_8x(bitmask, pred_op_8, pred_write_8, pred_buf); + write_full(uint8_t* const __restrict bitmask, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + const uint64_t sve_width = svcntb(); + + // compact predicates + const svbool_t pred_01 = svuzp1_b8(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b8(pred_2, pred_3); + const svbool_t pred_45 = svuzp1_b8(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b8(pred_6, pred_7); + + // perform a full write + *((svbool_t*)(bitmask + 0 * sve_width / 8)) = pred_01; + *((svbool_t*)(bitmask + 1 * sve_width / 8)) = pred_23; + *((svbool_t*)(bitmask + 2 * sve_width / 8)) = pred_45; + *((svbool_t*)(bitmask + 3 * sve_width / 8)) = pred_67; + } + + static inline void + write_partial(uint8_t* const __restrict bitmask, + const size_t size, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + const uint64_t sve_width = svcntb(); + + // compact predicates + const svbool_t pred_01 = svuzp1_b8(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b8(pred_2, pred_3); + const svbool_t pred_45 = svuzp1_b8(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b8(pred_6, pred_7); + + // this is a temporary buffer for the maximum possible case of 1024 bits + uint8_t pred_buf[MAX_SVE_WIDTH / 16]; + // write to the temporary buffer + *((volatile svbool_t*)(pred_buf + 0 * sve_width / 8)) = pred_01; + *((volatile svbool_t*)(pred_buf + 1 * sve_width / 8)) = pred_23; + *((volatile svbool_t*)(pred_buf + 2 * sve_width / 8)) = pred_45; + *((volatile svbool_t*)(pred_buf + 3 * sve_width / 8)) = pred_67; + + // make the write mask. (size % 8) == 0 is guaranteed by the caller. 
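+        // svwhilelt_b8(0, k) activates exactly the first k byte lanes
+        // (all of them if k exceeds the vector length), so only size / 8
+        // bitmask bytes are touched by the store below.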
+ const svbool_t pred_write = + svwhilelt_b8(uint32_t(0), uint32_t(size / 8)); + + // load the buffer + const svuint8_t mask_u8 = svld1_u8(pred_write, pred_buf); + // write it to the bitmask + svst1_u8(pred_write, bitmask, mask_u8); } }; template <> struct MaskHelper<4> { static inline void - write(uint8_t* const __restrict bitmask, - const size_t size, - const svbool_t pred0, - const svbool_t pred1, - const svbool_t pred2, - const svbool_t pred3, - const svbool_t pred4, - const svbool_t pred5, - const svbool_t pred6, - const svbool_t pred7) { - const size_t sve_width = svcntw(); - - // this is the buffer for the maximum possible case of 2048 bits - uint8_t pred_buf[MAX_SVE_WIDTH / 8]; - *((volatile svbool_t*)(pred_buf + 0 * sve_width / 2)) = pred0; - *((volatile svbool_t*)(pred_buf + 1 * sve_width / 2)) = pred1; - *((volatile svbool_t*)(pred_buf + 2 * sve_width / 2)) = pred2; - *((volatile svbool_t*)(pred_buf + 3 * sve_width / 2)) = pred3; - *((volatile svbool_t*)(pred_buf + 4 * sve_width / 2)) = pred4; - *((volatile svbool_t*)(pred_buf + 5 * sve_width / 2)) = pred5; - *((volatile svbool_t*)(pred_buf + 6 * sve_width / 2)) = pred6; - *((volatile svbool_t*)(pred_buf + 7 * sve_width / 2)) = pred7; - - const svbool_t pred_op_8 = get_pred_op_8(size / 2); - const svbool_t pred_write_8 = get_pred_op_8(size / 8); - write_bitmask_32_8x(bitmask, pred_op_8, pred_write_8, pred_buf); + write_full(uint8_t* const __restrict bitmask, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + const uint64_t sve_width = svcntb(); + + // compact predicates + const svbool_t pred_01 = svuzp1_b16(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b16(pred_2, pred_3); + const svbool_t pred_45 = svuzp1_b16(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b16(pred_6, pred_7); + const svbool_t pred_0123 = svuzp1_b8(pred_01, pred_23); + const svbool_t pred_4567 = svuzp1_b8(pred_45, pred_67); + + // perform a full write + *((svbool_t*)(bitmask + 0 * sve_width / 8)) = pred_0123; + *((svbool_t*)(bitmask + 1 * sve_width / 8)) = pred_4567; + } + + static inline void + write_partial(uint8_t* const __restrict bitmask, + const size_t size, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + const uint64_t sve_width = svcntb(); + + // compact predicates + const svbool_t pred_01 = svuzp1_b16(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b16(pred_2, pred_3); + const svbool_t pred_45 = svuzp1_b16(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b16(pred_6, pred_7); + const svbool_t pred_0123 = svuzp1_b8(pred_01, pred_23); + const svbool_t pred_4567 = svuzp1_b8(pred_45, pred_67); + + // this is a temporary buffer for the maximum possible case of 512 bits + uint8_t pred_buf[MAX_SVE_WIDTH / 32]; + // write to the temporary buffer + *((volatile svbool_t*)(pred_buf + 0 * sve_width / 8)) = pred_0123; + *((volatile svbool_t*)(pred_buf + 1 * sve_width / 8)) = pred_4567; + + // make the write mask. (size % 8) == 0 is guaranteed by the caller. 
+ const svbool_t pred_write = + svwhilelt_b8(uint32_t(0), uint32_t(size / 8)); + + // load the buffer + const svuint8_t mask_u8 = svld1_u8(pred_write, pred_buf); + // write it to the bitmask + svst1_u8(pred_write, bitmask, mask_u8); } }; template <> struct MaskHelper<8> { static inline void - write(uint8_t* const __restrict bitmask, - const size_t size, - const svbool_t pred0, - const svbool_t pred1, - const svbool_t pred2, - const svbool_t pred3, - const svbool_t pred4, - const svbool_t pred5, - const svbool_t pred6, - const svbool_t pred7) { - const size_t sve_width = svcntd(); - - // this is the buffer for the maximum possible case of 2048 bits - uint8_t pred_buf[MAX_SVE_WIDTH / 8]; - *((volatile svbool_t*)(pred_buf + 0 * sve_width)) = pred0; - *((volatile svbool_t*)(pred_buf + 1 * sve_width)) = pred1; - *((volatile svbool_t*)(pred_buf + 2 * sve_width)) = pred2; - *((volatile svbool_t*)(pred_buf + 3 * sve_width)) = pred3; - *((volatile svbool_t*)(pred_buf + 4 * sve_width)) = pred4; - *((volatile svbool_t*)(pred_buf + 5 * sve_width)) = pred5; - *((volatile svbool_t*)(pred_buf + 6 * sve_width)) = pred6; - *((volatile svbool_t*)(pred_buf + 7 * sve_width)) = pred7; - - const svbool_t pred_op_8 = get_pred_op_8(size / 1); - const svbool_t pred_write_8 = get_pred_op_8(size / 8); - write_bitmask_64_8x(bitmask, pred_op_8, pred_write_8, pred_buf); + write_full(uint8_t* const __restrict bitmask, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + // compact predicates + const svbool_t pred_01 = svuzp1_b32(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b32(pred_2, pred_3); + const svbool_t pred_45 = svuzp1_b32(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b32(pred_6, pred_7); + const svbool_t pred_0123 = svuzp1_b16(pred_01, pred_23); + const svbool_t pred_4567 = svuzp1_b16(pred_45, pred_67); + const svbool_t pred_01234567 = svuzp1_b8(pred_0123, pred_4567); + + // perform a full write + *((svbool_t*)bitmask) = pred_01234567; + } + + static inline void + write_partial(uint8_t* const __restrict bitmask, + const size_t size, + const svbool_t pred_0, + const svbool_t pred_1, + const svbool_t pred_2, + const svbool_t pred_3, + const svbool_t pred_4, + const svbool_t pred_5, + const svbool_t pred_6, + const svbool_t pred_7) { + // compact predicates + const svbool_t pred_01 = svuzp1_b32(pred_0, pred_1); + const svbool_t pred_23 = svuzp1_b32(pred_2, pred_3); + const svbool_t pred_45 = svuzp1_b32(pred_4, pred_5); + const svbool_t pred_67 = svuzp1_b32(pred_6, pred_7); + const svbool_t pred_0123 = svuzp1_b16(pred_01, pred_23); + const svbool_t pred_4567 = svuzp1_b16(pred_45, pred_67); + const svbool_t pred_01234567 = svuzp1_b8(pred_0123, pred_4567); + + // this is a temporary buffer for the maximum possible case of 256 bits + uint8_t pred_buf[MAX_SVE_WIDTH / 64]; + // write to the temporary buffer + *((volatile svbool_t*)(pred_buf)) = pred_01234567; + + // make the write mask. (size % 8) == 0 is guaranteed by the caller. 
+ const svbool_t pred_write = + svwhilelt_b8(uint32_t(0), uint32_t(size / 8)); + + // load the buffer + const svuint8_t mask_u8 = svld1_u8(pred_write, pred_buf); + // write it to the bitmask + svst1_u8(pred_write, bitmask, mask_u8); } }; @@ -924,16 +819,8 @@ op_mask_helper(uint8_t* const __restrict res_u8, const size_t size, Func func) { const svbool_t cmp6 = func(pred_all, i + 6 * sve_width); const svbool_t cmp7 = func(pred_all, i + 7 * sve_width); - MaskHelper::write(res_u8 + i / 8, - sve_width * 8, - cmp0, - cmp1, - cmp2, - cmp3, - cmp4, - cmp5, - cmp6, - cmp7); + MaskHelper::write_full( + res_u8 + i / 8, cmp0, cmp1, cmp2, cmp3, cmp4, cmp5, cmp6, cmp7); } } @@ -985,16 +872,16 @@ op_mask_helper(uint8_t* const __restrict res_u8, const size_t size, Func func) { cmp7 = func(get_partial_pred(7), size_sve8 + 7 * sve_width); } - MaskHelper::write(res_u8 + size_sve8 / 8, - size - size_sve8, - cmp0, - cmp1, - cmp2, - cmp3, - cmp4, - cmp5, - cmp6, - cmp7); + MaskHelper::write_partial(res_u8 + size_sve8 / 8, + size - size_sve8, + cmp0, + cmp1, + cmp2, + cmp3, + cmp4, + cmp5, + cmp6, + cmp7); } return true; diff --git a/internal/core/src/common/Channel.h b/internal/core/src/common/Channel.h index f042945432935..1dead8324791c 100644 --- a/internal/core/src/common/Channel.h +++ b/internal/core/src/common/Channel.h @@ -14,6 +14,7 @@ #include #include #include +#include "Exception.h" namespace milvus { template @@ -55,7 +56,7 @@ class Channel { } void - close(std::optional ex = std::nullopt) { + close(std::optional ex = std::nullopt) { if (ex.has_value()) { ex_ = std::move(ex); } @@ -64,6 +65,6 @@ class Channel { private: oneapi::tbb::concurrent_bounded_queue> inner_{}; - std::optional ex_{}; + std::optional ex_{}; }; } // namespace milvus diff --git a/internal/core/src/common/Consts.h b/internal/core/src/common/Consts.h index 65e6795b16e66..44d7d5559ca81 100644 --- a/internal/core/src/common/Consts.h +++ b/internal/core/src/common/Consts.h @@ -61,3 +61,5 @@ constexpr const char* RANGE_FILTER = knowhere::meta::RANGE_FILTER; const int64_t DEFAULT_MAX_OUTPUT_SIZE = 67108864; // bytes, 64MB const int64_t DEFAULT_CHUNK_MANAGER_REQUEST_TIMEOUT_MS = 10000; + +const int64_t DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND = 500; diff --git a/internal/core/src/common/Exception.h b/internal/core/src/common/Exception.h index 68941ba56716c..d4f3863b9df25 100644 --- a/internal/core/src/common/Exception.h +++ b/internal/core/src/common/Exception.h @@ -20,6 +20,22 @@ namespace milvus { +class MilvusException : public std::exception { + public: + explicit MilvusException(const std::string& msg) + : std::exception(), exception_message_(msg) { + } + const char* + what() const noexcept { + return exception_message_.c_str(); + } + virtual ~MilvusException() { + } + + private: + std::string exception_message_; +}; + class NotImplementedException : public std::exception { public: explicit NotImplementedException(const std::string& msg) diff --git a/internal/core/src/common/QueryResult.h b/internal/core/src/common/QueryResult.h index 9fd2d13d7776b..4cb7fef00e5dd 100644 --- a/internal/core/src/common/QueryResult.h +++ b/internal/core/src/common/QueryResult.h @@ -228,6 +228,7 @@ struct RetrieveResult { void* segment_; std::vector result_offsets_; std::vector field_data_; + bool has_more_result = true; }; using RetrieveResultPtr = std::shared_ptr; diff --git a/internal/core/src/common/Tracer.cpp b/internal/core/src/common/Tracer.cpp index 4711ef76ae3ef..d80dd301215e9 100644 --- a/internal/core/src/common/Tracer.cpp +++ 
b/internal/core/src/common/Tracer.cpp @@ -55,13 +55,13 @@ initTelemetry(const TraceConfig& cfg) { opts.transport_format = jaeger::TransportFormat::kThriftHttp; opts.endpoint = cfg.jaegerURL; exporter = jaeger::JaegerExporterFactory::Create(opts); - LOG_INFO("init jaeger exporter, endpoint:", opts.endpoint); + LOG_INFO("init jaeger exporter, endpoint: {}", opts.endpoint); } else if (cfg.exporter == "otlp") { auto opts = otlp::OtlpGrpcExporterOptions{}; opts.endpoint = cfg.otlpEndpoint; opts.use_ssl_credentials = cfg.oltpSecure; exporter = otlp::OtlpGrpcExporterFactory::Create(opts); - LOG_INFO("init otlp exporter, endpoint:", opts.endpoint); + LOG_INFO("init otlp exporter, endpoint: {}", opts.endpoint); } else { LOG_INFO("Empty Trace"); enable_trace = false; diff --git a/internal/core/src/config/ConfigKnowhere.h b/internal/core/src/config/ConfigKnowhere.h index eff8be76f9c28..57a0713014d6b 100644 --- a/internal/core/src/config/ConfigKnowhere.h +++ b/internal/core/src/config/ConfigKnowhere.h @@ -15,6 +15,7 @@ // limitations under the License. #pragma once +#include #include namespace milvus::config { diff --git a/internal/core/src/exec/expression/JsonContainsExpr.cpp b/internal/core/src/exec/expression/JsonContainsExpr.cpp index 72251c301fb14..bbcc852c2a8e2 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.cpp +++ b/internal/core/src/exec/expression/JsonContainsExpr.cpp @@ -23,7 +23,14 @@ namespace exec { void PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { switch (expr_->column_.data_type_) { - case DataType::ARRAY: + case DataType::ARRAY: { + if (is_index_mode_) { + result = EvalArrayContainsForIndexSegment(); + } else { + result = EvalJsonContainsForDataSegment(); + } + break; + } case DataType::JSON: { if (is_index_mode_) { PanicInfo( @@ -94,7 +101,6 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { return ExecJsonContainsWithDiffType(); } } - break; } case proto::plan::JSONContainsExpr_JSONOp_ContainsAll: { if (IsArrayDataType(data_type)) { @@ -145,7 +151,6 @@ PhyJsonContainsFilterExpr::EvalJsonContainsForDataSegment() { return ExecJsonContainsAllWithDiffType(); } } - break; } default: PanicInfo(ExprInvalid, @@ -748,5 +753,92 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType() { return res_vec; } +VectorPtr +PhyJsonContainsFilterExpr::EvalArrayContainsForIndexSegment() { + switch (expr_->column_.element_type_) { + case DataType::BOOL: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::INT8: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::INT16: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::INT32: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::INT64: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::FLOAT: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::DOUBLE: { + return ExecArrayContainsForIndexSegmentImpl(); + } + case DataType::VARCHAR: + case DataType::STRING: { + return ExecArrayContainsForIndexSegmentImpl(); + } + default: + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type for " + "ExecArrayContainsForIndexSegmentImpl: {}", + expr_->column_.element_type_)); + } +} + +template +VectorPtr +PhyJsonContainsFilterExpr::ExecArrayContainsForIndexSegmentImpl() { + typedef std::conditional_t, + std::string, + ExprValueType> + GetType; + using Index = index::ScalarIndex; + auto real_batch_size = GetNextBatchSize(); + if (real_batch_size == 0) { + return nullptr; + } + + 
std::unordered_set elements; + for (auto const& element : expr_->vals_) { + elements.insert(GetValueFromProto(element)); + } + boost::container::vector elems(elements.begin(), elements.end()); + auto execute_sub_batch = + [this](Index* index_ptr, + const boost::container::vector& vals) { + switch (expr_->op_) { + case proto::plan::JSONContainsExpr_JSONOp_Contains: + case proto::plan::JSONContainsExpr_JSONOp_ContainsAny: { + return index_ptr->In(vals.size(), vals.data()); + } + case proto::plan::JSONContainsExpr_JSONOp_ContainsAll: { + TargetBitmap result(index_ptr->Count()); + result.set(); + for (size_t i = 0; i < vals.size(); i++) { + auto sub = index_ptr->In(1, &vals[i]); + result &= sub; + } + return result; + } + default: + PanicInfo( + ExprInvalid, + "unsupported array contains type {}", + proto::plan::JSONContainsExpr_JSONOp_Name(expr_->op_)); + } + }; + auto res = ProcessIndexChunks(execute_sub_batch, elems); + AssertInfo(res.size() == real_batch_size, + "internal error: expr processed rows {} not equal " + "expect batch size {}", + res.size(), + real_batch_size); + return std::make_shared(std::move(res)); +} + } //namespace exec } // namespace milvus diff --git a/internal/core/src/exec/expression/JsonContainsExpr.h b/internal/core/src/exec/expression/JsonContainsExpr.h index c757dc0d3fb92..a0cfdfdea0841 100644 --- a/internal/core/src/exec/expression/JsonContainsExpr.h +++ b/internal/core/src/exec/expression/JsonContainsExpr.h @@ -80,6 +80,13 @@ class PhyJsonContainsFilterExpr : public SegmentExpr { VectorPtr ExecJsonContainsWithDiffType(); + VectorPtr + EvalArrayContainsForIndexSegment(); + + template + VectorPtr + ExecArrayContainsForIndexSegmentImpl(); + private: std::shared_ptr expr_; }; diff --git a/internal/core/src/expr/ITypeExpr.h b/internal/core/src/expr/ITypeExpr.h index 102709aa16b83..6716f8af2f66f 100644 --- a/internal/core/src/expr/ITypeExpr.h +++ b/internal/core/src/expr/ITypeExpr.h @@ -113,11 +113,13 @@ IsMaterializedViewSupported(const DataType& data_type) { struct ColumnInfo { FieldId field_id_; DataType data_type_; + DataType element_type_; std::vector nested_path_; ColumnInfo(const proto::plan::ColumnInfo& column_info) : field_id_(column_info.field_id()), data_type_(static_cast(column_info.data_type())), + element_type_(static_cast(column_info.element_type())), nested_path_(column_info.nested_path().begin(), column_info.nested_path().end()) { } @@ -127,6 +129,7 @@ struct ColumnInfo { std::vector nested_path = {}) : field_id_(field_id), data_type_(data_type), + element_type_(DataType::NONE), nested_path_(std::move(nested_path)) { } @@ -140,6 +143,10 @@ struct ColumnInfo { return false; } + if (element_type_ != other.element_type_) { + return false; + } + for (int i = 0; i < nested_path_.size(); ++i) { if (nested_path_[i] != other.nested_path_[i]) { return false; @@ -151,10 +158,12 @@ struct ColumnInfo { std::string ToString() const { - return fmt::format("[FieldId:{}, data_type:{}, nested_path:{}]", - std::to_string(field_id_.get()), - data_type_, - milvus::Join(nested_path_, ",")); + return fmt::format( + "[FieldId:{}, data_type:{}, element_type:{}, nested_path:{}]", + std::to_string(field_id_.get()), + data_type_, + element_type_, + milvus::Join(nested_path_, ",")); } }; diff --git a/internal/core/src/index/BitmapIndex.cpp b/internal/core/src/index/BitmapIndex.cpp index 5d0a4aabec3cd..3e63763dd2b51 100644 --- a/internal/core/src/index/BitmapIndex.cpp +++ b/internal/core/src/index/BitmapIndex.cpp @@ -15,10 +15,12 @@ // limitations under the License. 
#include +#include #include "index/BitmapIndex.h" #include "common/Slice.h" +#include "common/Common.h" #include "index/Meta.h" #include "index/ScalarIndex.h" #include "index/Utils.h" @@ -105,8 +107,13 @@ BitmapIndex::Build(size_t n, const T* data) { } total_num_rows_ = n; - for (auto it = data_.begin(); it != data_.end(); ++it) { - bitsets_[it->first] = ConvertRoaringToBitset(it->second); + if (data_.size() < DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) { + for (auto it = data_.begin(); it != data_.end(); ++it) { + bitsets_[it->first] = ConvertRoaringToBitset(it->second); + } + build_mode_ = BitmapIndexBuildMode::BITSET; + } else { + build_mode_ = BitmapIndexBuildMode::ROARING; } is_built_ = true; @@ -134,6 +141,13 @@ BitmapIndex::BuildV2(const Config& config) { field_datas.push_back(field_data); } + BuildWithFieldData(field_datas); +} + +template +void +BitmapIndex::BuildWithFieldData( + const std::vector& field_datas) { int total_num_rows = 0; for (auto& field_data : field_datas) { total_num_rows += field_data->get_num_rows(); @@ -142,7 +156,6 @@ BitmapIndex::BuildV2(const Config& config) { throw SegcoreError(DataIsEmpty, "scalar bitmap index can not build null values"); } - total_num_rows_ = total_num_rows; int64_t offset = 0; @@ -154,6 +167,7 @@ BitmapIndex::BuildV2(const Config& config) { offset++; } } + is_built_ = true; } @@ -190,6 +204,22 @@ BitmapIndex::SerializeIndexData(uint8_t* data_ptr) { } } +template +std::pair, size_t> +BitmapIndex::SerializeIndexMeta() { + YAML::Node node; + node[BITMAP_INDEX_LENGTH] = data_.size(); + node[BITMAP_INDEX_NUM_ROWS] = total_num_rows_; + + std::stringstream ss; + ss << node; + auto json_string = ss.str(); + auto str_size = json_string.size(); + std::shared_ptr res(new uint8_t[str_size]); + memcpy(res.get(), json_string.data(), str_size); + return std::make_pair(res, str_size); +} + template <> void BitmapIndex::SerializeIndexData(uint8_t* data_ptr) { @@ -217,21 +247,17 @@ BitmapIndex::Serialize(const Config& config) { uint8_t* data_ptr = index_data.get(); SerializeIndexData(data_ptr); - std::shared_ptr index_length(new uint8_t[sizeof(size_t)]); - auto index_size = data_.size(); - memcpy(index_length.get(), &index_size, sizeof(size_t)); - - std::shared_ptr num_rows(new uint8_t[sizeof(size_t)]); - memcpy(num_rows.get(), &total_num_rows_, sizeof(size_t)); + auto index_meta = SerializeIndexMeta(); BinarySet ret_set; ret_set.Append(BITMAP_INDEX_DATA, index_data, index_data_size); - ret_set.Append(BITMAP_INDEX_LENGTH, index_length, sizeof(size_t)); - ret_set.Append(BITMAP_INDEX_NUM_ROWS, num_rows, sizeof(size_t)); + ret_set.Append(BITMAP_INDEX_META, index_meta.first, index_meta.second); LOG_INFO("build bitmap index with cardinality = {}, num_rows = {}", - index_size, + Cardinality(), total_num_rows_); + + Disassemble(ret_set); return ret_set; } @@ -283,6 +309,29 @@ BitmapIndex::ConvertRoaringToBitset(const roaring::Roaring& values) { return res; } +template +std::pair +BitmapIndex::DeserializeIndexMeta(const uint8_t* data_ptr, + size_t data_size) { + YAML::Node node = YAML::Load( + std::string(reinterpret_cast(data_ptr), data_size)); + + auto index_length = node[BITMAP_INDEX_LENGTH].as(); + auto index_num_rows = node[BITMAP_INDEX_NUM_ROWS].as(); + + return std::make_pair(index_length, index_num_rows); +} + +template +void +BitmapIndex::ChooseIndexBuildMode() { + if (data_.size() <= DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) { + build_mode_ = BitmapIndexBuildMode::BITSET; + } else { + build_mode_ = BitmapIndexBuildMode::ROARING; + } +} + template void 
BitmapIndex::DeserializeIndexData(const uint8_t* data_ptr, @@ -296,7 +345,12 @@ BitmapIndex::DeserializeIndexData(const uint8_t* data_ptr, value = roaring::Roaring::read(reinterpret_cast(data_ptr)); data_ptr += value.getSizeInBytes(); - bitsets_[key] = ConvertRoaringToBitset(value); + ChooseIndexBuildMode(); + + if (build_mode_ == BitmapIndexBuildMode::BITSET) { + bitsets_[key] = ConvertRoaringToBitset(value); + data_.erase(key); + } } } @@ -324,21 +378,14 @@ template void BitmapIndex::LoadWithoutAssemble(const BinarySet& binary_set, const Config& config) { - size_t index_length; - auto index_length_buffer = binary_set.GetByName(BITMAP_INDEX_LENGTH); - memcpy(&index_length, - index_length_buffer->data.get(), - (size_t)index_length_buffer->size); - - auto num_rows_buffer = binary_set.GetByName(BITMAP_INDEX_NUM_ROWS); - memcpy(&total_num_rows_, - num_rows_buffer->data.get(), - (size_t)num_rows_buffer->size); + auto index_meta_buffer = binary_set.GetByName(BITMAP_INDEX_META); + auto index_meta = DeserializeIndexMeta(index_meta_buffer->data.get(), + index_meta_buffer->size); + auto index_length = index_meta.first; + total_num_rows_ = index_meta.second; auto index_data_buffer = binary_set.GetByName(BITMAP_INDEX_DATA); - const uint8_t* data_ptr = index_data_buffer->data.get(); - - DeserializeIndexData(data_ptr, index_length); + DeserializeIndexData(index_data_buffer->data.get(), index_length); LOG_INFO("load bitmap index with cardinality = {}, num_rows = {}", Cardinality(), @@ -416,26 +463,24 @@ BitmapIndex::In(const size_t n, const T* values) { AssertInfo(is_built_, "index has not been built"); TargetBitmap res(total_num_rows_, false); -#if 0 - roaring::Roaring result; - for (size_t i = 0; i < n; ++i) { - auto val = values[i]; - auto it = data_.find(val); - if (it != data_.end()) { - result |= it->second; + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + for (size_t i = 0; i < n; ++i) { + auto val = values[i]; + auto it = data_.find(val); + if (it != data_.end()) { + for (const auto& v : it->second) { + res.set(v); + } + } } - } - for (auto& val : result) { - res.set(val); - } -#else - for (size_t i = 0; i < n; ++i) { - auto val = values[i]; - if (bitsets_.find(val) != bitsets_.end()) { - res |= bitsets_.at(val); + } else { + for (size_t i = 0; i < n; ++i) { + auto val = values[i]; + if (bitsets_.find(val) != bitsets_.end()) { + res |= bitsets_.at(val); + } } } -#endif return res; } @@ -443,36 +488,35 @@ template const TargetBitmap BitmapIndex::NotIn(const size_t n, const T* values) { AssertInfo(is_built_, "index has not been built"); - TargetBitmap res(total_num_rows_, false); -#if 0 - roaring::Roaring result; - for (int i = 0; i < n; ++i) { - auto val = values[i]; - auto it = data_.find(val); - if (it != data_.end()) { - result |= it->second; + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + TargetBitmap res(total_num_rows_, true); + for (int i = 0; i < n; ++i) { + auto val = values[i]; + auto it = data_.find(val); + if (it != data_.end()) { + for (const auto& v : it->second) { + res.reset(v); + } + } } - } - - for (auto& val : result) { - bitset.reset(val); - } -#else - for (size_t i = 0; i < n; ++i) { - auto val = values[i]; - if (bitsets_.find(val) != bitsets_.end()) { - res |= bitsets_.at(val); + return res; + } else { + TargetBitmap res(total_num_rows_, false); + for (size_t i = 0; i < n; ++i) { + auto val = values[i]; + if (bitsets_.find(val) != bitsets_.end()) { + res |= bitsets_.at(val); + } } + res.flip(); + return res; } -#endif - res.flip(); - return res; } template 
-const TargetBitmap -BitmapIndex::Range(const T value, const OpType op) { +TargetBitmap +BitmapIndex::RangeForBitset(const T value, const OpType op) { AssertInfo(is_built_, "index has not been built"); TargetBitmap res(total_num_rows_, false); if (ShouldSkip(value, value, op)) { @@ -532,10 +576,82 @@ BitmapIndex::Range(const T value, const OpType op) { template const TargetBitmap -BitmapIndex::Range(const T lower_value, - bool lb_inclusive, - const T upper_value, - bool ub_inclusive) { +BitmapIndex::Range(const T value, OpType op) { + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + return std::move(RangeForRoaring(value, op)); + } else { + return std::move(RangeForBitset(value, op)); + } +} + +template +TargetBitmap +BitmapIndex::RangeForRoaring(const T value, const OpType op) { + AssertInfo(is_built_, "index has not been built"); + TargetBitmap res(total_num_rows_, false); + if (ShouldSkip(value, value, op)) { + return res; + } + auto lb = data_.begin(); + auto ub = data_.end(); + + switch (op) { + case OpType::LessThan: { + ub = std::lower_bound(data_.begin(), + data_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + case OpType::LessEqual: { + ub = std::upper_bound(data_.begin(), + data_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + case OpType::GreaterThan: { + lb = std::upper_bound(data_.begin(), + data_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + case OpType::GreaterEqual: { + lb = std::lower_bound(data_.begin(), + data_.end(), + std::make_pair(value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + break; + } + default: { + throw SegcoreError(OpTypeInvalid, + fmt::format("Invalid OperatorType: {}", op)); + } + } + + for (; lb != ub; lb++) { + for (const auto& v : lb->second) { + res.set(v); + } + } + return res; +} + +template +TargetBitmap +BitmapIndex::RangeForBitset(const T lower_value, + bool lb_inclusive, + const T upper_value, + bool ub_inclusive) { AssertInfo(is_built_, "index has not been built"); TargetBitmap res(total_num_rows_, false); if (lower_value > upper_value || @@ -587,15 +703,99 @@ BitmapIndex::Range(const T lower_value, return res; } +template +const TargetBitmap +BitmapIndex::Range(const T lower_value, + bool lb_inclusive, + const T upper_value, + bool ub_inclusive) { + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + return RangeForRoaring( + lower_value, lb_inclusive, upper_value, ub_inclusive); + } else { + return RangeForBitset( + lower_value, lb_inclusive, upper_value, ub_inclusive); + } +} + +template +TargetBitmap +BitmapIndex::RangeForRoaring(const T lower_value, + bool lb_inclusive, + const T upper_value, + bool ub_inclusive) { + AssertInfo(is_built_, "index has not been built"); + TargetBitmap res(total_num_rows_, false); + if (lower_value > upper_value || + (lower_value == upper_value && !(lb_inclusive && ub_inclusive))) { + return res; + } + if (ShouldSkip(lower_value, upper_value, OpType::Range)) { + return res; + } + + auto lb = data_.begin(); + auto ub = data_.end(); + + if (lb_inclusive) { + lb = std::lower_bound(data_.begin(), + data_.end(), + std::make_pair(lower_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } else { + lb = 
std::upper_bound(data_.begin(), + data_.end(), + std::make_pair(lower_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } + + if (ub_inclusive) { + ub = std::upper_bound(data_.begin(), + data_.end(), + std::make_pair(upper_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } else { + ub = std::lower_bound(data_.begin(), + data_.end(), + std::make_pair(upper_value, TargetBitmap()), + [](const auto& lhs, const auto& rhs) { + return lhs.first < rhs.first; + }); + } + + for (; lb != ub; lb++) { + for (const auto& v : lb->second) { + res.set(v); + } + } + return res; +} + template T BitmapIndex::Reverse_Lookup(size_t idx) const { AssertInfo(is_built_, "index has not been built"); AssertInfo(idx < total_num_rows_, "out of range of total coun"); - for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) { - if (it->second[idx]) { - return it->first; + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + for (auto it = data_.begin(); it != data_.end(); it++) { + for (const auto& v : it->second) { + if (v == idx) { + return it->first; + } + } + } + } else { + for (auto it = bitsets_.begin(); it != bitsets_.end(); it++) { + if (it->second[idx]) { + return it->first; + } } } throw SegcoreError( @@ -610,9 +810,7 @@ bool BitmapIndex::ShouldSkip(const T lower_value, const T upper_value, const OpType op) { - if (!bitsets_.empty()) { - auto lower_bound = bitsets_.begin()->first; - auto upper_bound = bitsets_.rbegin()->first; + auto skip = [&](OpType op, T lower_bound, T upper_bound) -> bool { bool should_skip = false; switch (op) { case OpType::LessThan: { @@ -649,6 +847,22 @@ BitmapIndex::ShouldSkip(const T lower_value, op)); } return should_skip; + }; + + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + if (!data_.empty()) { + auto lower_bound = data_.begin()->first; + auto upper_bound = data_.rbegin()->first; + bool should_skip = skip(op, lower_bound, upper_bound); + return should_skip; + } + } else { + if (!bitsets_.empty()) { + auto lower_bound = bitsets_.begin()->first; + auto upper_bound = bitsets_.rbegin()->first; + bool should_skip = skip(op, lower_bound, upper_bound); + return should_skip; + } } return true; } diff --git a/internal/core/src/index/BitmapIndex.h b/internal/core/src/index/BitmapIndex.h index 38ea6004495ff..2ead42d5de545 100644 --- a/internal/core/src/index/BitmapIndex.h +++ b/internal/core/src/index/BitmapIndex.h @@ -30,6 +30,11 @@ namespace milvus { namespace index { +enum class BitmapIndexBuildMode { + ROARING, + BITSET, +}; + /* * @brief Implementation of Bitmap Index * @details This index only for scalar Integral type. 
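For orientation, the new build mode boils down to a cardinality threshold: low-cardinality fields expand each posting list into a flat bitset (cheap bitwise ORs at query time), while high-cardinality fields keep the compressed roaring bitmaps to bound memory. A minimal sketch of that decision, assuming the 500 default from DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND added above; the helper name is illustrative, not part of the patch:

```cpp
#include <cstddef>

// Sketch only: mirrors the ChooseIndexBuildMode logic added in this patch.
enum class BitmapIndexBuildMode { ROARING, BITSET };

inline BitmapIndexBuildMode
ChooseBuildMode(std::size_t cardinality, std::size_t bound = 500) {
    // Few distinct values: expanded bitsets, so In()/Range() are plain ORs.
    // Many distinct values: keep roaring bitmaps and iterate their set bits.
    return cardinality <= bound ? BitmapIndexBuildMode::BITSET
                                : BitmapIndexBuildMode::ROARING;
}
```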
@@ -45,6 +50,17 @@ class BitmapIndex : public ScalarIndex { const storage::FileManagerContext& file_manager_context, std::shared_ptr space); + explicit BitmapIndex( + const std::shared_ptr& file_manager) + : file_manager_(file_manager) { + } + + explicit BitmapIndex( + const std::shared_ptr& file_manager, + std::shared_ptr space) + : file_manager_(file_manager), space_(space) { + } + ~BitmapIndex() override = default; BinarySet @@ -61,7 +77,7 @@ class BitmapIndex : public ScalarIndex { int64_t Count() override { - return bitsets_.begin()->second.size(); + return total_num_rows_; } void @@ -70,6 +86,9 @@ class BitmapIndex : public ScalarIndex { void Build(const Config& config = {}) override; + void + BuildWithFieldData(const std::vector& datas) override; + void BuildV2(const Config& config = {}) override; @@ -108,9 +127,17 @@ class BitmapIndex : public ScalarIndex { int64_t Cardinality() { - return bitsets_.size(); + if (build_mode_ == BitmapIndexBuildMode::ROARING) { + return data_.size(); + } else { + return bitsets_.size(); + } } + void + LoadWithoutAssemble(const BinarySet& binary_set, + const Config& config) override; + private: size_t GetIndexDataSize(); @@ -118,24 +145,49 @@ class BitmapIndex : public ScalarIndex { void SerializeIndexData(uint8_t* index_data_ptr); + std::pair, size_t> + SerializeIndexMeta(); + + std::pair + DeserializeIndexMeta(const uint8_t* data_ptr, size_t data_size); + void DeserializeIndexData(const uint8_t* data_ptr, size_t index_length); + void + ChooseIndexBuildMode(); + bool ShouldSkip(const T lower_value, const T upper_value, const OpType op); TargetBitmap ConvertRoaringToBitset(const roaring::Roaring& values); - void - LoadWithoutAssemble(const BinarySet& binary_set, const Config& config); + TargetBitmap + RangeForRoaring(T value, OpType op); - private: - bool is_built_; + TargetBitmap + RangeForBitset(T value, OpType op); + + TargetBitmap + RangeForRoaring(T lower_bound_value, + bool lb_inclusive, + T upper_bound_value, + bool ub_inclusive); + + TargetBitmap + RangeForBitset(T lower_bound_value, + bool lb_inclusive, + T upper_bound_value, + bool ub_inclusive); + + public: + bool is_built_{false}; Config config_; + BitmapIndexBuildMode build_mode_; std::map data_; std::map bitsets_; - size_t total_num_rows_; + size_t total_num_rows_{0}; std::shared_ptr file_manager_; std::shared_ptr space_; }; diff --git a/internal/core/src/index/CMakeLists.txt b/internal/core/src/index/CMakeLists.txt index ed0f600587bd2..3256ab63a08c7 100644 --- a/internal/core/src/index/CMakeLists.txt +++ b/internal/core/src/index/CMakeLists.txt @@ -20,6 +20,7 @@ set(INDEX_FILES SkipIndex.cpp InvertedIndexTantivy.cpp BitmapIndex.cpp + HybridScalarIndex.cpp ) milvus_add_pkg_config("milvus_index") diff --git a/internal/core/src/index/HybridScalarIndex.cpp b/internal/core/src/index/HybridScalarIndex.cpp new file mode 100644 index 0000000000000..518828ea7bac7 --- /dev/null +++ b/internal/core/src/index/HybridScalarIndex.cpp @@ -0,0 +1,402 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "index/HybridScalarIndex.h" +#include "common/Slice.h" +#include "common/Common.h" +#include "index/Meta.h" +#include "index/ScalarIndex.h" +#include "index/Utils.h" +#include "storage/Util.h" +#include "storage/space.h" + +namespace milvus { +namespace index { + +template +HybridScalarIndex::HybridScalarIndex( + const storage::FileManagerContext& file_manager_context) + : is_built_(false), + bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND) { + if (file_manager_context.Valid()) { + file_manager_ = + std::make_shared(file_manager_context); + AssertInfo(file_manager_ != nullptr, "create file manager failed!"); + } + internal_index_type_ = InternalIndexType::NONE; +} + +template +HybridScalarIndex::HybridScalarIndex( + const storage::FileManagerContext& file_manager_context, + std::shared_ptr space) + : is_built_(false), + bitmap_index_cardinality_limit_(DEFAULT_BITMAP_INDEX_CARDINALITY_BOUND), + space_(space) { + if (file_manager_context.Valid()) { + file_manager_ = std::make_shared( + file_manager_context, space); + AssertInfo(file_manager_ != nullptr, "create file manager failed!"); + } + internal_index_type_ = InternalIndexType::NONE; +} + +template +InternalIndexType +HybridScalarIndex::SelectIndexBuildType(size_t n, const T* values) { + std::set distinct_vals; + for (size_t i = 0; i < n; i++) { + distinct_vals.insert(values[i]); + } + + // Decide whether to select bitmap index or stl sort + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + internal_index_type_ = InternalIndexType::STLSORT; + } else { + internal_index_type_ = InternalIndexType::BITMAP; + } + return internal_index_type_; +} + +template <> +InternalIndexType +HybridScalarIndex::SelectIndexBuildType( + size_t n, const std::string* values) { + std::set distinct_vals; + for (size_t i = 0; i < n; i++) { + distinct_vals.insert(values[i]); + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + break; + } + } + + // Decide whether to select bitmap index or marisa index + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + internal_index_type_ = InternalIndexType::MARISA; + } else { + internal_index_type_ = InternalIndexType::BITMAP; + } + return internal_index_type_; +} + +template +InternalIndexType +HybridScalarIndex::SelectIndexBuildType( + const std::vector& field_datas) { + std::set distinct_vals; + for (const auto& data : field_datas) { + auto slice_row_num = data->get_num_rows(); + for (size_t i = 0; i < slice_row_num; ++i) { + auto val = reinterpret_cast(data->RawValue(i)); + distinct_vals.insert(*val); + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + break; + } + } + } + + // Decide whether to select bitmap index or stl sort + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + internal_index_type_ = InternalIndexType::STLSORT; + } else { + internal_index_type_ = InternalIndexType::BITMAP; + } + return internal_index_type_; +} + +template <> +InternalIndexType +HybridScalarIndex::SelectIndexBuildType( + const std::vector& field_datas) { + std::set distinct_vals; + for (const auto& data : 
field_datas) { + auto slice_row_num = data->get_num_rows(); + for (size_t i = 0; i < slice_row_num; ++i) { + auto val = reinterpret_cast(data->RawValue(i)); + distinct_vals.insert(*val); + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + break; + } + } + } + + // Decide whether to select bitmap index or marisa sort + if (distinct_vals.size() >= bitmap_index_cardinality_limit_) { + internal_index_type_ = InternalIndexType::MARISA; + } else { + internal_index_type_ = InternalIndexType::BITMAP; + } + return internal_index_type_; +} + +template +std::shared_ptr> +HybridScalarIndex::GetInternalIndex() { + if (internal_index_ != nullptr) { + return internal_index_; + } + if (internal_index_type_ == InternalIndexType::BITMAP) { + internal_index_ = std::make_shared>(file_manager_); + } else if (internal_index_type_ == InternalIndexType::STLSORT) { + internal_index_ = std::make_shared>(file_manager_); + } else { + PanicInfo(UnexpectedError, + "unknown index type when get internal index"); + } + return internal_index_; +} + +template <> +std::shared_ptr> +HybridScalarIndex::GetInternalIndex() { + if (internal_index_ != nullptr) { + return internal_index_; + } + + if (internal_index_type_ == InternalIndexType::BITMAP) { + internal_index_ = + std::make_shared>(file_manager_); + } else if (internal_index_type_ == InternalIndexType::MARISA) { + internal_index_ = std::make_shared(file_manager_); + } else { + PanicInfo(UnexpectedError, + "unknown index type when get internal index"); + } + return internal_index_; +} + +template +void +HybridScalarIndex::BuildInternal( + const std::vector& field_datas) { + auto index = GetInternalIndex(); + index->BuildWithFieldData(field_datas); +} + +template +void +HybridScalarIndex::Build(const Config& config) { + if (is_built_) { + return; + } + + bitmap_index_cardinality_limit_ = + GetBitmapCardinalityLimitFromConfig(config); + LOG_INFO("config bitmap cardinality limit to {}", + bitmap_index_cardinality_limit_); + + auto insert_files = + GetValueFromConfig>(config, "insert_files"); + AssertInfo(insert_files.has_value(), + "insert file paths is empty when build index"); + + auto field_datas = + file_manager_->CacheRawDataToMemory(insert_files.value()); + + SelectIndexBuildType(field_datas); + BuildInternal(field_datas); + is_built_ = true; +} + +template +void +HybridScalarIndex::BuildV2(const Config& config) { + if (is_built_) { + return; + } + bitmap_index_cardinality_limit_ = + GetBitmapCardinalityLimitFromConfig(config); + LOG_INFO("config bitmap cardinality limit to {}", + bitmap_index_cardinality_limit_); + + auto field_name = file_manager_->GetIndexMeta().field_name; + auto reader = space_->ScanData(); + std::vector field_datas; + for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) { + if (!rec.ok()) { + PanicInfo(DataFormatBroken, "failed to read data"); + } + auto data = rec.ValueUnsafe(); + auto total_num_rows = data->num_rows(); + auto col_data = data->GetColumnByName(field_name); + auto field_data = storage::CreateFieldData( + DataType(GetDType()), 0, total_num_rows); + field_data->FillFieldData(col_data); + field_datas.push_back(field_data); + } + + SelectIndexBuildType(field_datas); + BuildInternal(field_datas); + is_built_ = true; +} + +template +BinarySet +HybridScalarIndex::Serialize(const Config& config) { + AssertInfo(is_built_, "index has not been built yet"); + + auto ret_set = internal_index_->Serialize(config); + + // Add index type info to storage for future restruct index + std::shared_ptr index_type_buf(new 
uint8_t[sizeof(uint8_t)]); + index_type_buf[0] = static_cast(internal_index_type_); + ret_set.Append(INDEX_TYPE, index_type_buf, sizeof(uint8_t)); + + return ret_set; +} + +template +BinarySet +HybridScalarIndex::Upload(const Config& config) { + auto binary_set = Serialize(config); + file_manager_->AddFile(binary_set); + + auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize(); + BinarySet ret; + for (auto& file : remote_paths_to_size) { + ret.Append(file.first, nullptr, file.second); + } + + return ret; +} + +template +BinarySet +HybridScalarIndex::UploadV2(const Config& config) { + auto binary_set = Serialize(config); + file_manager_->AddFileV2(binary_set); + + auto remote_paths_to_size = file_manager_->GetRemotePathsToFileSize(); + BinarySet ret; + for (auto& file : remote_paths_to_size) { + ret.Append(file.first, nullptr, file.second); + } + + return ret; +} + +template +void +HybridScalarIndex::DeserializeIndexType(const BinarySet& binary_set) { + uint8_t index_type; + auto index_type_buffer = binary_set.GetByName(INDEX_TYPE); + memcpy(&index_type, index_type_buffer->data.get(), index_type_buffer->size); + internal_index_type_ = static_cast(index_type); +} + +template +void +HybridScalarIndex::LoadInternal(const BinarySet& binary_set, + const Config& config) { + auto index = GetInternalIndex(); + index->LoadWithoutAssemble(binary_set, config); +} + +template +void +HybridScalarIndex::Load(const BinarySet& binary_set, const Config& config) { + milvus::Assemble(const_cast(binary_set)); + DeserializeIndexType(binary_set); + + LoadInternal(binary_set, config); + is_built_ = true; +} + +template +void +HybridScalarIndex::LoadV2(const Config& config) { + auto blobs = space_->StatisticsBlobs(); + std::vector index_files; + auto prefix = file_manager_->GetRemoteIndexObjectPrefixV2(); + for (auto& b : blobs) { + if (b.name.rfind(prefix, 0) == 0) { + index_files.push_back(b.name); + } + } + std::map index_datas{}; + for (auto& file_name : index_files) { + auto res = space_->GetBlobByteSize(file_name); + if (!res.ok()) { + PanicInfo(S3Error, "unable to read index blob"); + } + auto index_blob_data = + std::shared_ptr(new uint8_t[res.value()]); + auto status = space_->ReadBlob(file_name, index_blob_data.get()); + if (!status.ok()) { + PanicInfo(S3Error, "unable to read index blob"); + } + auto raw_index_blob = + storage::DeserializeFileData(index_blob_data, res.value()); + auto key = file_name.substr(file_name.find_last_of('/') + 1); + index_datas[key] = raw_index_blob->GetFieldData(); + } + AssembleIndexDatas(index_datas); + + BinarySet binary_set; + for (auto& [key, data] : index_datas) { + auto size = data->Size(); + auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction + auto buf = std::shared_ptr( + (uint8_t*)const_cast(data->Data()), deleter); + binary_set.Append(key, buf, size); + } + + DeserializeIndexType(binary_set); + + LoadInternal(binary_set, config); + + is_built_ = true; +} + +template +void +HybridScalarIndex::Load(milvus::tracer::TraceContext ctx, + const Config& config) { + auto index_files = + GetValueFromConfig>(config, "index_files"); + AssertInfo(index_files.has_value(), + "index file paths is empty when load bitmap index"); + auto index_datas = file_manager_->LoadIndexToMemory(index_files.value()); + AssembleIndexDatas(index_datas); + BinarySet binary_set; + for (auto& [key, data] : index_datas) { + auto size = data->Size(); + auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction + auto buf = std::shared_ptr( + 
(uint8_t*)const_cast(data->Data()), deleter); + binary_set.Append(key, buf, size); + } + + DeserializeIndexType(binary_set); + + LoadInternal(binary_set, config); + + is_built_ = true; +} + +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; +template class HybridScalarIndex; + +} // namespace index +} // namespace milvus \ No newline at end of file diff --git a/internal/core/src/index/HybridScalarIndex.h b/internal/core/src/index/HybridScalarIndex.h new file mode 100644 index 0000000000000..c3c44630bf846 --- /dev/null +++ b/internal/core/src/index/HybridScalarIndex.h @@ -0,0 +1,166 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +#include "index/ScalarIndex.h" +#include "index/BitmapIndex.h" +#include "index/ScalarIndexSort.h" +#include "index/StringIndexMarisa.h" +#include "storage/FileManager.h" +#include "storage/DiskFileManagerImpl.h" +#include "storage/MemFileManagerImpl.h" +#include "storage/space.h" + +namespace milvus { +namespace index { + +enum class InternalIndexType { + NONE = 0, + BITMAP, + STLSORT, + MARISA, +}; + +/* +* @brief Implementation of hybrid index +* @details This index only for scalar type. 
+* dynamically choose bitmap/stlsort/marisa type index +* according to data distribution +*/ +template +class HybridScalarIndex : public ScalarIndex { + public: + explicit HybridScalarIndex( + const storage::FileManagerContext& file_manager_context = + storage::FileManagerContext()); + + explicit HybridScalarIndex( + const storage::FileManagerContext& file_manager_context, + std::shared_ptr space); + + ~HybridScalarIndex() override = default; + + BinarySet + Serialize(const Config& config) override; + + void + Load(const BinarySet& index_binary, const Config& config = {}) override; + + void + Load(milvus::tracer::TraceContext ctx, const Config& config = {}) override; + + void + LoadV2(const Config& config = {}) override; + + int64_t + Count() override { + return internal_index_->Count(); + } + + void + Build(size_t n, const T* values) override { + SelectIndexBuildType(n, values); + auto index = GetInternalIndex(); + index->Build(n, values); + is_built_ = true; + } + + void + Build(const Config& config = {}) override; + + void + BuildV2(const Config& config = {}) override; + + const TargetBitmap + In(size_t n, const T* values) override { + return internal_index_->In(n, values); + } + + const TargetBitmap + NotIn(size_t n, const T* values) override { + return internal_index_->NotIn(n, values); + } + + const TargetBitmap + Range(T value, OpType op) override { + return internal_index_->Range(value, op); + } + + const TargetBitmap + Range(T lower_bound_value, + bool lb_inclusive, + T upper_bound_value, + bool ub_inclusive) override { + return internal_index_->Range( + lower_bound_value, lb_inclusive, upper_bound_value, ub_inclusive); + } + + T + Reverse_Lookup(size_t offset) const override { + return internal_index_->Reverse_Lookup(offset); + } + + int64_t + Size() override { + return internal_index_->Size(); + } + + const bool + HasRawData() const override { + return internal_index_->HasRawData(); + } + + BinarySet + Upload(const Config& config = {}) override; + + BinarySet + UploadV2(const Config& config = {}) override; + + private: + InternalIndexType + SelectIndexBuildType(const std::vector& field_datas); + + InternalIndexType + SelectIndexBuildType(size_t n, const T* values); + + void + DeserializeIndexType(const BinarySet& binary_set); + + void + BuildInternal(const std::vector& field_datas); + + void + LoadInternal(const BinarySet& binary_set, const Config& config); + + std::shared_ptr> + GetInternalIndex(); + + public: + bool is_built_{false}; + int32_t bitmap_index_cardinality_limit_; + InternalIndexType internal_index_type_; + std::shared_ptr> internal_index_{nullptr}; + std::shared_ptr file_manager_{nullptr}; + std::shared_ptr space_{nullptr}; +}; + +} // namespace index +} // namespace milvus \ No newline at end of file diff --git a/internal/core/src/index/Index.h b/internal/core/src/index/Index.h index 9381ee74ada26..7567bf63e3c4e 100644 --- a/internal/core/src/index/Index.h +++ b/internal/core/src/index/Index.h @@ -18,6 +18,7 @@ #include #include +#include "common/FieldData.h" #include "common/EasyAssert.h" #include "knowhere/comp/index_param.h" #include "knowhere/dataset.h" @@ -81,7 +82,10 @@ class IndexBase { index_type_ == knowhere::IndexEnum::INDEX_FAISS_IVFSQ8 || index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT || index_type_ == knowhere::IndexEnum::INDEX_FAISS_IDMAP || - index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP; + index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP || + index_type_ == + knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX || + 
index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND; } const IndexType& diff --git a/internal/core/src/index/IndexFactory.cpp b/internal/core/src/index/IndexFactory.cpp index 6d133adc96204..cc660324f1b5a 100644 --- a/internal/core/src/index/IndexFactory.cpp +++ b/internal/core/src/index/IndexFactory.cpp @@ -27,7 +27,7 @@ #include "index/StringIndexMarisa.h" #include "index/BoolIndex.h" #include "index/InvertedIndexTantivy.h" -#include "index/BitmapIndex.h" +#include "index/HybridScalarIndex.h" namespace milvus::index { @@ -35,16 +35,12 @@ template ScalarIndexPtr IndexFactory::CreateScalarIndex( const IndexType& index_type, - const storage::FileManagerContext& file_manager_context, - DataType d_type) { + const storage::FileManagerContext& file_manager_context) { if (index_type == INVERTED_INDEX_TYPE) { - TantivyConfig cfg; - cfg.data_type_ = d_type; - return std::make_unique>(cfg, - file_manager_context); + return std::make_unique>(file_manager_context); } if (index_type == BITMAP_INDEX_TYPE) { - return std::make_unique>(file_manager_context); + return std::make_unique>(file_manager_context); } return CreateScalarIndexSort(file_manager_context); } @@ -60,17 +56,15 @@ template <> ScalarIndexPtr IndexFactory::CreateScalarIndex( const IndexType& index_type, - const storage::FileManagerContext& file_manager_context, - DataType d_type) { + const storage::FileManagerContext& file_manager_context) { #if defined(__linux__) || defined(__APPLE__) if (index_type == INVERTED_INDEX_TYPE) { - TantivyConfig cfg; - cfg.data_type_ = d_type; return std::make_unique>( - cfg, file_manager_context); + file_manager_context); } if (index_type == BITMAP_INDEX_TYPE) { - return std::make_unique>(file_manager_context); + return std::make_unique>( + file_manager_context); } return CreateStringIndexMarisa(file_manager_context); #else @@ -83,16 +77,14 @@ ScalarIndexPtr IndexFactory::CreateScalarIndex( const IndexType& index_type, const storage::FileManagerContext& file_manager_context, - std::shared_ptr space, - DataType d_type) { + std::shared_ptr space) { if (index_type == INVERTED_INDEX_TYPE) { - TantivyConfig cfg; - cfg.data_type_ = d_type; - return std::make_unique>( - cfg, file_manager_context, space); + return std::make_unique>(file_manager_context, + space); } if (index_type == BITMAP_INDEX_TYPE) { - return std::make_unique>(file_manager_context, space); + return std::make_unique>(file_manager_context, + space); } return CreateScalarIndexSort(file_manager_context, space); } @@ -102,18 +94,15 @@ ScalarIndexPtr IndexFactory::CreateScalarIndex( const IndexType& index_type, const storage::FileManagerContext& file_manager_context, - std::shared_ptr space, - DataType d_type) { + std::shared_ptr space) { #if defined(__linux__) || defined(__APPLE__) if (index_type == INVERTED_INDEX_TYPE) { - TantivyConfig cfg; - cfg.data_type_ = d_type; return std::make_unique>( - cfg, file_manager_context, space); + file_manager_context, space); } if (index_type == BITMAP_INDEX_TYPE) { - return std::make_unique>(file_manager_context, - space); + return std::make_unique>( + file_manager_context, space); } return CreateStringIndexMarisa(file_manager_context, space); #else @@ -146,41 +135,32 @@ IndexFactory::CreateIndex( } IndexBasePtr -IndexFactory::CreateScalarIndex( - const CreateIndexInfo& create_index_info, +IndexFactory::CreatePrimitiveScalarIndex( + DataType data_type, + IndexType index_type, const storage::FileManagerContext& file_manager_context) { - auto data_type = create_index_info.field_type; - auto index_type = 
create_index_info.index_type; - switch (data_type) { // create scalar index case DataType::BOOL: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::INT8: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::INT16: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::INT32: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::INT64: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::FLOAT: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); case DataType::DOUBLE: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, file_manager_context); // create string index case DataType::STRING: case DataType::VARCHAR: - return CreateScalarIndex( - index_type, file_manager_context, data_type); + return CreateScalarIndex(index_type, + file_manager_context); default: throw SegcoreError( DataTypeInvalid, @@ -188,6 +168,24 @@ IndexFactory::CreateScalarIndex( } } +IndexBasePtr +IndexFactory::CreateScalarIndex( + const CreateIndexInfo& create_index_info, + const storage::FileManagerContext& file_manager_context) { + switch (create_index_info.field_type) { + case DataType::ARRAY: + return CreatePrimitiveScalarIndex( + static_cast( + file_manager_context.fieldDataMeta.schema.element_type()), + create_index_info.index_type, + file_manager_context); + default: + return CreatePrimitiveScalarIndex(create_index_info.field_type, + create_index_info.index_type, + file_manager_context); + } +} + IndexBasePtr IndexFactory::CreateVectorIndex( const CreateIndexInfo& create_index_info, @@ -255,32 +253,25 @@ IndexFactory::CreateScalarIndex(const CreateIndexInfo& create_index_info, switch (data_type) { // create scalar index case DataType::BOOL: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::INT8: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::INT16: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::INT32: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::INT64: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::FLOAT: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); case DataType::DOUBLE: - return CreateScalarIndex( - index_type, file_manager, space, data_type); + return CreateScalarIndex(index_type, file_manager, space); // create string index case DataType::STRING: case DataType::VARCHAR: return CreateScalarIndex( - index_type, file_manager, space, data_type); + index_type, file_manager, space); 
default: throw SegcoreError( DataTypeInvalid, diff --git a/internal/core/src/index/IndexFactory.h b/internal/core/src/index/IndexFactory.h index 75bd090292907..47b255ab4e912 100644 --- a/internal/core/src/index/IndexFactory.h +++ b/internal/core/src/index/IndexFactory.h @@ -65,6 +65,13 @@ class IndexFactory { CreateVectorIndex(const CreateIndexInfo& create_index_info, const storage::FileManagerContext& file_manager_context); + IndexBasePtr + CreatePrimitiveScalarIndex( + DataType data_type, + IndexType index_type, + const storage::FileManagerContext& file_manager_context = + storage::FileManagerContext()); + IndexBasePtr CreateScalarIndex(const CreateIndexInfo& create_index_info, const storage::FileManagerContext& file_manager_context = @@ -89,15 +96,13 @@ class IndexFactory { ScalarIndexPtr CreateScalarIndex(const IndexType& index_type, const storage::FileManagerContext& file_manager = - storage::FileManagerContext(), - DataType d_type = DataType::NONE); + storage::FileManagerContext()); template ScalarIndexPtr CreateScalarIndex(const IndexType& index_type, const storage::FileManagerContext& file_manager, - std::shared_ptr space, - DataType d_type = DataType::NONE); + std::shared_ptr space); }; // template <> @@ -112,6 +117,5 @@ ScalarIndexPtr IndexFactory::CreateScalarIndex( const IndexType& index_type, const storage::FileManagerContext& file_manager_context, - std::shared_ptr space, - DataType d_type); + std::shared_ptr space); } // namespace milvus::index diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index 5bb8ba3b16103..f09297dd33269 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -23,12 +23,50 @@ #include "InvertedIndexTantivy.h" namespace milvus::index { +inline TantivyDataType +get_tantivy_data_type(proto::schema::DataType data_type) { + switch (data_type) { + case proto::schema::DataType::Bool: { + return TantivyDataType::Bool; + } + + case proto::schema::DataType::Int8: + case proto::schema::DataType::Int16: + case proto::schema::DataType::Int32: + case proto::schema::DataType::Int64: { + return TantivyDataType::I64; + } + + case proto::schema::DataType::Float: + case proto::schema::DataType::Double: { + return TantivyDataType::F64; + } + + case proto::schema::DataType::VarChar: { + return TantivyDataType::Keyword; + } + + default: + PanicInfo(ErrorCode::NotImplemented, + fmt::format("not implemented data type: {}", data_type)); + } +} + +inline TantivyDataType +get_tantivy_data_type(const proto::schema::FieldSchema& schema) { + switch (schema.data_type()) { + case proto::schema::Array: + return get_tantivy_data_type(schema.element_type()); + default: + return get_tantivy_data_type(schema.data_type()); + } +} + template InvertedIndexTantivy::InvertedIndexTantivy( - const TantivyConfig& cfg, const storage::FileManagerContext& ctx, std::shared_ptr space) - : cfg_(cfg), space_(space) { + : space_(space), schema_(ctx.fieldDataMeta.schema) { mem_file_manager_ = std::make_shared(ctx, ctx.space_); disk_file_manager_ = std::make_shared(ctx, ctx.space_); auto field = @@ -36,7 +74,7 @@ InvertedIndexTantivy::InvertedIndexTantivy( auto prefix = disk_file_manager_->GetLocalIndexObjectPrefix(); path_ = prefix; boost::filesystem::create_directories(path_); - d_type_ = cfg_.to_tantivy_data_type(); + d_type_ = get_tantivy_data_type(schema_); if (tantivy_index_exist(path_.c_str())) { LOG_INFO( "index {} already exists, which should happen in loading progress", 
@@ -114,83 +152,7 @@ InvertedIndexTantivy::Build(const Config& config) { AssertInfo(insert_files.has_value(), "insert_files were empty"); auto field_datas = mem_file_manager_->CacheRawDataToMemory(insert_files.value()); - switch (cfg_.data_type_) { - case DataType::BOOL: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data(static_cast(data->Data()), - n); - } - break; - } - - case DataType::INT8: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT16: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT32: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT64: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::FLOAT: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::DOUBLE: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::VARCHAR: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - default: - PanicInfo(ErrorCode::NotImplemented, - fmt::format("todo: not supported, {}", cfg_.data_type_)); - } + build_index(field_datas); } template @@ -211,84 +173,7 @@ InvertedIndexTantivy::BuildV2(const Config& config) { field_data->FillFieldData(col_data); field_datas.push_back(field_data); } - - switch (cfg_.data_type_) { - case DataType::BOOL: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data(static_cast(data->Data()), - n); - } - break; - } - - case DataType::INT8: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT16: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT32: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::INT64: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::FLOAT: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::DOUBLE: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - case DataType::VARCHAR: { - for (const auto& data : field_datas) { - auto n = data->get_num_rows(); - wrapper_->add_data( - static_cast(data->Data()), n); - } - break; - } - - default: - PanicInfo(ErrorCode::NotImplemented, - fmt::format("todo: not supported, {}", cfg_.data_type_)); - } + build_index(field_datas); } template @@ -333,7 +218,8 @@ 
InvertedIndexTantivy::In(size_t n, const T* values) { template const TargetBitmap InvertedIndexTantivy::NotIn(size_t n, const T* values) { - TargetBitmap bitset(Count(), true); + TargetBitmap bitset(Count()); + bitset.set(); for (size_t i = 0; i < n; ++i) { auto array = wrapper_->term_query(values[i]); apply_hits(bitset, array, false); @@ -425,25 +311,107 @@ void InvertedIndexTantivy::BuildWithRawData(size_t n, const void* values, const Config& config) { - if constexpr (!std::is_same_v) { - PanicInfo(Unsupported, - "InvertedIndex.BuildWithRawData only support string"); - } else { - boost::uuids::random_generator generator; - auto uuid = generator(); - auto prefix = boost::uuids::to_string(uuid); - path_ = fmt::format("/tmp/{}", prefix); - boost::filesystem::create_directories(path_); - cfg_ = TantivyConfig{ - .data_type_ = DataType::VARCHAR, - }; - d_type_ = cfg_.to_tantivy_data_type(); - std::string field = "test_inverted_index"; - wrapper_ = std::make_shared( - field.c_str(), d_type_, path_.c_str()); - wrapper_->add_data(static_cast(values), - n); - finish(); + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Int8); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Int16); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Int32); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Int64); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Float); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::Double); + } + if constexpr (std::is_same_v) { + schema_.set_data_type(proto::schema::DataType::VarChar); + } + boost::uuids::random_generator generator; + auto uuid = generator(); + auto prefix = boost::uuids::to_string(uuid); + path_ = fmt::format("/tmp/{}", prefix); + boost::filesystem::create_directories(path_); + d_type_ = get_tantivy_data_type(schema_); + std::string field = "test_inverted_index"; + wrapper_ = std::make_shared( + field.c_str(), d_type_, path_.c_str()); + wrapper_->add_data(static_cast(values), n); + finish(); +} + +template +void +InvertedIndexTantivy::build_index( + const std::vector>& field_datas) { + switch (schema_.data_type()) { + case proto::schema::DataType::Bool: + case proto::schema::DataType::Int8: + case proto::schema::DataType::Int16: + case proto::schema::DataType::Int32: + case proto::schema::DataType::Int64: + case proto::schema::DataType::Float: + case proto::schema::DataType::Double: + case proto::schema::DataType::String: + case proto::schema::DataType::VarChar: { + for (const auto& data : field_datas) { + auto n = data->get_num_rows(); + wrapper_->add_data(static_cast(data->Data()), n); + } + break; + } + + case proto::schema::DataType::Array: { + build_index_for_array(field_datas); + break; + } + + default: + PanicInfo(ErrorCode::NotImplemented, + fmt::format("Inverted index not supported on {}", + schema_.data_type())); + } +} + +template +void +InvertedIndexTantivy::build_index_for_array( + const std::vector>& field_datas) { + for (const auto& data : field_datas) { + auto n = data->get_num_rows(); + auto array_column = static_cast(data->Data()); + for (int64_t i = 0; i < n; i++) { + assert(array_column[i].get_element_type() == + static_cast(schema_.element_type())); + wrapper_->template add_multi_data( + reinterpret_cast(array_column[i].data()), + array_column[i].length()); + } + } +} + +template <> +void 
+InvertedIndexTantivy::build_index_for_array( + const std::vector>& field_datas) { + for (const auto& data : field_datas) { + auto n = data->get_num_rows(); + auto array_column = static_cast(data->Data()); + for (int64_t i = 0; i < n; i++) { + assert(array_column[i].get_element_type() == + static_cast(schema_.element_type())); + std::vector output; + for (int64_t j = 0; j < array_column[i].length(); j++) { + output.push_back( + array_column[i].template get_data(j)); + } + wrapper_->template add_multi_data(output.data(), output.size()); + } } } diff --git a/internal/core/src/index/InvertedIndexTantivy.h b/internal/core/src/index/InvertedIndexTantivy.h index 0ea2f64d869d3..cc0178804c343 100644 --- a/internal/core/src/index/InvertedIndexTantivy.h +++ b/internal/core/src/index/InvertedIndexTantivy.h @@ -18,7 +18,6 @@ #include "tantivy-binding.h" #include "tantivy-wrapper.h" #include "index/StringIndex.h" -#include "index/TantivyConfig.h" #include "storage/space.h" namespace milvus::index { @@ -36,13 +35,11 @@ class InvertedIndexTantivy : public ScalarIndex { InvertedIndexTantivy() = default; - explicit InvertedIndexTantivy(const TantivyConfig& cfg, - const storage::FileManagerContext& ctx) - : InvertedIndexTantivy(cfg, ctx, nullptr) { + explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx) + : InvertedIndexTantivy(ctx, nullptr) { } - explicit InvertedIndexTantivy(const TantivyConfig& cfg, - const storage::FileManagerContext& ctx, + explicit InvertedIndexTantivy(const storage::FileManagerContext& ctx, std::shared_ptr space); ~InvertedIndexTantivy(); @@ -160,11 +157,18 @@ class InvertedIndexTantivy : public ScalarIndex { void finish(); + void + build_index(const std::vector>& field_datas); + + void + build_index_for_array( + const std::vector>& field_datas); + private: std::shared_ptr wrapper_; - TantivyConfig cfg_; TantivyDataType d_type_; std::string path_; + proto::schema::FieldSchema schema_; /* * To avoid IO amplification, we use both mem file manager & disk file manager diff --git a/internal/core/src/index/Meta.h b/internal/core/src/index/Meta.h index e44eb6d87a1ea..f1a01231b8825 100644 --- a/internal/core/src/index/Meta.h +++ b/internal/core/src/index/Meta.h @@ -54,6 +54,8 @@ constexpr const char* INDEX_BUILD_ID = "index_build_id"; constexpr const char* INDEX_ID = "index_id"; constexpr const char* INDEX_VERSION = "index_version"; constexpr const char* INDEX_ENGINE_VERSION = "index_engine_version"; +constexpr const char* BITMAP_INDEX_CARDINALITY_LIMIT = + "bitmap_cardinality_limit"; // VecIndex file metas constexpr const char* DISK_ANN_PREFIX_PATH = "index_prefix"; diff --git a/internal/core/src/index/ScalarIndex.h b/internal/core/src/index/ScalarIndex.h index aacef521f5db3..97a8b63c3ed44 100644 --- a/internal/core/src/index/ScalarIndex.h +++ b/internal/core/src/index/ScalarIndex.h @@ -80,6 +80,16 @@ class ScalarIndex : public IndexBase { RegexQuery(const std::string& pattern) { PanicInfo(Unsupported, "regex query is not supported"); } + + virtual void + BuildWithFieldData(const std::vector& field_datas) { + PanicInfo(Unsupported, "BuildwithFieldData is not supported"); + } + + virtual void + LoadWithoutAssemble(const BinarySet& binary_set, const Config& config) { + PanicInfo(Unsupported, "LoadWithoutAssemble is not supported"); + } }; template diff --git a/internal/core/src/index/ScalarIndexSort.cpp b/internal/core/src/index/ScalarIndexSort.cpp index bcb401ea5bf09..1f494e5c5a4d1 100644 --- a/internal/core/src/index/ScalarIndexSort.cpp +++ 
b/internal/core/src/index/ScalarIndexSort.cpp @@ -117,6 +117,35 @@ ScalarIndexSort::Build(const Config& config) { auto field_datas = file_manager_->CacheRawDataToMemory(insert_files.value()); + BuildWithFieldData(field_datas); +} + +template +void +ScalarIndexSort::Build(size_t n, const T* values) { + if (is_built_) + return; + if (n == 0) { + throw SegcoreError(DataIsEmpty, + "ScalarIndexSort cannot build null values!"); + } + data_.reserve(n); + idx_to_offsets_.resize(n); + T* p = const_cast(values); + for (size_t i = 0; i < n; ++i) { + data_.emplace_back(IndexStructure(*p++, i)); + } + std::sort(data_.begin(), data_.end()); + for (size_t i = 0; i < data_.size(); ++i) { + idx_to_offsets_[data_[i].idx_] = i; + } + is_built_ = true; +} + +template +void +ScalarIndexSort::BuildWithFieldData( + const std::vector& field_datas) { int64_t total_num_rows = 0; for (const auto& data : field_datas) { total_num_rows += data->get_num_rows(); @@ -145,28 +174,6 @@ ScalarIndexSort::Build(const Config& config) { is_built_ = true; } -template -void -ScalarIndexSort::Build(size_t n, const T* values) { - if (is_built_) - return; - if (n == 0) { - throw SegcoreError(DataIsEmpty, - "ScalarIndexSort cannot build null values!"); - } - data_.reserve(n); - idx_to_offsets_.resize(n); - T* p = const_cast(values); - for (size_t i = 0; i < n; ++i) { - data_.emplace_back(IndexStructure(*p++, i)); - } - std::sort(data_.begin(), data_.end()); - for (size_t i = 0; i < data_.size(); ++i) { - idx_to_offsets_[data_[i].idx_] = i; - } - is_built_ = true; -} - template BinarySet ScalarIndexSort::Serialize(const Config& config) { diff --git a/internal/core/src/index/ScalarIndexSort.h b/internal/core/src/index/ScalarIndexSort.h index e938b164184d7..96402017c9cfe 100644 --- a/internal/core/src/index/ScalarIndexSort.h +++ b/internal/core/src/index/ScalarIndexSort.h @@ -41,6 +41,17 @@ class ScalarIndexSort : public ScalarIndex { const storage::FileManagerContext& file_manager_context, std::shared_ptr space); + explicit ScalarIndexSort( + const std::shared_ptr& file_manager) + : file_manager_(file_manager) { + } + + explicit ScalarIndexSort( + const std::shared_ptr& file_manager, + std::shared_ptr space) + : file_manager_(file_manager), space_(space) { + } + BinarySet Serialize(const Config& config) override; @@ -100,6 +111,9 @@ class ScalarIndexSort : public ScalarIndex { return true; } + void + BuildWithFieldData(const std::vector& datas) override; + private: bool ShouldSkip(const T lower_value, const T upper_value, const OpType op); @@ -116,7 +130,8 @@ class ScalarIndexSort : public ScalarIndex { } void - LoadWithoutAssemble(const BinarySet& binary_set, const Config& config); + LoadWithoutAssemble(const BinarySet& binary_set, + const Config& config) override; private: bool is_built_; diff --git a/internal/core/src/index/StringIndexMarisa.cpp b/internal/core/src/index/StringIndexMarisa.cpp index aa41438e2bc8d..3e4aa85c52a41 100644 --- a/internal/core/src/index/StringIndexMarisa.cpp +++ b/internal/core/src/index/StringIndexMarisa.cpp @@ -132,6 +132,13 @@ StringIndexMarisa::Build(const Config& config) { "insert file paths is empty when build index"); auto field_datas = file_manager_->CacheRawDataToMemory(insert_files.value()); + + BuildWithFieldData(field_datas); +} + +void +StringIndexMarisa::BuildWithFieldData( + const std::vector& field_datas) { int64_t total_num_rows = 0; // fill key set. 
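For reference, the core decision the new HybridScalarIndex makes above is a cardinality check: distinct values are counted against the `bitmap_cardinality_limit` read from the index config (the `BITMAP_INDEX_CARDINALITY_LIMIT` key added in Meta.h), and the internal index type is chosen accordingly. The sketch below is a simplified illustration with stand-in names and plain STL types, not the production code; counting stops early once the limit is reached, as in the string and FieldData overloads of SelectIndexBuildType.

```cpp
#include <set>
#include <string>
#include <type_traits>
#include <vector>

// Mirrors index::InternalIndexType from HybridScalarIndex.h.
enum class InternalIndexType { NONE, BITMAP, STLSORT, MARISA };

// Illustrative only: pick the internal scalar index from the data's cardinality.
// `cardinality_limit` plays the role of bitmap_index_cardinality_limit_.
template <typename T>
InternalIndexType
ChooseScalarIndexType(const std::vector<T>& values, size_t cardinality_limit) {
    std::set<T> distinct;
    for (const auto& v : values) {
        distinct.insert(v);
        if (distinct.size() >= cardinality_limit) {
            // Too many distinct values for a bitmap: fall back to a sorted
            // index (a marisa trie for strings, STL-sort otherwise).
            return std::is_same_v<T, std::string> ? InternalIndexType::MARISA
                                                  : InternalIndexType::STLSORT;
        }
    }
    // Low-cardinality column: one bitmap per distinct value stays compact.
    return InternalIndexType::BITMAP;
}
```

Whichever type is selected, the concrete BitmapIndex, ScalarIndexSort, or StringIndexMarisa instance is then driven through the shared BuildWithFieldData hook introduced in ScalarIndex.h, which is why that override is added to each implementation in the hunks above and below.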
diff --git a/internal/core/src/index/StringIndexMarisa.h b/internal/core/src/index/StringIndexMarisa.h index 7b96f061241c3..e787a7e63b404 100644 --- a/internal/core/src/index/StringIndexMarisa.h +++ b/internal/core/src/index/StringIndexMarisa.h @@ -37,6 +37,17 @@ class StringIndexMarisa : public StringIndex { const storage::FileManagerContext& file_manager_context, std::shared_ptr space); + explicit StringIndexMarisa( + const std::shared_ptr& file_manager) + : file_manager_(file_manager) { + } + + explicit StringIndexMarisa( + const std::shared_ptr& file_manager, + std::shared_ptr space) + : file_manager_(file_manager), space_(space) { + } + int64_t Size() override; @@ -63,6 +74,9 @@ class StringIndexMarisa : public StringIndex { void Build(const Config& config = {}) override; + void + BuildWithFieldData(const std::vector& field_datas) override; + void BuildV2(const Config& Config = {}) override; @@ -113,7 +127,8 @@ class StringIndexMarisa : public StringIndex { prefix_match(const std::string_view prefix); void - LoadWithoutAssemble(const BinarySet& binary_set, const Config& config); + LoadWithoutAssemble(const BinarySet& binary_set, + const Config& config) override; private: Config config_; diff --git a/internal/core/src/index/TantivyConfig.h b/internal/core/src/index/TantivyConfig.h deleted file mode 100644 index 355b4c76efc9d..0000000000000 --- a/internal/core/src/index/TantivyConfig.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (C) 2019-2020 Zilliz. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software distributed under the License -// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -// or implied. 
See the License for the specific language governing permissions and limitations under the License - -#pragma once - -#include "storage/Types.h" -#include "tantivy-binding.h" - -namespace milvus::index { -struct TantivyConfig { - DataType data_type_; - - TantivyDataType - to_tantivy_data_type() { - switch (data_type_) { - case DataType::BOOL: { - return TantivyDataType::Bool; - } - - case DataType::INT8: - case DataType::INT16: - case DataType::INT32: - case DataType::INT64: { - return TantivyDataType::I64; - } - - case DataType::FLOAT: - case DataType::DOUBLE: { - return TantivyDataType::F64; - } - - case DataType::VARCHAR: { - return TantivyDataType::Keyword; - } - - default: - PanicInfo( - ErrorCode::NotImplemented, - fmt::format("not implemented data type: {}", data_type_)); - } - } -}; -} // namespace milvus::index \ No newline at end of file diff --git a/internal/core/src/index/Utils.cpp b/internal/core/src/index/Utils.cpp index a9ad1cf1a0d91..d931684d91767 100644 --- a/internal/core/src/index/Utils.cpp +++ b/internal/core/src/index/Utils.cpp @@ -154,6 +154,15 @@ GetIndexEngineVersionFromConfig(const Config& config) { return (std::stoi(index_engine_version.value())); } +int32_t +GetBitmapCardinalityLimitFromConfig(const Config& config) { + auto bitmap_limit = GetValueFromConfig( + config, index::BITMAP_INDEX_CARDINALITY_LIMIT); + AssertInfo(bitmap_limit.has_value(), + "bitmap cardinality limit not exist in config"); + return (std::stoi(bitmap_limit.value())); +} + // TODO :: too ugly storage::FieldDataMeta GetFieldDataMetaFromConfig(const Config& config) { diff --git a/internal/core/src/index/Utils.h b/internal/core/src/index/Utils.h index 53670dcba215e..50c70d8d52cdd 100644 --- a/internal/core/src/index/Utils.h +++ b/internal/core/src/index/Utils.h @@ -103,6 +103,9 @@ GetIndexTypeFromConfig(const Config& config); IndexVersion GetIndexEngineVersionFromConfig(const Config& config); +int32_t +GetBitmapCardinalityLimitFromConfig(const Config& config); + storage::FieldDataMeta GetFieldDataMetaFromConfig(const Config& config); diff --git a/internal/core/src/indexbuilder/IndexFactory.h b/internal/core/src/indexbuilder/IndexFactory.h index cd361499b4065..1380a6e9817d3 100644 --- a/internal/core/src/indexbuilder/IndexFactory.h +++ b/internal/core/src/indexbuilder/IndexFactory.h @@ -60,6 +60,7 @@ class IndexFactory { case DataType::DOUBLE: case DataType::VARCHAR: case DataType::STRING: + case DataType::ARRAY: return CreateScalarIndex(type, config, context); case DataType::VECTOR_FLOAT: diff --git a/internal/core/src/indexbuilder/index_c.cpp b/internal/core/src/indexbuilder/index_c.cpp index 28a629052cad7..7ccaf7c414a24 100644 --- a/internal/core/src/indexbuilder/index_c.cpp +++ b/internal/core/src/indexbuilder/index_c.cpp @@ -84,29 +84,95 @@ CreateIndexV0(enum CDataType dtype, return status; } +milvus::storage::StorageConfig +get_storage_config(const milvus::proto::indexcgo::StorageConfig& config) { + auto storage_config = milvus::storage::StorageConfig(); + storage_config.address = std::string(config.address()); + storage_config.bucket_name = std::string(config.bucket_name()); + storage_config.access_key_id = std::string(config.access_keyid()); + storage_config.access_key_value = std::string(config.secret_access_key()); + storage_config.root_path = std::string(config.root_path()); + storage_config.storage_type = std::string(config.storage_type()); + storage_config.cloud_provider = std::string(config.cloud_provider()); + storage_config.iam_endpoint = std::string(config.iamendpoint()); + 
storage_config.cloud_provider = std::string(config.cloud_provider()); + storage_config.useSSL = config.usessl(); + storage_config.sslCACert = config.sslcacert(); + storage_config.useIAM = config.useiam(); + storage_config.region = config.region(); + storage_config.useVirtualHost = config.use_virtual_host(); + storage_config.requestTimeoutMs = config.request_timeout_ms(); + return storage_config; +} + +milvus::OptFieldT +get_opt_field(const ::google::protobuf::RepeatedPtrField< + milvus::proto::indexcgo::OptionalFieldInfo>& field_infos) { + milvus::OptFieldT opt_fields_map; + for (const auto& field_info : field_infos) { + auto field_id = field_info.fieldid(); + if (opt_fields_map.find(field_id) == opt_fields_map.end()) { + opt_fields_map[field_id] = { + field_info.field_name(), + static_cast(field_info.field_type()), + {}}; + } + for (const auto& str : field_info.data_paths()) { + std::get<2>(opt_fields_map[field_id]).emplace_back(str); + } + } + + return opt_fields_map; +} + +milvus::Config +get_config(std::unique_ptr& info) { + milvus::Config config; + for (auto i = 0; i < info->index_params().size(); ++i) { + const auto& param = info->index_params(i); + config[param.key()] = param.value(); + } + + for (auto i = 0; i < info->type_params().size(); ++i) { + const auto& param = info->type_params(i); + config[param.key()] = param.value(); + } + + config["insert_files"] = info->insert_files(); + if (info->opt_fields().size()) { + config["opt_fields"] = get_opt_field(info->opt_fields()); + } + + return config; +} + CStatus -CreateIndex(CIndex* res_index, CBuildIndexInfo c_build_index_info) { +CreateIndex(CIndex* res_index, + const uint8_t* serialized_build_index_info, + const uint64_t len) { try { - auto build_index_info = (BuildIndexInfo*)c_build_index_info; - auto field_type = build_index_info->field_type; + auto build_index_info = + std::make_unique(); + auto res = + build_index_info->ParseFromArray(serialized_build_index_info, len); + AssertInfo(res, "Unmarshall build index info failed"); - milvus::index::CreateIndexInfo index_info; - index_info.field_type = build_index_info->field_type; + auto field_type = + static_cast(build_index_info->field_schema().data_type()); - auto& config = build_index_info->config; - config["insert_files"] = build_index_info->insert_files; - if (build_index_info->opt_fields.size()) { - config["opt_fields"] = build_index_info->opt_fields; - } + milvus::index::CreateIndexInfo index_info; + index_info.field_type = field_type; + auto storage_config = + get_storage_config(build_index_info->storage_config()); + auto config = get_config(build_index_info); // get index type auto index_type = milvus::index::GetValueFromConfig( config, "index_type"); AssertInfo(index_type.has_value(), "index type is empty"); index_info.index_type = index_type.value(); - auto engine_version = build_index_info->index_engine_version; - + auto engine_version = build_index_info->current_index_version(); index_info.index_engine_version = engine_version; config[milvus::index::INDEX_ENGINE_VERSION] = std::to_string(engine_version); @@ -121,24 +187,31 @@ CreateIndex(CIndex* res_index, CBuildIndexInfo c_build_index_info) { // init file manager milvus::storage::FieldDataMeta field_meta{ - build_index_info->collection_id, - build_index_info->partition_id, - build_index_info->segment_id, - build_index_info->field_id}; - - milvus::storage::IndexMeta index_meta{build_index_info->segment_id, - build_index_info->field_id, - build_index_info->index_build_id, - build_index_info->index_version}; - auto 
chunk_manager = milvus::storage::CreateChunkManager( - build_index_info->storage_config); + build_index_info->collectionid(), + build_index_info->partitionid(), + build_index_info->segmentid(), + build_index_info->field_schema().fieldid(), + build_index_info->field_schema()}; + + milvus::storage::IndexMeta index_meta{ + build_index_info->segmentid(), + build_index_info->field_schema().fieldid(), + build_index_info->buildid(), + build_index_info->index_version(), + "", + build_index_info->field_schema().name(), + field_type, + build_index_info->dim(), + }; + auto chunk_manager = + milvus::storage::CreateChunkManager(storage_config); milvus::storage::FileManagerContext fileManagerContext( field_meta, index_meta, chunk_manager); auto index = milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex( - build_index_info->field_type, config, fileManagerContext); + field_type, config, fileManagerContext); index->Build(); *res_index = index.release(); auto status = CStatus(); @@ -159,22 +232,32 @@ CreateIndex(CIndex* res_index, CBuildIndexInfo c_build_index_info) { } CStatus -CreateIndexV2(CIndex* res_index, CBuildIndexInfo c_build_index_info) { +CreateIndexV2(CIndex* res_index, + const uint8_t* serialized_build_index_info, + const uint64_t len) { try { - auto build_index_info = (BuildIndexInfo*)c_build_index_info; - auto field_type = build_index_info->field_type; + auto build_index_info = + std::make_unique(); + auto res = + build_index_info->ParseFromArray(serialized_build_index_info, len); + AssertInfo(res, "Unmarshall build index info failed"); + auto field_type = + static_cast(build_index_info->field_schema().data_type()); + milvus::index::CreateIndexInfo index_info; - index_info.field_type = build_index_info->field_type; - index_info.dim = build_index_info->dim; + index_info.field_type = field_type; + index_info.dim = build_index_info->dim(); - auto& config = build_index_info->config; + auto storage_config = + get_storage_config(build_index_info->storage_config()); + auto config = get_config(build_index_info); // get index type auto index_type = milvus::index::GetValueFromConfig( config, "index_type"); AssertInfo(index_type.has_value(), "index type is empty"); index_info.index_type = index_type.value(); - auto engine_version = build_index_info->index_engine_version; + auto engine_version = build_index_info->current_index_version(); index_info.index_engine_version = engine_version; config[milvus::index::INDEX_ENGINE_VERSION] = std::to_string(engine_version); @@ -188,39 +271,39 @@ CreateIndexV2(CIndex* res_index, CBuildIndexInfo c_build_index_info) { } milvus::storage::FieldDataMeta field_meta{ - build_index_info->collection_id, - build_index_info->partition_id, - build_index_info->segment_id, - build_index_info->field_id}; + build_index_info->collectionid(), + build_index_info->partitionid(), + build_index_info->segmentid(), + build_index_info->field_schema().fieldid()}; milvus::storage::IndexMeta index_meta{ - build_index_info->segment_id, - build_index_info->field_id, - build_index_info->index_build_id, - build_index_info->index_version, - build_index_info->field_name, + build_index_info->segmentid(), + build_index_info->field_schema().fieldid(), + build_index_info->buildid(), + build_index_info->index_version(), "", - build_index_info->field_type, - build_index_info->dim, + build_index_info->field_schema().name(), + field_type, + build_index_info->dim(), }; auto store_space = milvus_storage::Space::Open( - build_index_info->data_store_path, + build_index_info->store_path(), 
milvus_storage::Options{nullptr, - build_index_info->data_store_version}); + build_index_info->store_version()}); AssertInfo(store_space.ok() && store_space.has_value(), "create space failed: {}", store_space.status().ToString()); auto index_space = milvus_storage::Space::Open( - build_index_info->index_store_path, + build_index_info->index_store_path(), milvus_storage::Options{.schema = store_space.value()->schema()}); AssertInfo(index_space.ok() && index_space.has_value(), "create space failed: {}", index_space.status().ToString()); LOG_INFO("init space success"); - auto chunk_manager = milvus::storage::CreateChunkManager( - build_index_info->storage_config); + auto chunk_manager = + milvus::storage::CreateChunkManager(storage_config); milvus::storage::FileManagerContext fileManagerContext( field_meta, index_meta, @@ -229,9 +312,9 @@ CreateIndexV2(CIndex* res_index, CBuildIndexInfo c_build_index_info) { auto index = milvus::indexbuilder::IndexFactory::GetInstance().CreateIndex( - build_index_info->field_type, - build_index_info->field_name, - build_index_info->dim, + field_type, + build_index_info->field_schema().name(), + build_index_info->dim(), config, fileManagerContext, std::move(store_space.value())); diff --git a/internal/core/src/indexbuilder/index_c.h b/internal/core/src/indexbuilder/index_c.h index 16cd76e4531ce..53ce5552fef0a 100644 --- a/internal/core/src/indexbuilder/index_c.h +++ b/internal/core/src/indexbuilder/index_c.h @@ -28,7 +28,9 @@ CreateIndexV0(enum CDataType dtype, CIndex* res_index); CStatus -CreateIndex(CIndex* res_index, CBuildIndexInfo c_build_index_info); +CreateIndex(CIndex* res_index, + const uint8_t* serialized_build_index_info, + const uint64_t len); CStatus DeleteIndex(CIndex index); @@ -130,7 +132,9 @@ CStatus SerializeIndexAndUpLoadV2(CIndex index, CBinarySet* c_binary_set); CStatus -CreateIndexV2(CIndex* res_index, CBuildIndexInfo c_build_index_info); +CreateIndexV2(CIndex* res_index, + const uint8_t* serialized_build_index_info, + const uint64_t len); CStatus AppendIndexStorageInfo(CBuildIndexInfo c_build_index_info, diff --git a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index bda4ca16a9edd..916bb07b0e1c8 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -459,9 +459,7 @@ class VariableColumn : public ColumnBase { std::string_view RawAt(const int i) const { - size_t len = (i == indices_.size() - 1) ? 
size_ - indices_.back() - : indices_[i + 1] - indices_[i]; - return std::string_view(data_ + indices_[i], len); + return std::string_view(views_[i]); } void @@ -502,6 +500,9 @@ class VariableColumn : public ColumnBase { } ConstructViews(); + + // Not need indices_ after + indices_.clear(); } protected: diff --git a/internal/core/src/pb/CMakeLists.txt b/internal/core/src/pb/CMakeLists.txt index 3c00203cf4c25..35726d9c24c65 100644 --- a/internal/core/src/pb/CMakeLists.txt +++ b/internal/core/src/pb/CMakeLists.txt @@ -11,12 +11,10 @@ find_package(Protobuf REQUIRED) +file(GLOB_RECURSE milvus_proto_srcs + "${CMAKE_CURRENT_SOURCE_DIR}/*.cc") add_library(milvus_proto STATIC - common.pb.cc - index_cgo_msg.pb.cc - plan.pb.cc - schema.pb.cc - segcore.pb.cc + ${milvus_proto_srcs} ) message(STATUS "milvus proto sources: " ${milvus_proto_srcs}) diff --git a/internal/core/src/query/GroupByOperator.h b/internal/core/src/query/GroupByOperator.h index 64e112253748c..21162c09bfe9b 100644 --- a/internal/core/src/query/GroupByOperator.h +++ b/internal/core/src/query/GroupByOperator.h @@ -133,10 +133,6 @@ PrepareVectorIteratorsFromIndex(const SearchInfo& search_info, if (search_info.group_by_field_id_.has_value()) { try { auto search_conf = search_info.search_params_; - if (search_conf.contains(knowhere::indexparam::EF)) { - search_conf[knowhere::indexparam::SEED_EF] = - search_conf[knowhere::indexparam::EF]; - } knowhere::expected< std::vector>> iterators_val = diff --git a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp index e5a7a8f1c77f3..d9e8a6c125b00 100644 --- a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp +++ b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp @@ -291,8 +291,10 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) { false_filtered_out = true; segment->timestamp_filter(bitset_holder, timestamp_); } - retrieve_result.result_offsets_ = + auto results_pair = segment->find_first(node.limit_, bitset_holder, false_filtered_out); + retrieve_result.result_offsets_ = std::move(results_pair.first); + retrieve_result.has_more_result = results_pair.second; retrieve_result_opt_ = std::move(retrieve_result); } diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h index 7e85a64c231d8..13a92d22e760a 100644 --- a/internal/core/src/segcore/InsertRecord.h +++ b/internal/core/src/segcore/InsertRecord.h @@ -60,7 +60,7 @@ class OffsetMap { using OffsetType = int64_t; // TODO: in fact, we can retrieve the pk here. Not sure which way is more efficient. 
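The InsertRecord.h and SegmentInterface hunks below change find_first from returning a bare offset vector to returning an {offsets, has_more} pair, and Retrieve forwards the flag through set_has_more_result. A minimal sketch of the new contract follows, using a std::vector<bool> stand-in for the real BitsetType and a hypothetical helper name.

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Sketch only: collect up to `limit` hit offsets; the bool reports whether the
// scan stopped with candidates still unvisited (the caller's "has more result").
std::pair<std::vector<int64_t>, bool>
find_first_sketch(int64_t limit, const std::vector<bool>& hit_mask) {
    std::vector<int64_t> offsets;
    size_t i = 0;
    for (; i < hit_mask.size() &&
           static_cast<int64_t>(offsets.size()) < limit; ++i) {
        if (hit_mask[i]) {
            offsets.push_back(static_cast<int64_t>(i));
        }
    }
    // Equivalent to `it != map_.end()` / `it != array_.end()` in the real code.
    return {offsets, i != hit_mask.size()};
}

// Caller side, mirroring ExecPlanNodeVisitor::visit(RetrievePlanNode&):
//   auto [offsets, has_more] = find_first_sketch(limit, mask);
//   retrieve_result.result_offsets_ = std::move(offsets);
//   retrieve_result.has_more_result = has_more;
```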
- virtual std::vector + virtual std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const = 0; @@ -109,7 +109,7 @@ class OffsetOrderedMap : public OffsetMap { return map_.empty(); } - std::vector + std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const override { @@ -131,7 +131,7 @@ class OffsetOrderedMap : public OffsetMap { } private: - std::vector + std::pair, bool> find_first_by_index(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const { @@ -144,8 +144,8 @@ class OffsetOrderedMap : public OffsetMap { limit = std::min(limit, cnt); std::vector seg_offsets; seg_offsets.reserve(limit); - for (auto it = map_.begin(); hit_num < limit && it != map_.end(); - it++) { + auto it = map_.begin(); + for (; hit_num < limit && it != map_.end(); it++) { for (auto seg_offset : it->second) { if (seg_offset >= size) { // Frequently concurrent insert/query will cause this case. @@ -161,7 +161,7 @@ class OffsetOrderedMap : public OffsetMap { } } } - return seg_offsets; + return {seg_offsets, it != map_.end()}; } private: @@ -212,7 +212,8 @@ class OffsetOrderedArray : public OffsetMap { PanicInfo(Unsupported, "OffsetOrderedArray could not insert after seal"); } - array_.push_back(std::make_pair(std::get(pk), offset)); + array_.push_back( + std::make_pair(std::get(pk), static_cast(offset))); } void @@ -226,7 +227,7 @@ class OffsetOrderedArray : public OffsetMap { return array_.empty(); } - std::vector + std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const override { @@ -248,7 +249,7 @@ class OffsetOrderedArray : public OffsetMap { } private: - std::vector + std::pair, bool> find_first_by_index(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const { @@ -261,11 +262,11 @@ class OffsetOrderedArray : public OffsetMap { limit = std::min(limit, cnt); std::vector seg_offsets; seg_offsets.reserve(limit); - for (auto it = array_.begin(); hit_num < limit && it != array_.end(); - it++) { + auto it = array_.begin(); + for (; hit_num < limit && it != array_.end(); it++) { auto seg_offset = it->second; if (seg_offset >= size) { - // In fact, this case won't happend on sealed segments. + // In fact, this case won't happen on sealed segments. 
continue; } @@ -274,7 +275,7 @@ class OffsetOrderedArray : public OffsetMap { hit_num++; } } - return seg_offsets; + return {seg_offsets, it != array_.end()}; } void @@ -285,13 +286,13 @@ class OffsetOrderedArray : public OffsetMap { private: bool is_sealed = false; - std::vector> array_; + std::vector> array_; }; template struct InsertRecord { InsertRecord(const Schema& schema, int64_t size_per_chunk) - : row_ids_(size_per_chunk), timestamps_(size_per_chunk) { + : timestamps_(size_per_chunk) { std::optional pk_field_id = schema.get_primary_field_id(); for (auto& field : schema) { @@ -590,10 +591,8 @@ struct InsertRecord { void clear() { timestamps_.clear(); - row_ids_.clear(); reserved = 0; ack_responder_.clear(); - timestamp_index_ = TimestampIndex(); pk2offset_->clear(); fields_data_.clear(); } @@ -605,15 +604,11 @@ struct InsertRecord { public: ConcurrentVector timestamps_; - ConcurrentVector row_ids_; // used for preInsert of growing segment std::atomic reserved = 0; AckResponder ack_responder_; - // used for timestamps index of sealed segment - TimestampIndex timestamp_index_; - // pks to row offset std::unique_ptr pk2offset_; diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index 3d1f277c43d89..d8cd057f28be7 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -110,7 +110,6 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset, // step 3: fill into Segment.ConcurrentVector insert_record_.timestamps_.set_data_raw( reserved_offset, timestamps_raw, num_rows); - insert_record_.row_ids_.set_data_raw(reserved_offset, row_ids, num_rows); // update the mem size of timestamps and row IDs stats_.mem_size += num_rows * (sizeof(Timestamp) + sizeof(idx_t)); @@ -224,7 +223,6 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) { } if (field_id == RowFieldID) { - insert_record_.row_ids_.set_data_raw(reserved_offset, field_data); continue; } @@ -313,7 +311,6 @@ SegmentGrowingImpl::LoadFieldDataV2(const LoadFieldDataInfo& infos) { } if (field_id == RowFieldID) { - insert_record_.row_ids_.set_data_raw(reserved_offset, field_data); continue; } @@ -766,10 +763,8 @@ SegmentGrowingImpl::bulk_subscript(SystemFieldType system_type, static_cast(output)); break; case SystemFieldType::RowId: - bulk_subscript_impl(&this->insert_record_.row_ids_, - seg_offsets, - count, - static_cast(output)); + PanicInfo(ErrorCode::Unsupported, + "RowId retrieve is not supported"); break; default: PanicInfo(DataTypeInvalid, "unknown subscript fields"); diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index 1cc308216bc82..06f9048d5ae22 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -268,7 +268,7 @@ class SegmentGrowingImpl : public SegmentGrowing { return true; } - std::vector + std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const override { diff --git a/internal/core/src/segcore/SegmentInterface.cpp b/internal/core/src/segcore/SegmentInterface.cpp index 3d79fc0b35971..0ad695ff597e1 100644 --- a/internal/core/src/segcore/SegmentInterface.cpp +++ b/internal/core/src/segcore/SegmentInterface.cpp @@ -91,6 +91,7 @@ SegmentInternalInterface::Retrieve(tracer::TraceContext* trace_ctx, query::ExecPlanNodeVisitor visitor(*this, timestamp); auto retrieve_results = visitor.get_retrieve_result(*plan->plan_node_); 
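// The has_more_result flag filled in by the retrieve visitor (see the
// ExecPlanNodeVisitor change above) is copied into the returned result object
// below, so upper layers can tell whether this segment still holds qualified
// entries beyond the requested limit.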
retrieve_results.segment_ = (void*)this; + results->set_has_more_result(retrieve_results.has_more_result); auto result_rows = retrieve_results.result_offsets_.size(); int64_t output_data_size = 0; @@ -120,7 +121,6 @@ SegmentInternalInterface::Retrieve(tracer::TraceContext* trace_ctx, retrieve_results.result_offsets_.size(), ignore_non_pk, true); - return results; } diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 2715e387c76b3..663cfa20819be 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -235,6 +235,7 @@ class SegmentInternalInterface : public SegmentInterface { virtual int64_t num_chunk_data(FieldId field_id) const = 0; + // bitset 1 means not hit. 0 means hit. virtual void mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const = 0; @@ -290,7 +291,7 @@ class SegmentInternalInterface : public SegmentInterface { * @param false_filtered_out * @return All candidates offsets. */ - virtual std::vector + virtual std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const = 0; diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index 36e7a6aebb184..4c06d6a3ffb5b 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include "Utils.h" #include "Types.h" @@ -348,35 +349,15 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { offset += row_count; } - TimestampIndex index; - auto min_slice_length = num_rows < 4096 ? 1 : 4096; - auto meta = GenerateFakeSlices( - timestamps.data(), num_rows, min_slice_length); - index.set_length_meta(std::move(meta)); - // todo ::opt to avoid copy timestamps from field data - index.build_with(timestamps.data(), num_rows); - - // use special index std::unique_lock lck(mutex_); AssertInfo(insert_record_.timestamps_.empty(), "already exists"); insert_record_.timestamps_.fill_chunk_data(field_data); - insert_record_.timestamp_index_ = std::move(index); AssertInfo(insert_record_.timestamps_.num_chunk() == 1, "num chunk not equal to 1 for sealed segment"); stats_.mem_size += sizeof(Timestamp) * data.row_count; } else { AssertInfo(system_field_type == SystemFieldType::RowId, "System field type of id column is not RowId"); - - auto field_data = storage::CollectFieldDataChannel(data.channel); - - // write data under lock - std::unique_lock lck(mutex_); - AssertInfo(insert_record_.row_ids_.empty(), "already exists"); - insert_record_.row_ids_.fill_chunk_data(field_data); - AssertInfo(insert_record_.row_ids_.num_chunk() == 1, - "num chunk not equal to 1 for sealed segment"); - stats_.mem_size += sizeof(idx_t) * data.row_count; } ++system_ready_count_; } else { @@ -925,9 +906,7 @@ SegmentSealedImpl::DropFieldData(const FieldId field_id) { std::unique_lock lck(mutex_); --system_ready_count_; - if (system_field_type == SystemFieldType::RowId) { - insert_record_.row_ids_.clear(); - } else if (system_field_type == SystemFieldType::Timestamp) { + if (system_field_type == SystemFieldType::Timestamp) { insert_record_.timestamps_.clear(); } lck.unlock(); @@ -1042,13 +1021,7 @@ SegmentSealedImpl::bulk_subscript(SystemFieldType system_type, static_cast(output)); break; case SystemFieldType::RowId: - AssertInfo(insert_record_.row_ids_.num_chunk() == 1, - "num chunk of rowID not equal to 1 for sealed segment"); - 
bulk_subscript_impl( - this->insert_record_.row_ids_.get_chunk_data(0), - seg_offsets, - count, - static_cast(output)); + PanicInfo(ErrorCode::Unsupported, "RowId retrieve not supported"); break; default: PanicInfo(DataTypeInvalid, @@ -1512,12 +1485,6 @@ SegmentSealedImpl::debug() const { void SegmentSealedImpl::LoadSegmentMeta( const proto::segcore::LoadSegmentMeta& segment_meta) { - std::unique_lock lck(mutex_); - std::vector slice_lengths; - for (auto& info : segment_meta.metas()) { - slice_lengths.push_back(info.row_count()); - } - insert_record_.timestamp_index_.set_length_meta(std::move(slice_lengths)); PanicInfo(NotImplemented, "unimplemented"); } @@ -1529,33 +1496,17 @@ SegmentSealedImpl::get_active_count(Timestamp ts) const { void SegmentSealedImpl::mask_with_timestamps(BitsetType& bitset_chunk, - Timestamp timestamp) const { - // TODO change the - AssertInfo(insert_record_.timestamps_.num_chunk() == 1, - "num chunk not equal to 1 for sealed segment"); - const auto& timestamps_data = insert_record_.timestamps_.get_chunk(0); - AssertInfo(timestamps_data.size() == get_row_count(), - fmt::format("Timestamp size not equal to row count: {}, {}", - timestamps_data.size(), - get_row_count())); - auto range = insert_record_.timestamp_index_.get_active_range(timestamp); - - // range == (size_, size_) and size_ is this->timestamps_.size(). - // it means these data are all useful, we don't need to update bitset_chunk. - // It can be thought of as an OR operation with another bitmask that is all 0s, but it is not necessary to do so. - if (range.first == range.second && range.first == timestamps_data.size()) { - // just skip - return; - } - // range == (0, 0). it means these data can not be used, directly set bitset_chunk to all 1s. - // It can be thought of as an OR operation with another bitmask that is all 1s. 
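// The rewritten mask_with_timestamps() below drops the TimestampIndex path
// entirely: std::upper_bound over a counting iterator locates the first row
// whose timestamp is greater than the query timestamp, and every bit from that
// row onward is set (1 == not hit, per the convention added in
// SegmentInterface.h). This relies on the timestamp column being ordered,
// which is what the binary search assumes.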
- if (range.first == range.second && range.first == 0) { - bitset_chunk.set(); - return; + Timestamp ts) const { + auto row_count = this->get_row_count(); + auto& ts_vec = this->insert_record_.timestamps_; + auto iter = std::upper_bound( + boost::make_counting_iterator(static_cast(0)), + boost::make_counting_iterator(row_count), + ts, + [&](Timestamp ts, int64_t index) { return ts < ts_vec[index]; }); + for (size_t i = *iter; i < row_count; ++i) { + bitset_chunk.set(i); } - auto mask = TimestampIndex::GenerateBitset( - timestamp, range, timestamps_data.data(), timestamps_data.size()); - bitset_chunk |= mask; } bool diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index 21306616e810e..b7e8b89e2c40a 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -133,7 +133,7 @@ class SegmentSealedImpl : public SegmentSealed { const IdArray* pks, const Timestamp* timestamps) override; - std::vector + std::pair, bool> find_first(int64_t limit, const BitsetType& bitset, bool false_filtered_out) const override { diff --git a/internal/core/src/segcore/Types.h b/internal/core/src/segcore/Types.h index 73ba7fcb188b6..106799ce2610f 100644 --- a/internal/core/src/segcore/Types.h +++ b/internal/core/src/segcore/Types.h @@ -46,6 +46,7 @@ struct LoadIndexInfo { std::string uri; int64_t index_store_version; IndexVersion index_engine_version; + proto::schema::FieldSchema schema; }; } // namespace milvus::segcore diff --git a/internal/core/src/segcore/Utils.cpp b/internal/core/src/segcore/Utils.cpp index cee4a04c92ac2..6349ad847ac9d 100644 --- a/internal/core/src/segcore/Utils.cpp +++ b/internal/core/src/segcore/Utils.cpp @@ -820,7 +820,7 @@ LoadFieldDatasFromRemote(const std::vector& remote_files, channel->close(); } catch (std::exception& e) { LOG_INFO("failed to load data from remote: {}", e.what()); - channel->close(std::move(e)); + channel->close(MilvusException(e.what())); } } diff --git a/internal/core/src/segcore/load_index_c.cpp b/internal/core/src/segcore/load_index_c.cpp index 7f851948545d3..3df3a92879751 100644 --- a/internal/core/src/segcore/load_index_c.cpp +++ b/internal/core/src/segcore/load_index_c.cpp @@ -25,6 +25,7 @@ #include "storage/Util.h" #include "storage/RemoteChunkManagerSingleton.h" #include "storage/LocalChunkManagerSingleton.h" +#include "pb/cgo_msg.pb.h" bool IsLoadWithDisk(const char* index_type, int index_engine_version) { @@ -258,7 +259,8 @@ AppendIndexV2(CTraceContext c_trace, CLoadIndexInfo c_load_index_info) { load_index_info->collection_id, load_index_info->partition_id, load_index_info->segment_id, - load_index_info->field_id}; + load_index_info->field_id, + load_index_info->schema}; milvus::storage::IndexMeta index_meta{load_index_info->segment_id, load_index_info->field_id, load_index_info->index_build_id, @@ -484,3 +486,50 @@ AppendStorageInfo(CLoadIndexInfo c_load_index_info, load_index_info->uri = uri; load_index_info->index_store_version = version; } + +CStatus +FinishLoadIndexInfo(CLoadIndexInfo c_load_index_info, + const uint8_t* serialized_load_index_info, + const uint64_t len) { + try { + auto info_proto = std::make_unique(); + info_proto->ParseFromArray(serialized_load_index_info, len); + auto load_index_info = + static_cast(c_load_index_info); + // TODO: keep this since LoadIndexInfo is used by SegmentSealed. 
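// The block below copies every field of the deserialized proto into the
// C-side LoadIndexInfo in one pass, including the newly carried field schema.
// An illustrative caller-side sketch (variable names are assumptions only):
//   std::string blob = info_proto->SerializeAsString();
//   CStatus s = FinishLoadIndexInfo(
//       c_load_index_info,
//       reinterpret_cast<const uint8_t*>(blob.data()),
//       static_cast<uint64_t>(blob.size()));
//   // s.error_code == milvus::Success on success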
+ { + load_index_info->collection_id = info_proto->collectionid(); + load_index_info->partition_id = info_proto->partitionid(); + load_index_info->segment_id = info_proto->segmentid(); + load_index_info->field_id = info_proto->field().fieldid(); + load_index_info->field_type = + static_cast(info_proto->field().data_type()); + load_index_info->enable_mmap = info_proto->enable_mmap(); + load_index_info->mmap_dir_path = info_proto->mmap_dir_path(); + load_index_info->index_id = info_proto->indexid(); + load_index_info->index_build_id = info_proto->index_buildid(); + load_index_info->index_version = info_proto->index_version(); + for (const auto& [k, v] : info_proto->index_params()) { + load_index_info->index_params[k] = v; + } + load_index_info->index_files.assign( + info_proto->index_files().begin(), + info_proto->index_files().end()); + load_index_info->uri = info_proto->uri(); + load_index_info->index_store_version = + info_proto->index_store_version(); + load_index_info->index_engine_version = + info_proto->index_engine_version(); + load_index_info->schema = info_proto->field(); + } + auto status = CStatus(); + status.error_code = milvus::Success; + status.error_msg = ""; + return status; + } catch (std::exception& e) { + auto status = CStatus(); + status.error_code = milvus::UnexpectedError; + status.error_msg = strdup(e.what()); + return status; + } +} diff --git a/internal/core/src/segcore/load_index_c.h b/internal/core/src/segcore/load_index_c.h index 7a3d89b797670..8755aa7396162 100644 --- a/internal/core/src/segcore/load_index_c.h +++ b/internal/core/src/segcore/load_index_c.h @@ -76,6 +76,11 @@ void AppendStorageInfo(CLoadIndexInfo c_load_index_info, const char* uri, int64_t version); + +CStatus +FinishLoadIndexInfo(CLoadIndexInfo c_load_index_info, + const uint8_t* serialized_load_index_info, + const uint64_t len); #ifdef __cplusplus } #endif diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index df3b8fda7accf..06643ea3f7a34 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -12,6 +12,7 @@ #include "segcore/segment_c.h" #include +#include #include "common/FieldData.h" #include "common/LoadInfo.h" @@ -239,6 +240,9 @@ Insert(CSegmentInterface c_segment, const uint8_t* data_info, const uint64_t data_info_len) { try { + AssertInfo(data_info_len < std::numeric_limits::max(), + "insert data length ({}) exceeds max int", + data_info_len); auto segment = static_cast(c_segment); auto insert_record_proto = std::make_unique(); diff --git a/internal/core/src/storage/Types.h b/internal/core/src/storage/Types.h index 924873dccda64..fbd72d0a59a78 100644 --- a/internal/core/src/storage/Types.h +++ b/internal/core/src/storage/Types.h @@ -64,6 +64,7 @@ struct FieldDataMeta { int64_t partition_id; int64_t segment_id; int64_t field_id; + proto::schema::FieldSchema schema; }; enum CodecType { diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp index 0e714f0a97362..33df073cef5cd 100644 --- a/internal/core/src/storage/Util.cpp +++ b/internal/core/src/storage/Util.cpp @@ -575,11 +575,22 @@ GetObjectData(std::shared_ptr space, } std::vector datas; - for (int i = 0; i < futures.size(); ++i) { - auto res = futures[i].get(); - datas.emplace_back(res->GetFieldData()); + std::exception_ptr first_exception = nullptr; + for (auto& future : futures) { + try { + auto res = future.get(); + datas.emplace_back(res->GetFieldData()); + } catch (...) 
{ + if (!first_exception) { + first_exception = std::current_exception(); + } + } } ReleaseArrowUnused(); + if (first_exception) { + std::rethrow_exception(first_exception); + } + return datas; } @@ -612,12 +623,22 @@ PutIndexData(ChunkManager* remote_chunk_manager, } std::map remote_paths_to_size; + std::exception_ptr first_exception = nullptr; for (auto& future : futures) { - auto res = future.get(); - remote_paths_to_size[res.first] = res.second; + try { + auto res = future.get(); + remote_paths_to_size[res.first] = res.second; + } catch (...) { + if (!first_exception) { + first_exception = std::current_exception(); + } + } } - ReleaseArrowUnused(); + if (first_exception) { + std::rethrow_exception(first_exception); + } + return remote_paths_to_size; } @@ -650,12 +671,22 @@ PutIndexData(std::shared_ptr space, } std::map remote_paths_to_size; + std::exception_ptr first_exception = nullptr; for (auto& future : futures) { - auto res = future.get(); - remote_paths_to_size[res.first] = res.second; + try { + auto res = future.get(); + remote_paths_to_size[res.first] = res.second; + } catch (...) { + if (!first_exception) { + first_exception = std::current_exception(); + } + } } - ReleaseArrowUnused(); + if (first_exception) { + std::rethrow_exception(first_exception); + } + return remote_paths_to_size; } diff --git a/internal/core/thirdparty/knowhere/CMakeLists.txt b/internal/core/thirdparty/knowhere/CMakeLists.txt index 9a146ffe273d1..8af49408f8131 100644 --- a/internal/core/thirdparty/knowhere/CMakeLists.txt +++ b/internal/core/thirdparty/knowhere/CMakeLists.txt @@ -12,7 +12,7 @@ #------------------------------------------------------------------------------- # Update KNOWHERE_VERSION for the first occurrence -set( KNOWHERE_VERSION 89657b08 ) +set( KNOWHERE_VERSION 74997917 ) set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git") message(STATUS "Knowhere repo: ${GIT_REPOSITORY}") message(STATUS "Knowhere version: ${KNOWHERE_VERSION}") diff --git a/internal/core/thirdparty/tantivy/CMakeLists.txt b/internal/core/thirdparty/tantivy/CMakeLists.txt index f4d928922874f..c1435a032a85e 100644 --- a/internal/core/thirdparty/tantivy/CMakeLists.txt +++ b/internal/core/thirdparty/tantivy/CMakeLists.txt @@ -71,3 +71,9 @@ target_link_libraries(bench_tantivy boost_filesystem dl ) + +add_executable(ffi_demo ffi_demo.cpp) +target_link_libraries(ffi_demo + tantivy_binding + dl + ) diff --git a/internal/core/thirdparty/tantivy/ffi_demo.cpp b/internal/core/thirdparty/tantivy/ffi_demo.cpp new file mode 100644 index 0000000000000..1626d655f175d --- /dev/null +++ b/internal/core/thirdparty/tantivy/ffi_demo.cpp @@ -0,0 +1,17 @@ +#include +#include + +#include "tantivy-binding.h" + +int +main(int argc, char* argv[]) { + std::vector data{"data1", "data2", "data3"}; + std::vector datas{}; + for (auto& s : data) { + datas.push_back(s.c_str()); + } + + print_vector_of_strings(datas.data(), datas.size()); + + return 0; +} diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h index 3b22018bf047e..045d4a50e6a2c 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h +++ b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h @@ -97,6 +97,24 @@ void tantivy_index_add_bools(void *ptr, const bool *array, uintptr_t len); void tantivy_index_add_keyword(void *ptr, const char *s); +void tantivy_index_add_multi_int8s(void *ptr, const int8_t *array, uintptr_t 
len); + +void tantivy_index_add_multi_int16s(void *ptr, const int16_t *array, uintptr_t len); + +void tantivy_index_add_multi_int32s(void *ptr, const int32_t *array, uintptr_t len); + +void tantivy_index_add_multi_int64s(void *ptr, const int64_t *array, uintptr_t len); + +void tantivy_index_add_multi_f32s(void *ptr, const float *array, uintptr_t len); + +void tantivy_index_add_multi_f64s(void *ptr, const double *array, uintptr_t len); + +void tantivy_index_add_multi_bools(void *ptr, const bool *array, uintptr_t len); + +void tantivy_index_add_multi_keywords(void *ptr, const char *const *array, uintptr_t len); + bool tantivy_index_exist(const char *path); +void print_vector_of_strings(const char *const *ptr, uintptr_t len); + } // extern "C" diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/demo_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/demo_c.rs new file mode 100644 index 0000000000000..257a41f17a891 --- /dev/null +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/demo_c.rs @@ -0,0 +1,14 @@ +use std::{ffi::{c_char, CStr}, slice}; + +#[no_mangle] +pub extern "C" fn print_vector_of_strings(ptr: *const *const c_char, len: usize) { + let arr : &[*const c_char] = unsafe { + slice::from_raw_parts(ptr, len) + }; + for element in arr { + let c_str = unsafe { + CStr::from_ptr(*element) + }; + println!("{}", c_str.to_str().unwrap()); + } +} \ No newline at end of file diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs index ce96a5b4d5a30..2c8d56bf38694 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs @@ -1,10 +1,11 @@ -use futures::executor::block_on; +use std::ffi::CStr; +use libc::c_char; use tantivy::schema::{Field, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, INDEXED}; -use tantivy::{doc, tokenizer, Index, IndexWriter, SingleSegmentIndexWriter}; +use tantivy::{doc, tokenizer, Index, SingleSegmentIndexWriter, Document}; use crate::data_type::TantivyDataType; -use crate::index_writer; + use crate::log::init_log; pub struct IndexWriterWrapper { @@ -98,7 +99,74 @@ impl IndexWriterWrapper { .unwrap(); } - pub fn finish(mut self) { + pub fn add_multi_i8s(&mut self, datas: &[i8]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data as i64); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_i16s(&mut self, datas: &[i16]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data as i64); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_i32s(&mut self, datas: &[i32]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data as i64); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_i64s(&mut self, datas: &[i64]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_f32s(&mut self, datas: &[f32]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data as f64); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_f64s(&mut self, datas: &[f64]) { + let mut document = 
Document::default(); + for data in datas { + document.add_field_value(self.field, *data); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_bools(&mut self, datas: &[bool]) { + let mut document = Document::default(); + for data in datas { + document.add_field_value(self.field, *data); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn add_multi_keywords(&mut self, datas: &[*const c_char]) { + let mut document = Document::default(); + for element in datas { + let data = unsafe { + CStr::from_ptr(*element) + }; + document.add_field_value(self.field, data.to_str().unwrap()); + } + self.index_writer.add_document(document).unwrap(); + } + + pub fn finish(self) { self.index_writer .finalize() .expect("failed to build inverted index"); diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs index c8822781158e8..b13f550d7cb00 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_c.rs @@ -122,3 +122,77 @@ pub extern "C" fn tantivy_index_add_keyword(ptr: *mut c_void, s: *const c_char) let c_str = unsafe { CStr::from_ptr(s) }; unsafe { (*real).add_keyword(c_str.to_str().unwrap()) } } + +// --------------------------------------------- array ------------------------------------------ + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_int8s(ptr: *mut c_void, array: *const i8, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len); + (*real).add_multi_i8s(arr) + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_int16s(ptr: *mut c_void, array: *const i16, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_i16s(arr); + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_int32s(ptr: *mut c_void, array: *const i32, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_i32s(arr); + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_int64s(ptr: *mut c_void, array: *const i64, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_i64s(arr); + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_f32s(ptr: *mut c_void, array: *const f32, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_f32s(arr); + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_f64s(ptr: *mut c_void, array: *const f64, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_f64s(arr); + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_bools(ptr: *mut c_void, array: *const bool, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len) ; + (*real).add_multi_bools(arr); + } +} + +#[no_mangle] +pub extern "C" fn tantivy_index_add_multi_keywords(ptr: *mut c_void, array: *const *const c_char, len: usize) { + let real = ptr as *mut IndexWriterWrapper; + unsafe { + let arr = slice::from_raw_parts(array, len); + (*real).add_multi_keywords(arr) + } +} diff --git 
a/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs index aa069cb3b32b6..c6193de3f6908 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/lib.rs @@ -10,6 +10,7 @@ mod log; mod util; mod util_c; mod vec_collector; +mod demo_c; pub fn add(left: usize, right: usize) -> usize { left + right diff --git a/internal/core/thirdparty/tantivy/tantivy-wrapper.h b/internal/core/thirdparty/tantivy/tantivy-wrapper.h index 358f14ea49ed0..3076f502aee21 100644 --- a/internal/core/thirdparty/tantivy/tantivy-wrapper.h +++ b/internal/core/thirdparty/tantivy/tantivy-wrapper.h @@ -1,5 +1,7 @@ #include #include +#include +#include #include "tantivy-binding.h" namespace milvus::tantivy { @@ -49,6 +51,15 @@ struct RustArrayWrapper { std::cout << ss.str() << std::endl; } + std::set + to_set() { + std::set s; + for (int i = 0; i < array_.len; i++) { + s.insert(array_.array[i]); + } + return s; + } + RustArray array_; private: @@ -186,6 +197,60 @@ struct TantivyIndexWrapper { typeid(T).name()); } + template + void + add_multi_data(const T* array, uintptr_t len) { + assert(!finished_); + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_bools(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_int8s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_int16s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_int32s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_int64s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_f32s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + tantivy_index_add_multi_f64s(writer_, array, len); + return; + } + + if constexpr (std::is_same_v) { + std::vector views; + for (uintptr_t i = 0; i < len; i++) { + views.push_back(array[i].c_str()); + } + tantivy_index_add_multi_keywords(writer_, views.data(), len); + return; + } + + throw fmt::format( + "InvertedIndex.add_multi_data: unsupported data type: {}", + typeid(T).name()); + } + inline void finish() { if (!finished_) { diff --git a/internal/core/thirdparty/tantivy/test.cpp b/internal/core/thirdparty/tantivy/test.cpp index 1c67a69673a5c..602ea3449f0a2 100644 --- a/internal/core/thirdparty/tantivy/test.cpp +++ b/internal/core/thirdparty/tantivy/test.cpp @@ -200,6 +200,77 @@ test_32717() { } } +template +std::map> +build_inverted_index(const std::vector>& vec_of_array) { + std::map> inverted_index; + for (uint32_t i = 0; i < vec_of_array.size(); i++) { + for (const auto& term : vec_of_array[i]) { + inverted_index[term].insert(i); + } + } + return inverted_index; +} + +void +test_array_int() { + using T = int64_t; + + auto path = "/tmp/inverted-index/test-binding/"; + boost::filesystem::remove_all(path); + boost::filesystem::create_directories(path); + auto w = TantivyIndexWrapper("test_field_name", guess_data_type(), path); + + std::vector> vec_of_array{ + {10, 40, 50}, + {20, 50}, + {10, 50, 60}, + }; + + for (const auto& arr : vec_of_array) { + w.add_multi_data(arr.data(), arr.size()); + } + w.finish(); + + assert(w.count() == vec_of_array.size()); + + auto inverted_index = build_inverted_index(vec_of_array); + for (const auto& [term, posting_list] : inverted_index) { + auto hits = w.term_query(term).to_set(); + 
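// term_query() yields a RustArrayWrapper of matching row offsets; the to_set()
// helper added above converts it into a std::set so it can be compared
// directly against the reference posting list from build_inverted_index().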
assert(posting_list == hits); + } +} + +void +test_array_string() { + using T = std::string; + + auto path = "/tmp/inverted-index/test-binding/"; + boost::filesystem::remove_all(path); + boost::filesystem::create_directories(path); + auto w = + TantivyIndexWrapper("test_field_name", TantivyDataType::Keyword, path); + + std::vector> vec_of_array{ + {"10", "40", "50"}, + {"20", "50"}, + {"10", "50", "60"}, + }; + + for (const auto& arr : vec_of_array) { + w.add_multi_data(arr.data(), arr.size()); + } + w.finish(); + + assert(w.count() == vec_of_array.size()); + + auto inverted_index = build_inverted_index(vec_of_array); + for (const auto& [term, posting_list] : inverted_index) { + auto hits = w.term_query(term).to_set(); + assert(posting_list == hits); + } +} + int main(int argc, char* argv[]) { test_32717(); @@ -216,5 +287,8 @@ main(int argc, char* argv[]) { run(); + test_array_int(); + test_array_string(); + return 0; } diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index be78b2b36c43b..7abde651f3187 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -32,7 +32,7 @@ set(MILVUS_TEST_FILES test_growing.cpp test_growing_index.cpp test_indexing.cpp - test_bitmap_index.cpp + test_hybrid_index.cpp test_index_c_api.cpp test_index_wrapper.cpp test_init.cpp diff --git a/internal/core/unittest/test_expr.cpp b/internal/core/unittest/test_expr.cpp index efeae58f78e4a..339c92955b909 100644 --- a/internal/core/unittest/test_expr.cpp +++ b/internal/core/unittest/test_expr.cpp @@ -10,12 +10,14 @@ // or implied. See the License for the specific language governing permissions and limitations under the License #include +#include #include #include #include #include #include #include +#include #include "common/Json.h" #include "common/Types.h" @@ -35,6 +37,8 @@ #include "exec/expression/Expr.h" #include "exec/Task.h" #include "expr/ITypeExpr.h" +#include "index/BitmapIndex.h" +#include "index/InvertedIndexTantivy.h" using namespace milvus; using namespace milvus::query; @@ -1271,7 +1275,7 @@ TEST(Expr, TestExprPerformance) { {DataType::DOUBLE, double_fid}}; auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -1678,7 +1682,7 @@ TEST_P(ExprTest, TestSealedSegmentGetBatchSize) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 100000; auto raw_data = DataGen(schema, N); // load field data auto fields = schema->get_fields(); @@ -1739,7 +1743,7 @@ TEST_P(ExprTest, TestGrowingSegmentGetBatchSize) { schema->set_primary_field_id(str1_fid); auto seg = CreateGrowingSegment(schema, empty_index_meta); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); seg->PreInsert(N); seg->Insert(0, @@ -1804,7 +1808,7 @@ TEST_P(ExprTest, TestConjuctExpr) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data auto fields = schema->get_fields(); @@ -1871,7 +1875,7 @@ TEST_P(ExprTest, TestUnaryBenchTest) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -1942,7 +1946,7 @@ TEST_P(ExprTest, TestBinaryRangeBenchTest) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = 
DataGen(schema, N); // load field data @@ -2022,7 +2026,7 @@ TEST_P(ExprTest, TestLogicalUnaryBenchTest) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -2096,7 +2100,7 @@ TEST_P(ExprTest, TestBinaryLogicalBenchTest) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -2180,7 +2184,7 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeBenchExpr) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -2263,7 +2267,7 @@ TEST_P(ExprTest, TestCompareExprBenchTest) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data @@ -2333,7 +2337,7 @@ TEST_P(ExprTest, TestRefactorExprs) { schema->set_primary_field_id(str1_fid); auto seg = CreateSealedSegment(schema); - int N = 1000000; + int N = 10000; auto raw_data = DataGen(schema, N); // load field data diff --git a/internal/core/unittest/test_bitmap_index.cpp b/internal/core/unittest/test_hybrid_index.cpp similarity index 63% rename from internal/core/unittest/test_bitmap_index.cpp rename to internal/core/unittest/test_hybrid_index.cpp index 99d877d744587..42087199300df 100644 --- a/internal/core/unittest/test_bitmap_index.cpp +++ b/internal/core/unittest/test_hybrid_index.cpp @@ -17,6 +17,7 @@ #include "common/Tracer.h" #include "index/BitmapIndex.h" +#include "index/HybridScalarIndex.h" #include "storage/Util.h" #include "storage/InsertData.h" #include "indexbuilder/IndexFactory.h" @@ -60,7 +61,7 @@ GenerateData(const size_t size, const size_t cardinality) { } template -class BitmapIndexTest : public testing::Test { +class HybridIndexTestV1 : public testing::Test { protected: void Init(int64_t collection_id, @@ -88,7 +89,8 @@ class BitmapIndexTest : public testing::Test { auto serialized_bytes = insert_data.Serialize(storage::Remote); - auto log_path = fmt::format("{}/{}/{}/{}/{}", + auto log_path = fmt::format("/{}/{}/{}/{}/{}/{}", + "/tmp/test_hybrid/", collection_id, partition_id, segment_id, @@ -103,6 +105,7 @@ class BitmapIndexTest : public testing::Test { Config config; config["index_type"] = milvus::index::BITMAP_INDEX_TYPE; config["insert_files"] = std::vector{log_path}; + config["bitmap_cardinality_limit"] = "1000"; auto build_index = indexbuilder::IndexFactory::GetInstance().CreateIndex( @@ -125,10 +128,14 @@ class BitmapIndexTest : public testing::Test { index_->Load(milvus::tracer::TraceContext{}, config); } - void - SetUp() override { + virtual void + SetParam() { nb_ = 10000; cardinality_ = 30; + } + void + SetUp() override { + SetParam(); if constexpr (std::is_same_v) { type_ = DataType::INT8; @@ -162,7 +169,7 @@ class BitmapIndexTest : public testing::Test { index_version); } - virtual ~BitmapIndexTest() override { + virtual ~HybridIndexTestV1() override { boost::filesystem::remove_all(chunk_manager_->GetRootPath()); } @@ -176,7 +183,8 @@ class BitmapIndexTest : public testing::Test { test_data.push_back(data_[i]); s.insert(data_[i]); } - auto index_ptr = dynamic_cast*>(index_.get()); + auto index_ptr = + dynamic_cast*>(index_.get()); auto bitset = index_ptr->In(test_data.size(), test_data.data()); for (size_t i = 0; i < bitset.size(); i++) { 
ASSERT_EQ(bitset[i], s.find(data_[i]) != s.end()); @@ -192,7 +200,8 @@ class BitmapIndexTest : public testing::Test { test_data.push_back(data_[i]); s.insert(data_[i]); } - auto index_ptr = dynamic_cast*>(index_.get()); + auto index_ptr = + dynamic_cast*>(index_.get()); auto bitset = index_ptr->NotIn(test_data.size(), test_data.data()); for (size_t i = 0; i < bitset.size(); i++) { ASSERT_EQ(bitset[i], s.find(data_[i]) == s.end()); @@ -219,7 +228,7 @@ class BitmapIndexTest : public testing::Test { }; for (const auto& [test_value, op, ref] : test_cases) { auto index_ptr = - dynamic_cast*>(index_.get()); + dynamic_cast*>(index_.get()); auto bitset = index_ptr->Range(test_value, op); for (size_t i = 0; i < bitset.size(); i++) { auto ans = bitset[i]; @@ -232,8 +241,65 @@ class BitmapIndexTest : public testing::Test { } } - private: - std::shared_ptr chunk_manager_; + void + TestRangeCompareFunc() { + if constexpr (!std::is_same_v) { + using RefFunc = std::function; + struct TestParam { + int64_t lower_val; + int64_t upper_val; + bool lower_inclusive; + bool upper_inclusive; + RefFunc ref; + }; + std::vector test_cases = { + { + 10, + 30, + false, + false, + [&](int64_t i) { return 10 < data_[i] && data_[i] < 30; }, + }, + { + 10, + 30, + true, + false, + [&](int64_t i) { return 10 <= data_[i] && data_[i] < 30; }, + }, + { + 10, + 30, + true, + true, + [&](int64_t i) { return 10 <= data_[i] && data_[i] <= 30; }, + }, + { + 10, + 30, + false, + true, + [&](int64_t i) { return 10 < data_[i] && data_[i] <= 30; }, + }}; + + for (const auto& test_case : test_cases) { + auto index_ptr = + dynamic_cast*>(index_.get()); + auto bitset = index_ptr->Range(test_case.lower_val, + test_case.lower_inclusive, + test_case.upper_val, + test_case.upper_inclusive); + for (size_t i = 0; i < bitset.size(); i++) { + auto ans = bitset[i]; + auto should = test_case.ref(i); + ASSERT_EQ(ans, should) + << "lower:" << test_case.lower_val + << "upper:" << test_case.upper_val << ", @" << i + << ", ans: " << ans << ", ref: " << should; + } + } + } + } public: IndexBasePtr index_; @@ -241,34 +307,92 @@ class BitmapIndexTest : public testing::Test { size_t nb_; size_t cardinality_; boost::container::vector data_; + std::shared_ptr chunk_manager_; +}; + +TYPED_TEST_SUITE_P(HybridIndexTestV1); + +TYPED_TEST_P(HybridIndexTestV1, CountFuncTest) { + auto count = this->index_->Count(); + EXPECT_EQ(count, this->nb_); +} + +TYPED_TEST_P(HybridIndexTestV1, INFuncTest) { + this->TestInFunc(); +} + +TYPED_TEST_P(HybridIndexTestV1, NotINFuncTest) { + this->TestNotInFunc(); +} + +TYPED_TEST_P(HybridIndexTestV1, CompareValFuncTest) { + this->TestCompareValueFunc(); +} + +TYPED_TEST_P(HybridIndexTestV1, TestRangeCompareFuncTest) { + this->TestRangeCompareFunc(); +} + +using BitmapType = + testing::Types; + +REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestV1, + CountFuncTest, + INFuncTest, + NotINFuncTest, + CompareValFuncTest, + TestRangeCompareFuncTest); + +INSTANTIATE_TYPED_TEST_SUITE_P(HybridIndexE2ECheck_LowCardinality, + HybridIndexTestV1, + BitmapType); + +template +class HybridIndexTestV2 : public HybridIndexTestV1 { + public: + virtual void + SetParam() override { + this->nb_ = 10000; + this->cardinality_ = 2000; + } + + virtual ~HybridIndexTestV2() { + } }; -TYPED_TEST_SUITE_P(BitmapIndexTest); +TYPED_TEST_SUITE_P(HybridIndexTestV2); -TYPED_TEST_P(BitmapIndexTest, CountFuncTest) { +TYPED_TEST_P(HybridIndexTestV2, CountFuncTest) { auto count = this->index_->Count(); EXPECT_EQ(count, this->nb_); } -TYPED_TEST_P(BitmapIndexTest, INFuncTest) { 
+TYPED_TEST_P(HybridIndexTestV2, INFuncTest) { this->TestInFunc(); } -TYPED_TEST_P(BitmapIndexTest, NotINFuncTest) { +TYPED_TEST_P(HybridIndexTestV2, NotINFuncTest) { this->TestNotInFunc(); } -TYPED_TEST_P(BitmapIndexTest, CompareValFuncTest) { +TYPED_TEST_P(HybridIndexTestV2, CompareValFuncTest) { this->TestCompareValueFunc(); } +TYPED_TEST_P(HybridIndexTestV2, TestRangeCompareFuncTest) { + this->TestRangeCompareFunc(); +} + using BitmapType = testing::Types; -REGISTER_TYPED_TEST_SUITE_P(BitmapIndexTest, +REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestV2, CountFuncTest, INFuncTest, NotINFuncTest, - CompareValFuncTest); + CompareValFuncTest, + TestRangeCompareFuncTest); -INSTANTIATE_TYPED_TEST_SUITE_P(BitmapE2ECheck, BitmapIndexTest, BitmapType); +INSTANTIATE_TYPED_TEST_SUITE_P(HybridIndexE2ECheck_HighCardinality, + HybridIndexTestV2, + BitmapType); diff --git a/internal/core/unittest/test_index_wrapper.cpp b/internal/core/unittest/test_index_wrapper.cpp index 39f6841957dc4..79581bc96947b 100644 --- a/internal/core/unittest/test_index_wrapper.cpp +++ b/internal/core/unittest/test_index_wrapper.cpp @@ -23,7 +23,7 @@ using namespace milvus; using namespace milvus::segcore; -using namespace milvus::proto::indexcgo; +using namespace milvus::proto; using Param = std::pair; diff --git a/internal/core/unittest/test_indexing.cpp b/internal/core/unittest/test_indexing.cpp index 4cdf94420bd19..c02f427736095 100644 --- a/internal/core/unittest/test_indexing.cpp +++ b/internal/core/unittest/test_indexing.cpp @@ -570,7 +570,7 @@ TEST_P(IndexTest, Mmap) { load_conf["mmap_filepath"] = "mmap/test_index_mmap_" + index_type; vec_index->Load(milvus::tracer::TraceContext{}, load_conf); EXPECT_EQ(vec_index->Count(), NB); - EXPECT_EQ(vec_index->GetDim(), DIM); + EXPECT_EQ(vec_index->GetDim(), is_sparse ? 
kTestSparseDim : DIM); milvus::SearchInfo search_info; search_info.topk_ = K; diff --git a/internal/core/unittest/test_inverted_index.cpp b/internal/core/unittest/test_inverted_index.cpp index eeddfe6e9d81a..d01813ab94e6a 100644 --- a/internal/core/unittest/test_inverted_index.cpp +++ b/internal/core/unittest/test_inverted_index.cpp @@ -32,13 +32,20 @@ auto gen_field_meta(int64_t collection_id = 1, int64_t partition_id = 2, int64_t segment_id = 3, - int64_t field_id = 101) -> storage::FieldDataMeta { - return storage::FieldDataMeta{ + int64_t field_id = 101, + DataType data_type = DataType::NONE, + DataType element_type = DataType::NONE) + -> storage::FieldDataMeta { + auto meta = storage::FieldDataMeta{ .collection_id = collection_id, .partition_id = partition_id, .segment_id = segment_id, .field_id = field_id, }; + meta.schema.set_data_type(static_cast(data_type)); + meta.schema.set_element_type( + static_cast(element_type)); + return meta; } auto @@ -86,7 +93,7 @@ struct ChunkManagerWrapper { }; } // namespace milvus::test -template +template void test_run() { int64_t collection_id = 1; @@ -96,8 +103,8 @@ test_run() { int64_t index_build_id = 1000; int64_t index_version = 10000; - auto field_meta = - test::gen_field_meta(collection_id, partition_id, segment_id, field_id); + auto field_meta = test::gen_field_meta( + collection_id, partition_id, segment_id, field_id, dtype, element_type); auto index_meta = test::gen_index_meta( segment_id, field_id, index_build_id, index_version); @@ -305,8 +312,12 @@ test_string() { int64_t index_build_id = 1000; int64_t index_version = 10000; - auto field_meta = - test::gen_field_meta(collection_id, partition_id, segment_id, field_id); + auto field_meta = test::gen_field_meta(collection_id, + partition_id, + segment_id, + field_id, + dtype, + DataType::NONE); auto index_meta = test::gen_index_meta( segment_id, field_id, index_build_id, index_version); diff --git a/internal/core/unittest/test_offset_ordered_array.cpp b/internal/core/unittest/test_offset_ordered_array.cpp index ec371c6114540..1eb2e272b0f8f 100644 --- a/internal/core/unittest/test_offset_ordered_array.cpp +++ b/internal/core/unittest/test_offset_ordered_array.cpp @@ -65,8 +65,6 @@ using TypeOfPks = testing::Types; TYPED_TEST_SUITE_P(TypedOffsetOrderedArrayTest); TYPED_TEST_P(TypedOffsetOrderedArrayTest, find_first) { - std::vector offsets; - // not sealed. ASSERT_ANY_THROW(this->map_.find_first(Unlimited, {}, true)); @@ -81,40 +79,62 @@ TYPED_TEST_P(TypedOffsetOrderedArrayTest, find_first) { this->seal(); // all is satisfied. 
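// The rewritten block below exercises find_first() with both a limit of
// num / 2 and an unlimited call for each bitset, checking the new has_more_res
// flag alongside the ordering of the returned offsets.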
- BitsetType all(num); - all.set(); - offsets = this->map_.find_first(num / 2, all, true); - ASSERT_EQ(num / 2, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); - } - offsets = this->map_.find_first(Unlimited, all, true); - ASSERT_EQ(num, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + BitsetType all(num); + all.set(); + { + auto [offsets, has_more_res] = + this->map_.find_first(num / 2, all, true); + ASSERT_EQ(num / 2, offsets.size()); + ASSERT_TRUE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } + } + { + auto [offsets, has_more_res] = + this->map_.find_first(Unlimited, all, true); + ASSERT_EQ(num, offsets.size()); + ASSERT_FALSE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } + } } - - // corner case, segment offset exceeds the size of bitset. - BitsetType all_minus_1(num - 1); - all_minus_1.set(); - offsets = this->map_.find_first(num / 2, all_minus_1, true); - ASSERT_EQ(num / 2, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + // corner case, segment offset exceeds the size of bitset. + BitsetType all_minus_1(num - 1); + all_minus_1.set(); + { + auto [offsets, has_more_res] = + this->map_.find_first(num / 2, all_minus_1, true); + ASSERT_EQ(num / 2, offsets.size()); + ASSERT_TRUE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } + } + { + auto [offsets, has_more_res] = + this->map_.find_first(Unlimited, all_minus_1, true); + ASSERT_EQ(all_minus_1.size(), offsets.size()); + ASSERT_FALSE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } + } } - offsets = this->map_.find_first(Unlimited, all_minus_1, true); - ASSERT_EQ(all_minus_1.size(), offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + // none is satisfied. + BitsetType none(num); + none.reset(); + auto result_pair = this->map_.find_first(num / 2, none, true); + ASSERT_EQ(0, result_pair.first.size()); + ASSERT_TRUE(result_pair.second); + result_pair = this->map_.find_first(NoLimit, none, true); + ASSERT_EQ(0, result_pair.first.size()); + ASSERT_TRUE(result_pair.second); } - - // none is satisfied. - BitsetType none(num); - none.reset(); - offsets = this->map_.find_first(num / 2, none, true); - ASSERT_EQ(0, offsets.size()); - offsets = this->map_.find_first(NoLimit, none, true); - ASSERT_EQ(0, offsets.size()); } REGISTER_TYPED_TEST_SUITE_P(TypedOffsetOrderedArrayTest, find_first); diff --git a/internal/core/unittest/test_offset_ordered_map.cpp b/internal/core/unittest/test_offset_ordered_map.cpp index be16aed9e0eed..36f4bafc83f7a 100644 --- a/internal/core/unittest/test_offset_ordered_map.cpp +++ b/internal/core/unittest/test_offset_ordered_map.cpp @@ -60,12 +60,13 @@ using TypeOfPks = testing::Types; TYPED_TEST_SUITE_P(TypedOffsetOrderedMapTest); TYPED_TEST_P(TypedOffsetOrderedMapTest, find_first) { - std::vector offsets; - // no data. 
- offsets = this->map_.find_first(Unlimited, {}, true); - ASSERT_EQ(0, offsets.size()); - + { + auto [offsets, has_more_res] = + this->map_.find_first(Unlimited, {}, true); + ASSERT_EQ(0, offsets.size()); + ASSERT_FALSE(has_more_res); + } // insert 10 entities. int num = 10; auto data = this->random_generate(num); @@ -76,38 +77,63 @@ TYPED_TEST_P(TypedOffsetOrderedMapTest, find_first) { // all is satisfied. BitsetType all(num); all.set(); - offsets = this->map_.find_first(num / 2, all, true); - ASSERT_EQ(num / 2, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + + { + auto [offsets, has_more_res] = + this->map_.find_first(num / 2, all, true); + ASSERT_EQ(num / 2, offsets.size()); + ASSERT_TRUE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } } - offsets = this->map_.find_first(Unlimited, all, true); - ASSERT_EQ(num, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + auto [offsets, has_more_res] = + this->map_.find_first(Unlimited, all, true); + ASSERT_EQ(num, offsets.size()); + ASSERT_FALSE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } } // corner case, segment offset exceeds the size of bitset. BitsetType all_minus_1(num - 1); all_minus_1.set(); - offsets = this->map_.find_first(num / 2, all_minus_1, true); - ASSERT_EQ(num / 2, offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + auto [offsets, has_more_res] = + this->map_.find_first(num / 2, all_minus_1, true); + ASSERT_EQ(num / 2, offsets.size()); + ASSERT_TRUE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } } - offsets = this->map_.find_first(Unlimited, all_minus_1, true); - ASSERT_EQ(all_minus_1.size(), offsets.size()); - for (int i = 1; i < offsets.size(); i++) { - ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + { + auto [offsets, has_more_res] = + this->map_.find_first(Unlimited, all_minus_1, true); + ASSERT_EQ(all_minus_1.size(), offsets.size()); + ASSERT_FALSE(has_more_res); + for (int i = 1; i < offsets.size(); i++) { + ASSERT_TRUE(data[offsets[i - 1]] <= data[offsets[i]]); + } } // none is satisfied. 
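// The none-satisfied case below still expects has_more_res == true: the map
// itself is not empty, and a scan that returns no offsets under a limit does
// not prove the candidates are exhausted, which is what the assertions check.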
BitsetType none(num); none.reset(); - offsets = this->map_.find_first(num / 2, none, true); - ASSERT_EQ(0, offsets.size()); - offsets = this->map_.find_first(NoLimit, none, true); - ASSERT_EQ(0, offsets.size()); + { + auto [offsets, has_more_res] = + this->map_.find_first(num / 2, none, true); + ASSERT_TRUE(has_more_res); + ASSERT_EQ(0, offsets.size()); + } + { + auto [offsets, has_more_res] = + this->map_.find_first(NoLimit, none, true); + ASSERT_TRUE(has_more_res); + ASSERT_EQ(0, offsets.size()); + } } REGISTER_TYPED_TEST_SUITE_P(TypedOffsetOrderedMapTest, find_first); diff --git a/internal/core/unittest/test_scalar_index.cpp b/internal/core/unittest/test_scalar_index.cpp index 2fc943b57b505..9a99bec26a272 100644 --- a/internal/core/unittest/test_scalar_index.cpp +++ b/internal/core/unittest/test_scalar_index.cpp @@ -15,7 +15,11 @@ #include "gtest/gtest-typed-test.h" #include "index/IndexFactory.h" +#include "index/BitmapIndex.h" +#include "index/InvertedIndexTantivy.h" +#include "index/ScalarIndex.h" #include "common/CDataType.h" +#include "common/Types.h" #include "knowhere/comp/index_param.h" #include "test_utils/indexbuilder_test_utils.h" #include "test_utils/AssertUtils.h" @@ -49,6 +53,14 @@ TYPED_TEST_P(TypedScalarIndexTest, Dummy) { std::cout << milvus::GetDType() << std::endl; } +auto +GetTempFileManagerCtx(CDataType data_type) { + auto ctx = milvus::storage::FileManagerContext(); + ctx.fieldDataMeta.schema.set_data_type( + static_cast(data_type)); + return ctx; +} + TYPED_TEST_P(TypedScalarIndexTest, Constructor) { using T = TypeParam; auto dtype = milvus::GetDType(); @@ -59,7 +71,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Constructor) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); } } @@ -73,7 +85,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Count) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -92,7 +104,7 @@ TYPED_TEST_P(TypedScalarIndexTest, HasRawData) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -112,7 +124,7 @@ TYPED_TEST_P(TypedScalarIndexTest, In) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -131,7 +143,7 @@ TYPED_TEST_P(TypedScalarIndexTest, NotIn) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -150,7 +162,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Reverse) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -169,7 
+181,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Range) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -188,7 +200,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Codec) { create_index_info.index_type = index_type; auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); auto scalar_index = dynamic_cast*>(index.get()); auto arr = GenSortedArr(nb); @@ -197,7 +209,7 @@ TYPED_TEST_P(TypedScalarIndexTest, Codec) { auto binary_set = index->Serialize(nullptr); auto copy_index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( - create_index_info); + create_index_info, GetTempFileManagerCtx(dtype)); copy_index->Load(binary_set); auto copy_scalar_index = @@ -368,12 +380,18 @@ TYPED_TEST_P(TypedScalarIndexTestV2, Base) { auto space = TestSpace(temp_path, vec_size, dataset, scalars); milvus::storage::FileManagerContext file_manager_context( {}, {.field_name = "scalar"}, chunk_manager, space); + file_manager_context.fieldDataMeta.schema.set_data_type( + static_cast(dtype)); auto index = milvus::index::IndexFactory::GetInstance().CreateScalarIndex( create_index_info, file_manager_context, space); auto scalar_index = dynamic_cast*>(index.get()); - scalar_index->BuildV2(); + milvus::Config config; + if (index_type == "BITMAP") { + config["bitmap_cardinality_limit"] = "1000"; + } + scalar_index->BuildV2(config); scalar_index->UploadV2(); auto new_index = @@ -391,3 +409,260 @@ REGISTER_TYPED_TEST_SUITE_P(TypedScalarIndexTestV2, Base); INSTANTIATE_TYPED_TEST_SUITE_P(ArithmeticCheck, TypedScalarIndexTestV2, ScalarT); + +using namespace milvus::index; +template +std::vector +GenerateRawData(int N, int cardinality) { + using std::vector; + std::default_random_engine random(60); + std::normal_distribution<> distr(0, 1); + vector data(N); + for (auto& x : data) { + x = random() % (cardinality); + } + return data; +} + +template <> +std::vector +GenerateRawData(int N, int cardinality) { + using std::vector; + std::default_random_engine random(60); + std::normal_distribution<> distr(0, 1); + vector data(N); + for (auto& x : data) { + x = std::to_string(random() % (cardinality)); + } + return data; +} + +template +IndexBasePtr +TestBuildIndex(int N, int cardinality, int index_type) { + auto raw_data = GenerateRawData(N, cardinality); + if (index_type == 0) { + auto index = std::make_unique>(); + index->Build(N, raw_data.data()); + return std::move(index); + } else if (index_type == 1) { + if constexpr (std::is_same_v) { + auto index = std::make_unique(); + index->Build(N, raw_data.data()); + return std::move(index); + } + auto index = milvus::index::CreateScalarIndexSort(); + index->Build(N, raw_data.data()); + return std::move(index); + } +} + +template +void +TestIndexSearchIn() { + // low data cardinality + { + int N = 1000; + std::vector data_cardinality = {10, 20, 100}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + std::vector terms; + for (int i = 0; i < 10; i++) { + terms.push_back(static_cast(i)); + } + auto final1 = bitmap_index_ptr->In(10, terms.data()); + auto final2 = 
sort_index_ptr->In(10, terms.data()); + EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->NotIn(10, terms.data()); + auto final4 = sort_index_ptr->NotIn(10, terms.data()); + EXPECT_EQ(final4.size(), final3.size()); + for (int i = 0; i < final3.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } + + // high data cardinality + { + int N = 10000; + std::vector data_cardinality = {1001, 2000}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + std::vector terms; + for (int i = 0; i < 10; i++) { + terms.push_back(static_cast(i)); + } + auto final1 = bitmap_index_ptr->In(10, terms.data()); + auto final2 = sort_index_ptr->In(10, terms.data()); + EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->NotIn(10, terms.data()); + auto final4 = sort_index_ptr->NotIn(10, terms.data()); + EXPECT_EQ(final4.size(), final3.size()); + for (int i = 0; i < final3.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } +} + +template <> +void +TestIndexSearchIn() { + // low data cardinality + { + int N = 1000; + std::vector data_cardinality = {10, 20, 100}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + std::vector terms; + for (int i = 0; i < 10; i++) { + terms.push_back(std::to_string(i)); + } + auto final1 = bitmap_index_ptr->In(10, terms.data()); + auto final2 = sort_index_ptr->In(10, terms.data()); + EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->NotIn(10, terms.data()); + auto final4 = sort_index_ptr->NotIn(10, terms.data()); + EXPECT_EQ(final4.size(), final3.size()); + for (int i = 0; i < final3.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } + // high data cardinality + { + int N = 10000; + std::vector data_cardinality = {1001, 2000}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + std::vector terms; + for (int i = 0; i < 10; i++) { + terms.push_back(std::to_string(i)); + } + auto final1 = bitmap_index_ptr->In(10, terms.data()); + auto final2 = sort_index_ptr->In(10, terms.data()); + EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->NotIn(10, terms.data()); + auto final4 = sort_index_ptr->NotIn(10, terms.data()); + EXPECT_EQ(final4.size(), final3.size()); + for (int i = 0; i < final3.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } +} + +TEST(ScalarTest, test_function_In) { + TestIndexSearchIn(); + TestIndexSearchIn(); + TestIndexSearchIn(); + TestIndexSearchIn(); + TestIndexSearchIn(); + TestIndexSearchIn(); + TestIndexSearchIn(); +} + +template +void +TestIndexSearchRange() { + // low data 
cardinality + { + int N = 1000; + std::vector data_cardinality = {10, 20, 100}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + + auto final1 = bitmap_index_ptr->Range(10, milvus::OpType::LessThan); + auto final2 = sort_index_ptr->Range(10, milvus::OpType::LessThan); + EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->Range(10, true, 100, false); + auto final4 = sort_index_ptr->Range(10, true, 100, false); + EXPECT_EQ(final3.size(), final4.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } + + // high data cardinality + { + int N = 10000; + std::vector data_cardinality = {1001, 2000}; + for (auto& card : data_cardinality) { + auto bitmap_index = TestBuildIndex(N, card, 0); + auto bitmap_index_ptr = + dynamic_cast*>(bitmap_index.get()); + auto sort_index = TestBuildIndex(N, card, 1); + auto sort_index_ptr = + dynamic_cast*>(sort_index.get()); + + auto final1 = bitmap_index_ptr->Range(10, milvus::OpType::LessThan); + auto final2 = sort_index_ptr->Range(10, milvus::OpType::LessThan); + EXPECT_EQ(final1.size(), final2.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final1[i], final2[i]); + } + + auto final3 = bitmap_index_ptr->Range(10, true, 100, false); + auto final4 = sort_index_ptr->Range(10, true, 100, false); + EXPECT_EQ(final3.size(), final4.size()); + for (int i = 0; i < final1.size(); i++) { + EXPECT_EQ(final3[i], final4[i]); + } + } + } +} + +TEST(ScalarTest, test_function_range) { + TestIndexSearchRange(); + TestIndexSearchRange(); + TestIndexSearchRange(); + TestIndexSearchRange(); + TestIndexSearchRange(); + TestIndexSearchRange(); +} diff --git a/internal/core/unittest/test_utils/DataGen.h b/internal/core/unittest/test_utils/DataGen.h index 7566c63757eda..283ccbec3c7db 100644 --- a/internal/core/unittest/test_utils/DataGen.h +++ b/internal/core/unittest/test_utils/DataGen.h @@ -259,6 +259,18 @@ GenerateRandomSparseFloatVector(size_t rows, std::vector> data(rows); + // ensure the actual dim of the entire generated dataset is cols.
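+ // (note: this entry and the per-row fill below are drawn from the num_elements budget, hence the decrements that follow)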
+ data[0][cols - 1] = real_distrib(rng); + --num_elements; + + // Ensure each row has at least one non-zero value + for (size_t i = 0; i < rows; ++i) { + auto col = col_distrib(rng); + float val = real_distrib(rng); + data[i][col] = val; + } + num_elements -= rows; + for (int32_t i = 0; i < num_elements; ++i) { auto row = row_distrib(rng); while (data[row].size() == (size_t)cols) { diff --git a/internal/datacoord/channel_manager.go b/internal/datacoord/channel_manager.go index f7ce6ea9490e9..4fa1927660195 100644 --- a/internal/datacoord/channel_manager.go +++ b/internal/datacoord/channel_manager.go @@ -494,17 +494,9 @@ func (c *ChannelManagerImpl) GetBufferChannels() *NodeChannelInfo { // GetNodeChannelsByCollectionID gets all node channels map of the collection func (c *ChannelManagerImpl) GetNodeChannelsByCollectionID(collectionID UniqueID) map[UniqueID][]string { - nodeChs := make(map[UniqueID][]string) - for _, nodeChannels := range c.GetAssignedChannels() { - var channelNames []string - for name, ch := range nodeChannels.Channels { - if ch.GetCollectionID() == collectionID { - channelNames = append(channelNames, name) - } - } - nodeChs[nodeChannels.NodeID] = channelNames - } - return nodeChs + c.mu.RLock() + defer c.mu.RUnlock() + return c.store.GetNodeChannelsByCollectionID(collectionID) } // Get all channels belong to the collection @@ -891,15 +883,6 @@ func (c *ChannelManagerImpl) GetCollectionIDByChannel(channelName string) (bool, return false, 0 } -func (c *ChannelManagerImpl) GetNodeIDByChannelName(channelName string) (UniqueID, bool) { - for _, nodeChannel := range c.GetAssignedChannels() { - if _, ok := nodeChannel.Channels[channelName]; ok { - return nodeChannel.NodeID, true - } - } - return 0, false -} - func (c *ChannelManagerImpl) GetChannel(nodeID int64, channelName string) (RWChannel, bool) { c.mu.RLock() defer c.mu.RUnlock() diff --git a/internal/datacoord/channel_manager_test.go b/internal/datacoord/channel_manager_test.go index d255e64ac9ef8..5866d638bc83a 100644 --- a/internal/datacoord/channel_manager_test.go +++ b/internal/datacoord/channel_manager_test.go @@ -34,6 +34,7 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/util/dependency" "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/util/paramtable" ) // waitAndStore simulates DataNode's action @@ -401,8 +402,11 @@ func TestChannelManager(t *testing.T) { }() Params.Save(Params.DataCoordCfg.AutoBalance.Key, "true") - prefix := Params.CommonCfg.DataCoordWatchSubPath.GetValue() + + enableRPCK := paramtable.Get().DataCoordCfg.EnableBalanceChannelWithRPC.Key + paramtable.Get().Save(enableRPCK, "false") + defer paramtable.Get().Reset(enableRPCK) t.Run("test AddNode with avalible node", func(t *testing.T) { // Note: this test is based on the default registerPolicy defer watchkv.RemoveWithPrefix("") diff --git a/internal/datacoord/channel_manager_v2.go b/internal/datacoord/channel_manager_v2.go index 6243761ce70cd..4695395eee75b 100644 --- a/internal/datacoord/channel_manager_v2.go +++ b/internal/datacoord/channel_manager_v2.go @@ -23,6 +23,7 @@ import ( "time" "github.com/cockroachdb/errors" + "github.com/golang/protobuf/proto" "github.com/samber/lo" "go.uber.org/zap" @@ -31,6 +32,7 @@ import ( "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/conc" "github.com/milvus-io/milvus/pkg/util/lock" + "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -47,7 +49,6 @@ type 
ChannelManager interface { FindWatcher(channel string) (UniqueID, error) GetChannel(nodeID int64, channel string) (RWChannel, bool) - GetNodeIDByChannelName(channel string) (int64, bool) GetNodeChannelsByCollectionID(collectionID int64) map[int64][]string GetChannelsByCollectionID(collectionID int64) []RWChannel GetChannelNamesByCollectionID(collectionID int64) []string @@ -131,16 +132,19 @@ func (m *ChannelManagerImplV2) Startup(ctx context.Context, legacyNodes, allNode oNodes := m.store.GetNodes() m.mu.Unlock() - // Add new online nodes to the cluster. offLines, newOnLines := lo.Difference(oNodes, allNodes) - lo.ForEach(newOnLines, func(nodeID int64, _ int) { - m.AddNode(nodeID) - }) - // Delete offlines from the cluster - lo.ForEach(offLines, func(nodeID int64, _ int) { - m.DeleteNode(nodeID) - }) + for _, nodeID := range offLines { + if err := m.DeleteNode(nodeID); err != nil { + return err + } + } + // Add new online nodes to the cluster. + for _, nodeID := range newOnLines { + if err := m.AddNode(nodeID); err != nil { + return err + } + } m.mu.Lock() nodeChannels := m.store.GetNodeChannelsBy( @@ -347,31 +351,10 @@ func (m *ChannelManagerImplV2) GetChannel(nodeID int64, channelName string) (RWC return nil, false } -func (m *ChannelManagerImplV2) GetNodeIDByChannelName(channel string) (int64, bool) { - m.mu.RLock() - defer m.mu.RUnlock() - nodeChannels := m.store.GetNodeChannelsBy( - WithoutBufferNode(), - WithChannelName(channel)) - - if len(nodeChannels) > 0 { - return nodeChannels[0].NodeID, true - } - - return 0, false -} - func (m *ChannelManagerImplV2) GetNodeChannelsByCollectionID(collectionID int64) map[int64][]string { m.mu.RLock() defer m.mu.RUnlock() - nodeChs := make(map[UniqueID][]string) - nodeChannels := m.store.GetNodeChannelsBy( - WithoutBufferNode(), - WithCollectionIDV2(collectionID)) - lo.ForEach(nodeChannels, func(info *NodeChannelInfo, _ int) { - nodeChs[info.NodeID] = lo.Keys(info.Channels) - }) - return nodeChs + return m.store.GetNodeChannelsByCollectionID(collectionID) } func (m *ChannelManagerImplV2) GetChannelsByCollectionID(collectionID int64) []RWChannel { @@ -540,9 +523,11 @@ func (m *ChannelManagerImplV2) advanceToNotifies(ctx context.Context, toNotifies ) for _, ch := range nodeAssign.Channels { innerCh := ch + tmpWatchInfo := proto.Clone(innerCh.GetWatchInfo()).(*datapb.ChannelWatchInfo) + tmpWatchInfo.Vchan = m.h.GetDataVChanPositions(innerCh, allPartitionID) future := getOrCreateIOPool().Submit(func() (any, error) { - err := m.Notify(ctx, nodeAssign.NodeID, innerCh.GetWatchInfo()) + err := m.Notify(ctx, nodeAssign.NodeID, tmpWatchInfo) return innerCh, err }) futures = append(futures, future) @@ -654,7 +639,10 @@ func (m *ChannelManagerImplV2) Check(ctx context.Context, nodeID int64, info *da ) resp, err := m.subCluster.CheckChannelOperationProgress(ctx, nodeID, info) if err != nil { - log.Warn("Fail to check channel operation progress") + log.Warn("Fail to check channel operation progress", zap.Error(err)) + if errors.Is(err, merr.ErrNodeNotFound) { + return false, true + } return false, false } log.Info("Got channel operation progress", @@ -709,7 +697,7 @@ func (m *ChannelManagerImplV2) fillChannelWatchInfo(op *ChannelOp) error { } info := &datapb.ChannelWatchInfo{ - Vchan: vcInfo, + Vchan: reduceVChanSize(vcInfo), StartTs: startTs, State: inferStateByOpType(op.Type), Schema: ch.GetSchema(), @@ -730,3 +718,16 @@ func inferStateByOpType(opType ChannelOpType) datapb.ChannelWatchState { return datapb.ChannelWatchState_ToWatch } } + +// Clear segmentID 
in vChannelInfo to reduce meta size. +// About 200k segments will exceed the default meta size limit; +// clearing them keeps the meta much smaller and supports an unbounded segment count. +// +// NOTE: all meta and in-memory watchInfo contain partial VChanInfo that doesn't include segmentIDs. +// Need to recalculate and fill in segmentIDs before notifying the DataNode. +func reduceVChanSize(vChan *datapb.VchannelInfo) *datapb.VchannelInfo { + vChan.DroppedSegmentIds = nil + vChan.FlushedSegmentIds = nil + vChan.UnflushedSegmentIds = nil + return vChan +} diff --git a/internal/datacoord/channel_manager_v2_test.go b/internal/datacoord/channel_manager_v2_test.go index 4bacd11399b0d..b2093b9881b0f 100644 --- a/internal/datacoord/channel_manager_v2_test.go +++ b/internal/datacoord/channel_manager_v2_test.go @@ -21,6 +21,7 @@ import ( "fmt" "testing" + "github.com/cockroachdb/errors" "github.com/golang/protobuf/proto" "github.com/samber/lo" "github.com/stretchr/testify/mock" @@ -31,6 +32,7 @@ import ( "github.com/milvus-io/milvus/internal/kv/predicates" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" ) @@ -446,6 +448,29 @@ func (s *ChannelManagerSuite) TestAdvanceChannelState() { s.checkAssignment(m, 1, "ch1", Watching) s.checkAssignment(m, 1, "ch2", Watching) }) + s.Run("advance watching channels check ErrNodeNotFound", func() { + chNodes := map[string]int64{ + "ch1": 1, + "ch2": 1, + } + s.prepareMeta(chNodes, datapb.ChannelWatchState_ToWatch) + s.mockCluster.EXPECT().NotifyChannelOperation(mock.Anything, mock.Anything, mock.Anything).Return(nil).Twice() + m, err := NewChannelManagerV2(s.mockKv, s.mockHandler, s.mockCluster, s.mockAlloc) + s.Require().NoError(err) + s.checkAssignment(m, 1, "ch1", ToWatch) + s.checkAssignment(m, 1, "ch2", ToWatch) + + m.AdvanceChannelState(ctx) + s.checkAssignment(m, 1, "ch1", Watching) + s.checkAssignment(m, 1, "ch2", Watching) + + s.mockCluster.EXPECT().CheckChannelOperationProgress(mock.Anything, mock.Anything, mock.Anything). + Return(nil, merr.WrapErrNodeNotFound(1)).Twice() + m.AdvanceChannelState(ctx) + s.checkAssignment(m, 1, "ch1", Standby) + s.checkAssignment(m, 1, "ch2", Standby) + }) + s.Run("advance watching channels check watch success", func() { chNodes := map[string]int64{ "ch1": 1, @@ -517,6 +542,28 @@ func (s *ChannelManagerSuite) TestAdvanceChannelState() { s.checkAssignment(m, 1, "ch1", Releasing) s.checkAssignment(m, 1, "ch2", Releasing) }) + s.Run("advance releasing channels check ErrNodeNotFound", func() { + chNodes := map[string]int64{ + "ch1": 1, + "ch2": 1, + } + s.prepareMeta(chNodes, datapb.ChannelWatchState_ToRelease) + s.mockCluster.EXPECT().NotifyChannelOperation(mock.Anything, mock.Anything, mock.Anything).Return(nil).Twice() + m, err := NewChannelManagerV2(s.mockKv, s.mockHandler, s.mockCluster, s.mockAlloc) + s.Require().NoError(err) + s.checkAssignment(m, 1, "ch1", ToRelease) + s.checkAssignment(m, 1, "ch2", ToRelease) + + m.AdvanceChannelState(ctx) + s.checkAssignment(m, 1, "ch1", Releasing) + s.checkAssignment(m, 1, "ch2", Releasing) + + s.mockCluster.EXPECT().CheckChannelOperationProgress(mock.Anything, mock.Anything, mock.Anything).
+ Return(nil, merr.WrapErrNodeNotFound(1)).Twice() + m.AdvanceChannelState(ctx) + s.checkAssignment(m, 1, "ch1", Standby) + s.checkAssignment(m, 1, "ch2", Standby) + }) s.Run("advance releasing channels check release success", func() { chNodes := map[string]int64{ "ch1": 1, @@ -659,5 +706,26 @@ func (s *ChannelManagerSuite) TestStartup() { s.checkAssignment(m, 2, "ch3", ToWatch) } +func (s *ChannelManagerSuite) TestStartupRootCoordFailed() { + chNodes := map[string]int64{ + "ch1": 1, + "ch2": 1, + "ch3": 1, + "ch4": bufferID, + } + s.prepareMeta(chNodes, datapb.ChannelWatchState_ToWatch) + + s.mockAlloc = NewNMockAllocator(s.T()) + s.mockAlloc.EXPECT().allocID(mock.Anything).Return(0, errors.New("mock rootcoord failure")) + m, err := NewChannelManagerV2(s.mockKv, s.mockHandler, s.mockCluster, s.mockAlloc) + s.Require().NoError(err) + + err = m.Startup(context.TODO(), nil, []int64{2}) + s.Error(err) + + err = m.Startup(context.TODO(), nil, []int64{1, 2}) + s.Error(err) +} + func (s *ChannelManagerSuite) TestCheckLoop() {} func (s *ChannelManagerSuite) TestGet() {} diff --git a/internal/datacoord/channel_store.go b/internal/datacoord/channel_store.go index c59e626a6f846..a8b2bb7fc8d95 100644 --- a/internal/datacoord/channel_store.go +++ b/internal/datacoord/channel_store.go @@ -32,11 +32,14 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/timerecord" "github.com/milvus-io/milvus/pkg/util/typeutil" ) // ROChannelStore is a read only channel store for channels and nodes. +// +//go:generate mockery --name=ROChannelStore --structname=ROChannelStore --output=./ --filename=mock_ro_channel_store.go --with-expecter type ROChannelStore interface { // GetNode returns the channel info of a specific node. // Returns nil if the node doesn't belong to the cluster @@ -52,12 +55,16 @@ type ROChannelStore interface { GetNodes() []int64 // GetNodeChannelCount GetNodeChannelCount(nodeID int64) int + // GetNodeChannels for given collection + GetNodeChannelsByCollectionID(collectionID UniqueID) map[UniqueID][]string // GetNodeChannelsBy used by channel_store_v2 and channel_manager_v2 only GetNodeChannelsBy(nodeSelector NodeSelector, channelSelectors ...ChannelSelector) []*NodeChannelInfo } // RWChannelStore is the read write channel store for channels and nodes. +// +//go:generate mockery --name=RWChannelStore --structname=RWChannelStore --output=./ --filename=mock_channel_store.go --with-expecter type RWChannelStore interface { ROChannelStore // Reload restores the buffer channels and node-channels mapping form kv. 
@@ -146,7 +153,11 @@ func (op *ChannelOp) BuildKV() (map[string]string, []string, error) { k := buildNodeChannelKey(op.NodeID, ch.GetName()) switch op.Type { case Add, Watch, Release: - info, err := proto.Marshal(ch.GetWatchInfo()) + tmpWatchInfo := proto.Clone(ch.GetWatchInfo()).(*datapb.ChannelWatchInfo) + if paramtable.Get().DataCoordCfg.EnableBalanceChannelWithRPC.GetAsBool() { + tmpWatchInfo.Vchan = reduceVChanSize(tmpWatchInfo.GetVchan()) + } + info, err := proto.Marshal(tmpWatchInfo) if err != nil { return saves, removals, err } @@ -458,6 +469,23 @@ func (c *ChannelStore) GetNodesChannels() []*NodeChannelInfo { return ret } +func (c *ChannelStore) GetNodeChannelsByCollectionID(collectionID UniqueID) map[UniqueID][]string { + nodeChs := make(map[UniqueID][]string) + for id, info := range c.channelsInfo { + if id == bufferID { + continue + } + var channelNames []string + for name, ch := range info.Channels { + if ch.GetCollectionID() == collectionID { + channelNames = append(channelNames, name) + } + } + nodeChs[id] = channelNames + } + return nodeChs +} + // GetBufferChannelInfo returns all unassigned channels. func (c *ChannelStore) GetBufferChannelInfo() *NodeChannelInfo { if info, ok := c.channelsInfo[bufferID]; ok { diff --git a/internal/datacoord/channel_store_test.go b/internal/datacoord/channel_store_test.go index 235bd5103c616..0790fc6a2ea15 100644 --- a/internal/datacoord/channel_store_test.go +++ b/internal/datacoord/channel_store_test.go @@ -31,6 +31,7 @@ import ( "github.com/milvus-io/milvus/internal/kv/predicates" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/testutils" ) @@ -58,6 +59,9 @@ func genChannelOperationsV1(from, to int64, num int) *ChannelOpSet { } func TestChannelStore_Update(t *testing.T) { + enableRPCK := paramtable.Get().DataCoordCfg.EnableBalanceChannelWithRPC.Key + paramtable.Get().Save(enableRPCK, "false") + defer paramtable.Get().Reset(enableRPCK) txnKv := mocks.NewTxnKV(t) txnKv.EXPECT().MultiSaveAndRemove(mock.Anything, mock.Anything).Run(func(saves map[string]string, removals []string, preds ...predicates.Predicate) { assert.False(t, len(saves)+len(removals) > 64, "too many operations") diff --git a/internal/datacoord/channel_store_v2.go b/internal/datacoord/channel_store_v2.go index 82f0d14e9e922..b6e23f4fd8681 100644 --- a/internal/datacoord/channel_store_v2.go +++ b/internal/datacoord/channel_store_v2.go @@ -164,6 +164,7 @@ func (c *StateChannelStore) addAssignment(nodeID int64, channel RWChannel) { // DELETE + WATCH ---> from bufferID to nodeID // DELETE + WATCH ---> from lagecyID to nodeID // DELETE + WATCH ---> from deletedNode to nodeID/bufferID +// DELETE + WATCH ---> from releasedNode to nodeID/bufferID // RELEASE ---> release from nodeID // WATCH ---> watch to a new channel // DELETE ---> remove the channel @@ -223,7 +224,7 @@ func (c *StateChannelStore) getChannel(nodeID int64, channelName string) *StateC if storedChannel, ok := cInfo.Channels[channelName]; ok { return storedChannel.(*StateChannel) } - log.Error("Channel doesn't exist in Node", zap.String("channel", channelName), zap.Int64("nodeID", nodeID)) + log.Debug("Channel doesn't exist in Node", zap.String("channel", channelName), zap.Int64("nodeID", nodeID)) } else { log.Error("Node doesn't exist", zap.Int64("NodeID", nodeID)) } @@ -366,7 +367,7 @@ func WithChannelStates(states ...ChannelState) ChannelSelector { } func (c *StateChannelStore) 
GetNodeChannelsBy(nodeSelector NodeSelector, channelSelectors ...ChannelSelector) []*NodeChannelInfo { - nodeChannels := make(map[int64]*NodeChannelInfo) + var nodeChannels []*NodeChannelInfo for nodeID, cInfo := range c.channelsInfo { if nodeSelector(nodeID) { selected := make(map[string]RWChannel) @@ -382,13 +383,13 @@ func (c *StateChannelStore) GetNodeChannelsBy(nodeSelector NodeSelector, channel selected[chName] = channel } } - nodeChannels[nodeID] = &NodeChannelInfo{ + nodeChannels = append(nodeChannels, &NodeChannelInfo{ NodeID: nodeID, Channels: selected, - } + }) } } - return lo.Values(nodeChannels) + return nodeChannels } func (c *StateChannelStore) GetNodesChannels() []*NodeChannelInfo { @@ -401,6 +402,23 @@ func (c *StateChannelStore) GetNodesChannels() []*NodeChannelInfo { return ret } +func (c *StateChannelStore) GetNodeChannelsByCollectionID(collectionID UniqueID) map[UniqueID][]string { + nodeChs := make(map[UniqueID][]string) + for id, info := range c.channelsInfo { + if id == bufferID { + continue + } + var channelNames []string + for name, ch := range info.Channels { + if ch.GetCollectionID() == collectionID { + channelNames = append(channelNames, name) + } + } + nodeChs[id] = channelNames + } + return nodeChs +} + func (c *StateChannelStore) GetBufferChannelInfo() *NodeChannelInfo { return c.GetNode(bufferID) } diff --git a/internal/datacoord/channel_store_v2_test.go b/internal/datacoord/channel_store_v2_test.go index d2f9a22f58314..2228ff971499d 100644 --- a/internal/datacoord/channel_store_v2_test.go +++ b/internal/datacoord/channel_store_v2_test.go @@ -257,6 +257,42 @@ func (s *StateChannelStoreSuite) TestUpdateWithTxnLimit() { } } +func (s *StateChannelStoreSuite) TestUpdateMeta2000kSegs() { + ch := getChannel("ch1", 1) + info := ch.GetWatchInfo() + // way larger than limit=2097152 + seg2000k := make([]int64, 2000000) + for i := range seg2000k { + seg2000k[i] = int64(i) + } + info.Vchan.FlushedSegmentIds = seg2000k + ch.UpdateWatchInfo(info) + + opSet := NewChannelOpSet( + NewChannelOp(bufferID, Delete, ch), + NewChannelOp(100, Watch, ch), + ) + s.SetupTest() + s.mockTxn.EXPECT().MultiSaveAndRemove(mock.Anything, mock.Anything). 
+ Run(func(saves map[string]string, removals []string, preds ...predicates.Predicate) { + }).Return(nil).Once() + + store := NewStateChannelStore(s.mockTxn) + store.AddNode(100) + s.Require().Equal(0, store.GetNodeChannelCount(100)) + store.addAssignment(bufferID, ch) + s.Require().Equal(1, store.GetNodeChannelCount(bufferID)) + + err := store.updateMeta(opSet) + s.NoError(err) + + got := store.GetNodeChannelsBy(WithNodeIDs(100)) + s.NotNil(got) + s.Require().Equal(1, len(got)) + gotInfo := got[0] + s.ElementsMatch([]string{"ch1"}, lo.Keys(gotInfo.Channels)) +} + func (s *StateChannelStoreSuite) TestUpdateMeta() { tests := []struct { description string @@ -474,7 +510,7 @@ func genChannelOperations(nodeID int64, opType ChannelOpType, num int) *ChannelO for i := 0; i < num; i++ { name := fmt.Sprintf("ch%d", i) channel := NewStateChannel(getChannel(name, 1)) - channel.Info = &datapb.ChannelWatchInfo{} + channel.Info = generateWatchInfo(name, datapb.ChannelWatchState_ToWatch) channels = append(channels, channel) } diff --git a/internal/datacoord/compaction.go b/internal/datacoord/compaction.go index d4d7a0ef82751..6a11765dc17bf 100644 --- a/internal/datacoord/compaction.go +++ b/internal/datacoord/compaction.go @@ -45,11 +45,12 @@ const ( tsTimeout = uint64(1) ) +//go:generate mockery --name=compactionPlanContext --structname=MockCompactionPlanContext --output=./ --filename=mock_compaction_plan_context.go --with-expecter --inpackage type compactionPlanContext interface { start() stop() // execCompactionPlan start to execute plan and return immediately - execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) error + execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) // getCompaction return compaction task. If planId does not exist, return nil. 
getCompaction(planID int64) *compactionTask // updateCompaction set the compaction state to timeout or completed @@ -277,14 +278,8 @@ func (c *compactionPlanHandler) updateTask(planID int64, opts ...compactionTaskO } } -func (c *compactionPlanHandler) enqueuePlan(signal *compactionSignal, plan *datapb.CompactionPlan) error { - nodeID, err := c.chManager.FindWatcher(plan.GetChannel()) - if err != nil { - log.Error("failed to find watcher", zap.Int64("planID", plan.GetPlanID()), zap.Error(err)) - return err - } - - log := log.With(zap.Int64("planID", plan.GetPlanID()), zap.Int64("nodeID", nodeID)) +func (c *compactionPlanHandler) enqueuePlan(signal *compactionSignal, plan *datapb.CompactionPlan) { + log := log.With(zap.Int64("planID", plan.GetPlanID())) c.setSegmentsCompacting(plan, true) _, span := otel.Tracer(typeutil.DataCoordRole).Start(context.Background(), fmt.Sprintf("Compaction-%s", plan.GetType())) @@ -293,7 +288,6 @@ func (c *compactionPlanHandler) enqueuePlan(signal *compactionSignal, plan *data triggerInfo: signal, plan: plan, state: pipelining, - dataNodeID: nodeID, span: span, } c.mu.Lock() @@ -301,8 +295,7 @@ func (c *compactionPlanHandler) enqueuePlan(signal *compactionSignal, plan *data c.mu.Unlock() c.scheduler.Submit(task) - log.Info("Compaction plan submited") - return nil + log.Info("Compaction plan submitted") } func (c *compactionPlanHandler) RefreshPlan(task *compactionTask) error { @@ -322,7 +315,6 @@ func (c *compactionPlanHandler) RefreshPlan(task *compactionTask) error { // Select sealed L1 segments for LevelZero compaction that meets the condition: // dmlPos < triggerInfo.pos - // TODO: select L2 segments too sealedSegments := c.meta.SelectSegments(WithCollection(task.triggerInfo.collectionID), SegmentFilterFunc(func(info *SegmentInfo) bool { return (task.triggerInfo.partitionID == -1 || info.GetPartitionID() == task.triggerInfo.partitionID) && info.GetInsertChannel() == plan.GetChannel() && @@ -338,10 +330,14 @@ func (c *compactionPlanHandler) RefreshPlan(task *compactionTask) error { sealedSegBinlogs := lo.Map(sealedSegments, func(info *SegmentInfo, _ int) *datapb.CompactionSegmentBinlogs { return &datapb.CompactionSegmentBinlogs{ - SegmentID: info.GetID(), - Level: datapb.SegmentLevel_L1, - CollectionID: info.GetCollectionID(), - PartitionID: info.GetPartitionID(), + SegmentID: info.GetID(), + FieldBinlogs: nil, + Field2StatslogPaths: info.GetStatslogs(), + Deltalogs: nil, + InsertChannel: info.GetInsertChannel(), + Level: info.GetLevel(), + CollectionID: info.GetCollectionID(), + PartitionID: info.GetPartitionID(), } }) @@ -408,8 +404,8 @@ func (c *compactionPlanHandler) notifyTasks(tasks []*compactionTask) { } // execCompactionPlan start to execute plan and return immediately -func (c *compactionPlanHandler) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) error { - return c.enqueuePlan(signal, plan) +func (c *compactionPlanHandler) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) { + c.enqueuePlan(signal, plan) } func (c *compactionPlanHandler) setSegmentsCompacting(plan *datapb.CompactionPlan, compacting bool) { @@ -484,33 +480,14 @@ func (c *compactionPlanHandler) handleMergeCompactionResult(plan *datapb.Compact log.Info("meta has already been changed, skip meta change and retry sync segments") } else { // Also prepare metric updates. 
- newSegments, metricMutation, err := c.meta.CompleteCompactionMutation(plan, result) + _, metricMutation, err := c.meta.CompleteCompactionMutation(plan, result) if err != nil { return err } // Apply metrics after successful meta update. metricMutation.commit() - newSegmentInfo = newSegments[0] - } - - nodeID := c.plans[plan.GetPlanID()].dataNodeID - req := &datapb.SyncSegmentsRequest{ - PlanID: plan.PlanID, - CompactedTo: newSegmentInfo.GetID(), - CompactedFrom: newSegmentInfo.GetCompactionFrom(), - NumOfRows: newSegmentInfo.GetNumOfRows(), - StatsLogs: newSegmentInfo.GetStatslogs(), - ChannelName: plan.GetChannel(), - PartitionId: newSegmentInfo.GetPartitionID(), - CollectionId: newSegmentInfo.GetCollectionID(), - } - - log.Info("handleCompactionResult: syncing segments with node", zap.Int64("nodeID", nodeID)) - if err := c.sessions.SyncSegments(nodeID, req); err != nil { - log.Warn("handleCompactionResult: fail to sync segments with node", - zap.Int64("nodeID", nodeID), zap.Error(err)) - return err } + // TODO @xiaocai2333: drop compaction plan on datanode log.Info("handleCompactionResult: success to handle merge compaction result") return nil @@ -558,13 +535,8 @@ func (c *compactionPlanHandler) updateCompaction(ts Timestamp) error { // task.dataNodeID not match with channel // Mark this compaction as failure and skip processing the meta if !c.chManager.Match(task.dataNodeID, task.plan.GetChannel()) { - // Sync segments without CompactionFrom segmentsIDs to make sure DN clear the task - // without changing the meta + // TODO @xiaocai2333: drop compaction plan on datanode log.Warn("compaction failed for channel nodeID not match") - if err := c.sessions.SyncSegments(task.dataNodeID, &datapb.SyncSegmentsRequest{PlanID: planID}); err != nil { - log.Warn("compaction failed to sync segments with node", zap.Error(err)) - continue - } c.plans[planID] = c.plans[planID].shadowClone(setState(failed), endSpan()) c.setSegmentsCompacting(task.plan, false) c.scheduler.Finish(task.dataNodeID, task.plan) @@ -629,17 +601,8 @@ func (c *compactionPlanHandler) updateCompaction(ts Timestamp) error { if nodeUnkonwnPlan, ok := completedPlans[planID]; ok { nodeID, plan := nodeUnkonwnPlan.A, nodeUnkonwnPlan.B log := log.With(zap.Int64("planID", planID), zap.Int64("nodeID", nodeID), zap.String("channel", plan.GetChannel())) - - // Sync segments without CompactionFrom segmentsIDs to make sure DN clear the task - // without changing the meta - log.Info("compaction syncing unknown plan with node") - if err := c.sessions.SyncSegments(nodeID, &datapb.SyncSegmentsRequest{ - PlanID: planID, - ChannelName: plan.GetChannel(), - }); err != nil { - log.Warn("compaction failed to sync segments with node", zap.Error(err)) - return err - } + // TODO @xiaocai2333: drop compaction plan on datanode + log.Info("drop unknown plan with node") } } diff --git a/internal/datacoord/compaction_scheduler.go b/internal/datacoord/compaction_scheduler.go index 5e592d5e3033f..745a9d40ff7f6 100644 --- a/internal/datacoord/compaction_scheduler.go +++ b/internal/datacoord/compaction_scheduler.go @@ -64,75 +64,64 @@ func (s *CompactionScheduler) Submit(tasks ...*compactionTask) { // Schedule pick 1 or 0 tasks for 1 node func (s *CompactionScheduler) Schedule() []*compactionTask { - s.taskGuard.Lock() - nodeTasks := lo.GroupBy(s.queuingTasks, func(t *compactionTask) int64 { - return t.dataNodeID - }) - s.taskGuard.Unlock() - if len(nodeTasks) == 0 { + s.taskGuard.RLock() + if len(s.queuingTasks) == 0 { + s.taskGuard.RUnlock() return nil // To 
mitigate the need for frequent slot querying } + s.taskGuard.RUnlock() nodeSlots := s.cluster.QuerySlots() - executable := make(map[int64]*compactionTask) + l0ChannelExcludes := typeutil.NewSet[string]() + mixChannelExcludes := typeutil.NewSet[string]() - pickPriorPolicy := func(tasks []*compactionTask, exclusiveChannels []string, executing []string) *compactionTask { - for _, task := range tasks { - // TODO: sheep, replace pickShardNode with pickAnyNode - if nodeID := s.pickShardNode(task.dataNodeID, nodeSlots); nodeID == NullNodeID { - log.Warn("cannot find datanode for compaction task", zap.Int64("planID", task.plan.PlanID), zap.String("vchannel", task.plan.Channel)) - continue + for _, tasks := range s.parallelTasks { + for _, t := range tasks { + switch t.plan.GetType() { + case datapb.CompactionType_Level0DeleteCompaction: + l0ChannelExcludes.Insert(t.plan.GetChannel()) + case datapb.CompactionType_MixCompaction: + mixChannelExcludes.Insert(t.plan.GetChannel()) } - - if lo.Contains(exclusiveChannels, task.plan.GetChannel()) { - continue - } - - if task.plan.GetType() == datapb.CompactionType_Level0DeleteCompaction { - // Channel of LevelZeroCompaction task with no executing compactions - if !lo.Contains(executing, task.plan.GetChannel()) { - return task - } - - // Don't schedule any tasks for channel with LevelZeroCompaction task - // when there're executing compactions - exclusiveChannels = append(exclusiveChannels, task.plan.GetChannel()) - continue - } - - return task } - - return nil } s.taskGuard.Lock() defer s.taskGuard.Unlock() - // pick 1 or 0 task for 1 node - for node, tasks := range nodeTasks { - parallel := s.parallelTasks[node] - - var ( - executing = typeutil.NewSet[string]() - channelsExecPrior = typeutil.NewSet[string]() - ) - for _, t := range parallel { - executing.Insert(t.plan.GetChannel()) - if t.plan.GetType() == datapb.CompactionType_Level0DeleteCompaction { - channelsExecPrior.Insert(t.plan.GetChannel()) - } - } - picked := pickPriorPolicy(tasks, channelsExecPrior.Collect(), executing.Collect()) - if picked != nil { - executable[node] = picked - nodeSlots[node]-- + picked := make([]*compactionTask, 0) + for _, t := range s.queuingTasks { + nodeID := s.pickAnyNode(nodeSlots) + if nodeID == NullNodeID { + log.Warn("cannot find datanode for compaction task", + zap.Int64("planID", t.plan.PlanID), zap.String("vchannel", t.plan.Channel)) + continue + } + switch t.plan.GetType() { + case datapb.CompactionType_Level0DeleteCompaction: + if l0ChannelExcludes.Contain(t.plan.GetChannel()) || + mixChannelExcludes.Contain(t.plan.GetChannel()) { + continue + } + t.dataNodeID = nodeID + picked = append(picked, t) + l0ChannelExcludes.Insert(t.plan.GetChannel()) + nodeSlots[nodeID]-- + case datapb.CompactionType_MixCompaction: + if l0ChannelExcludes.Contain(t.plan.GetChannel()) { + continue + } + t.dataNodeID = nodeID + picked = append(picked, t) + mixChannelExcludes.Insert(t.plan.GetChannel()) + nodeSlots[nodeID]-- } } var pickPlans []int64 - for node, task := range executable { + for _, task := range picked { + node := task.dataNodeID pickPlans = append(pickPlans, task.plan.PlanID) if _, ok := s.parallelTasks[node]; !ok { s.parallelTasks[node] = []*compactionTask{task} @@ -156,7 +145,7 @@ func (s *CompactionScheduler) Schedule() []*compactionTask { } } - return lo.Values(executable) + return picked } func (s *CompactionScheduler) Finish(nodeID UniqueID, plan *datapb.CompactionPlan) { diff --git a/internal/datacoord/compaction_scheduler_test.go 
b/internal/datacoord/compaction_scheduler_test.go index 37f64f740b2f7..a9e30ec996a17 100644 --- a/internal/datacoord/compaction_scheduler_test.go +++ b/internal/datacoord/compaction_scheduler_test.go @@ -60,11 +60,11 @@ func (s *SchedulerSuite) TestScheduleParallelTaskFull() { }{ {"with L0 tasks", []*compactionTask{ {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-10", Type: datapb.CompactionType_Level0DeleteCompaction}}, - {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, + {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, }, []UniqueID{}}, {"without L0 tasks", []*compactionTask{ - {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-10", Type: datapb.CompactionType_MinorCompaction}}, - {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, + {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-10", Type: datapb.CompactionType_MixCompaction}}, + {dataNodeID: 100, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, }, []UniqueID{}}, {"empty tasks", []*compactionTask{}, []UniqueID{}}, } @@ -101,16 +101,16 @@ func (s *SchedulerSuite) TestScheduleNodeWith1ParallelTask() { }{ {"with L0 tasks diff channel", []*compactionTask{ {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-10", Type: datapb.CompactionType_Level0DeleteCompaction}}, - {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, - }, []UniqueID{10}}, + {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, + }, []UniqueID{10, 11}}, {"with L0 tasks same channel", []*compactionTask{ {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-2", Type: datapb.CompactionType_Level0DeleteCompaction}}, - {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, + {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, }, []UniqueID{11}}, {"without L0 tasks", []*compactionTask{ - {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 14, Channel: "ch-2", Type: datapb.CompactionType_MinorCompaction}}, - {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 13, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, - }, []UniqueID{14}}, + {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 14, Channel: "ch-2", Type: datapb.CompactionType_MixCompaction}}, + {dataNodeID: 101, plan: &datapb.CompactionPlan{PlanID: 13, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, + }, []UniqueID{14, 13}}, {"empty tasks", []*compactionTask{}, []UniqueID{}}, } @@ -134,15 +134,6 @@ func (s *SchedulerSuite) TestScheduleNodeWith1ParallelTask() { return t.plan.PlanID })) - // the second schedule returns empty for no slot - if len(test.tasks) > 0 { - cluster := NewMockCluster(s.T()) - cluster.EXPECT().QuerySlots().Return(map[int64]int64{101: 0}) - s.scheduler.cluster = cluster - } - gotTasks = s.scheduler.Schedule() - s.Empty(gotTasks) - s.Equal(4+len(test.tasks), s.scheduler.GetTaskCount()) }) } @@ -158,16 +149,16 @@ func (s *SchedulerSuite) TestScheduleNodeWithL0Executing() { }{ {"with L0 tasks diff channel", 
[]*compactionTask{ {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-10", Type: datapb.CompactionType_Level0DeleteCompaction}}, - {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, - }, []UniqueID{10}}, + {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, + }, []UniqueID{10, 11}}, {"with L0 tasks same channel", []*compactionTask{ {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 10, Channel: "ch-3", Type: datapb.CompactionType_Level0DeleteCompaction}}, - {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, - {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 13, Channel: "ch-3", Type: datapb.CompactionType_MinorCompaction}}, + {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 11, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, + {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 13, Channel: "ch-3", Type: datapb.CompactionType_MixCompaction}}, }, []UniqueID{11}}, {"without L0 tasks", []*compactionTask{ - {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 14, Channel: "ch-3", Type: datapb.CompactionType_MinorCompaction}}, - {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 13, Channel: "ch-11", Type: datapb.CompactionType_MinorCompaction}}, + {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 14, Channel: "ch-3", Type: datapb.CompactionType_MixCompaction}}, + {dataNodeID: 102, plan: &datapb.CompactionPlan{PlanID: 13, Channel: "ch-11", Type: datapb.CompactionType_MixCompaction}}, }, []UniqueID{13}}, {"empty tasks", []*compactionTask{}, []UniqueID{}}, } @@ -192,17 +183,6 @@ func (s *SchedulerSuite) TestScheduleNodeWithL0Executing() { return t.plan.PlanID })) - // the second schedule returns empty for no slot - if len(test.tasks) > 0 { - cluster := NewMockCluster(s.T()) - cluster.EXPECT().QuerySlots().Return(map[int64]int64{101: 0}) - s.scheduler.cluster = cluster - } - if len(gotTasks) > 0 { - gotTasks = s.scheduler.Schedule() - s.Empty(gotTasks) - } - s.Equal(4+len(test.tasks), s.scheduler.GetTaskCount()) }) } diff --git a/internal/datacoord/compaction_test.go b/internal/datacoord/compaction_test.go index 0936e7f8adf24..f7c1c5733cbd7 100644 --- a/internal/datacoord/compaction_test.go +++ b/internal/datacoord/compaction_test.go @@ -84,7 +84,6 @@ func (s *CompactionPlanHandlerSuite) TestCheckResult() { 4: {A: 100, B: &datapb.CompactionPlanResult{PlanID: 4, State: commonpb.CompactionState_Executing}}, }, nil) - s.mockSessMgr.EXPECT().SyncSegments(int64(100), mock.Anything).Return(nil).Once() { s.mockAlloc.EXPECT().allocTimestamp(mock.Anything).Return(0, errors.New("mock")).Once() handler := newCompactionPlanHandler(nil, s.mockSessMgr, nil, nil, s.mockAlloc) @@ -431,43 +430,22 @@ func (s *CompactionPlanHandlerSuite) TestRefreshPlanMixCompaction() { } func (s *CompactionPlanHandlerSuite) TestExecCompactionPlan() { - s.mockCm.EXPECT().FindWatcher(mock.Anything).RunAndReturn(func(channel string) (int64, error) { - if channel == "ch-1" { - return 0, errors.Errorf("mock error for ch-1") - } - - return 1, nil - }).Twice() s.mockSch.EXPECT().Submit(mock.Anything).Return().Once() - tests := []struct { - description string - channel string - hasError bool - }{ - {"channel with error", "ch-1", true}, - {"channel with no error", "ch-2", false}, - } - handler := newCompactionPlanHandler(nil, s.mockSessMgr, s.mockCm, s.mockMeta, 
s.mockAlloc) handler.scheduler = s.mockSch - for idx, test := range tests { - sig := &compactionSignal{id: int64(idx)} - plan := &datapb.CompactionPlan{ - PlanID: int64(idx), - } - s.Run(test.description, func() { - plan.Channel = test.channel - - err := handler.execCompactionPlan(sig, plan) - if test.hasError { - s.Error(err) - } else { - s.NoError(err) - } - }) + sig := &compactionSignal{id: int64(1)} + plan := &datapb.CompactionPlan{ + PlanID: int64(1), } + plan.Channel = "ch-1" + + handler.execCompactionPlan(sig, plan) + handler.mu.RLock() + defer handler.mu.RUnlock() + _, ok := handler.plans[int64(1)] + s.True(ok) } func (s *CompactionPlanHandlerSuite) TestHandleMergeCompactionResult() { @@ -496,7 +474,6 @@ func (s *CompactionPlanHandlerSuite) TestHandleMergeCompactionResult() { } return nil }).Once() - s.mockSessMgr.EXPECT().SyncSegments(mock.Anything, mock.Anything).Return(nil).Once() handler := newCompactionPlanHandler(nil, s.mockSessMgr, s.mockCm, s.mockMeta, s.mockAlloc) handler.plans[plan.PlanID] = &compactionTask{dataNodeID: 111, plan: plan} @@ -538,7 +515,6 @@ func (s *CompactionPlanHandlerSuite) TestHandleMergeCompactionResult() { s.mockMeta.EXPECT().CompleteCompactionMutation(mock.Anything, mock.Anything).Return( []*SegmentInfo{segment}, &segMetricMutation{}, nil).Once() - s.mockSessMgr.EXPECT().SyncSegments(mock.Anything, mock.Anything).Return(errors.New("mock error")).Once() handler := newCompactionPlanHandler(nil, s.mockSessMgr, s.mockCm, s.mockMeta, s.mockAlloc) handler.plans[plan.PlanID] = &compactionTask{dataNodeID: 111, plan: plan} @@ -550,7 +526,7 @@ func (s *CompactionPlanHandlerSuite) TestHandleMergeCompactionResult() { } err := handler.handleMergeCompactionResult(plan, compactionResult) - s.Error(err) + s.NoError(err) }) } @@ -570,7 +546,6 @@ func (s *CompactionPlanHandlerSuite) TestCompleteCompaction() { }) s.Run("test complete merge compaction task", func() { - s.mockSessMgr.EXPECT().SyncSegments(mock.Anything, mock.Anything).Return(nil).Once() // mock for handleMergeCompactionResult s.mockMeta.EXPECT().GetHealthySegment(mock.Anything).Return(nil).Once() segment := NewSegmentInfo(&datapb.SegmentInfo{ID: 100}) @@ -723,14 +698,6 @@ func (s *CompactionPlanHandlerSuite) TestUpdateCompaction() { }, } - s.mockSessMgr.EXPECT().SyncSegments(int64(222), mock.Anything).RunAndReturn(func(nodeID int64, req *datapb.SyncSegmentsRequest) error { - s.EqualValues(nodeID, 222) - s.NotNil(req) - s.Empty(req.GetCompactedFrom()) - s.EqualValues(5, req.GetPlanID()) - return nil - }).Once() - s.mockSessMgr.EXPECT().SyncSegments(int64(111), mock.Anything).Return(nil) s.mockCm.EXPECT().Match(int64(111), "ch-1").Return(true) s.mockCm.EXPECT().Match(int64(111), "ch-2").Return(false).Once() diff --git a/internal/datacoord/compaction_trigger.go b/internal/datacoord/compaction_trigger.go index b6ff595887fa5..f890f6f850bd3 100644 --- a/internal/datacoord/compaction_trigger.go +++ b/internal/datacoord/compaction_trigger.go @@ -430,23 +430,14 @@ func (t *compactionTrigger) handleGlobalSignal(signal *compactionSignal) error { break } start := time.Now() - if err := fillOriginPlan(t.allocator, plan); err != nil { + if err := fillOriginPlan(coll.Schema, t.allocator, plan); err != nil { log.Warn("failed to fill plan", zap.Int64("collectionID", signal.collectionID), zap.Int64s("segmentIDs", segIDs), zap.Error(err)) continue } - err := t.compactionHandler.execCompactionPlan(signal, plan) - if err != nil { - log.Warn("failed to execute compaction plan", - zap.Int64("collectionID", 
signal.collectionID), - zap.Int64("planID", plan.PlanID), - zap.Int64s("segmentIDs", segIDs), - zap.Error(err)) - continue - } - + t.compactionHandler.execCompactionPlan(signal, plan) log.Info("time cost of generating global compaction", zap.Int64("planID", plan.PlanID), zap.Int64("time cost", time.Since(start).Milliseconds()), @@ -530,18 +521,11 @@ func (t *compactionTrigger) handleSignal(signal *compactionSignal) { break } start := time.Now() - if err := fillOriginPlan(t.allocator, plan); err != nil { + if err := fillOriginPlan(coll.Schema, t.allocator, plan); err != nil { log.Warn("failed to fill plan", zap.Error(err)) continue } - if err := t.compactionHandler.execCompactionPlan(signal, plan); err != nil { - log.Warn("failed to execute compaction plan", - zap.Int64("collection", signal.collectionID), - zap.Int64("planID", plan.PlanID), - zap.Int64s("segmentIDs", fetchSegIDs(plan.GetSegmentBinlogs())), - zap.Error(err)) - continue - } + t.compactionHandler.execCompactionPlan(signal, plan) log.Info("time cost of generating compaction", zap.Int64("planID", plan.PlanID), zap.Int64("time cost", time.Since(start).Milliseconds()), @@ -713,6 +697,7 @@ func segmentsToPlan(segments []*SegmentInfo, compactTime *compactTime) *datapb.C } log.Info("generate a plan for priority candidates", zap.Any("plan", plan), + zap.Int("len(segments)", len(plan.GetSegmentBinlogs())), zap.Int64("target segment row", plan.TotalRows), zap.Int64("target segment size", size)) return plan } diff --git a/internal/datacoord/compaction_trigger_test.go b/internal/datacoord/compaction_trigger_test.go index 19d4146a65e14..78166718148a9 100644 --- a/internal/datacoord/compaction_trigger_test.go +++ b/internal/datacoord/compaction_trigger_test.go @@ -51,9 +51,8 @@ var _ compactionPlanContext = (*spyCompactionHandler)(nil) func (h *spyCompactionHandler) removeTasksByChannel(channel string) {} // execCompactionPlan start to execute plan and return immediately -func (h *spyCompactionHandler) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) error { +func (h *spyCompactionHandler) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) { h.spyChan <- plan - return nil } // completeCompaction record the result of a compaction @@ -106,6 +105,22 @@ func Test_compactionTrigger_force(t *testing.T) { vecFieldID := int64(201) indexID := int64(1001) + + schema := &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + FieldID: vecFieldID, + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "128", + }, + }, + }, + }, + } + tests := []struct { name string fields fields @@ -292,21 +307,8 @@ func Test_compactionTrigger_force(t *testing.T) { }, collections: map[int64]*collectionInfo{ 2: { - ID: 2, - Schema: &schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - FieldID: vecFieldID, - DataType: schemapb.DataType_FloatVector, - TypeParams: []*commonpb.KeyValuePair{ - { - Key: common.DimKey, - Value: "128", - }, - }, - }, - }, - }, + ID: 2, + Schema: schema, Properties: map[string]string{ common.CollectionTTLConfigKey: "0", }, @@ -469,6 +471,7 @@ func Test_compactionTrigger_force(t *testing.T) { Type: datapb.CompactionType_MixCompaction, Channel: "ch1", TotalRows: 200, + Schema: schema, }, }, }, @@ -2194,34 +2197,63 @@ func (s *CompactionTriggerSuite) SetupTest() { catalog := mocks.NewDataCoordCatalog(s.T()) catalog.EXPECT().SaveChannelCheckpoint(mock.Anything, s.channel, mock.Anything).Return(nil) + seg1 := 
&SegmentInfo{ + SegmentInfo: s.genSeg(1, 60), + lastFlushTime: time.Now().Add(-100 * time.Minute), + } + seg2 := &SegmentInfo{ + SegmentInfo: s.genSeg(2, 60), + lastFlushTime: time.Now(), + } + seg3 := &SegmentInfo{ + SegmentInfo: s.genSeg(3, 60), + lastFlushTime: time.Now(), + } + seg4 := &SegmentInfo{ + SegmentInfo: s.genSeg(4, 60), + lastFlushTime: time.Now(), + } + seg5 := &SegmentInfo{ + SegmentInfo: s.genSeg(5, 60), + lastFlushTime: time.Now(), + } + seg6 := &SegmentInfo{ + SegmentInfo: s.genSeg(6, 60), + lastFlushTime: time.Now(), + } + s.meta = &meta{ channelCPs: newChannelCps(), catalog: catalog, segments: &SegmentsInfo{ segments: map[int64]*SegmentInfo{ - 1: { - SegmentInfo: s.genSeg(1, 60), - lastFlushTime: time.Now().Add(-100 * time.Minute), - }, - 2: { - SegmentInfo: s.genSeg(2, 60), - lastFlushTime: time.Now(), - }, - 3: { - SegmentInfo: s.genSeg(3, 60), - lastFlushTime: time.Now(), - }, - 4: { - SegmentInfo: s.genSeg(4, 60), - lastFlushTime: time.Now(), - }, - 5: { - SegmentInfo: s.genSeg(5, 26), - lastFlushTime: time.Now(), + 1: seg1, + 2: seg2, + 3: seg3, + 4: seg4, + 5: seg5, + 6: seg6, + }, + secondaryIndexes: segmentInfoIndexes{ + coll2Segments: map[UniqueID]map[UniqueID]*SegmentInfo{ + s.collectionID: { + 1: seg1, + 2: seg2, + 3: seg3, + 4: seg4, + 5: seg5, + 6: seg6, + }, }, - 6: { - SegmentInfo: s.genSeg(6, 26), - lastFlushTime: time.Now(), + channel2Segments: map[string]map[UniqueID]*SegmentInfo{ + s.channel: { + 1: seg1, + 2: seg2, + 3: seg3, + 4: seg4, + 5: seg5, + 6: seg6, + }, }, }, }, @@ -2386,7 +2418,7 @@ func (s *CompactionTriggerSuite) TestHandleSignal() { }, }, }, nil) - s.compactionHandler.EXPECT().execCompactionPlan(mock.Anything, mock.Anything).Return(nil) + s.compactionHandler.EXPECT().execCompactionPlan(mock.Anything, mock.Anything).Return() tr.handleSignal(&compactionSignal{ segmentID: 1, collectionID: s.collectionID, @@ -2517,7 +2549,7 @@ func (s *CompactionTriggerSuite) TestHandleGlobalSignal() { common.CollectionAutoCompactionKey: "false", }, }, nil) - s.compactionHandler.EXPECT().execCompactionPlan(mock.Anything, mock.Anything).Return(nil) + s.compactionHandler.EXPECT().execCompactionPlan(mock.Anything, mock.Anything).Return() tr.handleGlobalSignal(&compactionSignal{ segmentID: 1, collectionID: s.collectionID, diff --git a/internal/datacoord/compaction_trigger_v2.go b/internal/datacoord/compaction_trigger_v2.go index 1ba9c1d9ef4aa..e1678e9f97e66 100644 --- a/internal/datacoord/compaction_trigger_v2.go +++ b/internal/datacoord/compaction_trigger_v2.go @@ -2,10 +2,12 @@ package datacoord import ( "context" + "time" "github.com/samber/lo" "go.uber.org/zap" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/log" ) @@ -33,16 +35,18 @@ type TriggerManager interface { // 2. SystemIDLE & schedulerIDLE // 3. 
Manual Compaction type CompactionTriggerManager struct { - scheduler Scheduler - handler compactionPlanContext // TODO replace with scheduler + scheduler Scheduler + handler Handler + compactionHandler compactionPlanContext // TODO replace with scheduler allocator allocator } -func NewCompactionTriggerManager(alloc allocator, handler compactionPlanContext) *CompactionTriggerManager { +func NewCompactionTriggerManager(alloc allocator, handler Handler, compactionHandler compactionPlanContext) *CompactionTriggerManager { m := &CompactionTriggerManager{ - allocator: alloc, - handler: handler, + allocator: alloc, + handler: handler, + compactionHandler: compactionHandler, } return m @@ -51,7 +55,7 @@ func NewCompactionTriggerManager(alloc allocator, handler compactionPlanContext) func (m *CompactionTriggerManager) Notify(taskID UniqueID, eventType CompactionTriggerType, views []CompactionView) { log := log.With(zap.Int64("taskID", taskID)) for _, view := range views { - if m.handler.isFull() { + if m.compactionHandler.isFull() { log.RatedInfo(1.0, "Skip trigger compaction for scheduler is full") return } @@ -103,7 +107,7 @@ func (m *CompactionTriggerManager) SubmitL0ViewToScheduler(taskID int64, outView // TODO, remove handler, use scheduler // m.scheduler.Submit(plan) - m.handler.execCompactionPlan(signal, plan) + m.compactionHandler.execCompactionPlan(signal, plan) log.Info("Finish to submit a LevelZeroCompaction plan", zap.Int64("taskID", taskID), zap.Int64("planID", plan.GetPlanID()), @@ -130,7 +134,14 @@ func (m *CompactionTriggerManager) buildL0CompactionPlan(view CompactionView) *d Channel: view.GetGroupLabel().Channel, } - if err := fillOriginPlan(m.allocator, plan); err != nil { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + collection, err := m.handler.GetCollection(ctx, view.GetGroupLabel().CollectionID) + if err != nil { + return nil + } + + if err := fillOriginPlan(collection.Schema, m.allocator, plan); err != nil { return nil } @@ -145,14 +156,16 @@ type chanPartSegments struct { segments []*SegmentInfo } -func fillOriginPlan(alloc allocator, plan *datapb.CompactionPlan) error { - // TODO context - id, err := alloc.allocID(context.TODO()) +func fillOriginPlan(schema *schemapb.CollectionSchema, alloc allocator, plan *datapb.CompactionPlan) error { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + id, err := alloc.allocID(ctx) if err != nil { return err } plan.PlanID = id plan.TimeoutInSeconds = Params.DataCoordCfg.CompactionTimeoutInSeconds.GetAsInt32() + plan.Schema = schema return nil } diff --git a/internal/datacoord/compaction_trigger_v2_test.go b/internal/datacoord/compaction_trigger_v2_test.go index 3176e04a0bd08..3b66ac944c822 100644 --- a/internal/datacoord/compaction_trigger_v2_test.go +++ b/internal/datacoord/compaction_trigger_v2_test.go @@ -20,6 +20,7 @@ type CompactionTriggerManagerSuite struct { suite.Suite mockAlloc *NMockAllocator + handler Handler mockPlanContext *MockCompactionPlanContext testLabel *CompactionGroupLabel meta *meta @@ -29,6 +30,7 @@ type CompactionTriggerManagerSuite struct { func (s *CompactionTriggerManagerSuite) SetupTest() { s.mockAlloc = NewNMockAllocator(s.T()) + s.handler = NewNMockHandler(s.T()) s.mockPlanContext = NewMockCompactionPlanContext(s.T()) s.testLabel = &CompactionGroupLabel{ @@ -42,7 +44,7 @@ func (s *CompactionTriggerManagerSuite) SetupTest() { s.meta.segments.SetSegment(id, segment) } - s.m = NewCompactionTriggerManager(s.mockAlloc, 
s.mockPlanContext) + s.m = NewCompactionTriggerManager(s.mockAlloc, s.handler, s.mockPlanContext) } func (s *CompactionTriggerManagerSuite) TestNotifyToFullScheduler() { @@ -73,6 +75,10 @@ func (s *CompactionTriggerManagerSuite) TestNotifyToFullScheduler() { } func (s *CompactionTriggerManagerSuite) TestNotifyByViewIDLE() { + handler := NewNMockHandler(s.T()) + handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{}, nil) + s.m.handler = handler + viewManager := NewCompactionViewManager(s.meta, s.m, s.m.allocator) collSegs := s.meta.GetCompactableSegmentGroupByCollection() @@ -120,12 +126,16 @@ func (s *CompactionTriggerManagerSuite) TestNotifyByViewIDLE() { s.ElementsMatch(expectedSegs, gotSegs) log.Info("generated plan", zap.Any("plan", plan)) - }).Return(nil).Once() + }).Return().Once() s.m.Notify(19530, TriggerTypeLevelZeroViewIDLE, levelZeroView) } func (s *CompactionTriggerManagerSuite) TestNotifyByViewChange() { + handler := NewNMockHandler(s.T()) + handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{}, nil) + s.m.handler = handler + viewManager := NewCompactionViewManager(s.meta, s.m, s.m.allocator) collSegs := s.meta.GetCompactableSegmentGroupByCollection() @@ -168,7 +178,7 @@ func (s *CompactionTriggerManagerSuite) TestNotifyByViewChange() { s.ElementsMatch(expectedSegs, gotSegs) log.Info("generated plan", zap.Any("plan", plan)) - }).Return(nil).Once() + }).Return().Once() s.m.Notify(19530, TriggerTypeLevelZeroViewChange, levelZeroView) } diff --git a/internal/datacoord/garbage_collector_test.go b/internal/datacoord/garbage_collector_test.go index 66f7873b4b81d..93a96f7e37429 100644 --- a/internal/datacoord/garbage_collector_test.go +++ b/internal/datacoord/garbage_collector_test.go @@ -465,7 +465,14 @@ func createMetaForRecycleUnusedSegIndexes(catalog metastore.DataCoordCatalog) *m }, }, segID + 1: { - SegmentInfo: nil, + SegmentInfo: &datapb.SegmentInfo{ + ID: segID + 1, + CollectionID: collID, + PartitionID: partID, + InsertChannel: "", + NumOfRows: 1026, + State: commonpb.SegmentState_Dropped, + }, }, } meta := &meta{ diff --git a/internal/datacoord/handler.go b/internal/datacoord/handler.go index b02aa5d9abb13..03480782e28c4 100644 --- a/internal/datacoord/handler.go +++ b/internal/datacoord/handler.go @@ -57,9 +57,7 @@ func newServerHandler(s *Server) *ServerHandler { // GetDataVChanPositions gets vchannel latest positions with provided dml channel names for DataNode. func (h *ServerHandler) GetDataVChanPositions(channel RWChannel, partitionID UniqueID) *datapb.VchannelInfo { - segments := h.s.meta.SelectSegments(SegmentFilterFunc(func(s *SegmentInfo) bool { - return s.InsertChannel == channel.GetName() && !s.GetIsFake() - })) + segments := h.s.meta.GetRealSegmentsForChannel(channel.GetName()) log.Info("GetDataVChanPositions", zap.Int64("collectionID", channel.GetCollectionID()), zap.String("channel", channel.GetName()), @@ -105,9 +103,7 @@ func (h *ServerHandler) GetDataVChanPositions(channel RWChannel, partitionID Uni // the unflushed segments are actually the segments without index, even they are flushed. 
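The compaction_trigger_v2.go change above makes the L0 plan carry the collection schema: the trigger manager now resolves the collection through its Handler under a bounded context, and fillOriginPlan stamps the schema onto the plan together with the allocated plan ID and timeout. A minimal sketch of that flow, assuming the datacoord package context from this diff (buildPlanSketch is an illustrative name, not part of the patch):

    func buildPlanSketch(m *CompactionTriggerManager, view CompactionView) *datapb.CompactionPlan {
        ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
        defer cancel()

        // The plan cannot be filled without the collection schema.
        collection, err := m.handler.GetCollection(ctx, view.GetGroupLabel().CollectionID)
        if err != nil {
            return nil
        }

        plan := &datapb.CompactionPlan{
            Channel: view.GetGroupLabel().Channel,
            // remaining fields (segments, type, ...) elided as in buildL0CompactionPlan
        }
        if err := fillOriginPlan(collection.Schema, m.allocator, plan); err != nil {
            return nil
        }
        // plan.PlanID, plan.TimeoutInSeconds and plan.Schema are populated at this point.
        return plan
    }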
func (h *ServerHandler) GetQueryVChanPositions(channel RWChannel, partitionIDs ...UniqueID) *datapb.VchannelInfo { // cannot use GetSegmentsByChannel since dropped segments are needed here - segments := h.s.meta.SelectSegments(SegmentFilterFunc(func(s *SegmentInfo) bool { - return s.InsertChannel == channel.GetName() && !s.GetIsFake() - })) + segments := h.s.meta.GetRealSegmentsForChannel(channel.GetName()) segmentInfos := make(map[int64]*SegmentInfo) indexedSegments := FilterInIndexedSegments(h, h.s.meta, segments...) indexed := make(typeutil.UniqueSet) diff --git a/internal/datacoord/import_checker.go b/internal/datacoord/import_checker.go index a1c86cc560ed5..fe75da1639141 100644 --- a/internal/datacoord/import_checker.go +++ b/internal/datacoord/import_checker.go @@ -341,6 +341,9 @@ func (c *importChecker) checkCollection(collectionID int64, jobs []ImportJob) { return } if !has { + jobs = lo.Filter(jobs, func(job ImportJob, _ int) bool { + return job.GetState() != internalpb.ImportJobState_Failed + }) for _, job := range jobs { err = c.imeta.UpdateJob(job.GetJobID(), UpdateJobState(internalpb.ImportJobState_Failed), UpdateJobReason(fmt.Sprintf("collection %d dropped", collectionID))) @@ -388,6 +391,8 @@ func (c *importChecker) checkGC(job ImportJob) { err := c.imeta.RemoveJob(job.GetJobID()) if err != nil { log.Warn("remove import job failed", zap.Int64("jobID", job.GetJobID()), zap.Error(err)) + return } + log.Info("import job removed", zap.Int64("jobID", job.GetJobID())) } } diff --git a/internal/datacoord/import_scheduler.go b/internal/datacoord/import_scheduler.go index f1cf30003cfd8..5f042de5db0a4 100644 --- a/internal/datacoord/import_scheduler.go +++ b/internal/datacoord/import_scheduler.go @@ -351,9 +351,11 @@ func (s *importScheduler) processFailed(task ImportTask) { return } } - err := s.imeta.UpdateTask(task.GetTaskID(), UpdateSegmentIDs(nil)) - if err != nil { - log.Warn("update import task segments failed", WrapTaskLog(task, zap.Error(err))...) + if len(segments) > 0 { + err := s.imeta.UpdateTask(task.GetTaskID(), UpdateSegmentIDs(nil)) + if err != nil { + log.Warn("update import task segments failed", WrapTaskLog(task, zap.Error(err))...) 
+ } } } err := DropImportTask(task, s.cluster, s.imeta) diff --git a/internal/datacoord/index_builder.go b/internal/datacoord/index_builder.go index c56554561371c..9a83f2384cc98 100644 --- a/internal/datacoord/index_builder.go +++ b/internal/datacoord/index_builder.go @@ -348,28 +348,29 @@ func (ib *indexBuilder) process(buildID UniqueID) bool { } } var req *indexpb.CreateJobRequest - if Params.CommonCfg.EnableStorageV2.GetAsBool() { - collectionInfo, err := ib.handler.GetCollection(ib.ctx, segment.GetCollectionID()) - if err != nil { - log.Info("index builder get collection info failed", zap.Int64("collectionID", segment.GetCollectionID()), zap.Error(err)) - return false - } + collectionInfo, err := ib.handler.GetCollection(ib.ctx, segment.GetCollectionID()) + if err != nil { + log.Ctx(ib.ctx).Info("index builder get collection info failed", zap.Int64("collectionID", segment.GetCollectionID()), zap.Error(err)) + return false + } - schema := collectionInfo.Schema - var field *schemapb.FieldSchema + schema := collectionInfo.Schema + var field *schemapb.FieldSchema - for _, f := range schema.Fields { - if f.FieldID == fieldID { - field = f - break - } - } - - dim, err := storage.GetDimFromParams(field.TypeParams) - if err != nil { - return false + for _, f := range schema.Fields { + if f.FieldID == fieldID { + field = f + break } + } + dim, err := storage.GetDimFromParams(field.TypeParams) + if err != nil { + log.Ctx(ib.ctx).Warn("failed to get dim from field type params", + zap.String("field type", field.GetDataType().String()), zap.Error(err)) + // don't return, maybe field is scalar field or sparseFloatVector + } + if Params.CommonCfg.EnableStorageV2.GetAsBool() { storePath, err := itypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue(), segment.GetID()) if err != nil { log.Ctx(ib.ctx).Warn("failed to get storage uri", zap.Error(err)) @@ -403,6 +404,7 @@ func (ib *indexBuilder) process(buildID UniqueID) bool { CurrentIndexVersion: ib.indexEngineVersionManager.GetCurrentIndexEngineVersion(), DataIds: binlogIDs, OptionalScalarFields: optionalFields, + Field: field, } } else { req = &indexpb.CreateJobRequest{ @@ -421,6 +423,8 @@ func (ib *indexBuilder) process(buildID UniqueID) bool { SegmentID: segment.GetID(), FieldID: fieldID, OptionalScalarFields: optionalFields, + Dim: int64(dim), + Field: field, } } diff --git a/internal/datacoord/index_builder_test.go b/internal/datacoord/index_builder_test.go index 46d8c7fe3f43e..9488c70f5e818 100644 --- a/internal/datacoord/index_builder_test.go +++ b/internal/datacoord/index_builder_test.go @@ -675,7 +675,30 @@ func TestIndexBuilder(t *testing.T) { chunkManager := &mocks.ChunkManager{} chunkManager.EXPECT().RootPath().Return("root") - ib := newIndexBuilder(ctx, mt, nodeManager, chunkManager, newIndexEngineVersionManager(), nil) + handler := NewNMockHandler(t) + handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{ + ID: collID, + Schema: &schemapb.CollectionSchema{ + Name: "coll", + Fields: []*schemapb.FieldSchema{ + { + FieldID: fieldID, + Name: "vec", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: "dim", + Value: "128", + }, + }, + }, + }, + EnableDynamicField: false, + Properties: nil, + }, + }, nil) + + ib := newIndexBuilder(ctx, mt, nodeManager, chunkManager, newIndexEngineVersionManager(), handler) assert.Equal(t, 6, len(ib.tasks)) assert.Equal(t, indexTaskInit, ib.tasks[buildID]) @@ -741,6 
+764,30 @@ func TestIndexBuilder_Error(t *testing.T) { chunkManager := &mocks.ChunkManager{} chunkManager.EXPECT().RootPath().Return("root") + + handler := NewNMockHandler(t) + handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{ + ID: collID, + Schema: &schemapb.CollectionSchema{ + Name: "coll", + Fields: []*schemapb.FieldSchema{ + { + FieldID: fieldID, + Name: "vec", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: "dim", + Value: "128", + }, + }, + }, + }, + EnableDynamicField: false, + Properties: nil, + }, + }, nil) + ib := &indexBuilder{ ctx: context.Background(), tasks: map[int64]indexTaskState{ @@ -749,6 +796,7 @@ func TestIndexBuilder_Error(t *testing.T) { meta: createMetaTable(ec), chunkManager: chunkManager, indexEngineVersionManager: newIndexEngineVersionManager(), + handler: handler, } t.Run("meta not exist", func(t *testing.T) { @@ -1414,9 +1462,32 @@ func TestVecIndexWithOptionalScalarField(t *testing.T) { mt.collections[collID].Schema.Fields[1].DataType = schemapb.DataType_VarChar } + handler := NewNMockHandler(t) + handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{ + ID: collID, + Schema: &schemapb.CollectionSchema{ + Name: "coll", + Fields: []*schemapb.FieldSchema{ + { + FieldID: fieldID, + Name: "vec", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: "dim", + Value: "128", + }, + }, + }, + }, + EnableDynamicField: false, + Properties: nil, + }, + }, nil) + paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("true") defer paramtable.Get().CommonCfg.EnableMaterializedView.SwapTempValue("false") - ib := newIndexBuilder(ctx, &mt, nodeManager, cm, newIndexEngineVersionManager(), nil) + ib := newIndexBuilder(ctx, &mt, nodeManager, cm, newIndexEngineVersionManager(), handler) t.Run("success to get opt field on startup", func(t *testing.T) { ic.EXPECT().CreateJob(mock.Anything, mock.Anything, mock.Anything, mock.Anything).RunAndReturn( diff --git a/internal/datacoord/index_service.go b/internal/datacoord/index_service.go index d1e166d8fb291..1db44438afd30 100644 --- a/internal/datacoord/index_service.go +++ b/internal/datacoord/index_service.go @@ -690,7 +690,7 @@ func (s *Server) DescribeIndex(ctx context.Context, req *indexpb.DescribeIndexRe indexes := s.meta.indexMeta.GetIndexesForCollection(req.GetCollectionID(), req.GetIndexName()) if len(indexes) == 0 { err := merr.WrapErrIndexNotFound(req.GetIndexName()) - log.Warn("DescribeIndex fail", zap.Error(err)) + log.RatedWarn(60, "DescribeIndex fail", zap.Error(err)) return &indexpb.DescribeIndexResponse{ Status: merr.Status(err), }, nil @@ -917,7 +917,7 @@ func (s *Server) ListIndexes(ctx context.Context, req *indexpb.ListIndexesReques UserIndexParams: index.UserIndexParams, } }) - log.Info("List index success") + log.Debug("List index success") return &indexpb.ListIndexesResponse{ Status: merr.Success(), IndexInfos: indexInfos, diff --git a/internal/datacoord/index_service_test.go b/internal/datacoord/index_service_test.go index cda44b2558e89..d10c8d104f1bb 100644 --- a/internal/datacoord/index_service_test.go +++ b/internal/datacoord/index_service_test.go @@ -938,7 +938,12 @@ func TestServer_GetSegmentIndexState(t *testing.T) { WriteHandoff: false, }) s.meta.segments.SetSegment(segID, &SegmentInfo{ - SegmentInfo: nil, + SegmentInfo: &datapb.SegmentInfo{ + ID: segID, + CollectionID: collID, + PartitionID: partID, + InsertChannel: "ch", + }, currRows: 0, 
allocations: nil, lastFlushTime: time.Time{}, diff --git a/internal/datacoord/meta.go b/internal/datacoord/meta.go index 892166fc4b46a..850430539f711 100644 --- a/internal/datacoord/meta.go +++ b/internal/datacoord/meta.go @@ -32,6 +32,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/datacoord/broker" "github.com/milvus-io/milvus/internal/metastore" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/storage" @@ -39,12 +40,14 @@ import ( "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util/funcutil" "github.com/milvus-io/milvus/pkg/util/lock" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/metautil" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/timerecord" "github.com/milvus-io/milvus/pkg/util/tsoutil" + "github.com/milvus-io/milvus/pkg/util/typeutil" ) type meta struct { @@ -162,6 +165,42 @@ func (m *meta) reloadFromKV() error { return nil } +func (m *meta) reloadCollectionsFromRootcoord(ctx context.Context, broker broker.Broker) error { + resp, err := broker.ListDatabases(ctx) + if err != nil { + return err + } + for _, dbName := range resp.GetDbNames() { + resp, err := broker.ShowCollections(ctx, dbName) + if err != nil { + return err + } + for _, collectionID := range resp.GetCollectionIds() { + resp, err := broker.DescribeCollectionInternal(ctx, collectionID) + if err != nil { + return err + } + partitionIDs, err := broker.ShowPartitionsInternal(ctx, collectionID) + if err != nil { + return err + } + collection := &collectionInfo{ + ID: collectionID, + Schema: resp.GetSchema(), + Partitions: partitionIDs, + StartPositions: resp.GetStartPositions(), + Properties: funcutil.KeyValuePair2Map(resp.GetProperties()), + CreatedAt: resp.GetCreatedTimestamp(), + DatabaseName: resp.GetDbName(), + DatabaseID: resp.GetDbId(), + VChannelNames: resp.GetVirtualChannelNames(), + } + m.AddCollection(collection) + } + } + return nil +} + // AddCollection adds a collection into meta // Note that collection info is just for caching and will not be set into etcd from datacoord func (m *meta) AddCollection(collection *collectionInfo) { @@ -176,21 +215,10 @@ func (m *meta) AddCollection(collection *collectionInfo) { // DropCollection drop a collection from meta func (m *meta) DropCollection(collectionID int64) { log.Info("meta update: drop collection", zap.Int64("collectionID", collectionID)) - segments := m.SelectSegments(WithCollection(collectionID)) m.Lock() defer m.Unlock() - coll, ok := m.collections[collectionID] - if ok { - metrics.CleanupDataCoordNumStoredRows(coll.DatabaseName, collectionID) - metrics.CleanupDataCoordBulkInsertVectors(coll.DatabaseName, collectionID) - for _, seg := range segments { - metrics.CleanupDataCoordSegmentMetrics(coll.DatabaseName, collectionID, seg.ID) - } - } else { - log.Warn("not found database name", zap.Int64("collectionID", collectionID)) - } - delete(m.collections, collectionID) + metrics.CleanupDataCoordWithCollectionID(collectionID) metrics.DataCoordNumCollections.WithLabelValues().Set(float64(len(m.collections))) log.Info("meta update: drop collection - complete", zap.Int64("collectionID", collectionID)) } @@ -318,13 +346,13 @@ func (m *meta) GetCollectionBinlogSize() (int64, 
map[UniqueID]int64, map[UniqueI collectionRowsNum[segment.GetCollectionID()][segment.GetState()] += segment.GetNumOfRows() } } + + metrics.DataCoordNumStoredRows.Reset() for collectionID, statesRows := range collectionRowsNum { for state, rows := range statesRows { coll, ok := m.collections[collectionID] if ok { metrics.DataCoordNumStoredRows.WithLabelValues(coll.DatabaseName, fmt.Sprint(collectionID), state.String()).Set(float64(rows)) - } else { - log.Warn("not found database name", zap.Int64("collectionID", collectionID)) } } } @@ -336,6 +364,8 @@ func (m *meta) GetCollectionIndexFilesSize() uint64 { m.RLock() defer m.RUnlock() var total uint64 + + missingCollections := make(typeutil.Set[int64]) for _, segmentIdx := range m.indexMeta.GetAllSegIndexes() { coll, ok := m.collections[segmentIdx.CollectionID] if ok { @@ -343,9 +373,12 @@ func (m *meta) GetCollectionIndexFilesSize() uint64 { fmt.Sprint(segmentIdx.CollectionID), fmt.Sprint(segmentIdx.SegmentID)).Set(float64(segmentIdx.IndexSize)) total += segmentIdx.IndexSize } else { - log.Warn("not found database name", zap.Int64("collectionID", segmentIdx.CollectionID)) + missingCollections.Insert(segmentIdx.CollectionID) } } + if missingCollections.Len() > 0 { + log.Warn("collection info not found when calculating index file sizes", zap.Int64s("collectionIDs", missingCollections.Collect())) + } return total } @@ -1085,6 +1118,12 @@ func (m *meta) SelectSegments(filters ...SegmentFilter) []*SegmentInfo { return m.segments.GetSegmentsBySelector(filters...) } +func (m *meta) GetRealSegmentsForChannel(channel string) []*SegmentInfo { + m.RLock() + defer m.RUnlock() + return m.segments.GetRealSegmentsForChannel(channel) +} + // AddAllocation add allocation in segment func (m *meta) AddAllocation(segmentID UniqueID, allocation *Allocation) error { log.Debug("meta update: add allocation", @@ -1374,6 +1413,28 @@ func (m *meta) UpdateChannelCheckpoint(vChannel string, pos *msgpb.MsgPosition) return nil } +// MarkChannelCheckpointDropped set channel checkpoint to MaxUint64 preventing future update +// and remove the metrics for channel checkpoint lag. +func (m *meta) MarkChannelCheckpointDropped(ctx context.Context, channel string) error { + m.channelCPs.Lock() + defer m.channelCPs.Unlock() + + cp := &msgpb.MsgPosition{ + ChannelName: channel, + Timestamp: math.MaxUint64, + } + + err := m.catalog.SaveChannelCheckpoints(ctx, []*msgpb.MsgPosition{cp}) + if err != nil { + return err + } + + m.channelCPs.checkpoints[channel] = cp + + metrics.DataCoordCheckpointUnixSeconds.DeleteLabelValues(fmt.Sprint(paramtable.GetNodeID()), channel) + return nil +} + // UpdateChannelCheckpoints updates and saves channel checkpoints. 
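MarkChannelCheckpointDropped above persists a checkpoint pinned at math.MaxUint64 for the channel, which, per its own comment, prevents any later checkpoint update for that channel and removes its checkpoint-lag metric. It is called from DropVirtualChannel further down in this diff; a condensed sketch of that drop path (the helper name and the added error logging are illustrative only, the patch itself discards the error):

    func dropChannelSketch(ctx context.Context, s *Server, channel string) {
        s.segmentManager.DropSegmentsOfChannel(ctx, channel)
        s.compactionHandler.removeTasksByChannel(channel)
        metrics.DataCoordCheckpointUnixSeconds.DeleteLabelValues(fmt.Sprint(paramtable.GetNodeID()), channel)
        // Pin the checkpoint so it can no longer be advanced for a dropped channel.
        if err := s.meta.MarkChannelCheckpointDropped(ctx, channel); err != nil {
            log.Warn("failed to mark channel checkpoint as dropped", zap.String("channel", channel), zap.Error(err))
        }
    }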
func (m *meta) UpdateChannelCheckpoints(positions []*msgpb.MsgPosition) error { m.channelCPs.Lock() @@ -1520,3 +1581,10 @@ func updateSegStateAndPrepareMetrics(segToUpdate *SegmentInfo, targetState commo metricMutation.append(segToUpdate.GetState(), targetState, segToUpdate.GetLevel(), segToUpdate.GetNumOfRows()) segToUpdate.State = targetState } + +func (m *meta) ListCollections() []int64 { + m.RLock() + defer m.RUnlock() + + return lo.Keys(m.collections) +} diff --git a/internal/datacoord/meta_test.go b/internal/datacoord/meta_test.go index 5d3bab7aef66d..d90b1b015018a 100644 --- a/internal/datacoord/meta_test.go +++ b/internal/datacoord/meta_test.go @@ -22,12 +22,15 @@ import ( "github.com/cockroachdb/errors" "github.com/golang/protobuf/proto" + "github.com/samber/lo" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" + "github.com/milvus-io/milvus/internal/datacoord/broker" "github.com/milvus-io/milvus/internal/kv" mockkv "github.com/milvus-io/milvus/internal/kv/mocks" "github.com/milvus-io/milvus/internal/metastore/kv/datacoord" @@ -980,6 +983,97 @@ func Test_meta_GetSegmentsOfCollection(t *testing.T) { assert.True(t, ok) assert.Equal(t, expected, gotInfo.GetState()) } + + got = m.GetSegmentsOfCollection(-1) + assert.Equal(t, 3, len(got)) + + got = m.GetSegmentsOfCollection(10) + assert.Equal(t, 0, len(got)) +} + +func Test_meta_GetSegmentsWithChannel(t *testing.T) { + storedSegments := NewSegmentsInfo() + for segID, segment := range map[int64]*SegmentInfo{ + 1: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 1, + CollectionID: 1, + InsertChannel: "h1", + State: commonpb.SegmentState_Flushed, + }, + }, + 2: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 2, + CollectionID: 1, + InsertChannel: "h2", + State: commonpb.SegmentState_Growing, + }, + }, + 3: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 3, + CollectionID: 2, + State: commonpb.SegmentState_Flushed, + InsertChannel: "h1", + }, + }, + } { + storedSegments.SetSegment(segID, segment) + } + m := &meta{segments: storedSegments} + got := m.GetSegmentsByChannel("h1") + assert.Equal(t, 2, len(got)) + assert.ElementsMatch(t, []int64{1, 3}, lo.Map( + got, + func(s *SegmentInfo, i int) int64 { + return s.ID + }, + )) + + got = m.GetSegmentsByChannel("h3") + assert.Equal(t, 0, len(got)) + + got = m.SelectSegments(WithCollection(1), WithChannel("h1"), SegmentFilterFunc(func(segment *SegmentInfo) bool { + return segment != nil && segment.GetState() == commonpb.SegmentState_Flushed + })) + assert.Equal(t, 1, len(got)) + assert.ElementsMatch(t, []int64{1}, lo.Map( + got, + func(s *SegmentInfo, i int) int64 { + return s.ID + }, + )) + + m.segments.DropSegment(3) + _, ok := m.segments.secondaryIndexes.coll2Segments[2] + assert.False(t, ok) + assert.Equal(t, 1, len(m.segments.secondaryIndexes.coll2Segments)) + assert.Equal(t, 2, len(m.segments.secondaryIndexes.channel2Segments)) + + segments, ok := m.segments.secondaryIndexes.channel2Segments["h1"] + assert.True(t, ok) + assert.Equal(t, 1, len(segments)) + assert.Equal(t, int64(1), segments[1].ID) + segments, ok = m.segments.secondaryIndexes.channel2Segments["h2"] + assert.True(t, ok) + assert.Equal(t, 1, len(segments)) + assert.Equal(t, int64(2), segments[2].ID) + + m.segments.DropSegment(2) + segments, ok = m.segments.secondaryIndexes.coll2Segments[1] + assert.True(t, ok) + 
assert.Equal(t, 1, len(segments)) + assert.Equal(t, int64(1), segments[1].ID) + assert.Equal(t, 1, len(m.segments.secondaryIndexes.coll2Segments)) + assert.Equal(t, 1, len(m.segments.secondaryIndexes.channel2Segments)) + + segments, ok = m.segments.secondaryIndexes.channel2Segments["h1"] + assert.True(t, ok) + assert.Equal(t, 1, len(segments)) + assert.Equal(t, int64(1), segments[1].ID) + _, ok = m.segments.secondaryIndexes.channel2Segments["h2"] + assert.False(t, ok) } func TestMeta_HasSegments(t *testing.T) { @@ -1129,3 +1223,89 @@ func Test_meta_GcConfirm(t *testing.T) { assert.False(t, m.GcConfirm(context.TODO(), 100, 10000)) } + +func Test_meta_ReloadCollectionsFromRootcoords(t *testing.T) { + t.Run("fail to list database", func(t *testing.T) { + m := &meta{ + collections: make(map[UniqueID]*collectionInfo), + } + mockBroker := broker.NewMockBroker(t) + mockBroker.EXPECT().ListDatabases(mock.Anything).Return(nil, errors.New("list database failed, mocked")) + err := m.reloadCollectionsFromRootcoord(context.TODO(), mockBroker) + assert.Error(t, err) + }) + + t.Run("fail to show collections", func(t *testing.T) { + m := &meta{ + collections: make(map[UniqueID]*collectionInfo), + } + mockBroker := broker.NewMockBroker(t) + + mockBroker.EXPECT().ListDatabases(mock.Anything).Return(&milvuspb.ListDatabasesResponse{ + DbNames: []string{"db1"}, + }, nil) + mockBroker.EXPECT().ShowCollections(mock.Anything, mock.Anything).Return(nil, errors.New("show collections failed, mocked")) + err := m.reloadCollectionsFromRootcoord(context.TODO(), mockBroker) + assert.Error(t, err) + }) + + t.Run("fail to describe collection", func(t *testing.T) { + m := &meta{ + collections: make(map[UniqueID]*collectionInfo), + } + mockBroker := broker.NewMockBroker(t) + + mockBroker.EXPECT().ListDatabases(mock.Anything).Return(&milvuspb.ListDatabasesResponse{ + DbNames: []string{"db1"}, + }, nil) + mockBroker.EXPECT().ShowCollections(mock.Anything, mock.Anything).Return(&milvuspb.ShowCollectionsResponse{ + CollectionNames: []string{"coll1"}, + CollectionIds: []int64{1000}, + }, nil) + mockBroker.EXPECT().DescribeCollectionInternal(mock.Anything, mock.Anything).Return(nil, errors.New("describe collection failed, mocked")) + err := m.reloadCollectionsFromRootcoord(context.TODO(), mockBroker) + assert.Error(t, err) + }) + + t.Run("fail to show partitions", func(t *testing.T) { + m := &meta{ + collections: make(map[UniqueID]*collectionInfo), + } + mockBroker := broker.NewMockBroker(t) + + mockBroker.EXPECT().ListDatabases(mock.Anything).Return(&milvuspb.ListDatabasesResponse{ + DbNames: []string{"db1"}, + }, nil) + mockBroker.EXPECT().ShowCollections(mock.Anything, mock.Anything).Return(&milvuspb.ShowCollectionsResponse{ + CollectionNames: []string{"coll1"}, + CollectionIds: []int64{1000}, + }, nil) + mockBroker.EXPECT().DescribeCollectionInternal(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{}, nil) + mockBroker.EXPECT().ShowPartitionsInternal(mock.Anything, mock.Anything).Return(nil, errors.New("show partitions failed, mocked")) + err := m.reloadCollectionsFromRootcoord(context.TODO(), mockBroker) + assert.Error(t, err) + }) + + t.Run("success", func(t *testing.T) { + m := &meta{ + collections: make(map[UniqueID]*collectionInfo), + } + mockBroker := broker.NewMockBroker(t) + + mockBroker.EXPECT().ListDatabases(mock.Anything).Return(&milvuspb.ListDatabasesResponse{ + DbNames: []string{"db1"}, + }, nil) + mockBroker.EXPECT().ShowCollections(mock.Anything, 
mock.Anything).Return(&milvuspb.ShowCollectionsResponse{ + CollectionNames: []string{"coll1"}, + CollectionIds: []int64{1000}, + }, nil) + mockBroker.EXPECT().DescribeCollectionInternal(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ + CollectionID: 1000, + }, nil) + mockBroker.EXPECT().ShowPartitionsInternal(mock.Anything, mock.Anything).Return([]int64{2000}, nil) + err := m.reloadCollectionsFromRootcoord(context.TODO(), mockBroker) + assert.NoError(t, err) + c := m.GetCollection(UniqueID(1000)) + assert.NotNil(t, c) + }) +} diff --git a/internal/datacoord/mock_channel_store.go b/internal/datacoord/mock_channel_store.go index e0e469fba733c..fc7cb51ef3e92 100644 --- a/internal/datacoord/mock_channel_store.go +++ b/internal/datacoord/mock_channel_store.go @@ -179,6 +179,50 @@ func (_c *MockRWChannelStore_GetNodeChannelCount_Call) RunAndReturn(run func(int return _c } +// GetNodeChannelsByCollectionID provides a mock function with given fields: collectionID +func (_m *MockRWChannelStore) GetNodeChannelsByCollectionID(collectionID int64) map[int64][]string { + ret := _m.Called(collectionID) + + var r0 map[int64][]string + if rf, ok := ret.Get(0).(func(int64) map[int64][]string); ok { + r0 = rf(collectionID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(map[int64][]string) + } + } + + return r0 +} + +// MockRWChannelStore_GetNodeChannelsByCollectionID_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetNodeChannelsByCollectionID' +type MockRWChannelStore_GetNodeChannelsByCollectionID_Call struct { + *mock.Call +} + +// GetNodeChannelsByCollectionID is a helper method to define mock.On call +// - collectionID int64 +func (_e *MockRWChannelStore_Expecter) GetNodeChannelsByCollectionID(collectionID interface{}) *MockRWChannelStore_GetNodeChannelsByCollectionID_Call { + return &MockRWChannelStore_GetNodeChannelsByCollectionID_Call{Call: _e.mock.On("GetNodeChannelsByCollectionID", collectionID)} +} + +func (_c *MockRWChannelStore_GetNodeChannelsByCollectionID_Call) Run(run func(collectionID int64)) *MockRWChannelStore_GetNodeChannelsByCollectionID_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(int64)) + }) + return _c +} + +func (_c *MockRWChannelStore_GetNodeChannelsByCollectionID_Call) Return(_a0 map[int64][]string) *MockRWChannelStore_GetNodeChannelsByCollectionID_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockRWChannelStore_GetNodeChannelsByCollectionID_Call) RunAndReturn(run func(int64) map[int64][]string) *MockRWChannelStore_GetNodeChannelsByCollectionID_Call { + _c.Call.Return(run) + return _c +} + // GetNodeChannelsBy provides a mock function with given fields: nodeSelector, channelSelectors func (_m *MockRWChannelStore) GetNodeChannelsBy(nodeSelector NodeSelector, channelSelectors ...ChannelSelector) []*NodeChannelInfo { _va := make([]interface{}, len(channelSelectors)) diff --git a/internal/datacoord/mock_cluster.go b/internal/datacoord/mock_cluster.go index e35f1e1fee0ab..e92ae8ecb3c28 100644 --- a/internal/datacoord/mock_cluster.go +++ b/internal/datacoord/mock_cluster.go @@ -74,8 +74,8 @@ type MockCluster_DropImport_Call struct { } // DropImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.DropImportRequest +// - nodeID int64 +// - in *datapb.DropImportRequest func (_e *MockCluster_Expecter) DropImport(nodeID interface{}, in interface{}) *MockCluster_DropImport_Call { return &MockCluster_DropImport_Call{Call: _e.mock.On("DropImport", nodeID, 
in)} } @@ -117,10 +117,10 @@ type MockCluster_Flush_Call struct { } // Flush is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - channel string -// - segments []*datapb.SegmentInfo +// - ctx context.Context +// - nodeID int64 +// - channel string +// - segments []*datapb.SegmentInfo func (_e *MockCluster_Expecter) Flush(ctx interface{}, nodeID interface{}, channel interface{}, segments interface{}) *MockCluster_Flush_Call { return &MockCluster_Flush_Call{Call: _e.mock.On("Flush", ctx, nodeID, channel, segments)} } @@ -162,10 +162,10 @@ type MockCluster_FlushChannels_Call struct { } // FlushChannels is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - flushTs uint64 -// - channels []string +// - ctx context.Context +// - nodeID int64 +// - flushTs uint64 +// - channels []string func (_e *MockCluster_Expecter) FlushChannels(ctx interface{}, nodeID interface{}, flushTs interface{}, channels interface{}) *MockCluster_FlushChannels_Call { return &MockCluster_FlushChannels_Call{Call: _e.mock.On("FlushChannels", ctx, nodeID, flushTs, channels)} } @@ -250,8 +250,8 @@ type MockCluster_ImportV2_Call struct { } // ImportV2 is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.ImportRequest +// - nodeID int64 +// - in *datapb.ImportRequest func (_e *MockCluster_Expecter) ImportV2(nodeID interface{}, in interface{}) *MockCluster_ImportV2_Call { return &MockCluster_ImportV2_Call{Call: _e.mock.On("ImportV2", nodeID, in)} } @@ -293,8 +293,8 @@ type MockCluster_PreImport_Call struct { } // PreImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.PreImportRequest +// - nodeID int64 +// - in *datapb.PreImportRequest func (_e *MockCluster_Expecter) PreImport(nodeID interface{}, in interface{}) *MockCluster_PreImport_Call { return &MockCluster_PreImport_Call{Call: _e.mock.On("PreImport", nodeID, in)} } @@ -348,8 +348,8 @@ type MockCluster_QueryImport_Call struct { } // QueryImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.QueryImportRequest +// - nodeID int64 +// - in *datapb.QueryImportRequest func (_e *MockCluster_Expecter) QueryImport(nodeID interface{}, in interface{}) *MockCluster_QueryImport_Call { return &MockCluster_QueryImport_Call{Call: _e.mock.On("QueryImport", nodeID, in)} } @@ -403,8 +403,8 @@ type MockCluster_QueryPreImport_Call struct { } // QueryPreImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.QueryPreImportRequest +// - nodeID int64 +// - in *datapb.QueryPreImportRequest func (_e *MockCluster_Expecter) QueryPreImport(nodeID interface{}, in interface{}) *MockCluster_QueryPreImport_Call { return &MockCluster_QueryPreImport_Call{Call: _e.mock.On("QueryPreImport", nodeID, in)} } @@ -489,7 +489,7 @@ type MockCluster_Register_Call struct { } // Register is a helper method to define mock.On call -// - node *NodeInfo +// - node *NodeInfo func (_e *MockCluster_Expecter) Register(node interface{}) *MockCluster_Register_Call { return &MockCluster_Register_Call{Call: _e.mock.On("Register", node)} } @@ -531,8 +531,8 @@ type MockCluster_Startup_Call struct { } // Startup is a helper method to define mock.On call -// - ctx context.Context -// - nodes []*NodeInfo +// - ctx context.Context +// - nodes []*NodeInfo func (_e *MockCluster_Expecter) Startup(ctx interface{}, nodes interface{}) *MockCluster_Startup_Call { return &MockCluster_Startup_Call{Call: _e.mock.On("Startup", ctx, nodes)} } @@ -574,7 
+574,7 @@ type MockCluster_UnRegister_Call struct { } // UnRegister is a helper method to define mock.On call -// - node *NodeInfo +// - node *NodeInfo func (_e *MockCluster_Expecter) UnRegister(node interface{}) *MockCluster_UnRegister_Call { return &MockCluster_UnRegister_Call{Call: _e.mock.On("UnRegister", node)} } @@ -616,8 +616,8 @@ type MockCluster_Watch_Call struct { } // Watch is a helper method to define mock.On call -// - ctx context.Context -// - ch RWChannel +// - ctx context.Context +// - ch RWChannel func (_e *MockCluster_Expecter) Watch(ctx interface{}, ch interface{}) *MockCluster_Watch_Call { return &MockCluster_Watch_Call{Call: _e.mock.On("Watch", ctx, ch)} } diff --git a/internal/datacoord/mock_compaction_plan_context.go b/internal/datacoord/mock_compaction_plan_context.go index b22041fb7f169..3b399474afe23 100644 --- a/internal/datacoord/mock_compaction_plan_context.go +++ b/internal/datacoord/mock_compaction_plan_context.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.32.4. DO NOT EDIT. +// Code generated by mockery v2.30.1. DO NOT EDIT. package datacoord @@ -21,17 +21,8 @@ func (_m *MockCompactionPlanContext) EXPECT() *MockCompactionPlanContext_Expecte } // execCompactionPlan provides a mock function with given fields: signal, plan -func (_m *MockCompactionPlanContext) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) error { - ret := _m.Called(signal, plan) - - var r0 error - if rf, ok := ret.Get(0).(func(*compactionSignal, *datapb.CompactionPlan) error); ok { - r0 = rf(signal, plan) - } else { - r0 = ret.Error(0) - } - - return r0 +func (_m *MockCompactionPlanContext) execCompactionPlan(signal *compactionSignal, plan *datapb.CompactionPlan) { + _m.Called(signal, plan) } // MockCompactionPlanContext_execCompactionPlan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'execCompactionPlan' @@ -40,8 +31,8 @@ type MockCompactionPlanContext_execCompactionPlan_Call struct { } // execCompactionPlan is a helper method to define mock.On call -// - signal *compactionSignal -// - plan *datapb.CompactionPlan +// - signal *compactionSignal +// - plan *datapb.CompactionPlan func (_e *MockCompactionPlanContext_Expecter) execCompactionPlan(signal interface{}, plan interface{}) *MockCompactionPlanContext_execCompactionPlan_Call { return &MockCompactionPlanContext_execCompactionPlan_Call{Call: _e.mock.On("execCompactionPlan", signal, plan)} } @@ -53,12 +44,12 @@ func (_c *MockCompactionPlanContext_execCompactionPlan_Call) Run(run func(signal return _c } -func (_c *MockCompactionPlanContext_execCompactionPlan_Call) Return(_a0 error) *MockCompactionPlanContext_execCompactionPlan_Call { - _c.Call.Return(_a0) +func (_c *MockCompactionPlanContext_execCompactionPlan_Call) Return() *MockCompactionPlanContext_execCompactionPlan_Call { + _c.Call.Return() return _c } -func (_c *MockCompactionPlanContext_execCompactionPlan_Call) RunAndReturn(run func(*compactionSignal, *datapb.CompactionPlan) error) *MockCompactionPlanContext_execCompactionPlan_Call { +func (_c *MockCompactionPlanContext_execCompactionPlan_Call) RunAndReturn(run func(*compactionSignal, *datapb.CompactionPlan)) *MockCompactionPlanContext_execCompactionPlan_Call { _c.Call.Return(run) return _c } @@ -85,7 +76,7 @@ type MockCompactionPlanContext_getCompaction_Call struct { } // getCompaction is a helper method to define mock.On call -// - planID int64 +// - planID int64 func (_e *MockCompactionPlanContext_Expecter) getCompaction(planID interface{}) 
*MockCompactionPlanContext_getCompaction_Call { return &MockCompactionPlanContext_getCompaction_Call{Call: _e.mock.On("getCompaction", planID)} } @@ -129,7 +120,7 @@ type MockCompactionPlanContext_getCompactionTasksBySignalID_Call struct { } // getCompactionTasksBySignalID is a helper method to define mock.On call -// - signalID int64 +// - signalID int64 func (_e *MockCompactionPlanContext_Expecter) getCompactionTasksBySignalID(signalID interface{}) *MockCompactionPlanContext_getCompactionTasksBySignalID_Call { return &MockCompactionPlanContext_getCompactionTasksBySignalID_Call{Call: _e.mock.On("getCompactionTasksBySignalID", signalID)} } @@ -203,7 +194,7 @@ type MockCompactionPlanContext_removeTasksByChannel_Call struct { } // removeTasksByChannel is a helper method to define mock.On call -// - channel string +// - channel string func (_e *MockCompactionPlanContext_Expecter) removeTasksByChannel(channel interface{}) *MockCompactionPlanContext_removeTasksByChannel_Call { return &MockCompactionPlanContext_removeTasksByChannel_Call{Call: _e.mock.On("removeTasksByChannel", channel)} } @@ -309,7 +300,7 @@ type MockCompactionPlanContext_updateCompaction_Call struct { } // updateCompaction is a helper method to define mock.On call -// - ts uint64 +// - ts uint64 func (_e *MockCompactionPlanContext_Expecter) updateCompaction(ts interface{}) *MockCompactionPlanContext_updateCompaction_Call { return &MockCompactionPlanContext_updateCompaction_Call{Call: _e.mock.On("updateCompaction", ts)} } diff --git a/internal/datacoord/mock_session_manager.go b/internal/datacoord/mock_session_manager.go index a7d8e7f679c59..aea14b219ce03 100644 --- a/internal/datacoord/mock_session_manager.go +++ b/internal/datacoord/mock_session_manager.go @@ -35,7 +35,7 @@ type MockSessionManager_AddSession_Call struct { } // AddSession is a helper method to define mock.On call -// - node *NodeInfo +// - node *NodeInfo func (_e *MockSessionManager_Expecter) AddSession(node interface{}) *MockSessionManager_AddSession_Call { return &MockSessionManager_AddSession_Call{Call: _e.mock.On("AddSession", node)} } @@ -89,9 +89,9 @@ type MockSessionManager_CheckChannelOperationProgress_Call struct { } // CheckChannelOperationProgress is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - info *datapb.ChannelWatchInfo +// - ctx context.Context +// - nodeID int64 +// - info *datapb.ChannelWatchInfo func (_e *MockSessionManager_Expecter) CheckChannelOperationProgress(ctx interface{}, nodeID interface{}, info interface{}) *MockSessionManager_CheckChannelOperationProgress_Call { return &MockSessionManager_CheckChannelOperationProgress_Call{Call: _e.mock.On("CheckChannelOperationProgress", ctx, nodeID, info)} } @@ -133,7 +133,7 @@ type MockSessionManager_CheckHealth_Call struct { } // CheckHealth is a helper method to define mock.On call -// - ctx context.Context +// - ctx context.Context func (_e *MockSessionManager_Expecter) CheckHealth(ctx interface{}) *MockSessionManager_CheckHealth_Call { return &MockSessionManager_CheckHealth_Call{Call: _e.mock.On("CheckHealth", ctx)} } @@ -207,9 +207,9 @@ type MockSessionManager_Compaction_Call struct { } // Compaction is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - plan *datapb.CompactionPlan +// - ctx context.Context +// - nodeID int64 +// - plan *datapb.CompactionPlan func (_e *MockSessionManager_Expecter) Compaction(ctx interface{}, nodeID interface{}, plan interface{}) *MockSessionManager_Compaction_Call { return 
&MockSessionManager_Compaction_Call{Call: _e.mock.On("Compaction", ctx, nodeID, plan)} } @@ -242,7 +242,7 @@ type MockSessionManager_DeleteSession_Call struct { } // DeleteSession is a helper method to define mock.On call -// - node *NodeInfo +// - node *NodeInfo func (_e *MockSessionManager_Expecter) DeleteSession(node interface{}) *MockSessionManager_DeleteSession_Call { return &MockSessionManager_DeleteSession_Call{Call: _e.mock.On("DeleteSession", node)} } @@ -284,8 +284,8 @@ type MockSessionManager_DropImport_Call struct { } // DropImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.DropImportRequest +// - nodeID int64 +// - in *datapb.DropImportRequest func (_e *MockSessionManager_Expecter) DropImport(nodeID interface{}, in interface{}) *MockSessionManager_DropImport_Call { return &MockSessionManager_DropImport_Call{Call: _e.mock.On("DropImport", nodeID, in)} } @@ -318,9 +318,9 @@ type MockSessionManager_Flush_Call struct { } // Flush is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - req *datapb.FlushSegmentsRequest +// - ctx context.Context +// - nodeID int64 +// - req *datapb.FlushSegmentsRequest func (_e *MockSessionManager_Expecter) Flush(ctx interface{}, nodeID interface{}, req interface{}) *MockSessionManager_Flush_Call { return &MockSessionManager_Flush_Call{Call: _e.mock.On("Flush", ctx, nodeID, req)} } @@ -362,9 +362,9 @@ type MockSessionManager_FlushChannels_Call struct { } // FlushChannels is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - req *datapb.FlushChannelsRequest +// - ctx context.Context +// - nodeID int64 +// - req *datapb.FlushChannelsRequest func (_e *MockSessionManager_Expecter) FlushChannels(ctx interface{}, nodeID interface{}, req interface{}) *MockSessionManager_FlushChannels_Call { return &MockSessionManager_FlushChannels_Call{Call: _e.mock.On("FlushChannels", ctx, nodeID, req)} } @@ -545,8 +545,8 @@ type MockSessionManager_ImportV2_Call struct { } // ImportV2 is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.ImportRequest +// - nodeID int64 +// - in *datapb.ImportRequest func (_e *MockSessionManager_Expecter) ImportV2(nodeID interface{}, in interface{}) *MockSessionManager_ImportV2_Call { return &MockSessionManager_ImportV2_Call{Call: _e.mock.On("ImportV2", nodeID, in)} } @@ -588,9 +588,9 @@ type MockSessionManager_NotifyChannelOperation_Call struct { } // NotifyChannelOperation is a helper method to define mock.On call -// - ctx context.Context -// - nodeID int64 -// - req *datapb.ChannelOperationsRequest +// - ctx context.Context +// - nodeID int64 +// - req *datapb.ChannelOperationsRequest func (_e *MockSessionManager_Expecter) NotifyChannelOperation(ctx interface{}, nodeID interface{}, req interface{}) *MockSessionManager_NotifyChannelOperation_Call { return &MockSessionManager_NotifyChannelOperation_Call{Call: _e.mock.On("NotifyChannelOperation", ctx, nodeID, req)} } @@ -632,8 +632,8 @@ type MockSessionManager_PreImport_Call struct { } // PreImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.PreImportRequest +// - nodeID int64 +// - in *datapb.PreImportRequest func (_e *MockSessionManager_Expecter) PreImport(nodeID interface{}, in interface{}) *MockSessionManager_PreImport_Call { return &MockSessionManager_PreImport_Call{Call: _e.mock.On("PreImport", nodeID, in)} } @@ -687,8 +687,8 @@ type MockSessionManager_QueryImport_Call struct { } // QueryImport is a helper method to 
define mock.On call -// - nodeID int64 -// - in *datapb.QueryImportRequest +// - nodeID int64 +// - in *datapb.QueryImportRequest func (_e *MockSessionManager_Expecter) QueryImport(nodeID interface{}, in interface{}) *MockSessionManager_QueryImport_Call { return &MockSessionManager_QueryImport_Call{Call: _e.mock.On("QueryImport", nodeID, in)} } @@ -742,8 +742,8 @@ type MockSessionManager_QueryPreImport_Call struct { } // QueryPreImport is a helper method to define mock.On call -// - nodeID int64 -// - in *datapb.QueryPreImportRequest +// - nodeID int64 +// - in *datapb.QueryPreImportRequest func (_e *MockSessionManager_Expecter) QueryPreImport(nodeID interface{}, in interface{}) *MockSessionManager_QueryPreImport_Call { return &MockSessionManager_QueryPreImport_Call{Call: _e.mock.On("QueryPreImport", nodeID, in)} } @@ -797,7 +797,7 @@ type MockSessionManager_QuerySlot_Call struct { } // QuerySlot is a helper method to define mock.On call -// - nodeID int64 +// - nodeID int64 func (_e *MockSessionManager_Expecter) QuerySlot(nodeID interface{}) *MockSessionManager_QuerySlot_Call { return &MockSessionManager_QuerySlot_Call{Call: _e.mock.On("QuerySlot", nodeID)} } @@ -839,8 +839,8 @@ type MockSessionManager_SyncSegments_Call struct { } // SyncSegments is a helper method to define mock.On call -// - nodeID int64 -// - req *datapb.SyncSegmentsRequest +// - nodeID int64 +// - req *datapb.SyncSegmentsRequest func (_e *MockSessionManager_Expecter) SyncSegments(nodeID interface{}, req interface{}) *MockSessionManager_SyncSegments_Call { return &MockSessionManager_SyncSegments_Call{Call: _e.mock.On("SyncSegments", nodeID, req)} } diff --git a/internal/datacoord/mock_test.go b/internal/datacoord/mock_test.go index bac8735fd394a..78d4ca4a78b58 100644 --- a/internal/datacoord/mock_test.go +++ b/internal/datacoord/mock_test.go @@ -461,7 +461,9 @@ func (m *mockRootCoordClient) DropDatabase(ctx context.Context, in *milvuspb.Dro } func (m *mockRootCoordClient) ListDatabases(ctx context.Context, in *milvuspb.ListDatabasesRequest, opts ...grpc.CallOption) (*milvuspb.ListDatabasesResponse, error) { - panic("not implemented") // TODO: Implement + return &milvuspb.ListDatabasesResponse{ + Status: merr.Success(), + }, nil } func (m *mockRootCoordClient) AlterDatabase(ctx context.Context, in *rootcoordpb.AlterDatabaseRequest, opts ...grpc.CallOption) (*commonpb.Status, error) { diff --git a/internal/datacoord/policy_test.go b/internal/datacoord/policy_test.go index 5d9d254d5f31d..15e85f204dc0d 100644 --- a/internal/datacoord/policy_test.go +++ b/internal/datacoord/policy_test.go @@ -37,7 +37,7 @@ func getChannel(name string, collID int64) *StateChannel { return &StateChannel{ Name: name, CollectionID: collID, - Info: &datapb.ChannelWatchInfo{}, + Info: &datapb.ChannelWatchInfo{Vchan: &datapb.VchannelInfo{}}, } } diff --git a/internal/datacoord/segment_info.go b/internal/datacoord/segment_info.go index 5946934e8828a..5f317fa70f58e 100644 --- a/internal/datacoord/segment_info.go +++ b/internal/datacoord/segment_info.go @@ -32,12 +32,17 @@ import ( // SegmentsInfo wraps a map, which maintains ID to SegmentInfo relation type SegmentsInfo struct { - segments map[UniqueID]*SegmentInfo - collSegments map[UniqueID]*CollectionSegments - compactionTo map[UniqueID]UniqueID // map the compact relation, value is the segment which `CompactFrom` contains key. 
+ segments map[UniqueID]*SegmentInfo + secondaryIndexes segmentInfoIndexes + compactionTo map[UniqueID]UniqueID // map the compact relation, value is the segment which `CompactFrom` contains key. // A segment can be compacted to only one segment finally in meta. } +type segmentInfoIndexes struct { + coll2Segments map[UniqueID]map[UniqueID]*SegmentInfo + channel2Segments map[string]map[UniqueID]*SegmentInfo +} + // SegmentInfo wraps datapb.SegmentInfo and patches some extra info on it type SegmentInfo struct { *datapb.SegmentInfo @@ -69,16 +74,15 @@ func NewSegmentInfo(info *datapb.SegmentInfo) *SegmentInfo { // note that no mutex is wrapped so external concurrent control is needed func NewSegmentsInfo() *SegmentsInfo { return &SegmentsInfo{ - segments: make(map[UniqueID]*SegmentInfo), - collSegments: make(map[UniqueID]*CollectionSegments), + segments: make(map[UniqueID]*SegmentInfo), + secondaryIndexes: segmentInfoIndexes{ + coll2Segments: make(map[UniqueID]map[UniqueID]*SegmentInfo), + channel2Segments: make(map[string]map[UniqueID]*SegmentInfo), + }, compactionTo: make(map[UniqueID]UniqueID), } } -type CollectionSegments struct { - segments map[int64]*SegmentInfo -} - // GetSegment returns SegmentInfo // the logPath in meta is empty func (s *SegmentsInfo) GetSegment(segmentID UniqueID) *SegmentInfo { @@ -96,24 +100,42 @@ func (s *SegmentsInfo) GetSegments() []*SegmentInfo { return lo.Values(s.segments) } +func (s *SegmentsInfo) getCandidates(criterion *segmentCriterion) map[UniqueID]*SegmentInfo { + if criterion.collectionID > 0 { + collSegments, ok := s.secondaryIndexes.coll2Segments[criterion.collectionID] + if !ok { + return nil + } + + // both collection id and channel are filters of criterion + if criterion.channel != "" { + return lo.OmitBy(collSegments, func(k UniqueID, v *SegmentInfo) bool { + return v.InsertChannel != criterion.channel + }) + } + return collSegments + } + + if criterion.channel != "" { + channelSegments, ok := s.secondaryIndexes.channel2Segments[criterion.channel] + if !ok { + return nil + } + return channelSegments + } + + return s.segments +} + func (s *SegmentsInfo) GetSegmentsBySelector(filters ...SegmentFilter) []*SegmentInfo { criterion := &segmentCriterion{} for _, filter := range filters { filter.AddFilter(criterion) } - var result []*SegmentInfo - var candidates []*SegmentInfo + // apply criterion - switch { - case criterion.collectionID > 0: - collSegments, ok := s.collSegments[criterion.collectionID] - if !ok { - return nil - } - candidates = lo.Values(collSegments.segments) - default: - candidates = lo.Values(s.segments) - } + candidates := s.getCandidates(criterion) + var result []*SegmentInfo for _, segment := range candidates { if criterion.Match(segment) { result = append(result, segment) @@ -122,6 +144,17 @@ func (s *SegmentsInfo) GetSegmentsBySelector(filters ...SegmentFilter) []*Segmen return result } +func (s *SegmentsInfo) GetRealSegmentsForChannel(channel string) []*SegmentInfo { + channelSegments := s.secondaryIndexes.channel2Segments[channel] + var result []*SegmentInfo + for _, segment := range channelSegments { + if !segment.GetIsFake() { + result = append(result, segment) + } + } + return result +} + // GetCompactionTo returns the segment that the provided segment is compacted to. // Return (nil, false) if given segmentID can not found in the meta. // Return (nil, true) if given segmentID can be found not no compaction to. 
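With the segmentInfoIndexes introduced above, segment selection first narrows candidates through coll2Segments (and then by channel) before the remaining per-segment filters run, and GetRealSegmentsForChannel serves the handler.go call sites earlier in this diff straight from channel2Segments. A short sketch of both, matching the usage in Test_meta_GetSegmentsWithChannel (the variable m stands for a populated *meta):

    // Flushed segments of collection 1 on channel "h1": collection and channel are
    // resolved via the secondary indexes, the state check is an ordinary filter.
    flushed := m.SelectSegments(
        WithCollection(1),
        WithChannel("h1"),
        SegmentFilterFunc(func(segment *SegmentInfo) bool {
            return segment.GetState() == commonpb.SegmentState_Flushed
        }),
    )

    // Equivalent to the old SelectSegments(InsertChannel == ch && !IsFake) pattern
    // removed from handler.go, but answered from the channel index.
    realSegments := m.GetRealSegmentsForChannel("h1")
    _, _ = flushed, realSegments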
@@ -144,7 +177,7 @@ func (s *SegmentsInfo) GetCompactionTo(fromSegmentID int64) (*SegmentInfo, bool) func (s *SegmentsInfo) DropSegment(segmentID UniqueID) { if segment, ok := s.segments[segmentID]; ok { s.deleteCompactTo(segment) - s.delCollection(segment) + s.removeSecondaryIndex(segment) delete(s.segments, segmentID) } } @@ -156,10 +189,10 @@ func (s *SegmentsInfo) SetSegment(segmentID UniqueID, segment *SegmentInfo) { if segment, ok := s.segments[segmentID]; ok { // Remove old segment compact to relation first. s.deleteCompactTo(segment) - s.delCollection(segment) + s.removeSecondaryIndex(segment) } s.segments[segmentID] = segment - s.addCollection(segment) + s.addSecondaryIndex(segment) s.addCompactTo(segment) } @@ -296,27 +329,35 @@ func (s *SegmentInfo) ShadowClone(opts ...SegmentInfoOption) *SegmentInfo { return cloned } -func (s *SegmentsInfo) addCollection(segment *SegmentInfo) { +func (s *SegmentsInfo) addSecondaryIndex(segment *SegmentInfo) { collID := segment.GetCollectionID() - collSegment, ok := s.collSegments[collID] - if !ok { - collSegment = &CollectionSegments{ - segments: make(map[UniqueID]*SegmentInfo), - } - s.collSegments[collID] = collSegment + channel := segment.GetInsertChannel() + if _, ok := s.secondaryIndexes.coll2Segments[collID]; !ok { + s.secondaryIndexes.coll2Segments[collID] = make(map[UniqueID]*SegmentInfo) + } + s.secondaryIndexes.coll2Segments[collID][segment.ID] = segment + + if _, ok := s.secondaryIndexes.channel2Segments[channel]; !ok { + s.secondaryIndexes.channel2Segments[channel] = make(map[UniqueID]*SegmentInfo) } - collSegment.segments[segment.GetID()] = segment + s.secondaryIndexes.channel2Segments[channel][segment.ID] = segment } -func (s *SegmentsInfo) delCollection(segment *SegmentInfo) { +func (s *SegmentsInfo) removeSecondaryIndex(segment *SegmentInfo) { collID := segment.GetCollectionID() - collSegment, ok := s.collSegments[collID] - if !ok { - return + channel := segment.GetInsertChannel() + if segments, ok := s.secondaryIndexes.coll2Segments[collID]; ok { + delete(segments, segment.ID) + if len(segments) == 0 { + delete(s.secondaryIndexes.coll2Segments, collID) + } } - delete(collSegment.segments, segment.GetID()) - if len(collSegment.segments) == 0 { - delete(s.collSegments, segment.GetCollectionID()) + + if segments, ok := s.secondaryIndexes.channel2Segments[channel]; ok { + delete(segments, segment.ID) + if len(segments) == 0 { + delete(s.secondaryIndexes.channel2Segments, channel) + } } } diff --git a/internal/datacoord/segment_operator.go b/internal/datacoord/segment_operator.go index 2d26f6d03d7d1..afd365e2dc825 100644 --- a/internal/datacoord/segment_operator.go +++ b/internal/datacoord/segment_operator.go @@ -31,6 +31,8 @@ func SetMaxRowCount(maxRow int64) SegmentOperator { type segmentCriterion struct { collectionID int64 + channel string + partitionID int64 others []SegmentFilter } @@ -62,6 +64,21 @@ func WithCollection(collectionID int64) SegmentFilter { return CollectionFilter(collectionID) } +type ChannelFilter string + +func (f ChannelFilter) Match(segment *SegmentInfo) bool { + return segment.GetInsertChannel() == string(f) +} + +func (f ChannelFilter) AddFilter(criterion *segmentCriterion) { + criterion.channel = string(f) +} + +// WithChannel WithCollection has a higher priority if both WithCollection and WithChannel are in condition together. 
+func WithChannel(channel string) SegmentFilter { + return ChannelFilter(channel) +} + type SegmentFilterFunc func(*SegmentInfo) bool func (f SegmentFilterFunc) Match(segment *SegmentInfo) bool { @@ -71,9 +88,3 @@ func (f SegmentFilterFunc) Match(segment *SegmentInfo) bool { func (f SegmentFilterFunc) AddFilter(criterion *segmentCriterion) { criterion.others = append(criterion.others, f) } - -func WithChannel(channel string) SegmentFilter { - return SegmentFilterFunc(func(si *SegmentInfo) bool { - return si.GetInsertChannel() == channel - }) -} diff --git a/internal/datacoord/server.go b/internal/datacoord/server.go index 50ccc8d37ca58..8498926b93339 100644 --- a/internal/datacoord/server.go +++ b/internal/datacoord/server.go @@ -127,6 +127,7 @@ type Server struct { compactionTrigger trigger compactionHandler compactionPlanContext compactionViewManager *CompactionViewManager + syncSegmentsScheduler *SyncSegmentsScheduler metricsCacheManager *metricsinfo.MetricsCacheManager @@ -393,6 +394,8 @@ func (s *Server) initDataCoord() error { s.importScheduler = NewImportScheduler(s.meta, s.cluster, s.allocator, s.importMeta, s.buildIndexCh) s.importChecker = NewImportChecker(s.meta, s.broker, s.cluster, s.allocator, s.segmentManager, s.importMeta) + s.syncSegmentsScheduler = newSyncSegmentsScheduler(s.meta, s.channelManager, s.sessionManager) + s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.ctx) log.Info("init datacoord done", zap.Int64("nodeID", paramtable.GetNodeID()), zap.String("Address", s.address)) @@ -524,7 +527,7 @@ func (s *Server) SetIndexNodeCreator(f func(context.Context, string, int64) (typ func (s *Server) createCompactionHandler() { s.compactionHandler = newCompactionPlanHandler(s.cluster, s.sessionManager, s.channelManager, s.meta, s.allocator) - triggerv2 := NewCompactionTriggerManager(s.allocator, s.compactionHandler) + triggerv2 := NewCompactionTriggerManager(s.allocator, s.handler, s.compactionHandler) s.compactionViewManager = NewCompactionViewManager(s.meta, triggerv2, s.allocator) } @@ -674,6 +677,14 @@ func (s *Server) initMeta(chunkManager storage.ChunkManager) error { if err != nil { return err } + + // Load collection information asynchronously + // HINT: please make sure this is the last step in the `reloadEtcdFn` function !!! 
+ go func() { + _ = retry.Do(s.ctx, func() error { + return s.meta.reloadCollectionsFromRootcoord(s.ctx, s.broker) + }, retry.Sleep(time.Second), retry.Attempts(connMetaMaxRetryTime)) + }() return nil } return retry.Do(s.ctx, reloadEtcdFn, retry.Attempts(connMetaMaxRetryTime)) @@ -704,6 +715,7 @@ func (s *Server) startServerLoop() { go s.importScheduler.Start() go s.importChecker.Start() s.garbageCollector.start() + s.syncSegmentsScheduler.Start() } // startDataNodeTtLoop start a goroutine to recv data node tt msg from msgstream @@ -1096,6 +1108,7 @@ func (s *Server) Stop() error { s.importScheduler.Close() s.importChecker.Close() + s.syncSegmentsScheduler.Stop() if Params.DataCoordCfg.EnableCompaction.GetAsBool() { s.stopCompactionTrigger() diff --git a/internal/datacoord/server_test.go b/internal/datacoord/server_test.go index c32dc623033df..feb3d0eabedef 100644 --- a/internal/datacoord/server_test.go +++ b/internal/datacoord/server_test.go @@ -42,6 +42,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/datacoord/broker" + etcdkv "github.com/milvus-io/milvus/internal/kv/etcd" "github.com/milvus-io/milvus/internal/metastore/model" "github.com/milvus-io/milvus/internal/mocks" "github.com/milvus-io/milvus/internal/proto/datapb" @@ -3018,6 +3019,12 @@ var globalTestTikv = tikv.SetupLocalTxn() func WithMeta(meta *meta) Option { return func(svr *Server) { svr.meta = meta + + svr.watchClient = etcdkv.NewEtcdKV(svr.etcdCli, Params.EtcdCfg.MetaRootPath.GetValue(), + etcdkv.WithRequestTimeout(paramtable.Get().ServiceParam.EtcdCfg.RequestTimeout.GetAsDuration(time.Millisecond))) + metaRootPath := Params.EtcdCfg.MetaRootPath.GetValue() + svr.kv = etcdkv.NewEtcdKV(svr.etcdCli, metaRootPath, + etcdkv.WithRequestTimeout(paramtable.Get().ServiceParam.EtcdCfg.RequestTimeout.GetAsDuration(time.Millisecond))) } } @@ -3049,6 +3056,9 @@ func newTestServer(t *testing.T, opts ...Option) *Server { svr.rootCoordClientCreator = func(ctx context.Context) (types.RootCoordClient, error) { return newMockRootCoordClient(), nil } + for _, opt := range opts { + opt(svr) + } err = svr.Init() assert.NoError(t, err) @@ -3072,10 +3082,6 @@ func newTestServer(t *testing.T, opts ...Option) *Server { close(signal) } - for _, opt := range opts { - opt(svr) - } - err = svr.Register() assert.NoError(t, err) <-signal diff --git a/internal/datacoord/services.go b/internal/datacoord/services.go index 49ee83fda364f..6a00585e8fc9e 100644 --- a/internal/datacoord/services.go +++ b/internal/datacoord/services.go @@ -618,6 +618,8 @@ func (s *Server) DropVirtualChannel(ctx context.Context, req *datapb.DropVirtual s.segmentManager.DropSegmentsOfChannel(ctx, channel) s.compactionHandler.removeTasksByChannel(channel) metrics.DataCoordCheckpointUnixSeconds.DeleteLabelValues(fmt.Sprint(paramtable.GetNodeID()), channel) + s.meta.MarkChannelCheckpointDropped(ctx, channel) + // no compaction triggered in Drop procedure return resp, nil } diff --git a/internal/datacoord/services_test.go b/internal/datacoord/services_test.go index 4db96c9a05f6b..ddb813acfa922 100644 --- a/internal/datacoord/services_test.go +++ b/internal/datacoord/services_test.go @@ -44,7 +44,9 @@ type ServerSuite struct { func WithChannelManager(cm ChannelManager) Option { return func(svr *Server) { + svr.sessionManager = NewSessionManagerImpl(withSessionCreator(svr.dataNodeCreator)) svr.channelManager = cm + svr.cluster = NewClusterImpl(svr.sessionManager, 
svr.channelManager) } } diff --git a/internal/datacoord/sync_segments_scheduler.go b/internal/datacoord/sync_segments_scheduler.go new file mode 100644 index 0000000000000..f5224f7110b9e --- /dev/null +++ b/internal/datacoord/sync_segments_scheduler.go @@ -0,0 +1,149 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package datacoord + +import ( + "sync" + "time" + + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/logutil" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +type SyncSegmentsScheduler struct { + quit chan struct{} + wg sync.WaitGroup + + meta *meta + channelManager ChannelManager + sessions SessionManager +} + +func newSyncSegmentsScheduler(m *meta, channelManager ChannelManager, sessions SessionManager) *SyncSegmentsScheduler { + return &SyncSegmentsScheduler{ + quit: make(chan struct{}), + wg: sync.WaitGroup{}, + meta: m, + channelManager: channelManager, + sessions: sessions, + } +} + +func (sss *SyncSegmentsScheduler) Start() { + sss.quit = make(chan struct{}) + sss.wg.Add(1) + + go func() { + defer logutil.LogPanic() + ticker := time.NewTicker(Params.DataCoordCfg.SyncSegmentsInterval.GetAsDuration(time.Second)) + defer sss.wg.Done() + + for { + select { + case <-sss.quit: + log.Info("sync segments scheduler quit") + ticker.Stop() + return + case <-ticker.C: + sss.SyncSegmentsForCollections() + } + } + }() + log.Info("SyncSegmentsScheduler started...") +} + +func (sss *SyncSegmentsScheduler) Stop() { + close(sss.quit) + sss.wg.Wait() +} + +func (sss *SyncSegmentsScheduler) SyncSegmentsForCollections() { + collIDs := sss.meta.ListCollections() + for _, collID := range collIDs { + collInfo := sss.meta.GetCollection(collID) + if collInfo == nil { + log.Warn("collection info is nil, skip it", zap.Int64("collectionID", collID)) + continue + } + pkField, err := typeutil.GetPrimaryFieldSchema(collInfo.Schema) + if err != nil { + log.Warn("get primary field from schema failed", zap.Int64("collectionID", collID), + zap.Error(err)) + continue + } + for _, channelName := range collInfo.VChannelNames { + nodeID, err := sss.channelManager.FindWatcher(channelName) + if err != nil { + log.Warn("find watcher for channel failed", zap.Int64("collectionID", collID), + zap.String("channelName", channelName), zap.Error(err)) + continue + } + for _, partitionID := range collInfo.Partitions { + if err := sss.SyncSegments(collID, partitionID, channelName, nodeID, pkField.GetFieldID()); err != nil { + log.Warn("sync segment with channel failed, retry next ticker", + zap.Int64("collectionID", collID), + zap.Int64("partitionID", partitionID), + zap.String("channel", channelName), + zap.Error(err)) + continue + } + } + } + } 
+} + +func (sss *SyncSegmentsScheduler) SyncSegments(collectionID, partitionID int64, channelName string, nodeID, pkFieldID int64) error { + log := log.With(zap.Int64("collectionID", collectionID), zap.Int64("partitionID", partitionID), + zap.String("channelName", channelName), zap.Int64("nodeID", nodeID)) + segments := sss.meta.SelectSegments(WithChannel(channelName), SegmentFilterFunc(func(info *SegmentInfo) bool { + return info.GetPartitionID() == partitionID && isSegmentHealthy(info) + })) + req := &datapb.SyncSegmentsRequest{ + ChannelName: channelName, + PartitionId: partitionID, + CollectionId: collectionID, + SegmentInfos: make(map[int64]*datapb.SyncSegmentInfo), + } + + for _, seg := range segments { + for _, statsLog := range seg.GetStatslogs() { + if statsLog.GetFieldID() == pkFieldID { + req.SegmentInfos[seg.ID] = &datapb.SyncSegmentInfo{ + SegmentId: seg.GetID(), + PkStatsLog: statsLog, + State: seg.GetState(), + Level: seg.GetLevel(), + NumOfRows: seg.GetNumOfRows(), + } + } + } + } + + if err := sss.sessions.SyncSegments(nodeID, req); err != nil { + log.Warn("fail to sync segments with node", zap.Error(err)) + return err + } + log.Info("sync segments success", zap.Int64s("segments", lo.Map(segments, func(t *SegmentInfo, i int) int64 { + return t.GetID() + }))) + return nil +} diff --git a/internal/datacoord/sync_segments_scheduler_test.go b/internal/datacoord/sync_segments_scheduler_test.go new file mode 100644 index 0000000000000..53ea0988dd740 --- /dev/null +++ b/internal/datacoord/sync_segments_scheduler_test.go @@ -0,0 +1,371 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
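
The SyncSegmentsScheduler added above is a plain periodic worker: Start launches a single goroutine driven by a ticker configured from Params.DataCoordCfg.SyncSegmentsInterval, each tick calls SyncSegmentsForCollections, and Stop closes the quit channel and waits on the WaitGroup. Below is a minimal, self-contained sketch of that same Start/Stop shape; the names (periodicRunner, task) are illustrative and not part of this change:

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

// periodicRunner mirrors the Start/Stop shape of SyncSegmentsScheduler:
// a quit channel signals shutdown, a WaitGroup lets Stop wait for the loop.
type periodicRunner struct {
	quit chan struct{}
	wg   sync.WaitGroup
}

func (r *periodicRunner) Start(interval time.Duration, task func()) {
	r.quit = make(chan struct{})
	r.wg.Add(1)
	go func() {
		defer r.wg.Done()
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			select {
			case <-r.quit:
				return // Stop() closed the quit channel
			case <-ticker.C:
				task() // e.g. SyncSegmentsForCollections()
			}
		}
	}()
}

func (r *periodicRunner) Stop() {
	close(r.quit)
	r.wg.Wait()
}

func main() {
	var r periodicRunner
	r.Start(100*time.Millisecond, func() { fmt.Println("tick") })
	time.Sleep(350 * time.Millisecond)
	r.Stop()
}
```

The scheduler itself then fans out per collection, vchannel, and partition, sending each watching DataNode a SyncSegmentsRequest that carries only the primary-key stats log for every healthy segment of that partition.
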
+ +package datacoord + +import ( + "sync/atomic" + "testing" + + "github.com/cockroachdb/errors" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/pkg/util/lock" +) + +type SyncSegmentsSchedulerSuite struct { + suite.Suite + + m *meta + new atomic.Int64 + old atomic.Int64 +} + +func Test_SyncSegmentsSchedulerSuite(t *testing.T) { + suite.Run(t, new(SyncSegmentsSchedulerSuite)) +} + +func (s *SyncSegmentsSchedulerSuite) initParams() { + s.m = &meta{ + RWMutex: lock.RWMutex{}, + collections: map[UniqueID]*collectionInfo{ + 1: { + ID: 1, + Schema: &schemapb.CollectionSchema{ + Name: "coll1", + Fields: []*schemapb.FieldSchema{ + { + FieldID: 100, + Name: "pk", + IsPrimaryKey: true, + Description: "", + DataType: schemapb.DataType_Int64, + }, + { + FieldID: 101, + Name: "vec", + IsPrimaryKey: false, + Description: "", + DataType: schemapb.DataType_FloatVector, + }, + }, + }, + Partitions: []int64{2, 3}, + VChannelNames: []string{"channel1", "channel2"}, + }, + 2: nil, + }, + segments: &SegmentsInfo{ + secondaryIndexes: segmentInfoIndexes{ + channel2Segments: map[string]map[UniqueID]*SegmentInfo{ + "channel1": { + 5: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 5, + CollectionID: 1, + PartitionID: 2, + InsertChannel: "channel1", + NumOfRows: 3000, + State: commonpb.SegmentState_Dropped, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 1, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 2, + }, + }, + }, + }, + }, + }, + 6: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 6, + CollectionID: 1, + PartitionID: 3, + InsertChannel: "channel1", + NumOfRows: 3000, + State: commonpb.SegmentState_Dropped, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 3, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 4, + }, + }, + }, + }, + }, + }, + 9: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 9, + CollectionID: 1, + PartitionID: 2, + InsertChannel: "channel1", + NumOfRows: 3000, + State: commonpb.SegmentState_Flushed, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 9, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 10, + }, + }, + }, + }, + CompactionFrom: []int64{5}, + }, + }, + 10: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 10, + CollectionID: 1, + PartitionID: 3, + InsertChannel: "channel1", + NumOfRows: 3000, + State: commonpb.SegmentState_Flushed, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 7, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 8, + }, + }, + }, + }, + CompactionFrom: []int64{6}, + }, + }, + }, + "channel2": { + 7: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 7, + CollectionID: 1, + PartitionID: 2, + InsertChannel: "channel2", + NumOfRows: 3000, + State: commonpb.SegmentState_Dropped, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 5, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 6, + }, + }, + }, + }, + }, + }, + 8: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 8, + CollectionID: 1, + PartitionID: 3, + InsertChannel: "channel2", + NumOfRows: 3000, + State: commonpb.SegmentState_Dropped, + 
Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 7, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 8, + }, + }, + }, + }, + }, + }, + 11: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 11, + CollectionID: 1, + PartitionID: 2, + InsertChannel: "channel2", + NumOfRows: 3000, + State: commonpb.SegmentState_Flushed, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 5, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 6, + }, + }, + }, + }, + CompactionFrom: []int64{7}, + }, + }, + 12: { + SegmentInfo: &datapb.SegmentInfo{ + ID: 12, + CollectionID: 1, + PartitionID: 3, + InsertChannel: "channel2", + NumOfRows: 3000, + State: commonpb.SegmentState_Flushed, + Statslogs: []*datapb.FieldBinlog{ + { + FieldID: 100, + Binlogs: []*datapb.Binlog{ + { + LogID: 7, + }, + }, + }, + { + FieldID: 101, + Binlogs: []*datapb.Binlog{ + { + LogID: 8, + }, + }, + }, + }, + CompactionFrom: []int64{8}, + }, + }, + }, + }, + }, + }, + } +} + +func (s *SyncSegmentsSchedulerSuite) SetupTest() { + s.initParams() +} + +func (s *SyncSegmentsSchedulerSuite) Test_newSyncSegmentsScheduler() { + cm := NewMockChannelManager(s.T()) + cm.EXPECT().FindWatcher(mock.Anything).Return(100, nil) + + sm := NewMockSessionManager(s.T()) + sm.EXPECT().SyncSegments(mock.Anything, mock.Anything).RunAndReturn(func(i int64, request *datapb.SyncSegmentsRequest) error { + for _, seg := range request.GetSegmentInfos() { + if seg.GetState() == commonpb.SegmentState_Flushed { + s.new.Add(1) + } + if seg.GetState() == commonpb.SegmentState_Dropped { + s.old.Add(1) + } + } + return nil + }) + + Params.DataCoordCfg.SyncSegmentsInterval.SwapTempValue("1") + defer Params.DataCoordCfg.SyncSegmentsInterval.SwapTempValue("600") + sss := newSyncSegmentsScheduler(s.m, cm, sm) + sss.Start() + + // 2 channels, 2 partitions, 2 segments + // no longer sync dropped segments + for s.new.Load() < 4 { + } + sss.Stop() +} + +func (s *SyncSegmentsSchedulerSuite) Test_SyncSegmentsFail() { + cm := NewMockChannelManager(s.T()) + sm := NewMockSessionManager(s.T()) + + sss := newSyncSegmentsScheduler(s.m, cm, sm) + + s.Run("pk not found", func() { + sss.meta.collections[1].Schema.Fields[0].IsPrimaryKey = false + sss.SyncSegmentsForCollections() + sss.meta.collections[1].Schema.Fields[0].IsPrimaryKey = true + }) + + s.Run("find watcher failed", func() { + cm.EXPECT().FindWatcher(mock.Anything).Return(0, errors.New("mock error")).Twice() + sss.SyncSegmentsForCollections() + }) + + s.Run("sync segment failed", func() { + cm.EXPECT().FindWatcher(mock.Anything).Return(100, nil) + sm.EXPECT().SyncSegments(mock.Anything, mock.Anything).Return(errors.New("mock error")) + sss.SyncSegmentsForCollections() + }) +} diff --git a/internal/datanode/binlog_io.go b/internal/datanode/binlog_io.go deleted file mode 100644 index 506c614a1c9b9..0000000000000 --- a/internal/datanode/binlog_io.go +++ /dev/null @@ -1,249 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package datanode - -import ( - "context" - "strconv" - - "github.com/cockroachdb/errors" - "go.opentelemetry.io/otel" - "go.uber.org/zap" - - "github.com/milvus-io/milvus/internal/datanode/allocator" - "github.com/milvus-io/milvus/internal/datanode/io" - "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/log" - "github.com/milvus-io/milvus/pkg/util/metautil" - "github.com/milvus-io/milvus/pkg/util/typeutil" -) - -var ( - errUploadToBlobStorage = errors.New("upload to blob storage wrong") - errDownloadFromBlobStorage = errors.New("download from blob storage wrong") - // errStart used for retry start - errStart = errors.New("start") -) - -func downloadBlobs(ctx context.Context, b io.BinlogIO, paths []string) ([]*Blob, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "downloadBlobs") - defer span.End() - log.Debug("down load", zap.Strings("path", paths)) - bytes, err := b.Download(ctx, paths) - if err != nil { - log.Warn("ctx done when downloading kvs from blob storage", zap.Strings("paths", paths)) - return nil, errDownloadFromBlobStorage - } - resp := make([]*Blob, len(paths)) - if len(paths) == 0 { - return resp, nil - } - for i := range bytes { - resp[i] = &Blob{Key: paths[i], Value: bytes[i]} - } - return resp, nil -} - -// genDeltaBlobs returns key, value -func genDeltaBlobs(b io.BinlogIO, allocator allocator.Allocator, data *DeleteData, collID, partID, segID UniqueID) (string, []byte, error) { - dCodec := storage.NewDeleteCodec() - - blob, err := dCodec.Serialize(collID, partID, segID, data) - if err != nil { - return "", nil, err - } - - idx, err := allocator.AllocOne() - if err != nil { - return "", nil, err - } - k := metautil.JoinIDPath(collID, partID, segID, idx) - key := b.JoinFullPath(common.SegmentDeltaLogPath, k) - - return key, blob.GetValue(), nil -} - -// genInsertBlobs returns insert-paths and save blob to kvs -func genInsertBlobs(b io.BinlogIO, allocator allocator.Allocator, data []*Blob, collectionID, partID, segID UniqueID, kvs map[string][]byte, -) (map[UniqueID]*datapb.FieldBinlog, error) { - inpaths := make(map[UniqueID]*datapb.FieldBinlog) - notifyGenIdx := make(chan struct{}) - defer close(notifyGenIdx) - - generator, err := allocator.GetGenerator(len(data), notifyGenIdx) - if err != nil { - return nil, err - } - - for _, blob := range data { - // Blob Key is generated by Serialize from int64 fieldID in collection schema, which won't raise error in ParseInt - fID, _ := strconv.ParseInt(blob.GetKey(), 10, 64) - k := metautil.JoinIDPath(collectionID, partID, segID, fID, <-generator) - key := b.JoinFullPath(common.SegmentInsertLogPath, k) - value := blob.GetValue() - fileLen := len(value) - - kvs[key] = value - inpaths[fID] = &datapb.FieldBinlog{ - FieldID: fID, - Binlogs: []*datapb.Binlog{{LogSize: int64(fileLen), LogPath: key, EntriesNum: blob.RowNum, MemorySize: blob.GetMemorySize()}}, - } - } - - return inpaths, nil -} - -// genStatBlobs return stats log paths and save blob to kvs -func genStatBlobs(b 
io.BinlogIO, allocator allocator.Allocator, stats *storage.PrimaryKeyStats, collectionID, partID, segID UniqueID, iCodec *storage.InsertCodec, kvs map[string][]byte, totRows int64) (map[UniqueID]*datapb.FieldBinlog, error) { - statBlob, err := iCodec.SerializePkStats(stats, totRows) - if err != nil { - return nil, err - } - statPaths := make(map[UniqueID]*datapb.FieldBinlog) - - idx, err := allocator.AllocOne() - if err != nil { - return nil, err - } - fID, _ := strconv.ParseInt(statBlob.GetKey(), 10, 64) - k := metautil.JoinIDPath(collectionID, partID, segID, fID, idx) - key := b.JoinFullPath(common.SegmentStatslogPath, k) - value := statBlob.GetValue() - fileLen := len(value) - - kvs[key] = value - - statPaths[fID] = &datapb.FieldBinlog{ - FieldID: fID, - Binlogs: []*datapb.Binlog{{LogSize: int64(fileLen), LogPath: key, EntriesNum: totRows, MemorySize: int64(fileLen)}}, - } - return statPaths, nil -} - -// update stats log -// also update with insert data if not nil -func uploadStatsLog( - ctx context.Context, - b io.BinlogIO, - allocator allocator.Allocator, - collectionID UniqueID, - partID UniqueID, - segID UniqueID, - stats *storage.PrimaryKeyStats, - totRows int64, - iCodec *storage.InsertCodec, -) (map[UniqueID]*datapb.FieldBinlog, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "UploadStatslog") - defer span.End() - kvs := make(map[string][]byte) - - statPaths, err := genStatBlobs(b, allocator, stats, collectionID, partID, segID, iCodec, kvs, totRows) - if err != nil { - return nil, err - } - - err = b.Upload(ctx, kvs) - if err != nil { - return nil, err - } - - return statPaths, nil -} - -func uploadInsertLog( - ctx context.Context, - b io.BinlogIO, - allocator allocator.Allocator, - collectionID UniqueID, - partID UniqueID, - segID UniqueID, - data []*Blob, -) (map[UniqueID]*datapb.FieldBinlog, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "UploadInsertLog") - defer span.End() - kvs := make(map[string][]byte) - - if len(data) <= 0 || data[0].RowNum <= 0 { - log.Warn("binlog io uploading empty insert data", - zap.Int64("segmentID", segID), - zap.Int64("collectionID", collectionID), - ) - return nil, nil - } - - inpaths, err := genInsertBlobs(b, allocator, data, collectionID, partID, segID, kvs) - if err != nil { - return nil, err - } - - err = b.Upload(ctx, kvs) - if err != nil { - return nil, err - } - - return inpaths, nil -} - -func uploadDeltaLog( - ctx context.Context, - b io.BinlogIO, - allocator allocator.Allocator, - collectionID UniqueID, - partID UniqueID, - segID UniqueID, - dData *DeleteData, -) ([]*datapb.FieldBinlog, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "UploadDeltaLog") - defer span.End() - var ( - deltaInfo = make([]*datapb.FieldBinlog, 0) - kvs = make(map[string][]byte) - ) - - if dData.RowCount > 0 { - k, v, err := genDeltaBlobs(b, allocator, dData, collectionID, partID, segID) - if err != nil { - log.Warn("generate delta blobs wrong", - zap.Int64("collectionID", collectionID), - zap.Int64("segmentID", segID), - zap.Error(err)) - return nil, err - } - - kvs[k] = v - deltaInfo = append(deltaInfo, &datapb.FieldBinlog{ - FieldID: 0, // TODO: Not useful on deltalogs, FieldID shall be ID of primary key field - Binlogs: []*datapb.Binlog{{ - EntriesNum: dData.RowCount, - LogPath: k, - LogSize: int64(len(v)), - MemorySize: dData.Size(), - }}, - }) - } else { - return nil, nil - } - - err := b.Upload(ctx, kvs) - if err != nil { - return nil, err - } - - return deltaInfo, nil -} diff --git 
a/internal/datanode/binlog_io_test.go b/internal/datanode/binlog_io_test.go deleted file mode 100644 index 038978ac0464c..0000000000000 --- a/internal/datanode/binlog_io_test.go +++ /dev/null @@ -1,404 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package datanode - -import ( - "context" - "fmt" - "path" - "testing" - "time" - - "github.com/cockroachdb/errors" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - "go.uber.org/zap" - - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/internal/datanode/allocator" - "github.com/milvus-io/milvus/internal/datanode/io" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/log" -) - -var binlogTestDir = "/tmp/milvus_test/test_binlog_io" - -var validGeneratorFn = func(count int, done <-chan struct{}) <-chan UniqueID { - ret := make(chan UniqueID, count) - for i := 0; i < count; i++ { - ret <- int64(100 + i) - } - return ret -} - -func TestBinlogIOInterfaceMethods(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(binlogTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - - t.Run("Test download", func(t *testing.T) { - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - tests := []struct { - isvalid bool - ks []string // for preparation - - inctx context.Context - - description string - }{ - {true, []string{"a", "b", "c"}, context.TODO(), "valid input"}, - {false, nil, context.Background(), "cancel by context"}, - } - - for _, test := range tests { - t.Run(test.description, func(t *testing.T) { - if test.isvalid { - inkeys := []string{} - for _, k := range test.ks { - blob, key, err := prepareBlob(cm, k) - require.NoError(t, err) - assert.NotEmpty(t, blob) - inkeys = append(inkeys, key) - - loaded, err := downloadBlobs(test.inctx, binlogIO, []string{key}) - assert.NoError(t, err) - assert.ElementsMatch(t, blob, loaded[0].GetValue()) - } - - loaded, err := downloadBlobs(test.inctx, binlogIO, inkeys) - assert.NoError(t, err) - assert.Equal(t, len(test.ks), len(loaded)) - } else { - ctx, cancel := context.WithCancel(test.inctx) - cancel() - - _, err := downloadBlobs(ctx, binlogIO, []string{"test"}) - assert.EqualError(t, err, errDownloadFromBlobStorage.Error()) - } - }) - } - }) - - t.Run("Test download twice", func(t *testing.T) { - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - - ctx, cancel := context.WithTimeout(context.TODO(), time.Millisecond*20) - blobs, err := downloadBlobs(ctx, binlogIO, []string{"a"}) - assert.Error(t, err) - assert.Empty(t, blobs) - cancel() - }) - - t.Run("Test upload stats log err", func(t 
*testing.T) { - f := &MetaFactory{} - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_blobs", schemapb.DataType_Int64) - - t.Run("gen insert blob failed", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Call.Return(int64(0), fmt.Errorf("mock AllocOne error")) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - _, err := uploadStatsLog(context.Background(), binlogIO, alloc, meta.GetID(), 10, 1, genTestStat(meta), 10, iCodec) - assert.Error(t, err) - }) - }) - - t.Run("Test upload insert log err", func(t *testing.T) { - f := &MetaFactory{} - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_blobs", schemapb.DataType_Int64) - - t.Run("gen insert blob failed", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 10 - var segId int64 = 1 - iData := genInsertData(2) - blobs, err := iCodec.Serialize(10, 1, iData) - assert.NoError(t, err) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(nil, fmt.Errorf("mock err")) - _, err = uploadInsertLog(context.Background(), binlogIO, alloc, meta.GetID(), partId, segId, blobs) - assert.Error(t, err) - }) - - t.Run("upload failed", func(t *testing.T) { - mkc := &mockCm{errRead: true, errSave: true} - alloc := allocator.NewMockAllocator(t) - binlogIO := io.NewBinlogIO(mkc, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 1 - var segId int64 = 10 - iData := genInsertData(2) - blobs, err := iCodec.Serialize(10, 1, iData) - assert.NoError(t, err) - - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - - ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) - defer cancel() - - _, err = uploadInsertLog(ctx, binlogIO, alloc, meta.GetID(), partId, segId, blobs) - assert.Error(t, err) - }) - }) -} - -func prepareBlob(cm storage.ChunkManager, key string) ([]byte, string, error) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - k := path.Join(cm.RootPath(), "test_prepare_blob", key) - blob := []byte{1, 2, 3, 255, 188} - - err := cm.Write(ctx, k, blob[:]) - if err != nil { - return nil, "", err - } - return blob, k, nil -} - -func TestBinlogIOInnerMethods(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(binlogTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - - t.Run("Test genDeltaBlobs", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Call.Return(int64(11111), nil) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - f := &MetaFactory{} - meta := f.GetCollectionMeta(UniqueID(10002), "test_gen_blobs", schemapb.DataType_Int64) - - tests := []struct { - isvalid bool - deletepk storage.PrimaryKey - ts uint64 - - description string - }{ - {true, storage.NewInt64PrimaryKey(1), 1111111, "valid input"}, - } - - for _, test := range tests { - t.Run(test.description, func(t *testing.T) { - if test.isvalid { - k, v, err := genDeltaBlobs(binlogIO, alloc, &DeleteData{ - Pks: []storage.PrimaryKey{test.deletepk}, - Tss: []uint64{test.ts}, - }, meta.GetID(), 10, 1) - - assert.NoError(t, err) - assert.NotEmpty(t, k) - assert.NotEmpty(t, v) - - log.Debug("genDeltaBlobs returns", zap.String("key", k)) - } - }) - } - }) - - t.Run("Test 
genDeltaBlobs error", func(t *testing.T) { - pk := storage.NewInt64PrimaryKey(1) - - t.Run("Test serialize error", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - k, v, err := genDeltaBlobs(binlogIO, alloc, &DeleteData{Pks: []storage.PrimaryKey{pk}, Tss: []uint64{}}, 1, 1, 1) - assert.Error(t, err) - assert.Empty(t, k) - assert.Empty(t, v) - }) - - t.Run("Test AllocOne error", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Call.Return(int64(0), fmt.Errorf("mock AllocOne error")) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - k, v, err := genDeltaBlobs(binlogIO, alloc, &DeleteData{Pks: []storage.PrimaryKey{pk}, Tss: []uint64{1}}, 1, 1, 1) - assert.Error(t, err) - assert.Empty(t, k) - assert.Empty(t, v) - }) - }) - - t.Run("Test genInsertBlobs", func(t *testing.T) { - f := &MetaFactory{} - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - - tests := []struct { - pkType schemapb.DataType - description string - expectError bool - }{ - {schemapb.DataType_Int64, "int64PrimaryField", false}, - {schemapb.DataType_VarChar, "varCharPrimaryField", false}, - } - - for _, test := range tests { - t.Run(test.description, func(t *testing.T) { - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_blobs", test.pkType) - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 10 - var segId int64 = 1 - iData := genInsertData(2) - blobs, err := iCodec.Serialize(10, 1, iData) - assert.NoError(t, err) - kvs := make(map[string][]byte) - pin, err := genInsertBlobs(binlogIO, alloc, blobs, meta.GetID(), partId, segId, kvs) - - assert.NoError(t, err) - assert.Equal(t, 12, len(pin)) - assert.Equal(t, 12, len(kvs)) - - log.Debug("test paths", - zap.Int("kvs no.", len(kvs)), - zap.String("insert paths field0", pin[common.TimeStampField].GetBinlogs()[0].GetLogPath())) - }) - } - }) - - t.Run("Test genInsertBlobs error", func(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(binlogTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - - t.Run("GetGenerator error", func(t *testing.T) { - f := &MetaFactory{} - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_blobs", schemapb.DataType_Int64) - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 10 - var segId int64 = 1 - iData := genInsertData(2) - blobs, err := iCodec.Serialize(partId, segId, iData) - assert.NoError(t, err) - - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Return(nil, fmt.Errorf("mock GetGenerator error")) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - kvs := make(map[string][]byte) - - pin, err := genInsertBlobs(binlogIO, alloc, blobs, meta.GetID(), partId, segId, kvs) - - assert.Error(t, err) - assert.Empty(t, kvs) - assert.Empty(t, pin) - }) - }) - - t.Run("Test genStatsBlob", func(t *testing.T) { - f := &MetaFactory{} - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Return(0, nil) - - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - - tests := []struct { - pkType schemapb.DataType - description string - expectError bool - }{ - {schemapb.DataType_Int64, "int64PrimaryField", false}, - {schemapb.DataType_VarChar, "varCharPrimaryField", false}, - } - - for _, test := range 
tests { - t.Run(test.description, func(t *testing.T) { - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_stat_blobs", test.pkType) - iCodec := storage.NewInsertCodecWithSchema(meta) - - kvs := make(map[string][]byte) - stat, err := genStatBlobs(binlogIO, alloc, genTestStat(meta), meta.GetID(), 10, 1, iCodec, kvs, 0) - - assert.NoError(t, err) - assert.Equal(t, 1, len(stat)) - assert.Equal(t, 1, len(kvs)) - }) - } - }) - - t.Run("Test genStatsBlob error", func(t *testing.T) { - f := &MetaFactory{} - alloc := allocator.NewMockAllocator(t) - binlogIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - - t.Run("serialize error", func(t *testing.T) { - meta := f.GetCollectionMeta(UniqueID(10001), "test_gen_stat_blobs_error", schemapb.DataType_Int64) - iCodec := storage.NewInsertCodecWithSchema(meta) - - kvs := make(map[string][]byte) - _, err := genStatBlobs(binlogIO, alloc, nil, meta.GetID(), 10, 1, iCodec, kvs, 0) - assert.Error(t, err) - }) - }) -} - -type mockCm struct { - storage.ChunkManager - errRead bool - errSave bool - MultiReadReturn [][]byte - ReadReturn []byte -} - -var _ storage.ChunkManager = (*mockCm)(nil) - -func (mk *mockCm) RootPath() string { - return "mock_test" -} - -func (mk *mockCm) Write(ctx context.Context, filePath string, content []byte) error { - if mk.errSave { - return errors.New("mockKv save error") - } - return nil -} - -func (mk *mockCm) MultiWrite(ctx context.Context, contents map[string][]byte) error { - if mk.errSave { - return errors.New("mockKv save error") - } - return nil -} - -func (mk *mockCm) Read(ctx context.Context, filePath string) ([]byte, error) { - if mk.errRead { - return nil, errors.New("mockKv read error") - } - return mk.ReadReturn, nil -} - -func (mk *mockCm) MultiRead(ctx context.Context, filePaths []string) ([][]byte, error) { - if mk.MultiReadReturn != nil { - return mk.MultiReadReturn, nil - } - return [][]byte{[]byte("a")}, nil -} - -func (mk *mockCm) ReadWithPrefix(ctx context.Context, prefix string) ([]string, [][]byte, error) { - return nil, nil, nil -} - -func (mk *mockCm) Remove(ctx context.Context, key string) error { return nil } -func (mk *mockCm) MultiRemove(ctx context.Context, keys []string) error { return nil } -func (mk *mockCm) RemoveWithPrefix(ctx context.Context, key string) error { return nil } -func (mk *mockCm) Close() {} diff --git a/internal/datanode/broker/datacoord.go b/internal/datanode/broker/datacoord.go index e81afa8d0eba3..dc7a4f2febc5b 100644 --- a/internal/datanode/broker/datacoord.go +++ b/internal/datanode/broker/datacoord.go @@ -128,7 +128,7 @@ func (dc *dataCoordBroker) DropVirtualChannel(ctx context.Context, req *datapb.D resp, err := dc.client.DropVirtualChannel(ctx, req) if err := merr.CheckRPCCall(resp, err); err != nil { - log.Warn("failed to SaveBinlogPaths", zap.Error(err)) + log.Warn("failed to DropVirtualChannel", zap.Error(err)) return resp, err } diff --git a/internal/datanode/broker/mock_broker.go b/internal/datanode/broker/mock_broker.go index f8b731c80e281..ae735bff96dbe 100644 --- a/internal/datanode/broker/mock_broker.go +++ b/internal/datanode/broker/mock_broker.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.30.1. DO NOT EDIT. +// Code generated by mockery v2.32.4. DO NOT EDIT. 
package broker @@ -63,8 +63,8 @@ type MockBroker_AssignSegmentID_Call struct { } // AssignSegmentID is a helper method to define mock.On call -// - ctx context.Context -// - reqs ...*datapb.SegmentIDRequest +// - ctx context.Context +// - reqs ...*datapb.SegmentIDRequest func (_e *MockBroker_Expecter) AssignSegmentID(ctx interface{}, reqs ...interface{}) *MockBroker_AssignSegmentID_Call { return &MockBroker_AssignSegmentID_Call{Call: _e.mock.On("AssignSegmentID", append([]interface{}{ctx}, reqs...)...)} @@ -125,8 +125,8 @@ type MockBroker_DropVirtualChannel_Call struct { } // DropVirtualChannel is a helper method to define mock.On call -// - ctx context.Context -// - req *datapb.DropVirtualChannelRequest +// - ctx context.Context +// - req *datapb.DropVirtualChannelRequest func (_e *MockBroker_Expecter) DropVirtualChannel(ctx interface{}, req interface{}) *MockBroker_DropVirtualChannel_Call { return &MockBroker_DropVirtualChannel_Call{Call: _e.mock.On("DropVirtualChannel", ctx, req)} } @@ -180,8 +180,8 @@ type MockBroker_GetSegmentInfo_Call struct { } // GetSegmentInfo is a helper method to define mock.On call -// - ctx context.Context -// - segmentIDs []int64 +// - ctx context.Context +// - segmentIDs []int64 func (_e *MockBroker_Expecter) GetSegmentInfo(ctx interface{}, segmentIDs interface{}) *MockBroker_GetSegmentInfo_Call { return &MockBroker_GetSegmentInfo_Call{Call: _e.mock.On("GetSegmentInfo", ctx, segmentIDs)} } @@ -223,8 +223,8 @@ type MockBroker_ReportTimeTick_Call struct { } // ReportTimeTick is a helper method to define mock.On call -// - ctx context.Context -// - msgs []*msgpb.DataNodeTtMsg +// - ctx context.Context +// - msgs []*msgpb.DataNodeTtMsg func (_e *MockBroker_Expecter) ReportTimeTick(ctx interface{}, msgs interface{}) *MockBroker_ReportTimeTick_Call { return &MockBroker_ReportTimeTick_Call{Call: _e.mock.On("ReportTimeTick", ctx, msgs)} } @@ -266,8 +266,8 @@ type MockBroker_SaveBinlogPaths_Call struct { } // SaveBinlogPaths is a helper method to define mock.On call -// - ctx context.Context -// - req *datapb.SaveBinlogPathsRequest +// - ctx context.Context +// - req *datapb.SaveBinlogPathsRequest func (_e *MockBroker_Expecter) SaveBinlogPaths(ctx interface{}, req interface{}) *MockBroker_SaveBinlogPaths_Call { return &MockBroker_SaveBinlogPaths_Call{Call: _e.mock.On("SaveBinlogPaths", ctx, req)} } @@ -309,8 +309,8 @@ type MockBroker_UpdateChannelCheckpoint_Call struct { } // UpdateChannelCheckpoint is a helper method to define mock.On call -// - ctx context.Context -// - channelCPs []*msgpb.MsgPosition +// - ctx context.Context +// - channelCPs []*msgpb.MsgPosition func (_e *MockBroker_Expecter) UpdateChannelCheckpoint(ctx interface{}, channelCPs interface{}) *MockBroker_UpdateChannelCheckpoint_Call { return &MockBroker_UpdateChannelCheckpoint_Call{Call: _e.mock.On("UpdateChannelCheckpoint", ctx, channelCPs)} } @@ -352,8 +352,8 @@ type MockBroker_UpdateSegmentStatistics_Call struct { } // UpdateSegmentStatistics is a helper method to define mock.On call -// - ctx context.Context -// - req *datapb.UpdateSegmentStatisticsRequest +// - ctx context.Context +// - req *datapb.UpdateSegmentStatisticsRequest func (_e *MockBroker_Expecter) UpdateSegmentStatistics(ctx interface{}, req interface{}) *MockBroker_UpdateSegmentStatistics_Call { return &MockBroker_UpdateSegmentStatistics_Call{Call: _e.mock.On("UpdateSegmentStatistics", ctx, req)} } diff --git a/internal/datanode/channel_manager.go b/internal/datanode/channel_manager.go index 97ae15e714656..1fb3e4d4a01eb 100644 --- 
a/internal/datanode/channel_manager.go +++ b/internal/datanode/channel_manager.go @@ -32,7 +32,10 @@ import ( "github.com/milvus-io/milvus/pkg/util/typeutil" ) -type releaseFunc func(channel string) +type ( + releaseFunc func(channel string) + watchFunc func(ctx context.Context, dn *DataNode, info *datapb.ChannelWatchInfo, tickler *tickler) (*dataSyncService, error) +) type ChannelManager interface { Submit(info *datapb.ChannelWatchInfo) error @@ -206,7 +209,7 @@ func (m *ChannelManagerImpl) handleOpState(opState *opState) { } func (m *ChannelManagerImpl) getOrCreateRunner(channel string) *opRunner { - runner, loaded := m.opRunners.GetOrInsert(channel, NewOpRunner(channel, m.dn, m.releaseFunc, m.communicateCh)) + runner, loaded := m.opRunners.GetOrInsert(channel, NewOpRunner(channel, m.dn, m.releaseFunc, executeWatch, m.communicateCh)) if !loaded { runner.Start() } @@ -228,6 +231,7 @@ type opRunner struct { channel string dn *DataNode releaseFunc releaseFunc + watchFunc watchFunc guard sync.RWMutex allOps map[UniqueID]*opInfo // opID -> tickler @@ -238,11 +242,12 @@ type opRunner struct { closeWg sync.WaitGroup } -func NewOpRunner(channel string, dn *DataNode, f releaseFunc, resultCh chan *opState) *opRunner { +func NewOpRunner(channel string, dn *DataNode, releaseF releaseFunc, watchF watchFunc, resultCh chan *opState) *opRunner { return &opRunner{ channel: channel, dn: dn, - releaseFunc: f, + releaseFunc: releaseF, + watchFunc: watchF, opsInQueue: make(chan *datapb.ChannelWatchInfo, 10), allOps: make(map[UniqueID]*opInfo), resultCh: resultCh, @@ -333,16 +338,16 @@ func (r *opRunner) watchWithTimer(info *datapb.ChannelWatchInfo) *opState { opInfo.tickler = tickler var ( - successSig = make(chan struct{}, 1) - waiter sync.WaitGroup + successSig = make(chan struct{}, 1) + finishWaiter sync.WaitGroup ) watchTimeout := Params.DataCoordCfg.WatchTimeoutInterval.GetAsDuration(time.Second) ctx, cancel := context.WithTimeout(context.Background(), watchTimeout) defer cancel() - startTimer := func(wg *sync.WaitGroup) { - defer wg.Done() + startTimer := func(finishWg *sync.WaitGroup) { + defer finishWg.Done() timer := time.NewTimer(watchTimeout) defer timer.Stop() @@ -377,11 +382,12 @@ func (r *opRunner) watchWithTimer(info *datapb.ChannelWatchInfo) *opState { } } - waiter.Add(2) - go startTimer(&waiter) + finishWaiter.Add(2) + go startTimer(&finishWaiter) + go func() { - defer waiter.Done() - fg, err := executeWatch(ctx, r.dn, info, tickler) + defer finishWaiter.Done() + fg, err := r.watchFunc(ctx, r.dn, info, tickler) if err != nil { opState.state = datapb.ChannelWatchState_WatchFailure } else { @@ -391,7 +397,7 @@ func (r *opRunner) watchWithTimer(info *datapb.ChannelWatchInfo) *opState { } }() - waiter.Wait() + finishWaiter.Wait() return opState } @@ -402,13 +408,14 @@ func (r *opRunner) releaseWithTimer(releaseFunc releaseFunc, channel string, opI opID: opID, } var ( - successSig = make(chan struct{}, 1) - waiter sync.WaitGroup + successSig = make(chan struct{}, 1) + finishWaiter sync.WaitGroup ) log := log.With(zap.Int64("opID", opID), zap.String("channel", channel)) - startTimer := func(wg *sync.WaitGroup) { - defer wg.Done() + startTimer := func(finishWaiter *sync.WaitGroup) { + defer finishWaiter.Done() + releaseTimeout := Params.DataCoordCfg.WatchTimeoutInterval.GetAsDuration(time.Second) timer := time.NewTimer(releaseTimeout) defer timer.Stop() @@ -435,8 +442,8 @@ func (r *opRunner) releaseWithTimer(releaseFunc releaseFunc, channel string, opI } } - waiter.Add(1) - go startTimer(&waiter) + 
finishWaiter.Add(1) + go startTimer(&finishWaiter) go func() { // TODO: failure should panic this DN, but we're not sure how // to recover when releaseFunc stuck. @@ -450,7 +457,7 @@ func (r *opRunner) releaseWithTimer(releaseFunc releaseFunc, channel string, opI successSig <- struct{}{} }() - waiter.Wait() + finishWaiter.Wait() return opState } diff --git a/internal/datanode/channel_manager_test.go b/internal/datanode/channel_manager_test.go index 85c13d7fe9be4..0dad91c14c786 100644 --- a/internal/datanode/channel_manager_test.go +++ b/internal/datanode/channel_manager_test.go @@ -20,6 +20,7 @@ import ( "context" "testing" + "github.com/cockroachdb/errors" "github.com/stretchr/testify/suite" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" @@ -56,7 +57,7 @@ func (s *OpRunnerSuite) TestWatchWithTimer() { mockReleaseFunc := func(channel string) { log.Info("mock release func") } - runner := NewOpRunner(channel, s.node, mockReleaseFunc, commuCh) + runner := NewOpRunner(channel, s.node, mockReleaseFunc, executeWatch, commuCh) err := runner.Enqueue(info) s.Require().NoError(err) @@ -67,6 +68,35 @@ func (s *OpRunnerSuite) TestWatchWithTimer() { runner.FinishOp(100) } +func (s *OpRunnerSuite) TestWatchTimeout() { + channel := "by-dev-rootcoord-dml-1000" + paramtable.Get().Save(Params.DataCoordCfg.WatchTimeoutInterval.Key, "0.000001") + defer paramtable.Get().Reset(Params.DataCoordCfg.WatchTimeoutInterval.Key) + info := getWatchInfoByOpID(100, channel, datapb.ChannelWatchState_ToWatch) + + sig := make(chan struct{}) + commuCh := make(chan *opState) + + mockReleaseFunc := func(channel string) { log.Info("mock release func") } + mockWatchFunc := func(ctx context.Context, dn *DataNode, info *datapb.ChannelWatchInfo, tickler *tickler) (*dataSyncService, error) { + <-ctx.Done() + sig <- struct{}{} + return nil, errors.New("timeout") + } + + runner := NewOpRunner(channel, s.node, mockReleaseFunc, mockWatchFunc, commuCh) + runner.Start() + defer runner.Close() + err := runner.Enqueue(info) + s.Require().NoError(err) + + <-sig + opState := <-commuCh + s.Require().NotNil(opState) + s.Equal(info.GetOpID(), opState.opID) + s.Equal(datapb.ChannelWatchState_WatchFailure, opState.state) +} + type OpRunnerSuite struct { suite.Suite node *DataNode @@ -126,26 +156,6 @@ func (s *ChannelManagerSuite) TearDownTest() { } } -func (s *ChannelManagerSuite) TestWatchFail() { - channel := "by-dev-rootcoord-dml-2" - paramtable.Get().Save(Params.DataCoordCfg.WatchTimeoutInterval.Key, "0.000001") - defer paramtable.Get().Reset(Params.DataCoordCfg.WatchTimeoutInterval.Key) - info := getWatchInfoByOpID(100, channel, datapb.ChannelWatchState_ToWatch) - s.Require().Equal(0, s.manager.opRunners.Len()) - err := s.manager.Submit(info) - s.Require().NoError(err) - - opState := <-s.manager.communicateCh - s.Require().NotNil(opState) - s.Equal(info.GetOpID(), opState.opID) - s.Equal(datapb.ChannelWatchState_WatchFailure, opState.state) - - s.manager.handleOpState(opState) - - resp := s.manager.GetProgress(info) - s.Equal(datapb.ChannelWatchState_WatchFailure, resp.GetState()) -} - func (s *ChannelManagerSuite) TestReleaseStuck() { var ( channel = "by-dev-rootcoord-dml-2" diff --git a/internal/datanode/compaction/compactor.go b/internal/datanode/compaction/compactor.go new file mode 100644 index 0000000000000..825723a98fd52 --- /dev/null +++ b/internal/datanode/compaction/compactor.go @@ -0,0 +1,32 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compaction + +import ( + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +//go:generate mockery --name=Compactor --structname=MockCompactor --output=./ --filename=mock_compactor.go --with-expecter --inpackage +type Compactor interface { + Complete() + Compact() (*datapb.CompactionPlanResult, error) + Stop() + GetPlanID() typeutil.UniqueID + GetCollection() typeutil.UniqueID + GetChannelName() string +} diff --git a/internal/datanode/compaction/mix_compactor.go b/internal/datanode/compaction/mix_compactor.go new file mode 100644 index 0000000000000..928fff81ed248 --- /dev/null +++ b/internal/datanode/compaction/mix_compactor.go @@ -0,0 +1,542 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
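
The new Compactor interface keeps the DataNode's compaction executor decoupled from the concrete task type: Compact returns the plan result, while Stop and Complete pair a cancel with a done signal. A hedged sketch of how a caller inside the datanode package might drive any implementation through this interface; runCompaction and the results channel are illustrative only, not part of this PR:

```go
package datanode

import (
	"go.uber.org/zap"

	"github.com/milvus-io/milvus/internal/datanode/compaction"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/pkg/log"
)

// runCompaction is a hypothetical helper showing interface-driven dispatch:
// it works the same for a mix compaction task or any other Compactor.
func runCompaction(task compaction.Compactor, results chan<- *datapb.CompactionPlanResult) {
	defer task.Complete() // lets a concurrent Stop() return once the task has finished

	result, err := task.Compact()
	if err != nil {
		log.Warn("compaction task failed",
			zap.Int64("planID", task.GetPlanID()),
			zap.String("channel", task.GetChannelName()),
			zap.Error(err))
		return
	}
	results <- result
}
```
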
+ +package compaction + +import ( + "context" + "fmt" + sio "io" + "strconv" + "time" + + "github.com/cockroachdb/errors" + "github.com/samber/lo" + "go.opentelemetry.io/otel" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus/internal/datanode/allocator" + "github.com/milvus-io/milvus/internal/datanode/io" + iter "github.com/milvus-io/milvus/internal/datanode/iterators" + "github.com/milvus-io/milvus/internal/metastore/kv/binlog" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/timerecord" + "github.com/milvus-io/milvus/pkg/util/tsoutil" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +// for MixCompaction only +type mixCompactionTask struct { + binlogIO io.BinlogIO + allocator.Allocator + currentTs typeutil.Timestamp + + plan *datapb.CompactionPlan + + ctx context.Context + cancel context.CancelFunc + + done chan struct{} + tr *timerecord.TimeRecorder +} + +// make sure compactionTask implements compactor interface +var _ Compactor = (*mixCompactionTask)(nil) + +func NewMixCompactionTask( + ctx context.Context, + binlogIO io.BinlogIO, + alloc allocator.Allocator, + plan *datapb.CompactionPlan, +) *mixCompactionTask { + ctx1, cancel := context.WithCancel(ctx) + return &mixCompactionTask{ + ctx: ctx1, + cancel: cancel, + binlogIO: binlogIO, + Allocator: alloc, + plan: plan, + tr: timerecord.NewTimeRecorder("mix compaction"), + currentTs: tsoutil.GetCurrentTime(), + done: make(chan struct{}, 1), + } +} + +func (t *mixCompactionTask) Complete() { + t.done <- struct{}{} +} + +func (t *mixCompactionTask) Stop() { + t.cancel() + <-t.done +} + +func (t *mixCompactionTask) GetPlanID() typeutil.UniqueID { + return t.plan.GetPlanID() +} + +func (t *mixCompactionTask) GetChannelName() string { + return t.plan.GetChannel() +} + +// return num rows of all segment compaction from +func (t *mixCompactionTask) getNumRows() int64 { + numRows := int64(0) + for _, binlog := range t.plan.SegmentBinlogs { + if len(binlog.GetFieldBinlogs()) > 0 { + for _, ct := range binlog.GetFieldBinlogs()[0].GetBinlogs() { + numRows += ct.GetEntriesNum() + } + } + } + return numRows +} + +func (t *mixCompactionTask) mergeDeltalogs(ctx context.Context, dpaths map[typeutil.UniqueID][]string) (map[interface{}]typeutil.Timestamp, error) { + t.tr.RecordSpan() + ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "mergeDeltalogs") + defer span.End() + + log := log.With(zap.Int64("planID", t.GetPlanID())) + pk2ts := make(map[interface{}]typeutil.Timestamp) + + if len(dpaths) == 0 { + log.Info("compact with no deltalogs, skip merge deltalogs") + return pk2ts, nil + } + + allIters := make([]*iter.DeltalogIterator, 0) + for segID, paths := range dpaths { + if len(paths) == 0 { + continue + } + blobs, err := t.binlogIO.Download(ctx, paths) + if err != nil { + log.Warn("compact wrong, fail to download deltalogs", + zap.Int64("segment", segID), + zap.Strings("path", paths), + zap.Error(err)) + return nil, err + } + + allIters = append(allIters, iter.NewDeltalogIterator(blobs, nil)) + } + + for _, deltaIter := range allIters { + for deltaIter.HasNext() { + labeled, _ := deltaIter.Next() + ts := labeled.GetTimestamp() + if lastTs, ok := pk2ts[labeled.GetPk().GetValue()]; ok && lastTs > ts { + 
ts = lastTs + } + pk2ts[labeled.GetPk().GetValue()] = ts + } + } + + log.Info("compact mergeDeltalogs end", + zap.Int("deleted pk counts", len(pk2ts)), + zap.Duration("elapse", t.tr.RecordSpan())) + + return pk2ts, nil +} + +func (t *mixCompactionTask) statSerializeWrite(ctx context.Context, writer *SegmentWriter, finalRowCount int64) (*datapb.FieldBinlog, error) { + ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "statslog serializeWrite") + defer span.End() + sblob, err := writer.Finish(finalRowCount) + if err != nil { + return nil, err + } + + logID, err := t.AllocOne() + if err != nil { + return nil, err + } + + key, _ := binlog.BuildLogPath(storage.StatsBinlog, writer.GetCollectionID(), writer.GetPartitionID(), writer.GetSegmentID(), writer.GetPkID(), logID) + kvs := map[string][]byte{key: sblob.GetValue()} + statFieldLog := &datapb.FieldBinlog{ + FieldID: writer.GetPkID(), + Binlogs: []*datapb.Binlog{ + { + LogSize: int64(len(sblob.GetValue())), + MemorySize: int64(len(sblob.GetValue())), + LogPath: key, + EntriesNum: finalRowCount, + }, + }, + } + if err := t.binlogIO.Upload(ctx, kvs); err != nil { + log.Warn("failed to upload insert log", zap.Error(err)) + return nil, err + } + + return statFieldLog, nil +} + +func (t *mixCompactionTask) serializeWrite(ctx context.Context, writer *SegmentWriter) (kvs map[string][]byte, fieldBinlogs map[int64]*datapb.FieldBinlog, err error) { + _, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "serializeWrite") + defer span.End() + + blobs, tr, err := writer.SerializeYield() + startID, _, err := t.Alloc(uint32(len(blobs))) + if err != nil { + return nil, nil, err + } + + kvs = make(map[string][]byte) + fieldBinlogs = make(map[int64]*datapb.FieldBinlog) + for i := range blobs { + // Blob Key is generated by Serialize from int64 fieldID in collection schema, which won't raise error in ParseInt + fID, _ := strconv.ParseInt(blobs[i].GetKey(), 10, 64) + key, _ := binlog.BuildLogPath(storage.InsertBinlog, writer.GetCollectionID(), writer.GetPartitionID(), writer.GetSegmentID(), fID, startID+int64(i)) + + kvs[key] = blobs[i].GetValue() + fieldBinlogs[fID] = &datapb.FieldBinlog{ + FieldID: fID, + Binlogs: []*datapb.Binlog{ + { + LogSize: int64(len(blobs[i].GetValue())), + MemorySize: blobs[i].GetMemorySize(), + LogPath: key, + EntriesNum: blobs[i].RowNum, + TimestampFrom: tr.GetMinTimestamp(), + TimestampTo: tr.GetMaxTimestamp(), + }, + }, + } + } + + return +} + +func (t *mixCompactionTask) merge( + ctx context.Context, + binlogPaths [][]string, + delta map[interface{}]typeutil.Timestamp, + writer *SegmentWriter, +) (*datapb.CompactionSegment, error) { + _ = t.tr.RecordSpan() + + ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "CompactMerge") + defer span.End() + + log := log.With(zap.Int64("planID", t.GetPlanID()), zap.Int64("compactTo segment", writer.GetSegmentID())) + + var ( + syncBatchCount int // binlog batch count + remainingRowCount int64 // the number of remaining entities + expiredRowCount int64 // the number of expired entities + unflushedRowCount int64 = 0 + + // All binlog meta of a segment + allBinlogs = make(map[typeutil.UniqueID]*datapb.FieldBinlog) + ) + + isValueDeleted := func(v *storage.Value) bool { + ts, ok := delta[v.PK.GetValue()] + // insert task and delete task has the same ts when upsert + // here should be < instead of <= + // to avoid the upsert data to be deleted after compact + if ok && uint64(v.Timestamp) < ts { + return true + } + return false + } + + downloadTimeCost := time.Duration(0) + 
serWriteTimeCost := time.Duration(0) + uploadTimeCost := time.Duration(0) + + for _, paths := range binlogPaths { + log := log.With(zap.Strings("paths", paths)) + downloadStart := time.Now() + allValues, err := t.binlogIO.Download(ctx, paths) + if err != nil { + log.Warn("compact wrong, fail to download insertLogs", zap.Error(err)) + } + downloadTimeCost += time.Since(downloadStart) + + blobs := lo.Map(allValues, func(v []byte, i int) *storage.Blob { + return &storage.Blob{Key: paths[i], Value: v} + }) + + iter, err := storage.NewBinlogDeserializeReader(blobs, writer.GetPkID()) + if err != nil { + log.Warn("compact wrong, failed to new insert binlogs reader", zap.Error(err)) + return nil, err + } + + for { + err := iter.Next() + if err != nil { + if err == sio.EOF { + break + } else { + log.Warn("compact wrong, failed to iter through data", zap.Error(err)) + return nil, err + } + } + v := iter.Value() + if isValueDeleted(v) { + continue + } + + // Filtering expired entity + if t.isExpiredEntity(typeutil.Timestamp(v.Timestamp)) { + expiredRowCount++ + continue + } + + err = writer.Write(v) + if err != nil { + log.Warn("compact wrong, failed to writer row", zap.Error(err)) + return nil, err + } + unflushedRowCount++ + remainingRowCount++ + + if (unflushedRowCount+1)%100 == 0 && writer.IsFull() { + serWriteStart := time.Now() + kvs, partialBinlogs, err := t.serializeWrite(ctx, writer) + if err != nil { + log.Warn("compact wrong, failed to serialize writer", zap.Error(err)) + return nil, err + } + serWriteTimeCost += time.Since(serWriteStart) + + uploadStart := time.Now() + if err := t.binlogIO.Upload(ctx, kvs); err != nil { + log.Warn("compact wrong, failed to upload kvs", zap.Error(err)) + } + uploadTimeCost += time.Since(uploadStart) + mergeFieldBinlogs(allBinlogs, partialBinlogs) + syncBatchCount++ + unflushedRowCount = 0 + } + } + } + + if !writer.IsEmpty() { + serWriteStart := time.Now() + kvs, partialBinlogs, err := t.serializeWrite(ctx, writer) + if err != nil { + log.Warn("compact wrong, failed to serialize writer", zap.Error(err)) + return nil, err + } + serWriteTimeCost += time.Since(serWriteStart) + + uploadStart := time.Now() + if err := t.binlogIO.Upload(ctx, kvs); err != nil { + log.Warn("compact wrong, failed to upload kvs", zap.Error(err)) + } + uploadTimeCost += time.Since(uploadStart) + + mergeFieldBinlogs(allBinlogs, partialBinlogs) + syncBatchCount++ + } + + serWriteStart := time.Now() + sPath, err := t.statSerializeWrite(ctx, writer, remainingRowCount) + if err != nil { + log.Warn("compact wrong, failed to serialize write segment stats", + zap.Int64("remaining row count", remainingRowCount), zap.Error(err)) + return nil, err + } + serWriteTimeCost += time.Since(serWriteStart) + + pack := &datapb.CompactionSegment{ + SegmentID: writer.GetSegmentID(), + InsertLogs: lo.Values(allBinlogs), + Field2StatslogPaths: []*datapb.FieldBinlog{sPath}, + NumOfRows: remainingRowCount, + Channel: t.plan.GetChannel(), + } + + totalElapse := t.tr.RecordSpan() + + log.Info("compact merge end", + zap.Int64("remaining row count", remainingRowCount), + zap.Int64("expired entities", expiredRowCount), + zap.Int("binlog batch count", syncBatchCount), + zap.Duration("download binlogs elapse", downloadTimeCost), + zap.Duration("upload binlogs elapse", uploadTimeCost), + zap.Duration("serWrite elapse", serWriteTimeCost), + zap.Duration("deRead elapse", totalElapse-serWriteTimeCost-downloadTimeCost-uploadTimeCost), + zap.Duration("total elapse", totalElapse)) + + return pack, nil +} + +func 
mergeFieldBinlogs(base, paths map[typeutil.UniqueID]*datapb.FieldBinlog) { + for fID, fpath := range paths { + if _, ok := base[fID]; !ok { + base[fID] = &datapb.FieldBinlog{FieldID: fID, Binlogs: make([]*datapb.Binlog, 0)} + } + base[fID].Binlogs = append(base[fID].Binlogs, fpath.GetBinlogs()...) + } +} + +func (t *mixCompactionTask) Compact() (*datapb.CompactionPlanResult, error) { + durInQueue := t.tr.RecordSpan() + compactStart := time.Now() + ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(t.ctx, fmt.Sprintf("MixCompact-%d", t.GetPlanID())) + defer span.End() + + if len(t.plan.GetSegmentBinlogs()) < 1 { + log.Warn("compact wrong, there's no segments in segment binlogs", zap.Int64("planID", t.plan.GetPlanID())) + return nil, errors.New("compaction plan is illegal") + } + + collectionID := t.plan.GetSegmentBinlogs()[0].GetCollectionID() + partitionID := t.plan.GetSegmentBinlogs()[0].GetPartitionID() + + log := log.Ctx(ctx).With(zap.Int64("planID", t.plan.GetPlanID()), + zap.Int64("collectionID", collectionID), + zap.Int64("partitionID", partitionID), + zap.Int32("timeout in seconds", t.plan.GetTimeoutInSeconds())) + + if ok := funcutil.CheckCtxValid(ctx); !ok { + log.Warn("compact wrong, task context done or timeout") + return nil, ctx.Err() + } + + ctxTimeout, cancelAll := context.WithTimeout(ctx, time.Duration(t.plan.GetTimeoutInSeconds())*time.Second) + defer cancelAll() + + log.Info("compact start") + + targetSegID, err := t.AllocOne() + if err != nil { + log.Warn("compact wrong, unable to allocate segmentID", zap.Error(err)) + return nil, err + } + + previousRowCount := t.getNumRows() + + writer, err := NewSegmentWriter(t.plan.GetSchema(), previousRowCount, targetSegID, partitionID, collectionID) + if err != nil { + log.Warn("compact wrong, unable to init segment writer", zap.Error(err)) + return nil, err + } + + segIDs := lo.Map(t.plan.GetSegmentBinlogs(), func(binlogs *datapb.CompactionSegmentBinlogs, _ int) int64 { + return binlogs.GetSegmentID() + }) + + if err := binlog.DecompressCompactionBinlogs(t.plan.GetSegmentBinlogs()); err != nil { + log.Warn("compact wrong, fail to decompress compaction binlogs", zap.Error(err)) + return nil, err + } + + deltaPaths := make(map[typeutil.UniqueID][]string) // segmentID to deltalog paths + allPath := make([][]string, 0) // group by binlog batch + for _, s := range t.plan.GetSegmentBinlogs() { + // Get the batch count of field binlog files from non-empty segment + // each segment might contain different batches + var binlogBatchCount int + for _, b := range s.GetFieldBinlogs() { + if b != nil { + binlogBatchCount = len(b.GetBinlogs()) + break + } + } + if binlogBatchCount == 0 { + log.Warn("compacting empty segment", zap.Int64("segmentID", s.GetSegmentID())) + continue + } + + for idx := 0; idx < binlogBatchCount; idx++ { + var batchPaths []string + for _, f := range s.GetFieldBinlogs() { + batchPaths = append(batchPaths, f.GetBinlogs()[idx].GetLogPath()) + } + allPath = append(allPath, batchPaths) + } + + deltaPaths[s.GetSegmentID()] = []string{} + for _, d := range s.GetDeltalogs() { + for _, l := range d.GetBinlogs() { + deltaPaths[s.GetSegmentID()] = append(deltaPaths[s.GetSegmentID()], l.GetLogPath()) + } + } + } + + // Unable to deal with all empty segments cases, so return error + if len(allPath) == 0 { + log.Warn("compact wrong, all segments' binlogs are empty") + return nil, errors.New("illegal compaction plan") + } + + deltaPk2Ts, err := t.mergeDeltalogs(ctxTimeout, deltaPaths) + if err != nil { + log.Warn("compact wrong, 
fail to merge deltalogs", zap.Error(err)) + return nil, err + } + + compactToSeg, err := t.merge(ctxTimeout, allPath, deltaPk2Ts, writer) + if err != nil { + log.Warn("compact wrong, fail to merge", zap.Error(err)) + return nil, err + } + + log.Info("compact done", + zap.Int64("compact to segment", targetSegID), + zap.Int64s("compact from segments", segIDs), + zap.Int("num of binlog paths", len(compactToSeg.GetInsertLogs())), + zap.Int("num of stats paths", 1), + zap.Int("num of delta paths", len(compactToSeg.GetDeltalogs())), + zap.Duration("compact elapse", time.Since(compactStart)), + ) + + metrics.DataNodeCompactionLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), t.plan.GetType().String()).Observe(float64(t.tr.ElapseSpan().Milliseconds())) + metrics.DataNodeCompactionLatencyInQueue.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(durInQueue.Milliseconds())) + + planResult := &datapb.CompactionPlanResult{ + State: commonpb.CompactionState_Completed, + PlanID: t.GetPlanID(), + Channel: t.GetChannelName(), + Segments: []*datapb.CompactionSegment{compactToSeg}, + Type: t.plan.GetType(), + } + + return planResult, nil +} + +func (t *mixCompactionTask) GetCollection() typeutil.UniqueID { + // The length of SegmentBinlogs is checked before task enqueueing. + return t.plan.GetSegmentBinlogs()[0].GetCollectionID() +} + +func (t *mixCompactionTask) isExpiredEntity(ts typeutil.Timestamp) bool { + now := t.currentTs + + // entity expire is not enabled if duration <= 0 + if t.plan.GetCollectionTtl() <= 0 { + return false + } + + entityT, _ := tsoutil.ParseTS(ts) + nowT, _ := tsoutil.ParseTS(now) + + return entityT.Add(time.Duration(t.plan.GetCollectionTtl())).Before(nowT) +} diff --git a/internal/datanode/compaction/mix_compactor_test.go b/internal/datanode/compaction/mix_compactor_test.go new file mode 100644 index 0000000000000..cea2c0b6fe16a --- /dev/null +++ b/internal/datanode/compaction/mix_compactor_test.go @@ -0,0 +1,758 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
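The path regrouping inside Compact above is easy to misread in diff form: every segment in the plan carries one binlog list per field, and merge consumes the data batch by batch, where batch i is the set of the i-th log paths taken across all fields of that segment. The sketch below shows the same transposition in isolation. It is a minimal illustration only, assuming every field has the same number of batches (the patch relies on the same assumption); groupByBatch and fieldLogs are hypothetical names that do not appear in this patch, which builds allPath inline from datapb.FieldBinlog messages instead.

```go
package main

import "fmt"

// groupByBatch mirrors the allPath construction in mixCompactionTask.Compact:
// fieldLogs holds each field's ordered binlog paths, and every field is
// assumed to contain the same number of batches. Batch i collects the i-th
// path of every field, which is what merge downloads in a single call.
func groupByBatch(fieldLogs [][]string) [][]string {
	if len(fieldLogs) == 0 {
		return nil
	}
	batchCount := len(fieldLogs[0])

	batches := make([][]string, 0, batchCount)
	for i := 0; i < batchCount; i++ {
		batch := make([]string, 0, len(fieldLogs))
		for _, paths := range fieldLogs {
			batch = append(batch, paths[i]) // one path per field for this batch
		}
		batches = append(batches, batch)
	}
	return batches
}

func main() {
	// Two fields, each flushed twice, yield two batches of two paths each.
	fmt.Println(groupByBatch([][]string{
		{"field100/log_0", "field100/log_1"},
		{"field101/log_0", "field101/log_1"},
	}))
}
```

Each resulting batch corresponds to one t.binlogIO.Download call in merge, after which the rows are deserialized, filtered against the delete map and the collection TTL, and written into the new segment.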
+ +package compaction + +import ( + "context" + "math" + "testing" + "time" + + "github.com/cockroachdb/errors" + "github.com/samber/lo" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/datanode/allocator" + "github.com/milvus-io/milvus/internal/datanode/io" + "github.com/milvus-io/milvus/internal/datanode/metacache" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/proto/etcdpb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/tsoutil" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +var compactTestDir = "/tmp/milvus_test/compact" + +func TestMixCompactionTaskSuite(t *testing.T) { + suite.Run(t, new(MixCompactionTaskSuite)) +} + +type MixCompactionTaskSuite struct { + suite.Suite + + mockBinlogIO *io.MockBinlogIO + mockAlloc *allocator.MockAllocator + + meta *etcdpb.CollectionMeta + segWriter *SegmentWriter + + task *mixCompactionTask + plan *datapb.CompactionPlan +} + +func (s *MixCompactionTaskSuite) SetupSuite() { + paramtable.Get().Init(paramtable.NewBaseTable()) +} + +func (s *MixCompactionTaskSuite) SetupTest() { + s.mockBinlogIO = io.NewMockBinlogIO(s.T()) + s.mockAlloc = allocator.NewMockAllocator(s.T()) + + s.task = NewMixCompactionTask(context.Background(), s.mockBinlogIO, s.mockAlloc, nil) + + s.meta = genTestCollectionMeta() + + paramtable.Get().Save(paramtable.Get().CommonCfg.EntityExpirationTTL.Key, "0") + + s.plan = &datapb.CompactionPlan{ + PlanID: 999, + SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{{ + SegmentID: 100, + FieldBinlogs: nil, + Field2StatslogPaths: nil, + Deltalogs: nil, + }}, + TimeoutInSeconds: 10, + Type: datapb.CompactionType_MixCompaction, + Schema: s.meta.GetSchema(), + } + s.task.plan = s.plan +} + +func (s *MixCompactionTaskSuite) SetupSubTest() { + s.SetupTest() +} + +func (s *MixCompactionTaskSuite) TearDownTest() { + paramtable.Get().Reset(paramtable.Get().CommonCfg.EntityExpirationTTL.Key) +} + +func getMilvusBirthday() time.Time { + return time.Date(2019, time.Month(5), 30, 0, 0, 0, 0, time.UTC) +} + +func (s *MixCompactionTaskSuite) TestCompactDupPK() { + // Test merge compaction of three segments that each contain a duplicated pk=100, plus one deletion of pk=100 + // The merged segment 19530 should keep 3 rows and contain no pk=100 + s.mockAlloc.EXPECT().AllocOne().Return(int64(19530), nil).Twice() + segments := []int64{7, 8, 9} + dblobs, err := getInt64DeltaBlobs( + 1, + []int64{100}, + []uint64{tsoutil.ComposeTSByTime(getMilvusBirthday().Add(time.Second), 0)}, + ) + s.Require().NoError(err) + + s.mockBinlogIO.EXPECT().Download(mock.Anything, []string{"1"}). 
+ Return([][]byte{dblobs.GetValue()}, nil).Times(3) + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(7777777, 8888888, nil) + s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil) + + // clear original segments + s.task.plan.SegmentBinlogs = make([]*datapb.CompactionSegmentBinlogs, 0) + for _, segID := range segments { + s.initSegBuffer(segID) + row := getRow(100) + v := &storage.Value{ + PK: storage.NewInt64PrimaryKey(100), + Timestamp: int64(tsoutil.ComposeTSByTime(getMilvusBirthday(), 0)), + Value: row, + } + err := s.segWriter.Write(v) + s.segWriter.writer.Flush() + s.Require().NoError(err) + + //statistic := &storage.PkStatistics{ + // PkFilter: s.segWriter.pkstats.BF, + // MinPK: s.segWriter.pkstats.MinPk, + // MaxPK: s.segWriter.pkstats.MaxPk, + //} + //bfs := metacache.NewBloomFilterSet(statistic) + + kvs, fBinlogs, err := s.task.serializeWrite(context.TODO(), s.segWriter) + s.Require().NoError(err) + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.MatchedBy(func(keys []string) bool { + left, right := lo.Difference(keys, lo.Keys(kvs)) + return len(left) == 0 && len(right) == 0 + })).Return(lo.Values(kvs), nil).Once() + + //seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ + // CollectionID: CollectionID, + // PartitionID: PartitionID, + // ID: segID, + // NumOfRows: 1, + //}, bfs) + + s.plan.SegmentBinlogs = append(s.plan.SegmentBinlogs, &datapb.CompactionSegmentBinlogs{ + SegmentID: segID, + FieldBinlogs: lo.Values(fBinlogs), + Deltalogs: []*datapb.FieldBinlog{ + {Binlogs: []*datapb.Binlog{{LogID: 1, LogPath: "1"}}}, + }, + }) + } + result, err := s.task.Compact() + s.NoError(err) + s.NotNil(result) + + s.Equal(s.task.plan.GetPlanID(), result.GetPlanID()) + s.Equal(1, len(result.GetSegments())) + + segment := result.GetSegments()[0] + s.EqualValues(19530, segment.GetSegmentID()) + s.EqualValues(3, segment.GetNumOfRows()) + s.NotEmpty(segment.InsertLogs) + s.NotEmpty(segment.Field2StatslogPaths) + s.Empty(segment.Deltalogs) +} + +func (s *MixCompactionTaskSuite) TestCompactTwoToOne() { + s.mockAlloc.EXPECT().AllocOne().Return(int64(19530), nil).Twice() + + segments := []int64{5, 6, 7} + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(7777777, 8888888, nil) + s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil) + s.task.plan.SegmentBinlogs = make([]*datapb.CompactionSegmentBinlogs, 0) + for _, segID := range segments { + s.initSegBuffer(segID) + //statistic := &storage.PkStatistics{ + // PkFilter: s.segWriter.pkstats.BF, + // MinPK: s.segWriter.pkstats.MinPk, + // MaxPK: s.segWriter.pkstats.MaxPk, + //} + //bfs := metacache.NewBloomFilterSet(statistic) + kvs, fBinlogs, err := s.task.serializeWrite(context.TODO(), s.segWriter) + s.Require().NoError(err) + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.MatchedBy(func(keys []string) bool { + left, right := lo.Difference(keys, lo.Keys(kvs)) + return len(left) == 0 && len(right) == 0 + })).Return(lo.Values(kvs), nil).Once() + + //seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ + // CollectionID: CollectionID, + // PartitionID: PartitionID, + // ID: segID, + // NumOfRows: 1, + //}, bfs) + + s.plan.SegmentBinlogs = append(s.plan.SegmentBinlogs, &datapb.CompactionSegmentBinlogs{ + SegmentID: segID, + FieldBinlogs: lo.Values(fBinlogs), + }) + } + + // append an empty segment + seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ + CollectionID: CollectionID, + PartitionID: PartitionID, + ID: 99999, + NumOfRows: 0, + }, metacache.NewBloomFilterSet()) + + s.plan.SegmentBinlogs = 
append(s.plan.SegmentBinlogs, &datapb.CompactionSegmentBinlogs{ + SegmentID: seg.SegmentID(), + }) + + result, err := s.task.Compact() + s.NoError(err) + s.NotNil(result) + + s.Equal(s.task.plan.GetPlanID(), result.GetPlanID()) + s.Equal(1, len(result.GetSegments())) + + segment := result.GetSegments()[0] + s.EqualValues(19530, segment.GetSegmentID()) + s.EqualValues(3, segment.GetNumOfRows()) + s.NotEmpty(segment.InsertLogs) + s.NotEmpty(segment.Field2StatslogPaths) + s.Empty(segment.Deltalogs) +} + +func (s *MixCompactionTaskSuite) TestMergeBufferFull() { + paramtable.Get().Save(paramtable.Get().DataNodeCfg.BinLogMaxSize.Key, "1") + defer paramtable.Get().Reset(paramtable.Get().DataNodeCfg.BinLogMaxSize.Key) + + s.initSegBuffer(5) + v := storage.Value{ + PK: storage.NewInt64PrimaryKey(100), + Timestamp: int64(tsoutil.ComposeTSByTime(getMilvusBirthday(), 0)), + Value: getRow(100), + } + err := s.segWriter.Write(&v) + s.Require().NoError(err) + + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(888888, 999999, nil).Times(2) + kvs, _, err := s.task.serializeWrite(context.TODO(), s.segWriter) + s.Require().NoError(err) + + s.mockAlloc.EXPECT().AllocOne().Return(888888, nil) + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).RunAndReturn( + func(ctx context.Context, paths []string) ([][]byte, error) { + s.Require().Equal(len(paths), len(kvs)) + return lo.Values(kvs), nil + }) + s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil).Maybe() + + segWriter, err := NewSegmentWriter(s.meta.GetSchema(), 100, 19530, PartitionID, CollectionID) + s.Require().NoError(err) + + compactionSegment, err := s.task.merge(s.task.ctx, [][]string{lo.Keys(kvs)}, nil, segWriter) + s.NoError(err) + s.NotNil(compactionSegment) + s.EqualValues(2, compactionSegment.GetNumOfRows()) +} + +func (s *MixCompactionTaskSuite) TestMergeEntityExpired() { + s.initSegBuffer(3) + // entityTs == tsoutil.ComposeTSByTime(milvusBirthday, 0) + collTTL := 864000 // 10 days + currTs := tsoutil.ComposeTSByTime(getMilvusBirthday().Add(time.Second*(time.Duration(collTTL)+1)), 0) + s.task.currentTs = currTs + s.task.plan.CollectionTtl = int64(collTTL) + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(888888, 999999, nil) + + kvs, _, err := s.task.serializeWrite(context.TODO(), s.segWriter) + s.Require().NoError(err) + s.mockAlloc.EXPECT().AllocOne().Return(888888, nil) + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).RunAndReturn( + func(ctx context.Context, paths []string) ([][]byte, error) { + s.Require().Equal(len(paths), len(kvs)) + return lo.Values(kvs), nil + }) + s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil).Maybe() + + segWriter, err := NewSegmentWriter(s.meta.GetSchema(), 100, 19530, PartitionID, CollectionID) + s.Require().NoError(err) + + compactionSegment, err := s.task.merge(s.task.ctx, [][]string{lo.Keys(kvs)}, nil, segWriter) + s.NoError(err) + s.NotNil(compactionSegment) + s.EqualValues(0, compactionSegment.GetNumOfRows()) +} + +func (s *MixCompactionTaskSuite) TestMergeNoExpiration() { + s.initSegBuffer(4) + deleteTs := tsoutil.ComposeTSByTime(getMilvusBirthday().Add(10*time.Second), 0) + tests := []struct { + description string + deletions map[interface{}]uint64 + expectedRowCount int + }{ + {"no deletion", nil, 1}, + {"mismatch deletion", map[interface{}]uint64{int64(1): deleteTs}, 1}, + {"deleted pk=4", map[interface{}]uint64{int64(4): deleteTs}, 0}, + } + + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(888888, 999999, nil) + kvs, _, err := 
s.task.serializeWrite(context.TODO(), s.segWriter) + s.Require().NoError(err) + for _, test := range tests { + s.Run(test.description, func() { + if test.expectedRowCount > 0 { + s.mockAlloc.EXPECT().Alloc(mock.Anything).Return(77777, 99999, nil).Once() + } + s.mockAlloc.EXPECT().AllocOne().Return(888888, nil) + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).RunAndReturn( + func(ctx context.Context, paths []string) ([][]byte, error) { + s.Require().Equal(len(paths), len(kvs)) + return lo.Values(kvs), nil + }) + s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil).Maybe() + + segWriter, err := NewSegmentWriter(s.meta.GetSchema(), 100, 19530, PartitionID, CollectionID) + s.Require().NoError(err) + + compactionSegment, err := s.task.merge(s.task.ctx, [][]string{lo.Keys(kvs)}, test.deletions, segWriter) + s.NoError(err) + s.NotNil(compactionSegment) + s.EqualValues(test.expectedRowCount, compactionSegment.GetNumOfRows()) + }) + } +} + +func (s *MixCompactionTaskSuite) TestMergeDeltalogsMultiSegment() { + tests := []struct { + segIDA int64 + dataApk []int64 + dataAts []uint64 + + segIDB int64 + dataBpk []int64 + dataBts []uint64 + + segIDC int64 + dataCpk []int64 + dataCts []uint64 + + expectedpk2ts map[int64]uint64 + description string + }{ + { + 0, nil, nil, + 100, + []int64{1, 2, 3}, + []uint64{20000, 30000, 20005}, + 200, + []int64{4, 5, 6}, + []uint64{50000, 50001, 50002}, + map[int64]uint64{ + 1: 20000, + 2: 30000, + 3: 20005, + 4: 50000, + 5: 50001, + 6: 50002, + }, + "2 segments", + }, + { + 300, + []int64{10, 20}, + []uint64{20001, 40001}, + 100, + []int64{1, 2, 3}, + []uint64{20000, 30000, 20005}, + 200, + []int64{4, 5, 6}, + []uint64{50000, 50001, 50002}, + map[int64]uint64{ + 10: 20001, + 20: 40001, + 1: 20000, + 2: 30000, + 3: 20005, + 4: 50000, + 5: 50001, + 6: 50002, + }, + "3 segments", + }, + } + + for _, test := range tests { + s.Run(test.description, func() { + dValues := make([][]byte, 0) + if test.dataApk != nil { + d, err := getInt64DeltaBlobs(test.segIDA, test.dataApk, test.dataAts) + s.Require().NoError(err) + dValues = append(dValues, d.GetValue()) + } + if test.dataBpk != nil { + d, err := getInt64DeltaBlobs(test.segIDB, test.dataBpk, test.dataBts) + s.Require().NoError(err) + dValues = append(dValues, d.GetValue()) + } + if test.dataCpk != nil { + d, err := getInt64DeltaBlobs(test.segIDC, test.dataCpk, test.dataCts) + s.Require().NoError(err) + dValues = append(dValues, d.GetValue()) + } + + s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything). + Return(dValues, nil) + + got, err := s.task.mergeDeltalogs(s.task.ctx, map[int64][]string{100: {"random"}}) + s.NoError(err) + + s.Equal(len(test.expectedpk2ts), len(got)) + gotKeys := lo.Map(lo.Keys(got), func(k interface{}, _ int) int64 { + res, ok := k.(int64) + s.Require().True(ok) + return res + }) + s.ElementsMatch(gotKeys, lo.Keys(test.expectedpk2ts)) + s.ElementsMatch(lo.Values(got), lo.Values(test.expectedpk2ts)) + }) + } +} + +func (s *MixCompactionTaskSuite) TestMergeDeltalogsOneSegment() { + blob, err := getInt64DeltaBlobs( + 100, + []int64{1, 2, 3, 4, 5, 1, 2}, + []uint64{20000, 20001, 20002, 30000, 50000, 50000, 10000}, + ) + s.Require().NoError(err) + + expectedMap := map[int64]uint64{1: 50000, 2: 20001, 3: 20002, 4: 30000, 5: 50000} + + s.mockBinlogIO.EXPECT().Download(mock.Anything, []string{"a"}). + Return([][]byte{blob.GetValue()}, nil).Once() + s.mockBinlogIO.EXPECT().Download(mock.Anything, []string{"mock_error"}). 
+ Return(nil, errors.New("mock_error")).Once() + + invalidPaths := map[int64][]string{2000: {"mock_error"}} + got, err := s.task.mergeDeltalogs(s.task.ctx, invalidPaths) + s.Error(err) + s.Nil(got) + + dpaths := map[int64][]string{1000: {"a"}} + got, err = s.task.mergeDeltalogs(s.task.ctx, dpaths) + s.NoError(err) + s.NotNil(got) + s.Equal(len(expectedMap), len(got)) + + gotKeys := lo.Map(lo.Keys(got), func(k interface{}, _ int) int64 { + res, ok := k.(int64) + s.Require().True(ok) + return res + }) + s.ElementsMatch(gotKeys, lo.Keys(expectedMap)) + s.ElementsMatch(lo.Values(got), lo.Values(expectedMap)) +} + +func (s *MixCompactionTaskSuite) TestCompactFail() { + s.Run("mock ctx done", func() { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + s.task.ctx = ctx + s.task.cancel = cancel + _, err := s.task.Compact() + s.Error(err) + s.ErrorIs(err, context.Canceled) + }) + + s.Run("Test compact invalid empty segment binlogs", func() { + s.plan.SegmentBinlogs = nil + + _, err := s.task.Compact() + s.Error(err) + }) + + s.Run("Test compact AllocOnce failed", func() { + s.mockAlloc.EXPECT().AllocOne().Return(0, errors.New("mock alloc one error")).Once() + _, err := s.task.Compact() + s.Error(err) + }) +} + +func (s *MixCompactionTaskSuite) TestIsExpiredEntity() { + milvusBirthdayTs := tsoutil.ComposeTSByTime(getMilvusBirthday(), 0) + + tests := []struct { + description string + collTTL int64 + nowTs uint64 + entityTs uint64 + + expect bool + }{ + {"ttl=maxInt64, nowTs-entityTs=ttl", math.MaxInt64, math.MaxInt64, 0, true}, + {"ttl=maxInt64, nowTs-entityTs < 0", math.MaxInt64, milvusBirthdayTs, 0, false}, + {"ttl=maxInt64, 0 < nowTs-entityTs < ttl", math.MaxInt64, math.MaxInt64, milvusBirthdayTs, true}, + // entityTs==currTs will never happen + // {"ttl=maxInt64, curTs-entityTs=0", math.MaxInt64, milvusBirthdayTs, milvusBirthdayTs, true}, + {"ttl=0, nowTs>entityTs", 0, milvusBirthdayTs + 1, milvusBirthdayTs, false}, + {"ttl=0, nowTs==entityTs", 0, milvusBirthdayTs, milvusBirthdayTs, false}, + {"ttl=0, nowTs<entityTs", 0, milvusBirthdayTs, milvusBirthdayTs + 1, false}, + {"ttl=10days, nowTs-entityTs>10days", 864000, milvusBirthdayTs + 864001, milvusBirthdayTs, true}, + {"ttl=10days, nowTs-entityTs==10days", 864000, milvusBirthdayTs + 864000, milvusBirthdayTs, true}, + {"ttl=10days, nowTs-entityTs<10days", 864000, milvusBirthdayTs + 10, milvusBirthdayTs, false}, + } + for _, test := range tests { + s.Run(test.description, func() { + t := &mixCompactionTask{ + plan: &datapb.CompactionPlan{ + CollectionTtl: test.collTTL, + }, + currentTs: test.nowTs, + } + got := t.isExpiredEntity(test.entityTs) + s.Equal(test.expect, got) + }) + } +} + +func getRow(magic int64) map[int64]interface{} { + ts := tsoutil.ComposeTSByTime(getMilvusBirthday(), 0) + return map[int64]interface{}{ + common.RowIDField: magic, + common.TimeStampField: int64(ts), // should be int64 here + BoolField: true, + Int8Field: int8(magic), + Int16Field: int16(magic), + Int32Field: int32(magic), + Int64Field: magic, + FloatField: float32(magic), + DoubleField: float64(magic), + StringField: "str", + VarCharField: "varchar", + BinaryVectorField: []byte{0}, + FloatVectorField: []float32{4, 5, 6, 7}, + Float16VectorField: []byte{0, 0, 0, 0, 255, 255, 255, 255}, + BFloat16VectorField: []byte{0, 0, 0, 0, 255, 255, 255, 255}, + SparseFloatVectorField: typeutil.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{4, 5, 6}), + ArrayField: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{1, 2, 3}}, + }, + }, + JSONField: []byte(`{"batch":ok}`), + } +} + +func (s 
*MixCompactionTaskSuite) initSegBuffer(magic int64) { + segWriter, err := NewSegmentWriter(s.meta.GetSchema(), 100, magic, PartitionID, CollectionID) + s.Require().NoError(err) + + v := storage.Value{ + PK: storage.NewInt64PrimaryKey(magic), + Timestamp: int64(tsoutil.ComposeTSByTime(getMilvusBirthday(), 0)), + Value: getRow(magic), + } + err = segWriter.Write(&v) + s.Require().NoError(err) + segWriter.writer.Flush() + + s.segWriter = segWriter +} + +const ( + CollectionID = 1 + PartitionID = 1 + SegmentID = 1 + BoolField = 100 + Int8Field = 101 + Int16Field = 102 + Int32Field = 103 + Int64Field = 104 + FloatField = 105 + DoubleField = 106 + StringField = 107 + BinaryVectorField = 108 + FloatVectorField = 109 + ArrayField = 110 + JSONField = 111 + Float16VectorField = 112 + BFloat16VectorField = 113 + SparseFloatVectorField = 114 + VarCharField = 115 +) + +func getInt64DeltaBlobs(segID int64, pks []int64, tss []uint64) (*storage.Blob, error) { + primaryKeys := make([]storage.PrimaryKey, len(pks)) + for index, v := range pks { + primaryKeys[index] = storage.NewInt64PrimaryKey(v) + } + deltaData := storage.NewDeleteData(primaryKeys, tss) + + dCodec := storage.NewDeleteCodec() + blob, err := dCodec.Serialize(1, 10, segID, deltaData) + return blob, err +} + +func genTestCollectionMeta() *etcdpb.CollectionMeta { + return &etcdpb.CollectionMeta{ + ID: CollectionID, + PartitionTags: []string{"partition_0", "partition_1"}, + Schema: &schemapb.CollectionSchema{ + Name: "schema", + Description: "schema", + AutoID: true, + Fields: []*schemapb.FieldSchema{ + { + FieldID: common.RowIDField, + Name: "row_id", + DataType: schemapb.DataType_Int64, + }, + { + FieldID: common.TimeStampField, + Name: "Timestamp", + DataType: schemapb.DataType_Int64, + }, + { + FieldID: BoolField, + Name: "field_bool", + DataType: schemapb.DataType_Bool, + }, + { + FieldID: Int8Field, + Name: "field_int8", + DataType: schemapb.DataType_Int8, + }, + { + FieldID: Int16Field, + Name: "field_int16", + DataType: schemapb.DataType_Int16, + }, + { + FieldID: Int32Field, + Name: "field_int32", + DataType: schemapb.DataType_Int32, + }, + { + FieldID: Int64Field, + Name: "field_int64", + IsPrimaryKey: true, + DataType: schemapb.DataType_Int64, + }, + { + FieldID: FloatField, + Name: "field_float", + DataType: schemapb.DataType_Float, + }, + { + FieldID: DoubleField, + Name: "field_double", + DataType: schemapb.DataType_Double, + }, + { + FieldID: StringField, + Name: "field_string", + DataType: schemapb.DataType_String, + }, + { + FieldID: VarCharField, + Name: "field_varchar", + DataType: schemapb.DataType_VarChar, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.MaxLengthKey, + Value: "128", + }, + }, + }, + { + FieldID: ArrayField, + Name: "field_int32_array", + Description: "int32 array", + DataType: schemapb.DataType_Array, + ElementType: schemapb.DataType_Int32, + }, + { + FieldID: JSONField, + Name: "field_json", + Description: "json", + DataType: schemapb.DataType_JSON, + }, + { + FieldID: BinaryVectorField, + Name: "field_binary_vector", + Description: "binary_vector", + DataType: schemapb.DataType_BinaryVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "8", + }, + }, + }, + { + FieldID: FloatVectorField, + Name: "field_float_vector", + Description: "float_vector", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "4", + }, + }, + }, + { + FieldID: Float16VectorField, + Name: "field_float16_vector", + Description: 
"float16_vector", + DataType: schemapb.DataType_Float16Vector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "4", + }, + }, + }, + { + FieldID: BFloat16VectorField, + Name: "field_bfloat16_vector", + Description: "bfloat16_vector", + DataType: schemapb.DataType_BFloat16Vector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "4", + }, + }, + }, + { + FieldID: SparseFloatVectorField, + Name: "field_sparse_float_vector", + Description: "sparse_float_vector", + DataType: schemapb.DataType_SparseFloatVector, + TypeParams: []*commonpb.KeyValuePair{}, + }, + }, + }, + } +} diff --git a/internal/datanode/compaction/mock_compactor.go b/internal/datanode/compaction/mock_compactor.go new file mode 100644 index 0000000000000..19a83bf2e1b9d --- /dev/null +++ b/internal/datanode/compaction/mock_compactor.go @@ -0,0 +1,275 @@ +// Code generated by mockery v2.32.4. DO NOT EDIT. + +package compaction + +import ( + datapb "github.com/milvus-io/milvus/internal/proto/datapb" + mock "github.com/stretchr/testify/mock" +) + +// MockCompactor is an autogenerated mock type for the Compactor type +type MockCompactor struct { + mock.Mock +} + +type MockCompactor_Expecter struct { + mock *mock.Mock +} + +func (_m *MockCompactor) EXPECT() *MockCompactor_Expecter { + return &MockCompactor_Expecter{mock: &_m.Mock} +} + +// Compact provides a mock function with given fields: +func (_m *MockCompactor) Compact() (*datapb.CompactionPlanResult, error) { + ret := _m.Called() + + var r0 *datapb.CompactionPlanResult + var r1 error + if rf, ok := ret.Get(0).(func() (*datapb.CompactionPlanResult, error)); ok { + return rf() + } + if rf, ok := ret.Get(0).(func() *datapb.CompactionPlanResult); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*datapb.CompactionPlanResult) + } + } + + if rf, ok := ret.Get(1).(func() error); ok { + r1 = rf() + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockCompactor_Compact_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Compact' +type MockCompactor_Compact_Call struct { + *mock.Call +} + +// Compact is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) Compact() *MockCompactor_Compact_Call { + return &MockCompactor_Compact_Call{Call: _e.mock.On("Compact")} +} + +func (_c *MockCompactor_Compact_Call) Run(run func()) *MockCompactor_Compact_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_Compact_Call) Return(_a0 *datapb.CompactionPlanResult, _a1 error) *MockCompactor_Compact_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *MockCompactor_Compact_Call) RunAndReturn(run func() (*datapb.CompactionPlanResult, error)) *MockCompactor_Compact_Call { + _c.Call.Return(run) + return _c +} + +// Complete provides a mock function with given fields: +func (_m *MockCompactor) Complete() { + _m.Called() +} + +// MockCompactor_Complete_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Complete' +type MockCompactor_Complete_Call struct { + *mock.Call +} + +// Complete is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) Complete() *MockCompactor_Complete_Call { + return &MockCompactor_Complete_Call{Call: _e.mock.On("Complete")} +} + +func (_c *MockCompactor_Complete_Call) Run(run func()) *MockCompactor_Complete_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_Complete_Call) 
Return() *MockCompactor_Complete_Call { + _c.Call.Return() + return _c +} + +func (_c *MockCompactor_Complete_Call) RunAndReturn(run func()) *MockCompactor_Complete_Call { + _c.Call.Return(run) + return _c +} + +// GetChannelName provides a mock function with given fields: +func (_m *MockCompactor) GetChannelName() string { + ret := _m.Called() + + var r0 string + if rf, ok := ret.Get(0).(func() string); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(string) + } + + return r0 +} + +// MockCompactor_GetChannelName_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetChannelName' +type MockCompactor_GetChannelName_Call struct { + *mock.Call +} + +// GetChannelName is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) GetChannelName() *MockCompactor_GetChannelName_Call { + return &MockCompactor_GetChannelName_Call{Call: _e.mock.On("GetChannelName")} +} + +func (_c *MockCompactor_GetChannelName_Call) Run(run func()) *MockCompactor_GetChannelName_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_GetChannelName_Call) Return(_a0 string) *MockCompactor_GetChannelName_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockCompactor_GetChannelName_Call) RunAndReturn(run func() string) *MockCompactor_GetChannelName_Call { + _c.Call.Return(run) + return _c +} + +// GetCollection provides a mock function with given fields: +func (_m *MockCompactor) GetCollection() int64 { + ret := _m.Called() + + var r0 int64 + if rf, ok := ret.Get(0).(func() int64); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(int64) + } + + return r0 +} + +// MockCompactor_GetCollection_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetCollection' +type MockCompactor_GetCollection_Call struct { + *mock.Call +} + +// GetCollection is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) GetCollection() *MockCompactor_GetCollection_Call { + return &MockCompactor_GetCollection_Call{Call: _e.mock.On("GetCollection")} +} + +func (_c *MockCompactor_GetCollection_Call) Run(run func()) *MockCompactor_GetCollection_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_GetCollection_Call) Return(_a0 int64) *MockCompactor_GetCollection_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockCompactor_GetCollection_Call) RunAndReturn(run func() int64) *MockCompactor_GetCollection_Call { + _c.Call.Return(run) + return _c +} + +// GetPlanID provides a mock function with given fields: +func (_m *MockCompactor) GetPlanID() int64 { + ret := _m.Called() + + var r0 int64 + if rf, ok := ret.Get(0).(func() int64); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(int64) + } + + return r0 +} + +// MockCompactor_GetPlanID_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetPlanID' +type MockCompactor_GetPlanID_Call struct { + *mock.Call +} + +// GetPlanID is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) GetPlanID() *MockCompactor_GetPlanID_Call { + return &MockCompactor_GetPlanID_Call{Call: _e.mock.On("GetPlanID")} +} + +func (_c *MockCompactor_GetPlanID_Call) Run(run func()) *MockCompactor_GetPlanID_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_GetPlanID_Call) Return(_a0 int64) *MockCompactor_GetPlanID_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockCompactor_GetPlanID_Call) 
RunAndReturn(run func() int64) *MockCompactor_GetPlanID_Call { + _c.Call.Return(run) + return _c +} + +// Stop provides a mock function with given fields: +func (_m *MockCompactor) Stop() { + _m.Called() +} + +// MockCompactor_Stop_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Stop' +type MockCompactor_Stop_Call struct { + *mock.Call +} + +// Stop is a helper method to define mock.On call +func (_e *MockCompactor_Expecter) Stop() *MockCompactor_Stop_Call { + return &MockCompactor_Stop_Call{Call: _e.mock.On("Stop")} +} + +func (_c *MockCompactor_Stop_Call) Run(run func()) *MockCompactor_Stop_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *MockCompactor_Stop_Call) Return() *MockCompactor_Stop_Call { + _c.Call.Return() + return _c +} + +func (_c *MockCompactor_Stop_Call) RunAndReturn(run func()) *MockCompactor_Stop_Call { + _c.Call.Return(run) + return _c +} + +// NewMockCompactor creates a new instance of MockCompactor. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. +func NewMockCompactor(t interface { + mock.TestingT + Cleanup(func()) +}) *MockCompactor { + mock := &MockCompactor{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/internal/datanode/compaction/segment_writer.go b/internal/datanode/compaction/segment_writer.go new file mode 100644 index 0000000000000..3d458aad9974e --- /dev/null +++ b/internal/datanode/compaction/segment_writer.go @@ -0,0 +1,165 @@ +// SegmentInsertBuffer can be reused to buffer all insert data of one segment +// buffer.Serialize will serialize the InsertBuffer and clear it +// pkstats keeps tracking pkstats of the segment until Finish + +package compaction + +import ( + "fmt" + "math" + + "go.uber.org/atomic" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/datanode/writebuffer" + "github.com/milvus-io/milvus/internal/proto/etcdpb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +type SegmentWriter struct { + writer *storage.SerializeWriter[*storage.Value] + closers []func() (*storage.Blob, error) + tsFrom typeutil.Timestamp + tsTo typeutil.Timestamp + + pkstats *storage.PrimaryKeyStats + segmentID int64 + partitionID int64 + collectionID int64 + sch *schemapb.CollectionSchema + rowCount *atomic.Int64 +} + +func (w *SegmentWriter) GetRowNum() int64 { + return w.rowCount.Load() +} + +func (w *SegmentWriter) GetCollectionID() int64 { + return w.collectionID +} + +func (w *SegmentWriter) GetPartitionID() int64 { + return w.partitionID +} + +func (w *SegmentWriter) GetSegmentID() int64 { + return w.segmentID +} + +func (w *SegmentWriter) GetPkID() int64 { + return w.pkstats.FieldID +} + +func (w *SegmentWriter) Write(v *storage.Value) error { + ts := typeutil.Timestamp(v.Timestamp) + if ts < w.tsFrom { + w.tsFrom = ts + } + if ts > w.tsTo { + w.tsTo = ts + } + + w.pkstats.Update(v.PK) + w.rowCount.Inc() + return w.writer.Write(v) +} + +func (w *SegmentWriter) Finish(actualRowCount int64) (*storage.Blob, error) { + w.writer.Flush() + codec := storage.NewInsertCodecWithSchema(&etcdpb.CollectionMeta{ID: w.collectionID, Schema: w.sch}) + return codec.SerializePkStats(w.pkstats, actualRowCount) +} + +func (w 
*SegmentWriter) IsFull() bool { + w.writer.Flush() + return w.writer.WrittenMemorySize() > paramtable.Get().DataNodeCfg.BinLogMaxSize.GetAsUint64() +} + +func (w *SegmentWriter) IsEmpty() bool { + w.writer.Flush() + return w.writer.WrittenMemorySize() == 0 +} + +func (w *SegmentWriter) GetTimeRange() *writebuffer.TimeRange { + return writebuffer.NewTimeRange(w.tsFrom, w.tsTo) +} + +func (w *SegmentWriter) SerializeYield() ([]*storage.Blob, *writebuffer.TimeRange, error) { + w.writer.Flush() + w.writer.Close() + + fieldData := make([]*storage.Blob, len(w.closers)) + for i, f := range w.closers { + blob, err := f() + if err != nil { + return nil, nil, err + } + fieldData[i] = blob + } + + tr := w.GetTimeRange() + w.clear() + + return fieldData, tr, nil +} + +func (w *SegmentWriter) clear() { + writer, closers, _ := newBinlogWriter(w.collectionID, w.partitionID, w.segmentID, w.sch) + w.writer = writer + w.closers = closers + w.tsFrom = math.MaxUint64 + w.tsTo = 0 +} + +func NewSegmentWriter(sch *schemapb.CollectionSchema, maxCount int64, segID, partID, collID int64) (*SegmentWriter, error) { + writer, closers, err := newBinlogWriter(collID, partID, segID, sch) + if err != nil { + return nil, err + } + + var pkField *schemapb.FieldSchema + for _, fs := range sch.GetFields() { + if fs.GetIsPrimaryKey() && fs.GetFieldID() >= 100 && typeutil.IsPrimaryFieldType(fs.GetDataType()) { + pkField = fs + } + } + if pkField == nil { + log.Warn("failed to get pk field from schema") + return nil, fmt.Errorf("no pk field in schema") + } + + stats, err := storage.NewPrimaryKeyStats(pkField.GetFieldID(), int64(pkField.GetDataType()), maxCount) + if err != nil { + return nil, err + } + + segWriter := SegmentWriter{ + writer: writer, + closers: closers, + tsFrom: math.MaxUint64, + tsTo: 0, + + pkstats: stats, + sch: sch, + segmentID: segID, + partitionID: partID, + collectionID: collID, + rowCount: atomic.NewInt64(0), + } + + return &segWriter, nil +} + +func newBinlogWriter(collID, partID, segID int64, schema *schemapb.CollectionSchema, +) (writer *storage.SerializeWriter[*storage.Value], closers []func() (*storage.Blob, error), err error) { + fieldWriters := storage.NewBinlogStreamWriters(collID, partID, segID, schema.Fields) + closers = make([]func() (*storage.Blob, error), 0, len(fieldWriters)) + for _, w := range fieldWriters { + closers = append(closers, w.Finalize) + } + writer, err = storage.NewBinlogSerializeWriter(schema, partID, segID, fieldWriters, 1024) + return +} diff --git a/internal/datanode/compaction_executor.go b/internal/datanode/compaction_executor.go index 11a8a93ab4a78..d8ac9fa423f83 100644 --- a/internal/datanode/compaction_executor.go +++ b/internal/datanode/compaction_executor.go @@ -24,6 +24,7 @@ import ( "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -34,10 +35,10 @@ const ( ) type compactionExecutor struct { - executing *typeutil.ConcurrentMap[int64, compactor] // planID to compactor - completedCompactor *typeutil.ConcurrentMap[int64, compactor] // planID to compactor + executing *typeutil.ConcurrentMap[int64, compaction.Compactor] // planID to compactor + completedCompactor *typeutil.ConcurrentMap[int64, compaction.Compactor] // planID to compactor completed *typeutil.ConcurrentMap[int64, *datapb.CompactionPlanResult] // planID to CompactionPlanResult - 
taskCh chan compactor + taskCh chan compaction.Compactor dropped *typeutil.ConcurrentSet[string] // vchannel dropped // To prevent concurrency of release channel and compaction get results @@ -47,39 +48,38 @@ type compactionExecutor struct { func newCompactionExecutor() *compactionExecutor { return &compactionExecutor{ - executing: typeutil.NewConcurrentMap[int64, compactor](), - completedCompactor: typeutil.NewConcurrentMap[int64, compactor](), + executing: typeutil.NewConcurrentMap[int64, compaction.Compactor](), + completedCompactor: typeutil.NewConcurrentMap[int64, compaction.Compactor](), completed: typeutil.NewConcurrentMap[int64, *datapb.CompactionPlanResult](), - taskCh: make(chan compactor, maxTaskNum), + taskCh: make(chan compaction.Compactor, maxTaskNum), dropped: typeutil.NewConcurrentSet[string](), } } -func (c *compactionExecutor) execute(task compactor) { +func (c *compactionExecutor) execute(task compaction.Compactor) { c.taskCh <- task c.toExecutingState(task) } -func (c *compactionExecutor) toExecutingState(task compactor) { - c.executing.Insert(task.getPlanID(), task) +func (c *compactionExecutor) toExecutingState(task compaction.Compactor) { + c.executing.Insert(task.GetPlanID(), task) } -func (c *compactionExecutor) toCompleteState(task compactor) { - task.complete() - c.executing.GetAndRemove(task.getPlanID()) +func (c *compactionExecutor) toCompleteState(task compaction.Compactor) { + task.Complete() + c.executing.GetAndRemove(task.GetPlanID()) } -func (c *compactionExecutor) injectDone(planID UniqueID) { +func (c *compactionExecutor) removeTask(planID UniqueID) { c.completed.GetAndRemove(planID) task, loaded := c.completedCompactor.GetAndRemove(planID) if loaded { - log.Info("Compaction task inject done", zap.Int64("planID", planID), zap.String("channel", task.getChannelName())) - task.injectDone() + log.Info("Compaction task removed", zap.Int64("planID", planID), zap.String("channel", task.GetChannelName())) } } // These two func are bounded for waitGroup -func (c *compactionExecutor) executeWithState(task compactor) { +func (c *compactionExecutor) executeWithState(task compaction.Compactor) { go c.executeTask(task) } @@ -94,11 +94,11 @@ func (c *compactionExecutor) start(ctx context.Context) { } } -func (c *compactionExecutor) executeTask(task compactor) { +func (c *compactionExecutor) executeTask(task compaction.Compactor) { log := log.With( - zap.Int64("planID", task.getPlanID()), - zap.Int64("Collection", task.getCollection()), - zap.String("channel", task.getChannelName()), + zap.Int64("planID", task.GetPlanID()), + zap.Int64("Collection", task.GetCollection()), + zap.String("channel", task.GetChannelName()), ) defer func() { @@ -107,23 +107,22 @@ func (c *compactionExecutor) executeTask(task compactor) { log.Info("start to execute compaction") - result, err := task.compact() + result, err := task.Compact() if err != nil { - task.injectDone() log.Warn("compaction task failed", zap.Error(err)) - } else { - c.completed.Insert(result.GetPlanID(), result) - c.completedCompactor.Insert(result.GetPlanID(), task) + return } + c.completed.Insert(result.GetPlanID(), result) + c.completedCompactor.Insert(result.GetPlanID(), task) - log.Info("end to execute compaction", zap.Int64("planID", task.getPlanID())) + log.Info("end to execute compaction") } func (c *compactionExecutor) stopTask(planID UniqueID) { task, loaded := c.executing.GetAndRemove(planID) if loaded { - log.Warn("compaction executor stop task", zap.Int64("planID", planID), zap.String("vChannelName", 
task.getChannelName())) - task.stop() + log.Warn("compaction executor stop task", zap.Int64("planID", planID), zap.String("vChannelName", task.GetChannelName())) + task.Stop() } } @@ -141,8 +140,8 @@ func (c *compactionExecutor) discardPlan(channel string) { c.resultGuard.Lock() defer c.resultGuard.Unlock() - c.executing.Range(func(planID int64, task compactor) bool { - if task.getChannelName() == channel { + c.executing.Range(func(planID int64, task compaction.Compactor) bool { + if task.GetChannelName() == channel { c.stopTask(planID) } return true @@ -151,7 +150,7 @@ func (c *compactionExecutor) discardPlan(channel string) { // remove all completed plans of channel c.completed.Range(func(planID int64, result *datapb.CompactionPlanResult) bool { if result.GetChannel() == channel { - c.injectDone(planID) + c.removeTask(planID) log.Info("remove compaction plan and results", zap.String("channel", channel), zap.Int64("planID", planID)) @@ -170,7 +169,7 @@ func (c *compactionExecutor) getAllCompactionResults() []*datapb.CompactionPlanR ) results := make([]*datapb.CompactionPlanResult, 0) // get executing results - c.executing.Range(func(planID int64, task compactor) bool { + c.executing.Range(func(planID int64, task compaction.Compactor) bool { executing = append(executing, planID) results = append(results, &datapb.CompactionPlanResult{ State: commonpb.CompactionState_Executing, @@ -190,9 +189,10 @@ func (c *compactionExecutor) getAllCompactionResults() []*datapb.CompactionPlanR return true }) - // remote level zero results + // remove level zero results lo.ForEach(completedLevelZero, func(planID int64, _ int) { c.completed.Remove(planID) + c.completedCompactor.Remove(planID) }) if len(results) > 0 { diff --git a/internal/datanode/compaction_executor_test.go b/internal/datanode/compaction_executor_test.go index d56cb5e2bc39a..5fd21070e280e 100644 --- a/internal/datanode/compaction_executor_test.go +++ b/internal/datanode/compaction_executor_test.go @@ -20,26 +20,29 @@ import ( "context" "testing" + "github.com/cockroachdb/errors" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/proto/datapb" ) func TestCompactionExecutor(t *testing.T) { t.Run("Test execute", func(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - ex := newCompactionExecutor() - go ex.start(ctx) - ex.execute(newMockCompactor(true)) - - cancel() - }) - - t.Run("Test stopTask", func(t *testing.T) { - ex := newCompactionExecutor() - mc := newMockCompactor(true) - ex.executeWithState(mc) - ex.stopTask(UniqueID(1)) + planID := int64(1) + mockC := compaction.NewMockCompactor(t) + mockC.EXPECT().GetPlanID().Return(planID).Once() + mockC.EXPECT().GetChannelName().Return("ch1").Once() + executor := newCompactionExecutor() + executor.execute(mockC) + + assert.EqualValues(t, 1, len(executor.taskCh)) + assert.EqualValues(t, 1, executor.executing.Len()) + + mockC.EXPECT().Stop().Return().Once() + executor.stopTask(planID) }) t.Run("Test start", func(t *testing.T) { @@ -55,19 +58,35 @@ func TestCompactionExecutor(t *testing.T) { description string }{ - {true, "compact return nil"}, + {true, "compact success"}, {false, "compact return error"}, } ex := newCompactionExecutor() for _, test := range tests { t.Run(test.description, func(t *testing.T) { + mockC := compaction.NewMockCompactor(t) + mockC.EXPECT().GetPlanID().Return(int64(1)) + 
mockC.EXPECT().GetCollection().Return(int64(1)) + mockC.EXPECT().GetChannelName().Return("ch1") + mockC.EXPECT().Complete().Return().Maybe() + signal := make(chan struct{}) if test.isvalid { - validTask := newMockCompactor(true) - ex.executeWithState(validTask) + mockC.EXPECT().Compact().RunAndReturn( + func() (*datapb.CompactionPlanResult, error) { + signal <- struct{}{} + return &datapb.CompactionPlanResult{PlanID: 1}, nil + }).Once() + ex.executeWithState(mockC) + <-signal } else { - invalidTask := newMockCompactor(false) - ex.executeWithState(invalidTask) + mockC.EXPECT().Compact().RunAndReturn( + func() (*datapb.CompactionPlanResult, error) { + signal <- struct{}{} + return nil, errors.New("mock error") + }).Once() + ex.executeWithState(mockC) + <-signal } }) } @@ -93,83 +112,58 @@ func TestCompactionExecutor(t *testing.T) { t.Run("test stop vchannel tasks", func(t *testing.T) { ex := newCompactionExecutor() - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - go ex.start(ctx) - mc := newMockCompactor(true) - mc.alwaysWorking = true + mc := compaction.NewMockCompactor(t) + mc.EXPECT().GetPlanID().Return(int64(1)) + mc.EXPECT().GetChannelName().Return("mock") + mc.EXPECT().Compact().Return(&datapb.CompactionPlanResult{PlanID: 1}, nil).Maybe() + mc.EXPECT().Stop().Return().Once() ex.execute(mc) - // wait for task enqueued - found := false - for !found { - found = ex.executing.Contain(mc.getPlanID()) - } + require.True(t, ex.executing.Contain(int64(1))) ex.discardByDroppedChannel("mock") - - select { - case <-mc.ctx.Done(): - default: - t.FailNow() - } + assert.True(t, ex.dropped.Contain("mock")) + assert.False(t, ex.executing.Contain(int64(1))) }) -} - -func newMockCompactor(isvalid bool) *mockCompactor { - ctx, cancel := context.WithCancel(context.TODO()) - return &mockCompactor{ - ctx: ctx, - cancel: cancel, - isvalid: isvalid, - done: make(chan struct{}, 1), - } -} - -type mockCompactor struct { - ctx context.Context - cancel context.CancelFunc - isvalid bool - alwaysWorking bool - done chan struct{} -} - -var _ compactor = (*mockCompactor)(nil) - -func (mc *mockCompactor) complete() { - mc.done <- struct{}{} -} - -func (mc *mockCompactor) injectDone() {} - -func (mc *mockCompactor) compact() (*datapb.CompactionPlanResult, error) { - if !mc.isvalid { - return nil, errStart - } - if mc.alwaysWorking { - <-mc.ctx.Done() - return nil, mc.ctx.Err() - } - return nil, nil -} - -func (mc *mockCompactor) getPlanID() UniqueID { - return 1 -} - -func (mc *mockCompactor) stop() { - if mc.cancel != nil { - mc.cancel() - <-mc.done - } -} + t.Run("test getAllCompactionResults", func(t *testing.T) { + ex := newCompactionExecutor() -func (mc *mockCompactor) getCollection() UniqueID { - return 1 -} + mockC := compaction.NewMockCompactor(t) + ex.executing.Insert(int64(1), mockC) + + ex.completedCompactor.Insert(int64(2), mockC) + ex.completed.Insert(int64(2), &datapb.CompactionPlanResult{ + PlanID: 2, + State: commonpb.CompactionState_Completed, + Type: datapb.CompactionType_MixCompaction, + }) + + ex.completedCompactor.Insert(int64(3), mockC) + ex.completed.Insert(int64(3), &datapb.CompactionPlanResult{ + PlanID: 3, + State: commonpb.CompactionState_Completed, + Type: datapb.CompactionType_Level0DeleteCompaction, + }) + + require.Equal(t, 2, ex.completed.Len()) + require.Equal(t, 2, ex.completedCompactor.Len()) + require.Equal(t, 1, ex.executing.Len()) + + result := ex.getAllCompactionResults() + assert.Equal(t, 3, len(result)) + + for _, res := range result { + if res.PlanID 
== int64(1) { + assert.Equal(t, res.GetState(), commonpb.CompactionState_Executing) + } else { + assert.Equal(t, res.GetState(), commonpb.CompactionState_Completed) + } + } -func (mc *mockCompactor) getChannelName() string { - return "mock" + assert.Equal(t, 1, ex.completed.Len()) + require.Equal(t, 1, ex.completedCompactor.Len()) + require.Equal(t, 1, ex.executing.Len()) + }) } diff --git a/internal/datanode/compactor.go b/internal/datanode/compactor.go deleted file mode 100644 index e99642316e6f4..0000000000000 --- a/internal/datanode/compactor.go +++ /dev/null @@ -1,827 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package datanode - -import ( - "context" - "fmt" - sio "io" - "sync" - "time" - - "github.com/cockroachdb/errors" - "github.com/samber/lo" - "go.opentelemetry.io/otel" - "go.uber.org/zap" - - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/internal/datanode/allocator" - "github.com/milvus-io/milvus/internal/datanode/io" - "github.com/milvus-io/milvus/internal/datanode/metacache" - "github.com/milvus-io/milvus/internal/datanode/syncmgr" - "github.com/milvus-io/milvus/internal/metastore/kv/binlog" - "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/proto/etcdpb" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/log" - "github.com/milvus-io/milvus/pkg/metrics" - "github.com/milvus-io/milvus/pkg/util/funcutil" - "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/milvus-io/milvus/pkg/util/paramtable" - "github.com/milvus-io/milvus/pkg/util/timerecord" - "github.com/milvus-io/milvus/pkg/util/tsoutil" - "github.com/milvus-io/milvus/pkg/util/typeutil" -) - -var ( - errIllegalCompactionPlan = errors.New("compaction plan illegal") - errTransferType = errors.New("transfer intferface to type wrong") - errUnknownDataType = errors.New("unknown shema DataType") - errContext = errors.New("context done or timeout") -) - -type compactor interface { - complete() - compact() (*datapb.CompactionPlanResult, error) - injectDone() - stop() - getPlanID() UniqueID - getCollection() UniqueID - getChannelName() string -} - -// make sure compactionTask implements compactor interface -var _ compactor = (*compactionTask)(nil) - -// for MixCompaction only -type compactionTask struct { - binlogIO io.BinlogIO - compactor - metaCache metacache.MetaCache - syncMgr syncmgr.SyncManager - allocator.Allocator - - plan *datapb.CompactionPlan - - ctx context.Context - cancel context.CancelFunc - - injectDoneOnce sync.Once - done chan struct{} - tr *timerecord.TimeRecorder -} - -func newCompactionTask( - ctx context.Context, - binlogIO io.BinlogIO, - metaCache metacache.MetaCache, - syncMgr 
syncmgr.SyncManager, - alloc allocator.Allocator, - plan *datapb.CompactionPlan, -) *compactionTask { - ctx1, cancel := context.WithCancel(ctx) - return &compactionTask{ - ctx: ctx1, - cancel: cancel, - binlogIO: binlogIO, - syncMgr: syncMgr, - metaCache: metaCache, - Allocator: alloc, - plan: plan, - tr: timerecord.NewTimeRecorder("levelone compaction"), - done: make(chan struct{}, 1), - } -} - -func (t *compactionTask) complete() { - t.done <- struct{}{} -} - -func (t *compactionTask) stop() { - t.cancel() - <-t.done - t.injectDone() -} - -func (t *compactionTask) getPlanID() UniqueID { - return t.plan.GetPlanID() -} - -func (t *compactionTask) getChannelName() string { - return t.plan.GetChannel() -} - -// return num rows of all segment compaction from -func (t *compactionTask) getNumRows() (int64, error) { - numRows := int64(0) - for _, binlog := range t.plan.SegmentBinlogs { - seg, ok := t.metaCache.GetSegmentByID(binlog.GetSegmentID()) - if !ok { - return 0, merr.WrapErrSegmentNotFound(binlog.GetSegmentID(), "get compaction segments num rows failed") - } - - numRows += seg.NumOfRows() - } - - return numRows, nil -} - -func (t *compactionTask) mergeDeltalogs(dBlobs map[UniqueID][]*Blob) (map[interface{}]Timestamp, error) { - log := log.With(zap.Int64("planID", t.getPlanID())) - mergeStart := time.Now() - dCodec := storage.NewDeleteCodec() - - pk2ts := make(map[interface{}]Timestamp) - - for _, blobs := range dBlobs { - _, _, dData, err := dCodec.Deserialize(blobs) - if err != nil { - log.Warn("merge deltalogs wrong", zap.Error(err)) - return nil, err - } - - for i := int64(0); i < dData.RowCount; i++ { - pk := dData.Pks[i] - ts := dData.Tss[i] - if lastTS, ok := pk2ts[pk.GetValue()]; ok && lastTS > ts { - ts = lastTS - } - pk2ts[pk.GetValue()] = ts - } - } - - log.Info("mergeDeltalogs end", - zap.Int("number of deleted pks to compact in insert logs", len(pk2ts)), - zap.Duration("elapse", time.Since(mergeStart))) - - return pk2ts, nil -} - -func newBinlogWriter(collectionId, partitionId, segmentId UniqueID, schema *schemapb.CollectionSchema, -) (writer *storage.SerializeWriter[*storage.Value], closers []func() (*Blob, error), err error) { - fieldWriters := storage.NewBinlogStreamWriters(collectionId, partitionId, segmentId, schema.Fields) - closers = make([]func() (*Blob, error), 0, len(fieldWriters)) - for _, w := range fieldWriters { - closers = append(closers, w.Finalize) - } - writer, err = storage.NewBinlogSerializeWriter(schema, partitionId, segmentId, fieldWriters, 1024) - return -} - -func (t *compactionTask) merge( - ctx context.Context, - unMergedInsertlogs [][]string, - targetSegID UniqueID, - partID UniqueID, - meta *etcdpb.CollectionMeta, - delta map[interface{}]Timestamp, -) ([]*datapb.FieldBinlog, []*datapb.FieldBinlog, int64, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, fmt.Sprintf("CompactMerge-%d", t.getPlanID())) - defer span.End() - log := log.With(zap.Int64("planID", t.getPlanID())) - mergeStart := time.Now() - - writer, finalizers, err := newBinlogWriter(meta.GetID(), partID, targetSegID, meta.GetSchema()) - if err != nil { - return nil, nil, 0, err - } - - var ( - numBinlogs int // binlog number - numRows uint64 // the number of rows uploaded - expired int64 // the number of expired entity - - insertField2Path = make(map[UniqueID]*datapb.FieldBinlog) - insertPaths = make([]*datapb.FieldBinlog, 0) - - statField2Path = make(map[UniqueID]*datapb.FieldBinlog) - statPaths = make([]*datapb.FieldBinlog, 0) - ) - - isDeletedValue := func(v 
*storage.Value) bool { - ts, ok := delta[v.PK.GetValue()] - // insert task and delete task has the same ts when upsert - // here should be < instead of <= - // to avoid the upsert data to be deleted after compact - if ok && uint64(v.Timestamp) < ts { - return true - } - return false - } - - addInsertFieldPath := func(inPaths map[UniqueID]*datapb.FieldBinlog, timestampFrom, timestampTo int64) { - for fID, path := range inPaths { - for _, binlog := range path.GetBinlogs() { - binlog.TimestampTo = uint64(timestampTo) - binlog.TimestampFrom = uint64(timestampFrom) - } - tmpBinlog, ok := insertField2Path[fID] - if !ok { - tmpBinlog = path - } else { - tmpBinlog.Binlogs = append(tmpBinlog.Binlogs, path.GetBinlogs()...) - } - insertField2Path[fID] = tmpBinlog - } - } - - addStatFieldPath := func(statPaths map[UniqueID]*datapb.FieldBinlog) { - for fID, path := range statPaths { - tmpBinlog, ok := statField2Path[fID] - if !ok { - tmpBinlog = path - } else { - tmpBinlog.Binlogs = append(tmpBinlog.Binlogs, path.GetBinlogs()...) - } - statField2Path[fID] = tmpBinlog - } - } - - // get pkID, pkType, dim - var pkField *schemapb.FieldSchema - for _, fs := range meta.GetSchema().GetFields() { - if fs.GetIsPrimaryKey() && fs.GetFieldID() >= 100 && typeutil.IsPrimaryFieldType(fs.GetDataType()) { - pkField = fs - } - } - - if pkField == nil { - log.Warn("failed to get pk field from schema") - return nil, nil, 0, fmt.Errorf("no pk field in schema") - } - - pkID := pkField.GetFieldID() - pkType := pkField.GetDataType() - - expired = 0 - numRows = 0 - numBinlogs = 0 - currentTs := t.GetCurrentTime() - unflushedRows := 0 - downloadTimeCost := time.Duration(0) - uploadInsertTimeCost := time.Duration(0) - - oldRowNums, err := t.getNumRows() - if err != nil { - return nil, nil, 0, err - } - - stats, err := storage.NewPrimaryKeyStats(pkID, int64(pkType), oldRowNums) - if err != nil { - return nil, nil, 0, err - } - // initial timestampFrom, timestampTo = -1, -1 is an illegal value, only to mark initial state - var ( - timestampTo int64 = -1 - timestampFrom int64 = -1 - ) - - flush := func() error { - uploadInsertStart := time.Now() - writer.Close() - fieldData := make([]*Blob, len(finalizers)) - - for i, f := range finalizers { - blob, err := f() - if err != nil { - return err - } - fieldData[i] = blob - } - inPaths, err := uploadInsertLog(ctx, t.binlogIO, t.Allocator, meta.ID, partID, targetSegID, fieldData) - if err != nil { - log.Warn("failed to upload single insert log", zap.Error(err)) - return err - } - numBinlogs += len(inPaths) - uploadInsertTimeCost += time.Since(uploadInsertStart) - addInsertFieldPath(inPaths, timestampFrom, timestampTo) - unflushedRows = 0 - return nil - } - - for _, path := range unMergedInsertlogs { - downloadStart := time.Now() - data, err := downloadBlobs(ctx, t.binlogIO, path) - if err != nil { - log.Warn("download insertlogs wrong", zap.Strings("path", path), zap.Error(err)) - return nil, nil, 0, err - } - downloadTimeCost += time.Since(downloadStart) - - iter, err := storage.NewBinlogDeserializeReader(data, pkID) - if err != nil { - log.Warn("new insert binlogs reader wrong", zap.Strings("path", path), zap.Error(err)) - return nil, nil, 0, err - } - - for { - err := iter.Next() - if err != nil { - if err == sio.EOF { - break - } else { - log.Warn("transfer interface to Value wrong", zap.Strings("path", path)) - return nil, nil, 0, errors.New("unexpected error") - } - } - v := iter.Value() - if isDeletedValue(v) { - continue - } - - ts := Timestamp(v.Timestamp) - // Filtering 
expired entity - if t.isExpiredEntity(ts, currentTs) { - expired++ - continue - } - - // Update timestampFrom, timestampTo - if v.Timestamp < timestampFrom || timestampFrom == -1 { - timestampFrom = v.Timestamp - } - if v.Timestamp > timestampTo || timestampFrom == -1 { - timestampTo = v.Timestamp - } - - err = writer.Write(v) - if err != nil { - return nil, nil, 0, err - } - numRows++ - unflushedRows++ - - stats.Update(v.PK) - - // check size every 100 rows in case of too many `GetMemorySize` call - if (unflushedRows+1)%100 == 0 { - writer.Flush() // Flush to update memory size - - if writer.WrittenMemorySize() > paramtable.Get().DataNodeCfg.BinLogMaxSize.GetAsUint64() { - if err := flush(); err != nil { - return nil, nil, 0, err - } - timestampFrom = -1 - timestampTo = -1 - - writer, finalizers, err = newBinlogWriter(meta.ID, targetSegID, partID, meta.Schema) - if err != nil { - return nil, nil, 0, err - } - } - } - } - } - - // final flush if there is unflushed rows - if unflushedRows > 0 { - if err := flush(); err != nil { - return nil, nil, 0, err - } - } - - // upload stats log - if numRows > 0 { - iCodec := storage.NewInsertCodecWithSchema(meta) - statsPaths, err := uploadStatsLog(ctx, t.binlogIO, t.Allocator, meta.GetID(), partID, targetSegID, stats, int64(numRows), iCodec) - if err != nil { - return nil, nil, 0, err - } - addStatFieldPath(statsPaths) - } - - for _, path := range insertField2Path { - insertPaths = append(insertPaths, path) - } - - for _, path := range statField2Path { - statPaths = append(statPaths, path) - } - - log.Info("compact merge end", - zap.Uint64("remaining insert numRows", numRows), - zap.Int64("expired entities", expired), - zap.Int("binlog file number", numBinlogs), - zap.Duration("download insert log elapse", downloadTimeCost), - zap.Duration("upload insert log elapse", uploadInsertTimeCost), - zap.Duration("merge elapse", time.Since(mergeStart))) - - return insertPaths, statPaths, int64(numRows), nil -} - -func (t *compactionTask) compact() (*datapb.CompactionPlanResult, error) { - ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(t.ctx, fmt.Sprintf("Compact-%d", t.getPlanID())) - defer span.End() - - log := log.Ctx(ctx).With(zap.Int64("planID", t.plan.GetPlanID()), zap.Int32("timeout in seconds", t.plan.GetTimeoutInSeconds())) - if ok := funcutil.CheckCtxValid(ctx); !ok { - log.Warn("compact wrong, task context done or timeout") - return nil, errContext - } - - ctxTimeout, cancelAll := context.WithTimeout(ctx, time.Duration(t.plan.GetTimeoutInSeconds())*time.Second) - defer cancelAll() - - compactStart := time.Now() - durInQueue := t.tr.RecordSpan() - log.Info("compact start") - if len(t.plan.GetSegmentBinlogs()) < 1 { - log.Warn("compact wrong, there's no segments in segment binlogs") - return nil, errIllegalCompactionPlan - } - - targetSegID, err := t.AllocOne() - if err != nil { - log.Warn("compact wrong, unable to allocate segmentID", zap.Error(err)) - return nil, err - } - - segIDs := lo.Map(t.plan.GetSegmentBinlogs(), func(binlogs *datapb.CompactionSegmentBinlogs, _ int) int64 { - return binlogs.GetSegmentID() - }) - - // Inject to stop flush - // when compaction failed, these segments need to be Unblocked by injectDone in compaction_executor - // when compaction succeeded, these segments will be Unblocked by SyncSegments from DataCoord. 
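// For context, the deleted compactionTask blocks segment flush before it
// starts compacting and relies on injectDone() to unblock; because
// injectDone can be reached both from stop() and from the compaction
// executor, it is wrapped in a sync.Once. A minimal, self-contained sketch
// of that idempotent unblock pattern follows (segmentBlocker and its method
// names are hypothetical stand-ins, not the Milvus syncmgr API):
package main

import (
	"fmt"
	"sync"
)

type segmentBlocker struct {
	mu      sync.Mutex
	blocked map[int64]bool
}

func (b *segmentBlocker) Block(segID int64) {
	b.mu.Lock()
	defer b.mu.Unlock()
	b.blocked[segID] = true
}

func (b *segmentBlocker) Unblock(segID int64) {
	b.mu.Lock()
	defer b.mu.Unlock()
	delete(b.blocked, segID)
}

type task struct {
	segments []int64
	blocker  *segmentBlocker
	doneOnce sync.Once
}

// injectDone may be called from several cleanup paths; sync.Once guarantees
// the segments are unblocked exactly once.
func (t *task) injectDone() {
	t.doneOnce.Do(func() {
		for _, id := range t.segments {
			t.blocker.Unblock(id)
		}
	})
}

func main() {
	b := &segmentBlocker{blocked: map[int64]bool{}}
	tk := &task{segments: []int64{100, 200}, blocker: b}
	for _, id := range tk.segments {
		b.Block(id)
	}
	tk.injectDone()
	tk.injectDone() // second call is a no-op, mirroring TestInjectDone below
	fmt.Println("still blocked:", len(b.blocked)) // prints: still blocked: 0
}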
- for _, segID := range segIDs { - t.syncMgr.Block(segID) - } - log.Info("compact finsh injection", zap.Duration("elapse", t.tr.RecordSpan())) - - if err := binlog.DecompressCompactionBinlogs(t.plan.GetSegmentBinlogs()); err != nil { - log.Warn("compact wrong, fail to decompress compaction binlogs", zap.Error(err)) - return nil, err - } - - dblobs := make(map[UniqueID][]*Blob) - allPath := make([][]string, 0) - - for _, s := range t.plan.GetSegmentBinlogs() { - log := log.With(zap.Int64("segmentID", s.GetSegmentID())) - // Get the batch count of field binlog files - var binlogBatch int - for _, b := range s.GetFieldBinlogs() { - if b != nil { - binlogBatch = len(b.GetBinlogs()) - break - } - } - if binlogBatch == 0 { - log.Warn("compacting empty segment") - continue - } - - for idx := 0; idx < binlogBatch; idx++ { - var ps []string - for _, f := range s.GetFieldBinlogs() { - ps = append(ps, f.GetBinlogs()[idx].GetLogPath()) - } - allPath = append(allPath, ps) - } - - paths := make([]string, 0) - for _, d := range s.GetDeltalogs() { - for _, l := range d.GetBinlogs() { - path := l.GetLogPath() - paths = append(paths, path) - } - } - - if len(paths) != 0 { - bs, err := downloadBlobs(ctxTimeout, t.binlogIO, paths) - if err != nil { - log.Warn("compact wrong, fail to download deltalogs", zap.Strings("path", paths), zap.Error(err)) - return nil, err - } - dblobs[s.GetSegmentID()] = append(dblobs[s.GetSegmentID()], bs...) - } - } - - // Unable to deal with all empty segments cases, so return error - if len(allPath) == 0 { - log.Warn("compact wrong, all segments are empty") - return nil, errIllegalCompactionPlan - } - - log.Info("compact download deltalogs elapse", zap.Duration("elapse", t.tr.RecordSpan())) - - if err != nil { - log.Warn("compact IO wrong", zap.Error(err)) - return nil, err - } - - deltaPk2Ts, err := t.mergeDeltalogs(dblobs) - if err != nil { - log.Warn("compact wrong, fail to merge deltalogs", zap.Error(err)) - return nil, err - } - - segmentBinlog := t.plan.GetSegmentBinlogs()[0] - partID := segmentBinlog.GetPartitionID() - meta := &etcdpb.CollectionMeta{ID: t.metaCache.Collection(), Schema: t.metaCache.Schema()} - - inPaths, statsPaths, numRows, err := t.merge(ctxTimeout, allPath, targetSegID, partID, meta, deltaPk2Ts) - if err != nil { - log.Warn("compact wrong, fail to merge", zap.Error(err)) - return nil, err - } - - pack := &datapb.CompactionSegment{ - SegmentID: targetSegID, - InsertLogs: inPaths, - Field2StatslogPaths: statsPaths, - NumOfRows: numRows, - Channel: t.plan.GetChannel(), - } - - log.Info("compact done", - zap.Int64("targetSegmentID", targetSegID), - zap.Int64s("compactedFrom", segIDs), - zap.Int("num of binlog paths", len(inPaths)), - zap.Int("num of stats paths", len(statsPaths)), - zap.Int("num of delta paths", len(pack.GetDeltalogs())), - zap.Duration("elapse", time.Since(compactStart)), - ) - - metrics.DataNodeCompactionLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), t.plan.GetType().String()).Observe(float64(t.tr.ElapseSpan().Milliseconds())) - metrics.DataNodeCompactionLatencyInQueue.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(durInQueue.Milliseconds())) - - planResult := &datapb.CompactionPlanResult{ - State: commonpb.CompactionState_Completed, - PlanID: t.getPlanID(), - Channel: t.plan.GetChannel(), - Segments: []*datapb.CompactionSegment{pack}, - Type: t.plan.GetType(), - } - - return planResult, nil -} - -func (t *compactionTask) injectDone() { - t.injectDoneOnce.Do(func() { - for _, binlog := range 
t.plan.SegmentBinlogs { - t.syncMgr.Unblock(binlog.SegmentID) - } - }) -} - -// TODO copy maybe expensive, but this seems to be the only convinent way. -func interface2FieldData(schemaDataType schemapb.DataType, content []interface{}, numRows int64) (storage.FieldData, error) { - var rst storage.FieldData - switch schemaDataType { - case schemapb.DataType_Bool: - data := &storage.BoolFieldData{ - Data: make([]bool, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(bool) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Int8: - data := &storage.Int8FieldData{ - Data: make([]int8, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(int8) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Int16: - data := &storage.Int16FieldData{ - Data: make([]int16, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(int16) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Int32: - data := &storage.Int32FieldData{ - Data: make([]int32, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(int32) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Int64: - data := &storage.Int64FieldData{ - Data: make([]int64, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(int64) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Float: - data := &storage.FloatFieldData{ - Data: make([]float32, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(float32) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Double: - data := &storage.DoubleFieldData{ - Data: make([]float64, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(float64) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_String, schemapb.DataType_VarChar: - data := &storage.StringFieldData{ - Data: make([]string, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(string) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_JSON: - data := &storage.JSONFieldData{ - Data: make([][]byte, 0, len(content)), - } - - for _, c := range content { - r, ok := c.([]byte) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_Array: - data := &storage.ArrayFieldData{ - Data: make([]*schemapb.ScalarField, 0, len(content)), - } - - for _, c := range content { - r, ok := c.(*schemapb.ScalarField) - if !ok { - return nil, errTransferType - } - data.ElementType = r.GetArrayData().GetElementType() - data.Data = append(data.Data, r) - } - rst = data - - case schemapb.DataType_FloatVector: - data := &storage.FloatVectorFieldData{ - Data: []float32{}, - } - - for _, c := range content { - r, ok := c.([]float32) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r...) 
- } - - data.Dim = len(data.Data) / int(numRows) - rst = data - - case schemapb.DataType_Float16Vector: - data := &storage.Float16VectorFieldData{ - Data: []byte{}, - } - - for _, c := range content { - r, ok := c.([]byte) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r...) - } - - data.Dim = len(data.Data) / 2 / int(numRows) - rst = data - - case schemapb.DataType_BFloat16Vector: - data := &storage.BFloat16VectorFieldData{ - Data: []byte{}, - } - - for _, c := range content { - r, ok := c.([]byte) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r...) - } - - data.Dim = len(data.Data) / 2 / int(numRows) - rst = data - - case schemapb.DataType_BinaryVector: - data := &storage.BinaryVectorFieldData{ - Data: []byte{}, - } - - for _, c := range content { - r, ok := c.([]byte) - if !ok { - return nil, errTransferType - } - data.Data = append(data.Data, r...) - } - - data.Dim = len(data.Data) * 8 / int(numRows) - rst = data - - case schemapb.DataType_SparseFloatVector: - data := &storage.SparseFloatVectorFieldData{} - for _, c := range content { - if err := data.AppendRow(c); err != nil { - return nil, fmt.Errorf("failed to append row: %v, %w", err, errTransferType) - } - } - rst = data - - default: - return nil, errUnknownDataType - } - - return rst, nil -} - -func (t *compactionTask) getCollection() UniqueID { - return t.metaCache.Collection() -} - -func (t *compactionTask) GetCurrentTime() typeutil.Timestamp { - return tsoutil.GetCurrentTime() -} - -func (t *compactionTask) isExpiredEntity(ts, now Timestamp) bool { - // entity expire is not enabled if duration <= 0 - if t.plan.GetCollectionTtl() <= 0 { - return false - } - - pts, _ := tsoutil.ParseTS(ts) - pnow, _ := tsoutil.ParseTS(now) - expireTime := pts.Add(time.Duration(t.plan.GetCollectionTtl())) - return expireTime.Before(pnow) -} diff --git a/internal/datanode/compactor_test.go b/internal/datanode/compactor_test.go deleted file mode 100644 index efea77b55fbbb..0000000000000 --- a/internal/datanode/compactor_test.go +++ /dev/null @@ -1,1246 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
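// For context, isExpiredEntity just above decides whether a row can be
// dropped during compaction: a collection TTL of <= 0 disables expiration,
// otherwise an entity is expired once its write time plus the TTL falls
// before "now". A minimal sketch of that rule using plain time.Time values
// (the real code first converts hybrid timestamps with tsoutil.ParseTS and
// takes the TTL from the CompactionPlan; both are simplified away here):
package main

import (
	"fmt"
	"time"
)

// isExpired reports whether an entity written at writeTime has outlived ttl
// as of now. ttl <= 0 means entity expiration is disabled.
func isExpired(writeTime, now time.Time, ttl time.Duration) bool {
	if ttl <= 0 {
		return false
	}
	return writeTime.Add(ttl).Before(now)
}

func main() {
	now := time.Now()
	ttl := 10 * 24 * time.Hour // "10 days", as in the tests below
	fmt.Println(isExpired(now.Add(-11*24*time.Hour), now, ttl)) // true: older than the TTL
	fmt.Println(isExpired(now.Add(-time.Hour), now, ttl))       // false: still within the TTL
	fmt.Println(isExpired(now.Add(-11*24*time.Hour), now, 0))   // false: expiration disabled
}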
- -package datanode - -import ( - "context" - "fmt" - "math" - "testing" - - "github.com/cockroachdb/errors" - "github.com/samber/lo" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/internal/datanode/allocator" - "github.com/milvus-io/milvus/internal/datanode/io" - "github.com/milvus-io/milvus/internal/datanode/metacache" - "github.com/milvus-io/milvus/internal/datanode/syncmgr" - memkv "github.com/milvus-io/milvus/internal/kv/mem" - "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/proto/etcdpb" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/util/paramtable" - "github.com/milvus-io/milvus/pkg/util/timerecord" - "github.com/milvus-io/milvus/pkg/util/typeutil" -) - -var compactTestDir = "/tmp/milvus_test/compact" - -func TestCompactionTaskInnerMethods(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(compactTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - t.Run("Test.interface2FieldData", func(t *testing.T) { - tests := []struct { - isvalid bool - - tp schemapb.DataType - content []interface{} - - description string - }{ - {true, schemapb.DataType_Bool, []interface{}{true, false}, "valid bool"}, - {true, schemapb.DataType_Int8, []interface{}{int8(1), int8(2)}, "valid int8"}, - {true, schemapb.DataType_Int16, []interface{}{int16(1), int16(2)}, "valid int16"}, - {true, schemapb.DataType_Int32, []interface{}{int32(1), int32(2)}, "valid int32"}, - {true, schemapb.DataType_Int64, []interface{}{int64(1), int64(2)}, "valid int64"}, - {true, schemapb.DataType_Float, []interface{}{float32(1), float32(2)}, "valid float32"}, - {true, schemapb.DataType_Double, []interface{}{float64(1), float64(2)}, "valid float64"}, - {true, schemapb.DataType_VarChar, []interface{}{"test1", "test2"}, "valid varChar"}, - {true, schemapb.DataType_JSON, []interface{}{[]byte("{\"key\":\"value\"}"), []byte("{\"hello\":\"world\"}")}, "valid json"}, - {true, schemapb.DataType_Array, []interface{}{ - &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: []int32{1, 2}, - }, - }, - }, - &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: []int32{3, 4}, - }, - }, - }, - }, "valid array"}, - {true, schemapb.DataType_FloatVector, []interface{}{[]float32{1.0, 2.0}}, "valid floatvector"}, - {true, schemapb.DataType_BinaryVector, []interface{}{[]byte{255}}, "valid binaryvector"}, - {true, schemapb.DataType_Float16Vector, []interface{}{[]byte{255, 255, 255, 255}}, "valid float16vector"}, - {true, schemapb.DataType_BFloat16Vector, []interface{}{[]byte{255, 255, 255, 255}}, "valid bfloat16vector"}, - - {false, schemapb.DataType_Bool, []interface{}{1, 2}, "invalid bool"}, - {false, schemapb.DataType_Int8, []interface{}{nil, nil}, "invalid int8"}, - {false, schemapb.DataType_Int16, []interface{}{nil, nil}, "invalid int16"}, - {false, schemapb.DataType_Int32, []interface{}{nil, nil}, "invalid int32"}, - {false, schemapb.DataType_Int64, []interface{}{nil, nil}, "invalid int64"}, - {false, schemapb.DataType_Float, []interface{}{nil, nil}, "invalid float32"}, - {false, schemapb.DataType_Double, []interface{}{nil, 
nil}, "invalid float64"}, - {false, schemapb.DataType_VarChar, []interface{}{nil, nil}, "invalid varChar"}, - {false, schemapb.DataType_JSON, []interface{}{nil, nil}, "invalid json"}, - {false, schemapb.DataType_FloatVector, []interface{}{nil, nil}, "invalid floatvector"}, - {false, schemapb.DataType_BinaryVector, []interface{}{nil, nil}, "invalid binaryvector"}, - {false, schemapb.DataType_Float16Vector, []interface{}{nil, nil}, "invalid float16vector"}, - {false, schemapb.DataType_BFloat16Vector, []interface{}{nil, nil}, "invalid bfloat16vector"}, - - {false, schemapb.DataType_SparseFloatVector, []interface{}{nil, nil}, "invalid sparsefloatvector"}, - {false, schemapb.DataType_SparseFloatVector, []interface{}{[]byte{255}, []byte{15}}, "invalid sparsefloatvector"}, - {true, schemapb.DataType_SparseFloatVector, []interface{}{ - typeutil.CreateSparseFloatRow([]uint32{1, 2}, []float32{1.0, 2.0}), - typeutil.CreateSparseFloatRow([]uint32{3, 4}, []float32{1.0, 2.0}), - }, "valid sparsefloatvector"}, - } - - // make sure all new data types missed to handle would throw unexpected error - for typeName, typeValue := range schemapb.DataType_value { - tests = append(tests, struct { - isvalid bool - - tp schemapb.DataType - content []interface{} - - description string - }{false, schemapb.DataType(typeValue), []interface{}{nil, nil}, "invalid " + typeName}) - } - - for _, test := range tests { - t.Run(test.description, func(t *testing.T) { - if test.isvalid { - fd, err := interface2FieldData(test.tp, test.content, 2) - assert.NoError(t, err) - assert.Equal(t, 2, fd.RowNum()) - } else { - fd, err := interface2FieldData(test.tp, test.content, 2) - assert.True(t, errors.Is(err, errTransferType) || errors.Is(err, errUnknownDataType)) - assert.Nil(t, fd) - } - }) - } - }) - - t.Run("Test mergeDeltalogs", func(t *testing.T) { - t.Run("One segment", func(t *testing.T) { - invalidBlobs := map[UniqueID][]*Blob{ - 1: {}, - } - - blobs, err := getInt64DeltaBlobs( - 100, - []UniqueID{ - 1, - 2, - 3, - 4, - 5, - 1, - 2, - }, - []Timestamp{ - 20000, - 20001, - 20002, - 30000, - 50000, - 50000, - 10000, - }) - require.NoError(t, err) - - validBlobs := map[UniqueID][]*Blob{ - 100: blobs, - } - - tests := []struct { - isvalid bool - - dBlobs map[UniqueID][]*Blob - - description string - }{ - {false, invalidBlobs, "invalid dBlobs"}, - {true, validBlobs, "valid blobs"}, - } - - for _, test := range tests { - task := &compactionTask{ - done: make(chan struct{}, 1), - } - t.Run(test.description, func(t *testing.T) { - pk2ts, err := task.mergeDeltalogs(test.dBlobs) - if test.isvalid { - assert.NoError(t, err) - assert.Equal(t, 5, len(pk2ts)) - assert.EqualValues(t, 20001, pk2ts[UniqueID(2)]) - } else { - assert.Error(t, err) - assert.Nil(t, pk2ts) - } - }) - } - }) - - t.Run("Multiple segments", func(t *testing.T) { - tests := []struct { - segIDA UniqueID - dataApk []UniqueID - dataAts []Timestamp - - segIDB UniqueID - dataBpk []UniqueID - dataBts []Timestamp - - segIDC UniqueID - dataCpk []UniqueID - dataCts []Timestamp - - expectedpk2ts int - description string - }{ - { - 0, nil, nil, - 100, - []UniqueID{1, 2, 3}, - []Timestamp{20000, 30000, 20005}, - 200, - []UniqueID{4, 5, 6}, - []Timestamp{50000, 50001, 50002}, - 6, "2 segments", - }, - { - 300, - []UniqueID{10, 20}, - []Timestamp{20001, 40001}, - 100, - []UniqueID{1, 2, 3}, - []Timestamp{20000, 30000, 20005}, - 200, - []UniqueID{4, 5, 6}, - []Timestamp{50000, 50001, 50002}, - 8, "3 segments", - }, - } - - for _, test := range tests { - t.Run(test.description, func(t 
*testing.T) { - dBlobs := make(map[UniqueID][]*Blob) - if test.segIDA != UniqueID(0) { - d, err := getInt64DeltaBlobs(test.segIDA, test.dataApk, test.dataAts) - require.NoError(t, err) - dBlobs[test.segIDA] = d - } - if test.segIDB != UniqueID(0) { - d, err := getInt64DeltaBlobs(test.segIDB, test.dataBpk, test.dataBts) - require.NoError(t, err) - dBlobs[test.segIDB] = d - } - if test.segIDC != UniqueID(0) { - d, err := getInt64DeltaBlobs(test.segIDC, test.dataCpk, test.dataCts) - require.NoError(t, err) - dBlobs[test.segIDC] = d - } - - task := &compactionTask{ - done: make(chan struct{}, 1), - } - pk2ts, err := task.mergeDeltalogs(dBlobs) - assert.NoError(t, err) - assert.Equal(t, test.expectedpk2ts, len(pk2ts)) - }) - } - }) - }) - - t.Run("Test merge", func(t *testing.T) { - collectionID := int64(1) - meta := NewMetaFactory().GetCollectionMeta(collectionID, "test", schemapb.DataType_Int64) - - metaCache := metacache.NewMockMetaCache(t) - metaCache.EXPECT().Schema().Return(meta.GetSchema()).Maybe() - metaCache.EXPECT().GetSegmentByID(mock.Anything).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - segment := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: 1, - PartitionID: 0, - ID: id, - NumOfRows: 10, - }, nil) - return segment, true - }) - - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - alloc.EXPECT().AllocOne().Return(0, nil) - t.Run("Merge without expiration", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - iData := genInsertDataWithExpiredTS() - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - inPaths, statsPaths, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm) - assert.NoError(t, err) - assert.Equal(t, int64(2), numOfRow) - assert.Equal(t, 1, len(inPaths[0].GetBinlogs())) - assert.Equal(t, 1, len(statsPaths)) - assert.NotEqual(t, -1, inPaths[0].GetBinlogs()[0].GetTimestampFrom()) - assert.NotEqual(t, -1, inPaths[0].GetBinlogs()[0].GetTimestampTo()) - }) - t.Run("Merge without expiration2", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iData := genInsertDataWithExpiredTS() - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - BinLogMaxSize := Params.DataNodeCfg.BinLogMaxSize.GetValue() - defer func() { - 
Params.Save(Params.DataNodeCfg.BinLogMaxSize.Key, BinLogMaxSize) - }() - paramtable.Get().Save(Params.DataNodeCfg.BinLogMaxSize.Key, "64") - meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{} - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - inPaths, statsPaths, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm) - assert.NoError(t, err) - assert.Equal(t, int64(2), numOfRow) - assert.Equal(t, 1, len(inPaths[0].GetBinlogs())) - assert.Equal(t, 1, len(statsPaths)) - assert.Equal(t, 1, len(statsPaths[0].GetBinlogs())) - assert.NotEqual(t, -1, inPaths[0].GetBinlogs()[0].GetTimestampFrom()) - assert.NotEqual(t, -1, inPaths[0].GetBinlogs()[0].GetTimestampTo()) - }) - // set Params.DataNodeCfg.BinLogMaxSize.Key = 1 to generate multi binlogs, each has only one row - t.Run("merge_with_more_than_100rows", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - BinLogMaxSize := Params.DataNodeCfg.BinLogMaxSize.GetAsInt() - defer func() { - paramtable.Get().Save(Params.DataNodeCfg.BinLogMaxSize.Key, fmt.Sprintf("%d", BinLogMaxSize)) - }() - paramtable.Get().Save(Params.DataNodeCfg.BinLogMaxSize.Key, "1") - iData := genInsertData(101) - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, segmentId, iData) - assert.NoError(t, err) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - inPaths, statsPaths, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm) - assert.NoError(t, err) - assert.Equal(t, int64(101), numOfRow) - assert.Equal(t, 2, len(inPaths[0].GetBinlogs())) - assert.Equal(t, 1, len(statsPaths)) - for _, inpath := range inPaths { - assert.NotEqual(t, -1, inpath.GetBinlogs()[0].GetTimestampFrom()) - assert.NotEqual(t, -1, inpath.GetBinlogs()[0].GetTimestampTo()) - } - }) - - t.Run("Merge with expiration", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - iData := genInsertDataWithExpiredTS() - var 
partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - // 10 days in seconds - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - plan: &datapb.CompactionPlan{ - CollectionTtl: 864000, - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - done: make(chan struct{}, 1), - } - inPaths, statsPaths, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm) - assert.NoError(t, err) - assert.Equal(t, int64(0), numOfRow) - assert.Equal(t, 0, len(inPaths)) - assert.Equal(t, 0, len(statsPaths)) - }) - - t.Run("merge_with_rownum_zero", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iData := genInsertDataWithExpiredTS() - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64) - metaCache := metacache.NewMockMetaCache(t) - metaCache.EXPECT().Schema().Return(meta.GetSchema()).Maybe() - metaCache.EXPECT().GetSegmentByID(mock.Anything).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - segment := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: 1, - PartitionID: 0, - ID: id, - NumOfRows: 0, - }, nil) - return segment, true - }) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - _, _, _, err = ct.merge(context.Background(), allPaths, 2, 0, &etcdpb.CollectionMeta{ - Schema: meta.GetSchema(), - }, dm) - assert.Error(t, err) - }) - - t.Run("Merge with meta error", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - iData := genInsertDataWithExpiredTS() - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, 
alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - _, _, _, err = ct.merge(context.Background(), allPaths, 2, 0, &etcdpb.CollectionMeta{ - Schema: &schemapb.CollectionSchema{Fields: []*schemapb.FieldSchema{ - {DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ - {Key: common.DimKey, Value: "64"}, - }}, - }}, - }, dm) - assert.Error(t, err) - }) - - t.Run("Merge with meta type param error", func(t *testing.T) { - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - iData := genInsertDataWithExpiredTS() - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(t, err) - meta := NewMetaFactory().GetCollectionMeta(1, "test", schemapb.DataType_Int64) - - var allPaths [][]string - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(t, err) - assert.Equal(t, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(t, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - } - - _, _, _, err = ct.merge(context.Background(), allPaths, 2, 0, &etcdpb.CollectionMeta{ - Schema: &schemapb.CollectionSchema{Fields: []*schemapb.FieldSchema{ - {DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ - {Key: common.DimKey, Value: "bad_dim"}, - }}, - }}, - }, dm) - assert.Error(t, err) - }) - }) - t.Run("Test isExpiredEntity", func(t *testing.T) { - t.Run("When CompactionEntityExpiration is set math.MaxInt64", func(t *testing.T) { - ct := &compactionTask{ - plan: &datapb.CompactionPlan{ - CollectionTtl: math.MaxInt64, - }, - done: make(chan struct{}, 1), - } - - res := ct.isExpiredEntity(0, genTimestamp()) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, genTimestamp()) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(0, math.MaxInt64) - assert.Equal(t, true, res) - - res = ct.isExpiredEntity(math.MaxInt64, math.MaxInt64) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, 0) - assert.Equal(t, false, res) - }) - t.Run("When CompactionEntityExpiration is set MAX_ENTITY_EXPIRATION = 0", func(t *testing.T) { - // 0 means expiration is not enabled - ct := &compactionTask{ - plan: &datapb.CompactionPlan{ - CollectionTtl: 0, - }, - done: make(chan struct{}, 1), - } - res := ct.isExpiredEntity(0, genTimestamp()) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, genTimestamp()) - 
assert.Equal(t, false, res) - - res = ct.isExpiredEntity(0, math.MaxInt64) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, math.MaxInt64) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, 0) - assert.Equal(t, false, res) - }) - t.Run("When CompactionEntityExpiration is set 10 days", func(t *testing.T) { - // 10 days in seconds - ct := &compactionTask{ - plan: &datapb.CompactionPlan{ - CollectionTtl: 864000, - }, - done: make(chan struct{}, 1), - } - res := ct.isExpiredEntity(0, genTimestamp()) - assert.Equal(t, true, res) - - res = ct.isExpiredEntity(math.MaxInt64, genTimestamp()) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(0, math.MaxInt64) - assert.Equal(t, true, res) - - res = ct.isExpiredEntity(math.MaxInt64, math.MaxInt64) - assert.Equal(t, false, res) - - res = ct.isExpiredEntity(math.MaxInt64, 0) - assert.Equal(t, false, res) - }) - }) - - t.Run("Test getNumRows error", func(t *testing.T) { - metaCache := metacache.NewMockMetaCache(t) - metaCache.EXPECT().GetSegmentByID(mock.Anything).Return(nil, false) - ct := &compactionTask{ - metaCache: metaCache, - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - { - SegmentID: 1, - }, - }, - }, - done: make(chan struct{}, 1), - } - - _, err := ct.getNumRows() - assert.Error(t, err, "segment not found") - }) -} - -func getInt64DeltaBlobs(segID UniqueID, pks []UniqueID, tss []Timestamp) ([]*Blob, error) { - primaryKeys := make([]storage.PrimaryKey, len(pks)) - for index, v := range pks { - primaryKeys[index] = storage.NewInt64PrimaryKey(v) - } - deltaData := &DeleteData{ - Pks: primaryKeys, - Tss: tss, - RowCount: int64(len(pks)), - } - - dCodec := storage.NewDeleteCodec() - blob, err := dCodec.Serialize(1, 10, segID, deltaData) - return []*Blob{blob}, err -} - -func TestCompactorInterfaceMethods(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(compactTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - notEmptySegmentBinlogs := []*datapb.CompactionSegmentBinlogs{{ - SegmentID: 100, - FieldBinlogs: nil, - Field2StatslogPaths: nil, - Deltalogs: nil, - }} - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") // Turn off auto expiration - t.Run("Test compact with all segment empty", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Call.Return(int64(11111), nil) - ctx, cancel := context.WithCancel(context.TODO()) - - mockSyncmgr := syncmgr.NewMockSyncManager(t) - mockSyncmgr.EXPECT().Block(mock.Anything).Return() - task := &compactionTask{ - ctx: ctx, - cancel: cancel, - Allocator: alloc, - done: make(chan struct{}, 1), - tr: timerecord.NewTimeRecorder("test"), - syncMgr: mockSyncmgr, - plan: &datapb.CompactionPlan{ - PlanID: 999, - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{{SegmentID: 100}}, - TimeoutInSeconds: 10, - Type: datapb.CompactionType_MixCompaction, - }, - } - - _, err := task.compact() - assert.ErrorIs(t, errIllegalCompactionPlan, err) - }) - - t.Run("Test compact invalid empty segment binlogs", func(t *testing.T) { - plan := &datapb.CompactionPlan{ - PlanID: 999, - SegmentBinlogs: nil, - TimeoutInSeconds: 10, - Type: datapb.CompactionType_MixCompaction, - } - ctx, cancel := context.WithCancel(context.Background()) - emptyTask := &compactionTask{ - ctx: ctx, - cancel: cancel, - tr: timerecord.NewTimeRecorder("test"), - - done: make(chan struct{}, 1), - plan: plan, - } - 
- _, err := emptyTask.compact() - assert.Error(t, err) - assert.ErrorIs(t, err, errIllegalCompactionPlan) - - emptyTask.complete() - emptyTask.stop() - }) - - t.Run("Test compact invalid AllocOnce failed", func(t *testing.T) { - mockAlloc := allocator.NewMockAllocator(t) - mockAlloc.EXPECT().AllocOne().Call.Return(int64(0), errors.New("mock allocone error")).Once() - plan := &datapb.CompactionPlan{ - PlanID: 999, - SegmentBinlogs: notEmptySegmentBinlogs, - TimeoutInSeconds: 10, - Type: datapb.CompactionType_MixCompaction, - } - task := &compactionTask{ - ctx: context.Background(), - tr: timerecord.NewTimeRecorder("test"), - Allocator: mockAlloc, - plan: plan, - } - - _, err := task.compact() - assert.Error(t, err) - }) - - t.Run("Test typeII compact valid", func(t *testing.T) { - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - alloc.EXPECT().AllocOne().Call.Return(int64(19530), nil) - type testCase struct { - pkType schemapb.DataType - iData1 storage.FieldData - iData2 storage.FieldData - pks1 [2]storage.PrimaryKey - pks2 [2]storage.PrimaryKey - colID UniqueID - parID UniqueID - segID1 UniqueID - segID2 UniqueID - } - cases := []testCase{ - { - pkType: schemapb.DataType_Int64, - iData1: &storage.Int64FieldData{Data: []UniqueID{1}}, - iData2: &storage.Int64FieldData{Data: []UniqueID{9}}, - pks1: [2]storage.PrimaryKey{storage.NewInt64PrimaryKey(1), storage.NewInt64PrimaryKey(2)}, - pks2: [2]storage.PrimaryKey{storage.NewInt64PrimaryKey(9), storage.NewInt64PrimaryKey(10)}, - colID: 1, - parID: 10, - segID1: 100, - segID2: 101, - }, - { - pkType: schemapb.DataType_VarChar, - iData1: &storage.StringFieldData{Data: []string{"aaaa"}}, - iData2: &storage.StringFieldData{Data: []string{"milvus"}}, - pks1: [2]storage.PrimaryKey{storage.NewVarCharPrimaryKey("aaaa"), storage.NewVarCharPrimaryKey("bbbb")}, - pks2: [2]storage.PrimaryKey{storage.NewVarCharPrimaryKey("milvus"), storage.NewVarCharPrimaryKey("mmmm")}, - colID: 2, - parID: 11, - segID1: 102, - segID2: 103, - }, - } - - for _, c := range cases { - collName := "test_compact_coll_name" - meta := NewMetaFactory().GetCollectionMeta(c.colID, collName, c.pkType) - - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - mockKv := memkv.NewMemoryKV() - metaCache := metacache.NewMockMetaCache(t) - metaCache.EXPECT().Collection().Return(c.colID) - metaCache.EXPECT().Schema().Return(meta.GetSchema()) - syncMgr := syncmgr.NewMockSyncManager(t) - syncMgr.EXPECT().Block(mock.Anything).Return() - - bfs := metacache.NewBloomFilterSet() - bfs.UpdatePKRange(c.iData1) - seg1 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: c.colID, - PartitionID: c.parID, - ID: c.segID1, - NumOfRows: 2, - }, bfs) - bfs = metacache.NewBloomFilterSet() - bfs.UpdatePKRange(c.iData2) - seg2 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: c.colID, - PartitionID: c.parID, - ID: c.segID2, - NumOfRows: 2, - }, bfs) - - bfs = metacache.NewBloomFilterSet() - seg3 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: c.colID, - PartitionID: c.parID, - ID: 99999, - }, bfs) - - metaCache.EXPECT().GetSegmentByID(c.segID1).Return(seg1, true) - metaCache.EXPECT().GetSegmentByID(c.segID2).Return(seg2, true) - metaCache.EXPECT().GetSegmentByID(seg3.SegmentID()).Return(seg3, true) - metaCache.EXPECT().GetSegmentByID(mock.Anything).Return(nil, false) - - iData1 := genInsertDataWithPKs(c.pks1, c.pkType) - iblobs1, err 
:= iCodec.Serialize(c.parID, 0, iData1) - assert.NoError(t, err) - dData1 := &DeleteData{ - Pks: []storage.PrimaryKey{c.pks1[0]}, - Tss: []Timestamp{20000}, - RowCount: 1, - } - iData2 := genInsertDataWithPKs(c.pks2, c.pkType) - iblobs2, err := iCodec.Serialize(c.parID, 3, iData2) - assert.NoError(t, err) - dData2 := &DeleteData{ - Pks: []storage.PrimaryKey{c.pks2[0]}, - Tss: []Timestamp{30000}, - RowCount: 1, - } - - stats1, err := storage.NewPrimaryKeyStats(1, int64(c.pkType), 1) - require.NoError(t, err) - iPaths1, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID1, iblobs1) - require.NoError(t, err) - sPaths1, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID1, stats1, 2, iCodec) - require.NoError(t, err) - dPaths1, err := uploadDeltaLog(context.TODO(), mockbIO, alloc, meta.GetID(), c.parID, c.segID1, dData1) - require.NoError(t, err) - require.Equal(t, 12, len(iPaths1)) - - stats2, err := storage.NewPrimaryKeyStats(1, int64(c.pkType), 1) - require.NoError(t, err) - iPaths2, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID2, iblobs2) - require.NoError(t, err) - sPaths2, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), c.parID, c.segID2, stats2, 2, iCodec) - require.NoError(t, err) - dPaths2, err := uploadDeltaLog(context.TODO(), mockbIO, alloc, meta.GetID(), c.parID, c.segID2, dData2) - require.NoError(t, err) - require.Equal(t, 12, len(iPaths2)) - - plan := &datapb.CompactionPlan{ - PlanID: 10080, - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - { - SegmentID: c.segID1, - FieldBinlogs: lo.Values(iPaths1), - Field2StatslogPaths: lo.Values(sPaths1), - Deltalogs: dPaths1, - }, - { - SegmentID: c.segID2, - FieldBinlogs: lo.Values(iPaths2), - Field2StatslogPaths: lo.Values(sPaths2), - Deltalogs: dPaths2, - }, - { - SegmentID: seg3.SegmentID(), // empty segment - }, - }, - StartTime: 0, - TimeoutInSeconds: 10, - Type: datapb.CompactionType_MergeCompaction, - Channel: "channelname", - } - - task := newCompactionTask(context.TODO(), mockbIO, metaCache, syncMgr, alloc, plan) - result, err := task.compact() - assert.NoError(t, err) - assert.NotNil(t, result) - - assert.Equal(t, plan.GetPlanID(), result.GetPlanID()) - assert.Equal(t, 1, len(result.GetSegments())) - - segment := result.GetSegments()[0] - assert.EqualValues(t, 19530, segment.GetSegmentID()) - assert.EqualValues(t, 2, segment.GetNumOfRows()) - assert.NotEmpty(t, segment.InsertLogs) - assert.NotEmpty(t, segment.Field2StatslogPaths) - - // New test, remove all the binlogs in memkv - err = mockKv.RemoveWithPrefix("/") - require.NoError(t, err) - plan.PlanID++ - - result, err = task.compact() - assert.NoError(t, err) - assert.NotNil(t, result) - - assert.Equal(t, plan.GetPlanID(), result.GetPlanID()) - assert.Equal(t, 1, len(result.GetSegments())) - - segment = result.GetSegments()[0] - assert.EqualValues(t, 19530, segment.GetSegmentID()) - assert.EqualValues(t, 2, segment.GetNumOfRows()) - assert.NotEmpty(t, segment.InsertLogs) - assert.NotEmpty(t, segment.Field2StatslogPaths) - } - }) - - t.Run("Test typeII compact 2 segments with the same pk", func(t *testing.T) { - // Test merge compactions, two segments with the same pk, one deletion pk=1 - // The merged segment 19530 should only contain 2 rows and both pk=2 - // Both pk = 1 rows of the two segments are compacted. 
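// The scenario described in the comment above — two segments sharing pks
// {1, 2} plus one delete tombstone for pk 1 — reduces to the
// pk-to-delete-timestamp map built by mergeDeltalogs and the strict "<"
// check in isDeletedValue from the deleted compactor.go (strict so that an
// upsert written at exactly the delete timestamp survives). A self-contained
// sketch of that filter; the row type and helper names are illustrative,
// not Milvus storage types:
package main

import "fmt"

type row struct {
	pk int64
	ts uint64
}

// mergeDeltalogs keeps the latest delete timestamp seen for each pk.
func mergeDeltalogs(tombstones []row) map[int64]uint64 {
	pk2ts := make(map[int64]uint64)
	for _, d := range tombstones {
		if last, ok := pk2ts[d.pk]; !ok || d.ts > last {
			pk2ts[d.pk] = d.ts
		}
	}
	return pk2ts
}

// compactRows drops rows whose write timestamp is strictly older than the
// delete timestamp recorded for their pk.
func compactRows(rows []row, pk2ts map[int64]uint64) []row {
	var kept []row
	for _, r := range rows {
		if delTs, ok := pk2ts[r.pk]; ok && r.ts < delTs {
			continue // deleted
		}
		kept = append(kept, r)
	}
	return kept
}

func main() {
	segA := []row{{pk: 1, ts: 10000}, {pk: 2, ts: 10000}}
	segB := []row{{pk: 1, ts: 10001}, {pk: 2, ts: 10001}}
	tombstones := []row{{pk: 1, ts: 20000}} // delete pk=1, as in the test

	pk2ts := mergeDeltalogs(tombstones)
	merged := compactRows(append(segA, segB...), pk2ts)
	fmt.Println(merged) // only the two pk=2 rows remain, matching the expected merged segment
}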
- var collID, partID, segID1, segID2 UniqueID = 1, 10, 200, 201 - - alloc := allocator.NewMockAllocator(t) - alloc.EXPECT().AllocOne().Call.Return(int64(19530), nil) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - - meta := NewMetaFactory().GetCollectionMeta(collID, "test_compact_coll_name", schemapb.DataType_Int64) - - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - iCodec := storage.NewInsertCodecWithSchema(meta) - - metaCache := metacache.NewMockMetaCache(t) - metaCache.EXPECT().Collection().Return(collID) - metaCache.EXPECT().Schema().Return(meta.GetSchema()) - syncMgr := syncmgr.NewMockSyncManager(t) - syncMgr.EXPECT().Block(mock.Anything).Return() - - bfs := metacache.NewBloomFilterSet() - bfs.UpdatePKRange(&storage.Int64FieldData{Data: []UniqueID{1}}) - seg1 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: collID, - PartitionID: partID, - ID: segID1, - NumOfRows: 2, - }, bfs) - bfs = metacache.NewBloomFilterSet() - bfs.UpdatePKRange(&storage.Int64FieldData{Data: []UniqueID{1}}) - seg2 := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: collID, - PartitionID: partID, - ID: segID2, - NumOfRows: 2, - }, bfs) - - metaCache.EXPECT().GetSegmentByID(mock.Anything).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - switch id { - case segID1: - return seg1, true - case segID2: - return seg2, true - default: - return nil, false - } - }) - - // the same pk for segmentI and segmentII - pks := [2]storage.PrimaryKey{storage.NewInt64PrimaryKey(1), storage.NewInt64PrimaryKey(2)} - iData1 := genInsertDataWithPKs(pks, schemapb.DataType_Int64) - iblobs1, err := iCodec.Serialize(partID, 0, iData1) - assert.NoError(t, err) - iData2 := genInsertDataWithPKs(pks, schemapb.DataType_Int64) - iblobs2, err := iCodec.Serialize(partID, 1, iData2) - assert.NoError(t, err) - - pk1 := storage.NewInt64PrimaryKey(1) - dData1 := &DeleteData{ - Pks: []storage.PrimaryKey{pk1}, - Tss: []Timestamp{20000}, - RowCount: 1, - } - // empty dData2 - dData2 := &DeleteData{ - Pks: []storage.PrimaryKey{}, - Tss: []Timestamp{}, - RowCount: 0, - } - - stats1, err := storage.NewPrimaryKeyStats(1, int64(schemapb.DataType_Int64), 1) - require.NoError(t, err) - iPaths1, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID1, iblobs1) - require.NoError(t, err) - sPaths1, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID1, stats1, 1, iCodec) - require.NoError(t, err) - dPaths1, err := uploadDeltaLog(context.TODO(), mockbIO, alloc, meta.GetID(), partID, segID1, dData1) - require.NoError(t, err) - require.Equal(t, 12, len(iPaths1)) - - stats2, err := storage.NewPrimaryKeyStats(1, int64(schemapb.DataType_Int64), 1) - require.NoError(t, err) - iPaths2, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID2, iblobs2) - require.NoError(t, err) - sPaths2, err := uploadStatsLog(context.Background(), mockbIO, alloc, meta.GetID(), partID, segID2, stats2, 1, iCodec) - require.NoError(t, err) - dPaths2, err := uploadDeltaLog(context.TODO(), mockbIO, alloc, meta.GetID(), partID, segID2, dData2) - require.NoError(t, err) - require.Equal(t, 12, len(iPaths2)) - - plan := &datapb.CompactionPlan{ - PlanID: 20080, - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - { - SegmentID: segID1, - FieldBinlogs: lo.Values(iPaths1), - Field2StatslogPaths: lo.Values(sPaths1), - Deltalogs: dPaths1, - }, - { - SegmentID: segID2, - 
FieldBinlogs: lo.Values(iPaths2), - Field2StatslogPaths: lo.Values(sPaths2), - Deltalogs: dPaths2, - }, - }, - StartTime: 0, - TimeoutInSeconds: 10, - Type: datapb.CompactionType_MergeCompaction, - Channel: "channelname", - } - - task := newCompactionTask(context.TODO(), mockbIO, metaCache, syncMgr, alloc, plan) - result, err := task.compact() - assert.NoError(t, err) - assert.NotNil(t, result) - - assert.Equal(t, plan.GetPlanID(), result.GetPlanID()) - assert.Equal(t, 1, len(result.GetSegments())) - - segment := result.GetSegments()[0] - assert.EqualValues(t, 19530, segment.GetSegmentID()) - assert.EqualValues(t, 2, segment.GetNumOfRows()) - assert.NotEmpty(t, segment.InsertLogs) - assert.NotEmpty(t, segment.Field2StatslogPaths) - }) -} - -func TestInjectDone(t *testing.T) { - syncMgr := syncmgr.NewMockSyncManager(t) - - segmentIDs := []int64{100, 200, 300} - task := &compactionTask{ - plan: &datapb.CompactionPlan{ - SegmentBinlogs: lo.Map(segmentIDs, func(id int64, _ int) *datapb.CompactionSegmentBinlogs { - return &datapb.CompactionSegmentBinlogs{SegmentID: id} - }), - }, - syncMgr: syncMgr, - } - - for _, segmentID := range segmentIDs { - syncMgr.EXPECT().Unblock(segmentID).Return().Once() - } - - task.injectDone() - task.injectDone() -} - -func BenchmarkCompaction(b *testing.B) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cm := storage.NewLocalChunkManager(storage.RootPath(compactTestDir)) - defer cm.RemoveWithPrefix(ctx, cm.RootPath()) - - collectionID := int64(1) - meta := NewMetaFactory().GetCollectionMeta(collectionID, "test", schemapb.DataType_Int64) - mockbIO := io.NewBinlogIO(cm, getOrCreateIOPool()) - paramtable.Get().Save(Params.CommonCfg.EntityExpirationTTL.Key, "0") - iData := genInsertDataWithExpiredTS() - iCodec := storage.NewInsertCodecWithSchema(meta) - var partId int64 = 0 - var segmentId int64 = 1 - blobs, err := iCodec.Serialize(partId, 0, iData) - assert.NoError(b, err) - var allPaths [][]string - alloc := allocator.NewMockAllocator(b) - alloc.EXPECT().GetGenerator(mock.Anything, mock.Anything).Call.Return(validGeneratorFn, nil) - alloc.EXPECT().AllocOne().Call.Return(int64(19530), nil) - inpath, err := uploadInsertLog(context.Background(), mockbIO, alloc, meta.GetID(), partId, segmentId, blobs) - assert.NoError(b, err) - assert.Equal(b, 12, len(inpath)) - binlogNum := len(inpath[0].GetBinlogs()) - assert.Equal(b, 1, binlogNum) - - for idx := 0; idx < binlogNum; idx++ { - var ps []string - for _, path := range inpath { - ps = append(ps, path.GetBinlogs()[idx].GetLogPath()) - } - allPaths = append(allPaths, ps) - } - - dm := map[interface{}]Timestamp{ - 1: 10000, - } - - metaCache := metacache.NewMockMetaCache(b) - metaCache.EXPECT().Schema().Return(meta.GetSchema()).Maybe() - metaCache.EXPECT().GetSegmentByID(mock.Anything).RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - segment := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - CollectionID: 1, - PartitionID: 0, - ID: id, - NumOfRows: 10, - }, nil) - return segment, true - }) - - ct := &compactionTask{ - metaCache: metaCache, - binlogIO: mockbIO, - Allocator: alloc, - done: make(chan struct{}, 1), - plan: &datapb.CompactionPlan{ - SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ - {SegmentID: 1}, - }, - }, - } - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - inPaths, statsPaths, numOfRow, err := ct.merge(context.Background(), allPaths, 2, 0, meta, dm) - assert.NoError(b, err) - assert.Equal(b, int64(2), numOfRow) - 
assert.Equal(b, 1, len(inPaths[0].GetBinlogs())) - assert.Equal(b, 1, len(statsPaths)) - assert.NotEqual(b, -1, inPaths[0].GetBinlogs()[0].GetTimestampFrom()) - assert.NotEqual(b, -1, inPaths[0].GetBinlogs()[0].GetTimestampTo()) - } -} diff --git a/internal/datanode/data_node.go b/internal/datanode/data_node.go index 628ab9198e7a3..135f501fe746c 100644 --- a/internal/datanode/data_node.go +++ b/internal/datanode/data_node.go @@ -48,6 +48,7 @@ import ( "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/mq/msgdispatcher" + "github.com/milvus-io/milvus/pkg/util/conc" "github.com/milvus-io/milvus/pkg/util/expr" "github.com/milvus-io/milvus/pkg/util/logutil" "github.com/milvus-io/milvus/pkg/util/metricsinfo" @@ -125,6 +126,7 @@ type DataNode struct { factory dependency.Factory reportImportRetryTimes uint // unitest set this value to 1 to save time, default is 10 + pool *conc.Pool[any] } // NewDataNode will return a DataNode with abnormal state. @@ -288,7 +290,7 @@ func (node *DataNode) Init() error { node.writeBufferManager = writebuffer.NewManager(syncMgr) node.importTaskMgr = importv2.NewTaskManager() - node.importScheduler = importv2.NewScheduler(node.importTaskMgr, node.syncMgr, node.chunkManager) + node.importScheduler = importv2.NewScheduler(node.importTaskMgr) node.channelCheckpointUpdater = newChannelCheckpointUpdater(node.broker) node.flowgraphManager = newFlowgraphManager() @@ -297,6 +299,7 @@ func (node *DataNode) Init() error { } else { node.eventManager = NewEventManager() } + node.pool = getOrCreateIOPool() log.Info("init datanode done", zap.String("Address", node.address)) }) diff --git a/internal/datanode/data_sync_service.go b/internal/datanode/data_sync_service.go index ca744d239f1bf..1a3ff514bb5c6 100644 --- a/internal/datanode/data_sync_service.go +++ b/internal/datanode/data_sync_service.go @@ -23,6 +23,7 @@ import ( "sync" "time" + "go.opentelemetry.io/otel" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" @@ -250,8 +251,11 @@ func loadStatsV2(storageCache *metacache.StorageV2Cache, segment *datapb.Segment } func loadStats(ctx context.Context, chunkManager storage.ChunkManager, schema *schemapb.CollectionSchema, segmentID int64, statsBinlogs []*datapb.FieldBinlog) ([]*storage.PkStatistics, error) { + _, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "loadStats") + defer span.End() + startTs := time.Now() - log := log.With(zap.Int64("segmentID", segmentID)) + log := log.Ctx(ctx).With(zap.Int64("segmentID", segmentID)) log.Info("begin to init pk bloom filter", zap.Int("statsBinLogsLen", len(statsBinlogs))) pkField, err := typeutil.GetPrimaryFieldSchema(schema) diff --git a/internal/datanode/data_sync_service_test.go b/internal/datanode/data_sync_service_test.go index c73e3257dd73e..3847f3eebcbd3 100644 --- a/internal/datanode/data_sync_service_test.go +++ b/internal/datanode/data_sync_service_test.go @@ -433,6 +433,7 @@ func (s *DataSyncServiceSuite) TestStartStop() { CollectionID: collMeta.ID, PartitionID: 1, InsertChannel: insertChannelName, + State: commonpb.SegmentState_Flushed, }, 1: { @@ -440,6 +441,7 @@ func (s *DataSyncServiceSuite) TestStartStop() { CollectionID: collMeta.ID, PartitionID: 1, InsertChannel: insertChannelName, + State: commonpb.SegmentState_Flushed, }, } return lo.FilterMap(segmentIDs, func(id int64, _ int) (*datapb.SegmentInfo, bool) { diff --git a/internal/datanode/flow_graph_dd_node.go b/internal/datanode/flow_graph_dd_node.go index 
ea7d2e815db31..db78853f95074 100644 --- a/internal/datanode/flow_graph_dd_node.go +++ b/internal/datanode/flow_graph_dd_node.go @@ -91,10 +91,9 @@ func (ddn *ddNode) IsValidInMsg(in []Msg) bool { // Operate handles input messages, implementing flowgrpah.Node func (ddn *ddNode) Operate(in []Msg) []Msg { - log := log.With(zap.String("channel", ddn.vChannelName)) msMsg, ok := in[0].(*MsgStreamMsg) if !ok { - log.Warn("type assertion failed for MsgStreamMsg", zap.String("name", reflect.TypeOf(in[0]).Name())) + log.Warn("type assertion failed for MsgStreamMsg", zap.String("channel", ddn.vChannelName), zap.String("name", reflect.TypeOf(in[0]).Name())) return []Msg{} } @@ -110,12 +109,12 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { endPositions: msMsg.EndPositions(), dropCollection: false, } - log.Warn("MsgStream closed", zap.Any("ddNode node", ddn.Name()), zap.Int64("collection", ddn.collectionID)) + log.Warn("MsgStream closed", zap.Any("ddNode node", ddn.Name()), zap.String("channel", ddn.vChannelName), zap.Int64("collection", ddn.collectionID)) return []Msg{&fgMsg} } if load := ddn.dropMode.Load(); load != nil && load.(bool) { - log.RatedInfo(1.0, "ddNode in dropMode") + log.RatedInfo(1.0, "ddNode in dropMode", zap.String("channel", ddn.vChannelName)) return []Msg{} } @@ -146,10 +145,10 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { switch msg.Type() { case commonpb.MsgType_DropCollection: if msg.(*msgstream.DropCollectionMsg).GetCollectionID() == ddn.collectionID { - log.Info("Receiving DropCollection msg") + log.Info("Receiving DropCollection msg", zap.String("channel", ddn.vChannelName)) ddn.dropMode.Store(true) - log.Info("Stop compaction for dropped channel") + log.Info("Stop compaction for dropped channel", zap.String("channel", ddn.vChannelName)) ddn.compactionExecutor.discardByDroppedChannel(ddn.vChannelName) fgMsg.dropCollection = true } @@ -157,7 +156,7 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { case commonpb.MsgType_DropPartition: dpMsg := msg.(*msgstream.DropPartitionMsg) if dpMsg.GetCollectionID() == ddn.collectionID { - log.Info("drop partition msg received", zap.Int64("partitionID", dpMsg.GetPartitionID())) + log.Info("drop partition msg received", zap.String("channel", ddn.vChannelName), zap.Int64("partitionID", dpMsg.GetPartitionID())) fgMsg.dropPartitions = append(fgMsg.dropPartitions, dpMsg.PartitionID) } @@ -166,6 +165,7 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { if imsg.CollectionID != ddn.collectionID { log.Warn("filter invalid insert message, collection mis-match", zap.Int64("Get collID", imsg.CollectionID), + zap.String("channel", ddn.vChannelName), zap.Int64("Expected collID", ddn.collectionID)) continue } @@ -173,6 +173,7 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { if ddn.tryToFilterSegmentInsertMessages(imsg) { log.Debug("filter insert messages", zap.Int64("filter segmentID", imsg.GetSegmentID()), + zap.String("channel", ddn.vChannelName), zap.Uint64("message timestamp", msg.EndTs()), ) continue @@ -194,6 +195,7 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { log.Debug("DDNode receive insert messages", zap.Int64("segmentID", imsg.GetSegmentID()), + zap.String("channel", ddn.vChannelName), zap.Int("numRows", len(imsg.GetRowIDs()))) fgMsg.insertMessages = append(fgMsg.insertMessages, imsg) @@ -203,11 +205,12 @@ func (ddn *ddNode) Operate(in []Msg) []Msg { if dmsg.CollectionID != ddn.collectionID { log.Warn("filter invalid DeleteMsg, collection mis-match", zap.Int64("Get collID", dmsg.CollectionID), + zap.String("channel", ddn.vChannelName), 
zap.Int64("Expected collID", ddn.collectionID)) continue } - log.Debug("DDNode receive delete messages", zap.Int64("numRows", dmsg.NumRows)) + log.Debug("DDNode receive delete messages", zap.String("channel", ddn.vChannelName), zap.Int64("numRows", dmsg.NumRows)) rateCol.Add(metricsinfo.DeleteConsumeThroughput, float64(proto.Size(&dmsg.DeleteRequest))) metrics.DataNodeConsumeBytesCount. diff --git a/internal/datanode/flow_graph_dmstream_input_node_test.go b/internal/datanode/flow_graph_dmstream_input_node_test.go index 75df57af0b49c..ae804fe19e5d2 100644 --- a/internal/datanode/flow_graph_dmstream_input_node_test.go +++ b/internal/datanode/flow_graph_dmstream_input_node_test.go @@ -91,7 +91,7 @@ func (mtm *mockTtMsgStream) Broadcast(*msgstream.MsgPack) (map[string][]msgstrea return nil, nil } -func (mtm *mockTtMsgStream) Seek(ctx context.Context, offset []*msgpb.MsgPosition) error { +func (mtm *mockTtMsgStream) Seek(ctx context.Context, msgPositions []*msgstream.MsgPosition, includeCurrentMsg bool) error { return nil } diff --git a/internal/datanode/importv2/pool.go b/internal/datanode/importv2/pool.go new file mode 100644 index 0000000000000..3558477773f1f --- /dev/null +++ b/internal/datanode/importv2/pool.go @@ -0,0 +1,41 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package importv2 + +import ( + "sync" + + "github.com/milvus-io/milvus/pkg/util/conc" + "github.com/milvus-io/milvus/pkg/util/paramtable" +) + +var ( + execPool *conc.Pool[any] + execPoolInitOnce sync.Once +) + +func initExecPool() { + execPool = conc.NewPool[any]( + paramtable.Get().DataNodeCfg.MaxConcurrentImportTaskNum.GetAsInt(), + conc.WithPreAlloc(true), + ) +} + +func GetExecPool() *conc.Pool[any] { + execPoolInitOnce.Do(initExecPool) + return execPool +} diff --git a/internal/datanode/importv2/scheduler.go b/internal/datanode/importv2/scheduler.go index 37884d87d863a..d1d58e8df0655 100644 --- a/internal/datanode/importv2/scheduler.go +++ b/internal/datanode/importv2/scheduler.go @@ -17,20 +17,13 @@ package importv2 import ( - "fmt" - "io" "sync" "time" - "github.com/cockroachdb/errors" "github.com/samber/lo" "go.uber.org/zap" - "github.com/milvus-io/milvus/internal/datanode/syncmgr" "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/proto/internalpb" - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/internal/util/importutilv2" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/conc" "github.com/milvus-io/milvus/pkg/util/paramtable" @@ -44,25 +37,14 @@ type Scheduler interface { type scheduler struct { manager TaskManager - syncMgr syncmgr.SyncManager - cm storage.ChunkManager - - pool *conc.Pool[any] closeOnce sync.Once closeChan chan struct{} } -func NewScheduler(manager TaskManager, syncMgr syncmgr.SyncManager, cm storage.ChunkManager) Scheduler { - pool := conc.NewPool[any]( - paramtable.Get().DataNodeCfg.MaxConcurrentImportTaskNum.GetAsInt(), - conc.WithPreAlloc(true), - ) +func NewScheduler(manager TaskManager) Scheduler { return &scheduler{ manager: manager, - syncMgr: syncMgr, - cm: cm, - pool: pool, closeChan: make(chan struct{}), } } @@ -84,16 +66,9 @@ func (s *scheduler) Start() { tasks := s.manager.GetBy(WithStates(datapb.ImportTaskStateV2_Pending)) futures := make(map[int64][]*conc.Future[any]) for _, task := range tasks { - switch task.GetType() { - case PreImportTaskType: - fs := s.PreImport(task) - futures[task.GetTaskID()] = fs - tryFreeFutures(futures) - case ImportTaskType: - fs := s.Import(task) - futures[task.GetTaskID()] = fs - tryFreeFutures(futures) - } + fs := task.Execute() + futures[task.GetTaskID()] = fs + tryFreeFutures(futures) } for taskID, fs := range futures { err := conc.AwaitAll(fs...) @@ -120,17 +95,6 @@ func (s *scheduler) Close() { }) } -func WrapLogFields(task Task, fields ...zap.Field) []zap.Field { - res := []zap.Field{ - zap.Int64("taskID", task.GetTaskID()), - zap.Int64("jobID", task.GetJobID()), - zap.Int64("collectionID", task.GetCollectionID()), - zap.String("type", task.GetType().String()), - } - res = append(res, fields...) - return res -} - func tryFreeFutures(futures map[int64][]*conc.Future[any]) { for k, fs := range futures { fs = lo.Filter(fs, func(f *conc.Future[any], _ int) bool { @@ -143,207 +107,3 @@ func tryFreeFutures(futures map[int64][]*conc.Future[any]) { futures[k] = fs } } - -func (s *scheduler) handleErr(task Task, err error, msg string) { - log.Warn(msg, WrapLogFields(task, zap.Error(err))...) 
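The new `importv2/pool.go` replaces the per-scheduler worker pool with a process-wide pool that is built lazily on first use via `sync.Once`. Below is a minimal, self-contained sketch of the same once-guarded initialization; the `Pool` type is a stand-in for Milvus's `conc.Pool[any]`, and the size 4 is an arbitrary placeholder for the `MaxConcurrentImportTaskNum` setting:

```go
package main

import (
	"fmt"
	"sync"
)

// Pool is a stand-in for conc.Pool[any]: a fixed-size worker pool.
type Pool struct{ sem chan struct{} }

func NewPool(size int) *Pool { return &Pool{sem: make(chan struct{}, size)} }

// Submit runs fn on the pool, blocking while all workers are busy.
func (p *Pool) Submit(fn func()) {
	p.sem <- struct{}{}
	go func() {
		defer func() { <-p.sem }()
		fn()
	}()
}

var (
	execPool     *Pool
	execPoolOnce sync.Once
)

// GetExecPool lazily builds the shared pool exactly once, mirroring the
// shape of GetExecPool in the diff.
func GetExecPool() *Pool {
	execPoolOnce.Do(func() { execPool = NewPool(4) })
	return execPool
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		i := i
		wg.Add(1)
		GetExecPool().Submit(func() { defer wg.Done(); fmt.Println("task", i) })
	}
	wg.Wait()
}
```

Centralizing the pool also means every PreImportTask and ImportTask now shares one concurrency budget instead of each scheduler instance owning its own.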
- s.manager.Update(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed), UpdateReason(err.Error())) -} - -func (s *scheduler) PreImport(task Task) []*conc.Future[any] { - bufferSize := paramtable.Get().DataNodeCfg.ReadBufferSizeInMB.GetAsInt() * 1024 * 1024 - log.Info("start to preimport", WrapLogFields(task, - zap.Int("bufferSize", bufferSize), - zap.Any("schema", task.GetSchema()))...) - s.manager.Update(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress)) - files := lo.Map(task.(*PreImportTask).GetFileStats(), - func(fileStat *datapb.ImportFileStats, _ int) *internalpb.ImportFile { - return fileStat.GetImportFile() - }) - - fn := func(i int, file *internalpb.ImportFile) error { - reader, err := importutilv2.NewReader(task.GetCtx(), s.cm, task.GetSchema(), file, task.GetOptions(), bufferSize) - if err != nil { - s.handleErr(task, err, "new reader failed") - return err - } - defer reader.Close() - start := time.Now() - err = s.readFileStat(reader, task, i) - if err != nil { - s.handleErr(task, err, "preimport failed") - return err - } - log.Info("read file stat done", WrapLogFields(task, zap.Strings("files", file.GetPaths()), - zap.Duration("dur", time.Since(start)))...) - return nil - } - - futures := make([]*conc.Future[any], 0, len(files)) - for i, file := range files { - i := i - file := file - f := s.pool.Submit(func() (any, error) { - err := fn(i, file) - return err, err - }) - futures = append(futures, f) - } - return futures -} - -func (s *scheduler) readFileStat(reader importutilv2.Reader, task Task, fileIdx int) error { - fileSize, err := reader.Size() - if err != nil { - return err - } - maxSize := paramtable.Get().DataNodeCfg.MaxImportFileSizeInGB.GetAsFloat() * 1024 * 1024 * 1024 - if fileSize > int64(maxSize) { - return errors.New(fmt.Sprintf( - "The import file size has reached the maximum limit allowed for importing, "+ - "fileSize=%d, maxSize=%d", fileSize, int64(maxSize))) - } - - totalRows := 0 - totalSize := 0 - hashedStats := make(map[string]*datapb.PartitionImportStats) - for { - data, err := reader.Read() - if err != nil { - if errors.Is(err, io.EOF) { - break - } - return err - } - err = CheckRowsEqual(task.GetSchema(), data) - if err != nil { - return err - } - rowsCount, err := GetRowsStats(task, data) - if err != nil { - return err - } - MergeHashedStats(rowsCount, hashedStats) - rows := data.GetRowNum() - size := data.GetMemorySize() - totalRows += rows - totalSize += size - log.Info("reading file stat...", WrapLogFields(task, zap.Int("readRows", rows), zap.Int("readSize", size))...) - } - - stat := &datapb.ImportFileStats{ - FileSize: fileSize, - TotalRows: int64(totalRows), - TotalMemorySize: int64(totalSize), - HashedStats: hashedStats, - } - s.manager.Update(task.GetTaskID(), UpdateFileStat(fileIdx, stat)) - return nil -} - -func (s *scheduler) Import(task Task) []*conc.Future[any] { - bufferSize := paramtable.Get().DataNodeCfg.ReadBufferSizeInMB.GetAsInt() * 1024 * 1024 - log.Info("start to import", WrapLogFields(task, - zap.Int("bufferSize", bufferSize), - zap.Any("schema", task.GetSchema()))...) 
- s.manager.Update(task.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress)) - - req := task.(*ImportTask).req - - fn := func(file *internalpb.ImportFile) error { - reader, err := importutilv2.NewReader(task.GetCtx(), s.cm, task.GetSchema(), file, task.GetOptions(), bufferSize) - if err != nil { - s.handleErr(task, err, fmt.Sprintf("new reader failed, file: %s", file.String())) - return err - } - defer reader.Close() - start := time.Now() - err = s.importFile(reader, task) - if err != nil { - s.handleErr(task, err, fmt.Sprintf("do import failed, file: %s", file.String())) - return err - } - log.Info("import file done", WrapLogFields(task, zap.Strings("files", file.GetPaths()), - zap.Duration("dur", time.Since(start)))...) - return nil - } - - futures := make([]*conc.Future[any], 0, len(req.GetFiles())) - for _, file := range req.GetFiles() { - file := file - f := s.pool.Submit(func() (any, error) { - err := fn(file) - return err, err - }) - futures = append(futures, f) - } - return futures -} - -func (s *scheduler) importFile(reader importutilv2.Reader, task Task) error { - iTask := task.(*ImportTask) - syncFutures := make([]*conc.Future[struct{}], 0) - syncTasks := make([]syncmgr.Task, 0) - for { - data, err := reader.Read() - if err != nil { - if errors.Is(err, io.EOF) { - break - } - return err - } - err = AppendSystemFieldsData(iTask, data) - if err != nil { - return err - } - hashedData, err := HashData(iTask, data) - if err != nil { - return err - } - fs, sts, err := s.Sync(iTask, hashedData) - if err != nil { - return err - } - syncFutures = append(syncFutures, fs...) - syncTasks = append(syncTasks, sts...) - } - err := conc.AwaitAll(syncFutures...) - if err != nil { - return err - } - for _, syncTask := range syncTasks { - segmentInfo, err := NewImportSegmentInfo(syncTask, iTask) - if err != nil { - return err - } - s.manager.Update(task.GetTaskID(), UpdateSegmentInfo(segmentInfo)) - log.Info("sync import data done", WrapLogFields(task, zap.Any("segmentInfo", segmentInfo))...) - } - return nil -} - -func (s *scheduler) Sync(task *ImportTask, hashedData HashedData) ([]*conc.Future[struct{}], []syncmgr.Task, error) { - log.Info("start to sync import data", WrapLogFields(task)...) 
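With `PreImport`, `Import`, and their helpers moving off the scheduler, the scheduling loop no longer switches on task type; it simply asks each pending task to execute and collects the returned futures. A simplified, self-contained sketch of that shape (the names and the channel-based "future" are illustrative, not the real Milvus types):

```go
package main

import "fmt"

// Future is a stand-in for conc.Future: a channel the task completes with its error.
type Future <-chan error

// Task is the slimmed-down contract: the scheduler no longer needs to know
// whether it is dealing with a pre-import or an import task.
type Task interface {
	ID() int64
	Execute() []Future
}

type preImportTask struct{ id int64 }

func (t *preImportTask) ID() int64 { return t.id }

func (t *preImportTask) Execute() []Future {
	done := make(chan error, 1)
	done <- nil // pretend the file statistics were read successfully
	return []Future{done}
}

// schedule ranges over pending tasks, triggers execution, and awaits results.
func schedule(pending []Task) {
	for _, task := range pending {
		failed := false
		for _, f := range task.Execute() {
			if err := <-f; err != nil {
				fmt.Printf("task %d failed: %v\n", task.ID(), err)
				failed = true
			}
		}
		if !failed {
			fmt.Printf("task %d done\n", task.ID())
		}
	}
}

func main() {
	schedule([]Task{&preImportTask{id: 1}, &preImportTask{id: 2}})
}
```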
- futures := make([]*conc.Future[struct{}], 0) - syncTasks := make([]syncmgr.Task, 0) - segmentImportedSizes := make(map[int64]int) - for channelIdx, datas := range hashedData { - channel := task.GetVchannels()[channelIdx] - for partitionIdx, data := range datas { - if data.GetRowNum() == 0 { - continue - } - partitionID := task.GetPartitionIDs()[partitionIdx] - size := data.GetMemorySize() - segmentID := PickSegment(task, segmentImportedSizes, channel, partitionID, size) - syncTask, err := NewSyncTask(task.GetCtx(), task, segmentID, partitionID, channel, data) - if err != nil { - return nil, nil, err - } - segmentImportedSizes[segmentID] += size - future := s.syncMgr.SyncData(task.GetCtx(), syncTask) - futures = append(futures, future) - syncTasks = append(syncTasks, syncTask) - } - } - return futures, syncTasks, nil -} diff --git a/internal/datanode/importv2/scheduler_test.go b/internal/datanode/importv2/scheduler_test.go index bdb876f75063e..a49e232992167 100644 --- a/internal/datanode/importv2/scheduler_test.go +++ b/internal/datanode/importv2/scheduler_test.go @@ -116,7 +116,7 @@ func (s *SchedulerSuite) SetupTest() { s.manager = NewTaskManager() s.syncMgr = syncmgr.NewMockSyncManager(s.T()) - s.scheduler = NewScheduler(s.manager, s.syncMgr, nil).(*scheduler) + s.scheduler = NewScheduler(s.manager).(*scheduler) } func createInsertData(t *testing.T, schema *schemapb.CollectionSchema, rowCount int) *storage.InsertData { @@ -236,7 +236,7 @@ func (s *SchedulerSuite) TestScheduler_Slots() { Schema: s.schema, ImportFiles: []*internalpb.ImportFile{{Paths: []string{"dummy.json"}}}, } - preimportTask := NewPreImportTask(preimportReq) + preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm) s.manager.Add(preimportTask) slots := s.scheduler.Slots() @@ -262,7 +262,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Preimport() { ioReader := strings.NewReader(string(bytes)) cm.EXPECT().Size(mock.Anything, mock.Anything).Return(1024, nil) cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil) - s.scheduler.cm = cm + s.cm = cm preimportReq := &datapb.PreImportRequest{ JobID: 1, @@ -273,7 +273,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Preimport() { Schema: s.schema, ImportFiles: []*internalpb.ImportFile{{Paths: []string{"dummy.json"}}}, } - preimportTask := NewPreImportTask(preimportReq) + preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm) s.manager.Add(preimportTask) go s.scheduler.Start() @@ -316,7 +316,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Preimport_Failed() { ioReader := strings.NewReader(string(bytes)) cm.EXPECT().Size(mock.Anything, mock.Anything).Return(1024, nil) cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil) - s.scheduler.cm = cm + s.cm = cm preimportReq := &datapb.PreImportRequest{ JobID: 1, @@ -327,7 +327,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Preimport_Failed() { Schema: s.schema, ImportFiles: []*internalpb.ImportFile{{Paths: []string{"dummy.json"}}}, } - preimportTask := NewPreImportTask(preimportReq) + preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm) s.manager.Add(preimportTask) go s.scheduler.Start() @@ -355,7 +355,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Import() { cm := mocks.NewChunkManager(s.T()) ioReader := strings.NewReader(string(bytes)) cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil) - s.scheduler.cm = cm + s.cm = cm s.syncMgr.EXPECT().SyncData(mock.Anything, 
mock.Anything).RunAndReturn(func(ctx context.Context, task syncmgr.Task) *conc.Future[struct{}] { future := conc.Go(func() (struct{}, error) { @@ -388,7 +388,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Import() { }, }, } - importTask := NewImportTask(importReq) + importTask := NewImportTask(importReq, s.manager, s.syncMgr, s.cm) s.manager.Add(importTask) go s.scheduler.Start() @@ -416,7 +416,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Import_Failed() { cm := mocks.NewChunkManager(s.T()) ioReader := strings.NewReader(string(bytes)) cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil) - s.scheduler.cm = cm + s.cm = cm s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, task syncmgr.Task) *conc.Future[struct{}] { future := conc.Go(func() (struct{}, error) { @@ -449,7 +449,7 @@ func (s *SchedulerSuite) TestScheduler_Start_Import_Failed() { }, }, } - importTask := NewImportTask(importReq) + importTask := NewImportTask(importReq, s.manager, s.syncMgr, s.cm) s.manager.Add(importTask) go s.scheduler.Start() @@ -487,9 +487,9 @@ func (s *SchedulerSuite) TestScheduler_ReadFileStat() { Schema: s.schema, ImportFiles: []*internalpb.ImportFile{importFile}, } - preimportTask := NewPreImportTask(preimportReq) + preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm) s.manager.Add(preimportTask) - err := s.scheduler.readFileStat(s.reader, preimportTask, 0) + err := preimportTask.(*PreImportTask).readFileStat(s.reader, preimportTask, 0) s.NoError(err) } @@ -538,9 +538,9 @@ func (s *SchedulerSuite) TestScheduler_ImportFile() { }, }, } - importTask := NewImportTask(importReq) + importTask := NewImportTask(importReq, s.manager, s.syncMgr, s.cm) s.manager.Add(importTask) - err := s.scheduler.importFile(s.reader, importTask) + err := importTask.(*ImportTask).importFile(s.reader, importTask) s.NoError(err) } diff --git a/internal/datanode/importv2/task.go b/internal/datanode/importv2/task.go index a13f421f552d6..d349bf833bb03 100644 --- a/internal/datanode/importv2/task.go +++ b/internal/datanode/importv2/task.go @@ -17,18 +17,12 @@ package importv2 import ( - "context" - - "github.com/golang/protobuf/proto" "github.com/samber/lo" + "go.uber.org/zap" - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/internal/datanode/metacache" "github.com/milvus-io/milvus/internal/proto/datapb" - "github.com/milvus-io/milvus/internal/proto/internalpb" - "github.com/milvus-io/milvus/internal/util/importutilv2" - "github.com/milvus-io/milvus/pkg/util/typeutil" + "github.com/milvus-io/milvus/pkg/util/conc" ) type TaskType int @@ -130,6 +124,7 @@ func UpdateSegmentInfo(info *datapb.ImportSegmentInfo) UpdateAction { } type Task interface { + Execute() []*conc.Future[any] GetJobID() int64 GetTaskID() int64 GetCollectionID() int64 @@ -139,183 +134,17 @@ type Task interface { GetState() datapb.ImportTaskStateV2 GetReason() string GetSchema() *schemapb.CollectionSchema - GetCtx() context.Context - GetOptions() []*commonpb.KeyValuePair Cancel() Clone() Task } -type PreImportTask struct { - *datapb.PreImportTask - ctx context.Context - cancel context.CancelFunc - partitionIDs []int64 - vchannels []string - schema *schemapb.CollectionSchema - options []*commonpb.KeyValuePair -} - -func NewPreImportTask(req *datapb.PreImportRequest) Task { - fileStats := lo.Map(req.GetImportFiles(), func(file *internalpb.ImportFile, _ int) *datapb.ImportFileStats { 
- return &datapb.ImportFileStats{ - ImportFile: file, - } - }) - ctx, cancel := context.WithCancel(context.Background()) - // During binlog import, even if the primary key's autoID is set to true, - // the primary key from the binlog should be used instead of being reassigned. - if importutilv2.IsBackup(req.GetOptions()) { - UnsetAutoID(req.GetSchema()) - } - return &PreImportTask{ - PreImportTask: &datapb.PreImportTask{ - JobID: req.GetJobID(), - TaskID: req.GetTaskID(), - CollectionID: req.GetCollectionID(), - State: datapb.ImportTaskStateV2_Pending, - FileStats: fileStats, - }, - ctx: ctx, - cancel: cancel, - partitionIDs: req.GetPartitionIDs(), - vchannels: req.GetVchannels(), - schema: req.GetSchema(), - options: req.GetOptions(), - } -} - -func (p *PreImportTask) GetPartitionIDs() []int64 { - return p.partitionIDs -} - -func (p *PreImportTask) GetVchannels() []string { - return p.vchannels -} - -func (p *PreImportTask) GetType() TaskType { - return PreImportTaskType -} - -func (p *PreImportTask) GetSchema() *schemapb.CollectionSchema { - return p.schema -} - -func (p *PreImportTask) GetOptions() []*commonpb.KeyValuePair { - return p.options -} - -func (p *PreImportTask) GetCtx() context.Context { - return p.ctx -} - -func (p *PreImportTask) Cancel() { - p.cancel() -} - -func (p *PreImportTask) Clone() Task { - ctx, cancel := context.WithCancel(p.GetCtx()) - return &PreImportTask{ - PreImportTask: proto.Clone(p.PreImportTask).(*datapb.PreImportTask), - ctx: ctx, - cancel: cancel, - partitionIDs: p.GetPartitionIDs(), - vchannels: p.GetVchannels(), - schema: p.GetSchema(), - options: p.GetOptions(), - } -} - -type ImportTask struct { - *datapb.ImportTaskV2 - ctx context.Context - cancel context.CancelFunc - segmentsInfo map[int64]*datapb.ImportSegmentInfo - req *datapb.ImportRequest - metaCaches map[string]metacache.MetaCache -} - -func NewImportTask(req *datapb.ImportRequest) Task { - ctx, cancel := context.WithCancel(context.Background()) - // During binlog import, even if the primary key's autoID is set to true, - // the primary key from the binlog should be used instead of being reassigned. 
- if importutilv2.IsBackup(req.GetOptions()) { - UnsetAutoID(req.GetSchema()) - } - task := &ImportTask{ - ImportTaskV2: &datapb.ImportTaskV2{ - JobID: req.GetJobID(), - TaskID: req.GetTaskID(), - CollectionID: req.GetCollectionID(), - State: datapb.ImportTaskStateV2_Pending, - }, - ctx: ctx, - cancel: cancel, - segmentsInfo: make(map[int64]*datapb.ImportSegmentInfo), - req: req, - } - task.initMetaCaches(req) - return task -} - -func (t *ImportTask) initMetaCaches(req *datapb.ImportRequest) { - metaCaches := make(map[string]metacache.MetaCache) - schema := typeutil.AppendSystemFields(req.GetSchema()) - for _, channel := range req.GetVchannels() { - info := &datapb.ChannelWatchInfo{ - Vchan: &datapb.VchannelInfo{ - CollectionID: req.GetCollectionID(), - ChannelName: channel, - }, - Schema: schema, - } - metaCache := metacache.NewMetaCache(info, func(segment *datapb.SegmentInfo) *metacache.BloomFilterSet { - return metacache.NewBloomFilterSet() - }) - metaCaches[channel] = metaCache - } - t.metaCaches = metaCaches -} - -func (t *ImportTask) GetType() TaskType { - return ImportTaskType -} - -func (t *ImportTask) GetPartitionIDs() []int64 { - return t.req.GetPartitionIDs() -} - -func (t *ImportTask) GetVchannels() []string { - return t.req.GetVchannels() -} - -func (t *ImportTask) GetSchema() *schemapb.CollectionSchema { - return t.req.GetSchema() -} - -func (t *ImportTask) GetOptions() []*commonpb.KeyValuePair { - return t.req.GetOptions() -} - -func (t *ImportTask) GetCtx() context.Context { - return t.ctx -} - -func (t *ImportTask) Cancel() { - t.cancel() -} - -func (t *ImportTask) GetSegmentsInfo() []*datapb.ImportSegmentInfo { - return lo.Values(t.segmentsInfo) -} - -func (t *ImportTask) Clone() Task { - ctx, cancel := context.WithCancel(t.GetCtx()) - return &ImportTask{ - ImportTaskV2: proto.Clone(t.ImportTaskV2).(*datapb.ImportTaskV2), - ctx: ctx, - cancel: cancel, - segmentsInfo: t.segmentsInfo, - req: t.req, - metaCaches: t.metaCaches, +func WrapLogFields(task Task, fields ...zap.Field) []zap.Field { + res := []zap.Field{ + zap.Int64("taskID", task.GetTaskID()), + zap.Int64("jobID", task.GetJobID()), + zap.Int64("collectionID", task.GetCollectionID()), + zap.String("type", task.GetType().String()), } + res = append(res, fields...) + return res } diff --git a/internal/datanode/importv2/task_import.go b/internal/datanode/importv2/task_import.go new file mode 100644 index 0000000000000..0b99348843bbb --- /dev/null +++ b/internal/datanode/importv2/task_import.go @@ -0,0 +1,248 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
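`WrapLogFields` survives the refactor in task.go as the single place that stamps task identity onto every log record. A small usage sketch against a trimmed-down `Task` interface; the field set mirrors the helper above, everything else is illustrative:

```go
package main

import "go.uber.org/zap"

// Task is trimmed to just the accessors the helper needs.
type Task interface {
	GetTaskID() int64
	GetJobID() int64
	GetCollectionID() int64
}

// WrapLogFields prefixes caller-supplied fields with the task identity,
// so every record for a task carries the same searchable keys.
func WrapLogFields(task Task, fields ...zap.Field) []zap.Field {
	res := []zap.Field{
		zap.Int64("taskID", task.GetTaskID()),
		zap.Int64("jobID", task.GetJobID()),
		zap.Int64("collectionID", task.GetCollectionID()),
	}
	return append(res, fields...)
}

type fakeTask struct{ task, job, coll int64 }

func (t fakeTask) GetTaskID() int64       { return t.task }
func (t fakeTask) GetJobID() int64        { return t.job }
func (t fakeTask) GetCollectionID() int64 { return t.coll }

func main() {
	log := zap.NewExample()
	t := fakeTask{task: 7, job: 3, coll: 100}
	log.Info("start to import", WrapLogFields(t, zap.Int("bufferSize", 16<<20))...)
}
```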
+ +package importv2 + +import ( + "context" + "io" + "time" + + "github.com/cockroachdb/errors" + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/datanode/metacache" + "github.com/milvus-io/milvus/internal/datanode/syncmgr" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/proto/internalpb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/internal/util/importutilv2" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/conc" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +type ImportTask struct { + *datapb.ImportTaskV2 + ctx context.Context + cancel context.CancelFunc + segmentsInfo map[int64]*datapb.ImportSegmentInfo + req *datapb.ImportRequest + + manager TaskManager + syncMgr syncmgr.SyncManager + cm storage.ChunkManager + metaCaches map[string]metacache.MetaCache +} + +func NewImportTask(req *datapb.ImportRequest, + manager TaskManager, + syncMgr syncmgr.SyncManager, + cm storage.ChunkManager, +) Task { + ctx, cancel := context.WithCancel(context.Background()) + // During binlog import, even if the primary key's autoID is set to true, + // the primary key from the binlog should be used instead of being reassigned. + if importutilv2.IsBackup(req.GetOptions()) { + UnsetAutoID(req.GetSchema()) + } + task := &ImportTask{ + ImportTaskV2: &datapb.ImportTaskV2{ + JobID: req.GetJobID(), + TaskID: req.GetTaskID(), + CollectionID: req.GetCollectionID(), + State: datapb.ImportTaskStateV2_Pending, + }, + ctx: ctx, + cancel: cancel, + segmentsInfo: make(map[int64]*datapb.ImportSegmentInfo), + req: req, + manager: manager, + syncMgr: syncMgr, + cm: cm, + } + task.initMetaCaches(req) + return task +} + +func (t *ImportTask) initMetaCaches(req *datapb.ImportRequest) { + metaCaches := make(map[string]metacache.MetaCache) + schema := typeutil.AppendSystemFields(req.GetSchema()) + for _, channel := range req.GetVchannels() { + info := &datapb.ChannelWatchInfo{ + Vchan: &datapb.VchannelInfo{ + CollectionID: req.GetCollectionID(), + ChannelName: channel, + }, + Schema: schema, + } + metaCache := metacache.NewMetaCache(info, func(segment *datapb.SegmentInfo) *metacache.BloomFilterSet { + return metacache.NewBloomFilterSet() + }) + metaCaches[channel] = metaCache + } + t.metaCaches = metaCaches +} + +func (t *ImportTask) GetType() TaskType { + return ImportTaskType +} + +func (t *ImportTask) GetPartitionIDs() []int64 { + return t.req.GetPartitionIDs() +} + +func (t *ImportTask) GetVchannels() []string { + return t.req.GetVchannels() +} + +func (t *ImportTask) GetSchema() *schemapb.CollectionSchema { + return t.req.GetSchema() +} + +func (t *ImportTask) Cancel() { + t.cancel() +} + +func (t *ImportTask) GetSegmentsInfo() []*datapb.ImportSegmentInfo { + return lo.Values(t.segmentsInfo) +} + +func (t *ImportTask) Clone() Task { + ctx, cancel := context.WithCancel(t.ctx) + return &ImportTask{ + ImportTaskV2: proto.Clone(t.ImportTaskV2).(*datapb.ImportTaskV2), + ctx: ctx, + cancel: cancel, + segmentsInfo: t.segmentsInfo, + req: t.req, + metaCaches: t.metaCaches, + } +} + +func (t *ImportTask) Execute() []*conc.Future[any] { + bufferSize := paramtable.Get().DataNodeCfg.ReadBufferSizeInMB.GetAsInt() * 1024 * 1024 + log.Info("start to import", WrapLogFields(t, + zap.Int("bufferSize", bufferSize), + zap.Any("schema", 
t.GetSchema()))...) + t.manager.Update(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress)) + + req := t.req + + fn := func(file *internalpb.ImportFile) error { + reader, err := importutilv2.NewReader(t.ctx, t.cm, t.GetSchema(), file, t.req.GetOptions(), bufferSize) + if err != nil { + log.Warn("new reader failed", WrapLogFields(t, zap.String("file", file.String()), zap.Error(err))...) + t.manager.Update(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed), UpdateReason(err.Error())) + return err + } + defer reader.Close() + start := time.Now() + err = t.importFile(reader, t) + if err != nil { + log.Warn("do import failed", WrapLogFields(t, zap.String("file", file.String()), zap.Error(err))...) + t.manager.Update(t.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed), UpdateReason(err.Error())) + return err + } + log.Info("import file done", WrapLogFields(t, zap.Strings("files", file.GetPaths()), + zap.Duration("dur", time.Since(start)))...) + return nil + } + + futures := make([]*conc.Future[any], 0, len(req.GetFiles())) + for _, file := range req.GetFiles() { + file := file + f := GetExecPool().Submit(func() (any, error) { + err := fn(file) + return err, err + }) + futures = append(futures, f) + } + return futures +} + +func (t *ImportTask) importFile(reader importutilv2.Reader, task Task) error { + iTask := task.(*ImportTask) + syncFutures := make([]*conc.Future[struct{}], 0) + syncTasks := make([]syncmgr.Task, 0) + for { + data, err := reader.Read() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } + err = AppendSystemFieldsData(iTask, data) + if err != nil { + return err + } + hashedData, err := HashData(iTask, data) + if err != nil { + return err + } + fs, sts, err := t.sync(iTask, hashedData) + if err != nil { + return err + } + syncFutures = append(syncFutures, fs...) + syncTasks = append(syncTasks, sts...) + } + err := conc.AwaitAll(syncFutures...) + if err != nil { + return err + } + for _, syncTask := range syncTasks { + segmentInfo, err := NewImportSegmentInfo(syncTask, iTask) + if err != nil { + return err + } + t.manager.Update(task.GetTaskID(), UpdateSegmentInfo(segmentInfo)) + log.Info("sync import data done", WrapLogFields(task, zap.Any("segmentInfo", segmentInfo))...) + } + return nil +} + +func (t *ImportTask) sync(task *ImportTask, hashedData HashedData) ([]*conc.Future[struct{}], []syncmgr.Task, error) { + log.Info("start to sync import data", WrapLogFields(task)...) 
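`importFile` above follows a common streaming shape: read batches until `io.EOF`, hand each batch off for syncing, and only report success once everything has been handled. A reduced sketch of that loop with placeholder reader and sink types (not the importutilv2 or syncmgr APIs):

```go
package main

import (
	"errors"
	"fmt"
	"io"
)

// batch stands in for the column data returned by reader.Read().
type batch struct{ rows int }

// batchReader is a placeholder for importutilv2.Reader.
type batchReader struct{ remaining int }

func (r *batchReader) Read() (batch, error) {
	if r.remaining == 0 {
		return batch{}, io.EOF
	}
	r.remaining--
	return batch{rows: 128}, nil
}

// importFile drains the reader, "syncing" every batch, and only reports
// success once every batch has been consumed.
func importFile(r *batchReader, sync func(batch) error) error {
	total := 0
	for {
		b, err := r.Read()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break // normal end of file
			}
			return err
		}
		if err := sync(b); err != nil {
			return err
		}
		total += b.rows
	}
	fmt.Println("imported rows:", total)
	return nil
}

func main() {
	_ = importFile(&batchReader{remaining: 3}, func(b batch) error { return nil })
}
```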
+ futures := make([]*conc.Future[struct{}], 0) + syncTasks := make([]syncmgr.Task, 0) + segmentImportedSizes := make(map[int64]int) + for channelIdx, datas := range hashedData { + channel := task.GetVchannels()[channelIdx] + for partitionIdx, data := range datas { + if data.GetRowNum() == 0 { + continue + } + partitionID := task.GetPartitionIDs()[partitionIdx] + size := data.GetMemorySize() + segmentID := PickSegment(task, segmentImportedSizes, channel, partitionID, size) + syncTask, err := NewSyncTask(task.ctx, task, segmentID, partitionID, channel, data) + if err != nil { + return nil, nil, err + } + segmentImportedSizes[segmentID] += size + future := t.syncMgr.SyncData(task.ctx, syncTask) + futures = append(futures, future) + syncTasks = append(syncTasks, syncTask) + } + } + return futures, syncTasks, nil +} diff --git a/internal/datanode/importv2/task_preimport.go b/internal/datanode/importv2/task_preimport.go new file mode 100644 index 0000000000000..4d2ce93de7eab --- /dev/null +++ b/internal/datanode/importv2/task_preimport.go @@ -0,0 +1,212 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package importv2 + +import ( + "context" + "fmt" + "io" + "time" + + "github.com/cockroachdb/errors" + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/proto/internalpb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/internal/util/importutilv2" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/conc" + "github.com/milvus-io/milvus/pkg/util/paramtable" +) + +type PreImportTask struct { + *datapb.PreImportTask + ctx context.Context + cancel context.CancelFunc + partitionIDs []int64 + vchannels []string + schema *schemapb.CollectionSchema + options []*commonpb.KeyValuePair + + manager TaskManager + cm storage.ChunkManager +} + +func NewPreImportTask(req *datapb.PreImportRequest, + manager TaskManager, + cm storage.ChunkManager, +) Task { + fileStats := lo.Map(req.GetImportFiles(), func(file *internalpb.ImportFile, _ int) *datapb.ImportFileStats { + return &datapb.ImportFileStats{ + ImportFile: file, + } + }) + ctx, cancel := context.WithCancel(context.Background()) + // During binlog import, even if the primary key's autoID is set to true, + // the primary key from the binlog should be used instead of being reassigned. 
+ if importutilv2.IsBackup(req.GetOptions()) { + UnsetAutoID(req.GetSchema()) + } + return &PreImportTask{ + PreImportTask: &datapb.PreImportTask{ + JobID: req.GetJobID(), + TaskID: req.GetTaskID(), + CollectionID: req.GetCollectionID(), + State: datapb.ImportTaskStateV2_Pending, + FileStats: fileStats, + }, + ctx: ctx, + cancel: cancel, + partitionIDs: req.GetPartitionIDs(), + vchannels: req.GetVchannels(), + schema: req.GetSchema(), + options: req.GetOptions(), + manager: manager, + cm: cm, + } +} + +func (p *PreImportTask) GetPartitionIDs() []int64 { + return p.partitionIDs +} + +func (p *PreImportTask) GetVchannels() []string { + return p.vchannels +} + +func (p *PreImportTask) GetType() TaskType { + return PreImportTaskType +} + +func (p *PreImportTask) GetSchema() *schemapb.CollectionSchema { + return p.schema +} + +func (p *PreImportTask) Cancel() { + p.cancel() +} + +func (p *PreImportTask) Clone() Task { + ctx, cancel := context.WithCancel(p.ctx) + return &PreImportTask{ + PreImportTask: proto.Clone(p.PreImportTask).(*datapb.PreImportTask), + ctx: ctx, + cancel: cancel, + partitionIDs: p.GetPartitionIDs(), + vchannels: p.GetVchannels(), + schema: p.GetSchema(), + options: p.options, + } +} + +func (p *PreImportTask) Execute() []*conc.Future[any] { + bufferSize := paramtable.Get().DataNodeCfg.ReadBufferSizeInMB.GetAsInt() * 1024 * 1024 + log.Info("start to preimport", WrapLogFields(p, + zap.Int("bufferSize", bufferSize), + zap.Any("schema", p.GetSchema()))...) + p.manager.Update(p.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_InProgress)) + files := lo.Map(p.GetFileStats(), + func(fileStat *datapb.ImportFileStats, _ int) *internalpb.ImportFile { + return fileStat.GetImportFile() + }) + + fn := func(i int, file *internalpb.ImportFile) error { + reader, err := importutilv2.NewReader(p.ctx, p.cm, p.GetSchema(), file, p.options, bufferSize) + if err != nil { + log.Warn("new reader failed", WrapLogFields(p, zap.String("file", file.String()), zap.Error(err))...) + p.manager.Update(p.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed), UpdateReason(err.Error())) + return err + } + defer reader.Close() + start := time.Now() + err = p.readFileStat(reader, p, i) + if err != nil { + log.Warn("preimport failed", WrapLogFields(p, zap.String("file", file.String()), zap.Error(err))...) + p.manager.Update(p.GetTaskID(), UpdateState(datapb.ImportTaskStateV2_Failed), UpdateReason(err.Error())) + return err + } + log.Info("read file stat done", WrapLogFields(p, zap.Strings("files", file.GetPaths()), + zap.Duration("dur", time.Since(start)))...) 
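Both task constructors repeat the same rule: when the import is a backup restore, autoID on the primary key is switched off so the IDs already stored in the binlogs are kept rather than regenerated. A hedged sketch of what an `UnsetAutoID`-style helper does, using simplified stand-in structs rather than the real schemapb types:

```go
package main

import "fmt"

// FieldSchema and CollectionSchema are simplified stand-ins for schemapb.
type FieldSchema struct {
	Name         string
	IsPrimaryKey bool
	AutoID       bool
}

type CollectionSchema struct {
	AutoID bool
	Fields []*FieldSchema
}

// unsetAutoID clears autoID on the schema and its primary key field so that
// primary keys read from binlogs are written back verbatim.
func unsetAutoID(schema *CollectionSchema) {
	schema.AutoID = false
	for _, field := range schema.Fields {
		if field.IsPrimaryKey {
			field.AutoID = false
		}
	}
}

func main() {
	schema := &CollectionSchema{
		AutoID: true,
		Fields: []*FieldSchema{{Name: "pk", IsPrimaryKey: true, AutoID: true}},
	}
	unsetAutoID(schema)
	fmt.Printf("schema autoID=%v pk autoID=%v\n", schema.AutoID, schema.Fields[0].AutoID)
}
```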
+ return nil + } + + futures := make([]*conc.Future[any], 0, len(files)) + for i, file := range files { + i := i + file := file + f := GetExecPool().Submit(func() (any, error) { + err := fn(i, file) + return err, err + }) + futures = append(futures, f) + } + return futures +} + +func (p *PreImportTask) readFileStat(reader importutilv2.Reader, task Task, fileIdx int) error { + fileSize, err := reader.Size() + if err != nil { + return err + } + maxSize := paramtable.Get().DataNodeCfg.MaxImportFileSizeInGB.GetAsFloat() * 1024 * 1024 * 1024 + if fileSize > int64(maxSize) { + return errors.New(fmt.Sprintf( + "The import file size has reached the maximum limit allowed for importing, "+ + "fileSize=%d, maxSize=%d", fileSize, int64(maxSize))) + } + + totalRows := 0 + totalSize := 0 + hashedStats := make(map[string]*datapb.PartitionImportStats) + for { + data, err := reader.Read() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } + err = CheckRowsEqual(task.GetSchema(), data) + if err != nil { + return err + } + rowsCount, err := GetRowsStats(task, data) + if err != nil { + return err + } + MergeHashedStats(rowsCount, hashedStats) + rows := data.GetRowNum() + size := data.GetMemorySize() + totalRows += rows + totalSize += size + log.Info("reading file stat...", WrapLogFields(task, zap.Int("readRows", rows), zap.Int("readSize", size))...) + } + + stat := &datapb.ImportFileStats{ + FileSize: fileSize, + TotalRows: int64(totalRows), + TotalMemorySize: int64(totalSize), + HashedStats: hashedStats, + } + p.manager.Update(task.GetTaskID(), UpdateFileStat(fileIdx, stat)) + return nil +} diff --git a/internal/datanode/io/binlog_io.go b/internal/datanode/io/binlog_io.go index c60af8e992dda..317f267978132 100644 --- a/internal/datanode/io/binlog_io.go +++ b/internal/datanode/io/binlog_io.go @@ -101,7 +101,6 @@ func (b *BinlogIoImpl) Upload(ctx context.Context, kvs map[string][]byte) error }) return struct{}{}, err }) - futures = append(futures, future) } diff --git a/internal/datanode/l0_compactor.go b/internal/datanode/l0_compactor.go index 75bde780323c7..f3367b639feb5 100644 --- a/internal/datanode/l0_compactor.go +++ b/internal/datanode/l0_compactor.go @@ -23,16 +23,17 @@ import ( "sync" "time" + "github.com/cockroachdb/errors" "github.com/samber/lo" "go.opentelemetry.io/otel" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus/internal/datanode/allocator" + "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/datanode/io" iter "github.com/milvus-io/milvus/internal/datanode/iterators" "github.com/milvus-io/milvus/internal/datanode/metacache" - "github.com/milvus-io/milvus/internal/datanode/syncmgr" "github.com/milvus-io/milvus/internal/metastore/kv/binlog" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/storage" @@ -50,12 +51,8 @@ import ( ) type levelZeroCompactionTask struct { - compactor io.BinlogIO - allocator allocator.Allocator - metacache metacache.MetaCache - syncmgr syncmgr.SyncManager cm storage.ChunkManager plan *datapb.CompactionPlan @@ -67,12 +64,13 @@ type levelZeroCompactionTask struct { tr *timerecord.TimeRecorder } +// make sure compactionTask implements compactor interface +var _ compaction.Compactor = (*levelZeroCompactionTask)(nil) + func newLevelZeroCompactionTask( ctx context.Context, binlogIO io.BinlogIO, alloc allocator.Allocator, - metaCache metacache.MetaCache, - syncmgr syncmgr.SyncManager, cm storage.ChunkManager, 
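The `var _ compaction.Compactor = (*levelZeroCompactionTask)(nil)` line added above is the standard Go idiom for a compile-time interface check: it costs nothing at runtime, but a missing or mis-renamed method (for example, the lowercase `compact` before this rename) turns into a build error at the declaration instead of a failure at some distant call site. In isolation, with illustrative names:

```go
package main

import "fmt"

// Compactor is a stand-in for the compaction.Compactor interface.
type Compactor interface {
	Compact() (string, error)
	GetPlanID() int64
}

type levelZeroTask struct{ planID int64 }

func (t *levelZeroTask) Compact() (string, error) { return "plan result", nil }
func (t *levelZeroTask) GetPlanID() int64         { return t.planID }

// Compile-time assertion: if levelZeroTask stops satisfying Compactor,
// this declaration fails to build.
var _ Compactor = (*levelZeroTask)(nil)

func main() {
	var c Compactor = &levelZeroTask{planID: 19530}
	res, _ := c.Compact()
	fmt.Println(c.GetPlanID(), res)
}
```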
plan *datapb.CompactionPlan, ) *levelZeroCompactionTask { @@ -83,8 +81,6 @@ func newLevelZeroCompactionTask( BinlogIO: binlogIO, allocator: alloc, - metacache: metaCache, - syncmgr: syncmgr, cm: cm, plan: plan, tr: timerecord.NewTimeRecorder("levelzero compaction"), @@ -92,31 +88,29 @@ func newLevelZeroCompactionTask( } } -func (t *levelZeroCompactionTask) complete() { +func (t *levelZeroCompactionTask) Complete() { t.done <- struct{}{} } -func (t *levelZeroCompactionTask) stop() { +func (t *levelZeroCompactionTask) Stop() { t.cancel() <-t.done } -func (t *levelZeroCompactionTask) getPlanID() UniqueID { +func (t *levelZeroCompactionTask) GetPlanID() UniqueID { return t.plan.GetPlanID() } -func (t *levelZeroCompactionTask) getChannelName() string { +func (t *levelZeroCompactionTask) GetChannelName() string { return t.plan.GetChannel() } -func (t *levelZeroCompactionTask) getCollection() int64 { - return t.metacache.Collection() +func (t *levelZeroCompactionTask) GetCollection() int64 { + // The length of SegmentBinlogs is checked before task enqueueing. + return t.plan.GetSegmentBinlogs()[0].GetCollectionID() } -// Do nothing for levelzero compaction -func (t *levelZeroCompactionTask) injectDone() {} - -func (t *levelZeroCompactionTask) compact() (*datapb.CompactionPlanResult, error) { +func (t *levelZeroCompactionTask) Compact() (*datapb.CompactionPlanResult, error) { ctx, span := otel.Tracer(typeutil.DataNodeRole).Start(t.ctx, "L0Compact") defer span.End() log := log.Ctx(t.ctx).With(zap.Int64("planID", t.plan.GetPlanID()), zap.String("type", t.plan.GetType().String())) @@ -124,7 +118,7 @@ func (t *levelZeroCompactionTask) compact() (*datapb.CompactionPlanResult, error if !funcutil.CheckCtxValid(ctx) { log.Warn("compact wrong, task context done or timeout") - return nil, errContext + return nil, ctx.Err() } ctxTimeout, cancelAll := context.WithTimeout(ctx, time.Duration(t.plan.GetTimeoutInSeconds())*time.Second) @@ -139,7 +133,7 @@ func (t *levelZeroCompactionTask) compact() (*datapb.CompactionPlanResult, error }) if len(targetSegments) == 0 { log.Warn("compact wrong, not target sealed segments") - return nil, errIllegalCompactionPlan + return nil, errors.New("illegal compaction plan with empty target segments") } err := binlog.DecompressCompactionBinlogs(l0Segments) if err != nil { @@ -207,7 +201,7 @@ func (t *levelZeroCompactionTask) linearProcess(ctx context.Context, targetSegme alteredSegments = make(map[int64]*storage.DeleteData) ) - segmentBFs, err := t.loadBF(targetSegments) + segmentBFs, err := t.loadBF(ctx, targetSegments) if err != nil { return nil, err } @@ -260,7 +254,7 @@ func (t *levelZeroCompactionTask) batchProcess(ctx context.Context, targetSegmen return nil, err } - segmentBFs, err := t.loadBF(targetSegments) + segmentBFs, err := t.loadBF(ctx, targetSegments) if err != nil { return nil, err } @@ -333,16 +327,20 @@ func (t *levelZeroCompactionTask) splitDelta( } func (t *levelZeroCompactionTask) composeDeltalog(segmentID int64, dData *storage.DeleteData) (map[string][]byte, *datapb.Binlog, error) { + segment, ok := lo.Find(t.plan.GetSegmentBinlogs(), func(segment *datapb.CompactionSegmentBinlogs) bool { + return segment.GetSegmentID() == segmentID + }) + if !ok { + return nil, nil, merr.WrapErrSegmentNotFound(segmentID, "cannot find segment in compaction plan") + } + var ( - collID = t.metacache.Collection() - uploadKv = make(map[string][]byte) + collectionID = segment.GetCollectionID() + partitionID = segment.GetPartitionID() + uploadKv = make(map[string][]byte) ) - seg, 
ok := t.metacache.GetSegmentByID(segmentID) - if !ok { - return nil, nil, merr.WrapErrSegmentLack(segmentID) - } - blob, err := storage.NewDeleteCodec().Serialize(collID, seg.PartitionID(), segmentID, dData) + blob, err := storage.NewDeleteCodec().Serialize(collectionID, partitionID, segmentID, dData) if err != nil { return nil, nil, err } @@ -352,7 +350,7 @@ func (t *levelZeroCompactionTask) composeDeltalog(segmentID int64, dData *storag return nil, nil, err } - blobKey := metautil.JoinIDPath(collID, seg.PartitionID(), segmentID, logID) + blobKey := metautil.JoinIDPath(collectionID, partitionID, segmentID, logID) blobPath := t.BinlogIO.JoinFullPath(common.SegmentDeltaLogPath, blobKey) uploadKv[blobPath] = blob.GetValue() @@ -422,11 +420,9 @@ func (t *levelZeroCompactionTask) uploadByCheck(ctx context.Context, requireChec return nil } -func (t *levelZeroCompactionTask) loadBF(targetSegments []*datapb.CompactionSegmentBinlogs) (map[int64]*metacache.BloomFilterSet, error) { - log := log.Ctx(t.ctx).With( - zap.Int64("planID", t.plan.GetPlanID()), - zap.String("type", t.plan.GetType().String()), - ) +func (t *levelZeroCompactionTask) loadBF(ctx context.Context, targetSegments []*datapb.CompactionSegmentBinlogs) (map[int64]*metacache.BloomFilterSet, error) { + _, span := otel.Tracer(typeutil.DataNodeRole).Start(ctx, "L0Compact loadBF") + defer span.End() var ( futures = make([]*conc.Future[any], 0, len(targetSegments)) @@ -438,13 +434,16 @@ func (t *levelZeroCompactionTask) loadBF(targetSegments []*datapb.CompactionSegm for _, segment := range targetSegments { segment := segment + innerCtx := ctx future := pool.Submit(func() (any, error) { _ = binlog.DecompressBinLog(storage.StatsBinlog, segment.GetCollectionID(), segment.GetPartitionID(), segment.GetSegmentID(), segment.GetField2StatslogPaths()) - pks, err := loadStats(t.ctx, t.cm, - t.metacache.Schema(), segment.GetSegmentID(), segment.GetField2StatslogPaths()) + pks, err := loadStats(innerCtx, t.cm, t.plan.GetSchema(), segment.GetSegmentID(), segment.GetField2StatslogPaths()) if err != nil { - log.Warn("failed to load segment stats log", zap.Error(err)) + log.Warn("failed to load segment stats log", + zap.Int64("planID", t.plan.GetPlanID()), + zap.String("type", t.plan.GetType().String()), + zap.Error(err)) return err, err } bf := metacache.NewBloomFilterSet(pks...) 
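`composeDeltalog` no longer consults the metacache for the segment's collection and partition; it looks the segment up in the compaction plan itself with `lo.Find` and returns a not-found error if the plan does not carry it. The lookup pattern, reduced to a self-contained example (the struct is a stand-in for `datapb.CompactionSegmentBinlogs`):

```go
package main

import (
	"fmt"

	"github.com/samber/lo"
)

// segmentBinlogs is a stand-in for datapb.CompactionSegmentBinlogs.
type segmentBinlogs struct {
	SegmentID    int64
	CollectionID int64
	PartitionID  int64
}

func main() {
	plan := []*segmentBinlogs{
		{SegmentID: 100, CollectionID: 1, PartitionID: 10},
		{SegmentID: 101, CollectionID: 1, PartitionID: 11},
	}

	// Find the segment entry carried by the plan; the ok flag replaces the
	// old metacache lookup and its "segment not found" error path.
	segment, ok := lo.Find(plan, func(s *segmentBinlogs) bool {
		return s.SegmentID == 101
	})
	if !ok {
		fmt.Println("segment not in plan")
		return
	}
	fmt.Printf("collection=%d partition=%d\n", segment.CollectionID, segment.PartitionID)
}
```

This is also why the plan now has to carry a `CollectionID` on each segment entry and a `Schema`, which is what the test fixtures below start populating.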
diff --git a/internal/datanode/l0_compactor_test.go b/internal/datanode/l0_compactor_test.go index 59b66086d67fa..8c833df21f69c 100644 --- a/internal/datanode/l0_compactor_test.go +++ b/internal/datanode/l0_compactor_test.go @@ -51,7 +51,6 @@ type LevelZeroCompactionTaskSuite struct { mockBinlogIO *io.MockBinlogIO mockAlloc *allocator.MockAllocator - mockMeta *metacache.MockMetaCache task *levelZeroCompactionTask dData *storage.DeleteData @@ -61,9 +60,8 @@ type LevelZeroCompactionTaskSuite struct { func (s *LevelZeroCompactionTaskSuite) SetupTest() { s.mockAlloc = allocator.NewMockAllocator(s.T()) s.mockBinlogIO = io.NewMockBinlogIO(s.T()) - s.mockMeta = metacache.NewMockMetaCache(s.T()) // plan of the task is unset - s.task = newLevelZeroCompactionTask(context.Background(), s.mockBinlogIO, s.mockAlloc, s.mockMeta, nil, nil, nil) + s.task = newLevelZeroCompactionTask(context.Background(), s.mockBinlogIO, s.mockAlloc, nil, nil) pk2ts := map[int64]uint64{ 1: 20000, @@ -101,20 +99,19 @@ func (s *LevelZeroCompactionTaskSuite) TestLinearBatchLoadDeltaFail() { }, {SegmentID: 200, Level: datapb.SegmentLevel_L1}, }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: true, + }, + }, + }, } s.task.plan = plan s.task.tr = timerecord.NewTimeRecorder("test") s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).Return(nil, errors.New("mock download fail")).Twice() - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: true, - }, - }, - }) - targetSegments := lo.Filter(plan.SegmentBinlogs, func(s *datapb.CompactionSegmentBinlogs, _ int) bool { return s.Level == datapb.SegmentLevel_L1 }) @@ -154,6 +151,13 @@ func (s *LevelZeroCompactionTaskSuite) TestLinearBatchUploadByCheckFail() { }, }}, }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: true, + }, + }, + }, } s.task.plan = plan @@ -170,15 +174,9 @@ func (s *LevelZeroCompactionTaskSuite) TestLinearBatchUploadByCheckFail() { s.task.cm = cm s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).Return([][]byte{s.dBlob}, nil).Times(2) - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID(mock.Anything).Return(nil, false).Twice() - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: true, - }, - }, - }) + mockAlloc := allocator.NewMockAllocator(s.T()) + mockAlloc.EXPECT().AllocOne().Return(0, errors.New("mock alloc err")) + s.task.allocator = mockAlloc targetSegments := lo.Filter(plan.SegmentBinlogs, func(s *datapb.CompactionSegmentBinlogs, _ int) bool { return s.Level == datapb.SegmentLevel_L1 @@ -200,7 +198,8 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactLinear() { Type: datapb.CompactionType_Level0DeleteCompaction, SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ { - SegmentID: 100, Level: datapb.SegmentLevel_L0, Deltalogs: []*datapb.FieldBinlog{ + CollectionID: 1, + SegmentID: 100, Level: datapb.SegmentLevel_L0, Deltalogs: []*datapb.FieldBinlog{ { Binlogs: []*datapb.Binlog{ {LogPath: "a/b/c1", LogSize: 100}, @@ -212,7 +211,8 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactLinear() { }, }, { - SegmentID: 101, Level: datapb.SegmentLevel_L0, Deltalogs: []*datapb.FieldBinlog{ + CollectionID: 1, + SegmentID: 101, Level: datapb.SegmentLevel_L0, Deltalogs: []*datapb.FieldBinlog{ { Binlogs: []*datapb.Binlog{ {LogPath: "a/d/c1", LogSize: 100}, @@ -223,20 +223,33 @@ func (s 
*LevelZeroCompactionTaskSuite) TestCompactLinear() { }, }, }, - {SegmentID: 200, Level: datapb.SegmentLevel_L1, Field2StatslogPaths: []*datapb.FieldBinlog{ - { - Binlogs: []*datapb.Binlog{ - {LogID: 9999, LogSize: 100}, + { + CollectionID: 1, + SegmentID: 200, Level: datapb.SegmentLevel_L1, Field2StatslogPaths: []*datapb.FieldBinlog{ + { + Binlogs: []*datapb.Binlog{ + {LogID: 9999, LogSize: 100}, + }, }, }, - }}, - {SegmentID: 201, Level: datapb.SegmentLevel_L1, Field2StatslogPaths: []*datapb.FieldBinlog{ - { - Binlogs: []*datapb.Binlog{ - {LogID: 9999, LogSize: 100}, + }, + { + CollectionID: 1, + SegmentID: 201, Level: datapb.SegmentLevel_L1, Field2StatslogPaths: []*datapb.FieldBinlog{ + { + Binlogs: []*datapb.Binlog{ + {LogID: 9999, LogSize: 100}, + }, }, }, - }}, + }, + }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: true, + }, + }, }, } @@ -254,18 +267,6 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactLinear() { s.task.cm = cm s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).Return([][]byte{s.dBlob}, nil).Times(2) - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID(mock.Anything, mock.Anything). - RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - return metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: id, PartitionID: 10}, nil), true - }) - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: true, - }, - }, - }) s.mockAlloc.EXPECT().AllocOne().Return(19530, nil).Times(2) s.mockBinlogIO.EXPECT().JoinFullPath(mock.Anything, mock.Anything). @@ -274,9 +275,9 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactLinear() { }).Times(2) s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil).Once() - s.Require().Equal(plan.GetPlanID(), s.task.getPlanID()) - s.Require().Equal(plan.GetChannel(), s.task.getChannelName()) - s.Require().EqualValues(1, s.task.getCollection()) + s.Require().Equal(plan.GetPlanID(), s.task.GetPlanID()) + s.Require().Equal(plan.GetChannel(), s.task.GetChannelName()) + s.Require().EqualValues(1, s.task.GetCollection()) l0Segments := lo.Filter(s.task.plan.GetSegmentBinlogs(), func(s *datapb.CompactionSegmentBinlogs, _ int) bool { return s.Level == datapb.SegmentLevel_L0 @@ -357,6 +358,13 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactBatch() { }, }}, }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: true, + }, + }, + }, } s.task.plan = plan @@ -373,18 +381,6 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactBatch() { s.task.cm = cm s.mockBinlogIO.EXPECT().Download(mock.Anything, mock.Anything).Return([][]byte{s.dBlob}, nil).Once() - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID(mock.Anything, mock.Anything). - RunAndReturn(func(id int64, filters ...metacache.SegmentFilter) (*metacache.SegmentInfo, bool) { - return metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: id, PartitionID: 10}, nil), true - }) - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: true, - }, - }, - }) s.mockAlloc.EXPECT().AllocOne().Return(19530, nil).Times(2) s.mockBinlogIO.EXPECT().JoinFullPath(mock.Anything, mock.Anything). 
@@ -430,11 +426,21 @@ func (s *LevelZeroCompactionTaskSuite) TestCompactBatch() { func (s *LevelZeroCompactionTaskSuite) TestUploadByCheck() { ctx := context.Background() + + plan := &datapb.CompactionPlan{ + SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ + { + SegmentID: 100, + }, + }, + } + s.Run("uploadByCheck directly composeDeltalog failed", func() { s.SetupTest() - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID(mock.Anything).Return(nil, false).Once() - + s.task.plan = plan + mockAlloc := allocator.NewMockAllocator(s.T()) + mockAlloc.EXPECT().AllocOne().Return(0, errors.New("mock alloc err")) + s.task.allocator = mockAlloc segments := map[int64]*storage.DeleteData{100: s.dData} results := make(map[int64]*datapb.CompactionSegment) err := s.task.uploadByCheck(ctx, false, segments, results) @@ -444,13 +450,8 @@ func (s *LevelZeroCompactionTaskSuite) TestUploadByCheck() { s.Run("uploadByCheck directly Upload failed", func() { s.SetupTest() + s.task.plan = plan s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(errors.New("mock upload failed")) - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID( - mock.MatchedBy(func(ID int64) bool { - return ID == 100 - }), mock.Anything). - Return(metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 100, PartitionID: 10}, nil), true) s.mockAlloc.EXPECT().AllocOne().Return(19530, nil) blobKey := metautil.JoinIDPath(1, 10, 100, 19530) @@ -466,13 +467,8 @@ func (s *LevelZeroCompactionTaskSuite) TestUploadByCheck() { s.Run("upload directly", func() { s.SetupTest() + s.task.plan = plan s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil) - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID( - mock.MatchedBy(func(ID int64) bool { - return ID == 100 - }), mock.Anything). - Return(metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 100, PartitionID: 10}, nil), true) s.mockAlloc.EXPECT().AllocOne().Return(19530, nil) blobKey := metautil.JoinIDPath(1, 10, 100, 19530) @@ -503,16 +499,11 @@ func (s *LevelZeroCompactionTaskSuite) TestUploadByCheck() { }) s.Run("check with upload", func() { + s.task.plan = plan blobKey := metautil.JoinIDPath(1, 10, 100, 19530) blobPath := path.Join(common.SegmentDeltaLogPath, blobKey) s.mockBinlogIO.EXPECT().Upload(mock.Anything, mock.Anything).Return(nil) - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT().GetSegmentByID( - mock.MatchedBy(func(ID int64) bool { - return ID == 100 - }), mock.Anything). - Return(metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 100, PartitionID: 10}, nil), true) s.mockAlloc.EXPECT().AllocOne().Return(19530, nil) s.mockBinlogIO.EXPECT().JoinFullPath(mock.Anything, mock.Anything).Return(blobPath) @@ -539,20 +530,17 @@ func (s *LevelZeroCompactionTaskSuite) TestUploadByCheck() { } func (s *LevelZeroCompactionTaskSuite) TestComposeDeltalog() { - s.mockMeta.EXPECT().Collection().Return(1) - s.mockMeta.EXPECT(). - GetSegmentByID( - mock.MatchedBy(func(ID int64) bool { - return ID == 100 - }), mock.Anything). - Return(metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 100, PartitionID: 10}, nil), true) - - s.mockMeta.EXPECT(). - GetSegmentByID( - mock.MatchedBy(func(ID int64) bool { - return ID == 101 - }), mock.Anything). 
- Return(nil, false) + plan := &datapb.CompactionPlan{ + SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ + { + SegmentID: 100, + }, + { + SegmentID: 101, + }, + }, + } + s.task.plan = plan s.mockAlloc.EXPECT().AllocOne().Return(19530, nil) @@ -568,8 +556,13 @@ func (s *LevelZeroCompactionTaskSuite) TestComposeDeltalog() { s.NotNil(v) s.Equal(blobPath, binlog.LogPath) - _, _, err = s.task.composeDeltalog(101, s.dData) - s.Error(err) + kvs, _, err = s.task.composeDeltalog(101, s.dData) + s.NoError(err) + s.Equal(1, len(kvs)) + v, ok = kvs[blobPath] + s.True(ok) + s.NotNil(v) + s.Equal(blobPath, binlog.LogPath) } func (s *LevelZeroCompactionTaskSuite) TestSplitDelta() { @@ -684,6 +677,13 @@ func (s *LevelZeroCompactionTaskSuite) TestLoadBF() { }, }}, }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: true, + }, + }, + }, } s.task.plan = plan @@ -698,15 +698,7 @@ func (s *LevelZeroCompactionTaskSuite) TestLoadBF() { cm.EXPECT().MultiRead(mock.Anything, mock.Anything).Return([][]byte{sw.GetBuffer()}, nil) s.task.cm = cm - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: true, - }, - }, - }) - - bfs, err := s.task.loadBF(plan.SegmentBinlogs) + bfs, err := s.task.loadBF(context.Background(), plan.SegmentBinlogs) s.NoError(err) s.Len(bfs, 1) @@ -730,19 +722,18 @@ func (s *LevelZeroCompactionTaskSuite) TestFailed() { }, }}, }, + Schema: &schemapb.CollectionSchema{ + Fields: []*schemapb.FieldSchema{ + { + IsPrimaryKey: false, + }, + }, + }, } s.task.plan = plan - s.mockMeta.EXPECT().Schema().Return(&schemapb.CollectionSchema{ - Fields: []*schemapb.FieldSchema{ - { - IsPrimaryKey: false, - }, - }, - }) - - _, err := s.task.loadBF(plan.SegmentBinlogs) + _, err := s.task.loadBF(context.Background(), plan.SegmentBinlogs) s.Error(err) }) @@ -757,7 +748,7 @@ func (s *LevelZeroCompactionTaskSuite) TestFailed() { s.task.plan = plan - _, err := s.task.compact() + _, err := s.task.Compact() s.Error(err) }) } diff --git a/internal/datanode/metacache/actions.go b/internal/datanode/metacache/actions.go index 20d18f4acd846..3a2ac3f9831f5 100644 --- a/internal/datanode/metacache/actions.go +++ b/internal/datanode/metacache/actions.go @@ -25,40 +25,75 @@ import ( "github.com/milvus-io/milvus/pkg/util/typeutil" ) +type segmentCriterion struct { + ids typeutil.Set[int64] + states typeutil.Set[commonpb.SegmentState] + others []SegmentFilter +} + +func (sc *segmentCriterion) Match(segment *SegmentInfo) bool { + for _, filter := range sc.others { + if !filter.Filter(segment) { + return false + } + } + return true +} + type SegmentFilter interface { Filter(info *SegmentInfo) bool - SegmentIDs() ([]int64, bool) + AddFilter(*segmentCriterion) } +// SegmentIDFilter segment filter with segment ids. type SegmentIDFilter struct { - segmentIDs []int64 - ids typeutil.Set[int64] + ids typeutil.Set[int64] +} + +func (f *SegmentIDFilter) Filter(info *SegmentInfo) bool { + return f.ids.Contain(info.segmentID) +} + +func (f *SegmentIDFilter) AddFilter(criterion *segmentCriterion) { + criterion.ids = f.ids } func WithSegmentIDs(segmentIDs ...int64) SegmentFilter { set := typeutil.NewSet(segmentIDs...) return &SegmentIDFilter{ - segmentIDs: segmentIDs, - ids: set, + ids: set, } } -func (f *SegmentIDFilter) Filter(info *SegmentInfo) bool { - return f.ids.Contain(info.segmentID) +// SegmentStateFilter segment filter with segment states. 
+type SegmentStateFilter struct { + states typeutil.Set[commonpb.SegmentState] } -func (f *SegmentIDFilter) SegmentIDs() ([]int64, bool) { - return f.segmentIDs, true +func (f *SegmentStateFilter) Filter(info *SegmentInfo) bool { + return f.states.Contain(info.State()) } +func (f *SegmentStateFilter) AddFilter(criterion *segmentCriterion) { + criterion.states = f.states +} + +func WithSegmentState(states ...commonpb.SegmentState) SegmentFilter { + set := typeutil.NewSet(states...) + return &SegmentStateFilter{ + states: set, + } +} + +// SegmentFilterFunc implements segment filter with other filters logic. type SegmentFilterFunc func(info *SegmentInfo) bool func (f SegmentFilterFunc) Filter(info *SegmentInfo) bool { return f(info) } -func (f SegmentFilterFunc) SegmentIDs() ([]int64, bool) { - return nil, false +func (f SegmentFilterFunc) AddFilter(criterion *segmentCriterion) { + criterion.others = append(criterion.others, f) } func WithPartitionID(partitionID int64) SegmentFilter { @@ -67,13 +102,6 @@ func WithPartitionID(partitionID int64) SegmentFilter { }) } -func WithSegmentState(states ...commonpb.SegmentState) SegmentFilter { - set := typeutil.NewSet(states...) - return SegmentFilterFunc(func(info *SegmentInfo) bool { - return set.Len() > 0 && set.Contain(info.state) - }) -} - func WithStartPosNotRecorded() SegmentFilter { return SegmentFilterFunc(func(info *SegmentInfo) bool { return !info.startPosRecorded diff --git a/internal/datanode/metacache/bloom_filter_set.go b/internal/datanode/metacache/bloom_filter_set.go index 002988f61da2b..80b7bc057849c 100644 --- a/internal/datanode/metacache/bloom_filter_set.go +++ b/internal/datanode/metacache/bloom_filter_set.go @@ -19,10 +19,10 @@ package metacache import ( "sync" - "github.com/bits-and-blooms/bloom/v3" "github.com/samber/lo" "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/util/paramtable" ) @@ -55,7 +55,7 @@ func NewBloomFilterSetWithBatchSize(batchSize uint, historyEntries ...*storage.P } } -func (bfs *BloomFilterSet) PkExists(lc storage.LocationsCache) bool { +func (bfs *BloomFilterSet) PkExists(lc *storage.LocationsCache) bool { bfs.mut.RLock() defer bfs.mut.RUnlock() if bfs.current != nil && bfs.current.TestLocationCache(lc) { @@ -76,8 +76,9 @@ func (bfs *BloomFilterSet) UpdatePKRange(ids storage.FieldData) error { if bfs.current == nil { bfs.current = &storage.PkStatistics{ - PkFilter: bloom.NewWithEstimates(bfs.batchSize, - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), + PkFilter: bloomfilter.NewBloomFilterWithType(bfs.batchSize, + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()), } } diff --git a/internal/datanode/metacache/meta_cache.go b/internal/datanode/metacache/meta_cache.go index 61d9644e34ba8..1c85946932943 100644 --- a/internal/datanode/metacache/meta_cache.go +++ b/internal/datanode/metacache/meta_cache.go @@ -27,9 +27,9 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/log" - "github.com/milvus-io/milvus/pkg/util/typeutil" ) +//go:generate mockery --name=MetaCache --structname=MockMetaCache --output=./ --filename=mock_meta_cache.go --with-expecter --inpackage type MetaCache interface { // Collection returns collection id of metacache. 
Collection() int64 @@ -41,8 +41,6 @@ type MetaCache interface { UpdateSegments(action SegmentAction, filters ...SegmentFilter) // RemoveSegments removes segments matches the provided filter. RemoveSegments(filters ...SegmentFilter) []int64 - // CompactSegments transfers compaction segment results inside the metacache. - CompactSegments(newSegmentID, partitionID int64, numRows int64, bfs *BloomFilterSet, oldSegmentIDs ...int64) // GetSegmentsBy returns segments statify the provided filters. GetSegmentsBy(filters ...SegmentFilter) []*SegmentInfo // GetSegmentByID returns segment with provided segment id if exists. @@ -51,6 +49,10 @@ type MetaCache interface { GetSegmentIDsBy(filters ...SegmentFilter) []int64 // PredictSegments returns the segment ids which may contain the provided primary key. PredictSegments(pk storage.PrimaryKey, filters ...SegmentFilter) ([]int64, bool) + // DetectMissingSegments returns the segment ids which is missing in datanode. + DetectMissingSegments(segments map[int64]struct{}) []int64 + // UpdateSegmentView updates the segments BF from datacoord view. + UpdateSegmentView(partitionID int64, newSegments []*datapb.SyncSegmentInfo, newSegmentsBF []*BloomFilterSet, allSegments map[int64]struct{}) } var _ MetaCache = (*metaCacheImpl)(nil) @@ -60,18 +62,32 @@ type PkStatsFactory func(vchannel *datapb.SegmentInfo) *BloomFilterSet type metaCacheImpl struct { collectionID int64 vChannelName string - segmentInfos map[int64]*SegmentInfo schema *schemapb.CollectionSchema - mu sync.RWMutex + + mu sync.RWMutex + segmentInfos map[int64]*SegmentInfo + stateSegments map[commonpb.SegmentState]map[int64]*SegmentInfo } func NewMetaCache(info *datapb.ChannelWatchInfo, factory PkStatsFactory) MetaCache { vchannel := info.GetVchan() cache := &metaCacheImpl{ - collectionID: vchannel.GetCollectionID(), - vChannelName: vchannel.GetChannelName(), - segmentInfos: make(map[int64]*SegmentInfo), - schema: info.GetSchema(), + collectionID: vchannel.GetCollectionID(), + vChannelName: vchannel.GetChannelName(), + segmentInfos: make(map[int64]*SegmentInfo), + stateSegments: make(map[commonpb.SegmentState]map[int64]*SegmentInfo), + schema: info.GetSchema(), + } + + for _, state := range []commonpb.SegmentState{ + commonpb.SegmentState_Growing, + commonpb.SegmentState_Sealed, + commonpb.SegmentState_Flushing, + commonpb.SegmentState_Flushed, + commonpb.SegmentState_Dropped, + commonpb.SegmentState_Importing, + } { + cache.stateSegments[state] = make(map[int64]*SegmentInfo) } cache.init(vchannel, factory) @@ -80,13 +96,13 @@ func NewMetaCache(info *datapb.ChannelWatchInfo, factory PkStatsFactory) MetaCac func (c *metaCacheImpl) init(vchannel *datapb.VchannelInfo, factory PkStatsFactory) { for _, seg := range vchannel.FlushedSegments { - c.segmentInfos[seg.GetID()] = NewSegmentInfo(seg, factory(seg)) + c.addSegment(NewSegmentInfo(seg, factory(seg))) } for _, seg := range vchannel.UnflushedSegments { // segment state could be sealed for growing segment if flush request processed before datanode watch seg.State = commonpb.SegmentState_Growing - c.segmentInfos[seg.GetID()] = NewSegmentInfo(seg, factory(seg)) + c.addSegment(NewSegmentInfo(seg, factory(seg))) } } @@ -110,43 +126,13 @@ func (c *metaCacheImpl) AddSegment(segInfo *datapb.SegmentInfo, factory PkStatsF c.mu.Lock() defer c.mu.Unlock() - c.segmentInfos[segInfo.GetID()] = segment + c.addSegment(segment) } -func (c *metaCacheImpl) CompactSegments(newSegmentID, partitionID int64, numOfRows int64, bfs *BloomFilterSet, oldSegmentIDs ...int64) { - 
c.mu.Lock() - defer c.mu.Unlock() - - compactTo := NullSegment - if numOfRows > 0 { - compactTo = newSegmentID - if _, ok := c.segmentInfos[newSegmentID]; !ok { - c.segmentInfos[newSegmentID] = &SegmentInfo{ - segmentID: newSegmentID, - partitionID: partitionID, - state: commonpb.SegmentState_Flushed, - level: datapb.SegmentLevel_L1, - flushedRows: numOfRows, - startPosRecorded: true, - bfs: bfs, - } - } - log.Info("add compactTo segment info metacache", zap.Int64("segmentID", compactTo)) - } - - oldSet := typeutil.NewSet(oldSegmentIDs...) - for _, segment := range c.segmentInfos { - if oldSet.Contain(segment.segmentID) || - oldSet.Contain(segment.compactTo) { - updated := segment.Clone() - updated.compactTo = compactTo - c.segmentInfos[segment.segmentID] = updated - log.Info("update segment compactTo", - zap.Int64("segmentID", segment.segmentID), - zap.Int64("originalCompactTo", segment.compactTo), - zap.Int64("compactTo", compactTo)) - } - } +func (c *metaCacheImpl) addSegment(segment *SegmentInfo) { + segID := segment.SegmentID() + c.segmentInfos[segID] = segment + c.stateSegments[segment.State()][segID] = segment } func (c *metaCacheImpl) RemoveSegments(filters ...SegmentFilter) []int64 { @@ -160,6 +146,7 @@ func (c *metaCacheImpl) RemoveSegments(filters ...SegmentFilter) []int64 { var result []int64 process := func(id int64, info *SegmentInfo) { delete(c.segmentInfos, id) + delete(c.stateSegments[info.State()], id) result = append(result, id) } c.rangeWithFilter(process, filters...) @@ -207,6 +194,8 @@ func (c *metaCacheImpl) UpdateSegments(action SegmentAction, filters ...SegmentF nInfo := info.Clone() action(nInfo) c.segmentInfos[id] = nInfo + delete(c.stateSegments[info.State()], info.SegmentID()) + c.stateSegments[nInfo.State()][nInfo.SegmentID()] = nInfo }, filters...) } @@ -223,39 +212,92 @@ func (c *metaCacheImpl) PredictSegments(pk storage.PrimaryKey, filters ...Segmen } func (c *metaCacheImpl) rangeWithFilter(fn func(id int64, info *SegmentInfo), filters ...SegmentFilter) { - var hasIDs bool - set := typeutil.NewSet[int64]() - filtered := make([]SegmentFilter, 0, len(filters)) + criterion := &segmentCriterion{} for _, filter := range filters { - ids, ok := filter.SegmentIDs() - if ok { - set.Insert(ids...) 
- hasIDs = true - } else { - filtered = append(filtered, filter) + filter.AddFilter(criterion) + } + + var candidates []map[int64]*SegmentInfo + if criterion.states != nil { + candidates = lo.Map(criterion.states.Collect(), func(state commonpb.SegmentState, _ int) map[int64]*SegmentInfo { + return c.stateSegments[state] + }) + } else { + candidates = []map[int64]*SegmentInfo{ + c.segmentInfos, } } - mergedFilter := func(info *SegmentInfo) bool { - for _, filter := range filtered { - if !filter.Filter(info) { - return false + + for _, candidate := range candidates { + var segments map[int64]*SegmentInfo + if criterion.ids != nil { + segments = lo.SliceToMap(lo.FilterMap(criterion.ids.Collect(), func(id int64, _ int) (*SegmentInfo, bool) { + segment, ok := candidate[id] + return segment, ok + }), func(segment *SegmentInfo) (int64, *SegmentInfo) { + return segment.SegmentID(), segment + }) + } else { + segments = candidate + } + + for id, segment := range segments { + if criterion.Match(segment) { + fn(id, segment) } } - return true } +} - if hasIDs { - for id := range set { - info, has := c.segmentInfos[id] - if has && mergedFilter(info) { - fn(id, info) - } +func (c *metaCacheImpl) DetectMissingSegments(segments map[int64]struct{}) []int64 { + c.mu.RLock() + defer c.mu.RUnlock() + + missingSegments := make([]int64, 0) + + for segID := range segments { + if _, ok := c.segmentInfos[segID]; !ok { + missingSegments = append(missingSegments, segID) } - } else { - for id, info := range c.segmentInfos { - if mergedFilter(info) { - fn(id, info) + } + + return missingSegments +} + +func (c *metaCacheImpl) UpdateSegmentView(partitionID int64, + newSegments []*datapb.SyncSegmentInfo, + newSegmentsBF []*BloomFilterSet, + allSegments map[int64]struct{}, +) { + c.mu.Lock() + defer c.mu.Unlock() + + for i, info := range newSegments { + // check again + if _, ok := c.segmentInfos[info.GetSegmentId()]; !ok { + segInfo := &SegmentInfo{ + segmentID: info.GetSegmentId(), + partitionID: partitionID, + state: info.GetState(), + level: info.GetLevel(), + flushedRows: info.GetNumOfRows(), + startPosRecorded: true, + bfs: newSegmentsBF[i], } + c.segmentInfos[info.GetSegmentId()] = segInfo + c.stateSegments[info.GetState()][info.GetSegmentId()] = segInfo + log.Info("metacache does not have segment, add it", zap.Int64("segmentID", info.GetSegmentId())) + } + } + + for segID, info := range c.segmentInfos { + if info.partitionID != partitionID { + continue + } + if _, ok := allSegments[segID]; !ok { + log.Info("remove dropped segment", zap.Int64("segmentID", segID)) + delete(c.segmentInfos, segID) + delete(c.stateSegments[info.State()], segID) } } } diff --git a/internal/datanode/metacache/meta_cache_test.go b/internal/datanode/metacache/meta_cache_test.go index c3b1663761ecf..cdb5e0614d567 100644 --- a/internal/datanode/metacache/meta_cache_test.go +++ b/internal/datanode/metacache/meta_cache_test.go @@ -103,27 +103,6 @@ func (s *MetaCacheSuite) TestMetaInfo() { s.Equal(s.collSchema, s.cache.Schema()) } -func (s *MetaCacheSuite) TestCompactSegments() { - for i, seg := range s.newSegments { - // compaction from flushed[i], unflushed[i] and invalidSeg to new[i] - s.cache.CompactSegments(seg, s.partitionIDs[i], 100, NewBloomFilterSet(), s.flushedSegments[i], s.growingSegments[i], s.invaliedSeg) - } - - for i, partitionID := range s.partitionIDs { - segs := s.cache.GetSegmentsBy(WithPartitionID(partitionID)) - for _, seg := range segs { - if seg.SegmentID() == s.newSegments[i] { - s.Equal(commonpb.SegmentState_Flushed, 
seg.State()) - s.Equal(int64(100), seg.NumOfRows()) - s.Equal(datapb.SegmentLevel_L1, seg.Level()) - } - if seg.SegmentID() == s.flushedSegments[i] { - s.Equal(s.newSegments[i], seg.CompactTo()) - } - } - } -} - func (s *MetaCacheSuite) TestAddSegment() { testSegs := []int64{100, 101, 102} for _, segID := range testSegs { @@ -210,6 +189,50 @@ func (s *MetaCacheSuite) TestPredictSegments() { s.EqualValues(1, predict[0]) } +func (s *MetaCacheSuite) Test_DetectMissingSegments() { + segments := map[int64]struct{}{ + 1: {}, 2: {}, 3: {}, 4: {}, 5: {}, 6: {}, 7: {}, 8: {}, 9: {}, 10: {}, + } + + missingSegments := s.cache.DetectMissingSegments(segments) + s.ElementsMatch(missingSegments, []int64{9, 10}) +} + +func (s *MetaCacheSuite) Test_UpdateSegmentView() { + addSegments := []*datapb.SyncSegmentInfo{ + { + SegmentId: 100, + PkStatsLog: nil, + State: commonpb.SegmentState_Flushed, + Level: datapb.SegmentLevel_L1, + NumOfRows: 10240, + }, + } + addSegmentsBF := []*BloomFilterSet{ + NewBloomFilterSet(), + } + segments := map[int64]struct{}{ + 1: {}, 2: {}, 3: {}, 4: {}, 5: {}, 6: {}, 7: {}, 8: {}, 100: {}, + } + + s.cache.UpdateSegmentView(1, addSegments, addSegmentsBF, segments) + + addSegments = []*datapb.SyncSegmentInfo{ + { + SegmentId: 101, + PkStatsLog: nil, + State: commonpb.SegmentState_Flushed, + Level: datapb.SegmentLevel_L1, + NumOfRows: 10240, + }, + } + + segments = map[int64]struct{}{ + 1: {}, 2: {}, 3: {}, 4: {}, 5: {}, 6: {}, 7: {}, 8: {}, 101: {}, + } + s.cache.UpdateSegmentView(1, addSegments, addSegmentsBF, segments) +} + func TestMetaCacheSuite(t *testing.T) { suite.Run(t, new(MetaCacheSuite)) } diff --git a/internal/datanode/metacache/mock_meta_cache.go b/internal/datanode/metacache/mock_meta_cache.go index b8c7bd0035d60..0bd69c61766d7 100644 --- a/internal/datanode/metacache/mock_meta_cache.go +++ b/internal/datanode/metacache/mock_meta_cache.go @@ -114,53 +114,46 @@ func (_c *MockMetaCache_Collection_Call) RunAndReturn(run func() int64) *MockMet return _c } -// CompactSegments provides a mock function with given fields: newSegmentID, partitionID, numRows, bfs, oldSegmentIDs -func (_m *MockMetaCache) CompactSegments(newSegmentID int64, partitionID int64, numRows int64, bfs *BloomFilterSet, oldSegmentIDs ...int64) { - _va := make([]interface{}, len(oldSegmentIDs)) - for _i := range oldSegmentIDs { - _va[_i] = oldSegmentIDs[_i] +// DetectMissingSegments provides a mock function with given fields: segments +func (_m *MockMetaCache) DetectMissingSegments(segments map[int64]struct{}) []int64 { + ret := _m.Called(segments) + + var r0 []int64 + if rf, ok := ret.Get(0).(func(map[int64]struct{}) []int64); ok { + r0 = rf(segments) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]int64) + } } - var _ca []interface{} - _ca = append(_ca, newSegmentID, partitionID, numRows, bfs) - _ca = append(_ca, _va...) - _m.Called(_ca...) 
+ + return r0 } -// MockMetaCache_CompactSegments_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'CompactSegments' -type MockMetaCache_CompactSegments_Call struct { +// MockMetaCache_DetectMissingSegments_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DetectMissingSegments' +type MockMetaCache_DetectMissingSegments_Call struct { *mock.Call } -// CompactSegments is a helper method to define mock.On call -// - newSegmentID int64 -// - partitionID int64 -// - numRows int64 -// - bfs *BloomFilterSet -// - oldSegmentIDs ...int64 -func (_e *MockMetaCache_Expecter) CompactSegments(newSegmentID interface{}, partitionID interface{}, numRows interface{}, bfs interface{}, oldSegmentIDs ...interface{}) *MockMetaCache_CompactSegments_Call { - return &MockMetaCache_CompactSegments_Call{Call: _e.mock.On("CompactSegments", - append([]interface{}{newSegmentID, partitionID, numRows, bfs}, oldSegmentIDs...)...)} +// DetectMissingSegments is a helper method to define mock.On call +// - segments map[int64]struct{} +func (_e *MockMetaCache_Expecter) DetectMissingSegments(segments interface{}) *MockMetaCache_DetectMissingSegments_Call { + return &MockMetaCache_DetectMissingSegments_Call{Call: _e.mock.On("DetectMissingSegments", segments)} } -func (_c *MockMetaCache_CompactSegments_Call) Run(run func(newSegmentID int64, partitionID int64, numRows int64, bfs *BloomFilterSet, oldSegmentIDs ...int64)) *MockMetaCache_CompactSegments_Call { +func (_c *MockMetaCache_DetectMissingSegments_Call) Run(run func(segments map[int64]struct{})) *MockMetaCache_DetectMissingSegments_Call { _c.Call.Run(func(args mock.Arguments) { - variadicArgs := make([]int64, len(args)-4) - for i, a := range args[4:] { - if a != nil { - variadicArgs[i] = a.(int64) - } - } - run(args[0].(int64), args[1].(int64), args[2].(int64), args[3].(*BloomFilterSet), variadicArgs...) 
+ run(args[0].(map[int64]struct{})) }) return _c } -func (_c *MockMetaCache_CompactSegments_Call) Return() *MockMetaCache_CompactSegments_Call { - _c.Call.Return() +func (_c *MockMetaCache_DetectMissingSegments_Call) Return(_a0 []int64) *MockMetaCache_DetectMissingSegments_Call { + _c.Call.Return(_a0) return _c } -func (_c *MockMetaCache_CompactSegments_Call) RunAndReturn(run func(int64, int64, int64, *BloomFilterSet, ...int64)) *MockMetaCache_CompactSegments_Call { +func (_c *MockMetaCache_DetectMissingSegments_Call) RunAndReturn(run func(map[int64]struct{}) []int64) *MockMetaCache_DetectMissingSegments_Call { _c.Call.Return(run) return _c } @@ -517,6 +510,42 @@ func (_c *MockMetaCache_Schema_Call) RunAndReturn(run func() *schemapb.Collectio return _c } +// UpdateSegmentView provides a mock function with given fields: partitionID, newSegments, newSegmentsBF, allSegments +func (_m *MockMetaCache) UpdateSegmentView(partitionID int64, newSegments []*datapb.SyncSegmentInfo, newSegmentsBF []*BloomFilterSet, allSegments map[int64]struct{}) { + _m.Called(partitionID, newSegments, newSegmentsBF, allSegments) +} + +// MockMetaCache_UpdateSegmentView_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'UpdateSegmentView' +type MockMetaCache_UpdateSegmentView_Call struct { + *mock.Call +} + +// UpdateSegmentView is a helper method to define mock.On call +// - partitionID int64 +// - newSegments []*datapb.SyncSegmentInfo +// - newSegmentsBF []*BloomFilterSet +// - allSegments map[int64]struct{} +func (_e *MockMetaCache_Expecter) UpdateSegmentView(partitionID interface{}, newSegments interface{}, newSegmentsBF interface{}, allSegments interface{}) *MockMetaCache_UpdateSegmentView_Call { + return &MockMetaCache_UpdateSegmentView_Call{Call: _e.mock.On("UpdateSegmentView", partitionID, newSegments, newSegmentsBF, allSegments)} +} + +func (_c *MockMetaCache_UpdateSegmentView_Call) Run(run func(partitionID int64, newSegments []*datapb.SyncSegmentInfo, newSegmentsBF []*BloomFilterSet, allSegments map[int64]struct{})) *MockMetaCache_UpdateSegmentView_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(int64), args[1].([]*datapb.SyncSegmentInfo), args[2].([]*BloomFilterSet), args[3].(map[int64]struct{})) + }) + return _c +} + +func (_c *MockMetaCache_UpdateSegmentView_Call) Return() *MockMetaCache_UpdateSegmentView_Call { + _c.Call.Return() + return _c +} + +func (_c *MockMetaCache_UpdateSegmentView_Call) RunAndReturn(run func(int64, []*datapb.SyncSegmentInfo, []*BloomFilterSet, map[int64]struct{})) *MockMetaCache_UpdateSegmentView_Call { + _c.Call.Return(run) + return _c +} + // UpdateSegments provides a mock function with given fields: action, filters func (_m *MockMetaCache) UpdateSegments(action SegmentAction, filters ...SegmentFilter) { _va := make([]interface{}, len(filters)) diff --git a/internal/datanode/mock_test.go b/internal/datanode/mock_test.go index ab9a99ad8f2f3..92286c4bbfcb0 100644 --- a/internal/datanode/mock_test.go +++ b/internal/datanode/mock_test.go @@ -1188,57 +1188,6 @@ func genEmptyInsertData() *InsertData { } } -func genInsertDataWithExpiredTS() *InsertData { - return &InsertData{ - Data: map[int64]storage.FieldData{ - 0: &storage.Int64FieldData{ - Data: []int64{11, 22}, - }, - 1: &storage.Int64FieldData{ - Data: []int64{329749364736000000, 329500223078400000}, // 2009-11-10 23:00:00 +0000 UTC, 2009-10-31 23:00:00 +0000 UTC - }, - 100: &storage.FloatVectorFieldData{ - Data: []float32{1.0, 6.0, 7.0, 8.0}, - Dim: 2, - }, - 101: 
&storage.BinaryVectorFieldData{ - Data: []byte{0, 255, 255, 255, 128, 128, 128, 0}, - Dim: 32, - }, - 102: &storage.BoolFieldData{ - Data: []bool{true, false}, - }, - 103: &storage.Int8FieldData{ - Data: []int8{5, 6}, - }, - 104: &storage.Int16FieldData{ - Data: []int16{7, 8}, - }, - 105: &storage.Int32FieldData{ - Data: []int32{9, 10}, - }, - 106: &storage.Int64FieldData{ - Data: []int64{1, 2}, - }, - 107: &storage.FloatFieldData{ - Data: []float32{2.333, 2.334}, - }, - 108: &storage.DoubleFieldData{ - Data: []float64{3.333, 3.334}, - }, - 109: &storage.StringFieldData{ - Data: []string{"test1", "test2"}, - }, - }, - } -} - -func genTimestamp() typeutil.Timestamp { - // Generate birthday of Golang - gb := time.Date(2009, time.Month(11), 10, 23, 0, 0, 0, time.UTC) - return tsoutil.ComposeTSByTime(gb, 0) -} - func genTestTickler() *etcdTickler { return newEtcdTickler(0, "", nil, nil, 0) } diff --git a/internal/datanode/services.go b/internal/datanode/services.go index ad8cb3039e7ec..6ffa618988b48 100644 --- a/internal/datanode/services.go +++ b/internal/datanode/services.go @@ -23,10 +23,12 @@ import ( "context" "fmt" + "github.com/samber/lo" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/datanode/importv2" "github.com/milvus-io/milvus/internal/datanode/io" "github.com/milvus-io/milvus/internal/datanode/metacache" @@ -38,6 +40,7 @@ import ( "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/tracer" + "github.com/milvus-io/milvus/pkg/util/conc" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/metricsinfo" "github.com/milvus-io/milvus/pkg/util/tsoutil" @@ -204,29 +207,9 @@ func (node *DataNode) Compaction(ctx context.Context, req *datapb.CompactionPlan return merr.Status(err), nil } - ds, ok := node.flowgraphManager.GetFlowgraphService(req.GetChannel()) - if !ok { - log.Warn("illegel compaction plan, channel not in this DataNode", zap.String("channelName", req.GetChannel())) - return merr.Status(merr.WrapErrChannelNotFound(req.GetChannel(), "illegel compaction plan")), nil - } - - if !node.compactionExecutor.isValidChannel(req.GetChannel()) { - log.Warn("channel of compaction is marked invalid in compaction executor", zap.String("channelName", req.GetChannel())) - return merr.Status(merr.WrapErrChannelNotFound(req.GetChannel(), "channel is dropping")), nil - } - - meta := ds.metacache - for _, segment := range req.GetSegmentBinlogs() { - if segment.GetLevel() == datapb.SegmentLevel_L0 { - continue - } - _, ok := meta.GetSegmentByID(segment.GetSegmentID(), metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - if !ok { - log.Warn("compaction plan contains segment which is not flushed", - zap.Int64("segmentID", segment.GetSegmentID()), - ) - return merr.Status(merr.WrapErrSegmentNotFound(segment.GetSegmentID(), "segment with flushed state not found")), nil - } + if len(req.GetSegmentBinlogs()) == 0 { + log.Info("no segments to compact") + return merr.Success(), nil } /* @@ -235,26 +218,21 @@ func (node *DataNode) Compaction(ctx context.Context, req *datapb.CompactionPlan taskCtx := trace.ContextWithSpanContext(node.ctx, spanCtx)*/ taskCtx := tracer.Propagate(ctx, node.ctx) - var task compactor + var task compaction.Compactor + binlogIO := io.NewBinlogIO(node.chunkManager, getOrCreateIOPool()) switch req.GetType() { 
case datapb.CompactionType_Level0DeleteCompaction: - binlogIO := io.NewBinlogIO(node.chunkManager, getOrCreateIOPool()) task = newLevelZeroCompactionTask( taskCtx, binlogIO, node.allocator, - ds.metacache, - node.syncMgr, node.chunkManager, req, ) case datapb.CompactionType_MixCompaction: - binlogIO := io.NewBinlogIO(node.chunkManager, getOrCreateIOPool()) - task = newCompactionTask( + task = compaction.NewMixCompactionTask( taskCtx, binlogIO, - ds.metacache, - node.syncMgr, node.allocator, req, ) @@ -288,10 +266,9 @@ func (node *DataNode) SyncSegments(ctx context.Context, req *datapb.SyncSegments log := log.Ctx(ctx).With( zap.Int64("planID", req.GetPlanID()), zap.Int64("nodeID", node.GetNodeID()), - zap.Int64("target segmentID", req.GetCompactedTo()), - zap.Int64s("compacted from", req.GetCompactedFrom()), - zap.Int64("numOfRows", req.GetNumOfRows()), - zap.String("channelName", req.GetChannelName()), + zap.Int64("collectionID", req.GetCollectionId()), + zap.Int64("partitionID", req.GetPartitionId()), + zap.String("channel", req.GetChannelName()), ) log.Info("DataNode receives SyncSegments") @@ -301,9 +278,8 @@ func (node *DataNode) SyncSegments(ctx context.Context, req *datapb.SyncSegments return merr.Status(err), nil } - if len(req.GetCompactedFrom()) <= 0 { - log.Info("SyncSegments with empty compactedFrom, clearing the plan") - node.compactionExecutor.injectDone(req.GetPlanID()) + if len(req.GetSegmentInfos()) <= 0 { + log.Info("sync segments is empty, skip it") return merr.Success(), nil } @@ -311,22 +287,52 @@ func (node *DataNode) SyncSegments(ctx context.Context, req *datapb.SyncSegments if !ok { node.compactionExecutor.discardPlan(req.GetChannelName()) err := merr.WrapErrChannelNotFound(req.GetChannelName()) - log.Warn("failed to sync segments", zap.Error(err)) + log.Warn("failed to get flow graph service", zap.Error(err)) return merr.Status(err), nil } - err := binlog.DecompressBinLog(storage.StatsBinlog, req.GetCollectionId(), req.GetPartitionId(), req.GetCompactedTo(), req.GetStatsLogs()) - if err != nil { - log.Warn("failed to DecompressBinLog", zap.Error(err)) - return merr.Status(err), nil + + allSegments := make(map[int64]struct{}) + for segID := range req.GetSegmentInfos() { + allSegments[segID] = struct{}{} } - pks, err := loadStats(ctx, node.chunkManager, ds.metacache.Schema(), req.GetCompactedTo(), req.GetStatsLogs()) + + missingSegments := ds.metacache.DetectMissingSegments(allSegments) + + newSegments := make([]*datapb.SyncSegmentInfo, 0, len(missingSegments)) + futures := make([]*conc.Future[any], 0, len(missingSegments)) + + for _, segID := range missingSegments { + segID := segID + future := node.pool.Submit(func() (any, error) { + newSeg := req.GetSegmentInfos()[segID] + var val *metacache.BloomFilterSet + var err error + err = binlog.DecompressBinLog(storage.StatsBinlog, req.GetCollectionId(), req.GetPartitionId(), newSeg.GetSegmentId(), []*datapb.FieldBinlog{newSeg.GetPkStatsLog()}) + if err != nil { + log.Warn("failed to DecompressBinLog", zap.Error(err)) + return val, err + } + pks, err := loadStats(ctx, node.chunkManager, ds.metacache.Schema(), newSeg.GetSegmentId(), []*datapb.FieldBinlog{newSeg.GetPkStatsLog()}) + if err != nil { + log.Warn("failed to load segment stats log", zap.Error(err)) + return val, err + } + val = metacache.NewBloomFilterSet(pks...) + return val, nil + }) + futures = append(futures, future) + } + + err := conc.AwaitAll(futures...) 
if err != nil { - log.Warn("failed to load segment statslog", zap.Error(err)) return merr.Status(err), nil } - bfs := metacache.NewBloomFilterSet(pks...) - ds.metacache.CompactSegments(req.GetCompactedTo(), req.GetPartitionId(), req.GetNumOfRows(), bfs, req.GetCompactedFrom()...) - node.compactionExecutor.injectDone(req.GetPlanID()) + + newSegmentsBF := lo.Map(futures, func(future *conc.Future[any], _ int) *metacache.BloomFilterSet { + return future.Value().(*metacache.BloomFilterSet) + }) + + ds.metacache.UpdateSegmentView(req.GetPartitionId(), newSegments, newSegmentsBF, allSegments) return merr.Success(), nil } @@ -419,7 +425,7 @@ func (node *DataNode) PreImport(ctx context.Context, req *datapb.PreImportReques return merr.Status(err), nil } - task := importv2.NewPreImportTask(req) + task := importv2.NewPreImportTask(req, node.importTaskMgr, node.chunkManager) node.importTaskMgr.Add(task) log.Info("datanode added preimport task") @@ -438,7 +444,7 @@ func (node *DataNode) ImportV2(ctx context.Context, req *datapb.ImportRequest) ( if err := merr.CheckHealthy(node.GetStateCode()); err != nil { return merr.Status(err), nil } - task := importv2.NewImportTask(req) + task := importv2.NewImportTask(req, node.importTaskMgr, node.syncMgr, node.chunkManager) node.importTaskMgr.Add(task) log.Info("datanode added import task") diff --git a/internal/datanode/services_test.go b/internal/datanode/services_test.go index 94eed7f5193e5..97c5ed51c4bce 100644 --- a/internal/datanode/services_test.go +++ b/internal/datanode/services_test.go @@ -34,6 +34,7 @@ import ( allocator2 "github.com/milvus-io/milvus/internal/allocator" "github.com/milvus-io/milvus/internal/datanode/allocator" "github.com/milvus-io/milvus/internal/datanode/broker" + "github.com/milvus-io/milvus/internal/datanode/compaction" "github.com/milvus-io/milvus/internal/datanode/metacache" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/internalpb" @@ -159,8 +160,12 @@ func (s *DataNodeServicesSuite) TestGetComponentStates() { func (s *DataNodeServicesSuite) TestGetCompactionState() { s.Run("success", func() { - s.node.compactionExecutor.executing.Insert(int64(3), newMockCompactor(true)) - s.node.compactionExecutor.executing.Insert(int64(2), newMockCompactor(true)) + mockC := compaction.NewMockCompactor(s.T()) + s.node.compactionExecutor.executing.Insert(int64(3), mockC) + + mockC2 := compaction.NewMockCompactor(s.T()) + s.node.compactionExecutor.executing.Insert(int64(2), mockC2) + s.node.compactionExecutor.completed.Insert(int64(1), &datapb.CompactionPlanResult{ PlanID: 1, State: commonpb.CompactionState_Completed, @@ -168,9 +173,16 @@ func (s *DataNodeServicesSuite) TestGetCompactionState() { {SegmentID: 10}, }, }) + + s.node.compactionExecutor.completed.Insert(int64(4), &datapb.CompactionPlanResult{ + PlanID: 4, + Type: datapb.CompactionType_Level0DeleteCompaction, + State: commonpb.CompactionState_Completed, + }) + stat, err := s.node.GetCompactionState(s.ctx, nil) s.Assert().NoError(err) - s.Assert().Equal(3, len(stat.GetResults())) + s.Assert().Equal(4, len(stat.GetResults())) var mu sync.RWMutex cnt := 0 @@ -182,7 +194,7 @@ func (s *DataNodeServicesSuite) TestGetCompactionState() { } } mu.Lock() - s.Assert().Equal(1, cnt) + s.Assert().Equal(2, cnt) mu.Unlock() s.Assert().Equal(1, s.node.compactionExecutor.completed.Len()) @@ -198,50 +210,7 @@ func (s *DataNodeServicesSuite) TestGetCompactionState() { func (s *DataNodeServicesSuite) TestCompaction() { dmChannelName := 
"by-dev-rootcoord-dml_0_100v0" - schema := &schemapb.CollectionSchema{ - Name: "test_collection", - Fields: []*schemapb.FieldSchema{ - {FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64}, - {FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64}, - {FieldID: common.StartOfUserFieldID, DataType: schemapb.DataType_Int64, IsPrimaryKey: true, Name: "pk"}, - {FieldID: common.StartOfUserFieldID + 1, DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ - {Key: common.DimKey, Value: "128"}, - }}, - }, - } - flushedSegmentID := int64(100) - growingSegmentID := int64(101) - vchan := &datapb.VchannelInfo{ - CollectionID: 1, - ChannelName: dmChannelName, - UnflushedSegmentIds: []int64{}, - FlushedSegmentIds: []int64{}, - } - - err := s.node.flowgraphManager.AddandStartWithEtcdTickler(s.node, vchan, schema, genTestTickler()) - s.Require().NoError(err) - - fgservice, ok := s.node.flowgraphManager.GetFlowgraphService(dmChannelName) - s.Require().True(ok) - - metaCache := metacache.NewMockMetaCache(s.T()) - metaCache.EXPECT().Collection().Return(1).Maybe() - metaCache.EXPECT().Schema().Return(schema).Maybe() - s.node.writeBufferManager.Register(dmChannelName, metaCache, nil) - - fgservice.metacache.AddSegment(&datapb.SegmentInfo{ - ID: flushedSegmentID, - CollectionID: 1, - PartitionID: 2, - StartPosition: &msgpb.MsgPosition{}, - }, func(_ *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }) - fgservice.metacache.AddSegment(&datapb.SegmentInfo{ - ID: growingSegmentID, - CollectionID: 1, - PartitionID: 2, - StartPosition: &msgpb.MsgPosition{}, - }, func(_ *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }) s.Run("service_not_ready", func() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -257,40 +226,7 @@ func (s *DataNodeServicesSuite) TestCompaction() { s.False(merr.Ok(resp)) }) - s.Run("channel_not_match", func() { - node := s.node - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - req := &datapb.CompactionPlan{ - PlanID: 1000, - Channel: dmChannelName + "other", - } - - resp, err := node.Compaction(ctx, req) - s.NoError(err) - s.False(merr.Ok(resp)) - }) - - s.Run("channel_dropped", func() { - node := s.node - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - node.compactionExecutor.dropped.Insert(dmChannelName) - defer node.compactionExecutor.dropped.Remove(dmChannelName) - - req := &datapb.CompactionPlan{ - PlanID: 1000, - Channel: dmChannelName, - } - - resp, err := node.Compaction(ctx, req) - s.NoError(err) - s.False(merr.Ok(resp)) - }) - - s.Run("compact_growing_segment", func() { + s.Run("unknown CompactionType", func() { node := s.node ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -300,7 +236,7 @@ func (s *DataNodeServicesSuite) TestCompaction() { Channel: dmChannelName, SegmentBinlogs: []*datapb.CompactionSegmentBinlogs{ {SegmentID: 102, Level: datapb.SegmentLevel_L0}, - {SegmentID: growingSegmentID, Level: datapb.SegmentLevel_L1}, + {SegmentID: 103, Level: datapb.SegmentLevel_L1}, }, } @@ -347,6 +283,7 @@ func (s *DataNodeServicesSuite) TestFlushSegments() { ID: segmentID, CollectionID: 1, PartitionID: 2, + State: commonpb.SegmentState_Growing, StartPosition: &msgpb.MsgPosition{}, }, func(_ *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }) @@ -494,126 +431,6 @@ func 
(s *DataNodeServicesSuite) TestGetMetrics() { zap.String("response", resp.Response)) } -func (s *DataNodeServicesSuite) TestSyncSegments() { - chanName := "fake-by-dev-rootcoord-dml-test-syncsegments-1" - schema := &schemapb.CollectionSchema{ - Name: "test_collection", - Fields: []*schemapb.FieldSchema{ - {FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64}, - {FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64}, - {FieldID: common.StartOfUserFieldID, DataType: schemapb.DataType_Int64, IsPrimaryKey: true, Name: "pk"}, - {FieldID: common.StartOfUserFieldID + 1, DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{ - {Key: common.DimKey, Value: "128"}, - }}, - }, - } - - err := s.node.flowgraphManager.AddandStartWithEtcdTickler(s.node, &datapb.VchannelInfo{ - CollectionID: 1, - ChannelName: chanName, - UnflushedSegmentIds: []int64{}, - FlushedSegmentIds: []int64{100, 200, 300}, - }, schema, genTestTickler()) - s.Require().NoError(err) - fg, ok := s.node.flowgraphManager.GetFlowgraphService(chanName) - s.Assert().True(ok) - - fg.metacache.AddSegment(&datapb.SegmentInfo{ID: 100, CollectionID: 1, State: commonpb.SegmentState_Flushed}, EmptyBfsFactory) - fg.metacache.AddSegment(&datapb.SegmentInfo{ID: 101, CollectionID: 1, State: commonpb.SegmentState_Flushed}, EmptyBfsFactory) - fg.metacache.AddSegment(&datapb.SegmentInfo{ID: 200, CollectionID: 1, State: commonpb.SegmentState_Flushed}, EmptyBfsFactory) - fg.metacache.AddSegment(&datapb.SegmentInfo{ID: 201, CollectionID: 1, State: commonpb.SegmentState_Flushed}, EmptyBfsFactory) - fg.metacache.AddSegment(&datapb.SegmentInfo{ID: 300, CollectionID: 1, State: commonpb.SegmentState_Flushed}, EmptyBfsFactory) - - s.Run("empty compactedFrom", func() { - req := &datapb.SyncSegmentsRequest{ - CompactedTo: 400, - NumOfRows: 100, - } - - req.CompactedFrom = []UniqueID{} - status, err := s.node.SyncSegments(s.ctx, req) - s.Assert().NoError(err) - s.Assert().True(merr.Ok(status)) - }) - - s.Run("invalid compacted from", func() { - req := &datapb.SyncSegmentsRequest{ - CompactedTo: 400, - NumOfRows: 100, - CompactedFrom: []UniqueID{101, 201}, - } - - req.CompactedFrom = []UniqueID{101, 201} - status, err := s.node.SyncSegments(s.ctx, req) - s.Assert().NoError(err) - s.Assert().False(merr.Ok(status)) - }) - - s.Run("valid request numRows>0", func() { - req := &datapb.SyncSegmentsRequest{ - CompactedFrom: []UniqueID{100, 200, 101, 201}, - CompactedTo: 102, - NumOfRows: 100, - ChannelName: chanName, - CollectionId: 1, - } - status, err := s.node.SyncSegments(s.ctx, req) - s.Assert().NoError(err) - s.Assert().True(merr.Ok(status)) - - _, result := fg.metacache.GetSegmentByID(req.GetCompactedTo(), metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - s.True(result) - for _, compactFrom := range req.GetCompactedFrom() { - seg, result := fg.metacache.GetSegmentByID(compactFrom, metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - s.True(result) - s.Equal(req.CompactedTo, seg.CompactTo()) - } - - status, err = s.node.SyncSegments(s.ctx, req) - s.Assert().NoError(err) - s.Assert().True(merr.Ok(status)) - }) - - s.Run("without_channel_meta", func() { - fg.metacache.UpdateSegments(metacache.UpdateState(commonpb.SegmentState_Flushed), - metacache.WithSegmentIDs(100, 200, 300)) - - req := &datapb.SyncSegmentsRequest{ - CompactedFrom: []int64{100, 200}, - CompactedTo: 101, - NumOfRows: 0, - } - status, err := s.node.SyncSegments(s.ctx, req) - 
s.Assert().NoError(err) - s.Assert().False(merr.Ok(status)) - }) - - s.Run("valid_request_with_meta_num=0", func() { - fg.metacache.UpdateSegments(metacache.UpdateState(commonpb.SegmentState_Flushed), - metacache.WithSegmentIDs(100, 200, 300)) - - req := &datapb.SyncSegmentsRequest{ - CompactedFrom: []int64{100, 200}, - CompactedTo: 301, - NumOfRows: 0, - ChannelName: chanName, - CollectionId: 1, - } - status, err := s.node.SyncSegments(s.ctx, req) - s.Assert().NoError(err) - s.Assert().True(merr.Ok(status)) - - seg, result := fg.metacache.GetSegmentByID(100, metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - s.True(result) - s.Equal(metacache.NullSegment, seg.CompactTo()) - seg, result = fg.metacache.GetSegmentByID(200, metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - s.True(result) - s.Equal(metacache.NullSegment, seg.CompactTo()) - _, result = fg.metacache.GetSegmentByID(301, metacache.WithSegmentState(commonpb.SegmentState_Flushed)) - s.False(result) - }) -} - func (s *DataNodeServicesSuite) TestResendSegmentStats() { req := &datapb.ResendSegmentStatsRequest{ Base: &commonpb.MsgBase{}, @@ -676,3 +493,39 @@ func (s *DataNodeServicesSuite) TestQuerySlot() { s.NoError(merr.Error(resp.GetStatus())) }) } + +func (s *DataNodeServicesSuite) TestSyncSegments() { + s.Run("node not healthy", func() { + s.SetupTest() + s.node.UpdateStateCode(commonpb.StateCode_Abnormal) + + ctx := context.Background() + status, err := s.node.SyncSegments(ctx, nil) + s.NoError(err) + s.False(merr.Ok(status)) + s.ErrorIs(merr.Error(status), merr.ErrServiceNotReady) + }) + + s.Run("normal case", func() { + s.SetupTest() + ctx := context.Background() + req := &datapb.SyncSegmentsRequest{ + ChannelName: "channel1", + PartitionId: 2, + CollectionId: 1, + SegmentInfos: map[int64]*datapb.SyncSegmentInfo{ + 3: { + SegmentId: 3, + PkStatsLog: nil, + State: commonpb.SegmentState_Dropped, + Level: 2, + NumOfRows: 1024, + }, + }, + } + + status, err := s.node.SyncSegments(ctx, req) + s.NoError(err) + s.False(merr.Ok(status)) + }) +} diff --git a/internal/datanode/syncmgr/meta_writer.go b/internal/datanode/syncmgr/meta_writer.go index 0e82f6cfe66bc..9a6d864895277 100644 --- a/internal/datanode/syncmgr/meta_writer.go +++ b/internal/datanode/syncmgr/meta_writer.go @@ -7,6 +7,7 @@ import ( "github.com/samber/lo" "go.uber.org/zap" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus/internal/datanode/broker" "github.com/milvus-io/milvus/internal/datanode/metacache" "github.com/milvus-io/milvus/internal/proto/datapb" @@ -60,7 +61,8 @@ func (b *brokerMetaWriter) UpdateSync(pack *SyncTask) error { Position: pack.checkpoint, }) - startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition { + startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Sealed, commonpb.SegmentState_Flushing), + metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition { return &datapb.SegmentStartPosition{ SegmentID: info.SegmentID(), StartPosition: info.StartPosition(), @@ -150,7 +152,8 @@ func (b *brokerMetaWriter) UpdateSyncV2(pack *SyncTaskV2) error { Position: pack.checkpoint, }) - startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition { + startPos := 
lo.Map(pack.metacache.GetSegmentsBy(metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Flushing), + metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition { return &datapb.SegmentStartPosition{ SegmentID: info.SegmentID(), StartPosition: info.StartPosition(), diff --git a/internal/datanode/syncmgr/meta_writer_test.go b/internal/datanode/syncmgr/meta_writer_test.go index fc1d921b70160..ef5c4e83d825d 100644 --- a/internal/datanode/syncmgr/meta_writer_test.go +++ b/internal/datanode/syncmgr/meta_writer_test.go @@ -39,7 +39,7 @@ func (s *MetaWriterSuite) TestNormalSave() { bfs := metacache.NewBloomFilterSet() seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) metacache.UpdateNumOfRows(1000)(seg) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() task := NewSyncTask() @@ -55,7 +55,7 @@ func (s *MetaWriterSuite) TestReturnError() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) metacache.UpdateNumOfRows(1000)(seg) s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) task := NewSyncTask() task.WithMetaCache(s.metacache) err := s.writer.UpdateSync(task) @@ -69,7 +69,7 @@ func (s *MetaWriterSuite) TestNormalSaveV2() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) metacache.UpdateNumOfRows(1000)(seg) s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) task := NewSyncTaskV2() task.WithMetaCache(s.metacache) err := s.writer.UpdateSyncV2(task) @@ -83,7 +83,7 @@ func (s *MetaWriterSuite) TestReturnErrorV2() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) metacache.UpdateNumOfRows(1000)(seg) s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) task := NewSyncTaskV2() task.WithMetaCache(s.metacache) err := s.writer.UpdateSyncV2(task) diff --git a/internal/datanode/syncmgr/mock_sync_manager.go b/internal/datanode/syncmgr/mock_sync_manager.go index 34c69ac6b011d..ee19d324d3943 100644 --- a/internal/datanode/syncmgr/mock_sync_manager.go +++ b/internal/datanode/syncmgr/mock_sync_manager.go @@ -25,39 +25,6 @@ func (_m *MockSyncManager) EXPECT() *MockSyncManager_Expecter { return &MockSyncManager_Expecter{mock: &_m.Mock} } -// Block provides a mock function with given fields: segmentID -func (_m *MockSyncManager) Block(segmentID int64) { - _m.Called(segmentID) -} - -// MockSyncManager_Block_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Block' -type MockSyncManager_Block_Call struct { - *mock.Call -} - -// Block is a helper method to define mock.On call -// - segmentID int64 -func (_e *MockSyncManager_Expecter) Block(segmentID 
interface{}) *MockSyncManager_Block_Call { - return &MockSyncManager_Block_Call{Call: _e.mock.On("Block", segmentID)} -} - -func (_c *MockSyncManager_Block_Call) Run(run func(segmentID int64)) *MockSyncManager_Block_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(int64)) - }) - return _c -} - -func (_c *MockSyncManager_Block_Call) Return() *MockSyncManager_Block_Call { - _c.Call.Return() - return _c -} - -func (_c *MockSyncManager_Block_Call) RunAndReturn(run func(int64)) *MockSyncManager_Block_Call { - _c.Call.Return(run) - return _c -} - // GetEarliestPosition provides a mock function with given fields: channel func (_m *MockSyncManager) GetEarliestPosition(channel string) (int64, *msgpb.MsgPosition) { ret := _m.Called(channel) @@ -157,39 +124,6 @@ func (_c *MockSyncManager_SyncData_Call) RunAndReturn(run func(context.Context, return _c } -// Unblock provides a mock function with given fields: segmentID -func (_m *MockSyncManager) Unblock(segmentID int64) { - _m.Called(segmentID) -} - -// MockSyncManager_Unblock_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Unblock' -type MockSyncManager_Unblock_Call struct { - *mock.Call -} - -// Unblock is a helper method to define mock.On call -// - segmentID int64 -func (_e *MockSyncManager_Expecter) Unblock(segmentID interface{}) *MockSyncManager_Unblock_Call { - return &MockSyncManager_Unblock_Call{Call: _e.mock.On("Unblock", segmentID)} -} - -func (_c *MockSyncManager_Unblock_Call) Run(run func(segmentID int64)) *MockSyncManager_Unblock_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(int64)) - }) - return _c -} - -func (_c *MockSyncManager_Unblock_Call) Return() *MockSyncManager_Unblock_Call { - _c.Call.Return() - return _c -} - -func (_c *MockSyncManager_Unblock_Call) RunAndReturn(run func(int64)) *MockSyncManager_Unblock_Call { - _c.Call.Return(run) - return _c -} - // NewMockSyncManager creates a new instance of MockSyncManager. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewMockSyncManager(t interface { diff --git a/internal/datanode/syncmgr/storage_serializer.go b/internal/datanode/syncmgr/storage_serializer.go index 784f349940858..35c0789adf8e5 100644 --- a/internal/datanode/syncmgr/storage_serializer.go +++ b/internal/datanode/syncmgr/storage_serializer.go @@ -205,6 +205,7 @@ func (s *storageV1Serializer) serializeMergedPkStats(pack *SyncPack) (*storage.B FieldID: s.pkField.GetFieldID(), MaxPk: pks.MaxPK, MinPk: pks.MinPK, + BFType: pks.PkFilter.Type(), BF: pks.PkFilter, PkType: int64(s.pkField.GetDataType()), } diff --git a/internal/datanode/syncmgr/sync_manager.go b/internal/datanode/syncmgr/sync_manager.go index 1f4534c52f447..6a564c01345a4 100644 --- a/internal/datanode/syncmgr/sync_manager.go +++ b/internal/datanode/syncmgr/sync_manager.go @@ -40,19 +40,15 @@ type SyncMeta struct { metacache metacache.MetaCache } -// SyncMangger is the interface for sync manager. +// SyncManager is the interface for sync manager. // it processes the sync tasks inside and changes the meta. +// +//go:generate mockery --name=SyncManager --structname=MockSyncManager --output=./ --filename=mock_sync_manager.go --with-expecter --inpackage type SyncManager interface { // SyncData is the method to submit sync task. 
SyncData(ctx context.Context, task Task) *conc.Future[struct{}] // GetEarliestPosition returns the earliest position (normally start position) of the processing sync task of provided channel. GetEarliestPosition(channel string) (int64, *msgpb.MsgPosition) - // Block allows caller to block tasks of provided segment id. - // normally used by compaction task. - // if levelzero delta policy is enabled, this shall be an empty operation. - Block(segmentID int64) - // Unblock is the reverse method for `Block`. - Unblock(segmentID int64) } type syncManager struct { @@ -121,7 +117,6 @@ func (mgr *syncManager) SyncData(ctx context.Context, task Task) *conc.Future[st func (mgr *syncManager) safeSubmitTask(task Task) *conc.Future[struct{}] { taskKey := fmt.Sprintf("%d-%d", task.SegmentID(), task.Checkpoint().GetTimestamp()) mgr.tasks.Insert(taskKey, task) - defer mgr.tasks.Remove(taskKey) key, err := task.CalcTargetSegment() if err != nil { @@ -133,6 +128,7 @@ func (mgr *syncManager) safeSubmitTask(task Task) *conc.Future[struct{}] { } func (mgr *syncManager) submit(key int64, task Task) *conc.Future[struct{}] { + taskKey := fmt.Sprintf("%d-%d", task.SegmentID(), task.Checkpoint().GetTimestamp()) handler := func(err error) error { if err == nil { return nil @@ -161,7 +157,10 @@ func (mgr *syncManager) submit(key int64, task Task) *conc.Future[struct{}] { return mgr.submit(targetID, task).Err() } log.Info("sync mgr sumbit task with key", zap.Int64("key", key)) - return mgr.Submit(key, task, handler) + return mgr.Submit(key, task, handler, func(err error) error { + mgr.tasks.Remove(taskKey) + return err + }) } func (mgr *syncManager) GetEarliestPosition(channel string) (int64, *msgpb.MsgPosition) { @@ -181,11 +180,3 @@ func (mgr *syncManager) GetEarliestPosition(channel string) (int64, *msgpb.MsgPo }) return segmentID, cp } - -func (mgr *syncManager) Block(segmentID int64) { - mgr.keyLock.Lock(segmentID) -} - -func (mgr *syncManager) Unblock(segmentID int64) { - mgr.keyLock.Unlock(segmentID) -} diff --git a/internal/datanode/syncmgr/sync_manager_test.go b/internal/datanode/syncmgr/sync_manager_test.go index 6f12a98df4d15..c1ac3000505b9 100644 --- a/internal/datanode/syncmgr/sync_manager_test.go +++ b/internal/datanode/syncmgr/sync_manager_test.go @@ -155,7 +155,7 @@ func (s *SyncManagerSuite) TestSubmit() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) metacache.UpdateNumOfRows(1000)(seg) s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() manager, err := NewSyncManager(s.chunkManager, s.allocator) @@ -186,7 +186,7 @@ func (s *SyncManagerSuite) TestCompacted() { metacache.UpdateNumOfRows(1000)(seg) metacache.CompactTo(1001)(seg) s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() manager, err := NewSyncManager(s.chunkManager, s.allocator) @@ -208,52 +208,6 @@ func (s *SyncManagerSuite) TestCompacted() { s.EqualValues(1001, segmentID.Load()) } -func (s *SyncManagerSuite) TestBlock() { - sig := make(chan struct{}) - counter := 
atomic.NewInt32(0) - s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil) - bfs := metacache.NewBloomFilterSet() - seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ - ID: s.segmentID, - }, bfs) - metacache.UpdateNumOfRows(1000)(seg) - s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything). - RunAndReturn(func(...metacache.SegmentFilter) []*metacache.SegmentInfo { - return []*metacache.SegmentInfo{seg} - }) - s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Run(func(_ metacache.SegmentAction, filters ...metacache.SegmentFilter) { - if counter.Inc() == 2 { - close(sig) - } - }) - - manager, err := NewSyncManager(s.chunkManager, s.allocator) - s.NoError(err) - - // block - manager.Block(s.segmentID) - - task := s.getSuiteSyncTask() - task.WithMetaWriter(BrokerMetaWriter(s.broker, 1)) - task.WithTimeRange(50, 100) - task.WithCheckpoint(&msgpb.MsgPosition{ - ChannelName: s.channelName, - MsgID: []byte{1, 2, 3, 4}, - Timestamp: 100, - }) - go manager.SyncData(context.Background(), task) - - select { - case <-sig: - s.FailNow("sync task done during block") - case <-time.After(time.Second): - } - - manager.Unblock(s.segmentID) - <-sig -} - func (s *SyncManagerSuite) TestResizePool() { manager, err := NewSyncManager(s.chunkManager, s.allocator) s.NoError(err) diff --git a/internal/datanode/syncmgr/task_test.go b/internal/datanode/syncmgr/task_test.go index d03aa278d7601..62ef40ce5666b 100644 --- a/internal/datanode/syncmgr/task_test.go +++ b/internal/datanode/syncmgr/task_test.go @@ -185,7 +185,7 @@ func (s *SyncTaskSuite) TestRunNormal() { metacache.UpdateNumOfRows(1000)(seg) seg.GetBloomFilterSet().Roll() s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() s.Run("without_data", func() { @@ -268,7 +268,7 @@ func (s *SyncTaskSuite) TestRunL0Segment() { bfs := metacache.NewBloomFilterSet() seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{Level: datapb.SegmentLevel_L0}, bfs) s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() s.Run("pure_delete_l0_flush", func() { @@ -362,7 +362,7 @@ func (s *SyncTaskSuite) TestRunError() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, metacache.NewBloomFilterSet()) metacache.UpdateNumOfRows(1000)(seg) s.metacache.EXPECT().GetSegmentByID(s.segmentID).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.Run("allocate_id_fail", func() { mockAllocator := allocator.NewMockAllocator(s.T()) diff --git a/internal/datanode/syncmgr/taskv2_test.go b/internal/datanode/syncmgr/taskv2_test.go index 9367689ed1a28..bb8b36619129c 100644 --- a/internal/datanode/syncmgr/taskv2_test.go +++ b/internal/datanode/syncmgr/taskv2_test.go @@ -216,7 +216,7 @@ func (s *SyncTaskSuiteV2) TestRunNormal() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, 
bfs) metacache.UpdateNumOfRows(1000)(seg) s.metacache.EXPECT().GetSegmentByID(mock.Anything).Return(seg, true) - s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() s.Run("without_insert_delete", func() { diff --git a/internal/datanode/timetick_sender.go b/internal/datanode/timetick_sender.go index 145e60aec8cb8..ecce410c05501 100644 --- a/internal/datanode/timetick_sender.go +++ b/internal/datanode/timetick_sender.go @@ -148,7 +148,6 @@ func (m *timeTickSender) cleanStatesCache(lastSentTss map[string]uint64) { m.mu.Lock() defer m.mu.Unlock() sizeBeforeClean := len(m.statsCache) - log := log.With(zap.Any("lastSentTss", lastSentTss), zap.Int("sizeBeforeClean", sizeBeforeClean)) for channelName, lastSentTs := range lastSentTss { _, ok := m.statsCache[channelName] if ok { @@ -162,7 +161,7 @@ func (m *timeTickSender) cleanStatesCache(lastSentTss map[string]uint64) { delete(m.statsCache, channelName) } } - log.RatedDebug(30, "timeTickSender stats", zap.Int("sizeAfterClean", len(m.statsCache))) + log.RatedDebug(30, "timeTickSender stats", zap.Any("lastSentTss", lastSentTss), zap.Int("sizeBeforeClean", sizeBeforeClean), zap.Int("sizeAfterClean", len(m.statsCache))) } func (m *timeTickSender) sendReport(ctx context.Context) error { diff --git a/internal/datanode/writebuffer/bf_write_buffer.go b/internal/datanode/writebuffer/bf_write_buffer.go index 0438396879eb9..322c4d56920d2 100644 --- a/internal/datanode/writebuffer/bf_write_buffer.go +++ b/internal/datanode/writebuffer/bf_write_buffer.go @@ -35,7 +35,7 @@ func (wb *bfWriteBuffer) dispatchDeleteMsgs(groups []*inData, deleteMsgs []*msgs // distribute delete msg for previous data for _, delMsg := range deleteMsgs { pks := storage.ParseIDs2PrimaryKeys(delMsg.GetPrimaryKeys()) - lcs := lo.Map(pks, func(pk storage.PrimaryKey, _ int) storage.LocationsCache { return storage.NewLocationsCache(pk) }) + lcs := lo.Map(pks, func(pk storage.PrimaryKey, _ int) *storage.LocationsCache { return storage.NewLocationsCache(pk) }) segments := wb.metaCache.GetSegmentsBy(metacache.WithPartitionID(delMsg.PartitionID), metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Flushing, commonpb.SegmentState_Flushed)) for _, segment := range segments { @@ -98,7 +98,8 @@ func (wb *bfWriteBuffer) BufferData(insertMsgs []*msgstream.InsertMsg, deleteMsg // update pk oracle for _, inData := range groups { // segment shall always exists after buffer insert - segments := wb.metaCache.GetSegmentsBy(metacache.WithSegmentIDs(inData.segmentID)) + segments := wb.metaCache.GetSegmentsBy( + metacache.WithSegmentIDs(inData.segmentID)) for _, segment := range segments { for _, fieldData := range inData.pkField { err := segment.GetBloomFilterSet().UpdatePKRange(fieldData) diff --git a/internal/datanode/writebuffer/bf_write_buffer_test.go b/internal/datanode/writebuffer/bf_write_buffer_test.go index d1881c034be1a..c7c80fa26e42a 100644 --- a/internal/datanode/writebuffer/bf_write_buffer_test.go +++ b/internal/datanode/writebuffer/bf_write_buffer_test.go @@ -218,7 +218,7 @@ func (s *BFWriteBufferSuite) TestBufferData() { s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, 
mock.Anything).Return() - s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64) delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) })) @@ -248,7 +248,7 @@ func (s *BFWriteBufferSuite) TestBufferData() { s.metacacheVarchar.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) s.metacacheVarchar.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheVarchar.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacacheVarchar.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheVarchar.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_VarChar) delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewVarCharPrimaryKey(fmt.Sprintf("%v", id)) })) @@ -273,7 +273,7 @@ func (s *BFWriteBufferSuite) TestBufferData() { s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_VarChar) delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) })) @@ -294,7 +294,7 @@ func (s *BFWriteBufferSuite) TestBufferData() { s.metacacheVarchar.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) s.metacacheVarchar.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheVarchar.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacacheVarchar.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheVarchar.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) pks, msg := s.composeInsertMsg(1000, 10, 128, schemapb.DataType_Int64) delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) })) @@ -325,7 +325,7 @@ func (s *BFWriteBufferSuite) TestAutoSync() { s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(seg, true).Once() s.metacacheInt64.EXPECT().GetSegmentByID(int64(1002)).Return(seg1, true) s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything).Return([]int64{1002}) - s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything).Return() @@ -363,7 +363,7 @@ func (s *BFWriteBufferSuite) TestBufferDataWithStorageV2() { seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) 
s.metacacheInt64.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) - s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() @@ -409,7 +409,7 @@ func (s *BFWriteBufferSuite) TestAutoSyncWithStorageV2() { s.metacacheInt64.EXPECT().GetSegmentByID(int64(1000)).Return(seg, true).Once() s.metacacheInt64.EXPECT().GetSegmentByID(int64(1002)).Return(seg1, true) s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything).Return([]int64{1002}) - s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{1003}) // mocked compacted + s.metacacheInt64.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{1003}) // mocked compacted s.metacacheInt64.EXPECT().RemoveSegments(mock.Anything).Return([]int64{1003}) s.metacacheInt64.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacacheInt64.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() diff --git a/internal/datanode/writebuffer/insert_buffer.go b/internal/datanode/writebuffer/insert_buffer.go index adc052d0013b2..417c258b34b44 100644 --- a/internal/datanode/writebuffer/insert_buffer.go +++ b/internal/datanode/writebuffer/insert_buffer.go @@ -67,10 +67,7 @@ func (b *BufferBase) MinTimestamp() typeutil.Timestamp { } func (b *BufferBase) GetTimeRange() *TimeRange { - return &TimeRange{ - timestampMin: b.TimestampFrom, - timestampMax: b.TimestampTo, - } + return NewTimeRange(b.TimestampFrom, b.TimestampTo) } type InsertBuffer struct { @@ -117,16 +114,16 @@ func (ib *InsertBuffer) Yield() *storage.InsertData { } func (ib *InsertBuffer) Buffer(inData *inData, startPos, endPos *msgpb.MsgPosition) int64 { - totalMemSize := int64(0) + bufferedSize := int64(0) for idx, data := range inData.data { storage.MergeInsertData(ib.buffer, data) tsData := inData.tsField[idx] // update buffer size ib.UpdateStatistics(int64(data.GetRowNum()), int64(data.GetMemorySize()), ib.getTimestampRange(tsData), startPos, endPos) - totalMemSize += int64(data.GetMemorySize()) + bufferedSize += int64(data.GetMemorySize()) } - return totalMemSize + return bufferedSize } func (ib *InsertBuffer) getTimestampRange(tsData *storage.Int64FieldData) TimeRange { diff --git a/internal/datanode/writebuffer/l0_write_buffer.go b/internal/datanode/writebuffer/l0_write_buffer.go index ebb1f9184aaec..019994406f779 100644 --- a/internal/datanode/writebuffer/l0_write_buffer.go +++ b/internal/datanode/writebuffer/l0_write_buffer.go @@ -14,7 +14,6 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/retry" @@ -52,7 +51,7 @@ func (wb *l0WriteBuffer) dispatchDeleteMsgs(groups []*inData, deleteMsgs []*msgs for _, delMsg := range deleteMsgs { l0SegmentID := wb.getL0SegmentID(delMsg.GetPartitionID(), startPos) pks := storage.ParseIDs2PrimaryKeys(delMsg.GetPrimaryKeys()) - lcs := lo.Map(pks, func(pk storage.PrimaryKey, _ int) 
storage.LocationsCache { return storage.NewLocationsCache(pk) }) + lcs := lo.Map(pks, func(pk storage.PrimaryKey, _ int) *storage.LocationsCache { return storage.NewLocationsCache(pk) }) segments := wb.metaCache.GetSegmentsBy(metacache.WithPartitionID(delMsg.PartitionID), metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Flushing, commonpb.SegmentState_Flushed)) for _, segment := range segments { @@ -143,6 +142,7 @@ func (wb *l0WriteBuffer) BufferData(insertMsgs []*msgstream.InsertMsg, deleteMsg } func (wb *l0WriteBuffer) getL0SegmentID(partitionID int64, startPos *msgpb.MsgPosition) int64 { + log := wb.logger segmentID, ok := wb.l0Segments[partitionID] if !ok { err := retry.Do(context.Background(), func() error { @@ -168,7 +168,6 @@ func (wb *l0WriteBuffer) getL0SegmentID(partitionID int64, startPos *msgpb.MsgPo log.Info("Add a new level zero segment", zap.Int64("segmentID", segmentID), zap.String("level", datapb.SegmentLevel_L0.String()), - zap.String("channel", wb.channelName), zap.Any("start position", startPos), ) } diff --git a/internal/datanode/writebuffer/l0_write_buffer_test.go b/internal/datanode/writebuffer/l0_write_buffer_test.go index a7a1cf5261d17..29b231dc2c25e 100644 --- a/internal/datanode/writebuffer/l0_write_buffer_test.go +++ b/internal/datanode/writebuffer/l0_write_buffer_test.go @@ -186,7 +186,7 @@ func (s *L0WriteBufferSuite) TestBufferData() { s.metacache.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false).Once() s.metacache.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacache.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacache.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) metrics.DataNodeFlowGraphBufferDataSize.Reset() err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200}) @@ -215,7 +215,7 @@ func (s *L0WriteBufferSuite) TestBufferData() { s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) s.metacache.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() - s.metacache.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything).Return([]int64{}) + s.metacache.EXPECT().GetSegmentIDsBy(mock.Anything, mock.Anything, mock.Anything).Return([]int64{}) metrics.DataNodeFlowGraphBufferDataSize.Reset() err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200}) diff --git a/internal/datanode/writebuffer/segment_buffer.go b/internal/datanode/writebuffer/segment_buffer.go index 8e14c3f4f869d..58ec2b4afda61 100644 --- a/internal/datanode/writebuffer/segment_buffer.go +++ b/internal/datanode/writebuffer/segment_buffer.go @@ -76,6 +76,21 @@ type TimeRange struct { timestampMax typeutil.Timestamp } +func NewTimeRange(min, max typeutil.Timestamp) *TimeRange { + return &TimeRange{ + timestampMin: min, + timestampMax: max, + } +} + +func (tr *TimeRange) GetMinTimestamp() typeutil.Timestamp { + return tr.timestampMin +} + +func (tr *TimeRange) GetMaxTimestamp() typeutil.Timestamp { + return tr.timestampMax +} + func (tr *TimeRange) Merge(other *TimeRange) { if other.timestampMin < tr.timestampMin { tr.timestampMin = other.timestampMin diff 
--git a/internal/datanode/writebuffer/write_buffer.go b/internal/datanode/writebuffer/write_buffer.go index 6c64060ccac8e..cdc1abf4a2deb 100644 --- a/internal/datanode/writebuffer/write_buffer.go +++ b/internal/datanode/writebuffer/write_buffer.go @@ -5,7 +5,6 @@ import ( "fmt" "sync" - "github.com/bits-and-blooms/bloom/v3" "github.com/cockroachdb/errors" "github.com/samber/lo" "go.uber.org/atomic" @@ -83,6 +82,8 @@ type writeBufferBase struct { metaWriter syncmgr.MetaWriter collSchema *schemapb.CollectionSchema + helper *typeutil.SchemaHelper + pkField *schemapb.FieldSchema estSizePerRecord int metaCache metacache.MetaCache syncMgr syncmgr.SyncManager @@ -96,6 +97,10 @@ type writeBufferBase struct { flushTimestamp *atomic.Uint64 storagev2Cache *metacache.StorageV2Cache + + // pre build logger + logger *log.MLogger + cpRatedLogger *log.MLogger } func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (*writeBufferBase, error) { @@ -126,11 +131,21 @@ func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2 if err != nil { return nil, err } + helper, err := typeutil.CreateSchemaHelper(schema) + if err != nil { + return nil, err + } + pkField, err := helper.GetPrimaryKeyField() + if err != nil { + return nil, err + } - return &writeBufferBase{ + wb := &writeBufferBase{ channelName: channel, collectionID: metacache.Collection(), collSchema: schema, + helper: helper, + pkField: pkField, estSizePerRecord: estSize, syncMgr: syncMgr, metaWriter: option.metaWriter, @@ -140,7 +155,13 @@ func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2 syncPolicies: option.syncPolicies, flushTimestamp: flushTs, storagev2Cache: storageV2Cache, - }, nil + } + + wb.logger = log.With(zap.Int64("collectionID", wb.collectionID), + zap.String("channel", wb.channelName)) + wb.cpRatedLogger = wb.logger.WithRateGroup(fmt.Sprintf("writebuffer_cp_%s", wb.channelName), 1, 60) + + return wb, nil } func (wb *writeBufferBase) HasSegment(segmentID int64) bool { @@ -178,13 +199,10 @@ func (wb *writeBufferBase) MemorySize() int64 { } func (wb *writeBufferBase) EvictBuffer(policies ...SyncPolicy) { + log := wb.logger wb.mut.Lock() defer wb.mut.Unlock() - log := log.Ctx(context.Background()).With( - zap.Int64("collectionID", wb.collectionID), - zap.String("channel", wb.channelName), - ) // need valid checkpoint before triggering syncing if wb.checkpoint == nil { log.Warn("evict buffer before buffering data") @@ -201,9 +219,7 @@ func (wb *writeBufferBase) EvictBuffer(policies ...SyncPolicy) { } func (wb *writeBufferBase) GetCheckpoint() *msgpb.MsgPosition { - log := log.Ctx(context.Background()). - With(zap.String("channel", wb.channelName)). 
- WithRateGroup(fmt.Sprintf("writebuffer_cp_%s", wb.channelName), 1, 60) + log := wb.cpRatedLogger wb.mut.RLock() defer wb.mut.RUnlock() @@ -235,7 +251,7 @@ func (wb *writeBufferBase) GetCheckpoint() *msgpb.MsgPosition { switch { case bufferCandidate == nil && syncCandidate == nil: // all buffer are empty - log.RatedInfo(60, "checkpoint from latest consumed msg") + log.RatedDebug(60, "checkpoint from latest consumed msg") return wb.checkpoint case bufferCandidate == nil && syncCandidate != nil: checkpoint = syncCandidate @@ -255,7 +271,7 @@ func (wb *writeBufferBase) GetCheckpoint() *msgpb.MsgPosition { cpSource = "syncManager" } - log.RatedInfo(20, "checkpoint evaluated", + log.RatedDebug(20, "checkpoint evaluated", zap.String("cpSource", cpSource), zap.Int64("segmentID", segmentID), zap.Uint64("cpTimestamp", checkpoint.GetTimestamp())) @@ -274,7 +290,10 @@ func (wb *writeBufferBase) triggerSync() (segmentIDs []int64) { } func (wb *writeBufferBase) cleanupCompactedSegments() { - segmentIDs := wb.metaCache.GetSegmentIDsBy(metacache.WithCompacted(), metacache.WithNoSyncingTask()) + segmentIDs := wb.metaCache.GetSegmentIDsBy( + metacache.WithSegmentState(commonpb.SegmentState_Dropped), + metacache.WithCompacted(), + metacache.WithNoSyncingTask()) // remove compacted only when there is no writebuffer targetIDs := lo.Filter(segmentIDs, func(segmentID int64, _ int) bool { _, ok := wb.buffers[segmentID] @@ -370,46 +389,21 @@ type inData struct { tsField []*storage.Int64FieldData rowNum int64 - batchBF *storage.PkStatistics -} - -func (id *inData) generatePkStats() { - id.batchBF = &storage.PkStatistics{ - PkFilter: bloom.NewWithEstimates(uint(id.rowNum), paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), - } - - for _, ids := range id.pkField { - id.batchBF.UpdatePKRange(ids) - } + intPKTs map[int64]int64 + strPKTs map[string]int64 } func (id *inData) pkExists(pk storage.PrimaryKey, ts uint64) bool { - if !id.batchBF.PkExist(pk) { - return false + var ok bool + var minTs int64 + switch pk.Type() { + case schemapb.DataType_Int64: + minTs, ok = id.intPKTs[pk.GetValue().(int64)] + case schemapb.DataType_VarChar: + minTs, ok = id.strPKTs[pk.GetValue().(string)] } - for batchIdx, timestamps := range id.tsField { - ids := id.pkField[batchIdx] - var primaryKey storage.PrimaryKey - switch pk.Type() { - case schemapb.DataType_Int64: - primaryKey = storage.NewInt64PrimaryKey(0) - case schemapb.DataType_VarChar: - primaryKey = storage.NewVarCharPrimaryKey("") - } - for idx := 0; idx < timestamps.RowNum(); idx++ { - timestamp := timestamps.GetRow(idx).(int64) - if int64(ts) <= timestamp { - continue - } - primaryKey.SetValue(ids.GetRow(idx)) - - if pk.EQ(primaryKey) { - return true - } - } - } - return false + return ok && ts > uint64(minTs) } // prepareInsert transfers InsertMsg into organized InsertData grouped by segmentID @@ -426,6 +420,13 @@ func (wb *writeBufferBase) prepareInsert(insertMsgs []*msgstream.InsertMsg) ([]* data: make([]*storage.InsertData, 0, len(msgs)), pkField: make([]storage.FieldData, 0, len(msgs)), } + switch wb.pkField.GetDataType() { + case schemapb.DataType_Int64: + inData.intPKTs = make(map[int64]int64) + case schemapb.DataType_VarChar: + inData.strPKTs = make(map[string]int64) + } + for _, msg := range msgs { data, err := storage.InsertMsgToInsertData(msg, wb.collSchema) if err != nil { @@ -449,12 +450,32 @@ func (wb *writeBufferBase) prepareInsert(insertMsgs []*msgstream.InsertMsg) ([]* return nil, merr.WrapErrServiceInternal("timestamp column row num not match") 
} + timestamps := tsFieldData.GetRows().([]int64) + + switch wb.pkField.GetDataType() { + case schemapb.DataType_Int64: + pks := pkFieldData.GetRows().([]int64) + for idx, pk := range pks { + ts, ok := inData.intPKTs[pk] + if !ok || timestamps[idx] < ts { + inData.intPKTs[pk] = timestamps[idx] + } + } + case schemapb.DataType_VarChar: + pks := pkFieldData.GetRows().([]string) + for idx, pk := range pks { + ts, ok := inData.strPKTs[pk] + if !ok || timestamps[idx] < ts { + inData.strPKTs[pk] = timestamps[idx] + } + } + } + inData.data = append(inData.data, data) inData.pkField = append(inData.pkField, pkFieldData) inData.tsField = append(inData.tsField, tsFieldData) inData.rowNum += int64(data.GetRowNum()) } - inData.generatePkStats() result = append(result, inData) } @@ -556,6 +577,7 @@ func (wb *writeBufferBase) getEstBatchSize() uint { } func (wb *writeBufferBase) Close(drop bool) { + log := wb.logger // sink all data and call Drop for meta writer wb.mut.Lock() defer wb.mut.Unlock() @@ -583,13 +605,13 @@ func (wb *writeBufferBase) Close(drop bool) { err := conc.AwaitAll(futures...) if err != nil { - log.Error("failed to sink write buffer data", zap.String("channel", wb.channelName), zap.Error(err)) + log.Error("failed to sink write buffer data", zap.Error(err)) // TODO change to remove channel in the future panic(err) } err = wb.metaWriter.DropChannel(wb.channelName) if err != nil { - log.Error("failed to drop channel", zap.String("channel", wb.channelName), zap.Error(err)) + log.Error("failed to drop channel", zap.Error(err)) // TODO change to remove channel in the future panic(err) } diff --git a/internal/distributed/proxy/httpserver/constant.go b/internal/distributed/proxy/httpserver/constant.go index f98373e905820..f106e52787312 100644 --- a/internal/distributed/proxy/httpserver/constant.go +++ b/internal/distributed/proxy/httpserver/constant.go @@ -47,6 +47,7 @@ const ( ) const ( + ContextRequest = "request" ContextUsername = "username" VectorCollectionsPath = "/vector/collections" VectorCollectionsCreatePath = "/vector/collections/create" diff --git a/internal/distributed/proxy/httpserver/handler_v1.go b/internal/distributed/proxy/httpserver/handler_v1.go index 804ed7ab788f3..0cdf7deddf380 100644 --- a/internal/distributed/proxy/httpserver/handler_v1.go +++ b/internal/distributed/proxy/httpserver/handler_v1.go @@ -32,12 +32,12 @@ var RestRequestInterceptorErr = errors.New("interceptor error placeholder") func checkAuthorization(ctx context.Context, c *gin.Context, req interface{}) error { username, ok := c.Get(ContextUsername) if !ok || username.(string) == "" { - c.JSON(http.StatusUnauthorized, gin.H{HTTPReturnCode: merr.Code(merr.ErrNeedAuthenticate), HTTPReturnMessage: merr.ErrNeedAuthenticate.Error()}) + HTTPReturn(c, http.StatusUnauthorized, gin.H{HTTPReturnCode: merr.Code(merr.ErrNeedAuthenticate), HTTPReturnMessage: merr.ErrNeedAuthenticate.Error()}) return RestRequestInterceptorErr } _, authErr := proxy.PrivilegeInterceptor(ctx, req) if authErr != nil { - c.JSON(http.StatusForbidden, gin.H{HTTPReturnCode: merr.Code(authErr), HTTPReturnMessage: authErr.Error()}) + HTTPReturn(c, http.StatusForbidden, gin.H{HTTPReturnCode: merr.Code(authErr), HTTPReturnMessage: authErr.Error()}) return RestRequestInterceptorErr } @@ -104,7 +104,7 @@ func (h *HandlersV1) checkDatabase(ctx context.Context, c *gin.Context, dbName s err = merr.Error(response.GetStatus()) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + 
HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return RestRequestInterceptorErr } for _, db := range response.DbNames { @@ -112,7 +112,7 @@ func (h *HandlersV1) checkDatabase(ctx context.Context, c *gin.Context, dbName s return nil } } - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrDatabaseNotFound), HTTPReturnMessage: merr.ErrDatabaseNotFound.Error() + ", database: " + dbName, }) @@ -133,7 +133,7 @@ func (h *HandlersV1) describeCollection(ctx context.Context, c *gin.Context, dbN err = merr.Error(response.GetStatus()) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return nil, err } primaryField, ok := getPrimaryField(response.Schema) @@ -154,7 +154,7 @@ func (h *HandlersV1) hasCollection(ctx context.Context, c *gin.Context, dbName s err = merr.Error(response.GetStatus()) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return false, err } return response.Value, nil @@ -193,6 +193,7 @@ func (h *HandlersV1) listCollections(c *gin.Context) { req := &milvuspb.ShowCollectionsRequest{ DbName: dbName, } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) @@ -206,7 +207,7 @@ func (h *HandlersV1) listCollections(c *gin.Context) { err = merr.Error(resp.(*milvuspb.ShowCollectionsResponse).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return } response := resp.(*milvuspb.ShowCollectionsResponse) @@ -216,7 +217,7 @@ func (h *HandlersV1) listCollections(c *gin.Context) { } else { collections = []string{} } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: collections}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: collections}) } func (h *HandlersV1) createCollection(c *gin.Context) { @@ -229,7 +230,7 @@ func (h *HandlersV1) createCollection(c *gin.Context) { } if err := c.ShouldBindWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of create collection is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -237,12 +238,20 @@ func (h *HandlersV1) createCollection(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.Dimension == 0 { log.Warn("high level restful api, create collection require parameters: [collectionName, dimension], but miss", zap.Any("request", httpReq)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, dimension]", }) return } + req := 
&milvuspb.CreateCollectionRequest{ + DbName: httpReq.DbName, + CollectionName: httpReq.CollectionName, + ShardsNum: ShardNumDefault, + ConsistencyLevel: commonpb.ConsistencyLevel_Bounded, + } + c.Set(ContextRequest, req) + schema, err := proto.Marshal(&schemapb.CollectionSchema{ Name: httpReq.CollectionName, Description: httpReq.Description, @@ -272,19 +281,13 @@ func (h *HandlersV1) createCollection(c *gin.Context) { }) if err != nil { log.Warn("high level restful api, marshal collection schema fail", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMarshalCollectionSchema), HTTPReturnMessage: merr.ErrMarshalCollectionSchema.Error() + ", error: " + err.Error(), }) return } - req := &milvuspb.CreateCollectionRequest{ - DbName: httpReq.DbName, - CollectionName: httpReq.CollectionName, - Schema: schema, - ShardsNum: ShardNumDefault, - ConsistencyLevel: commonpb.ConsistencyLevel_Bounded, - } + req.Schema = schema username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -297,7 +300,7 @@ func (h *HandlersV1) createCollection(c *gin.Context) { err = merr.Error(response.(*commonpb.Status)) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return } @@ -312,7 +315,7 @@ func (h *HandlersV1) createCollection(c *gin.Context) { err = merr.Error(statusResponse) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return } statusResponse, err = h.proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ @@ -323,17 +326,17 @@ func (h *HandlersV1) createCollection(c *gin.Context) { err = merr.Error(statusResponse) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) } func (h *HandlersV1) getCollectionDetails(c *gin.Context) { collectionName := c.Query(HTTPCollectionName) if collectionName == "" { log.Warn("high level restful api, desc collection require parameter: [collectionName], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName]", }) @@ -347,6 +350,7 @@ func (h *HandlersV1) getCollectionDetails(c *gin.Context) { DbName: dbName, CollectionName: collectionName, } + c.Set(ContextRequest, req) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { return h.proxy.DescribeCollection(reqCtx, req.(*milvuspb.DescribeCollectionRequest)) @@ -356,7 +360,7 @@ func (h *HandlersV1) getCollectionDetails(c *gin.Context) { err = 
merr.Error(response.(*milvuspb.DescribeCollectionResponse).GetStatus()) } if err != nil { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return } coll := response.(*milvuspb.DescribeCollectionResponse) @@ -408,7 +412,7 @@ func (h *HandlersV1) getCollectionDetails(c *gin.Context) { } else { indexDesc = printIndexes(indexResp.IndexDescriptions) } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ HTTPCollectionName: coll.CollectionName, HTTPReturnDescription: coll.Schema.Description, "fields": printFields(coll.Schema.Fields), @@ -425,7 +429,7 @@ func (h *HandlersV1) dropCollection(c *gin.Context) { } if err := c.ShouldBindWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of drop collection is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -433,7 +437,7 @@ func (h *HandlersV1) dropCollection(c *gin.Context) { } if httpReq.CollectionName == "" { log.Warn("high level restful api, drop collection require parameter: [collectionName], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName]", }) @@ -443,6 +447,7 @@ func (h *HandlersV1) dropCollection(c *gin.Context) { DbName: httpReq.DbName, CollectionName: httpReq.CollectionName, } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -451,7 +456,7 @@ func (h *HandlersV1) dropCollection(c *gin.Context) { return nil, RestRequestInterceptorErr } if !has { - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCollectionNotFound), HTTPReturnMessage: merr.ErrCollectionNotFound.Error() + ", database: " + httpReq.DbName + ", collection: " + httpReq.CollectionName, }) @@ -466,9 +471,9 @@ func (h *HandlersV1) dropCollection(c *gin.Context) { err = merr.Error(response.(*commonpb.Status)) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) } } @@ -480,7 +485,7 @@ func (h *HandlersV1) query(c *gin.Context) { } if err := c.ShouldBindWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of query is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: 
merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -488,7 +493,7 @@ func (h *HandlersV1) query(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.Filter == "" { log.Warn("high level restful api, query require parameter: [collectionName, filter], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, filter]", }) @@ -502,6 +507,7 @@ func (h *HandlersV1) query(c *gin.Context) { GuaranteeTimestamp: BoundedTimestamp, QueryParams: []*commonpb.KeyValuePair{}, } + c.Set(ContextRequest, req) if httpReq.Offset > 0 { req.QueryParams = append(req.QueryParams, &commonpb.KeyValuePair{Key: ParamOffset, Value: strconv.FormatInt(int64(httpReq.Offset), 10)}) } @@ -520,19 +526,19 @@ func (h *HandlersV1) query(c *gin.Context) { err = merr.Error(response.(*milvuspb.QueryResults).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { queryResp := response.(*milvuspb.QueryResults) allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) outputData, err := buildQueryResp(int64(0), queryResp.OutputFields, queryResp.FieldsData, nil, nil, allowJS) if err != nil { log.Warn("high level restful api, fail to deal with query result", zap.Any("response", response), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) } } } @@ -544,7 +550,7 @@ func (h *HandlersV1) get(c *gin.Context) { } if err := c.ShouldBindBodyWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of get is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -552,7 +558,7 @@ func (h *HandlersV1) get(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.ID == nil { log.Warn("high level restful api, get require parameter: [collectionName, id], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, id]", }) @@ -564,6 +570,7 @@ func (h *HandlersV1) get(c *gin.Context) { OutputFields: httpReq.OutputFields, GuaranteeTimestamp: BoundedTimestamp, } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -574,7 +581,7 @@ func (h *HandlersV1) get(c *gin.Context) { body, _ := c.Get(gin.BodyBytesKey) filter, err := checkGetPrimaryKey(collSchema, 
gjson.Get(string(body.([]byte)), DefaultPrimaryFieldName)) if err != nil { - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: " + err.Error(), }) @@ -591,19 +598,19 @@ func (h *HandlersV1) get(c *gin.Context) { err = merr.Error(response.(*milvuspb.QueryResults).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { queryResp := response.(*milvuspb.QueryResults) allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) outputData, err := buildQueryResp(int64(0), queryResp.OutputFields, queryResp.FieldsData, nil, nil, allowJS) if err != nil { log.Warn("high level restful api, fail to deal with get result", zap.Any("response", response), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) } } } @@ -614,7 +621,7 @@ func (h *HandlersV1) delete(c *gin.Context) { } if err := c.ShouldBindBodyWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of delete is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -622,7 +629,7 @@ func (h *HandlersV1) delete(c *gin.Context) { } if httpReq.CollectionName == "" || (httpReq.ID == nil && httpReq.Filter == "") { log.Warn("high level restful api, delete require parameter: [collectionName, id/filter], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, id/filter]", }) @@ -632,6 +639,7 @@ func (h *HandlersV1) delete(c *gin.Context) { DbName: httpReq.DbName, CollectionName: httpReq.CollectionName, } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -645,7 +653,7 @@ func (h *HandlersV1) delete(c *gin.Context) { body, _ := c.Get(gin.BodyBytesKey) filter, err := checkGetPrimaryKey(collSchema, gjson.Get(string(body.([]byte)), DefaultPrimaryFieldName)) if err != nil { - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: " + err.Error(), }) @@ -662,9 +670,9 @@ func (h *HandlersV1) delete(c *gin.Context) { err = merr.Error(response.(*milvuspb.MutationResult).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), 
HTTPReturnMessage: err.Error()}) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}}) } } @@ -678,7 +686,7 @@ func (h *HandlersV1) insert(c *gin.Context) { } if err = c.ShouldBindBodyWith(&singleInsertReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of insert is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -690,7 +698,7 @@ func (h *HandlersV1) insert(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.Data == nil { log.Warn("high level restful api, insert require parameter: [collectionName, data], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, data]", }) @@ -701,6 +709,7 @@ func (h *HandlersV1) insert(c *gin.Context) { CollectionName: httpReq.CollectionName, NumRows: uint32(len(httpReq.Data)), } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -712,7 +721,7 @@ func (h *HandlersV1) insert(c *gin.Context) { err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema) if err != nil { log.Warn("high level restful api, fail to deal with insert data", zap.Any("body", body), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -722,7 +731,7 @@ func (h *HandlersV1) insert(c *gin.Context) { insertReq.FieldsData, err = anyToColumns(httpReq.Data, collSchema) if err != nil { log.Warn("high level restful api, fail to deal with insert data", zap.Any("data", httpReq.Data), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -737,21 +746,21 @@ func (h *HandlersV1) insert(c *gin.Context) { err = merr.Error(response.(*milvuspb.MutationResult).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { insertResp := response.(*milvuspb.MutationResult) switch insertResp.IDs.GetIdField().(type) { case *schemapb.IDs_IntId: allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) if allowJS { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}}) } else 
{ - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": formatInt64(insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": formatInt64(insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}}) } case *schemapb.IDs_StrId: - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}}) default: - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: unsupported primary key data type", }) @@ -769,7 +778,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { } if err = c.ShouldBindBodyWith(&singleUpsertReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of upsert is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -781,7 +790,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.Data == nil { log.Warn("high level restful api, upsert require parameter: [collectionName, data], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, data]", }) @@ -792,6 +801,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { CollectionName: httpReq.CollectionName, NumRows: uint32(len(httpReq.Data)), } + c.Set(ContextRequest, req) username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -802,7 +812,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { for _, fieldSchema := range collSchema.Fields { if fieldSchema.IsPrimaryKey && fieldSchema.AutoID { err := merr.WrapErrParameterInvalid("autoID: false", "autoID: true", "cannot upsert an autoID collection") - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return nil, RestRequestInterceptorErr } } @@ -810,7 +820,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema) if err != nil { log.Warn("high level restful api, fail to deal with upsert data", zap.Any("body", body), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -820,7 +830,7 @@ func (h *HandlersV1) upsert(c *gin.Context) { 
upsertReq.FieldsData, err = anyToColumns(httpReq.Data, collSchema) if err != nil { log.Warn("high level restful api, fail to deal with upsert data", zap.Any("data", httpReq.Data), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -835,21 +845,21 @@ func (h *HandlersV1) upsert(c *gin.Context) { err = merr.Error(response.(*milvuspb.MutationResult).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { upsertResp := response.(*milvuspb.MutationResult) switch upsertResp.IDs.GetIdField().(type) { case *schemapb.IDs_IntId: allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) if allowJS { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}}) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": formatInt64(upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": formatInt64(upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}}) } case *schemapb.IDs_StrId: - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}}) default: - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: unsupported primary key data type", }) @@ -864,7 +874,7 @@ func (h *HandlersV1) search(c *gin.Context) { } if err := c.ShouldBindWith(&httpReq, binding.JSON); err != nil { log.Warn("high level restful api, the parameter of search is incorrect", zap.Any("request", httpReq), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -872,12 +882,24 @@ func (h *HandlersV1) search(c *gin.Context) { } if httpReq.CollectionName == "" || httpReq.Vector == nil { log.Warn("high level restful api, search require parameter: [collectionName, vector], but miss") - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", required parameters: [collectionName, vector]", }) return } + req := &milvuspb.SearchRequest{ + DbName: httpReq.DbName, + CollectionName: 
httpReq.CollectionName, + Dsl: httpReq.Filter, + PlaceholderGroup: vectors2PlaceholderGroupBytes([][]float32{httpReq.Vector}), + DslType: commonpb.DslType_BoolExprV1, + OutputFields: httpReq.OutputFields, + GuaranteeTimestamp: BoundedTimestamp, + Nq: int64(1), + } + c.Set(ContextRequest, req) + params := map[string]interface{}{ // auto generated mapping "level": int(commonpb.ConsistencyLevel_Bounded), } @@ -887,7 +909,7 @@ func (h *HandlersV1) search(c *gin.Context) { if rangeFilterOk { if !radiusOk { log.Warn("high level restful api, search params invalid, because only " + ParamRangeFilter) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: invalid search params", }) @@ -900,23 +922,13 @@ func (h *HandlersV1) search(c *gin.Context) { } } bs, _ := json.Marshal(params) - searchParams := []*commonpb.KeyValuePair{ + req.SearchParams = []*commonpb.KeyValuePair{ {Key: common.TopKKey, Value: strconv.FormatInt(int64(httpReq.Limit), 10)}, {Key: Params, Value: string(bs)}, {Key: ParamRoundDecimal, Value: "-1"}, {Key: ParamOffset, Value: strconv.FormatInt(int64(httpReq.Offset), 10)}, } - req := &milvuspb.SearchRequest{ - DbName: httpReq.DbName, - CollectionName: httpReq.CollectionName, - Dsl: httpReq.Filter, - PlaceholderGroup: vectors2PlaceholderGroupBytes([][]float32{httpReq.Vector}), - DslType: commonpb.DslType_BoolExprV1, - OutputFields: httpReq.OutputFields, - SearchParams: searchParams, - GuaranteeTimestamp: BoundedTimestamp, - Nq: int64(1), - } + username, _ := c.Get(ContextUsername) ctx := proxy.NewContextWithMetadata(c, username.(string), req.DbName) response, err := h.executeRestRequestInterceptor(ctx, c, req, func(reqCtx context.Context, req any) (any, error) { @@ -929,22 +941,22 @@ func (h *HandlersV1) search(c *gin.Context) { err = merr.Error(response.(*milvuspb.SearchResults).GetStatus()) } if err != nil { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } else { searchResp := response.(*milvuspb.SearchResults) if searchResp.Results.TopK == int64(0) { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: []interface{}{}}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: []interface{}{}}) } else { allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) outputData, err := buildQueryResp(searchResp.Results.TopK, searchResp.Results.OutputFields, searchResp.Results.FieldsData, searchResp.Results.Ids, searchResp.Results.Scores, allowJS) if err != nil { log.Warn("high level restful api, fail to deal with search result", zap.Any("result", searchResp.Results), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData}) } } } diff --git a/internal/distributed/proxy/httpserver/handler_v2.go b/internal/distributed/proxy/httpserver/handler_v2.go index 773aa32b984cc..e8637a02b653f 100644 --- a/internal/distributed/proxy/httpserver/handler_v2.go +++ 
b/internal/distributed/proxy/httpserver/handler_v2.go @@ -153,17 +153,17 @@ func wrapperPost(newReq newReqFunc, v2 handlerFuncV2) gin.HandlerFunc { log.Warn("high level restful api, read parameters from request body fail", zap.Error(err), zap.Any("url", c.Request.URL.Path), zap.Any("request", req)) if _, ok := err.(validator.ValidationErrors); ok { - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", error: " + err.Error(), }) } else if err == io.EOF { - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", the request body should be nil, however {} is valid", }) } else { - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -230,14 +230,14 @@ func checkAuthorizationV2(ctx context.Context, c *gin.Context, ignoreErr bool, r username, ok := c.Get(ContextUsername) if !ok || username.(string) == "" { if !ignoreErr { - c.JSON(http.StatusUnauthorized, gin.H{HTTPReturnCode: merr.Code(merr.ErrNeedAuthenticate), HTTPReturnMessage: merr.ErrNeedAuthenticate.Error()}) + HTTPReturn(c, http.StatusUnauthorized, gin.H{HTTPReturnCode: merr.Code(merr.ErrNeedAuthenticate), HTTPReturnMessage: merr.ErrNeedAuthenticate.Error()}) } return merr.ErrNeedAuthenticate } _, authErr := proxy.PrivilegeInterceptor(ctx, req) if authErr != nil { if !ignoreErr { - c.JSON(http.StatusForbidden, gin.H{HTTPReturnCode: merr.Code(authErr), HTTPReturnMessage: authErr.Error()}) + HTTPReturn(c, http.StatusForbidden, gin.H{HTTPReturnCode: merr.Code(authErr), HTTPReturnMessage: authErr.Error()}) } return authErr } @@ -267,7 +267,7 @@ func wrapperProxy(ctx context.Context, c *gin.Context, req any, checkAuth bool, if err != nil { log.Ctx(ctx).Warn("high level restful api, grpc call failed", zap.Error(err), zap.Any("grpcRequest", req)) if !ignoreErr { - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) } } return response, err @@ -290,7 +290,7 @@ func (h *HandlersV2) wrapperCheckDatabase(v2 handlerFuncV2) handlerFuncV2 { } } log.Ctx(ctx).Warn("high level restful api, non-exist database", zap.String("database", dbName), zap.Any("request", req)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrDatabaseNotFound), HTTPReturnMessage: merr.ErrDatabaseNotFound.Error() + ", database: " + dbName, }) @@ -316,7 +316,7 @@ func (h *HandlersV2) hasCollection(ctx context.Context, c *gin.Context, anyReq a } has = resp.(*milvuspb.BoolResponse).Value } - c.JSON(http.StatusOK, wrapperReturnHas(has)) + HTTPReturn(c, http.StatusOK, wrapperReturnHas(has)) return has, nil } @@ -324,11 +324,12 @@ func (h *HandlersV2) listCollections(ctx context.Context, c *gin.Context, anyReq req := &milvuspb.ShowCollectionsRequest{ DbName: dbName, } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ShowCollections(reqCtx, 
req.(*milvuspb.ShowCollectionsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnList(resp.(*milvuspb.ShowCollectionsResponse).CollectionNames)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(resp.(*milvuspb.ShowCollectionsResponse).CollectionNames)) } return resp, err } @@ -340,7 +341,8 @@ func (h *HandlersV2) getCollectionDetails(ctx context.Context, c *gin.Context, a DbName: dbName, CollectionName: collectionName, } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (any, error) { + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (any, error) { return h.proxy.DescribeCollection(reqCtx, req.(*milvuspb.DescribeCollectionRequest)) }) if err != nil { @@ -408,7 +410,7 @@ func (h *HandlersV2) getCollectionDetails(ctx context.Context, c *gin.Context, a if coll.Properties == nil { coll.Properties = []*commonpb.KeyValuePair{} } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{ HTTPCollectionName: coll.CollectionName, HTTPCollectionID: coll.CollectionID, HTTPReturnDescription: coll.Schema.Description, @@ -432,11 +434,12 @@ func (h *HandlersV2) getCollectionStats(ctx context.Context, c *gin.Context, any DbName: dbName, CollectionName: collectionGetter.GetCollectionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (any, error) { return h.proxy.GetCollectionStatistics(reqCtx, req.(*milvuspb.GetCollectionStatisticsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnRowCount(resp.(*milvuspb.GetCollectionStatisticsResponse).Stats)) + HTTPReturn(c, http.StatusOK, wrapperReturnRowCount(resp.(*milvuspb.GetCollectionStatisticsResponse).Stats)) } return resp, err } @@ -447,6 +450,7 @@ func (h *HandlersV2) getCollectionLoadState(ctx context.Context, c *gin.Context, DbName: dbName, CollectionName: collectionGetter.GetCollectionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (any, error) { return h.proxy.GetLoadState(reqCtx, req.(*milvuspb.GetLoadStateRequest)) }) @@ -455,10 +459,10 @@ func (h *HandlersV2) getCollectionLoadState(ctx context.Context, c *gin.Context, } if resp.(*milvuspb.GetLoadStateResponse).State == commonpb.LoadState_LoadStateNotExist { err = merr.WrapErrCollectionNotFound(req.CollectionName) - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return resp, err } else if resp.(*milvuspb.GetLoadStateResponse).State == commonpb.LoadState_LoadStateNotLoad { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{ HTTPReturnLoadState: resp.(*milvuspb.GetLoadStateResponse).State.String(), }}) return resp, err @@ -483,7 +487,7 @@ func (h *HandlersV2) getCollectionLoadState(ctx context.Context, c *gin.Context, if progress >= 100 { state = commonpb.LoadState_LoadStateLoaded.String() } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{ HTTPReturnLoadState: state, 
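Reviewer note: most handlers now call `c.Set(ContextRequest, req)` as soon as the gRPC-level request is assembled, before any validation or proxy call. The consumer of that key is not shown in this diff; the sketch below shows the likely intent, assuming a logging middleware (name and wiring hypothetical) that reads the stored request after the handler chain finishes.

```go
package httpserver

import (
	"github.com/gin-gonic/gin"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus/pkg/log"
)

// requestLogger is a hypothetical middleware illustrating why handlers stash
// the constructed milvuspb request under ContextRequest: after the handler
// chain has run, the middleware can report exactly which gRPC-level request
// the REST call was translated into, even if the handler aborted early.
func requestLogger() gin.HandlerFunc {
	return func(c *gin.Context) {
		c.Next() // run the actual handler(s) first

		if req, ok := c.Get(ContextRequest); ok {
			log.Info("high level restful api, translated request",
				zap.String("path", c.Request.URL.Path),
				zap.Any("grpcRequest", req))
		}
	}
}
```

Registering the request up front (rather than at the end of the handler) is what makes this work for the early-return error paths as well.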
HTTPReturnLoadProgress: progress, }, HTTPReturnMessage: errMessage}) @@ -496,11 +500,12 @@ func (h *HandlersV2) dropCollection(ctx context.Context, c *gin.Context, anyReq DbName: dbName, CollectionName: getter.GetCollectionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DropCollection(reqCtx, req.(*milvuspb.DropCollectionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -513,6 +518,7 @@ func (h *HandlersV2) renameCollection(ctx context.Context, c *gin.Context, anyRe NewName: httpReq.NewCollectionName, NewDBName: httpReq.NewDbName, } + c.Set(ContextRequest, req) if req.NewDBName == "" { req.NewDBName = dbName } @@ -520,7 +526,7 @@ func (h *HandlersV2) renameCollection(ctx context.Context, c *gin.Context, anyRe return h.proxy.RenameCollection(reqCtx, req.(*milvuspb.RenameCollectionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -531,11 +537,12 @@ func (h *HandlersV2) loadCollection(ctx context.Context, c *gin.Context, anyReq DbName: dbName, CollectionName: getter.GetCollectionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.LoadCollection(reqCtx, req.(*milvuspb.LoadCollectionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -546,11 +553,12 @@ func (h *HandlersV2) releaseCollection(ctx context.Context, c *gin.Context, anyR DbName: dbName, CollectionName: getter.GetCollectionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ReleaseCollection(reqCtx, req.(*milvuspb.ReleaseCollectionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -566,6 +574,7 @@ func (h *HandlersV2) query(ctx context.Context, c *gin.Context, anyReq any, dbNa QueryParams: []*commonpb.KeyValuePair{}, UseDefaultConsistency: true, } + c.Set(ContextRequest, req) if httpReq.Offset > 0 { req.QueryParams = append(req.QueryParams, &commonpb.KeyValuePair{Key: ParamOffset, Value: strconv.FormatInt(int64(httpReq.Offset), 10)}) } @@ -581,13 +590,13 @@ func (h *HandlersV2) query(ctx context.Context, c *gin.Context, anyReq any, dbNa outputData, err := buildQueryResp(int64(0), queryResp.OutputFields, queryResp.FieldsData, nil, nil, allowJS) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with query result", zap.Any("response", resp), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: outputData, HTTPReturnCost: proxy.GetCostValue(queryResp.GetStatus()), }) @@ -605,7 +614,7 @@ func (h *HandlersV2) get(ctx context.Context, c *gin.Context, anyReq any, dbName body, _ := c.Get(gin.BodyBytesKey) filter, err 
:= checkGetPrimaryKey(collSchema, gjson.Get(string(body.([]byte)), DefaultPrimaryFieldName)) if err != nil { - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: " + err.Error(), }) @@ -619,6 +628,7 @@ func (h *HandlersV2) get(ctx context.Context, c *gin.Context, anyReq any, dbName Expr: filter, UseDefaultConsistency: true, } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.Query(reqCtx, req.(*milvuspb.QueryRequest)) }) @@ -628,13 +638,13 @@ func (h *HandlersV2) get(ctx context.Context, c *gin.Context, anyReq any, dbName outputData, err := buildQueryResp(int64(0), queryResp.OutputFields, queryResp.FieldsData, nil, nil, allowJS) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with get result", zap.Any("response", resp), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: outputData, HTTPReturnCost: proxy.GetCostValue(queryResp.GetStatus()), }) @@ -655,11 +665,12 @@ func (h *HandlersV2) delete(ctx context.Context, c *gin.Context, anyReq any, dbN PartitionName: httpReq.PartitionName, Expr: httpReq.Filter, } + c.Set(ContextRequest, req) if req.Expr == "" { body, _ := c.Get(gin.BodyBytesKey) filter, err := checkGetPrimaryKey(collSchema, gjson.Get(string(body.([]byte)), DefaultPrimaryFieldName)) if err != nil { - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: " + err.Error(), }) @@ -671,7 +682,7 @@ func (h *HandlersV2) delete(ctx context.Context, c *gin.Context, anyReq any, dbN return h.proxy.Delete(reqCtx, req.(*milvuspb.DeleteRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefaultWithCost( + HTTPReturn(c, http.StatusOK, wrapperReturnDefaultWithCost( proxy.GetCostValue(resp.(*milvuspb.MutationResult).GetStatus()), )) } @@ -680,6 +691,14 @@ func (h *HandlersV2) delete(ctx context.Context, c *gin.Context, anyReq any, dbN func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) { httpReq := anyReq.(*CollectionDataReq) + req := &milvuspb.InsertRequest{ + DbName: dbName, + CollectionName: httpReq.CollectionName, + PartitionName: httpReq.PartitionName, + // PartitionName: "_default", + } + c.Set(ContextRequest, req) + collSchema, err := h.GetCollectionSchema(ctx, c, dbName, httpReq.CollectionName) if err != nil { return nil, err @@ -688,23 +707,18 @@ func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbN err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with insert data", zap.Error(err), zap.String("body", string(body.([]byte)))) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) return nil, err } - req := 
&milvuspb.InsertRequest{ - DbName: dbName, - CollectionName: httpReq.CollectionName, - PartitionName: httpReq.PartitionName, - // PartitionName: "_default", - NumRows: uint32(len(httpReq.Data)), - } + + req.NumRows = uint32(len(httpReq.Data)) req.FieldsData, err = anyToColumns(httpReq.Data, collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with insert data", zap.Any("data", httpReq.Data), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -720,26 +734,26 @@ func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbN case *schemapb.IDs_IntId: allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) if allowJS { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}, HTTPReturnCost: cost, }) } else { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": formatInt64(insertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}, HTTPReturnCost: cost, }) } case *schemapb.IDs_StrId: - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"insertCount": insertResp.InsertCnt, "insertIds": insertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}, HTTPReturnCost: cost, }) default: - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: unsupported primary key data type", }) @@ -750,36 +764,39 @@ func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbN func (h *HandlersV2) upsert(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) { httpReq := anyReq.(*CollectionDataReq) + req := &milvuspb.UpsertRequest{ + DbName: dbName, + CollectionName: httpReq.CollectionName, + PartitionName: httpReq.PartitionName, + // PartitionName: "_default", + } + c.Set(ContextRequest, req) + collSchema, err := h.GetCollectionSchema(ctx, c, dbName, httpReq.CollectionName) if err != nil { return nil, err } if collSchema.AutoID { err := merr.WrapErrParameterInvalid("autoID: false", "autoID: true", "cannot upsert an autoID collection") - c.AbortWithStatusJSON(http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) + HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()}) return nil, err } body, _ := c.Get(gin.BodyBytesKey) err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with upsert data", zap.Any("body", body), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) return nil, err } - req := &milvuspb.UpsertRequest{ - DbName: dbName, - CollectionName: 
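Reviewer note: the insert and upsert responses keep honoring the `HTTPHeaderAllowInt64` request header. When the caller does not opt in, 64-bit IDs are converted to strings via `formatInt64`, since JavaScript clients lose integer precision above 2^53-1. The real helper lives elsewhere in this package; the sketch below assumes its signature.

```go
package httpserver

import "strconv"

// Sketch only: the actual formatInt64 is defined elsewhere in httpserver and
// its signature is assumed here. 64-bit IDs become decimal strings unless the
// client explicitly allows raw int64 values via HTTPHeaderAllowInt64.
func formatInt64(ids []int64) []string {
	out := make([]string, 0, len(ids))
	for _, id := range ids {
		out = append(out, strconv.FormatInt(id, 10))
	}
	return out
}
```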
httpReq.CollectionName, - PartitionName: httpReq.PartitionName, - // PartitionName: "_default", - NumRows: uint32(len(httpReq.Data)), - } + + req.NumRows = uint32(len(httpReq.Data)) req.FieldsData, err = anyToColumns(httpReq.Data, collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with upsert data", zap.Any("data", httpReq.Data), zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidInsertData), HTTPReturnMessage: merr.ErrInvalidInsertData.Error() + ", error: " + err.Error(), }) @@ -795,26 +812,26 @@ func (h *HandlersV2) upsert(ctx context.Context, c *gin.Context, anyReq any, dbN case *schemapb.IDs_IntId: allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) if allowJS { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data}, HTTPReturnCost: cost, }) } else { - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": formatInt64(upsertResp.IDs.IdField.(*schemapb.IDs_IntId).IntId.Data)}, HTTPReturnCost: cost, }) } case *schemapb.IDs_StrId: - c.JSON(http.StatusOK, gin.H{ - HTTPReturnCode: http.StatusOK, + HTTPReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{"upsertCount": upsertResp.UpsertCnt, "upsertIds": upsertResp.IDs.IdField.(*schemapb.IDs_StrId).StrId.Data}, HTTPReturnCost: cost, }) default: - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrCheckPrimaryKey), HTTPReturnMessage: merr.ErrCheckPrimaryKey.Error() + ", error: unsupported primary key data type", }) @@ -848,7 +865,10 @@ func generatePlaceholderGroup(ctx context.Context, body string, collSchema *sche if vectorField == nil { return nil, errors.New("cannot find a vector field named: " + fieldName) } - dim, _ := getDim(vectorField) + dim := int64(0) + if !typeutil.IsSparseFloatVectorType(vectorField.DataType) { + dim, _ = getDim(vectorField) + } phv, err := convertVectors2Placeholder(body, vectorField.DataType, dim) if err != nil { return nil, err @@ -870,7 +890,7 @@ func generateSearchParams(ctx context.Context, c *gin.Context, reqParams map[str if rangeFilterOk { if !radiusOk { log.Ctx(ctx).Warn("high level restful api, search params invalid, because only " + ParamRangeFilter) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: invalid search params", }) @@ -891,6 +911,17 @@ func generateSearchParams(ctx context.Context, c *gin.Context, reqParams map[str func (h *HandlersV2) search(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) { httpReq := anyReq.(*SearchReqV2) + req := &milvuspb.SearchRequest{ + DbName: dbName, + CollectionName: httpReq.CollectionName, + Dsl: httpReq.Filter, + DslType: commonpb.DslType_BoolExprV1, + OutputFields: httpReq.OutputFields, + PartitionNames: httpReq.PartitionNames, + UseDefaultConsistency: true, + } + c.Set(ContextRequest, req) + collSchema, err := h.GetCollectionSchema(ctx, c, dbName, httpReq.CollectionName) 
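Reviewer note: `generatePlaceholderGroup` now skips the dimension lookup for sparse float vector fields. A sparse vector has no fixed `dim` type param, so calling `getDim` on such a field would not make sense; the hunk leaves `dim` at 0 and lets the placeholder conversion rely on the (index, value) pairs in the body. A standalone sketch of that decision, assuming the existing package helpers:

```go
package httpserver

import (
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/pkg/util/typeutil"
)

// vectorDim sketches the dimension handling added above (the hunk inlines
// it): dense vector fields declare "dim" in their type params, while sparse
// float vectors carry explicit (index, value) pairs and have no fixed
// dimension, so 0 is passed through to convertVectors2Placeholder for them.
func vectorDim(field *schemapb.FieldSchema) int64 {
	if typeutil.IsSparseFloatVectorType(field.DataType) {
		return 0
	}
	dim, _ := getDim(field) // existing package helper used by the hunk above
	return dim
}
```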
if err != nil { return nil, err @@ -908,23 +939,14 @@ func (h *HandlersV2) search(ctx context.Context, c *gin.Context, anyReq any, dbN placeholderGroup, err := generatePlaceholderGroup(ctx, string(body.([]byte)), collSchema, httpReq.AnnsField) if err != nil { log.Ctx(ctx).Warn("high level restful api, search with vector invalid", zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) return nil, err } - req := &milvuspb.SearchRequest{ - DbName: dbName, - CollectionName: httpReq.CollectionName, - Dsl: httpReq.Filter, - PlaceholderGroup: placeholderGroup, - DslType: commonpb.DslType_BoolExprV1, - OutputFields: httpReq.OutputFields, - PartitionNames: httpReq.PartitionNames, - SearchParams: searchParams, - UseDefaultConsistency: true, - } + req.SearchParams = searchParams + req.PlaceholderGroup = placeholderGroup resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.Search(reqCtx, req.(*milvuspb.SearchRequest)) }) @@ -932,18 +954,18 @@ func (h *HandlersV2) search(ctx context.Context, c *gin.Context, anyReq any, dbN searchResp := resp.(*milvuspb.SearchResults) cost := proxy.GetCostValue(searchResp.GetStatus()) if searchResp.Results.TopK == int64(0) { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) } else { allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) - outputData, err := buildQueryResp(searchResp.Results.TopK, searchResp.Results.OutputFields, searchResp.Results.FieldsData, searchResp.Results.Ids, searchResp.Results.Scores, allowJS) + outputData, err := buildQueryResp(0, searchResp.Results.OutputFields, searchResp.Results.FieldsData, searchResp.Results.Ids, searchResp.Results.Scores, allowJS) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with search result", zap.Any("result", searchResp.Results), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData, HTTPReturnCost: cost}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: outputData, HTTPReturnCost: cost}) } } } @@ -958,6 +980,8 @@ func (h *HandlersV2) advancedSearch(ctx context.Context, c *gin.Context, anyReq Requests: []*milvuspb.SearchRequest{}, OutputFields: httpReq.OutputFields, } + c.Set(ContextRequest, req) + collSchema, err := h.GetCollectionSchema(ctx, c, dbName, httpReq.CollectionName) if err != nil { return nil, err @@ -977,7 +1001,7 @@ func (h *HandlersV2) advancedSearch(ctx context.Context, c *gin.Context, anyReq placeholderGroup, err := generatePlaceholderGroup(ctx, searchArray[i].Raw, collSchema, subReq.AnnsField) if err != nil { log.Ctx(ctx).Warn("high level restful api, search with vector invalid", zap.Error(err)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrIncorrectParameterFormat), HTTPReturnMessage: 
merr.ErrIncorrectParameterFormat.Error() + ", error: " + err.Error(), }) @@ -1010,18 +1034,18 @@ func (h *HandlersV2) advancedSearch(ctx context.Context, c *gin.Context, anyReq searchResp := resp.(*milvuspb.SearchResults) cost := proxy.GetCostValue(searchResp.GetStatus()) if searchResp.Results.TopK == int64(0) { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: []interface{}{}, HTTPReturnCost: cost}) } else { allowJS, _ := strconv.ParseBool(c.Request.Header.Get(HTTPHeaderAllowInt64)) outputData, err := buildQueryResp(0, searchResp.Results.OutputFields, searchResp.Results.FieldsData, searchResp.Results.Ids, searchResp.Results.Scores, allowJS) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with search result", zap.Any("result", searchResp.Results), zap.Error(err)) - c.JSON(http.StatusOK, gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrInvalidSearchResult), HTTPReturnMessage: merr.ErrInvalidSearchResult.Error() + ", error: " + err.Error(), }) } else { - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: outputData, HTTPReturnCost: cost}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: outputData, HTTPReturnCost: cost}) } } } @@ -1030,6 +1054,13 @@ func (h *HandlersV2) advancedSearch(ctx context.Context, c *gin.Context, anyReq func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) { httpReq := anyReq.(*CollectionReq) + req := &milvuspb.CreateCollectionRequest{ + DbName: dbName, + CollectionName: httpReq.CollectionName, + Properties: []*commonpb.KeyValuePair{}, + } + c.Set(ContextRequest, req) + var schema []byte var err error fieldNames := map[string]bool{} @@ -1039,7 +1070,7 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe err := merr.WrapErrParameterInvalid("collectionName & dimension", "collectionName", "dimension is required for quickly create collection(default metric type: "+DefaultMetricType+")") log.Ctx(ctx).Warn("high level restful api, quickly create collection fail", zap.Error(err), zap.Any("request", anyReq)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error(), }) @@ -1061,7 +1092,7 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe err := merr.WrapErrParameterInvalid("Int64, Varchar", httpReq.IDType, "idType can only be [Int64, VarChar], default: Int64") log.Ctx(ctx).Warn("high level restful api, quickly create collection fail", zap.Error(err), zap.Any("request", anyReq)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error(), }) @@ -1117,7 +1148,7 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe fieldDataType, ok := schemapb.DataType_value[field.DataType] if !ok { log.Ctx(ctx).Warn("field's data type is invalid(case sensitive).", zap.Any("fieldDataType", field.DataType), zap.Any("field", field)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrParameterInvalid), HTTPReturnMessage: merr.ErrParameterInvalid.Error() + ", data type " + field.DataType + " is invalid(case sensitive).", }) 
@@ -1134,7 +1165,7 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe if dataType == schemapb.DataType_Array { if _, ok := schemapb.DataType_value[field.ElementDataType]; !ok { log.Ctx(ctx).Warn("element's data type is invalid(case sensitive).", zap.Any("elementDataType", field.ElementDataType), zap.Any("field", field)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrParameterInvalid), HTTPReturnMessage: merr.ErrParameterInvalid.Error() + ", element data type " + field.ElementDataType + " is invalid(case sensitive).", }) @@ -1163,18 +1194,22 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe } if err != nil { log.Ctx(ctx).Warn("high level restful api, marshal collection schema fail", zap.Error(err), zap.Any("request", anyReq)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMarshalCollectionSchema), HTTPReturnMessage: merr.ErrMarshalCollectionSchema.Error() + ", error: " + err.Error(), }) return nil, err } + req.Schema = schema + shardsNum := int32(ShardNumDefault) if shardsNumStr, ok := httpReq.Params["shardsNum"]; ok { if shards, err := strconv.ParseInt(fmt.Sprintf("%v", shardsNumStr), 10, 64); err == nil { shardsNum = int32(shards) } } + req.ShardsNum = shardsNum + consistencyLevel := commonpb.ConsistencyLevel_Bounded if _, ok := httpReq.Params["consistencyLevel"]; ok { if level, ok := commonpb.ConsistencyLevel_value[fmt.Sprintf("%s", httpReq.Params["consistencyLevel"])]; ok { @@ -1183,21 +1218,15 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe err := merr.WrapErrParameterInvalid("Strong, Session, Bounded, Eventually, Customized", httpReq.Params["consistencyLevel"], "consistencyLevel can only be [Strong, Session, Bounded, Eventually, Customized], default: Bounded") log.Ctx(ctx).Warn("high level restful api, create collection fail", zap.Error(err), zap.Any("request", anyReq)) - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error(), }) return nil, err } } - req := &milvuspb.CreateCollectionRequest{ - DbName: dbName, - CollectionName: httpReq.CollectionName, - Schema: schema, - ShardsNum: shardsNum, - ConsistencyLevel: consistencyLevel, - Properties: []*commonpb.KeyValuePair{}, - } + req.ConsistencyLevel = consistencyLevel + if partitionsNum > 0 { req.NumPartitions = partitionsNum } @@ -1232,12 +1261,12 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe } } else { if len(httpReq.IndexParams) == 0 { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) return nil, nil } for _, indexParam := range httpReq.IndexParams { if _, ok := fieldNames[indexParam.FieldName]; !ok { - c.AbortWithStatusJSON(http.StatusOK, gin.H{ + HTTPAbortReturn(c, http.StatusOK, gin.H{ HTTPReturnCode: merr.Code(merr.ErrMissingRequiredParameters), HTTPReturnMessage: merr.ErrMissingRequiredParameters.Error() + ", error: `" + indexParam.FieldName + "` hasn't defined in schema", }) @@ -1269,7 +1298,7 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe return h.proxy.LoadCollection(ctx, req.(*milvuspb.LoadCollectionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return statusResponse, 
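Reviewer note: `createCollection` follows the same reordering pattern as the other handlers. The `CreateCollectionRequest` is created before any schema validation and registered via `c.Set(ContextRequest, req)`, then `Schema`, `ShardsNum` and `ConsistencyLevel` are filled in as they are derived. A condensed view of the new flow, with the field parsing and validation elided:

```go
// Condensed sketch of the reordered createCollection flow in this hunk;
// schema construction and validation are elided for brevity.
req := &milvuspb.CreateCollectionRequest{
	DbName:         dbName,
	CollectionName: httpReq.CollectionName,
	Properties:     []*commonpb.KeyValuePair{},
}
c.Set(ContextRequest, req) // registered before any validation can abort

// ... build and validate the marshalled schema from httpReq ...
req.Schema = schema
req.ShardsNum = shardsNum               // ShardNumDefault unless params["shardsNum"] overrides it
req.ConsistencyLevel = consistencyLevel // Bounded unless params["consistencyLevel"] overrides it
```

The upside is the same as elsewhere in the diff: context consumers see the request even when one of the early error paths returns before the proxy call.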
err } @@ -1280,11 +1309,13 @@ func (h *HandlersV2) listPartitions(ctx context.Context, c *gin.Context, anyReq DbName: dbName, CollectionName: collectionGetter.GetCollectionName(), } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ShowPartitions(reqCtx, req.(*milvuspb.ShowPartitionsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnList(resp.(*milvuspb.ShowPartitionsResponse).PartitionNames)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(resp.(*milvuspb.ShowPartitionsResponse).PartitionNames)) } return resp, err } @@ -1297,11 +1328,12 @@ func (h *HandlersV2) hasPartitions(ctx context.Context, c *gin.Context, anyReq a CollectionName: collectionGetter.GetCollectionName(), PartitionName: partitionGetter.GetPartitionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.HasPartition(reqCtx, req.(*milvuspb.HasPartitionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnHas(resp.(*milvuspb.BoolResponse).Value)) + HTTPReturn(c, http.StatusOK, wrapperReturnHas(resp.(*milvuspb.BoolResponse).Value)) } return resp, err } @@ -1316,11 +1348,12 @@ func (h *HandlersV2) statsPartition(ctx context.Context, c *gin.Context, anyReq CollectionName: collectionGetter.GetCollectionName(), PartitionName: partitionGetter.GetPartitionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.GetPartitionStatistics(reqCtx, req.(*milvuspb.GetPartitionStatisticsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnRowCount(resp.(*milvuspb.GetPartitionStatisticsResponse).Stats)) + HTTPReturn(c, http.StatusOK, wrapperReturnRowCount(resp.(*milvuspb.GetPartitionStatisticsResponse).Stats)) } return resp, err } @@ -1333,11 +1366,12 @@ func (h *HandlersV2) createPartition(ctx context.Context, c *gin.Context, anyReq CollectionName: collectionGetter.GetCollectionName(), PartitionName: partitionGetter.GetPartitionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.CreatePartition(reqCtx, req.(*milvuspb.CreatePartitionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1350,11 +1384,12 @@ func (h *HandlersV2) dropPartition(ctx context.Context, c *gin.Context, anyReq a CollectionName: collectionGetter.GetCollectionName(), PartitionName: partitionGetter.GetPartitionName(), } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DropPartition(reqCtx, req.(*milvuspb.DropPartitionRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1366,11 +1401,12 @@ func (h *HandlersV2) loadPartitions(ctx context.Context, c *gin.Context, anyReq CollectionName: httpReq.CollectionName, PartitionNames: httpReq.PartitionNames, } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.LoadPartitions(reqCtx, 
req.(*milvuspb.LoadPartitionsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1382,22 +1418,24 @@ func (h *HandlersV2) releasePartitions(ctx context.Context, c *gin.Context, anyR CollectionName: httpReq.CollectionName, PartitionNames: httpReq.PartitionNames, } + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ReleasePartitions(reqCtx, req.(*milvuspb.ReleasePartitionsRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } func (h *HandlersV2) listUsers(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) { req := &milvuspb.ListCredUsersRequest{} + c.Set(ContextRequest, req) resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ListCredUsers(reqCtx, req.(*milvuspb.ListCredUsersRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnList(resp.(*milvuspb.ListCredUsersResponse).Usernames)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(resp.(*milvuspb.ListCredUsersResponse).Usernames)) } return resp, err } @@ -1411,6 +1449,8 @@ func (h *HandlersV2) describeUser(ctx context.Context, c *gin.Context, anyReq an }, IncludeRoleInfo: true, } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.SelectUser(reqCtx, req.(*milvuspb.SelectUserRequest)) }) @@ -1423,7 +1463,7 @@ func (h *HandlersV2) describeUser(ctx context.Context, c *gin.Context, anyReq an } } } - c.JSON(http.StatusOK, wrapperReturnList(roleNames)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(roleNames)) } return resp, err } @@ -1438,7 +1478,7 @@ func (h *HandlersV2) createUser(ctx context.Context, c *gin.Context, anyReq any, return h.proxy.CreateCredential(reqCtx, req.(*milvuspb.CreateCredentialRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1454,7 +1494,7 @@ func (h *HandlersV2) updateUser(ctx context.Context, c *gin.Context, anyReq any, return h.proxy.UpdateCredential(reqCtx, req.(*milvuspb.UpdateCredentialRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1468,7 +1508,7 @@ func (h *HandlersV2) dropUser(ctx context.Context, c *gin.Context, anyReq any, d return h.proxy.DeleteCredential(reqCtx, req.(*milvuspb.DeleteCredentialRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1483,7 +1523,7 @@ func (h *HandlersV2) operateRoleToUser(ctx context.Context, c *gin.Context, user return h.proxy.OperateUserRole(reqCtx, req.(*milvuspb.OperateUserRoleRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1506,7 +1546,7 @@ func (h *HandlersV2) listRoles(ctx context.Context, c *gin.Context, anyReq any, for _, role := range resp.(*milvuspb.SelectRoleResponse).Results { roleNames = append(roleNames, role.Role.Name) } - c.JSON(http.StatusOK, wrapperReturnList(roleNames)) + HTTPReturn(c, 
http.StatusOK, wrapperReturnList(roleNames)) } return resp, err } @@ -1514,7 +1554,7 @@ func (h *HandlersV2) listRoles(ctx context.Context, c *gin.Context, anyReq any, func (h *HandlersV2) describeRole(ctx context.Context, c *gin.Context, anyReq any, dbName string) (interface{}, error) { getter, _ := anyReq.(RoleNameGetter) req := &milvuspb.SelectGrantRequest{ - Entity: &milvuspb.GrantEntity{Role: &milvuspb.RoleEntity{Name: getter.GetRoleName()}}, + Entity: &milvuspb.GrantEntity{Role: &milvuspb.RoleEntity{Name: getter.GetRoleName()}, DbName: dbName}, } resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.SelectGrant(reqCtx, req.(*milvuspb.SelectGrantRequest)) @@ -1531,7 +1571,7 @@ func (h *HandlersV2) describeRole(ctx context.Context, c *gin.Context, anyReq an } privileges = append(privileges, privilege) } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: privileges}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: privileges}) } return resp, err } @@ -1545,7 +1585,7 @@ func (h *HandlersV2) createRole(ctx context.Context, c *gin.Context, anyReq any, return h.proxy.CreateRole(reqCtx, req.(*milvuspb.CreateRoleRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1559,7 +1599,7 @@ func (h *HandlersV2) dropRole(ctx context.Context, c *gin.Context, anyReq any, d return h.proxy.DropRole(reqCtx, req.(*milvuspb.DropRoleRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1581,7 +1621,7 @@ func (h *HandlersV2) operatePrivilegeToRole(ctx context.Context, c *gin.Context, return h.proxy.OperatePrivilege(reqCtx, req.(*milvuspb.OperatePrivilegeRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1601,7 +1641,9 @@ func (h *HandlersV2) listIndexes(ctx context.Context, c *gin.Context, anyReq any DbName: dbName, CollectionName: collectionGetter.GetCollectionName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (any, error) { + c.Set(ContextRequest, req) + + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (any, error) { resp, err := h.proxy.DescribeIndex(reqCtx, req.(*milvuspb.DescribeIndexRequest)) if errors.Is(err, merr.ErrIndexNotFound) { return &milvuspb.DescribeIndexResponse{ @@ -1621,7 +1663,7 @@ func (h *HandlersV2) listIndexes(ctx context.Context, c *gin.Context, anyReq any for _, index := range resp.(*milvuspb.DescribeIndexResponse).IndexDescriptions { indexNames = append(indexNames, index.IndexName) } - c.JSON(http.StatusOK, wrapperReturnList(indexNames)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(indexNames)) return resp, err } @@ -1633,7 +1675,9 @@ func (h *HandlersV2) describeIndex(ctx context.Context, c *gin.Context, anyReq a CollectionName: collectionGetter.GetCollectionName(), IndexName: indexGetter.GetIndexName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { + c.Set(ContextRequest, req) + + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DescribeIndex(reqCtx, 
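Reviewer note: `describeRole` now sets `DbName` on the `GrantEntity`, so the listed grants are scoped to the requested database; the matching `request_v2.go` change further below adds an optional `dbName` field (and `GetDbName()`) to `RoleReq`. A hedged illustration of the request body shape after this change; the role and database names are made-up examples.

```go
// Illustrative only: field names come from the RoleReq json tags
// ("roleName" is required, "dbName" is optional); the values are hypothetical.
const describeRoleBody = `{
    "roleName": "read_only_role",
    "dbName":   "my_database"
}`
```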
req.(*milvuspb.DescribeIndexRequest)) }) if err == nil { @@ -1661,7 +1705,7 @@ func (h *HandlersV2) describeIndex(ctx context.Context, c *gin.Context, anyReq a } indexInfos = append(indexInfos, indexInfo) } - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: indexInfos}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: indexInfos}) } return resp, err } @@ -1678,17 +1722,19 @@ func (h *HandlersV2) createIndex(ctx context.Context, c *gin.Context, anyReq any {Key: common.MetricTypeKey, Value: indexParam.MetricType}, }, } + c.Set(ContextRequest, req) + for key, value := range indexParam.Params { req.ExtraParams = append(req.ExtraParams, &commonpb.KeyValuePair{Key: key, Value: fmt.Sprintf("%v", value)}) } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.CreateIndex(reqCtx, req.(*milvuspb.CreateIndexRequest)) }) if err != nil { return resp, err } } - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) return httpReq.IndexParams, nil } @@ -1700,11 +1746,13 @@ func (h *HandlersV2) dropIndex(ctx context.Context, c *gin.Context, anyReq any, CollectionName: collGetter.GetCollectionName(), IndexName: indexGetter.GetIndexName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { + c.Set(ContextRequest, req) + + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DropIndex(reqCtx, req.(*milvuspb.DropIndexRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1715,11 +1763,13 @@ func (h *HandlersV2) listAlias(ctx context.Context, c *gin.Context, anyReq any, DbName: dbName, CollectionName: collectionGetter.GetCollectionName(), } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.ListAliases(reqCtx, req.(*milvuspb.ListAliasesRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnList(resp.(*milvuspb.ListAliasesResponse).Aliases)) + HTTPReturn(c, http.StatusOK, wrapperReturnList(resp.(*milvuspb.ListAliasesResponse).Aliases)) } return resp, err } @@ -1730,12 +1780,14 @@ func (h *HandlersV2) describeAlias(ctx context.Context, c *gin.Context, anyReq a DbName: dbName, Alias: getter.GetAliasName(), } + c.Set(ContextRequest, req) + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DescribeAlias(reqCtx, req.(*milvuspb.DescribeAliasRequest)) }) if err == nil { response := resp.(*milvuspb.DescribeAliasResponse) - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{ + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: gin.H{ HTTPDbName: response.DbName, HTTPCollectionName: response.Collection, HTTPAliasName: response.Alias, @@ -1752,11 +1804,13 @@ func (h *HandlersV2) createAlias(ctx context.Context, c *gin.Context, anyReq any CollectionName: collectionGetter.GetCollectionName(), Alias: aliasGetter.GetAliasName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx 
context.Context, req any) (interface{}, error) { + c.Set(ContextRequest, req) + + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.CreateAlias(reqCtx, req.(*milvuspb.CreateAliasRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1767,11 +1821,13 @@ func (h *HandlersV2) dropAlias(ctx context.Context, c *gin.Context, anyReq any, DbName: dbName, Alias: getter.GetAliasName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { + c.Set(ContextRequest, req) + + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.DropAlias(reqCtx, req.(*milvuspb.DropAliasRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1784,11 +1840,13 @@ func (h *HandlersV2) alterAlias(ctx context.Context, c *gin.Context, anyReq any, CollectionName: collectionGetter.GetCollectionName(), Alias: aliasGetter.GetAliasName(), } - resp, err := wrapperProxy(ctx, c, req, false, false, func(reqCtx context.Context, req any) (interface{}, error) { + c.Set(ContextRequest, req) + + resp, err := wrapperProxy(ctx, c, req, h.checkAuth, false, func(reqCtx context.Context, req any) (interface{}, error) { return h.proxy.AlterAlias(reqCtx, req.(*milvuspb.AlterAliasRequest)) }) if err == nil { - c.JSON(http.StatusOK, wrapperReturnDefault()) + HTTPReturn(c, http.StatusOK, wrapperReturnDefault()) } return resp, err } @@ -1802,6 +1860,8 @@ func (h *HandlersV2) listImportJob(ctx context.Context, c *gin.Context, anyReq a DbName: dbName, CollectionName: collectionName, } + c.Set(ContextRequest, req) + if h.checkAuth { err := checkAuthorizationV2(ctx, c, false, &milvuspb.ListImportsAuthPlaceholder{ DbName: dbName, @@ -1831,7 +1891,7 @@ func (h *HandlersV2) listImportJob(ctx context.Context, c *gin.Context, anyReq a records = append(records, jobDetail) } returnData["records"] = records - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: returnData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: returnData}) } return resp, err } @@ -1852,6 +1912,8 @@ func (h *HandlersV2) createImportJob(ctx context.Context, c *gin.Context, anyReq }), Options: funcutil.Map2KeyValuePair(optionsGetter.GetOptions()), } + c.Set(ContextRequest, req) + if h.checkAuth { err := checkAuthorizationV2(ctx, c, false, &milvuspb.ImportAuthPlaceholder{ DbName: dbName, @@ -1868,7 +1930,7 @@ func (h *HandlersV2) createImportJob(ctx context.Context, c *gin.Context, anyReq if err == nil { returnData := make(map[string]interface{}) returnData["jobId"] = resp.(*internalpb.ImportResponse).GetJobID() - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: returnData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: returnData}) } return resp, err } @@ -1879,6 +1941,8 @@ func (h *HandlersV2) getImportJobProcess(ctx context.Context, c *gin.Context, an DbName: dbName, JobID: jobIDGetter.GetJobID(), } + c.Set(ContextRequest, req) + if h.checkAuth { err := checkAuthorizationV2(ctx, c, false, &milvuspb.GetImportProgressAuthPlaceholder{ DbName: dbName, @@ -1924,7 +1988,7 @@ func (h *HandlersV2) getImportJobProcess(ctx context.Context, c 
*gin.Context, an } returnData["fileSize"] = totalFileSize returnData["details"] = details - c.JSON(http.StatusOK, gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: returnData}) + HTTPReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(nil), HTTPReturnData: returnData}) } return resp, err } diff --git a/internal/distributed/proxy/httpserver/handler_v2_test.go b/internal/distributed/proxy/httpserver/handler_v2_test.go index 13cc65cee5634..5a57e536b963c 100644 --- a/internal/distributed/proxy/httpserver/handler_v2_test.go +++ b/internal/distributed/proxy/httpserver/handler_v2_test.go @@ -471,11 +471,11 @@ func TestDatabaseWrapper(t *testing.T) { func TestCreateCollection(t *testing.T) { postTestCases := []requestBodyTestCase{} mp := mocks.NewMockProxy(t) - mp.EXPECT().CreateCollection(mock.Anything, mock.Anything).Return(commonSuccessStatus, nil).Times(11) + mp.EXPECT().CreateCollection(mock.Anything, mock.Anything).Return(commonSuccessStatus, nil).Times(12) mp.EXPECT().CreateIndex(mock.Anything, mock.Anything).Return(commonSuccessStatus, nil).Times(6) mp.EXPECT().LoadCollection(mock.Anything, mock.Anything).Return(commonSuccessStatus, nil).Times(6) mp.EXPECT().CreateIndex(mock.Anything, mock.Anything).Return(commonErrorStatus, nil).Twice() - mp.EXPECT().CreateCollection(mock.Anything, mock.Anything).Return(commonErrorStatus, nil).Once() + mp.EXPECT().CreateCollection(mock.Anything, mock.Anything).Return(commonErrorStatus, nil).Twice() testEngine := initHTTPServerV2(mp, false) path := versionalV2(CollectionCategory, CreateAction) // quickly create collection @@ -564,6 +564,18 @@ func TestCreateCollection(t *testing.T) { ] }}`), }) + // dim should not be specified for SparseFloatVector field + postTestCases = append(postTestCases, requestBodyTestCase{ + path: path, + requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": true, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "isPartitionKey": false, "elementTypeParams": {}}, + {"fieldName": "partition_field", "dataType": "VarChar", "isPartitionKey": true, "elementTypeParams": {"max_length": 256}}, + {"fieldName": "book_intro", "dataType": "SparseFloatVector", "elementTypeParams": {}} + ] + }, "params": {"partitionsNum": "32"}}`), + }) postTestCases = append(postTestCases, requestBodyTestCase{ path: path, requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { @@ -612,6 +624,18 @@ func TestCreateCollection(t *testing.T) { errMsg: "", errCode: 65535, }) + postTestCases = append(postTestCases, requestBodyTestCase{ + path: path, + requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": true, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_intro", "dataType": "SparseFloatVector", "elementTypeParams": {"dim": 2}} + ] + }, "indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]}`), + errMsg: "", + errCode: 65535, + }) for _, testcase := range postTestCases { t.Run("post"+testcase.path, func(t *testing.T) { @@ -623,11 +647,9 @@ func TestCreateCollection(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, 
testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, int32(200), returnBody.Code) } }) } @@ -718,7 +740,15 @@ func TestMethodGet(t *testing.T) { }, nil).Twice() mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{Status: commonErrorStatus}, nil).Once() mp.EXPECT().GetLoadState(mock.Anything, mock.Anything).Return(&milvuspb.GetLoadStateResponse{Status: commonErrorStatus}, nil).Once() - mp.EXPECT().GetLoadState(mock.Anything, mock.Anything).Return(&DefaultLoadStateResp, nil).Times(3) + mp.EXPECT().GetLoadState(mock.Anything, mock.Anything).Return(&DefaultLoadStateResp, nil).Times(4) + mp.EXPECT().GetLoadState(mock.Anything, mock.Anything).Return(&milvuspb.GetLoadStateResponse{ + Status: &StatusSuccess, + State: commonpb.LoadState_LoadStateNotExist, + }, nil).Once() + mp.EXPECT().GetLoadState(mock.Anything, mock.Anything).Return(&milvuspb.GetLoadStateResponse{ + Status: &StatusSuccess, + State: commonpb.LoadState_LoadStateNotLoad, + }, nil).Once() mp.EXPECT().DescribeIndex(mock.Anything, mock.Anything).Return(&milvuspb.DescribeIndexResponse{Status: commonErrorStatus}, nil).Once() mp.EXPECT().DescribeIndex(mock.Anything, mock.Anything).Return(&DefaultDescIndexesReqp, nil).Times(3) mp.EXPECT().DescribeIndex(mock.Anything, mock.Anything).Return(nil, merr.WrapErrIndexNotFoundForCollection(DefaultCollectionName)).Once() @@ -741,6 +771,10 @@ func TestMethodGet(t *testing.T) { Status: commonSuccessStatus, Progress: int64(77), }, nil).Once() + mp.EXPECT().GetLoadingProgress(mock.Anything, mock.Anything).Return(&milvuspb.GetLoadingProgressResponse{ + Status: commonSuccessStatus, + Progress: int64(100), + }, nil).Once() mp.EXPECT().GetLoadingProgress(mock.Anything, mock.Anything).Return(&milvuspb.GetLoadingProgressResponse{Status: commonErrorStatus}, nil).Once() mp.EXPECT().ShowPartitions(mock.Anything, mock.Anything).Return(&milvuspb.ShowPartitionsResponse{ Status: &StatusSuccess, @@ -841,6 +875,17 @@ func TestMethodGet(t *testing.T) { queryTestCases = append(queryTestCases, rawTestCase{ path: versionalV2(CollectionCategory, LoadStateAction), }) + queryTestCases = append(queryTestCases, rawTestCase{ + path: versionalV2(CollectionCategory, LoadStateAction), + }) + queryTestCases = append(queryTestCases, rawTestCase{ + path: versionalV2(CollectionCategory, LoadStateAction), + errCode: 100, + errMsg: "collection not found[collection=book]", + }) + queryTestCases = append(queryTestCases, rawTestCase{ + path: versionalV2(CollectionCategory, LoadStateAction), + }) queryTestCases = append(queryTestCases, rawTestCase{ path: versionalV2(PartitionCategory, ListAction), }) @@ -882,7 +927,7 @@ func TestMethodGet(t *testing.T) { }) for _, testcase := range queryTestCases { - t.Run("query", func(t *testing.T) { + t.Run(testcase.path, func(t *testing.T) { bodyReader := bytes.NewReader([]byte(`{` + `"collectionName": "` + DefaultCollectionName + `",` + `"partitionName": "` + DefaultPartitionName + `",` + @@ -898,11 +943,9 @@ func TestMethodGet(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, int32(http.StatusOK), returnBody.Code) } fmt.Println(w.Body.String()) }) @@ -949,7 +992,7 @@ func TestMethodDelete(t *testing.T) { path: 
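Reviewer note: the test loops used to assert `returnBody.Code == 200` on the happy path and only check `errCode` on failures. Since success responses now report code 0 (`merr.Code(nil)`), the zero value of `testcase.errCode` already matches success, so the assertion runs unconditionally. The pattern repeated across these test files:

```go
// New assertion pattern: errCode defaults to 0, which is exactly the success
// code after this change, so no branch is needed for successful responses.
assert.Equal(t, testcase.errCode, returnBody.Code)
if testcase.errCode != 0 {
	assert.Equal(t, testcase.errMsg, returnBody.Message)
}
```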
versionalV2(AliasCategory, DropAction), }) for _, testcase := range queryTestCases { - t.Run("query", func(t *testing.T) { + t.Run(testcase.path, func(t *testing.T) { bodyReader := bytes.NewReader([]byte(`{"collectionName": "` + DefaultCollectionName + `", "partitionName": "` + DefaultPartitionName + `", "userName": "` + util.UserRoot + `", "roleName": "` + util.RoleAdmin + `", "indexName": "` + DefaultIndexName + `", "aliasName": "` + DefaultAliasName + `"}`)) req := httptest.NewRequest(http.MethodPost, testcase.path, bodyReader) @@ -959,11 +1002,9 @@ func TestMethodDelete(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, int32(http.StatusOK), returnBody.Code) } fmt.Println(w.Body.String()) }) @@ -1080,7 +1121,7 @@ func TestMethodPost(t *testing.T) { }) for _, testcase := range queryTestCases { - t.Run("query", func(t *testing.T) { + t.Run(testcase.path, func(t *testing.T) { bodyReader := bytes.NewReader([]byte(`{` + `"collectionName": "` + DefaultCollectionName + `", "newCollectionName": "test", "newDbName": "",` + `"partitionName": "` + DefaultPartitionName + `", "partitionNames": ["` + DefaultPartitionName + `"],` + @@ -1099,11 +1140,9 @@ func TestMethodPost(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, int32(http.StatusOK), returnBody.Code) } fmt.Println(w.Body.String()) }) @@ -1198,7 +1237,7 @@ func TestDML(t *testing.T) { }) for _, testcase := range queryTestCases { - t.Run("query", func(t *testing.T) { + t.Run(testcase.path, func(t *testing.T) { bodyReader := bytes.NewReader(testcase.requestBody) req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader) w := httptest.NewRecorder() @@ -1207,11 +1246,51 @@ func TestDML(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) + if testcase.errCode != 0 { + assert.Equal(t, testcase.errMsg, returnBody.Message) + } + fmt.Println(w.Body.String()) + }) + } +} + +func TestAllowInt64(t *testing.T) { + paramtable.Init() + mp := mocks.NewMockProxy(t) + testEngine := initHTTPServerV2(mp, false) + queryTestCases := []requestBodyTestCase{} + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: InsertAction, + requestBody: []byte(`{"collectionName": "book", "data": [{"book_id": 0, "word_count": 0, "book_intro": [0.11825, 0.6]}]}`), + }) + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: UpsertAction, + requestBody: []byte(`{"collectionName": "book", "data": [{"book_id": 0, "word_count": 0, "book_intro": [0.11825, 0.6]}]}`), + }) + mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ + CollectionName: DefaultCollectionName, + Schema: generateCollectionSchema(schemapb.DataType_Int64), + ShardsNum: ShardNumDefault, + Status: &StatusSuccess, + }, nil).Twice() + mp.EXPECT().Insert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: 
commonSuccessStatus, InsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once() + mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, UpsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once() + + for _, testcase := range queryTestCases { + t.Run(testcase.path, func(t *testing.T) { + bodyReader := bytes.NewReader(testcase.requestBody) + req := httptest.NewRequest(http.MethodPost, versionalV2(EntityCategory, testcase.path), bodyReader) + req.Header.Set(HTTPHeaderAllowInt64, "true") + w := httptest.NewRecorder() + testEngine.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) + returnBody := &ReturnErrMsg{} + err := json.Unmarshal(w.Body.Bytes(), returnBody) + assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, int32(http.StatusOK), returnBody.Code) } fmt.Println(w.Body.String()) }) @@ -1220,18 +1299,33 @@ func TestDML(t *testing.T) { func TestSearchV2(t *testing.T) { paramtable.Init() + outputFields := []string{FieldBookID, FieldWordCount, "author", "date"} mp := mocks.NewMockProxy(t) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, Schema: generateCollectionSchema(schemapb.DataType_Int64), ShardsNum: ShardNumDefault, Status: &StatusSuccess, - }, nil).Times(10) + }, nil).Times(12) + mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{ + TopK: int64(3), + OutputFields: outputFields, + FieldsData: generateFieldData(), + Ids: generateIDs(schemapb.DataType_Int64, 3), + Scores: DefaultScores, + }}, nil).Once() mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3) mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: &commonpb.Status{ ErrorCode: 1700, // ErrFieldNotFound Reason: "groupBy field not found in schema: field not found[field=test]", }}, nil).Once() + mp.EXPECT().HybridSearch(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{ + TopK: int64(3), + OutputFields: outputFields, + FieldsData: generateFieldData(), + Ids: generateIDs(schemapb.DataType_Int64, 3), + Scores: DefaultScores, + }}, nil).Once() mp.EXPECT().HybridSearch(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3) collSchema := generateCollectionSchema(schemapb.DataType_Int64) binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector) @@ -1240,22 +1334,29 @@ func TestSearchV2(t *testing.T) { float16VectorField.Name = "float16Vector" bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector) bfloat16VectorField.Name = "bfloat16Vector" + sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector) + sparseFloatVectorField.Name = "sparseFloatVector" collSchema.Fields = append(collSchema.Fields, &binaryVectorField) collSchema.Fields = append(collSchema.Fields, 
&float16VectorField) collSchema.Fields = append(collSchema.Fields, &bfloat16VectorField) + collSchema.Fields = append(collSchema.Fields, &sparseFloatVectorField) mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ CollectionName: DefaultCollectionName, Schema: collSchema, ShardsNum: ShardNumDefault, Status: &StatusSuccess, - }, nil).Times(9) - mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Twice() + }, nil).Times(10) + mp.EXPECT().Search(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3) testEngine := initHTTPServerV2(mp, false) queryTestCases := []requestBodyTestCase{} queryTestCases = append(queryTestCases, requestBodyTestCase{ path: SearchAction, requestBody: []byte(`{"collectionName": "book", "data": [[0.1, 0.2]], "filter": "book_id in [2, 4, 6, 8]", "limit": 4, "outputFields": ["word_count"]}`), }) + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: SearchAction, + requestBody: []byte(`{"collectionName": "book", "data": [[0.1, 0.2]], "filter": "book_id in [2, 4, 6, 8]", "limit": 4, "outputFields": ["word_count"]}`), + }) queryTestCases = append(queryTestCases, requestBodyTestCase{ path: SearchAction, requestBody: []byte(`{"collectionName": "book", "data": [[0.1, 0.2]], "filter": "book_id in [2, 4, 6, 8]", "limit": 4, "outputFields": ["word_count"], "params": {"radius":0.9}}`), @@ -1286,6 +1387,10 @@ func TestSearchV2(t *testing.T) { path: AdvancedSearchAction, requestBody: []byte(`{"collectionName": "hello_milvus", "search": [{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}, {"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), }) + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: AdvancedSearchAction, + requestBody: []byte(`{"collectionName": "hello_milvus", "search": [{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}, {"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), + }) queryTestCases = append(queryTestCases, requestBodyTestCase{ path: HybridSearchAction, requestBody: []byte(`{"collectionName": "hello_milvus", "search": [{"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}, {"data": [[0.1, 0.2]], "annsField": "book_intro", "metricType": "L2", "limit": 3}], "rerank": {"strategy": "weighted", "params": {"weights": [0.9, 0.8]}}}`), @@ -1377,9 +1482,13 @@ func TestSearchV2(t *testing.T) { errMsg: "can only accept json format request, error: dimension: 2, bytesLen: 4, but length of []byte: 3: invalid parameter[expected=BFloat16Vector][actual=\x01\x02\x03]", errCode: 1801, }) + queryTestCases = append(queryTestCases, requestBodyTestCase{ + path: SearchAction, + requestBody: []byte(`{"collectionName": "book", "data": [{"1": 0.1}], "annsField": "sparseFloatVector", "filter": "book_id in [2, 4, 6, 8]", "limit": 4, "outputFields": ["word_count"]}`), + }) for _, testcase := range queryTestCases { - t.Run("search", func(t *testing.T) { + t.Run(testcase.path, func(t *testing.T) { bodyReader := bytes.NewReader(testcase.requestBody) req := httptest.NewRequest(http.MethodPost, 
versionalV2(EntityCategory, testcase.path), bodyReader) w := httptest.NewRecorder() @@ -1388,11 +1497,9 @@ func TestSearchV2(t *testing.T) { returnBody := &ReturnErrMsg{} err := json.Unmarshal(w.Body.Bytes(), returnBody) assert.Nil(t, err) + assert.Equal(t, testcase.errCode, returnBody.Code) if testcase.errCode != 0 { - assert.Equal(t, testcase.errCode, returnBody.Code) assert.Equal(t, testcase.errMsg, returnBody.Message) - } else { - assert.Equal(t, int32(http.StatusOK), returnBody.Code) } fmt.Println(w.Body.String()) }) diff --git a/internal/distributed/proxy/httpserver/request_v2.go b/internal/distributed/proxy/httpserver/request_v2.go index aa32cfcb6c29e..f5fe86a69e4d3 100644 --- a/internal/distributed/proxy/httpserver/request_v2.go +++ b/internal/distributed/proxy/httpserver/request_v2.go @@ -1,7 +1,6 @@ package httpserver import ( - "net/http" "strconv" "github.com/gin-gonic/gin" @@ -248,9 +247,12 @@ type UserRoleReq struct { } type RoleReq struct { + DbName string `json:"dbName"` RoleName string `json:"roleName" binding:"required"` } +func (req *RoleReq) GetDbName() string { return req.DbName } + func (req *RoleReq) GetRoleName() string { return req.RoleName } @@ -263,6 +265,8 @@ type GrantReq struct { DbName string `json:"dbName"` } +func (req *GrantReq) GetDbName() string { return req.DbName } + type IndexParam struct { FieldName string `json:"fieldName" binding:"required"` IndexName string `json:"indexName" binding:"required"` @@ -352,14 +356,14 @@ func (req *AliasCollectionReq) GetAliasName() string { } func wrapperReturnHas(has bool) gin.H { - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{HTTPReturnHas: has}} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{HTTPReturnHas: has}} } func wrapperReturnList(names []string) gin.H { if names == nil { - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: []string{}} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: []string{}} } - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: names} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: names} } func wrapperReturnRowCount(pairs []*commonpb.KeyValuePair) gin.H { @@ -371,15 +375,15 @@ func wrapperReturnRowCount(pairs []*commonpb.KeyValuePair) gin.H { } rowCount, err := strconv.ParseInt(rowCountValue, 10, 64) if err != nil { - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{HTTPReturnRowCount: rowCountValue}} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{HTTPReturnRowCount: rowCountValue}} } - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{HTTPReturnRowCount: rowCount}} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{HTTPReturnRowCount: rowCount}} } func wrapperReturnDefault() gin.H { - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{}} } func wrapperReturnDefaultWithCost(cost int) gin.H { - return gin.H{HTTPReturnCode: http.StatusOK, HTTPReturnData: gin.H{}, HTTPReturnCost: cost} + return gin.H{HTTPReturnCode: commonpb.ErrorCode_Success, HTTPReturnData: gin.H{}, HTTPReturnCost: cost} } diff --git a/internal/distributed/proxy/httpserver/utils.go b/internal/distributed/proxy/httpserver/utils.go index c57e98d4eba85..7a0ce94af7a38 100644 --- a/internal/distributed/proxy/httpserver/utils.go +++ b/internal/distributed/proxy/httpserver/utils.go @@ -28,6 +28,22 @@ import ( 
"github.com/milvus-io/milvus/pkg/util/typeutil" ) +func HTTPReturn(c *gin.Context, code int, result gin.H) { + c.Set(HTTPReturnCode, result[HTTPReturnCode]) + if errorMsg, ok := result[HTTPReturnMessage]; ok { + c.Set(HTTPReturnMessage, errorMsg) + } + c.JSON(code, result) +} + +func HTTPAbortReturn(c *gin.Context, code int, result gin.H) { + c.Set(HTTPReturnCode, result[HTTPReturnCode]) + if errorMsg, ok := result[HTTPReturnMessage]; ok { + c.Set(HTTPReturnMessage, errorMsg) + } + c.AbortWithStatusJSON(code, result) +} + func ParseUsernamePassword(c *gin.Context) (string, string, bool) { username, password, ok := c.Request.BasicAuth() if !ok { @@ -248,6 +264,15 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray } reallyData[fieldName] = vectorArray + case schemapb.DataType_SparseFloatVector: + if dataString == "" { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray + } + sparseVec, err := typeutil.CreateSparseFloatRowFromJSON([]byte(dataString)) + if err != nil { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + } + reallyData[fieldName] = sparseVec case schemapb.DataType_Float16Vector: if dataString == "" { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray @@ -638,6 +663,9 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) data = make([][]byte, 0, rowsLen) dim, _ := getDim(field) nameDims[field.Name] = dim + case schemapb.DataType_SparseFloatVector: + data = make([][]byte, 0, rowsLen) + nameDims[field.Name] = int64(0) default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", field.DataType, field.Name) } @@ -704,6 +732,13 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) case schemapb.DataType_BFloat16Vector: nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), candi.v.Interface().([]byte)) + case schemapb.DataType_SparseFloatVector: + content := candi.v.Interface().([]byte) + rowSparseDim := typeutil.SparseFloatRowDim(content) + if rowSparseDim > nameDims[field.Name] { + nameDims[field.Name] = rowSparseDim + } + nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), content) default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", field.DataType, field.Name) } @@ -895,6 +930,18 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) }, }, } + case schemapb.DataType_SparseFloatVector: + colData.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: nameDims[name], + Data: &schemapb.VectorField_SparseFloatVector{ + SparseFloatVector: &schemapb.SparseFloatArray{ + Dim: nameDims[name], + Contents: column.([][]byte), + }, + }, + }, + } default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", colData.Type, name) } @@ -963,6 +1010,19 @@ func serializeByteVectors(vectorStr string, dataType schemapb.DataType, dimensio return values, nil } +func serializeSparseFloatVectors(vectors []gjson.Result, dataType schemapb.DataType) ([][]byte, 
error) { + values := make([][]byte, 0) + for _, vector := range vectors { + vectorBytes := []byte(vector.String()) + sparseVector, err := typeutil.CreateSparseFloatRowFromJSON(vectorBytes) + if err != nil { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vector.String(), err.Error()) + } + values = append(values, sparseVector) + } + return values, nil +} + func convertVectors2Placeholder(body string, dataType schemapb.DataType, dimension int64) (*commonpb.PlaceholderValue, error) { var valueType commonpb.PlaceholderType var values [][]byte @@ -980,6 +1040,9 @@ func convertVectors2Placeholder(body string, dataType schemapb.DataType, dimensi case schemapb.DataType_BFloat16Vector: valueType = commonpb.PlaceholderType_BFloat16Vector values, err = serializeByteVectors(gjson.Get(body, HTTPRequestData).Raw, dataType, dimension, dimension*2) + case schemapb.DataType_SparseFloatVector: + valueType = commonpb.PlaceholderType_SparseFloatVector + values, err = serializeSparseFloatVectors(gjson.Get(body, HTTPRequestData).Array(), dataType) } if err != nil { return nil, err @@ -1037,7 +1100,7 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap var queryResp []map[string]interface{} columnNum := len(fieldDataList) - if rowsNum == int64(0) { + if rowsNum == int64(0) { // always if columnNum > 0 { switch fieldDataList[0].Type { case schemapb.DataType_Bool: @@ -1070,6 +1133,8 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap rowsNum = int64(len(fieldDataList[0].GetVectors().GetFloat16Vector())/2) / fieldDataList[0].GetVectors().GetDim() case schemapb.DataType_BFloat16Vector: rowsNum = int64(len(fieldDataList[0].GetVectors().GetBfloat16Vector())/2) / fieldDataList[0].GetVectors().GetDim() + case schemapb.DataType_SparseFloatVector: + rowsNum = int64(len(fieldDataList[0].GetVectors().GetSparseFloatVector().Contents)) default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", fieldDataList[0].Type, fieldDataList[0].FieldName) } @@ -1125,6 +1190,8 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetFloat16Vector()[i*(fieldDataList[j].GetVectors().GetDim()*2) : (i+1)*(fieldDataList[j].GetVectors().GetDim()*2)] case schemapb.DataType_BFloat16Vector: row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetBfloat16Vector()[i*(fieldDataList[j].GetVectors().GetDim()*2) : (i+1)*(fieldDataList[j].GetVectors().GetDim()*2)] + case schemapb.DataType_SparseFloatVector: + row[fieldDataList[j].FieldName] = typeutil.SparseFloatBytesToMap(fieldDataList[j].GetVectors().GetSparseFloatVector().Contents[i]) case schemapb.DataType_Array: row[fieldDataList[j].FieldName] = fieldDataList[j].GetScalars().GetArrayData().Data[i] case schemapb.DataType_JSON: diff --git a/internal/distributed/proxy/httpserver/utils_test.go b/internal/distributed/proxy/httpserver/utils_test.go index 945783c335cb7..f860bb37fb125 100644 --- a/internal/distributed/proxy/httpserver/utils_test.go +++ b/internal/distributed/proxy/httpserver/utils_test.go @@ -16,6 +16,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/util/typeutil" ) const ( @@ -178,21 +179,45 @@ func generateVectorFieldData(vectorType schemapb.DataType) schemapb.FieldData { }, IsDynamic: false, } - } 
- return schemapb.FieldData{ - Type: schemapb.DataType_FloatVector, - FieldName: FieldBookIntro, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: 2, - Data: &schemapb.VectorField_FloatVector{ - FloatVector: &schemapb.FloatArray{ - Data: []float32{0.1, 0.11, 0.2, 0.22, 0.3, 0.33}, + case schemapb.DataType_FloatVector: + return schemapb.FieldData{ + Type: schemapb.DataType_FloatVector, + FieldName: FieldBookIntro, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: 2, + Data: &schemapb.VectorField_FloatVector{ + FloatVector: &schemapb.FloatArray{ + Data: []float32{0.1, 0.11, 0.2, 0.22, 0.3, 0.33}, + }, }, }, }, - }, - IsDynamic: false, + IsDynamic: false, + } + case schemapb.DataType_SparseFloatVector: + contents := make([][]byte, 0, 3) + contents = append(contents, typeutil.CreateSparseFloatRow([]uint32{1, 2, 3}, []float32{0.1, 0.11, 0.2})) + contents = append(contents, typeutil.CreateSparseFloatRow([]uint32{100, 200, 300}, []float32{10.1, 20.11, 30.2})) + contents = append(contents, typeutil.CreateSparseFloatRow([]uint32{1000, 2000, 3000}, []float32{5000.1, 7000.11, 9000.2})) + return schemapb.FieldData{ + Type: schemapb.DataType_SparseFloatVector, + FieldName: FieldBookIntro, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(3001), + Data: &schemapb.VectorField_SparseFloatVector{ + SparseFloatVector: &schemapb.SparseFloatArray{ + Dim: int64(3001), + Contents: contents, + }, + }, + }, + }, + IsDynamic: false, + } + default: + panic("unsupported vector type") } } @@ -1005,7 +1030,7 @@ func newFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schemapb.Data switch firstFieldType { case schemapb.DataType_None: - break + return fieldDatas case schemapb.DataType_Bool: return []*schemapb.FieldData{&fieldData1} case schemapb.DataType_Int8: @@ -1038,6 +1063,9 @@ func newFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schemapb.Data return []*schemapb.FieldData{&fieldData10} case schemapb.DataType_JSON: return []*schemapb.FieldData{&fieldData9} + case schemapb.DataType_SparseFloatVector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} default: return []*schemapb.FieldData{ { @@ -1046,8 +1074,6 @@ func newFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schemapb.Data }, } } - - return fieldDatas } func newSearchResult(results []map[string]interface{}) []map[string]interface{} { @@ -1225,26 +1251,30 @@ func TestVector(t *testing.T) { binaryVector := "vector-binary" float16Vector := "vector-float16" bfloat16Vector := "vector-bfloat16" + sparseFloatVector := "vector-sparse-float" row1 := map[string]interface{}{ - FieldBookID: int64(1), - floatVector: []float32{0.1, 0.11}, - binaryVector: []byte{1}, - float16Vector: []byte{1, 1, 11, 11}, - bfloat16Vector: []byte{1, 1, 11, 11}, + FieldBookID: int64(1), + floatVector: []float32{0.1, 0.11}, + binaryVector: []byte{1}, + float16Vector: []byte{1, 1, 11, 11}, + bfloat16Vector: []byte{1, 1, 11, 11}, + sparseFloatVector: map[uint32]float32{0: 0.1, 1: 0.11}, } row2 := map[string]interface{}{ - FieldBookID: int64(2), - floatVector: []float32{0.2, 0.22}, - binaryVector: []byte{2}, - float16Vector: []byte{2, 2, 22, 22}, - bfloat16Vector: []byte{2, 2, 22, 22}, + FieldBookID: int64(2), + floatVector: []float32{0.2, 0.22}, + binaryVector: []byte{2}, + float16Vector: []byte{2, 2, 22, 22}, + bfloat16Vector: []byte{2, 2, 22, 22}, + sparseFloatVector: map[uint32]float32{1000: 0.3, 200: 0.44}, } row3 := 
map[string]interface{}{ - FieldBookID: int64(3), - floatVector: []float32{0.3, 0.33}, - binaryVector: []byte{3}, - float16Vector: []byte{3, 3, 33, 33}, - bfloat16Vector: []byte{3, 3, 33, 33}, + FieldBookID: int64(3), + floatVector: []float32{0.3, 0.33}, + binaryVector: []byte{3}, + float16Vector: []byte{3, 3, 33, 33}, + bfloat16Vector: []byte{3, 3, 33, 33}, + sparseFloatVector: map[uint32]float32{987621: 32190.31, 32189: 0.0001}, } body, _ := wrapRequestBody([]map[string]interface{}{row1, row2, row3}) primaryField := generatePrimaryField(schemapb.DataType_Int64) @@ -1256,12 +1286,14 @@ func TestVector(t *testing.T) { float16VectorField.Name = float16Vector bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector) bfloat16VectorField.Name = bfloat16Vector + sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector) + sparseFloatVectorField.Name = sparseFloatVector collectionSchema := &schemapb.CollectionSchema{ Name: DefaultCollectionName, Description: "", AutoID: false, Fields: []*schemapb.FieldSchema{ - &primaryField, &floatVectorField, &binaryVectorField, &float16VectorField, &bfloat16VectorField, + &primaryField, &floatVectorField, &binaryVectorField, &float16VectorField, &bfloat16VectorField, &sparseFloatVectorField, }, EnableDynamicField: true, } @@ -1271,27 +1303,29 @@ func TestVector(t *testing.T) { assert.Equal(t, 1, len(row[binaryVector].([]byte))) assert.Equal(t, 4, len(row[float16Vector].([]byte))) assert.Equal(t, 4, len(row[bfloat16Vector].([]byte))) + // all test sparse rows have 2 elements, each should be of 8 bytes + assert.Equal(t, 16, len(row[sparseFloatVector].([]byte))) } data, err := anyToColumns(rows, collectionSchema) assert.Equal(t, nil, err) assert.Equal(t, len(collectionSchema.Fields)+1, len(data)) - row1[bfloat16Vector] = []int64{99999999, -99999999} - body, _ = wrapRequestBody([]map[string]interface{}{row1}) - err, _ = checkAndSetData(string(body), collectionSchema) - assert.Error(t, err) - row1[float16Vector] = []int64{99999999, -99999999} - body, _ = wrapRequestBody([]map[string]interface{}{row1}) - err, _ = checkAndSetData(string(body), collectionSchema) - assert.Error(t, err) - row1[binaryVector] = []int64{99999999, -99999999} - body, _ = wrapRequestBody([]map[string]interface{}{row1}) - err, _ = checkAndSetData(string(body), collectionSchema) - assert.Error(t, err) - row1[floatVector] = []float64{math.MaxFloat64, 0} - body, _ = wrapRequestBody([]map[string]interface{}{row1}) - err, _ = checkAndSetData(string(body), collectionSchema) - assert.Error(t, err) + assertError := func(field string, value interface{}) { + row := make(map[string]interface{}) + for k, v := range row1 { + row[k] = v + } + row[field] = value + body, _ = wrapRequestBody([]map[string]interface{}{row}) + err, _ = checkAndSetData(string(body), collectionSchema) + assert.Error(t, err) + } + + assertError(bfloat16Vector, []int64{99999999, -99999999}) + assertError(float16Vector, []int64{99999999, -99999999}) + assertError(binaryVector, []int64{99999999, -99999999}) + assertError(floatVector, []float64{math.MaxFloat64, 0}) + assertError(sparseFloatVector, map[uint32]float32{0: -0.1, 1: 0.11, 2: 0.12}) } func TestBuildQueryResps(t *testing.T) { @@ -1305,7 +1339,7 @@ func TestBuildQueryResps(t *testing.T) { } dataTypes := []schemapb.DataType{ - schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, + schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, 
schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, schemapb.DataType_SparseFloatVector, schemapb.DataType_Bool, schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32, schemapb.DataType_Float, schemapb.DataType_Double, schemapb.DataType_String, schemapb.DataType_VarChar, diff --git a/internal/distributed/proxy/httpserver/wrap_request.go b/internal/distributed/proxy/httpserver/wrap_request.go index a8f5eec8b98e1..79d2f0dfa80c4 100644 --- a/internal/distributed/proxy/httpserver/wrap_request.go +++ b/internal/distributed/proxy/httpserver/wrap_request.go @@ -12,6 +12,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/util/typeutil" ) // We wrap original protobuf structure for 2 reasons: @@ -212,6 +213,40 @@ func (f *FieldData) AsSchemapb() (*schemapb.FieldData, error) { }, }, } + case schemapb.DataType_SparseFloatVector: + var wrappedData []map[string]interface{} + err := json.Unmarshal(raw, &wrappedData) + if err != nil { + return nil, newFieldDataError(f.FieldName, err) + } + if len(wrappedData) < 1 { + return nil, errors.New("at least one row for insert") + } + data := make([][]byte, 0, len(wrappedData)) + dim := int64(0) + for _, row := range wrappedData { + rowData, err := typeutil.CreateSparseFloatRowFromMap(row) + if err != nil { + return nil, newFieldDataError(f.FieldName, err) + } + data = append(data, rowData) + rowDim := typeutil.SparseFloatRowDim(rowData) + if rowDim > dim { + dim = rowDim + } + } + + ret.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: dim, + Data: &schemapb.VectorField_SparseFloatVector{ + SparseFloatVector: &schemapb.SparseFloatArray{ + Dim: dim, + Contents: data, + }, + }, + }, + } default: return nil, errors.New("unsupported data type") } diff --git a/internal/distributed/proxy/httpserver/wrap_request_test.go b/internal/distributed/proxy/httpserver/wrap_request_test.go index defddf831a2c7..4d673fb6bd0ff 100644 --- a/internal/distributed/proxy/httpserver/wrap_request_test.go +++ b/internal/distributed/proxy/httpserver/wrap_request_test.go @@ -219,6 +219,101 @@ func TestFieldData_AsSchemapb(t *testing.T) { _, err := fieldData.AsSchemapb() assert.Error(t, err) }) + + t.Run("sparsefloatvector_ok_1", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"1": 0.1, "2": 0.2}, + {"3": 0.1, "5": 0.2}, + {"4": 0.1, "6": 0.2} + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.NoError(t, err) + }) + + t.Run("sparsefloatvector_ok_2", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"indices": [1, 2], "values": [0.1, 0.2]}, + {"indices": [3, 5], "values": [0.1, 0.2]}, + {"indices": [4, 6], "values": [0.1, 0.2]} + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.NoError(t, err) + }) + + t.Run("sparsefloatvector_ok_3", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"indices": [1, 2], "values": [0.1, 0.2]}, + {"3": 0.1, "5": 0.2}, + {"indices": [4, 6], "values": [0.1, 0.2]} + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.NoError(t, err) + }) + 
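// The cases above and below exercise the two JSON encodings this patch accepts for a sparse
// float vector row: a plain {"index": value} map and an {"indices": [...], "values": [...]}
// object. A minimal sketch of the row layout they decode into, assuming the typeutil helpers
// used elsewhere in this diff (CreateSparseFloatRow, SparseFloatRowDim) behave as the
// surrounding tests expect:
//
//	row := typeutil.CreateSparseFloatRow([]uint32{1, 2}, []float32{0.1, 0.2})
//	length := len(row)                     // 16: two elements, 8 bytes each (index + value)
//	dim := typeutil.SparseFloatRowDim(row) // 3: highest index + 1
//	_, _ = length, dim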
t.Run("sparsefloatvector_empty_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + + t.Run("sparsefloatvector_invalid_json_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"3": 0.1, : 0.2} + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + + t.Run("sparsefloatvector_invalid_row_1_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"indices": [1, 2], "values": [-0.1, 0.2]}, + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + + t.Run("sparsefloatvector_invalid_row_2_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_SparseFloatVector, + Field: []byte(`[ + {"indices": [1, -2], "values": [0.1, 0.2]}, + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) } func Test_vector2Bytes(t *testing.T) { diff --git a/internal/distributed/proxy/service.go b/internal/distributed/proxy/service.go index bb902bc3280ae..faae5ed75064f 100644 --- a/internal/distributed/proxy/service.go +++ b/internal/distributed/proxy/service.go @@ -172,6 +172,8 @@ func (s *Server) registerHTTPServer() { func (s *Server) startHTTPServer(errChan chan error) { defer s.wg.Done() ginHandler := gin.New() + ginHandler.Use(accesslog.AccessLogMiddleware) + ginLogger := gin.LoggerWithConfig(gin.LoggerConfig{ SkipPaths: proxy.Params.ProxyCfg.GinLogSkipPaths.GetAsStrings(), Formatter: func(param gin.LogFormatterParams) string { @@ -182,6 +184,8 @@ func (s *Server) startHTTPServer(errChan chan error) { if !ok { traceID = "" } + + accesslog.SetHTTPParams(¶m) return fmt.Sprintf("[%v] [GIN] [%s] [traceID=%s] [code=%3d] [latency=%v] [client=%s] [method=%s] [error=%s]\n", param.TimeStamp.Format("2006/01/02 15:04:05.000 Z07:00"), param.Path, diff --git a/internal/indexnode/indexnode_service.go b/internal/indexnode/indexnode_service.go index a690e35e4a10a..fb9d5a0cc19a1 100644 --- a/internal/indexnode/indexnode_service.go +++ b/internal/indexnode/indexnode_service.go @@ -55,6 +55,8 @@ func (i *IndexNode) CreateJob(ctx context.Context, req *indexpb.CreateJobRequest defer i.lifetime.Done() log.Info("IndexNode building index ...", zap.Int64("collectionID", req.GetCollectionID()), + zap.Int64("partitionID", req.GetPartitionID()), + zap.Int64("segmentID", req.GetSegmentID()), zap.Int64("indexID", req.GetIndexID()), zap.String("indexName", req.GetIndexName()), zap.String("indexFilePrefix", req.GetIndexFilePrefix()), diff --git a/internal/indexnode/task.go b/internal/indexnode/task.go index b14343900d99c..54c8b3fe45a66 100644 --- a/internal/indexnode/task.go +++ b/internal/indexnode/task.go @@ -18,7 +18,6 @@ package indexnode import ( "context" - "encoding/json" "fmt" "runtime/debug" "strconv" @@ -30,6 +29,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/indexcgopb" "github.com/milvus-io/milvus/internal/proto/indexpb" "github.com/milvus-io/milvus/internal/storage" 
"github.com/milvus-io/milvus/internal/util/indexcgowrapper" @@ -84,12 +84,21 @@ type indexBuildTaskV2 struct { } func (it *indexBuildTaskV2) parseParams(ctx context.Context) error { - it.collectionID = it.req.CollectionID - it.partitionID = it.req.PartitionID - it.segmentID = it.req.SegmentID - it.fieldType = it.req.FieldType - it.fieldID = it.req.FieldID - it.fieldName = it.req.FieldName + it.collectionID = it.req.GetCollectionID() + it.partitionID = it.req.GetPartitionID() + it.segmentID = it.req.GetSegmentID() + it.fieldType = it.req.GetFieldType() + if it.fieldType == schemapb.DataType_None { + it.fieldType = it.req.GetField().GetDataType() + } + it.fieldID = it.req.GetFieldID() + if it.fieldID == 0 { + it.fieldID = it.req.GetField().GetFieldID() + } + it.fieldName = it.req.GetFieldName() + if it.fieldName == "" { + it.fieldName = it.req.GetField().GetName() + } return nil } @@ -138,61 +147,66 @@ func (it *indexBuildTaskV2) BuildIndex(ctx context.Context) error { } } - var buildIndexInfo *indexcgowrapper.BuildIndexInfo - buildIndexInfo, err = indexcgowrapper.NewBuildIndexInfo(it.req.GetStorageConfig()) - defer indexcgowrapper.DeleteBuildIndexInfo(buildIndexInfo) - if err != nil { - log.Ctx(ctx).Warn("create build index info failed", zap.Error(err)) - return err - } - err = buildIndexInfo.AppendFieldMetaInfoV2(it.collectionID, it.partitionID, it.segmentID, it.fieldID, it.fieldType, it.fieldName, it.req.Dim) - if err != nil { - log.Ctx(ctx).Warn("append field meta failed", zap.Error(err)) - return err - } - - err = buildIndexInfo.AppendIndexMetaInfo(it.req.IndexID, it.req.BuildID, it.req.IndexVersion) - if err != nil { - log.Ctx(ctx).Warn("append index meta failed", zap.Error(err)) - return err - } - - err = buildIndexInfo.AppendBuildIndexParam(it.newIndexParams) - if err != nil { - log.Ctx(ctx).Warn("append index params failed", zap.Error(err)) - return err - } - - err = buildIndexInfo.AppendIndexStorageInfo(it.req.StorePath, it.req.IndexStorePath, it.req.StoreVersion) - if err != nil { - log.Ctx(ctx).Warn("append storage info failed", zap.Error(err)) - return err - } - - jsonIndexParams, err := json.Marshal(it.newIndexParams) - if err != nil { - log.Ctx(ctx).Error("failed to json marshal index params", zap.Error(err)) - return err - } - - log.Ctx(ctx).Info("index params are ready", - zap.Int64("buildID", it.BuildID), - zap.String("index params", string(jsonIndexParams))) - - err = buildIndexInfo.AppendBuildTypeParam(it.newTypeParams) - if err != nil { - log.Ctx(ctx).Warn("append type params failed", zap.Error(err)) - return err + storageConfig := &indexcgopb.StorageConfig{ + Address: it.req.GetStorageConfig().GetAddress(), + AccessKeyID: it.req.GetStorageConfig().GetAccessKeyID(), + SecretAccessKey: it.req.GetStorageConfig().GetSecretAccessKey(), + UseSSL: it.req.GetStorageConfig().GetUseSSL(), + BucketName: it.req.GetStorageConfig().GetBucketName(), + RootPath: it.req.GetStorageConfig().GetRootPath(), + UseIAM: it.req.GetStorageConfig().GetUseIAM(), + IAMEndpoint: it.req.GetStorageConfig().GetIAMEndpoint(), + StorageType: it.req.GetStorageConfig().GetStorageType(), + UseVirtualHost: it.req.GetStorageConfig().GetUseVirtualHost(), + Region: it.req.GetStorageConfig().GetRegion(), + CloudProvider: it.req.GetStorageConfig().GetCloudProvider(), + RequestTimeoutMs: it.req.GetStorageConfig().GetRequestTimeoutMs(), + SslCACert: it.req.GetStorageConfig().GetSslCACert(), + } + + optFields := make([]*indexcgopb.OptionalFieldInfo, 0, len(it.req.GetOptionalScalarFields())) + for _, optField := 
range it.req.GetOptionalScalarFields() { + optFields = append(optFields, &indexcgopb.OptionalFieldInfo{ + FieldID: optField.GetFieldID(), + FieldName: optField.GetFieldName(), + FieldType: optField.GetFieldType(), + DataPaths: optField.GetDataPaths(), + }) } - for _, optField := range it.req.GetOptionalScalarFields() { - if err := buildIndexInfo.AppendOptionalField(optField); err != nil { - log.Ctx(ctx).Warn("append optional field failed", zap.Error(err)) - return err + it.currentIndexVersion = getCurrentIndexVersion(it.req.GetCurrentIndexVersion()) + field := it.req.GetField() + if field == nil || field.GetDataType() == schemapb.DataType_None { + field = &schemapb.FieldSchema{ + FieldID: it.fieldID, + Name: it.fieldName, + DataType: it.fieldType, } } - it.index, err = indexcgowrapper.CreateIndexV2(ctx, buildIndexInfo) + buildIndexParams := &indexcgopb.BuildIndexInfo{ + ClusterID: it.ClusterID, + BuildID: it.BuildID, + CollectionID: it.collectionID, + PartitionID: it.partitionID, + SegmentID: it.segmentID, + IndexVersion: it.req.GetIndexVersion(), + CurrentIndexVersion: it.currentIndexVersion, + NumRows: it.req.GetNumRows(), + Dim: it.req.GetDim(), + IndexFilePrefix: it.req.GetIndexFilePrefix(), + InsertFiles: it.req.GetDataPaths(), + FieldSchema: field, + StorageConfig: storageConfig, + IndexParams: mapToKVPairs(it.newIndexParams), + TypeParams: mapToKVPairs(it.newTypeParams), + StorePath: it.req.GetStorePath(), + StoreVersion: it.req.GetStoreVersion(), + IndexStorePath: it.req.GetIndexStorePath(), + OptFields: optFields, + } + + it.index, err = indexcgowrapper.CreateIndexV2(ctx, buildIndexParams) if err != nil { if it.index != nil && it.index.CleanLocalData() != nil { log.Ctx(ctx).Error("failed to clean cached data on disk after build index failed", @@ -328,7 +342,7 @@ func (it *indexBuildTask) Prepare(ctx context.Context) error { if len(it.req.DataPaths) == 0 { for _, id := range it.req.GetDataIds() { - path := metautil.BuildInsertLogPath(it.req.GetStorageConfig().RootPath, it.req.GetCollectionID(), it.req.GetPartitionID(), it.req.GetSegmentID(), it.req.GetFieldID(), id) + path := metautil.BuildInsertLogPath(it.req.GetStorageConfig().RootPath, it.req.GetCollectionID(), it.req.GetPartitionID(), it.req.GetSegmentID(), it.req.GetField().GetFieldID(), id) it.req.DataPaths = append(it.req.DataPaths, path) } } @@ -362,16 +376,10 @@ func (it *indexBuildTask) Prepare(ctx context.Context) error { } it.newTypeParams = typeParams it.newIndexParams = indexParams + it.statistic.IndexParams = it.req.GetIndexParams() - // ugly codes to get dimension - if dimStr, ok := typeParams[common.DimKey]; ok { - var err error - it.statistic.Dim, err = strconv.ParseInt(dimStr, 10, 64) - if err != nil { - log.Ctx(ctx).Error("parse dimesion failed", zap.Error(err)) - // ignore error - } - } + it.statistic.Dim = it.req.GetDim() + log.Ctx(ctx).Info("Successfully prepare indexBuildTask", zap.Int64("buildID", it.BuildID), zap.Int64("Collection", it.collectionID), zap.Int64("SegmentID", it.segmentID)) return nil @@ -482,69 +490,65 @@ func (it *indexBuildTask) BuildIndex(ctx context.Context) error { } } - var buildIndexInfo *indexcgowrapper.BuildIndexInfo - buildIndexInfo, err = indexcgowrapper.NewBuildIndexInfo(it.req.GetStorageConfig()) - defer indexcgowrapper.DeleteBuildIndexInfo(buildIndexInfo) - if err != nil { - log.Ctx(ctx).Warn("create build index info failed", zap.Error(err)) - return err - } - err = buildIndexInfo.AppendFieldMetaInfo(it.collectionID, it.partitionID, it.segmentID, it.fieldID, it.fieldType) - if 
err != nil { - log.Ctx(ctx).Warn("append field meta failed", zap.Error(err)) - return err - } - - err = buildIndexInfo.AppendIndexMetaInfo(it.req.IndexID, it.req.BuildID, it.req.IndexVersion) - if err != nil { - log.Ctx(ctx).Warn("append index meta failed", zap.Error(err)) - return err - } - - err = buildIndexInfo.AppendBuildIndexParam(it.newIndexParams) - if err != nil { - log.Ctx(ctx).Warn("append index params failed", zap.Error(err)) - return err - } - - jsonIndexParams, err := json.Marshal(it.newIndexParams) - if err != nil { - log.Ctx(ctx).Error("failed to json marshal index params", zap.Error(err)) - return err - } - - log.Ctx(ctx).Info("index params are ready", - zap.Int64("buildID", it.BuildID), - zap.String("index params", string(jsonIndexParams))) - - err = buildIndexInfo.AppendBuildTypeParam(it.newTypeParams) - if err != nil { - log.Ctx(ctx).Warn("append type params failed", zap.Error(err)) - return err - } - - for _, path := range it.req.GetDataPaths() { - err = buildIndexInfo.AppendInsertFile(path) - if err != nil { - log.Ctx(ctx).Warn("append insert binlog path failed", zap.Error(err)) - return err - } + storageConfig := &indexcgopb.StorageConfig{ + Address: it.req.GetStorageConfig().GetAddress(), + AccessKeyID: it.req.GetStorageConfig().GetAccessKeyID(), + SecretAccessKey: it.req.GetStorageConfig().GetSecretAccessKey(), + UseSSL: it.req.GetStorageConfig().GetUseSSL(), + BucketName: it.req.GetStorageConfig().GetBucketName(), + RootPath: it.req.GetStorageConfig().GetRootPath(), + UseIAM: it.req.GetStorageConfig().GetUseIAM(), + IAMEndpoint: it.req.GetStorageConfig().GetIAMEndpoint(), + StorageType: it.req.GetStorageConfig().GetStorageType(), + UseVirtualHost: it.req.GetStorageConfig().GetUseVirtualHost(), + Region: it.req.GetStorageConfig().GetRegion(), + CloudProvider: it.req.GetStorageConfig().GetCloudProvider(), + RequestTimeoutMs: it.req.GetStorageConfig().GetRequestTimeoutMs(), + SslCACert: it.req.GetStorageConfig().GetSslCACert(), + } + + optFields := make([]*indexcgopb.OptionalFieldInfo, 0, len(it.req.GetOptionalScalarFields())) + for _, optField := range it.req.GetOptionalScalarFields() { + optFields = append(optFields, &indexcgopb.OptionalFieldInfo{ + FieldID: optField.GetFieldID(), + FieldName: optField.GetFieldName(), + FieldType: optField.GetFieldType(), + DataPaths: optField.GetDataPaths(), + }) } it.currentIndexVersion = getCurrentIndexVersion(it.req.GetCurrentIndexVersion()) - if err := buildIndexInfo.AppendIndexEngineVersion(it.currentIndexVersion); err != nil { - log.Ctx(ctx).Warn("append index engine version failed", zap.Error(err)) - return err - } - - for _, optField := range it.req.GetOptionalScalarFields() { - if err := buildIndexInfo.AppendOptionalField(optField); err != nil { - log.Ctx(ctx).Warn("append optional field failed", zap.Error(err)) - return err + field := it.req.GetField() + if field == nil || field.GetDataType() == schemapb.DataType_None { + field = &schemapb.FieldSchema{ + FieldID: it.fieldID, + Name: it.fieldName, + DataType: it.fieldType, } } - - it.index, err = indexcgowrapper.CreateIndex(ctx, buildIndexInfo) + buildIndexParams := &indexcgopb.BuildIndexInfo{ + ClusterID: it.ClusterID, + BuildID: it.BuildID, + CollectionID: it.collectionID, + PartitionID: it.partitionID, + SegmentID: it.segmentID, + IndexVersion: it.req.GetIndexVersion(), + CurrentIndexVersion: it.currentIndexVersion, + NumRows: it.req.GetNumRows(), + Dim: it.req.GetDim(), + IndexFilePrefix: it.req.GetIndexFilePrefix(), + InsertFiles: it.req.GetDataPaths(), + FieldSchema: 
field, + StorageConfig: storageConfig, + IndexParams: mapToKVPairs(it.newIndexParams), + TypeParams: mapToKVPairs(it.newTypeParams), + StorePath: it.req.GetStorePath(), + StoreVersion: it.req.GetStoreVersion(), + IndexStorePath: it.req.GetIndexStorePath(), + OptFields: optFields, + } + + it.index, err = indexcgowrapper.CreateIndex(ctx, buildIndexParams) if err != nil { if it.index != nil && it.index.CleanLocalData() != nil { log.Ctx(ctx).Error("failed to clean cached data on disk after build index failed", @@ -653,8 +657,6 @@ func (it *indexBuildTask) decodeBlobs(ctx context.Context, blobs []*storage.Blob deserializeDur := it.tr.RecordSpan() log.Ctx(ctx).Info("IndexNode deserialize data success", - zap.Int64("index id", it.req.IndexID), - zap.String("index name", it.req.IndexName), zap.Int64("collectionID", it.collectionID), zap.Int64("partitionID", it.partitionID), zap.Int64("segmentID", it.segmentID), diff --git a/internal/indexnode/task_test.go b/internal/indexnode/task_test.go index dc30abd800eec..6450c3e504a71 100644 --- a/internal/indexnode/task_test.go +++ b/internal/indexnode/task_test.go @@ -283,12 +283,14 @@ func (suite *IndexBuildTaskV2Suite) TestBuildIndex() { RootPath: "/tmp/milvus/data", StorageType: "local", }, - CollectionID: 1, - PartitionID: 1, - SegmentID: 1, - FieldID: 3, - FieldName: "vec", - FieldType: schemapb.DataType_FloatVector, + CollectionID: 1, + PartitionID: 1, + SegmentID: 1, + Field: &schemapb.FieldSchema{ + FieldID: 3, + Name: "vec", + DataType: schemapb.DataType_FloatVector, + }, StorePath: "file://" + suite.space.Path(), StoreVersion: suite.space.GetCurrentVersion(), IndexStorePath: "file://" + suite.space.Path(), diff --git a/internal/indexnode/util.go b/internal/indexnode/util.go index 9186f9855a81b..8aaa92910503f 100644 --- a/internal/indexnode/util.go +++ b/internal/indexnode/util.go @@ -19,6 +19,7 @@ package indexnode import ( "github.com/cockroachdb/errors" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" ) @@ -36,3 +37,14 @@ func estimateFieldDataSize(dim int64, numRows int64, dataType schemapb.DataType) return 0, nil } } + +func mapToKVPairs(m map[string]string) []*commonpb.KeyValuePair { + kvs := make([]*commonpb.KeyValuePair, 0, len(m)) + for k, v := range m { + kvs = append(kvs, &commonpb.KeyValuePair{ + Key: k, + Value: v, + }) + } + return kvs +} diff --git a/internal/indexnode/util_test.go b/internal/indexnode/util_test.go new file mode 100644 index 0000000000000..6d7d98e823240 --- /dev/null +++ b/internal/indexnode/util_test.go @@ -0,0 +1,41 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package indexnode + +import ( + "testing" + + "github.com/stretchr/testify/suite" +) + +type utilSuite struct { + suite.Suite +} + +func (s *utilSuite) Test_mapToKVPairs() { + indexParams := map[string]string{ + "index_type": "IVF_FLAT", + "dim": "128", + "nlist": "1024", + } + + s.Equal(3, len(mapToKVPairs(indexParams))) +} + +func Test_utilSuite(t *testing.T) { + suite.Run(t, new(utilSuite)) +} diff --git a/internal/kv/kv.go b/internal/kv/kv.go index 14091cdc1e842..929febe2c8080 100644 --- a/internal/kv/kv.go +++ b/internal/kv/kv.go @@ -91,5 +91,6 @@ type SnapShotKV interface { Load(key string, ts typeutil.Timestamp) (string, error) MultiSave(kvs map[string]string, ts typeutil.Timestamp) error LoadWithPrefix(key string, ts typeutil.Timestamp) ([]string, []string, error) + MultiSaveAndRemove(saves map[string]string, removals []string, ts typeutil.Timestamp) error MultiSaveAndRemoveWithPrefix(saves map[string]string, removals []string, ts typeutil.Timestamp) error } diff --git a/internal/kv/mock_snapshot_kv.go b/internal/kv/mock_snapshot_kv.go index 35cc851853dc6..9eed834997324 100644 --- a/internal/kv/mock_snapshot_kv.go +++ b/internal/kv/mock_snapshot_kv.go @@ -11,6 +11,7 @@ type mockSnapshotKV struct { MultiSaveFunc func(kvs map[string]string, ts typeutil.Timestamp) error LoadWithPrefixFunc func(key string, ts typeutil.Timestamp) ([]string, []string, error) MultiSaveAndRemoveWithPrefixFunc func(saves map[string]string, removals []string, ts typeutil.Timestamp) error + MultiSaveAndRemoveFunc func(saves map[string]string, removals []string, ts typeutil.Timestamp) error } func NewMockSnapshotKV() *mockSnapshotKV { @@ -51,3 +52,10 @@ func (m mockSnapshotKV) MultiSaveAndRemoveWithPrefix(saves map[string]string, re } return nil } + +func (m mockSnapshotKV) MultiSaveAndRemove(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + if m.MultiSaveAndRemoveFunc != nil { + return m.MultiSaveAndRemoveFunc(saves, removals, ts) + } + return nil +} diff --git a/internal/kv/mock_snapshot_kv_test.go b/internal/kv/mock_snapshot_kv_test.go index 94e6f2136afb7..0b2df70f9173f 100644 --- a/internal/kv/mock_snapshot_kv_test.go +++ b/internal/kv/mock_snapshot_kv_test.go @@ -87,3 +87,19 @@ func Test_mockSnapshotKV_MultiSaveAndRemoveWithPrefix(t *testing.T) { assert.NoError(t, err) }) } + +func Test_mockSnapshotKV_MultiSaveAndRemove(t *testing.T) { + t.Run("func not set", func(t *testing.T) { + snapshot := NewMockSnapshotKV() + err := snapshot.MultiSaveAndRemove(nil, nil, 0) + assert.NoError(t, err) + }) + t.Run("func set", func(t *testing.T) { + snapshot := NewMockSnapshotKV() + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + return nil + } + err := snapshot.MultiSaveAndRemove(nil, nil, 0) + assert.NoError(t, err) + }) +} diff --git a/internal/kv/mocks/snapshot_kv.go b/internal/kv/mocks/snapshot_kv.go index e1e4ef7c1c3f2..dc2de1d78379b 100644 --- a/internal/kv/mocks/snapshot_kv.go +++ b/internal/kv/mocks/snapshot_kv.go @@ -177,6 +177,50 @@ func (_c *SnapShotKV_MultiSave_Call) RunAndReturn(run func(map[string]string, ui return _c } +// MultiSaveAndRemove provides a mock function with given fields: saves, removals, ts +func (_m *SnapShotKV) MultiSaveAndRemove(saves map[string]string, removals []string, ts uint64) error { + ret := _m.Called(saves, removals, ts) + + var r0 error + if rf, ok := ret.Get(0).(func(map[string]string, []string, uint64) error); ok { + r0 = rf(saves, removals, ts) + } else { + r0 = 
ret.Error(0) + } + + return r0 +} + +// SnapShotKV_MultiSaveAndRemove_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'MultiSaveAndRemove' +type SnapShotKV_MultiSaveAndRemove_Call struct { + *mock.Call +} + +// MultiSaveAndRemove is a helper method to define mock.On call +// - saves map[string]string +// - removals []string +// - ts uint64 +func (_e *SnapShotKV_Expecter) MultiSaveAndRemove(saves interface{}, removals interface{}, ts interface{}) *SnapShotKV_MultiSaveAndRemove_Call { + return &SnapShotKV_MultiSaveAndRemove_Call{Call: _e.mock.On("MultiSaveAndRemove", saves, removals, ts)} +} + +func (_c *SnapShotKV_MultiSaveAndRemove_Call) Run(run func(saves map[string]string, removals []string, ts uint64)) *SnapShotKV_MultiSaveAndRemove_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(map[string]string), args[1].([]string), args[2].(uint64)) + }) + return _c +} + +func (_c *SnapShotKV_MultiSaveAndRemove_Call) Return(_a0 error) *SnapShotKV_MultiSaveAndRemove_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *SnapShotKV_MultiSaveAndRemove_Call) RunAndReturn(run func(map[string]string, []string, uint64) error) *SnapShotKV_MultiSaveAndRemove_Call { + _c.Call.Return(run) + return _c +} + // MultiSaveAndRemoveWithPrefix provides a mock function with given fields: saves, removals, ts func (_m *SnapShotKV) MultiSaveAndRemoveWithPrefix(saves map[string]string, removals []string, ts uint64) error { ret := _m.Called(saves, removals, ts) diff --git a/internal/metastore/kv/binlog/binlog.go b/internal/metastore/kv/binlog/binlog.go index 94e0c09cc73e6..8b1d47d0e5970 100644 --- a/internal/metastore/kv/binlog/binlog.go +++ b/internal/metastore/kv/binlog/binlog.go @@ -148,7 +148,7 @@ func DecompressBinLog(binlogType storage.BinlogType, collectionID, partitionID, for _, fieldBinlog := range fieldBinlogs { for _, binlog := range fieldBinlog.Binlogs { if binlog.GetLogPath() == "" { - path, err := buildLogPath(binlogType, collectionID, partitionID, + path, err := BuildLogPath(binlogType, collectionID, partitionID, segmentID, fieldBinlog.GetFieldID(), binlog.GetLogID()) if err != nil { return err @@ -161,7 +161,7 @@ func DecompressBinLog(binlogType storage.BinlogType, collectionID, partitionID, } // build a binlog path on the storage by metadata -func buildLogPath(binlogType storage.BinlogType, collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID) (string, error) { +func BuildLogPath(binlogType storage.BinlogType, collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID) (string, error) { chunkManagerRootPath := paramtable.Get().MinioCfg.RootPath.GetValue() if paramtable.Get().CommonCfg.StorageType.GetValue() == "local" { chunkManagerRootPath = paramtable.Get().LocalStorageCfg.Path.GetValue() diff --git a/internal/metastore/kv/rootcoord/kv_catalog.go b/internal/metastore/kv/rootcoord/kv_catalog.go index 9edcfe13f6be5..916195598efc0 100644 --- a/internal/metastore/kv/rootcoord/kv_catalog.go +++ b/internal/metastore/kv/rootcoord/kv_catalog.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "sort" "github.com/cockroachdb/errors" "github.com/golang/protobuf/proto" @@ -85,7 +84,7 @@ func BuildAliasPrefixWithDB(dbID int64) string { // since SnapshotKV may save both snapshot key and the original key if the original key is newest // MaxEtcdTxnNum need to divided by 2 -func batchMultiSaveAndRemoveWithPrefix(snapshot kv.SnapShotKV, limit int, saves map[string]string, removals []string, ts typeutil.Timestamp) error { 
+func batchMultiSaveAndRemove(snapshot kv.SnapShotKV, limit int, saves map[string]string, removals []string, ts typeutil.Timestamp) error { saveFn := func(partialKvs map[string]string) error { return snapshot.MultiSave(partialKvs, ts) } @@ -93,14 +92,8 @@ func batchMultiSaveAndRemoveWithPrefix(snapshot kv.SnapShotKV, limit int, saves return err } - // avoid a case that the former key is the prefix of the later key. - // for example, `root-coord/fields/collection_id/1` is the prefix of `root-coord/fields/collection_id/100`. - sort.Slice(removals, func(i, j int) bool { - return removals[i] > removals[j] - }) - removeFn := func(partialKeys []string) error { - return snapshot.MultiSaveAndRemoveWithPrefix(nil, partialKeys, ts) + return snapshot.MultiSaveAndRemove(nil, partialKeys, ts) } return etcd.RemoveByBatchWithLimit(removals, limit, removeFn) } @@ -127,7 +120,7 @@ func (kc *Catalog) AlterDatabase(ctx context.Context, newColl *model.Database, t func (kc *Catalog) DropDatabase(ctx context.Context, dbID int64, ts typeutil.Timestamp) error { key := BuildDatabaseKey(dbID) - return kc.Snapshot.MultiSaveAndRemoveWithPrefix(nil, []string{key}, ts) + return kc.Snapshot.MultiSaveAndRemove(nil, []string{key}, ts) } func (kc *Catalog) ListDatabases(ctx context.Context, ts typeutil.Timestamp) ([]*model.Database, error) { @@ -300,7 +293,7 @@ func (kc *Catalog) CreateAlias(ctx context.Context, alias *model.Alias, ts typeu return err } kvs := map[string]string{k: string(v)} - return kc.Snapshot.MultiSaveAndRemoveWithPrefix(kvs, []string{oldKBefore210, oldKeyWithoutDb}, ts) + return kc.Snapshot.MultiSaveAndRemove(kvs, []string{oldKBefore210, oldKeyWithoutDb}, ts) } func (kc *Catalog) CreateCredential(ctx context.Context, credential *model.Credential) error { @@ -455,12 +448,12 @@ func (kc *Catalog) DropCollection(ctx context.Context, collectionInfo *model.Col // However, if we remove collection first, we cannot remove other metas. // since SnapshotKV may save both snapshot key and the original key if the original key is newest // MaxEtcdTxnNum need to divided by 2 - if err := batchMultiSaveAndRemoveWithPrefix(kc.Snapshot, util.MaxEtcdTxnNum/2, nil, delMetakeysSnap, ts); err != nil { + if err := batchMultiSaveAndRemove(kc.Snapshot, util.MaxEtcdTxnNum/2, nil, delMetakeysSnap, ts); err != nil { return err } // if we found collection dropping, we should try removing related resources. 
- return kc.Snapshot.MultiSaveAndRemoveWithPrefix(nil, collectionKeys, ts) + return kc.Snapshot.MultiSaveAndRemove(nil, collectionKeys, ts) } func (kc *Catalog) alterModifyCollection(oldColl *model.Collection, newColl *model.Collection, ts typeutil.Timestamp) error { @@ -491,7 +484,7 @@ func (kc *Catalog) alterModifyCollection(oldColl *model.Collection, newColl *mod if oldKey == newKey { return kc.Snapshot.Save(newKey, string(value), ts) } - return kc.Snapshot.MultiSaveAndRemoveWithPrefix(saves, []string{oldKey}, ts) + return kc.Snapshot.MultiSaveAndRemove(saves, []string{oldKey}, ts) } func (kc *Catalog) AlterCollection(ctx context.Context, oldColl *model.Collection, newColl *model.Collection, alterType metastore.AlterType, ts typeutil.Timestamp) error { @@ -559,7 +552,7 @@ func (kc *Catalog) DropPartition(ctx context.Context, dbID int64, collectionID t if partitionVersionAfter210(collMeta) { k := BuildPartitionKey(collectionID, partitionID) - return kc.Snapshot.MultiSaveAndRemoveWithPrefix(nil, []string{k}, ts) + return kc.Snapshot.MultiSaveAndRemove(nil, []string{k}, ts) } k := BuildCollectionKey(util.NonDBID, collectionID) @@ -601,7 +594,7 @@ func (kc *Catalog) DropAlias(ctx context.Context, dbID int64, alias string, ts t oldKBefore210 := BuildAliasKey210(alias) oldKeyWithoutDb := BuildAliasKey(alias) k := BuildAliasKeyWithDB(dbID, alias) - return kc.Snapshot.MultiSaveAndRemoveWithPrefix(nil, []string{k, oldKeyWithoutDb, oldKBefore210}, ts) + return kc.Snapshot.MultiSaveAndRemove(nil, []string{k, oldKeyWithoutDb, oldKBefore210}, ts) } func (kc *Catalog) GetCollectionByName(ctx context.Context, dbID int64, collectionName string, ts typeutil.Timestamp) (*model.Collection, error) { diff --git a/internal/metastore/kv/rootcoord/kv_catalog_test.go b/internal/metastore/kv/rootcoord/kv_catalog_test.go index 7523c821677d5..5cb3c0f293d12 100644 --- a/internal/metastore/kv/rootcoord/kv_catalog_test.go +++ b/internal/metastore/kv/rootcoord/kv_catalog_test.go @@ -495,7 +495,7 @@ func TestCatalog_CreateAliasV2(t *testing.T) { ctx := context.Background() snapshot := kv.NewMockSnapshotKV() - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return errors.New("mock") } @@ -504,7 +504,7 @@ func TestCatalog_CreateAliasV2(t *testing.T) { err := kc.CreateAlias(ctx, &model.Alias{}, 0) assert.Error(t, err) - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return nil } err = kc.CreateAlias(ctx, &model.Alias{}, 0) @@ -623,7 +623,7 @@ func TestCatalog_AlterAliasV2(t *testing.T) { ctx := context.Background() snapshot := kv.NewMockSnapshotKV() - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return errors.New("mock") } @@ -632,7 +632,7 @@ func TestCatalog_AlterAliasV2(t *testing.T) { err := kc.AlterAlias(ctx, &model.Alias{}, 0) assert.Error(t, err) - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals 
[]string, ts typeutil.Timestamp) error { return nil } err = kc.AlterAlias(ctx, &model.Alias{}, 0) @@ -706,7 +706,7 @@ func TestCatalog_DropPartitionV2(t *testing.T) { snapshot.LoadFunc = func(key string, ts typeutil.Timestamp) (string, error) { return string(value), nil } - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return errors.New("mock") } @@ -715,7 +715,7 @@ func TestCatalog_DropPartitionV2(t *testing.T) { err = kc.DropPartition(ctx, 0, 100, 101, 0) assert.Error(t, err) - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return nil } err = kc.DropPartition(ctx, 0, 100, 101, 0) @@ -758,7 +758,7 @@ func TestCatalog_DropAliasV2(t *testing.T) { ctx := context.Background() snapshot := kv.NewMockSnapshotKV() - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return errors.New("mock") } @@ -767,7 +767,7 @@ func TestCatalog_DropAliasV2(t *testing.T) { err := kc.DropAlias(ctx, testDb, "alias", 0) assert.Error(t, err) - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { return nil } err = kc.DropAlias(ctx, testDb, "alias", 0) @@ -942,14 +942,14 @@ func TestCatalog_ListAliasesV2(t *testing.T) { }) } -func Test_batchMultiSaveAndRemoveWithPrefix(t *testing.T) { +func Test_batchMultiSaveAndRemove(t *testing.T) { t.Run("failed to save", func(t *testing.T) { snapshot := kv.NewMockSnapshotKV() snapshot.MultiSaveFunc = func(kvs map[string]string, ts typeutil.Timestamp) error { return errors.New("error mock MultiSave") } saves := map[string]string{"k": "v"} - err := batchMultiSaveAndRemoveWithPrefix(snapshot, util.MaxEtcdTxnNum/2, saves, []string{}, 0) + err := batchMultiSaveAndRemove(snapshot, util.MaxEtcdTxnNum/2, saves, []string{}, 0) assert.Error(t, err) }) t.Run("failed to remove", func(t *testing.T) { @@ -957,12 +957,12 @@ func Test_batchMultiSaveAndRemoveWithPrefix(t *testing.T) { snapshot.MultiSaveFunc = func(kvs map[string]string, ts typeutil.Timestamp) error { return nil } - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { - return errors.New("error mock MultiSaveAndRemoveWithPrefix") + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + return errors.New("error mock MultiSaveAndRemove") } saves := map[string]string{"k": "v"} removals := []string{"prefix1", "prefix2"} - err := batchMultiSaveAndRemoveWithPrefix(snapshot, util.MaxEtcdTxnNum/2, saves, removals, 0) + err := batchMultiSaveAndRemove(snapshot, util.MaxEtcdTxnNum/2, saves, removals, 0) assert.Error(t, err) }) t.Run("normal case", func(t *testing.T) { @@ -971,7 +971,7 @@ func Test_batchMultiSaveAndRemoveWithPrefix(t *testing.T) { log.Info("multi save", zap.Any("len", len(kvs)), zap.Any("saves", kvs)) return nil } - 
snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { log.Info("multi save and remove with prefix", zap.Any("len of saves", len(saves)), zap.Any("len of removals", len(removals)), zap.Any("saves", saves), zap.Any("removals", removals)) return nil @@ -983,7 +983,7 @@ func Test_batchMultiSaveAndRemoveWithPrefix(t *testing.T) { saves[fmt.Sprintf("k%d", i)] = fmt.Sprintf("v%d", i) removals = append(removals, fmt.Sprintf("k%d", i)) } - err := batchMultiSaveAndRemoveWithPrefix(snapshot, util.MaxEtcdTxnNum/2, saves, removals, 0) + err := batchMultiSaveAndRemove(snapshot, util.MaxEtcdTxnNum/2, saves, removals, 0) assert.NoError(t, err) }) } @@ -1040,7 +1040,7 @@ func TestCatalog_AlterCollection(t *testing.T) { t.Run("modify db name", func(t *testing.T) { var collectionID int64 = 1 snapshot := kv.NewMockSnapshotKV() - snapshot.MultiSaveAndRemoveWithPrefixFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + snapshot.MultiSaveAndRemoveFunc = func(saves map[string]string, removals []string, ts typeutil.Timestamp) error { assert.ElementsMatch(t, []string{BuildCollectionKey(0, collectionID)}, removals) assert.Equal(t, len(saves), 1) assert.Contains(t, maps.Keys(saves), BuildCollectionKey(1, collectionID)) @@ -1149,6 +1149,17 @@ func withMockMultiSaveAndRemoveWithPrefix(err error) mockSnapshotOpt { } } +func withMockMultiSaveAndRemove(err error) mockSnapshotOpt { + return func(ss *mocks.SnapShotKV) { + ss.On( + "MultiSaveAndRemove", + mock.AnythingOfType("map[string]string"), + mock.AnythingOfType("[]string"), + mock.AnythingOfType("uint64")). + Return(err) + } +} + func TestCatalog_CreateCollection(t *testing.T) { t.Run("collection not creating", func(t *testing.T) { kc := &Catalog{} @@ -1198,7 +1209,7 @@ func TestCatalog_CreateCollection(t *testing.T) { func TestCatalog_DropCollection(t *testing.T) { t.Run("failed to remove", func(t *testing.T) { - mockSnapshot := newMockSnapshot(t, withMockMultiSaveAndRemoveWithPrefix(errors.New("error mock MultiSaveAndRemoveWithPrefix"))) + mockSnapshot := newMockSnapshot(t, withMockMultiSaveAndRemove(errors.New("error mock MultiSaveAndRemove"))) kc := &Catalog{Snapshot: mockSnapshot} ctx := context.Background() coll := &model.Collection{ @@ -1216,7 +1227,7 @@ func TestCatalog_DropCollection(t *testing.T) { removeOtherCalled := false removeCollectionCalled := false mockSnapshot.On( - "MultiSaveAndRemoveWithPrefix", + "MultiSaveAndRemove", mock.AnythingOfType("map[string]string"), mock.AnythingOfType("[]string"), mock.AnythingOfType("uint64")). @@ -1225,13 +1236,13 @@ func TestCatalog_DropCollection(t *testing.T) { return nil }).Once() mockSnapshot.On( - "MultiSaveAndRemoveWithPrefix", + "MultiSaveAndRemove", mock.AnythingOfType("map[string]string"), mock.AnythingOfType("[]string"), mock.AnythingOfType("uint64")). 
Return(func(map[string]string, []string, typeutil.Timestamp) error { removeCollectionCalled = true - return errors.New("error mock MultiSaveAndRemoveWithPrefix") + return errors.New("error mock MultiSaveAndRemove") }).Once() kc := &Catalog{Snapshot: mockSnapshot} ctx := context.Background() @@ -1248,7 +1259,7 @@ func TestCatalog_DropCollection(t *testing.T) { }) t.Run("normal case", func(t *testing.T) { - mockSnapshot := newMockSnapshot(t, withMockMultiSaveAndRemoveWithPrefix(nil)) + mockSnapshot := newMockSnapshot(t, withMockMultiSaveAndRemove(nil)) kc := &Catalog{Snapshot: mockSnapshot} ctx := context.Background() coll := &model.Collection{ diff --git a/internal/metastore/kv/rootcoord/suffix_snapshot.go b/internal/metastore/kv/rootcoord/suffix_snapshot.go index f945dc958d3b7..af443ffc7c6f7 100644 --- a/internal/metastore/kv/rootcoord/suffix_snapshot.go +++ b/internal/metastore/kv/rootcoord/suffix_snapshot.go @@ -35,6 +35,7 @@ import ( "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util" "github.com/milvus-io/milvus/pkg/util/etcd" + "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/retry" "github.com/milvus-io/milvus/pkg/util/tsoutil" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -502,6 +503,53 @@ func (ss *SuffixSnapshot) LoadWithPrefix(key string, ts typeutil.Timestamp) ([]s return resultKeys, resultValues, nil } +// MultiSaveAndRemove save muiltple kvs and remove as well +// if ts == 0, act like MetaKv +// each key-value will be treated in same logic like Save +func (ss *SuffixSnapshot) MultiSaveAndRemove(saves map[string]string, removals []string, ts typeutil.Timestamp) error { + // if ts == 0, act like MetaKv + if ts == 0 { + return ss.MetaKv.MultiSaveAndRemove(saves, removals) + } + ss.Lock() + defer ss.Unlock() + var err error + + // process each key, checks whether is the latest + execute, updateList, err := ss.generateSaveExecute(saves, ts) + if err != nil { + return err + } + + // load each removal, change execution to adding tombstones + for _, removal := range removals { + value, err := ss.MetaKv.Load(removal) + if err != nil { + log.Warn("SuffixSnapshot MetaKv Load failed", zap.String("key", removal), zap.Error(err)) + if errors.Is(err, merr.ErrIoKeyNotFound) { + continue + } + return err + } + // add tombstone to original key and add ts entry + if IsTombstone(value) { + continue + } + execute[removal] = string(SuffixSnapshotTombstone) + execute[ss.composeTSKey(removal, ts)] = string(SuffixSnapshotTombstone) + updateList = append(updateList, removal) + } + + // multi save execute map; if succeeds, update ts in the update list + err = ss.MetaKv.MultiSave(execute) + if err == nil { + for _, key := range updateList { + ss.lastestTS[key] = ts + } + } + return err +} + // MultiSaveAndRemoveWithPrefix save muiltple kvs and remove as well // if ts == 0, act like MetaKv // each key-value will be treated in same logic like Save diff --git a/internal/metastore/kv/rootcoord/suffix_snapshot_test.go b/internal/metastore/kv/rootcoord/suffix_snapshot_test.go index 5efc00680def2..6d76e544700ac 100644 --- a/internal/metastore/kv/rootcoord/suffix_snapshot_test.go +++ b/internal/metastore/kv/rootcoord/suffix_snapshot_test.go @@ -673,6 +673,82 @@ func Test_SuffixSnapshotMultiSaveAndRemoveWithPrefix(t *testing.T) { ss.MultiSaveAndRemoveWithPrefix(map[string]string{}, []string{""}, 0) } +func Test_SuffixSnapshotMultiSaveAndRemove(t *testing.T) { + rand.Seed(time.Now().UnixNano()) + randVal := rand.Int() + + rootPath := 
fmt.Sprintf("/test/meta/%d", randVal) + sep := "_ts" + + etcdCli, err := etcd.GetEtcdClient( + Params.EtcdCfg.UseEmbedEtcd.GetAsBool(), + Params.EtcdCfg.EtcdUseSSL.GetAsBool(), + Params.EtcdCfg.Endpoints.GetAsStrings(), + Params.EtcdCfg.EtcdTLSCert.GetValue(), + Params.EtcdCfg.EtcdTLSKey.GetValue(), + Params.EtcdCfg.EtcdTLSCACert.GetValue(), + Params.EtcdCfg.EtcdTLSMinVersion.GetValue()) + require.Nil(t, err) + defer etcdCli.Close() + etcdkv := etcdkv.NewEtcdKV(etcdCli, rootPath) + require.Nil(t, err) + defer etcdkv.Close() + + var vtso typeutil.Timestamp + ftso := func() typeutil.Timestamp { + return vtso + } + + ss, err := NewSuffixSnapshot(etcdkv, sep, rootPath, snapshotPrefix) + assert.NoError(t, err) + assert.NotNil(t, ss) + defer ss.Close() + + for i := 0; i < 20; i++ { + vtso = typeutil.Timestamp(100 + i*5) + ts := ftso() + err = ss.Save(fmt.Sprintf("kd-%04d", i), fmt.Sprintf("value-%d", i), ts) + assert.NoError(t, err) + assert.Equal(t, vtso, ts) + } + for i := 20; i < 40; i++ { + sm := map[string]string{"ks": fmt.Sprintf("value-%d", i)} + dm := []string{fmt.Sprintf("kd-%04d", i-20)} + vtso = typeutil.Timestamp(100 + i*5) + ts := ftso() + err = ss.MultiSaveAndRemove(sm, dm, ts) + assert.NoError(t, err) + assert.Equal(t, vtso, ts) + } + for i := 0; i < 20; i++ { + val, err := ss.Load(fmt.Sprintf("kd-%04d", i), typeutil.Timestamp(100+i*5+2)) + assert.NoError(t, err) + assert.Equal(t, fmt.Sprintf("value-%d", i), val) + _, vals, err := ss.LoadWithPrefix("kd-", typeutil.Timestamp(100+i*5+2)) + assert.NoError(t, err) + assert.Equal(t, i+1, len(vals)) + } + for i := 20; i < 40; i++ { + val, err := ss.Load("ks", typeutil.Timestamp(100+i*5+2)) + assert.NoError(t, err) + assert.Equal(t, fmt.Sprintf("value-%d", i), val) + _, vals, err := ss.LoadWithPrefix("kd-", typeutil.Timestamp(100+i*5+2)) + assert.NoError(t, err) + assert.Equal(t, 39-i, len(vals)) + } + + // try to load + _, err = ss.Load("kd-0000", 500) + assert.Error(t, err) + _, err = ss.Load("kd-0000", 0) + assert.Error(t, err) + _, err = ss.Load("kd-0000", 1) + assert.Error(t, err) + + // cleanup + ss.MultiSaveAndRemoveWithPrefix(map[string]string{}, []string{""}, 0) +} + func TestSuffixSnapshot_LoadWithPrefix(t *testing.T) { rand.Seed(time.Now().UnixNano()) randVal := rand.Int() diff --git a/internal/mocks/mock_datanode.go b/internal/mocks/mock_datanode.go index 3392028c1bd48..b6dc02ae27de5 100644 --- a/internal/mocks/mock_datanode.go +++ b/internal/mocks/mock_datanode.go @@ -64,8 +64,8 @@ type MockDataNode_CheckChannelOperationProgress_Call struct { } // CheckChannelOperationProgress is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.ChannelWatchInfo +// - _a0 context.Context +// - _a1 *datapb.ChannelWatchInfo func (_e *MockDataNode_Expecter) CheckChannelOperationProgress(_a0 interface{}, _a1 interface{}) *MockDataNode_CheckChannelOperationProgress_Call { return &MockDataNode_CheckChannelOperationProgress_Call{Call: _e.mock.On("CheckChannelOperationProgress", _a0, _a1)} } @@ -119,8 +119,8 @@ type MockDataNode_Compaction_Call struct { } // Compaction is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.CompactionPlan +// - _a0 context.Context +// - _a1 *datapb.CompactionPlan func (_e *MockDataNode_Expecter) Compaction(_a0 interface{}, _a1 interface{}) *MockDataNode_Compaction_Call { return &MockDataNode_Compaction_Call{Call: _e.mock.On("Compaction", _a0, _a1)} } @@ -174,8 +174,8 @@ type MockDataNode_DropImport_Call struct { } // DropImport is a helper method to 
define mock.On call -// - _a0 context.Context -// - _a1 *datapb.DropImportRequest +// - _a0 context.Context +// - _a1 *datapb.DropImportRequest func (_e *MockDataNode_Expecter) DropImport(_a0 interface{}, _a1 interface{}) *MockDataNode_DropImport_Call { return &MockDataNode_DropImport_Call{Call: _e.mock.On("DropImport", _a0, _a1)} } @@ -229,8 +229,8 @@ type MockDataNode_FlushChannels_Call struct { } // FlushChannels is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.FlushChannelsRequest +// - _a0 context.Context +// - _a1 *datapb.FlushChannelsRequest func (_e *MockDataNode_Expecter) FlushChannels(_a0 interface{}, _a1 interface{}) *MockDataNode_FlushChannels_Call { return &MockDataNode_FlushChannels_Call{Call: _e.mock.On("FlushChannels", _a0, _a1)} } @@ -284,8 +284,8 @@ type MockDataNode_FlushSegments_Call struct { } // FlushSegments is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.FlushSegmentsRequest +// - _a0 context.Context +// - _a1 *datapb.FlushSegmentsRequest func (_e *MockDataNode_Expecter) FlushSegments(_a0 interface{}, _a1 interface{}) *MockDataNode_FlushSegments_Call { return &MockDataNode_FlushSegments_Call{Call: _e.mock.On("FlushSegments", _a0, _a1)} } @@ -380,8 +380,8 @@ type MockDataNode_GetCompactionState_Call struct { } // GetCompactionState is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.CompactionStateRequest +// - _a0 context.Context +// - _a1 *datapb.CompactionStateRequest func (_e *MockDataNode_Expecter) GetCompactionState(_a0 interface{}, _a1 interface{}) *MockDataNode_GetCompactionState_Call { return &MockDataNode_GetCompactionState_Call{Call: _e.mock.On("GetCompactionState", _a0, _a1)} } @@ -435,8 +435,8 @@ type MockDataNode_GetComponentStates_Call struct { } // GetComponentStates is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *milvuspb.GetComponentStatesRequest +// - _a0 context.Context +// - _a1 *milvuspb.GetComponentStatesRequest func (_e *MockDataNode_Expecter) GetComponentStates(_a0 interface{}, _a1 interface{}) *MockDataNode_GetComponentStates_Call { return &MockDataNode_GetComponentStates_Call{Call: _e.mock.On("GetComponentStates", _a0, _a1)} } @@ -490,8 +490,8 @@ type MockDataNode_GetMetrics_Call struct { } // GetMetrics is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *milvuspb.GetMetricsRequest +// - _a0 context.Context +// - _a1 *milvuspb.GetMetricsRequest func (_e *MockDataNode_Expecter) GetMetrics(_a0 interface{}, _a1 interface{}) *MockDataNode_GetMetrics_Call { return &MockDataNode_GetMetrics_Call{Call: _e.mock.On("GetMetrics", _a0, _a1)} } @@ -627,8 +627,8 @@ type MockDataNode_GetStatisticsChannel_Call struct { } // GetStatisticsChannel is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *internalpb.GetStatisticsChannelRequest +// - _a0 context.Context +// - _a1 *internalpb.GetStatisticsChannelRequest func (_e *MockDataNode_Expecter) GetStatisticsChannel(_a0 interface{}, _a1 interface{}) *MockDataNode_GetStatisticsChannel_Call { return &MockDataNode_GetStatisticsChannel_Call{Call: _e.mock.On("GetStatisticsChannel", _a0, _a1)} } @@ -682,8 +682,8 @@ type MockDataNode_ImportV2_Call struct { } // ImportV2 is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.ImportRequest +// - _a0 context.Context +// - _a1 *datapb.ImportRequest func (_e *MockDataNode_Expecter) ImportV2(_a0 interface{}, _a1 interface{}) *MockDataNode_ImportV2_Call 
{ return &MockDataNode_ImportV2_Call{Call: _e.mock.On("ImportV2", _a0, _a1)} } @@ -778,8 +778,8 @@ type MockDataNode_NotifyChannelOperation_Call struct { } // NotifyChannelOperation is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.ChannelOperationsRequest +// - _a0 context.Context +// - _a1 *datapb.ChannelOperationsRequest func (_e *MockDataNode_Expecter) NotifyChannelOperation(_a0 interface{}, _a1 interface{}) *MockDataNode_NotifyChannelOperation_Call { return &MockDataNode_NotifyChannelOperation_Call{Call: _e.mock.On("NotifyChannelOperation", _a0, _a1)} } @@ -833,8 +833,8 @@ type MockDataNode_PreImport_Call struct { } // PreImport is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.PreImportRequest +// - _a0 context.Context +// - _a1 *datapb.PreImportRequest func (_e *MockDataNode_Expecter) PreImport(_a0 interface{}, _a1 interface{}) *MockDataNode_PreImport_Call { return &MockDataNode_PreImport_Call{Call: _e.mock.On("PreImport", _a0, _a1)} } @@ -888,8 +888,8 @@ type MockDataNode_QueryImport_Call struct { } // QueryImport is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.QueryImportRequest +// - _a0 context.Context +// - _a1 *datapb.QueryImportRequest func (_e *MockDataNode_Expecter) QueryImport(_a0 interface{}, _a1 interface{}) *MockDataNode_QueryImport_Call { return &MockDataNode_QueryImport_Call{Call: _e.mock.On("QueryImport", _a0, _a1)} } @@ -943,8 +943,8 @@ type MockDataNode_QueryPreImport_Call struct { } // QueryPreImport is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.QueryPreImportRequest +// - _a0 context.Context +// - _a1 *datapb.QueryPreImportRequest func (_e *MockDataNode_Expecter) QueryPreImport(_a0 interface{}, _a1 interface{}) *MockDataNode_QueryPreImport_Call { return &MockDataNode_QueryPreImport_Call{Call: _e.mock.On("QueryPreImport", _a0, _a1)} } @@ -998,8 +998,8 @@ type MockDataNode_QuerySlot_Call struct { } // QuerySlot is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.QuerySlotRequest +// - _a0 context.Context +// - _a1 *datapb.QuerySlotRequest func (_e *MockDataNode_Expecter) QuerySlot(_a0 interface{}, _a1 interface{}) *MockDataNode_QuerySlot_Call { return &MockDataNode_QuerySlot_Call{Call: _e.mock.On("QuerySlot", _a0, _a1)} } @@ -1094,8 +1094,8 @@ type MockDataNode_ResendSegmentStats_Call struct { } // ResendSegmentStats is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.ResendSegmentStatsRequest +// - _a0 context.Context +// - _a1 *datapb.ResendSegmentStatsRequest func (_e *MockDataNode_Expecter) ResendSegmentStats(_a0 interface{}, _a1 interface{}) *MockDataNode_ResendSegmentStats_Call { return &MockDataNode_ResendSegmentStats_Call{Call: _e.mock.On("ResendSegmentStats", _a0, _a1)} } @@ -1128,7 +1128,7 @@ type MockDataNode_SetAddress_Call struct { } // SetAddress is a helper method to define mock.On call -// - address string +// - address string func (_e *MockDataNode_Expecter) SetAddress(address interface{}) *MockDataNode_SetAddress_Call { return &MockDataNode_SetAddress_Call{Call: _e.mock.On("SetAddress", address)} } @@ -1170,7 +1170,7 @@ type MockDataNode_SetDataCoordClient_Call struct { } // SetDataCoordClient is a helper method to define mock.On call -// - dataCoord types.DataCoordClient +// - dataCoord types.DataCoordClient func (_e *MockDataNode_Expecter) SetDataCoordClient(dataCoord interface{}) *MockDataNode_SetDataCoordClient_Call { return 
&MockDataNode_SetDataCoordClient_Call{Call: _e.mock.On("SetDataCoordClient", dataCoord)} } @@ -1203,7 +1203,7 @@ type MockDataNode_SetEtcdClient_Call struct { } // SetEtcdClient is a helper method to define mock.On call -// - etcdClient *clientv3.Client +// - etcdClient *clientv3.Client func (_e *MockDataNode_Expecter) SetEtcdClient(etcdClient interface{}) *MockDataNode_SetEtcdClient_Call { return &MockDataNode_SetEtcdClient_Call{Call: _e.mock.On("SetEtcdClient", etcdClient)} } @@ -1245,7 +1245,7 @@ type MockDataNode_SetRootCoordClient_Call struct { } // SetRootCoordClient is a helper method to define mock.On call -// - rootCoord types.RootCoordClient +// - rootCoord types.RootCoordClient func (_e *MockDataNode_Expecter) SetRootCoordClient(rootCoord interface{}) *MockDataNode_SetRootCoordClient_Call { return &MockDataNode_SetRootCoordClient_Call{Call: _e.mock.On("SetRootCoordClient", rootCoord)} } @@ -1299,8 +1299,8 @@ type MockDataNode_ShowConfigurations_Call struct { } // ShowConfigurations is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *internalpb.ShowConfigurationsRequest +// - _a0 context.Context +// - _a1 *internalpb.ShowConfigurationsRequest func (_e *MockDataNode_Expecter) ShowConfigurations(_a0 interface{}, _a1 interface{}) *MockDataNode_ShowConfigurations_Call { return &MockDataNode_ShowConfigurations_Call{Call: _e.mock.On("ShowConfigurations", _a0, _a1)} } @@ -1436,8 +1436,8 @@ type MockDataNode_SyncSegments_Call struct { } // SyncSegments is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.SyncSegmentsRequest +// - _a0 context.Context +// - _a1 *datapb.SyncSegmentsRequest func (_e *MockDataNode_Expecter) SyncSegments(_a0 interface{}, _a1 interface{}) *MockDataNode_SyncSegments_Call { return &MockDataNode_SyncSegments_Call{Call: _e.mock.On("SyncSegments", _a0, _a1)} } @@ -1470,7 +1470,7 @@ type MockDataNode_UpdateStateCode_Call struct { } // UpdateStateCode is a helper method to define mock.On call -// - stateCode commonpb.StateCode +// - stateCode commonpb.StateCode func (_e *MockDataNode_Expecter) UpdateStateCode(stateCode interface{}) *MockDataNode_UpdateStateCode_Call { return &MockDataNode_UpdateStateCode_Call{Call: _e.mock.On("UpdateStateCode", stateCode)} } @@ -1524,8 +1524,8 @@ type MockDataNode_WatchDmChannels_Call struct { } // WatchDmChannels is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *datapb.WatchDmChannelsRequest +// - _a0 context.Context +// - _a1 *datapb.WatchDmChannelsRequest func (_e *MockDataNode_Expecter) WatchDmChannels(_a0 interface{}, _a1 interface{}) *MockDataNode_WatchDmChannels_Call { return &MockDataNode_WatchDmChannels_Call{Call: _e.mock.On("WatchDmChannels", _a0, _a1)} } diff --git a/internal/mocks/mock_datanode_client.go b/internal/mocks/mock_datanode_client.go index 78f7aeec32131..f16ff8d1705bb 100644 --- a/internal/mocks/mock_datanode_client.go +++ b/internal/mocks/mock_datanode_client.go @@ -70,9 +70,9 @@ type MockDataNodeClient_CheckChannelOperationProgress_Call struct { } // CheckChannelOperationProgress is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.ChannelWatchInfo -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.ChannelWatchInfo +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) CheckChannelOperationProgress(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_CheckChannelOperationProgress_Call { return 
&MockDataNodeClient_CheckChannelOperationProgress_Call{Call: _e.mock.On("CheckChannelOperationProgress", append([]interface{}{ctx, in}, opts...)...)} @@ -181,9 +181,9 @@ type MockDataNodeClient_Compaction_Call struct { } // Compaction is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.CompactionPlan -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.CompactionPlan +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) Compaction(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_Compaction_Call { return &MockDataNodeClient_Compaction_Call{Call: _e.mock.On("Compaction", append([]interface{}{ctx, in}, opts...)...)} @@ -251,9 +251,9 @@ type MockDataNodeClient_DropImport_Call struct { } // DropImport is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.DropImportRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.DropImportRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) DropImport(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_DropImport_Call { return &MockDataNodeClient_DropImport_Call{Call: _e.mock.On("DropImport", append([]interface{}{ctx, in}, opts...)...)} @@ -321,9 +321,9 @@ type MockDataNodeClient_FlushChannels_Call struct { } // FlushChannels is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.FlushChannelsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.FlushChannelsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) FlushChannels(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_FlushChannels_Call { return &MockDataNodeClient_FlushChannels_Call{Call: _e.mock.On("FlushChannels", append([]interface{}{ctx, in}, opts...)...)} @@ -391,9 +391,9 @@ type MockDataNodeClient_FlushSegments_Call struct { } // FlushSegments is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.FlushSegmentsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.FlushSegmentsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) FlushSegments(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_FlushSegments_Call { return &MockDataNodeClient_FlushSegments_Call{Call: _e.mock.On("FlushSegments", append([]interface{}{ctx, in}, opts...)...)} @@ -461,9 +461,9 @@ type MockDataNodeClient_GetCompactionState_Call struct { } // GetCompactionState is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.CompactionStateRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.CompactionStateRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) GetCompactionState(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_GetCompactionState_Call { return &MockDataNodeClient_GetCompactionState_Call{Call: _e.mock.On("GetCompactionState", append([]interface{}{ctx, in}, opts...)...)} @@ -531,9 +531,9 @@ type MockDataNodeClient_GetComponentStates_Call struct { } // GetComponentStates is a helper method to define mock.On call -// - ctx context.Context -// - in *milvuspb.GetComponentStatesRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *milvuspb.GetComponentStatesRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) GetComponentStates(ctx interface{}, in interface{}, opts 
...interface{}) *MockDataNodeClient_GetComponentStates_Call { return &MockDataNodeClient_GetComponentStates_Call{Call: _e.mock.On("GetComponentStates", append([]interface{}{ctx, in}, opts...)...)} @@ -601,9 +601,9 @@ type MockDataNodeClient_GetMetrics_Call struct { } // GetMetrics is a helper method to define mock.On call -// - ctx context.Context -// - in *milvuspb.GetMetricsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *milvuspb.GetMetricsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) GetMetrics(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_GetMetrics_Call { return &MockDataNodeClient_GetMetrics_Call{Call: _e.mock.On("GetMetrics", append([]interface{}{ctx, in}, opts...)...)} @@ -671,9 +671,9 @@ type MockDataNodeClient_GetStatisticsChannel_Call struct { } // GetStatisticsChannel is a helper method to define mock.On call -// - ctx context.Context -// - in *internalpb.GetStatisticsChannelRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *internalpb.GetStatisticsChannelRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) GetStatisticsChannel(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_GetStatisticsChannel_Call { return &MockDataNodeClient_GetStatisticsChannel_Call{Call: _e.mock.On("GetStatisticsChannel", append([]interface{}{ctx, in}, opts...)...)} @@ -741,9 +741,9 @@ type MockDataNodeClient_ImportV2_Call struct { } // ImportV2 is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.ImportRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.ImportRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) ImportV2(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_ImportV2_Call { return &MockDataNodeClient_ImportV2_Call{Call: _e.mock.On("ImportV2", append([]interface{}{ctx, in}, opts...)...)} @@ -811,9 +811,9 @@ type MockDataNodeClient_NotifyChannelOperation_Call struct { } // NotifyChannelOperation is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.ChannelOperationsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.ChannelOperationsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) NotifyChannelOperation(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_NotifyChannelOperation_Call { return &MockDataNodeClient_NotifyChannelOperation_Call{Call: _e.mock.On("NotifyChannelOperation", append([]interface{}{ctx, in}, opts...)...)} @@ -881,9 +881,9 @@ type MockDataNodeClient_PreImport_Call struct { } // PreImport is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.PreImportRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.PreImportRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) PreImport(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_PreImport_Call { return &MockDataNodeClient_PreImport_Call{Call: _e.mock.On("PreImport", append([]interface{}{ctx, in}, opts...)...)} @@ -951,9 +951,9 @@ type MockDataNodeClient_QueryImport_Call struct { } // QueryImport is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.QueryImportRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.QueryImportRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) 
QueryImport(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_QueryImport_Call { return &MockDataNodeClient_QueryImport_Call{Call: _e.mock.On("QueryImport", append([]interface{}{ctx, in}, opts...)...)} @@ -1021,9 +1021,9 @@ type MockDataNodeClient_QueryPreImport_Call struct { } // QueryPreImport is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.QueryPreImportRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.QueryPreImportRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) QueryPreImport(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_QueryPreImport_Call { return &MockDataNodeClient_QueryPreImport_Call{Call: _e.mock.On("QueryPreImport", append([]interface{}{ctx, in}, opts...)...)} @@ -1091,9 +1091,9 @@ type MockDataNodeClient_QuerySlot_Call struct { } // QuerySlot is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.QuerySlotRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.QuerySlotRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) QuerySlot(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_QuerySlot_Call { return &MockDataNodeClient_QuerySlot_Call{Call: _e.mock.On("QuerySlot", append([]interface{}{ctx, in}, opts...)...)} @@ -1161,9 +1161,9 @@ type MockDataNodeClient_ResendSegmentStats_Call struct { } // ResendSegmentStats is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.ResendSegmentStatsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.ResendSegmentStatsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) ResendSegmentStats(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_ResendSegmentStats_Call { return &MockDataNodeClient_ResendSegmentStats_Call{Call: _e.mock.On("ResendSegmentStats", append([]interface{}{ctx, in}, opts...)...)} @@ -1231,9 +1231,9 @@ type MockDataNodeClient_ShowConfigurations_Call struct { } // ShowConfigurations is a helper method to define mock.On call -// - ctx context.Context -// - in *internalpb.ShowConfigurationsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *internalpb.ShowConfigurationsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) ShowConfigurations(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_ShowConfigurations_Call { return &MockDataNodeClient_ShowConfigurations_Call{Call: _e.mock.On("ShowConfigurations", append([]interface{}{ctx, in}, opts...)...)} @@ -1301,9 +1301,9 @@ type MockDataNodeClient_SyncSegments_Call struct { } // SyncSegments is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.SyncSegmentsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in *datapb.SyncSegmentsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) SyncSegments(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_SyncSegments_Call { return &MockDataNodeClient_SyncSegments_Call{Call: _e.mock.On("SyncSegments", append([]interface{}{ctx, in}, opts...)...)} @@ -1371,9 +1371,9 @@ type MockDataNodeClient_WatchDmChannels_Call struct { } // WatchDmChannels is a helper method to define mock.On call -// - ctx context.Context -// - in *datapb.WatchDmChannelsRequest -// - opts ...grpc.CallOption +// - ctx context.Context +// - in 
*datapb.WatchDmChannelsRequest +// - opts ...grpc.CallOption func (_e *MockDataNodeClient_Expecter) WatchDmChannels(ctx interface{}, in interface{}, opts ...interface{}) *MockDataNodeClient_WatchDmChannels_Call { return &MockDataNodeClient_WatchDmChannels_Call{Call: _e.mock.On("WatchDmChannels", append([]interface{}{ctx, in}, opts...)...)} diff --git a/internal/mq/msgstream/mqwrapper/rmq/rocksmq_msgstream_test.go b/internal/mq/msgstream/mqwrapper/rmq/rocksmq_msgstream_test.go index e29171d1437a5..96be3628c9056 100644 --- a/internal/mq/msgstream/mqwrapper/rmq/rocksmq_msgstream_test.go +++ b/internal/mq/msgstream/mqwrapper/rmq/rocksmq_msgstream_test.go @@ -240,7 +240,7 @@ func TestMqMsgStream_SeekNotSubscribed(t *testing.T) { ChannelName: "b", }, } - err = m.Seek(context.Background(), p) + err = m.Seek(context.Background(), p, false) assert.Error(t, err) } @@ -403,7 +403,7 @@ func TestStream_RmqTtMsgStream_DuplicatedIDs(t *testing.T) { outputStream, _ = msgstream.NewMqTtMsgStream(context.Background(), 100, 100, rmqClient, factory.NewUnmarshalDispatcher()) consumerSubName = funcutil.RandomString(8) outputStream.AsConsumer(ctx, consumerChannels, consumerSubName, mqwrapper.SubscriptionPositionUnknown) - outputStream.Seek(ctx, receivedMsg.StartPositions) + outputStream.Seek(ctx, receivedMsg.StartPositions, false) seekMsg := consumer(ctx, outputStream) assert.Equal(t, len(seekMsg.Msgs), 1+2) assert.EqualValues(t, seekMsg.Msgs[0].BeginTs(), 1) @@ -506,7 +506,7 @@ func TestStream_RmqTtMsgStream_Seek(t *testing.T) { consumerSubName = funcutil.RandomString(8) outputStream.AsConsumer(ctx, consumerChannels, consumerSubName, mqwrapper.SubscriptionPositionUnknown) - outputStream.Seek(ctx, receivedMsg3.StartPositions) + outputStream.Seek(ctx, receivedMsg3.StartPositions, false) seekMsg := consumer(ctx, outputStream) assert.Equal(t, len(seekMsg.Msgs), 3) result := []uint64{14, 12, 13} @@ -565,7 +565,7 @@ func TestStream_RMqMsgStream_SeekInvalidMessage(t *testing.T) { }, } - err = outputStream2.Seek(ctx, p) + err = outputStream2.Seek(ctx, p, false) assert.NoError(t, err) for i := 10; i < 20; i++ { diff --git a/internal/proto/cgo_msg.proto b/internal/proto/cgo_msg.proto new file mode 100644 index 0000000000000..6d851e95e0550 --- /dev/null +++ b/internal/proto/cgo_msg.proto @@ -0,0 +1,23 @@ +syntax = "proto3"; + +package milvus.proto.cgo; +option go_package="github.com/milvus-io/milvus/internal/proto/cgopb"; + +import "schema.proto"; + +message LoadIndexInfo { + int64 collectionID = 1; + int64 partitionID = 2; + int64 segmentID = 3; + schema.FieldSchema field = 5; + bool enable_mmap = 6; + string mmap_dir_path = 7; + int64 indexID = 8; + int64 index_buildID = 9; + int64 index_version = 10; + map index_params = 11; + repeated string index_files = 12; + string uri = 13; + int64 index_store_version = 14; + int32 index_engine_version = 15; +} diff --git a/internal/proto/data_coord.proto b/internal/proto/data_coord.proto index ecb29e3be162c..3c1e97a24d645 100644 --- a/internal/proto/data_coord.proto +++ b/internal/proto/data_coord.proto @@ -496,15 +496,29 @@ message CompactionStateRequest { common.MsgBase base = 1; } +message SyncSegmentInfo { + int64 segment_id = 1; + FieldBinlog pk_stats_log = 2; + common.SegmentState state = 3; + SegmentLevel level = 4; + int64 num_of_rows = 5; +} + message SyncSegmentsRequest { + // Deprecated, after v2.4.3 int64 planID = 1; + // Deprecated, after v2.4.3 int64 compacted_to = 2; + // Deprecated, after v2.4.3 int64 num_of_rows = 3; + // Deprecated, after v2.4.3 repeated int64 
compacted_from = 4; + // Deprecated, after v2.4.3 repeated FieldBinlog stats_logs = 5; string channel_name = 6; int64 partition_id = 7; int64 collection_id = 8; + map segment_infos = 9; } message CompactionSegmentBinlogs { @@ -528,6 +542,7 @@ message CompactionPlan { string channel = 7; int64 collection_ttl = 8; int64 total_rows = 9; + schema.CollectionSchema schema = 10; } message CompactionSegment { diff --git a/internal/proto/index_cgo_msg.proto b/internal/proto/index_cgo_msg.proto index 50b1ea5dde5a5..688f871f55aed 100644 --- a/internal/proto/index_cgo_msg.proto +++ b/internal/proto/index_cgo_msg.proto @@ -4,6 +4,7 @@ package milvus.proto.indexcgo; option go_package="github.com/milvus-io/milvus/internal/proto/indexcgopb"; import "common.proto"; +import "schema.proto"; message TypeParams { repeated common.KeyValuePair params = 1; @@ -30,3 +31,52 @@ message Binary { message BinarySet { repeated Binary datas = 1; } + +// Synchronously modify StorageConfig in index_coord.proto file +message StorageConfig { + string address = 1; + string access_keyID = 2; + string secret_access_key = 3; + bool useSSL = 4; + string bucket_name = 5; + string root_path = 6; + bool useIAM = 7; + string IAMEndpoint = 8; + string storage_type = 9; + bool use_virtual_host = 10; + string region = 11; + string cloud_provider = 12; + int64 request_timeout_ms = 13; + string sslCACert = 14; +} + +// Synchronously modify OptionalFieldInfo in index_coord.proto file +message OptionalFieldInfo { + int64 fieldID = 1; + string field_name = 2; + int32 field_type = 3; + repeated string data_paths = 4; +} + +message BuildIndexInfo { + string clusterID = 1; + int64 buildID = 2; + int64 collectionID = 3; + int64 partitionID = 4; + int64 segmentID = 5; + int64 index_version = 6; + int32 current_index_version = 7; + int64 num_rows = 8; + int64 dim = 9; + string index_file_prefix = 10; + repeated string insert_files = 11; +// repeated int64 data_ids = 12; + schema.FieldSchema field_schema = 12; + StorageConfig storage_config = 13; + repeated common.KeyValuePair index_params = 14; + repeated common.KeyValuePair type_params = 15; + string store_path = 16; + int64 store_version = 17; + string index_store_path = 18; + repeated OptionalFieldInfo opt_fields = 19; +} diff --git a/internal/proto/index_coord.proto b/internal/proto/index_coord.proto index d59452b17d2de..0c0cea0361100 100644 --- a/internal/proto/index_coord.proto +++ b/internal/proto/index_coord.proto @@ -226,6 +226,7 @@ message GetIndexBuildProgressResponse { int64 pending_index_rows = 4; } +// Synchronously modify StorageConfig in index_cgo_msg.proto file message StorageConfig { string address = 1; string access_keyID = 2; @@ -243,6 +244,7 @@ message StorageConfig { string sslCACert = 14; } +// Synchronously modify OptionalFieldInfo in index_cgo_msg.proto file message OptionalFieldInfo { int64 fieldID = 1; string field_name = 2; @@ -276,6 +278,7 @@ message CreateJobRequest { int64 dim = 22; repeated int64 data_ids = 23; repeated OptionalFieldInfo optional_scalar_fields = 24; + schema.FieldSchema field = 25; } message QueryJobsRequest { diff --git a/internal/proto/internal.proto b/internal/proto/internal.proto index 6715af58d92d2..980cf3576989c 100644 --- a/internal/proto/internal.proto +++ b/internal/proto/internal.proto @@ -198,6 +198,7 @@ message RetrieveResults { // query request cost CostAggregation costAggregation = 13; int64 all_retrieve_count = 14; + bool has_more_result = 15; } message LoadIndex { diff --git a/internal/proto/segcore.proto 
b/internal/proto/segcore.proto index ea7697f48c98d..aaf502bc1ec06 100644 --- a/internal/proto/segcore.proto +++ b/internal/proto/segcore.proto @@ -10,6 +10,7 @@ message RetrieveResults { repeated int64 offset = 2; repeated schema.FieldData fields_data = 3; int64 all_retrieve_count = 4; + bool has_more_result = 5; } message LoadFieldMeta { diff --git a/internal/proxy/accesslog/formater_test.go b/internal/proxy/accesslog/formater_test.go index 4a231a8eeeb98..e9e2f92d24aec 100644 --- a/internal/proxy/accesslog/formater_test.go +++ b/internal/proxy/accesslog/formater_test.go @@ -32,7 +32,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/internal/proxy/accesslog/info" - "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/tracer" "github.com/milvus-io/milvus/pkg/util" "github.com/milvus-io/milvus/pkg/util/crypto" "github.com/milvus-io/milvus/pkg/util/merr" @@ -153,16 +153,15 @@ func (s *LogFormatterSuite) TestFormatMethodInfo() { for _, req := range s.reqs { i := info.NewGrpcAccessInfo(metaContext, s.serverinfo, req) fs := formatter.Format(i) - log.Info(fs) s.True(strings.Contains(fs, s.traceID)) } + tracer.Init() traceContext, traceSpan := otel.Tracer(typeutil.ProxyRole).Start(s.ctx, "test") trueTraceID := traceSpan.SpanContext().TraceID().String() for _, req := range s.reqs { i := info.NewGrpcAccessInfo(traceContext, s.serverinfo, req) fs := formatter.Format(i) - log.Info(fs) s.True(strings.Contains(fs, trueTraceID)) } } diff --git a/internal/proxy/accesslog/info/grpc_info.go b/internal/proxy/accesslog/info/grpc_info.go index 56b737c02acb6..9d94078f72bfb 100644 --- a/internal/proxy/accesslog/info/grpc_info.go +++ b/internal/proxy/accesslog/info/grpc_info.go @@ -33,7 +33,6 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/internal/proxy/connection" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/requestutil" ) @@ -129,6 +128,10 @@ func (i *GrpcAccessInfo) TraceID() string { } traceID := trace.SpanFromContext(i.ctx).SpanContext().TraceID() + if !traceID.IsValid() { + return Unknown + } + return traceID.String() } @@ -252,10 +255,6 @@ func (i *GrpcAccessInfo) SdkVersion() string { return getSdkVersionByUserAgent(i.ctx) } -func (i *GrpcAccessInfo) ClusterPrefix() string { - return paramtable.Get().CommonCfg.ClusterPrefix.GetValue() -} - func (i *GrpcAccessInfo) OutputFields() string { fields, ok := requestutil.GetOutputFieldsFromRequest(i.req) if ok { diff --git a/internal/proxy/accesslog/info/restful_info.go b/internal/proxy/accesslog/info/restful_info.go new file mode 100644 index 0000000000000..cd7e4eba3beda --- /dev/null +++ b/internal/proxy/accesslog/info/restful_info.go @@ -0,0 +1,189 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package info + +import ( + "fmt" + "net/http" + "sync" + "time" + + "github.com/gin-gonic/gin" + + "github.com/milvus-io/milvus/pkg/util/requestutil" +) + +const ( + ContextUsername = "username" + ContextReturnCode = "code" + ContextReturnMessage = "message" + ContextRequest = "request" +) + +type RestfulInfo struct { + params *gin.LogFormatterParams + start time.Time + req interface{} + reqInitOnce sync.Once +} + +func NewRestfulInfo() *RestfulInfo { + return &RestfulInfo{start: time.Now()} +} + +func (i *RestfulInfo) SetParams(p *gin.LogFormatterParams) { + i.params = p +} + +func (i *RestfulInfo) InitReq() { + req, ok := i.params.Keys[ContextRequest] + if !ok { + return + } + i.req = req +} + +func (i *RestfulInfo) TimeCost() string { + return fmt.Sprint(i.params.Latency) +} + +func (i *RestfulInfo) TimeNow() string { + return time.Now().Format(timeFormat) +} + +func (i *RestfulInfo) TimeStart() string { + if i.start.IsZero() { + return Unknown + } + return i.start.Format(timeFormat) +} + +func (i *RestfulInfo) TimeEnd() string { + return i.params.TimeStamp.Format(timeFormat) +} + +func (i *RestfulInfo) MethodName() string { + return i.params.Path +} + +func (i *RestfulInfo) Address() string { + return i.params.ClientIP +} + +func (i *RestfulInfo) TraceID() string { + traceID, ok := i.params.Keys["traceID"] + if !ok { + return Unknown + } + return traceID.(string) +} + +func (i *RestfulInfo) MethodStatus() string { + if i.params.StatusCode != http.StatusOK { + return fmt.Sprintf("HttpError%d", i.params.StatusCode) + } + + if code, ok := i.params.Keys[ContextReturnCode]; !ok || code.(int32) != 0 { + return "Failed" + } + + return "Successful" +} + +func (i *RestfulInfo) UserName() string { + username, ok := i.params.Keys[ContextUsername] + if !ok || username == "" { + return Unknown + } + + return username.(string) +} + +func (i *RestfulInfo) ResponseSize() string { + return fmt.Sprint(i.params.BodySize) +} + +func (i *RestfulInfo) ErrorCode() string { + code, ok := i.params.Keys[ContextReturnCode] + if !ok { + return Unknown + } + return fmt.Sprint(code) +} + +func (i *RestfulInfo) ErrorMsg() string { + message, ok := i.params.Keys[ContextReturnMessage] + if !ok { + return "" + } + return fmt.Sprint(message) +} + +func (i *RestfulInfo) SdkVersion() string { + return "Restful" +} + +func (i *RestfulInfo) DbName() string { + name, ok := requestutil.GetDbNameFromRequest(i.req) + if !ok { + return Unknown + } + return name.(string) +} + +func (i *RestfulInfo) CollectionName() string { + name, ok := requestutil.GetCollectionNameFromRequest(i.req) + if !ok { + return Unknown + } + return name.(string) +} + +func (i *RestfulInfo) PartitionName() string { + name, ok := requestutil.GetPartitionNameFromRequest(i.req) + if ok { + return name.(string) + } + + names, ok := requestutil.GetPartitionNamesFromRequest(i.req) + if ok { + return fmt.Sprint(names.([]string)) + } + + return Unknown +} + +func (i *RestfulInfo) Expression() string { + expr, ok := requestutil.GetExprFromRequest(i.req) + if ok { + return expr.(string) + } + + dsl, ok := requestutil.GetDSLFromRequest(i.req) + if ok { + return dsl.(string) + } + return Unknown +} + +func (i *RestfulInfo) OutputFields() string { + fields, ok := requestutil.GetOutputFieldsFromRequest(i.req) + if ok { + return fmt.Sprint(fields.([]string)) + } + return Unknown +} diff --git a/internal/proxy/accesslog/info/restful_info_test.go 
b/internal/proxy/accesslog/info/restful_info_test.go new file mode 100644 index 0000000000000..8a12ad1e93246 --- /dev/null +++ b/internal/proxy/accesslog/info/restful_info_test.go @@ -0,0 +1,192 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package info + +import ( + "fmt" + "net/http" + "testing" + "time" + + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/paramtable" +) + +type RestfulAccessInfoSuite struct { + suite.Suite + + username string + traceID string + info *RestfulInfo +} + +func (s *RestfulAccessInfoSuite) SetupSuite() { + paramtable.Init() +} + +func (s *RestfulAccessInfoSuite) SetupTest() { + s.username = "test-user" + s.traceID = "test-trace" + s.info = &RestfulInfo{} + s.info.SetParams( + &gin.LogFormatterParams{ + Keys: make(map[string]any), + }) +} + +func (s *RestfulAccessInfoSuite) TestTimeCost() { + s.info.params.Latency = time.Second + result := Get(s.info, "$time_cost") + s.Equal(fmt.Sprint(time.Second), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestTimeNow() { + result := Get(s.info, "$time_now") + s.NotEqual(Unknown, result[0]) +} + +func (s *RestfulAccessInfoSuite) TestTimeStart() { + result := Get(s.info, "$time_start") + s.Equal(Unknown, result[0]) + + s.info.start = time.Now() + result = Get(s.info, "$time_start") + s.Equal(s.info.start.Format(timeFormat), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestTimeEnd() { + s.info.params.TimeStamp = time.Now() + result := Get(s.info, "$time_end") + s.Equal(s.info.params.TimeStamp.Format(timeFormat), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestMethodName() { + s.info.params.Path = "/restful/test" + result := Get(s.info, "$method_name") + s.Equal(s.info.params.Path, result[0]) +} + +func (s *RestfulAccessInfoSuite) TestAddress() { + s.info.params.ClientIP = "127.0.0.1" + result := Get(s.info, "$user_addr") + s.Equal(s.info.params.ClientIP, result[0]) +} + +func (s *RestfulAccessInfoSuite) TestTraceID() { + result := Get(s.info, "$trace_id") + s.Equal(Unknown, result[0]) + + s.info.params.Keys["traceID"] = "testtrace" + result = Get(s.info, "$trace_id") + s.Equal(s.info.params.Keys["traceID"], result[0]) +} + +func (s *RestfulAccessInfoSuite) TestStatus() { + s.info.params.StatusCode = http.StatusBadRequest + result := Get(s.info, "$method_status") + s.Equal("HttpError400", result[0]) + + s.info.params.StatusCode = http.StatusOK + s.info.params.Keys[ContextReturnCode] = merr.Code(merr.ErrChannelLack) + result = Get(s.info, "$method_status") + s.Equal("Failed", result[0]) + + s.info.params.StatusCode = http.StatusOK + s.info.params.Keys[ContextReturnCode] = merr.Code(nil) + result = Get(s.info, 
"$method_status") + s.Equal("Successful", result[0]) +} + +func (s *RestfulAccessInfoSuite) TestErrorCode() { + result := Get(s.info, "$error_code") + s.Equal(Unknown, result[0]) + + s.info.params.Keys[ContextReturnCode] = 200 + result = Get(s.info, "$error_code") + s.Equal(fmt.Sprint(200), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestErrorMsg() { + s.info.params.Keys[ContextReturnMessage] = merr.ErrChannelLack.Error() + result := Get(s.info, "$error_msg") + s.Equal(merr.ErrChannelLack.Error(), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestDbName() { + result := Get(s.info, "$database_name") + s.Equal(Unknown, result[0]) + + req := &milvuspb.QueryRequest{ + DbName: "test", + } + s.info.req = req + result = Get(s.info, "$database_name") + s.Equal("test", result[0]) +} + +func (s *RestfulAccessInfoSuite) TestSdkInfo() { + result := Get(s.info, "$sdk_version") + s.Equal("Restful", result[0]) +} + +func (s *RestfulAccessInfoSuite) TestExpression() { + result := Get(s.info, "$method_expr") + s.Equal(Unknown, result[0]) + + testExpr := "test" + s.info.req = &milvuspb.QueryRequest{ + Expr: testExpr, + } + result = Get(s.info, "$method_expr") + s.Equal(testExpr, result[0]) + + s.info.req = &milvuspb.SearchRequest{ + Dsl: testExpr, + } + result = Get(s.info, "$method_expr") + s.Equal(testExpr, result[0]) +} + +func (s *RestfulAccessInfoSuite) TestOutputFields() { + result := Get(s.info, "$output_fields") + s.Equal(Unknown, result[0]) + + fields := []string{"pk"} + s.info.params.Keys[ContextRequest] = &milvuspb.QueryRequest{ + OutputFields: fields, + } + s.info.InitReq() + result = Get(s.info, "$output_fields") + s.Equal(fmt.Sprint(fields), result[0]) +} + +func (s *RestfulAccessInfoSuite) TestClusterPrefix() { + cluster := "instance-test" + paramtable.Init() + ClusterPrefix.Store(cluster) + + result := Get(s.info, "$cluster_prefix") + s.Equal(cluster, result[0]) +} + +func TestRestfulAccessInfo(t *testing.T) { + suite.Run(t, new(RestfulAccessInfoSuite)) +} diff --git a/internal/proxy/accesslog/util.go b/internal/proxy/accesslog/util.go index a0f35d74c7ea1..6e8f4a656b058 100644 --- a/internal/proxy/accesslog/util.go +++ b/internal/proxy/accesslog/util.go @@ -22,6 +22,7 @@ import ( "time" "github.com/cockroachdb/errors" + "github.com/gin-gonic/gin" "google.golang.org/grpc" "github.com/milvus-io/milvus/internal/proxy/accesslog/info" @@ -29,6 +30,8 @@ import ( type AccessKey struct{} +const ContextLogKey = "accesslog" + func UnaryAccessLogInterceptor(ctx context.Context, req any, rpcInfo *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { accessInfo := info.NewGrpcAccessInfo(ctx, rpcInfo, req) newCtx := context.WithValue(ctx, AccessKey{}, accessInfo) @@ -44,6 +47,24 @@ func UnaryUpdateAccessInfoInterceptor(ctx context.Context, req any, rpcInfonfo * return handler(ctx, req) } +func AccessLogMiddleware(ctx *gin.Context) { + accessInfo := info.NewRestfulInfo() + ctx.Set(ContextLogKey, accessInfo) + ctx.Next() + accessInfo.InitReq() + _globalL.Write(accessInfo) +} + +func SetHTTPParams(p *gin.LogFormatterParams) { + value, ok := p.Keys[ContextLogKey] + if !ok { + return + } + + info := value.(*info.RestfulInfo) + info.SetParams(p) +} + func join(path1, path2 string) string { if strings.HasSuffix(path1, "/") { return path1 + path2 diff --git a/internal/proxy/accesslog/writer.go b/internal/proxy/accesslog/writer.go index b0784bdc2f3e1..5aad0acd6df3d 100644 --- a/internal/proxy/accesslog/writer.go +++ b/internal/proxy/accesslog/writer.go @@ -113,12 +113,15 @@ func (l 
*CacheWriter) Start() { } func (l *CacheWriter) Close() { - l.mu.Lock() - defer l.mu.Unlock() l.closeOnce.Do(func() { - l.closed = true + // close auto flush close(l.closeCh) l.closeWg.Wait() + + l.mu.Lock() + defer l.mu.Unlock() + l.closed = true + // flush remaining bytes l.writer.Flush() diff --git a/internal/proxy/impl.go b/internal/proxy/impl.go index c0f520097b801..91fe1244b80fc 100644 --- a/internal/proxy/impl.go +++ b/internal/proxy/impl.go @@ -42,6 +42,7 @@ import ( "github.com/milvus-io/milvus/internal/proto/proxypb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/proxy/connection" + "github.com/milvus-io/milvus/internal/types" "github.com/milvus-io/milvus/internal/util/hookutil" "github.com/milvus-io/milvus/internal/util/importutilv2" "github.com/milvus-io/milvus/pkg/common" @@ -2642,6 +2643,11 @@ func (node *Proxy) Delete(ctx context.Context, request *milvuspb.DeleteRequest) metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method, metrics.TotalLabel, request.GetDbName(), request.GetCollectionName()).Inc() + var limiter types.Limiter + if node.enableComplexDeleteLimit { + limiter, _ = node.GetRateLimiter() + } + dr := &deleteRunner{ req: request, idAllocator: node.rowIDAllocator, @@ -2650,6 +2656,7 @@ func (node *Proxy) Delete(ctx context.Context, request *milvuspb.DeleteRequest) chTicker: node.chTicker, queue: node.sched.dmQueue, lb: node.lbPolicy, + limiter: limiter, } log.Debug("init delete runner in Proxy") @@ -3408,21 +3415,8 @@ func (node *Proxy) Flush(ctx context.Context, request *milvuspb.FlushRequest) (* // Query get the records by primary keys. func (node *Proxy) query(ctx context.Context, qt *queryTask) (*milvuspb.QueryResults, error) { request := qt.request - receiveSize := proto.Size(request) - metrics.ProxyReceiveBytes.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - metrics.QueryLabel, - request.GetCollectionName(), - ).Add(float64(receiveSize)) - - metrics.ProxyReceivedNQ.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - metrics.SearchLabel, - request.GetCollectionName(), - ).Add(float64(1)) - - subLabel := GetCollectionRateSubLabel(request) - rateCol.Add(internalpb.RateType_DQLQuery.String(), 1, subLabel) + method := "Query" + isProxyRequest := GetRequestLabelFromContext(ctx) if err := merr.CheckHealthy(node.GetStateCode()); err != nil { return &milvuspb.QueryResults{ @@ -3430,20 +3424,6 @@ func (node *Proxy) query(ctx context.Context, qt *queryTask) (*milvuspb.QueryRes }, nil } - ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Query") - defer sp.End() - tr := timerecord.NewTimeRecorder("Query") - - method := "Query" - - metrics.ProxyFunctionCall.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - method, - metrics.TotalLabel, - request.GetDbName(), - request.GetCollectionName(), - ).Inc() - log := log.Ctx(ctx).With( zap.String("role", typeutil.ProxyRole), zap.String("db", request.DbName), @@ -3451,6 +3431,16 @@ func (node *Proxy) query(ctx context.Context, qt *queryTask) (*milvuspb.QueryRes zap.Strings("partitions", request.PartitionNames), ) + log.Debug( + rpcReceived(method), + zap.String("expr", request.Expr), + zap.Strings("OutputFields", request.OutputFields), + zap.Uint64("travel_timestamp", request.TravelTimestamp), + zap.Uint64("guarantee_timestamp", request.GuaranteeTimestamp), + ) + + tr := timerecord.NewTimeRecorder(method) + defer func() { span := tr.ElapseSpan() if span >= 
paramtable.Get().ProxyCfg.SlowQuerySpanInSeconds.GetAsDuration(time.Second) { @@ -3468,27 +3458,21 @@ func (node *Proxy) query(ctx context.Context, qt *queryTask) (*milvuspb.QueryRes } }() - log.Debug( - rpcReceived(method), - zap.String("expr", request.Expr), - zap.Strings("OutputFields", request.OutputFields), - zap.Uint64("travel_timestamp", request.TravelTimestamp), - zap.Uint64("guarantee_timestamp", request.GuaranteeTimestamp), - ) - if err := node.sched.dqQueue.Enqueue(qt); err != nil { log.Warn( rpcFailedToEnqueue(method), zap.Error(err), ) - metrics.ProxyFunctionCall.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - method, - metrics.AbandonLabel, - request.GetDbName(), - request.GetCollectionName(), - ).Inc() + if isProxyRequest { + metrics.ProxyFunctionCall.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + method, + metrics.AbandonLabel, + request.GetDbName(), + request.GetCollectionName(), + ).Inc() + } return &milvuspb.QueryResults{ Status: merr.Status(err), @@ -3503,45 +3487,36 @@ func (node *Proxy) query(ctx context.Context, qt *queryTask) (*milvuspb.QueryRes rpcFailedToWaitToFinish(method), zap.Error(err)) - metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method, - metrics.FailLabel, request.GetDbName(), request.GetCollectionName()).Inc() + if isProxyRequest { + metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method, + metrics.FailLabel, request.GetDbName(), request.GetCollectionName()).Inc() + } return &milvuspb.QueryResults{ Status: merr.Status(err), }, nil } - span := tr.CtxRecord(ctx, "wait query result") - metrics.ProxyWaitForSearchResultLatency.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - metrics.QueryLabel, - ).Observe(float64(span.Milliseconds())) - - log.Debug(rpcDone(method)) - metrics.ProxyFunctionCall.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - method, - metrics.SuccessLabel, - request.GetDbName(), - request.GetCollectionName(), - ).Inc() - - metrics.ProxySQLatency.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - metrics.QueryLabel, - request.GetDbName(), - request.GetCollectionName(), - ).Observe(float64(tr.ElapseSpan().Milliseconds())) + if isProxyRequest { + span := tr.CtxRecord(ctx, "wait query result") + metrics.ProxyWaitForSearchResultLatency.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + metrics.QueryLabel, + ).Observe(float64(span.Milliseconds())) - metrics.ProxyCollectionSQLatency.WithLabelValues( - strconv.FormatInt(paramtable.GetNodeID(), 10), - metrics.QueryLabel, - request.CollectionName, - ).Observe(float64(tr.ElapseSpan().Milliseconds())) + metrics.ProxySQLatency.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + metrics.QueryLabel, + request.GetDbName(), + request.GetCollectionName(), + ).Observe(float64(tr.ElapseSpan().Milliseconds())) - sentSize := proto.Size(qt.result) - rateCol.Add(metricsinfo.ReadResultThroughput, float64(sentSize), subLabel) - metrics.ProxyReadReqSendBytes.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Add(float64(sentSize)) + metrics.ProxyCollectionSQLatency.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + metrics.QueryLabel, + request.CollectionName, + ).Observe(float64(tr.ElapseSpan().Milliseconds())) + } return qt.result, nil } @@ -3563,22 +3538,73 @@ func (node *Proxy) Query(ctx context.Context, request *milvuspb.QueryRequest) (* lb: node.lbPolicy, 
mustUsePartitionKey: Params.ProxyCfg.MustUsePartitionKey.GetAsBool(), } + + subLabel := GetCollectionRateSubLabel(request) + receiveSize := proto.Size(request) + metrics.ProxyReceiveBytes.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + metrics.QueryLabel, + request.GetCollectionName(), + ).Add(float64(receiveSize)) + metrics.ProxyReceivedNQ.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + metrics.SearchLabel, + request.GetCollectionName(), + ).Add(float64(1)) + + rateCol.Add(internalpb.RateType_DQLQuery.String(), 1, subLabel) + + if err := merr.CheckHealthy(node.GetStateCode()); err != nil { + return &milvuspb.QueryResults{ + Status: merr.Status(err), + }, nil + } + + ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-Query") + defer sp.End() + method := "Query" + + metrics.ProxyFunctionCall.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + method, + metrics.TotalLabel, + request.GetDbName(), + request.GetCollectionName(), + ).Inc() + + ctx = SetRequestLabelForContext(ctx) res, err := node.query(ctx, qt) - if merr.Ok(res.Status) && err == nil { - username := GetCurUserFromContextOrDefault(ctx) - nodeID := paramtable.GetStringNodeID() - v := Extension.Report(map[string]any{ - hookutil.OpTypeKey: hookutil.OpTypeQuery, - hookutil.DatabaseKey: request.DbName, - hookutil.UsernameKey: username, - hookutil.ResultDataSizeKey: proto.Size(res), - hookutil.RelatedDataSizeKey: qt.totalRelatedDataSize, - hookutil.RelatedCntKey: qt.allQueryCnt, - }) - SetReportValue(res.Status, v) - metrics.ProxyReportValue.WithLabelValues(nodeID, hookutil.OpTypeQuery, request.DbName, username).Add(float64(v)) + if err != nil || !merr.Ok(res.Status) { + return res, err } - return res, err + + log.Debug(rpcDone(method)) + + metrics.ProxyFunctionCall.WithLabelValues( + strconv.FormatInt(paramtable.GetNodeID(), 10), + method, + metrics.SuccessLabel, + request.GetDbName(), + request.GetCollectionName(), + ).Inc() + + sentSize := proto.Size(qt.result) + rateCol.Add(metricsinfo.ReadResultThroughput, float64(sentSize), subLabel) + metrics.ProxyReadReqSendBytes.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Add(float64(sentSize)) + + username := GetCurUserFromContextOrDefault(ctx) + nodeID := paramtable.GetStringNodeID() + v := Extension.Report(map[string]any{ + hookutil.OpTypeKey: hookutil.OpTypeQuery, + hookutil.DatabaseKey: request.DbName, + hookutil.UsernameKey: username, + hookutil.ResultDataSizeKey: proto.Size(res), + hookutil.RelatedDataSizeKey: qt.totalRelatedDataSize, + hookutil.RelatedCntKey: qt.allQueryCnt, + }) + SetReportValue(res.Status, v) + metrics.ProxyReportValue.WithLabelValues(nodeID, hookutil.OpTypeQuery, request.DbName, username).Add(float64(v)) + return res, nil } // CreateAlias create alias for collection, then you can search the collection with alias. 
@@ -6100,7 +6126,11 @@ func (node *Proxy) ImportV2(ctx context.Context, req *internalpb.ImportRequest) resp.Status = merr.Status(err) return resp, nil } - partitionIDs = lo.Values(partitions) + _, partitionIDs, err = typeutil.RearrangePartitionsForPartitionKey(partitions) + if err != nil { + resp.Status = merr.Status(err) + return resp, nil + } } else { if req.GetPartitionName() == "" { req.PartitionName = Params.CommonCfg.DefaultPartitionName.GetValue() } diff --git a/internal/proxy/impl_test.go b/internal/proxy/impl_test.go index 9577ee9b6d80d..883e44136d778 100644 --- a/internal/proxy/impl_test.go +++ b/internal/proxy/impl_test.go @@ -80,7 +80,7 @@ func TestProxy_InvalidateCollectionMetaCache_remove_stream(t *testing.T) { func TestProxy_CheckHealth(t *testing.T) { t.Run("not healthy", func(t *testing.T) { node := &Proxy{session: &sessionutil.Session{SessionRaw: sessionutil.SessionRaw{ServerID: 1}}} - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Abnormal) ctx := context.Background() resp, err := node.CheckHealth(ctx, &milvuspb.CheckHealthRequest{}) @@ -98,7 +98,7 @@ func TestProxy_CheckHealth(t *testing.T) { dataCoord: NewDataCoordMock(), session: &sessionutil.Session{SessionRaw: sessionutil.SessionRaw{ServerID: 1}}, } - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Healthy) ctx := context.Background() resp, err := node.CheckHealth(ctx, &milvuspb.CheckHealthRequest{}) @@ -131,7 +131,7 @@ func TestProxy_CheckHealth(t *testing.T) { queryCoord: qc, dataCoord: dataCoordMock, } - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Healthy) ctx := context.Background() resp, err := node.CheckHealth(ctx, &milvuspb.CheckHealthRequest{}) @@ -148,7 +148,7 @@ func TestProxy_CheckHealth(t *testing.T) { dataCoord: NewDataCoordMock(), queryCoord: qc, } - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Healthy) resp, err := node.CheckHealth(context.Background(), &milvuspb.CheckHealthRequest{}) assert.NoError(t, err) @@ -243,7 +243,7 @@ func TestProxy_ResourceGroup(t *testing.T) { node, err := NewProxy(ctx, factory) assert.NoError(t, err) - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Healthy) qc := mocks.NewMockQueryCoordClient(t) @@ -335,7 +335,7 @@ func TestProxy_InvalidResourceGroupName(t *testing.T) { node, err := NewProxy(ctx, factory) assert.NoError(t, err) - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Healthy) qc := mocks.NewMockQueryCoordClient(t) @@ -936,7 +936,7 @@ func TestProxyCreateDatabase(t *testing.T) { node.tsoAllocator = &timestampAllocator{ tso: newMockTimestampAllocatorInterface(), } - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Healthy) node.sched, err = newTaskScheduler(ctx, node.tsoAllocator, node.factory) node.sched.ddQueue.setMaxTaskNum(10) @@ -996,7 +996,7 @@ func TestProxyDropDatabase(t *testing.T) { node.tsoAllocator = &timestampAllocator{ tso: newMockTimestampAllocatorInterface(), } - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Healthy) node.sched, err =
newTaskScheduler(ctx, node.tsoAllocator, node.factory) node.sched.ddQueue.setMaxTaskNum(10) @@ -1055,7 +1055,7 @@ func TestProxyListDatabase(t *testing.T) { node.tsoAllocator = &timestampAllocator{ tso: newMockTimestampAllocatorInterface(), } - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Healthy) node.sched, err = newTaskScheduler(ctx, node.tsoAllocator, node.factory) node.sched.ddQueue.setMaxTaskNum(10) @@ -1111,7 +1111,7 @@ func TestProxyAlterDatabase(t *testing.T) { node.tsoAllocator = &timestampAllocator{ tso: newMockTimestampAllocatorInterface(), } - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Healthy) node.sched, err = newTaskScheduler(ctx, node.tsoAllocator, node.factory) node.sched.ddQueue.setMaxTaskNum(10) @@ -1164,7 +1164,7 @@ func TestProxyDescribeDatabase(t *testing.T) { node.tsoAllocator = &timestampAllocator{ tso: newMockTimestampAllocatorInterface(), } - node.simpleLimiter = NewSimpleLimiter() + node.simpleLimiter = NewSimpleLimiter(0, 0) node.UpdateStateCode(commonpb.StateCode_Healthy) node.sched, err = newTaskScheduler(ctx, node.tsoAllocator, node.factory) node.sched.ddQueue.setMaxTaskNum(10) @@ -1287,6 +1287,7 @@ func TestProxy_Delete(t *testing.T) { Expr: "pk in [1, 2, 3]", } cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), diff --git a/internal/proxy/meta_cache_adapter.go b/internal/proxy/meta_cache_adapter.go index c72665066f72b..da63272e74a2f 100644 --- a/internal/proxy/meta_cache_adapter.go +++ b/internal/proxy/meta_cache_adapter.go @@ -23,9 +23,7 @@ import ( "github.com/casbin/casbin/v2/model" jsonadapter "github.com/casbin/json-adapter/v2" - "go.uber.org/zap" - "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/merr" ) @@ -51,7 +49,6 @@ func (a *MetaCacheCasbinAdapter) LoadPolicy(model model.Model) error { policyInfo := strings.Join(cache.GetPrivilegeInfo(context.Background()), ",") policy := fmt.Sprintf("[%s]", policyInfo) - log.Ctx(context.Background()).Info("LoddPolicy update policyinfo", zap.String("policyInfo", policy)) byteSource := []byte(policy) jAdapter := jsonadapter.NewAdapter(&byteSource) return jAdapter.LoadPolicy(model) diff --git a/internal/proxy/mock_test.go b/internal/proxy/mock_test.go index 5675b100fa54d..96ee30669a0fe 100644 --- a/internal/proxy/mock_test.go +++ b/internal/proxy/mock_test.go @@ -298,7 +298,7 @@ func (ms *simpleMockMsgStream) GetProduceChannels() []string { return nil } -func (ms *simpleMockMsgStream) Seek(ctx context.Context, offset []*msgstream.MsgPosition) error { +func (ms *simpleMockMsgStream) Seek(ctx context.Context, msgPositions []*msgstream.MsgPosition, includeCurrentMsg bool) error { return nil } diff --git a/internal/proxy/msg_pack.go b/internal/proxy/msg_pack.go index 7d1d58b213698..1177bd8adc1dd 100644 --- a/internal/proxy/msg_pack.go +++ b/internal/proxy/msg_pack.go @@ -231,7 +231,7 @@ func repackInsertDataWithPartitionKey(ctx context.Context, } channel2RowOffsets := assignChannelsByPK(result.IDs, channelNames, insertMsg) - partitionNames, err := getDefaultPartitionNames(ctx, insertMsg.GetDbName(), insertMsg.CollectionName) + partitionNames, err := getDefaultPartitionsInPartitionKeyMode(ctx, insertMsg.GetDbName(), insertMsg.CollectionName) if err != nil { log.Warn("get default
partition names failed in partition key mode", zap.String("collectionName", insertMsg.CollectionName), diff --git a/internal/proxy/proxy.go b/internal/proxy/proxy.go index 22d3dfbb9bcd8..c0af10850aaa8 100644 --- a/internal/proxy/proxy.go +++ b/internal/proxy/proxy.go @@ -128,6 +128,9 @@ type Proxy struct { // materialized view enableMaterializedView bool + + // delete rate limiter + enableComplexDeleteLimit bool } // NewProxy returns a Proxy struct. @@ -146,7 +149,7 @@ func NewProxy(ctx context.Context, factory dependency.Factory) (*Proxy, error) { factory: factory, searchResultCh: make(chan *internalpb.SearchResults, n), shardMgr: mgr, - simpleLimiter: NewSimpleLimiter(), + simpleLimiter: NewSimpleLimiter(Params.QuotaConfig.AllocWaitInterval.GetAsDuration(time.Millisecond), Params.QuotaConfig.AllocRetryTimes.GetAsUint()), lbPolicy: lbPolicy, resourceManager: resourceManager, replicateStreamManager: replicateStreamManager, @@ -287,6 +290,7 @@ func (node *Proxy) Init() error { node.chTicker = newChannelsTimeTicker(node.ctx, Params.ProxyCfg.TimeTickInterval.GetAsDuration(time.Millisecond)/2, []string{}, node.sched.getPChanStatistics, tsoAllocator) log.Debug("create channels time ticker done", zap.String("role", typeutil.ProxyRole), zap.Duration("syncTimeTickInterval", syncTimeTickInterval)) + node.enableComplexDeleteLimit = Params.QuotaConfig.ComplexDeleteLimitEnable.GetAsBool() node.metricsCacheManager = metricsinfo.NewMetricsCacheManager() log.Debug("create metrics cache manager done", zap.String("role", typeutil.ProxyRole)) diff --git a/internal/proxy/proxy_test.go b/internal/proxy/proxy_test.go index 9877d1243e3ac..298abede0c7da 100644 --- a/internal/proxy/proxy_test.go +++ b/internal/proxy/proxy_test.go @@ -299,7 +299,7 @@ func (s *proxyTestServer) startGrpc(ctx context.Context, wg *sync.WaitGroup, p * ctx, cancel := context.WithCancel(ctx) defer cancel() - s.simpleLimiter = NewSimpleLimiter() + s.simpleLimiter = NewSimpleLimiter(0, 0) opts := tracer.GetInterceptorOpts() s.grpcServer = grpc.NewServer( diff --git a/internal/proxy/rate_limit_interceptor.go b/internal/proxy/rate_limit_interceptor.go index 14ac320334495..01030fb8f190b 100644 --- a/internal/proxy/rate_limit_interceptor.go +++ b/internal/proxy/rate_limit_interceptor.go @@ -19,7 +19,6 @@ package proxy import ( "context" "fmt" - "reflect" "strconv" "github.com/golang/protobuf/proto" @@ -31,6 +30,7 @@ import ( "github.com/milvus-io/milvus/internal/types" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/requestutil" @@ -41,7 +41,7 @@ func RateLimitInterceptor(limiter types.Limiter) grpc.UnaryServerInterceptor { return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { dbID, collectionIDToPartIDs, rt, n, err := getRequestInfo(ctx, req) if err != nil { - log.RatedWarn(10, "failed to get request info", zap.Error(err)) + log.Warn("failed to get request info", zap.Error(err)) return handler(ctx, req) } @@ -119,6 +119,9 @@ func getCollectionAndPartitionIDs(ctx context.Context, r reqPartNames) (int64, m func getCollectionID(r reqCollName) (int64, map[int64][]int64) { db, _ := globalMetaCache.GetDatabaseInfo(context.TODO(), r.GetDbName()) + if db == nil { + return util.InvalidDBID, map[int64][]int64{} + } collectionID, _ := globalMetaCache.GetCollectionID(context.TODO(), 
r.GetDbName(), r.GetCollectionName()) return db.dbID, map[int64][]int64{collectionID: {}} } @@ -177,14 +180,14 @@ func getRequestInfo(ctx context.Context, req interface{}) (int64, map[int64][]in case *milvuspb.FlushRequest: db, err := globalMetaCache.GetDatabaseInfo(ctx, r.GetDbName()) if err != nil { - return 0, map[int64][]int64{}, 0, 0, err + return util.InvalidDBID, map[int64][]int64{}, 0, 0, err } collToPartIDs := make(map[int64][]int64, 0) for _, collectionName := range r.GetCollectionNames() { collectionID, err := globalMetaCache.GetCollectionID(ctx, r.GetDbName(), collectionName) if err != nil { - return 0, map[int64][]int64{}, 0, 0, err + return util.InvalidDBID, map[int64][]int64{}, 0, 0, err } collToPartIDs[collectionID] = []int64{} } @@ -193,16 +196,16 @@ func getRequestInfo(ctx context.Context, req interface{}) (int64, map[int64][]in dbName := GetCurDBNameFromContextOrDefault(ctx) dbInfo, err := globalMetaCache.GetDatabaseInfo(ctx, dbName) if err != nil { - return 0, map[int64][]int64{}, 0, 0, err + return util.InvalidDBID, map[int64][]int64{}, 0, 0, err } return dbInfo.dbID, map[int64][]int64{ r.GetCollectionID(): {}, }, internalpb.RateType_DDLCompaction, 1, nil default: // TODO: support more request if req == nil { - return 0, map[int64][]int64{}, 0, 0, fmt.Errorf("null request") + return util.InvalidDBID, map[int64][]int64{}, 0, 0, fmt.Errorf("null request") } - return 0, map[int64][]int64{}, 0, 0, fmt.Errorf("unsupported request type %s", reflect.TypeOf(req).Name()) + return util.InvalidDBID, map[int64][]int64{}, 0, 0, nil } } diff --git a/internal/proxy/rate_limit_interceptor_test.go b/internal/proxy/rate_limit_interceptor_test.go index cfea05d30b458..3f9fbac8435a2 100644 --- a/internal/proxy/rate_limit_interceptor_test.go +++ b/internal/proxy/rate_limit_interceptor_test.go @@ -29,6 +29,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus/internal/proto/internalpb" + "github.com/milvus-io/milvus/pkg/util" "github.com/milvus-io/milvus/pkg/util/merr" ) @@ -49,6 +50,10 @@ func (l *limiterMock) Check(dbID int64, collectionIDToPartIDs map[int64][]int64, return nil } +func (l *limiterMock) Alloc(ctx context.Context, dbID int64, collectionIDToPartIDs map[int64][]int64, rt internalpb.RateType, n int) error { + return l.Check(dbID, collectionIDToPartIDs, rt, n) +} + func TestRateLimitInterceptor(t *testing.T) { t.Run("test getRequestInfo", func(t *testing.T) { mockCache := NewMockCache(t) @@ -256,7 +261,7 @@ func TestRateLimitInterceptor(t *testing.T) { assert.Error(t, err) _, _, _, _, err = getRequestInfo(context.Background(), &milvuspb.CalcDistanceRequest{}) - assert.Error(t, err) + assert.NoError(t, err) }) t.Run("test getFailedResponse", func(t *testing.T) { @@ -367,7 +372,7 @@ func TestGetInfo(t *testing.T) { }() t.Run("fail to get database", func(t *testing.T) { - mockCache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(nil, errors.New("mock error: get database info")).Times(4) + mockCache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(nil, errors.New("mock error: get database info")).Times(5) { _, _, err := getCollectionAndPartitionID(ctx, &milvuspb.InsertRequest{ DbName: "foo", @@ -394,6 +399,11 @@ func TestGetInfo(t *testing.T) { _, _, _, _, err := getRequestInfo(ctx, &milvuspb.ManualCompactionRequest{}) assert.Error(t, err) } + { + dbID, collectionIDInfos := getCollectionID(&milvuspb.CreateCollectionRequest{}) + assert.Equal(t, 
util.InvalidDBID, dbID) + assert.Equal(t, 0, len(collectionIDInfos)) + } }) t.Run("fail to get collection", func(t *testing.T) { diff --git a/internal/proxy/simple_rate_limiter.go b/internal/proxy/simple_rate_limiter.go index b6652e62791ed..1803de81e6a14 100644 --- a/internal/proxy/simple_rate_limiter.go +++ b/internal/proxy/simple_rate_limiter.go @@ -21,6 +21,7 @@ import ( "fmt" "strconv" "sync" + "time" "go.uber.org/zap" @@ -32,8 +33,10 @@ import ( rlinternal "github.com/milvus-io/milvus/internal/util/ratelimitutil" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/ratelimitutil" + "github.com/milvus-io/milvus/pkg/util/retry" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -41,15 +44,26 @@ import ( type SimpleLimiter struct { quotaStatesMu sync.RWMutex rateLimiter *rlinternal.RateLimiterTree + + // for alloc + allocWaitInterval time.Duration + allocRetryTimes uint } // NewSimpleLimiter returns a new SimpleLimiter. -func NewSimpleLimiter() *SimpleLimiter { +func NewSimpleLimiter(allocWaitInterval time.Duration, allocRetryTimes uint) *SimpleLimiter { rootRateLimiter := newClusterLimiter() - m := &SimpleLimiter{rateLimiter: rlinternal.NewRateLimiterTree(rootRateLimiter)} + m := &SimpleLimiter{rateLimiter: rlinternal.NewRateLimiterTree(rootRateLimiter), allocWaitInterval: allocWaitInterval, allocRetryTimes: allocRetryTimes} return m } +// Alloc will retry till check pass or out of times. +func (m *SimpleLimiter) Alloc(ctx context.Context, dbID int64, collectionIDToPartIDs map[int64][]int64, rt internalpb.RateType, n int) error { + return retry.Do(ctx, func() error { + return m.Check(dbID, collectionIDToPartIDs, rt, n) + }, retry.Sleep(m.allocWaitInterval), retry.Attempts(m.allocRetryTimes)) +} + // Check checks if request would be limited or denied. func (m *SimpleLimiter) Check(dbID int64, collectionIDToPartIDs map[int64][]int64, rt internalpb.RateType, n int) error { if !Params.QuotaConfig.QuotaAndLimitsEnabled.GetAsBool() { @@ -64,7 +78,6 @@ func (m *SimpleLimiter) Check(dbID int64, collectionIDToPartIDs map[int64][]int6 ret := clusterRateLimiters.Check(rt, n) if ret != nil { - clusterRateLimiters.Cancel(rt, n) return ret } @@ -79,7 +92,7 @@ func (m *SimpleLimiter) Check(dbID int64, collectionIDToPartIDs map[int64][]int6 } // 2. check database level rate limits - if ret == nil { + if ret == nil && dbID != util.InvalidDBID { dbRateLimiters := m.rateLimiter.GetOrCreateDatabaseLimiters(dbID, newDatabaseLimiter) ret = dbRateLimiters.Check(rt, n) if ret != nil { @@ -92,6 +105,9 @@ func (m *SimpleLimiter) Check(dbID int64, collectionIDToPartIDs map[int64][]int6 // 3. 
check collection level rate limits if ret == nil && len(collectionIDToPartIDs) > 0 && !isNotCollectionLevelLimitRequest(rt) { for collectionID := range collectionIDToPartIDs { + if collectionID == 0 || dbID == util.InvalidDBID { + continue + } // only dml and dql have collection level rate limits collectionRateLimiters := m.rateLimiter.GetOrCreateCollectionLimiters(dbID, collectionID, newDatabaseLimiter, newCollectionLimiters) @@ -108,6 +124,9 @@ func (m *SimpleLimiter) Check(dbID int64, collectionIDToPartIDs map[int64][]int6 if ret == nil && len(collectionIDToPartIDs) > 0 { for collectionID, partitionIDs := range collectionIDToPartIDs { for _, partID := range partitionIDs { + if collectionID == 0 || partID == 0 || dbID == util.InvalidDBID { + continue + } partitionRateLimiters := m.rateLimiter.GetOrCreatePartitionLimiters(dbID, collectionID, partID, newDatabaseLimiter, newCollectionLimiters, newPartitionLimiters) ret = partitionRateLimiters.Check(rt, n) diff --git a/internal/proxy/simple_rate_limiter_test.go b/internal/proxy/simple_rate_limiter_test.go index 178c536beec51..d9f555b4a87ce 100644 --- a/internal/proxy/simple_rate_limiter_test.go +++ b/internal/proxy/simple_rate_limiter_test.go @@ -40,7 +40,7 @@ func TestSimpleRateLimiter(t *testing.T) { bak := Params.QuotaConfig.QuotaAndLimitsEnabled.GetValue() paramtable.Get().Save(Params.QuotaConfig.QuotaAndLimitsEnabled.Key, "true") - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) clusterRateLimiters := simpleLimiter.rateLimiter.GetRootLimiters() simpleLimiter.rateLimiter.GetOrCreateCollectionLimiters(0, collectionID, newDatabaseLimiter, @@ -83,13 +83,15 @@ func TestSimpleRateLimiter(t *testing.T) { t.Run("test global static limit", func(t *testing.T) { bak := Params.QuotaConfig.QuotaAndLimitsEnabled.GetValue() paramtable.Get().Save(Params.QuotaConfig.QuotaAndLimitsEnabled.Key, "true") - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) clusterRateLimiters := simpleLimiter.rateLimiter.GetRootLimiters() collectionIDToPartIDs := map[int64][]int64{ + 0: {}, 1: {}, 2: {}, 3: {}, + 4: {0}, } for i := 1; i <= 3; i++ { @@ -134,7 +136,7 @@ func TestSimpleRateLimiter(t *testing.T) { }) t.Run("not enable quotaAndLimit", func(t *testing.T) { - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) bak := Params.QuotaConfig.QuotaAndLimitsEnabled.GetValue() paramtable.Get().Save(Params.QuotaConfig.QuotaAndLimitsEnabled.Key, "false") for _, rt := range internalpb.RateType_value { @@ -148,7 +150,7 @@ func TestSimpleRateLimiter(t *testing.T) { run := func(insertRate float64) { bakInsertRate := Params.QuotaConfig.DMLMaxInsertRate.GetValue() paramtable.Get().Save(Params.QuotaConfig.DMLMaxInsertRate.Key, fmt.Sprintf("%f", insertRate)) - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) bak := Params.QuotaConfig.QuotaAndLimitsEnabled.GetValue() paramtable.Get().Save(Params.QuotaConfig.QuotaAndLimitsEnabled.Key, "true") err := simpleLimiter.Check(0, nil, internalpb.RateType_DMLInsert, 1*1024*1024) @@ -164,7 +166,7 @@ func TestSimpleRateLimiter(t *testing.T) { }) t.Run("test set rates", func(t *testing.T) { - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) zeroRates := getZeroCollectionRates() err := simpleLimiter.SetRates(newCollectionLimiterNode(map[int64]*proxypb.LimiterNode{ @@ -186,7 +188,7 @@ func TestSimpleRateLimiter(t *testing.T) { }) t.Run("test quota states", func(t *testing.T) { - simpleLimiter := 
NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) err := simpleLimiter.SetRates(newCollectionLimiterNode(map[int64]*proxypb.LimiterNode{ 1: { // collection limiter @@ -255,7 +257,7 @@ func newCollectionLimiterNode(collectionLimiterNodes map[int64]*proxypb.LimiterN func TestRateLimiter(t *testing.T) { t.Run("test limit", func(t *testing.T) { - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) rootLimiters := simpleLimiter.rateLimiter.GetRootLimiters() for _, rt := range internalpb.RateType_value { rootLimiters.GetLimiters().Insert(internalpb.RateType(rt), ratelimitutil.NewLimiter(ratelimitutil.Limit(1000), 1)) @@ -271,7 +273,7 @@ func TestRateLimiter(t *testing.T) { }) t.Run("test setRates", func(t *testing.T) { - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) collectionRateLimiters := simpleLimiter.rateLimiter.GetOrCreateCollectionLimiters(0, int64(1), newDatabaseLimiter, func() *rlinternal.RateLimiterNode { @@ -334,7 +336,7 @@ func TestRateLimiter(t *testing.T) { }) t.Run("test get error code", func(t *testing.T) { - simpleLimiter := NewSimpleLimiter() + simpleLimiter := NewSimpleLimiter(0, 0) collectionRateLimiters := simpleLimiter.rateLimiter.GetOrCreateCollectionLimiters(0, int64(1), newDatabaseLimiter, func() *rlinternal.RateLimiterNode { diff --git a/internal/proxy/task.go b/internal/proxy/task.go index 4167a04ce3fce..ffd79b67af0a7 100644 --- a/internal/proxy/task.go +++ b/internal/proxy/task.go @@ -293,10 +293,15 @@ func (t *createCollectionTask) PreExecute(ctx context.Context) error { return fmt.Errorf("maximum field's number should be limited to %d", Params.ProxyCfg.MaxFieldNum.GetAsInt()) } - if len(typeutil.GetVectorFieldSchemas(t.schema)) > Params.ProxyCfg.MaxVectorFieldNum.GetAsInt() { + vectorFields := len(typeutil.GetVectorFieldSchemas(t.schema)) + if vectorFields > Params.ProxyCfg.MaxVectorFieldNum.GetAsInt() { return fmt.Errorf("maximum vector field's number should be limited to %d", Params.ProxyCfg.MaxVectorFieldNum.GetAsInt()) } + if vectorFields == 0 { + return merr.WrapErrParameterInvalidMsg("schema does not contain vector field") + } + // validate collection name if err := validateCollectionName(t.schema.Name); err != nil { return err @@ -1544,11 +1549,6 @@ func (t *loadCollectionTask) PreExecute(ctx context.Context) error { return err } - // To compat with LoadCollcetion before Milvus@2.1 - if t.ReplicaNumber == 0 { - t.ReplicaNumber = 1 - } - return nil } diff --git a/internal/proxy/task_delete.go b/internal/proxy/task_delete.go index 8822410576dd2..7beaffadcbbea 100644 --- a/internal/proxy/task_delete.go +++ b/internal/proxy/task_delete.go @@ -231,9 +231,11 @@ type deleteRunner struct { idAllocator allocator.Interface tsoAllocatorIns tsoAllocator + limiter types.Limiter // delete info schema *schemaInfo + dbID UniqueID collectionID UniqueID partitionID UniqueID partitionKeyMode bool @@ -259,6 +261,13 @@ func (dr *deleteRunner) Init(ctx context.Context) error { if err := validateCollectionName(collName); err != nil { return ErrWithLog(log, "Invalid collection name", err) } + + db, err := globalMetaCache.GetDatabaseInfo(ctx, dr.req.GetDbName()) + if err != nil { + return err + } + dr.dbID = db.dbID + dr.collectionID, err = globalMetaCache.GetCollectionID(ctx, dr.req.GetDbName(), collName) if err != nil { return ErrWithLog(log, "Failed to get collection id", err) @@ -428,7 +437,7 @@ func (dr *deleteRunner) getStreamingQueryAndDelteFunc(plan *planpb.PlanNode) exe } taskCh := make(chan *deleteTask, 
256) - go dr.receiveQueryResult(ctx, client, taskCh) + go dr.receiveQueryResult(ctx, client, taskCh, partitionIDs) var allQueryCnt int64 // wait all task finish for task := range taskCh { @@ -449,7 +458,7 @@ func (dr *deleteRunner) getStreamingQueryAndDelteFunc(plan *planpb.PlanNode) exe } } -func (dr *deleteRunner) receiveQueryResult(ctx context.Context, client querypb.QueryNode_QueryStreamClient, taskCh chan *deleteTask) { +func (dr *deleteRunner) receiveQueryResult(ctx context.Context, client querypb.QueryNode_QueryStreamClient, taskCh chan *deleteTask, partitionIDs []int64) { defer func() { close(taskCh) }() @@ -472,6 +481,15 @@ func (dr *deleteRunner) receiveQueryResult(ctx context.Context, client querypb.Q return } + if dr.limiter != nil { + err := dr.limiter.Alloc(ctx, dr.dbID, map[int64][]int64{dr.collectionID: partitionIDs}, internalpb.RateType_DMLDelete, proto.Size(result.GetIds())) + if err != nil { + dr.err = err + log.Warn("query stream for delete failed because rate limiter", zap.Int64("msgID", dr.msgID), zap.Error(err)) + return + } + } + task, err := dr.produce(ctx, result.GetIds()) if err != nil { dr.err = err diff --git a/internal/proxy/task_delete_test.go b/internal/proxy/task_delete_test.go index 4c973b803c476..657029001952f 100644 --- a/internal/proxy/task_delete_test.go +++ b/internal/proxy/task_delete_test.go @@ -118,6 +118,7 @@ func TestDeleteTask_GetChannels(t *testing.T) { mock.AnythingOfType("string"), mock.AnythingOfType("string"), ).Return(collectionID, nil) + globalMetaCache = cache chMgr := NewMockChannelsMgr(t) chMgr.EXPECT().getChannels(mock.Anything).Return(channels, nil) @@ -265,6 +266,19 @@ func TestDeleteRunner_Init(t *testing.T) { assert.Error(t, dr.Init(context.Background())) }) + t.Run("fail to get database info", func(t *testing.T) { + dr := deleteRunner{ + req: &milvuspb.DeleteRequest{ + CollectionName: collectionName, + }, + } + cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(nil, fmt.Errorf("mock error")) + globalMetaCache = cache + + assert.Error(t, dr.Init(context.Background())) + }) + t.Run("fail to get collection id", func(t *testing.T) { dr := deleteRunner{ req: &milvuspb.DeleteRequest{ @@ -272,11 +286,13 @@ func TestDeleteRunner_Init(t *testing.T) { }, } cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), mock.AnythingOfType("string"), ).Return(int64(0), errors.New("mock GetCollectionID err")) + globalMetaCache = cache assert.Error(t, dr.Init(context.Background())) }) @@ -287,6 +303,7 @@ func TestDeleteRunner_Init(t *testing.T) { DbName: dbName, }} cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), @@ -309,6 +326,7 @@ func TestDeleteRunner_Init(t *testing.T) { PartitionName: partitionName, }} cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), @@ -347,6 +365,7 @@ func TestDeleteRunner_Init(t *testing.T) { }, } cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context 
mock.AnythingOfType("string"), @@ -372,6 +391,7 @@ func TestDeleteRunner_Init(t *testing.T) { }, } cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), @@ -405,6 +425,7 @@ func TestDeleteRunner_Init(t *testing.T) { chMgr: chMgr, } cache := NewMockCache(t) + cache.EXPECT().GetDatabaseInfo(mock.Anything, mock.Anything).Return(&databaseInfo{dbID: 0}, nil) cache.On("GetCollectionID", mock.Anything, // context.Context mock.AnythingOfType("string"), @@ -656,6 +677,65 @@ func TestDeleteRunner_Run(t *testing.T) { assert.Error(t, dr.Run(ctx)) }) + t.Run("complex delete rate limit check failed", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + mockMgr := NewMockChannelsMgr(t) + qn := mocks.NewMockQueryNodeClient(t) + lb := NewMockLBPolicy(t) + + dr := deleteRunner{ + chMgr: mockMgr, + queue: queue.dmQueue, + schema: schema, + collectionID: collectionID, + partitionID: partitionID, + vChannels: channels, + idAllocator: idAllocator, + tsoAllocatorIns: tsoAllocator, + lb: lb, + limiter: &limiterMock{}, + result: &milvuspb.MutationResult{ + Status: merr.Success(), + IDs: &schemapb.IDs{ + IdField: nil, + }, + }, + req: &milvuspb.DeleteRequest{ + CollectionName: collectionName, + PartitionName: partitionName, + DbName: dbName, + Expr: "pk < 3", + }, + } + lb.EXPECT().Execute(mock.Anything, mock.Anything).Call.Return(func(ctx context.Context, workload CollectionWorkLoad) error { + return workload.exec(ctx, 1, qn, "") + }) + + qn.EXPECT().QueryStream(mock.Anything, mock.Anything).Call.Return( + func(ctx context.Context, in *querypb.QueryRequest, opts ...grpc.CallOption) querypb.QueryNode_QueryStreamClient { + client := streamrpc.NewLocalQueryClient(ctx) + server := client.CreateServer() + + server.Send(&internalpb.RetrieveResults{ + Status: merr.Success(), + Ids: &schemapb.IDs{ + IdField: &schemapb.IDs_IntId{ + IntId: &schemapb.LongArray{ + Data: []int64{0, 1, 2}, + }, + }, + }, + }) + server.FinishSend(nil) + return client + }, nil) + + assert.Error(t, dr.Run(ctx)) + assert.Equal(t, int64(0), dr.result.DeleteCnt) + }) + t.Run("complex delete produce failed", func(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/internal/proxy/task_index.go b/internal/proxy/task_index.go index 5925391c20b6e..149a13605a9ad 100644 --- a/internal/proxy/task_index.go +++ b/internal/proxy/task_index.go @@ -332,6 +332,13 @@ func fillDimension(field *schemapb.FieldSchema, indexParams map[string]string) e func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) error { indexType := indexParams[common.IndexTypeKey] + if indexType == indexparamcheck.IndexBitmap { + _, exist := indexParams[common.BitmapCardinalityLimitKey] + if !exist { + indexParams[common.BitmapCardinalityLimitKey] = paramtable.Get().CommonCfg.BitmapIndexCardinalityBound.GetValue() + } + } + checker, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(indexType) if err != nil { log.Warn("Failed to get index checker", zap.String(common.IndexTypeKey, indexType)) diff --git a/internal/proxy/task_query.go b/internal/proxy/task_query.go index 618805a4f9bc1..212015b440330 100644 --- a/internal/proxy/task_query.go +++ b/internal/proxy/task_query.go @@ -607,9 +607,9 @@ func reduceRetrieveResults(ctx context.Context, retrieveResults []*internalpb.Re idSet := 
make(map[interface{}]struct{}) cursors := make([]int64, len(validRetrieveResults)) - retrieveLimit := typeutil.Unlimited if queryParams != nil && queryParams.limit != typeutil.Unlimited { - retrieveLimit = queryParams.limit + queryParams.offset + // reduceStopForBest will try to get as many results as possible + // so loopEnd in this case will be set to the sum of all results' size if !queryParams.reduceStopForBest { loopEnd = int(queryParams.limit) } @@ -618,7 +618,7 @@ func reduceRetrieveResults(ctx context.Context, retrieveResults []*internalpb.Re // handle offset if queryParams != nil && queryParams.offset > 0 { for i := int64(0); i < queryParams.offset; i++ { - sel, drainOneResult := typeutil.SelectMinPK(retrieveLimit, validRetrieveResults, cursors) + sel, drainOneResult := typeutil.SelectMinPK(validRetrieveResults, cursors) if sel == -1 || (queryParams.reduceStopForBest && drainOneResult) { return ret, nil } @@ -626,16 +626,11 @@ func reduceRetrieveResults(ctx context.Context, retrieveResults []*internalpb.Re } } - reduceStopForBest := false - if queryParams != nil { - reduceStopForBest = queryParams.reduceStopForBest - } - var retSize int64 maxOutputSize := paramtable.Get().QuotaConfig.MaxOutputSize.GetAsInt64() - for j := 0; j < loopEnd; j++ { - sel, drainOneResult := typeutil.SelectMinPK(retrieveLimit, validRetrieveResults, cursors) - if sel == -1 || (reduceStopForBest && drainOneResult) { + for j := 0; j < loopEnd; { + sel, drainOneResult := typeutil.SelectMinPK(validRetrieveResults, cursors) + if sel == -1 || (queryParams.reduceStopForBest && drainOneResult) { break } @@ -643,6 +638,7 @@ func reduceRetrieveResults(ctx context.Context, retrieveResults []*internalpb.Re if _, ok := idSet[pk]; !ok { retSize += typeutil.AppendFieldData(ret.FieldsData, validRetrieveResults[sel].GetFieldsData(), cursors[sel]) idSet[pk] = struct{}{} + j++ } else { // primary keys duplicate skipDupCnt++ diff --git a/internal/proxy/task_query_test.go b/internal/proxy/task_query_test.go index 5112b53ac2552..9b62b9ece5240 100644 --- a/internal/proxy/task_query_test.go +++ b/internal/proxy/task_query_test.go @@ -479,8 +479,7 @@ func TestTaskQuery_functions(t *testing.T) { }, FieldsData: fieldDataArray2, } - - result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{result1, result2}, nil) + result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{result1, result2}, &queryParams{limit: 2}) assert.NoError(t, err) assert.Equal(t, 2, len(result.GetFieldsData())) assert.Equal(t, Int64Array, result.GetFieldsData()[0].GetScalars().GetLongData().Data) @@ -488,7 +487,7 @@ func TestTaskQuery_functions(t *testing.T) { }) t.Run("test nil results", func(t *testing.T) { - ret, err := reduceRetrieveResults(context.Background(), nil, nil) + ret, err := reduceRetrieveResults(context.Background(), nil, &queryParams{}) assert.NoError(t, err) assert.Empty(t, ret.GetFieldsData()) }) @@ -594,6 +593,8 @@ func TestTaskQuery_functions(t *testing.T) { }) t.Run("test stop reduce for best for limit", func(t *testing.T) { + r1.HasMoreResult = true + r2.HasMoreResult = false result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{r1, r2}, &queryParams{limit: 2, reduceStopForBest: true}) @@ -605,6 +606,8 @@ func TestTaskQuery_functions(t *testing.T) { }) t.Run("test stop reduce for best for limit and offset", func(t *testing.T) { + r1.HasMoreResult = true + r2.HasMoreResult = true result, err := reduceRetrieveResults(context.Background(), 
[]*internalpb.RetrieveResults{r1, r2}, &queryParams{limit: 1, offset: 1, reduceStopForBest: true}) @@ -614,6 +617,8 @@ func TestTaskQuery_functions(t *testing.T) { }) t.Run("test stop reduce for best for limit and offset", func(t *testing.T) { + r1.HasMoreResult = false + r2.HasMoreResult = true result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{r1, r2}, &queryParams{limit: 2, offset: 1, reduceStopForBest: true}) @@ -625,6 +630,8 @@ func TestTaskQuery_functions(t *testing.T) { }) t.Run("test stop reduce for best for unlimited set", func(t *testing.T) { + r1.HasMoreResult = false + r2.HasMoreResult = false result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{r1, r2}, &queryParams{limit: typeutil.Unlimited, reduceStopForBest: true}) @@ -635,7 +642,7 @@ func TestTaskQuery_functions(t *testing.T) { assert.InDeltaSlice(t, resultFloat[0:(len)*Dim], result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10) }) - t.Run("test stop reduce for best for unlimited set amd pffset", func(t *testing.T) { + t.Run("test stop reduce for best for unlimited set amd offset", func(t *testing.T) { result, err := reduceRetrieveResults(context.Background(), []*internalpb.RetrieveResults{r1, r2}, &queryParams{limit: typeutil.Unlimited, offset: 3, reduceStopForBest: true}) diff --git a/internal/proxy/task_scheduler.go b/internal/proxy/task_scheduler.go index 9d8b899f81188..6c3adbfda34de 100644 --- a/internal/proxy/task_scheduler.go +++ b/internal/proxy/task_scheduler.go @@ -219,6 +219,7 @@ func newBaseTaskQueue(tsoAllocatorIns tsoAllocator) *baseTaskQueue { } } +// ddTaskQueue represents queue for DDL task such as createCollection/createPartition/dropCollection/dropPartition/hasCollection/hasPartition type ddTaskQueue struct { *baseTaskQueue lock sync.Mutex @@ -229,6 +230,7 @@ type pChanStatInfo struct { tsSet map[Timestamp]struct{} } +// dmTaskQueue represents queue for DML task such as insert/delete/upsert type dmTaskQueue struct { *baseTaskQueue @@ -351,6 +353,7 @@ func (queue *dmTaskQueue) getPChanStatsInfo() (map[pChan]*pChanStatistics, error return ret, nil } +// dqTaskQueue represents queue for DQL task such as search/query type dqTaskQueue struct { *baseTaskQueue } diff --git a/internal/proxy/task_test.go b/internal/proxy/task_test.go index 161f5b1bf9f11..31e44e8b469ff 100644 --- a/internal/proxy/task_test.go +++ b/internal/proxy/task_test.go @@ -754,6 +754,25 @@ func TestCreateCollectionTask(t *testing.T) { err = task.PreExecute(ctx) assert.Error(t, err) + // without vector field + schema = &schemapb.CollectionSchema{ + Name: collectionName, + Description: "", + AutoID: false, + Fields: []*schemapb.FieldSchema{ + { + Name: "id", + DataType: schemapb.DataType_Int64, + IsPrimaryKey: true, + }, + }, + } + noVectorSchema, err := proto.Marshal(schema) + assert.NoError(t, err) + task.CreateCollectionRequest.Schema = noVectorSchema + err = task.PreExecute(ctx) + assert.Error(t, err) + task.CreateCollectionRequest = reqBackup // validateCollectionName @@ -3116,7 +3135,7 @@ func TestCreateCollectionTaskWithPartitionKey(t *testing.T) { // check default partitions err = InitMetaCache(ctx, rc, nil, nil) assert.NoError(t, err) - partitionNames, err := getDefaultPartitionNames(ctx, "", task.CollectionName) + partitionNames, err := getDefaultPartitionsInPartitionKeyMode(ctx, "", task.CollectionName) assert.NoError(t, err) assert.Equal(t, task.GetNumPartitions(), int64(len(partitionNames))) diff --git a/internal/proxy/util.go 
b/internal/proxy/util.go index 42bc37f03b3b2..8982a17800aca 100644 --- a/internal/proxy/util.go +++ b/internal/proxy/util.go @@ -1395,7 +1395,7 @@ func hasParitionKeyModeField(schema *schemapb.CollectionSchema) bool { return false } -// getDefaultPartitionNames only used in partition key mode +// getDefaultPartitionsInPartitionKeyMode only used in partition key mode func getDefaultPartitionsInPartitionKeyMode(ctx context.Context, dbName string, collectionName string) ([]string, error) { partitions, err := globalMetaCache.GetPartitions(ctx, dbName, collectionName) if err != nil { @@ -1411,32 +1411,6 @@ func getDefaultPartitionsInPartitionKeyMode(ctx context.Context, dbName string, return partitionNames, nil } -// getDefaultPartitionNames only used in partition key mode -func getDefaultPartitionNames(ctx context.Context, dbName string, collectionName string) ([]string, error) { - partitions, err := globalMetaCache.GetPartitions(ctx, dbName, collectionName) - if err != nil { - return nil, err - } - - // Make sure the order of the partition names got every time is the same - partitionNames := make([]string, len(partitions)) - for partitionName := range partitions { - splits := strings.Split(partitionName, "_") - if len(splits) < 2 { - err = fmt.Errorf("bad default partion name in partition ket mode: %s", partitionName) - return nil, err - } - index, err := strconv.ParseInt(splits[len(splits)-1], 10, 64) - if err != nil { - return nil, err - } - - partitionNames[index] = partitionName - } - - return partitionNames, nil -} - func assignChannelsByPK(pks *schemapb.IDs, channelNames []string, insertMsg *msgstream.InsertMsg) map[string][]int { insertMsg.HashValues = typeutil.HashPK2Channels(pks, channelNames) @@ -1646,3 +1620,22 @@ func GetCostValue(status *commonpb.Status) int { } return value } + +type isProxyRequestKeyType struct{} + +var ctxProxyRequestKey = isProxyRequestKeyType{} + +func SetRequestLabelForContext(ctx context.Context) context.Context { + return context.WithValue(ctx, ctxProxyRequestKey, true) +} + +func GetRequestLabelFromContext(ctx context.Context) bool { + if ctx == nil { + return false + } + v := ctx.Value(ctxProxyRequestKey) + if v == nil { + return false + } + return v.(bool) +} diff --git a/internal/proxy/util_test.go b/internal/proxy/util_test.go index 2d066d0f99add..46b3189351a0b 100644 --- a/internal/proxy/util_test.go +++ b/internal/proxy/util_test.go @@ -2294,3 +2294,24 @@ func TestGetCostValue(t *testing.T) { assert.Equal(t, 100, cost) }) } + +func TestRequestLabelWithContext(t *testing.T) { + ctx := context.Background() + + { + label := GetRequestLabelFromContext(ctx) + assert.False(t, label) + } + + ctx = SetRequestLabelForContext(ctx) + { + label := GetRequestLabelFromContext(ctx) + assert.True(t, label) + } + + { + // nolint + label := GetRequestLabelFromContext(nil) + assert.False(t, label) + } +} diff --git a/internal/querycoordv2/checkers/balance_checker.go b/internal/querycoordv2/checkers/balance_checker.go index f611bdef1887f..81c7c96271637 100644 --- a/internal/querycoordv2/checkers/balance_checker.go +++ b/internal/querycoordv2/checkers/balance_checker.go @@ -39,28 +39,28 @@ import ( // BalanceChecker checks the cluster distribution and generates balance tasks. 
type BalanceChecker struct { *checkerActivation - balance.Balance meta *meta.Meta nodeManager *session.NodeManager normalBalanceCollectionsCurrentRound typeutil.UniqueSet scheduler task.Scheduler targetMgr *meta.TargetManager + getBalancerFunc GetBalancerFunc } func NewBalanceChecker(meta *meta.Meta, targetMgr *meta.TargetManager, - balancer balance.Balance, nodeMgr *session.NodeManager, scheduler task.Scheduler, + getBalancerFunc GetBalancerFunc, ) *BalanceChecker { return &BalanceChecker{ checkerActivation: newCheckerActivation(), - Balance: balancer, meta: meta, targetMgr: targetMgr, nodeManager: nodeMgr, normalBalanceCollectionsCurrentRound: typeutil.NewUniqueSet(), scheduler: scheduler, + getBalancerFunc: getBalancerFunc, } } @@ -155,7 +155,7 @@ func (b *BalanceChecker) balanceReplicas(replicaIDs []int64) ([]balance.SegmentA if replica == nil { continue } - sPlans, cPlans := b.Balance.BalanceReplica(replica) + sPlans, cPlans := b.getBalancerFunc().BalanceReplica(replica) segmentPlans = append(segmentPlans, sPlans...) channelPlans = append(channelPlans, cPlans...) if len(segmentPlans) != 0 || len(channelPlans) != 0 { diff --git a/internal/querycoordv2/checkers/balance_checker_test.go b/internal/querycoordv2/checkers/balance_checker_test.go index 6cc52b58145d4..e389ab64f9370 100644 --- a/internal/querycoordv2/checkers/balance_checker_test.go +++ b/internal/querycoordv2/checkers/balance_checker_test.go @@ -78,7 +78,7 @@ func (suite *BalanceCheckerTestSuite) SetupTest() { suite.targetMgr = meta.NewTargetManager(suite.broker, suite.meta) suite.balancer = balance.NewMockBalancer(suite.T()) - suite.checker = NewBalanceChecker(suite.meta, suite.targetMgr, suite.balancer, suite.nodeMgr, suite.scheduler) + suite.checker = NewBalanceChecker(suite.meta, suite.targetMgr, suite.nodeMgr, suite.scheduler, func() balance.Balance { return suite.balancer }) } func (suite *BalanceCheckerTestSuite) TearDownTest() { diff --git a/internal/querycoordv2/checkers/channel_checker.go b/internal/querycoordv2/checkers/channel_checker.go index 9ba0761107b2f..d00ea8cb46d7f 100644 --- a/internal/querycoordv2/checkers/channel_checker.go +++ b/internal/querycoordv2/checkers/channel_checker.go @@ -36,27 +36,27 @@ import ( // TODO(sunby): have too much similar codes with SegmentChecker type ChannelChecker struct { *checkerActivation - meta *meta.Meta - dist *meta.DistributionManager - targetMgr *meta.TargetManager - nodeMgr *session.NodeManager - balancer balance.Balance + meta *meta.Meta + dist *meta.DistributionManager + targetMgr *meta.TargetManager + nodeMgr *session.NodeManager + getBalancerFunc GetBalancerFunc } func NewChannelChecker( meta *meta.Meta, dist *meta.DistributionManager, targetMgr *meta.TargetManager, - balancer balance.Balance, nodeMgr *session.NodeManager, + getBalancerFunc GetBalancerFunc, ) *ChannelChecker { return &ChannelChecker{ checkerActivation: newCheckerActivation(), meta: meta, dist: dist, targetMgr: targetMgr, - balancer: balancer, nodeMgr: nodeMgr, + getBalancerFunc: getBalancerFunc, } } @@ -215,7 +215,7 @@ func (c *ChannelChecker) createChannelLoadTask(ctx context.Context, channels []* if len(rwNodes) == 0 { rwNodes = replica.GetRWNodes() } - plan := c.balancer.AssignChannel([]*meta.DmChannel{ch}, rwNodes, false) + plan := c.getBalancerFunc().AssignChannel([]*meta.DmChannel{ch}, rwNodes, false) plans = append(plans, plan...) 
} diff --git a/internal/querycoordv2/checkers/channel_checker_test.go b/internal/querycoordv2/checkers/channel_checker_test.go index 6aa9c062887fe..149123194d997 100644 --- a/internal/querycoordv2/checkers/channel_checker_test.go +++ b/internal/querycoordv2/checkers/channel_checker_test.go @@ -77,7 +77,7 @@ func (suite *ChannelCheckerTestSuite) SetupTest() { distManager := meta.NewDistributionManager() balancer := suite.createMockBalancer() - suite.checker = NewChannelChecker(suite.meta, distManager, targetManager, balancer, suite.nodeMgr) + suite.checker = NewChannelChecker(suite.meta, distManager, targetManager, suite.nodeMgr, func() balance.Balance { return balancer }) suite.broker.EXPECT().GetPartitions(mock.Anything, int64(1)).Return([]int64{1}, nil).Maybe() } diff --git a/internal/querycoordv2/checkers/controller.go b/internal/querycoordv2/checkers/controller.go index 133a5abf18202..efc8b05faf4e9 100644 --- a/internal/querycoordv2/checkers/controller.go +++ b/internal/querycoordv2/checkers/controller.go @@ -35,6 +35,8 @@ import ( var errTypeNotFound = errors.New("checker type not found") +type GetBalancerFunc = func() balance.Balance + type CheckerController struct { cancel context.CancelFunc manualCheckChs map[utils.CheckerType]chan struct{} @@ -55,17 +57,17 @@ func NewCheckerController( meta *meta.Meta, dist *meta.DistributionManager, targetMgr *meta.TargetManager, - balancer balance.Balance, nodeMgr *session.NodeManager, scheduler task.Scheduler, broker meta.Broker, + getBalancerFunc GetBalancerFunc, ) *CheckerController { // CheckerController runs checkers with the order, // the former checker has higher priority checkers := map[utils.CheckerType]Checker{ - utils.ChannelChecker: NewChannelChecker(meta, dist, targetMgr, balancer, nodeMgr), - utils.SegmentChecker: NewSegmentChecker(meta, dist, targetMgr, balancer, nodeMgr), - utils.BalanceChecker: NewBalanceChecker(meta, targetMgr, balancer, nodeMgr, scheduler), + utils.ChannelChecker: NewChannelChecker(meta, dist, targetMgr, nodeMgr, getBalancerFunc), + utils.SegmentChecker: NewSegmentChecker(meta, dist, targetMgr, nodeMgr, getBalancerFunc), + utils.BalanceChecker: NewBalanceChecker(meta, targetMgr, nodeMgr, scheduler, getBalancerFunc), utils.IndexChecker: NewIndexChecker(meta, dist, broker, nodeMgr), utils.LeaderChecker: NewLeaderChecker(meta, dist, targetMgr, nodeMgr), } diff --git a/internal/querycoordv2/checkers/controller_base_test.go b/internal/querycoordv2/checkers/controller_base_test.go index 9f5b233defa74..762a8a2bde590 100644 --- a/internal/querycoordv2/checkers/controller_base_test.go +++ b/internal/querycoordv2/checkers/controller_base_test.go @@ -77,7 +77,8 @@ func (suite *ControllerBaseTestSuite) SetupTest() { suite.balancer = balance.NewMockBalancer(suite.T()) suite.scheduler = task.NewMockScheduler(suite.T()) - suite.controller = NewCheckerController(suite.meta, suite.dist, suite.targetManager, suite.balancer, suite.nodeMgr, suite.scheduler, suite.broker) + + suite.controller = NewCheckerController(suite.meta, suite.dist, suite.targetManager, suite.nodeMgr, suite.scheduler, suite.broker, func() balance.Balance { return suite.balancer }) } func (s *ControllerBaseTestSuite) TestActivation() { diff --git a/internal/querycoordv2/checkers/controller_test.go b/internal/querycoordv2/checkers/controller_test.go index b69ab9c10f620..c04f4ecaea179 100644 --- a/internal/querycoordv2/checkers/controller_test.go +++ b/internal/querycoordv2/checkers/controller_test.go @@ -81,7 +81,7 @@ func (suite *CheckerControllerSuite) 
SetupTest() { suite.balancer = balance.NewMockBalancer(suite.T()) suite.scheduler = task.NewMockScheduler(suite.T()) - suite.controller = NewCheckerController(suite.meta, suite.dist, suite.targetManager, suite.balancer, suite.nodeMgr, suite.scheduler, suite.broker) + suite.controller = NewCheckerController(suite.meta, suite.dist, suite.targetManager, suite.nodeMgr, suite.scheduler, suite.broker, func() balance.Balance { return suite.balancer }) } func (suite *CheckerControllerSuite) TestBasic() { diff --git a/internal/querycoordv2/checkers/segment_checker.go b/internal/querycoordv2/checkers/segment_checker.go index 1c85aef177df3..bcdfdb3f45cf4 100644 --- a/internal/querycoordv2/checkers/segment_checker.go +++ b/internal/querycoordv2/checkers/segment_checker.go @@ -41,27 +41,27 @@ const initialTargetVersion = int64(0) type SegmentChecker struct { *checkerActivation - meta *meta.Meta - dist *meta.DistributionManager - targetMgr *meta.TargetManager - balancer balance.Balance - nodeMgr *session.NodeManager + meta *meta.Meta + dist *meta.DistributionManager + targetMgr *meta.TargetManager + nodeMgr *session.NodeManager + getBalancerFunc GetBalancerFunc } func NewSegmentChecker( meta *meta.Meta, dist *meta.DistributionManager, targetMgr *meta.TargetManager, - balancer balance.Balance, nodeMgr *session.NodeManager, + getBalancerFunc GetBalancerFunc, ) *SegmentChecker { return &SegmentChecker{ checkerActivation: newCheckerActivation(), meta: meta, dist: dist, targetMgr: targetMgr, - balancer: balancer, nodeMgr: nodeMgr, + getBalancerFunc: getBalancerFunc, } } @@ -403,7 +403,7 @@ func (c *SegmentChecker) createSegmentLoadTasks(ctx context.Context, segments [] SegmentInfo: s, } }) - shardPlans := c.balancer.AssignSegment(replica.GetCollectionID(), segmentInfos, rwNodes, false) + shardPlans := c.getBalancerFunc().AssignSegment(replica.GetCollectionID(), segmentInfos, rwNodes, false) for i := range shardPlans { shardPlans[i].Replica = replica } diff --git a/internal/querycoordv2/checkers/segment_checker_test.go b/internal/querycoordv2/checkers/segment_checker_test.go index 88861f6d060a8..c6fdd03440398 100644 --- a/internal/querycoordv2/checkers/segment_checker_test.go +++ b/internal/querycoordv2/checkers/segment_checker_test.go @@ -77,7 +77,7 @@ func (suite *SegmentCheckerTestSuite) SetupTest() { targetManager := meta.NewTargetManager(suite.broker, suite.meta) balancer := suite.createMockBalancer() - suite.checker = NewSegmentChecker(suite.meta, distManager, targetManager, balancer, suite.nodeMgr) + suite.checker = NewSegmentChecker(suite.meta, distManager, targetManager, suite.nodeMgr, func() balance.Balance { return balancer }) suite.broker.EXPECT().GetPartitions(mock.Anything, int64(1)).Return([]int64{1}, nil).Maybe() } diff --git a/internal/querycoordv2/dist/dist_handler.go b/internal/querycoordv2/dist/dist_handler.go index 1d396a415008d..b88f34177f87b 100644 --- a/internal/querycoordv2/dist/dist_handler.go +++ b/internal/querycoordv2/dist/dist_handler.go @@ -98,7 +98,7 @@ func (dh *distHandler) handleDistResp(resp *querypb.GetDataDistributionResponse) node.SetLastHeartbeat(time.Now()) // skip update dist if no distribution change happens in query node - if resp.GetLastModifyTs() <= dh.lastUpdateTs { + if resp.GetLastModifyTs() != 0 && resp.GetLastModifyTs() <= dh.lastUpdateTs { log.RatedInfo(30, "skip update dist due to no distribution change", zap.Int64("lastModifyTs", resp.GetLastModifyTs()), zap.Int64("lastUpdateTs", dh.lastUpdateTs)) } else { dh.lastUpdateTs = resp.GetLastModifyTs() diff 
--git a/internal/querycoordv2/handlers.go b/internal/querycoordv2/handlers.go index e3387ae6b785a..13fa55008d0a1 100644 --- a/internal/querycoordv2/handlers.go +++ b/internal/querycoordv2/handlers.go @@ -99,7 +99,7 @@ func (s *Server) balanceSegments(ctx context.Context, copyMode bool, ) error { log := log.Ctx(ctx).With(zap.Int64("collectionID", collectionID), zap.Int64("srcNode", srcNode)) - plans := s.balancer.AssignSegment(collectionID, segments, dstNodes, true) + plans := s.getBalancerFunc().AssignSegment(collectionID, segments, dstNodes, true) for i := range plans { plans[i].From = srcNode plans[i].Replica = replica @@ -175,7 +175,7 @@ func (s *Server) balanceChannels(ctx context.Context, ) error { log := log.Ctx(ctx).With(zap.Int64("collectionID", collectionID)) - plans := s.balancer.AssignChannel(channels, dstNodes, true) + plans := s.getBalancerFunc().AssignChannel(channels, dstNodes, true) for i := range plans { plans[i].From = srcNode plans[i].Replica = replica diff --git a/internal/querycoordv2/meta/collection_manager.go b/internal/querycoordv2/meta/collection_manager.go index 766e59b66f0d8..4871459812c01 100644 --- a/internal/querycoordv2/meta/collection_manager.go +++ b/internal/querycoordv2/meta/collection_manager.go @@ -555,6 +555,7 @@ func (m *CollectionManager) RemoveCollection(collectionID typeutil.UniqueID) err } delete(m.collectionPartitions, collectionID) } + metrics.CleanQueryCoordMetricsWithCollectionID(collectionID) return nil } diff --git a/internal/querycoordv2/meta/coordinator_broker.go b/internal/querycoordv2/meta/coordinator_broker.go index cbcb9fced74e5..2df54688affb0 100644 --- a/internal/querycoordv2/meta/coordinator_broker.go +++ b/internal/querycoordv2/meta/coordinator_broker.go @@ -30,7 +30,9 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/indexpb" "github.com/milvus-io/milvus/internal/proto/querypb" + "github.com/milvus-io/milvus/internal/proto/rootcoordpb" "github.com/milvus-io/milvus/internal/types" + "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/commonpbutil" "github.com/milvus-io/milvus/pkg/util/merr" @@ -47,6 +49,8 @@ type Broker interface { GetSegmentInfo(ctx context.Context, segmentID ...UniqueID) (*datapb.GetSegmentInfoResponse, error) GetIndexInfo(ctx context.Context, collectionID UniqueID, segmentID UniqueID) ([]*querypb.FieldIndexInfo, error) GetRecoveryInfoV2(ctx context.Context, collectionID UniqueID, partitionIDs ...UniqueID) ([]*datapb.VchannelInfo, []*datapb.SegmentInfo, error) + DescribeDatabase(ctx context.Context, dbName string) (*rootcoordpb.DescribeDatabaseResponse, error) + GetCollectionLoadInfo(ctx context.Context, collectionID UniqueID) ([]string, int64, error) } type CoordinatorBroker struct { @@ -83,6 +87,48 @@ func (broker *CoordinatorBroker) DescribeCollection(ctx context.Context, collect return resp, nil } +func (broker *CoordinatorBroker) DescribeDatabase(ctx context.Context, dbName string) (*rootcoordpb.DescribeDatabaseResponse, error) { + ctx, cancel := context.WithTimeout(ctx, paramtable.Get().QueryCoordCfg.BrokerTimeout.GetAsDuration(time.Millisecond)) + defer cancel() + + req := &rootcoordpb.DescribeDatabaseRequest{ + Base: commonpbutil.NewMsgBase( + commonpbutil.WithMsgType(commonpb.MsgType_DescribeCollection), + ), + DbName: dbName, + } + resp, err := broker.rootCoord.DescribeDatabase(ctx, req) + if err := merr.CheckRPCCall(resp, err); err != nil { + log.Ctx(ctx).Warn("failed to describe 
database", zap.Error(err)) + return nil, err + } + return resp, nil +} + +// try to get database level replica_num and resource groups, return (resource_groups, replica_num, error) +func (broker *CoordinatorBroker) GetCollectionLoadInfo(ctx context.Context, collectionID UniqueID) ([]string, int64, error) { + // to do by weiliu1031: querycoord should cache mappings: collectionID->dbName + collectionInfo, err := broker.DescribeCollection(ctx, collectionID) + if err != nil { + return nil, 0, err + } + + dbInfo, err := broker.DescribeDatabase(ctx, collectionInfo.GetDbName()) + if err != nil { + return nil, 0, err + } + replicaNum, err := common.DatabaseLevelReplicaNumber(dbInfo.GetProperties()) + if err != nil { + return nil, 0, err + } + rgs, err := common.DatabaseLevelResourceGroups(dbInfo.GetProperties()) + if err != nil { + return nil, 0, err + } + + return rgs, replicaNum, nil +} + func (broker *CoordinatorBroker) GetPartitions(ctx context.Context, collectionID UniqueID) ([]UniqueID, error) { ctx, cancel := context.WithTimeout(ctx, paramtable.Get().QueryCoordCfg.BrokerTimeout.GetAsDuration(time.Millisecond)) defer cancel() diff --git a/internal/querycoordv2/meta/coordinator_broker_test.go b/internal/querycoordv2/meta/coordinator_broker_test.go index 476a997dd2ae9..778268f7ce66b 100644 --- a/internal/querycoordv2/meta/coordinator_broker_test.go +++ b/internal/querycoordv2/meta/coordinator_broker_test.go @@ -18,6 +18,7 @@ package meta import ( "context" + "strings" "testing" "github.com/cockroachdb/errors" @@ -32,6 +33,8 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/indexpb" "github.com/milvus-io/milvus/internal/proto/querypb" + "github.com/milvus-io/milvus/internal/proto/rootcoordpb" + "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" ) @@ -490,6 +493,90 @@ func (s *CoordinatorBrokerDataCoordSuite) TestGetIndexInfo() { }) } +func (s *CoordinatorBrokerRootCoordSuite) TestDescribeDatabase() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + s.Run("normal_case", func() { + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything). + Return(&rootcoordpb.DescribeDatabaseResponse{ + Status: merr.Success(), + }, nil) + _, err := s.broker.DescribeDatabase(ctx, "fake_db1") + s.NoError(err) + s.resetMock() + }) + + s.Run("rootcoord_return_error", func() { + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything).Return(nil, errors.New("fake error")) + _, err := s.broker.DescribeDatabase(ctx, "fake_db1") + s.Error(err) + s.resetMock() + }) + + s.Run("rootcoord_return_failure_status", func() { + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything). 
+ Return(&rootcoordpb.DescribeDatabaseResponse{ + Status: merr.Status(errors.New("fake error")), + }, nil) + _, err := s.broker.DescribeDatabase(ctx, "fake_db1") + s.Error(err) + s.resetMock() + }) + + s.Run("rootcoord_return_unimplemented", func() { + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything).Return(nil, merr.ErrServiceUnimplemented) + _, err := s.broker.DescribeDatabase(ctx, "fake_db1") + s.Error(err) + s.resetMock() + }) +} + +func (s *CoordinatorBrokerRootCoordSuite) TestGetCollectionLoadInfo() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + s.Run("normal_case", func() { + s.rootcoord.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ + DbName: "fake_db1", + }, nil) + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything). + Return(&rootcoordpb.DescribeDatabaseResponse{ + Status: merr.Success(), + Properties: []*commonpb.KeyValuePair{ + { + Key: common.DatabaseReplicaNumber, + Value: "3", + }, + { + Key: common.DatabaseResourceGroups, + Value: strings.Join([]string{"rg1", "rg2"}, ","), + }, + }, + }, nil) + rgs, replicas, err := s.broker.GetCollectionLoadInfo(ctx, 1) + s.NoError(err) + s.Equal(int64(3), replicas) + s.Contains(rgs, "rg1") + s.Contains(rgs, "rg2") + s.resetMock() + }) + + s.Run("props not set", func() { + s.rootcoord.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{ + DbName: "fake_db1", + }, nil) + s.rootcoord.EXPECT().DescribeDatabase(mock.Anything, mock.Anything). + Return(&rootcoordpb.DescribeDatabaseResponse{ + Status: merr.Success(), + Properties: []*commonpb.KeyValuePair{}, + }, nil) + _, _, err := s.broker.GetCollectionLoadInfo(ctx, 1) + s.Error(err) + s.resetMock() + }) +} + func TestCoordinatorBroker(t *testing.T) { suite.Run(t, new(CoordinatorBrokerRootCoordSuite)) suite.Run(t, new(CoordinatorBrokerDataCoordSuite)) diff --git a/internal/querycoordv2/meta/mock_broker.go b/internal/querycoordv2/meta/mock_broker.go index ff3548985547f..a940aff58bc91 100644 --- a/internal/querycoordv2/meta/mock_broker.go +++ b/internal/querycoordv2/meta/mock_broker.go @@ -13,6 +13,8 @@ import ( mock "github.com/stretchr/testify/mock" querypb "github.com/milvus-io/milvus/internal/proto/querypb" + + rootcoordpb "github.com/milvus-io/milvus/internal/proto/rootcoordpb" ) // MockBroker is an autogenerated mock type for the Broker type @@ -83,6 +85,123 @@ func (_c *MockBroker_DescribeCollection_Call) RunAndReturn(run func(context.Cont return _c } +// DescribeDatabase provides a mock function with given fields: ctx, dbName +func (_m *MockBroker) DescribeDatabase(ctx context.Context, dbName string) (*rootcoordpb.DescribeDatabaseResponse, error) { + ret := _m.Called(ctx, dbName) + + var r0 *rootcoordpb.DescribeDatabaseResponse + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, string) (*rootcoordpb.DescribeDatabaseResponse, error)); ok { + return rf(ctx, dbName) + } + if rf, ok := ret.Get(0).(func(context.Context, string) *rootcoordpb.DescribeDatabaseResponse); ok { + r0 = rf(ctx, dbName) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*rootcoordpb.DescribeDatabaseResponse) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, string) error); ok { + r1 = rf(ctx, dbName) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockBroker_DescribeDatabase_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DescribeDatabase' +type 
MockBroker_DescribeDatabase_Call struct { + *mock.Call +} + +// DescribeDatabase is a helper method to define mock.On call +// - ctx context.Context +// - dbName string +func (_e *MockBroker_Expecter) DescribeDatabase(ctx interface{}, dbName interface{}) *MockBroker_DescribeDatabase_Call { + return &MockBroker_DescribeDatabase_Call{Call: _e.mock.On("DescribeDatabase", ctx, dbName)} +} + +func (_c *MockBroker_DescribeDatabase_Call) Run(run func(ctx context.Context, dbName string)) *MockBroker_DescribeDatabase_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(string)) + }) + return _c +} + +func (_c *MockBroker_DescribeDatabase_Call) Return(_a0 *rootcoordpb.DescribeDatabaseResponse, _a1 error) *MockBroker_DescribeDatabase_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *MockBroker_DescribeDatabase_Call) RunAndReturn(run func(context.Context, string) (*rootcoordpb.DescribeDatabaseResponse, error)) *MockBroker_DescribeDatabase_Call { + _c.Call.Return(run) + return _c +} + +// GetCollectionLoadInfo provides a mock function with given fields: ctx, collectionID +func (_m *MockBroker) GetCollectionLoadInfo(ctx context.Context, collectionID int64) ([]string, int64, error) { + ret := _m.Called(ctx, collectionID) + + var r0 []string + var r1 int64 + var r2 error + if rf, ok := ret.Get(0).(func(context.Context, int64) ([]string, int64, error)); ok { + return rf(ctx, collectionID) + } + if rf, ok := ret.Get(0).(func(context.Context, int64) []string); ok { + r0 = rf(ctx, collectionID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]string) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, int64) int64); ok { + r1 = rf(ctx, collectionID) + } else { + r1 = ret.Get(1).(int64) + } + + if rf, ok := ret.Get(2).(func(context.Context, int64) error); ok { + r2 = rf(ctx, collectionID) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + +// MockBroker_GetCollectionLoadInfo_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetCollectionLoadInfo' +type MockBroker_GetCollectionLoadInfo_Call struct { + *mock.Call +} + +// GetCollectionLoadInfo is a helper method to define mock.On call +// - ctx context.Context +// - collectionID int64 +func (_e *MockBroker_Expecter) GetCollectionLoadInfo(ctx interface{}, collectionID interface{}) *MockBroker_GetCollectionLoadInfo_Call { + return &MockBroker_GetCollectionLoadInfo_Call{Call: _e.mock.On("GetCollectionLoadInfo", ctx, collectionID)} +} + +func (_c *MockBroker_GetCollectionLoadInfo_Call) Run(run func(ctx context.Context, collectionID int64)) *MockBroker_GetCollectionLoadInfo_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(int64)) + }) + return _c +} + +func (_c *MockBroker_GetCollectionLoadInfo_Call) Return(_a0 []string, _a1 int64, _a2 error) *MockBroker_GetCollectionLoadInfo_Call { + _c.Call.Return(_a0, _a1, _a2) + return _c +} + +func (_c *MockBroker_GetCollectionLoadInfo_Call) RunAndReturn(run func(context.Context, int64) ([]string, int64, error)) *MockBroker_GetCollectionLoadInfo_Call { + _c.Call.Return(run) + return _c +} + // GetIndexInfo provides a mock function with given fields: ctx, collectionID, segmentID func (_m *MockBroker) GetIndexInfo(ctx context.Context, collectionID int64, segmentID int64) ([]*querypb.FieldIndexInfo, error) { ret := _m.Called(ctx, collectionID, segmentID) diff --git a/internal/querycoordv2/ops_service_test.go b/internal/querycoordv2/ops_service_test.go index 
509ba091e9865..c9d062d631a30 100644 --- a/internal/querycoordv2/ops_service_test.go +++ b/internal/querycoordv2/ops_service_test.go @@ -121,7 +121,7 @@ func (suite *OpsServiceSuite) SetupTest() { suite.distController = dist.NewMockController(suite.T()) suite.checkerController = checkers.NewCheckerController(suite.meta, suite.distMgr, - suite.targetMgr, suite.balancer, suite.nodeMgr, suite.taskScheduler, suite.broker) + suite.targetMgr, suite.nodeMgr, suite.taskScheduler, suite.broker, func() balance.Balance { return suite.balancer }) suite.server = &Server{ kv: suite.kv, @@ -137,7 +137,7 @@ func (suite *OpsServiceSuite) SetupTest() { cluster: suite.cluster, jobScheduler: suite.jobScheduler, taskScheduler: suite.taskScheduler, - balancer: suite.balancer, + getBalancerFunc: func() balance.Balance { return suite.balancer }, distController: suite.distController, ctx: context.Background(), checkerController: suite.checkerController, diff --git a/internal/querycoordv2/server.go b/internal/querycoordv2/server.go index d115c4ceb7cf7..da16b65fb4406 100644 --- a/internal/querycoordv2/server.go +++ b/internal/querycoordv2/server.go @@ -115,7 +115,9 @@ type Server struct { resourceObserver *observers.ResourceObserver leaderCacheObserver *observers.LeaderCacheObserver - balancer balance.Balance + getBalancerFunc checkers.GetBalancerFunc + balancerMap map[string]balance.Balance + balancerLock sync.RWMutex // Active-standby enableActiveStandBy bool @@ -137,6 +139,7 @@ func NewQueryCoord(ctx context.Context) (*Server, error) { cancel: cancel, nodeUpEventChan: make(chan int64, 10240), notifyNodeUp: make(chan struct{}), + balancerMap: make(map[string]balance.Balance), } server.UpdateStateCode(commonpb.StateCode_Abnormal) server.queryNodeCreator = session.DefaultQueryNodeCreator @@ -287,34 +290,46 @@ func (s *Server) initQueryCoord() error { s.taskScheduler, ) - // Init balancer map and balancer - log.Info("init balancer") - switch params.Params.QueryCoordCfg.Balancer.GetValue() { - case meta.RoundRobinBalancerName: - s.balancer = balance.NewRoundRobinBalancer(s.taskScheduler, s.nodeMgr) - case meta.RowCountBasedBalancerName: - s.balancer = balance.NewRowCountBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) - case meta.ScoreBasedBalancerName: - s.balancer = balance.NewScoreBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) - case meta.MultiTargetBalancerName: - s.balancer = balance.NewMultiTargetBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) - case meta.ChannelLevelScoreBalancerName: - s.balancer = balance.NewChannelLevelScoreBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) - default: - log.Info(fmt.Sprintf("default to use %s", meta.ScoreBasedBalancerName)) - s.balancer = balance.NewScoreBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) - } - // Init checker controller log.Info("init checker controller") + s.getBalancerFunc = func() balance.Balance { + balanceKey := paramtable.Get().QueryCoordCfg.Balancer.GetValue() + s.balancerLock.Lock() + defer s.balancerLock.Unlock() + + balancer, ok := s.balancerMap[balanceKey] + if ok { + return balancer + } + + log.Info("switch to new balancer", zap.String("name", balanceKey)) + switch balanceKey { + case meta.RoundRobinBalancerName: + balancer = balance.NewRoundRobinBalancer(s.taskScheduler, s.nodeMgr) + case meta.RowCountBasedBalancerName: + balancer = balance.NewRowCountBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) + case 
meta.ScoreBasedBalancerName: + balancer = balance.NewScoreBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) + case meta.MultiTargetBalancerName: + balancer = balance.NewMultiTargetBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) + case meta.ChannelLevelScoreBalancerName: + balancer = balance.NewChannelLevelScoreBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) + default: + log.Info(fmt.Sprintf("default to use %s", meta.ScoreBasedBalancerName)) + balancer = balance.NewScoreBasedBalancer(s.taskScheduler, s.nodeMgr, s.dist, s.meta, s.targetMgr) + } + + s.balancerMap[balanceKey] = balancer + return balancer + } s.checkerController = checkers.NewCheckerController( s.meta, s.dist, s.targetMgr, - s.balancer, s.nodeMgr, s.taskScheduler, s.broker, + s.getBalancerFunc, ) // Init observers @@ -441,6 +456,7 @@ func (s *Server) startQueryCoord() error { s.nodeMgr.Stopping(node.ServerID) } } + s.checkNodeStateInRG() for _, node := range sessions { s.handleNodeUp(node.ServerID) } @@ -762,6 +778,20 @@ func (s *Server) handleNodeDown(node int64) { s.meta.ResourceManager.HandleNodeDown(node) } +func (s *Server) checkNodeStateInRG() { + for _, rgName := range s.meta.ListResourceGroups() { + rg := s.meta.ResourceManager.GetResourceGroup(rgName) + for _, node := range rg.GetNodes() { + info := s.nodeMgr.Get(node) + if info == nil { + s.meta.ResourceManager.HandleNodeDown(node) + } else if info.IsStoppingState() { + s.meta.ResourceManager.HandleNodeStopping(node) + } + } + } +} + func (s *Server) updateBalanceConfigLoop(ctx context.Context) { success := s.updateBalanceConfig() if success { diff --git a/internal/querycoordv2/server_test.go b/internal/querycoordv2/server_test.go index c3be55e29f1b8..78c2fdb89b6f1 100644 --- a/internal/querycoordv2/server_test.go +++ b/internal/querycoordv2/server_test.go @@ -436,17 +436,19 @@ func (suite *ServerSuite) loadAll() { for _, collection := range suite.collections { if suite.loadTypes[collection] == querypb.LoadType_LoadCollection { req := &querypb.LoadCollectionRequest{ - CollectionID: collection, - ReplicaNumber: suite.replicaNumber[collection], + CollectionID: collection, + ReplicaNumber: suite.replicaNumber[collection], + ResourceGroups: []string{meta.DefaultResourceGroupName}, } resp, err := suite.server.LoadCollection(ctx, req) suite.NoError(err) suite.Equal(commonpb.ErrorCode_Success, resp.ErrorCode) } else { req := &querypb.LoadPartitionsRequest{ - CollectionID: collection, - PartitionIDs: suite.partitions[collection], - ReplicaNumber: suite.replicaNumber[collection], + CollectionID: collection, + PartitionIDs: suite.partitions[collection], + ReplicaNumber: suite.replicaNumber[collection], + ResourceGroups: []string{meta.DefaultResourceGroupName}, } resp, err := suite.server.LoadPartitions(ctx, req) suite.NoError(err) @@ -567,10 +569,10 @@ func (suite *ServerSuite) hackServer() { suite.server.meta, suite.server.dist, suite.server.targetMgr, - suite.server.balancer, suite.server.nodeMgr, suite.server.taskScheduler, suite.server.broker, + suite.server.getBalancerFunc, ) suite.server.targetObserver = observers.NewTargetObserver( suite.server.meta, diff --git a/internal/querycoordv2/services.go b/internal/querycoordv2/services.go index dea9817a2777f..b64f09921fea6 100644 --- a/internal/querycoordv2/services.go +++ b/internal/querycoordv2/services.go @@ -215,6 +215,24 @@ func (s *Server) LoadCollection(ctx context.Context, req *querypb.LoadCollection return merr.Status(err), nil } + if req.GetReplicaNumber() <= 0 
|| len(req.GetResourceGroups()) == 0 { + // when replica number or resource groups is not set, use database level config + rgs, replicas, err := s.broker.GetCollectionLoadInfo(ctx, req.GetCollectionID()) + if err != nil { + log.Warn("failed to get data base level load info", zap.Error(err)) + } + + if req.GetReplicaNumber() <= 0 { + log.Info("load collection use database level replica number", zap.Int64("databaseLevelReplicaNum", replicas)) + req.ReplicaNumber = int32(replicas) + } + + if len(req.GetResourceGroups()) == 0 { + log.Info("load collection use database level resource groups", zap.Strings("databaseLevelResourceGroups", rgs)) + req.ResourceGroups = rgs + } + } + if err := s.checkResourceGroup(req.GetCollectionID(), req.GetResourceGroups()); err != nil { msg := "failed to load collection" log.Warn(msg, zap.Error(err)) @@ -316,6 +334,24 @@ func (s *Server) LoadPartitions(ctx context.Context, req *querypb.LoadPartitions return merr.Status(err), nil } + if req.GetReplicaNumber() <= 0 || len(req.GetResourceGroups()) == 0 { + // when replica number or resource groups is not set, use database level config + rgs, replicas, err := s.broker.GetCollectionLoadInfo(ctx, req.GetCollectionID()) + if err != nil { + log.Warn("failed to get data base level load info", zap.Error(err)) + } + + if req.GetReplicaNumber() <= 0 { + log.Info("load collection use database level replica number", zap.Int64("databaseLevelReplicaNum", replicas)) + req.ReplicaNumber = int32(replicas) + } + + if len(req.GetResourceGroups()) == 0 { + log.Info("load collection use database level resource groups", zap.Strings("databaseLevelResourceGroups", rgs)) + req.ResourceGroups = rgs + } + } + if err := s.checkResourceGroup(req.GetCollectionID(), req.GetResourceGroups()); err != nil { msg := "failed to load partitions" log.Warn(msg, zap.Error(err)) diff --git a/internal/querycoordv2/services_test.go b/internal/querycoordv2/services_test.go index 9c486a26b9c81..e4fb877d0101f 100644 --- a/internal/querycoordv2/services_test.go +++ b/internal/querycoordv2/services_test.go @@ -201,12 +201,14 @@ func (suite *ServiceSuite) SetupTest() { cluster: suite.cluster, jobScheduler: suite.jobScheduler, taskScheduler: suite.taskScheduler, - balancer: suite.balancer, + getBalancerFunc: func() balance.Balance { return suite.balancer }, distController: suite.distController, ctx: context.Background(), } suite.server.UpdateStateCode(commonpb.StateCode_Healthy) + + suite.broker.EXPECT().GetCollectionLoadInfo(mock.Anything, mock.Anything).Return([]string{meta.DefaultResourceGroupName}, 1, nil).Maybe() } func (suite *ServiceSuite) TestShowCollections() { diff --git a/internal/querycoordv2/task/scheduler.go b/internal/querycoordv2/task/scheduler.go index 055e88a31e024..ed0f04c4d0a3c 100644 --- a/internal/querycoordv2/task/scheduler.go +++ b/internal/querycoordv2/task/scheduler.go @@ -798,7 +798,11 @@ func (scheduler *taskScheduler) remove(task Task) { scheduler.updateTaskMetrics() log.Info("task removed") - metrics.QueryCoordTaskLatency.WithLabelValues(scheduler.getTaskMetricsLabel(task), task.Shard()).Observe(float64(task.GetTaskLatency())) + + if scheduler.meta.Exist(task.CollectionID()) { + metrics.QueryCoordTaskLatency.WithLabelValues(fmt.Sprint(task.CollectionID()), + scheduler.getTaskMetricsLabel(task), task.Shard()).Observe(float64(task.GetTaskLatency())) + } } func (scheduler *taskScheduler) getTaskMetricsLabel(task Task) string { diff --git a/internal/querynodev2/delegator/delegator.go b/internal/querynodev2/delegator/delegator.go index 
7fca3c6acdb63..7f3cc38d38f6b 100644 --- a/internal/querynodev2/delegator/delegator.go +++ b/internal/querynodev2/delegator/delegator.go @@ -106,12 +106,11 @@ type shardDelegator struct { lifetime lifetime.Lifetime[lifetime.State] - distribution *distribution - segmentManager segments.SegmentManager - tsafeManager tsafe.Manager - pkOracle pkoracle.PkOracle - level0Mut sync.RWMutex - level0Deletions map[int64]*storage.DeleteData // partitionID -> deletions + distribution *distribution + segmentManager segments.SegmentManager + tsafeManager tsafe.Manager + pkOracle pkoracle.PkOracle + level0Mut sync.RWMutex // stream delete buffer deleteMut sync.RWMutex deleteBuffer deletebuffer.DeleteBuffer[*deletebuffer.Item] @@ -876,7 +875,6 @@ func NewShardDelegator(ctx context.Context, collectionID UniqueID, replicaID Uni workerManager: workerManager, lifetime: lifetime.NewLifetime(lifetime.Initializing), distribution: NewDistribution(), - level0Deletions: make(map[int64]*storage.DeleteData), deleteBuffer: deletebuffer.NewListDeleteBuffer[*deletebuffer.Item](startTs, sizePerBlock), pkOracle: pkoracle.NewPkOracle(), tsafeManager: tsafeManager, diff --git a/internal/querynodev2/delegator/delegator_data.go b/internal/querynodev2/delegator/delegator_data.go index be4870a34c571..6fc9d5dd6089d 100644 --- a/internal/querynodev2/delegator/delegator_data.go +++ b/internal/querynodev2/delegator/delegator_data.go @@ -365,7 +365,7 @@ func (sd *shardDelegator) LoadGrowing(ctx context.Context, infos []*querypb.Segm log := log.With( zap.Int64("segmentID", segment.ID()), ) - deletedPks, deletedTss := sd.GetLevel0Deletions(segment.Partition()) + deletedPks, deletedTss := sd.GetLevel0Deletions(segment.Partition(), pkoracle.NewCandidateKey(segment.ID(), segment.Partition(), segments.SegmentTypeGrowing)) if len(deletedPks) == 0 { continue } @@ -424,16 +424,6 @@ func (sd *shardDelegator) LoadSegments(ctx context.Context, req *querypb.LoadSeg return err } - // load bloom filter only when candidate not exists - infos := lo.Filter(req.GetInfos(), func(info *querypb.SegmentLoadInfo, _ int) bool { - return !sd.pkOracle.Exists(pkoracle.NewCandidateKey(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed), targetNodeID) - }) - candidates, err := sd.loader.LoadBloomFilterSet(ctx, req.GetCollectionID(), req.GetVersion(), infos...) - if err != nil { - log.Warn("failed to load bloom filter set for segment", zap.Error(err)) - return err - } - req.Base.TargetID = req.GetDstNodeID() log.Debug("worker loads segments...") @@ -488,8 +478,18 @@ func (sd *shardDelegator) LoadSegments(ctx context.Context, req *querypb.LoadSeg } }) if req.GetInfos()[0].GetLevel() == datapb.SegmentLevel_L0 { - sd.GenerateLevel0DeletionCache() + sd.RefreshLevel0DeletionStats() } else { + // load bloom filter only when candidate not exists + infos := lo.Filter(req.GetInfos(), func(info *querypb.SegmentLoadInfo, _ int) bool { + return !sd.pkOracle.Exists(pkoracle.NewCandidateKey(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed), targetNodeID) + }) + candidates, err := sd.loader.LoadBloomFilterSet(ctx, req.GetCollectionID(), req.GetVersion(), infos...) 
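As context for the delegator_data.go changes: GetLevel0Deletions now walks the channel's L0 segments directly, keeps deletes belonging to the requested partition (or the all-partitions sentinel), and filters them through the target segment's bloom-filter candidate, instead of replaying a precomputed per-partition cache. A minimal, self-contained sketch of that pattern follows; the types, the sentinel value, and the helper names are illustrative stand-ins, not the actual segments/pkoracle/storage APIs.

```go
package main

import "fmt"

// stand-in sentinel for "applies to every partition"; not Milvus's real constant
const allPartitionsID int64 = -1

type l0Segment struct {
	partition int64
	pks       []int64
	tss       []uint64
}

// candidate mimics the role of a per-segment bloom-filter set:
// it may return false positives but never false negatives.
type candidate interface {
	MayPkExist(pk int64) bool
}

type allowAll struct{}

func (allowAll) MayPkExist(int64) bool { return true }

// level0Deletions keeps only deletes that target the given partition (or all
// partitions) and that the candidate cannot rule out.
func level0Deletions(segs []l0Segment, partitionID int64, c candidate) ([]int64, []uint64) {
	var pks []int64
	var tss []uint64
	for _, seg := range segs {
		if seg.partition != partitionID && seg.partition != allPartitionsID {
			continue
		}
		for i, pk := range seg.pks {
			if c.MayPkExist(pk) {
				pks = append(pks, pk)
				tss = append(tss, seg.tss[i])
			}
		}
	}
	return pks, tss
}

func main() {
	segs := []l0Segment{
		{partition: 10, pks: []int64{1}, tss: []uint64{100}},
		{partition: allPartitionsID, pks: []int64{2}, tss: []uint64{101}},
		{partition: 11, pks: []int64{3}, tss: []uint64{102}},
	}
	pks, tss := level0Deletions(segs, 10, allowAll{})
	fmt.Println(pks, tss) // [1 2] [100 101]
}
```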
+ if err != nil { + log.Warn("failed to load bloom filter set for segment", zap.Error(err)) + return err + } + log.Debug("load delete...") err = sd.loadStreamDelete(ctx, candidates, infos, req.GetDeltaPositions(), targetNodeID, worker, entries) if err != nil { @@ -512,94 +512,52 @@ func (sd *shardDelegator) LoadSegments(ctx context.Context, req *querypb.LoadSeg return nil } -func (sd *shardDelegator) GetLevel0Deletions(partitionID int64) ([]storage.PrimaryKey, []storage.Timestamp) { - sd.level0Mut.RLock() - deleteData, ok1 := sd.level0Deletions[partitionID] - allPartitionsDeleteData, ok2 := sd.level0Deletions[common.AllPartitionsID] - sd.level0Mut.RUnlock() - // we may need to merge the specified partition deletions and the all partitions deletions, - // so release the mutex as early as possible. - - if ok1 && ok2 { - pks := make([]storage.PrimaryKey, 0, deleteData.RowCount+allPartitionsDeleteData.RowCount) - tss := make([]storage.Timestamp, 0, deleteData.RowCount+allPartitionsDeleteData.RowCount) - - i := 0 - j := 0 - for i < int(deleteData.RowCount) || j < int(allPartitionsDeleteData.RowCount) { - if i == int(deleteData.RowCount) { - pks = append(pks, allPartitionsDeleteData.Pks[j]) - tss = append(tss, allPartitionsDeleteData.Tss[j]) - j++ - } else if j == int(allPartitionsDeleteData.RowCount) { - pks = append(pks, deleteData.Pks[i]) - tss = append(tss, deleteData.Tss[i]) - i++ - } else if deleteData.Tss[i] < allPartitionsDeleteData.Tss[j] { - pks = append(pks, deleteData.Pks[i]) - tss = append(tss, deleteData.Tss[i]) - i++ - } else { - pks = append(pks, allPartitionsDeleteData.Pks[j]) - tss = append(tss, allPartitionsDeleteData.Tss[j]) - j++ - } - } - - return pks, tss - } else if ok1 { - return deleteData.Pks, deleteData.Tss - } else if ok2 { - return allPartitionsDeleteData.Pks, allPartitionsDeleteData.Tss - } - - return nil, nil -} +func (sd *shardDelegator) GetLevel0Deletions(partitionID int64, candidate pkoracle.Candidate) ([]storage.PrimaryKey, []storage.Timestamp) { + sd.level0Mut.Lock() + defer sd.level0Mut.Unlock() -func (sd *shardDelegator) GenerateLevel0DeletionCache() { + // TODO: this could be large, host all L0 delete on delegator might be a dangerous, consider mmap it on local segment and stream processing it level0Segments := sd.segmentManager.GetBy(segments.WithLevel(datapb.SegmentLevel_L0), segments.WithChannel(sd.vchannelName)) - deletions := make(map[int64]*storage.DeleteData) + pks := make([]storage.PrimaryKey, 0) + tss := make([]storage.Timestamp, 0) + for _, segment := range level0Segments { segment := segment.(*segments.L0Segment) - pks, tss := segment.DeleteRecords() - deleteData, ok := deletions[segment.Partition()] - if !ok { - deleteData = storage.NewDeleteData(pks, tss) - } else { - deleteData.AppendBatch(pks, tss) + if segment.Partition() == partitionID || segment.Partition() == common.AllPartitionsID { + segmentPks, segmentTss := segment.DeleteRecords() + for i, pk := range segmentPks { + lc := storage.NewLocationsCache(pk) + if candidate.MayPkExist(lc) { + pks = append(pks, pk) + tss = append(tss, segmentTss[i]) + } + } } - deletions[segment.Partition()] = deleteData } - type DeletePair struct { - Pk storage.PrimaryKey - Ts storage.Timestamp - } - for _, deleteData := range deletions { - pairs := make([]DeletePair, deleteData.RowCount) - for i := range deleteData.Pks { - pairs[i] = DeletePair{deleteData.Pks[i], deleteData.Tss[i]} - } - sort.Slice(pairs, func(i, j int) bool { - return pairs[i].Ts < pairs[j].Ts - }) - for i := range pairs { - 
deleteData.Pks[i], deleteData.Tss[i] = pairs[i].Pk, pairs[i].Ts - } - } + sort.Slice(pks, func(i, j int) bool { + return tss[i] < tss[j] + }) + return pks, tss +} + +func (sd *shardDelegator) RefreshLevel0DeletionStats() { sd.level0Mut.Lock() defer sd.level0Mut.Unlock() + level0Segments := sd.segmentManager.GetBy(segments.WithLevel(datapb.SegmentLevel_L0), segments.WithChannel(sd.vchannelName)) totalSize := int64(0) - for _, delete := range deletions { - totalSize += delete.Size() + for _, segment := range level0Segments { + segment := segment.(*segments.L0Segment) + pks, tss := segment.DeleteRecords() + totalSize += lo.SumBy(pks, func(pk storage.PrimaryKey) int64 { return pk.Size() }) + int64(len(tss)*8) } + metrics.QueryNodeLevelZeroSize.WithLabelValues( fmt.Sprint(paramtable.GetNodeID()), fmt.Sprint(sd.collectionID), sd.vchannelName, ).Set(float64(totalSize)) - sd.level0Deletions = deletions } func (sd *shardDelegator) loadStreamDelete(ctx context.Context, @@ -635,14 +593,9 @@ func (sd *shardDelegator) loadStreamDelete(ctx context.Context, position = deltaPositions[0] } - deletedPks, deletedTss := sd.GetLevel0Deletions(candidate.Partition()) + deletedPks, deletedTss := sd.GetLevel0Deletions(candidate.Partition(), candidate) deleteData := &storage.DeleteData{} - for i, pk := range deletedPks { - if candidate.MayPkExist(pk) { - deleteData.Append(pk, deletedTss[i]) - } - } - + deleteData.AppendBatch(deletedPks, deletedTss) if deleteData.RowCount > 0 { log.Info("forward L0 delete to worker...", zap.Int64("deleteRowNum", deleteData.RowCount), @@ -685,7 +638,8 @@ func (sd *shardDelegator) loadStreamDelete(ctx context.Context, continue } for i, pk := range record.DeleteData.Pks { - if candidate.MayPkExist(pk) { + lc := storage.NewLocationsCache(pk) + if candidate.MayPkExist(lc) { deleteData.Append(pk, record.DeleteData.Tss[i]) } } @@ -746,7 +700,7 @@ func (sd *shardDelegator) readDeleteFromMsgstream(ctx context.Context, position } ts = time.Now() - err = stream.Seek(context.TODO(), []*msgpb.MsgPosition{position}) + err = stream.Seek(context.TODO(), []*msgpb.MsgPosition{position}, false) if err != nil { return nil, err } @@ -781,7 +735,8 @@ func (sd *shardDelegator) readDeleteFromMsgstream(ctx context.Context, position } for idx, pk := range storage.ParseIDs2PrimaryKeys(dmsg.GetPrimaryKeys()) { - if candidate.MayPkExist(pk) { + lc := storage.NewLocationsCache(pk) + if candidate.MayPkExist(lc) { result.Pks = append(result.Pks, pk) result.Tss = append(result.Tss, dmsg.Timestamps[idx]) } @@ -900,7 +855,7 @@ func (sd *shardDelegator) ReleaseSegments(ctx context.Context, req *querypb.Rele } if hasLevel0 { - sd.GenerateLevel0DeletionCache() + sd.RefreshLevel0DeletionStats() } partitionsToReload := make([]UniqueID, 0) lo.ForEach(req.GetSegmentIDs(), func(segmentID int64, _ int) { diff --git a/internal/querynodev2/delegator/delegator_data_test.go b/internal/querynodev2/delegator/delegator_data_test.go index 6d2ae22411b66..1a17f41812e9b 100644 --- a/internal/querynodev2/delegator/delegator_data_test.go +++ b/internal/querynodev2/delegator/delegator_data_test.go @@ -24,7 +24,6 @@ import ( "testing" "time" - bloom "github.com/bits-and-blooms/bloom/v3" "github.com/cockroachdb/errors" "github.com/samber/lo" "github.com/stretchr/testify/mock" @@ -41,6 +40,7 @@ import ( "github.com/milvus-io/milvus/internal/querynodev2/segments" "github.com/milvus-io/milvus/internal/querynodev2/tsafe" "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/internal/util/bloomfilter" 
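One detail worth keeping in mind around the new GetLevel0Deletions: sort.Slice reorders only the slice it is handed, so parallel pk/timestamp slices must be swapped together if the pairs are to stay aligned after sorting by timestamp. A small illustrative helper in plain Go (not the Milvus storage types):

```go
package main

import (
	"fmt"
	"sort"
)

// pkTsPairs sorts two parallel slices as one unit: Less compares timestamps,
// and Swap moves the matching primary key and timestamp together.
type pkTsPairs struct {
	pks []int64
	tss []uint64
}

func (p pkTsPairs) Len() int           { return len(p.pks) }
func (p pkTsPairs) Less(i, j int) bool { return p.tss[i] < p.tss[j] }
func (p pkTsPairs) Swap(i, j int) {
	p.pks[i], p.pks[j] = p.pks[j], p.pks[i]
	p.tss[i], p.tss[j] = p.tss[j], p.tss[i]
}

func main() {
	pks := []int64{7, 5, 9}
	tss := []uint64{300, 100, 200}
	sort.Sort(pkTsPairs{pks: pks, tss: tss})
	fmt.Println(pks, tss) // [5 9 7] [100 200 300]
}
```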
"github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/util/commonpbutil" @@ -258,12 +258,8 @@ func (s *DelegatorDataSuite) TestProcessDelete() { ms.EXPECT().Indexes().Return(nil) ms.EXPECT().RowNum().Return(info.GetNumOfRows()) ms.EXPECT().Delete(mock.Anything, mock.Anything, mock.Anything).Return(nil) - ms.EXPECT().MayPkExist(mock.Anything).Call.Return(func(pk storage.PrimaryKey) bool { - return pk.EQ(storage.NewInt64PrimaryKey(10)) - }) - ms.EXPECT().GetHashFuncNum().Return(1) - ms.EXPECT().TestLocations(mock.Anything, mock.Anything).RunAndReturn(func(pk storage.PrimaryKey, locs []uint64) bool { - return pk.EQ(storage.NewInt64PrimaryKey(10)) + ms.EXPECT().MayPkExist(mock.Anything).RunAndReturn(func(lc *storage.LocationsCache) bool { + return lc.GetPk().EQ(storage.NewInt64PrimaryKey(10)) }) return ms }) @@ -272,8 +268,9 @@ func (s *DelegatorDataSuite) TestProcessDelete() { Call.Return(func(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) []*pkoracle.BloomFilterSet { return lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) *pkoracle.BloomFilterSet { bfs := pkoracle.NewBloomFilterSet(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed) - bf := bloom.NewWithEstimates(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()) + bf := bloomfilter.NewBloomFilterWithType(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()) pks := &storage.PkStatistics{ PkFilter: bf, } @@ -528,8 +525,10 @@ func (s *DelegatorDataSuite) TestLoadSegments() { Call.Return(func(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) []*pkoracle.BloomFilterSet { return lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) *pkoracle.BloomFilterSet { bfs := pkoracle.NewBloomFilterSet(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed) - bf := bloom.NewWithEstimates(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()) + bf := bloomfilter.NewBloomFilterWithType( + paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()) pks := &storage.PkStatistics{ PkFilter: bf, } @@ -686,8 +685,10 @@ func (s *DelegatorDataSuite) TestLoadSegments() { Call.Return(func(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) []*pkoracle.BloomFilterSet { return lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) *pkoracle.BloomFilterSet { bfs := pkoracle.NewBloomFilterSet(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed) - bf := bloom.NewWithEstimates(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()) + bf := bloomfilter.NewBloomFilterWithType( + paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()) pks := &storage.PkStatistics{ PkFilter: bf, } @@ -725,7 +726,7 @@ func (s *DelegatorDataSuite) TestLoadSegments() { }, 10) s.mq.EXPECT().AsConsumer(mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) - 
s.mq.EXPECT().Seek(mock.Anything, mock.Anything).Return(nil) + s.mq.EXPECT().Seek(mock.Anything, mock.Anything, mock.Anything).Return(nil) s.mq.EXPECT().Close() ch := make(chan *msgstream.MsgPack, 10) close(ch) @@ -880,10 +881,6 @@ func (s *DelegatorDataSuite) TestReleaseSegment() { ms.EXPECT().MayPkExist(mock.Anything).Call.Return(func(pk storage.PrimaryKey) bool { return pk.EQ(storage.NewInt64PrimaryKey(10)) }) - ms.EXPECT().GetHashFuncNum().Return(1) - ms.EXPECT().TestLocations(mock.Anything, mock.Anything).RunAndReturn(func(pk storage.PrimaryKey, locs []uint64) bool { - return pk.EQ(storage.NewInt64PrimaryKey(10)) - }) return ms }) }, nil) @@ -891,8 +888,10 @@ func (s *DelegatorDataSuite) TestReleaseSegment() { Call.Return(func(ctx context.Context, collectionID int64, version int64, infos ...*querypb.SegmentLoadInfo) []*pkoracle.BloomFilterSet { return lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) *pkoracle.BloomFilterSet { bfs := pkoracle.NewBloomFilterSet(info.GetSegmentID(), info.GetPartitionID(), commonpb.SegmentState_Sealed) - bf := bloom.NewWithEstimates(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()) + bf := bloomfilter.NewBloomFilterWithType( + paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()) pks := &storage.PkStatistics{ PkFilter: bf, } @@ -1110,44 +1109,62 @@ func (s *DelegatorDataSuite) TestLevel0Deletions() { partitionID := int64(10) partitionDeleteData := storage.NewDeleteData([]storage.PrimaryKey{storage.NewInt64PrimaryKey(1)}, []storage.Timestamp{100}) allPartitionDeleteData := storage.NewDeleteData([]storage.PrimaryKey{storage.NewInt64PrimaryKey(2)}, []storage.Timestamp{101}) - delegator.level0Deletions[partitionID] = partitionDeleteData - pks, _ := delegator.GetLevel0Deletions(partitionID) + schema := segments.GenTestCollectionSchema("test_stop", schemapb.DataType_Int64, true) + collection := segments.NewCollection(1, schema, nil, &querypb.LoadMetaInfo{ + LoadType: querypb.LoadType_LoadCollection, + }) + + l0, _ := segments.NewL0Segment(collection, segments.SegmentTypeSealed, 1, &querypb.SegmentLoadInfo{ + CollectionID: 1, + SegmentID: 2, + PartitionID: partitionID, + InsertChannel: delegator.vchannelName, + Level: datapb.SegmentLevel_L0, + NumOfRows: 1, + }) + l0.LoadDeltaData(context.TODO(), partitionDeleteData) + delegator.segmentManager.Put(context.TODO(), segments.SegmentTypeSealed, l0) + + l0Global, _ := segments.NewL0Segment(collection, segments.SegmentTypeSealed, 2, &querypb.SegmentLoadInfo{ + CollectionID: 1, + SegmentID: 3, + PartitionID: common.AllPartitionsID, + InsertChannel: delegator.vchannelName, + Level: datapb.SegmentLevel_L0, + NumOfRows: int64(1), + }) + l0Global.LoadDeltaData(context.TODO(), allPartitionDeleteData) + + pks, _ := delegator.GetLevel0Deletions(partitionID, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) s.True(pks[0].EQ(partitionDeleteData.Pks[0])) - pks, _ = delegator.GetLevel0Deletions(partitionID + 1) + pks, _ = delegator.GetLevel0Deletions(partitionID+1, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) s.Empty(pks) - delegator.level0Deletions[common.AllPartitionsID] = allPartitionDeleteData - pks, _ = delegator.GetLevel0Deletions(partitionID) - s.Len(pks, 2) + delegator.segmentManager.Put(context.TODO(), segments.SegmentTypeSealed, l0Global) + pks, _ = 
delegator.GetLevel0Deletions(partitionID, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) s.True(pks[0].EQ(partitionDeleteData.Pks[0])) s.True(pks[1].EQ(allPartitionDeleteData.Pks[0])) - delete(delegator.level0Deletions, partitionID) - pks, _ = delegator.GetLevel0Deletions(partitionID) - s.True(pks[0].EQ(allPartitionDeleteData.Pks[0])) - - // exchange the order - delegator.level0Deletions = make(map[int64]*storage.DeleteData) - partitionDeleteData, allPartitionDeleteData = allPartitionDeleteData, partitionDeleteData - delegator.level0Deletions[partitionID] = partitionDeleteData - - pks, _ = delegator.GetLevel0Deletions(partitionID) - s.True(pks[0].EQ(partitionDeleteData.Pks[0])) + bfs := pkoracle.NewBloomFilterSet(3, l0.Partition(), commonpb.SegmentState_Sealed) + bfs.UpdateBloomFilter(allPartitionDeleteData.Pks) - pks, _ = delegator.GetLevel0Deletions(partitionID + 1) - s.Empty(pks) + pks, _ = delegator.GetLevel0Deletions(partitionID, bfs) + // bf filtered segment + s.Equal(len(pks), 1) + s.True(pks[0].EQ(allPartitionDeleteData.Pks[0])) - delegator.level0Deletions[common.AllPartitionsID] = allPartitionDeleteData - pks, _ = delegator.GetLevel0Deletions(partitionID) - s.Len(pks, 2) + delegator.segmentManager.Remove(context.TODO(), l0.ID(), querypb.DataScope_All) + pks, _ = delegator.GetLevel0Deletions(partitionID, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) s.True(pks[0].EQ(allPartitionDeleteData.Pks[0])) - s.True(pks[1].EQ(partitionDeleteData.Pks[0])) - delete(delegator.level0Deletions, partitionID) - pks, _ = delegator.GetLevel0Deletions(partitionID) + pks, _ = delegator.GetLevel0Deletions(partitionID+1, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) s.True(pks[0].EQ(allPartitionDeleteData.Pks[0])) + + delegator.segmentManager.Remove(context.TODO(), l0Global.ID(), querypb.DataScope_All) + pks, _ = delegator.GetLevel0Deletions(partitionID+1, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing)) + s.Empty(pks) } func (s *DelegatorDataSuite) TestReadDeleteFromMsgstream() { @@ -1155,7 +1172,7 @@ func (s *DelegatorDataSuite) TestReadDeleteFromMsgstream() { defer cancel() s.mq.EXPECT().AsConsumer(mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) - s.mq.EXPECT().Seek(mock.Anything, mock.Anything).Return(nil) + s.mq.EXPECT().Seek(mock.Anything, mock.Anything, mock.Anything).Return(nil) s.mq.EXPECT().Close() ch := make(chan *msgstream.MsgPack, 10) s.mq.EXPECT().Chan().Return(ch) diff --git a/internal/querynodev2/delegator/delegator_test.go b/internal/querynodev2/delegator/delegator_test.go index 4d51b1145df39..2dcd9ac5e01ec 100644 --- a/internal/querynodev2/delegator/delegator_test.go +++ b/internal/querynodev2/delegator/delegator_test.go @@ -99,10 +99,6 @@ func (s *DelegatorSuite) SetupTest() { ms.EXPECT().Indexes().Return(nil) ms.EXPECT().RowNum().Return(info.GetNumOfRows()) ms.EXPECT().Delete(mock.Anything, mock.Anything, mock.Anything).Return(nil) - ms.EXPECT().GetHashFuncNum().Return(1) - ms.EXPECT().TestLocations(mock.Anything, mock.Anything).RunAndReturn(func(pk storage.PrimaryKey, locs []uint64) bool { - return pk.EQ(storage.NewInt64PrimaryKey(10)) - }) return ms }) }, nil) diff --git a/internal/querynodev2/handlers.go b/internal/querynodev2/handlers.go index 1b25fd7c254af..170af4c39e6a1 100644 --- a/internal/querynodev2/handlers.go +++ b/internal/querynodev2/handlers.go @@ -317,7 +317,7 @@ func (node *QueryNode) queryStreamSegments(ctx 
context.Context, req *querypb.Que } // Send task to scheduler and wait until it finished. - task := tasks.NewQueryStreamTask(ctx, collection, node.manager, req, srv) + task := tasks.NewQueryStreamTask(ctx, collection, node.manager, req, srv, node.streamBatchSzie) if err := node.scheduler.Add(task); err != nil { log.Warn("failed to add query task into scheduler", zap.Error(err)) return err diff --git a/internal/querynodev2/metrics_info.go b/internal/querynodev2/metrics_info.go index c41d12ed7ef6d..b4c50a5d1b9fb 100644 --- a/internal/querynodev2/metrics_info.go +++ b/internal/querynodev2/metrics_info.go @@ -103,11 +103,12 @@ func getQuotaMetrics(node *QueryNode) (*metricsinfo.QueryNodeQuotaMetrics, error } minTsafeChannel, minTsafe := node.tSafeManager.Min() - collections := node.manager.Collection.List() - nodeID := fmt.Sprint(node.GetNodeID()) + metrics.QueryNodeNumEntities.Reset() + metrics.QueryNodeEntitiesSize.Reset() + var totalGrowingSize int64 growingSegments := node.manager.Segment.GetBy(segments.WithType(segments.SegmentTypeGrowing)) growingGroupByCollection := lo.GroupBy(growingSegments, func(seg segments.Segment) int64 { @@ -125,6 +126,7 @@ func getQuotaMetrics(node *QueryNode) (*metricsinfo.QueryNodeQuotaMetrics, error growingGroupByPartition := lo.GroupBy(growingSegments, func(seg segments.Segment) int64 { return seg.Partition() }) + for _, segs := range growingGroupByPartition { numEntities := lo.SumBy(segs, func(seg segments.Segment) int64 { return seg.RowNum() @@ -136,7 +138,6 @@ func getQuotaMetrics(node *QueryNode) (*metricsinfo.QueryNodeQuotaMetrics, error fmt.Sprint(segment.Collection()), fmt.Sprint(segment.Partition()), segments.SegmentTypeGrowing.String(), - fmt.Sprint(len(segment.Indexes())), ).Set(float64(numEntities)) } @@ -166,7 +167,6 @@ func getQuotaMetrics(node *QueryNode) (*metricsinfo.QueryNodeQuotaMetrics, error fmt.Sprint(segment.Collection()), fmt.Sprint(segment.Partition()), segments.SegmentTypeSealed.String(), - fmt.Sprint(len(segment.Indexes())), ).Set(float64(numEntities)) } diff --git a/internal/querynodev2/pkoracle/bloom_filter_set.go b/internal/querynodev2/pkoracle/bloom_filter_set.go index 608bb656efe23..88f5602ebfe98 100644 --- a/internal/querynodev2/pkoracle/bloom_filter_set.go +++ b/internal/querynodev2/pkoracle/bloom_filter_set.go @@ -17,15 +17,14 @@ package pkoracle import ( - "context" "sync" - bloom "github.com/bits-and-blooms/bloom/v3" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/paramtable" @@ -41,68 +40,25 @@ type BloomFilterSet struct { segType commonpb.SegmentState currentStat *storage.PkStatistics historyStats []*storage.PkStatistics - - kHashFunc uint } // MayPkExist returns whether any bloom filters returns positive. 
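The pkoracle change below replaces the pk-based MayPkExist/TestLocations pair with a single MayPkExist(*storage.LocationsCache), so a primary key is hashed once and the resulting locations are reused while probing many filters. A rough sketch of that caching idea, assuming the bits-and-blooms/bloom/v3 API that the previous code used; locationsCache here is an illustrative stand-in, not the real storage.LocationsCache:

```go
package main

import (
	"fmt"

	bloom "github.com/bits-and-blooms/bloom/v3"
)

// locationsCache memoizes hash locations per hash-function count, so the key
// is hashed at most once for each distinct k across all probed filters.
type locationsCache struct {
	data []byte
	locs map[uint][]uint64 // hash-function count -> cached locations
}

func newLocationsCache(data []byte) *locationsCache {
	return &locationsCache{data: data, locs: make(map[uint][]uint64)}
}

// locations returns the k hash locations for the cached key, computing them
// only the first time a given k is seen.
func (c *locationsCache) locations(k uint) []uint64 {
	if cached, ok := c.locs[k]; ok {
		return cached
	}
	l := bloom.Locations(c.data, k)
	c.locs[k] = l
	return l
}

func main() {
	filters := []*bloom.BloomFilter{
		bloom.NewWithEstimates(1000, 0.01),
		bloom.NewWithEstimates(10000, 0.001),
	}
	filters[0].Add([]byte("pk-42"))

	lc := newLocationsCache([]byte("pk-42"))
	for i, f := range filters {
		// TestLocations probes the precomputed locations against this filter.
		hit := f.TestLocations(lc.locations(f.K()))
		fmt.Printf("filter %d may contain pk-42: %v\n", i, hit)
	}
}
```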
-func (s *BloomFilterSet) MayPkExist(pk storage.PrimaryKey) bool { +func (s *BloomFilterSet) MayPkExist(lc *storage.LocationsCache) bool { s.statsMutex.RLock() defer s.statsMutex.RUnlock() - if s.currentStat != nil && s.currentStat.PkExist(pk) { + if s.currentStat != nil && s.currentStat.TestLocationCache(lc) { return true } // for sealed, if one of the stats shows it exist, then we have to check it for _, historyStat := range s.historyStats { - if historyStat.PkExist(pk) { + if historyStat.TestLocationCache(lc) { return true } } return false } -func (s *BloomFilterSet) TestLocations(pk storage.PrimaryKey, locs []uint64) bool { - log := log.Ctx(context.TODO()).WithRateGroup("BloomFilterSet.TestLocations", 1, 60) - s.statsMutex.RLock() - defer s.statsMutex.RUnlock() - - if s.currentStat != nil { - k := s.currentStat.PkFilter.K() - if k > uint(len(locs)) { - log.RatedWarn(30, "locations num is less than hash func num, return false positive result", - zap.Int("locationNum", len(locs)), - zap.Uint("hashFuncNum", k), - zap.Int64("segmentID", s.segmentID)) - return true - } - - if s.currentStat.TestLocations(pk, locs[:k]) { - return true - } - } - - // for sealed, if one of the stats shows it exist, then we have to check it - for _, historyStat := range s.historyStats { - k := historyStat.PkFilter.K() - if k > uint(len(locs)) { - log.RatedWarn(30, "locations num is less than hash func num, return false positive result", - zap.Int("locationNum", len(locs)), - zap.Uint("hashFuncNum", k), - zap.Int64("segmentID", s.segmentID)) - return true - } - if historyStat.TestLocations(pk, locs[:k]) { - return true - } - } - return false -} - -func (s *BloomFilterSet) GetHashFuncNum() uint { - return s.kHashFunc -} - // ID implement candidate. func (s *BloomFilterSet) ID() int64 { return s.segmentID @@ -124,13 +80,12 @@ func (s *BloomFilterSet) UpdateBloomFilter(pks []storage.PrimaryKey) { defer s.statsMutex.Unlock() if s.currentStat == nil { - m, k := bloom.EstimateParameters(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()) - if k > s.kHashFunc { - s.kHashFunc = k - } + bf := bloomfilter.NewBloomFilterWithType( + paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + paramtable.Get().CommonCfg.BloomFilterType.GetValue()) s.currentStat = &storage.PkStatistics{ - PkFilter: bloom.New(m, k), + PkFilter: bf, } } @@ -157,9 +112,6 @@ func (s *BloomFilterSet) AddHistoricalStats(stats *storage.PkStatistics) { s.statsMutex.Lock() defer s.statsMutex.Unlock() - if stats.PkFilter.K() > s.kHashFunc { - s.kHashFunc = stats.PkFilter.K() - } s.historyStats = append(s.historyStats, stats) } diff --git a/internal/querynodev2/pkoracle/bloom_filter_set_test.go b/internal/querynodev2/pkoracle/bloom_filter_set_test.go index 0384d3faa7ad7..9aaa8f0a08b1c 100644 --- a/internal/querynodev2/pkoracle/bloom_filter_set_test.go +++ b/internal/querynodev2/pkoracle/bloom_filter_set_test.go @@ -41,10 +41,9 @@ func TestInt64Pk(t *testing.T) { bfs.UpdateBloomFilter(pks) for i := 0; i < batchSize; i++ { - locations := storage.Locations(pks[i], bfs.GetHashFuncNum()) - ret1 := bfs.TestLocations(pks[i], locations) - ret2 := bfs.MayPkExist(pks[i]) - assert.Equal(t, ret1, ret2) + lc := storage.NewLocationsCache(pks[i]) + ret := bfs.MayPkExist(lc) + assert.True(t, ret) } assert.Equal(t, int64(1), bfs.ID()) @@ -66,10 +65,9 @@ func TestVarCharPk(t *testing.T) { bfs.UpdateBloomFilter(pks) for i := 0; i < batchSize; 
i++ { - locations := storage.Locations(pks[i], bfs.GetHashFuncNum()) - ret1 := bfs.TestLocations(pks[i], locations) - ret2 := bfs.MayPkExist(pks[i]) - assert.Equal(t, ret1, ret2) + lc := storage.NewLocationsCache(pks[i]) + ret := bfs.MayPkExist(lc) + assert.True(t, ret) } } @@ -91,29 +89,8 @@ func TestHistoricalStat(t *testing.T) { bfs.currentStat = nil for i := 0; i < batchSize; i++ { - locations := storage.Locations(pks[i], bfs.GetHashFuncNum()) - ret1 := bfs.TestLocations(pks[i], locations) - ret2 := bfs.MayPkExist(pks[i]) - assert.Equal(t, ret1, ret2) - } -} - -func TestHashFuncNum(t *testing.T) { - paramtable.Init() - batchSize := 100 - pks := make([]storage.PrimaryKey, 0) - for i := 0; i < batchSize; i++ { - pk := storage.NewVarCharPrimaryKey(strconv.FormatInt(int64(i), 10)) - pks = append(pks, pk) - } - - bfs := NewBloomFilterSet(1, 1, commonpb.SegmentState_Sealed) - bfs.UpdateBloomFilter(pks) - - for i := 0; i < batchSize; i++ { - // pass locations more then hash func num in bf - locations := storage.Locations(pks[i], bfs.GetHashFuncNum()+3) - ret1 := bfs.TestLocations(pks[i], locations) - assert.True(t, ret1) + lc := storage.NewLocationsCache(pks[i]) + ret := bfs.MayPkExist(lc) + assert.True(t, ret) } } diff --git a/internal/querynodev2/pkoracle/candidate.go b/internal/querynodev2/pkoracle/candidate.go index e5f051e5f1939..c115a5a0c133e 100644 --- a/internal/querynodev2/pkoracle/candidate.go +++ b/internal/querynodev2/pkoracle/candidate.go @@ -26,9 +26,7 @@ import ( // Candidate is the interface for pk oracle candidate. type Candidate interface { // MayPkExist checks whether primary key could exists in this candidate. - MayPkExist(pk storage.PrimaryKey) bool - TestLocations(pk storage.PrimaryKey, locs []uint64) bool - GetHashFuncNum() uint + MayPkExist(lc *storage.LocationsCache) bool ID() int64 Partition() int64 diff --git a/internal/querynodev2/pkoracle/key.go b/internal/querynodev2/pkoracle/key.go index 9845b5e065343..6600398798670 100644 --- a/internal/querynodev2/pkoracle/key.go +++ b/internal/querynodev2/pkoracle/key.go @@ -28,20 +28,11 @@ type candidateKey struct { } // MayPkExist checks whether primary key could exists in this candidate. -func (k candidateKey) MayPkExist(pk storage.PrimaryKey) bool { +func (k candidateKey) MayPkExist(lc *storage.LocationsCache) bool { // always return true to prevent miuse return true } -func (k candidateKey) TestLocations(pk storage.PrimaryKey, locs []uint64) bool { - // always return true to prevent miuse - return true -} - -func (k candidateKey) GetHashFuncNum() uint { - return 0 -} - // ID implements Candidate. func (k candidateKey) ID() int64 { return k.segmentID diff --git a/internal/querynodev2/pkoracle/pk_oracle.go b/internal/querynodev2/pkoracle/pk_oracle.go index 4d686503ec952..a700fe3066e05 100644 --- a/internal/querynodev2/pkoracle/pk_oracle.go +++ b/internal/querynodev2/pkoracle/pk_oracle.go @@ -19,10 +19,8 @@ package pkoracle import ( "fmt" - "sync" "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -43,30 +41,11 @@ var _ PkOracle = (*pkOracle)(nil) // pkOracle implementation. 
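With the hash-function bookkeeping removed, the oracle's lookup reduces to building a locations cache for the key and asking each registered candidate MayPkExist. A compact sketch of that loop with simplified stand-in types (not the actual pkoracle/storage interfaces), building the cache once per key so every candidate probe can reuse it:

```go
package main

import "fmt"

// locationsCache stands in for storage.LocationsCache; only the key matters here.
type locationsCache struct{ pk string }

type candidate interface {
	ID() int64
	MayPkExist(lc *locationsCache) bool
}

// segmentCandidate is a toy candidate backed by an exact set instead of a bloom filter.
type segmentCandidate struct {
	id  int64
	pks map[string]struct{}
}

func (s segmentCandidate) ID() int64 { return s.id }
func (s segmentCandidate) MayPkExist(lc *locationsCache) bool {
	_, ok := s.pks[lc.pk]
	return ok
}

// get returns the IDs of all candidates that may contain pk.
func get(pk string, candidates []candidate) []int64 {
	lc := &locationsCache{pk: pk} // built once, reused for every candidate probe
	var result []int64
	for _, c := range candidates {
		if c.MayPkExist(lc) {
			result = append(result, c.ID())
		}
	}
	return result
}

func main() {
	cands := []candidate{
		segmentCandidate{id: 1, pks: map[string]struct{}{"a": {}}},
		segmentCandidate{id: 2, pks: map[string]struct{}{"a": {}, "b": {}}},
	}
	fmt.Println(get("a", cands)) // [1 2]
	fmt.Println(get("b", cands)) // [2]
}
```

Constructing the cache outside the candidate loop is what lets the memoized hash locations pay off when many segments are probed for the same key.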
type pkOracle struct { candidates *typeutil.ConcurrentMap[string, candidateWithWorker] - - hashFuncNumMutex sync.RWMutex - maxHashFuncNum uint -} - -func (pko *pkOracle) GetMaxHashFuncNum() uint { - pko.hashFuncNumMutex.RLock() - defer pko.hashFuncNumMutex.RUnlock() - return pko.maxHashFuncNum -} - -func (pko *pkOracle) TryUpdateHashFuncNum(newValue uint) { - pko.hashFuncNumMutex.Lock() - defer pko.hashFuncNumMutex.Unlock() - if newValue > pko.maxHashFuncNum { - pko.maxHashFuncNum = newValue - } } // Get implements PkOracle. func (pko *pkOracle) Get(pk storage.PrimaryKey, filters ...CandidateFilter) ([]int64, error) { var result []int64 - var locations []uint64 - pko.candidates.Range(func(key string, candidate candidateWithWorker) bool { for _, filter := range filters { if !filter(candidate) { @@ -74,15 +53,8 @@ func (pko *pkOracle) Get(pk storage.PrimaryKey, filters ...CandidateFilter) ([]i } } - if locations == nil { - locations = storage.Locations(pk, pko.GetMaxHashFuncNum()) - if len(locations) == 0 { - log.Warn("pkOracle: no location found for pk") - return true - } - } - - if candidate.TestLocations(pk, locations) { + lc := storage.NewLocationsCache(pk) + if candidate.MayPkExist(lc) { result = append(result, candidate.ID()) } return true @@ -97,7 +69,6 @@ func (pko *pkOracle) candidateKey(candidate Candidate, workerID int64) string { // Register register candidate func (pko *pkOracle) Register(candidate Candidate, workerID int64) error { - pko.TryUpdateHashFuncNum(candidate.GetHashFuncNum()) pko.candidates.Insert(pko.candidateKey(candidate, workerID), candidateWithWorker{ Candidate: candidate, workerID: workerID, @@ -108,7 +79,6 @@ func (pko *pkOracle) Register(candidate Candidate, workerID int64) error { // Remove removes candidate from pko. func (pko *pkOracle) Remove(filters ...CandidateFilter) error { - max := uint(0) pko.candidates.Range(func(key string, candidate candidateWithWorker) bool { for _, filter := range filters { if !filter(candidate) { @@ -116,14 +86,9 @@ func (pko *pkOracle) Remove(filters ...CandidateFilter) error { } } pko.candidates.GetAndRemove(pko.candidateKey(candidate, candidate.workerID)) - if candidate.GetHashFuncNum() > max { - max = candidate.GetHashFuncNum() - } - return true }) - pko.TryUpdateHashFuncNum(max) return nil } diff --git a/internal/querynodev2/segments/bloom_filter_set.go b/internal/querynodev2/segments/bloom_filter_set.go deleted file mode 100644 index b07713961cf1b..0000000000000 --- a/internal/querynodev2/segments/bloom_filter_set.go +++ /dev/null @@ -1,101 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package segments - -import ( - "sync" - - bloom "github.com/bits-and-blooms/bloom/v3" - "go.uber.org/zap" - - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - storage "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/log" - "github.com/milvus-io/milvus/pkg/util/paramtable" -) - -type bloomFilterSet struct { - statsMutex sync.RWMutex - currentStat *storage.PkStatistics - historyStats []*storage.PkStatistics -} - -func newBloomFilterSet() *bloomFilterSet { - return &bloomFilterSet{} -} - -// MayPkExist returns whether any bloom filters returns positive. -func (s *bloomFilterSet) MayPkExist(pk storage.PrimaryKey) bool { - s.statsMutex.RLock() - defer s.statsMutex.RUnlock() - if s.currentStat != nil && s.currentStat.PkExist(pk) { - return true - } - - // for sealed, if one of the stats shows it exist, then we have to check it - for _, historyStat := range s.historyStats { - if historyStat.PkExist(pk) { - return true - } - } - return false -} - -// UpdateBloomFilter updates currentStats with provided pks. -func (s *bloomFilterSet) UpdateBloomFilter(pks []storage.PrimaryKey) { - s.statsMutex.Lock() - defer s.statsMutex.Unlock() - - if s.currentStat == nil { - s.initCurrentStat() - } - - buf := make([]byte, 8) - for _, pk := range pks { - s.currentStat.UpdateMinMax(pk) - switch pk.Type() { - case schemapb.DataType_Int64: - int64Value := pk.(*storage.Int64PrimaryKey).Value - common.Endian.PutUint64(buf, uint64(int64Value)) - s.currentStat.PkFilter.Add(buf) - case schemapb.DataType_VarChar: - stringValue := pk.(*storage.VarCharPrimaryKey).Value - s.currentStat.PkFilter.AddString(stringValue) - default: - log.Error("failed to update bloomfilter", zap.Any("PK type", pk.Type())) - panic("failed to update bloomfilter") - } - } -} - -// AddHistoricalStats add loaded historical stats. -func (s *bloomFilterSet) AddHistoricalStats(stats *storage.PkStatistics) { - s.statsMutex.Lock() - defer s.statsMutex.Unlock() - - s.historyStats = append(s.historyStats, stats) -} - -// initCurrentStat initialize currentStats if nil. -// Note: invoker shall acquire statsMutex lock first. -func (s *bloomFilterSet) initCurrentStat() { - s.currentStat = &storage.PkStatistics{ - PkFilter: bloom.NewWithEstimates(paramtable.Get().CommonCfg.BloomFilterSize.GetAsUint(), - paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), - } -} diff --git a/internal/querynodev2/segments/bloom_filter_set_test.go b/internal/querynodev2/segments/bloom_filter_set_test.go deleted file mode 100644 index 9bf95a1ff9678..0000000000000 --- a/internal/querynodev2/segments/bloom_filter_set_test.go +++ /dev/null @@ -1,91 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package segments - -import ( - "testing" - - "github.com/stretchr/testify/suite" - - "github.com/milvus-io/milvus/internal/storage" - "github.com/milvus-io/milvus/pkg/util/paramtable" -) - -type BloomFilterSetSuite struct { - suite.Suite - - intPks []int64 - stringPks []string - set *bloomFilterSet -} - -func (suite *BloomFilterSetSuite) SetupTest() { - suite.intPks = []int64{1, 2, 3} - suite.stringPks = []string{"1", "2", "3"} - paramtable.Init() - suite.set = newBloomFilterSet() -} - -func (suite *BloomFilterSetSuite) TestInt64PkBloomFilter() { - pks, err := storage.GenInt64PrimaryKeys(suite.intPks...) - suite.NoError(err) - - suite.set.UpdateBloomFilter(pks) - for _, pk := range pks { - exist := suite.set.MayPkExist(pk) - suite.True(exist) - } -} - -func (suite *BloomFilterSetSuite) TestStringPkBloomFilter() { - pks, err := storage.GenVarcharPrimaryKeys(suite.stringPks...) - suite.NoError(err) - - suite.set.UpdateBloomFilter(pks) - for _, pk := range pks { - exist := suite.set.MayPkExist(pk) - suite.True(exist) - } -} - -func (suite *BloomFilterSetSuite) TestHistoricalBloomFilter() { - pks, err := storage.GenVarcharPrimaryKeys(suite.stringPks...) - suite.NoError(err) - - suite.set.UpdateBloomFilter(pks) - for _, pk := range pks { - exist := suite.set.MayPkExist(pk) - suite.True(exist) - } - - old := suite.set.currentStat - suite.set.currentStat = nil - for _, pk := range pks { - exist := suite.set.MayPkExist(pk) - suite.False(exist) - } - - suite.set.AddHistoricalStats(old) - for _, pk := range pks { - exist := suite.set.MayPkExist(pk) - suite.True(exist) - } -} - -func TestBloomFilterSet(t *testing.T) { - suite.Run(t, &BloomFilterSetSuite{}) -} diff --git a/internal/querynodev2/segments/load_index_info.go b/internal/querynodev2/segments/load_index_info.go index c5c1572475c40..04632bed95f2d 100644 --- a/internal/querynodev2/segments/load_index_info.go +++ b/internal/querynodev2/segments/load_index_info.go @@ -29,11 +29,13 @@ import ( "runtime" "unsafe" + "github.com/golang/protobuf/proto" "github.com/pingcap/log" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/datacoord" + "github.com/milvus-io/milvus/internal/proto/cgopb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/pkg/common" @@ -245,3 +247,33 @@ func (li *LoadIndexInfo) appendIndexEngineVersion(ctx context.Context, indexEngi return HandleCStatus(ctx, &status, "AppendIndexEngineVersion failed") } + +func (li *LoadIndexInfo) finish(ctx context.Context, info *cgopb.LoadIndexInfo) error { + marshaled, err := proto.Marshal(info) + if err != nil { + return err + } + + var status C.CStatus + _, _ = GetDynamicPool().Submit(func() (any, error) { + status = C.FinishLoadIndexInfo(li.cLoadIndexInfo, (*C.uint8_t)(unsafe.Pointer(&marshaled[0])), (C.uint64_t)(len(marshaled))) + return nil, nil + }).Await() + + if err := HandleCStatus(ctx, &status, "FinishLoadIndexInfo failed"); err != nil { + return err + } + + _, _ = GetLoadPool().Submit(func() (any, error) { + if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() { + status = C.AppendIndexV3(li.cLoadIndexInfo) + } else { + traceCtx := ParseCTraceContext(ctx) + status = C.AppendIndexV2(traceCtx.ctx, li.cLoadIndexInfo) + runtime.KeepAlive(traceCtx) + } + return nil, nil + }).Await() + + return HandleCStatus(ctx, &status, "AppendIndex failed") +} diff --git a/internal/querynodev2/segments/manager.go 
b/internal/querynodev2/segments/manager.go index c0b890257f903..4a8cd52baed55 100644 --- a/internal/querynodev2/segments/manager.go +++ b/internal/querynodev2/segments/manager.go @@ -36,6 +36,7 @@ import ( "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/querynodev2/segments/metricsutil" + "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/eventlog" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" @@ -723,11 +724,15 @@ func (mgr *segmentManager) updateMetric() { collections, partiations := make(typeutil.Set[int64]), make(typeutil.Set[int64]) for _, seg := range mgr.growingSegments { collections.Insert(seg.Collection()) - partiations.Insert(seg.Partition()) + if seg.Partition() != common.AllPartitionsID { + partiations.Insert(seg.Partition()) + } } for _, seg := range mgr.sealedSegments { collections.Insert(seg.Collection()) - partiations.Insert(seg.Partition()) + if seg.Partition() != common.AllPartitionsID { + partiations.Insert(seg.Partition()) + } } metrics.QueryNodeNumCollections.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Set(float64(collections.Len())) metrics.QueryNodeNumPartitions.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Set(float64(partiations.Len())) diff --git a/internal/querynodev2/segments/mock_segment.go b/internal/querynodev2/segments/mock_segment.go index 3121d0ca452c6..e31d1b5181ff7 100644 --- a/internal/querynodev2/segments/mock_segment.go +++ b/internal/querynodev2/segments/mock_segment.go @@ -246,47 +246,6 @@ func (_c *MockSegment_ExistIndex_Call) RunAndReturn(run func(int64) bool) *MockS return _c } -// GetHashFuncNum provides a mock function with given fields: -func (_m *MockSegment) GetHashFuncNum() uint { - ret := _m.Called() - - var r0 uint - if rf, ok := ret.Get(0).(func() uint); ok { - r0 = rf() - } else { - r0 = ret.Get(0).(uint) - } - - return r0 -} - -// MockSegment_GetHashFuncNum_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetHashFuncNum' -type MockSegment_GetHashFuncNum_Call struct { - *mock.Call -} - -// GetHashFuncNum is a helper method to define mock.On call -func (_e *MockSegment_Expecter) GetHashFuncNum() *MockSegment_GetHashFuncNum_Call { - return &MockSegment_GetHashFuncNum_Call{Call: _e.mock.On("GetHashFuncNum")} -} - -func (_c *MockSegment_GetHashFuncNum_Call) Run(run func()) *MockSegment_GetHashFuncNum_Call { - _c.Call.Run(func(args mock.Arguments) { - run() - }) - return _c -} - -func (_c *MockSegment_GetHashFuncNum_Call) Return(_a0 uint) *MockSegment_GetHashFuncNum_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *MockSegment_GetHashFuncNum_Call) RunAndReturn(run func() uint) *MockSegment_GetHashFuncNum_Call { - _c.Call.Return(run) - return _c -} - // GetIndex provides a mock function with given fields: fieldID func (_m *MockSegment) GetIndex(fieldID int64) *IndexedFieldInfo { ret := _m.Called(fieldID) @@ -752,13 +711,13 @@ func (_c *MockSegment_LoadInfo_Call) RunAndReturn(run func() *querypb.SegmentLoa return _c } -// MayPkExist provides a mock function with given fields: pk -func (_m *MockSegment) MayPkExist(pk storage.PrimaryKey) bool { - ret := _m.Called(pk) +// MayPkExist provides a mock function with given fields: lc +func (_m *MockSegment) MayPkExist(lc *storage.LocationsCache) bool { + ret := _m.Called(lc) var r0 bool - if rf, ok := ret.Get(0).(func(storage.PrimaryKey) bool); ok { - r0 = rf(pk) + if rf, ok := 
ret.Get(0).(func(*storage.LocationsCache) bool); ok { + r0 = rf(lc) } else { r0 = ret.Get(0).(bool) } @@ -772,14 +731,14 @@ type MockSegment_MayPkExist_Call struct { } // MayPkExist is a helper method to define mock.On call -// - pk storage.PrimaryKey -func (_e *MockSegment_Expecter) MayPkExist(pk interface{}) *MockSegment_MayPkExist_Call { - return &MockSegment_MayPkExist_Call{Call: _e.mock.On("MayPkExist", pk)} +// - lc *storage.LocationsCache +func (_e *MockSegment_Expecter) MayPkExist(lc interface{}) *MockSegment_MayPkExist_Call { + return &MockSegment_MayPkExist_Call{Call: _e.mock.On("MayPkExist", lc)} } -func (_c *MockSegment_MayPkExist_Call) Run(run func(pk storage.PrimaryKey)) *MockSegment_MayPkExist_Call { +func (_c *MockSegment_MayPkExist_Call) Run(run func(lc *storage.LocationsCache)) *MockSegment_MayPkExist_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(storage.PrimaryKey)) + run(args[0].(*storage.LocationsCache)) }) return _c } @@ -789,7 +748,7 @@ func (_c *MockSegment_MayPkExist_Call) Return(_a0 bool) *MockSegment_MayPkExist_ return _c } -func (_c *MockSegment_MayPkExist_Call) RunAndReturn(run func(storage.PrimaryKey) bool) *MockSegment_MayPkExist_Call { +func (_c *MockSegment_MayPkExist_Call) RunAndReturn(run func(*storage.LocationsCache) bool) *MockSegment_MayPkExist_Call { _c.Call.Return(run) return _c } @@ -1453,49 +1412,6 @@ func (_c *MockSegment_StartPosition_Call) RunAndReturn(run func() *msgpb.MsgPosi return _c } -// TestLocations provides a mock function with given fields: pk, loc -func (_m *MockSegment) TestLocations(pk storage.PrimaryKey, loc []uint64) bool { - ret := _m.Called(pk, loc) - - var r0 bool - if rf, ok := ret.Get(0).(func(storage.PrimaryKey, []uint64) bool); ok { - r0 = rf(pk, loc) - } else { - r0 = ret.Get(0).(bool) - } - - return r0 -} - -// MockSegment_TestLocations_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'TestLocations' -type MockSegment_TestLocations_Call struct { - *mock.Call -} - -// TestLocations is a helper method to define mock.On call -// - pk storage.PrimaryKey -// - loc []uint64 -func (_e *MockSegment_Expecter) TestLocations(pk interface{}, loc interface{}) *MockSegment_TestLocations_Call { - return &MockSegment_TestLocations_Call{Call: _e.mock.On("TestLocations", pk, loc)} -} - -func (_c *MockSegment_TestLocations_Call) Run(run func(pk storage.PrimaryKey, loc []uint64)) *MockSegment_TestLocations_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(storage.PrimaryKey), args[1].([]uint64)) - }) - return _c -} - -func (_c *MockSegment_TestLocations_Call) Return(_a0 bool) *MockSegment_TestLocations_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *MockSegment_TestLocations_Call) RunAndReturn(run func(storage.PrimaryKey, []uint64) bool) *MockSegment_TestLocations_Call { - _c.Call.Return(run) - return _c -} - // Type provides a mock function with given fields: func (_m *MockSegment) Type() commonpb.SegmentState { ret := _m.Called() diff --git a/internal/querynodev2/segments/pool.go b/internal/querynodev2/segments/pool.go index bbf21b91a746e..5a0685082e4e1 100644 --- a/internal/querynodev2/segments/pool.go +++ b/internal/querynodev2/segments/pool.go @@ -37,12 +37,14 @@ var ( // and other operations (insert/delete/statistics/etc.) 
// since in concurrent situation, there operation may block each other in high payload - sqp atomic.Pointer[conc.Pool[any]] - sqOnce sync.Once - dp atomic.Pointer[conc.Pool[any]] - dynOnce sync.Once - loadPool atomic.Pointer[conc.Pool[any]] - loadOnce sync.Once + sqp atomic.Pointer[conc.Pool[any]] + sqOnce sync.Once + dp atomic.Pointer[conc.Pool[any]] + dynOnce sync.Once + loadPool atomic.Pointer[conc.Pool[any]] + loadOnce sync.Once + warmupPool atomic.Pointer[conc.Pool[any]] + warmupOnce sync.Once ) // initSQPool initialize @@ -60,6 +62,7 @@ func initSQPool() { pt.Watch(pt.QueryNodeCfg.MaxReadConcurrency.Key, config.NewHandler("qn.sqpool.maxconc", ResizeSQPool)) pt.Watch(pt.QueryNodeCfg.CGOPoolSizeRatio.Key, config.NewHandler("qn.sqpool.cgopoolratio", ResizeSQPool)) + log.Info("init SQPool done", zap.Int("size", initPoolSize)) }) } @@ -73,6 +76,7 @@ func initDynamicPool() { ) dp.Store(pool) + log.Info("init dynamicPool done", zap.Int("size", hardware.GetCPUNum())) }) } @@ -80,9 +84,6 @@ func initLoadPool() { loadOnce.Do(func() { pt := paramtable.Get() poolSize := hardware.GetCPUNum() * pt.CommonCfg.MiddlePriorityThreadCoreCoefficient.GetAsInt() - if poolSize > 16 { - poolSize = 16 - } pool := conc.NewPool[any]( poolSize, conc.WithPreAlloc(false), @@ -93,6 +94,24 @@ func initLoadPool() { loadPool.Store(pool) pt.Watch(pt.CommonCfg.MiddlePriorityThreadCoreCoefficient.Key, config.NewHandler("qn.loadpool.middlepriority", ResizeLoadPool)) + log.Info("init loadPool done", zap.Int("size", poolSize)) + }) +} + +func initWarmupPool() { + warmupOnce.Do(func() { + pt := paramtable.Get() + poolSize := hardware.GetCPUNum() * pt.CommonCfg.LowPriorityThreadCoreCoefficient.GetAsInt() + pool := conc.NewPool[any]( + poolSize, + conc.WithPreAlloc(false), + conc.WithDisablePurge(false), + conc.WithPreHandler(runtime.LockOSThread), // lock os thread for cgo thread disposal + conc.WithNonBlocking(true), // make warming up non blocking + ) + + warmupPool.Store(pool) + pt.Watch(pt.CommonCfg.LowPriorityThreadCoreCoefficient.Key, config.NewHandler("qn.warmpool.lowpriority", ResizeWarmupPool)) }) } @@ -113,6 +132,11 @@ func GetLoadPool() *conc.Pool[any] { return loadPool.Load() } +func GetWarmupPool() *conc.Pool[any] { + initWarmupPool() + return warmupPool.Load() +} + func ResizeSQPool(evt *config.Event) { if evt.HasUpdated { pt := paramtable.Get() @@ -131,6 +155,14 @@ func ResizeLoadPool(evt *config.Event) { } } +func ResizeWarmupPool(evt *config.Event) { + if evt.HasUpdated { + pt := paramtable.Get() + newSize := hardware.GetCPUNum() * pt.CommonCfg.LowPriorityThreadCoreCoefficient.GetAsInt() + resizePool(GetWarmupPool(), newSize, "WarmupPool") + } +} + func resizePool(pool *conc.Pool[any], newSize int, tag string) { log := log.Ctx(context.Background()). 
With( diff --git a/internal/querynodev2/segments/pool_test.go b/internal/querynodev2/segments/pool_test.go index 6c817bdb1eb9a..868bce4186236 100644 --- a/internal/querynodev2/segments/pool_test.go +++ b/internal/querynodev2/segments/pool_test.go @@ -82,6 +82,27 @@ func TestResizePools(t *testing.T) { assert.Equal(t, expectedCap, GetLoadPool().Cap()) }) + t.Run("WarmupPool", func(t *testing.T) { + expectedCap := hardware.GetCPUNum() * pt.CommonCfg.LowPriorityThreadCoreCoefficient.GetAsInt() + + ResizeWarmupPool(&config.Event{ + HasUpdated: true, + }) + assert.Equal(t, expectedCap, GetWarmupPool().Cap()) + + pt.Save(pt.CommonCfg.LowPriorityThreadCoreCoefficient.Key, strconv.FormatFloat(pt.CommonCfg.LowPriorityThreadCoreCoefficient.GetAsFloat()*2, 'f', 10, 64)) + ResizeWarmupPool(&config.Event{ + HasUpdated: true, + }) + assert.Equal(t, expectedCap, GetWarmupPool().Cap()) + + pt.Save(pt.CommonCfg.LowPriorityThreadCoreCoefficient.Key, "0") + ResizeWarmupPool(&config.Event{ + HasUpdated: true, + }) + assert.Equal(t, expectedCap, GetWarmupPool().Cap()) + }) + t.Run("error_pool", func(*testing.T) { pool := conc.NewDefaultPool[any]() c := pool.Cap() diff --git a/internal/querynodev2/segments/reduce_test.go b/internal/querynodev2/segments/reduce_test.go index 21defdd64c122..9693dc2f717ad 100644 --- a/internal/querynodev2/segments/reduce_test.go +++ b/internal/querynodev2/segments/reduce_test.go @@ -101,7 +101,7 @@ func (suite *ReduceSuite) SetupTest() { ) suite.Require().NoError(err) for _, binlog := range binlogs { - err = suite.segment.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog) + err = suite.segment.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog, false) suite.Require().NoError(err) } } diff --git a/internal/querynodev2/segments/result.go b/internal/querynodev2/segments/result.go index 34a001e6e6a84..0ac61d81c9bb7 100644 --- a/internal/querynodev2/segments/result.go +++ b/internal/querynodev2/segments/result.go @@ -401,6 +401,7 @@ func MergeInternalRetrieveResult(ctx context.Context, retrieveResults []*interna validRetrieveResults := []*internalpb.RetrieveResults{} relatedDataSize := int64(0) + hasMoreResult := false for _, r := range retrieveResults { ret.AllRetrieveCount += r.GetAllRetrieveCount() relatedDataSize += r.GetCostAggregation().GetTotalRelatedDataSize() @@ -410,7 +411,9 @@ func MergeInternalRetrieveResult(ctx context.Context, retrieveResults []*interna } validRetrieveResults = append(validRetrieveResults, r) loopEnd += size + hasMoreResult = hasMoreResult || r.GetHasMoreResult() } + ret.HasMoreResult = hasMoreResult if len(validRetrieveResults) == 0 { return ret, nil @@ -427,7 +430,7 @@ func MergeInternalRetrieveResult(ctx context.Context, retrieveResults []*interna var retSize int64 maxOutputSize := paramtable.Get().QuotaConfig.MaxOutputSize.GetAsInt64() for j := 0; j < loopEnd; { - sel, drainOneResult := typeutil.SelectMinPK(param.limit, validRetrieveResults, cursors) + sel, drainOneResult := typeutil.SelectMinPK(validRetrieveResults, cursors) if sel == -1 || (param.mergeStopForBest && drainOneResult) { break } @@ -515,6 +518,7 @@ func MergeSegcoreRetrieveResults(ctx context.Context, retrieveResults []*segcore validSegments := make([]Segment, 0, len(segments)) selectedOffsets := make([][]int64, 0, len(retrieveResults)) selectedIndexes := make([][]int64, 0, len(retrieveResults)) + hasMoreResult := false for i, r := range retrieveResults { size := typeutil.GetSizeOfIDs(r.GetIds()) ret.AllRetrieveCount += 
r.GetAllRetrieveCount() @@ -529,7 +533,9 @@ func MergeSegcoreRetrieveResults(ctx context.Context, retrieveResults []*segcore selectedOffsets = append(selectedOffsets, make([]int64, 0, len(r.GetOffset()))) selectedIndexes = append(selectedIndexes, make([]int64, 0, len(r.GetOffset()))) loopEnd += size + hasMoreResult = r.GetHasMoreResult() || hasMoreResult } + ret.HasMoreResult = hasMoreResult if len(validRetrieveResults) == 0 { return ret, nil @@ -549,7 +555,7 @@ func MergeSegcoreRetrieveResults(ctx context.Context, retrieveResults []*segcore var retSize int64 maxOutputSize := paramtable.Get().QuotaConfig.MaxOutputSize.GetAsInt64() for j := 0; j < loopEnd && (limit == -1 || availableCount < limit); j++ { - sel, drainOneResult := typeutil.SelectMinPK(param.limit, validRetrieveResults, cursors) + sel, drainOneResult := typeutil.SelectMinPK(validRetrieveResults, cursors) if sel == -1 || (param.mergeStopForBest && drainOneResult) { break } diff --git a/internal/querynodev2/segments/result_test.go b/internal/querynodev2/segments/result_test.go index 79e75007d670a..6fcaf4196584a 100644 --- a/internal/querynodev2/segments/result_test.go +++ b/internal/querynodev2/segments/result_test.go @@ -513,29 +513,46 @@ func (suite *ResultSuite) TestResult_MergeStopForBestResult() { FieldsData: fieldDataArray2, } suite.Run("merge stop finite limited", func() { + result1.HasMoreResult = true + result2.HasMoreResult = true result, err := MergeSegcoreRetrieveResultsV1(context.Background(), []*segcorepb.RetrieveResults{result1, result2}, NewMergeParam(3, make([]int64, 0), nil, true)) suite.NoError(err) suite.Equal(2, len(result.GetFieldsData())) + // has more result both, stop reduce when draining one result + // here, we can only get best result from 0 to 4 without 6, because result1 has more results suite.Equal([]int64{0, 1, 2, 3, 4}, result.GetIds().GetIntId().GetData()) - // here, we can only get best result from 0 to 4 without 6, because we can never know whether there is - // one potential 5 in following result1 suite.Equal([]int64{11, 22, 11, 22, 33}, result.GetFieldsData()[0].GetScalars().GetLongData().Data) suite.InDeltaSlice([]float32{1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 11, 22, 33, 44}, result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10) }) suite.Run("merge stop unlimited", func() { + result1.HasMoreResult = false + result2.HasMoreResult = false result, err := MergeSegcoreRetrieveResultsV1(context.Background(), []*segcorepb.RetrieveResults{result1, result2}, NewMergeParam(typeutil.Unlimited, make([]int64, 0), nil, true)) suite.NoError(err) suite.Equal(2, len(result.GetFieldsData())) + // as result1 and result2 don't have better results neither + // we can reduce all available result into the reduced result suite.Equal([]int64{0, 1, 2, 3, 4, 6}, result.GetIds().GetIntId().GetData()) - // here, we can only get best result from 0 to 4 without 6, because we can never know whether there is - // one potential 5 in following result1 suite.Equal([]int64{11, 22, 11, 22, 33, 33}, result.GetFieldsData()[0].GetScalars().GetLongData().Data) suite.InDeltaSlice([]float32{1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 11, 22, 33, 44, 11, 22, 33, 44}, result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10) }) + suite.Run("merge stop one limited", func() { + result1.HasMoreResult = true + result2.HasMoreResult = false + result, err := MergeSegcoreRetrieveResultsV1(context.Background(), []*segcorepb.RetrieveResults{result1, result2}, + NewMergeParam(typeutil.Unlimited, make([]int64, 0), 
nil, true)) + suite.NoError(err) + suite.Equal(2, len(result.GetFieldsData())) + // as result1 may have better results, stop reducing when draining it + suite.Equal([]int64{0, 1, 2, 3, 4}, result.GetIds().GetIntId().GetData()) + suite.Equal([]int64{11, 22, 11, 22, 33}, result.GetFieldsData()[0].GetScalars().GetLongData().Data) + suite.InDeltaSlice([]float32{1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 11, 22, 33, 44}, + result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10) + }) }) suite.Run("test stop internal merge for best", func() { @@ -559,6 +576,8 @@ func (suite *ResultSuite) TestResult_MergeStopForBestResult() { }, FieldsData: fieldDataArray2, } + result1.HasMoreResult = true + result2.HasMoreResult = false result, err := MergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{result1, result2}, NewMergeParam(3, make([]int64, 0), nil, true)) suite.NoError(err) @@ -590,11 +609,24 @@ func (suite *ResultSuite) TestResult_MergeStopForBestResult() { }, FieldsData: fieldDataArray2, } - result, err := MergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{result1, result2}, - NewMergeParam(3, make([]int64, 0), nil, true)) - suite.NoError(err) - suite.Equal(2, len(result.GetFieldsData())) - suite.Equal([]int64{0, 2, 4, 7}, result.GetIds().GetIntId().GetData()) + suite.Run("test drain one result without more results", func() { + result1.HasMoreResult = false + result2.HasMoreResult = false + result, err := MergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{result1, result2}, + NewMergeParam(3, make([]int64, 0), nil, true)) + suite.NoError(err) + suite.Equal(2, len(result.GetFieldsData())) + suite.Equal([]int64{0, 2, 4, 7}, result.GetIds().GetIntId().GetData()) + }) + suite.Run("test drain one result with more results", func() { + result1.HasMoreResult = false + result2.HasMoreResult = true + result, err := MergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{result1, result2}, + NewMergeParam(3, make([]int64, 0), nil, true)) + suite.NoError(err) + suite.Equal(2, len(result.GetFieldsData())) + suite.Equal([]int64{0, 2}, result.GetIds().GetIntId().GetData()) + }) }) } diff --git a/internal/querynodev2/segments/retrieve_test.go b/internal/querynodev2/segments/retrieve_test.go index aa2562b1e866c..ea58f2802b92e 100644 --- a/internal/querynodev2/segments/retrieve_test.go +++ b/internal/querynodev2/segments/retrieve_test.go @@ -109,7 +109,7 @@ func (suite *RetrieveSuite) SetupTest() { ) suite.Require().NoError(err) for _, binlog := range binlogs { - err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog) + err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog, false) suite.Require().NoError(err) } diff --git a/internal/querynodev2/segments/search_test.go b/internal/querynodev2/segments/search_test.go index 81475b14c27db..415ad28ccee98 100644 --- a/internal/querynodev2/segments/search_test.go +++ b/internal/querynodev2/segments/search_test.go @@ -100,7 +100,7 @@ func (suite *SearchSuite) SetupTest() { ) suite.Require().NoError(err) for _, binlog := range binlogs { - err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog) + err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog, false) suite.Require().NoError(err) } diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 
c9a0df822efad..b4291850bca59 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -30,7 +30,6 @@ import ( "fmt" "io" "runtime" - "strconv" "strings" "unsafe" @@ -46,6 +45,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" milvus_storage "github.com/milvus-io/milvus-storage/go/storage" "github.com/milvus-io/milvus-storage/go/storage/options" + "github.com/milvus-io/milvus/internal/proto/cgopb" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/proto/segcorepb" @@ -57,6 +57,9 @@ import ( "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/indexparamcheck" + "github.com/milvus-io/milvus/pkg/util/indexparams" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/metautil" "github.com/milvus-io/milvus/pkg/util/paramtable" @@ -183,16 +186,8 @@ func (s *baseSegment) UpdateBloomFilter(pks []storage.PrimaryKey) { // MayPkExist returns true if the given PK exists in the PK range and being positive through the bloom filter, // false otherwise, // may returns true even the PK doesn't exist actually -func (s *baseSegment) MayPkExist(pk storage.PrimaryKey) bool { - return s.bloomFilterSet.MayPkExist(pk) -} - -func (s *baseSegment) TestLocations(pk storage.PrimaryKey, loc []uint64) bool { - return s.bloomFilterSet.TestLocations(pk, loc) -} - -func (s *baseSegment) GetHashFuncNum() uint { - return s.bloomFilterSet.GetHashFuncNum() +func (s *baseSegment) MayPkExist(lc *storage.LocationsCache) bool { + return s.bloomFilterSet.MayPkExist(lc) } // ResourceUsageEstimate returns the estimated resource usage of the segment. @@ -440,15 +435,6 @@ func (s *LocalSegment) initializeSegment() error { // Update the insert count when initialize the segment and update the metrics. 
s.insertCount.Store(loadInfo.GetNumOfRows()) - metrics.QueryNodeNumEntities.WithLabelValues( - s.DatabaseName(), - fmt.Sprint(paramtable.GetNodeID()), - fmt.Sprint(s.Collection()), - fmt.Sprint(s.Partition()), - s.Type().String(), - strconv.FormatInt(int64(len(s.Indexes())), 10), - ).Add(float64(loadInfo.GetNumOfRows())) - return nil } @@ -808,15 +794,6 @@ func (s *LocalSegment) Insert(ctx context.Context, rowIDs []int64, timestamps [] } s.insertCount.Add(int64(numOfRow)) - metrics.QueryNodeNumEntities.WithLabelValues( - s.DatabaseName(), - fmt.Sprint(paramtable.GetNodeID()), - fmt.Sprint(s.Collection()), - fmt.Sprint(s.Partition()), - s.Type().String(), - strconv.FormatInt(int64(len(s.Indexes())), 10), - ).Add(float64(numOfRow)) - s.rowNum.Store(-1) s.memSize.Store(-1) return nil @@ -967,7 +944,7 @@ func (s *LocalSegment) LoadMultiFieldData(ctx context.Context) error { return nil } -func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCount int64, field *datapb.FieldBinlog) error { +func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCount int64, field *datapb.FieldBinlog, useMmap bool) error { if !s.ptrLock.RLockIf(state.IsNotReleased) { return merr.WrapErrSegmentNotLoaded(s.ID(), "segment released") } @@ -1006,7 +983,7 @@ func (s *LocalSegment) LoadFieldData(ctx context.Context, fieldID int64, rowCoun } collection := s.collection - mmapEnabled := common.IsFieldMmapEnabled(collection.Schema(), fieldID) || + mmapEnabled := useMmap || common.IsFieldMmapEnabled(collection.Schema(), fieldID) || (!common.FieldHasMmapKey(collection.Schema(), fieldID) && params.Params.QueryNodeCfg.MmapEnabled.GetAsBool()) loadFieldDataInfo.appendMMapDirPath(paramtable.Get().QueryNodeCfg.MmapDirPath.GetValue()) loadFieldDataInfo.enableMmap(fieldID, mmapEnabled) @@ -1285,18 +1262,58 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn return err } defer deleteLoadIndexInfo(loadIndexInfo) + + schema, err := typeutil.CreateSchemaHelper(s.GetCollection().Schema()) + if err != nil { + return err + } + fieldSchema, err := schema.GetFieldFromID(indexInfo.GetFieldID()) + if err != nil { + return err + } + + indexParams := funcutil.KeyValuePair2Map(indexInfo.IndexParams) + // as Knowhere reports error if encounter an unknown param, we need to delete it + delete(indexParams, common.MmapEnabledKey) + + // some build params also exist in indexParams, which are useless during loading process + if indexParams["index_type"] == indexparamcheck.IndexDISKANN { + if err := indexparams.SetDiskIndexLoadParams(paramtable.Get(), indexParams, indexInfo.GetNumRows()); err != nil { + return err + } + } + + if err := indexparams.AppendPrepareLoadParams(paramtable.Get(), indexParams); err != nil { + return err + } + + indexInfoProto := &cgopb.LoadIndexInfo{ + CollectionID: s.Collection(), + PartitionID: s.Partition(), + SegmentID: s.ID(), + Field: fieldSchema, + EnableMmap: isIndexMmapEnable(indexInfo), + MmapDirPath: paramtable.Get().QueryNodeCfg.MmapDirPath.GetValue(), + IndexID: indexInfo.GetIndexID(), + IndexBuildID: indexInfo.GetBuildID(), + IndexVersion: indexInfo.GetIndexVersion(), + IndexParams: indexParams, + IndexFiles: indexInfo.GetIndexFilePaths(), + IndexEngineVersion: indexInfo.GetCurrentIndexVersion(), + IndexStoreVersion: indexInfo.GetIndexStoreVersion(), + } + if paramtable.Get().CommonCfg.EnableStorageV2.GetAsBool() { uri, err := typeutil_internal.GetStorageURI(paramtable.Get().CommonCfg.StorageScheme.GetValue(), 
paramtable.Get().CommonCfg.StoragePathPrefix.GetValue(), s.ID()) if err != nil { return err } - loadIndexInfo.appendStorageInfo(uri, indexInfo.IndexStoreVersion) + indexInfoProto.Uri = uri } newLoadIndexInfoSpan := tr.RecordSpan() // 2. - err = loadIndexInfo.appendLoadIndexInfo(ctx, indexInfo, s.Collection(), s.Partition(), s.ID(), fieldType) - if err != nil { + if err := loadIndexInfo.finish(ctx, indexInfoProto); err != nil { if loadIndexInfo.cleanLocalData(ctx) != nil { log.Warn("failed to clean cached data on disk after append index failed", zap.Int64("buildID", indexInfo.BuildID), @@ -1327,7 +1344,7 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn zap.Duration("newLoadIndexInfoSpan", newLoadIndexInfoSpan), zap.Duration("appendLoadIndexInfoSpan", appendLoadIndexInfoSpan), zap.Duration("updateIndexInfoSpan", updateIndexInfoSpan), - zap.Duration("updateIndexInfoSpan", warmupChunkCacheSpan), + zap.Duration("warmupChunkCacheSpan", warmupChunkCacheSpan), ) return nil } @@ -1386,7 +1403,7 @@ func (s *LocalSegment) WarmupChunkCache(ctx context.Context, fieldID int64) { warmingUp := strings.ToLower(paramtable.Get().QueryNodeCfg.ChunkCacheWarmingUp.GetValue()) switch warmingUp { case "sync": - GetLoadPool().Submit(func() (any, error) { + GetWarmupPool().Submit(func() (any, error) { cFieldID := C.int64_t(fieldID) status = C.WarmupChunkCache(s.ptr, cFieldID) if err := HandleCStatus(ctx, &status, "warming up chunk cache failed"); err != nil { @@ -1397,7 +1414,7 @@ func (s *LocalSegment) WarmupChunkCache(ctx context.Context, fieldID int64) { return nil, nil }).Await() case "async": - GetLoadPool().Submit(func() (any, error) { + GetWarmupPool().Submit(func() (any, error) { if !s.ptrLock.RLockIf(state.IsNotReleased) { return nil, nil } diff --git a/internal/querynodev2/segments/segment_interface.go b/internal/querynodev2/segments/segment_interface.go index 9ed9d4df90e21..f439d0f818e62 100644 --- a/internal/querynodev2/segments/segment_interface.go +++ b/internal/querynodev2/segments/segment_interface.go @@ -83,9 +83,7 @@ type Segment interface { // Bloom filter related UpdateBloomFilter(pks []storage.PrimaryKey) - MayPkExist(pk storage.PrimaryKey) bool - TestLocations(pk storage.PrimaryKey, loc []uint64) bool - GetHashFuncNum() uint + MayPkExist(lc *storage.LocationsCache) bool // Read operations Search(ctx context.Context, searchReq *SearchRequest) (*SearchResult, error) diff --git a/internal/querynodev2/segments/segment_loader.go b/internal/querynodev2/segments/segment_loader.go index 7c6f83dda7938..2db43f4ede218 100644 --- a/internal/querynodev2/segments/segment_loader.go +++ b/internal/querynodev2/segments/segment_loader.go @@ -508,7 +508,7 @@ func (loader *segmentLoaderV2) loadSealedSegmentFields(ctx context.Context, segm runningGroup, _ := errgroup.WithContext(ctx) fields.Range(func(fieldID int64, field *schemapb.FieldSchema) bool { runningGroup.Go(func() error { - return segment.LoadFieldData(ctx, fieldID, rowCount, nil) + return segment.LoadFieldData(ctx, fieldID, rowCount, nil, false) }) return true }) @@ -1058,7 +1058,7 @@ func (loader *segmentLoader) loadSealedSegment(ctx context.Context, loadInfo *qu zap.String("index", info.IndexInfo.GetIndexName()), ) // for scalar index's raw data, only load to mmap not memory - if err = segment.LoadFieldData(ctx, fieldID, loadInfo.GetNumOfRows(), info.FieldBinlog); err != nil { + if err = segment.LoadFieldData(ctx, fieldID, loadInfo.GetNumOfRows(), info.FieldBinlog, true); err != nil { log.Warn("load raw data failed", 
zap.Int64("fieldID", fieldID), zap.Error(err)) return err } @@ -1212,7 +1212,7 @@ func loadSealedSegmentFields(ctx context.Context, collection *Collection, segmen fieldID, rowCount, fieldBinLog, - ) + false) }) } err := runningGroup.Wait() diff --git a/internal/querynodev2/segments/segment_loader_test.go b/internal/querynodev2/segments/segment_loader_test.go index 138fed79b784d..a1930159d45cb 100644 --- a/internal/querynodev2/segments/segment_loader_test.go +++ b/internal/querynodev2/segments/segment_loader_test.go @@ -226,7 +226,8 @@ func (suite *SegmentLoaderSuite) TestLoadMultipleSegments() { // Won't load bloom filter with sealed segments for _, segment := range segments { for pk := 0; pk < 100; pk++ { - exist := segment.MayPkExist(storage.NewInt64PrimaryKey(int64(pk))) + lc := storage.NewLocationsCache(storage.NewInt64PrimaryKey(int64(pk))) + exist := segment.MayPkExist(lc) suite.Require().False(exist) } } @@ -260,7 +261,8 @@ func (suite *SegmentLoaderSuite) TestLoadMultipleSegments() { // Should load bloom filter with growing segments for _, segment := range segments { for pk := 0; pk < 100; pk++ { - exist := segment.MayPkExist(storage.NewInt64PrimaryKey(int64(pk))) + lc := storage.NewLocationsCache(storage.NewInt64PrimaryKey(int64(pk))) + exist := segment.MayPkExist(lc) suite.True(exist) } } @@ -351,7 +353,8 @@ func (suite *SegmentLoaderSuite) TestLoadBloomFilter() { for _, bf := range bfs { for pk := 0; pk < 100; pk++ { - exist := bf.MayPkExist(storage.NewInt64PrimaryKey(int64(pk))) + lc := storage.NewLocationsCache(storage.NewInt64PrimaryKey(int64(pk))) + exist := bf.MayPkExist(lc) suite.Require().True(exist) } } @@ -404,7 +407,8 @@ func (suite *SegmentLoaderSuite) TestLoadDeltaLogs() { if pk == 1 || pk == 2 { continue } - exist := segment.MayPkExist(storage.NewInt64PrimaryKey(int64(pk))) + lc := storage.NewLocationsCache(storage.NewInt64PrimaryKey(int64(pk))) + exist := segment.MayPkExist(lc) suite.Require().True(exist) } } @@ -457,7 +461,8 @@ func (suite *SegmentLoaderSuite) TestLoadDupDeltaLogs() { if pk == 1 || pk == 2 { continue } - exist := segment.MayPkExist(storage.NewInt64PrimaryKey(int64(pk))) + lc := storage.NewLocationsCache(storage.NewInt64PrimaryKey(int64(pk))) + exist := segment.MayPkExist(lc) suite.Require().True(exist) } diff --git a/internal/querynodev2/segments/segment_test.go b/internal/querynodev2/segments/segment_test.go index c05de4c83d100..464df07e7ab74 100644 --- a/internal/querynodev2/segments/segment_test.go +++ b/internal/querynodev2/segments/segment_test.go @@ -100,7 +100,7 @@ func (suite *SegmentSuite) SetupTest() { g, err := suite.sealed.(*LocalSegment).StartLoadData() suite.Require().NoError(err) for _, binlog := range binlogs { - err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog) + err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog, false) suite.Require().NoError(err) } g.Done(nil) @@ -188,14 +188,6 @@ func (suite *SegmentSuite) TestHasRawData() { suite.True(has) } -func (suite *SegmentSuite) TestLocation() { - pk := storage.NewInt64PrimaryKey(100) - locations := storage.Locations(pk, suite.sealed.GetHashFuncNum()) - ret1 := suite.sealed.TestLocations(pk, locations) - ret2 := suite.sealed.MayPkExist(pk) - suite.Equal(ret1, ret2) -} - func (suite *SegmentSuite) TestCASVersion() { segment := suite.sealed diff --git a/internal/querynodev2/server.go b/internal/querynodev2/server.go index 326da3ef8daff..c9a3d5cf42355 100644 --- a/internal/querynodev2/server.go +++ 
b/internal/querynodev2/server.go @@ -111,7 +111,8 @@ type QueryNode struct { loader segments.Loader // Search/Query - scheduler tasks.Scheduler + scheduler tasks.Scheduler + streamBatchSzie int // etcd client etcdCli *clientv3.Client @@ -328,6 +329,7 @@ func (node *QueryNode) Init() error { node.scheduler = tasks.NewScheduler( schedulePolicy, ) + node.streamBatchSzie = paramtable.Get().QueryNodeCfg.QueryStreamBatchSize.GetAsInt() log.Info("queryNode init scheduler", zap.String("policy", schedulePolicy)) node.clusterManager = cluster.NewWorkerManager(func(ctx context.Context, nodeID int64) (cluster.Worker, error) { @@ -461,7 +463,7 @@ func (node *QueryNode) Stop() error { case <-time.After(time.Second): metrics.StoppingBalanceSegmentNum.WithLabelValues(fmt.Sprint(node.GetNodeID())).Set(float64(len(sealedSegments))) metrics.StoppingBalanceChannelNum.WithLabelValues(fmt.Sprint(node.GetNodeID())).Set(float64(channelNum)) - log.Info("migrate data...", zap.Int64("ServerID", paramtable.GetNodeID()), + log.Info("migrate data...", zap.Int64("ServerID", node.GetNodeID()), zap.Int64s("sealedSegments", lo.Map(sealedSegments, func(s segments.Segment, i int) int64 { return s.ID() })), diff --git a/internal/querynodev2/services_test.go b/internal/querynodev2/services_test.go index d636c0dd43863..dcaf420b8cbe5 100644 --- a/internal/querynodev2/services_test.go +++ b/internal/querynodev2/services_test.go @@ -306,7 +306,7 @@ func (suite *ServiceSuite) TestWatchDmChannelsInt64() { // mocks suite.factory.EXPECT().NewTtMsgStream(mock.Anything).Return(suite.msgStream, nil) suite.msgStream.EXPECT().AsConsumer(mock.Anything, []string{suite.pchannel}, mock.Anything, mock.Anything).Return(nil) - suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything).Return(nil) + suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything, mock.Anything).Return(nil) suite.msgStream.EXPECT().Chan().Return(suite.msgChan) suite.msgStream.EXPECT().Close() @@ -358,7 +358,7 @@ func (suite *ServiceSuite) TestWatchDmChannelsVarchar() { // mocks suite.factory.EXPECT().NewTtMsgStream(mock.Anything).Return(suite.msgStream, nil) suite.msgStream.EXPECT().AsConsumer(mock.Anything, []string{suite.pchannel}, mock.Anything, mock.Anything).Return(nil) - suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything).Return(nil) + suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything, mock.Anything).Return(nil) suite.msgStream.EXPECT().Chan().Return(suite.msgChan) suite.msgStream.EXPECT().Close() @@ -432,7 +432,7 @@ func (suite *ServiceSuite) TestWatchDmChannels_Failed() { suite.factory.EXPECT().NewTtMsgStream(mock.Anything).Return(suite.msgStream, nil) suite.msgStream.EXPECT().AsConsumer(mock.Anything, []string{suite.pchannel}, mock.Anything, mock.Anything).Return(nil) suite.msgStream.EXPECT().Close().Return() - suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything).Return(errors.New("mock error")).Once() + suite.msgStream.EXPECT().Seek(mock.Anything, mock.Anything, mock.Anything).Return(errors.New("mock error")).Once() status, err = suite.node.WatchDmChannels(ctx, req) suite.NoError(err) diff --git a/internal/querynodev2/tasks/query_stream_task.go b/internal/querynodev2/tasks/query_stream_task.go index 5840efa6c1ce7..6c85535bbe0a9 100644 --- a/internal/querynodev2/tasks/query_stream_task.go +++ b/internal/querynodev2/tasks/query_stream_task.go @@ -16,6 +16,7 @@ func NewQueryStreamTask(ctx context.Context, manager *segments.Manager, req *querypb.QueryRequest, srv streamrpc.QueryStreamServer, + streamBatchSize int, ) *QueryStreamTask { 
return &QueryStreamTask{ ctx: ctx, @@ -23,6 +24,7 @@ func NewQueryStreamTask(ctx context.Context, segmentManager: manager, req: req, srv: srv, + batchSize: streamBatchSize, notifier: make(chan error, 1), } } @@ -33,6 +35,7 @@ type QueryStreamTask struct { segmentManager *segments.Manager req *querypb.QueryRequest srv streamrpc.QueryStreamServer + batchSize int notifier chan error } @@ -64,7 +67,10 @@ func (t *QueryStreamTask) Execute() error { } defer retrievePlan.Delete() - segments, err := segments.RetrieveStream(t.ctx, t.segmentManager, retrievePlan, t.req, t.srv) + srv := streamrpc.NewResultCacheServer(t.srv, t.batchSize) + defer srv.Flush() + + segments, err := segments.RetrieveStream(t.ctx, t.segmentManager, retrievePlan, t.req, srv) defer t.segmentManager.Segment.Unpin(segments) if err != nil { return err diff --git a/internal/querynodev2/tasks/query_task.go b/internal/querynodev2/tasks/query_task.go index 831d782d34b09..d4b0ec5c8061e 100644 --- a/internal/querynodev2/tasks/query_task.go +++ b/internal/querynodev2/tasks/query_task.go @@ -160,6 +160,7 @@ func (t *QueryTask) Execute() error { TotalRelatedDataSize: relatedDataSize, }, AllRetrieveCount: reducedResult.GetAllRetrieveCount(), + HasMoreResult: reducedResult.HasMoreResult, } return nil } diff --git a/internal/rootcoord/broker.go b/internal/rootcoord/broker.go index c1fa30d0ac037..8701a5c6f2068 100644 --- a/internal/rootcoord/broker.go +++ b/internal/rootcoord/broker.go @@ -58,9 +58,7 @@ type Broker interface { GcConfirm(ctx context.Context, collectionID, partitionID UniqueID) bool DropCollectionIndex(ctx context.Context, collID UniqueID, partIDs []UniqueID) error - GetSegmentIndexState(ctx context.Context, collID UniqueID, indexName string, segIDs []UniqueID) ([]*indexpb.SegmentIndexState, error) - DescribeIndex(ctx context.Context, colID UniqueID) (*indexpb.DescribeIndexResponse, error) - + // notify observer to clean their meta cache BroadcastAlteredCollection(ctx context.Context, req *milvuspb.AlterCollectionRequest) error } @@ -270,12 +268,6 @@ func (b *ServerBroker) BroadcastAlteredCollection(ctx context.Context, req *milv return nil } -func (b *ServerBroker) DescribeIndex(ctx context.Context, colID UniqueID) (*indexpb.DescribeIndexResponse, error) { - return b.s.dataCoord.DescribeIndex(ctx, &indexpb.DescribeIndexRequest{ - CollectionID: colID, - }) -} - func (b *ServerBroker) GcConfirm(ctx context.Context, collectionID, partitionID UniqueID) bool { log := log.Ctx(ctx).With(zap.Int64("collection", collectionID), zap.Int64("partition", partitionID)) diff --git a/internal/rootcoord/dml_channels_test.go b/internal/rootcoord/dml_channels_test.go index db61ff1327db9..e27117b0268aa 100644 --- a/internal/rootcoord/dml_channels_test.go +++ b/internal/rootcoord/dml_channels_test.go @@ -293,8 +293,10 @@ func (ms *FailMsgStream) Broadcast(*msgstream.MsgPack) (map[string][]msgstream.M } return nil, nil } -func (ms *FailMsgStream) Consume() *msgstream.MsgPack { return nil } -func (ms *FailMsgStream) Seek(ctx context.Context, offset []*msgstream.MsgPosition) error { return nil } +func (ms *FailMsgStream) Consume() *msgstream.MsgPack { return nil } +func (ms *FailMsgStream) Seek(ctx context.Context, msgPositions []*msgstream.MsgPosition, includeCurrentMsg bool) error { + return nil +} func (ms *FailMsgStream) GetLatestMsgID(channel string) (msgstream.MessageID, error) { return nil, nil diff --git a/internal/rootcoord/mock_test.go b/internal/rootcoord/mock_test.go index f17ff9d1e27eb..fe0940928d6a7 100644 --- 
a/internal/rootcoord/mock_test.go +++ b/internal/rootcoord/mock_test.go @@ -899,7 +899,6 @@ type mockBroker struct { FlushFunc func(ctx context.Context, cID int64, segIDs []int64) error DropCollectionIndexFunc func(ctx context.Context, collID UniqueID, partIDs []UniqueID) error - DescribeIndexFunc func(ctx context.Context, colID UniqueID) (*indexpb.DescribeIndexResponse, error) GetSegmentIndexStateFunc func(ctx context.Context, collID UniqueID, indexName string, segIDs []UniqueID) ([]*indexpb.SegmentIndexState, error) BroadcastAlteredCollectionFunc func(ctx context.Context, req *milvuspb.AlterCollectionRequest) error @@ -935,10 +934,6 @@ func (b mockBroker) DropCollectionIndex(ctx context.Context, collID UniqueID, pa return b.DropCollectionIndexFunc(ctx, collID, partIDs) } -func (b mockBroker) DescribeIndex(ctx context.Context, colID UniqueID) (*indexpb.DescribeIndexResponse, error) { - return b.DescribeIndexFunc(ctx, colID) -} - func (b mockBroker) GetSegmentIndexState(ctx context.Context, collID UniqueID, indexName string, segIDs []UniqueID) ([]*indexpb.SegmentIndexState, error) { return b.GetSegmentIndexStateFunc(ctx, collID, indexName, segIDs) } diff --git a/internal/rootcoord/quota_center.go b/internal/rootcoord/quota_center.go index 539125a8cba24..c16c5dbca9933 100644 --- a/internal/rootcoord/quota_center.go +++ b/internal/rootcoord/quota_center.go @@ -433,9 +433,13 @@ func (q *QuotaCenter) collectMetrics() error { } } + datacoordQuotaCollections := make([]int64, 0) q.diskMu.Lock() if dataCoordTopology.Cluster.Self.QuotaMetrics != nil { q.dataCoordMetrics = dataCoordTopology.Cluster.Self.QuotaMetrics + for metricCollection := range q.dataCoordMetrics.PartitionsBinlogSize { + datacoordQuotaCollections = append(datacoordQuotaCollections, metricCollection) + } } q.diskMu.Unlock() @@ -447,7 +451,6 @@ func (q *QuotaCenter) collectMetrics() error { } var rangeErr error collections.Range(func(collectionID int64) bool { - var coll *model.Collection coll, getErr := q.meta.GetCollectionByIDWithMaxTs(context.TODO(), collectionID) if getErr != nil { rangeErr = getErr @@ -482,7 +485,23 @@ func (q *QuotaCenter) collectMetrics() error { } return true }) - return rangeErr + if rangeErr != nil { + return rangeErr + } + for _, collectionID := range datacoordQuotaCollections { + _, ok := q.collectionIDToDBID.Get(collectionID) + if ok { + continue + } + coll, getErr := q.meta.GetCollectionByIDWithMaxTs(context.TODO(), collectionID) + if getErr != nil { + return getErr + } + q.collectionIDToDBID.Insert(collectionID, coll.DBID) + q.collections.Insert(FormatCollectionKey(coll.DBID, coll.Name), collectionID) + } + + return nil }) // get Proxies metrics group.Go(func() error { @@ -1468,6 +1487,7 @@ func (q *QuotaCenter) sendRatesToProxy() error { // recordMetrics records metrics of quota states. 
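The `quota_center.go` hunk above makes `collectMetrics` remember every collection that appears in DataCoord's `PartitionsBinlogSize` quota metrics and, after the proxy-driven `Range`, back-fill `collectionIDToDBID` (and the collection-key map) for any ID not yet registered, so disk-quota accounting covers collections that only surface through DataCoord. A minimal sketch of that back-fill loop, with the concurrent maps and metadata lookup reduced to plain maps and a callback (all names here are stand-ins for the real fields):

```go
// Sketch of the back-fill added to QuotaCenter.collectMetrics; the nested
// partition map is flattened and GetCollectionByIDWithMaxTs is abstracted
// into a callback for illustration.
package quotasketch

func backfillCollections(
	binlogSizeByCollection map[int64]int64, // keys of dataCoordMetrics.PartitionsBinlogSize
	collectionIDToDBID map[int64]int64,
	getDBID func(collectionID int64) (int64, error),
) error {
	for collectionID := range binlogSizeByCollection {
		if _, ok := collectionIDToDBID[collectionID]; ok {
			continue // already registered via the proxy-reported collections
		}
		dbID, err := getDBID(collectionID)
		if err != nil {
			return err
		}
		collectionIDToDBID[collectionID] = dbID
	}
	return nil
}
```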
func (q *QuotaCenter) recordMetrics() { + metrics.RootCoordQuotaStates.Reset() dbIDs := make(map[int64]string, q.dbs.Len()) collectionIDs := make(map[int64]string, q.collections.Len()) q.dbs.Range(func(name string, id int64) bool { diff --git a/internal/rootcoord/root_coord.go b/internal/rootcoord/root_coord.go index cc330fa3c9a8e..4e8fc35ee1700 100644 --- a/internal/rootcoord/root_coord.go +++ b/internal/rootcoord/root_coord.go @@ -545,15 +545,29 @@ func (c *Core) initRbac() error { } } + if Params.ProxyCfg.EnablePublicPrivilege.GetAsBool() { + err = c.initPublicRolePrivilege() + if err != nil { + return err + } + } + + if Params.RoleCfg.Enabled.GetAsBool() { + return c.initBuiltinRoles() + } + return nil +} + +func (c *Core) initPublicRolePrivilege() error { // grant privileges for the public role globalPrivileges := []string{ commonpb.ObjectPrivilege_PrivilegeDescribeCollection.String(), - commonpb.ObjectPrivilege_PrivilegeShowCollections.String(), } collectionPrivileges := []string{ commonpb.ObjectPrivilege_PrivilegeIndexDetail.String(), } + var err error for _, globalPrivilege := range globalPrivileges { err = c.meta.OperatePrivilege(util.DefaultTenant, &milvuspb.GrantEntity{ Role: &milvuspb.RoleEntity{Name: util.RolePublic}, @@ -584,9 +598,6 @@ func (c *Core) initRbac() error { return errors.Wrap(err, "failed to grant collection privilege") } } - if Params.RoleCfg.Enabled.GetAsBool() { - return c.initBuiltinRoles() - } return nil } diff --git a/internal/rootcoord/root_coord_test.go b/internal/rootcoord/root_coord_test.go index 832526d61bef9..bbdb896b52d17 100644 --- a/internal/rootcoord/root_coord_test.go +++ b/internal/rootcoord/root_coord_test.go @@ -1807,6 +1807,48 @@ func TestCore_Stop(t *testing.T) { }) } +func TestCore_InitRBAC(t *testing.T) { + paramtable.Init() + t.Run("init default role and public role privilege", func(t *testing.T) { + meta := mockrootcoord.NewIMetaTable(t) + c := newTestCore(withHealthyCode(), withMeta(meta)) + meta.EXPECT().CreateRole(mock.Anything, mock.Anything).Return(nil).Twice() + meta.EXPECT().OperatePrivilege(mock.Anything, mock.Anything, mock.Anything).Return(nil).Twice() + + Params.Save(Params.RoleCfg.Enabled.Key, "false") + Params.Save(Params.ProxyCfg.EnablePublicPrivilege.Key, "true") + + defer func() { + Params.Reset(Params.RoleCfg.Enabled.Key) + Params.Reset(Params.ProxyCfg.EnablePublicPrivilege.Key) + }() + + err := c.initRbac() + assert.NoError(t, err) + }) + + t.Run("not init public role privilege and init default privilege", func(t *testing.T) { + builtinRoles := `{"db_admin": {"privileges": [{"object_type": "Global", "object_name": "*", "privilege": "CreateCollection", "db_name": "*"}]}}` + meta := mockrootcoord.NewIMetaTable(t) + c := newTestCore(withHealthyCode(), withMeta(meta)) + meta.EXPECT().CreateRole(mock.Anything, mock.Anything).Return(nil).Times(3) + meta.EXPECT().OperatePrivilege(mock.Anything, mock.Anything, mock.Anything).Return(nil).Once() + + Params.Save(Params.RoleCfg.Enabled.Key, "true") + Params.Save(Params.RoleCfg.Roles.Key, builtinRoles) + Params.Save(Params.ProxyCfg.EnablePublicPrivilege.Key, "false") + + defer func() { + Params.Reset(Params.RoleCfg.Enabled.Key) + Params.Reset(Params.RoleCfg.Roles.Key) + Params.Reset(Params.ProxyCfg.EnablePublicPrivilege.Key) + }() + + err := c.initRbac() + assert.NoError(t, err) + }) +} + type RootCoordSuite struct { suite.Suite } diff --git a/internal/rootcoord/show_collection_task.go b/internal/rootcoord/show_collection_task.go index 247a171af321e..090d4ada5b561 100644 --- 
a/internal/rootcoord/show_collection_task.go +++ b/internal/rootcoord/show_collection_task.go @@ -89,7 +89,7 @@ func (t *showCollectionTask) Execute(ctx context.Context) error { for _, entity := range entities { objectType := entity.GetObject().GetName() if objectType == commonpb.ObjectType_Global.String() && - entity.GetGrantor().GetPrivilege().GetName() == commonpb.ObjectPrivilege_PrivilegeAll.String() { + entity.GetGrantor().GetPrivilege().GetName() == util.PrivilegeNameForAPI(commonpb.ObjectPrivilege_PrivilegeAll.String()) { privilegeColls.Insert(util.AnyWord) return privilegeColls, nil } diff --git a/internal/rootcoord/show_collection_task_test.go b/internal/rootcoord/show_collection_task_test.go index 8d82e4aa20275..52cea062cbda0 100644 --- a/internal/rootcoord/show_collection_task_test.go +++ b/internal/rootcoord/show_collection_task_test.go @@ -298,7 +298,9 @@ func TestShowCollectionsAuth(t *testing.T) { { Object: &milvuspb.ObjectEntity{Name: commonpb.ObjectType_Global.String()}, Grantor: &milvuspb.GrantorEntity{ - Privilege: &milvuspb.PrivilegeEntity{Name: commonpb.ObjectPrivilege_PrivilegeAll.String()}, + Privilege: &milvuspb.PrivilegeEntity{ + Name: util.PrivilegeNameForAPI(commonpb.ObjectPrivilege_PrivilegeAll.String()), + }, }, }, }, nil).Once() diff --git a/internal/storage/field_stats.go b/internal/storage/field_stats.go index a26e8aa9e1f91..87d6e9acf7c77 100644 --- a/internal/storage/field_stats.go +++ b/internal/storage/field_stats.go @@ -20,10 +20,12 @@ import ( "encoding/json" "fmt" - "github.com/bits-and-blooms/bloom/v3" + "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" ) @@ -31,12 +33,13 @@ import ( // FieldStats contains statistics data for any column // todo: compatible to PrimaryKeyStats type FieldStats struct { - FieldID int64 `json:"fieldID"` - Type schemapb.DataType `json:"type"` - Max ScalarFieldValue `json:"max"` // for scalar field - Min ScalarFieldValue `json:"min"` // for scalar field - BF *bloom.BloomFilter `json:"bf"` // for scalar field - Centroids []VectorFieldValue `json:"centroids"` // for vector field + FieldID int64 `json:"fieldID"` + Type schemapb.DataType `json:"type"` + Max ScalarFieldValue `json:"max"` // for scalar field + Min ScalarFieldValue `json:"min"` // for scalar field + BFType bloomfilter.BFType `json:"bfType"` // for scalar field + BF bloomfilter.BloomFilterInterface `json:"bf"` // for scalar field + Centroids []VectorFieldValue `json:"centroids"` // for vector field } // UnmarshalJSON unmarshal bytes to FieldStats @@ -141,12 +144,22 @@ func (stats *FieldStats) UnmarshalJSON(data []byte) error { } } - if bfMessage, ok := messageMap["bf"]; ok && bfMessage != nil { - stats.BF = &bloom.BloomFilter{} - err = stats.BF.UnmarshalJSON(*bfMessage) + bfType := bloomfilter.BasicBF + if bfTypeMessage, ok := messageMap["bfType"]; ok && bfTypeMessage != nil { + err := json.Unmarshal(*bfTypeMessage, &bfType) if err != nil { return err } + stats.BFType = bfType + } + + if bfMessage, ok := messageMap["bf"]; ok && bfMessage != nil { + bf, err := bloomfilter.UnmarshalJSON(*bfMessage, bfType) + if err != nil { + log.Warn("Failed to unmarshal bloom filter, use AlwaysTrueBloomFilter instead of return err", zap.Error(err)) + bf = bloomfilter.AlwaysTrueBloomFilter + } + stats.BF = bf } } else { 
stats.initCentroids(data, stats.Type) @@ -161,12 +174,12 @@ func (stats *FieldStats) UnmarshalJSON(data []byte) error { func (stats *FieldStats) initCentroids(data []byte, dataType schemapb.DataType) { type FieldStatsAux struct { - FieldID int64 `json:"fieldID"` - Type schemapb.DataType `json:"type"` - Max json.RawMessage `json:"max"` - Min json.RawMessage `json:"min"` - BF *bloom.BloomFilter `json:"bf"` - Centroids []json.RawMessage `json:"centroids"` + FieldID int64 `json:"fieldID"` + Type schemapb.DataType `json:"type"` + Max json.RawMessage `json:"max"` + Min json.RawMessage `json:"min"` + BF bloomfilter.BloomFilterInterface `json:"bf"` + Centroids []json.RawMessage `json:"centroids"` } // Unmarshal JSON into the auxiliary struct var aux FieldStatsAux @@ -361,10 +374,15 @@ func NewFieldStats(fieldID int64, pkType schemapb.DataType, rowNum int64) (*Fiel Type: pkType, }, nil } + bfType := paramtable.Get().CommonCfg.BloomFilterType.GetValue() return &FieldStats{ FieldID: fieldID, Type: pkType, - BF: bloom.NewWithEstimates(uint(rowNum), paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), + BFType: bloomfilter.BFTypeFromString(bfType), + BF: bloomfilter.NewBloomFilterWithType( + uint(rowNum), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + bfType), }, nil } @@ -391,11 +409,17 @@ func (sw *FieldStatsWriter) GenerateList(stats []*FieldStats) error { // GenerateByData writes data from @msgs with @fieldID to @buffer func (sw *FieldStatsWriter) GenerateByData(fieldID int64, pkType schemapb.DataType, msgs ...FieldData) error { statsList := make([]*FieldStats, 0) + + bfType := paramtable.Get().CommonCfg.BloomFilterType.GetValue() for _, msg := range msgs { stats := &FieldStats{ FieldID: fieldID, Type: pkType, - BF: bloom.NewWithEstimates(uint(msg.RowNum()), paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), + BFType: bloomfilter.BFTypeFromString(bfType), + BF: bloomfilter.NewBloomFilterWithType( + uint(msg.RowNum()), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + bfType), } stats.UpdateByMsgs(msg) diff --git a/internal/storage/field_stats_test.go b/internal/storage/field_stats_test.go index e169902bf9749..f04155ac2d873 100644 --- a/internal/storage/field_stats_test.go +++ b/internal/storage/field_stats_test.go @@ -20,12 +20,13 @@ import ( "encoding/json" "testing" - "github.com/bits-and-blooms/bloom/v3" "github.com/stretchr/testify/assert" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/paramtable" ) func TestFieldStatsUpdate(t *testing.T) { @@ -373,7 +374,7 @@ func TestFieldStatsWriter_UpgradePrimaryKey(t *testing.T) { FieldID: common.RowIDField, Min: 1, Max: 9, - BF: bloom.NewWithEstimates(100000, 0.05), + BF: bloomfilter.NewBloomFilterWithType(100000, 0.05, paramtable.Get().CommonCfg.BloomFilterType.GetValue()), } b := make([]byte, 8) @@ -574,8 +575,9 @@ func TestFieldStatsUnMarshal(t *testing.T) { assert.Error(t, err) err = stats.UnmarshalJSON([]byte("{\"fieldID\":1,\"max\":10, \"maxPk\":10, \"minPk\": \"b\"}")) assert.Error(t, err) + // return AlwaysTrueBloomFilter when deserialize bloom filter failed. 
err = stats.UnmarshalJSON([]byte("{\"fieldID\":1,\"max\":10, \"maxPk\":10, \"minPk\": 1, \"bf\": \"2\"}")) - assert.Error(t, err) + assert.NoError(t, err) }) t.Run("succeed", func(t *testing.T) { diff --git a/internal/storage/insert_data.go b/internal/storage/insert_data.go index 3722965279a2f..23e10e529066d 100644 --- a/internal/storage/insert_data.go +++ b/internal/storage/insert_data.go @@ -84,7 +84,9 @@ func (i *InsertData) GetRowNum() int { var rowNum int for _, data := range i.Data { rowNum = data.RowNum() - break + if rowNum > 0 { + break + } } return rowNum } diff --git a/internal/storage/pk_statistics.go b/internal/storage/pk_statistics.go index c42a8c87929dd..7d4b21e2ef44c 100644 --- a/internal/storage/pk_statistics.go +++ b/internal/storage/pk_statistics.go @@ -19,18 +19,18 @@ package storage import ( "fmt" - "github.com/bits-and-blooms/bloom/v3" "github.com/cockroachdb/errors" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" ) // pkStatistics contains pk field statistic information type PkStatistics struct { - PkFilter *bloom.BloomFilter // bloom filter of pk inside a segment - MinPK PrimaryKey // minimal pk value, shortcut for checking whether a pk is inside this segment - MaxPK PrimaryKey // maximal pk value, same above + PkFilter bloomfilter.BloomFilterInterface // bloom filter of pk inside a segment + MinPK PrimaryKey // minimal pk value, shortcut for checking whether a pk is inside this segment + MaxPK PrimaryKey // maximal pk value, same above } // update set pk min/max value if input value is beyond former range. @@ -109,16 +109,16 @@ func (st *PkStatistics) PkExist(pk PrimaryKey) bool { } // Locations returns a list of hash locations representing a data item. -func Locations(pk PrimaryKey, k uint) []uint64 { +func Locations(pk PrimaryKey, k uint, bfType bloomfilter.BFType) []uint64 { switch pk.Type() { case schemapb.DataType_Int64: buf := make([]byte, 8) int64Pk := pk.(*Int64PrimaryKey) common.Endian.PutUint64(buf, uint64(int64Pk.Value)) - return bloom.Locations(buf, k) + return bloomfilter.Locations(buf, k, bfType) case schemapb.DataType_VarChar: varCharPk := pk.(*VarCharPrimaryKey) - return bloom.Locations([]byte(varCharPk.Value), k) + return bloomfilter.Locations([]byte(varCharPk.Value), k, bfType) default: // TODO:: } @@ -140,14 +140,14 @@ func (st *PkStatistics) TestLocations(pk PrimaryKey, locs []uint64) bool { return st.MinPK.LE(pk) && st.MaxPK.GE(pk) } -func (st *PkStatistics) TestLocationCache(lc LocationsCache) bool { +func (st *PkStatistics) TestLocationCache(lc *LocationsCache) bool { // empty pkStatics if st.MinPK == nil || st.MaxPK == nil || st.PkFilter == nil { return false } // check bf first, TestLocation just do some bitset compute, cost is cheaper - if !st.PkFilter.TestLocations(lc.Locations(st.PkFilter.K())) { + if !st.PkFilter.TestLocations(lc.Locations(st.PkFilter.K(), st.PkFilter.Type())) { return false } @@ -158,23 +158,34 @@ func (st *PkStatistics) TestLocationCache(lc LocationsCache) bool { // LocationsCache is a helper struct caching pk bloom filter locations. // Note that this helper is not concurrent safe and shall be used in same goroutine. 
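The `pk_statistics.go` changes above switch `PkStatistics` to the `bloomfilter.BloomFilterInterface`, make `Locations` take the filter type, and have `TestLocationCache` accept a `*LocationsCache` so hash locations are computed at most once per filter type and reused across segments; the check order stays bloom filter first, min/max range second. A hedged sketch of that two-stage check and the per-type memoization, with the PK, filter, and hashing reduced to simple stand-ins:

```go
// Sketch of the two-stage PK existence check mirroring TestLocationCache and
// the reworked LocationsCache; everything except the control flow is a
// simplified stand-in for the real storage/bloomfilter types.
package pkstats_sketch

type BFType int

const (
	BasicBF BFType = iota + 1
	BlockedBF
)

type bloomFilter interface {
	K() uint
	Type() BFType
	TestLocations(locs []uint64) bool
}

// locationsCache memoizes hash locations per filter type, as the reworked
// storage.LocationsCache does (one instance per PK, reused across segments).
type locationsCache struct {
	pk      int64
	basic   []uint64
	blocked []uint64
	hashFn  func(pk int64, k uint, t BFType) []uint64 // stand-in for storage.Locations
}

func (lc *locationsCache) locations(k uint, t BFType) []uint64 {
	switch t {
	case BasicBF:
		if int(k) > len(lc.basic) {
			lc.basic = lc.hashFn(lc.pk, k, t)
		}
		return lc.basic[:k]
	case BlockedBF:
		if int(k) > len(lc.blocked) {
			lc.blocked = lc.hashFn(lc.pk, k, t)
		}
		return lc.blocked[:k]
	default:
		return nil
	}
}

// pkExist mirrors PkStatistics.TestLocationCache: the cheap bitset test runs
// first, then the min/max range check (simplified to int64 bounds here).
func pkExist(bf bloomFilter, minPK, maxPK int64, lc *locationsCache) bool {
	if bf == nil {
		return false
	}
	if !bf.TestLocations(lc.locations(bf.K(), bf.Type())) {
		return false
	}
	return minPK <= lc.pk && lc.pk <= maxPK
}
```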
type LocationsCache struct { - pk PrimaryKey - locations map[uint][]uint64 + pk PrimaryKey + basicBFLocations []uint64 + blockBFLocations []uint64 } -func (lc LocationsCache) Locations(k uint) []uint64 { - locs, ok := lc.locations[k] - if ok { - return locs +func (lc *LocationsCache) GetPk() PrimaryKey { + return lc.pk +} + +func (lc *LocationsCache) Locations(k uint, bfType bloomfilter.BFType) []uint64 { + switch bfType { + case bloomfilter.BasicBF: + if int(k) > len(lc.basicBFLocations) { + lc.basicBFLocations = Locations(lc.pk, k, bfType) + } + return lc.basicBFLocations[:k] + case bloomfilter.BlockedBF: + if int(k) > len(lc.blockBFLocations) { + lc.blockBFLocations = Locations(lc.pk, k, bfType) + } + return lc.blockBFLocations[:k] + default: + return nil } - locs = Locations(lc.pk, k) - lc.locations[k] = locs - return locs } -func NewLocationsCache(pk PrimaryKey) LocationsCache { - return LocationsCache{ - pk: pk, - locations: make(map[uint][]uint64), +func NewLocationsCache(pk PrimaryKey) *LocationsCache { + return &LocationsCache{ + pk: pk, } } diff --git a/internal/storage/primary_key.go b/internal/storage/primary_key.go index f9322f64dbd93..640ee2226a48c 100644 --- a/internal/storage/primary_key.go +++ b/internal/storage/primary_key.go @@ -158,71 +158,13 @@ func (ip *Int64PrimaryKey) Size() int64 { return 16 } -type BaseStringPrimaryKey struct { - Value string -} - -func (sp *BaseStringPrimaryKey) GT(key BaseStringPrimaryKey) bool { - return strings.Compare(sp.Value, key.Value) > 0 -} - -func (sp *BaseStringPrimaryKey) GE(key BaseStringPrimaryKey) bool { - return strings.Compare(sp.Value, key.Value) >= 0 -} - -func (sp *BaseStringPrimaryKey) LT(key BaseStringPrimaryKey) bool { - return strings.Compare(sp.Value, key.Value) < 0 -} - -func (sp *BaseStringPrimaryKey) LE(key BaseStringPrimaryKey) bool { - return strings.Compare(sp.Value, key.Value) <= 0 -} - -func (sp *BaseStringPrimaryKey) EQ(key BaseStringPrimaryKey) bool { - return strings.Compare(sp.Value, key.Value) == 0 -} - -func (sp *BaseStringPrimaryKey) MarshalJSON() ([]byte, error) { - ret, err := json.Marshal(sp.Value) - if err != nil { - return nil, err - } - - return ret, nil -} - -func (sp *BaseStringPrimaryKey) UnmarshalJSON(data []byte) error { - err := json.Unmarshal(data, &sp.Value) - if err != nil { - return err - } - - return nil -} - -func (sp *BaseStringPrimaryKey) SetValue(data interface{}) error { - value, ok := data.(string) - if !ok { - return fmt.Errorf("wrong type value when setValue for StringPrimaryKey") - } - - sp.Value = value - return nil -} - -func (sp *BaseStringPrimaryKey) GetValue() interface{} { - return sp.Value -} - type VarCharPrimaryKey struct { - BaseStringPrimaryKey + Value string } func NewVarCharPrimaryKey(v string) *VarCharPrimaryKey { return &VarCharPrimaryKey{ - BaseStringPrimaryKey: BaseStringPrimaryKey{ - Value: v, - }, + Value: v, } } @@ -233,7 +175,7 @@ func (vcp *VarCharPrimaryKey) GT(key PrimaryKey) bool { return false } - return vcp.BaseStringPrimaryKey.GT(pk.BaseStringPrimaryKey) + return strings.Compare(vcp.Value, pk.Value) > 0 } func (vcp *VarCharPrimaryKey) GE(key PrimaryKey) bool { @@ -243,7 +185,7 @@ func (vcp *VarCharPrimaryKey) GE(key PrimaryKey) bool { return false } - return vcp.BaseStringPrimaryKey.GE(pk.BaseStringPrimaryKey) + return strings.Compare(vcp.Value, pk.Value) >= 0 } func (vcp *VarCharPrimaryKey) LT(key PrimaryKey) bool { @@ -253,7 +195,7 @@ func (vcp *VarCharPrimaryKey) LT(key PrimaryKey) bool { return false } - return 
vcp.BaseStringPrimaryKey.LT(pk.BaseStringPrimaryKey) + return strings.Compare(vcp.Value, pk.Value) < 0 } func (vcp *VarCharPrimaryKey) LE(key PrimaryKey) bool { @@ -263,7 +205,7 @@ func (vcp *VarCharPrimaryKey) LE(key PrimaryKey) bool { return false } - return vcp.BaseStringPrimaryKey.LE(pk.BaseStringPrimaryKey) + return strings.Compare(vcp.Value, pk.Value) <= 0 } func (vcp *VarCharPrimaryKey) EQ(key PrimaryKey) bool { @@ -273,7 +215,39 @@ func (vcp *VarCharPrimaryKey) EQ(key PrimaryKey) bool { return false } - return vcp.BaseStringPrimaryKey.EQ(pk.BaseStringPrimaryKey) + return strings.Compare(vcp.Value, pk.Value) == 0 +} + +func (vcp *VarCharPrimaryKey) MarshalJSON() ([]byte, error) { + ret, err := json.Marshal(vcp.Value) + if err != nil { + return nil, err + } + + return ret, nil +} + +func (vcp *VarCharPrimaryKey) UnmarshalJSON(data []byte) error { + err := json.Unmarshal(data, &vcp.Value) + if err != nil { + return err + } + + return nil +} + +func (vcp *VarCharPrimaryKey) SetValue(data interface{}) error { + value, ok := data.(string) + if !ok { + return fmt.Errorf("wrong type value when setValue for VarCharPrimaryKey") + } + + vcp.Value = value + return nil +} + +func (vcp *VarCharPrimaryKey) GetValue() interface{} { + return vcp.Value } func (vcp *VarCharPrimaryKey) Type() schemapb.DataType { @@ -293,9 +267,7 @@ func GenPrimaryKeyByRawData(data interface{}, pkType schemapb.DataType) (Primary } case schemapb.DataType_VarChar: result = &VarCharPrimaryKey{ - BaseStringPrimaryKey: BaseStringPrimaryKey{ - Value: data.(string), - }, + Value: data.(string), } default: return nil, fmt.Errorf("not supported primary data type") diff --git a/internal/storage/serde.go b/internal/storage/serde.go index 636e505b83c76..6ec7b38c92c9f 100644 --- a/internal/storage/serde.go +++ b/internal/storage/serde.go @@ -199,10 +199,16 @@ func (crr *compositeRecordReader) Close() { } type serdeEntry struct { - arrowType func(int) arrow.DataType + // arrowType returns the arrow type for the given dimension + arrowType func(int) arrow.DataType + // deserialize deserializes the i-th element in the array, returns the value and ok. + // null is deserialized to nil without checking the type nullability. deserialize func(arrow.Array, int) (any, bool) - serialize func(array.Builder, any) bool - sizeof func(any) uint64 + // serialize serializes the value to the builder, returns ok. + // nil is serialized to null without checking the type nullability. 
+ serialize func(array.Builder, any) bool + // sizeof returns the size in bytes of the value + sizeof func(any) uint64 } var serdeMap = func() map[schemapb.DataType]serdeEntry { @@ -212,12 +218,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.FixedWidthTypes.Boolean }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Boolean); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.BooleanBuilder); ok { if v, ok := v.(bool); ok { builder.Append(v) @@ -235,12 +248,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Int8 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Int8); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Int8Builder); ok { if v, ok := v.(int8); ok { builder.Append(v) @@ -258,12 +278,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Int16 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Int16); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Int16Builder); ok { if v, ok := v.(int16); ok { builder.Append(v) @@ -281,12 +308,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Int32 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Int32); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Int32Builder); ok { if v, ok := v.(int32); ok { builder.Append(v) @@ -304,12 +338,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Int64 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Int64); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Int64Builder); ok { if v, ok := v.(int64); ok { builder.Append(v) @@ -327,12 +368,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Float32 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Float32); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Float32Builder); ok { if v, ok := v.(float32); ok { builder.Append(v) @@ -350,12 +398,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.PrimitiveTypes.Float64 }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Float64); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.Float64Builder); ok { if v, ok := v.(float64); 
ok { builder.Append(v) @@ -373,12 +428,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.BinaryTypes.String }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.String); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.StringBuilder); ok { if v, ok := v.(string); ok { builder.Append(v) @@ -388,6 +450,9 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return false }, func(v any) uint64 { + if v == nil { + return 8 + } return uint64(len(v.(string))) }, } @@ -399,6 +464,9 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.BinaryTypes.Binary }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Binary); ok && i < arr.Len() { v := &schemapb.ScalarField{} if err := proto.Unmarshal(arr.Value(i), v); err == nil { @@ -408,6 +476,10 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.BinaryBuilder); ok { if vv, ok := v.(*schemapb.ScalarField); ok { if bytes, err := proto.Marshal(vv); err == nil { @@ -419,11 +491,17 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return false }, func(v any) uint64 { + if v == nil { + return 8 + } return uint64(v.(*schemapb.ScalarField).XXX_Size()) }, } sizeOfBytes := func(v any) uint64 { + if v == nil { + return 8 + } return uint64(len(v.([]byte))) } @@ -432,12 +510,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return arrow.BinaryTypes.Binary }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.Binary); ok && i < arr.Len() { return arr.Value(i), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.BinaryBuilder); ok { if v, ok := v.([]byte); ok { builder.Append(v) @@ -452,12 +537,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { m[schemapb.DataType_JSON] = byteEntry fixedSizeDeserializer := func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.FixedSizeBinary); ok && i < arr.Len() { return arr.Value(i), true } return nil, false } fixedSizeSerializer := func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.FixedSizeBinaryBuilder); ok { if v, ok := v.([]byte); ok { builder.Append(v) @@ -496,12 +588,19 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return &arrow.FixedSizeBinaryType{ByteWidth: i * 4} }, func(a arrow.Array, i int) (any, bool) { + if a.IsNull(i) { + return nil, true + } if arr, ok := a.(*array.FixedSizeBinary); ok && i < arr.Len() { return arrow.Float32Traits.CastFromBytes(arr.Value(i)), true } return nil, false }, func(b array.Builder, v any) bool { + if v == nil { + b.AppendNull() + return true + } if builder, ok := b.(*array.FixedSizeBinaryBuilder); ok { if vv, ok := v.([]float32); ok { dim := len(vv) @@ -518,6 +617,9 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry { return false }, func(v any) uint64 { + if v == nil { + return 8 + } return uint64(len(v.([]float32)) * 4) }, } @@ -588,9 +690,8 @@ func (deser *DeserializeReader[T]) Next() error { deser.pos = 0 deser.rec = deser.rr.Record() - 
if deser.values == nil { - deser.values = make([]T, deser.rec.Len()) - } + // allocate new slice preventing overwrite previous batch + deser.values = make([]T, deser.rec.Len()) if err := deser.deserializer(deser.rec, deser.values); err != nil { return err } @@ -640,11 +741,15 @@ func NewBinlogDeserializeReader(blobs []*Blob, PKfieldID UniqueID) (*Deserialize m := value.Value.(map[FieldID]interface{}) for j, dt := range r.Schema() { - d, ok := serdeMap[dt].deserialize(r.Column(j), i) - if ok { - m[j] = d // TODO: avoid memory copy here. + if r.Column(j).IsNull(i) { + m[j] = nil } else { - return errors.New(fmt.Sprintf("unexpected type %s", dt)) + d, ok := serdeMap[dt].deserialize(r.Column(j), i) + if ok { + m[j] = d // TODO: avoid memory copy here. + } else { + return errors.New(fmt.Sprintf("unexpected type %s", dt)) + } } } @@ -901,8 +1006,9 @@ func (bsw *BinlogStreamWriter) GetRecordWriter() (RecordWriter, error) { fid := bsw.fieldSchema.FieldID dim, _ := typeutil.GetDim(bsw.fieldSchema) rw, err := newSingleFieldRecordWriter(fid, arrow.Field{ - Name: strconv.Itoa(int(fid)), - Type: serdeMap[bsw.fieldSchema.DataType].arrowType(int(dim)), + Name: strconv.Itoa(int(fid)), + Type: serdeMap[bsw.fieldSchema.DataType].arrowType(int(dim)), + Nullable: true, // No nullable check here. }, &bsw.buf) if err != nil { return nil, err @@ -925,9 +1031,10 @@ func (bsw *BinlogStreamWriter) Finalize() (*Blob, error) { return nil, err } return &Blob{ - Key: strconv.Itoa(int(bsw.fieldSchema.FieldID)), - Value: b.Bytes(), - RowNum: int64(bsw.rw.numRows), + Key: strconv.Itoa(int(bsw.fieldSchema.FieldID)), + Value: b.Bytes(), + RowNum: int64(bsw.rw.numRows), + MemorySize: int64(bsw.memorySize), }, nil } @@ -1016,6 +1123,7 @@ func NewBinlogSerializeWriter(schema *schemapb.CollectionSchema, partitionID, se if !ok { return nil, 0, errors.New(fmt.Sprintf("serialize error on type %s", types[fid])) } + writers[fid].memorySize += int(typeEntry.sizeof(e)) memorySize += typeEntry.sizeof(e) } } @@ -1027,8 +1135,9 @@ func NewBinlogSerializeWriter(schema *schemapb.CollectionSchema, partitionID, se arrays[i] = builder.NewArray() builder.Release() fields[i] = arrow.Field{ - Name: strconv.Itoa(int(fid)), - Type: arrays[i].DataType(), + Name: strconv.Itoa(int(fid)), + Type: arrays[i].DataType(), + Nullable: true, // No nullable check here. 
} field2Col[fid] = i i++ diff --git a/internal/storage/serde_test.go b/internal/storage/serde_test.go index 21a871cb5e606..8cdf15b847532 100644 --- a/internal/storage/serde_test.go +++ b/internal/storage/serde_test.go @@ -160,6 +160,7 @@ func TestBinlogSerializeWriter(t *testing.T) { blob, err := w.Finalize() assert.NoError(t, err) assert.NotNil(t, blob) + assert.True(t, blob.MemorySize > 0) newblobs[i] = blob i++ } @@ -177,6 +178,68 @@ func TestBinlogSerializeWriter(t *testing.T) { }) } +func TestNull(t *testing.T) { + t.Run("test null", func(t *testing.T) { + schema := generateTestSchema() + // Copy write the generated data + writers := NewBinlogStreamWriters(0, 0, 0, schema.Fields) + writer, err := NewBinlogSerializeWriter(schema, 0, 0, writers, 1024) + assert.NoError(t, err) + + m := make(map[FieldID]any) + m[common.RowIDField] = int64(0) + m[common.TimeStampField] = int64(0) + m[10] = nil + m[11] = nil + m[12] = nil + m[13] = nil + m[14] = nil + m[15] = nil + m[16] = nil + m[17] = nil + m[18] = nil + m[19] = nil + m[101] = nil + m[102] = nil + m[103] = nil + m[104] = nil + m[105] = nil + m[106] = nil + pk, err := GenPrimaryKeyByRawData(m[common.RowIDField], schemapb.DataType_Int64) + assert.NoError(t, err) + + value := &Value{ + ID: 0, + PK: pk, + Timestamp: 0, + IsDeleted: false, + Value: m, + } + writer.Write(value) + err = writer.Close() + assert.NoError(t, err) + + // Read from the written data + blobs := make([]*Blob, len(writers)) + i := 0 + for _, w := range writers { + blob, err := w.Finalize() + assert.NoError(t, err) + assert.NotNil(t, blob) + blobs[i] = blob + i++ + } + reader, err := NewBinlogDeserializeReader(blobs, common.RowIDField) + assert.NoError(t, err) + defer reader.Close() + err = reader.Next() + assert.NoError(t, err) + + readValue := reader.Value() + assert.Equal(t, value, readValue) + }) +} + func TestSerDe(t *testing.T) { type args struct { dt schemapb.DataType @@ -189,33 +252,45 @@ func TestSerDe(t *testing.T) { want1 bool }{ {"test bool", args{dt: schemapb.DataType_Bool, v: true}, true, true}, - {"test bool negative", args{dt: schemapb.DataType_Bool, v: nil}, nil, false}, + {"test bool null", args{dt: schemapb.DataType_Bool, v: nil}, nil, true}, + {"test bool negative", args{dt: schemapb.DataType_Bool, v: -1}, nil, false}, {"test int8", args{dt: schemapb.DataType_Int8, v: int8(1)}, int8(1), true}, - {"test int8 negative", args{dt: schemapb.DataType_Int8, v: nil}, nil, false}, + {"test int8 null", args{dt: schemapb.DataType_Int8, v: nil}, nil, true}, + {"test int8 negative", args{dt: schemapb.DataType_Int8, v: true}, nil, false}, {"test int16", args{dt: schemapb.DataType_Int16, v: int16(1)}, int16(1), true}, - {"test int16 negative", args{dt: schemapb.DataType_Int16, v: nil}, nil, false}, + {"test int16 null", args{dt: schemapb.DataType_Int16, v: nil}, nil, true}, + {"test int16 negative", args{dt: schemapb.DataType_Int16, v: true}, nil, false}, {"test int32", args{dt: schemapb.DataType_Int32, v: int32(1)}, int32(1), true}, - {"test int32 negative", args{dt: schemapb.DataType_Int32, v: nil}, nil, false}, + {"test int32 null", args{dt: schemapb.DataType_Int32, v: nil}, nil, true}, + {"test int32 negative", args{dt: schemapb.DataType_Int32, v: true}, nil, false}, {"test int64", args{dt: schemapb.DataType_Int64, v: int64(1)}, int64(1), true}, - {"test int64 negative", args{dt: schemapb.DataType_Int64, v: nil}, nil, false}, + {"test int64 null", args{dt: schemapb.DataType_Int64, v: nil}, nil, true}, + {"test int64 negative", args{dt: schemapb.DataType_Int64, v: 
true}, nil, false}, {"test float32", args{dt: schemapb.DataType_Float, v: float32(1)}, float32(1), true}, - {"test float32 negative", args{dt: schemapb.DataType_Float, v: nil}, nil, false}, + {"test float32 null", args{dt: schemapb.DataType_Float, v: nil}, nil, true}, + {"test float32 negative", args{dt: schemapb.DataType_Float, v: -1}, nil, false}, {"test float64", args{dt: schemapb.DataType_Double, v: float64(1)}, float64(1), true}, - {"test float64 negative", args{dt: schemapb.DataType_Double, v: nil}, nil, false}, + {"test float64 null", args{dt: schemapb.DataType_Double, v: nil}, nil, true}, + {"test float64 negative", args{dt: schemapb.DataType_Double, v: -1}, nil, false}, {"test string", args{dt: schemapb.DataType_String, v: "test"}, "test", true}, - {"test string negative", args{dt: schemapb.DataType_String, v: nil}, nil, false}, + {"test string null", args{dt: schemapb.DataType_String, v: nil}, nil, true}, + {"test string negative", args{dt: schemapb.DataType_String, v: -1}, nil, false}, {"test varchar", args{dt: schemapb.DataType_VarChar, v: "test"}, "test", true}, - {"test varchar negative", args{dt: schemapb.DataType_VarChar, v: nil}, nil, false}, + {"test varchar null", args{dt: schemapb.DataType_VarChar, v: nil}, nil, true}, + {"test varchar negative", args{dt: schemapb.DataType_VarChar, v: -1}, nil, false}, {"test array negative", args{dt: schemapb.DataType_Array, v: "{}"}, nil, false}, - {"test array negative null", args{dt: schemapb.DataType_Array, v: nil}, nil, false}, + {"test array null", args{dt: schemapb.DataType_Array, v: nil}, nil, true}, {"test json", args{dt: schemapb.DataType_JSON, v: []byte("{}")}, []byte("{}"), true}, - {"test json negative", args{dt: schemapb.DataType_JSON, v: nil}, nil, false}, + {"test json null", args{dt: schemapb.DataType_JSON, v: nil}, nil, true}, + {"test json negative", args{dt: schemapb.DataType_JSON, v: -1}, nil, false}, {"test float vector", args{dt: schemapb.DataType_FloatVector, v: []float32{1.0}}, []float32{1.0}, true}, - {"test float vector negative", args{dt: schemapb.DataType_FloatVector, v: nil}, nil, false}, + {"test float vector null", args{dt: schemapb.DataType_FloatVector, v: nil}, nil, true}, + {"test float vector negative", args{dt: schemapb.DataType_FloatVector, v: []int{1}}, nil, false}, {"test bool vector", args{dt: schemapb.DataType_BinaryVector, v: []byte{0xff}}, []byte{0xff}, true}, {"test float16 vector", args{dt: schemapb.DataType_Float16Vector, v: []byte{0xff, 0xff}}, []byte{0xff, 0xff}, true}, {"test bfloat16 vector", args{dt: schemapb.DataType_BFloat16Vector, v: []byte{0xff, 0xff}}, []byte{0xff, 0xff}, true}, - {"test bfloat16 vector negative", args{dt: schemapb.DataType_BFloat16Vector, v: nil}, nil, false}, + {"test bfloat16 vector null", args{dt: schemapb.DataType_BFloat16Vector, v: nil}, nil, true}, + {"test bfloat16 vector negative", args{dt: schemapb.DataType_BFloat16Vector, v: -1}, nil, false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/internal/storage/stats.go b/internal/storage/stats.go index 7914e04b80ef5..75da19ab5ecd6 100644 --- a/internal/storage/stats.go +++ b/internal/storage/stats.go @@ -20,9 +20,10 @@ import ( "encoding/json" "fmt" - "github.com/bits-and-blooms/bloom/v3" + "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/merr" @@ -31,13 +32,14 @@ import ( // 
PrimaryKeyStats contains statistics data for pk column type PrimaryKeyStats struct { - FieldID int64 `json:"fieldID"` - Max int64 `json:"max"` // useless, will delete - Min int64 `json:"min"` // useless, will delete - BF *bloom.BloomFilter `json:"bf"` - PkType int64 `json:"pkType"` - MaxPk PrimaryKey `json:"maxPk"` - MinPk PrimaryKey `json:"minPk"` + FieldID int64 `json:"fieldID"` + Max int64 `json:"max"` // useless, will delete + Min int64 `json:"min"` // useless, will delete + BFType bloomfilter.BFType `json:"bfType"` + BF bloomfilter.BloomFilterInterface `json:"bf"` + PkType int64 `json:"pkType"` + MaxPk PrimaryKey `json:"maxPk"` + MinPk PrimaryKey `json:"minPk"` } // UnmarshalJSON unmarshal bytes to PrimaryKeyStats @@ -110,12 +112,22 @@ func (stats *PrimaryKeyStats) UnmarshalJSON(data []byte) error { } } - if bfMessage, ok := messageMap["bf"]; ok && bfMessage != nil { - stats.BF = &bloom.BloomFilter{} - err = stats.BF.UnmarshalJSON(*bfMessage) + bfType := bloomfilter.BasicBF + if bfTypeMessage, ok := messageMap["bfType"]; ok && bfTypeMessage != nil { + err := json.Unmarshal(*bfTypeMessage, &bfType) if err != nil { return err } + stats.BFType = bfType + } + + if bfMessage, ok := messageMap["bf"]; ok && bfMessage != nil { + bf, err := bloomfilter.UnmarshalJSON(*bfMessage, bfType) + if err != nil { + log.Warn("Failed to unmarshal bloom filter, use AlwaysTrueBloomFilter instead of return err", zap.Error(err)) + bf = bloomfilter.AlwaysTrueBloomFilter + } + stats.BF = bf } return nil @@ -189,10 +201,16 @@ func NewPrimaryKeyStats(fieldID, pkType, rowNum int64) (*PrimaryKeyStats, error) if rowNum <= 0 { return nil, merr.WrapErrParameterInvalidMsg("zero or negative row num", rowNum) } + + bfType := paramtable.Get().CommonCfg.BloomFilterType.GetValue() return &PrimaryKeyStats{ FieldID: fieldID, PkType: pkType, - BF: bloom.NewWithEstimates(uint(rowNum), paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), + BFType: bloomfilter.BFTypeFromString(bfType), + BF: bloomfilter.NewBloomFilterWithType( + uint(rowNum), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + bfType), }, nil } @@ -228,10 +246,15 @@ func (sw *StatsWriter) Generate(stats *PrimaryKeyStats) error { // GenerateByData writes Int64Stats or StringStats from @msgs with @fieldID to @buffer func (sw *StatsWriter) GenerateByData(fieldID int64, pkType schemapb.DataType, msgs FieldData) error { + bfType := paramtable.Get().CommonCfg.BloomFilterType.GetValue() stats := &PrimaryKeyStats{ FieldID: fieldID, PkType: int64(pkType), - BF: bloom.NewWithEstimates(uint(msgs.RowNum()), paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat()), + BFType: bloomfilter.BFTypeFromString(bfType), + BF: bloomfilter.NewBloomFilterWithType( + uint(msgs.RowNum()), + paramtable.Get().CommonCfg.MaxBloomFalsePositive.GetAsFloat(), + bfType), } stats.UpdateByMsgs(msgs) diff --git a/internal/storage/stats_test.go b/internal/storage/stats_test.go index 709f49697f28c..cccd3d9f9e65b 100644 --- a/internal/storage/stats_test.go +++ b/internal/storage/stats_test.go @@ -20,12 +20,13 @@ import ( "encoding/json" "testing" - "github.com/bits-and-blooms/bloom/v3" "github.com/stretchr/testify/assert" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/util/bloomfilter" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/paramtable" ) func TestStatsWriter_Int64PrimaryKey(t *testing.T) { @@ -124,11 +125,13 @@ func 
TestStatsWriter_UpgradePrimaryKey(t *testing.T) { Data: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9}, } + bfType := paramtable.Get().CommonCfg.BloomFilterType.GetValue() stats := &PrimaryKeyStats{ FieldID: common.RowIDField, Min: 1, Max: 9, - BF: bloom.NewWithEstimates(100000, 0.05), + BFType: bloomfilter.BFTypeFromString(bfType), + BF: bloomfilter.NewBloomFilterWithType(100000, 0.05, bfType), } b := make([]byte, 8) @@ -174,3 +177,30 @@ func TestDeserializeEmptyStats(t *testing.T) { _, err := DeserializeStats([]*Blob{blob}) assert.NoError(t, err) } + +func TestMarshalStats(t *testing.T) { + stat, err := NewPrimaryKeyStats(1, int64(schemapb.DataType_Int64), 100000) + assert.NoError(t, err) + + for i := 0; i < 10000; i++ { + stat.Update(NewInt64PrimaryKey(int64(i))) + } + + sw := &StatsWriter{} + sw.GenerateList([]*PrimaryKeyStats{stat}) + bytes := sw.GetBuffer() + + sr := &StatsReader{} + sr.SetBuffer(bytes) + stat1, err := sr.GetPrimaryKeyStatsList() + assert.NoError(t, err) + assert.Equal(t, 1, len(stat1)) + assert.Equal(t, stat.Min, stat1[0].Min) + assert.Equal(t, stat.Max, stat1[0].Max) + + for i := 0; i < 10000; i++ { + b := make([]byte, 8) + common.Endian.PutUint64(b, uint64(i)) + assert.True(t, stat1[0].BF.Test(b)) + } +} diff --git a/internal/types/types.go b/internal/types/types.go index 27acc7cac3d1b..93c85dc9e79ef 100644 --- a/internal/types/types.go +++ b/internal/types/types.go @@ -38,6 +38,7 @@ import ( // Otherwise, the request will pass. Limit also returns limit of limiter. type Limiter interface { Check(dbID int64, collectionIDToPartIDs map[int64][]int64, rt internalpb.RateType, n int) error + Alloc(ctx context.Context, dbID int64, collectionIDToPartIDs map[int64][]int64, rt internalpb.RateType, n int) error } // Component is the interface all services implement diff --git a/internal/util/bloomfilter/bloom_filter.go b/internal/util/bloomfilter/bloom_filter.go new file mode 100644 index 0000000000000..778597844e631 --- /dev/null +++ b/internal/util/bloomfilter/bloom_filter.go @@ -0,0 +1,297 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
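+// Illustrative caller-side sketch (assumptions: only the functions and
+// constants declared below in this new package; "BlockedBloomFilter" is the
+// same string used by the common.bloomFilterType config elsewhere in this diff):
+//
+//	bf := bloomfilter.NewBloomFilterWithType(100000, 0.001, "BlockedBloomFilter")
+//	bf.Add([]byte("pk-1"))
+//	_ = bf.Test([]byte("pk-1")) // true
+//
+//	// Cross-filter probing: hash once, then test many filters of the same type.
+//	locs := bloomfilter.Locations([]byte("pk-1"), bf.K(), bf.Type())
+//	_ = bf.TestLocations(locs) // true
+//
+//	// Serialization round-trip keeps the filter type explicit.
+//	blob, _ := bf.MarshalJSON()
+//	restored, _ := bloomfilter.UnmarshalJSON(blob, bf.Type())
+//	_ = restored.Test([]byte("pk-1")) // true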
+package bloomfilter + +import ( + "encoding/json" + + "github.com/bits-and-blooms/bloom/v3" + "github.com/cockroachdb/errors" + "github.com/greatroar/blobloom" + "github.com/pingcap/log" + "github.com/zeebo/xxh3" + "go.uber.org/zap" +) + +type BFType int + +var AlwaysTrueBloomFilter = &alwaysTrueBloomFilter{} + +const ( + UnsupportedBFName = "Unsupported BloomFilter" + BlockBFName = "BlockedBloomFilter" + BasicBFName = "BasicBloomFilter" + AlwaysTrueBFName = "AlwaysTrueBloomFilter" +) + +const ( + UnsupportedBF BFType = iota + 1 + AlwaysTrueBF // empty bloom filter + BasicBF + BlockedBF +) + +var bfNames = map[BFType]string{ + BasicBF: BlockBFName, + BlockedBF: BasicBFName, + AlwaysTrueBF: AlwaysTrueBFName, + UnsupportedBF: UnsupportedBFName, +} + +func (t BFType) String() string { + return bfNames[t] +} + +func BFTypeFromString(name string) BFType { + switch name { + case BasicBFName: + return BasicBF + case BlockBFName: + return BlockedBF + case AlwaysTrueBFName: + return AlwaysTrueBF + default: + return UnsupportedBF + } +} + +type BloomFilterInterface interface { + Type() BFType + Cap() uint + K() uint + Add(data []byte) + AddString(data string) + Test(data []byte) bool + TestString(data string) bool + TestLocations(locs []uint64) bool + MarshalJSON() ([]byte, error) + UnmarshalJSON(data []byte) error +} + +type basicBloomFilter struct { + inner *bloom.BloomFilter + k uint +} + +func newBasicBloomFilter(capacity uint, fp float64) *basicBloomFilter { + inner := bloom.NewWithEstimates(capacity, fp) + return &basicBloomFilter{ + inner: inner, + k: inner.K(), + } +} + +func (b *basicBloomFilter) Type() BFType { + return BasicBF +} + +func (b *basicBloomFilter) Cap() uint { + return b.inner.Cap() +} + +func (b *basicBloomFilter) K() uint { + return b.k +} + +func (b *basicBloomFilter) Add(data []byte) { + b.inner.Add(data) +} + +func (b *basicBloomFilter) AddString(data string) { + b.inner.AddString(data) +} + +func (b *basicBloomFilter) Test(data []byte) bool { + return b.inner.Test(data) +} + +func (b *basicBloomFilter) TestString(data string) bool { + return b.inner.TestString(data) +} + +func (b *basicBloomFilter) TestLocations(locs []uint64) bool { + return b.inner.TestLocations(locs[:b.k]) +} + +func (b basicBloomFilter) MarshalJSON() ([]byte, error) { + return b.inner.MarshalJSON() +} + +func (b *basicBloomFilter) UnmarshalJSON(data []byte) error { + inner := &bloom.BloomFilter{} + inner.UnmarshalJSON(data) + b.inner = inner + b.k = inner.K() + return nil +} + +// impl Blocked Bloom filter with blobloom and xxh3 hash +type blockedBloomFilter struct { + inner *blobloom.Filter + k uint +} + +func newBlockedBloomFilter(capacity uint, fp float64) *blockedBloomFilter { + inner := blobloom.NewOptimized(blobloom.Config{ + Capacity: uint64(capacity), + FPRate: fp, + }) + return &blockedBloomFilter{ + inner: inner, + k: inner.K(), + } +} + +func (b *blockedBloomFilter) Type() BFType { + return BlockedBF +} + +func (b *blockedBloomFilter) Cap() uint { + return uint(b.inner.NumBits()) +} + +func (b *blockedBloomFilter) K() uint { + return b.k +} + +func (b *blockedBloomFilter) Add(data []byte) { + loc := xxh3.Hash(data) + b.inner.Add(loc) +} + +func (b *blockedBloomFilter) AddString(data string) { + h := xxh3.HashString(data) + b.inner.Add(h) +} + +func (b *blockedBloomFilter) Test(data []byte) bool { + loc := xxh3.Hash(data) + return b.inner.Has(loc) +} + +func (b *blockedBloomFilter) TestString(data string) bool { + h := xxh3.HashString(data) + return b.inner.Has(h) +} + +func (b 
*blockedBloomFilter) TestLocations(locs []uint64) bool { + return b.inner.TestLocations(locs) +} + +func (b blockedBloomFilter) MarshalJSON() ([]byte, error) { + return b.inner.MarshalJSON() +} + +func (b *blockedBloomFilter) UnmarshalJSON(data []byte) error { + inner := &blobloom.Filter{} + inner.UnmarshalJSON(data) + b.inner = inner + b.k = inner.K() + + return nil +} + +// always true bloom filter is used when deserialize stat log failed. +// Notice: add item to empty bloom filter is not permitted. and all Test Func will return false positive. +type alwaysTrueBloomFilter struct{} + +func (b *alwaysTrueBloomFilter) Type() BFType { + return AlwaysTrueBF +} + +func (b *alwaysTrueBloomFilter) Cap() uint { + return 0 +} + +func (b *alwaysTrueBloomFilter) K() uint { + return 0 +} + +func (b *alwaysTrueBloomFilter) Add(data []byte) { +} + +func (b *alwaysTrueBloomFilter) AddString(data string) { +} + +func (b *alwaysTrueBloomFilter) Test(data []byte) bool { + return true +} + +func (b *alwaysTrueBloomFilter) TestString(data string) bool { + return true +} + +func (b *alwaysTrueBloomFilter) TestLocations(locs []uint64) bool { + return true +} + +func (b *alwaysTrueBloomFilter) MarshalJSON() ([]byte, error) { + return []byte{}, nil +} + +func (b *alwaysTrueBloomFilter) UnmarshalJSON(data []byte) error { + return nil +} + +func NewBloomFilterWithType(capacity uint, fp float64, typeName string) BloomFilterInterface { + bfType := BFTypeFromString(typeName) + switch bfType { + case BlockedBF: + return newBlockedBloomFilter(capacity, fp) + case BasicBF: + return newBasicBloomFilter(capacity, fp) + default: + log.Info("unsupported bloom filter type, using block bloom filter", zap.String("type", typeName)) + return newBlockedBloomFilter(capacity, fp) + } +} + +func UnmarshalJSON(data []byte, bfType BFType) (BloomFilterInterface, error) { + switch bfType { + case BlockedBF: + bf := &blockedBloomFilter{} + err := json.Unmarshal(data, bf) + if err != nil { + return nil, errors.Wrap(err, "failed to unmarshal blocked bloom filter") + } + return bf, nil + case BasicBF: + bf := &basicBloomFilter{} + err := json.Unmarshal(data, bf) + if err != nil { + return nil, errors.Wrap(err, "failed to unmarshal blocked bloom filter") + } + return bf, nil + case AlwaysTrueBF: + return AlwaysTrueBloomFilter, nil + default: + return nil, errors.Errorf("unsupported bloom filter type: %d", bfType) + } +} + +func Locations(data []byte, k uint, bfType BFType) []uint64 { + switch bfType { + case BasicBF: + return bloom.Locations(data, k) + case BlockedBF: + return blobloom.Locations(xxh3.Hash(data), k) + case AlwaysTrueBF: + return nil + default: + log.Info("unsupported bloom filter type, using block bloom filter", zap.String("type", bfType.String())) + return nil + } +} diff --git a/internal/util/bloomfilter/bloom_filter_test.go b/internal/util/bloomfilter/bloom_filter_test.go new file mode 100644 index 0000000000000..5774d205b9853 --- /dev/null +++ b/internal/util/bloomfilter/bloom_filter_test.go @@ -0,0 +1,220 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package bloomfilter + +import ( + "fmt" + "testing" + "time" + + "github.com/bits-and-blooms/bloom/v3" + "github.com/stretchr/testify/assert" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-storage/go/common/log" +) + +func TestPerformance(t *testing.T) { + capacity := 1000000 + fpr := 0.001 + + keys := make([][]byte, 0) + for i := 0; i < capacity; i++ { + keys = append(keys, []byte(fmt.Sprintf("key%d", i))) + } + + bf1 := newBlockedBloomFilter(uint(capacity), fpr) + start1 := time.Now() + for _, key := range keys { + bf1.Add(key) + } + log.Info("Block BF construct time", zap.Duration("time", time.Since(start1))) + data, err := bf1.MarshalJSON() + assert.NoError(t, err) + log.Info("Block BF size", zap.Int("size", len(data))) + + start2 := time.Now() + for _, key := range keys { + bf1.Test(key) + } + log.Info("Block BF Test cost", zap.Duration("time", time.Since(start2))) + + bf2 := newBasicBloomFilter(uint(capacity), fpr) + start3 := time.Now() + for _, key := range keys { + bf2.Add(key) + } + log.Info("Basic BF construct time", zap.Duration("time", time.Since(start3))) + data, err = bf2.MarshalJSON() + assert.NoError(t, err) + log.Info("Basic BF size", zap.Int("size", len(data))) + + start4 := time.Now() + for _, key := range keys { + bf2.Test(key) + } + log.Info("Basic BF Test cost", zap.Duration("time", time.Since(start4))) +} + +func TestPerformance_MultiBF(t *testing.T) { + capacity := 100000 + fpr := 0.001 + + keys := make([][]byte, 0) + for i := 0; i < capacity; i++ { + keys = append(keys, []byte(fmt.Sprintf("key%d", time.Now().UnixNano()+int64(i)))) + } + + bfNum := 100 + bfs1 := make([]*blockedBloomFilter, 0) + start1 := time.Now() + for i := 0; i < bfNum; i++ { + bf1 := newBlockedBloomFilter(uint(capacity), fpr) + for _, key := range keys { + bf1.Add(key) + } + bfs1 = append(bfs1, bf1) + } + + log.Info("Block BF construct cost", zap.Duration("time", time.Since(start1))) + + start3 := time.Now() + for _, key := range keys { + locations := Locations(key, bfs1[0].K(), BlockedBF) + for i := 0; i < bfNum; i++ { + bfs1[i].TestLocations(locations) + } + } + log.Info("Block BF TestLocation cost", zap.Duration("time", time.Since(start3))) + + bfs2 := make([]*basicBloomFilter, 0) + start1 = time.Now() + for i := 0; i < bfNum; i++ { + bf2 := newBasicBloomFilter(uint(capacity), fpr) + for _, key := range keys { + bf2.Add(key) + } + bfs2 = append(bfs2, bf2) + } + + log.Info("Basic BF construct cost", zap.Duration("time", time.Since(start1))) + + start3 = time.Now() + for _, key := range keys { + locations := Locations(key, bfs1[0].K(), BasicBF) + for i := 0; i < bfNum; i++ { + bfs2[i].TestLocations(locations) + } + } + log.Info("Basic BF TestLocation cost", zap.Duration("time", time.Since(start3))) +} + +func TestPerformance_Capacity(t *testing.T) { + fpr := 0.001 + + for _, capacity := range []int64{100, 1000, 10000, 100000, 1000000} { + keys := make([][]byte, 0) + for i := 0; i < int(capacity); i++ { + keys = append(keys, []byte(fmt.Sprintf("key%d", time.Now().UnixNano()+int64(i)))) + } + + start1 := time.Now() + bf1 := newBlockedBloomFilter(uint(capacity), fpr) + for _, 
key := range keys { + bf1.Add(key) + } + + log.Info("Block BF construct cost", zap.Duration("time", time.Since(start1))) + + testKeys := make([][]byte, 0) + for i := 0; i < 10000; i++ { + testKeys = append(testKeys, []byte(fmt.Sprintf("key%d", time.Now().UnixNano()+int64(i)))) + } + + start3 := time.Now() + for _, key := range testKeys { + locations := Locations(key, bf1.K(), bf1.Type()) + bf1.TestLocations(locations) + } + _, k := bloom.EstimateParameters(uint(capacity), fpr) + log.Info("Block BF TestLocation cost", zap.Duration("time", time.Since(start3)), zap.Int("k", int(k)), zap.Int64("capacity", capacity)) + } +} + +func TestMarshal(t *testing.T) { + capacity := 200000 + fpr := 0.001 + + keys := make([][]byte, 0) + for i := 0; i < capacity; i++ { + keys = append(keys, []byte(fmt.Sprintf("key%d", i))) + } + + // test basic bf + basicBF := newBasicBloomFilter(uint(capacity), fpr) + for _, key := range keys { + basicBF.Add(key) + } + data, err := basicBF.MarshalJSON() + assert.NoError(t, err) + basicBF2, err := UnmarshalJSON(data, BasicBF) + assert.NoError(t, err) + assert.Equal(t, basicBF.Type(), basicBF2.Type()) + + for _, key := range keys { + assert.True(t, basicBF2.Test(key)) + } + + // test block bf + blockBF := newBlockedBloomFilter(uint(capacity), fpr) + for _, key := range keys { + blockBF.Add(key) + } + data, err = blockBF.MarshalJSON() + assert.NoError(t, err) + blockBF2, err := UnmarshalJSON(data, BlockedBF) + assert.NoError(t, err) + assert.Equal(t, blockBF.Type(), blockBF.Type()) + for _, key := range keys { + assert.True(t, blockBF2.Test(key)) + } + + // test compatible with bits-and-blooms/bloom + bf := bloom.NewWithEstimates(uint(capacity), fpr) + for _, key := range keys { + bf.Add(key) + } + data, err = bf.MarshalJSON() + assert.NoError(t, err) + bf2, err := UnmarshalJSON(data, BasicBF) + assert.NoError(t, err) + for _, key := range keys { + assert.True(t, bf2.Test(key)) + } + + // test empty bloom filter + emptyBF := AlwaysTrueBloomFilter + for _, key := range keys { + bf.Add(key) + } + data, err = emptyBF.MarshalJSON() + assert.NoError(t, err) + emptyBF2, err := UnmarshalJSON(data, AlwaysTrueBF) + assert.NoError(t, err) + for _, key := range keys { + assert.True(t, emptyBF2.Test(key)) + } +} diff --git a/internal/util/flowgraph/input_node.go b/internal/util/flowgraph/input_node.go index 24eeff9b4e248..eed9850025639 100644 --- a/internal/util/flowgraph/input_node.go +++ b/internal/util/flowgraph/input_node.go @@ -43,13 +43,15 @@ const ( // InputNode is the entry point of flowgragh type InputNode struct { BaseNode - input <-chan *msgstream.MsgPack - lastMsg *msgstream.MsgPack - name string - role string - nodeID int64 - collectionID int64 - dataType string + input <-chan *msgstream.MsgPack + lastMsg *msgstream.MsgPack + name string + role string + nodeID int64 + nodeIDStr string + collectionID int64 + collectionIDStr string + dataType string closeGracefully *atomic.Bool @@ -117,11 +119,11 @@ func (inNode *InputNode) Operate(in []Msg) []Msg { sub := tsoutil.SubByNow(msgPack.EndTs) if inNode.role == typeutil.DataNodeRole { metrics.DataNodeConsumeMsgCount. - WithLabelValues(fmt.Sprint(inNode.nodeID), inNode.dataType, fmt.Sprint(inNode.collectionID)). + WithLabelValues(inNode.nodeIDStr, inNode.dataType, inNode.collectionIDStr). Inc() metrics.DataNodeConsumeTimeTickLag. - WithLabelValues(fmt.Sprint(inNode.nodeID), inNode.dataType, fmt.Sprint(inNode.collectionID)). + WithLabelValues(inNode.nodeIDStr, inNode.dataType, inNode.collectionIDStr). 
Set(float64(sub)) } @@ -192,7 +194,9 @@ func NewInputNode(input <-chan *msgstream.MsgPack, nodeName string, maxQueueLeng name: nodeName, role: role, nodeID: nodeID, + nodeIDStr: fmt.Sprint(nodeID), collectionID: collectionID, + collectionIDStr: fmt.Sprint(collectionID), dataType: dataType, closeGracefully: atomic.NewBool(CloseImmediately), skipCount: 0, diff --git a/internal/util/flowgraph/node.go b/internal/util/flowgraph/node.go index 0ae56f955efe2..f38a65aea4891 100644 --- a/internal/util/flowgraph/node.go +++ b/internal/util/flowgraph/node.go @@ -83,16 +83,16 @@ func (nodeCtxManager *nodeCtxManager) workNodeStart() { inputNode := nodeCtxManager.inputNodeCtx curNode := inputNode // tt checker start - var checker *timerecord.GroupChecker + var checker *timerecord.Checker if enableTtChecker { - checker = timerecord.GetGroupChecker("fgNode", nodeCtxTtInterval, func(list []string) { + manager := timerecord.GetCheckerManger("fgNode", nodeCtxTtInterval, func(list []string) { log.Warn("some node(s) haven't received input", zap.Strings("list", list), zap.Duration("duration ", nodeCtxTtInterval)) }) for curNode != nil { name := fmt.Sprintf("nodeCtxTtChecker-%s", curNode.node.Name()) - checker.Check(name) + checker = timerecord.NewChecker(name, manager) curNode = curNode.downstream - defer checker.Remove(name) + defer checker.Close() } } @@ -138,7 +138,7 @@ func (nodeCtxManager *nodeCtxManager) workNodeStart() { curNode.downstream.inputChannel <- output } if enableTtChecker { - checker.Check(fmt.Sprintf("nodeCtxTtChecker-%s", curNode.node.Name())) + checker.Check() } curNode = curNode.downstream } diff --git a/internal/util/importutilv2/json/reader_test.go b/internal/util/importutilv2/json/reader_test.go index c46954ead8418..38dc64d86ed93 100644 --- a/internal/util/importutilv2/json/reader_test.go +++ b/internal/util/importutilv2/json/reader_test.go @@ -24,7 +24,6 @@ import ( "strings" "testing" - "github.com/samber/lo" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" "golang.org/x/exp/slices" @@ -98,64 +97,16 @@ func (suite *ReaderSuite) run(dataType schemapb.DataType, elemType schemapb.Data }, }, } + insertData, err := testutil.CreateInsertData(schema, suite.numRows) suite.NoError(err) - rows := make([]map[string]any, 0, suite.numRows) - fieldIDToField := lo.KeyBy(schema.GetFields(), func(field *schemapb.FieldSchema) int64 { - return field.GetFieldID() - }) - for i := 0; i < insertData.GetRowNum(); i++ { - data := make(map[int64]interface{}) - for fieldID, v := range insertData.Data { - field := fieldIDToField[fieldID] - dataType := field.GetDataType() - elemType := field.GetElementType() - switch dataType { - case schemapb.DataType_Array: - switch elemType { - case schemapb.DataType_Bool: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetBoolData().GetData() - case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetIntData().GetData() - case schemapb.DataType_Int64: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetLongData().GetData() - case schemapb.DataType_Float: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetFloatData().GetData() - case schemapb.DataType_Double: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetDoubleData().GetData() - case schemapb.DataType_String: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetStringData().GetData() - } - case schemapb.DataType_JSON: - data[fieldID] = string(v.GetRow(i).([]byte)) - case 
schemapb.DataType_BinaryVector: - bytes := v.GetRow(i).([]byte) - ints := make([]int, 0, len(bytes)) - for _, b := range bytes { - ints = append(ints, int(b)) - } - data[fieldID] = ints - case schemapb.DataType_Float16Vector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.Float16BytesToFloat32Vector(bytes) - case schemapb.DataType_BFloat16Vector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.BFloat16BytesToFloat32Vector(bytes) - case schemapb.DataType_SparseFloatVector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.SparseFloatBytesToMap(bytes) - default: - data[fieldID] = v.GetRow(i) - } - } - row := lo.MapKeys(data, func(_ any, fieldID int64) string { - return fieldIDToField[fieldID].GetName() - }) - rows = append(rows, row) - } + + rows, err := testutil.CreateInsertDataRowsForJSON(schema, insertData) + suite.NoError(err) jsonBytes, err := json.Marshal(rows) suite.NoError(err) + type mockReader struct { io.Reader io.Closer diff --git a/internal/util/importutilv2/numpy/reader_test.go b/internal/util/importutilv2/numpy/reader_test.go index f94abb6b1a9ef..3b96e6553d214 100644 --- a/internal/util/importutilv2/numpy/reader_test.go +++ b/internal/util/importutilv2/numpy/reader_test.go @@ -128,60 +128,54 @@ func (suite *ReaderSuite) run(dt schemapb.DataType) { io.ReaderAt io.Seeker } + + var data interface{} for fieldID, fieldData := range insertData.Data { dataType := fieldIDToField[fieldID].GetDataType() + rowNum := fieldData.RowNum() switch dataType { case schemapb.DataType_JSON: - jsonStrs := make([]string, 0, fieldData.RowNum()) - for i := 0; i < fieldData.RowNum(); i++ { + jsonStrs := make([]string, 0, rowNum) + for i := 0; i < rowNum; i++ { row := fieldData.GetRow(i) jsonStrs = append(jsonStrs, string(row.([]byte))) } - reader, err := CreateReader(jsonStrs) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + data = jsonStrs case schemapb.DataType_BinaryVector: - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]byte), dim/8) - chunkedRows := make([][dim / 8]byte, len(chunked)) + rows := fieldData.GetRows().([]byte) + const rowBytes = dim / 8 + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + data = chunkedRows case schemapb.DataType_FloatVector: - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]float32), dim) + rows := fieldData.GetRows().([]float32) + chunked := lo.Chunk(rows, dim) chunkedRows := make([][dim]float32, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + data = chunkedRows case schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector: - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]byte), dim*2) - chunkedRows := make([][dim * 2]byte, len(chunked)) + rows := fieldData.GetRows().([]byte) + const rowBytes = dim * 2 + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - 
cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + data = chunkedRows default: - reader, err := CreateReader(insertData.Data[fieldID].GetRows()) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + data = fieldData.GetRows() } + + reader, err := CreateReader(data) + suite.NoError(err) + cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ + Reader: reader, + }, nil) } reader, err := NewReader(context.Background(), cm, schema, lo.Values(files), math.MaxInt) @@ -268,59 +262,54 @@ func (suite *ReaderSuite) failRun(dt schemapb.DataType, isDynamic bool) { io.ReaderAt io.Seeker } + + var data interface{} for fieldID, fieldData := range insertData.Data { dataType := fieldIDToField[fieldID].GetDataType() - if dataType == schemapb.DataType_JSON { - jsonStrs := make([]string, 0, fieldData.RowNum()) - for i := 0; i < fieldData.RowNum(); i++ { + rowNum := fieldData.RowNum() + switch dataType { + case schemapb.DataType_JSON: + jsonStrs := make([]string, 0, rowNum) + for i := 0; i < rowNum; i++ { row := fieldData.GetRow(i) jsonStrs = append(jsonStrs, string(row.([]byte))) } - reader, err := CreateReader(jsonStrs) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) - } else if dataType == schemapb.DataType_FloatVector { - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]float32), dim) - chunkedRows := make([][dim]float32, len(chunked)) + data = jsonStrs + case schemapb.DataType_BinaryVector: + rows := fieldData.GetRows().([]byte) + const rowBytes = dim / 8 + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) - } else if dataType == schemapb.DataType_Float16Vector || dataType == schemapb.DataType_BFloat16Vector { - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]byte), dim*2) - chunkedRows := make([][dim * 2]byte, len(chunked)) + data = chunkedRows + case schemapb.DataType_FloatVector: + rows := fieldData.GetRows().([]float32) + chunked := lo.Chunk(rows, dim) + chunkedRows := make([][dim]float32, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) - } else if dataType == schemapb.DataType_BinaryVector { - chunked := lo.Chunk(insertData.Data[fieldID].GetRows().([]byte), dim/8) - chunkedRows := make([][dim / 8]byte, len(chunked)) + data = chunkedRows + case schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector: + rows := fieldData.GetRows().([]byte) + const rowBytes = dim * 2 + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) for i, innerSlice := range chunked { copy(chunkedRows[i][:], innerSlice[:]) } - reader, err := CreateReader(chunkedRows) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) - } else { - reader, err := CreateReader(insertData.Data[fieldID].GetRows()) - suite.NoError(err) - cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ - Reader: reader, - }, nil) + 
data = chunkedRows + default: + data = fieldData.GetRows() } + + reader, err := CreateReader(data) + suite.NoError(err) + cm.EXPECT().Reader(mock.Anything, files[fieldID]).Return(&mockReader{ + Reader: reader, + }, nil) } reader, err := NewReader(context.Background(), cm, schema, lo.Values(files), math.MaxInt) diff --git a/internal/util/importutilv2/parquet/field_reader.go b/internal/util/importutilv2/parquet/field_reader.go index 090a5e2a638fe..707bdade50c1e 100644 --- a/internal/util/importutilv2/parquet/field_reader.go +++ b/internal/util/importutilv2/parquet/field_reader.go @@ -130,15 +130,13 @@ func ReadBoolData(pcr *FieldReader, count int64) (any, error) { data := make([]bool, 0, count) for _, chunk := range chunked.Chunks() { dataNums := chunk.Data().Len() - chunkData := make([]bool, dataNums) boolReader, ok := chunk.(*array.Boolean) if !ok { return nil, WrapTypeErr("bool", chunk.DataType().Name(), pcr.field) } for i := 0; i < dataNums; i++ { - chunkData[i] = boolReader.Value(i) + data = append(data, boolReader.Value(i)) } - data = append(data, chunkData...) } if len(data) == 0 { return nil, nil @@ -154,42 +152,40 @@ func ReadIntegerOrFloatData[T constraints.Integer | constraints.Float](pcr *Fiel data := make([]T, 0, count) for _, chunk := range chunked.Chunks() { dataNums := chunk.Data().Len() - chunkData := make([]T, dataNums) switch chunk.DataType().ID() { case arrow.INT8: int8Reader := chunk.(*array.Int8) for i := 0; i < dataNums; i++ { - chunkData[i] = T(int8Reader.Value(i)) + data = append(data, T(int8Reader.Value(i))) } case arrow.INT16: int16Reader := chunk.(*array.Int16) for i := 0; i < dataNums; i++ { - chunkData[i] = T(int16Reader.Value(i)) + data = append(data, T(int16Reader.Value(i))) } case arrow.INT32: int32Reader := chunk.(*array.Int32) for i := 0; i < dataNums; i++ { - chunkData[i] = T(int32Reader.Value(i)) + data = append(data, T(int32Reader.Value(i))) } case arrow.INT64: int64Reader := chunk.(*array.Int64) for i := 0; i < dataNums; i++ { - chunkData[i] = T(int64Reader.Value(i)) + data = append(data, T(int64Reader.Value(i))) } case arrow.FLOAT32: float32Reader := chunk.(*array.Float32) for i := 0; i < dataNums; i++ { - chunkData[i] = T(float32Reader.Value(i)) + data = append(data, T(float32Reader.Value(i))) } case arrow.FLOAT64: float64Reader := chunk.(*array.Float64) for i := 0; i < dataNums; i++ { - chunkData[i] = T(float64Reader.Value(i)) + data = append(data, T(float64Reader.Value(i))) } default: return nil, WrapTypeErr("integer|float", chunk.DataType().Name(), pcr.field) } - data = append(data, chunkData...) } if len(data) == 0 { return nil, nil @@ -205,15 +201,13 @@ func ReadStringData(pcr *FieldReader, count int64) (any, error) { data := make([]string, 0, count) for _, chunk := range chunked.Chunks() { dataNums := chunk.Data().Len() - chunkData := make([]string, dataNums) stringReader, ok := chunk.(*array.String) if !ok { return nil, WrapTypeErr("string", chunk.DataType().Name(), pcr.field) } for i := 0; i < dataNums; i++ { - chunkData[i] = stringReader.Value(i) + data = append(data, stringReader.Value(i)) } - data = append(data, chunkData...) 
} if len(data) == 0 { return nil, nil @@ -297,7 +291,7 @@ func ReadSparseFloatVectorData(pcr *FieldReader, count int64) (any, error) { for _, str := range data.([]string) { rowVec, err := typeutil.CreateSparseFloatRowFromJSON([]byte(str)) if err != nil { - return nil, merr.WrapErrImportFailed(fmt.Sprintf("Invalid JSON string for SparseFloatVector: '%s'", str)) + return nil, merr.WrapErrImportFailed(fmt.Sprintf("Invalid JSON string for SparseFloatVector: '%s', err = %v", str, err)) } byteArr = append(byteArr, rowVec) elemCount := len(rowVec) / 8 diff --git a/internal/util/indexcgowrapper/index.go b/internal/util/indexcgowrapper/index.go index f0850b3b916de..a7cc7d0e9b21c 100644 --- a/internal/util/indexcgowrapper/index.go +++ b/internal/util/indexcgowrapper/index.go @@ -16,6 +16,7 @@ import ( "unsafe" "github.com/golang/protobuf/proto" + "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" @@ -94,9 +95,17 @@ func NewCgoIndex(dtype schemapb.DataType, typeParams, indexParams map[string]str return index, nil } -func CreateIndex(ctx context.Context, buildIndexInfo *BuildIndexInfo) (CodecIndex, error) { +func CreateIndex(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo) (CodecIndex, error) { + buildIndexInfoBlob, err := proto.Marshal(buildIndexInfo) + if err != nil { + log.Ctx(ctx).Warn("marshal buildIndexInfo failed", + zap.String("clusterID", buildIndexInfo.GetClusterID()), + zap.Int64("buildID", buildIndexInfo.GetBuildID()), + zap.Error(err)) + return nil, err + } var indexPtr C.CIndex - status := C.CreateIndex(&indexPtr, buildIndexInfo.cBuildIndexInfo) + status := C.CreateIndex(&indexPtr, (*C.uint8_t)(unsafe.Pointer(&buildIndexInfoBlob[0])), (C.uint64_t)(len(buildIndexInfoBlob))) if err := HandleCStatus(&status, "failed to create index"); err != nil { return nil, err } @@ -109,9 +118,17 @@ func CreateIndex(ctx context.Context, buildIndexInfo *BuildIndexInfo) (CodecInde return index, nil } -func CreateIndexV2(ctx context.Context, buildIndexInfo *BuildIndexInfo) (CodecIndex, error) { +func CreateIndexV2(ctx context.Context, buildIndexInfo *indexcgopb.BuildIndexInfo) (CodecIndex, error) { + buildIndexInfoBlob, err := proto.Marshal(buildIndexInfo) + if err != nil { + log.Ctx(ctx).Warn("marshal buildIndexInfo failed", + zap.String("clusterID", buildIndexInfo.GetClusterID()), + zap.Int64("buildID", buildIndexInfo.GetBuildID()), + zap.Error(err)) + return nil, err + } var indexPtr C.CIndex - status := C.CreateIndexV2(&indexPtr, buildIndexInfo.cBuildIndexInfo) + status := C.CreateIndexV2(&indexPtr, (*C.uint8_t)(unsafe.Pointer(&buildIndexInfoBlob[0])), (C.uint64_t)(len(buildIndexInfoBlob))) if err := HandleCStatus(&status, "failed to create index"); err != nil { return nil, err } diff --git a/internal/util/initcore/init_core.go b/internal/util/initcore/init_core.go index f4bb9ec9634f9..bf6a2e903aa35 100644 --- a/internal/util/initcore/init_core.go +++ b/internal/util/initcore/init_core.go @@ -29,6 +29,7 @@ import "C" import ( "fmt" + "time" "unsafe" "github.com/cockroachdb/errors" @@ -61,7 +62,13 @@ func InitTraceConfig(params *paramtable.ComponentParam) { otlpEndpoint: endpoint, nodeID: nodeID, } - C.InitTrace(&config) + // oltp grpc may hangs forever, add timeout logic at go side + timeout := params.TraceCfg.InitTimeoutSeconds.GetAsDuration(time.Second) + callWithTimeout(func() { + C.InitTrace(&config) + }, func() { + panic("init segcore tracing timeout, See issue #33483") + }, timeout) } func 
ResetTraceConfig(params *paramtable.ComponentParam) { @@ -81,7 +88,31 @@ func ResetTraceConfig(params *paramtable.ComponentParam) { otlpEndpoint: endpoint, nodeID: nodeID, } - C.SetTrace(&config) + + // oltp grpc may hangs forever, add timeout logic at go side + timeout := params.TraceCfg.InitTimeoutSeconds.GetAsDuration(time.Second) + callWithTimeout(func() { + C.SetTrace(&config) + }, func() { + panic("set segcore tracing timeout, See issue #33483") + }, timeout) +} + +func callWithTimeout(fn func(), timeoutHandler func(), timeout time.Duration) { + if timeout > 0 { + ch := make(chan struct{}) + go func() { + defer close(ch) + fn() + }() + select { + case <-ch: + case <-time.After(timeout): + timeoutHandler() + } + } else { + fn() + } } func InitRemoteChunkManager(params *paramtable.ComponentParam) error { diff --git a/internal/util/initcore/init_core_test.go b/internal/util/initcore/init_core_test.go index fadc061042a8e..15d1b089a8989 100644 --- a/internal/util/initcore/init_core_test.go +++ b/internal/util/initcore/init_core_test.go @@ -19,6 +19,8 @@ package initcore import ( "testing" + "github.com/stretchr/testify/assert" + "github.com/milvus-io/milvus/pkg/util/paramtable" ) @@ -29,3 +31,17 @@ func TestTracer(t *testing.T) { paramtable.Get().Save(paramtable.Get().TraceCfg.Exporter.Key, "stdout") ResetTraceConfig(paramtable.Get()) } + +func TestOtlpHang(t *testing.T) { + paramtable.Init() + InitTraceConfig(paramtable.Get()) + + paramtable.Get().Save(paramtable.Get().TraceCfg.Exporter.Key, "otlp") + paramtable.Get().Save(paramtable.Get().TraceCfg.InitTimeoutSeconds.Key, "1") + defer paramtable.Get().Reset(paramtable.Get().TraceCfg.Exporter.Key) + defer paramtable.Get().Reset(paramtable.Get().TraceCfg.InitTimeoutSeconds.Key) + + assert.Panics(t, func() { + ResetTraceConfig(paramtable.Get()) + }) +} diff --git a/internal/util/pipeline/node.go b/internal/util/pipeline/node.go index ad42e6318fe51..def0331794bd0 100644 --- a/internal/util/pipeline/node.go +++ b/internal/util/pipeline/node.go @@ -17,12 +17,6 @@ package pipeline import ( - "fmt" - "sync" - - "go.uber.org/zap" - - "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/timerecord" ) @@ -30,68 +24,20 @@ type Node interface { Name() string MaxQueueLength() int32 Operate(in Msg) Msg - Start() - Close() } type nodeCtx struct { - node Node - - inputChannel chan Msg - - next *nodeCtx - checker *timerecord.GroupChecker - - closeCh chan struct{} // notify work to exit - closeWg sync.WaitGroup -} - -func (c *nodeCtx) Start() { - c.closeWg.Add(1) - c.node.Start() - go c.work() -} + node Node + InputChannel chan Msg -func (c *nodeCtx) Close() { - close(c.closeCh) - c.closeWg.Wait() + Next *nodeCtx + Checker *timerecord.Checker } -func (c *nodeCtx) work() { - defer c.closeWg.Done() - name := fmt.Sprintf("nodeCtxTtChecker-%s", c.node.Name()) - if c.checker != nil { - c.checker.Check(name) - defer c.checker.Remove(name) - } - - for { - select { - // close - case <-c.closeCh: - c.node.Close() - close(c.inputChannel) - log.Debug("pipeline node closed", zap.String("nodeName", c.node.Name())) - return - case input := <-c.inputChannel: - var output Msg - output = c.node.Operate(input) - if c.checker != nil { - c.checker.Check(name) - } - if c.next != nil && output != nil { - c.next.inputChannel <- output - } - } - } -} - -func newNodeCtx(node Node) *nodeCtx { +func NewNodeCtx(node Node) *nodeCtx { return &nodeCtx{ node: node, - inputChannel: make(chan Msg, node.MaxQueueLength()), - closeCh: make(chan struct{}), - closeWg: 
sync.WaitGroup{}, + InputChannel: make(chan Msg, node.MaxQueueLength()), } } @@ -110,12 +56,6 @@ func (node *BaseNode) MaxQueueLength() int32 { return node.maxQueueLength } -// Start implementing Node, base node does nothing when starts -func (node *BaseNode) Start() {} - -// Close implementing Node, base node does nothing when stops -func (node *BaseNode) Close() {} - func NewBaseNode(name string, maxQueryLength int32) *BaseNode { return &BaseNode{ name: name, diff --git a/internal/util/pipeline/pipeline.go b/internal/util/pipeline/pipeline.go index 047bf65f48714..6e85f2d9989e6 100644 --- a/internal/util/pipeline/pipeline.go +++ b/internal/util/pipeline/pipeline.go @@ -17,6 +17,7 @@ package pipeline import ( + "fmt" "time" "go.uber.org/zap" @@ -45,34 +46,55 @@ func (p *pipeline) Add(nodes ...Node) { } func (p *pipeline) addNode(node Node) { - nodeCtx := newNodeCtx(node) + nodeCtx := NewNodeCtx(node) if p.enableTtChecker { - nodeCtx.checker = timerecord.GetGroupChecker("fgNode", p.nodeTtInterval, func(list []string) { + manager := timerecord.GetCheckerManger("fgNode", p.nodeTtInterval, func(list []string) { log.Warn("some node(s) haven't received input", zap.Strings("list", list), zap.Duration("duration ", p.nodeTtInterval)) }) + name := fmt.Sprintf("nodeCtxTtChecker-%s", node.Name()) + nodeCtx.Checker = timerecord.NewChecker(name, manager) } if len(p.nodes) != 0 { - p.nodes[len(p.nodes)-1].next = nodeCtx + p.nodes[len(p.nodes)-1].Next = nodeCtx } else { - p.inputChannel = nodeCtx.inputChannel + p.inputChannel = nodeCtx.InputChannel } p.nodes = append(p.nodes, nodeCtx) } func (p *pipeline) Start() error { - if len(p.nodes) == 0 { - return ErrEmptyPipeline - } - for _, node := range p.nodes { - node.Start() - } return nil } func (p *pipeline) Close() { for _, node := range p.nodes { - node.Close() + if node.Checker != nil { + node.Checker.Close() + } + } +} + +func (p *pipeline) process() { + if len(p.nodes) == 0 { + return + } + + curNode := p.nodes[0] + for curNode != nil { + if len(curNode.InputChannel) == 0 { + break + } + + input := <-curNode.InputChannel + output := curNode.node.Operate(input) + if curNode.Checker != nil { + curNode.Checker.Check() + } + if curNode.Next != nil && output != nil { + curNode.Next.InputChannel <- output + } + curNode = curNode.Next } } diff --git a/internal/util/pipeline/pipeline_test.go b/internal/util/pipeline/pipeline_test.go index 8ddeb9c35534a..909893d458961 100644 --- a/internal/util/pipeline/pipeline_test.go +++ b/internal/util/pipeline/pipeline_test.go @@ -31,8 +31,9 @@ type testNode struct { func (t *testNode) Operate(in Msg) Msg { msg := in.(*msgstream.MsgPack) - msg.BeginTs++ - t.outChannel <- msg.BeginTs + if t.outChannel != nil { + t.outChannel <- msg.BeginTs + } return msg } @@ -43,7 +44,7 @@ type PipelineSuite struct { } func (suite *PipelineSuite) SetupTest() { - suite.outChannel = make(chan msgstream.Timestamp) + suite.outChannel = make(chan msgstream.Timestamp, 1) suite.pipeline = &pipeline{ nodes: []*nodeCtx{}, nodeTtInterval: 0, @@ -52,7 +53,21 @@ func (suite *PipelineSuite) SetupTest() { suite.pipeline.addNode(&testNode{ BaseNode: &BaseNode{ - name: "test-node", + name: "test-node1", + maxQueueLength: 8, + }, + }) + + suite.pipeline.addNode(&testNode{ + BaseNode: &BaseNode{ + name: "test-node2", + maxQueueLength: 8, + }, + }) + + suite.pipeline.addNode(&testNode{ + BaseNode: &BaseNode{ + name: "test-node3", maxQueueLength: 8, }, outChannel: suite.outChannel, @@ -62,10 +77,13 @@ func (suite *PipelineSuite) SetupTest() { func (suite 
*PipelineSuite) TestBasic() { suite.pipeline.Start() defer suite.pipeline.Close() - suite.pipeline.inputChannel <- &msgstream.MsgPack{} - output := <-suite.outChannel - suite.Equal(msgstream.Timestamp(1), output) + for i := 0; i < 100; i++ { + suite.pipeline.inputChannel <- &msgstream.MsgPack{BeginTs: msgstream.Timestamp(i)} + suite.pipeline.process() + output := <-suite.outChannel + suite.Equal(i, int(output)) + } } func TestPipeline(t *testing.T) { diff --git a/internal/util/pipeline/stream_pipeline.go b/internal/util/pipeline/stream_pipeline.go index 6cb6b6900e04e..3c22c6e99fec6 100644 --- a/internal/util/pipeline/stream_pipeline.go +++ b/internal/util/pipeline/stream_pipeline.go @@ -37,7 +37,7 @@ type StreamPipeline interface { } type streamPipeline struct { - *pipeline + pipeline *pipeline input <-chan *msgstream.MsgPack dispatcher msgdispatcher.Client startOnce sync.Once @@ -57,7 +57,8 @@ func (p *streamPipeline) work() { return case msg := <-p.input: log.RatedDebug(10, "stream pipeline fetch msg", zap.Int("sum", len(msg.Msgs))) - p.nodes[0].inputChannel <- msg + p.pipeline.inputChannel <- msg + p.pipeline.process() } } } @@ -86,6 +87,10 @@ func (p *streamPipeline) ConsumeMsgStream(position *msgpb.MsgPosition) error { return nil } +func (p *streamPipeline) Add(nodes ...Node) { + p.pipeline.Add(nodes...) +} + func (p *streamPipeline) Start() error { var err error p.startOnce.Do(func() { diff --git a/internal/util/pipeline/stream_pipeline_test.go b/internal/util/pipeline/stream_pipeline_test.go index 7bf28a5a0c351..a731a18ff34b8 100644 --- a/internal/util/pipeline/stream_pipeline_test.go +++ b/internal/util/pipeline/stream_pipeline_test.go @@ -68,11 +68,11 @@ func (suite *StreamPipelineSuite) TestBasic() { suite.pipeline.Start() defer suite.pipeline.Close() - suite.inChannel <- &msgstream.MsgPack{} + suite.inChannel <- &msgstream.MsgPack{BeginTs: 1001} for i := 1; i <= suite.length; i++ { output := <-suite.outChannel - suite.Equal(msgstream.Timestamp(i), output) + suite.Equal(int64(1001), int64(output)) } } diff --git a/internal/util/streamrpc/streamer.go b/internal/util/streamrpc/streamer.go index 53571672eeb8c..79f47c8bc3c52 100644 --- a/internal/util/streamrpc/streamer.go +++ b/internal/util/streamrpc/streamer.go @@ -5,8 +5,10 @@ import ( "io" "sync" + "github.com/golang/protobuf/proto" "google.golang.org/grpc" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/proto/internalpb" ) @@ -42,6 +44,123 @@ func NewConcurrentQueryStreamServer(srv QueryStreamServer) *ConcurrentQueryStrea } } +type RetrieveResultCache struct { + result *internalpb.RetrieveResults + size int + cap int +} + +func (c *RetrieveResultCache) Put(result *internalpb.RetrieveResults) { + if c.result == nil { + c.result = result + c.size = proto.Size(result) + return + } + + c.merge(result) +} + +func (c *RetrieveResultCache) Flush() *internalpb.RetrieveResults { + result := c.result + c.result = nil + c.size = 0 + return result +} + +func (c *RetrieveResultCache) Alloc(result *internalpb.RetrieveResults) bool { + return proto.Size(result)+c.size <= c.cap +} + +func (c *RetrieveResultCache) IsFull() bool { + return c.size > c.cap +} + +func (c *RetrieveResultCache) IsEmpty() bool { + return c.size == 0 +} + +func (c *RetrieveResultCache) merge(result *internalpb.RetrieveResults) { + switch result.GetIds().GetIdField().(type) { + case *schemapb.IDs_IntId: + c.result.GetIds().GetIntId().Data = append(c.result.GetIds().GetIntId().GetData(), 
result.GetIds().GetIntId().GetData()...) + case *schemapb.IDs_StrId: + c.result.GetIds().GetStrId().Data = append(c.result.GetIds().GetStrId().GetData(), result.GetIds().GetStrId().GetData()...) + } + c.result.AllRetrieveCount = c.result.AllRetrieveCount + result.AllRetrieveCount + c.result.CostAggregation = mergeCostAggregation(c.result.GetCostAggregation(), result.GetCostAggregation()) + c.size = proto.Size(c.result) +} + +func mergeCostAggregation(a *internalpb.CostAggregation, b *internalpb.CostAggregation) *internalpb.CostAggregation { + if a == nil { + return b + } + if b == nil { + return a + } + + return &internalpb.CostAggregation{ + ResponseTime: a.GetResponseTime() + b.GetResponseTime(), + ServiceTime: a.GetServiceTime() + b.GetServiceTime(), + TotalNQ: a.GetTotalNQ() + b.GetTotalNQ(), + TotalRelatedDataSize: a.GetTotalRelatedDataSize() + b.GetTotalRelatedDataSize(), + } +} + +// Merge result by size and time. +type ResultCacheServer struct { + srv QueryStreamServer + cache *RetrieveResultCache + mu sync.Mutex +} + +func NewResultCacheServer(srv QueryStreamServer, cap int) *ResultCacheServer { + return &ResultCacheServer{ + srv: srv, + cache: &RetrieveResultCache{cap: cap}, + } +} + +func (s *ResultCacheServer) Send(result *internalpb.RetrieveResults) error { + s.mu.Lock() + defer s.mu.Unlock() + + if !s.cache.Alloc(result) && !s.cache.IsEmpty() { + result := s.cache.Flush() + if err := s.srv.Send(result); err != nil { + return err + } + } + + s.cache.Put(result) + if s.cache.IsFull() { + result := s.cache.Flush() + if err := s.srv.Send(result); err != nil { + return err + } + } + return nil +} + +func (s *ResultCacheServer) Flush() error { + s.mu.Lock() + defer s.mu.Unlock() + + result := s.cache.Flush() + if result == nil { + return nil + } + + if err := s.srv.Send(result); err != nil { + return err + } + return nil +} + +func (s *ResultCacheServer) Context() context.Context { + return s.srv.Context() +} + // TODO LOCAL SERVER AND CLIENT FOR STANDALONE // ONLY FOR TEST type LocalQueryServer struct { diff --git a/internal/util/streamrpc/streamer_test.go b/internal/util/streamrpc/streamer_test.go new file mode 100644 index 0000000000000..de1482adb9c12 --- /dev/null +++ b/internal/util/streamrpc/streamer_test.go @@ -0,0 +1,84 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package streamrpc + +import ( + "context" + "testing" + + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/internalpb" +) + +type ResultCacheServerSuite struct { + suite.Suite +} + +func (s *ResultCacheServerSuite) TestSend() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + client := NewLocalQueryClient(ctx) + srv := client.CreateServer() + cacheSrv := NewResultCacheServer(srv, 1024) + + err := cacheSrv.Send(&internalpb.RetrieveResults{ + Ids: &schemapb.IDs{ + IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{1, 2, 3}}}, + }, + }) + s.NoError(err) + s.False(cacheSrv.cache.IsEmpty()) + + err = cacheSrv.Send(&internalpb.RetrieveResults{ + Ids: &schemapb.IDs{ + IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{4, 5, 6}}}, + }, + }) + s.NoError(err) + + err = cacheSrv.Flush() + s.NoError(err) + s.True(cacheSrv.cache.IsEmpty()) + + msg, err := client.Recv() + s.NoError(err) + // Data: []int64{1,2,3,4,5,6} + s.Equal(6, len(msg.GetIds().GetIntId().GetData())) +} + +func (s *ResultCacheServerSuite) TestMerge() { + s.Nil(mergeCostAggregation(nil, nil)) + + cost := &internalpb.CostAggregation{} + s.Equal(cost, mergeCostAggregation(nil, cost)) + s.Equal(cost, mergeCostAggregation(cost, nil)) + + a := &internalpb.CostAggregation{ResponseTime: 1, ServiceTime: 1, TotalNQ: 1, TotalRelatedDataSize: 1} + b := &internalpb.CostAggregation{ResponseTime: 2, ServiceTime: 2, TotalNQ: 2, TotalRelatedDataSize: 2} + c := mergeCostAggregation(a, b) + s.Equal(int64(3), c.ResponseTime) + s.Equal(int64(3), c.ServiceTime) + s.Equal(int64(3), c.TotalNQ) + s.Equal(int64(3), c.TotalRelatedDataSize) +} + +func TestResultCacheServerSuite(t *testing.T) { + suite.Run(t, new(ResultCacheServerSuite)) +} diff --git a/internal/util/testutil/test_util.go b/internal/util/testutil/test_util.go index d84affd043f7d..4548f0e77ff31 100644 --- a/internal/util/testutil/test_util.go +++ b/internal/util/testutil/test_util.go @@ -484,3 +484,66 @@ func BuildArrayData(schema *schemapb.CollectionSchema, insertData *storage.Inser } return columns, nil } + +func CreateInsertDataRowsForJSON(schema *schemapb.CollectionSchema, insertData *storage.InsertData) ([]map[string]any, error) { + fieldIDToField := lo.KeyBy(schema.GetFields(), func(field *schemapb.FieldSchema) int64 { + return field.GetFieldID() + }) + + rowNum := insertData.GetRowNum() + rows := make([]map[string]any, 0, rowNum) + for i := 0; i < rowNum; i++ { + data := make(map[int64]interface{}) + for fieldID, v := range insertData.Data { + field := fieldIDToField[fieldID] + dataType := field.GetDataType() + elemType := field.GetElementType() + if field.GetAutoID() { + continue + } + switch dataType { + case schemapb.DataType_Array: + switch elemType { + case schemapb.DataType_Bool: + data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetBoolData().GetData() + case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32: + data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetIntData().GetData() + case schemapb.DataType_Int64: + data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetLongData().GetData() + case schemapb.DataType_Float: + data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetFloatData().GetData() + case schemapb.DataType_Double: + data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetDoubleData().GetData() + case schemapb.DataType_String: + data[fieldID] = 
v.GetRow(i).(*schemapb.ScalarField).GetStringData().GetData() + } + case schemapb.DataType_JSON: + data[fieldID] = string(v.GetRow(i).([]byte)) + case schemapb.DataType_BinaryVector: + bytes := v.GetRow(i).([]byte) + ints := make([]int, 0, len(bytes)) + for _, b := range bytes { + ints = append(ints, int(b)) + } + data[fieldID] = ints + case schemapb.DataType_Float16Vector: + bytes := v.GetRow(i).([]byte) + data[fieldID] = typeutil.Float16BytesToFloat32Vector(bytes) + case schemapb.DataType_BFloat16Vector: + bytes := v.GetRow(i).([]byte) + data[fieldID] = typeutil.BFloat16BytesToFloat32Vector(bytes) + case schemapb.DataType_SparseFloatVector: + bytes := v.GetRow(i).([]byte) + data[fieldID] = typeutil.SparseFloatBytesToMap(bytes) + default: + data[fieldID] = v.GetRow(i) + } + } + row := lo.MapKeys(data, func(_ any, fieldID int64) string { + return fieldIDToField[fieldID].GetName() + }) + rows = append(rows, row) + } + + return rows, nil +} diff --git a/pkg/common/common.go b/pkg/common/common.go index 2b9ebc4d82084..723f231718abf 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -18,6 +18,8 @@ package common import ( "encoding/binary" + "fmt" + "strconv" "strings" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" @@ -110,6 +112,8 @@ const ( MaxCapacityKey = "max_capacity" DropRatioBuildKey = "drop_ratio_build" + + BitmapCardinalityLimitKey = "bitmap_cardinality_limit" ) // Collection properties key @@ -134,6 +138,10 @@ const ( CollectionDiskQuotaKey = "collection.diskProtection.diskQuota.mb" PartitionDiskQuotaKey = "partition.diskProtection.diskQuota.mb" + + // database level properties + DatabaseReplicaNumber = "database.replica.number" + DatabaseResourceGroups = "database.resource_groups" ) // common properties @@ -205,3 +213,38 @@ const ( // LatestVerision is the magic number for watch latest revision LatestRevision = int64(-1) ) + +func DatabaseLevelReplicaNumber(kvs []*commonpb.KeyValuePair) (int64, error) { + for _, kv := range kvs { + if kv.Key == DatabaseReplicaNumber { + replicaNum, err := strconv.ParseInt(kv.Value, 10, 64) + if err != nil { + return 0, fmt.Errorf("invalid database property: [key=%s] [value=%s]", kv.Key, kv.Value) + } + + return replicaNum, nil + } + } + + return 0, fmt.Errorf("database property not found: %s", DatabaseReplicaNumber) +} + +func DatabaseLevelResourceGroups(kvs []*commonpb.KeyValuePair) ([]string, error) { + for _, kv := range kvs { + if kv.Key == DatabaseResourceGroups { + invalidPropValue := fmt.Errorf("invalid database property: [key=%s] [value=%s]", kv.Key, kv.Value) + if len(kv.Value) == 0 { + return nil, invalidPropValue + } + + rgs := strings.Split(kv.Value, ",") + if len(rgs) == 0 { + return nil, invalidPropValue + } + + return rgs, nil + } + } + + return nil, fmt.Errorf("database property not found: %s", DatabaseResourceGroups) +} diff --git a/pkg/common/common_test.go b/pkg/common/common_test.go index 7228b1b6ab8e8..2dc31e33fb16a 100644 --- a/pkg/common/common_test.go +++ b/pkg/common/common_test.go @@ -1,9 +1,12 @@ package common import ( + "strings" "testing" "github.com/stretchr/testify/assert" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" ) func TestIsSystemField(t *testing.T) { @@ -38,3 +41,50 @@ func TestIsSystemField(t *testing.T) { }) } } + +func TestDatabaseProperties(t *testing.T) { + props := []*commonpb.KeyValuePair{ + { + Key: DatabaseReplicaNumber, + Value: "3", + }, + { + Key: DatabaseResourceGroups, + Value: strings.Join([]string{"rg1", "rg2"}, ","), + }, + } + + replicaNum, err := 
DatabaseLevelReplicaNumber(props) + assert.NoError(t, err) + assert.Equal(t, int64(3), replicaNum) + + rgs, err := DatabaseLevelResourceGroups(props) + assert.NoError(t, err) + assert.Contains(t, rgs, "rg1") + assert.Contains(t, rgs, "rg2") + + // test prop not found + _, err = DatabaseLevelReplicaNumber(nil) + assert.Error(t, err) + + _, err = DatabaseLevelResourceGroups(nil) + assert.Error(t, err) + + // test invalid prop value + + props = []*commonpb.KeyValuePair{ + { + Key: DatabaseReplicaNumber, + Value: "xxxx", + }, + { + Key: DatabaseResourceGroups, + Value: "", + }, + } + _, err = DatabaseLevelReplicaNumber(props) + assert.Error(t, err) + + _, err = DatabaseLevelResourceGroups(props) + assert.Error(t, err) +} diff --git a/pkg/config/env_source.go b/pkg/config/env_source.go index abef8bb821cf0..b36ee5917b176 100644 --- a/pkg/config/env_source.go +++ b/pkg/config/env_source.go @@ -78,6 +78,9 @@ func (es EnvSource) GetSourceName() string { return "EnvironmentSource" } +func (es EnvSource) SetManager(m ConfigManager) { +} + func (es EnvSource) SetEventHandler(eh EventHandler) { } diff --git a/pkg/config/etcd_source.go b/pkg/config/etcd_source.go index 9c87d0fc1c968..29f49278d76b9 100644 --- a/pkg/config/etcd_source.go +++ b/pkg/config/etcd_source.go @@ -24,6 +24,7 @@ import ( "sync" "time" + "github.com/samber/lo" clientv3 "go.etcd.io/etcd/client/v3" "go.uber.org/zap" @@ -44,6 +45,7 @@ type EtcdSource struct { updateMu sync.Mutex configRefresher *refresher + manager ConfigManager } func NewEtcdSource(etcdInfo *EtcdInfo) (*EtcdSource, error) { @@ -115,6 +117,12 @@ func (es *EtcdSource) Close() { es.configRefresher.stop() } +func (es *EtcdSource) SetManager(m ConfigManager) { + es.Lock() + defer es.Unlock() + es.manager = m +} + func (es *EtcdSource) SetEventHandler(eh EventHandler) { es.configRefresher.SetEventHandler(eh) } @@ -172,6 +180,9 @@ func (es *EtcdSource) update(configs map[string]string) error { return err } es.currentConfigs = configs + if es.manager != nil { + es.manager.EvictCacheValueByFormat(lo.Map(events, func(event *Event, _ int) string { return event.Key })...) + } es.Unlock() es.configRefresher.fireEvents(events...) diff --git a/pkg/config/file_source.go b/pkg/config/file_source.go index 6c1ba11bf18d8..9a1ab3f863e05 100644 --- a/pkg/config/file_source.go +++ b/pkg/config/file_source.go @@ -22,6 +22,7 @@ import ( "sync" "github.com/cockroachdb/errors" + "github.com/samber/lo" "github.com/spf13/cast" "github.com/spf13/viper" "go.uber.org/zap" @@ -36,6 +37,7 @@ type FileSource struct { updateMu sync.Mutex configRefresher *refresher + manager ConfigManager } func NewFileSource(fileInfo *FileInfo) *FileSource { @@ -91,6 +93,12 @@ func (fs *FileSource) Close() { fs.configRefresher.stop() } +func (fs *FileSource) SetManager(m ConfigManager) { + fs.Lock() + defer fs.Unlock() + fs.manager = m +} + func (fs *FileSource) SetEventHandler(eh EventHandler) { fs.RWMutex.Lock() defer fs.RWMutex.Unlock() @@ -173,6 +181,9 @@ func (fs *FileSource) update(configs map[string]string) error { return err } fs.configs = configs + if fs.manager != nil { + fs.manager.EvictCacheValueByFormat(lo.Map(events, func(event *Event, _ int) string { return event.Key })...) + } fs.Unlock() fs.configRefresher.fireEvents(events...) 
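For context on the pkg/config changes above: each Source now keeps a reference to the ConfigManager it was registered with (via SetManager, called from Manager.AddSource) and, whenever it refreshes its key/value snapshot, asks the manager to evict cached entries for the keys that changed. A minimal, self-contained sketch of that pattern follows; the `memorySource` type, its `update` helper, and `fakeManager` are hypothetical illustrations, and only the `ConfigManager` interface shape mirrors the one introduced in pkg/config/source.go.

```go
package main

import (
	"fmt"
	"strings"
	"sync"
)

// ConfigManager mirrors the interface added in pkg/config/source.go:
// the only capability a source needs is cache eviction by formatted key.
type ConfigManager interface {
	EvictCacheValueByFormat(keys ...string)
}

// memorySource is a hypothetical in-memory source used only to show how
// SetManager plus eviction-on-update fit together.
type memorySource struct {
	mu      sync.Mutex
	configs map[string]string
	manager ConfigManager
}

// SetManager is called when the source is registered, so the source can
// later notify the manager about changed keys.
func (s *memorySource) SetManager(m ConfigManager) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.manager = m
}

// update swaps in a new config snapshot and evicts cache entries for every
// key that was added, removed, or changed.
func (s *memorySource) update(newConfigs map[string]string) {
	s.mu.Lock()
	defer s.mu.Unlock()

	changed := make([]string, 0)
	for k, v := range newConfigs {
		if old, ok := s.configs[k]; !ok || old != v {
			changed = append(changed, k)
		}
	}
	for k := range s.configs {
		if _, ok := newConfigs[k]; !ok {
			changed = append(changed, k)
		}
	}

	s.configs = newConfigs
	if s.manager != nil && len(changed) > 0 {
		s.manager.EvictCacheValueByFormat(changed...)
	}
}

// fakeManager records evictions so the sketch is runnable on its own.
type fakeManager struct{ evicted []string }

func (m *fakeManager) EvictCacheValueByFormat(keys ...string) {
	m.evicted = append(m.evicted, keys...)
}

func main() {
	src := &memorySource{configs: map[string]string{"proxy.port": "19530"}}
	mgr := &fakeManager{}
	src.SetManager(mgr)

	src.update(map[string]string{"proxy.port": "19531"})
	fmt.Println(strings.Join(mgr.evicted, ",")) // proxy.port
}
```

In the real EtcdSource and FileSource above, the changed-key list comes from the refresher's computed events rather than a manual map diff, but the eviction call into the manager is the same.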
diff --git a/pkg/config/manager.go b/pkg/config/manager.go index b33993296b0b1..7e8c100255a1f 100644 --- a/pkg/config/manager.go +++ b/pkg/config/manager.go @@ -116,6 +116,16 @@ func (m *Manager) EvictCachedValue(key string) { m.configCache.Remove(key) } +func (m *Manager) EvictCacheValueByFormat(keys ...string) { + set := typeutil.NewSet(keys...) + m.configCache.Range(func(key string, value interface{}) bool { + if set.Contain(formatKey(key)) { + m.configCache.Remove(key) + } + return true + }) +} + func (m *Manager) GetConfig(key string) (string, error) { realKey := formatKey(key) v, ok := m.overlays.Get(realKey) @@ -210,6 +220,7 @@ func (m *Manager) AddSource(source Source) error { return err } + source.SetManager(m) m.sources.Insert(sourceName, source) err := m.pullSourceConfigs(sourceName) diff --git a/pkg/config/manager_test.go b/pkg/config/manager_test.go index ef4c2290abeed..b955071661d6e 100644 --- a/pkg/config/manager_test.go +++ b/pkg/config/manager_test.go @@ -270,6 +270,9 @@ func (ErrSource) GetPriority() int { return 2 } +func (ErrSource) SetManager(m ConfigManager) { +} + // GetSourceName implements Source func (ErrSource) GetSourceName() string { return "ErrSource" diff --git a/pkg/config/source.go b/pkg/config/source.go index 6a2cfbae0437a..61a22e320feea 100644 --- a/pkg/config/source.go +++ b/pkg/config/source.go @@ -23,12 +23,17 @@ const ( LowPriority = NormalPriority + 10 ) +type ConfigManager interface { + EvictCacheValueByFormat(keys ...string) +} + type Source interface { GetConfigurations() (map[string]string, error) GetConfigurationByKey(string) (string, error) GetPriority() int GetSourceName() string SetEventHandler(eh EventHandler) + SetManager(m ConfigManager) UpdateOptions(opt Options) Close() } diff --git a/pkg/go.mod b/pkg/go.mod index 1096ab575a204..8dc08d622c50f 100644 --- a/pkg/go.mod +++ b/pkg/go.mod @@ -1,6 +1,6 @@ module github.com/milvus-io/milvus/pkg -go 1.20 +go 1.21 require ( github.com/apache/pulsar-client-go v0.6.1-0.20210728062540-29414db801a7 diff --git a/pkg/metrics/datacoord_metrics.go b/pkg/metrics/datacoord_metrics.go index 0fb96d9cff305..35c27256effe9 100644 --- a/pkg/metrics/datacoord_metrics.go +++ b/pkg/metrics/datacoord_metrics.go @@ -21,7 +21,6 @@ import ( "github.com/prometheus/client_golang/prometheus" - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -341,19 +340,26 @@ func CleanupDataCoordSegmentMetrics(dbName string, collectionID int64, segmentID }) } -func CleanupDataCoordNumStoredRows(dbName string, collectionID int64) { - for _, state := range commonpb.SegmentState_name { - DataCoordNumStoredRows.Delete(prometheus.Labels{ - databaseLabelName: dbName, - collectionIDLabelName: fmt.Sprint(collectionID), - segmentStateLabelName: fmt.Sprint(state), - }) - } -} - -func CleanupDataCoordBulkInsertVectors(dbName string, collectionID int64) { - DataCoordBulkVectors.Delete(prometheus.Labels{ - databaseLabelName: dbName, +func CleanupDataCoordWithCollectionID(collectionID int64) { + IndexTaskNum.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + DataCoordNumStoredRows.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + DataCoordBulkVectors.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + DataCoordSegmentBinLogFileCount.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + 
DataCoordStoredBinlogSize.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + DataCoordStoredIndexFilesSize.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) + DataCoordSizeStoredL0Segment.Delete(prometheus.Labels{ collectionIDLabelName: fmt.Sprint(collectionID), }) } diff --git a/pkg/metrics/querycoord_metrics.go b/pkg/metrics/querycoord_metrics.go index 0bbc196a95a5a..b8a1301a09478 100644 --- a/pkg/metrics/querycoord_metrics.go +++ b/pkg/metrics/querycoord_metrics.go @@ -17,6 +17,8 @@ package metrics import ( + "fmt" + "github.com/prometheus/client_golang/prometheus" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -129,7 +131,7 @@ var ( Name: "task_latency", Help: "latency of all kind of task in query coord scheduler scheduler", Buckets: longTaskBuckets, - }, []string{taskTypeLabel, channelNameLabelName}) + }, []string{collectionIDLabelName, taskTypeLabel, channelNameLabelName}) ) // RegisterQueryCoord registers QueryCoord metrics @@ -145,3 +147,9 @@ func RegisterQueryCoord(registry *prometheus.Registry) { registry.MustRegister(QueryCoordCurrentTargetCheckpointUnixSeconds) registry.MustRegister(QueryCoordTaskLatency) } + +func CleanQueryCoordMetricsWithCollectionID(collectionID int64) { + QueryCoordTaskLatency.DeletePartialMatch(prometheus.Labels{ + collectionIDLabelName: fmt.Sprint(collectionID), + }) +} diff --git a/pkg/metrics/querynode_metrics.go b/pkg/metrics/querynode_metrics.go index d64c3cbb29869..2aa8e13c8a0da 100644 --- a/pkg/metrics/querynode_metrics.go +++ b/pkg/metrics/querynode_metrics.go @@ -370,7 +370,6 @@ var ( collectionIDLabelName, partitionIDLabelName, segmentStateLabelName, - indexCountLabelName, }) QueryNodeEntitiesSize = prometheus.NewGaugeVec( diff --git a/pkg/mq/msgdispatcher/dispatcher.go b/pkg/mq/msgdispatcher/dispatcher.go index ee552046ddc08..4d0ab3e2c606e 100644 --- a/pkg/mq/msgdispatcher/dispatcher.go +++ b/pkg/mq/msgdispatcher/dispatcher.go @@ -103,7 +103,7 @@ func NewDispatcher(ctx context.Context, return nil, err } - err = stream.Seek(ctx, []*Pos{position}) + err = stream.Seek(ctx, []*Pos{position}, false) if err != nil { stream.Close() log.Error("seek failed", zap.Error(err)) diff --git a/pkg/mq/msgstream/factory_stream_test.go b/pkg/mq/msgstream/factory_stream_test.go index cb7ff8702cd08..d07e74cdfc0f1 100644 --- a/pkg/mq/msgstream/factory_stream_test.go +++ b/pkg/mq/msgstream/factory_stream_test.go @@ -766,7 +766,7 @@ func createAndSeekConsumer(ctx context.Context, t *testing.T, newer streamNewer, consumer, err := newer(ctx) assert.NoError(t, err) consumer.AsConsumer(context.Background(), channels, funcutil.RandomString(8), mqwrapper.SubscriptionPositionUnknown) - err = consumer.Seek(context.Background(), seekPositions) + err = consumer.Seek(context.Background(), seekPositions, false) assert.NoError(t, err) return consumer } diff --git a/pkg/mq/msgstream/mock_msgstream.go b/pkg/mq/msgstream/mock_msgstream.go index e97b0e30d91a5..adbf233246bf3 100644 --- a/pkg/mq/msgstream/mock_msgstream.go +++ b/pkg/mq/msgstream/mock_msgstream.go @@ -44,10 +44,10 @@ type MockMsgStream_AsConsumer_Call struct { } // AsConsumer is a helper method to define mock.On call -// - ctx context.Context -// - channels []string -// - subName string -// - position mqwrapper.SubscriptionInitialPosition +// - ctx context.Context +// - channels []string +// - subName string +// - position mqwrapper.SubscriptionInitialPosition func (_e *MockMsgStream_Expecter) AsConsumer(ctx interface{}, channels 
interface{}, subName interface{}, position interface{}) *MockMsgStream_AsConsumer_Call { return &MockMsgStream_AsConsumer_Call{Call: _e.mock.On("AsConsumer", ctx, channels, subName, position)} } @@ -80,7 +80,7 @@ type MockMsgStream_AsProducer_Call struct { } // AsProducer is a helper method to define mock.On call -// - channels []string +// - channels []string func (_e *MockMsgStream_Expecter) AsProducer(channels interface{}) *MockMsgStream_AsProducer_Call { return &MockMsgStream_AsProducer_Call{Call: _e.mock.On("AsProducer", channels)} } @@ -134,7 +134,7 @@ type MockMsgStream_Broadcast_Call struct { } // Broadcast is a helper method to define mock.On call -// - _a0 *MsgPack +// - _a0 *MsgPack func (_e *MockMsgStream_Expecter) Broadcast(_a0 interface{}) *MockMsgStream_Broadcast_Call { return &MockMsgStream_Broadcast_Call{Call: _e.mock.On("Broadcast", _a0)} } @@ -219,7 +219,7 @@ type MockMsgStream_CheckTopicValid_Call struct { } // CheckTopicValid is a helper method to define mock.On call -// - channel string +// - channel string func (_e *MockMsgStream_Expecter) CheckTopicValid(channel interface{}) *MockMsgStream_CheckTopicValid_Call { return &MockMsgStream_CheckTopicValid_Call{Call: _e.mock.On("CheckTopicValid", channel)} } @@ -284,7 +284,7 @@ type MockMsgStream_EnableProduce_Call struct { } // EnableProduce is a helper method to define mock.On call -// - can bool +// - can bool func (_e *MockMsgStream_Expecter) EnableProduce(can interface{}) *MockMsgStream_EnableProduce_Call { return &MockMsgStream_EnableProduce_Call{Call: _e.mock.On("EnableProduce", can)} } @@ -338,7 +338,7 @@ type MockMsgStream_GetLatestMsgID_Call struct { } // GetLatestMsgID is a helper method to define mock.On call -// - channel string +// - channel string func (_e *MockMsgStream_Expecter) GetLatestMsgID(channel interface{}) *MockMsgStream_GetLatestMsgID_Call { return &MockMsgStream_GetLatestMsgID_Call{Call: _e.mock.On("GetLatestMsgID", channel)} } @@ -423,7 +423,7 @@ type MockMsgStream_Produce_Call struct { } // Produce is a helper method to define mock.On call -// - _a0 *MsgPack +// - _a0 *MsgPack func (_e *MockMsgStream_Expecter) Produce(_a0 interface{}) *MockMsgStream_Produce_Call { return &MockMsgStream_Produce_Call{Call: _e.mock.On("Produce", _a0)} } @@ -445,13 +445,13 @@ func (_c *MockMsgStream_Produce_Call) RunAndReturn(run func(*MsgPack) error) *Mo return _c } -// Seek provides a mock function with given fields: ctx, offset -func (_m *MockMsgStream) Seek(ctx context.Context, offset []*msgpb.MsgPosition) error { - ret := _m.Called(ctx, offset) +// Seek provides a mock function with given fields: ctx, msgPositions, includeCurrentMsg +func (_m *MockMsgStream) Seek(ctx context.Context, msgPositions []*msgpb.MsgPosition, includeCurrentMsg bool) error { + ret := _m.Called(ctx, msgPositions, includeCurrentMsg) var r0 error - if rf, ok := ret.Get(0).(func(context.Context, []*msgpb.MsgPosition) error); ok { - r0 = rf(ctx, offset) + if rf, ok := ret.Get(0).(func(context.Context, []*msgpb.MsgPosition, bool) error); ok { + r0 = rf(ctx, msgPositions, includeCurrentMsg) } else { r0 = ret.Error(0) } @@ -465,15 +465,16 @@ type MockMsgStream_Seek_Call struct { } // Seek is a helper method to define mock.On call -// - ctx context.Context -// - offset []*msgpb.MsgPosition -func (_e *MockMsgStream_Expecter) Seek(ctx interface{}, offset interface{}) *MockMsgStream_Seek_Call { - return &MockMsgStream_Seek_Call{Call: _e.mock.On("Seek", ctx, offset)} +// - ctx context.Context +// - msgPositions []*msgpb.MsgPosition +// - 
includeCurrentMsg bool +func (_e *MockMsgStream_Expecter) Seek(ctx interface{}, msgPositions interface{}, includeCurrentMsg interface{}) *MockMsgStream_Seek_Call { + return &MockMsgStream_Seek_Call{Call: _e.mock.On("Seek", ctx, msgPositions, includeCurrentMsg)} } -func (_c *MockMsgStream_Seek_Call) Run(run func(ctx context.Context, offset []*msgpb.MsgPosition)) *MockMsgStream_Seek_Call { +func (_c *MockMsgStream_Seek_Call) Run(run func(ctx context.Context, msgPositions []*msgpb.MsgPosition, includeCurrentMsg bool)) *MockMsgStream_Seek_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].([]*msgpb.MsgPosition)) + run(args[0].(context.Context), args[1].([]*msgpb.MsgPosition), args[2].(bool)) }) return _c } @@ -483,7 +484,7 @@ func (_c *MockMsgStream_Seek_Call) Return(_a0 error) *MockMsgStream_Seek_Call { return _c } -func (_c *MockMsgStream_Seek_Call) RunAndReturn(run func(context.Context, []*msgpb.MsgPosition) error) *MockMsgStream_Seek_Call { +func (_c *MockMsgStream_Seek_Call) RunAndReturn(run func(context.Context, []*msgpb.MsgPosition, bool) error) *MockMsgStream_Seek_Call { _c.Call.Return(run) return _c } @@ -499,7 +500,7 @@ type MockMsgStream_SetRepackFunc_Call struct { } // SetRepackFunc is a helper method to define mock.On call -// - repackFunc RepackFunc +// - repackFunc RepackFunc func (_e *MockMsgStream_Expecter) SetRepackFunc(repackFunc interface{}) *MockMsgStream_SetRepackFunc_Call { return &MockMsgStream_SetRepackFunc_Call{Call: _e.mock.On("SetRepackFunc", repackFunc)} } diff --git a/pkg/mq/msgstream/mq_kafka_msgstream_test.go b/pkg/mq/msgstream/mq_kafka_msgstream_test.go index 468d4e054a96f..fe39f8f082e2d 100644 --- a/pkg/mq/msgstream/mq_kafka_msgstream_test.go +++ b/pkg/mq/msgstream/mq_kafka_msgstream_test.go @@ -145,7 +145,7 @@ func TestStream_KafkaMsgStream_SeekToLast(t *testing.T) { defer outputStream2.Close() assert.NoError(t, err) - err = outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}) + err = outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}, false) assert.NoError(t, err) cnt := 0 @@ -482,6 +482,6 @@ func getKafkaTtOutputStreamAndSeek(ctx context.Context, kafkaAddress string, pos consumerName = append(consumerName, c.ChannelName) } outputStream.AsConsumer(context.Background(), consumerName, funcutil.RandomString(8), mqwrapper.SubscriptionPositionUnknown) - outputStream.Seek(context.Background(), positions) + outputStream.Seek(context.Background(), positions, false) return outputStream } diff --git a/pkg/mq/msgstream/mq_msgstream.go b/pkg/mq/msgstream/mq_msgstream.go index 41f6b260debc6..a93c9962f414d 100644 --- a/pkg/mq/msgstream/mq_msgstream.go +++ b/pkg/mq/msgstream/mq_msgstream.go @@ -473,7 +473,7 @@ func (ms *mqMsgStream) Chan() <-chan *MsgPack { // Seek reset the subscription associated with this consumer to a specific position, the seek position is exclusive // User has to ensure mq_msgstream is not closed before seek, and the seek position is already written. 
-func (ms *mqMsgStream) Seek(ctx context.Context, msgPositions []*msgpb.MsgPosition) error { +func (ms *mqMsgStream) Seek(ctx context.Context, msgPositions []*MsgPosition, includeCurrentMsg bool) error { for _, mp := range msgPositions { consumer, ok := ms.consumers[mp.ChannelName] if !ok { @@ -482,14 +482,19 @@ func (ms *mqMsgStream) Seek(ctx context.Context, msgPositions []*msgpb.MsgPositi messageID, err := ms.client.BytesToMsgID(mp.MsgID) if err != nil { if paramtable.Get().MQCfg.IgnoreBadPosition.GetAsBool() { - log.Ctx(ctx).Warn("Ignoring bad message id", zap.Error(err)) - continue + // try to use latest message ID first + messageID, err = consumer.GetLatestMsgID() + if err != nil { + log.Ctx(ctx).Warn("Ignoring bad message id", zap.Error(err)) + continue + } + } else { + return err } - return err } - log.Info("MsgStream seek begin", zap.String("channel", mp.ChannelName), zap.Any("MessageID", mp.MsgID)) - err = consumer.Seek(messageID, false) + log.Info("MsgStream seek begin", zap.String("channel", mp.ChannelName), zap.Any("MessageID", mp.MsgID), zap.Bool("includeCurrentMsg", includeCurrentMsg)) + err = consumer.Seek(messageID, includeCurrentMsg) if err != nil { log.Warn("Failed to seek", zap.String("channel", mp.ChannelName), zap.Error(err)) return err @@ -835,7 +840,7 @@ func (ms *MqTtMsgStream) allChanReachSameTtMsg(chanTtMsgSync map[mqwrapper.Consu } // Seek to the specified position -func (ms *MqTtMsgStream) Seek(ctx context.Context, msgPositions []*msgpb.MsgPosition) error { +func (ms *MqTtMsgStream) Seek(ctx context.Context, msgPositions []*MsgPosition, includeCurrentMsg bool) error { var consumer mqwrapper.Consumer var mp *MsgPosition var err error @@ -853,11 +858,17 @@ func (ms *MqTtMsgStream) Seek(ctx context.Context, msgPositions []*msgpb.MsgPosi seekMsgID, err := ms.client.BytesToMsgID(mp.MsgID) if err != nil { if paramtable.Get().MQCfg.IgnoreBadPosition.GetAsBool() { - log.Ctx(ctx).Warn("Ignoring bad message id", zap.Error(err)) - return false, nil + // try to use latest message ID first + seekMsgID, err = consumer.GetLatestMsgID() + if err != nil { + log.Ctx(ctx).Warn("Ignoring bad message id", zap.Error(err)) + return false, nil + } + } else { + return false, err } - return false, err } + log.Info("MsgStream begin to seek start msg: ", zap.String("channel", mp.ChannelName), zap.Any("MessageID", mp.MsgID)) err = consumer.Seek(seekMsgID, true) if err != nil { diff --git a/pkg/mq/msgstream/mq_msgstream_test.go b/pkg/mq/msgstream/mq_msgstream_test.go index 8705eddf13499..ee4a5d57ffacc 100644 --- a/pkg/mq/msgstream/mq_msgstream_test.go +++ b/pkg/mq/msgstream/mq_msgstream_test.go @@ -517,7 +517,7 @@ func TestStream_PulsarMsgStream_SeekToLast(t *testing.T) { defer outputStream2.Close() assert.NoError(t, err) - err = outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}) + err = outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}, false) assert.NoError(t, err) cnt := 0 @@ -946,7 +946,7 @@ func TestStream_MqMsgStream_Seek(t *testing.T) { pulsarClient, _ := pulsarwrapper.NewClient(DefaultPulsarTenant, DefaultPulsarNamespace, pulsar.ClientOptions{URL: pulsarAddress}) outputStream2, _ := NewMqMsgStream(ctx, 100, 100, pulsarClient, factory.NewUnmarshalDispatcher()) outputStream2.AsConsumer(ctx, consumerChannels, consumerSubName, mqwrapper.SubscriptionPositionEarliest) - outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}) + outputStream2.Seek(ctx, []*msgpb.MsgPosition{seekPosition}, false) for i := 6; i < 10; i++ { result := consumer(ctx, outputStream2) @@ -1001,7 
+1001,7 @@ func TestStream_MqMsgStream_SeekInvalidMessage(t *testing.T) { }, } - err = outputStream2.Seek(ctx, p) + err = outputStream2.Seek(ctx, p, false) assert.NoError(t, err) for i := 10; i < 20; i++ { @@ -1070,15 +1070,15 @@ func TestSTream_MqMsgStream_SeekBadMessageID(t *testing.T) { } paramtable.Get().Save(paramtable.Get().MQCfg.IgnoreBadPosition.Key, "false") - err = outputStream2.Seek(ctx, p) + err = outputStream2.Seek(ctx, p, false) assert.Error(t, err) - err = outputStream3.Seek(ctx, p) + err = outputStream3.Seek(ctx, p, false) assert.Error(t, err) paramtable.Get().Save(paramtable.Get().MQCfg.IgnoreBadPosition.Key, "true") - err = outputStream2.Seek(ctx, p) + err = outputStream2.Seek(ctx, p, false) assert.NoError(t, err) - err = outputStream3.Seek(ctx, p) + err = outputStream3.Seek(ctx, p, false) assert.NoError(t, err) } @@ -1466,7 +1466,7 @@ func getPulsarTtOutputStreamAndSeek(ctx context.Context, pulsarAddress string, p consumerName = append(consumerName, c.ChannelName) } outputStream.AsConsumer(context.Background(), consumerName, funcutil.RandomString(8), mqwrapper.SubscriptionPositionUnknown) - outputStream.Seek(context.Background(), positions) + outputStream.Seek(context.Background(), positions, false) return outputStream } diff --git a/pkg/mq/msgstream/msgstream.go b/pkg/mq/msgstream/msgstream.go index 184d44967d098..62f8c8737e026 100644 --- a/pkg/mq/msgstream/msgstream.go +++ b/pkg/mq/msgstream/msgstream.go @@ -63,7 +63,9 @@ type MsgStream interface { AsConsumer(ctx context.Context, channels []string, subName string, position mqwrapper.SubscriptionInitialPosition) error Chan() <-chan *MsgPack - Seek(ctx context.Context, offset []*MsgPosition) error + // Seek consume message from the specified position + // includeCurrentMsg indicates whether to consume the current message, and in the milvus system, it should be always false + Seek(ctx context.Context, msgPositions []*MsgPosition, includeCurrentMsg bool) error GetLatestMsgID(channel string) (MessageID, error) CheckTopicValid(channel string) error diff --git a/pkg/tracer/tracer.go b/pkg/tracer/tracer.go index bb675f6f48a87..7f18634064df4 100644 --- a/pkg/tracer/tracer.go +++ b/pkg/tracer/tracer.go @@ -29,7 +29,6 @@ import ( "go.opentelemetry.io/otel/sdk/resource" sdk "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.4.0" - "go.opentelemetry.io/otel/trace" "go.uber.org/zap" "github.com/milvus-io/milvus/pkg/log" @@ -63,11 +62,6 @@ func CloseTracerProvider(ctx context.Context) error { } func SetTracerProvider(exp sdk.SpanExporter, traceIDRatio float64) { - if exp == nil { - otel.SetTracerProvider(trace.NewNoopTracerProvider()) - return - } - tp := sdk.NewTracerProvider( sdk.WithBatcher(exp), sdk.WithResource(resource.NewWithAttributes( diff --git a/pkg/util/constant.go b/pkg/util/constant.go index 36e52d83dce45..a416affa97564 100644 --- a/pkg/util/constant.go +++ b/pkg/util/constant.go @@ -56,6 +56,7 @@ const ( DefaultDBName = "default" DefaultDBID = int64(1) NonDBID = int64(0) + InvalidDBID = int64(-1) PrivilegeWord = "Privilege" AnyWord = "*" diff --git a/pkg/util/indexparamcheck/bitmap_checker_test.go b/pkg/util/indexparamcheck/bitmap_checker_test.go index 4b0cca2bf3309..aa1baa8963433 100644 --- a/pkg/util/indexparamcheck/bitmap_checker_test.go +++ b/pkg/util/indexparamcheck/bitmap_checker_test.go @@ -11,7 +11,7 @@ import ( func Test_BitmapIndexChecker(t *testing.T) { c := newBITMAPChecker() - assert.NoError(t, c.CheckTrain(map[string]string{})) + assert.NoError(t, 
c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "100"})) assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Int64)) assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Float)) @@ -19,4 +19,6 @@ func Test_BitmapIndexChecker(t *testing.T) { assert.Error(t, c.CheckValidDataType(schemapb.DataType_JSON)) assert.Error(t, c.CheckValidDataType(schemapb.DataType_Array)) + assert.Error(t, c.CheckTrain(map[string]string{})) + assert.Error(t, c.CheckTrain(map[string]string{"bitmap_cardinality_limit": "0"})) } diff --git a/pkg/util/indexparamcheck/bitmap_index_checker.go b/pkg/util/indexparamcheck/bitmap_index_checker.go index da90a7d06db3a..d41267987d860 100644 --- a/pkg/util/indexparamcheck/bitmap_index_checker.go +++ b/pkg/util/indexparamcheck/bitmap_index_checker.go @@ -2,17 +2,21 @@ package indexparamcheck import ( "fmt" + "math" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/typeutil" ) -// STLSORTChecker checks if a STL_SORT index can be built. type BITMAPChecker struct { scalarIndexChecker } func (c *BITMAPChecker) CheckTrain(params map[string]string) error { + if !CheckIntByRange(params, common.BitmapCardinalityLimitKey, 1, math.MaxInt) { + return fmt.Errorf("failed to check bitmap cardinality limit, should be larger than 0 and smaller than math.MaxInt") + } return c.scalarIndexChecker.CheckTrain(params) } diff --git a/pkg/util/indexparamcheck/index_type.go b/pkg/util/indexparamcheck/index_type.go index e752057ea4e85..a20db560bfdb0 100644 --- a/pkg/util/indexparamcheck/index_type.go +++ b/pkg/util/indexparamcheck/index_type.go @@ -57,7 +57,9 @@ func IsMmapSupported(indexType IndexType) bool { indexType == IndexFaissBinIDMap || indexType == IndexFaissBinIvfFlat || indexType == IndexHNSW || - indexType == IndexScaNN + indexType == IndexScaNN || + indexType == IndexSparseInverted || + indexType == IndexSparseWand } func IsDiskIndex(indexType IndexType) bool { diff --git a/pkg/util/indexparamcheck/inverted_checker.go b/pkg/util/indexparamcheck/inverted_checker.go index b15549cd4b7a6..dfc24127d3569 100644 --- a/pkg/util/indexparamcheck/inverted_checker.go +++ b/pkg/util/indexparamcheck/inverted_checker.go @@ -17,7 +17,8 @@ func (c *INVERTEDChecker) CheckTrain(params map[string]string) error { } func (c *INVERTEDChecker) CheckValidDataType(dType schemapb.DataType) error { - if !typeutil.IsBoolType(dType) && !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) { + if !typeutil.IsBoolType(dType) && !typeutil.IsArithmetic(dType) && !typeutil.IsStringType(dType) && + !typeutil.IsArrayType(dType) { return fmt.Errorf("INVERTED are not supported on %s field", dType.String()) } return nil diff --git a/pkg/util/indexparamcheck/inverted_checker_test.go b/pkg/util/indexparamcheck/inverted_checker_test.go index afe41f89f1193..7a31290061490 100644 --- a/pkg/util/indexparamcheck/inverted_checker_test.go +++ b/pkg/util/indexparamcheck/inverted_checker_test.go @@ -18,8 +18,8 @@ func Test_INVERTEDIndexChecker(t *testing.T) { assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Bool)) assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Int64)) assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Float)) + assert.NoError(t, c.CheckValidDataType(schemapb.DataType_Array)) assert.Error(t, c.CheckValidDataType(schemapb.DataType_JSON)) - assert.Error(t, c.CheckValidDataType(schemapb.DataType_Array)) assert.Error(t, c.CheckValidDataType(schemapb.DataType_FloatVector)) } diff --git 
a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 117757815a39b..3ab8a846d994a 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -42,13 +42,14 @@ const ( DefaultSessionTTL = 30 // s DefaultSessionRetryTimes = 30 - DefaultMaxDegree = 56 - DefaultSearchListSize = 100 - DefaultPQCodeBudgetGBRatio = 0.125 - DefaultBuildNumThreadsRatio = 1.0 - DefaultSearchCacheBudgetGBRatio = 0.10 - DefaultLoadNumThreadRatio = 8.0 - DefaultBeamWidthRatio = 4.0 + DefaultMaxDegree = 56 + DefaultSearchListSize = 100 + DefaultPQCodeBudgetGBRatio = 0.125 + DefaultBuildNumThreadsRatio = 1.0 + DefaultSearchCacheBudgetGBRatio = 0.10 + DefaultLoadNumThreadRatio = 8.0 + DefaultBeamWidthRatio = 4.0 + DefaultBitmapIndexCardinalityBound = 500 ) // ComponentParam is used to quickly and easily access all components' configurations. @@ -212,6 +213,7 @@ type commonConfig struct { BeamWidthRatio ParamItem `refreshable:"true"` GracefulTime ParamItem `refreshable:"true"` GracefulStopTimeout ParamItem `refreshable:"true"` + BitmapIndexCardinalityBound ParamItem `refreshable:"false"` StorageType ParamItem `refreshable:"false"` SimdType ParamItem `refreshable:"false"` @@ -243,6 +245,7 @@ type commonConfig struct { TTMsgEnabled ParamItem `refreshable:"true"` TraceLogMode ParamItem `refreshable:"true"` BloomFilterSize ParamItem `refreshable:"true"` + BloomFilterType ParamItem `refreshable:"true"` MaxBloomFalsePositive ParamItem `refreshable:"true"` PanicWhenPluginFail ParamItem `refreshable:"false"` } @@ -443,6 +446,14 @@ This configuration is only used by querynode and indexnode, it selects CPU instr } p.IndexSliceSize.Init(base.mgr) + p.BitmapIndexCardinalityBound = ParamItem{ + Key: "common.bitmapIndexCardinalityBound", + Version: "2.5.0", + DefaultValue: strconv.Itoa(DefaultBitmapIndexCardinalityBound), + Export: true, + } + p.BitmapIndexCardinalityBound.Init(base.mgr) + p.EnableMaterializedView = ParamItem{ Key: "common.materializedView.enabled", Version: "2.5.0", @@ -725,10 +736,19 @@ like the old password verification when updating the credential`, } p.BloomFilterSize.Init(base.mgr) + p.BloomFilterType = ParamItem{ + Key: "common.bloomFilterType", + Version: "2.4.3", + DefaultValue: "BlockedBloomFilter", + Doc: "bloom filter type, support BasicBloomFilter and BlockedBloomFilter", + Export: true, + } + p.BloomFilterType.Init(base.mgr) + p.MaxBloomFalsePositive = ParamItem{ Key: "common.maxBloomFalsePositive", Version: "2.3.2", - DefaultValue: "0.05", + DefaultValue: "0.001", Doc: "max false positive rate for bloom filter", Export: true, } @@ -767,11 +787,12 @@ func (t *gpuConfig) init(base *BaseTable) { } type traceConfig struct { - Exporter ParamItem `refreshable:"false"` - SampleFraction ParamItem `refreshable:"false"` - JaegerURL ParamItem `refreshable:"false"` - OtlpEndpoint ParamItem `refreshable:"false"` - OtlpSecure ParamItem `refreshable:"false"` + Exporter ParamItem `refreshable:"false"` + SampleFraction ParamItem `refreshable:"false"` + JaegerURL ParamItem `refreshable:"false"` + OtlpEndpoint ParamItem `refreshable:"false"` + OtlpSecure ParamItem `refreshable:"false"` + InitTimeoutSeconds ParamItem `refreshable:"false"` } func (t *traceConfig) init(base *BaseTable) { @@ -819,6 +840,15 @@ Fractions >= 1 will always sample. 
Fractions < 0 are treated as zero.`, Export: true, } t.OtlpSecure.Init(base.mgr) + + t.InitTimeoutSeconds = ParamItem{ + Key: "trace.initTimeoutSeconds", + Version: "2.4.4", + DefaultValue: "10", + Export: true, + Doc: "segcore initialization timeout in seconds, preventing otlp grpc hangs forever", + } + t.InitTimeoutSeconds.Init(base.mgr) } type logConfig struct { @@ -1034,6 +1064,7 @@ type proxyConfig struct { MustUsePartitionKey ParamItem `refreshable:"true"` SkipAutoIDCheck ParamItem `refreshable:"true"` SkipPartitionKeyCheck ParamItem `refreshable:"true"` + EnablePublicPrivilege ParamItem `refreshable:"false"` AccessLog AccessLogConfig @@ -1394,6 +1425,14 @@ please adjust in embedded Milvus: false`, } p.SkipPartitionKeyCheck.Init(base.mgr) + p.EnablePublicPrivilege = ParamItem{ + Key: "proxy.enablePublicPrivilege", + Version: "2.4.1", + DefaultValue: "true", + Doc: "switch for whether proxy shall enable public privilege", + } + p.EnablePublicPrivilege.Init(base.mgr) + p.GracefulStopTimeout = ParamItem{ Key: "proxy.gracefulStopTimeout", Version: "2.3.7", @@ -2091,6 +2130,7 @@ type queryNodeConfig struct { EnableSegmentPrune ParamItem `refreshable:"false"` DefaultSegmentFilterRatio ParamItem `refreshable:"false"` UseStreamComputing ParamItem `refreshable:"false"` + QueryStreamBatchSize ParamItem `refreshable:"false"` } func (p *queryNodeConfig) init(base *BaseTable) { @@ -2348,13 +2388,13 @@ func (p *queryNodeConfig) init(base *BaseTable) { p.ChunkCacheWarmingUp = ParamItem{ Key: "queryNode.cache.warmup", Version: "2.3.6", - DefaultValue: "async", - Doc: `options: async, sync, off. + DefaultValue: "disable", + Doc: `options: async, sync, disable. Specifies the necessity for warming up the chunk cache. -1. If set to "sync" or "async," the original vector data will be synchronously/asynchronously loaded into the +1. If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the chunk cache during the load process. This approach has the potential to substantially reduce query/search latency for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage; -2. If set to "off," original vector data will only be loaded into the chunk cache during search/query.`, +2. 
If set to "disable" original vector data will only be loaded into the chunk cache during search/query.`, Export: true, } p.ChunkCacheWarmingUp.Init(base.mgr) @@ -2462,7 +2502,6 @@ Max read concurrency must greater than or equal to 1, and less than or equal to } diskUsage, err := disk.Usage(localStoragePath) if err != nil { - // panic(err) log.Fatal("failed to get disk usage", zap.String("localStoragePath", localStoragePath), zap.Error(err)) } return strconv.FormatUint(diskUsage.Total, 10) @@ -2674,6 +2713,15 @@ user-task-polling: Doc: "use stream search mode when searching or querying", } p.UseStreamComputing.Init(base.mgr) + + p.QueryStreamBatchSize = ParamItem{ + Key: "queryNode.queryStreamBatchSize", + Version: "2.4.1", + DefaultValue: "4194304", + Doc: "return batch size of stream query", + Export: true, + } + p.QueryStreamBatchSize.Init(base.mgr) } // ///////////////////////////////////////////////////////////////////////////// @@ -2721,6 +2769,7 @@ type dataCoordConfig struct { SingleCompactionDeltalogMaxNum ParamItem `refreshable:"true"` GlobalCompactionInterval ParamItem `refreshable:"false"` ChannelCheckpointMaxLag ParamItem `refreshable:"true"` + SyncSegmentsInterval ParamItem `refreshable:"false"` // LevelZero Segment EnableLevelZeroSegment ParamItem `refreshable:"false"` @@ -3061,6 +3110,14 @@ During compaction, the size of segment # of rows is able to exceed segment max # } p.ChannelCheckpointMaxLag.Init(base.mgr) + p.SyncSegmentsInterval = ParamItem{ + Key: "dataCoord.sync.interval", + Version: "2.4.3", + Doc: "The time interval for regularly syncing segments", + DefaultValue: "600", // 10 * 60 seconds + } + p.SyncSegmentsInterval.Init(base.mgr) + // LevelZeroCompaction p.EnableLevelZeroSegment = ParamItem{ Key: "dataCoord.segment.enableLevelZero", @@ -3761,9 +3818,16 @@ func (p *indexNodeConfig) init(base *BaseTable) { Version: "2.2.0", Formatter: func(v string) string { if len(v) == 0 { - diskUsage, err := disk.Usage("/") + // use local storage path to check correct device + localStoragePath := base.Get("localStorage.path") + if _, err := os.Stat(localStoragePath); os.IsNotExist(err) { + if err := os.MkdirAll(localStoragePath, os.ModePerm); err != nil { + log.Fatal("failed to mkdir", zap.String("localStoragePath", localStoragePath), zap.Error(err)) + } + } + diskUsage, err := disk.Usage(localStoragePath) if err != nil { - panic(err) + log.Fatal("failed to get disk usage", zap.String("localStoragePath", localStoragePath), zap.Error(err)) } return strconv.FormatUint(diskUsage.Total, 10) } diff --git a/pkg/util/paramtable/component_param_test.go b/pkg/util/paramtable/component_param_test.go index d3918c9d432e3..34e6d409c82bf 100644 --- a/pkg/util/paramtable/component_param_test.go +++ b/pkg/util/paramtable/component_param_test.go @@ -339,7 +339,7 @@ func TestComponentParam(t *testing.T) { // chunk cache assert.Equal(t, "willneed", Params.ReadAheadPolicy.GetValue()) - assert.Equal(t, "async", Params.ChunkCacheWarmingUp.GetValue()) + assert.Equal(t, "disable", Params.ChunkCacheWarmingUp.GetValue()) // test small indexNlist/NProbe default params.Remove("queryNode.segcore.smallIndex.nlist") @@ -528,6 +528,7 @@ func TestCachedParam(t *testing.T) { assert.Equal(t, uint(100000), params.CommonCfg.BloomFilterSize.GetAsUint()) assert.Equal(t, uint(100000), params.CommonCfg.BloomFilterSize.GetAsUint()) + assert.Equal(t, "BlockedBloomFilter", params.CommonCfg.BloomFilterType.GetValue()) assert.Equal(t, uint64(8388608), params.ServiceParam.MQCfg.PursuitBufferSize.GetAsUint64()) 
assert.Equal(t, uint64(8388608), params.ServiceParam.MQCfg.PursuitBufferSize.GetAsUint64()) diff --git a/pkg/util/paramtable/quota_param.go b/pkg/util/paramtable/quota_param.go index 33996a7b77dc7..3cc550e9e6eee 100644 --- a/pkg/util/paramtable/quota_param.go +++ b/pkg/util/paramtable/quota_param.go @@ -45,6 +45,9 @@ const ( type quotaConfig struct { QuotaAndLimitsEnabled ParamItem `refreshable:"false"` QuotaCenterCollectInterval ParamItem `refreshable:"false"` + AllocRetryTimes ParamItem `refreshable:"false"` + AllocWaitInterval ParamItem `refreshable:"false"` + ComplexDeleteLimitEnable ParamItem `refreshable:"false"` // ddl DDLLimitEnabled ParamItem `refreshable:"true"` @@ -2021,6 +2024,33 @@ MB/s, default no limit`, Export: true, } p.CoolOffSpeed.Init(base.mgr) + + p.AllocRetryTimes = ParamItem{ + Key: "quotaAndLimits.limits.allocRetryTimes", + Version: "2.4.0", + DefaultValue: "15", + Doc: `retry times when allocating forward data for delete from the rate limiter fails`, + Export: true, + } + p.AllocRetryTimes.Init(base.mgr) + + p.AllocWaitInterval = ParamItem{ + Key: "quotaAndLimits.limits.allocWaitInterval", + Version: "2.4.0", + DefaultValue: "1000", + Doc: `wait duration between retries when allocating forward data for delete from the rate limiter fails, in milliseconds`, + Export: true, + } + p.AllocWaitInterval.Init(base.mgr) + + p.ComplexDeleteLimitEnable = ParamItem{ + Key: "quotaAndLimits.limits.complexDeleteLimitEnable", + Version: "2.4.0", + DefaultValue: "false", + Doc: `whether complex delete checks its forward data with the rate limiter`, + Export: true, + } + p.ComplexDeleteLimitEnable.Init(base.mgr) } func megaBytes2Bytes(f float64) float64 { diff --git a/pkg/util/timerecord/group_checker.go b/pkg/util/timerecord/group_checker.go index d8502884d7938..c06dcd5ddeb9a 100644 --- a/pkg/util/timerecord/group_checker.go +++ b/pkg/util/timerecord/group_checker.go @@ -18,23 +18,47 @@ package timerecord import ( "sync" + "sync/atomic" "time" "github.com/milvus-io/milvus/pkg/util/typeutil" ) // groups maintains string to GroupChecker -var groups = typeutil.NewConcurrentMap[string, *GroupChecker]() +var groups = typeutil.NewConcurrentMap[string, *CheckerManager]() -// GroupChecker checks members in same group silent for certain period of time +type Checker struct { + name string + manager *CheckerManager + lastChecked atomic.Value +} + +func NewChecker(name string, manager *CheckerManager) *Checker { + checker := &Checker{} + checker.name = name + checker.manager = manager + checker.lastChecked.Store(time.Now()) + manager.Register(name, checker) + return checker +} + +func (checker *Checker) Check() { + checker.lastChecked.Store(time.Now()) +} + +func (checker *Checker) Close() { + checker.manager.Remove(checker.name) +} + +// CheckerManager checks whether members of the same group have been silent for a certain period of time // print warning msg if there are item(s) that not reported -type GroupChecker struct { +type CheckerManager struct { groupName string - d time.Duration // check duration - t *time.Ticker // internal ticker - ch chan struct{} // closing signal - lastest *typeutil.ConcurrentMap[string, time.Time] // map member name => lastest report time + d time.Duration // check duration + t *time.Ticker // internal ticker + ch chan struct{} // closing signal + checkers *typeutil.ConcurrentMap[string, *Checker] // map member name => checker initOnce sync.Once stopOnce sync.Once @@ -43,7 +67,7 @@ type GroupChecker struct { // init start worker goroutine // protected by initOnce -func (gc *GroupChecker) init() { 
gc.initOnce.Do(func() { gc.ch = make(chan struct{}) go gc.work() @@ -51,7 +75,7 @@ func (gc *GroupChecker) init() { } // work is the main procedure logic -func (gc *GroupChecker) work() { +func (gc *CheckerManager) work() { gc.t = time.NewTicker(gc.d) defer gc.t.Stop() @@ -63,8 +87,8 @@ func (gc *GroupChecker) work() { } var list []string - gc.lastest.Range(func(name string, ts time.Time) bool { - if time.Since(ts) > gc.d { + gc.checkers.Range(func(name string, checker *Checker) bool { + if time.Since(checker.lastChecked.Load().(time.Time)) > gc.d { list = append(list, name) } return true @@ -75,18 +99,17 @@ func (gc *GroupChecker) work() { } } -// Check updates the latest timestamp for provided name -func (gc *GroupChecker) Check(name string) { - gc.lastest.Insert(name, time.Now()) +func (gc *CheckerManager) Register(name string, checker *Checker) { + gc.checkers.Insert(name, checker) } // Remove deletes name from watch list -func (gc *GroupChecker) Remove(name string) { - gc.lastest.GetAndRemove(name) +func (gc *CheckerManager) Remove(name string) { + gc.checkers.GetAndRemove(name) } // Stop closes the GroupChecker -func (gc *GroupChecker) Stop() { +func (gc *CheckerManager) Stop() { gc.stopOnce.Do(func() { close(gc.ch) groups.GetAndRemove(gc.groupName) @@ -96,12 +119,12 @@ func (gc *GroupChecker) Stop() { // GetGroupChecker returns the GroupChecker with related group name // if no exist GroupChecker has the provided name, a new instance will be created with provided params // otherwise the params will be ignored -func GetGroupChecker(groupName string, duration time.Duration, fn func([]string)) *GroupChecker { - gc := &GroupChecker{ +func GetCheckerManger(groupName string, duration time.Duration, fn func([]string)) *CheckerManager { + gc := &CheckerManager{ groupName: groupName, d: duration, fn: fn, - lastest: typeutil.NewConcurrentMap[string, time.Time](), + checkers: typeutil.NewConcurrentMap[string, *Checker](), } gc, loaded := groups.GetOrInsert(groupName, gc) if !loaded { diff --git a/pkg/util/timerecord/group_checker_test.go b/pkg/util/timerecord/group_checker_test.go index 4d3d84b58f2b5..cef4521abb328 100644 --- a/pkg/util/timerecord/group_checker_test.go +++ b/pkg/util/timerecord/group_checker_test.go @@ -23,20 +23,24 @@ import ( "github.com/stretchr/testify/assert" ) -func TestGroupChecker(t *testing.T) { +func TestChecker(t *testing.T) { groupName := `test_group` signal := make(chan []string, 1) // 10ms period which set before is too short // change 10ms to 500ms to ensure the group checker schedule after the second value stored duration := 500 * time.Millisecond - gc1 := GetGroupChecker(groupName, duration, func(list []string) { + gc1 := GetCheckerManger(groupName, duration, func(list []string) { signal <- list }) - gc1.Check("1") - gc2 := GetGroupChecker(groupName, time.Second, func(list []string) { + + checker1 := NewChecker("1", gc1) + checker1.Check() + + gc2 := GetCheckerManger(groupName, time.Second, func(list []string) { t.FailNow() }) - gc2.Check("2") + checker2 := NewChecker("2", gc2) + checker2.Check() assert.Equal(t, duration, gc2.d) @@ -45,11 +49,12 @@ func TestGroupChecker(t *testing.T) { return len(list) == 2 }, duration*3, duration) - gc2.Remove("2") - + checker2.Close() list := <-signal assert.ElementsMatch(t, []string{"1"}, list) + checker1.Close() + assert.NotPanics(t, func() { gc1.Stop() gc2.Stop() diff --git a/pkg/util/typeutil/schema.go b/pkg/util/typeutil/schema.go index 8277ccbe438a1..dfa35f2109dec 100644 --- a/pkg/util/typeutil/schema.go +++ 
b/pkg/util/typeutil/schema.go @@ -1323,10 +1323,11 @@ func ComparePK(pkA, pkB interface{}) bool { type ResultWithID interface { GetIds() *schemapb.IDs + GetHasMoreResult() bool } // SelectMinPK select the index of the minPK in results T of the cursors. -func SelectMinPK[T ResultWithID](limit int64, results []T, cursors []int64) (int, bool) { +func SelectMinPK[T ResultWithID](results []T, cursors []int64) (int, bool) { var ( sel = -1 drainResult = false @@ -1336,8 +1337,9 @@ func SelectMinPK[T ResultWithID](limit int64, results []T, cursors []int64) (int minStrPK string ) for i, cursor := range cursors { - // if result size < limit, this means we should ignore the result from this segment - if int(cursor) >= GetSizeOfIDs(results[i].GetIds()) && (GetSizeOfIDs(results[i].GetIds()) == int(limit)) { + // if cursor has run out of all results from one result and this result has more matched results + // in this case we have tell reduce to stop because better results may be retrieved in the following iteration + if int(cursor) >= GetSizeOfIDs(results[i].GetIds()) && (results[i].GetHasMoreResult()) { drainResult = true continue } @@ -1550,35 +1552,93 @@ func CreateSparseFloatRowFromMap(input map[string]interface{}) ([]byte, error) { return nil, fmt.Errorf("empty JSON input") } - // try format1 - indices, ok1 := input["indices"].([]uint32) - values, ok2 := input["values"].([]float32) + getValue := func(key interface{}) (float32, error) { + var val float64 + switch v := key.(type) { + case int: + val = float64(v) + case float64: + val = v + case json.Number: + if num, err := strconv.ParseFloat(v.String(), 64); err == nil { + val = num + } else { + return 0, fmt.Errorf("invalid value type in JSON: %s", reflect.TypeOf(v)) + } + default: + return 0, fmt.Errorf("invalid value type in JSON: %s", reflect.TypeOf(key)) + } + if VerifyFloat(val) != nil { + return 0, fmt.Errorf("invalid value in JSON: %v", val) + } + if val > math.MaxFloat32 { + return 0, fmt.Errorf("value too large in JSON: %v", val) + } + return float32(val), nil + } + + getIndex := func(key interface{}) (uint32, error) { + var idx int64 + switch v := key.(type) { + case int: + idx = int64(v) + case float64: + // check if the float64 is actually an integer + if v != float64(int64(v)) { + return 0, fmt.Errorf("invalid index in JSON: %v", v) + } + idx = int64(v) + case json.Number: + if num, err := strconv.ParseInt(v.String(), 0, 64); err == nil { + idx = num + } else { + return 0, err + } + default: + return 0, fmt.Errorf("invalid index type in JSON: %s", reflect.TypeOf(key)) + } + if idx >= math.MaxUint32 { + return 0, fmt.Errorf("index too large in JSON: %v", idx) + } + return uint32(idx), nil + } + + jsonIndices, ok1 := input["indices"].([]interface{}) + jsonValues, ok2 := input["values"].([]interface{}) - // try format2 - if !ok1 && !ok2 { + if ok1 && ok2 { + // try format1 + for _, idx := range jsonIndices { + index, err := getIndex(idx) + if err != nil { + return nil, err + } + indices = append(indices, index) + } + for _, val := range jsonValues { + value, err := getValue(val) + if err != nil { + return nil, err + } + values = append(values, value) + } + } else if !ok1 && !ok2 { + // try format2 for k, v := range input { idx, err := strconv.ParseUint(k, 0, 32) if err != nil { return nil, err } - var val float64 - val, ok := v.(float64) - if !ok { - num, ok := v.(json.Number) - if !ok { - return nil, fmt.Errorf("invalid value type in JSON: %s", reflect.TypeOf(v)) - } - val, err = strconv.ParseFloat(num.String(), 32) - if err != nil { - 
return nil, err - } + val, err := getValue(v) + if err != nil { + return nil, err } indices = append(indices, uint32(idx)) - values = append(values, float32(val)) + values = append(values, val) } - } else if ok1 != ok2 { + } else { return nil, fmt.Errorf("invalid JSON input") } diff --git a/pkg/util/typeutil/schema_test.go b/pkg/util/typeutil/schema_test.go index 67601a719d9e9..6e6a6ec698776 100644 --- a/pkg/util/typeutil/schema_test.go +++ b/pkg/util/typeutil/schema_test.go @@ -18,6 +18,7 @@ package typeutil import ( "encoding/binary" + "fmt" "math" "reflect" "testing" @@ -2120,39 +2121,83 @@ func TestValidateSparseFloatRows(t *testing.T) { func TestParseJsonSparseFloatRow(t *testing.T) { t.Run("valid row 1", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{1, 3, 5}, "values": []float32{1.0, 2.0, 3.0}} + row := map[string]interface{}{"indices": []interface{}{1, 3, 5}, "values": []interface{}{1.0, 2.0, 3.0}} res, err := CreateSparseFloatRowFromMap(row) assert.NoError(t, err) assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res) }) t.Run("valid row 2", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{3, 1, 5}, "values": []float32{1.0, 2.0, 3.0}} + row := map[string]interface{}{"indices": []interface{}{3, 1, 5}, "values": []interface{}{1.0, 2.0, 3.0}} res, err := CreateSparseFloatRowFromMap(row) assert.NoError(t, err) assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{2.0, 1.0, 3.0}), res) }) + t.Run("valid row 3", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{1, 3, 5}, "values": []interface{}{1, 2, 3}} + res, err := CreateSparseFloatRowFromMap(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res) + }) + + t.Run("valid row 4", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{math.MaxInt32 + 1}, "values": []interface{}{1.0}} + res, err := CreateSparseFloatRowFromMap(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{math.MaxInt32 + 1}, []float32{1.0}), res) + }) + t.Run("invalid row 1", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{1, 3, 5}, "values": []float32{1.0, 2.0}} + row := map[string]interface{}{"indices": []interface{}{1, 3, 5}, "values": []interface{}{1.0, 2.0}} _, err := CreateSparseFloatRowFromMap(row) assert.Error(t, err) }) t.Run("invalid row 2", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{1}, "values": []float32{1.0, 2.0}} + row := map[string]interface{}{"indices": []interface{}{1}, "values": []interface{}{1.0, 2.0}} _, err := CreateSparseFloatRowFromMap(row) assert.Error(t, err) }) t.Run("invalid row 3", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{}, "values": []float32{}} + row := map[string]interface{}{"indices": []interface{}{}, "values": []interface{}{}} _, err := CreateSparseFloatRowFromMap(row) assert.Error(t, err) }) t.Run("invalid row 4", func(t *testing.T) { - row := map[string]interface{}{"indices": []uint32{3}, "values": []float32{-0.2}} + row := map[string]interface{}{"indices": []interface{}{3}, "values": []interface{}{-0.2}} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid row 5", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{3.1}, "values": []interface{}{0.2}} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid row 6", 
func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{-1}, "values": []interface{}{0.2}} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid row 7", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{math.MaxUint32}, "values": []interface{}{1.0}} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid row 8", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{math.MaxUint32 + 10}, "values": []interface{}{1.0}} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid row 9", func(t *testing.T) { + row := map[string]interface{}{"indices": []interface{}{10}, "values": []interface{}{float64(math.MaxFloat32) * 2}} _, err := CreateSparseFloatRowFromMap(row) assert.Error(t, err) }) @@ -2206,4 +2251,156 @@ func TestParseJsonSparseFloatRow(t *testing.T) { _, err := CreateSparseFloatRowFromMap(row) assert.Error(t, err) }) + + t.Run("invalid dict row 7", func(t *testing.T) { + row := map[string]interface{}{fmt.Sprint(math.MaxUint32): 1.0, "3": 2.0, "5": 3.0} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 8", func(t *testing.T) { + row := map[string]interface{}{fmt.Sprint(math.MaxUint32 + 10): 1.0, "3": 2.0, "5": 3.0} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 8", func(t *testing.T) { + row := map[string]interface{}{fmt.Sprint(math.MaxUint32 + 10): 1.0, "3": 2.0, "5": float64(math.MaxFloat32) * 2} + _, err := CreateSparseFloatRowFromMap(row) + assert.Error(t, err) + }) +} + +func TestParseJsonSparseFloatRowBytes(t *testing.T) { + t.Run("valid row 1", func(t *testing.T) { + row := []byte(`{"indices":[1,3,5],"values":[1.0,2.0,3.0]}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res) + }) + + t.Run("valid row 2", func(t *testing.T) { + row := []byte(`{"indices":[3,1,5],"values":[1.0,2.0,3.0]}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{2.0, 1.0, 3.0}), res) + }) + + t.Run("valid row 3", func(t *testing.T) { + row := []byte(`{"indices":[1, 3, 5], "values":[1, 2, 3]}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res) + }) + + t.Run("valid row 3", func(t *testing.T) { + row := []byte(`{"indices":[2147483648], "values":[1.0]}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{math.MaxInt32 + 1}, []float32{1.0}), res) + }) + + t.Run("invalid row 1", func(t *testing.T) { + row := []byte(`{"indices":[1,3,5],"values":[1.0,2.0,3.0`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid row 2", func(t *testing.T) { + row := []byte(`{"indices":[1,3,5],"values":[1.0,2.0]`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid row 3", func(t *testing.T) { + row := []byte(`{"indices":[1],"values":[1.0,2.0]`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid row 4", func(t *testing.T) { + row := []byte(`{"indices":[],"values":[]`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + 
t.Run("invalid row 5", func(t *testing.T) { + row := []byte(`{"indices":[-3],"values":[0.2]`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid row 6", func(t *testing.T) { + row := []byte(`{"indices":[3],"values":[-0.2]`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid row 7", func(t *testing.T) { + row := []byte(`{"indices": []interface{}{3.1}, "values": []interface{}{0.2}}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("valid dict row 1", func(t *testing.T) { + row := []byte(`{"1": 1.0, "3": 2.0, "5": 3.0}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{1.0, 2.0, 3.0}), res) + }) + + t.Run("valid dict row 2", func(t *testing.T) { + row := []byte(`{"3": 1.0, "1": 2.0, "5": 3.0}`) + res, err := CreateSparseFloatRowFromJSON(row) + assert.NoError(t, err) + assert.Equal(t, CreateSparseFloatRow([]uint32{1, 3, 5}, []float32{2.0, 1.0, 3.0}), res) + }) + + t.Run("invalid dict row 1", func(t *testing.T) { + row := []byte(`{"a": 1.0, "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 2", func(t *testing.T) { + row := []byte(`{"1": "a", "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 3", func(t *testing.T) { + row := []byte(`{"1": "1.0", "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 4", func(t *testing.T) { + row := []byte(`{"1": 1.0, "3": 2.0, "5": }`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 5", func(t *testing.T) { + row := []byte(`{"-1": 1.0, "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 6", func(t *testing.T) { + row := []byte(`{"1": -1.0, "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 7", func(t *testing.T) { + row := []byte(`{}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) + + t.Run("invalid dict row 8", func(t *testing.T) { + row := []byte(`{"1.1": 1.0, "3": 2.0, "5": 3.0}`) + _, err := CreateSparseFloatRowFromJSON(row) + assert.Error(t, err) + }) } diff --git a/pkg/util/vralloc/alloc.go b/pkg/util/vralloc/alloc.go new file mode 100644 index 0000000000000..32eb5442ccd66 --- /dev/null +++ b/pkg/util/vralloc/alloc.go @@ -0,0 +1,167 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package vralloc + +import ( + "maps" + "sync" + + "github.com/shirou/gopsutil/v3/disk" + + "github.com/milvus-io/milvus/pkg/util/hardware" +) + +type Resource struct { + Memory int64 // Memory occupation in bytes + CPU int64 // CPU in cycles per second + Disk int64 // Disk occupation in bytes +} + +// Add adds r2 to r +func (r *Resource) Add(r2 *Resource) *Resource { + r.Memory += r2.Memory + r.CPU += r2.CPU + r.Disk += r2.Disk + return r +} + +// Sub subtracts r2 from r +func (r *Resource) Sub(r2 *Resource) *Resource { + r.Memory -= r2.Memory + r.CPU -= r2.CPU + r.Disk -= r2.Disk + return r +} + +func (r *Resource) Diff(r2 *Resource) *Resource { + return &Resource{ + Memory: r.Memory - r2.Memory, + CPU: r.CPU - r2.CPU, + Disk: r.Disk - r2.Disk, + } +} + +// Le tests if the resource is less than or equal to the limit +func (r Resource) Le(limit *Resource) bool { + return r.Memory <= limit.Memory && r.CPU <= limit.CPU && r.Disk <= limit.Disk +} + +type Allocator[T comparable] interface { + // Allocate allocates the resource, returns true if the resource is allocated. If allocation failed, returns the short resource. + // The short resource is a positive value, e.g., if an additional 8 bytes of disk is needed, returns (0, 0, 8). + Allocate(id T, r *Resource) (allocated bool, short *Resource) + // Release releases the resource + Release(id T) + // Used returns the used resource + Used() Resource + // Inspect returns the allocated resources + Inspect() map[T]*Resource +} + +type FixedSizeAllocator[T comparable] struct { + limit *Resource + + lock sync.RWMutex + used Resource + allocs map[T]*Resource +} + +func (a *FixedSizeAllocator[T]) Allocate(id T, r *Resource) (allocated bool, short *Resource) { + a.lock.Lock() + defer a.lock.Unlock() + if a.used.Add(r).Le(a.limit) { + _, ok := a.allocs[id] + if ok { + // Re-allocate on identical id is not allowed + return false, nil + } + a.allocs[id] = r + return true, nil + } + short = a.used.Diff(a.limit) + a.used.Sub(r) + return false, short +} + +func (a *FixedSizeAllocator[T]) Release(id T) { + a.lock.Lock() + defer a.lock.Unlock() + r, ok := a.allocs[id] + if !ok { + return + } + delete(a.allocs, id) + a.used.Sub(r) +} + +func (a *FixedSizeAllocator[T]) Used() Resource { + a.lock.RLock() + defer a.lock.RUnlock() + return a.used +} + +func (a *FixedSizeAllocator[T]) Inspect() map[T]*Resource { + a.lock.RLock() + defer a.lock.RUnlock() + return maps.Clone(a.allocs) +} + +func NewFixedSizeAllocator[T comparable](limit *Resource) *FixedSizeAllocator[T] { + return &FixedSizeAllocator[T]{ + limit: limit, + allocs: make(map[T]*Resource), + } +} + +// PhysicalAwareFixedSizeAllocator allocates resources with additional consideration of physical resource usage. +type PhysicalAwareFixedSizeAllocator[T comparable] struct { + FixedSizeAllocator[T] + + hwLimit *Resource + dir string // watching directory for disk usage, typically obtained from paramtable.Get().LocalStorageCfg.Path.GetValue() +} + +func (a *PhysicalAwareFixedSizeAllocator[T]) Allocate(id T, r *Resource) (allocated bool, short *Resource) { + memoryUsage := int64(hardware.GetUsedMemoryCount()) + diskUsage := int64(0) + if usageStats, err := disk.Usage(a.dir); err == nil { + diskUsage = int64(usageStats.Used) + } + + // Check if memory usage + future request estimation will exceed the memory limit + // Note that different allocators will not coordinate with each other, so the memory limit + // may be exceeded in concurrent allocations. 
+ expected := &Resource{ + Memory: a.Used().Memory + r.Memory + memoryUsage, + Disk: a.Used().Disk + r.Disk + diskUsage, + } + if expected.Le(a.hwLimit) { + return a.FixedSizeAllocator.Allocate(id, r) + } + return false, expected.Diff(a.hwLimit) +} + +func NewPhysicalAwareFixedSizeAllocator[T comparable](limit *Resource, hwMemoryLimit, hwDiskLimit int64, dir string) *PhysicalAwareFixedSizeAllocator[T] { + return &PhysicalAwareFixedSizeAllocator[T]{ + FixedSizeAllocator: FixedSizeAllocator[T]{ + limit: limit, + allocs: make(map[T]*Resource), + }, + hwLimit: &Resource{Memory: hwMemoryLimit, Disk: hwDiskLimit}, + dir: dir, + } +} diff --git a/pkg/util/vralloc/alloc_test.go b/pkg/util/vralloc/alloc_test.go new file mode 100644 index 0000000000000..6b50d39ac4420 --- /dev/null +++ b/pkg/util/vralloc/alloc_test.go @@ -0,0 +1,76 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vralloc + +import ( + "fmt" + "sync" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/milvus-io/milvus/pkg/util/hardware" +) + +func TestFixedSizeAllocator(t *testing.T) { + a := NewFixedSizeAllocator[string](&Resource{100, 100, 100}) + + allocated, _ := a.Allocate("a1", &Resource{10, 10, 10}) + assert.Equal(t, true, allocated) + allocated, _ = a.Allocate("a2", &Resource{90, 90, 90}) + assert.Equal(t, true, allocated) + allocated, short := a.Allocate("a3", &Resource{10, 0, 0}) + assert.Equal(t, false, allocated) + assert.Equal(t, &Resource{10, 0, 0}, short) + a.Release("a2") + allocated, _ = a.Allocate("a3", &Resource{10, 0, 0}) + assert.Equal(t, true, allocated) + m := a.Inspect() + assert.Equal(t, 2, len(m)) + allocated, _ = a.Allocate("a1", &Resource{10, 0, 0}) + assert.Equal(t, false, allocated) +} + +func TestFixedSizeAllocatorRace(t *testing.T) { + a := NewFixedSizeAllocator[string](&Resource{100, 100, 100}) + wg := new(sync.WaitGroup) + for i := 0; i < 100; i++ { + wg.Add(1) + go func(index int) { + defer wg.Done() + allocated, _ := a.Allocate(fmt.Sprintf("a%d", index), &Resource{1, 1, 1}) + assert.Equal(t, true, allocated) + }(i) + } + wg.Wait() + m := a.Inspect() + assert.Equal(t, 100, len(m)) +} + +func TestPhysicalAwareFixedSizeAllocator(t *testing.T) { + hwMemoryLimit := int64(float32(hardware.GetMemoryCount()) * 0.9) + hwDiskLimit := int64(1<<63 - 1) + a := NewPhysicalAwareFixedSizeAllocator[string](&Resource{100, 100, 100}, hwMemoryLimit, hwDiskLimit, "/tmp") + + allocated, _ := a.Allocate("a1", &Resource{10, 10, 10}) + assert.Equal(t, true, allocated) + allocated, _ = a.Allocate("a2", &Resource{90, 90, 90}) + assert.Equal(t, true, allocated) + allocated, short := a.Allocate("a3", &Resource{10, 0, 0}) + assert.Equal(t, false, allocated) + assert.Equal(t, &Resource{10, 0, 0}, short) +} diff --git a/scripts/README.md b/scripts/README.md index 
838db6fd5e6db..8cb64fbca7dc4 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -4,7 +4,7 @@ ``` OS: Ubuntu 20.04 -go:1.20 +go:1.21 cmake: >=3.18 gcc: 7.5 ``` diff --git a/scripts/generate_proto.sh b/scripts/generate_proto.sh index 2551f586c9f9c..286570b842aa8 100755 --- a/scripts/generate_proto.sh +++ b/scripts/generate_proto.sh @@ -44,6 +44,7 @@ pushd ${PROTO_DIR} mkdir -p etcdpb mkdir -p indexcgopb +mkdir -p cgopb mkdir -p internalpb mkdir -p rootcoordpb @@ -62,6 +63,7 @@ protoc_opt="${PROTOC_BIN} --proto_path=${API_PROTO_DIR} --proto_path=." ${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./etcdpb etcd_meta.proto || { echo 'generate etcd_meta.proto failed'; exit 1; } ${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./indexcgopb index_cgo_msg.proto || { echo 'generate index_cgo_msg failed '; exit 1; } +${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./cgopb cgo_msg.proto || { echo 'generate cgo_msg failed '; exit 1; } ${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./rootcoordpb root_coord.proto || { echo 'generate root_coord.proto failed'; exit 1; } ${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./internalpb internal.proto || { echo 'generate internal.proto failed'; exit 1; } ${protoc_opt} --go_out=plugins=grpc,paths=source_relative:./proxypb proxy.proto|| { echo 'generate proxy.proto failed'; exit 1; } @@ -78,6 +80,7 @@ ${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb schema.proto|| { echo 'generate sche ${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb common.proto|| { echo 'generate common.proto failed'; exit 1; } ${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb segcore.proto|| { echo 'generate segcore.proto failed'; exit 1; } ${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb index_cgo_msg.proto|| { echo 'generate index_cgo_msg.proto failed'; exit 1; } +${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb cgo_msg.proto|| { echo 'generate cgo_msg.proto failed'; exit 1; } ${protoc_opt} --cpp_out=$CPP_SRC_DIR/src/pb plan.proto|| { echo 'generate plan.proto failed'; exit 1; } popd diff --git a/scripts/install_deps.sh b/scripts/install_deps.sh index 1a455b980007a..067b617ef2e64 100755 --- a/scripts/install_deps.sh +++ b/scripts/install_deps.sh @@ -48,7 +48,7 @@ function install_linux_deps() { cmake_version=$(echo "$(cmake --version | head -1)" | grep -o '[0-9][\.][0-9]*') if [ ! $cmake_version ] || [ `expr $cmake_version \>= 3.26` -eq 0 ]; then echo "cmake version $cmake_version is less than 3.26, wait to installing ..." 
- wget -qO- "https://cmake.org/files/v3.26/cmake-3.26.5-linux-x86_64.tar.gz" | sudo tar --strip-components=1 -xz -C /usr/local + wget -qO- "https://cmake.org/files/v3.26/cmake-3.26.5-linux-$(uname -m).tar.gz" | sudo tar --strip-components=1 -xz -C /usr/local else echo "cmake version is $cmake_version" fi diff --git a/scripts/standalone_embed.sh b/scripts/standalone_embed.sh index 4284c9cfb9a77..647b73d37de0f 100755 --- a/scripts/standalone_embed.sh +++ b/scripts/standalone_embed.sh @@ -42,7 +42,7 @@ EOF --health-start-period=90s \ --health-timeout=20s \ --health-retries=3 \ - milvusdb/milvus:v2.4.0 \ + milvusdb/milvus:v2.4.1 \ milvus run standalone 1> /dev/null } diff --git a/tests/go_client/base/milvus_client.go b/tests/go_client/base/milvus_client.go new file mode 100644 index 0000000000000..8a43fc9f94f90 --- /dev/null +++ b/tests/go_client/base/milvus_client.go @@ -0,0 +1,240 @@ +package base + +import ( + "context" + "encoding/json" + "strings" + "time" + + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + + "go.uber.org/zap" + + "google.golang.org/grpc" + + clientv2 "github.com/milvus-io/milvus/client/v2" + "github.com/milvus-io/milvus/client/v2/index" +) + +func LoggingUnaryInterceptor() grpc.UnaryClientInterceptor { + return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { + maxLogLength := 300 + _method := strings.Split(method, "/") + _methodShotName := _method[len(_method)-1] + // Marshal req to json str + reqJSON, err := json.Marshal(req) + if err != nil { + log.Error("Failed to marshal request", zap.Error(err)) + reqJSON = []byte("could not marshal request") + } + reqStr := string(reqJSON) + if len(reqStr) > maxLogLength { + reqStr = reqStr[:maxLogLength] + "..." + } + + // log before + log.Info("Request", zap.String("method", _methodShotName), zap.Any("reqs", reqStr)) + + // invoker + start := time.Now() + errResp := invoker(ctx, method, req, reply, cc, opts...) + cost := time.Since(start) + + // Marshal reply to json str + respJSON, err := json.Marshal(reply) + if err != nil { + log.Error("Failed to marshal response", zap.Error(err)) + respJSON = []byte("could not marshal response") + } + respStr := string(respJSON) + if len(respStr) > maxLogLength { + respStr = respStr[:maxLogLength] + "..." + } + + // log after + log.Info("Response", zap.String("method", _methodShotName), zap.Any("resp", respStr)) + log.Debug("Cost", zap.String("method", _methodShotName), zap.Duration("cost", cost)) + return errResp + } +} + +type MilvusClient struct { + mClient *clientv2.Client +} + +func NewMilvusClient(ctx context.Context, cfg *clientv2.ClientConfig) (*MilvusClient, error) { + cfg.DialOptions = append(cfg.DialOptions, grpc.WithUnaryInterceptor(LoggingUnaryInterceptor())) + mClient, err := clientv2.New(ctx, cfg) + return &MilvusClient{ + mClient, + }, err +} + +func (mc *MilvusClient) Close(ctx context.Context) error { + err := mc.mClient.Close(ctx) + return err +} + +// -- database -- + +// UsingDatabase list all database in milvus cluster. +func (mc *MilvusClient) UsingDatabase(ctx context.Context, option clientv2.UsingDatabaseOption) error { + err := mc.mClient.UsingDatabase(ctx, option) + return err +} + +// ListDatabases list all database in milvus cluster. 
+func (mc *MilvusClient) ListDatabases(ctx context.Context, option clientv2.ListDatabaseOption, callOptions ...grpc.CallOption) ([]string, error) { + databaseNames, err := mc.mClient.ListDatabase(ctx, option, callOptions...) + return databaseNames, err +} + +// CreateDatabase create database with the given name. +func (mc *MilvusClient) CreateDatabase(ctx context.Context, option clientv2.CreateDatabaseOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.CreateDatabase(ctx, option, callOptions...) + return err +} + +// DropDatabase drop database with the given db name. +func (mc *MilvusClient) DropDatabase(ctx context.Context, option clientv2.DropDatabaseOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.DropDatabase(ctx, option, callOptions...) + return err +} + +// -- collection -- + +// CreateCollection Create Collection +func (mc *MilvusClient) CreateCollection(ctx context.Context, option clientv2.CreateCollectionOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.CreateCollection(ctx, option, callOptions...) + return err +} + +// ListCollections List Collections +func (mc *MilvusClient) ListCollections(ctx context.Context, option clientv2.ListCollectionOption, callOptions ...grpc.CallOption) ([]string, error) { + collectionNames, err := mc.mClient.ListCollections(ctx, option, callOptions...) + return collectionNames, err +} + +// DescribeCollection Describe collection +func (mc *MilvusClient) DescribeCollection(ctx context.Context, option clientv2.DescribeCollectionOption, callOptions ...grpc.CallOption) (*entity.Collection, error) { + collection, err := mc.mClient.DescribeCollection(ctx, option, callOptions...) + return collection, err +} + +// HasCollection Has collection +func (mc *MilvusClient) HasCollection(ctx context.Context, option clientv2.HasCollectionOption, callOptions ...grpc.CallOption) (bool, error) { + has, err := mc.mClient.HasCollection(ctx, option, callOptions...) + return has, err +} + +// DropCollection Drop Collection +func (mc *MilvusClient) DropCollection(ctx context.Context, option clientv2.DropCollectionOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.DropCollection(ctx, option, callOptions...) + return err +} + +// -- partition -- + +// CreatePartition Create Partition +func (mc *MilvusClient) CreatePartition(ctx context.Context, option clientv2.CreatePartitionOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.CreatePartition(ctx, option, callOptions...) + return err +} + +// DropPartition Drop Partition +func (mc *MilvusClient) DropPartition(ctx context.Context, option clientv2.DropPartitionOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.DropPartition(ctx, option, callOptions...) + return err +} + +// HasPartition Has Partition +func (mc *MilvusClient) HasPartition(ctx context.Context, option clientv2.HasPartitionOption, callOptions ...grpc.CallOption) (bool, error) { + has, err := mc.mClient.HasPartition(ctx, option, callOptions...) + return has, err +} + +// ListPartitions List Partitions +func (mc *MilvusClient) ListPartitions(ctx context.Context, option clientv2.ListPartitionsOption, callOptions ...grpc.CallOption) ([]string, error) { + partitionNames, err := mc.mClient.ListPartitions(ctx, option, callOptions...) 
+ return partitionNames, err +} + +// LoadPartitions Load Partitions into memory +func (mc *MilvusClient) LoadPartitions(ctx context.Context, option clientv2.LoadPartitionsOption, callOptions ...grpc.CallOption) (clientv2.LoadTask, error) { + loadTask, err := mc.mClient.LoadPartitions(ctx, option, callOptions...) + return loadTask, err +} + +// -- index -- + +// CreateIndex Create Index +func (mc *MilvusClient) CreateIndex(ctx context.Context, option clientv2.CreateIndexOption, callOptions ...grpc.CallOption) (*clientv2.CreateIndexTask, error) { + createIndexTask, err := mc.mClient.CreateIndex(ctx, option, callOptions...) + return createIndexTask, err +} + +// ListIndexes List Indexes +func (mc *MilvusClient) ListIndexes(ctx context.Context, option clientv2.ListIndexOption, callOptions ...grpc.CallOption) ([]string, error) { + indexes, err := mc.mClient.ListIndexes(ctx, option, callOptions...) + return indexes, err +} + +// DescribeIndex Describe Index +func (mc *MilvusClient) DescribeIndex(ctx context.Context, option clientv2.DescribeIndexOption, callOptions ...grpc.CallOption) (index.Index, error) { + index, err := mc.mClient.DescribeIndex(ctx, option, callOptions...) + return index, err +} + +// DropIndex Drop Index +func (mc *MilvusClient) DropIndex(ctx context.Context, option clientv2.DropIndexOption, callOptions ...grpc.CallOption) error { + err := mc.mClient.DropIndex(ctx, option, callOptions...) + return err +} + +// -- write -- + +// Insert insert data +func (mc *MilvusClient) Insert(ctx context.Context, option clientv2.InsertOption, callOptions ...grpc.CallOption) (clientv2.InsertResult, error) { + insertRes, err := mc.mClient.Insert(ctx, option, callOptions...) + log.Info("Insert", zap.Any("result", insertRes)) + return insertRes, err +} + +// Flush flush data +func (mc *MilvusClient) Flush(ctx context.Context, option clientv2.FlushOption, callOptions ...grpc.CallOption) (*clientv2.FlushTask, error) { + flushTask, err := mc.mClient.Flush(ctx, option, callOptions...) + return flushTask, err +} + +// Delete deletes data +func (mc *MilvusClient) Delete(ctx context.Context, option clientv2.DeleteOption, callOptions ...grpc.CallOption) (clientv2.DeleteResult, error) { + deleteRes, err := mc.mClient.Delete(ctx, option, callOptions...) + return deleteRes, err +} + +// Upsert upsert data +func (mc *MilvusClient) Upsert(ctx context.Context, option clientv2.UpsertOption, callOptions ...grpc.CallOption) (clientv2.UpsertResult, error) { + upsertRes, err := mc.mClient.Upsert(ctx, option, callOptions...) + return upsertRes, err +} + +// -- read -- + +// LoadCollection Load Collection +func (mc *MilvusClient) LoadCollection(ctx context.Context, option clientv2.LoadCollectionOption, callOptions ...grpc.CallOption) (clientv2.LoadTask, error) { + loadTask, err := mc.mClient.LoadCollection(ctx, option, callOptions...) + return loadTask, err +} + +// Search search from collection +func (mc *MilvusClient) Search(ctx context.Context, option clientv2.SearchOption, callOptions ...grpc.CallOption) ([]clientv2.ResultSet, error) { + resultSets, err := mc.mClient.Search(ctx, option, callOptions...) + return resultSets, err +} + +// Query query from collection +func (mc *MilvusClient) Query(ctx context.Context, option clientv2.QueryOption, callOptions ...grpc.CallOption) (clientv2.ResultSet, error) { + resultSet, err := mc.mClient.Query(ctx, option, callOptions...) 
+ return resultSet, err +} diff --git a/tests/go_client/common/consts.go b/tests/go_client/common/consts.go new file mode 100644 index 0000000000000..46e964f1c8ea5 --- /dev/null +++ b/tests/go_client/common/consts.go @@ -0,0 +1,67 @@ +package common + +// const default field name +const ( + DefaultInt8FieldName = "int8" + DefaultInt16FieldName = "int16" + DefaultInt32FieldName = "int32" + DefaultInt64FieldName = "int64" + DefaultBoolFieldName = "bool" + DefaultFloatFieldName = "float" + DefaultDoubleFieldName = "double" + DefaultVarcharFieldName = "varchar" + DefaultJSONFieldName = "json" + DefaultArrayFieldName = "array" + DefaultFloatVecFieldName = "floatVec" + DefaultBinaryVecFieldName = "binaryVec" + DefaultFloat16VecFieldName = "fp16Vec" + DefaultBFloat16VecFieldName = "bf16Vec" + DefaultSparseVecFieldName = "sparseVec" + DefaultDynamicNumberField = "dynamicNumber" + DefaultDynamicStringField = "dynamicString" + DefaultDynamicBoolField = "dynamicBool" + DefaultDynamicListField = "dynamicList" + DefaultBoolArrayField = "boolArray" + DefaultInt8ArrayField = "int8Array" + DefaultInt16ArrayField = "int16Array" + DefaultInt32ArrayField = "int32Array" + DefaultInt64ArrayField = "int64Array" + DefaultFloatArrayField = "floatArray" + DefaultDoubleArrayField = "doubleArray" + DefaultVarcharArrayField = "varcharArray" +) + +// const values for test cases +const ( + RowCount = "row_count" + DefaultTimeout = 120 + DefaultDim = 128 + DefaultShards = int32(2) + DefaultNb = 3000 + DefaultNq = 5 + DefaultLimit = 10 + TestCapacity = 100 // default array field capacity + TestMaxLen = 100 // default varchar field max length +) + +// const default value from milvus config +const ( + MaxPartitionNum = 4096 + DefaultDynamicFieldName = "$meta" + QueryCountFieldName = "count(*)" + DefaultPartition = "_default" + DefaultIndexName = "_default_idx_102" + DefaultIndexNameBinary = "_default_idx_100" + DefaultRgName = "__default_resource_group" + DefaultDb = "default" + MaxDim = 32768 + MaxLength = int64(65535) + MaxCollectionNameLen = 255 + DefaultRgCapacity = 1000000 + RetentionDuration = 40 // common.retentionDuration + MaxCapacity = 4096 // max array capacity + DefaultPartitionNum = 16 // default num_partitions + MaxTopK = 16384 + MaxVectorFieldNum = 4 + MaxShardNum = 16 +) diff --git a/tests/go_client/common/response_checker.go b/tests/go_client/common/response_checker.go new file mode 100644 index 0000000000000..283dd76aad4ab --- /dev/null +++ b/tests/go_client/common/response_checker.go @@ -0,0 +1,44 @@ +package common + +import ( + "strings" + "testing" + + "github.com/milvus-io/milvus/pkg/log" + "github.com/stretchr/testify/require" + + clientv2 "github.com/milvus-io/milvus/client/v2" +) + +func CheckErr(t *testing.T, actualErr error, expErrNil bool, expErrorMsg ...string) { + if expErrNil { + require.NoError(t, actualErr) + } else { + require.Error(t, actualErr) + switch len(expErrorMsg) { + case 0: + log.Fatal("expected error message should not be empty") + case 1: + require.ErrorContains(t, actualErr, expErrorMsg[0]) + default: + contains := false + for i := 0; i < len(expErrorMsg); i++ { + if strings.Contains(actualErr.Error(), expErrorMsg[i]) { + contains = true + } + } + if !contains { + t.FailNow() + } + } + } +} + +// CheckSearchResult check search result, check nq, topk, ids, score +func CheckSearchResult(t *testing.T, actualSearchResults []clientv2.ResultSet, expNq int, expTopK int) { + require.Equal(t, len(actualSearchResults), expNq) + require.Len(t, actualSearchResults, expNq) + for _, 
actualSearchResult := range actualSearchResults { + require.Equal(t, actualSearchResult.ResultCount, expTopK) + } +} diff --git a/tests/go_client/common/utils.go b/tests/go_client/common/utils.go new file mode 100644 index 0000000000000..c6f8a9e44c607 --- /dev/null +++ b/tests/go_client/common/utils.go @@ -0,0 +1,122 @@ +package common + +import ( + "encoding/binary" + "fmt" + "log" + "math" + "math/rand" + "strings" + "time" + + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/x448/float16" +) + +var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") +var r *rand.Rand + +func init() { + r = rand.New(rand.NewSource(time.Now().UnixNano())) +} + +func GenRandomString(prefix string, n int) string { + b := make([]rune, n) + for i := range b { + b[i] = letterRunes[r.Intn(len(letterRunes))] + } + str := fmt.Sprintf("%s_%s", prefix, string(b)) + return str +} + +// GenLongString gen invalid long string +func GenLongString(n int) string { + var builder strings.Builder + longString := "a" + for i := 0; i < n; i++ { + builder.WriteString(longString) + } + return builder.String() +} + +func GenValidNames() []string { + return []string{ + "a", + "_", + "_name", + "_123", + "name_", + "_coll_123_", + } +} + +func GenInvalidNames() []string { + invalidNames := []string{ + "", + " ", + "12-s", + "(mn)", + "中文", + "%$#", + "1", + "[10]", + "a b", + DefaultDynamicFieldName, + GenLongString(MaxCollectionNameLen + 1), + } + return invalidNames +} + +func GenFloatVector(dim int) []float32 { + vector := make([]float32, 0, dim) + for j := 0; j < int(dim); j++ { + vector = append(vector, rand.Float32()) + } + return vector +} + +func GenFloat16Vector(dim int) []byte { + ret := make([]byte, dim*2) + for i := 0; i < int(dim); i++ { + v := float16.Fromfloat32(rand.Float32()).Bits() + binary.LittleEndian.PutUint16(ret[i*2:], v) + } + return ret +} + +func GenBFloat16Vector(dim int) []byte { + ret16 := make([]uint16, 0, dim) + for i := 0; i < int(dim); i++ { + f := rand.Float32() + bits := math.Float32bits(f) + bits >>= 16 + bits &= 0x7FFF + ret16 = append(ret16, uint16(bits)) + } + ret := make([]byte, len(ret16)*2) + for i, value := range ret16 { + binary.LittleEndian.PutUint16(ret[i*2:], value) + } + return ret +} + +func GenBinaryVector(dim int) []byte { + vector := make([]byte, dim/8) + rand.Read(vector) + return vector +} + +func GenSparseVector(maxLen int) entity.SparseEmbedding { + length := 1 + rand.Intn(1+maxLen) + positions := make([]uint32, length) + values := make([]float32, length) + for i := 0; i < length; i++ { + positions[i] = uint32(2*i + 1) + values[i] = rand.Float32() + } + vector, err := entity.NewSliceSparseEmbedding(positions, values) + if err != nil { + log.Fatalf("Generate vector failed %s", err) + } + return vector +} diff --git a/tests/go_client/go.mod b/tests/go_client/go.mod new file mode 100644 index 0000000000000..665fdcc11fe11 --- /dev/null +++ b/tests/go_client/go.mod @@ -0,0 +1,129 @@ +module github.com/milvus-io/milvus/tests/go_client + +go 1.20 + +require ( + github.com/milvus-io/milvus/client/v2 v2.0.0-20240521081339-017fd7bc25de + github.com/milvus-io/milvus/pkg v0.0.2-0.20240317152703-17b4938985f3 + github.com/stretchr/testify v1.9.0 + github.com/x448/float16 v0.8.4 + go.uber.org/zap v1.27.0 + google.golang.org/grpc v1.64.0 +) + +replace github.com/milvus-io/milvus/client/v2 v2.0.0-20240521081339-017fd7bc25de => ../../../milvus/client + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect + 
github.com/cenkalti/backoff/v4 v4.2.0 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cilium/ebpf v0.11.0 // indirect + github.com/cockroachdb/errors v1.9.1 // indirect + github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f // indirect + github.com/cockroachdb/redact v1.1.3 // indirect + github.com/containerd/cgroups/v3 v3.0.3 // indirect + github.com/coreos/go-semver v0.3.0 // indirect + github.com/coreos/go-systemd/v22 v22.3.2 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/docker/go-units v0.4.0 // indirect + github.com/dustin/go-humanize v1.0.0 // indirect + github.com/form3tech-oss/jwt-go v3.2.3+incompatible // indirect + github.com/fsnotify/fsnotify v1.4.9 // indirect + github.com/getsentry/sentry-go v0.12.0 // indirect + github.com/go-logr/logr v1.3.0 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect + github.com/godbus/dbus/v5 v5.0.4 // indirect + github.com/gogo/googleapis v1.4.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/gogo/status v1.1.0 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/btree v1.1.2 // indirect + github.com/gorilla/websocket v1.4.2 // indirect + github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect + github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect + github.com/hashicorp/hcl v1.0.0 // indirect + github.com/jonboulle/clockwork v0.2.2 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/kr/pretty v0.3.1 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect + github.com/magiconair/properties v1.8.5 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect + github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240430035521-259ae1d10016 // indirect + github.com/mitchellh/mapstructure v1.4.1 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/opencontainers/runtime-spec v1.0.2 // indirect + github.com/panjf2000/ants/v2 v2.7.2 // indirect + github.com/pelletier/go-toml v1.9.3 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect + github.com/prometheus/client_golang v1.14.0 // indirect + github.com/prometheus/client_model v0.3.0 // indirect + github.com/prometheus/common v0.42.0 // indirect + github.com/prometheus/procfs v0.9.0 // indirect + github.com/rogpeppe/go-internal v1.10.0 // indirect + github.com/samber/lo v1.27.0 // indirect + github.com/shirou/gopsutil/v3 v3.22.9 // indirect + github.com/sirupsen/logrus v1.9.0 // indirect + github.com/soheilhy/cmux v0.1.5 // indirect + github.com/spaolacci/murmur3 v1.1.0 // indirect + github.com/spf13/afero v1.6.0 // indirect + github.com/spf13/cast v1.3.1 // indirect + github.com/spf13/jwalterweatherman v1.1.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/spf13/viper v1.8.1 // indirect + github.com/subosito/gotenv v1.2.0 // indirect + github.com/tidwall/gjson v1.17.1 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.0 // indirect + github.com/tklauser/go-sysconf v0.3.10 // indirect + 
github.com/tklauser/numcpus v0.4.0 // indirect + github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect + github.com/uber/jaeger-client-go v2.30.0+incompatible // indirect + github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect + github.com/yusufpapurcu/wmi v1.2.2 // indirect + go.etcd.io/bbolt v1.3.6 // indirect + go.etcd.io/etcd/api/v3 v3.5.5 // indirect + go.etcd.io/etcd/client/pkg/v3 v3.5.5 // indirect + go.etcd.io/etcd/client/v2 v2.305.5 // indirect + go.etcd.io/etcd/client/v3 v3.5.5 // indirect + go.etcd.io/etcd/pkg/v3 v3.5.5 // indirect + go.etcd.io/etcd/raft/v3 v3.5.5 // indirect + go.etcd.io/etcd/server/v3 v3.5.5 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.38.0 // indirect + go.opentelemetry.io/otel v1.13.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.13.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.13.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.13.0 // indirect + go.opentelemetry.io/otel/metric v0.35.0 // indirect + go.opentelemetry.io/otel/sdk v1.13.0 // indirect + go.opentelemetry.io/otel/trace v1.13.0 // indirect + go.opentelemetry.io/proto/otlp v0.19.0 // indirect + go.uber.org/atomic v1.10.0 // indirect + go.uber.org/automaxprocs v1.5.2 // indirect + go.uber.org/multierr v1.10.0 // indirect + golang.org/x/crypto v0.22.0 // indirect + golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect + golang.org/x/net v0.24.0 // indirect + golang.org/x/sync v0.6.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect + golang.org/x/time v0.3.0 // indirect + google.golang.org/genproto v0.0.0-20230526161137-0005af68ea54 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect + google.golang.org/protobuf v1.33.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/ini.v1 v1.62.0 // indirect + gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/apimachinery v0.28.6 // indirect + sigs.k8s.io/yaml v1.3.0 // indirect +) diff --git a/tests/go_client/go.sum b/tests/go_client/go.sum new file mode 100644 index 0000000000000..b461c4e1c3ee7 --- /dev/null +++ b/tests/go_client/go.sum @@ -0,0 +1,1113 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= +cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= +cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= +cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= +cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= +cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= +cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= +cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= +cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= +cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= +cloud.google.com/go v0.57.0/go.mod 
h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= +cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= +cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= +cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI= +cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk= +cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= +cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= +cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= +cloud.google.com/go v0.110.0 h1:Zc8gqp3+a9/Eyph2KDmcGaPtbKRIoqq4YTlL4NMD0Ys= +cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= +cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= +cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= +cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= +cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= +cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= +cloud.google.com/go/compute v1.25.1 h1:ZRpHJedLtTpKgr3RV1Fx23NuaAEN1Zfx9hw1u4aJdjU= +cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= +cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= +cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= +cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= +cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= +cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= +cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= +cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= +cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= +cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= +cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= +cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= +cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= +dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v1.2.1 h1:9F2/+DoOYIOksmaJFPw1tGFy1eDnIJXg+UHjuD8lTak= +github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53/go.mod h1:+3IMCy2vIlbG1XG/0ggNQv0SvxCAIpPM5b1nCz56Xno= +github.com/CloudyKit/jet/v3 v3.0.0/go.mod h1:HKQPgSJmdK8hdoAbKUUWajkHyHo4RaU5rMdUywE7VMo= +github.com/Joker/hpp v1.0.0/go.mod h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY= +github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod 
h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0= +github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= +github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= +github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= +github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= +github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g= +github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= +github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= +github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= +github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= +github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= +github.com/cenkalti/backoff/v4 v4.2.0 h1:HN5dHm3WBOgndBH6E8V0q2jIYIR3s9yglV8k/+MN3u4= +github.com/cenkalti/backoff/v4 v4.2.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= +github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= +github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= +github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod 
h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y= +github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= +github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= +github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= +github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20240318125728-8a4994d93e50 h1:DBmgJDC9dTfkVyGgipamEh2BpGYxScCH1TOF1LL1cXc= +github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5/go.mod h1:h6jFvWxBdQXxjopDMZyH2UVceIRfR84bdzbkoKrsWNo= +github.com/cockroachdb/datadriven v1.0.2 h1:H9MtNqVoVhvd9nCBwOyDjUEdZCREqbIdCJD93PBm/jA= +github.com/cockroachdb/datadriven v1.0.2/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= +github.com/cockroachdb/errors v1.2.4/go.mod h1:rQD95gz6FARkaKkQXUksEje/d9a6wBJoCr5oaCLELYA= +github.com/cockroachdb/errors v1.9.1 h1:yFVvsI0VxmRShfawbt/laCIDy/mtTqqnvoNgiy5bEV8= +github.com/cockroachdb/errors v1.9.1/go.mod h1:2sxOtL2WIc096WSZqZ5h8fa17rdDq9HZOZLBCor4mBk= +github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI= +github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f h1:6jduT9Hfc0njg5jJ1DdKCFPdMBrp/mdZfCpa5h+WM74= +github.com/cockroachdb/logtags v0.0.0-20211118104740-dabe8e521a4f/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= +github.com/cockroachdb/redact v1.1.3 h1:AKZds10rFSIj7qADf0g46UixK8NNLwWTNdCIGS5wfSQ= +github.com/cockroachdb/redact v1.1.3/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= +github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0/go.mod h1:4Zcjuz89kmFXt9morQgcfYZAYZ5n8WHjt81YYWIwtTM= +github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGDJ9kip0= +github.com/containerd/cgroups/v3 v3.0.3/go.mod h1:8HBe7V3aWGLFPd/k03swSIsGjZhHI2WzJmticMgVuz0= +github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= +github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= +github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= +github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= 
+github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI= +github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= +github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= +github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= +github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= +github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= +github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= +github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= +github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= +github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/envoyproxy/protoc-gen-validate v1.0.4 h1:gVPz/FMfvh57HdSJQyvBtF00j8JU4zdyUgIUNhlgg0A= +github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= +github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8= +github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= +github.com/form3tech-oss/jwt-go v3.2.3+incompatible h1:7ZaBxOI7TMoYBfyA3cQHErNNyAWIKUMIwqxEtgHOs5c= +github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod 
h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= +github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc= +github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= +github.com/getsentry/sentry-go v0.12.0 h1:era7g0re5iY13bHSdN/xMkyV+5zZppjRVQhZrXCaEIk= +github.com/getsentry/sentry-go v0.12.0/go.mod h1:NSap0JBYWzHND8oMbyi0+XZhUalc1TBdRL1M71JZW2c= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/gin-contrib/sse v0.0.0-20190301062529-5545eab6dad3/go.mod h1:VJ0WA2NBN22VlZ2dKZQPAPnyWw5XTlK1KymzLKsr59s= +github.com/gin-gonic/gin v1.4.0/go.mod h1:OW2EZn3DO8Ln9oIKOvM++LBO+5UPHJJDH72/q/3rZdM= +github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= +github.com/go-errors/errors v1.0.1 h1:LUHzmkK3GUKUrL/1gfBUxAHzcev3apQlezX/+O7ma6w= +github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= +github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= +github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= +github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= +github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= +github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= +github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/gogo/googleapis 
v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= +github.com/gogo/googleapis v1.4.1 h1:1Yx4Myt7BxzvUr5ldGSbwYiZG6t9wGBZ+8/fX3Wvtq0= +github.com/gogo/googleapis v1.4.1/go.mod h1:2lpHqI5OcWCtVElxXnPt+s8oJvMpySlOyM6xDCrzib4= +github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/gogo/status v1.1.0 h1:+eIkrewn5q6b30y+g/BJINVVdi2xH7je5MPJ3ZPK3JA= +github.com/gogo/status v1.1.0/go.mod h1:BFv9nrluPLmrS0EmGVvLaPNmRosr9KapBYd5/hpY1WM= +github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= +github.com/golang/glog v1.2.0 h1:uCdmnmatrKCgMBlM4rMuJZWOkPDqdbZPnrMXDY4gI68= +github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= +github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= +github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod 
h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/gomodule/redigo v1.7.1-0.20190724094224-574c33c3df38/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= +github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= +github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= +github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= +github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= +github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= 
+github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= +github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00 h1:l5lAOZEym3oK3SQ2HBHWsJUfbNBiTXJDeW2QDxw9AQ0= +github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= +github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= +github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= +github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= +github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= +github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= +github.com/hashicorp/go-rootcerts v1.0.0/go.mod 
h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= +github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= +github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= +github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= +github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= +github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= +github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/hydrogen18/memlistener v0.0.0-20200120041712-dcc25e7acd91/go.mod h1:qEIFzExnS6016fRpRfxrExeVn2gbClQA99gQhnIcdhE= +github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA= +github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/iris-contrib/blackfriday v2.0.0+incompatible/go.mod h1:UzZ2bDEoaSGPbkg6SAB4att1aAwTmVIx/5gCVqeyUdI= +github.com/iris-contrib/go.uuid v2.0.0+incompatible/go.mod h1:iz2lgM/1UnEf1kP0L/+fafWORmlnuysV2EMP8MW+qe0= +github.com/iris-contrib/jade v1.1.3/go.mod h1:H/geBymxJhShH5kecoiOCSssPX7QWYH7UaeZTSWddIk= +github.com/iris-contrib/pongo2 v0.0.1/go.mod h1:Ssh+00+3GAZqSQb30AvBRNxBx7rf0GqwkjqxNd0u65g= +github.com/iris-contrib/schema v0.0.1/go.mod h1:urYA3uvUNG1TIIjOSCzHr9/LmbQo8LrOcOqfqxa4hXw= +github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= +github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ= +github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= 
+github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= +github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= +github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= +github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k= +github.com/kataras/golog v0.0.10/go.mod h1:yJ8YKCmyL+nWjERB90Qwn+bdyBZsaQwU3bTVFgkFIp8= +github.com/kataras/iris/v12 v12.1.8/go.mod h1:LMYy4VlP67TQ3Zgriz8RE2h2kMZV2SgMYbq3UhfoFmE= +github.com/kataras/neffos v0.0.14/go.mod h1:8lqADm8PnbeFfL7CLXh1WHw53dG27MC3pgi2R1rmoTE= +github.com/kataras/pio v0.0.2/go.mod h1:hAoW0t9UmXi4R5Oyq5Z4irTbaTsOemSrDGUtaTl7Dro= +github.com/kataras/sitemap v0.0.5/go.mod h1:KY2eugMKiPwsJgx7+U103YZehfvNGOXURubcGyk0Bz8= +github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= +github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= +github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/labstack/echo/v4 v4.5.0/go.mod h1:czIriw4a0C1dFun+ObrXp7ok03xON0N1awStJ6ArI7Y= +github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= +github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/magiconair/properties v1.8.5 h1:b6kJs+EmPFMYGkow9GiUyCyOvIwYetYJ3fSaWak/Gls= +github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= 
+github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/goveralls v0.0.2/go.mod h1:8d1ZMHsd7fW6IRPKQh46F2WRpyib5/X4FOpevwGNQEw= +github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= +github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8= +github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= +github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240430035521-259ae1d10016 h1:8WV4maXLeGEyJCCYIc1DmZ18H+VFAjMrwXJg5iI2nX4= +github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20240430035521-259ae1d10016/go.mod h1:1OIl0v5PQeNxIJhCvY+K55CBUOYDZevw9g9380u1Wek= +github.com/milvus-io/milvus/pkg v0.0.2-0.20240317152703-17b4938985f3 h1:ZBpRWhBa7FTFxW4YYVv9AUESoW1Xyb3KNXTzTqfkZmw= +github.com/milvus-io/milvus/pkg v0.0.2-0.20240317152703-17b4938985f3/go.mod h1:jQ2BUZny1COsgv1Qbcv8dmbppW+V9J/c4YQZNb3EOm8= +github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= +github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= +github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= +github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= +github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/mitchellh/mapstructure v1.4.1 h1:CpVNEelQCZBooIPDn+AR3NpivK/TIKU8bDxdASFVQag= +github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod 
h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= +github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= +github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= +github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= +github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= +github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0= +github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/panjf2000/ants/v2 v2.7.2 h1:2NUt9BaZFO5kQzrieOmK/wdb/tQ/K+QHaxN8sOgD63U= +github.com/panjf2000/ants/v2 v2.7.2/go.mod h1:KIBmYG9QQX5U2qzFP/yQJaq/nSb6rahS9iEHkrCMgM8= +github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pelletier/go-toml v1.9.3 h1:zeC5b1GviRUyKYd6OJPvBU/mcVDVoL1OhT17FCt5dSQ= +github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= +github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTmyFqUwr+jcCvpVkK7sumiz+ko5H9eq4= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= +github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= 
+github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= +github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= +github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= +github.com/prometheus/client_golang v1.14.0 h1:nJdhIvne2eSX/XRAFV9PcvFFRbrjbcTUj0VP62TMhnw= +github.com/prometheus/client_golang v1.14.0/go.mod h1:8vpkKitgIVNcqrRBWh1C4TIUQgYNtG/XQE4E/Zae36Y= +github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4= +github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w= +github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= +github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= +github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM= +github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc= +github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= +github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= +github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= +github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= +github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= +github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod 
h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/samber/lo v1.27.0 h1:GOyDWxsblvqYobqsmUuMddPa2/mMzkKyojlXol4+LaQ= +github.com/samber/lo v1.27.0/go.mod h1:it33p9UtPMS7z72fP4gw/EIfQB2eI8ke7GR2wc6+Rhg= +github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtmuhtR2uUrrJOpYzYRvbcPAid+g= +github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= +github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/shirou/gopsutil/v3 v3.22.9 h1:yibtJhIVEMcdw+tCTbOPiF1VcsuDeTE4utJ8Dm4c5eA= +github.com/shirou/gopsutil/v3 v3.22.9/go.mod h1:bBYl1kjgEJpWpxeHmLI+dVHWtyAwfcmSBLDsp2TNT8A= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= +github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= +github.com/smartystreets/assertions v1.1.0 h1:MkTeG1DMwsrdH7QtLXy5W+fUxWq+vmb6cLmyJ7aRtF0= +github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s= +github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= +github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= +github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= +github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= +github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/afero v1.6.0 h1:xoax2sJ2DT8S8xA2paPFjDCScCNeWsg75VG0DLRreiY= +github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= +github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng= +github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= +github.com/spf13/cobra v1.1.3/go.mod h1:pGADOWyqRD/YMrPZigI/zbliZ2wVD/23d+is3pSWzOo= +github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/jwalterweatherman v1.1.0 
h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= +github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= +github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= +github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= +github.com/spf13/viper v1.8.1 h1:Kq1fyeebqsBfbjZj4EL7gj2IO0mMaiyjYUWcUsl2O44= +github.com/spf13/viper v1.8.1/go.mod h1:o0Pch8wJ9BVSWGQMbra6iw0oQ5oktSIBaujf1rJH9Ns= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= +github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= +github.com/thoas/go-funk v0.9.1 h1:O549iLZqPpTUQ10ykd26sZhzD+rmR5pWhuElrhbC20M= +github.com/tidwall/gjson v1.17.1 h1:wlYEnwqAHgzmhNUFfw7Xalt2JzQvsMx2Se4PcoFCT/U= +github.com/tidwall/gjson v1.17.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tklauser/go-sysconf v0.3.10 h1:IJ1AZGZRWbY8T5Vfk04D9WOA5WSejdflXxP03OUqALw= +github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk= +github.com/tklauser/numcpus v0.4.0 h1:E53Dm1HjH1/R2/aoCtXtPgzmElmn51aOkhCFSuZq//o= +github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ= +github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 h1:uruHq4dN7GR16kFc5fp3d1RIYzJW5onx8Ybykw2YQFA= 
+github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o= +github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= +github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= +github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= +github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= +github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= +github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w= +github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= +github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= +github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= +github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= +github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= +github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= +github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= +github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/yalp/jsonpath v0.0.0-20180802001716-5cc68e5049a0/go.mod h1:/LWChgwKmvncFJFHJ7Gvn9wZArjbV5/FppcK2fKk/tI= +github.com/yudai/gojsondiff v1.0.0/go.mod h1:AY32+k2cwILAkW1fbgxQ5mUmMiZFgLIV+FBNExI05xg= +github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82/go.mod h1:lgjkn3NuSvDfVJdfcVVdX+jpBxNmX4rDAzaS45IcYoM= +github.com/yudai/pp v2.0.1+incompatible/go.mod h1:PuxR/8QJ7cyCkFp/aUDS+JY727OFEZkTdatxwunjIkc= +github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= +github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= +go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= +go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= +go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= +go.etcd.io/etcd/api/v3 v3.5.5 
h1:BX4JIbQ7hl7+jL+g+2j5UAr0o1bctCm6/Ct+ArBGkf0= +go.etcd.io/etcd/api/v3 v3.5.5/go.mod h1:KFtNaxGDw4Yx/BA4iPPwevUTAuqcsPxzyX8PHydchN8= +go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= +go.etcd.io/etcd/client/pkg/v3 v3.5.5 h1:9S0JUVvmrVl7wCF39iTQthdaaNIiAaQbmK75ogO6GU8= +go.etcd.io/etcd/client/pkg/v3 v3.5.5/go.mod h1:ggrwbk069qxpKPq8/FKkQ3Xq9y39kbFR4LnKszpRXeQ= +go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ= +go.etcd.io/etcd/client/v2 v2.305.5 h1:DktRP60//JJpnPC0VBymAN/7V71GHMdjDCBt4ZPXDjI= +go.etcd.io/etcd/client/v2 v2.305.5/go.mod h1:zQjKllfqfBVyVStbt4FaosoX2iYd8fV/GRy/PbowgP4= +go.etcd.io/etcd/client/v3 v3.5.5 h1:q++2WTJbUgpQu4B6hCuT7VkdwaTP7Qz6Daak3WzbrlI= +go.etcd.io/etcd/client/v3 v3.5.5/go.mod h1:aApjR4WGlSumpnJ2kloS75h6aHUmAyaPLjHMxpc7E7c= +go.etcd.io/etcd/pkg/v3 v3.5.5 h1:Ablg7T7OkR+AeeeU32kdVhw/AGDsitkKPl7aW73ssjU= +go.etcd.io/etcd/pkg/v3 v3.5.5/go.mod h1:6ksYFxttiUGzC2uxyqiyOEvhAiD0tuIqSZkX3TyPdaE= +go.etcd.io/etcd/raft/v3 v3.5.5 h1:Ibz6XyZ60OYyRopu73lLM/P+qco3YtlZMOhnXNS051I= +go.etcd.io/etcd/raft/v3 v3.5.5/go.mod h1:76TA48q03g1y1VpTue92jZLr9lIHKUNcYdZOOGyx8rI= +go.etcd.io/etcd/server/v3 v3.5.5 h1:jNjYm/9s+f9A9r6+SC4RvNaz6AqixpOvhrFdT0PvIj0= +go.etcd.io/etcd/server/v3 v3.5.5/go.mod h1:rZ95vDw/jrvsbj9XpTqPrTAB9/kzchVdhRirySPkUBc= +go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= +go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= +go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= +go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.25.0/go.mod h1:E5NNboN0UqSAki0Atn9kVwaN7I+l25gGxDqBueo/74E= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.38.0 h1:g/BAN5o90Pr6D8xMRezjzGOHBpc15U+4oE53nZLiae4= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.38.0/go.mod h1:+F41JBSkye7aYJELRvIMF0Z66reIwIOL0St75ZVwSJs= +go.opentelemetry.io/otel v1.0.1/go.mod h1:OPEOD4jIT2SlZPMmwT6FqZz2C0ZNdQqiWcoK6M0SNFU= +go.opentelemetry.io/otel v1.13.0 h1:1ZAKnNQKwBBxFtww/GwxNUyTf0AxkZzrukO8MeXqe4Y= +go.opentelemetry.io/otel v1.13.0/go.mod h1:FH3RtdZCzRkJYFTCsAKDy9l/XYjMdNv6QrkFFB8DvVg= +go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.13.0 h1:pa05sNT/P8OsIQ8mPZKTIyiBuzS/xDGLVx+DCt0y6Vs= +go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.13.0/go.mod h1:rqbht/LlhVBgn5+k3M5QK96K5Xb0DvXpMJ5SFQpY6uw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.0.1/go.mod h1:Kv8liBeVNFkkkbilbgWRpV+wWuu+H5xdOT6HAgd30iw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.13.0 h1:Any/nVxaoMq1T2w0W85d6w5COlLuCCgOYKQhJJWEMwQ= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.13.0/go.mod h1:46vAP6RWfNn7EKov73l5KBFlNxz8kYlxR1woU+bJ4ZY= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.0.1/go.mod h1:xOvWoTOrQjxjW61xtOmD/WKGRYb/P4NzRo3bs65U6Rk= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.13.0 h1:Wz7UQn7/eIqZVDJbuNEM6PmqeA71cWXrWcXekP5HZgU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.13.0/go.mod h1:OhH1xvgA5jZW2M/S4PcvtDlFE1VULRRBsibBrKuJQGI= 
+go.opentelemetry.io/otel/metric v0.35.0 h1:aPT5jk/w7F9zW51L7WgRqNKDElBdyRLGuBtI5MX34e8= +go.opentelemetry.io/otel/metric v0.35.0/go.mod h1:qAcbhaTRFU6uG8QM7dDo7XvFsWcugziq/5YI065TokQ= +go.opentelemetry.io/otel/sdk v1.0.1/go.mod h1:HrdXne+BiwsOHYYkBE5ysIcv2bvdZstxzmCQhxTcZkI= +go.opentelemetry.io/otel/sdk v1.13.0 h1:BHib5g8MvdqS65yo2vV1s6Le42Hm6rrw08qU6yz5JaM= +go.opentelemetry.io/otel/sdk v1.13.0/go.mod h1:YLKPx5+6Vx/o1TCUYYs+bpymtkmazOMT6zoRrC7AQ7I= +go.opentelemetry.io/otel/trace v1.0.1/go.mod h1:5g4i4fKLaX2BQpSBsxw8YYcgKpMMSW3x7ZTuYBr3sUk= +go.opentelemetry.io/otel/trace v1.13.0 h1:CBgRZ6ntv+Amuj1jDsMhZtlAPT6gbyIRdaIzFhfBSdY= +go.opentelemetry.io/otel/trace v1.13.0/go.mod h1:muCvmmO9KKpvuXSf3KKAXXB2ygNYHQ+ZfI5X08d3tds= +go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= +go.opentelemetry.io/proto/otlp v0.9.0/go.mod h1:1vKfU9rv61e9EVGthD1zNvUbiwPcimSsOPU9brfSHJg= +go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= +go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/atomic v1.10.0 h1:9qC72Qh0+3MqyJbAn8YU5xVq1frD8bn3JtD2oXtafVQ= +go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +go.uber.org/automaxprocs v1.5.2 h1:2LxUOGiR3O6tw8ui5sZa2LAaHnsviZdVOUZw4fvbnME= +go.uber.org/automaxprocs v1.5.2/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= +go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= +go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= +go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 
+golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= +golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= +golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= +golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= +golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= +golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 h1:Jvc7gsqn21cJHCmAWx0LiimpP18LZmUxkT5Mp7EZ1mI= +golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= +golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= +golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= +golang.org/x/mod 
v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= +golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190327091125-710a502c58a2/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20211008194852-3b03d305991f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602/go.mod 
h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys 
v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200923182605-d9f96fdee20d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
+golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190327201419-c70d86f8b7cf/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools 
v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= +golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= +golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= +golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE= +golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= +golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 
+google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= +google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= +google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= +google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= +google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= +google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= +google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg= +google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE= +google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= +google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= +google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= +google.golang.org/api v0.44.0/go.mod h1:EBOGZqzyhtvMDoxwS97ctnh0zUmYY6CxqXsc1AvkYD8= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= +google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= 
+google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= +google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= +google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= +google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210222152913-aa3ee6e6a81c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= 
+google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= +google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= +google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24= +google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= +google.golang.org/genproto v0.0.0-20230526161137-0005af68ea54 h1:9NWlQfY2ePejTmfwUH1OWwmznFa+0kKcHGPDvcPza9M= +google.golang.org/genproto v0.0.0-20230526161137-0005af68ea54/go.mod h1:zqTuNwFlFRsw5zIts5VnzLQxSRqh+CGOTVMlYbY0Eyk= +google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 h1:RFiFrvy37/mpSpdySBDrUdipW/dHwsRwh3J3+A9VgT4= +google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237/go.mod h1:Z5Iiy3jtmioajWHDGFk7CeugTyHtPvMHA4UTmUkyalE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 h1:NnYq6UN9ReLM9/Y01KWNOWyI5xQ9kbIms5GGJVwS/Yc= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= +google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= +google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= +google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= +google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8= +google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.37.1/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= +google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= +google.golang.org/grpc v1.40.0/go.mod 
h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= +google.golang.org/grpc v1.41.0/go.mod h1:U3l9uK9J0sini8mHphKoXyaqDA/8VyGnDee1zzIUK6k= +google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= +gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/ini.v1 v1.51.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/ini.v1 v1.51.1/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/ini.v1 v1.62.0 h1:duBzk771uxoUuOlyRLkHsygud9+5lrlGjdFBb4mSKDU= +gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= +gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= +gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= +gopkg.in/resty.v1 
v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20191120175047-4206685974f2/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +k8s.io/apimachinery v0.28.6 h1:RsTeR4z6S07srPg6XYrwXpTJVMXsjPXn0ODakMytSW0= +k8s.io/apimachinery v0.28.6/go.mod h1:QFNX/kCl/EMT2WTSz8k4WLCv2XnkOLMaL8GAVRMdpsA= +rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= +rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= +rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= +sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= +sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/tests/go_client/testcases/client_test.go b/tests/go_client/testcases/client_test.go new file mode 100644 index 0000000000000..5c203dfa7f25c --- /dev/null +++ b/tests/go_client/testcases/client_test.go @@ -0,0 +1,92 @@ +///go:build L0 + +package testcases + +import ( + "strings" + "testing" + "time" + + "github.com/milvus-io/milvus/tests/go_client/testcases/helper" + + clientv2 "github.com/milvus-io/milvus/client/v2" + "github.com/milvus-io/milvus/tests/go_client/base" + "github.com/milvus-io/milvus/tests/go_client/common" +) + +// test connect and close, connect again +func TestConnectClose(t *testing.T) { + // connect + ctx := helper.CreateContext(t, time.Second*common.DefaultTimeout) + mc, errConnect := 
base.NewMilvusClient(ctx, &defaultCfg)
+	common.CheckErr(t, errConnect, true)
+
+	// verify that the connection succeeded
+	listOpt := clientv2.NewListCollectionOption()
+	_, errList := mc.ListCollections(ctx, listOpt)
+	common.CheckErr(t, errList, true)
+
+	// close the connection and verify it is no longer usable
+	err := mc.Close(ctx)
+	common.CheckErr(t, err, true)
+	_, errList2 := mc.ListCollections(ctx, listOpt)
+	common.CheckErr(t, errList2, false, "service not ready[SDK=0]: not connected")
+
+	// connect again
+	mc, errConnect2 := base.NewMilvusClient(ctx, &defaultCfg)
+	common.CheckErr(t, errConnect2, true)
+	_, errList3 := mc.ListCollections(ctx, listOpt)
+	common.CheckErr(t, errList3, true)
+}
+
+func genInvalidClientConfig() []clientv2.ClientConfig {
+	invalidClientConfigs := []clientv2.ClientConfig{
+		{Address: "aaa"},                                    // non-existent address
+		{Address: strings.Split(*addr, ":")[0]},             // Address=localhost
+		{Address: strings.Split(*addr, ":")[1]},             // Address=19530
+		{Address: *addr, Username: "aaa"},                   // non-existent username
+		{Address: *addr, Username: "root", Password: "aaa"}, // wrong password
+		{Address: *addr, DBName: "aaa"},                     // non-existent db
+	}
+	return invalidClientConfigs
+}
+
+// test connect with timeout and invalid addr
+func TestConnectInvalidAddr(t *testing.T) {
+	// connect
+	ctx := helper.CreateContext(t, time.Second*5)
+	for _, invalidCfg := range genInvalidClientConfig() {
+		_, errConnect := base.NewMilvusClient(ctx, &invalidCfg)
+		common.CheckErr(t, errConnect, false, "context deadline exceeded")
+	}
+}
+
+// test connect repeatedly
+func TestConnectRepeat(t *testing.T) {
+	// connect
+	ctx := helper.CreateContext(t, time.Second*10)
+
+	_, errConnect := base.NewMilvusClient(ctx, &defaultCfg)
+	common.CheckErr(t, errConnect, true)
+
+	// connect again
+	mc, errConnect2 := base.NewMilvusClient(ctx, &defaultCfg)
+	common.CheckErr(t, errConnect2, true)
+
+	_, err := mc.ListCollections(ctx, clientv2.NewListCollectionOption())
+	common.CheckErr(t, err, true)
+}
+
+// test close repeatedly
+func TestCloseRepeat(t *testing.T) {
+	// connect
+	ctx := helper.CreateContext(t, time.Second*10)
+	mc, errConnect2 := base.NewMilvusClient(ctx, &defaultCfg)
+	common.CheckErr(t, errConnect2, true)
+
+	// close, then close again
+	err := mc.Close(ctx)
+	common.CheckErr(t, err, true)
+	err = mc.Close(ctx)
+	common.CheckErr(t, err, true)
+}
diff --git a/tests/go_client/testcases/collection_test.go b/tests/go_client/testcases/collection_test.go
new file mode 100644
index 0000000000000..d55a9cc9ab229
--- /dev/null
+++ b/tests/go_client/testcases/collection_test.go
@@ -0,0 +1,950 @@
+package testcases
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/milvus-io/milvus/pkg/log"
+	"go.uber.org/zap"
+
+	clientv2 "github.com/milvus-io/milvus/client/v2"
+	"github.com/milvus-io/milvus/client/v2/entity"
+	"github.com/milvus-io/milvus/tests/go_client/common"
+)
+
+var prefix = "collection"
+
+// test create default floatVec and binaryVec collection
+func TestCreateCollection(t *testing.T) {
+	t.Parallel()
+	ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout)
+	mc := createDefaultMilvusClient(ctx, t)
+	for _, collectionFieldsType := range []hp.CollectionFieldsType{hp.Int64Vec, hp.VarcharBinary, hp.Int64VarcharSparseVec, hp.AllFields} {
+		fields := hp.FieldsFact.GenFieldsForCollection(collectionFieldsType, hp.TNewFieldsOption())
+		schema := hp.GenSchema(hp.TNewSchemaOption().TWithFields(fields))
+		err :=
mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(schema.CollectionName, schema)) + common.CheckErr(t, err, true) + + // has collections and verify + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) + + // list collections and verify + collections, err := mc.ListCollections(ctx, clientv2.NewListCollectionOption()) + common.CheckErr(t, err, true) + require.Contains(t, collections, schema.CollectionName) + } +} + +//func TestCreateCollection(t *testing.T) {} +func TestCreateAutoIdCollectionField(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true) + varcharField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithIsAutoID(true).WithMaxLength(common.MaxLength) + for _, pkField := range []*entity.Field{int64Field, varcharField} { + // pk field with name + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + require.True(t, coll.Schema.AutoID) + require.True(t, coll.Schema.Fields[0].AutoID) + + // insert + vecColumn := hp.GenColumnData(common.DefaultNb, vecField.DataType, *hp.TNewColumnOption()) + _, err = mc.Insert(ctx, clientv2.NewColumnBasedInsertOption(schema.CollectionName, vecColumn)) + common.CheckErr(t, err, true) + } +} + +// create collection and specify shard num +func TestCreateCollectionShards(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true) + for _, shard := range []int32{-1, 0, 2, 16} { + // pk field with name + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema).WithShardNum(shard)) + common.CheckErr(t, err, true) + + // verify field name + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + if shard < 1 { + shard = 1 + } + require.Equal(t, shard, coll.ShardNum) + } +} + +// test create auto collection with schema +func TestCreateAutoIdCollectionSchema(t *testing.T) { + t.Skip("waiting for valid AutoId from schema params") + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := 
entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + for _, pkFieldType := range []entity.FieldType{entity.FieldTypeVarChar, entity.FieldTypeInt64} { + pkField := entity.NewField().WithName("pk").WithDataType(pkFieldType).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + // pk field with name + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithAutoID(true) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + log.Info("schema autoID", zap.Bool("schemaAuto", coll.Schema.AutoID)) + log.Info("field autoID", zap.Bool("fieldAuto", coll.Schema.Fields[0].AutoID)) + + // insert + vecColumn := hp.GenColumnData(common.DefaultNb, vecField.DataType, *hp.TNewColumnOption()) + _, err = mc.Insert(ctx, clientv2.NewColumnBasedInsertOption(schema.CollectionName, vecColumn)) + common.CheckErr(t, err, false, "field pk not passed") + } +} + +// test create auto collection with collection option +func TestCreateAutoIdCollection(t *testing.T) { + t.Skip("waiting for valid AutoId from collection option") + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + for _, pkFieldType := range []entity.FieldType{entity.FieldTypeVarChar, entity.FieldTypeInt64} { + pkField := entity.NewField().WithName("pk").WithDataType(pkFieldType).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + // pk field with name + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema).WithAutoID(true)) + common.CheckErr(t, err, true) + + // verify field name + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + log.Info("schema autoID", zap.Bool("schemaAuto", coll.Schema.AutoID)) + log.Info("field autoID", zap.Bool("fieldAuto", coll.Schema.Fields[0].AutoID)) + + // insert + vecColumn := hp.GenColumnData(common.DefaultNb, vecField.DataType, *hp.TNewColumnOption()) + _, err = mc.Insert(ctx, clientv2.NewColumnBasedInsertOption(schema.CollectionName, vecColumn)) + common.CheckErr(t, err, false, "field pk not passed") + } +} + +func TestCreateJsonCollection(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + pkField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + jsonField := entity.NewField().WithName(common.DefaultJSONFieldName).WithDataType(entity.FieldTypeJSON) + + // pk field with name + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithField(jsonField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, 
true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) +} + +func TestCreateArrayCollections(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + pkField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField) + + for _, eleType := range hp.GetAllArrayElementType() { + arrayField := entity.NewField().WithName(hp.GetFieldNameByElementType(eleType)).WithDataType(entity.FieldTypeArray).WithElementType(eleType).WithMaxCapacity(common.MaxCapacity) + if eleType == entity.FieldTypeVarChar { + arrayField.WithMaxLength(common.MaxLength) + } + schema.WithField(arrayField) + } + + // pk field with name + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) +} + +// test create collection with partition key not supported field type +func TestCreateCollectionPartitionKey(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + t.Parallel() + + for _, fieldType := range []entity.FieldType{entity.FieldTypeVarChar, entity.FieldTypeInt64} { + partitionKeyField := entity.NewField().WithName("par_key").WithDataType(fieldType).WithIsPartitionKey(true).WithMaxLength(common.TestMaxLen) + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField).WithField(partitionKeyField) + + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + + for _, field := range coll.Schema.Fields { + if field.Name == "par_key" { + require.True(t, field.IsPartitionKey) + } + } + + // verify partitions + partitions, err := mc.ListPartitions(ctx, clientv2.NewListPartitionOption(collName)) + require.Len(t, partitions, common.DefaultPartitionNum) + } +} + +// test create partition key collection WithPartitionNum +func TestCreateCollectionPartitionKeyNumPartition(t *testing.T) { + t.Skip("Waiting for WithPartitionNum") + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + partitionKeyField := 
entity.NewField().WithName("par_key").WithDataType(entity.FieldTypeInt64).WithIsPartitionKey(true) + t.Parallel() + + for _, numPartition := range []int64{1, 128, 64, 4096} { + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField).WithField(partitionKeyField) + + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify partitions num + partitions, err := mc.ListPartitions(ctx, clientv2.NewListPartitionOption(collName)) + require.Len(t, partitions, int(numPartition)) + } +} + +func TestCreateCollectionDynamicSchema(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + pkField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithDynamicFieldEnabled(true) + // pk field with name + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) + + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(schema.CollectionName)) + require.True(t, coll.Schema.EnableDynamicField) + + // insert dynamic + columnOption := *hp.TNewColumnOption() + varcharColumn := hp.GenColumnData(common.DefaultNb, entity.FieldTypeVarChar, columnOption) + vecColumn := hp.GenColumnData(common.DefaultNb, entity.FieldTypeFloatVector, columnOption) + dynamicData := hp.GenDynamicFieldData(0, common.DefaultNb) + _, err = mc.Insert(ctx, clientv2.NewColumnBasedInsertOption(schema.CollectionName, varcharColumn, vecColumn).WithColumns(dynamicData...)) + common.CheckErr(t, err, true) +} + +func TestCreateCollectionDynamic(t *testing.T) { + t.Skip("waiting for dynamicField alignment") + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + pkField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField) + // pk field with name + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema).WithDynamicSchema(true)) + common.CheckErr(t, err, true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) + + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(schema.CollectionName)) + log.Info("collection dynamic", zap.Bool("collectionSchema", coll.Schema.EnableDynamicField)) + //require.True(t, coll.Schema.Fields[0].IsDynamic) + + // insert dynamic + columnOption := *hp.TNewColumnOption() + 
varcharColumn := hp.GenColumnData(common.DefaultNb, entity.FieldTypeVarChar, columnOption) + vecColumn := hp.GenColumnData(common.DefaultNb, entity.FieldTypeFloatVector, columnOption) + dynamicData := hp.GenDynamicFieldData(0, common.DefaultNb) + _, err = mc.Insert(ctx, clientv2.NewColumnBasedInsertOption(schema.CollectionName, varcharColumn, vecColumn).WithColumns(dynamicData...)) + common.CheckErr(t, err, false, "field dynamicNumber does not exist") +} + +func TestCreateCollectionAllFields(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName) + + // gen all fields except sparse vector + fields := hp.FieldsFactory{}.GenFieldsForCollection(hp.AllFields, hp.TNewFieldsOption()) + for _, field := range fields { + schema.WithField(field) + } + + // pk field with name + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) +} + +func TestCreateCollectionSparseVector(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + sparseVecField := entity.NewField().WithName(common.DefaultSparseVecFieldName).WithDataType(entity.FieldTypeSparseVector) + pkField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true).WithMaxLength(common.MaxLength) + + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(sparseVecField) + // pk field with name + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema).WithDynamicSchema(true)) + common.CheckErr(t, err, true) + + // verify field name + has, err := mc.HasCollection(ctx, clientv2.NewHasCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + require.True(t, has) +} + +func TestCreateCollectionWithValidFieldName(t *testing.T) { + t.Parallel() + // connect + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // create collection with valid field name + for _, name := range common.GenValidNames() { + collName := common.GenRandomString(prefix, 6) + + // pk field with name + pkField := entity.NewField().WithName(name).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // verify field name + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + require.Equal(t, name, coll.Schema.Fields[0].Name) + } +} + +func genDefaultSchema() *entity.Schema { + int64Pk := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + varchar := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithMaxLength(common.TestMaxLen) + floatVec := 
entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + binaryVec := entity.NewField().WithName(common.DefaultBinaryVecFieldName).WithDataType(entity.FieldTypeBinaryVector).WithDim(common.DefaultDim) + + schema := entity.NewSchema().WithField(int64Pk).WithField(varchar).WithField(floatVec).WithField(binaryVec) + return schema +} + +// create collection with valid name +func TestCreateCollectionWithValidName(t *testing.T) { + t.Parallel() + // connect + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + for _, name := range common.GenValidNames() { + schema := genDefaultSchema().WithName(name) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(name, schema)) + common.CheckErr(t, err, true) + + collections, err := mc.ListCollections(ctx, clientv2.NewListCollectionOption()) + common.CheckErr(t, err, true) + require.Contains(t, collections, name) + + err = mc.DropCollection(ctx, clientv2.NewDropCollectionOption(name)) + common.CheckErr(t, err, true) + } +} + +// create collection with invalid field name +func TestCreateCollectionWithInvalidFieldName(t *testing.T) { + t.Parallel() + // connect + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // create collection with invalid field name + for _, invalidName := range common.GenInvalidNames() { + log.Debug("TestCreateCollectionWithInvalidFieldName", zap.String("fieldName", invalidName)) + pkField := entity.NewField().WithName(invalidName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + schema := entity.NewSchema().WithName("aaa").WithField(pkField) + collOpt := clientv2.NewCreateCollectionOption("aaa", schema) + + err := mc.CreateCollection(ctx, collOpt) + common.CheckErr(t, err, false, "field name should not be empty", + "The first character of a field name must be an underscore or letter", + "Field name cannot only contain numbers, letters, and underscores", + "The length of a field name must be less than 255 characters") + } +} + +// create collection with invalid collection name: invalid str, schemaName isn't equal to collectionName, schema name is empty +func TestCreateCollectionWithInvalidCollectionName(t *testing.T) { + t.Parallel() + // connect + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + collName := common.GenRandomString(prefix, 6) + + // create collection and schema no name + schema := genDefaultSchema() + err2 := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err2, false, "collection name should not be empty") + + // create collection with invalid schema name + for _, invalidName := range common.GenInvalidNames() { + log.Debug("TestCreateCollectionWithInvalidCollectionName", zap.String("collectionName", invalidName)) + + // schema has invalid name + schema.WithName(invalidName) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "collection name should not be empty", + "the first character of a collection name must be an underscore or letter", + "collection name can only contain numbers, letters and underscores", + "the length of a collection name must be less than 255 characters") + + // collection option has invalid name + schema.WithName(collName) + err2 := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(invalidName, 
schema)) + common.CheckErr(t, err2, false, "collection name matches schema name") + } + + // collection name not equal to schema name + schema.WithName(collName) + err3 := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(common.GenRandomString("pre", 4), schema)) + common.CheckErr(t, err3, false, "collection name matches schema name") +} + +// create collection missing pk field or vector field +func TestCreateCollectionInvalidFields(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + type invalidFieldsStruct struct { + fields []*entity.Field + errMsg string + } + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + pkField2 := entity.NewField().WithName("pk").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + varcharField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar) + stringField := entity.NewField().WithName("str").WithDataType(entity.FieldTypeString) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + noneField := entity.NewField().WithName("none").WithDataType(entity.FieldTypeNone) + invalidFields := []invalidFieldsStruct{ + // TODO https://github.com/milvus-io/milvus/issues/33199 + //{fields: []*entity.Field{pkField}, errMsg: "vector field not set"}, + {fields: []*entity.Field{vecField}, errMsg: "primary key is not specified"}, + {fields: []*entity.Field{pkField, pkField2, vecField}, errMsg: "there are more than one primary key"}, + {fields: []*entity.Field{pkField, vecField, noneField}, errMsg: "data type None is not valid"}, + {fields: []*entity.Field{pkField, vecField, stringField}, errMsg: "string data type not supported yet, please use VarChar type instead"}, + {fields: []*entity.Field{pkField, vecField, varcharField}, errMsg: "type param(max_length) should be specified for varChar field"}, + } + + collName := common.GenRandomString(prefix, 6) + for _, invalidField := range invalidFields { + schema := entity.NewSchema().WithName(collName) + for _, field := range invalidField.fields { + schema.WithField(field) + } + collOpt := clientv2.NewCreateCollectionOption(collName, schema) + err := mc.CreateCollection(ctx, collOpt) + common.CheckErr(t, err, false, invalidField.errMsg) + } +} + +// create autoID or not collection with non-int64 and non-varchar field +func TestCreateCollectionInvalidAutoPkField(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + t.Parallel() + // create collection with autoID true or not + collName := common.GenRandomString(prefix, 6) + + for _, autoId := range []bool{true, false} { + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + // pk field type: non-int64 and non-varchar + for _, fieldType := range hp.GetInvalidPkFieldType() { + invalidPkField := entity.NewField().WithName("pk").WithDataType(fieldType).WithIsPrimaryKey(true) + schema := entity.NewSchema().WithName(collName).WithField(vecField).WithField(invalidPkField).WithAutoID(autoId) + errNonInt64Field := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, errNonInt64Field, false, "the data type of primary key should be Int64 or VarChar") + } + } +} + +// test create collection 
with duplicate field name +func TestCreateCollectionDuplicateField(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // duplicate field + pkField := entity.NewField().WithName("id").WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true) + pkField2 := entity.NewField().WithName("id").WithDataType(entity.FieldTypeVarChar) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + + // two vector fields have same name + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithField(vecField) + errDupField := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, errDupField, false, "duplicated field name") + + // two named "id" fields, one is pk field and other is scalar field + schema2 := entity.NewSchema().WithName(collName).WithField(pkField).WithField(pkField2).WithField(vecField).WithAutoID(true) + errDupField2 := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema2)) + common.CheckErr(t, errDupField2, false, "duplicated field name") +} + +// test create collection with partition key not supported field type +func TestCreateCollectionInvalidPartitionKeyType(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + collName := common.GenRandomString(prefix, 6) + + t.Parallel() + for _, fieldType := range hp.GetInvalidPartitionKeyFieldType() { + log.Debug("TestCreateCollectionInvalidPartitionKeyType", zap.Any("partitionKeyFieldType", fieldType)) + partitionKeyField := entity.NewField().WithName("parKey").WithDataType(fieldType).WithIsPartitionKey(true) + if fieldType == entity.FieldTypeArray { + partitionKeyField.WithElementType(entity.FieldTypeInt64) + } + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField).WithField(partitionKeyField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "the data type of partition key should be Int64 or VarChar") + } +} + +// partition key field cannot be primary field, and there can only be one partition key field +func TestCreateCollectionPartitionKeyPk(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsPartitionKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + collName := common.GenRandomString(prefix, 6) + + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "the partition key field must not be primary field") +} + +// can only be one partition key field +func TestCreateCollectionPartitionKeyNum(t *testing.T) { + ctx := 
hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + collName := common.GenRandomString(prefix, 6) + + pkField1 := entity.NewField().WithName("pk_1").WithDataType(entity.FieldTypeInt64).WithIsPartitionKey(true) + pkField2 := entity.NewField().WithName("pk_2").WithDataType(entity.FieldTypeVarChar).WithMaxLength(common.TestMaxLen).WithIsPartitionKey(true) + + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField).WithField(pkField1).WithField(pkField2) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "there are more than one partition key") +} + +func TestPartitionKeyInvalidNumPartition(t *testing.T) { + t.Skip("Waiting for num partition") + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // prepare field and schema + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + pkField1 := entity.NewField().WithName("partitionKeyField").WithDataType(entity.FieldTypeInt64).WithIsPartitionKey(true) + + // schema + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField).WithField(pkField1) + invalidNumPartitionStruct := []struct { + numPartitions int64 + errMsg string + }{ + {common.MaxPartitionNum + 1, "exceeds max configuration (4096)"}, + {-1, "the specified partitions should be greater than 0 if partition key is used"}, + } + for _, npStruct := range invalidNumPartitionStruct { + + // create collection with num partitions + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, npStruct.errMsg) + } +} + +// test create collection with multi auto id +func TestCreateCollectionMultiAutoId(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithField( + entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true)).WithField( + entity.NewField().WithName("dupInt").WithDataType(entity.FieldTypeInt64).WithIsAutoID(true)).WithField( + entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim), + ).WithName(collName) + errMultiAuto := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, errMultiAuto, false, "only one field can speficy AutoID with true") +} + +// test create collection with different autoId between pk field and schema +func TestCreateCollectionInconsistentAutoId(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + for _, autoId := range []bool{true, false} { + log.Debug("TestCreateCollectionInconsistentAutoId", zap.Bool("autoId", autoId)) + 
collName := common.GenRandomString(prefix, 6) + // field and schema have opposite autoID + schema := entity.NewSchema().WithField( + entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(autoId)).WithField( + entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim), + ).WithName(collName).WithAutoID(!autoId) + + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + // describe collection + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + require.EqualValues(t, autoId, coll.Schema.AutoID) + for _, field := range coll.Schema.Fields { + if field.Name == common.DefaultInt64FieldName { + require.EqualValues(t, autoId, field.AutoID) + } + } + } +} + +// create collection with field or schema description +func TestCreateCollectionDescription(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + // gen field with description + pkDesc := "This is pk field" + schemaDesc := "This is schema" + collName := common.GenRandomString(prefix, 6) + + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithDescription(pkDesc) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithDescription(schemaDesc) + + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, true) + + coll, err := mc.DescribeCollection(ctx, clientv2.NewDescribeCollectionOption(collName)) + common.CheckErr(t, err, true) + require.EqualValues(t, schemaDesc, coll.Schema.Description) + for _, field := range coll.Schema.Fields { + if field.Name == common.DefaultInt64FieldName { + require.Equal(t, pkDesc, field.Description) + } else { + require.Empty(t, field.Description) + } + } +} + +// test invalid dim of binary field +func TestCreateBinaryCollectionInvalidDim(t *testing.T) { + t.Parallel() + type invalidDimStruct struct { + dim int64 + errMsg string + } + + invalidDims := []invalidDimStruct{ + {dim: 10, errMsg: "should be multiple of 8"}, + {dim: 0, errMsg: "should be in range 2 ~ 32768"}, + {dim: 1, errMsg: "should be in range 2 ~ 32768"}, + {dim: common.MaxDim * 9, errMsg: "binary vector dimension should be in range 2 ~ 262144"}, + {dim: common.MaxDim*8 + 1, errMsg: "binary vector dimension should be multiple of 8"}, + } + + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + for _, invalidDim := range invalidDims { + log.Debug("TestCreateBinaryCollectionInvalidDim", zap.Int64("dim", invalidDim.dim)) + collName := common.GenRandomString(prefix, 6) + // gen schema with binary vector field of invalid dim + schema := entity.NewSchema().WithField( + entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)).WithField( + entity.NewField().WithName(common.DefaultBinaryVecFieldName).WithDataType(entity.FieldTypeBinaryVector).WithDim(invalidDim.dim), + ).WithName(collName) + + // create collection + err := mc.CreateCollection(ctx, 
clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, invalidDim.errMsg) + } +} + +// test invalid dim of float vector +func TestCreateFloatCollectionInvalidDim(t *testing.T) { + t.Parallel() + type invalidDimStruct struct { + dim string + errMsg string + } + + invalidDims := []invalidDimStruct{ + {dim: "0", errMsg: "should be in range 2 ~ 32768"}, + {dim: "1", errMsg: "should be in range 2 ~ 32768"}, + {dim: "", errMsg: "invalid syntax"}, + {dim: "中文", errMsg: "invalid syntax"}, + {dim: "%$#", errMsg: "invalid syntax"}, + {dim: fmt.Sprintf("%d", common.MaxDim+1), errMsg: "float vector dimension should be in range 2 ~ 32768"}, + } + + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + for _, vecType := range []entity.FieldType{entity.FieldTypeFloatVector, entity.FieldTypeFloat16Vector, entity.FieldTypeBFloat16Vector} { + for _, invalidDim := range invalidDims { + log.Debug("TestCreateFloatCollectionInvalidDim", zap.String("dim", invalidDim.dim)) + collName := common.GenRandomString(prefix, 6) + + schema := entity.NewSchema().WithField( + entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)).WithField( + entity.NewField().WithName("pk").WithDataType(vecType).WithTypeParams(entity.TypeParamDim, invalidDim.dim), + ).WithName(collName) + + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, invalidDim.errMsg) + } + } +} + +func TestCreateVectorWithoutDim(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + collName := common.GenRandomString(prefix, 6) + + schema := entity.NewSchema().WithField( + entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)).WithField( + entity.NewField().WithName("vec").WithDataType(entity.FieldTypeFloatVector), + ).WithName(collName) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "dimension is not defined in field type params, check type param `dim` for vector field") +} + +// specify dim for sparse vector -> error +func TestCreateCollectionSparseVectorWithDim(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + collName := common.GenRandomString(prefix, 6) + + schema := entity.NewSchema().WithField( + entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)).WithField( + entity.NewField().WithName("sparse").WithDataType(entity.FieldTypeSparseVector).WithDim(common.DefaultDim), + ).WithName(collName) + + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "dim should not be specified for sparse vector field sparse") +} + +func TestCreateArrayFieldInvalidCapacity(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + 
arrayField := entity.NewField().WithName(common.DefaultArrayFieldName).WithDataType(entity.FieldTypeArray).WithElementType(entity.FieldTypeFloat) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithField(arrayField) + + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "type param(max_capacity) should be specified for array field") + + // invalid Capacity + for _, invalidCapacity := range []int64{-1, 0, common.MaxCapacity + 1} { + arrayField.WithMaxCapacity(invalidCapacity) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "the maximum capacity specified for a Array should be in (0, 4096]") + } +} + +// test create collection varchar array with invalid max length +func TestCreateVarcharArrayInvalidLength(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + arrayVarcharField := entity.NewField().WithName(common.DefaultArrayFieldName).WithDataType(entity.FieldTypeArray).WithElementType(entity.FieldTypeVarChar).WithMaxCapacity(100) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithField(arrayVarcharField) + + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "type param(max_length) should be specified for varChar field") + + // invalid max length + for _, invalidLength := range []int64{-1, 0, common.MaxLength + 1} { + arrayVarcharField.WithMaxLength(invalidLength) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "the maximum length specified for a VarChar should be in (0, 65535]") + } +} + +// test create collection with varchar primary key field of invalid max length +func TestCreateVarcharInvalidLength(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + + varcharField := entity.NewField().WithName(common.DefaultVarcharFieldName).WithDataType(entity.FieldTypeVarChar).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + + schema := entity.NewSchema().WithName(collName).WithField(varcharField).WithField(vecField) + // create collection + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "type param(max_length) should be specified for varChar field") + + // invalid max length + for _, invalidLength := range []int64{-1, 0, common.MaxLength + 1} { + varcharField.WithMaxLength(invalidLength) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, "the maximum length specified for a VarChar should be in (0, 65535]") + } +} + +func TestCreateArrayNotSupportedFieldType(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := 
createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + // not supported ElementType: Array, Json, FloatVector, BinaryVector + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + for _, fieldType := range []entity.FieldType{entity.FieldTypeArray, entity.FieldTypeJSON, entity.FieldTypeBinaryVector, entity.FieldTypeFloatVector} { + field := entity.NewField().WithName("array").WithDataType(entity.FieldTypeArray).WithElementType(fieldType) + schema := entity.NewSchema().WithName(collName).WithField(pkField).WithField(vecField).WithField(field) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, fmt.Sprintf("element type %s is not supported", fieldType.Name())) + } +} + +// the num of vector fields > default limit=4 +func TestCreateMultiVectorExceed(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + pkField := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + schema := entity.NewSchema().WithName(collName).WithField(pkField) + for i := 0; i < common.MaxVectorFieldNum+1; i++ { + vecField := entity.NewField().WithName(fmt.Sprintf("vec_%d", i)).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + schema.WithField(vecField) + } + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema)) + common.CheckErr(t, err, false, fmt.Sprintf("maximum vector field's number should be limited to %d", common.MaxVectorFieldNum)) +} + +//func TestCreateCollection(t *testing.T) {} +func TestCreateCollectionInvalidShards(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + vecField := entity.NewField().WithName(common.DefaultFloatVecFieldName).WithDataType(entity.FieldTypeFloatVector).WithDim(common.DefaultDim) + int64Field := entity.NewField().WithName(common.DefaultInt64FieldName).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true) + for _, shard := range []int32{common.MaxShardNum + 1} { + // pk field with name + collName := common.GenRandomString(prefix, 6) + schema := entity.NewSchema().WithName(collName).WithField(int64Field).WithField(vecField) + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, schema).WithShardNum(shard)) + common.CheckErr(t, err, false, fmt.Sprintf("maximum shards's number should be limited to %d", common.MaxShardNum)) + } +} + +func TestCreateCollectionInvalid(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + collName := common.GenRandomString(prefix, 6) + type mSchemaErr struct { + schema *entity.Schema + errMsg string + } + mSchemaErrs := []mSchemaErr{ + {schema: nil, errMsg: "duplicated field name"}, + {schema: entity.NewSchema(), errMsg: "collection name should not be empty"}, + {schema: entity.NewSchema().WithName("aaa"), errMsg: "primary key is not specified"}, + {schema: entity.NewSchema().WithName("aaa").WithField(entity.NewField()), errMsg: "primary key is not specified"}, + {schema: 
entity.NewSchema().WithName("aaa").WithField(entity.NewField().WithIsPrimaryKey(true)), errMsg: "the data type of primary key should be Int64 or VarChar"}, + {schema: entity.NewSchema().WithName("aaa").WithField(entity.NewField().WithIsPrimaryKey(true).WithDataType(entity.FieldTypeVarChar)), errMsg: "field name should not be empty"}, + } + for _, mSchema := range mSchemaErrs { + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(collName, mSchema.schema)) + common.CheckErr(t, err, false, mSchema.errMsg) + } +} diff --git a/tests/go_client/testcases/helper/collection_helper.go b/tests/go_client/testcases/helper/collection_helper.go new file mode 100644 index 0000000000000..347468142c9de --- /dev/null +++ b/tests/go_client/testcases/helper/collection_helper.go @@ -0,0 +1,11 @@ +package helper + +type CreateCollectionParams struct { + CollectionFieldsType CollectionFieldsType // collection fields type +} + +func NewCreateCollectionParams(collectionFieldsType CollectionFieldsType) *CreateCollectionParams { + return &CreateCollectionParams{ + CollectionFieldsType: collectionFieldsType, + } +} diff --git a/tests/go_client/testcases/helper/data_helper.go b/tests/go_client/testcases/helper/data_helper.go new file mode 100644 index 0000000000000..56660c8c05e31 --- /dev/null +++ b/tests/go_client/testcases/helper/data_helper.go @@ -0,0 +1,324 @@ +package helper + +import ( + "bytes" + "strconv" + + "github.com/milvus-io/milvus/client/v2/column" + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/common" + "go.uber.org/zap" +) + +// insert params +type InsertParams struct { + Schema *entity.Schema + PartitionName string + Start int + Nb int + IsRows bool +} + +func NewInsertParams(schema *entity.Schema, nb int) *InsertParams { + return &InsertParams{ + Schema: schema, + Nb: nb, + } +} + +func (opt *InsertParams) TWithPartitionName(partitionName string) *InsertParams { + opt.PartitionName = partitionName + return opt +} + +func (opt *InsertParams) TWithStart(start int) *InsertParams { + opt.Start = start + return opt +} + +func (opt *InsertParams) TWithIsRows(isRows bool) *InsertParams { + opt.IsRows = isRows + return opt +} + +// GenColumnDataOption -- create column data -- +type GenColumnOption struct { + dim int64 + maxLen int64 + start int + fieldName string + elementType entity.FieldType +} + +func (opt *GenColumnOption) TWithDim(dim int64) *GenColumnOption { + opt.dim = dim + return opt +} + +func (opt *GenColumnOption) TWithMaxLen(maxLen int64) *GenColumnOption { + opt.maxLen = maxLen + return opt +} + +func (opt *GenColumnOption) TWithStart(start int) *GenColumnOption { + opt.start = start + return opt +} + +func (opt *GenColumnOption) TWithFieldName(fieldName string) *GenColumnOption { + opt.fieldName = fieldName + return opt +} + +func (opt *GenColumnOption) TWithElementType(eleType entity.FieldType) *GenColumnOption { + opt.elementType = eleType + return opt +} + +func TNewColumnOption() *GenColumnOption { + return &GenColumnOption{ + dim: common.DefaultDim, + maxLen: common.TestMaxLen, + start: 0, + } +} + +func GenArrayColumnData(nb int, eleType entity.FieldType, option GenColumnOption) column.Column { + start := option.start + fieldName := option.fieldName + if option.fieldName == "" { + fieldName = GetFieldNameByElementType(eleType) + } + capacity := int(option.maxLen) + switch eleType { + case entity.FieldTypeBool: + boolValues := make([][]bool, 0, nb) + for i := start; i < start+nb; i++ 
{ + boolArray := make([]bool, 0, capacity) + for j := 0; j < capacity; j++ { + boolArray = append(boolArray, i%2 == 0) + } + boolValues = append(boolValues, boolArray) + } + return column.NewColumnBoolArray(fieldName, boolValues) + case entity.FieldTypeInt8: + int8Values := make([][]int8, 0, nb) + for i := start; i < start+nb; i++ { + int8Array := make([]int8, 0, capacity) + for j := 0; j < capacity; j++ { + int8Array = append(int8Array, int8(i+j)) + } + int8Values = append(int8Values, int8Array) + } + return column.NewColumnInt8Array(fieldName, int8Values) + case entity.FieldTypeInt16: + int16Values := make([][]int16, 0, nb) + for i := start; i < start+nb; i++ { + int16Array := make([]int16, 0, capacity) + for j := 0; j < capacity; j++ { + int16Array = append(int16Array, int16(i+j)) + } + int16Values = append(int16Values, int16Array) + } + return column.NewColumnInt16Array(fieldName, int16Values) + case entity.FieldTypeInt32: + int32Values := make([][]int32, 0, nb) + for i := start; i < start+nb; i++ { + int32Array := make([]int32, 0, capacity) + for j := 0; j < capacity; j++ { + int32Array = append(int32Array, int32(i+j)) + } + int32Values = append(int32Values, int32Array) + } + return column.NewColumnInt32Array(fieldName, int32Values) + case entity.FieldTypeInt64: + int64Values := make([][]int64, 0, nb) + for i := start; i < start+nb; i++ { + int64Array := make([]int64, 0, capacity) + for j := 0; j < capacity; j++ { + int64Array = append(int64Array, int64(i+j)) + } + int64Values = append(int64Values, int64Array) + } + return column.NewColumnInt64Array(fieldName, int64Values) + case entity.FieldTypeFloat: + floatValues := make([][]float32, 0, nb) + for i := start; i < start+nb; i++ { + floatArray := make([]float32, 0, capacity) + for j := 0; j < capacity; j++ { + floatArray = append(floatArray, float32(i+j)) + } + floatValues = append(floatValues, floatArray) + } + return column.NewColumnFloatArray(fieldName, floatValues) + case entity.FieldTypeDouble: + doubleValues := make([][]float64, 0, nb) + for i := start; i < start+nb; i++ { + doubleArray := make([]float64, 0, capacity) + for j := 0; j < capacity; j++ { + doubleArray = append(doubleArray, float64(i+j)) + } + doubleValues = append(doubleValues, doubleArray) + } + return column.NewColumnDoubleArray(fieldName, doubleValues) + case entity.FieldTypeVarChar: + varcharValues := make([][][]byte, 0, nb) + for i := start; i < start+nb; i++ { + varcharArray := make([][]byte, 0, capacity) + for j := 0; j < capacity; j++ { + var buf bytes.Buffer + buf.WriteString(strconv.Itoa(i + j)) + varcharArray = append(varcharArray, buf.Bytes()) + } + varcharValues = append(varcharValues, varcharArray) + } + return column.NewColumnVarCharArray(fieldName, varcharValues) + default: + log.Fatal("GenArrayColumnData failed", zap.Any("ElementType", eleType)) + return nil + } +} + +// GenColumnData GenColumnDataOption +func GenColumnData(nb int, fieldType entity.FieldType, option GenColumnOption) column.Column { + dim := int(option.dim) + maxLen := int(option.maxLen) + start := option.start + fieldName := option.fieldName + if option.fieldName == "" { + fieldName = GetFieldNameByFieldType(fieldType, option.elementType) + } + switch fieldType { + case entity.FieldTypeInt64: + int64Values := make([]int64, 0, nb) + for i := start; i < start+nb; i++ { + int64Values = append(int64Values, int64(i)) + } + return column.NewColumnInt64(fieldName, int64Values) + + case entity.FieldTypeInt8: + int8Values := make([]int8, 0, nb) + for i := start; i < start+nb; i++ { + 
int8Values = append(int8Values, int8(i)) + } + return column.NewColumnInt8(fieldName, int8Values) + + case entity.FieldTypeInt16: + int16Values := make([]int16, 0, nb) + for i := start; i < start+nb; i++ { + int16Values = append(int16Values, int16(i)) + } + return column.NewColumnInt16(fieldName, int16Values) + + case entity.FieldTypeInt32: + int32Values := make([]int32, 0, nb) + for i := start; i < start+nb; i++ { + int32Values = append(int32Values, int32(i)) + } + return column.NewColumnInt32(fieldName, int32Values) + + case entity.FieldTypeBool: + boolValues := make([]bool, 0, nb) + for i := start; i < start+nb; i++ { + boolValues = append(boolValues, i/2 == 0) + } + return column.NewColumnBool(fieldName, boolValues) + + case entity.FieldTypeFloat: + floatValues := make([]float32, 0, nb) + for i := start; i < start+nb; i++ { + floatValues = append(floatValues, float32(i)) + } + return column.NewColumnFloat(fieldName, floatValues) + + case entity.FieldTypeDouble: + floatValues := make([]float64, 0, nb) + for i := start; i < start+nb; i++ { + floatValues = append(floatValues, float64(i)) + } + return column.NewColumnDouble(fieldName, floatValues) + + case entity.FieldTypeVarChar: + varcharValues := make([]string, 0, nb) + for i := start; i < start+nb; i++ { + varcharValues = append(varcharValues, strconv.Itoa(i)) + } + return column.NewColumnVarChar(fieldName, varcharValues) + + case entity.FieldTypeArray: + return GenArrayColumnData(nb, option.elementType, option) + + case entity.FieldTypeFloatVector: + vecFloatValues := make([][]float32, 0, nb) + for i := start; i < start+nb; i++ { + vec := common.GenFloatVector(dim) + vecFloatValues = append(vecFloatValues, vec) + } + return column.NewColumnFloatVector(fieldName, int(option.dim), vecFloatValues) + case entity.FieldTypeBinaryVector: + binaryVectors := make([][]byte, 0, nb) + for i := 0; i < nb; i++ { + vec := common.GenBinaryVector(dim) + binaryVectors = append(binaryVectors, vec) + } + return column.NewColumnBinaryVector(fieldName, dim, binaryVectors) + case entity.FieldTypeFloat16Vector: + fp16Vectors := make([][]byte, 0, nb) + for i := start; i < start+nb; i++ { + vec := common.GenFloat16Vector(dim) + fp16Vectors = append(fp16Vectors, vec) + } + return column.NewColumnFloat16Vector(fieldName, dim, fp16Vectors) + case entity.FieldTypeBFloat16Vector: + bf16Vectors := make([][]byte, 0, nb) + for i := start; i < start+nb; i++ { + vec := common.GenBFloat16Vector(dim) + bf16Vectors = append(bf16Vectors, vec) + } + return column.NewColumnBFloat16Vector(fieldName, dim, bf16Vectors) + case entity.FieldTypeSparseVector: + vectors := make([]entity.SparseEmbedding, 0, nb) + for i := start; i < start+nb; i++ { + vec := common.GenSparseVector(maxLen) + vectors = append(vectors, vec) + } + return column.NewColumnSparseVectors(fieldName, vectors) + default: + log.Fatal("GenColumnData failed", zap.Any("FieldType", fieldType)) + return nil + } +} + +func GenDynamicFieldData(start int, nb int) []column.Column { + type ListStruct struct { + List []int64 `json:"list" milvus:"name:list"` + } + + // gen number, string bool list data column + numberValues := make([]int32, 0, nb) + stringValues := make([]string, 0, nb) + boolValues := make([]bool, 0, nb) + //listValues := make([][]byte, 0, Nb) + //m := make(map[string]interface{}) + for i := start; i < start+nb; i++ { + numberValues = append(numberValues, int32(i)) + stringValues = append(stringValues, strconv.Itoa(i)) + boolValues = append(boolValues, i%3 == 0) + //m["list"] = ListStruct{ + // List: 
[]int64{int64(i), int64(i + 1)}, + //} + //bs, err := json.Marshal(m) + //if err != nil { + // log.Fatalf("Marshal json field failed: %s", err) + //} + //listValues = append(listValues, bs) + } + data := []column.Column{ + column.NewColumnInt32(common.DefaultDynamicNumberField, numberValues), + column.NewColumnString(common.DefaultDynamicStringField, stringValues), + column.NewColumnBool(common.DefaultDynamicBoolField, boolValues), + //entity.NewColumnJSONBytes(DefaultDynamicListField, listValues), + } + return data +} diff --git a/tests/go_client/testcases/helper/field_helper.go b/tests/go_client/testcases/helper/field_helper.go new file mode 100644 index 0000000000000..1e2a321e62e44 --- /dev/null +++ b/tests/go_client/testcases/helper/field_helper.go @@ -0,0 +1,299 @@ +package helper + +import ( + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/common" + "go.uber.org/zap" +) + +func GetFieldNameByElementType(t entity.FieldType) string { + switch t { + case entity.FieldTypeBool: + return common.DefaultBoolArrayField + case entity.FieldTypeInt8: + return common.DefaultInt8ArrayField + case entity.FieldTypeInt16: + return common.DefaultInt16ArrayField + case entity.FieldTypeInt32: + return common.DefaultInt32ArrayField + case entity.FieldTypeInt64: + return common.DefaultInt64ArrayField + case entity.FieldTypeFloat: + return common.DefaultFloatArrayField + case entity.FieldTypeDouble: + return common.DefaultDoubleArrayField + case entity.FieldTypeVarChar: + return common.DefaultVarcharArrayField + default: + return common.DefaultArrayFieldName + } +} + +func GetFieldNameByFieldType(t entity.FieldType, eleType ...entity.FieldType) string { + switch t { + case entity.FieldTypeBool: + return common.DefaultBoolFieldName + case entity.FieldTypeInt8: + return common.DefaultInt8FieldName + case entity.FieldTypeInt16: + return common.DefaultInt16FieldName + case entity.FieldTypeInt32: + return common.DefaultInt32FieldName + case entity.FieldTypeInt64: + return common.DefaultInt64FieldName + case entity.FieldTypeFloat: + return common.DefaultFloatFieldName + case entity.FieldTypeDouble: + return common.DefaultDoubleFieldName + case entity.FieldTypeVarChar: + return common.DefaultVarcharFieldName + case entity.FieldTypeJSON: + return common.DefaultJSONFieldName + case entity.FieldTypeArray: + return GetFieldNameByElementType(eleType[0]) + case entity.FieldTypeBinaryVector: + return common.DefaultBinaryVecFieldName + case entity.FieldTypeFloatVector: + return common.DefaultFloatVecFieldName + case entity.FieldTypeFloat16Vector: + return common.DefaultFloat16VecFieldName + case entity.FieldTypeBFloat16Vector: + return common.DefaultBFloat16VecFieldName + case entity.FieldTypeSparseVector: + return common.DefaultSparseVecFieldName + default: + return "" + } +} + +type CollectionFieldsType int32 + +const ( + // FieldTypeNone zero value place holder + Int64Vec CollectionFieldsType = 1 // int64 + floatVec + VarcharBinary CollectionFieldsType = 2 // varchar + binaryVec + Int64VecJSON CollectionFieldsType = 3 // int64 + floatVec + json + Int64VecArray CollectionFieldsType = 4 // int64 + floatVec + array + Int64VarcharSparseVec CollectionFieldsType = 5 // int64 + varchar + sparse vector + Int64MultiVec CollectionFieldsType = 6 // int64 + floatVec + binaryVec + fp16Vec + bf16vec + AllFields CollectionFieldsType = 7 // all fields excepted sparse +) + +type GenFieldsOption struct { + AutoID bool // is auto id + Dim int64 + 
IsDynamic bool + MaxLength int64 // varchar len or array capacity + MaxCapacity int64 + IsPartitionKey bool + ElementType entity.FieldType +} + +func TNewFieldsOption() *GenFieldsOption { + return &GenFieldsOption{ + AutoID: false, + Dim: common.DefaultDim, + MaxLength: common.TestMaxLen, + MaxCapacity: common.TestCapacity, + IsDynamic: false, + IsPartitionKey: false, + ElementType: entity.FieldTypeNone, + } +} + +func (opt *GenFieldsOption) TWithAutoID(autoID bool) *GenFieldsOption { + opt.AutoID = autoID + return opt +} + +func (opt *GenFieldsOption) TWithDim(dim int64) *GenFieldsOption { + opt.Dim = dim + return opt +} + +func (opt *GenFieldsOption) TWithIsDynamic(isDynamic bool) *GenFieldsOption { + opt.IsDynamic = isDynamic + return opt +} + +func (opt *GenFieldsOption) TWithIsPartitionKey(isPartitionKey bool) *GenFieldsOption { + opt.IsPartitionKey = isPartitionKey + return opt +} + +func (opt *GenFieldsOption) TWithElementType(elementType entity.FieldType) *GenFieldsOption { + opt.ElementType = elementType + return opt +} + +func (opt *GenFieldsOption) TWithMaxLen(maxLen int64) *GenFieldsOption { + opt.MaxLength = maxLen + return opt +} + +func (opt *GenFieldsOption) TWithMaxCapacity(maxCapacity int64) *GenFieldsOption { + opt.MaxCapacity = maxCapacity + return opt +} + +// factory +type FieldsFactory struct{} + +// product +type CollectionFields interface { + GenFields(opts GenFieldsOption) []*entity.Field +} + +type FieldsInt64Vec struct{} + +func (cf FieldsInt64Vec) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeFloatVector)).WithDataType(entity.FieldTypeFloatVector).WithDim(option.Dim) + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return []*entity.Field{pkField, vecField} +} + +type FieldsVarcharBinary struct{} + +func (cf FieldsVarcharBinary) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeVarChar)).WithDataType(entity.FieldTypeVarChar). 
+ WithIsPrimaryKey(true).WithMaxLength(option.MaxLength) + vecField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeBinaryVector)).WithDataType(entity.FieldTypeBinaryVector).WithDim(option.Dim) + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return []*entity.Field{pkField, vecField} +} + +type FieldsInt64VecJSON struct{} + +func (cf FieldsInt64VecJSON) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeFloatVector)).WithDataType(entity.FieldTypeFloatVector).WithDim(option.Dim) + jsonField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeJSON)).WithDataType(entity.FieldTypeJSON) + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return []*entity.Field{pkField, vecField, jsonField} +} + +type FieldsInt64VecArray struct{} + +func (cf FieldsInt64VecArray) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + vecField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeFloatVector)).WithDataType(entity.FieldTypeFloatVector).WithDim(option.Dim) + fields := []*entity.Field{ + pkField, vecField, + } + for _, eleType := range GetAllArrayElementType() { + arrayField := entity.NewField().WithName(GetFieldNameByElementType(eleType)).WithDataType(entity.FieldTypeArray).WithElementType(eleType).WithMaxCapacity(option.MaxCapacity) + if eleType == entity.FieldTypeVarChar { + arrayField.WithMaxLength(option.MaxLength) + } + fields = append(fields, arrayField) + } + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return fields +} + +type FieldsInt64VarcharSparseVec struct{} + +func (cf FieldsInt64VarcharSparseVec) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + varcharField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeVarChar)).WithDataType(entity.FieldTypeVarChar).WithMaxLength(option.MaxLength) + sparseVecField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeSparseVector)).WithDataType(entity.FieldTypeSparseVector) + + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return []*entity.Field{pkField, varcharField, sparseVecField} +} + +type FieldsInt64MultiVec struct{} + +func (cf FieldsInt64MultiVec) GenFields(option GenFieldsOption) []*entity.Field { + pkField := entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + fields := []*entity.Field{ + pkField, + } + for _, fieldType := range GetAllVectorFieldType() { + if fieldType == entity.FieldTypeSparseVector { + continue + } + vecField := entity.NewField().WithName(GetFieldNameByFieldType(fieldType)).WithDataType(fieldType).WithDim(option.Dim) + fields = append(fields, vecField) + } + + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return fields +} + +type FieldsAllFields struct{} // except sparse vector field +func (cf FieldsAllFields) GenFields(option GenFieldsOption) []*entity.Field { + pkField := 
entity.NewField().WithName(GetFieldNameByFieldType(entity.FieldTypeInt64)).WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true) + fields := []*entity.Field{ + pkField, + } + // scalar fields and array fields + for _, fieldType := range GetAllScaleFieldType() { + if fieldType == entity.FieldTypeInt64 { + continue + } else if fieldType == entity.FieldTypeArray { + for _, eleType := range GetAllArrayElementType() { + arrayField := entity.NewField().WithName(GetFieldNameByElementType(eleType)).WithDataType(entity.FieldTypeArray).WithElementType(eleType).WithMaxCapacity(option.MaxCapacity) + if eleType == entity.FieldTypeVarChar { + arrayField.WithMaxLength(option.MaxLength) + } + fields = append(fields, arrayField) + } + } else if fieldType == entity.FieldTypeVarChar { + varcharField := entity.NewField().WithName(GetFieldNameByFieldType(fieldType)).WithDataType(fieldType).WithMaxLength(option.MaxLength) + fields = append(fields, varcharField) + } else { + scalarField := entity.NewField().WithName(GetFieldNameByFieldType(fieldType)).WithDataType(fieldType) + fields = append(fields, scalarField) + } + + } + for _, fieldType := range GetAllVectorFieldType() { + if fieldType == entity.FieldTypeSparseVector { + continue + } + vecField := entity.NewField().WithName(GetFieldNameByFieldType(fieldType)).WithDataType(fieldType).WithDim(option.Dim) + fields = append(fields, vecField) + } + + if option.AutoID { + pkField.WithIsAutoID(option.AutoID) + } + return fields +} + +func (ff FieldsFactory) GenFieldsForCollection(collectionFieldsType CollectionFieldsType, option *GenFieldsOption) []*entity.Field { + log.Info("GenFieldsForCollection", zap.Any("GenFieldsOption", option)) + switch collectionFieldsType { + case Int64Vec: + return FieldsInt64Vec{}.GenFields(*option) + case VarcharBinary: + return FieldsVarcharBinary{}.GenFields(*option) + case Int64VecJSON: + return FieldsInt64VecJSON{}.GenFields(*option) + case Int64VecArray: + return FieldsInt64VecArray{}.GenFields(*option) + case Int64VarcharSparseVec: + return FieldsInt64VarcharSparseVec{}.GenFields(*option) + case Int64MultiVec: + return FieldsInt64MultiVec{}.GenFields(*option) + case AllFields: + return FieldsAllFields{}.GenFields(*option) + default: + return FieldsInt64Vec{}.GenFields(*option) + } +} diff --git a/tests/go_client/testcases/helper/helper.go b/tests/go_client/testcases/helper/helper.go new file mode 100644 index 0000000000000..f2b93bbfdbb6e --- /dev/null +++ b/tests/go_client/testcases/helper/helper.go @@ -0,0 +1,192 @@ +package helper + +import ( + "context" + "testing" + "time" + + clientv2 "github.com/milvus-io/milvus/client/v2" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/base" + "github.com/milvus-io/milvus/tests/go_client/common" + "go.uber.org/zap" + + "github.com/milvus-io/milvus/client/v2/entity" +) + +func CreateContext(t *testing.T, timeout time.Duration) context.Context { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + t.Cleanup(func() { + cancel() + }) + return ctx +} + +//var ArrayFieldType = + +func GetAllArrayElementType() []entity.FieldType { + return []entity.FieldType{ + entity.FieldTypeBool, + entity.FieldTypeInt8, + entity.FieldTypeInt16, + entity.FieldTypeInt32, + entity.FieldTypeInt64, + entity.FieldTypeFloat, + entity.FieldTypeDouble, + entity.FieldTypeVarChar, + } +} + +func GetAllVectorFieldType() []entity.FieldType { + return []entity.FieldType{ + entity.FieldTypeBinaryVector, + entity.FieldTypeFloatVector, + 
entity.FieldTypeFloat16Vector, + entity.FieldTypeBFloat16Vector, + entity.FieldTypeSparseVector, + } +} + +func GetAllScaleFieldType() []entity.FieldType { + return []entity.FieldType{ + entity.FieldTypeBool, + entity.FieldTypeInt8, + entity.FieldTypeInt16, + entity.FieldTypeInt32, + entity.FieldTypeInt64, + entity.FieldTypeFloat, + entity.FieldTypeDouble, + entity.FieldTypeVarChar, + entity.FieldTypeArray, + entity.FieldTypeJSON, + } +} + +func GetAllFieldsType() []entity.FieldType { + allFieldType := GetAllScaleFieldType() + allFieldType = append(allFieldType, entity.FieldTypeBinaryVector, + entity.FieldTypeFloatVector, + entity.FieldTypeFloat16Vector, + entity.FieldTypeBFloat16Vector, + //entity.FieldTypeSparseVector, max vector fields num is 4 + ) + return allFieldType +} + +func GetInvalidPkFieldType() []entity.FieldType { + nonPkFieldTypes := []entity.FieldType{ + entity.FieldTypeNone, + entity.FieldTypeBool, + entity.FieldTypeInt8, + entity.FieldTypeInt16, + entity.FieldTypeInt32, + entity.FieldTypeFloat, + entity.FieldTypeDouble, + entity.FieldTypeString, + entity.FieldTypeJSON, + entity.FieldTypeArray, + } + return nonPkFieldTypes +} + +func GetInvalidPartitionKeyFieldType() []entity.FieldType { + nonPkFieldTypes := []entity.FieldType{ + entity.FieldTypeBool, + entity.FieldTypeInt8, + entity.FieldTypeInt16, + entity.FieldTypeInt32, + entity.FieldTypeFloat, + entity.FieldTypeDouble, + entity.FieldTypeJSON, + entity.FieldTypeArray, + entity.FieldTypeFloatVector, + } + return nonPkFieldTypes +} + +// ----------------- prepare data -------------------------- +type CollectionPrepare struct{} + +var CollPrepare CollectionPrepare +var FieldsFact FieldsFactory + +func (chainTask *CollectionPrepare) CreateCollection(ctx context.Context, t *testing.T, mc *base.MilvusClient, + cp *CreateCollectionParams, fieldOpt *GenFieldsOption, schemaOpt *GenSchemaOption) (*CollectionPrepare, *entity.Schema) { + + fields := FieldsFact.GenFieldsForCollection(cp.CollectionFieldsType, fieldOpt) + schemaOpt.Fields = fields + schema := GenSchema(schemaOpt) + + err := mc.CreateCollection(ctx, clientv2.NewCreateCollectionOption(schema.CollectionName, schema)) + common.CheckErr(t, err, true) + + t.Cleanup(func() { + err := mc.DropCollection(ctx, clientv2.NewDropCollectionOption(schema.CollectionName)) + common.CheckErr(t, err, true) + }) + return chainTask, schema +} + +func (chainTask *CollectionPrepare) InsertData(ctx context.Context, t *testing.T, mc *base.MilvusClient, + ip *InsertParams, option *GenColumnOption) (*CollectionPrepare, clientv2.InsertResult) { + if nil == ip.Schema || ip.Schema.CollectionName == "" { + log.Fatal("[InsertData] Nil Schema is not expected") + } + fields := ip.Schema.Fields + insertOpt := clientv2.NewColumnBasedInsertOption(ip.Schema.CollectionName) + for _, field := range fields { + column := GenColumnData(ip.Nb, field.DataType, *option) + insertOpt.WithColumns(column) + } + + insertRes, err := mc.Insert(ctx, insertOpt) + common.CheckErr(t, err, true) + return chainTask, insertRes +} + +func (chainTask *CollectionPrepare) FlushData(ctx context.Context, t *testing.T, mc *base.MilvusClient, collName string) *CollectionPrepare { + flushTask, err := mc.Flush(ctx, clientv2.NewFlushOption(collName)) + common.CheckErr(t, err, true) + err = flushTask.Await(ctx) + common.CheckErr(t, err, true) + return chainTask +} + +func (chainTask *CollectionPrepare) CreateIndex(ctx context.Context, t *testing.T, mc *base.MilvusClient, ip *IndexParams) *CollectionPrepare { + if nil == ip.Schema || 
ip.Schema.CollectionName == "" { + log.Fatal("[CreateIndex] Empty collection name is not expected") + } + collName := ip.Schema.CollectionName + mFieldIndex := ip.FieldIndexMap + + for _, field := range ip.Schema.Fields { + if field.DataType >= 100 { + if idx, ok := mFieldIndex[field.Name]; ok { + log.Info("CreateIndex", zap.String("indexName", idx.Name()), zap.Any("indexType", idx.IndexType()), zap.Any("indexParams", idx.Params())) + createIndexTask, err := mc.CreateIndex(ctx, clientv2.NewCreateIndexOption(collName, field.Name, idx)) + common.CheckErr(t, err, true) + err = createIndexTask.Await(ctx) + common.CheckErr(t, err, true) + } else { + idx := GetDefaultVectorIndex(field.DataType) + log.Info("CreateIndex", zap.String("indexName", idx.Name()), zap.Any("indexType", idx.IndexType()), zap.Any("indexParams", idx.Params())) + createIndexTask, err := mc.CreateIndex(ctx, clientv2.NewCreateIndexOption(collName, field.Name, idx)) + common.CheckErr(t, err, true) + err = createIndexTask.Await(ctx) + common.CheckErr(t, err, true) + } + } + } + return chainTask +} + +func (chainTask *CollectionPrepare) Load(ctx context.Context, t *testing.T, mc *base.MilvusClient, lp *LoadParams) *CollectionPrepare { + if lp.CollectionName == "" { + log.Fatal("[Load] Empty collection name is not expected") + } + loadTask, err := mc.LoadCollection(ctx, clientv2.NewLoadCollectionOption(lp.CollectionName).WithReplica(lp.Replica)) + common.CheckErr(t, err, true) + err = loadTask.Await(ctx) + common.CheckErr(t, err, true) + return chainTask +} diff --git a/tests/go_client/testcases/helper/index_helper.go b/tests/go_client/testcases/helper/index_helper.go new file mode 100644 index 0000000000000..554fcbb3789e5 --- /dev/null +++ b/tests/go_client/testcases/helper/index_helper.go @@ -0,0 +1,35 @@ +package helper + +import ( + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/client/v2/index" +) + +func GetDefaultVectorIndex(fieldType entity.FieldType) index.Index { + switch fieldType { + case entity.FieldTypeFloatVector, entity.FieldTypeFloat16Vector, entity.FieldTypeBFloat16Vector, entity.FieldTypeSparseVector: + return index.NewHNSWIndex(entity.COSINE, 8, 200) + case entity.FieldTypeBinaryVector: + return nil + // return binary index + default: + return nil + // return auto index + } +} + +type IndexParams struct { + Schema *entity.Schema + FieldIndexMap map[string]index.Index +} + +func NewIndexParams(schema *entity.Schema) *IndexParams { + return &IndexParams{ + Schema: schema, + } +} + +func (opt *IndexParams) TWithFieldIndex(mFieldIndex map[string]index.Index) *IndexParams { + opt.FieldIndexMap = mFieldIndex + return opt +} diff --git a/tests/go_client/testcases/helper/read_helper.go b/tests/go_client/testcases/helper/read_helper.go new file mode 100644 index 0000000000000..085dfec31d255 --- /dev/null +++ b/tests/go_client/testcases/helper/read_helper.go @@ -0,0 +1,55 @@ +package helper + +import ( + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/tests/go_client/common" +) + +type LoadParams struct { + CollectionName string + Replica int +} + +func NewLoadParams(name string) *LoadParams { + return &LoadParams{ + CollectionName: name, + } +} + +func (opt *LoadParams) TWithReplica(replica int) *LoadParams { + opt.Replica = replica + return opt +} + +// GenSearchVectors gen search vectors +func GenSearchVectors(nq int, dim int, dataType entity.FieldType) []entity.Vector { + vectors := make([]entity.Vector, 0, nq) + switch dataType { + case 
entity.FieldTypeFloatVector: + for i := 0; i < nq; i++ { + vector := common.GenFloatVector(dim) + vectors = append(vectors, entity.FloatVector(vector)) + } + case entity.FieldTypeBinaryVector: + for i := 0; i < nq; i++ { + vector := common.GenBinaryVector(dim) + vectors = append(vectors, entity.BinaryVector(vector)) + } + case entity.FieldTypeFloat16Vector: + for i := 0; i < nq; i++ { + vector := common.GenFloat16Vector(dim) + vectors = append(vectors, entity.Float16Vector(vector)) + } + case entity.FieldTypeBFloat16Vector: + for i := 0; i < nq; i++ { + vector := common.GenBFloat16Vector(dim) + vectors = append(vectors, entity.BFloat16Vector(vector)) + } + case entity.FieldTypeSparseVector: + for i := 0; i < nq; i++ { + vec := common.GenSparseVector(dim) + vectors = append(vectors, vec) + } + } + return vectors +} diff --git a/tests/go_client/testcases/helper/schema_helper.go b/tests/go_client/testcases/helper/schema_helper.go new file mode 100644 index 0000000000000..d96e567a28632 --- /dev/null +++ b/tests/go_client/testcases/helper/schema_helper.go @@ -0,0 +1,68 @@ +package helper + +import ( + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/common" +) + +type GenSchemaOption struct { + CollectionName string + Description string + AutoID bool + Fields []*entity.Field + EnableDynamicField bool +} + +func TNewSchemaOption() *GenSchemaOption { + return &GenSchemaOption{} +} + +func (opt *GenSchemaOption) TWithName(collectionName string) *GenSchemaOption { + opt.CollectionName = collectionName + return opt +} + +func (opt *GenSchemaOption) TWithDescription(description string) *GenSchemaOption { + opt.Description = description + return opt +} + +func (opt *GenSchemaOption) TWithAutoID(autoID bool) *GenSchemaOption { + opt.AutoID = autoID + return opt +} + +func (opt *GenSchemaOption) TWithEnableDynamicField(enableDynamicField bool) *GenSchemaOption { + opt.EnableDynamicField = enableDynamicField + return opt +} + +func (opt *GenSchemaOption) TWithFields(fields []*entity.Field) *GenSchemaOption { + opt.Fields = fields + return opt +} + +func GenSchema(option *GenSchemaOption) *entity.Schema { + if len(option.Fields) == 0 { + log.Fatal("Require at least a primary field and a vector field") + } + if option.CollectionName == "" { + option.CollectionName = common.GenRandomString("pre", 6) + } + schema := entity.NewSchema().WithName(option.CollectionName) + for _, field := range option.Fields { + schema.WithField(field) + } + + if option.Description != "" { + schema.WithDescription(option.Description) + } + if option.AutoID { + schema.WithAutoID(option.AutoID) + } + if option.EnableDynamicField { + schema.WithDynamicFieldEnabled(option.EnableDynamicField) + } + return schema +} diff --git a/tests/go_client/testcases/main_test.go b/tests/go_client/testcases/main_test.go new file mode 100644 index 0000000000000..221f1746c857e --- /dev/null +++ b/tests/go_client/testcases/main_test.go @@ -0,0 +1,74 @@ +package testcases + +import ( + "context" + "flag" + "os" + "testing" + "time" + + "go.uber.org/zap" + + clientv2 "github.com/milvus-io/milvus/client/v2" + + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/base" + "github.com/milvus-io/milvus/tests/go_client/common" +) + +var addr = flag.String("addr", "localhost:19530", "server host and port") +var defaultCfg = clientv2.ClientConfig{Address: *addr} + +// teardown +func teardown() { + log.Info("Start to tear down all.....") + ctx, 
cancel := context.WithTimeout(context.Background(), time.Second*common.DefaultTimeout) + defer cancel() + mc, err := base.NewMilvusClient(ctx, &defaultCfg) + if err != nil { + log.Fatal("teardown failed to connect to milvus with error", zap.Error(err)) + } + defer mc.Close(ctx) + + // clear dbs + dbs, _ := mc.ListDatabases(ctx, clientv2.NewListDatabaseOption()) + for _, db := range dbs { + if db != common.DefaultDb { + _ = mc.UsingDatabase(ctx, clientv2.NewUsingDatabaseOption(db)) + collections, _ := mc.ListCollections(ctx, clientv2.NewListCollectionOption()) + for _, coll := range collections { + _ = mc.DropCollection(ctx, clientv2.NewDropCollectionOption(coll)) + } + _ = mc.DropDatabase(ctx, clientv2.NewDropDatabaseOption(db)) + } + } +} + +// createDefaultMilvusClient creates a milvus client for a test and closes it during cleanup +func createDefaultMilvusClient(ctx context.Context, t *testing.T) *base.MilvusClient { + t.Helper() + + var ( + mc *base.MilvusClient + err error + ) + mc, err = base.NewMilvusClient(ctx, &defaultCfg) + common.CheckErr(t, err, true) + + t.Cleanup(func() { + mc.Close(ctx) + }) + + return mc +} + +func TestMain(m *testing.M) { + flag.Parse() + log.Info("Parse Milvus address", zap.String("address", *addr)) + code := m.Run() + if code != 0 { + log.Error("Tests failed and exited", zap.Int("code", code)) + } + teardown() + os.Exit(code) +} diff --git a/tests/go_client/testcases/search_test.go b/tests/go_client/testcases/search_test.go new file mode 100644 index 0000000000000..f1dd0236bb5ac --- /dev/null +++ b/tests/go_client/testcases/search_test.go @@ -0,0 +1,42 @@ +package testcases + +import ( + "testing" + "time" + + clientv2 "github.com/milvus-io/milvus/client/v2" + "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/tests/go_client/common" + hp "github.com/milvus-io/milvus/tests/go_client/testcases/helper" + "go.uber.org/zap" +) + +func TestSearch(t *testing.T) { + ctx := hp.CreateContext(t, time.Second*common.DefaultTimeout) + mc := createDefaultMilvusClient(ctx, t) + + cp := hp.NewCreateCollectionParams(hp.Int64Vec) + _, schema := hp.CollPrepare.CreateCollection(ctx, t, mc, cp, hp.TNewFieldsOption(), hp.TNewSchemaOption()) + log.Info("schema", zap.Any("schema", schema)) + + insertParams := hp.NewInsertParams(schema, common.DefaultNb) + hp.CollPrepare.InsertData(ctx, t, mc, insertParams, hp.TNewColumnOption()) + + // flush -> index -> load + hp.CollPrepare.FlushData(ctx, t, mc, schema.CollectionName) + hp.CollPrepare.CreateIndex(ctx, t, mc, hp.NewIndexParams(schema)) + hp.CollPrepare.Load(ctx, t, mc, hp.NewLoadParams(schema.CollectionName)) + + // search + vectors := hp.GenSearchVectors(common.DefaultNq, common.DefaultDim, entity.FieldTypeFloatVector) + resSearch, err := mc.Search(ctx, clientv2.NewSearchOption(schema.CollectionName, common.DefaultLimit, vectors).WithConsistencyLevel(entity.ClStrong)) + common.CheckErr(t, err, true) + common.CheckSearchResult(t, resSearch, common.DefaultNq, common.DefaultLimit) + + log.Info("search", zap.Any("resSearch", resSearch)) + log.Info("search", zap.Any("ids", resSearch[0].IDs)) + log.Info("search", zap.Any("scores", resSearch[0].Scores)) + id, _ := resSearch[0].IDs.GetAsInt64(0) + log.Info("search", zap.Int64("ids", id)) +} diff --git a/tests/integration/bloomfilter/bloom_filter_test.go b/tests/integration/bloomfilter/bloom_filter_test.go new file mode 100644 index 0000000000000..595ecdd025a3c --- /dev/null +++ b/tests/integration/bloomfilter/bloom_filter_test.go @@ -0,0 +1,196 @@ +// Licensed to the LF AI & Data foundation 
under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bloomfilter + +import ( + "context" + "fmt" + "strconv" + "strings" + "testing" + + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "github.com/stretchr/testify/suite" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/metric" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/tests/integration" +) + +type BloomFilterTestSuit struct { + integration.MiniClusterSuite +} + +func (s *BloomFilterTestSuit) SetupSuite() { + paramtable.Init() + paramtable.Get().Save(paramtable.Get().QueryCoordCfg.BalanceCheckInterval.Key, "1000") + paramtable.Get().Save(paramtable.Get().QueryNodeCfg.GracefulStopTimeout.Key, "1") + + // disable compaction + paramtable.Get().Save(paramtable.Get().DataCoordCfg.EnableCompaction.Key, "false") + + s.Require().NoError(s.SetupEmbedEtcd()) +} + +func (s *BloomFilterTestSuit) TearDownSuite() { + defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.EnableCompaction.Key) + s.MiniClusterSuite.TearDownSuite() +} + +func (s *BloomFilterTestSuit) initCollection(collectionName string, replica int, channelNum int, segmentNum int, segmentRowNum int, segmentDeleteNum int) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + const ( + dim = 128 + dbName = "" + ) + + schema := integration.ConstructSchema(collectionName, dim, true) + marshaledSchema, err := proto.Marshal(schema) + s.NoError(err) + + createCollectionStatus, err := s.Cluster.Proxy.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + Schema: marshaledSchema, + ShardsNum: int32(channelNum), + }) + s.NoError(err) + s.True(merr.Ok(createCollectionStatus)) + + log.Info("CreateCollection result", zap.Any("createCollectionStatus", createCollectionStatus)) + showCollectionsResp, err := s.Cluster.Proxy.ShowCollections(ctx, &milvuspb.ShowCollectionsRequest{}) + s.NoError(err) + s.True(merr.Ok(showCollectionsResp.Status)) + log.Info("ShowCollections result", zap.Any("showCollectionsResp", showCollectionsResp)) + + for i := 0; i < segmentNum; i++ { + // change bf type in real time + if i%2 == 0 { + paramtable.Get().Save(paramtable.Get().CommonCfg.BloomFilterType.Key, "BasicBloomFilter") + } else { + paramtable.Get().Save(paramtable.Get().CommonCfg.BloomFilterType.Key, "BlockedBloomFilter") + } + + fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, segmentRowNum, dim) + hashKeys := 
integration.GenerateHashKeys(segmentRowNum) + insertResult, err := s.Cluster.Proxy.Insert(ctx, &milvuspb.InsertRequest{ + DbName: dbName, + CollectionName: collectionName, + FieldsData: []*schemapb.FieldData{fVecColumn}, + HashKeys: hashKeys, + NumRows: uint32(segmentRowNum), + }) + s.NoError(err) + s.True(merr.Ok(insertResult.Status)) + + if segmentDeleteNum > 0 { + if segmentDeleteNum > segmentRowNum { + segmentDeleteNum = segmentRowNum + } + + pks := insertResult.GetIDs().GetIntId().GetData()[:segmentDeleteNum] + log.Info("========================delete expr==================", + zap.Int("length of pk", len(pks)), + ) + + expr := fmt.Sprintf("%s in [%s]", integration.Int64Field, strings.Join(lo.Map(pks, func(pk int64, _ int) string { return strconv.FormatInt(pk, 10) }), ",")) + + deleteResp, err := s.Cluster.Proxy.Delete(ctx, &milvuspb.DeleteRequest{ + CollectionName: collectionName, + Expr: expr, + }) + s.Require().NoError(err) + s.Require().True(merr.Ok(deleteResp.GetStatus())) + s.Require().EqualValues(len(pks), deleteResp.GetDeleteCnt()) + } + + // flush + flushResp, err := s.Cluster.Proxy.Flush(ctx, &milvuspb.FlushRequest{ + DbName: dbName, + CollectionNames: []string{collectionName}, + }) + s.NoError(err) + segmentIDs, has := flushResp.GetCollSegIDs()[collectionName] + ids := segmentIDs.GetData() + s.Require().NotEmpty(segmentIDs) + s.Require().True(has) + flushTs, has := flushResp.GetCollFlushTs()[collectionName] + s.True(has) + s.WaitForFlush(ctx, ids, flushTs, dbName, collectionName) + } + + // create index + createIndexStatus, err := s.Cluster.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{ + CollectionName: collectionName, + FieldName: integration.FloatVecField, + IndexName: "_default", + ExtraParams: integration.ConstructIndexParam(dim, integration.IndexFaissIvfFlat, metric.L2), + }) + s.NoError(err) + s.True(merr.Ok(createIndexStatus)) + s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField) + + for i := 1; i < replica; i++ { + s.Cluster.AddQueryNode() + } + + // load + loadStatus, err := s.Cluster.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + ReplicaNumber: int32(replica), + }) + s.NoError(err) + s.Equal(commonpb.ErrorCode_Success, loadStatus.GetErrorCode()) + s.True(merr.Ok(loadStatus)) + s.WaitForLoad(ctx, collectionName) + log.Info("initCollection Done") +} + +func (s *BloomFilterTestSuit) TestLoadAndQuery() { + name := "test_balance_" + funcutil.GenRandomStr() + s.initCollection(name, 1, 2, 10, 2000, 500) + + ctx := context.Background() + queryResult, err := s.Cluster.Proxy.Query(ctx, &milvuspb.QueryRequest{ + DbName: "", + CollectionName: name, + Expr: "", + OutputFields: []string{"count(*)"}, + }) + if !merr.Ok(queryResult.GetStatus()) { + log.Warn("searchResult fail reason", zap.String("reason", queryResult.GetStatus().GetReason())) + } + s.NoError(err) + s.True(merr.Ok(queryResult.GetStatus())) + numEntities := queryResult.FieldsData[0].GetScalars().GetLongData().Data[0] + s.Equal(numEntities, int64(15000)) +} + +func TestBloomFilter(t *testing.T) { + suite.Run(t, new(BloomFilterTestSuit)) +} diff --git a/tests/integration/compaction/compaction_test.go b/tests/integration/compaction/compaction_test.go new file mode 100644 index 0000000000000..2e738e00fb6c8 --- /dev/null +++ b/tests/integration/compaction/compaction_test.go @@ -0,0 +1,47 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compaction + +import ( + "testing" + + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/tests/integration" +) + +type CompactionSuite struct { + integration.MiniClusterSuite +} + +func (s *CompactionSuite) SetupSuite() { + s.MiniClusterSuite.SetupSuite() + + paramtable.Init() + paramtable.Get().Save(paramtable.Get().DataCoordCfg.GlobalCompactionInterval.Key, "1") +} + +func (s *CompactionSuite) TearDownSuite() { + s.MiniClusterSuite.TearDownSuite() + + paramtable.Get().Reset(paramtable.Get().DataCoordCfg.GlobalCompactionInterval.Key) +} + +func TestCompaction(t *testing.T) { + suite.Run(t, new(CompactionSuite)) +} diff --git a/tests/integration/compaction/l0_compaction_test.go b/tests/integration/compaction/l0_compaction_test.go new file mode 100644 index 0000000000000..984e8eb3ce5e5 --- /dev/null +++ b/tests/integration/compaction/l0_compaction_test.go @@ -0,0 +1,238 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package compaction + +import ( + "context" + "fmt" + "time" + + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/metric" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/tests/integration" +) + +func (s *CompactionSuite) TestL0Compaction() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*10) + defer cancel() + c := s.Cluster + + const ( + dim = 128 + dbName = "" + rowNum = 100000 + deleteCnt = 50000 + + indexType = integration.IndexFaissIvfFlat + metricType = metric.L2 + vecType = schemapb.DataType_FloatVector + ) + + paramtable.Get().Save(paramtable.Get().DataCoordCfg.LevelZeroCompactionTriggerDeltalogMinNum.Key, "1") + defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.LevelZeroCompactionTriggerDeltalogMinNum.Key) + + collectionName := "TestCompaction_" + funcutil.GenRandomStr() + + schema := integration.ConstructSchemaOfVecDataType(collectionName, dim, false, vecType) + marshaledSchema, err := proto.Marshal(schema) + s.NoError(err) + + // create collection + createCollectionStatus, err := c.Proxy.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + Schema: marshaledSchema, + ShardsNum: common.DefaultShardsNum, + ConsistencyLevel: commonpb.ConsistencyLevel_Strong, + }) + err = merr.CheckRPCCall(createCollectionStatus, err) + s.NoError(err) + log.Info("CreateCollection result", zap.Any("createCollectionStatus", createCollectionStatus)) + + // show collection + showCollectionsResp, err := c.Proxy.ShowCollections(ctx, &milvuspb.ShowCollectionsRequest{}) + err = merr.CheckRPCCall(showCollectionsResp, err) + s.NoError(err) + log.Info("ShowCollections result", zap.Any("showCollectionsResp", showCollectionsResp)) + + // insert + pkColumn := integration.NewInt64FieldData(integration.Int64Field, rowNum) + fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, rowNum, dim) + hashKeys := integration.GenerateHashKeys(rowNum) + insertResult, err := c.Proxy.Insert(ctx, &milvuspb.InsertRequest{ + DbName: dbName, + CollectionName: collectionName, + FieldsData: []*schemapb.FieldData{pkColumn, fVecColumn}, + HashKeys: hashKeys, + NumRows: uint32(rowNum), + }) + err = merr.CheckRPCCall(insertResult, err) + s.NoError(err) + s.Equal(int64(rowNum), insertResult.GetInsertCnt()) + + // flush + flushResp, err := c.Proxy.Flush(ctx, &milvuspb.FlushRequest{ + DbName: dbName, + CollectionNames: []string{collectionName}, + }) + err = merr.CheckRPCCall(flushResp, err) + s.NoError(err) + segmentIDs, has := flushResp.GetCollSegIDs()[collectionName] + ids := segmentIDs.GetData() + s.Require().NotEmpty(segmentIDs) + s.Require().True(has) + flushTs, has := flushResp.GetCollFlushTs()[collectionName] + s.True(has) + s.WaitForFlush(ctx, ids, flushTs, dbName, collectionName) + + // create index + createIndexStatus, err := c.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{ + CollectionName: collectionName, + FieldName: integration.FloatVecField, + IndexName: "_default", + ExtraParams: 
integration.ConstructIndexParam(dim, indexType, metricType), + }) + err = merr.CheckRPCCall(createIndexStatus, err) + s.NoError(err) + s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField) + + segments, err := c.MetaWatcher.ShowSegments() + s.NoError(err) + s.NotEmpty(segments) + s.Equal(1, len(segments)) + s.Equal(int64(rowNum), segments[0].GetNumOfRows()) + + // load + loadStatus, err := c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(loadStatus, err) + s.NoError(err) + s.WaitForLoad(ctx, collectionName) + + // delete + deleteResult, err := c.Proxy.Delete(ctx, &milvuspb.DeleteRequest{ + DbName: dbName, + CollectionName: collectionName, + Expr: fmt.Sprintf("%s < %d", integration.Int64Field, deleteCnt), + }) + err = merr.CheckRPCCall(deleteResult, err) + s.NoError(err) + + // flush l0 + flushResp, err = c.Proxy.Flush(ctx, &milvuspb.FlushRequest{ + DbName: dbName, + CollectionNames: []string{collectionName}, + }) + err = merr.CheckRPCCall(flushResp, err) + s.NoError(err) + flushTs, has = flushResp.GetCollFlushTs()[collectionName] + s.True(has) + s.WaitForFlush(ctx, ids, flushTs, dbName, collectionName) + + // query + queryResult, err := c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + DbName: dbName, + CollectionName: collectionName, + Expr: "", + OutputFields: []string{"count(*)"}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + s.Equal(int64(rowNum-deleteCnt), queryResult.GetFieldsData()[0].GetScalars().GetLongData().GetData()[0]) + + // wait for l0 compaction completed + showSegments := func() bool { + segments, err = c.MetaWatcher.ShowSegments() + s.NoError(err) + s.NotEmpty(segments) + log.Info("ShowSegments result", zap.Any("segments", segments)) + flushed := lo.Filter(segments, func(segment *datapb.SegmentInfo, _ int) bool { + return segment.GetState() == commonpb.SegmentState_Flushed + }) + if len(flushed) == 1 && + flushed[0].GetLevel() == datapb.SegmentLevel_L1 && + flushed[0].GetNumOfRows() == rowNum { + log.Info("l0 compaction done, wait for single compaction") + } + return len(flushed) == 1 && + flushed[0].GetLevel() == datapb.SegmentLevel_L1 && + flushed[0].GetNumOfRows() == rowNum-deleteCnt + } + for !showSegments() { + select { + case <-ctx.Done(): + s.Fail("waiting for compaction timeout") + return + case <-time.After(1 * time.Second): + } + } + + // search + expr := fmt.Sprintf("%s > 0", integration.Int64Field) + nq := 10 + topk := 10 + roundDecimal := -1 + params := integration.GetSearchParams(indexType, metricType) + searchReq := integration.ConstructSearchRequest("", collectionName, expr, + integration.FloatVecField, vecType, nil, metricType, params, nq, dim, topk, roundDecimal) + + searchResult, err := c.Proxy.Search(ctx, searchReq) + err = merr.CheckRPCCall(searchResult, err) + s.NoError(err) + s.Equal(nq*topk, len(searchResult.GetResults().GetScores())) + + // query + queryResult, err = c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + DbName: dbName, + CollectionName: collectionName, + Expr: "", + OutputFields: []string{"count(*)"}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + s.Equal(int64(rowNum-deleteCnt), queryResult.GetFieldsData()[0].GetScalars().GetLongData().GetData()[0]) + + // release collection + status, err := c.Proxy.ReleaseCollection(ctx, &milvuspb.ReleaseCollectionRequest{ + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(status, err) + s.NoError(err) + + // drop collection + status, err = 
c.Proxy.DropCollection(ctx, &milvuspb.DropCollectionRequest{ + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(status, err) + s.NoError(err) + + log.Info("Test compaction succeed") +} diff --git a/tests/integration/compaction/mix_compaction_test.go b/tests/integration/compaction/mix_compaction_test.go new file mode 100644 index 0000000000000..b51636be5fd1e --- /dev/null +++ b/tests/integration/compaction/mix_compaction_test.go @@ -0,0 +1,205 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compaction + +import ( + "context" + "fmt" + "time" + + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/metric" + "github.com/milvus-io/milvus/tests/integration" +) + +func (s *CompactionSuite) TestMixCompaction() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*10) + defer cancel() + c := s.Cluster + + const ( + dim = 128 + dbName = "" + rowNum = 10000 + batch = 1000 + + indexType = integration.IndexFaissIvfFlat + metricType = metric.L2 + vecType = schemapb.DataType_FloatVector + ) + + collectionName := "TestCompaction_" + funcutil.GenRandomStr() + + schema := integration.ConstructSchemaOfVecDataType(collectionName, dim, true, vecType) + marshaledSchema, err := proto.Marshal(schema) + s.NoError(err) + + // create collection + createCollectionStatus, err := c.Proxy.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + Schema: marshaledSchema, + ShardsNum: common.DefaultShardsNum, + ConsistencyLevel: commonpb.ConsistencyLevel_Strong, + }) + err = merr.CheckRPCCall(createCollectionStatus, err) + s.NoError(err) + log.Info("CreateCollection result", zap.Any("createCollectionStatus", createCollectionStatus)) + + // show collection + showCollectionsResp, err := c.Proxy.ShowCollections(ctx, &milvuspb.ShowCollectionsRequest{}) + err = merr.CheckRPCCall(showCollectionsResp, err) + s.NoError(err) + log.Info("ShowCollections result", zap.Any("showCollectionsResp", showCollectionsResp)) + + for i := 0; i < rowNum/batch; i++ { + // insert + fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, batch, dim) + hashKeys := integration.GenerateHashKeys(batch) + insertResult, err := c.Proxy.Insert(ctx, &milvuspb.InsertRequest{ + DbName: dbName, + CollectionName: collectionName, + 
FieldsData: []*schemapb.FieldData{fVecColumn}, + HashKeys: hashKeys, + NumRows: uint32(batch), + }) + err = merr.CheckRPCCall(insertResult, err) + s.NoError(err) + s.Equal(int64(batch), insertResult.GetInsertCnt()) + + // flush + flushResp, err := c.Proxy.Flush(ctx, &milvuspb.FlushRequest{ + DbName: dbName, + CollectionNames: []string{collectionName}, + }) + err = merr.CheckRPCCall(flushResp, err) + s.NoError(err) + segmentIDs, has := flushResp.GetCollSegIDs()[collectionName] + ids := segmentIDs.GetData() + s.Require().NotEmpty(segmentIDs) + s.Require().True(has) + flushTs, has := flushResp.GetCollFlushTs()[collectionName] + s.True(has) + s.WaitForFlush(ctx, ids, flushTs, dbName, collectionName) + + log.Info("insert done", zap.Int("i", i)) + } + + // create index + createIndexStatus, err := c.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{ + CollectionName: collectionName, + FieldName: integration.FloatVecField, + IndexName: "_default", + ExtraParams: integration.ConstructIndexParam(dim, indexType, metricType), + }) + err = merr.CheckRPCCall(createIndexStatus, err) + s.NoError(err) + s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField) + + segments, err := c.MetaWatcher.ShowSegments() + s.NoError(err) + s.NotEmpty(segments) + s.Equal(rowNum/batch, len(segments)) + for _, segment := range segments { + log.Info("show segment result", zap.String("segment", segment.String())) + } + + // wait for compaction completed + showSegments := func() bool { + segments, err = c.MetaWatcher.ShowSegments() + s.NoError(err) + s.NotEmpty(segments) + compactFromSegments := lo.Filter(segments, func(segment *datapb.SegmentInfo, _ int) bool { + return segment.GetState() == commonpb.SegmentState_Dropped + }) + compactToSegments := lo.Filter(segments, func(segment *datapb.SegmentInfo, _ int) bool { + return segment.GetState() == commonpb.SegmentState_Flushed + }) + log.Info("ShowSegments result", zap.Int("len(compactFromSegments)", len(compactFromSegments)), + zap.Int("len(compactToSegments)", len(compactToSegments))) + return len(compactToSegments) == 1 + } + for !showSegments() { + select { + case <-ctx.Done(): + s.Fail("waiting for compaction timeout") + return + case <-time.After(1 * time.Second): + } + } + + // load + loadStatus, err := c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(loadStatus, err) + s.NoError(err) + s.WaitForLoad(ctx, collectionName) + + // search + expr := fmt.Sprintf("%s > 0", integration.Int64Field) + nq := 10 + topk := 10 + roundDecimal := -1 + params := integration.GetSearchParams(indexType, metricType) + searchReq := integration.ConstructSearchRequest("", collectionName, expr, + integration.FloatVecField, vecType, nil, metricType, params, nq, dim, topk, roundDecimal) + + searchResult, err := c.Proxy.Search(ctx, searchReq) + err = merr.CheckRPCCall(searchResult, err) + s.NoError(err) + s.Equal(nq*topk, len(searchResult.GetResults().GetScores())) + + // query + queryResult, err := c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + DbName: dbName, + CollectionName: collectionName, + Expr: "", + OutputFields: []string{"count(*)"}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + s.Equal(int64(rowNum), queryResult.GetFieldsData()[0].GetScalars().GetLongData().GetData()[0]) + + // release collection + status, err := c.Proxy.ReleaseCollection(ctx, &milvuspb.ReleaseCollectionRequest{ + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(status, err) + 
s.NoError(err) + + // drop collection + status, err = c.Proxy.DropCollection(ctx, &milvuspb.DropCollectionRequest{ + CollectionName: collectionName, + }) + err = merr.CheckRPCCall(status, err) + s.NoError(err) + + log.Info("Test compaction succeed") +} diff --git a/tests/integration/import/partition_key_test.go b/tests/integration/import/partition_key_test.go new file mode 100644 index 0000000000000..b9cba86c84b50 --- /dev/null +++ b/tests/integration/import/partition_key_test.go @@ -0,0 +1,215 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package importv2 + +import ( + "context" + "fmt" + "math/rand" + "os" + "strings" + "time" + + "github.com/golang/protobuf/proto" + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/internal/proto/internalpb" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/metric" + "github.com/milvus-io/milvus/tests/integration" +) + +func (s *BulkInsertSuite) TestImportWithPartitionKey() { + const ( + rowCount = 10000 + ) + + c := s.Cluster + ctx, cancel := context.WithTimeout(c.GetContext(), 60*time.Second) + defer cancel() + + collectionName := "TestBulkInsert_WithPartitionKey_" + funcutil.GenRandomStr() + + schema := integration.ConstructSchema(collectionName, dim, true, &schemapb.FieldSchema{ + FieldID: 100, + Name: integration.Int64Field, + IsPrimaryKey: true, + DataType: schemapb.DataType_Int64, + AutoID: true, + }, &schemapb.FieldSchema{ + FieldID: 101, + Name: integration.FloatVecField, + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: fmt.Sprintf("%d", dim), + }, + }, + }, &schemapb.FieldSchema{ + FieldID: 102, + Name: integration.VarCharField, + DataType: schemapb.DataType_VarChar, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.MaxLengthKey, + Value: fmt.Sprintf("%d", 256), + }, + }, + IsPartitionKey: true, + }) + marshaledSchema, err := proto.Marshal(schema) + s.NoError(err) + + createCollectionStatus, err := c.Proxy.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{ + DbName: "", + CollectionName: collectionName, + Schema: marshaledSchema, + ShardsNum: common.DefaultShardsNum, + }) + s.NoError(err) + s.Equal(int32(0), createCollectionStatus.GetCode()) + + // create index + createIndexStatus, err := c.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{ + CollectionName: collectionName, + FieldName: integration.FloatVecField, + IndexName: "_default", + ExtraParams: 
integration.ConstructIndexParam(dim, integration.IndexFaissIvfFlat, metric.L2), + }) + s.NoError(err) + s.Equal(int32(0), createIndexStatus.GetCode()) + + s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField) + + // import + var files []*internalpb.ImportFile + err = os.MkdirAll(c.ChunkManager.RootPath(), os.ModePerm) + s.NoError(err) + + filePath := fmt.Sprintf("/tmp/test_%d.parquet", rand.Int()) + insertData, err := GenerateParquetFileAndReturnInsertData(filePath, schema, rowCount) + s.NoError(err) + defer os.Remove(filePath) + files = []*internalpb.ImportFile{ + { + Paths: []string{ + filePath, + }, + }, + } + + importResp, err := c.Proxy.ImportV2(ctx, &internalpb.ImportRequest{ + CollectionName: collectionName, + Files: files, + }) + s.NoError(err) + s.Equal(int32(0), importResp.GetStatus().GetCode()) + log.Info("Import result", zap.Any("importResp", importResp)) + + jobID := importResp.GetJobID() + err = WaitForImportDone(ctx, c, jobID) + s.NoError(err) + + // load + loadStatus, err := c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + CollectionName: collectionName, + }) + s.NoError(err) + s.Equal(commonpb.ErrorCode_Success, loadStatus.GetErrorCode()) + s.WaitForLoad(ctx, collectionName) + + segments, err := c.MetaWatcher.ShowSegments() + s.NoError(err) + s.NotEmpty(segments) + log.Info("Show segments", zap.Any("segments", segments)) + + // load refresh + loadStatus, err = c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + CollectionName: collectionName, + Refresh: true, + }) + s.NoError(err) + s.Equal(commonpb.ErrorCode_Success, loadStatus.GetErrorCode()) + s.WaitForLoadRefresh(ctx, "", collectionName) + + // query partition key, TermExpr + queryNum := 10 + partitionKeyData := insertData.Data[int64(102)].GetRows().([]string) + queryData := partitionKeyData[:queryNum] + strs := lo.Map(queryData, func(str string, _ int) string { + return fmt.Sprintf("\"%s\"", str) + }) + str := strings.Join(strs, `,`) + expr := fmt.Sprintf("%s in [%v]", integration.VarCharField, str) + queryResult, err := c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + CollectionName: collectionName, + Expr: expr, + OutputFields: []string{integration.VarCharField}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + for _, data := range queryResult.GetFieldsData() { + if data.GetType() == schemapb.DataType_VarChar { + resData := data.GetScalars().GetStringData().GetData() + s.Equal(queryNum, len(resData)) + s.ElementsMatch(resData, queryData) + } + } + + // query partition key, CmpOp 1 + expr = fmt.Sprintf("%s >= 0", integration.Int64Field) + queryResult, err = c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + CollectionName: collectionName, + Expr: expr, + OutputFields: []string{integration.VarCharField}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + for _, data := range queryResult.GetFieldsData() { + if data.GetType() == schemapb.DataType_VarChar { + resData := data.GetScalars().GetStringData().GetData() + s.Equal(rowCount, len(resData)) + s.ElementsMatch(resData, partitionKeyData) + } + } + + // query partition key, CmpOp 2 + target := partitionKeyData[rand.Intn(rowCount)] + expr = fmt.Sprintf("%s == \"%s\"", integration.VarCharField, target) + queryResult, err = c.Proxy.Query(ctx, &milvuspb.QueryRequest{ + CollectionName: collectionName, + Expr: expr, + OutputFields: []string{integration.VarCharField}, + }) + err = merr.CheckRPCCall(queryResult, err) + s.NoError(err) + for _, data := range queryResult.GetFieldsData() { + if data.GetType() == 
schemapb.DataType_VarChar { + resData := data.GetScalars().GetStringData().GetData() + s.Equal(1, len(resData)) + s.Equal(resData[0], target) + } + } +} diff --git a/tests/integration/import/util_test.go b/tests/integration/import/util_test.go index 237a705ec2474..6987ffc355253 100644 --- a/tests/integration/import/util_test.go +++ b/tests/integration/import/util_test.go @@ -39,40 +39,44 @@ import ( "github.com/milvus-io/milvus/internal/util/testutil" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/util/merr" - "github.com/milvus-io/milvus/pkg/util/typeutil" "github.com/milvus-io/milvus/tests/integration" ) const dim = 128 func GenerateParquetFile(filePath string, schema *schemapb.CollectionSchema, numRows int) error { + _, err := GenerateParquetFileAndReturnInsertData(filePath, schema, numRows) + return err +} + +func GenerateParquetFileAndReturnInsertData(filePath string, schema *schemapb.CollectionSchema, numRows int) (*storage.InsertData, error) { w, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE, 0o666) if err != nil { - return err + return nil, err } pqSchema, err := pq.ConvertToArrowSchema(schema) if err != nil { - return err + return nil, err } fw, err := pqarrow.NewFileWriter(pqSchema, w, parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(int64(numRows))), pqarrow.DefaultWriterProps()) if err != nil { - return err + return nil, err } defer fw.Close() insertData, err := testutil.CreateInsertData(schema, numRows) if err != nil { - return err + return nil, err } columns, err := testutil.BuildArrayData(schema, insertData) if err != nil { - return err + return nil, err } recordBatch := array.NewRecord(pqSchema, columns, int64(numRows)) - return fw.Write(recordBatch) + return insertData, fw.Write(recordBatch) } func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSchema, rowCount int) (*internalpb.ImportFile, error) { @@ -105,87 +109,60 @@ func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSche path := fmt.Sprintf("%s/%s.npy", cm.RootPath(), field.GetName()) fieldID := field.GetFieldID() + fieldData := insertData.Data[fieldID] dType := field.GetDataType() switch dType { - case schemapb.DataType_Bool: - data = insertData.Data[fieldID].(*storage.BoolFieldData).Data - case schemapb.DataType_Int8: - data = insertData.Data[fieldID].(*storage.Int8FieldData).Data - case schemapb.DataType_Int16: - data = insertData.Data[fieldID].(*storage.Int16FieldData).Data - case schemapb.DataType_Int32: - data = insertData.Data[fieldID].(*storage.Int32FieldData).Data - case schemapb.DataType_Int64: - data = insertData.Data[fieldID].(*storage.Int64FieldData).Data - case schemapb.DataType_Float: - data = insertData.Data[fieldID].(*storage.FloatFieldData).Data - case schemapb.DataType_Double: - data = insertData.Data[fieldID].(*storage.DoubleFieldData).Data - case schemapb.DataType_String, schemapb.DataType_VarChar: - data = insertData.Data[fieldID].(*storage.StringFieldData).Data case schemapb.DataType_BinaryVector: - vecData := insertData.Data[fieldID].(*storage.BinaryVectorFieldData).Data - if dim != insertData.Data[fieldID].(*storage.BinaryVectorFieldData).Dim { - panic(fmt.Sprintf("dim mis-match: %d, %d", dim, insertData.Data[fieldID].(*storage.BinaryVectorFieldData).Dim)) + rows := fieldData.GetRows().([]byte) + if dim != fieldData.(*storage.BinaryVectorFieldData).Dim { + panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.BinaryVectorFieldData).Dim)) } const rowBytes = dim / 8 - rows := len(vecData) / 
rowBytes - binVecData := make([][rowBytes]byte, 0, rows) - for i := 0; i < rows; i++ { - rowVec := [rowBytes]byte{} - copy(rowVec[:], vecData[i*rowBytes:(i+1)*rowBytes]) - binVecData = append(binVecData, rowVec) + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) + for i, innerSlice := range chunked { + copy(chunkedRows[i][:], innerSlice[:]) } - data = binVecData + data = chunkedRows case schemapb.DataType_FloatVector: - vecData := insertData.Data[fieldID].(*storage.FloatVectorFieldData).Data - if dim != insertData.Data[fieldID].(*storage.FloatVectorFieldData).Dim { - panic(fmt.Sprintf("dim mis-match: %d, %d", dim, insertData.Data[fieldID].(*storage.FloatVectorFieldData).Dim)) + rows := fieldData.GetRows().([]float32) + if dim != fieldData.(*storage.FloatVectorFieldData).Dim { + panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.FloatVectorFieldData).Dim)) } - rows := len(vecData) / dim - floatVecData := make([][dim]float32, 0, rows) - for i := 0; i < rows; i++ { - rowVec := [dim]float32{} - copy(rowVec[:], vecData[i*dim:(i+1)*dim]) - floatVecData = append(floatVecData, rowVec) + chunked := lo.Chunk(rows, dim) + chunkedRows := make([][dim]float32, len(chunked)) + for i, innerSlice := range chunked { + copy(chunkedRows[i][:], innerSlice[:]) } - data = floatVecData + data = chunkedRows case schemapb.DataType_Float16Vector: - vecData := insertData.Data[fieldID].(*storage.Float16VectorFieldData).Data - if dim != insertData.Data[fieldID].(*storage.Float16VectorFieldData).Dim { - panic(fmt.Sprintf("dim mis-match: %d, %d", dim, insertData.Data[fieldID].(*storage.Float16VectorFieldData).Dim)) + rows := insertData.Data[fieldID].GetRows().([]byte) + if dim != fieldData.(*storage.Float16VectorFieldData).Dim { + panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.Float16VectorFieldData).Dim)) } const rowBytes = dim * 2 - rows := len(vecData) / rowBytes - float16VecData := make([][rowBytes]byte, 0, rows) - for i := 0; i < rows; i++ { - rowVec := [rowBytes]byte{} - copy(rowVec[:], vecData[i*rowBytes:(i+1)*rowBytes]) - float16VecData = append(float16VecData, rowVec) + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) + for i, innerSlice := range chunked { + copy(chunkedRows[i][:], innerSlice[:]) } - data = float16VecData + data = chunkedRows case schemapb.DataType_BFloat16Vector: - vecData := insertData.Data[fieldID].(*storage.BFloat16VectorFieldData).Data - if dim != insertData.Data[fieldID].(*storage.BFloat16VectorFieldData).Dim { - panic(fmt.Sprintf("dim mis-match: %d, %d", dim, insertData.Data[fieldID].(*storage.BFloat16VectorFieldData).Dim)) + rows := insertData.Data[fieldID].GetRows().([]byte) + if dim != fieldData.(*storage.BFloat16VectorFieldData).Dim { + panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.BFloat16VectorFieldData).Dim)) } const rowBytes = dim * 2 - rows := len(vecData) / rowBytes - bfloat16VecData := make([][rowBytes]byte, 0, rows) - for i := 0; i < rows; i++ { - rowVec := [rowBytes]byte{} - copy(rowVec[:], vecData[i*rowBytes:(i+1)*rowBytes]) - bfloat16VecData = append(bfloat16VecData, rowVec) + chunked := lo.Chunk(rows, rowBytes) + chunkedRows := make([][rowBytes]byte, len(chunked)) + for i, innerSlice := range chunked { + copy(chunkedRows[i][:], innerSlice[:]) } - data = bfloat16VecData + data = chunkedRows case schemapb.DataType_SparseFloatVector: data = insertData.Data[fieldID].(*storage.SparseFloatVectorFieldData).GetContents() - case 
schemapb.DataType_JSON: - data = insertData.Data[fieldID].(*storage.JSONFieldData).Data - case schemapb.DataType_Array: - data = insertData.Data[fieldID].(*storage.ArrayFieldData).Data default: - panic(fmt.Sprintf("unsupported data type: %s", dType.String())) + data = insertData.Data[fieldID].GetRows() } err := writeFn(path, data) @@ -202,47 +179,9 @@ func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSche func GenerateJSONFile(t *testing.T, filePath string, schema *schemapb.CollectionSchema, count int) { insertData, err := testutil.CreateInsertData(schema, count) assert.NoError(t, err) - rows := make([]map[string]any, 0, count) - fieldIDToField := lo.KeyBy(schema.GetFields(), func(field *schemapb.FieldSchema) int64 { - return field.GetFieldID() - }) - for i := 0; i < count; i++ { - data := make(map[int64]interface{}) - for fieldID, v := range insertData.Data { - dataType := fieldIDToField[fieldID].GetDataType() - if fieldIDToField[fieldID].GetAutoID() { - continue - } - switch dataType { - case schemapb.DataType_Array: - data[fieldID] = v.GetRow(i).(*schemapb.ScalarField).GetIntData().GetData() - case schemapb.DataType_JSON: - data[fieldID] = string(v.GetRow(i).([]byte)) - case schemapb.DataType_BinaryVector: - bytes := v.GetRow(i).([]byte) - ints := make([]int, 0, len(bytes)) - for _, b := range bytes { - ints = append(ints, int(b)) - } - data[fieldID] = ints - case schemapb.DataType_Float16Vector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.Float16BytesToFloat32Vector(bytes) - case schemapb.DataType_BFloat16Vector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.BFloat16BytesToFloat32Vector(bytes) - case schemapb.DataType_SparseFloatVector: - bytes := v.GetRow(i).([]byte) - data[fieldID] = typeutil.SparseFloatBytesToMap(bytes) - default: - data[fieldID] = v.GetRow(i) - } - } - row := lo.MapKeys(data, func(_ any, fieldID int64) string { - return fieldIDToField[fieldID].GetName() - }) - rows = append(rows, row) - } + + rows, err := testutil.CreateInsertDataRowsForJSON(schema, insertData) + assert.NoError(t, err) jsonBytes, err := json.Marshal(rows) assert.NoError(t, err) diff --git a/tests/integration/replicas/load/load_test.go b/tests/integration/replicas/load/load_test.go new file mode 100644 index 0000000000000..837a634c53799 --- /dev/null +++ b/tests/integration/replicas/load/load_test.go @@ -0,0 +1,187 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
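+// TestLoadWithDatabaseLevelConfig below prepares a collection and three resource groups limited to one query +// node each, waits for the extra query nodes to be assigned, sets database-level replica.number and +// resource_groups properties through AlterDatabase, then loads the collection without an explicit replica +// count and checks that GetReplicas reports three replicas before releasing the collection.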
+ +package balance + +import ( + "context" + "fmt" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/suite" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus-proto/go-api/v2/rgpb" + "github.com/milvus-io/milvus/internal/proto/querypb" + "github.com/milvus-io/milvus/internal/querycoordv2/meta" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/tests/integration" +) + +const ( + dim = 128 + dbName = "" + collectionName = "test_load_collection" +) + +type LoadTestSuite struct { + integration.MiniClusterSuite +} + +func (s *LoadTestSuite) SetupSuite() { + paramtable.Init() + paramtable.Get().Save(paramtable.Get().QueryCoordCfg.BalanceCheckInterval.Key, "1000") + paramtable.Get().Save(paramtable.Get().QueryNodeCfg.GracefulStopTimeout.Key, "1") + + s.Require().NoError(s.SetupEmbedEtcd()) +} + +func (s *LoadTestSuite) loadCollection(collectionName string, replica int, rgs []string) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // load + loadStatus, err := s.Cluster.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + ReplicaNumber: int32(replica), + ResourceGroups: rgs, + }) + s.NoError(err) + s.True(merr.Ok(loadStatus)) + s.WaitForLoad(ctx, collectionName) +} + +func (s *LoadTestSuite) releaseCollection(collectionName string) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // release + status, err := s.Cluster.Proxy.ReleaseCollection(ctx, &milvuspb.ReleaseCollectionRequest{ + DbName: dbName, + CollectionName: collectionName, + }) + s.NoError(err) + s.True(merr.Ok(status)) +} + +func (s *LoadTestSuite) TestLoadWithDatabaseLevelConfig() { + ctx := context.Background() + s.CreateCollectionWithConfiguration(ctx, &integration.CreateCollectionConfig{ + DBName: dbName, + Dim: dim, + CollectionName: collectionName, + ChannelNum: 1, + SegmentNum: 3, + RowNumPerSegment: 2000, + }) + + // prepare resource groups + rgNum := 3 + rgs := make([]string, 0) + for i := 0; i < rgNum; i++ { + rgs = append(rgs, fmt.Sprintf("rg_%d", i)) + s.Cluster.QueryCoord.CreateResourceGroup(ctx, &milvuspb.CreateResourceGroupRequest{ + ResourceGroup: rgs[i], + Config: &rgpb.ResourceGroupConfig{ + Requests: &rgpb.ResourceGroupLimit{ + NodeNum: 1, + }, + Limits: &rgpb.ResourceGroupLimit{ + NodeNum: 1, + }, + + TransferFrom: []*rgpb.ResourceGroupTransfer{ + { + ResourceGroup: meta.DefaultResourceGroupName, + }, + }, + TransferTo: []*rgpb.ResourceGroupTransfer{ + { + ResourceGroup: meta.DefaultResourceGroupName, + }, + }, + }, + }) + } + + resp, err := s.Cluster.QueryCoord.ListResourceGroups(ctx, &milvuspb.ListResourceGroupsRequest{}) + s.NoError(err) + s.True(merr.Ok(resp.GetStatus())) + s.Len(resp.GetResourceGroups(), rgNum+1) + + for i := 1; i < rgNum; i++ { + s.Cluster.AddQueryNode() + } + + s.Eventually(func() bool { + matchCounter := 0 + for _, rg := range rgs { + resp1, err := s.Cluster.QueryCoord.DescribeResourceGroup(ctx, &querypb.DescribeResourceGroupRequest{ + ResourceGroup: rg, + }) + s.NoError(err) + s.True(merr.Ok(resp1.GetStatus())) + if len(resp1.ResourceGroup.Nodes) == 1 { + matchCounter += 1 + } + } + return matchCounter == rgNum + }, 30*time.Second, time.Second) + + status, err := s.Cluster.Proxy.AlterDatabase(ctx, &milvuspb.AlterDatabaseRequest{ + DbName: 
"default", + Properties: []*commonpb.KeyValuePair{ + { + Key: common.DatabaseReplicaNumber, + Value: "3", + }, + { + Key: common.DatabaseResourceGroups, + Value: strings.Join(rgs, ","), + }, + }, + }) + s.NoError(err) + s.True(merr.Ok(status)) + + resp1, err := s.Cluster.Proxy.DescribeDatabase(ctx, &milvuspb.DescribeDatabaseRequest{ + DbName: "default", + }) + s.NoError(err) + s.True(merr.Ok(resp1.Status)) + s.Len(resp1.GetProperties(), 2) + + // load collection without specified replica and rgs + s.loadCollection(collectionName, 0, nil) + resp2, err := s.Cluster.Proxy.GetReplicas(ctx, &milvuspb.GetReplicasRequest{ + DbName: dbName, + CollectionName: collectionName, + }) + s.NoError(err) + s.True(merr.Ok(resp2.Status)) + s.Len(resp2.GetReplicas(), 3) + s.releaseCollection(collectionName) +} + +func TestReplicas(t *testing.T) { + suite.Run(t, new(LoadTestSuite)) +} diff --git a/tests/python_client/base/client_base.py b/tests/python_client/base/client_base.py index e5b3cfd2e6cc5..0b52845885136 100644 --- a/tests/python_client/base/client_base.py +++ b/tests/python_client/base/client_base.py @@ -1,4 +1,3 @@ -from numpy.core.fromnumeric import _partition_dispatcher import pytest import sys from pymilvus import DefaultConfig @@ -33,7 +32,7 @@ class Base: collection_object_list = [] resource_group_list = [] high_level_api_wrap = None - + skip_connection = False def setup_class(self): log.info("[setup_class] Start setup class...") @@ -128,6 +127,9 @@ class TestcaseBase(Base): def _connect(self, enable_milvus_client_api=False): """ Add a connection and create the connect """ + if self.skip_connection: + return None + if enable_milvus_client_api: if cf.param_info.param_uri: uri = cf.param_info.param_uri @@ -252,8 +254,8 @@ def init_collection_general(self, prefix="test", insert_data=False, nb=ct.defaul insert_ids = [] time_stamp = 0 # 1 create collection - default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field, - enable_dynamic_field=enable_dynamic_field, + default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field, + enable_dynamic_field=enable_dynamic_field, with_json=with_json, multiple_dim_array=multiple_dim_array, is_partition_key=is_partition_key, vector_data_type=vector_data_type) diff --git a/tests/python_client/base/collection_wrapper.py b/tests/python_client/base/collection_wrapper.py index 39650957af4b4..43dc117191952 100644 --- a/tests/python_client/base/collection_wrapper.py +++ b/tests/python_client/base/collection_wrapper.py @@ -226,7 +226,6 @@ def hybrid_search(self, reqs, rerank, limit, partition_names=None, output_fields @trace() def query(self, expr, output_fields=None, partition_names=None, timeout=None, check_task=None, check_items=None, **kwargs): - # time.sleep(5) timeout = TIMEOUT if timeout is None else timeout func_name = sys._getframe().f_code.co_name @@ -240,7 +239,6 @@ def query(self, expr, output_fields=None, partition_names=None, timeout=None, ch @trace() def query_iterator(self, batch_size=1000, limit=-1, expr=None, output_fields=None, partition_names=None, timeout=None, check_task=None, check_items=None, **kwargs): - # time.sleep(5) timeout = TIMEOUT if timeout is None else timeout func_name = sys._getframe().f_code.co_name diff --git a/tests/python_client/chaos/checker.py b/tests/python_client/chaos/checker.py index 5eb256977775d..66cd25475d4d7 100644 --- a/tests/python_client/chaos/checker.py +++ b/tests/python_client/chaos/checker.py @@ -1331,10 +1331,10 @@ def 
keep_running(self): class DeleteChecker(Checker): """check delete operations in a dependent thread""" - def __init__(self, collection_name=None, schema=None): + def __init__(self, collection_name=None, schema=None, shards_num=2): if collection_name is None: collection_name = cf.gen_unique_str("DeleteChecker_") - super().__init__(collection_name=collection_name, schema=schema) + super().__init__(collection_name=collection_name, schema=schema, shards_num=shards_num) res, result = self.c_wrap.create_index(self.float_vector_field_name, constants.DEFAULT_INDEX_PARAM, timeout=timeout, diff --git a/tests/python_client/common/bulk_insert_data.py b/tests/python_client/common/bulk_insert_data.py index 7a98a6c9f8213..ce80d9cbf0c23 100644 --- a/tests/python_client/common/bulk_insert_data.py +++ b/tests/python_client/common/bulk_insert_data.py @@ -4,7 +4,7 @@ import time import numpy as np -import jax.numpy as jnp +from ml_dtypes import bfloat16 import pandas as pd import random from faker import Faker @@ -23,6 +23,7 @@ class DataField: pk_field = "uid" vec_field = "vectors" float_vec_field = "float_vectors" + sparse_vec_field = "sparse_vectors" image_float_vec_field = "image_float_vec_field" text_float_vec_field = "text_float_vec_field" binary_vec_field = "binary_vec_field" @@ -127,9 +128,9 @@ def gen_bf16_vectors(num, dim, for_json=False): raw_vector = [random.random() for _ in range(dim)] raw_vectors.append(raw_vector) if for_json: - bf16_vector = np.array(jnp.array(raw_vector, dtype=jnp.bfloat16)).tolist() + bf16_vector = np.array(raw_vector, dtype=bfloat16).tolist() else: - bf16_vector = np.array(jnp.array(raw_vector, dtype=jnp.bfloat16)).view(np.uint8).tolist() + bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist() bf16_vectors.append(bf16_vector) return raw_vectors, bf16_vectors @@ -473,7 +474,22 @@ def gen_vectors(float_vector, rows, dim): return vectors -def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128, array_length=None): +def gen_sparse_vectors(rows, sparse_format="dok"): + # default sparse format is dok, dict of keys + # another option is coo, coordinate List + + rng = np.random.default_rng() + vectors = [{ + d: rng.random() for d in random.sample(range(1000), random.randint(20, 30)) + } for _ in range(rows)] + if sparse_format == "coo": + vectors = [ + {"indices": list(x.keys()), "values": list(x.values())} for x in vectors + ] + return vectors + + +def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128, array_length=None, sparse_format="dok"): if array_length is None: array_length = random.randint(0, 10) @@ -483,6 +499,9 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128 if "float" in data_field: data = gen_vectors(float_vector=True, rows=rows, dim=dim) data = pd.Series([np.array(x, dtype=np.dtype("float32")) for x in data]) + elif "sparse" in data_field: + data = gen_sparse_vectors(rows, sparse_format=sparse_format) + data = pd.Series([json.dumps(x) for x in data], dtype=np.dtype("str")) elif "fp16" in data_field: data = gen_fp16_vectors(rows, dim)[1] data = pd.Series([np.array(x, dtype=np.dtype("uint8")) for x in data]) @@ -596,7 +615,7 @@ def gen_json_files(is_row_based, rows, dim, auto_id, str_pk, return files -def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, dim=128, array_length=None, enable_dynamic_field=False): +def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, dim=128, array_length=None, 
enable_dynamic_field=False, **kwargs): data = [] for r in range(rows): d = {} @@ -605,6 +624,9 @@ def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, d if "float" in data_field: float_vector = True d[data_field] = gen_vectors(float_vector=float_vector, rows=1, dim=dim)[0] + if "sparse" in data_field: + sparse_format = kwargs.get("sparse_format", "dok") + d[data_field] = gen_sparse_vectors(1, sparse_format=sparse_format)[0] if "binary" in data_field: float_vector = False d[data_field] = gen_vectors(float_vector=float_vector, rows=1, dim=dim)[0] @@ -647,7 +669,7 @@ def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, d return data -def gen_new_json_files(float_vector, rows, dim, data_fields, file_nums=1, array_length=None, file_size=None, err_type="", enable_dynamic_field=False): +def gen_new_json_files(float_vector, rows, dim, data_fields, file_nums=1, array_length=None, file_size=None, err_type="", enable_dynamic_field=False, **kwargs): files = [] if file_size is not None: rows = 5000 @@ -655,7 +677,7 @@ def gen_new_json_files(float_vector, rows, dim, data_fields, file_nums=1, array_ for i in range(file_nums): file_name = f"data-fields-{len(data_fields)}-rows-{rows}-dim-{dim}-file-num-{i}-{int(time.time())}.json" file = f"{data_source}/{file_name}" - data = gen_dict_data_by_data_field(data_fields=data_fields, rows=rows, start=start_uid, float_vector=float_vector, dim=dim, array_length=array_length, enable_dynamic_field=enable_dynamic_field) + data = gen_dict_data_by_data_field(data_fields=data_fields, rows=rows, start=start_uid, float_vector=float_vector, dim=dim, array_length=array_length, enable_dynamic_field=enable_dynamic_field, **kwargs) # log.info(f"data: {data}") with open(file, "w") as f: json.dump(data, f) @@ -762,7 +784,7 @@ def gen_dynamic_field_data_in_parquet_file(rows, start=0): return data -def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_group_size=None, file_nums=1, array_length=None, err_type="", enable_dynamic_field=False, include_meta=True): +def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_group_size=None, file_nums=1, array_length=None, err_type="", enable_dynamic_field=False, include_meta=True, sparse_format="doc"): # gen numpy files if err_type == "": err_type = "none" @@ -775,7 +797,7 @@ def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_ all_field_data = {} for data_field in data_fields: data = gen_data_by_data_field(data_field=data_field, rows=rows, start=0, - float_vector=float_vector, dim=dim, array_length=array_length) + float_vector=float_vector, dim=dim, array_length=array_length, sparse_format=sparse_format) all_field_data[data_field] = data if enable_dynamic_field and include_meta: all_field_data["$meta"] = gen_dynamic_field_data_in_parquet_file(rows=rows, start=0) @@ -948,7 +970,7 @@ def prepare_bulk_insert_numpy_files(minio_endpoint="", bucket_name="milvus-bucke def prepare_bulk_insert_parquet_files(minio_endpoint="", bucket_name="milvus-bucket", rows=100, dim=128, array_length=None, file_size=None, row_group_size=None, - enable_dynamic_field=False, data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False, include_meta=True): + enable_dynamic_field=False, data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False, include_meta=True, sparse_format="doc"): """ Generate column based files based on params in parquet format and copy them to the minio Note: each field in 
data_fields would be generated one parquet file. @@ -980,7 +1002,7 @@ def prepare_bulk_insert_parquet_files(minio_endpoint="", bucket_name="milvus-buc """ files = gen_parquet_files(rows=rows, dim=dim, float_vector=float_vector, enable_dynamic_field=enable_dynamic_field, data_fields=data_fields, array_length=array_length, file_size=file_size, row_group_size=row_group_size, - file_nums=file_nums, include_meta=include_meta) + file_nums=file_nums, include_meta=include_meta, sparse_format=sparse_format) copy_files_to_minio(host=minio_endpoint, r_source=data_source, files=files, bucket_name=bucket_name, force=force) return files diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index 4a4c15d322c7a..71d3328ced8cd 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -8,7 +8,7 @@ from functools import singledispatch import numpy as np import pandas as pd -import jax.numpy as jnp +from ml_dtypes import bfloat16 from sklearn import preprocessing from npy_append_array import NpyAppendArray from faker import Faker @@ -20,7 +20,6 @@ from utils.util_log import test_log as log from customize.milvus_operator import MilvusOperator import pickle -import tensorflow as tf fake = Faker() """" Methods of processing data """ @@ -1070,14 +1069,12 @@ def gen_data_by_collection_field(field, nb=None, start=None): dim = field.params['dim'] if nb is None: raw_vector = [random.random() for _ in range(dim)] - bf16_vector = jnp.array(raw_vector, dtype=jnp.bfloat16) - bf16_vector = np.array(bf16_vector).view(np.uint8).tolist() + bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist() return bytes(bf16_vector) bf16_vectors = [] for i in range(nb): raw_vector = [random.random() for _ in range(dim)] - bf16_vector = jnp.array(raw_vector, dtype=jnp.bfloat16) - bf16_vector = np.array(bf16_vector).view(np.uint8).tolist() + bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist() bf16_vectors.append(bytes(bf16_vector)) return bf16_vectors if data_type == DataType.FLOAT16_VECTOR: @@ -2077,7 +2074,7 @@ def gen_bf16_vectors(num, dim): for _ in range(num): raw_vector = [random.random() for _ in range(dim)] raw_vectors.append(raw_vector) - bf16_vector = tf.cast(raw_vector, dtype=tf.bfloat16).numpy() + bf16_vector = np.array(raw_vector, dtype=bfloat16) bf16_vectors.append(bf16_vector) return raw_vectors, bf16_vectors diff --git a/tests/python_client/common/milvus_sys.py b/tests/python_client/common/milvus_sys.py index f8f2e3e4721a7..7db540bb72875 100644 --- a/tests/python_client/common/milvus_sys.py +++ b/tests/python_client/common/milvus_sys.py @@ -3,6 +3,7 @@ from pymilvus.grpc_gen import milvus_pb2 as milvus_types from pymilvus import connections from utils.util_log import test_log as log +from utils.util_log import test_log as log sys_info_req = ujson.dumps({"metric_type": "system_info"}) sys_statistics_req = ujson.dumps({"metric_type": "system_statistics"}) sys_logs_req = ujson.dumps({"metric_type": "system_logs"}) @@ -17,9 +18,24 @@ def __init__(self, alias='default'): # TODO: for now it only supports non_orm style API for getMetricsRequest req = milvus_types.GetMetricsRequest(request=sys_info_req) + self.sys_info = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) + # req = milvus_types.GetMetricsRequest(request=sys_statistics_req) + # self.sys_statistics = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) + # req = 
milvus_types.GetMetricsRequest(request=sys_logs_req) + # self.sys_logs = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) self.sys_info = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=60) log.debug(f"sys_info: {self.sys_info}") + def refresh(self): + req = milvus_types.GetMetricsRequest(request=sys_info_req) + self.sys_info = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) + # req = milvus_types.GetMetricsRequest(request=sys_statistics_req) + # self.sys_statistics = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) + # req = milvus_types.GetMetricsRequest(request=sys_logs_req) + # self.sys_logs = self.handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None) + log.debug(f"sys info response: {self.sys_info.response}") + + @property def build_version(self): """get the first node's build version as milvus build version""" @@ -84,6 +100,7 @@ def proxy_nodes(self): @property def nodes(self): """get all the nodes in Milvus deployment""" + self.refresh() all_nodes = json.loads(self.sys_info.response).get('nodes_info') online_nodes = [node for node in all_nodes if node["infos"]["has_error"] is False] return online_nodes diff --git a/tests/python_client/customize/milvus_operator.py b/tests/python_client/customize/milvus_operator.py index 1140ff08f0e1a..658cbc4334bc3 100644 --- a/tests/python_client/customize/milvus_operator.py +++ b/tests/python_client/customize/milvus_operator.py @@ -3,6 +3,7 @@ import time from benedict import benedict from utils.util_log import test_log as log +from utils.util_k8s import get_pod_ip_name_pairs from common.cus_resource_opts import CustomResourceOperations as CusResource template_yaml = os.path.join(os.path.dirname(__file__), 'template/default.yaml') @@ -81,11 +82,13 @@ def uninstall(self, release_name, namespace='default', delete_depends=True, dele if delete_depends: del_configs = {'spec.dependencies.etcd.inCluster.deletionPolicy': 'Delete', 'spec.dependencies.pulsar.inCluster.deletionPolicy': 'Delete', + 'spec.dependencies.kafka.inCluster.deletionPolicy': 'Delete', 'spec.dependencies.storage.inCluster.deletionPolicy': 'Delete' } if delete_pvc: del_configs.update({'spec.dependencies.etcd.inCluster.pvcDeletion': True, 'spec.dependencies.pulsar.inCluster.pvcDeletion': True, + 'spec.dependencies.kafka.inCluster.pvcDeletion': True, 'spec.dependencies.storage.inCluster.pvcDeletion': True }) if delete_depends or delete_pvc: @@ -113,6 +116,40 @@ def upgrade(self, release_name, configs, namespace='default'): version=self.version, namespace=namespace) log.debug(f"upgrade milvus with configs: {d_configs}") cus_res.patch(release_name, d_configs) + self.wait_for_healthy(release_name, namespace=namespace) + + def rolling_update(self, release_name, new_image_name, namespace='default'): + """ + Method: patch custom resource object to rolling update milvus + Params: + release_name: release name of milvus + namespace: namespace that the milvus is running in + """ + cus_res = CusResource(kind=self.plural, group=self.group, + version=self.version, namespace=namespace) + rolling_configs = {'spec.components.enableRollingUpdate': True, + 'spec.components.imageUpdateMode': "rollingUpgrade", + 'spec.components.image': new_image_name} + log.debug(f"rolling update milvus with configs: {rolling_configs}") + cus_res.patch(release_name, rolling_configs) + self.wait_for_healthy(release_name, namespace=namespace) + + def scale(self, release_name, component, replicas, namespace='default'): + """ + Method: 
scale milvus components by replicas + Params: + release_name: release name of milvus + replicas: the number of replicas to scale + component: the component to scale, e.g: dataNode, queryNode, indexNode, proxy + namespace: namespace that the milvus is running in + """ + cus_res = CusResource(kind=self.plural, group=self.group, + version=self.version, namespace=namespace) + component = component.replace('node', 'Node') + scale_configs = {f'spec.components.{component}.replicas': replicas} + log.info(f"scale milvus with configs: {scale_configs}") + self.upgrade(release_name, scale_configs, namespace=namespace) + self.wait_for_healthy(release_name, namespace=namespace) def wait_for_healthy(self, release_name, namespace='default', timeout=600): """ @@ -152,3 +189,24 @@ def endpoint(self, release_name, namespace='default'): endpoint = res_object['status']['endpoint'] return endpoint + + def etcd_endpoints(self, release_name, namespace='default'): + """ + Method: get etcd endpoints by name and namespace + Return: a string type etcd endpoints. e.g: host:port + """ + etcd_endpoints = None + cus_res = CusResource(kind=self.plural, group=self.group, + version=self.version, namespace=namespace) + res_object = cus_res.get(release_name) + try: + etcd_endpoints = res_object['spec']['dependencies']['etcd']['endpoints'] + except KeyError: + log.info("etcd endpoints not found") + # get pod ip by pod name + label_selector = f"app.kubernetes.io/instance={release_name}-etcd, app.kubernetes.io/name=etcd" + res = get_pod_ip_name_pairs(namespace, label_selector) + if res: + etcd_endpoints = [f"{pod_ip}:2379" for pod_ip in res.keys()] + return etcd_endpoints[0] + diff --git a/tests/python_client/customize/template/default.yaml b/tests/python_client/customize/template/default.yaml index 507fe56193322..d3f71a8bbe139 100644 --- a/tests/python_client/customize/template/default.yaml +++ b/tests/python_client/customize/template/default.yaml @@ -13,6 +13,7 @@ spec: simdType: avx components: {} dependencies: + msgStreamType: kafka etcd: inCluster: deletionPolicy: Delete @@ -21,6 +22,113 @@ spec: metrics: podMonitor: enabled: true + kafka: + inCluster: + deletionPolicy: Retain + pvcDeletion: false + values: + replicaCount: 3 + defaultReplicationFactor: 2 + metrics: + kafka: + enabled: true + serviceMonitor: + enabled: true + jmx: + enabled: true + pulsar: + inCluster: + deletionPolicy: Retain + pvcDeletion: false + values: + components: + autorecovery: false + functions: false + toolset: false + pulsar_manager: false + monitoring: + prometheus: false + grafana: false + node_exporter: false + alert_manager: false + proxy: + replicaCount: 1 + resources: + requests: + cpu: 0.01 + memory: 256Mi + configData: + PULSAR_MEM: > + -Xms256m -Xmx256m + PULSAR_GC: > + -XX:MaxDirectMemorySize=256m + bookkeeper: + replicaCount: 2 + resources: + requests: + cpu: 0.01 + memory: 256Mi + configData: + PULSAR_MEM: > + -Xms256m + -Xmx256m + -XX:MaxDirectMemorySize=256m + PULSAR_GC: > + -Dio.netty.leakDetectionLevel=disabled + -Dio.netty.recycler.linkCapacity=1024 + -XX:+UseG1GC -XX:MaxGCPauseMillis=10 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:ParallelGCThreads=32 + -XX:ConcGCThreads=32 + -XX:G1NewSizePercent=50 + -XX:+DisableExplicitGC + -XX:-ResizePLAB + -XX:+ExitOnOutOfMemoryError + -XX:+PerfDisableSharedMem + -XX:+PrintGCDetails + zookeeper: + replicaCount: 1 + resources: + requests: + cpu: 0.01 + memory: 256Mi + configData: + PULSAR_MEM: > + -Xms256m + -Xmx256m + PULSAR_GC: > + 
-Dcom.sun.management.jmxremote + -Djute.maxbuffer=10485760 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis -XX:+DisableExplicitGC + -XX:+PerfDisableSharedMem + -Dzookeeper.forceSync=no + broker: + replicaCount: 1 + resources: + requests: + cpu: 0.01 + memory: 256Mi + configData: + PULSAR_MEM: > + -Xms256m + -Xmx256m + PULSAR_GC: > + -XX:MaxDirectMemorySize=256m + -Dio.netty.leakDetectionLevel=disabled + -Dio.netty.recycler.linkCapacity=1024 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:ParallelGCThreads=32 + -XX:ConcGCThreads=32 + -XX:G1NewSizePercent=50 + -XX:+DisableExplicitGC + -XX:-ResizePLAB + -XX:+ExitOnOutOfMemoryError storage: inCluster: deletionPolicy: Delete @@ -29,4 +137,3 @@ spec: metrics: podMonitor: enabled: true - \ No newline at end of file diff --git a/tests/python_client/deploy/milvus_crd.yaml b/tests/python_client/deploy/milvus_crd.yaml index 41cab3351122b..d078b76463753 100644 --- a/tests/python_client/deploy/milvus_crd.yaml +++ b/tests/python_client/deploy/milvus_crd.yaml @@ -7,11 +7,11 @@ metadata: labels: app: milvus spec: - mode: standalone + mode: cluster config: dataNode: memory: - forceSyncEnable: false + forceSyncEnable: false rootCoord: enableActiveStandby: true dataCoord: @@ -29,7 +29,7 @@ spec: components: enableRollingUpdate: true imageUpdateMode: rollingUpgrade - image: milvusdb/milvus:2.2.0-20230208-2e4d64ec + image: harbor.milvus.io/milvus/milvus:master-20240426-4fb8044a-amd64 disableMetric: false dataNode: replicas: 3 @@ -45,7 +45,7 @@ spec: pvcDeletion: false values: replicaCount: 3 - kafka: + kafka: inCluster: deletionPolicy: Retain pvcDeletion: false @@ -58,13 +58,13 @@ spec: serviceMonitor: enabled: true jmx: - enabled: true + enabled: true pulsar: inCluster: deletionPolicy: Retain pvcDeletion: false values: - components: + components: autorecovery: false functions: false toolset: false @@ -158,4 +158,3 @@ spec: pvcDeletion: false values: mode: distributed - \ No newline at end of file diff --git a/tests/python_client/pytest.ini b/tests/python_client/pytest.ini index 122b5e8bf6a0f..1c90a7f2fd3c9 100644 --- a/tests/python_client/pytest.ini +++ b/tests/python_client/pytest.ini @@ -1,7 +1,7 @@ [pytest] -addopts = --host localhost --html=/tmp/ci_logs/report.html --self-contained-html -v +addopts = --host 10.104.21.154 --minio_host 10.104.21.153 --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level=INFO --capture=no # python3 -W ignore -m pytest log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s) @@ -9,4 +9,4 @@ log_date_format = %Y-%m-%d %H:%M:%S filterwarnings = - ignore::DeprecationWarning \ No newline at end of file + ignore::DeprecationWarning diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt index 177e44cd3692f..6b62783758641 100644 --- a/tests/python_client/requirements.txt +++ b/tests/python_client/requirements.txt @@ -46,6 +46,7 @@ loguru==0.7.0 psutil==5.9.4 pandas==1.5.3 tenacity==8.1.0 +rich==13.7.0 # for standby test etcd-sdk-python==0.0.4 deepdiff==6.7.1 @@ -56,7 +57,5 @@ pyarrow==14.0.1 fastparquet==2023.7.0 # for bf16 datatype -jax==0.4.13 -jaxlib==0.4.13 -tensorflow==2.13.1 +ml-dtypes==0.2.0 diff --git a/tests/python_client/resource_group/conftest.py b/tests/python_client/resource_group/conftest.py new file mode 100644 index 0000000000000..7e56a38456b65 --- /dev/null +++ b/tests/python_client/resource_group/conftest.py @@ -0,0 +1,11 @@ +import pytest + 
+ +def pytest_addoption(parser): + parser.addoption("--image_tag", action="store", default="master-20240514-89a7c34c", help="image_tag") + + +@pytest.fixture +def image_tag(request): + return request.config.getoption("--image_tag") + diff --git a/tests/python_client/resource_group/test_channel_exclusive_balance.py b/tests/python_client/resource_group/test_channel_exclusive_balance.py new file mode 100644 index 0000000000000..f916014fde0b9 --- /dev/null +++ b/tests/python_client/resource_group/test_channel_exclusive_balance.py @@ -0,0 +1,446 @@ +import pytest +import time +from pymilvus import connections, utility, Collection +from utils.util_log import test_log as log +from base.client_base import TestcaseBase +from chaos.checker import (InsertChecker, + FlushChecker, + UpsertChecker, + DeleteChecker, + Op, + ResultAnalyzer + ) +from chaos import chaos_commons as cc +from common import common_func as cf +from utils.util_k8s import get_querynode_id_pod_pairs +from utils.util_birdwatcher import BirdWatcher +from customize.milvus_operator import MilvusOperator +from common.milvus_sys import MilvusSys +from common.common_type import CaseLabel +from chaos.chaos_commons import assert_statistic + +namespace = 'chaos-testing' +prefix = "test_rg" + +from rich.table import Table +from rich.console import Console + + +def display_segment_distribution_info(collection_name, release_name, segment_info=None): + table = Table(title=f"{collection_name} Segment Distribution Info") + table.width = 200 + table.add_column("Segment ID", style="cyan") + table.add_column("Collection ID", style="cyan") + table.add_column("Partition ID", style="cyan") + table.add_column("Num Rows", style="cyan") + table.add_column("State", style="cyan") + table.add_column("Channel", style="cyan") + table.add_column("Node ID", style="cyan") + table.add_column("Node Name", style="cyan") + res = utility.get_query_segment_info(collection_name) + log.info(f"segment info: {res}") + label = f"app.kubernetes.io/instance={release_name}, app.kubernetes.io/component=querynode" + querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", label) + for r in res: + channel = "unknown" + if segment_info and str(r.segmentID) in segment_info: + channel = segment_info[str(r.segmentID)]["Insert Channel"] + table.add_row( + str(r.segmentID), + str(r.collectionID), + str(r.partitionID), + str(r.num_rows), + str(r.state), + str(channel), + str(r.nodeIds), + str([querynode_id_pod_pair.get(node_id) for node_id in r.nodeIds]) + ) + console = Console() + console.width = 300 + console.print(table) + + +def display_channel_on_qn_distribution_info(collection_name, release_name, segment_info=None): + """ + node id, node name, channel, segment id + 1, rg-test-613938-querynode-0, [rg-test-613938-rootcoord-dml_3_449617770820133536v0], [449617770820133655] + 2, rg-test-613938-querynode-1, [rg-test-613938-rootcoord-dml_3_449617770820133537v0], [449617770820133656] + + """ + m = {} + res = utility.get_query_segment_info(collection_name) + for r in res: + if r.nodeIds: + for node_id in r.nodeIds: + if node_id not in m: + m[node_id] = { + "node_name": "", + "channel": [], + "segment_id": [] + } + m[node_id]["segment_id"].append(r.segmentID) + # get channel info + for node_id in m.keys(): + for seg in m[node_id]["segment_id"]: + if segment_info and str(seg) in segment_info: + m[node_id]["channel"].append(segment_info[str(seg)]["Insert Channel"]) + + # get node name + label = f"app.kubernetes.io/instance={release_name}, app.kubernetes.io/component=querynode" + 
querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", label) + for node_id in m.keys(): + m[node_id]["node_name"] = querynode_id_pod_pair.get(node_id) + + table = Table(title=f"{collection_name} Channel Distribution Info") + table.width = 200 + table.add_column("Node ID", style="cyan") + table.add_column("Node Name", style="cyan") + table.add_column("Channel", style="cyan") + table.add_column("Segment ID", style="cyan") + for node_id, v in m.items(): + table.add_row( + str(node_id), + str(v["node_name"]), + "\n".join([str(x) for x in set(v["channel"])]), + "\n".join([str(x) for x in v["segment_id"]]) + ) + console = Console() + console.width = 300 + console.print(table) + return m + + +def _install_milvus(image_tag="master-latest"): + release_name = f"rg-test-{cf.gen_digits_by_length(6)}" + cus_configs = {'spec.mode': 'cluster', + 'spec.dependencies.msgStreamType': 'kafka', + 'spec.components.image': f'harbor.milvus.io/milvus/milvus:{image_tag}', + 'metadata.namespace': namespace, + 'metadata.name': release_name, + 'spec.components.proxy.serviceType': 'LoadBalancer', + 'spec.config.queryCoord.balancer': 'ChannelLevelScoreBalancer', + 'spec.config.queryCoord.channelExclusiveNodeFactor': 2 + } + milvus_op = MilvusOperator() + log.info(f"install milvus with configs: {cus_configs}") + milvus_op.install(cus_configs) + healthy = milvus_op.wait_for_healthy(release_name, namespace, timeout=1200) + log.info(f"milvus healthy: {healthy}") + if healthy: + endpoint = milvus_op.endpoint(release_name, namespace).split(':') + log.info(f"milvus endpoint: {endpoint}") + host = endpoint[0] + port = endpoint[1] + return release_name, host, port + else: + return release_name, None, None + + +class TestChannelExclusiveBalance(TestcaseBase): + + def teardown_method(self, method): + log.info(("*" * 35) + " teardown " + ("*" * 35)) + log.info("[teardown_method] Start teardown test case %s..." 
% method.__name__) + milvus_op = MilvusOperator() + milvus_op.uninstall(self.release_name, namespace) + connections.disconnect("default") + connections.remove_connection("default") + + def init_health_checkers(self, collection_name=None, shards_num=2): + c_name = collection_name + checkers = { + Op.insert: InsertChecker(collection_name=c_name, shards_num=shards_num), + Op.flush: FlushChecker(collection_name=c_name, shards_num=shards_num), + Op.upsert: UpsertChecker(collection_name=c_name, shards_num=shards_num), + Op.delete: DeleteChecker(collection_name=c_name, shards_num=shards_num), + } + self.health_checkers = checkers + + @pytest.mark.tags(CaseLabel.L3) + def test_channel_exclusive_balance_during_qn_scale_up(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + qn_num = 1 + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + etcd_endpoint = milvus_op.etcd_endpoints(release_name, namespace) + bw = BirdWatcher(etcd_endpoints=etcd_endpoint, root_path=release_name) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + c = Collection(name=c_name) + res = c.describe() + collection_id = res["collection_id"] + cc.start_monitor_threads(self.health_checkers) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration // 10) + for k, v in self.health_checkers.items(): + v.check_result() + qn_num += min(qn_num + 1, 8) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + milvus_op.scale(release_name, 'queryNode', 8, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + time.sleep(60) + # in final state, channel exclusive balance is on, so all qn should have only one channel + for k, v in res.items(): + assert len(set(v["channel"])) == 1 + + + @pytest.mark.tags(CaseLabel.L3) + def test_channel_exclusive_balance_during_qn_scale_down(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + qn_num = 8 + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + etcd_endpoint = milvus_op.etcd_endpoints(release_name, namespace) + bw = BirdWatcher(etcd_endpoints=etcd_endpoint, root_path=release_name) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + c_name = cf.gen_unique_str("Checker_") + 
self.init_health_checkers(collection_name=c_name) + c = Collection(name=c_name) + res = c.describe() + collection_id = res["collection_id"] + cc.start_monitor_threads(self.health_checkers) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration // 10) + for k, v in self.health_checkers.items(): + v.check_result() + qn_num = max(qn_num - 1, 3) + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + milvus_op.scale(release_name, 'queryNode', 1, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + time.sleep(60) + # shard num = 2, k = 2, qn_num = 3 + # in final state, channel exclusive balance is off, so all qn should have more than one channel + for k, v in res.items(): + assert len(set(v["channel"])) > 1 + + @pytest.mark.tags(CaseLabel.L3) + def test_channel_exclusive_balance_with_channel_num_is_1(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + qn_num = 1 + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + etcd_endpoint = milvus_op.etcd_endpoints(release_name, namespace) + bw = BirdWatcher(etcd_endpoints=etcd_endpoint, root_path=release_name) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name, shards_num=1) + c = Collection(name=c_name) + res = c.describe() + collection_id = res["collection_id"] + cc.start_monitor_threads(self.health_checkers) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration // 10) + for k, v in self.health_checkers.items(): + v.check_result() + qn_num = qn_num + 1 + qn_num = min(qn_num, 8) + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + for r in res: + assert len(set(r["channel"])) == 1 + milvus_op.scale(release_name, 'queryNode', 8, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + res = display_channel_on_qn_distribution_info(c_name, 
release_name, segment_info=seg_res) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + time.sleep(60) + + # since shard num is 1, so all qn should have only one channel, no matter what k is + for k, v in res.items(): + assert len(set(v["channel"])) == 1 + + @pytest.mark.tags(CaseLabel.L3) + def test_channel_exclusive_balance_after_k_increase(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + qn_num = 1 + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + etcd_endpoint = milvus_op.etcd_endpoints(release_name, namespace) + bw = BirdWatcher(etcd_endpoints=etcd_endpoint, root_path=release_name) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + c = Collection(name=c_name) + res = c.describe() + collection_id = res["collection_id"] + cc.start_monitor_threads(self.health_checkers) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration // 10) + for k, v in self.health_checkers.items(): + v.check_result() + qn_num = qn_num + 1 + qn_num = min(qn_num, 8) + if qn_num == 5: + config = { + "spec.config.queryCoord.channelExclusiveNodeFactor": 3 + } + milvus_op.upgrade(release_name, config, namespace) + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + if qn_num == 4: + # channel exclusive balance is on, so all qn should have only one channel + for r in res.values(): + assert len(set(r["channel"])) == 1 + if qn_num == 5: + # k is changed to 3 when qn_num is 5, + # channel exclusive balance is off, so all qn should have more than one channel + # wait for a while to make sure all qn have more than one channel + ready = False + t0 = time.time() + while not ready and time.time() - t0 < 180: + ready = True + for r in res.values(): + if len(set(r["channel"])) == 1: + ready = False + time.sleep(10) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + if qn_num == 6: + # channel exclusive balance is on, so all qn should have only one channel + ready = False + t0 = time.time() + while not ready and time.time() - t0 < 180: + ready = True + for r in res.values(): + if len(set(r["channel"])) != 1: + ready = False + time.sleep(10) + res = display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + milvus_op.scale(release_name, 'queryNode', 8, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + 
assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + time.sleep(60) + + @pytest.mark.tags(CaseLabel.L3) + def test_channel_exclusive_balance_for_search_performance(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + qn_num = 1 + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + etcd_endpoint = milvus_op.etcd_endpoints(release_name, namespace) + bw = BirdWatcher(etcd_endpoints=etcd_endpoint, root_path=release_name) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + c = Collection(name=c_name) + res = c.describe() + collection_id = res["collection_id"] + cc.start_monitor_threads(self.health_checkers) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration // 10) + for k, v in self.health_checkers.items(): + v.check_result() + qn_num = qn_num + 1 + qn_num = min(qn_num, 8) + milvus_op.scale(release_name, 'queryNode', qn_num, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + milvus_op.scale(release_name, 'queryNode', 8, namespace) + seg_res = bw.show_segment_info(collection_id) + display_segment_distribution_info(c_name, release_name, segment_info=seg_res) + display_channel_on_qn_distribution_info(c_name, release_name, segment_info=seg_res) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + time.sleep(60) diff --git a/tests/python_client/resource_group/test_resource_group.py b/tests/python_client/resource_group/test_resource_group.py new file mode 100644 index 0000000000000..0e4e448bd25dc --- /dev/null +++ b/tests/python_client/resource_group/test_resource_group.py @@ -0,0 +1,944 @@ +import pytest +import time +from typing import Union, List +from pymilvus import connections, utility, Collection +from pymilvus.client.constants import DEFAULT_RESOURCE_GROUP +from pymilvus.client.types import ResourceGroupConfig, ResourceGroupInfo +from utils.util_log import test_log as log +from base.client_base import TestcaseBase +from chaos.checker import (InsertChecker, + UpsertChecker, + SearchChecker, + HybridSearchChecker, + QueryChecker, + DeleteChecker, + Op, + ResultAnalyzer + ) +from chaos import chaos_commons as cc +from common import common_func as cf +from utils.util_k8s import get_querynode_id_pod_pairs +from common import common_type as ct +from customize.milvus_operator import MilvusOperator +from common.milvus_sys import MilvusSys +from common.common_type import CaseLabel +from chaos.chaos_commons import assert_statistic +from delayed_assert import assert_expectations + +namespace = 'chaos-testing' +prefix = "test_rg" + +from rich.table import Table +from rich.console import Console + + +def 
display_resource_group_info(info: Union[ResourceGroupInfo, List[ResourceGroupInfo]]): + table = Table(title="Resource Group Info") + table.width = 200 + table.add_column("Name", style="cyan") + table.add_column("Capacity", style="cyan") + table.add_column("Available Node", style="cyan") + table.add_column("Loaded Replica", style="cyan") + table.add_column("Outgoing Node", style="cyan") + table.add_column("Incoming Node", style="cyan") + table.add_column("Request", style="cyan") + table.add_column("Limit", style="cyan") + table.add_column("Nodes", style="cyan") + if isinstance(info, list): + for i in info: + table.add_row( + i.name, + str(i.capacity), + str(i.num_available_node), + str(i.num_loaded_replica), + str(i.num_outgoing_node), + str(i.num_incoming_node), + str(i.config.requests.node_num), + str(i.config.limits.node_num), + "\n".join([str(node.hostname) for node in i.nodes]) + ) + else: + table.add_row( + info.name, + str(info.capacity), + str(info.num_available_node), + str(info.num_loaded_replica), + str(info.num_outgoing_node), + str(info.num_incoming_node), + str(info.config.requests.node_num), + str(info.config.limits.node_num), + "\n".join([str(node.hostname) for node in info.nodes]) + ) + + console = Console() + console.width = 300 + console.print(table) + + +def display_segment_distribution_info(collection_name, release_name): + table = Table(title=f"{collection_name} Segment Distribution Info") + table.width = 200 + table.add_column("Segment ID", style="cyan") + table.add_column("Collection ID", style="cyan") + table.add_column("Partition ID", style="cyan") + table.add_column("Num Rows", style="cyan") + table.add_column("State", style="cyan") + table.add_column("Node ID", style="cyan") + table.add_column("Node Name", style="cyan") + res = utility.get_query_segment_info(collection_name) + label = f"app.kubernetes.io/instance={release_name}, app.kubernetes.io/component=querynode" + querynode_id_pod_pair = get_querynode_id_pod_pairs("chaos-testing", label) + + for r in res: + table.add_row( + str(r.segmentID), + str(r.collectionID), + str(r.partitionID), + str(r.num_rows), + str(r.state), + str(r.nodeIds), + str([querynode_id_pod_pair.get(node_id) for node_id in r.nodeIds]) + ) + console = Console() + console.width = 300 + console.print(table) + + +def list_all_resource_groups(): + rg_names = utility.list_resource_groups() + resource_groups = [] + for rg_name in rg_names: + resource_group = utility.describe_resource_group(rg_name) + resource_groups.append(resource_group) + display_resource_group_info(resource_groups) + + +def _install_milvus(image_tag="master-latest"): + release_name = f"rg-test-{cf.gen_digits_by_length(6)}" + cus_configs = {'spec.mode': 'cluster', + 'spec.dependencies.msgStreamType': 'kafka', + 'spec.components.image': f'harbor.milvus.io/milvus/milvus:{image_tag}', + 'metadata.namespace': namespace, + 'metadata.name': release_name, + 'spec.components.proxy.serviceType': 'LoadBalancer', + } + milvus_op = MilvusOperator() + log.info(f"install milvus with configs: {cus_configs}") + milvus_op.install(cus_configs) + healthy = milvus_op.wait_for_healthy(release_name, namespace, timeout=1200) + log.info(f"milvus healthy: {healthy}") + if healthy: + endpoint = milvus_op.endpoint(release_name, namespace).split(':') + log.info(f"milvus endpoint: {endpoint}") + host = endpoint[0] + port = endpoint[1] + return release_name, host, port + else: + return release_name, None, None + + +class TestResourceGroup(TestcaseBase): + + def teardown_method(self, method): + 
log.info(("*" * 35) + " teardown " + ("*" * 35)) + log.info("[teardown_method] Start teardown test case %s..." % method.__name__) + milvus_op = MilvusOperator() + milvus_op.uninstall(self.release_name, namespace) + connections.disconnect("default") + connections.remove_connection("default") + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_scale_up(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + # scale up rg1 to 8 nodes one by one + for replicas in range(1, 8): + milvus_op.scale(release_name, 'queryNode', replicas, namespace) + time.sleep(10) + # get querynode info + qn = mil.query_nodes + log.info(f"query node info: {len(qn)}") + resource_group = self.utility.describe_resource_group(name) + log.info(f"Resource group {name} info:\n {display_resource_group_info(resource_group)}") + list_all_resource_groups() + # assert the node in rg >= 4 + resource_group = self.utility.describe_resource_group(name) + assert resource_group.num_available_node >= 4 + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_scale_down(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 8, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + # scale down rg1 from 8 to 1 node one by one + for replicas in range(8, 1, -1): + milvus_op.scale(release_name, 'queryNode', replicas, namespace) + time.sleep(10) + resource_group = self.utility.describe_resource_group(name) + log.info(f"Resource group {name} info:\n {display_resource_group_info(resource_group)}") + list_all_resource_groups() + # assert the node in rg <= 1 + resource_group = self.utility.describe_resource_group(name) + assert resource_group.num_available_node <= 1 + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_all_querynode_add_into_two_different_config_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 8, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + rg_list = [] + # create rg1 with request node_num=4, limit node_num=6 + + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + rg_list.append(name) + name = cf.gen_unique_str("rg") + 
self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + rg_list.append(name) + # assert two rg satisfy the request node_num + list_all_resource_groups() + for rg in rg_list: + resource_group = self.utility.describe_resource_group(rg) + assert resource_group.num_available_node >= resource_group.config.requests.node_num + + # scale down rg1 from 8 to 1 node one by one + for replicas in range(8, 1, -1): + milvus_op.scale(release_name, 'queryNode', replicas, namespace) + time.sleep(10) + for name in rg_list: + resource_group = self.utility.describe_resource_group(name) + log.info(f"Resource group {name} info:\n {display_resource_group_info(resource_group)}") + list_all_resource_groups() + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_querynode_add_into_two_different_config_rg_one_by_one(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + rg_list = [] + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + rg_list.append(name) + + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + rg_list.append(name) + for replicas in range(1, 8): + milvus_op.scale(release_name, 'queryNode', replicas, namespace) + time.sleep(10) + list_all_resource_groups() + + for rg in rg_list: + resource_group = self.utility.describe_resource_group(rg) + assert resource_group.num_available_node >= resource_group.config.requests.node_num + # scale down rg1 from 8 to 1 node one by one + for replicas in range(8, 1, -1): + milvus_op.scale(release_name, 'queryNode', replicas, namespace) + time.sleep(10) + list_all_resource_groups() + for rg in rg_list: + resource_group = self.utility.describe_resource_group(rg) + assert resource_group.num_available_node >= 1 + + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_querynode_add_into_new_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + + self.release_name = release_name + milvus_op.scale(release_name, 'queryNode', 10, namespace) + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + rg_list = [] + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + rg_list.append(name) + for rg in rg_list: + resource_group = self.utility.describe_resource_group(rg) + assert resource_group.num_available_node >= resource_group.config.requests.node_num + + # create a new rg with request node_num=3, limit node_num=6 + # the querynode will be added into the new rg from default rg + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, 
config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + rg_list.append(name) + list_all_resource_groups() + for rg in rg_list: + resource_group = self.utility.describe_resource_group(rg) + assert resource_group.num_available_node >= resource_group.config.requests.node_num + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_with_two_rg_link_to_each_other_when_all_not_reached_to_request(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + milvus_op.scale(release_name, 'queryNode', 8, namespace) + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 1})}) + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + rg1_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + name = cf.gen_unique_str("rg") + rg2_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 6}, + )) + list_all_resource_groups() + log.info("update resource group") + utility.update_resource_groups( + {rg1_name: ResourceGroupConfig(requests={"node_num": 6}, + limits={"node_num": 8}, + transfer_from=[{"resource_group": rg2_name}], + transfer_to=[{"resource_group": rg2_name}], )}) + time.sleep(10) + list_all_resource_groups() + utility.update_resource_groups( + {rg2_name: ResourceGroupConfig(requests={"node_num": 6}, + limits={"node_num": 8}, + transfer_from=[{"resource_group": rg1_name}], + transfer_to=[{"resource_group": rg1_name}], )}) + time.sleep(10) + list_all_resource_groups() + # no querynode was transferred between rg1 and rg2 + resource_group = self.utility.describe_resource_group(rg1_name) + assert resource_group.num_available_node == 4 + resource_group = self.utility.describe_resource_group(rg2_name) + assert resource_group.num_available_node == 4 + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_with_rg_transfer_from_non_default_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + milvus_op.scale(release_name, 'queryNode', 15, namespace) + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 3})}) + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + rg1_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 2}, + limits={"node_num": 2}, + )) + name = cf.gen_unique_str("rg") + rg2_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 6}, + limits={"node_num": 10}, + )) + list_all_resource_groups() + rg2_available_node_before = self.utility.describe_resource_group(rg2_name).num_available_node + 
log.info("update resource group") + utility.update_resource_groups( + {rg1_name: ResourceGroupConfig(requests={"node_num": 4}, + limits={"node_num": 6}, + transfer_from=[{"resource_group": rg2_name}], + transfer_to=[{"resource_group": rg2_name}], )}) + time.sleep(10) + list_all_resource_groups() + # expect qn in rg 1 transfer from rg2 not the default rg + rg2_available_node_after = self.utility.describe_resource_group(rg2_name).num_available_node + assert rg2_available_node_before > rg2_available_node_after + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_with_rg_transfer_to_non_default_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + milvus_op.scale(release_name, 'queryNode', 10, namespace) + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 10})}) + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + rg1_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 2}, + limits={"node_num": 10}, + )) + name = cf.gen_unique_str("rg") + rg2_name = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 4}, + limits={"node_num": 4}, + )) + list_all_resource_groups() + rg1_node_available_before = self.utility.describe_resource_group(rg1_name).num_available_node + log.info("update resource group") + utility.update_resource_groups( + {rg2_name: ResourceGroupConfig(requests={"node_num": 2}, + limits={"node_num": 2}, + transfer_from=[{"resource_group": rg1_name}], + transfer_to=[{"resource_group": rg1_name}], )}) + time.sleep(10) + list_all_resource_groups() + # expect qn in rg 2 transfer to rg1 not the default rg + rg1_node_available_after = self.utility.describe_resource_group(rg1_name).num_available_node + assert rg1_node_available_after > rg1_node_available_before + + + @pytest.mark.tags(CaseLabel.L3) + def test_resource_group_with_rg_transfer_with_rg_list(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + milvus_op.scale(release_name, 'queryNode', 12, namespace) + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 1})}) + # create rg1 with request node_num=4, limit node_num=6 + name = cf.gen_unique_str("rg") + source_rg = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 1}, + limits={"node_num": 1}, + )) + name = cf.gen_unique_str("rg") + small_rg = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 2}, + limits={"node_num": 4}, + )) + name = cf.gen_unique_str("rg") + big_rg = name + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, 
+ )) + list_all_resource_groups() + small_rg_node_available_before = self.utility.describe_resource_group(small_rg).num_available_node + big_rg_node_available_before = self.utility.describe_resource_group(big_rg).num_available_node + log.info("update resource group") + utility.update_resource_groups( + {source_rg: ResourceGroupConfig(requests={"node_num": 6}, + limits={"node_num": 6}, + transfer_from=[{"resource_group": small_rg}, {"resource_group": big_rg}], + )}) + time.sleep(10) + list_all_resource_groups() + # expect source rg transfer from small rg and big rg + small_rg_node_available_after = self.utility.describe_resource_group(small_rg).num_available_node + big_rg_node_available_after = self.utility.describe_resource_group(big_rg).num_available_node + assert (small_rg_node_available_before + big_rg_node_available_before > small_rg_node_available_after + + big_rg_node_available_after) + + +class TestReplicasManagement(TestcaseBase): + + def teardown_method(self, method): + log.info(("*" * 35) + " teardown " + ("*" * 35)) + log.info("[teardown_method] Start teardown test case %s..." % method.__name__) + milvus_op = MilvusOperator() + milvus_op.uninstall(self.release_name, namespace) + connections.disconnect("default") + connections.remove_connection("default") + + @pytest.mark.tags(CaseLabel.L3) + def test_load_replicas_one_collection_multi_replicas_to_multi_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 12, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + resource_groups = [] + for i in range(4): + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 2}, + limits={"node_num": 6}, + )) + resource_groups.append(name) + list_all_resource_groups() + + # create collection and load with 2 replicase + self.skip_connection = True + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, + enable_dynamic_field=True)[0:2] + collection_w.release() + log.info(f"resource groups: {resource_groups}") + collection_w.load(replica_number=len(resource_groups), _resource_groups=resource_groups) + list_all_resource_groups() + + # list replicas + replicas = collection_w.get_replicas() + log.info(f"replicas: {replicas}") + rg_to_scale_down = resource_groups[0] + # scale down a rg to 1 node + self.utility.update_resource_groups( + {rg_to_scale_down: ResourceGroupConfig(requests={"node_num": 1}, + limits={"node_num": 1}, )} + ) + + list_all_resource_groups() + replicas = collection_w.get_replicas() + log.info(f"replicas: {replicas}") + # scale down a rg t0 0 node + self.utility.update_resource_groups( + {rg_to_scale_down: ResourceGroupConfig(requests={"node_num": 0}, + limits={"node_num": 0}, )} + ) + list_all_resource_groups() + replicas = collection_w.get_replicas() + log.info(f"replicas: {replicas}") + + @pytest.mark.tags(CaseLabel.L3) + def test_load_multi_collection_multi_replicas_to_multi_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 12, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", 
host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + # create two rg with request node_num=4, limit node_num=6 + resource_groups = [] + for i in range(3): + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + resource_groups.append(name) + log.info(f"resource groups: {resource_groups}") + list_all_resource_groups() + col_list = [] + # create collection and load with multi replicase + self.skip_connection = True + for i in range(3): + prefix = cf.gen_unique_str("test_rg") + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, + enable_dynamic_field=True)[0:2] + collection_w.release() + col_list.append(collection_w) + collection_w.load(replica_number=len(resource_groups), _resource_groups=resource_groups) + list_all_resource_groups() + + # list replicas + for col in col_list: + replicas = col.get_replicas() + log.info(f"replicas: {replicas}") + + @pytest.mark.tags(CaseLabel.L3) + def test_load_multi_collection_one_replicas_to_multi_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 12, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + # create two rg with request node_num=4, limit node_num=6 + resource_groups = [] + for i in range(3): + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + resource_groups.append(name) + log.info(f"resource groups: {resource_groups}") + list_all_resource_groups() + col_list = [] + # create collection and load with multi replicase + self.skip_connection = True + for i in range(3): + prefix = cf.gen_unique_str("test_rg") + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, + enable_dynamic_field=True)[0:2] + collection_w.release() + col_list.append(collection_w) + collection_w.load(replica_number=1, _resource_groups=resource_groups) + list_all_resource_groups() + + # list replicas + for col in col_list: + replicas = col.get_replicas() + log.info(f"replicas: {replicas}") + + @pytest.mark.tags(CaseLabel.L3) + def test_transfer_replicas_to_other_rg(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 12, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + # create two rg with request node_num=4, limit node_num=6 + resource_groups = [] + for i in range(3): + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 3}, + limits={"node_num": 6}, + )) + resource_groups.append(name) + log.info(f"resource groups: {resource_groups}") + list_all_resource_groups() + col_list = [] + # create collection and load with multi replicase + self.skip_connection = True + for i in range(3): + prefix = 
cf.gen_unique_str("test_rg") + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, + enable_dynamic_field=True)[0:2] + collection_w.release() + col_list.append(collection_w) + collection_w.load(replica_number=1, _resource_groups=[resource_groups[i]]) + list_all_resource_groups() + # list replicas + for col in col_list: + replicas = col.get_replicas() + log.info(f"replicas: {replicas}") + + # transfer replicas to default rg + self.utility.transfer_replica(source_group=resource_groups[0], target_group=DEFAULT_RESOURCE_GROUP, + collection_name=col_list[0].name, num_replicas=1) + + list_all_resource_groups() + # list replicas + for col in col_list: + replicas = col.get_replicas() + log.info(f"replicas: {replicas}") + + +class TestServiceAvailableDuringScale(TestcaseBase): + + def init_health_checkers(self, collection_name=None): + c_name = collection_name + shards_num = 5 + checkers = { + Op.insert: InsertChecker(collection_name=c_name, shards_num=shards_num), + Op.upsert: UpsertChecker(collection_name=c_name, shards_num=shards_num), + Op.search: SearchChecker(collection_name=c_name, shards_num=shards_num), + Op.hybrid_search: HybridSearchChecker(collection_name=c_name, shards_num=shards_num), + Op.query: QueryChecker(collection_name=c_name, shards_num=shards_num), + Op.delete: DeleteChecker(collection_name=c_name, shards_num=shards_num), + } + self.health_checkers = checkers + + def teardown_method(self, method): + log.info(("*" * 35) + " teardown " + ("*" * 35)) + log.info("[teardown_method] Start teardown test case %s..." % method.__name__) + milvus_op = MilvusOperator() + milvus_op.uninstall(self.release_name, namespace) + connections.disconnect("default") + connections.remove_connection("default") + + def test_service_available_during_scale_up(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 3, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 10})}) + # create rg + resource_groups = [] + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 1}, + limits={"node_num": 1}, + )) + resource_groups.append(name) + list_all_resource_groups() + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + # load collection to non default rg + self.health_checkers[Op.search].c_wrap.release() + self.health_checkers[Op.search].c_wrap.load(_resource_groups=resource_groups) + cc.start_monitor_threads(self.health_checkers) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration//10) + for k, v in self.health_checkers.items(): + v.check_result() + # scale up querynode when progress is 3/10 + if i == 3: + utility.update_resource_groups( + {name: ResourceGroupConfig(requests={"node_num": 2}, limits={"node_num": 2})}) + log.info(f"scale up querynode in rg {name} from 1 to 2") + list_all_resource_groups() + display_segment_distribution_info(c_name, release_name) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + 
assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + + def test_service_available_during_scale_down(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 3, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 5})}) + # create rg + resource_groups = [] + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 2}, + limits={"node_num": 2}, + )) + resource_groups.append(name) + list_all_resource_groups() + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + # load collection to non default rg + self.health_checkers[Op.search].c_wrap.release() + self.health_checkers[Op.search].c_wrap.load(_resource_groups=resource_groups) + cc.start_monitor_threads(self.health_checkers) + list_all_resource_groups() + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration//10) + for k, v in self.health_checkers.items(): + v.check_result() + # scale down querynode in rg when progress is 3/10 + if i == 3: + list_all_resource_groups() + utility.update_resource_groups( + {name: ResourceGroupConfig(requests={"node_num": 1}, limits={"node_num": 1})}) + log.info(f"scale down querynode in rg {name} from 2 to 1") + list_all_resource_groups() + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() + + +class TestServiceAvailableDuringTransferReplicas(TestcaseBase): + + def init_health_checkers(self, collection_name=None): + c_name = collection_name + shards_num = 5 + checkers = { + Op.insert: InsertChecker(collection_name=c_name, shards_num=shards_num), + Op.upsert: UpsertChecker(collection_name=c_name, shards_num=shards_num), + Op.search: SearchChecker(collection_name=c_name, shards_num=shards_num), + Op.hybrid_search: HybridSearchChecker(collection_name=c_name, shards_num=shards_num), + Op.query: QueryChecker(collection_name=c_name, shards_num=shards_num), + Op.delete: DeleteChecker(collection_name=c_name, shards_num=shards_num), + } + self.health_checkers = checkers + + def teardown_method(self, method): + log.info(("*" * 35) + " teardown " + ("*" * 35)) + log.info("[teardown_method] Start teardown test case %s..." 
% method.__name__) + milvus_op = MilvusOperator() + milvus_op.uninstall(self.release_name, namespace) + connections.disconnect("default") + connections.remove_connection("default") + + def test_service_available_during_transfer_replicas(self, image_tag): + """ + steps + """ + milvus_op = MilvusOperator() + release_name, host, port = _install_milvus(image_tag=image_tag) + milvus_op.scale(release_name, 'queryNode', 5, namespace) + self.release_name = release_name + assert host is not None + connections.connect("default", host=host, port=port) + mil = MilvusSys(alias="default") + log.info(f"milvus build version: {mil.build_version}") + utility.update_resource_groups( + {DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 10})}) + # create rg + resource_groups = [] + for i in range(2): + name = cf.gen_unique_str("rg") + self.utility = utility + self.utility.create_resource_group(name, config=ResourceGroupConfig( + requests={"node_num": 1}, + limits={"node_num": 1}, + )) + resource_groups.append(name) + list_all_resource_groups() + c_name = cf.gen_unique_str("Checker_") + self.init_health_checkers(collection_name=c_name) + self.health_checkers[Op.search].c_wrap.release() + self.health_checkers[Op.search].c_wrap.load(_resource_groups=resource_groups[0:1]) + cc.start_monitor_threads(self.health_checkers) + list_all_resource_groups() + display_segment_distribution_info(c_name, release_name) + log.info("*********************Load Start**********************") + request_duration = 360 + for i in range(10): + time.sleep(request_duration//10) + for k, v in self.health_checkers.items(): + v.check_result() + # transfer replicas from default to another + if i == 3: + # transfer replicas from default rg to another rg + list_all_resource_groups() + display_segment_distribution_info(c_name, release_name) + self.utility.transfer_replica(source_group=resource_groups[0], target_group=resource_groups[1], + collection_name=c_name, num_replicas=1) + list_all_resource_groups() + display_segment_distribution_info(c_name, release_name) + time.sleep(60) + ra = ResultAnalyzer() + ra.get_stage_success_rate() + assert_statistic(self.health_checkers) + for k, v in self.health_checkers.items(): + v.terminate() diff --git a/tests/python_client/testcases/test_bulk_insert.py b/tests/python_client/testcases/test_bulk_insert.py index 9e1a4ae0bc5e3..1270efb82282f 100644 --- a/tests/python_client/testcases/test_bulk_insert.py +++ b/tests/python_client/testcases/test_bulk_insert.py @@ -828,7 +828,8 @@ def test_with_all_field_json(self, auto_id, dim, entities, enable_dynamic_field) @pytest.mark.parametrize("dim", [128]) # 128 @pytest.mark.parametrize("entities", [1000]) # 1000 @pytest.mark.parametrize("enable_dynamic_field", [True]) - def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities, enable_dynamic_field): + @pytest.mark.parametrize("enable_partition_key", [True, False]) + def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key): """ collection schema 1: [pk, int64, float64, string float_vector] data file: vectors.npy and uid.npy, @@ -841,7 +842,7 @@ def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), cf.gen_int64_field(name=df.int_field), cf.gen_float_field(name=df.float_field), - cf.gen_string_field(name=df.string_field), + cf.gen_string_field(name=df.string_field, 
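# with enable_partition_key=True the string field acts as the partition key, so imported rows should spread across multiple partitions (asserted further below)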
is_partition_key=enable_partition_key), cf.gen_json_field(name=df.json_field), cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64), cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT), @@ -945,16 +946,23 @@ def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities if enable_dynamic_field: assert "name" in fields_from_search assert "address" in fields_from_search - - + # query data + res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field]) + assert len(res) == entities + query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)] + res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field]) + assert len(res) == len(query_data) + if enable_partition_key: + assert len(self.collection_wrap.partitions) > 1 @pytest.mark.tags(CaseLabel.L3) @pytest.mark.parametrize("auto_id", [True, False]) @pytest.mark.parametrize("dim", [128]) # 128 @pytest.mark.parametrize("entities", [1000]) # 1000 @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("enable_partition_key", [True, False]) @pytest.mark.parametrize("include_meta", [True, False]) - def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field, include_meta): + def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key, include_meta): """ collection schema 1: [pk, int64, float64, string float_vector] data file: vectors.npy and uid.npy, @@ -970,7 +978,7 @@ def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_d cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), cf.gen_int64_field(name=df.int_field), cf.gen_float_field(name=df.float_field), - cf.gen_string_field(name=df.string_field), + cf.gen_string_field(name=df.string_field, is_partition_key=enable_partition_key), cf.gen_json_field(name=df.json_field), cf.gen_float_vec_field(name=df.float_vec_field, dim=dim), # cf.gen_float_vec_field(name=df.image_float_vec_field, dim=dim), @@ -1072,14 +1080,25 @@ def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_d if enable_dynamic_field and include_meta: assert "name" in fields_from_search assert "address" in fields_from_search + # query data + res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field]) + assert len(res) == entities + query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)] + res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field]) + assert len(res) == len(query_data) + if enable_partition_key: + assert len(self.collection_wrap.partitions) > 1 + + @pytest.mark.tags(CaseLabel.L3) @pytest.mark.parametrize("auto_id", [True, False]) @pytest.mark.parametrize("dim", [128]) # 128 @pytest.mark.parametrize("entities", [1000]) # 1000 @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("enable_partition_key", [True, False]) @pytest.mark.parametrize("include_meta", [True, False]) - def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, include_meta): + def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key, include_meta): """ collection schema 1: [pk, int64, float64, string float_vector] data file: 
vectors.parquet and uid.parquet, @@ -1094,15 +1113,13 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), cf.gen_int64_field(name=df.int_field), cf.gen_float_field(name=df.float_field), - cf.gen_string_field(name=df.string_field), + cf.gen_string_field(name=df.string_field, is_partition_key=enable_partition_key), cf.gen_json_field(name=df.json_field), cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64), cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT), cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100), cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL), cf.gen_float_vec_field(name=df.float_vec_field, dim=dim), - # cf.gen_float_vec_field(name=df.image_float_vec_field, dim=dim), - # cf.gen_float_vec_field(name=df.text_float_vec_field, dim=dim), cf.gen_binary_vec_field(name=df.binary_vec_field, dim=dim), cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=dim), cf.gen_float16_vec_field(name=df.fp16_vec_field, dim=dim) @@ -1199,6 +1216,256 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable if enable_dynamic_field and include_meta: assert "name" in fields_from_search assert "address" in fields_from_search + # query data + res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field]) + assert len(res) == entities + query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)] + res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field]) + assert len(res) == len(query_data) + if enable_partition_key: + assert len(self.collection_wrap.partitions) > 1 + + @pytest.mark.tags(CaseLabel.L3) + @pytest.mark.parametrize("auto_id", [True, False]) + @pytest.mark.parametrize("dim", [128]) # 128 + @pytest.mark.parametrize("entities", [1000]) # 1000 + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("include_meta", [True, False]) + @pytest.mark.parametrize("sparse_format", ["doc", "coo"]) + def test_bulk_insert_sparse_vector_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, include_meta, sparse_format): + """ + collection schema 1: [pk, int64, float64, string float_vector] + data file: vectors.parquet and uid.parquet, + Steps: + 1. create collection + 2. import data + 3. 
verify + """ + if enable_dynamic_field is False and include_meta is True: + pytest.skip("include_meta only works with enable_dynamic_field") + fields = [ + cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), + cf.gen_int64_field(name=df.int_field), + cf.gen_float_field(name=df.float_field), + cf.gen_string_field(name=df.string_field), + cf.gen_json_field(name=df.json_field), + cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64), + cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT), + cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100), + cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL), + cf.gen_float_vec_field(name=df.float_vec_field, dim=dim), + cf.gen_sparse_vec_field(name=df.sparse_vec_field), + ] + data_fields = [f.name for f in fields if not f.to_dict().get("auto_id", False)] + files = prepare_bulk_insert_parquet_files( + minio_endpoint=self.minio_endpoint, + bucket_name=self.bucket_name, + rows=entities, + dim=dim, + data_fields=data_fields, + enable_dynamic_field=enable_dynamic_field, + force=True, + include_meta=include_meta, + sparse_format=sparse_format + ) + self._connect() + c_name = cf.gen_unique_str("bulk_insert") + schema = cf.gen_collection_schema(fields=fields, auto_id=auto_id, enable_dynamic_field=enable_dynamic_field) + self.collection_wrap.init_collection(c_name, schema=schema) + + # import data + t0 = time.time() + task_id, _ = self.utility_wrap.do_bulk_insert( + collection_name=c_name, files=files + ) + logging.info(f"bulk insert task ids:{task_id}") + success, states = self.utility_wrap.wait_for_bulk_insert_tasks_completed( + task_ids=[task_id], timeout=300 + ) + tt = time.time() - t0 + log.info(f"bulk insert state:{success} in {tt} with states:{states}") + assert success + num_entities = self.collection_wrap.num_entities + log.info(f" collection entities: {num_entities}") + assert num_entities == entities + # verify imported data is available for search + index_params = ct.default_index + float_vec_fields = [f.name for f in fields if "vec" in f.name and "float" in f.name] + sparse_vec_fields = [f.name for f in fields if "vec" in f.name and "sparse" in f.name] + for f in float_vec_fields: + self.collection_wrap.create_index( + field_name=f, index_params=index_params + ) + for f in sparse_vec_fields: + self.collection_wrap.create_index( + field_name=f, index_params=ct.default_sparse_inverted_index + ) + self.collection_wrap.load() + log.info(f"wait for load finished and be ready for search") + time.sleep(2) + # log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}") + search_data = cf.gen_vectors(1, dim) + search_params = ct.default_search_params + for field_name in float_vec_fields: + res, _ = self.collection_wrap.search( + search_data, + field_name, + param=search_params, + limit=1, + output_fields=["*"], + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, "limit": 1}, + ) + for hit in res: + for r in hit: + fields_from_search = r.fields.keys() + for f in fields: + assert f.name in fields_from_search + if enable_dynamic_field and include_meta: + assert "name" in fields_from_search + assert "address" in fields_from_search + search_data = cf.gen_sparse_vectors(1, dim) + search_params = ct.default_sparse_search_params + for field_name in sparse_vec_fields: + res, _ = self.collection_wrap.search( + search_data, + field_name, + param=search_params, + limit=1, + output_fields=["*"], + 
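# dense float-vector search is validated first; the sparse vector field is searched below with its own index and search params
+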
check_task=CheckTasks.check_search_results, + check_items={"nq": 1, "limit": 1}, + ) + for hit in res: + for r in hit: + fields_from_search = r.fields.keys() + for f in fields: + assert f.name in fields_from_search + if enable_dynamic_field and include_meta: + assert "name" in fields_from_search + assert "address" in fields_from_search + + + @pytest.mark.tags(CaseLabel.L3) + @pytest.mark.parametrize("auto_id", [True, False]) + @pytest.mark.parametrize("dim", [128]) # 128 + @pytest.mark.parametrize("entities", [1000]) # 1000 + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("include_meta", [True, False]) + @pytest.mark.parametrize("sparse_format", ["doc", "coo"]) + def test_bulk_insert_sparse_vector_with_json(self, auto_id, dim, entities, enable_dynamic_field, include_meta, sparse_format): + """ + collection schema 1: [pk, int64, float64, string float_vector] + data file: vectors.parquet and uid.parquet, + Steps: + 1. create collection + 2. import data + 3. verify + """ + if enable_dynamic_field is False and include_meta is True: + pytest.skip("include_meta only works with enable_dynamic_field") + fields = [ + cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), + cf.gen_int64_field(name=df.int_field), + cf.gen_float_field(name=df.float_field), + cf.gen_string_field(name=df.string_field), + cf.gen_json_field(name=df.json_field), + cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64), + cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT), + cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100), + cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL), + cf.gen_float_vec_field(name=df.float_vec_field, dim=dim), + cf.gen_sparse_vec_field(name=df.sparse_vec_field), + ] + data_fields = [f.name for f in fields if not f.to_dict().get("auto_id", False)] + files = prepare_bulk_insert_new_json_files( + minio_endpoint=self.minio_endpoint, + bucket_name=self.bucket_name, + rows=entities, + dim=dim, + data_fields=data_fields, + enable_dynamic_field=enable_dynamic_field, + force=True, + include_meta=include_meta, + sparse_format=sparse_format + ) + self._connect() + c_name = cf.gen_unique_str("bulk_insert") + schema = cf.gen_collection_schema(fields=fields, auto_id=auto_id, enable_dynamic_field=enable_dynamic_field) + self.collection_wrap.init_collection(c_name, schema=schema) + + # import data + t0 = time.time() + task_id, _ = self.utility_wrap.do_bulk_insert( + collection_name=c_name, files=files + ) + logging.info(f"bulk insert task ids:{task_id}") + success, states = self.utility_wrap.wait_for_bulk_insert_tasks_completed( + task_ids=[task_id], timeout=300 + ) + tt = time.time() - t0 + log.info(f"bulk insert state:{success} in {tt} with states:{states}") + assert success + num_entities = self.collection_wrap.num_entities + log.info(f" collection entities: {num_entities}") + assert num_entities == entities + # verify imported data is available for search + index_params = ct.default_index + float_vec_fields = [f.name for f in fields if "vec" in f.name and "float" in f.name] + sparse_vec_fields = [f.name for f in fields if "vec" in f.name and "sparse" in f.name] + for f in float_vec_fields: + self.collection_wrap.create_index( + field_name=f, index_params=index_params + ) + for f in sparse_vec_fields: + self.collection_wrap.create_index( + field_name=f, index_params=ct.default_sparse_inverted_index + ) + self.collection_wrap.load() + 
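# allow a short settle period after load before searching, same as the parquet-based sparse test above
+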
log.info(f"wait for load finished and be ready for search") + time.sleep(2) + # log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}") + search_data = cf.gen_vectors(1, dim) + search_params = ct.default_search_params + for field_name in float_vec_fields: + res, _ = self.collection_wrap.search( + search_data, + field_name, + param=search_params, + limit=1, + output_fields=["*"], + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, "limit": 1}, + ) + for hit in res: + for r in hit: + fields_from_search = r.fields.keys() + for f in fields: + assert f.name in fields_from_search + if enable_dynamic_field and include_meta: + assert "name" in fields_from_search + assert "address" in fields_from_search + search_data = cf.gen_sparse_vectors(1, dim) + search_params = ct.default_sparse_search_params + for field_name in sparse_vec_fields: + res, _ = self.collection_wrap.search( + search_data, + field_name, + param=search_params, + limit=1, + output_fields=["*"], + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, "limit": 1}, + ) + for hit in res: + for r in hit: + fields_from_search = r.fields.keys() + for f in fields: + assert f.name in fields_from_search + if enable_dynamic_field and include_meta: + assert "name" in fields_from_search + assert "address" in fields_from_search + @pytest.mark.tags(CaseLabel.L3) @pytest.mark.parametrize("auto_id", [True]) diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py index 63c96bfff3b50..71084a07910d5 100644 --- a/tests/python_client/testcases/test_collection.py +++ b/tests/python_client/testcases/test_collection.py @@ -106,9 +106,11 @@ def test_collection_invalid_name(self, name): expected: raise exception """ self._connect() - error = {ct.err_code: 1, ct.err_msg: "Invalid collection name: {}".format(name)} - if name is not None and name.strip() == "": - error = {ct.err_code: 1, ct.err_msg: "collection name should not be empty"} + error = {ct.err_code: 999, ct.err_msg: f"Invalid collection name: {name}"} + if name in [None, ""]: + error = {ct.err_code: 999, ct.err_msg: f"`collection_name` value {name} is illegal"} + if name in [" "]: + error = {ct.err_code: 999, ct.err_msg: f"collection name should not be empty"} self.collection_wrap.init_collection(name, schema=default_schema, check_task=CheckTasks.err_res, check_items=error) @@ -161,8 +163,8 @@ def test_collection_dup_name_new_schema(self): check_items={exp_name: c_name, exp_schema: default_schema}) fields = [cf.gen_int64_field(is_primary=True)] schema = cf.gen_collection_schema(fields=fields) - error = {ct.err_code: 0, ct.err_msg: "The collection already exist, but the schema is not the same as the " - "schema passed in."} + error = {ct.err_code: 999, ct.err_msg: "The collection already exist, but the schema is not the same as the " + "schema passed in."} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @@ -382,7 +384,7 @@ def test_collection_without_vectors(self): self._connect() c_name = cf.gen_unique_str(prefix) schema = cf.gen_collection_schema([cf.gen_int64_field(is_primary=True)]) - error = {ct.err_code: 0, ct.err_msg: "No vector field is found."} + error = {ct.err_code: 999, ct.err_msg: "No vector field is found."} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @@ -428,7 +430,7 @@ def 
test_collection_invalid_is_primary(self, is_primary): check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("primary_field", ["12-s", "12 s", "(mn)", "中文", "%$#", "a".join("a" for i in range(256))]) + @pytest.mark.parametrize("primary_field", ["12-s", "non_existing", "(mn)", "中文", None]) def test_collection_invalid_primary_field(self, primary_field): """ target: test collection with invalid primary_field @@ -437,12 +439,12 @@ def test_collection_invalid_primary_field(self, primary_field): """ self._connect() fields = [cf.gen_int64_field(), cf.gen_float_vec_field()] - error = {ct.err_code: 1, ct.err_msg: "Schema must have a primary key field."} + error = {ct.err_code: 999, ct.err_msg: "Schema must have a primary key field"} self.collection_schema_wrap.init_collection_schema(fields=fields, primary_field=primary_field, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("primary_field", [[], 1, [1, "2", 3], (1,), {1: 1}, None]) + @pytest.mark.parametrize("primary_field", [[], 1, [1, "2", 3], (1,), {1: 1}]) def test_collection_non_string_primary_field(self, primary_field): """ target: test collection with non-string primary_field @@ -451,25 +453,10 @@ def test_collection_non_string_primary_field(self, primary_field): """ self._connect() fields = [cf.gen_int64_field(), cf.gen_float_vec_field()] - error = {ct.err_code: 1, ct.err_msg: "Param primary_field must be str type."} + error = {ct.err_code: 999, ct.err_msg: "Param primary_field must be int or str type"} self.collection_schema_wrap.init_collection_schema(fields, primary_field=primary_field, check_task=CheckTasks.err_res, check_items=error) - @pytest.mark.tags(CaseLabel.L2) - def test_collection_not_existed_primary_field(self): - """ - target: test collection with not exist primary field - method: specify not existed field as primary_field - expected: raise exception - """ - self._connect() - fake_field = cf.gen_unique_str() - fields = [cf.gen_int64_field(), cf.gen_float_vec_field()] - error = {ct.err_code: 1, ct.err_msg: "Schema must have a primary key field."} - - self.collection_schema_wrap.init_collection_schema(fields, primary_field=fake_field, - check_task=CheckTasks.err_res, check_items=error) - @pytest.mark.tags(CaseLabel.L0) def test_collection_primary_in_schema(self): """ @@ -506,7 +493,7 @@ def test_collection_unsupported_primary_field(self, get_unsupported_primary_fiel self._connect() field = get_unsupported_primary_field vec_field = cf.gen_float_vec_field(name="vec") - error = {ct.err_code: 1, ct.err_msg: "Primary key type must be DataType.INT64 or DataType.VARCHAR."} + error = {ct.err_code: 999, ct.err_msg: "Primary key type must be DataType.INT64 or DataType.VARCHAR."} self.collection_schema_wrap.init_collection_schema(fields=[field, vec_field], primary_field=field.name, check_task=CheckTasks.err_res, check_items=error) @@ -520,7 +507,7 @@ def test_collection_multi_primary_fields(self): self._connect() int_field_one = cf.gen_int64_field(is_primary=True) int_field_two = cf.gen_int64_field(name="int2", is_primary=True) - error = {ct.err_code: 0, ct.err_msg: "Expected only one primary key field"} + error = {ct.err_code: 999, ct.err_msg: "Expected only one primary key field"} self.collection_schema_wrap.init_collection_schema( fields=[int_field_one, int_field_two, cf.gen_float_vec_field()], check_task=CheckTasks.err_res, check_items=error) @@ -536,7 +523,7 @@ def test_collection_primary_inconsistent(self): int_field_one 
= cf.gen_int64_field(is_primary=True) int_field_two = cf.gen_int64_field(name="int2") fields = [int_field_one, int_field_two, cf.gen_float_vec_field()] - error = {ct.err_code: 1, ct.err_msg: "Expected only one primary key field"} + error = {ct.err_code: 999, ct.err_msg: "Expected only one primary key field"} self.collection_schema_wrap.init_collection_schema(fields, primary_field=int_field_two.name, check_task=CheckTasks.err_res, check_items=error) @@ -597,7 +584,7 @@ def test_collection_auto_id_non_primary_field(self): expected: raise exception """ self._connect() - error = {ct.err_code: 0, ct.err_msg: "auto_id can only be specified on the primary key field"} + error = {ct.err_code: 999, ct.err_msg: "auto_id can only be specified on the primary key field"} self.field_schema_wrap.init_field_schema(name=ct.default_int64_field_name, dtype=DataType.INT64, auto_id=True, check_task=CheckTasks.err_res, check_items=error) @@ -616,19 +603,21 @@ def test_collection_auto_id_false_non_primary(self): assert not schema.auto_id @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.xfail(reason="issue 24578") - def test_collection_auto_id_inconsistent(self): + @pytest.mark.xfail(reason="pymilvus issue, should use fieldschema as top priority") + @pytest.mark.parametrize("auto_id", [True, False]) + def test_collection_auto_id_inconsistent(self, auto_id): """ target: test collection auto_id with both collection schema and field schema method: 1.set primary field auto_id=True in field schema 2.set auto_id=False in collection schema expected: raise exception """ self._connect() - int_field = cf.gen_int64_field(is_primary=True, auto_id=True) + int_field = cf.gen_int64_field(is_primary=True, auto_id=auto_id) vec_field = cf.gen_float_vec_field(name='vec') + schema, _ = self.collection_schema_wrap.init_collection_schema([int_field, vec_field], auto_id=not auto_id) + collection_w = self.collection_wrap.init_collection(cf.gen_unique_str(prefix), schema=schema)[0] - schema, _ = self.collection_schema_wrap.init_collection_schema([int_field, vec_field], auto_id=False) - assert schema.auto_id + assert collection_w.schema.auto_id is auto_id @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("auto_id", [True, False]) @@ -718,7 +707,7 @@ def test_collection_vector_invalid_dim(self, get_invalid_dim): self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("dim", [-1, 0, 32769]) + @pytest.mark.parametrize("dim", [ct.min_dim-1, ct.max_dim+1]) def test_collection_vector_out_bounds_dim(self, dim): """ target: test collection with out of bounds dim diff --git a/tests/python_client/testcases/test_connection.py b/tests/python_client/testcases/test_connection.py index 7e8a5c196ef87..74a46cad11fba 100644 --- a/tests/python_client/testcases/test_connection.py +++ b/tests/python_client/testcases/test_connection.py @@ -824,7 +824,7 @@ def test_close_repeatedly(self, host, port, connect_name): self.connection_wrap.disconnect(alias=connect_name) @pytest.mark.tags(ct.CaseLabel.L2) - @pytest.mark.parametrize("protocol", ["http", "ftp", "tcp"]) + @pytest.mark.parametrize("protocol", ["http", "tcp"]) @pytest.mark.parametrize("connect_name", [DefaultConfig.DEFAULT_USING]) def test_parameters_with_uri_connection(self, host, port, connect_name, protocol): """ @@ -836,6 +836,21 @@ def test_parameters_with_uri_connection(self, host, port, connect_name, protocol uri = "{}://{}:{}".format(protocol, host, port) 
self.connection_wrap.connect(alias=connect_name, uri=uri, check_task=ct.CheckTasks.ccr) + @pytest.mark.tags(ct.CaseLabel.L2) + @pytest.mark.parametrize("protocol", ["ftp"]) + @pytest.mark.parametrize("connect_name", [DefaultConfig.DEFAULT_USING]) + def test_parameters_with_invalid_uri_connection(self, host, port, connect_name, protocol): + """ + target: test connection with an unsupported uri scheme + method: connect with an ftp:// uri + expected: raise exception + """ + + uri = "{}://{}:{}".format(protocol, host, port) + self.connection_wrap.connect(alias=connect_name, uri=uri, check_task=ct.CheckTasks.err_res, + check_items={ct.err_code: 999, + ct.err_msg: "Open local milvus failed, dir: ftp: not exists"}) + @pytest.mark.tags(ct.CaseLabel.L2) + @pytest.mark.parametrize("connect_name", [DefaultConfig.DEFAULT_USING]) + def test_parameters_with_address_connection(self, host, port, connect_name): diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index a91cd115c1b85..753fb28cd5b5e 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -210,12 +210,15 @@ def test_index_create_indexes_for_different_fields(self): """ target: Test create indexes for different fields method: create two different indexes with default index name - expected: create successfully + expected: create successfully, and the default index name equals to field name """ - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] - default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} + collection_w = self.init_collection_general(prefix, True, nb=200, is_index=False)[0] + default_index = ct.default_index collection_w.create_index(default_field_name, default_index) collection_w.create_index(ct.default_int64_field_name, {}) + assert len(collection_w.indexes) == 2 + for index in collection_w.indexes: + assert index.field_name == index.index_name @pytest.mark.tags(CaseLabel.L1) def test_index_create_on_scalar_field(self): @@ -224,7 +227,7 @@ def test_index_create_on_scalar_field(self): method: create index on scalar field and load expected: raise exception """ - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] + collection_w = self.init_collection_general(prefix, True, nb=200, is_index=False)[0] collection_w.create_index(ct.default_int64_field_name, {}) collection_w.load(check_task=CheckTasks.err_res, check_items={ct.err_code: 65535,
target: test index with duplicate index name - method: create index with existed index name create by `collection.create_index` + method: create index with existed index name and different index params + expected: raise exception + create index with the same index name and same index params expected: no exception raised """ c_name = cf.gen_unique_str(prefix) index_name = ct.default_index_name collection_w = self.init_collection_wrap(name=c_name) - collection_w.collection.create_index(default_field_name, default_index_params, index_name=index_name) - self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params, + params = cf.get_index_params_params("HNSW") + index_params = {"index_type": "HNSW", "metric_type": "L2", "params": params} + params2 = cf.get_index_params_params("HNSW") + params2.update({"M": 16, "efConstruction": 200}) + index_params2 = {"index_type": "HNSW", "metric_type": "L2", "params": params2} + collection_w.collection.create_index(default_field_name, index_params, index_name=index_name) + + # create index with the same index name and different index params + error = {ct.err_code: 999, ct.err_msg: "at most one distinct index is allowed per field"} + self.index_wrap.init_index(collection_w.collection, default_field_name, index_params2, index_name=index_name, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, ct.err_msg: ""}) + check_items=error) + # create index with the same index name and same index params + self.index_wrap.init_index(collection_w.collection, default_field_name, index_params) - # TODO: server not supported @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason='not supported') - def test_index_field_names(self): + def test_index_same_name_on_diff_fields(self): """ - target: test index on one field, with two indexes - method: create index with two different indexes - expected: no exception raised + target: verify index with the same name on different fields is not supported + method: create index with index name A on fieldA, create index with index name A on fieldB + expected: raise exception """ - pass + # collection_w, _ = self.init_collection_general(prefix, dim=64, insert_data=False, is_index=False, + # multiple_dim_array=[32]) + id_field = cf.gen_int64_field(name="id", is_primary=True) + vec_field = cf.gen_float_vec_field(name="vec_field", dim=64) + vec_field2 = cf.gen_float_vec_field(name="vec_field2", dim=32) + str_field = cf.gen_string_field(name="str_field") + str_field2 = cf.gen_string_field(name="str_field2") + schema, _ = self.collection_schema_wrap.init_collection_schema([id_field, vec_field, vec_field2, str_field, str_field2]) + collection_w = self.init_collection_wrap(schema=schema) + vec_index = ct.default_index + vec_index_name = "my_index" - # TODO: server not supported - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason='not supported') - def test_index_fields(self): - """ - target: test index on two fields, with the same name - method: create the same index name with two different fields - expected: exception raised - """ - pass + # create same index name on different vector fields + error = {ct.err_code: 999, ct.err_msg: "at most one distinct index is allowed per field"} + collection_w.create_index(vec_field.name, vec_index, index_name=vec_index_name) + collection_w.create_index(vec_field2.name, vec_index, index_name=vec_index_name, + check_task=CheckTasks.err_res, + check_items=error) - # TODO: server not supported - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason='not 
supported') - def test_index_fields_B(self): - """ - target: test index on two fields, with the different name - method: create the different index with two different fields - expected: no exception raised - """ - pass + # create same index name on different scalar fields + collection_w.create_index(str_field.name, index_name=vec_index_name, + check_task=CheckTasks.err_res, + check_items=error) - # TODO: server not supported - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason='not supported') - def test_index_field_names_eq_maximum(self): - """ - target: test index on one field, with the different names, num of the names equal to the maximum num supported - method: create the different indexes - expected: no exception raised - """ - pass + # create same salar index nae on different scalar fields + index_name = "scalar_index" + collection_w.create_index(str_field.name, index_name=index_name) + collection_w.create_index(str_field2.name, index_name=index_name, + check_task=CheckTasks.err_res, + check_items=error) + all_indexes = collection_w.indexes + assert len(all_indexes) == 2 + assert all_indexes[0].index_name != all_indexes[1].index_name + for index in all_indexes: + assert index.index_name in [vec_index_name, index_name] - # TODO: server not supported - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason='not supported') - def test_index_field_names_more_maximum(self): - """ - target: test index on one field, with the different names, num of the names more than the maximum num supported - method: create the different indexes - expected: exception raised - """ - pass - @pytest.mark.tags(CaseLabel.L1) def test_index_drop_index(self): """ @@ -381,7 +380,6 @@ def test_index_drop_index(self): assert len(collection_w.indexes) == 0 @pytest.mark.tags(CaseLabel.L1) - # TODO #7372 def test_index_drop_repeatedly(self): """ target: test index.drop @@ -417,52 +415,6 @@ def test_index_drop_multi_collections(self): assert cf.assert_equal_index(index_2, cw2.collection.indexes[0]) assert len(cw.collection.indexes) == 0 - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason='TODO') - def test_index_drop_during_inserting(self): - """ - target: test index.drop during inserting - method: create indexes by `index`, and then drop it during inserting entities, make sure async insert - expected: no exception raised, insert success - """ - pass - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason='TODO') - def test_index_drop_during_searching(self): - """ - target: test index.drop during searching - method: create indexes by `index`, and then drop it during searching, make sure async search - expected: no exception raised, search success - """ - pass - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason='TODO') - def test_index_recovery_after_restart(self): - """ - target: test index still existed after server restart - method: create index by `index`, and then restart server, assert index existed - expected: index in collection.indexes - """ - pass - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason='TODO') - def test_index_building_after_restart(self): - """ - target: index can still build if not finished before server restart - method: create index by `index`, and then restart server, assert server is indexing - expected: index build finished after server restart - """ - pass - - """ - ****************************************************************** - The following classes are copied from pymilvus test - 
****************************************************************** - """ - @pytest.mark.tags(CaseLabel.GPU) class TestNewIndexBase(TestcaseBase): @@ -532,22 +484,10 @@ def test_create_index_non_existed_field(self): collection_w.create_index(ct.default_int8_field_name, default_index_params, index_name=ct.default_index_name, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, + check_items={ct.err_code: 999, ct.err_msg: "cannot create index on non-existed field: int8"} ) - @pytest.mark.tags(CaseLabel.L1) - def test_create_index_no_vectors(self): - """ - target: test create index interface - method: create collection and add entities in it, create index - expected: return success - """ - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(name=c_name) - collection_w.create_index(ct.default_float_vec_field_name, default_index_params, - index_name=ct.default_index_name) - @pytest.mark.tags(CaseLabel.L1) def test_create_index_partition(self): """ @@ -597,7 +537,7 @@ def test_create_index_without_connect(self): assert ct.default_alias not in res_list collection_w.create_index(ct.default_float_vec_field_name, ct.default_all_indexes_params, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, ct.err_msg: "should create connect first"}) + check_items={ct.err_code: 999, ct.err_msg: "should create connection first"}) @pytest.mark.tags(CaseLabel.L1) def test_create_index_search_with_query_vectors(self): @@ -686,7 +626,7 @@ def test_create_index_different_name(self): collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="a") collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="b", check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, + check_items={ct.err_code: 999, ct.err_msg: "CreateIndex failed: creating multiple indexes on same field is not supported"}) @pytest.mark.tags(CaseLabel.L1) @@ -722,18 +662,6 @@ def test_create_index_ip(self): collection_w.insert(data=data) collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params) - @pytest.mark.tags(CaseLabel.L1) - def test_create_index_no_vectors_ip(self): - """ - target: test create index interface - method: create collection and add entities in it, create index - expected: return success - """ - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(name=c_name) - collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params, - index_name=ct.default_index_name) - @pytest.mark.tags(CaseLabel.L1) def test_create_index_partition_ip(self): """ @@ -750,7 +678,7 @@ def test_create_index_partition_ip(self): assert len(ins_res.primary_keys) == len(data[0]) collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params) - @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.tags(CaseLabel.L2) def test_create_index_partition_flush_ip(self): """ target: test create index @@ -810,7 +738,7 @@ def build(collection_w): for t in threads: t.join() - @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.tags(CaseLabel.L2) def test_create_index_no_vectors_insert_ip(self): """ target: test create index interface when there is no vectors in collection, @@ -841,23 +769,6 @@ def test_create_same_index_repeatedly_ip(self): collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params) assert len(collection_w.indexes) == 1 - @pytest.mark.tags(CaseLabel.L2) - def test_create_index_different_name_ip(self): - """ - target: check if index can be created 
repeatedly, with the same create_index params - method: create index after index have been built - expected: raise error - """ - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(name=c_name) - data = cf.gen_default_list_data(default_nb) - collection_w.insert(data=data) - collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params, index_name="a") - collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params, index_name="b", - check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, - ct.err_msg: "CreateIndex failed: creating multiple indexes on same field is not supported"}) - @pytest.mark.tags(CaseLabel.L0) def test_create_different_index_repeatedly_ip(self): """ @@ -903,7 +814,6 @@ def test_drop_index(self, get_simple_index): assert len(collection_w.indexes) == 0 @pytest.mark.tags(CaseLabel.L2) - # TODO #7372 def test_drop_index_repeatedly(self, get_simple_index): """ target: test drop index repeatedly @@ -935,7 +845,7 @@ def test_drop_index_without_connect(self): index_name=ct.default_index_name) self.connection_wrap.remove_connection(ct.default_alias) collection_w.drop_index(index_name=ct.default_index_name, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, ct.err_msg: "should create connect first."}) + check_items={ct.err_code: 999, ct.err_msg: "should create connection first."}) @pytest.mark.tags(CaseLabel.L2) def test_create_drop_index_repeatedly(self, get_simple_index): @@ -954,76 +864,6 @@ def test_create_drop_index_repeatedly(self, get_simple_index): collection_w.drop_index(index_name=ct.default_index_name) assert len(collection_w.indexes) == 0 - @pytest.mark.tags(CaseLabel.L2) - def test_drop_index_ip(self, get_simple_index): - """ - target: test drop index interface - method: create collection and add entities in it, create index, call drop index - expected: return code 0, and default index param - """ - get_simple_index["metric_type"] = "IP" - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(name=c_name) - if get_simple_index["index_type"] != "FLAT": - collection_w.create_index(ct.default_float_vec_field_name, get_simple_index, - index_name=ct.default_index_name) - assert len(collection_w.indexes) == 1 - collection_w.drop_index(index_name=ct.default_index_name) - assert len(collection_w.indexes) == 0 - - @pytest.mark.tags(CaseLabel.L2) - def test_drop_index_repeatedly_ip(self, get_simple_index): - """ - target: test drop index repeatedly - method: create index, call drop index, and drop again - expected: return code 0 - """ - get_simple_index["metric_type"] = "IP" - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(name=c_name) - if get_simple_index["index_type"] != "FLAT": - collection_w.create_index(ct.default_float_vec_field_name, get_simple_index, - index_name=ct.default_index_name) - assert len(collection_w.indexes) == 1 - collection_w.drop_index(index_name=ct.default_index_name) - assert len(collection_w.indexes) == 0 - collection_w.drop_index(index_name=ct.default_index_name) - assert len(collection_w.indexes) == 0 - - @pytest.mark.tags(CaseLabel.L2) - def test_drop_index_without_connect_ip(self): - """ - target: test drop index without connection - method: drop index, and check if drop successfully - expected: raise exception - """ - - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(c_name) - collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params, - 
index_name=ct.default_index_name) - self.connection_wrap.remove_connection(ct.default_alias) - collection_w.drop_index(index_name=ct.default_index_name, check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, ct.err_msg: "should create connect first."}) - - @pytest.mark.tags(CaseLabel.L2) - def test_create_drop_index_repeatedly_ip(self, get_simple_index): - """ - target: test create / drop index repeatedly, use the same index params - method: create index, drop index, four times - expected: return code 0 - """ - get_simple_index["metric_type"] = "IP" - c_name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap(c_name) - if get_simple_index["index_type"] != "FLAT": - for i in range(4): - collection_w.create_index(ct.default_float_vec_field_name, get_simple_index, - index_name=ct.default_index_name) - assert len(collection_w.indexes) == 1 - collection_w.drop_index(index_name=ct.default_index_name) - assert len(collection_w.indexes) == 0 - @pytest.mark.tags(CaseLabel.L0) def test_create_PQ_without_nbits(self): """ @@ -1059,8 +899,8 @@ def test_index_collection_with_after_load(self): expected: load and search successfully """ collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix)) - nums = 20 - tmp_nb = 5000 + nums = 5 + tmp_nb = 1000 for i in range(nums): df = cf.gen_default_dataframe_data(nb=tmp_nb, start=i * tmp_nb) insert_res, _ = collection_w.insert(df) @@ -1179,10 +1019,6 @@ def test_rebuild_mmap_index(self): @pytest.mark.tags(CaseLabel.GPU) class TestNewIndexBinary(TestcaseBase): - def get_simple_index(self, request): - log.info(request.param) - return copy.deepcopy(request.param) - """ ****************************************************************** The following cases are used to test `create_index` function @@ -1190,7 +1026,6 @@ def get_simple_index(self, request): """ @pytest.mark.tags(CaseLabel.L2) - # @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_binary_index_on_scalar_field(self): """ target: test create index interface @@ -1202,7 +1037,6 @@ def test_create_binary_index_on_scalar_field(self): assert collection_w.has_index(index_name=binary_field_name)[0] is True @pytest.mark.tags(CaseLabel.L0) - # @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index_partition(self): """ target: test create index interface @@ -1219,11 +1053,10 @@ def test_create_index_partition(self): assert len(ins_res.primary_keys) == len(df) collection_w.create_index(default_binary_vec_field_name, default_binary_index_params, index_name=binary_field_name) - assert collection_w.has_index(index_name=binary_field_name)[0] == True + assert collection_w.has_index(index_name=binary_field_name)[0] is True assert len(collection_w.indexes) == 1 @pytest.mark.tags(CaseLabel.L0) - # @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_index_search_with_query_vectors(self): """ target: test create index interface, search with more query vectors @@ -1242,7 +1075,6 @@ def test_create_index_search_with_query_vectors(self): default_search_binary_params, default_limit, default_search_exp) - # @pytest.mark.timeout(BUILD_TIMEOUT) @pytest.mark.tags(CaseLabel.L2) def test_create_index_invalid_metric_type_binary(self): """ @@ -1352,52 +1184,29 @@ def scalar_index(self, request): def vector_data_type(self, request): yield request.param - @pytest.fixture( - scope="function", - params=gen_invalid_strs() - ) - def get_collection_name(self, request): + @pytest.fixture(scope="function", params=ct.invalid_resource_names) + def invalid_index_name(self, request): + if request.param in [None, 
"", " "]: + pytest.skip("None and empty is valid for there is a default index name") yield request.param @pytest.mark.tags(CaseLabel.L0) - def test_create_index_with_invalid_collection_name(self, connect, get_collection_name): + def test_index_with_invalid_index_name(self, connect, invalid_index_name): """ target: test create index interface for invalid scenario - method: create index with invalid collection name - expected: raise exception - """ - collection_name = get_collection_name - with pytest.raises(Exception) as e: - connect.create_index(collection_name, field_name, default_ivf_flat_index) - - @pytest.mark.tags(CaseLabel.L2) - def test_drop_index_with_invalid_collection_name(self, connect, get_collection_name): - """ - target: test drop index interface for invalid scenario - method: drop index with invalid collection name + method: + 1. create index with invalid collection name expected: raise exception + 2. drop index with an invalid index name + expected: succeed """ - collection_name = get_collection_name - with pytest.raises(Exception) as e: - connect.drop_index(collection_name) - - @pytest.fixture( - scope="function", - params=gen_invalid_index() - ) - def get_index(self, request): - yield request.param + collection_w = self.init_collection_wrap() + error = {ct.err_code: 999, ct.err_msg: f"Invalid index name: {invalid_index_name}"} + collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name=invalid_index_name, + check_task=CheckTasks.err_res, check_items=error) - @pytest.mark.tags(CaseLabel.L2) - def test_create_index_with_invalid_index_params(self, connect, collection, get_index): - """ - target: test create index interface for invalid scenario - method: create index with invalid index params - expected: raise exception - """ - log.info(get_index) - with pytest.raises(Exception) as e: - connect.create_index(collection, field_name, get_index) + # drop index with an invalid index name + collection_w.drop_index(index_name=invalid_index_name) @pytest.mark.tags(CaseLabel.L1) def test_drop_index_without_release(self): @@ -1407,12 +1216,11 @@ def test_drop_index_without_release(self): 2. drop the index expected: raise exception """ - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] - default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} - collection_w.create_index("float_vector", default_index) + collection_w = self.init_collection_general(prefix, True, nb=100, is_index=False)[0] + collection_w.create_index(ct.default_float_vec_field_name, ct.default_index) collection_w.load() collection_w.drop_index(check_task=CheckTasks.err_res, - check_items={"err_code": 1, + check_items={"err_code": 999, "err_msg": "index cannot be dropped, collection is " "loaded, please release it first"}) @@ -1425,7 +1233,7 @@ def test_annoy_index_with_invalid_params(self, n_trees): 2. 
set annoy index param n_trees type invalid(not int) expected: raise exception """ - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] + collection_w = self.init_collection_general(prefix, True, nb=100, is_index=False)[0] index_annoy = {"index_type": "ANNOY", "params": {"n_trees": n_trees}, "metric_type": "L2"} collection_w.create_index("float_vector", index_annoy, check_task=CheckTasks.err_res, @@ -1439,10 +1247,9 @@ def test_create_index_json(self): method: 1.create collection, and create index expected: create index raise an error """ - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - dim=ct.default_dim, is_index=False)[0:4] + collection_w = self.init_collection_general(prefix, True, nb=100, is_index=False)[0] # create index on JSON/Array field is not supported - collection_w.create_index(ct.default_json_field_name, index_params=ct.default_flat_index, + collection_w.create_index(ct.default_json_field_name, check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: "create index on JSON field is not supported"}) @@ -1454,9 +1261,8 @@ def test_create_scalar_index_on_vector_field(self, scalar_index, vector_data_typ method: 1.create collection, and create index expected: Raise exception """ - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - dim=ct.default_dim, is_index=False, - vector_data_type=vector_data_type)[0:4] + collection_w = self.init_collection_general(prefix, True, nb=100, + is_index=False, vector_data_type=vector_data_type)[0] scalar_index_params = {"index_type": scalar_index} collection_w.create_index(ct.default_float_vec_field_name, index_params=scalar_index_params, check_task=CheckTasks.err_res, @@ -1503,10 +1309,7 @@ def test_create_inverted_index_on_array_field(self): collection_w = self.init_collection_wrap(schema=schema) # 2. 
create index scalar_index_params = {"index_type": "INVERTED"} - collection_w.create_index(ct.default_int32_array_field_name, index_params=scalar_index_params, - check_task=CheckTasks.err_res, - check_items={ct.err_code: 1100, - ct.err_msg: "create index on Array field is not supported"}) + collection_w.create_index(ct.default_int32_array_field_name, index_params=scalar_index_params) @pytest.mark.tags(CaseLabel.L1) def test_create_inverted_index_no_vector_index(self): @@ -2191,22 +1994,6 @@ def build(collection_w): for t in threads: t.join() - @pytest.mark.skip(reason="diskann dim range is set to be [1, 32768)") - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("dim", [2, 4, 8]) - def test_create_index_with_small_dim(self, dim): - """ - target: test create index with diskann - method: 1.create collection, when the dim of the vector Less than 8 - 2.create diskann index - expected: create index raise an error - """ - collection_w = self.init_collection_general(prefix, False, dim=dim, is_index=False)[0] - collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index, - check_task=CheckTasks.err_res, - check_items={ct.err_code: 1, - ct.err_msg: "dim out of range: [8, 32768]"}) - @pytest.mark.tags(CaseLabel.L2) def test_diskann_enable_mmap(self): """ @@ -2227,7 +2014,6 @@ def test_diskann_enable_mmap(self): check_items={ct.err_code: 104, ct.err_msg: f"index type DISKANN does not support mmap"}) - @pytest.mark.tags(CaseLabel.GPU) class TestAutoIndex(TestcaseBase): """ Test case of Auto index """ diff --git a/tests/python_client/testcases/test_issues.py b/tests/python_client/testcases/test_issues.py index b579f10cadd8c..1dad8133ff23f 100644 --- a/tests/python_client/testcases/test_issues.py +++ b/tests/python_client/testcases/test_issues.py @@ -11,9 +11,8 @@ class TestIssues(TestcaseBase): @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name]) - @pytest.mark.parametrize("index_on_par_key_field", [True]) @pytest.mark.parametrize("use_upsert", [True, False]) - def test_issue_30607(self, par_key_field, index_on_par_key_field, use_upsert): + def test_issue_30607(self, par_key_field, use_upsert): """ Method: 1. 
create a collection with partition key on collection schema with customized num_partitions @@ -50,27 +49,30 @@ def test_issue_30607(self, par_key_field, index_on_par_key_field, use_upsert): num_entities = collection_w.num_entities # build index collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index) - if index_on_par_key_field: - collection_w.create_index(field_name=par_key_field, index_params={}) - # load - collection_w.load() - # verify the partition key values are bashed correctly - seeds = 200 - rand_ids = random.sample(range(0, num_entities), seeds) - rand_ids = [str(rand_ids[i]) for i in range(len(rand_ids))] - res = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field]) - # verify every the random id exists - assert len(res) == len(rand_ids) + for index_on_par_key_field in [False, True]: + collection_w.release() + if index_on_par_key_field: + collection_w.create_index(field_name=par_key_field, index_params={}) + # load + collection_w.load() - dirty_count = 0 - for i in range(len(res)): - pk = res[i].get("pk") - parkey_value = res[i].get(par_key_field) - res_parkey = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'", - output_fields=["pk", par_key_field]) - if len(res_parkey) != 1: - log.info(f"dirty data found: pk {pk} with parkey {parkey_value}") - dirty_count += 1 - assert dirty_count == 0 - log.info(f"check randomly {seeds}/{num_entities}, dirty count={dirty_count}") \ No newline at end of file + # verify the partition key values are hashed correctly + seeds = 200 + rand_ids = random.sample(range(0, num_entities), seeds) + rand_ids = [str(rand_ids[i]) for i in range(len(rand_ids))] + res = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field]) + # verify every random id exists + assert len(res) == len(rand_ids) + + dirty_count = 0 + for i in range(len(res)): + pk = res[i].get("pk") + parkey_value = res[i].get(par_key_field) + res_parkey = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'", + output_fields=["pk", par_key_field]) + if len(res_parkey) != 1: + log.info(f"dirty data found: pk {pk} with parkey {parkey_value}") + dirty_count += 1 + assert dirty_count == 0 + log.info(f"check randomly {seeds}/{num_entities}, dirty count={dirty_count}") \ No newline at end of file diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index 3fb595afc1c58..829f3b7f86396 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -10097,7 +10097,6 @@ def test_search_group_by_default(self, index_type, metric, vector_data_type): collection_w.flush() collection_w.create_index(ct.default_float_vec_field_name, index_params=_index_params) - # time.sleep(10) collection_w.load() search_params = {"metric_type": metric, "params": {"ef": 128}} @@ -10214,7 +10213,6 @@ def test_search_group_by_with_field_indexed(self, grpby_field): collection_w.flush() collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) collection_w.create_index(grpby_field) - time.sleep(30) collection_w.load() search_params = {"metric_type": metric, "params": {"ef": 128}} @@ -10507,7 +10505,6 @@ def test_range_search_not_support_group_by(self): collection_w.flush() collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) - time.sleep(10) collection_w.load() nq = 1 diff --git a/tests/python_client/utils/util_birdwatcher.py
b/tests/python_client/utils/util_birdwatcher.py new file mode 100644 index 0000000000000..b7c4abe405af1 --- /dev/null +++ b/tests/python_client/utils/util_birdwatcher.py @@ -0,0 +1,79 @@ +import os +import re +from utils.util_log import test_log as log + + +def extraction_all_data(text): + # Patterns to handle the specifics of each key-value line + patterns = { + 'Segment ID': r"Segment ID:\s*(\d+)", + 'Segment State': r"Segment State:\s*(\w+)", + 'Collection ID': r"Collection ID:\s*(\d+)", + 'PartitionID': r"PartitionID:\s*(\d+)", + 'Insert Channel': r"Insert Channel:(.+)", + 'Num of Rows': r"Num of Rows:\s*(\d+)", + 'Max Row Num': r"Max Row Num:\s*(\d+)", + 'Last Expire Time': r"Last Expire Time:\s*(.+)", + 'Compact from': r"Compact from:\s*(\[\])", + 'Start Position ID': r"Start Position ID:\s*(\[[\d\s]+\])", + 'Start Position Time': r"Start Position ID:.*time:\s*(.+),", + 'Start Channel Name': r"channel name:\s*([^,\n]+)", + 'Dml Position ID': r"Dml Position ID:\s*(\[[\d\s]+\])", + 'Dml Position Time': r"Dml Position ID:.*time:\s*(.+),", + 'Dml Channel Name': r"channel name:\s*(.+)", + 'Binlog Nums': r"Binlog Nums:\s*(\d+)", + 'StatsLog Nums': r"StatsLog Nums:\s*(\d+)", + 'DeltaLog Nums': r"DeltaLog Nums:\s*(\d+)" + } + + refined_data = {} + for key, pattern in patterns.items(): + match = re.search(pattern, text) + if match: + refined_data[key] = match.group(1).strip() + + return refined_data + + +class BirdWatcher: + """ + + birdwatcher is a cli tool to get information about milvus + the command: + show segment info + """ + + def __init__(self, etcd_endpoints, root_path): + self.prefix = f"birdwatcher --olc=\"#connect --etcd {etcd_endpoints} --rootPath={root_path}," + + def parse_segment_info(self, output): + splitter = output.strip().split('\n')[0] + segments = output.strip().split(splitter) + segments = [segment for segment in segments if segment.strip()] + + # Parse all segments + parsed_segments = [extraction_all_data(segment) for segment in segments] + parsed_segments = [segment for segment in parsed_segments if segment] + return parsed_segments + + def show_segment_info(self, collection_id=None): + cmd = f"{self.prefix} show segment info --format table\"" + if collection_id: + cmd = f"{self.prefix} show segment info --collection {collection_id} --format table\"" + log.info(f"cmd: {cmd}") + output = os.popen(cmd).read() + # log.info(f"{cmd} output: {output}") + output = self.parse_segment_info(output) + for segment in output: + log.info(segment) + seg_res = {} + for segment in output: + seg_res[segment['Segment ID']] = segment + return seg_res + + +if __name__ == "__main__": + birdwatcher = BirdWatcher("10.104.18.24:2379", "rg-test-613938") + res = birdwatcher.show_segment_info() + print(res) + diff --git a/tests/python_client/utils/util_k8s.py b/tests/python_client/utils/util_k8s.py index ffaba8bcc1ff0..b514e3444c551 100644 --- a/tests/python_client/utils/util_k8s.py +++ b/tests/python_client/utils/util_k8s.py @@ -452,6 +452,8 @@ def record_time_when_standby_activated(namespace, release_name, coord_type, time log.info(f"Standby {coord_type} pod does not switch standby mode") + + if __name__ == '__main__': label = "app.kubernetes.io/name=milvus, component=querynode" instance_name = get_milvus_instance_name("chaos-testing", "10.96.250.111") diff --git a/tests/restful_client_v2/api/milvus.py b/tests/restful_client_v2/api/milvus.py index 5d9d9f72e79ea..76807a4d36b37 100644 --- a/tests/restful_client_v2/api/milvus.py +++ b/tests/restful_client_v2/api/milvus.py @@ -15,7 +15,7 @@ def 
logger_request_response(response, url, tt, headers, data, str_data, str_resp data = data[:1000] + "..." + data[-1000:] try: if response.status_code == 200: - if ('code' in response.json() and response.json()["code"] == 200) or ( + if ('code' in response.json() and response.json()["code"] == 0) or ( 'Code' in response.json() and response.json()["Code"] == 0): logger.debug( f"\nmethod: {method}, \nurl: {url}, \ncost time: {tt}, \nheader: {headers}, \npayload: {str_data}, \nresponse: {str_response}") @@ -612,7 +612,7 @@ def role_create(self, payload): url = f'{self.endpoint}/v2/vectordb/roles/create' response = self.post(url, headers=self.update_headers(), data=payload) res = response.json() - if res["code"] == 200: + if res["code"] == 0: self.role_names.append(payload["roleName"]) return res diff --git a/tests/restful_client_v2/base/testbase.py b/tests/restful_client_v2/base/testbase.py index 7452058d61890..a47239ae96102 100644 --- a/tests/restful_client_v2/base/testbase.py +++ b/tests/restful_client_v2/base/testbase.py @@ -80,7 +80,7 @@ def init_collection(self, collection_name, pk_field="id", metric_type="L2", dim= "vectorField": "vector", } rsp = self.collection_client.collection_create(schema_payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 self.wait_collection_load_completed(collection_name) batch_size = batch_size batch = nb // batch_size @@ -97,7 +97,7 @@ def init_collection(self, collection_name, pk_field="id", metric_type="L2", dim= body_size = sys.getsizeof(json.dumps(payload)) logger.debug(f"body size: {body_size / 1024 / 1024} MB") rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 if return_insert_id: insert_ids.extend(rsp['data']['insertIds']) # insert remainder data @@ -109,7 +109,7 @@ def init_collection(self, collection_name, pk_field="id", metric_type="L2", dim= "data": data } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 if return_insert_id: insert_ids.extend(rsp['data']['insertIds']) if return_insert_id: diff --git a/tests/restful_client_v2/testcases/test_alias_operation.py b/tests/restful_client_v2/testcases/test_alias_operation.py index 75b47ef498a8f..3919defa499f7 100644 --- a/tests/restful_client_v2/testcases/test_alias_operation.py +++ b/tests/restful_client_v2/testcases/test_alias_operation.py @@ -38,7 +38,7 @@ def test_alias_e2e(self): "aliasName": alias_name } rsp = self.alias_client.create_alias(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # list alias after create rsp = self.alias_client.list_alias() assert alias_name in rsp['data'] diff --git a/tests/restful_client_v2/testcases/test_collection_operations.py b/tests/restful_client_v2/testcases/test_collection_operations.py index 8314bd40fe81c..a8f96808a89eb 100644 --- a/tests/restful_client_v2/testcases/test_collection_operations.py +++ b/tests/restful_client_v2/testcases/test_collection_operations.py @@ -33,14 +33,14 @@ def test_create_collections_quick_setup(self, dim): } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert rsp['data']['autoId'] is False assert rsp['data']['enableDynamicField'] is True @@ -68,10 
+68,10 @@ def test_create_collection_quick_setup_with_custom(self, vector_field, primary_f if id_type == "VarChar": collection_payload["params"] = {"max_length": "256"} rsp = self.collection_client.collection_create(collection_payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name fields = [f["name"] for f in rsp['data']['fields']] assert primary_field in fields @@ -113,7 +113,7 @@ def test_create_collections_without_params(self, enable_dynamic_field, request_s logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -131,7 +131,7 @@ def test_create_collections_without_params(self, enable_dynamic_field, request_s for d in rsp["data"]["properties"]: if d["key"] == "collection.ttl.seconds": ttl_seconds_actual = int(d["value"]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['enableDynamicField'] == False assert rsp['data']['collectionName'] == name assert rsp['data']['shardsNum'] == num_shards @@ -178,7 +178,7 @@ def test_create_collections_with_all_params(self): logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -196,7 +196,7 @@ def test_create_collections_with_all_params(self): for d in rsp["data"]["properties"]: if d["key"] == "collection.ttl.seconds": ttl_seconds_actual = int(d["value"]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert rsp['data']['shardsNum'] == num_shards assert rsp['data']['partitionsNum'] == num_partitions @@ -235,7 +235,7 @@ def test_create_collections_custom_without_index(self, dim, auto_id, enable_dyna } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -244,7 +244,7 @@ def test_create_collections_custom_without_index(self, dim, auto_id, enable_dyna logger.info(f"schema: {c.schema}") # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert rsp['data']['autoId'] == auto_id assert c.schema.auto_id == auto_id @@ -288,7 +288,7 @@ def test_create_collections_one_float_vector_with_index(self, dim, metric_type): } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -296,7 +296,7 @@ def test_create_collections_one_float_vector_with_index(self, dim, metric_type): # describe collection time.sleep(10) rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name # assert index created indexes = rsp['data']['indexes'] @@ -339,7 +339,7 @@ def test_create_collections_multi_float_vector_with_one_index(self, dim, metric_ # describe collection time.sleep(10) rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert 
rsp['data']['collectionName'] == name # assert index created indexes = rsp['data']['indexes'] @@ -375,7 +375,7 @@ def test_create_collections_multi_float_vector_with_all_index(self, dim, metric_ } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -383,7 +383,7 @@ def test_create_collections_multi_float_vector_with_all_index(self, dim, metric_ # describe collection time.sleep(10) rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name # assert index created indexes = rsp['data']['indexes'] @@ -426,7 +426,7 @@ def test_create_collections_float16_vector_datatype(self, dim, auto_id, enable_d } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -435,7 +435,7 @@ def test_create_collections_float16_vector_datatype(self, dim, auto_id, enable_d logger.info(f"schema: {c.schema}") # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert len(rsp['data']['fields']) == len(c.schema.fields) @@ -472,7 +472,7 @@ def test_create_collections_binary_vector_datatype(self, dim, auto_id, enable_dy } logging.info(f"create collection {name} with payload: {payload}") rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] @@ -481,7 +481,7 @@ def test_create_collections_binary_vector_datatype(self, dim, auto_id, enable_dy logger.info(f"schema: {c.schema}") # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert len(rsp['data']['fields']) == len(c.schema.fields) @@ -518,7 +518,7 @@ def create_collection(c_name, vector_dim, c_metric_type): time.sleep(10) success_cnt = 0 for rsp in concurrent_rsp: - if rsp["code"] == 200: + if rsp['code'] == 0: success_cnt += 1 logger.info(concurrent_rsp) assert success_cnt == 10 @@ -527,7 +527,7 @@ def create_collection(c_name, vector_dim, c_metric_type): assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name def test_create_collections_concurrent_with_different_param(self): @@ -565,7 +565,7 @@ def create_collection(c_name, vector_dim, c_metric_type): time.sleep(10) success_cnt = 0 for rsp in concurrent_rsp: - if rsp["code"] == 200: + if rsp['code'] == 0: success_cnt += 1 logger.info(concurrent_rsp) assert success_cnt == 1 @@ -574,7 +574,7 @@ def create_collection(c_name, vector_dim, c_metric_type): assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name @@ -659,7 +659,7 @@ def test_has_collections_default(self): } time.sleep(1) rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 name_list.append(name) rsp = client.collection_list() all_collections = rsp['data'] @@ -706,11 +706,11 @@ def test_get_collections_stats(self): } time.sleep(1) rsp = 
client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # describe collection client.collection_describe(collection_name=name) rsp = client.collection_stats(collection_name=name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['rowCount'] == 0 # insert data nb = 3000 @@ -746,7 +746,7 @@ def test_load_and_release_collection(self): } } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # create index before load index_params = [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}] payload = { @@ -762,7 +762,7 @@ def test_load_and_release_collection(self): # describe collection client.collection_describe(collection_name=name) rsp = client.collection_load(collection_name=name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_load_state(collection_name=name) assert rsp['data']['loadState'] in ["LoadStateLoaded", "LoadStateLoading"] time.sleep(5) @@ -793,11 +793,11 @@ def test_get_collection_load_state(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # describe collection client.collection_describe(collection_name=name) rsp = client.collection_load_state(collection_name=name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['loadState'] in ["LoadStateNotLoad", "LoadStateLoading"] # insert data nb = 3000 @@ -835,7 +835,7 @@ def test_list_collections_default(self): } time.sleep(1) rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 name_list.append(name) rsp = client.collection_list() all_collections = rsp['data'] @@ -863,7 +863,7 @@ def test_list_collections_with_invalid_api_key(self): } time.sleep(1) rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 name_list.append(name) client = self.collection_client client.api_key = "illegal_api_key" @@ -889,13 +889,13 @@ def test_describe_collections_default(self): "metricType": "L2" } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert rsp['data']['autoId'] is False assert rsp['data']['enableDynamicField'] is True @@ -936,7 +936,7 @@ def test_describe_collections_custom(self): assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name for field in rsp['data']['fields']: @@ -964,7 +964,7 @@ def test_describe_collections_with_invalid_api_key(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections @@ -987,7 +987,7 @@ def test_describe_collections_with_invalid_collection_name(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections @@ -1017,7 +1017,7 @@ def test_drop_collections_default(self): "metricType": "L2" } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 
200 + assert rsp['code'] == 0 clo_list.append(name) rsp = self.collection_client.collection_list() all_collections = rsp['data'] @@ -1029,7 +1029,7 @@ def test_drop_collections_default(self): "collectionName": name, } rsp = self.collection_client.collection_drop(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_list() all_collections = rsp['data'] for name in clo_list: @@ -1052,7 +1052,7 @@ def test_drop_collections_with_invalid_api_key(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections @@ -1081,7 +1081,7 @@ def test_drop_collections_with_invalid_collection_name(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections @@ -1091,7 +1091,7 @@ def test_drop_collections_with_invalid_collection_name(self): "collectionName": invalid_name, } rsp = client.collection_drop(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 @pytest.mark.L0 @@ -1112,7 +1112,7 @@ def test_rename_collection(self): "dimension": dim, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections @@ -1122,7 +1122,7 @@ def test_rename_collection(self): "newCollectionName": new_name, } rsp = client.collection_rename(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert new_name in all_collections diff --git a/tests/restful_client_v2/testcases/test_index_operation.py b/tests/restful_client_v2/testcases/test_index_operation.py index fbccc84250efb..534684c9bfbdf 100644 --- a/tests/restful_client_v2/testcases/test_index_operation.py +++ b/tests/restful_client_v2/testcases/test_index_operation.py @@ -76,13 +76,13 @@ def test_index_e2e(self, dim, metric_type, index_type): if index_type == "AUTOINDEX": payload["indexParams"][0]["params"] = {"index_type": "AUTOINDEX"} rsp = self.index_client.index_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 time.sleep(10) # list index, expect not empty rsp = self.index_client.index_list(collection_name=name) # describe index rsp = self.index_client.index_describe(collection_name=name, index_name="book_intro_vector") - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == len(payload['indexParams']) expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName']) actual_index = sorted(rsp['data'], key=lambda x: x['fieldName']) @@ -99,7 +99,7 @@ def test_index_e2e(self, dim, metric_type, index_type): "indexName": actual_index[i]['indexName'] } rsp = self.index_client.index_drop(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # list index, expect empty rsp = self.index_client.index_list(collection_name=name) assert rsp['data'] == [] @@ -156,13 +156,13 @@ def test_index_for_scalar_field(self, dim, index_type): "params": {"index_type": "INVERTED"}}] } rsp = self.index_client.index_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 time.sleep(10) # list index, expect not empty rsp = self.index_client.index_list(collection_name=name) # describe index rsp = self.index_client.index_describe(collection_name=name, 
index_name="word_count_vector") - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == len(payload['indexParams']) expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName']) actual_index = sorted(rsp['data'], key=lambda x: x['fieldName']) @@ -226,13 +226,13 @@ def test_index_for_binary_vector_field(self, dim, metric_type, index_type): if index_type == "BIN_IVF_FLAT": payload["indexParams"][0]["params"]["nlist"] = "16384" rsp = self.index_client.index_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 time.sleep(10) # list index, expect not empty rsp = self.index_client.index_list(collection_name=name) # describe index rsp = self.index_client.index_describe(collection_name=name, index_name=index_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == len(payload['indexParams']) expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName']) actual_index = sorted(rsp['data'], key=lambda x: x['fieldName']) diff --git a/tests/restful_client_v2/testcases/test_jobs_operation.py b/tests/restful_client_v2/testcases/test_jobs_operation.py index 3e0afbd5ec774..c651463efaab1 100644 --- a/tests/restful_client_v2/testcases/test_jobs_operation.py +++ b/tests/restful_client_v2/testcases/test_jobs_operation.py @@ -114,7 +114,7 @@ def test_job_e2e(self, insert_num, import_task_num, auto_id, is_partition_key, e "outputFields": ["*"], } rsp = self.vector_client.vector_query(payload) - assert rsp["code"] == 200 + assert rsp['code'] == 0 @pytest.mark.parametrize("insert_num", [5000]) @pytest.mark.parametrize("import_task_num", [1]) @@ -205,7 +205,7 @@ def test_import_job_with_db(self, insert_num, import_task_num, auto_id, is_parti "outputFields": ["*"], } rsp = self.vector_client.vector_query(payload) - assert rsp["code"] == 200 + assert rsp['code'] == 0 @pytest.mark.parametrize("insert_num", [5000]) @pytest.mark.parametrize("import_task_num", [1]) @@ -306,7 +306,7 @@ def test_import_job_with_partition(self, insert_num, import_task_num, auto_id, i "outputFields": ["*"], } rsp = self.vector_client.vector_query(payload) - assert rsp["code"] == 200 + assert rsp['code'] == 0 def test_job_import_multi_json_file(self): # create collection @@ -720,15 +720,15 @@ def test_job_import_binlog_file_type(self, nb, dim, insert_round, auto_id, ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # create restore collection restore_collection_name = f"{name}_restore" payload["collectionName"] = restore_collection_name rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -772,7 +772,7 @@ def test_job_import_binlog_file_type(self, nb, dim, insert_round, auto_id, "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # flush data to generate binlog file c = Collection(name) @@ -781,7 +781,7 @@ def test_job_import_binlog_file_type(self, nb, dim, insert_round, auto_id, # query data to make sure the data is inserted rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50}) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 50 # get collection id c 
= Collection(name) @@ -802,7 +802,7 @@ def test_job_import_binlog_file_type(self, nb, dim, insert_round, auto_id, if is_partition_key: payload["partitionName"] = "_default_0" rsp = self.import_job_client.create_import_jobs(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # list import job payload = { "collectionName": restore_collection_name, @@ -1528,7 +1528,7 @@ def test_get_job_progress_with_mismatch_db_name(self, insert_num, import_task_nu "outputFields": ["*"], } rsp = self.vector_client.vector_query(payload) - assert rsp["code"] == 200 + assert rsp['code'] == 0 @pytest.mark.L1 diff --git a/tests/restful_client_v2/testcases/test_partition_operation.py b/tests/restful_client_v2/testcases/test_partition_operation.py index 13022895541d1..44717b5686c3b 100644 --- a/tests/restful_client_v2/testcases/test_partition_operation.py +++ b/tests/restful_client_v2/testcases/test_partition_operation.py @@ -36,13 +36,13 @@ def test_partition_e2e(self): {"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"}] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = client.collection_list() all_collections = rsp['data'] assert name in all_collections # describe collection rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name # insert data to default partition data = [] @@ -59,11 +59,11 @@ def test_partition_e2e(self): "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # create partition partition_name = "test_partition" rsp = self.partition_client.partition_create(collection_name=name, partition_name=partition_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data to partition data = [] for j in range(3000, 6000): @@ -80,45 +80,45 @@ def test_partition_e2e(self): "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # create partition again rsp = self.partition_client.partition_create(collection_name=name, partition_name=partition_name) # list partitions rsp = self.partition_client.partition_list(collection_name=name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert partition_name in rsp['data'] # has partition rsp = self.partition_client.partition_has(collection_name=name, partition_name=partition_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']["has"] is True # flush and get partition statistics c = Collection(name=name) c.flush() rsp = self.partition_client.partition_stats(collection_name=name, partition_name=partition_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['rowCount'] == 3000 # release partition rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # release partition again rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # load partition rsp = self.partition_client.partition_load(collection_name=name, partition_names=[partition_name]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # load partition again rsp = self.partition_client.partition_load(collection_name=name, partition_names=[partition_name]) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # drop 
partition when it is loaded rsp = self.partition_client.partition_drop(collection_name=name, partition_name=partition_name) assert rsp['code'] == 65535 # drop partition after release rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name]) rsp = self.partition_client.partition_drop(collection_name=name, partition_name=partition_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # has partition rsp = self.partition_client.partition_has(collection_name=name, partition_name=partition_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']["has"] is False diff --git a/tests/restful_client_v2/testcases/test_restful_sdk_mix_use_scenario.py b/tests/restful_client_v2/testcases/test_restful_sdk_mix_use_scenario.py index 97a862248a551..ab7e5a28b7bab 100644 --- a/tests/restful_client_v2/testcases/test_restful_sdk_mix_use_scenario.py +++ b/tests/restful_client_v2/testcases/test_restful_sdk_mix_use_scenario.py @@ -37,7 +37,7 @@ def test_collection_created_by_sdk_describe_by_restful(self, dim, enable_dynamic all_collections = rsp['data'] assert name in all_collections rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert rsp['data']['enableDynamicField'] == enable_dynamic assert rsp['data']['load'] == "LoadStateNotLoad" @@ -57,7 +57,7 @@ def test_collection_created_by_restful_describe_by_sdk(self, dim, metric_type): "metricType": metric_type, } rsp = client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 collection = Collection(name=name) logger.info(collection.schema) field_names = [field.name for field in collection.schema.fields] @@ -89,7 +89,7 @@ def test_collection_created_index_by_sdk_describe_by_restful(self, metric_type): all_collections = rsp['data'] assert name in all_collections rsp = client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['collectionName'] == name assert len(rsp['data']['indexes']) == 1 and rsp['data']['indexes'][0]['metricType'] == metric_type @@ -160,7 +160,7 @@ def test_collection_create_by_sdk_insert_vector_by_restful(self): "data": data, } rsp = client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb assert len(rsp['data']["insertIds"]) == nb @@ -196,7 +196,7 @@ def test_collection_create_by_sdk_search_vector_by_restful(self): } # search data by restful rsp = client.vector_search(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 10 def test_collection_create_by_sdk_query_vector_by_restful(self): @@ -230,7 +230,7 @@ def test_collection_create_by_sdk_query_vector_by_restful(self): } # query data by restful rsp = client.vector_query(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 10 def test_collection_create_by_restful_search_vector_by_sdk(self): diff --git a/tests/restful_client_v2/testcases/test_role_operation.py b/tests/restful_client_v2/testcases/test_role_operation.py index 63b4acae2da42..9ad8049a65ce7 100644 --- a/tests/restful_client_v2/testcases/test_role_operation.py +++ b/tests/restful_client_v2/testcases/test_role_operation.py @@ -43,7 +43,7 @@ def test_role_e2e(self): assert role_name in rsp['data'] # describe role rsp = self.role_client.role_describe(role_name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # grant privilege to role payload = { "roleName": 
role_name, @@ -52,7 +52,7 @@ def test_role_e2e(self): "privilege": "CreateCollection" } rsp = self.role_client.role_grant(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # describe role after grant rsp = self.role_client.role_describe(role_name) privileges = [] diff --git a/tests/restful_client_v2/testcases/test_user_operation.py b/tests/restful_client_v2/testcases/test_user_operation.py index 4491c2aec7db3..b3cc0e5b76ca0 100644 --- a/tests/restful_client_v2/testcases/test_user_operation.py +++ b/tests/restful_client_v2/testcases/test_user_operation.py @@ -56,7 +56,7 @@ def test_user_e2e(self): "newPassword": new_password } rsp = self.user_client.user_password_update(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # drop user payload = { "userName": user_name @@ -124,7 +124,7 @@ def test_user_binding_role(self): } self.collection_client.api_key = f"{user_name}:{password}" rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 @pytest.mark.L1 @@ -158,7 +158,7 @@ def test_create_user_twice(self): for i in range(2): rsp = self.user_client.user_create(payload) if i == 0: - assert rsp['code'] == 200 + assert rsp['code'] == 0 else: assert rsp['code'] == 65535 assert "user already exists" in rsp['message'] diff --git a/tests/restful_client_v2/testcases/test_vector_operations.py b/tests/restful_client_v2/testcases/test_vector_operations.py index d2b74552ff331..73d2a3b9a9911 100644 --- a/tests/restful_client_v2/testcases/test_vector_operations.py +++ b/tests/restful_client_v2/testcases/test_vector_operations.py @@ -33,10 +33,10 @@ def test_insert_entities_with_simple_payload(self, nb, dim, insert_round): "metricType": "L2" } rsp = self.collection_client.collection_create(collection_payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = get_data_by_payload(collection_payload, nb) @@ -47,7 +47,7 @@ def test_insert_entities_with_simple_payload(self, nb, dim, insert_round): body_size = sys.getsizeof(json.dumps(payload)) logger.info(f"body size: {body_size / 1024 / 1024} MB") rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb @pytest.mark.parametrize("insert_round", [1]) @@ -92,10 +92,10 @@ def test_insert_entities_with_all_scalar_datatype(self, nb, dim, insert_round, a ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -139,11 +139,11 @@ def test_insert_entities_with_all_scalar_datatype(self, nb, dim, insert_round, a "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # query data to make sure the data is inserted rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50}) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 50 @pytest.mark.parametrize("insert_round", [1]) @@ -187,10 +187,10 @@ def test_insert_entities_with_all_vector_datatype(self, nb, dim, insert_round, a ] } rsp = 
self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data for i in range(insert_round): data = [] @@ -224,7 +224,7 @@ def test_insert_entities_with_all_vector_datatype(self, nb, dim, insert_round, a "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb c = Collection(name) res = c.query( @@ -235,7 +235,7 @@ def test_insert_entities_with_all_vector_datatype(self, nb, dim, insert_round, a logger.info(f"res: {res}") # query data to make sure the data is inserted rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50}) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 50 @pytest.mark.parametrize("insert_round", [1]) @@ -280,10 +280,10 @@ def test_insert_entities_with_all_json_datatype(self, nb, dim, insert_round, aut ] } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) logger.info(f"rsp: {rsp}") - assert rsp['code'] == 200 + assert rsp['code'] == 0 json_value = [ 1, 1.0, @@ -336,11 +336,11 @@ def test_insert_entities_with_all_json_datatype(self, nb, dim, insert_round, aut "data": data, } rsp = self.vector_client.vector_insert(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert rsp['data']['insertCount'] == nb # query data to make sure the data is inserted rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50}) - assert rsp['code'] == 200 + assert rsp['code'] == 0 assert len(rsp['data']) == 50 @@ -359,9 +359,9 @@ def test_insert_vector_with_invalid_api_key(self): "dimension": dim, } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data nb = 10 data = [ @@ -393,9 +393,9 @@ def test_insert_vector_with_invalid_collection_name(self): "dimension": dim, } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data nb = 100 data = get_data_by_payload(payload, nb) @@ -421,9 +421,9 @@ def test_insert_vector_with_invalid_database_name(self): "dimension": dim, } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data nb = 10 data = get_data_by_payload(payload, nb) @@ -449,9 +449,9 @@ def test_insert_vector_with_mismatch_dim(self): "dimension": dim, } rsp = self.collection_client.collection_create(payload) - assert rsp['code'] == 200 + assert rsp['code'] == 0 rsp = self.collection_client.collection_describe(name) - assert rsp['code'] == 200 + assert rsp['code'] == 0 # insert data nb = 1 data = [ @@ -493,10 +493,10 @@ def test_upsert_vector_default(self, nb, dim, insert_round, id_type): "indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}] } rsp = self.collection_client.collection_create(payload) - 
assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # insert data
         for i in range(insert_round):
             data = []
@@ -516,7 +516,7 @@ def test_upsert_vector_default(self, nb, dim, insert_round, id_type):
             body_size = sys.getsizeof(json.dumps(payload))
             logger.info(f"body size: {body_size / 1024 / 1024} MB")
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
         c = Collection(name)
         c.flush()
@@ -575,10 +575,10 @@ def test_upsert_vector_pk_auto_id(self, nb, dim, insert_round, id_type):
             "indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}]
         }
         rsp = self.collection_client.collection_create(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         ids = []
         # insert data
         for i in range(insert_round):
@@ -599,7 +599,7 @@ def test_upsert_vector_pk_auto_id(self, nb, dim, insert_round, id_type):
             body_size = sys.getsizeof(json.dumps(payload))
             logger.info(f"body size: {body_size / 1024 / 1024} MB")
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
             ids.extend(rsp['data']['insertIds'])
         c = Collection(name)
@@ -682,10 +682,10 @@ def test_search_vector_with_all_vector_datatype(self, nb, dim, insert_round, aut
             ]
         }
         rsp = self.collection_client.collection_create(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # insert data
         for i in range(insert_round):
             data = []
@@ -719,7 +719,7 @@ def test_search_vector_with_all_vector_datatype(self, nb, dim, insert_round, aut
                 "data": data,
             }
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
         # search data
         payload = {
@@ -739,7 +739,7 @@ def test_search_vector_with_all_vector_datatype(self, nb, dim, insert_round, aut
             "limit": 100,
         }
         rsp = self.vector_client.vector_search(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # assert no dup user_id
         user_ids = [r["user_id"]for r in rsp['data']]
         assert len(user_ids) == len(set(user_ids))
@@ -750,8 +750,9 @@ def test_search_vector_with_all_vector_datatype(self, nb, dim, insert_round, aut
     @pytest.mark.parametrize("enable_dynamic_schema", [True])
     @pytest.mark.parametrize("nb", [3000])
     @pytest.mark.parametrize("dim", [128])
+    @pytest.mark.parametrize("nq", [1, 2])
     def test_search_vector_with_float_vector_datatype(self, nb, dim, insert_round, auto_id,
-                                                      is_partition_key, enable_dynamic_schema):
+                                                      is_partition_key, enable_dynamic_schema, nq):
         """
         Insert a vector with a simple payload
         """
@@ -776,10 +777,10 @@ def test_search_vector_with_float_vector_datatype(self, nb, dim, insert_round, a
             ]
         }
         rsp = self.collection_client.collection_create(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # insert data
         for i in range(insert_round):
             data = []
@@ -807,12 +808,12 @@ def test_search_vector_with_float_vector_datatype(self, nb, dim, insert_round, a
                 "data": data,
             }
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
         # search data
         payload = {
             "collectionName": name,
-            "data": [gen_vector(datatype="FloatVector", dim=dim)],
+            "data": [gen_vector(datatype="FloatVector", dim=dim) for _ in range(nq)],
             "filter": "word_count > 100",
             "groupingField": "user_id",
             "outputFields": ["*"],
@@ -826,19 +827,19 @@ def test_search_vector_with_float_vector_datatype(self, nb, dim, insert_round, a
             "limit": 100,
         }
         rsp = self.vector_client.vector_search(payload)
-        assert rsp['code'] == 200
-        assert len(rsp['data']) == 100
+        assert rsp['code'] == 0
+        assert len(rsp['data']) == 100 * nq
 
-    @pytest.mark.parametrize("insert_round", [1])
-    @pytest.mark.parametrize("auto_id", [True])
-    @pytest.mark.parametrize("is_partition_key", [True])
+    @pytest.mark.parametrize("insert_round", [1, 10])
+    @pytest.mark.parametrize("auto_id", [True, False])
+    @pytest.mark.parametrize("is_partition_key", [True, False])
     @pytest.mark.parametrize("enable_dynamic_schema", [True])
     @pytest.mark.parametrize("nb", [3000])
     @pytest.mark.parametrize("dim", [128])
-    @pytest.mark.xfail(reason="issue https://github.com/milvus-io/milvus/issues/32214")
+    @pytest.mark.parametrize("groupingField", ['user_id', None])
     def test_search_vector_with_sparse_float_vector_datatype(self, nb, dim, insert_round, auto_id,
-                                                             is_partition_key, enable_dynamic_schema):
+                                                             is_partition_key, enable_dynamic_schema, groupingField):
         """
         Insert a vector with a simple payload
         """
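Note on the `nq` parametrization above: the v2 RESTful search endpoint takes a list of query vectors in `data`, and the updated assertion expects `limit * nq` rows back. Below is a minimal sketch of the payload shape, not part of this diff; `random` stands in for the repo's `gen_vector` helper, and the HTTP call itself is done by `self.vector_client.vector_search` in the tests.

```python
# Sketch only: mirrors the search payload used in the hunk above.
import random

def build_search_payload(collection_name: str, dim: int, nq: int, limit: int = 100) -> dict:
    """Build a v2 REST search payload carrying nq query vectors."""
    return {
        "collectionName": collection_name,
        "data": [[random.random() for _ in range(dim)] for _ in range(nq)],
        "filter": "word_count > 100",
        "groupingField": "user_id",
        "outputFields": ["*"],
        "limit": limit,
    }

payload = build_search_payload("book_search", dim=128, nq=2)
# With nq query vectors the flattened result list is expected to hold limit * nq rows,
# which is what the updated assertion in the test checks.
assert len(payload["data"]) == 2
```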
@@ -860,31 +861,32 @@ def test_search_vector_with_sparse_float_vector_datatype(self, nb, dim, insert_r
             },
             "indexParams": [
                 {"fieldName": "sparse_float_vector", "indexName": "sparse_float_vector", "metricType": "IP",
-                 "indexConfig": {"index_type": "SPARSE_INVERTED_INDEX", "drop_ratio_build": "0.2"}}
+                 "params": {"index_type": "SPARSE_INVERTED_INDEX", "drop_ratio_build": "0.2"}}
             ]
         }
         rsp = self.collection_client.collection_create(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # insert data
         for i in range(insert_round):
             data = []
-            for i in range(nb):
+            for j in range(nb):
+                idx = i * nb + j
                 if auto_id:
                     tmp = {
-                        "user_id": i%100,
-                        "word_count": i,
-                        "book_describe": f"book_{i}",
+                        "user_id": idx%100,
+                        "word_count": j,
+                        "book_describe": f"book_{idx}",
                         "sparse_float_vector": gen_vector(datatype="SparseFloatVector", dim=dim),
                     }
                 else:
                     tmp = {
-                        "book_id": i,
-                        "user_id": i%100,
-                        "word_count": i,
-                        "book_describe": f"book_{i}",
+                        "book_id": idx,
+                        "user_id": idx%100,
+                        "word_count": j,
+                        "book_describe": f"book_{idx}",
                         "sparse_float_vector": gen_vector(datatype="SparseFloatVector", dim=dim),
                     }
                 if enable_dynamic_schema:
@@ -895,14 +897,13 @@ def test_search_vector_with_sparse_float_vector_datatype(self, nb, dim, insert_r
                 "data": data,
             }
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
         # search data
         payload = {
             "collectionName": name,
             "data": [gen_vector(datatype="SparseFloatVector", dim=dim)],
             "filter": "word_count > 100",
-            "groupingField": "user_id",
             "outputFields": ["*"],
             "searchParams": {
                 "metricType": "IP",
@@ -910,11 +911,12 @@
                     "drop_ratio_search": "0.2",
                 }
             },
-            "limit": 100,
+            "limit": 500,
         }
+        if groupingField:
+            payload["groupingField"] = groupingField
         rsp = self.vector_client.vector_search(payload)
-        assert rsp['code'] == 200
-        assert len(rsp['data']) == 100
+        assert rsp['code'] == 0
@@ -951,10 +953,10 @@ def test_search_vector_with_binary_vector_datatype(self, nb, dim, insert_round,
             ]
         }
         rsp = self.collection_client.collection_create(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # insert data
         for i in range(insert_round):
             data = []
@@ -982,7 +984,7 @@ def test_search_vector_with_binary_vector_datatype(self, nb, dim, insert_round,
                 "data": data,
             }
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
         # flush data
         c = Collection(name)
@@ -1007,7 +1009,7 @@ def test_search_vector_with_binary_vector_datatype(self, nb, dim, insert_round,
             "limit": 100,
         }
         rsp = self.vector_client.vector_search(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         assert len(rsp['data']) == 100
 
     @pytest.mark.parametrize("metric_type", ["IP", "L2", "COSINE"])
@@ -1027,7 +1029,7 @@ def test_search_vector_with_simple_payload(self, metric_type):
             "data": [vector_to_search],
         }
         rsp = self.vector_client.vector_search(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         limit = int(payload.get("limit", 100))
@@ -1067,7 +1069,7 @@ def test_search_vector_with_exceed_sum_limit_offset(self, sum_limit_offset):
         if sum_limit_offset > max_search_sum_limit_offset:
             assert rsp['code'] == 65535
             return
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         limit = int(payload.get("limit", 100))
@@ -1108,7 +1110,7 @@ def test_search_vector_with_complex_payload(self, limit, offset, metric_type):
         if offset + limit > constant.MAX_SUM_OFFSET_AND_LIMIT:
             assert rsp['code'] == 90126
             return
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         assert len(res) == limit
@@ -1141,7 +1143,7 @@ def test_search_vector_with_complex_int_filter(self, filter_expr):
             "offset": 0,
         }
         rsp = self.vector_client.vector_search(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         assert len(res) <= limit
@@ -1182,7 +1184,7 @@ def test_search_vector_with_complex_varchar_filter(self, filter_expr):
             "offset": 0,
         }
         rsp = self.vector_client.vector_search(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         assert len(res) <= limit
@@ -1229,7 +1231,7 @@ def test_search_vector_with_complex_int64_varchar_and_filter(self, filter_expr):
             "offset": 0,
         }
         rsp = self.vector_client.vector_search(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         assert len(res) <= limit
@@ -1354,10 +1356,10 @@ def test_advanced_search_vector_with_multi_float32_vector_datatype(self, nb, dim
             ]
         }
         rsp = self.collection_client.collection_create(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # insert data
         for i in range(insert_round):
             data = []
@@ -1388,7 +1390,7 @@ def test_advanced_search_vector_with_multi_float32_vector_datatype(self, nb, dim
                 "data": data,
             }
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
 
         # advanced search data
@@ -1419,7 +1421,7 @@ def test_advanced_search_vector_with_multi_float32_vector_datatype(self, nb, dim
         }
 
         rsp = self.vector_client.vector_advanced_search(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         assert len(rsp['data']) == 10
@@ -1462,10 +1464,10 @@ def test_hybrid_search_vector_with_multi_float32_vector_datatype(self, nb, dim,
             ]
         }
         rsp = self.collection_client.collection_create(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # insert data
         for i in range(insert_round):
             data = []
@@ -1496,7 +1498,7 @@ def test_hybrid_search_vector_with_multi_float32_vector_datatype(self, nb, dim,
                 "data": data,
             }
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
 
         # advanced search data
@@ -1527,7 +1529,7 @@ def test_hybrid_search_vector_with_multi_float32_vector_datatype(self, nb, dim,
         }
 
         rsp = self.vector_client.vector_hybrid_search(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         assert len(rsp['data']) == 10
@@ -1578,10 +1580,10 @@ def test_query_entities_with_all_scalar_datatype(self, nb, dim, insert_round, au
             ]
         }
         rsp = self.collection_client.collection_create(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # insert data
         for i in range(insert_round):
             data = []
@@ -1625,7 +1627,7 @@ def test_query_entities_with_all_scalar_datatype(self, nb, dim, insert_round, au
                 "data": data,
             }
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
         # query data to make sure the data is inserted
         # 1. query for int64
@@ -1636,7 +1638,7 @@ def test_query_entities_with_all_scalar_datatype(self, nb, dim, insert_round, au
             "outputFields": ["*"]
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         assert len(rsp['data']) == 50
 
         # 2. query for varchar
@@ -1647,7 +1649,7 @@ def test_query_entities_with_all_scalar_datatype(self, nb, dim, insert_round, au
             "outputFields": ["*"]
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         assert len(rsp['data']) == 50
 
         # 3. query for json
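For the sparse-vector hunks above, two things changed: the index definition nests its options under `params` instead of `indexConfig`, and `groupingField` is only attached to the search payload when the parametrized value is not None. A hedged sketch of both payload shapes follows; it is not part of this diff, and the toy `{dimension: value}` dict merely stands in for `gen_vector(datatype="SparseFloatVector")`.

```python
# Sketch only: payload shapes taken from the sparse-vector hunks above.
def sparse_index_params(field: str = "sparse_float_vector") -> dict:
    return {
        "fieldName": field,
        "indexName": field,
        "metricType": "IP",
        # options moved from "indexConfig" to "params" in this change
        "params": {"index_type": "SPARSE_INVERTED_INDEX", "drop_ratio_build": "0.2"},
    }

def sparse_search_payload(name: str, grouping_field=None) -> dict:
    payload = {
        "collectionName": name,
        "data": [{0: 0.5, 7: 0.25, 42: 0.1}],  # toy sparse vector
        "filter": "word_count > 100",
        "outputFields": ["*"],
        "searchParams": {"metricType": "IP", "params": {"drop_ratio_search": "0.2"}},
        "limit": 500,
    }
    if grouping_field:  # grouping is optional now, mirroring the test
        payload["groupingField"] = grouping_field
    return payload

print(sparse_index_params())
print(sparse_search_payload("book_search", grouping_field="user_id"))
```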
@@ -1711,10 +1713,10 @@ def test_query_entities_with_all_vector_datatype(self, nb, dim, insert_round, au
             ]
         }
         rsp = self.collection_client.collection_create(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # insert data
         for i in range(insert_round):
             data = []
@@ -1748,7 +1750,7 @@ def test_query_entities_with_all_vector_datatype(self, nb, dim, insert_round, au
                 "data": data,
             }
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
         c = Collection(name)
         res = c.query(
@@ -1759,7 +1761,7 @@ def test_query_entities_with_all_vector_datatype(self, nb, dim, insert_round, au
         logger.info(f"res: {res}")
         # query data to make sure the data is inserted
         rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50})
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         assert len(rsp['data']) == 50
 
     @pytest.mark.parametrize("expr", ["10+20 <= uid < 20+30", "uid in [1,2,3,4]",
@@ -1796,7 +1798,7 @@ def test_query_vector_with_int64_filter(self, expr, include_output_fields, parti
             output_fields.remove("vector")
         time.sleep(5)
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         for r in res:
@@ -1820,7 +1822,7 @@ def test_query_vector_with_count(self):
             "outputFields": ["count(*)"]
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         assert rsp['data'][0]['count(*)'] == 3000
 
     @pytest.mark.xfail(reason="query by id is not supported")
@@ -1836,7 +1838,7 @@ def test_query_vector_by_id(self):
             "id": insert_ids,
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
 
     @pytest.mark.parametrize("filter_expr", ["name > \"placeholder\"", "name like \"placeholder%\""])
     @pytest.mark.parametrize("include_output_fields", [True, False])
@@ -1871,7 +1873,7 @@ def test_query_vector_with_varchar_filter(self, filter_expr, include_output_fiel
         if not include_output_fields:
             payload.pop("outputFields")
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         assert len(res) <= limit
@@ -1919,7 +1921,7 @@ def test_query_vector_with_large_sum_of_limit_offset(self, sum_of_limit_offset):
         if sum_of_limit_offset > max_sum_of_limit_offset:
             assert rsp['code'] == 1
             return
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         assert len(res) <= limit
@@ -1974,7 +1976,7 @@ def test_get_vector_with_simple_payload(self):
             "data": [vector_to_search],
         }
         rsp = self.vector_client.vector_search(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         limit = int(payload.get("limit", 100))
@@ -1987,7 +1989,7 @@ def test_get_vector_with_simple_payload(self):
             "id": ids[0],
         }
         rsp = self.vector_client.vector_get(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {res}")
         logger.info(f"res: {len(res)}")
@@ -2014,7 +2016,7 @@ def test_get_vector_complex(self, id_field_type, include_output_fields, include_
             "filter": f"uid in {uids}",
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         ids = []
@@ -2038,7 +2040,7 @@ def test_get_vector_complex(self, id_field_type, include_output_fields, include_
             "id": id_to_get
         }
         rsp = self.vector_client.vector_get(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         if isinstance(id_to_get, list):
             if include_invalid_id:
@@ -2076,7 +2078,7 @@ def test_delete_vector_by_id(self):
             "id": insert_ids,
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
 
     @pytest.mark.parametrize("id_field_type", ["list", "one"])
     def test_delete_vector_by_pk_field_ids(self, id_field_type):
@@ -2102,7 +2104,7 @@ def test_delete_vector_by_pk_field_ids(self, id_field_type):
                 "filter": f"id == {id_to_delete}"
             }
         rsp = self.vector_client.vector_delete(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # verify data deleted by get
         payload = {
             "collectionName": name,
@@ -2129,7 +2131,7 @@ def test_delete_vector_by_filter_pk_field(self, id_field_type):
             "filter": f"uid in {uids}",
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         ids = []
@@ -2157,7 +2159,7 @@ def test_delete_vector_by_filter_pk_field(self, id_field_type):
         }
 
         rsp = self.vector_client.vector_delete(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         logger.info(f"delete res: {rsp}")
 
         # verify data deleted
@@ -2169,7 +2171,7 @@ def test_delete_vector_by_filter_pk_field(self, id_field_type):
         }
         time.sleep(5)
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         assert len(rsp['data']) == 0
 
     def test_delete_vector_by_custom_pk_field(self):
@@ -2191,10 +2193,10 @@ def test_delete_vector_by_custom_pk_field(self):
             "indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}]
         }
         rsp = self.collection_client.collection_create(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         pk_values = []
         # insert data
         for i in range(insert_round):
@@ -2216,7 +2218,7 @@ def test_delete_vector_by_custom_pk_field(self):
             body_size = sys.getsizeof(json.dumps(payload))
            logger.info(f"body size: {body_size / 1024 / 1024} MB")
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
         # query data before delete
         c = Collection(name)
@@ -2254,10 +2256,10 @@ def test_delete_vector_by_filter_custom_field(self):
             "indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}]
         }
         rsp = self.collection_client.collection_create(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         rsp = self.collection_client.collection_describe(name)
         logger.info(f"rsp: {rsp}")
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         # insert data
         for i in range(insert_round):
             data = []
@@ -2276,7 +2278,7 @@ def test_delete_vector_by_filter_custom_field(self):
             body_size = sys.getsizeof(json.dumps(payload))
             logger.info(f"body size: {body_size / 1024 / 1024} MB")
             rsp = self.vector_client.vector_insert(payload)
-            assert rsp['code'] == 200
+            assert rsp['code'] == 0
             assert rsp['data']['insertCount'] == nb
         # query data before delete
         c = Collection(name)
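The delete tests above all follow the same pattern: query for a set of primary keys, then delete either by `id` or by a `filter` expression, and verify with a follow-up query. A small hedged sketch of the two delete payload shapes is below; it is not part of this diff, `book_id` is the custom primary key used in these tests, and the actual HTTP call is made by `self.vector_client.vector_delete`.

```python
# Sketch only: payload shapes mirroring the delete tests above.
def delete_by_ids(name: str, ids: list) -> dict:
    """Delete entities by primary key values."""
    return {"collectionName": name, "id": ids}

def delete_by_filter(name: str, pk_values: list) -> dict:
    """Delete entities matching a filter expression on the custom pk field."""
    return {"collectionName": name, "filter": f"book_id in {pk_values}"}

print(delete_by_ids("book_search", [1, 2, 3]))
print(delete_by_filter("book_search", [4, 5, 6]))
```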
@@ -2312,7 +2314,7 @@ def test_delete_vector_with_non_primary_key(self):
             "outputFields": ["id", "uid"]
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         id_list = [r['uid'] for r in res]
@@ -2326,7 +2328,7 @@ def test_delete_vector_with_non_primary_key(self):
             "outputFields": ["id", "uid"]
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         num_before_delete = len(res)
         logger.info(f"res: {len(res)}")
@@ -2370,7 +2372,7 @@ def test_delete_vector_with_invalid_api_key(self):
             "filter": f"uid in {uids}",
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         ids = []
@@ -2407,7 +2409,7 @@ def test_delete_vector_with_invalid_collection_name(self):
             "outputFields": ["id", "uid"]
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         id_list = [r['id'] for r in res]
@@ -2421,7 +2423,7 @@ def test_delete_vector_with_invalid_collection_name(self):
             "outputFields": ["id", "uid"]
         }
         rsp = self.vector_client.vector_query(payload)
-        assert rsp['code'] == 200
+        assert rsp['code'] == 0
         res = rsp['data']
         logger.info(f"res: {len(res)}")
         # delete data
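Most of the test-file changes above are mechanical: the v2 RESTful API in this branch reports success as `code == 0`, where the tests previously expected `200`, so every assertion is rewritten in place. If the same check keeps being repeated, a tiny helper such as the following (hypothetical, not part of this diff) would centralize the convention:

```python
# Hypothetical helper, not part of this diff: one place to assert the success code,
# so a future change of the convention only touches a single constant.
SUCCESS_CODE = 0  # was 200 before this change

def assert_ok(rsp: dict) -> None:
    assert rsp["code"] == SUCCESS_CODE, (
        f"unexpected code: {rsp.get('code')}, message: {rsp.get('message')}"
    )

assert_ok({"code": 0, "data": []})
```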
"${TEST_TIMEOUT}" pytest testcases/test_bulk_insert.py --timeout=300 --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} --minio_host ${MINIO_SERVICE_NAME} \ - --html=${CI_LOG_PATH}/report_bulk_insert.html --self-contained-html - else - pytest testcases/test_bulk_insert.py --timeout=300 --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} --minio_host ${MINIO_SERVICE_NAME} \ - --html=${CI_LOG_PATH}/report_bulk_insert.html --self-contained-html - fi -fi - -# # Run concurrent test with 5 processes -# if [[ -n "${TEST_TIMEOUT:-}" ]]; then - -# timeout "${TEST_TIMEOUT}" pytest testcases/test_concurrent.py --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} --count 5 -n 5 \ -# --html=${CI_LOG_PATH}/report_concurrent.html --self-contained-html -# else -# pytest testcases/test_concurrent.py --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} --count 5 -n 5 \ -# --html=${CI_LOG_PATH}/report_concurrent.html --self-contained-html -# fi diff --git a/tests/scripts/values/ci/pr-arm.yaml b/tests/scripts/values/ci/pr-arm.yaml new file mode 100644 index 0000000000000..9327ae611322c --- /dev/null +++ b/tests/scripts/values/ci/pr-arm.yaml @@ -0,0 +1,202 @@ +metrics: + serviceMonitor: + enabled: true +log: + level: debug + +nodeSelector: + "kubernetes.io/arch": "arm64" +tolerations: + - key: "node-role.kubernetes.io/arm" + operator: "Exists" + effect: "NoSchedule" + +proxy: + resources: + requests: + cpu: "0.3" + memory: "256Mi" + limits: + cpu: "1" +rootCoordinator: + resources: + requests: + cpu: "0.2" + memory: "256Mi" + limits: + cpu: "1" +queryCoordinator: + resources: + requests: + cpu: "0.2" + memory: "100Mi" + limits: + cpu: "1" +queryNode: + resources: + requests: + cpu: "0.5" + memory: "500Mi" + limits: + cpu: "2" +indexCoordinator: + resources: + requests: + cpu: "0.1" + memory: "50Mi" + limits: + cpu: "1" +indexNode: + resources: + requests: + cpu: "0.5" + memory: "500Mi" + limits: + cpu: "2" +dataCoordinator: + resources: + requests: + cpu: "0.1" + memory: "50Mi" + limits: + cpu: "1" +dataNode: + resources: + requests: + cpu: "0.5" + memory: "500Mi" + limits: + cpu: "2" + +pulsar: + components: + autorecovery: false + proxy: + configData: + PULSAR_MEM: > + -Xms1024m -Xmx1024m + PULSAR_GC: > + -XX:MaxDirectMemorySize=2048m + httpNumThreads: "50" + resources: + requests: + cpu: "0.5" + memory: "1Gi" + # Resources for the websocket proxy + wsResources: + requests: + memory: "100Mi" + cpu: "0.1" + broker: + resources: + requests: + cpu: "0.5" + memory: "4Gi" + configData: + PULSAR_MEM: > + -Xms4096m + -Xmx4096m + -XX:MaxDirectMemorySize=8192m + PULSAR_GC: > + -Dio.netty.leakDetectionLevel=disabled + -Dio.netty.recycler.linkCapacity=1024 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:ParallelGCThreads=32 + -XX:ConcGCThreads=32 + -XX:G1NewSizePercent=50 + -XX:+DisableExplicitGC + -XX:-ResizePLAB + -XX:+ExitOnOutOfMemoryError + maxMessageSize: "104857600" + defaultRetentionTimeInMinutes: "10080" + defaultRetentionSizeInMB: "8192" + backlogQuotaDefaultLimitGB: "8" + backlogQuotaDefaultRetentionPolicy: producer_exception + + bookkeeper: + configData: + PULSAR_MEM: > + -Xms4096m + -Xmx4096m + -XX:MaxDirectMemorySize=8192m + PULSAR_GC: > + -Dio.netty.leakDetectionLevel=disabled + -Dio.netty.recycler.linkCapacity=1024 + -XX:+UseG1GC -XX:MaxGCPauseMillis=10 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:ParallelGCThreads=32 + -XX:ConcGCThreads=32 + -XX:G1NewSizePercent=50 + 
diff --git a/tests/scripts/values/ci/pr-arm.yaml b/tests/scripts/values/ci/pr-arm.yaml
new file mode 100644
index 0000000000000..9327ae611322c
--- /dev/null
+++ b/tests/scripts/values/ci/pr-arm.yaml
@@ -0,0 +1,202 @@
+metrics:
+  serviceMonitor:
+    enabled: true
+log:
+  level: debug
+
+nodeSelector:
+  "kubernetes.io/arch": "arm64"
+tolerations:
+  - key: "node-role.kubernetes.io/arm"
+    operator: "Exists"
+    effect: "NoSchedule"
+
+proxy:
+  resources:
+    requests:
+      cpu: "0.3"
+      memory: "256Mi"
+    limits:
+      cpu: "1"
+rootCoordinator:
+  resources:
+    requests:
+      cpu: "0.2"
+      memory: "256Mi"
+    limits:
+      cpu: "1"
+queryCoordinator:
+  resources:
+    requests:
+      cpu: "0.2"
+      memory: "100Mi"
+    limits:
+      cpu: "1"
+queryNode:
+  resources:
+    requests:
+      cpu: "0.5"
+      memory: "500Mi"
+    limits:
+      cpu: "2"
+indexCoordinator:
+  resources:
+    requests:
+      cpu: "0.1"
+      memory: "50Mi"
+    limits:
+      cpu: "1"
+indexNode:
+  resources:
+    requests:
+      cpu: "0.5"
+      memory: "500Mi"
+    limits:
+      cpu: "2"
+dataCoordinator:
+  resources:
+    requests:
+      cpu: "0.1"
+      memory: "50Mi"
+    limits:
+      cpu: "1"
+dataNode:
+  resources:
+    requests:
+      cpu: "0.5"
+      memory: "500Mi"
+    limits:
+      cpu: "2"
+
+pulsar:
+  components:
+    autorecovery: false
+  proxy:
+    configData:
+      PULSAR_MEM: >
+        -Xms1024m -Xmx1024m
+      PULSAR_GC: >
+        -XX:MaxDirectMemorySize=2048m
+      httpNumThreads: "50"
+    resources:
+      requests:
+        cpu: "0.5"
+        memory: "1Gi"
+    # Resources for the websocket proxy
+    wsResources:
+      requests:
+        memory: "100Mi"
+        cpu: "0.1"
+  broker:
+    resources:
+      requests:
+        cpu: "0.5"
+        memory: "4Gi"
+    configData:
+      PULSAR_MEM: >
+        -Xms4096m
+        -Xmx4096m
+        -XX:MaxDirectMemorySize=8192m
+      PULSAR_GC: >
+        -Dio.netty.leakDetectionLevel=disabled
+        -Dio.netty.recycler.linkCapacity=1024
+        -XX:+ParallelRefProcEnabled
+        -XX:+UnlockExperimentalVMOptions
+        -XX:+DoEscapeAnalysis
+        -XX:ParallelGCThreads=32
+        -XX:ConcGCThreads=32
+        -XX:G1NewSizePercent=50
+        -XX:+DisableExplicitGC
+        -XX:-ResizePLAB
+        -XX:+ExitOnOutOfMemoryError
+      maxMessageSize: "104857600"
+      defaultRetentionTimeInMinutes: "10080"
+      defaultRetentionSizeInMB: "8192"
+      backlogQuotaDefaultLimitGB: "8"
+      backlogQuotaDefaultRetentionPolicy: producer_exception
+
+  bookkeeper:
+    configData:
+      PULSAR_MEM: >
+        -Xms4096m
+        -Xmx4096m
+        -XX:MaxDirectMemorySize=8192m
+      PULSAR_GC: >
+        -Dio.netty.leakDetectionLevel=disabled
+        -Dio.netty.recycler.linkCapacity=1024
+        -XX:+UseG1GC -XX:MaxGCPauseMillis=10
+        -XX:+ParallelRefProcEnabled
+        -XX:+UnlockExperimentalVMOptions
+        -XX:+DoEscapeAnalysis
+        -XX:ParallelGCThreads=32
+        -XX:ConcGCThreads=32
+        -XX:G1NewSizePercent=50
+        -XX:+DisableExplicitGC
+        -XX:-ResizePLAB
+        -XX:+ExitOnOutOfMemoryError
+        -XX:+PerfDisableSharedMem
+        -XX:+PrintGCDetails
+      nettyMaxFrameSizeBytes: "104867840"
+    resources:
+      requests:
+        cpu: "0.5"
+        memory: "4Gi"
+
+  zookeeper:
+
+    replicaCount: 1
+    configData:
+      PULSAR_MEM: >
+        -Xms1024m
+        -Xmx1024m
+      PULSAR_GC: >
+        -Dcom.sun.management.jmxremote
+        -Djute.maxbuffer=10485760
+        -XX:+ParallelRefProcEnabled
+        -XX:+UnlockExperimentalVMOptions
+        -XX:+DoEscapeAnalysis
+        -XX:+DisableExplicitGC
+        -XX:+PerfDisableSharedMem
+        -Dzookeeper.forceSync=no
+    resources:
+      requests:
+        cpu: "0.3"
+        memory: "512Mi"
+kafka:
+
+  resources:
+    requests:
+      cpu: "0.5"
+      memory: "1Gi"
+  zookeeper:
+
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "0.3"
+        memory: "512Mi"
+etcd:
+
+
+  replicaCount: 1
+  resources:
+    requests:
+      cpu: "0.3"
+      memory: "100Mi"
+minio:
+
+  resources:
+    requests:
+      cpu: "0.3"
+      memory: "512Mi"
+standalone:
+  persistence:
+    persistentVolumeClaim:
+      storageClass: local-path
+  resources:
+    requests:
+      cpu: "1"
+      memory: "3.5Gi"
+    limits:
+      cpu: "4"
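The new `pr-arm.yaml` above pins the CI deployment to arm64 nodes and trims resource requests for the coordinator, node, and dependency pods. As a quick, hedged way to sanity-check such a values file before handing it to Helm, one could load it locally and confirm the node selector and a couple of requests are present; the snippet below is a sketch only, assumes PyYAML is installed, and the keys it checks are the ones defined in the file.

```python
# Sketch only: a local sanity check for the values file added above.
# Requires PyYAML (pip install pyyaml); run from the repository root.
import yaml

with open("tests/scripts/values/ci/pr-arm.yaml") as f:
    values = yaml.safe_load(f)

# The PR run is expected to be scheduled on arm64 nodes only.
assert values["nodeSelector"]["kubernetes.io/arch"] == "arm64"
# Spot-check one of the trimmed resource requests.
assert values["queryNode"]["resources"]["requests"]["cpu"] == "0.5"
print("pr-arm.yaml looks structurally sound")
```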