From 565d69bd3f8279bc99d8ed2f0162458e97960eb0 Mon Sep 17 00:00:00 2001 From: LocoRichard Date: Fri, 12 Nov 2021 11:39:55 +0800 Subject: [PATCH] Doc iteration userguide (#1069) --- site/en/Variables.json | 2 +- .../cluster/install_cluster-docker.md | 2 +- .../cluster/install_cluster-helm.md | 2 +- .../cluster/install_cluster-milvusoperator.md | 4 +- site/en/getstarted/install-node.md | 2 +- site/en/getstarted/install-pymilvus.md | 2 +- site/en/getstarted/install_offline-docker.md | 2 +- site/en/getstarted/install_offline-helm.md | 2 +- .../standalone/install_standalone-docker.md | 2 +- .../standalone/install_standalone-helm.md | 2 +- site/en/home/home.json | 8 +- site/en/home/home.md | 10 +- site/en/menuStructure/en.json | 38 +- site/en/reference/schema/collection_schema.md | 2 +- site/en/reference/schema/field_schema.md | 2 +- site/en/userGuide/build.md | 96 ----- site/en/userGuide/connect.md | 102 ----- site/en/userGuide/create.md | 252 ----------- site/en/userGuide/delete.md | 11 - site/en/userGuide/drop.md | 133 ------ site/en/userGuide/insert.md | 129 ------ site/en/userGuide/manage_collection.md | 271 ++++++++++++ site/en/userGuide/manage_connection.md | 117 +++++ site/en/userGuide/manage_data.md | 223 ++++++++++ site/en/userGuide/manage_index.md | 145 +++++++ site/en/userGuide/manage_partition.md | 142 +++++++ site/en/userGuide/query.md | 289 ------------- site/en/userGuide/search/expression.md | 72 ---- site/en/userGuide/search/hybridsearch.md | 384 +++++++---------- site/en/userGuide/search/query.md | 207 +++++++++ site/en/userGuide/search/search.md | 349 +++++++++------ site/en/userGuide/search/timetravel.md | 117 +++++ site/zh-CN/Variables.json | 2 +- .../cluster/install_cluster-milvusoperator.md | 4 +- site/zh-CN/getstarted/install-node.md | 2 +- site/zh-CN/getstarted/install-pymilvus.md | 2 +- site/zh-CN/home/home.json | 10 +- site/zh-CN/home/home.md | 9 +- site/zh-CN/menuStructure/cn.json | 38 +- .../reference/schema/collection_schema.md | 2 +- 
site/zh-CN/reference/schema/field_schema.md | 2 +- site/zh-CN/userGuide/build.md | 89 ---- site/zh-CN/userGuide/connect.md | 92 ---- site/zh-CN/userGuide/create.md | 241 ----------- site/zh-CN/userGuide/delete.md | 10 - site/zh-CN/userGuide/drop.md | 125 ------ site/zh-CN/userGuide/insert.md | 121 ------ site/zh-CN/userGuide/manage_collection.md | 271 ++++++++++++ site/zh-CN/userGuide/manage_connection.md | 117 +++++ site/zh-CN/userGuide/manage_data.md | 223 ++++++++++ site/zh-CN/userGuide/manage_index.md | 145 +++++++ site/zh-CN/userGuide/manage_partition.md | 142 +++++++ site/zh-CN/userGuide/query.md | 280 ------------ site/zh-CN/userGuide/search/expression.md | 81 ---- site/zh-CN/userGuide/search/hybridsearch.md | 401 +++++++----------- site/zh-CN/userGuide/search/query.md | 207 +++++++++ site/zh-CN/userGuide/search/search.md | 372 +++++++++------- site/zh-CN/userGuide/search/timetravel.md | 117 +++++ 58 files changed, 3276 insertions(+), 2950 deletions(-) delete mode 100644 site/en/userGuide/build.md delete mode 100644 site/en/userGuide/connect.md delete mode 100644 site/en/userGuide/create.md delete mode 100644 site/en/userGuide/delete.md delete mode 100644 site/en/userGuide/drop.md delete mode 100644 site/en/userGuide/insert.md create mode 100644 site/en/userGuide/manage_collection.md create mode 100644 site/en/userGuide/manage_connection.md create mode 100644 site/en/userGuide/manage_data.md create mode 100644 site/en/userGuide/manage_index.md create mode 100644 site/en/userGuide/manage_partition.md delete mode 100644 site/en/userGuide/query.md delete mode 100644 site/en/userGuide/search/expression.md create mode 100644 site/en/userGuide/search/query.md create mode 100644 site/en/userGuide/search/timetravel.md delete mode 100644 site/zh-CN/userGuide/build.md delete mode 100644 site/zh-CN/userGuide/connect.md delete mode 100644 site/zh-CN/userGuide/create.md delete mode 100644 site/zh-CN/userGuide/delete.md delete mode 100644 site/zh-CN/userGuide/drop.md 
delete mode 100644 site/zh-CN/userGuide/insert.md create mode 100644 site/zh-CN/userGuide/manage_collection.md create mode 100644 site/zh-CN/userGuide/manage_connection.md create mode 100644 site/zh-CN/userGuide/manage_data.md create mode 100644 site/zh-CN/userGuide/manage_index.md create mode 100644 site/zh-CN/userGuide/manage_partition.md delete mode 100644 site/zh-CN/userGuide/query.md delete mode 100644 site/zh-CN/userGuide/search/expression.md create mode 100644 site/zh-CN/userGuide/search/query.md create mode 100644 site/zh-CN/userGuide/search/timetravel.md diff --git a/site/en/Variables.json b/site/en/Variables.json index 93300b83b..2b59750b9 100644 --- a/site/en/Variables.json +++ b/site/en/Variables.json @@ -1,7 +1,7 @@ { "milvus_release_version": "2.0.0-RC8", "milvus_python_sdk_version": "2.0.0rc8", - "milvus_node_sdk_version": "1.0.18", + "milvus_node_sdk_version": "1.0.19", "cpu_milvus_docker_image_version": "2.0.0-rc8", "gpu_milvus_docker_image_version": "" } diff --git a/site/en/getstarted/cluster/install_cluster-docker.md b/site/en/getstarted/cluster/install_cluster-docker.md index 8e8b568d0..0b9225b60 100644 --- a/site/en/getstarted/cluster/install_cluster-docker.md +++ b/site/en/getstarted/cluster/install_cluster-docker.md @@ -113,7 +113,7 @@ Having installed Milvus, you can: - Check [Hello Milvus](example_code.md) to run an example code with different SDKs to see what Milvus can do. 
- Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) diff --git a/site/en/getstarted/cluster/install_cluster-helm.md b/site/en/getstarted/cluster/install_cluster-helm.md index c4b8edc74..5c63d3674 100644 --- a/site/en/getstarted/cluster/install_cluster-helm.md +++ b/site/en/getstarted/cluster/install_cluster-helm.md @@ -151,7 +151,7 @@ Having installed Milvus, you can: - Check [Hello Milvus](example_code.md) to run an example code with different SDKs to see what Milvus can do. - Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) diff --git a/site/en/getstarted/cluster/install_cluster-milvusoperator.md b/site/en/getstarted/cluster/install_cluster-milvusoperator.md index 5004018e8..e8250c7bb 100644 --- a/site/en/getstarted/cluster/install_cluster-milvusoperator.md +++ b/site/en/getstarted/cluster/install_cluster-milvusoperator.md @@ -478,7 +478,7 @@ my-release-pulsar-zookeeper-1 1/1 Running 0 1 my-release-pulsar-zookeeper-2 1/1 Running 0 13m ``` -When the Milvus cluster is installed, you can learn how to [Connect to Milvus server](connect.md) +When the Milvus cluster is installed, you can learn how to [Connect to Milvus server](manage_connection.md) ## Uninstall the Milvus cluster @@ -509,7 +509,7 @@ If you use kind to install the K8s cluster, run $ kind delete cluster --name myk Having installed Milvus, you can: - Check [Hello Milvus](example_code.md) to run an example code with different SDKs to see what Milvus can do. 
- Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) - [Upgrade Milvus Using Helm Chart](upgrade.md). diff --git a/site/en/getstarted/install-node.md b/site/en/getstarted/install-node.md index 32f261980..71858198f 100644 --- a/site/en/getstarted/install-node.md +++ b/site/en/getstarted/install-node.md @@ -32,7 +32,7 @@ $ npm install @zilliz/milvus2-sdk-node Having installed Milvus Node.js SDK, you can: - Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) diff --git a/site/en/getstarted/install-pymilvus.md b/site/en/getstarted/install-pymilvus.md index f55f39995..2bd44621f 100644 --- a/site/en/getstarted/install-pymilvus.md +++ b/site/en/getstarted/install-pymilvus.md @@ -46,7 +46,7 @@ $ python -c "from pymilvus import Collection" Having installed PyMilvus, you can: - Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) diff --git a/site/en/getstarted/install_offline-docker.md b/site/en/getstarted/install_offline-docker.md index 6e767e260..e628e3869 100644 --- a/site/en/getstarted/install_offline-docker.md +++ b/site/en/getstarted/install_offline-docker.md @@ -83,7 +83,7 @@ Having installed Milvus, you can: - Check [Hello Milvus](example_code.md) to run an example code with different SDKs to see what Milvus can do. 
- Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) diff --git a/site/en/getstarted/install_offline-helm.md b/site/en/getstarted/install_offline-helm.md index 82db9929d..b320da082 100644 --- a/site/en/getstarted/install_offline-helm.md +++ b/site/en/getstarted/install_offline-helm.md @@ -87,7 +87,7 @@ Having installed Milvus, you can: - Check [Hello Milvus](example_code.md) to run an example code with different SDKs to see what Milvus can do. - Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) diff --git a/site/en/getstarted/standalone/install_standalone-docker.md b/site/en/getstarted/standalone/install_standalone-docker.md index a16cbc6bc..9fa050040 100644 --- a/site/en/getstarted/standalone/install_standalone-docker.md +++ b/site/en/getstarted/standalone/install_standalone-docker.md @@ -85,7 +85,7 @@ Having installed Milvus, you can: - Check [Hello Milvus](example_code.md) to run an example code with different SDKs to see what Milvus can do. - Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) diff --git a/site/en/getstarted/standalone/install_standalone-helm.md b/site/en/getstarted/standalone/install_standalone-helm.md index 7743d30c7..3820c11b9 100644 --- a/site/en/getstarted/standalone/install_standalone-helm.md +++ b/site/en/getstarted/standalone/install_standalone-helm.md @@ -127,7 +127,7 @@ Having installed Milvus, you can: - Check [Hello Milvus](example_code.md) to run an example code with different SDKs to see what Milvus can do. 
- Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) diff --git a/site/en/home/home.json b/site/en/home/home.json index a1d6c80f1..c26f205e6 100644 --- a/site/en/home/home.json +++ b/site/en/home/home.json @@ -28,7 +28,7 @@ "order": -1, "title": "Welcome to Milvus Documentation! ", "desc": [ - "Here you will learn about what Milvus is, and how to install, use, and deploy Milvus to build an application according to your business need. You will also find FAQs and API references here.", + "Here you will learn about what Milvus is, and how to install, use, and deploy Milvus to build an application according to your business need. You will also find FAQs and API references here.", "You can start by browsing the recomended contents below or using the search box at the top left to search across the documentation. If you do not find the information you are looking for, feel free to ask MilMil at the bottom right or submit an issue via GitHub by using the buttons at the top right of each page." ] }, @@ -38,15 +38,15 @@ "label": "Use", "list": [{ "text": "Create a Collection", - "link": "create.md" + "link": "manage_collection.md" }, { "text": "Insert Data", - "link": "insert.md" + "link": "manage_data.md" }, { "text": "Build an Index", - "link": "build.md" + "link": "manage_index.md" }, { "text": "Vector Similarity Search", diff --git a/site/en/home/home.md b/site/en/home/home.md index 350739d8b..e245281a8 100644 --- a/site/en/home/home.md +++ b/site/en/home/home.md @@ -4,7 +4,9 @@ id: home.md # Welcome to Milvus Docs! -Here you will learn about [what Milvus is](overview.md), and how to [install](install_standalone-docker.md), [use](connect.md), and [deploy](aws.md) Milvus to build an [application](image_similarity_search.md) according to your business need. 
You will also find [FAQs](performance_faq.md) and [API references](https://milvus.io/api-reference/pymilvus/v2.0.0rc8/api/collection.html) here. + +Here you will learn about [what Milvus is](overview.md), and how to [install](install_standalone-docker.md), [use](manage_connection.md), and [deploy](aws.md) Milvus to build an [application](image_similarity_search.md) according to your business need. You will also find [FAQs](performance_faq.md) and [API references](https://milvus.io/api-reference/pymilvus/v2.0.0rc8/api/collection.html) here. + You can start by browsing the recomended contents below or using the search box at the top left to search across the documentation. If you do not find the information you are looking for, feel free to ask _MilMil_![MilMil](../../../assets/icon_bird.svg) at the bottom right or submit an issue via GitHub by using the buttons at the top right of each page. @@ -45,9 +47,9 @@ You can start by browsing the recomended contents below or using the search box

Use

-- [Create a Collection](create.md) -- [Insert Data](insert.md) -- [Build an Index](build.md) +- [Create a Collection](manage_collection.md) +- [Insert Data](manage_data.md) +- [Build an Index](manage_index.md) - [Vector Similarity Search](search.md) - [Query](query.md)
diff --git a/site/en/menuStructure/en.json b/site/en/menuStructure/en.json index 892025101..92e53ed4d 100644 --- a/site/en/menuStructure/en.json +++ b/site/en/menuStructure/en.json @@ -151,60 +151,52 @@ "isMenu": true }, { - "id": "connect.md", - "title": "Connect to Milvus Server", + "id": "manage_connection.md", + "title": "Manage Milvus Connections", "label1": "userguide", "label2": "", "label3": "", "order": 0 }, { - "id": "create.md", - "title": "Create a Collection or Partition", + "id": "manage_collection.md", + "title": "Manage Collections", "label1": "userguide", "label2": "", "label3": "", "order": 1 }, { - "id": "insert.md", - "title": "Insert Data", + "id": "manage_partition.md", + "title": "Manage Partitions", "label1": "userguide", "label2": "", "label3": "", "order": 2 }, { - "id": "build.md", - "title": "Build an Index", + "id": "manage_data.md", + "title": "Manage Data", "label1": "userguide", "label2": "", "label3": "", "order": 3 }, { - "id": "drop.md", - "title": "Drop Collection/Partition/Index", + "id": "manage_index.md", + "title": "Manage Indexes", "label1": "userguide", "label2": "", "label3": "", "order": 4 }, - { - "id": "delete.md", - "title": "Delete an Entity", - "label1": "userguide", - "label2": "", - "label3": "", - "order": 5 - }, { "id": "search", "title": "Search and Query", "label1": "userguide", "label2": "", "label3": "", - "order": 6, + "order": 5, "isMenu": true }, { @@ -229,6 +221,14 @@ "label1": "userguide", "label2": "search", "label3": "", + "order": 2 + }, + { + "id": "timetravel.md", + "title": "Search with Time Travel", + "label1": "userguide", + "label2": "search", + "label3": "", "order": 3 }, { diff --git a/site/en/reference/schema/collection_schema.md b/site/en/reference/schema/collection_schema.md index bb08c8550..0709f7fbd 100644 --- a/site/en/reference/schema/collection_schema.md +++ b/site/en/reference/schema/collection_schema.md @@ -5,7 +5,7 @@ summary: Learn how to define a collection schema in Milvus. 
# Collection Schema -A collection schema is the logical definition of a collection. Usually you need to define the [field schema](field_schema.md) before defining a collection schema and [creating a collection](create.md). +A collection schema is the logical definition of a collection. Usually you need to define the [field schema](field_schema.md) before defining a collection schema and [creating a collection](manage_collection.md). ## Collection schema properties diff --git a/site/en/reference/schema/field_schema.md b/site/en/reference/schema/field_schema.md index be6d2fdb1..67f55428f 100644 --- a/site/en/reference/schema/field_schema.md +++ b/site/en/reference/schema/field_schema.md @@ -5,7 +5,7 @@ summary: Learn how to define a field schema in Milvus. # Field Schema -A field schema is the logical definition of a field. It is the first thing you need to define before defining a [collection schema](collection_schema.md) and [creating a collection](create.md). +A field schema is the logical definition of a field. It is the first thing you need to define before defining a [collection schema](collection_schema.md) and [creating a collection](manage_collection.md). Milvus 2.0 supports a primary key field, a scalar field, and a vector field in a collection. diff --git a/site/en/userGuide/build.md b/site/en/userGuide/build.md deleted file mode 100644 index 745e85045..000000000 --- a/site/en/userGuide/build.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -id: build.md -related_key: create index -summary: Learn how to build an index for vectors in Milvus. - ---- - -# Build an Index - -This topic describes how to build an index for a field. See [Vector Index](index.md) for more information about setting index parameters. - -
-Parameters marked with * are specific to Python SDK, and those marked with ** are specific to Node.js SDK. -
- -1. Prepare the index parameters: - -{{fragments/multiple_code.md}} - -```python ->>> index_param = { - "metric_type":"L2", - "index_type":"IVF_FLAT", - "params":{"nlist":1024} - } -``` - -```javascript -const index_param = { - metric_type: "L2", - index_type: "IVF_FLAT", - params: JSON.stringify({ nlist: 1024 }), -}; -``` - -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - - - - -
Parameter - DescriptionNote
metric_typeMetrics used to measure similarity of vectorsFind more options in Simlarity Metrics.
Mandatory
index_typeType of index used to accelerate the vector searchFind more options in Index Selection.
Mandatory
paramsBuilding parameter(s) specific to the indexFind more parameter details of different indexes in Index Selection.
Mandatory
-
- -2. Build an index: - -{{fragments/multiple_code.md}} - -```python ->>> collection.create_index(field_name=field_name, index_params=index_param) -Status(code=0, message='') -``` - -```javascript -await milvusClient.indexManager.createIndex({ - collection_name: COLLECTION_NAME, - field_name: FIELD_NAME, - extra_params: index_param, -}); -``` - -3. View index details: - -{{fragments/multiple_code.md}} - -```python ->>> collection.index().params -{'metric_type': 'L2', 'index_type': 'IVF_FLAT', 'params': {'nlist': 1024}} -``` - -```javascript -await milvusClient.indexManager.describeIndex({ - collection_name: COLLECTION_NAME, -}); -``` diff --git a/site/en/userGuide/connect.md b/site/en/userGuide/connect.md deleted file mode 100644 index e83efc089..000000000 --- a/site/en/userGuide/connect.md +++ /dev/null @@ -1,102 +0,0 @@ ---- -id: connect.md -related_key: connect Milvus -summary: Learn how to connect Milvus server. ---- - -# Connect to Milvus Server - -This topic describes how to connect to and disconnect from a Milvus server. - -If you choose to operate in the Python interactive mode, type `python3` in your terminal. - -
-Parameters marked with * are specific to Python SDK, and those marked with ** are specific to Node.js SDK. -
- -## Connect to the Milvus server - -Construct a Milvus connection and register it under given alias. - -{{fragments/multiple_code.md}} - -```python ->>> from pymilvus import connections ->>> connections.connect("default", host='localhost', port='19530') -``` - -```javascript -import { MilvusClient } from "@zilliz/milvus2-sdk-node"; -const milvusClient = new MilvusClient("localhost:19530"); -``` - -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Parameter - DescriptionNote
alias*Alias for the Milvus serverData type: String
Mandatory
host*IP address of the Milvus serverMandatory
port*Port of the Milvus serverMandatory
address**Address of the Milvus server"server_IP:server_port"
Mandatory
-
- -## Disconnect from the Milvus server - -When you no longer need Milvus services, you can disconnect from Milvus server: - -{{fragments/multiple_code.md}} - -```python ->>> connections.disconnect("default") -``` - - -```javascript -await milvusClient.closeConnection(); -``` - -
- Detailed Description - - - - - - - - - - - - - - -
Parameter - DescriptionNote
alias*Alias for the Milvus serverData type: String
Mandatory
-
- diff --git a/site/en/userGuide/create.md b/site/en/userGuide/create.md deleted file mode 100644 index f3b274101..000000000 --- a/site/en/userGuide/create.md +++ /dev/null @@ -1,252 +0,0 @@ ---- -id: create.md -related_key: create collection -summary: Learn how to create a collection in Milvus. ---- - -# Create a Collection or Partition - -This topic describes how to create a collection or a partition in Milvus. - -
-Parameters marked with * are specific to Python SDK, and those marked with ** are specific to Node.js SDK. -
- -## Create a collection - -Collections can only be created after successfully connecting to the Milvus server. - -
-The created collection must contain a primary key field. Int64 is the only supported data type for the primary key field for now. -
- -1. Prepare collection parameters, including collection name and field parameters. Refer to API documents for respective languages for a detailed description of these parameters. - -{{fragments/multiple_code.md}} - -```python -collection_name = "example_collection" -field_name = "example_field" -from pymilvus import Collection, CollectionSchema, FieldSchema, DataType -pk = FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True) -field = FieldSchema(name=field_name, dtype=DataType.FLOAT_VECTOR, dim=8) -schema = CollectionSchema(fields=[pk,field], description="example collection") -``` - -```javascript -const COLLECTION_NAME = "example_collection"; -const FIELD_NAME = "example_field"; - -const params = { - collection_name: COLLECTION_NAME, - fields: [ - { - name: FIELD_NAME, - description: "vector field", - data_type: DataType.FloatVector, - - type_params: { - dim: "8", - }, - }, - { - name: "age", - data_type: DataType.Int64, - autoID: true, - is_primary_key: true, - description: "", - }, - ], -}; -``` - -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ParameterDescriptionNote
collection_nameName of the collection to createData type: String
field_nameName of the field in the collectionData type: String
SchemaSchema used to create a collection and the fields within. Refer to field schema and collection schema for detailed description 
descriptionDescription of the collectionData type: String
-
- -2. Create a collection: - -{{fragments/multiple_code.md}} - -```python ->>> collection = Collection(name=collection_name, schema=schema, using='default', shards_num=2) - -# Get an existing collection by its name. -collection=Collection(name=collection_name) -``` - -```javascript -await milvusClient.collectionManager.createCollection(params); -``` - -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - -
ParameterDescriptionNote
using*By specifying the server alias here, you can decide in which Milvus server you create a collection.Optional
shards_num*Number of the shards for the collection to createOptional
-
- -3. Check if the collection is created successfully: - -{{fragments/multiple_code.md}} - -```python ->>> import pymilvus ->>> pymilvus.utility.get_connection().has_collection(collection_name) -True -``` - -```javascript -await milvusClient.collectionManager.hasCollection({ - collection_name: COLLECTION_NAME, -}); -``` - -4. List all created collections: - -{{fragments/multiple_code.md}} - -```python ->>> pymilvus.utility.get_connection().list_collections() -['example_collection'] -``` - -```javascript -await milvusClient.collectionManager.showCollections(); -``` - -5. View collection statistics, such as row count: - -{{fragments/multiple_code.md}} - -```python ->>> collection.num_entities -0 -``` - -```javascript -await milvusClient.collectionManager.getCollectionStatistics({ - collection_name: COLLECTION_NAME, -}); -``` - -## Create a partition - -Search performance worsens as more vectors are inserted into the collection. To help mitigate declining search performance, consider creating collection partitions. Partitioning is a way to separate data. Partition names narrow a search to a specific number of vectors, improving query performance. To improve search efficiency, divide a collection into several partitions by name. - -{{fragments/multiple_code.md}} - -```python ->>> partition_name = "example_partition" ->>> partition = collection.create_partition(partition_name) -``` - -```javascript -await milvusClient.partitionManager.createPartition({ - collection_name: COLLECTION_NAME, - partition_name: "example_partition", -}); -``` - -
- Detailed Description - - - - - - - - - - - - - - - -
ParameterDescriptionNote
partition_nameName of the partition to createData type: String
-
- -Milvus creates a default partition name, `_default`, for new collections. After creating a partition, you have two partition names, `example_partition` and `_default`. - -List all partitions in a collection: - -{{fragments/multiple_code.md}} - -```python ->>> collection.partitions -[{"name": "_default", "description": "", "num_entities": 0}, {"name": "example_partition", "description": "", "num_entities": 0}] -``` - -```javascript -await milvusClient.partitionManager.showPartitions({ - collection_name: COLLECTION_NAME, -}); -``` - -Check if a partition is successfully created: - -{{fragments/multiple_code.md}} - -```python ->>> collection.has_partition(partition_name) -True -``` - -```javascript -await milvusClient.partitionManager.hasPartition({ - collection_name: COLLECTION_NAME, - partition_name: "example_partition", -}); -``` diff --git a/site/en/userGuide/delete.md b/site/en/userGuide/delete.md deleted file mode 100644 index f808a1a24..000000000 --- a/site/en/userGuide/delete.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -id: delete.md -related_key: delete -summary: Learn how to delete an entity in Milvus. ---- - -# Delete an Entity - -
-This feature is still under development and will be available when a stable version of Milvus 2.0 is released. -
diff --git a/site/en/userGuide/drop.md b/site/en/userGuide/drop.md deleted file mode 100644 index fd2028c12..000000000 --- a/site/en/userGuide/drop.md +++ /dev/null @@ -1,133 +0,0 @@ ---- -id: drop.md -related_key: drop -summary: Learn how to drop index, partition, and collection in Milvus. ---- - -# Drop Operations - -This topic describes how to drop an index, a partition, or a collection. - -The drop operations affect data already inserted into Milvus. Think twice before you delete. - -
-Parameters marked with * are specific to Python SDK, and those marked with ** are specific to Node.js SDK. -
- -## Drop an index - -Drop the index of a specified field in a specified collection: - -
-Current release of Milvus only supports building and dropping index on vector field. Future version of Milvus will supports these operations on scalar field. -
- -{{fragments/multiple_code.md}} - -```python ->>> collection.drop_index() -``` - -```javascript -await milvusClient.indexManager.dropIndex({ - collection_name: COLLECTION_NAME, -}); -``` - -
- Detailed Description - - - - - - - - - - - - - - -
Parameter - DescriptionNote
collection_name**Name of the collection to drop index fromMandatory
-
- -## Drop a partition - -Remove a partition and all vectors under it: - -{{fragments/multiple_code.md}} - -```python ->>> collection.drop_partition(partition_name=partition_name) -``` - -```javascript -await milvusClient.partitionManager.dropPartition({ - collection_name: COLLECTION_NAME, - partition_name: PARTITION_NAME, -}); -``` - -
- Detailed Description - - - - - - - - - - - - - - - - - - - -
Parameter - DescriptionNote
partition_nameName of the partition to dropMandatory
collection_name**Name of the collection to drop partition fromMandatory
-
- -## Drop a collection - -When you no longer need a collection, you can delete it. - -{{fragments/multiple_code.md}} - -```python ->>> collection.drop() -``` - -```javascript -await milvusClient.collectionManager.dropCollection({ - collection_name: COLLECTION_NAME, -}); -``` - -
- Detailed Description - - - - - - - - - - - - - - -
Parameter - DescriptionNote
collection_name**Name of the collection to dropMandatory
-
diff --git a/site/en/userGuide/insert.md b/site/en/userGuide/insert.md deleted file mode 100644 index 2e077e943..000000000 --- a/site/en/userGuide/insert.md +++ /dev/null @@ -1,129 +0,0 @@ ---- -id: insert.md -related_key: insert data to Milvus -summary: Learn how to insert data to Milvus. - ---- - -# Insert Vectors - -This topic describes how to insert vectors into a collection or partition. - -
-Parameters marked with * are specific to Python SDK, and those marked with ** are specific to Node.js SDK. -
- -1. Generate random vectors: - -{{fragments/multiple_code.md}} - -```python ->>> import random ->>> vectors = [[random.random() for _ in range(8)] for _ in range(10)] ->>> entities = [vectors] -``` - -```javascript -const entities = Array.from({ length: 10 }, () => ({ - [FIELD_NAME]: Array.from({ length: 8 }, () => Math.floor(Math.random() * 10)), -})); -``` - -2. Insert the random vectors to the newly created collection. Milvus automatically assigns IDs to the inserted vectors, similar to AutoID in a relational database. - -_Milvus returns the value of MutationResult, which contains the corresponding primary_keys of the inserted vectors._ - -{{fragments/multiple_code.md}} - -```python ->>> mr = collection.insert(entities) -# Get the primary keys of the `MutationResult` ->>> mr.primary_keys -[425790736918318406, 425790736918318407, 425790736918318408, ...] -``` - -```javascript -await milvusClient.dataManager.insert({{ - collection_name: COLLECTION_NAME, - fields_data: entities, -}); -``` - -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - - - - -
Parameter - DescriptionNote
dataData to insert into MilvusMandatory
collection_name**Name of the collection to insert data intoMandatory
partition_nameName of the partition to insert data intoOptional
-
- -3. By specifying `partition_name` when inserting, you can insert vectors to a specified partition: - -{{fragments/multiple_code.md}} - -```python ->>> collection.insert(data=entities, partition_name=partition_name) -``` - -```javascript -await milvusClient.dataManager.insert({{ - collection_name: COLLECTION_NAME, - partition_name: partition_name - fields_data: entities, -}); -``` - -4. Milvus temporarily stores the inserted vectors in the memory. To flush them to the disk, run: - -{{fragments/multiple_code.md}} - -```python ->>> pymilvus.utility.get_connection().flush([collection_name]) -``` - -```javascript -await milvusClient.dataManager.flush({ collection_names: [COLLECTION_NAME] }); -``` - -
- Detailed Description - - - - - - - - - - - - - - -
Parameter - DescriptionNote
collection_nameName of the collection to flushMandatory
-
- diff --git a/site/en/userGuide/manage_collection.md b/site/en/userGuide/manage_collection.md new file mode 100644 index 000000000..92d42bb57 --- /dev/null +++ b/site/en/userGuide/manage_collection.md @@ -0,0 +1,271 @@ +--- +id: manage_collection.md +related_key: create collection +summary: Learn how to manage collections in Milvus. +--- + +# Manage Collections + +This topic describes how to manage collections in Milvus. + +A collection consists of one or more partitions. While creating a new collection, Milvus creates a default partition `_default`. See [Glossary - Collection](glossary.md#Collection) for more information. + +The following example is based on a two-shard collection named `example_collection` with an eight-dimension float vector field, and an INT64, `auto_id` enabled primary key field. + + +## Create a collection + +
+ +
+ + +First, prepare necessary parameters, including field schema, collection schema, and collection name. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import CollectionSchema, FieldSchema, DataType +>>> pk = FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True) +>>> field = FieldSchema(name="example_field", dtype=DataType.FLOAT_VECTOR, dim=8) +>>> schema = CollectionSchema(fields=[pk,field], description="example collection") +>>> collection_name = "example_collection" +``` + +```javascript +const params = { + collection_name: "example_collection", + description: "example collection", + fields: [ + { + name: "example_field", + description: "", + data_type: 101, // DataType.FloatVector + type_params: { + dim: "8", + }, + }, + { + name: "pk", + data_type: 5, // DataType.Int64 + autoID: true, + is_primary_key: true, + description: "", + }, + ], +}; +``` + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
FieldSchemaSchema of the fields within the collection to create. Refer to Field Schema for more information.
CollectionSchemaSchema of the collection to create. Refer to Collection Schema for more information.
collection_nameName of the collection to create.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to create.
descriptionDescription of the collection to create.
fieldsSchema of the field and the collection to create. Refer to Field Schema and Collection Schema for more information.
data_typeData type of the field to create. Refer to data type reference number for more information.
+ +Then, create a collection with the parameters you created above. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection(name=collection_name, schema=schema, using='default', shards_num=2) +``` + +```javascript +await milvusClient.collectionManager.createCollection(params); +``` + + + + + + + + + + + + + + + + + + +
ParameterDescription
using (optional)By specifying the server alias here, you can choose in which Milvus server you create a collection.
shards_num (optional)Number of the shards for the collection to create.
+ + + + +## Verify if a collection exists + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import utility +>>> utility.has_collection("example_collection") +``` + +```javascript +await milvusClient.collectionManager.hasCollection({ + collection_name: "example_collection", +}); +``` + + + +## List all collections + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import utility +>>> utility.list_collections() +``` + +```javascript +await milvusClient.collectionManager.showCollections(); +``` + +## View collection statistics + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.num_entities +``` + +```javascript +await milvusClient.collectionManager.getCollectionStatistics({ collection_name: "example_collection",}); +``` + + +## Load a collection + +All CRUD operations within Milvus are executed in memory. Load the collection to memory before searching or deleting data. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.load() +``` + +```javascript +await milvusClient.collectionManager.loadCollection({ + collection_name: "example_collection", +}); +``` + + + + + + + + + + + + + + +
ParameterDescription
partition_name (optional)Name of the partition to load.
+ + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to load.
+ +## Drop a collection + +Remove a collection and the data within. + +
+The drop operation is irreversible. Dropping a collection deletes all data within it. +
+ + +{{fragments/multiple_code.md}} + +```python +>>> collection.drop() +``` + +```javascript +await milvusClient.collectionManager.dropCollection({ collection_name: "example_collection",}); +``` + +## What's next + +- Learn more basic operations of Milvus: + - [Insert data into Milvus](manage_data.md) + - [Create a partition](manage_partition.md) + - [Build an index for vectors](manage_index.md) + - [Conduct a vector search](search.md) + - [Conduct a hybrid search](hybridsearch.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) + diff --git a/site/en/userGuide/manage_connection.md b/site/en/userGuide/manage_connection.md new file mode 100644 index 000000000..2dfe524fd --- /dev/null +++ b/site/en/userGuide/manage_connection.md @@ -0,0 +1,117 @@ +--- +id: manage_connection.md +related_key: connect Milvus +summary: Learn how to connect to a Milvus server. +--- + +# Manage Milvus Connections + +This topic describes how to connect to and disconnect from a Milvus server. + +
+ Make sure you are connected to a Milvus server before performing any operations. +
+ +Below example connects to a Milvus server with host as `localhost` and port as `19530` and disconnects from it. + + +## Connect to a Milvus server + +Construct a Milvus connection. Ensure to connect to Milvus server before any operations. + +{{fragments/multiple_code.md}} + +```python +# Run `python3` in your terminal to operate in the Python interactive mode. +>>> from pymilvus import connections +>>> connections.connect(alias="default", host='localhost', port='19530') +``` + +```javascript +import { MilvusClient } from "@zilliz/milvus2-sdk-node"; +const milvusClient = new MilvusClient("localhost:19530"); +``` + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
aliasAlias of the Milvus connection to construct.
hostIP address of the Milvus server.
portPort of the Milvus server.
+ + + + + + + + + + + + + + +
ParameterDescription
addressAddress of the Milvus connection to construct.
+ +## Disconnect from a Milvus server + +Disconnect from a Milvus server. + +{{fragments/multiple_code.md}} + +```python +>>> connections.disconnect("default") +``` + + +```javascript +await milvusClient.closeConnection(); +``` + + + + + + + + + + + + + + +
ParameterDescription
aliasAlias of the Milvus server to disconnect from.
+ +## What's next + +Having connected to a Milvus server, you can: + +- [Create a collection](manage_collection.md) +- [Manage data](manage_data.md) +- [Build a vector index](manage_index.md) +- [Conduct a vector search](search.md) +- [Conduct a hybrid search](hybridsearch.md) + +For advanced operations, check: + +- [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) +- [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) + diff --git a/site/en/userGuide/manage_data.md b/site/en/userGuide/manage_data.md new file mode 100644 index 000000000..f9ca2ad52 --- /dev/null +++ b/site/en/userGuide/manage_data.md @@ -0,0 +1,223 @@ +--- +id: manage_data.md +related_key: insert, delete +summary: Learn how to insert and delete data in Milvus. +--- + +# Manage Data + +This topic describes how to insert and delete data in Milvus. + +## Insert data + +First, prepare the data to insert. + +This topic inserts randomly generated 2,000 rows of eight-dimensional vector data as the example data. Real applications will likely use much higher dimensional vectors than this. You can prepare your own data to replace the example. + +{{fragments/multiple_code.md}} + +```python +>>> import random +>>> vectors = [[random.random() for _ in range(8)] for _ in range(2000)] +>>> entities = [vectors] +``` + +```javascript +const entities = Array.from({ length: 2000 }, () => ({ + ["example_field"]: Array.from({ length: 8 }, () => Math.random()), +})); +``` + +Insert the data to the collection. By specifying `partition_name`, you can decide to which partition to insert the data. + +With the collection schema `auto_id` enabled, Milvus automatically assigns an ID (primary key value) to each inserted data. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection.
+>>> mr = collection.insert(entities) +``` + +```javascript +const mr = await milvusClient.dataManager.insert({{ + collection_name: "example_collection", + fields_data: entities, +}); +``` + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to get.
dataData to insert into Milvus.
partition_name (optional)Name of the partition to insert data into.
+ + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to insert data into.
partition_name (optional)Name of the partition to insert data into.
fields_dataData to insert into Milvus.
+ + +After the data are inserted, Milvus returns `MutationResult` as an object. You can check the value of `MutationResult`, which contains the corresponding primary keys of the inserted data. + +{{fragments/multiple_code.md}} + +```python +>>> mr.primary_keys +``` + +```javascript +console.log(mr.IDs) +``` + +``` +[425790736918318406, 425790736918318407, 425790736918318408, ...] +``` + + + +## Delete entities + +Milvus supports deleting entities by primary key specified with boolean expression. + + +
+ +
+ +All CRUD operations within Milvus are executed in memory. Before deleting, load the collection that contains the entities you expect to delete to memory. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.load() +``` + +```javascript +await milvusClient.collectionManager.loadCollection({ + collection_name: "example_collection", +}); +``` + + + +Prepare the boolean expression that filters the entities to delete. See [Boolean Expression Rules](boolean.md) for more information. + +The following example filters data with primary key values of `425790736918318406` and `425790736918318407`. + +{{fragments/multiple_code.md}} + +```python +>>> expr = "pk in [425790736918318406,425790736918318407]" +``` + +```javascript +const expr = "pk in [425790736918318406,425790736918318407]"; +``` + + +Delete the entities with the boolean expression you created. By specifying `partition_name`, you can decide from which partition to delete the entities and thus save the resources. + +{{fragments/multiple_code.md}} + +```python +>>> collection.delete(expr) +``` + +```javascript +await milvusClient.dataManager.deleteEntities({ + collection_name: "example_collection", + expr: expr, +}); +``` + + + + + + + + + + + + + + + + + + + +
ParameterDescription
exprBoolean expression that specifies the entities to delete.
partition_name (optional)Name of the partition to delete entities from.
+ + + + +You can verify the delete operation by checking the number of entities after deleting. + +```python +>>> collection.num_entities +1998 +``` + +```javascript +const res = await collectionManager.getCollectionStatistics({ + collection_name: "example_collection", +}); +console.log(res.data.row_count); +``` + + +## What's next + +- Learn more basic operations of Milvus: + - [Build an index for vectors](manage_index.md) + - [Conduct a vector search](search.md) + - [Conduct a hybrid search](hybridsearch.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) + diff --git a/site/en/userGuide/manage_index.md b/site/en/userGuide/manage_index.md new file mode 100644 index 000000000..598452530 --- /dev/null +++ b/site/en/userGuide/manage_index.md @@ -0,0 +1,145 @@ +--- +id: manage_index.md +related_key: create index +summary: Learn how to build an index for vectors in Milvus. +--- + +# Manage Indexes + +This topic describes how to manage indexes in Milvus. See [Vector Index](index.md) and [Index Selection](index_selection.md) for more information. + +Vector indexes are an organizational unit of metadata used to accelerate [vector similarity search](search.md). Without index built on vectors, Milvus will perform a brute-force search by default. + +
+
+ +## Build an index + +The following example builds a 1024-cluster IVF_FLAT index with Euclidean distance (L2) as the similarity metrics. You can choose the index and metric that suit your scenario. + +Prepare the index parameters. + +{{fragments/multiple_code.md}} + +```python +>>> index_params = { + "metric_type":"L2", + "index_type":"IVF_FLAT", + "params":{"nlist":1024} + } +``` + +```javascript +const index_params = { + metric_type: "L2", + index_type: "IVF_FLAT", + params: JSON.stringify({ nlist: 1024 }), +}; +``` + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
metric_typeType of metrics used to measure similarity of vectors. Find more options in Similarity Metrics.
index_typeType of index used to accelerate the vector search. Find more options in Index Selection.
paramsBuilding parameter(s) specific to the index. See Index Selection for more information.
+ + +Build the index by specifying the vector field name and index parameters. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.create_index(field_name="example_field", index_params=index_params) +``` + +```python +Status(code=0, message='') +``` + +```javascript +await milvusClient.indexManager.createIndex({ + collection_name: "example_collection", + field_name: "example_field", + extra_params: index_params, +}); +``` + + + +## View index details + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.index().params +``` + +```python +{'metric_type': 'L2', 'index_type': 'IVF_FLAT', 'params': {'nlist': 1024}} +``` + +```javascript +await milvusClient.indexManager.describeIndex({ + collection_name: "example_collection", +}); +``` + + +## Drop an index + +Drop the index if you are sure that you do not want to use it anymore. + +
+The drop operation is irreversible. Dropping an index removes all corresponding index files. +
+ + + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.drop_index() +``` + +```javascript +await milvusClient.indexManager.dropIndex({ + collection_name: "example_collection", +}); +``` + +## What's next + +- Learn more basic operations of Milvus: + - [Conduct a vector search](search.md) + - [Conduct a hybrid search](hybridsearch.md) + - [Search with Time Travel](timetravel.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) + diff --git a/site/en/userGuide/manage_partition.md b/site/en/userGuide/manage_partition.md new file mode 100644 index 000000000..1ada4a174 --- /dev/null +++ b/site/en/userGuide/manage_partition.md @@ -0,0 +1,142 @@ +--- +id: manage_partition.md +related_key: Partition +summary: Learn how to manage partitions in Milvus. + +--- + +# Manage Partitions + +This topic describes how to manage partitions in Milvus. + +Milvus allows you to divide the bulk of vector data into a small number of partitions. Search and other operations can then be limited to one partition to improve the performance. + +A collection consists of one or more partitions. While creating a new collection, Milvus creates a default partition `_default`. See [Glossary - Partition](glossary.md#Partition) for more information. + +The following example is based on a partition `example_partition` in the collection `example_collection`. + +## Create a partition + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. 
+>>> partition = collection.create_partition("example_partition") +``` + +```javascript +await milvusClient.partitionManager.createPartition({ + collection_name: "example_collection", + partition_name: "example_partition", +}); +``` + + + + + + + + + + + + + + + + + + +
ParameterDescription
partition_nameName of the partition to create.
description (optional)Description of the partition to create.
+ + + + + + + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to create a partition in.
partition_nameName of the partition to create.
+ +## List all partitions + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.partitions +``` + +```javascript +await milvusClient.partitionManager.showPartitions({ + collection_name: "example_collection", +}); +``` + + + +## Verify if a partition exists + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection.has_partition("example_partition") +``` + +```javascript +await milvusClient.partitionManager.hasPartition({ + collection_name: "example_collection", + partition_name: "example_partition", +}); +``` + + +## Drop a partition + +Remove a partition. + +
+The drop operation is irreversible. Dropping a partition deletes all data within it. +
+ + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection.drop_partition("example_partition") +``` + +```javascript +await milvusClient.partitionManager.dropPartition({ + collection_name: "example_collection", + partition_name: "example_partition", +}); +``` + +## What's next + +- Learn more basic operations of Milvus: + - [Insert data into Milvus](manage_data.md) + - [Build an index for vectors](manage_index.md) + - [Conduct a vector search](search.md) + - [Conduct a hybrid search](hybridsearch.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) + diff --git a/site/en/userGuide/query.md b/site/en/userGuide/query.md deleted file mode 100644 index 57c259894..000000000 --- a/site/en/userGuide/query.md +++ /dev/null @@ -1,289 +0,0 @@ ---- -id: query.md -related_key: query vectors -summary: Learn how to query vectors in Milvus. ---- - -# Query - -This topic describes how to conduct a query. - -In addition to vectors, Milvus supports data types such as boolean, integers, floating-point numbers, and more. - -A query is a search on all existing data. In Milvus, you can run a query which will return all the results that meet your specified requirements. Use [boolean expression](boolean.md) to specify the requirements. - -
-Parameters marked with * are specific to Python SDK, and those marked with ** are specific to Node.js SDK. -
- -1. Connect to the Milvus server: - -{{fragments/multiple_code.md}} - -```python ->>> from pymilvus import connections ->>> connections.connect("default", host='localhost', port='19530') -``` - -```javascript -import { MilvusClient } from "@zilliz/milvus2-sdk-node"; -const milvusClient = new MilvusClient("localhost:19530"); -``` - -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Parameter - DescriptionNote
alias*Alias for the Milvus serverData type: String
Mandatory
host*IP address of the Milvus serverMandatory
port*Port of the Milvus serverMandatory
address**Address of the Milvus server."server_IP:server_port"
Mandatory
-
- -2. Prepare collection parameters and create a collection: - -{{fragments/multiple_code.md}} - -```python ->>> from pymilvus import Collection, FieldSchema, CollectionSchema, DataType ->>> collection_name = "test_collection_search" ->>> schema = CollectionSchema([ -... FieldSchema("film_id", DataType.INT64, is_primary=True), -... FieldSchema("film_date", DataType.INT64), -... FieldSchema("films", dtype=DataType.FLOAT_VECTOR, dim=2) -... ]) ->>> collection = Collection(collection_name, schema, using='default', shards_num=2) -``` - -```javascript -const COLLECTION_NAME = "example_collection"; -const FIELD_NAME = "example_field"; - -const params = { - collection_name: COLLECTION_NAME, - fields: [ - { - name: "films", - description: "vector field", - data_type: DataType.FloatVector, - - type_params: { - dim: "8", - }, - }, - { - name: "film_id", - data_type: DataType.Int64, - autoID: false, - is_primary_key: true, - description: "", - }, - ], -}; - -await milvusClient.collectionManager.createCollection(params); -``` - -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Parameter - DescriptionNote
collection_nameName of the collection to createData type: String
field_nameName of the field in the collectionData type: String
SchemaSchema used to create a collection and the fields within. Refer to field schema and collection schema for detailed description.  
descriptionDescription of the collectionData type: String
using*By specifying the srever alias here, you can decide in which Milvus server you create a collection.Optional
shards_num*Number of the shards for the collection to createOptional
-
- -3. Insert random vectors to the newly created collection: - -{{fragments/multiple_code.md}} - -```python ->>> import random ->>> data = [ -... [i for i in range(10)], -... [1990 + i for i in range(10)], -... [[random.random() for _ in range(2)] for _ in range(10)], -... ] ->>> collection.insert(data) ->>> collection.num_entities -10 -``` - -```javascript -let id = 1; -const entities = Array.from({ length: 10 }, () => ({ - films: Array.from({ length: 2 }, () => Math.random() * 10), - film_id: id++, -})); - -await milvusClient.dataManager.insert({{ - collection_name: COLLECTION_NAME, - fields_data: entities, -}); -``` - -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - - - - -
Parameter - DescriptionNote
dataData to insert into MilvusMandatory
partition_nameName of the partition to insert data intoOptional
timeout*Timeout (in seconds) to allow for RPC. Clients wait until server responds or error occurs when it is set to None.Optional
-
- -4. Load the collection to memory and run a query: - -{{fragments/multiple_code.md}} - -```python ->>> collection.load() ->>> expr = "film_id in [2,4,6,8]" ->>> output_fields = ["film_id", "film_date"] ->>> res = collection.query(expr, output_fields) -``` - -```javascript -await milvusClient.collectionManager.loadCollection({ - collection_name: COLLECTION_NAME, -}); - -await milvusClient.dataManager.query({ - collection_name: COLLECTION_NAME, - expr: "film_id in [2,4,6,8]", - output_fields: ["film_id"], -}); -``` - -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - - - - -
Parameter - DescriptionNote
collection_name**Name of the collection to load and queryMandatory
exprBoolean expression used to filter attributeFind more expression details in Boolean Expression Rules.
Optional
output_fieldsName of the field to return (vector field not support in current release)Mandatory
-
- -
-In current release, data to be load must be under 70% of the total memory resources of all query nodes to reserve memory resources for execution engine. -
- -5. Check the returned results: - -{{fragments/multiple_code.md}} - -```python ->>> sorted_res = sorted(res, key=lambda k: k['film_id']) ->>> sorted_res -[{'film_id': 2, 'film_date': 1992}, - {'film_id': 4, 'film_date': 1994}, - {'film_id': 6, 'film_date': 1996}, - {'film_id': 8, 'film_date': 1998}] -``` - -```javascript -// query result -[{ film_id: "2" }, { film_id: "4" }, { film_id: "6" }, { film_id: "8" }]; -``` diff --git a/site/en/userGuide/search/expression.md b/site/en/userGuide/search/expression.md deleted file mode 100644 index a53eb5436..000000000 --- a/site/en/userGuide/search/expression.md +++ /dev/null @@ -1,72 +0,0 @@ ---- -id: expression.md -summary: learn about predicate expression rules in Milvus. ---- - -# Predicate Expressions -A predicate is an expression outputs a boolean value. Milvus conducts scalar filtering by searching with predicates. A predicate expression, when evaluated, returns either TRUE or FALSE. -View [Python SDK API Reference](/api-reference/pymilvus/{{var.milvus_python_sdk_version}}/api/collection.html) for instruction on using predicate expressions. -## Predicate Syntax -An expression can be either NONE or a logical expression. - -``` -Expr := LogicalExpr | NIL -``` - -### Types of predicate operators - -| Operator | Description | Examples | -| ---------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -| Relational operators | Relational operators use symbols to check for equality, inequality, or relative order between two expressions. Relational operators include `>`, `>=`, `<`, `<=`, `==`, and `!=` | | -| Logical operators | Logical operators perform a comparison between two expressions. 
The supported logical operators are: AND, && OR, ||, and NOT | | -| IN operator | The IN condition is satisfied when the expression to the left of the keyword IN is included in the list of items | | - -## Relational Operators - -Relational operators are symbols that compare one expression with another expression. Data types between left and right side of the operator must match. -The supported operators are: -- equals(==) -- not equals(!=) -- is greater than (>) -- is greater than or equal to (>=) -- is less than (<) -- is less than or equal to (<=) - -## Logical Operators -There are two types of logical operators, unary and binray. UnaryLogicalOp acts on only one logical expression, while BinaryLogicalOp compares one logic expression with another logic expression. -The supported operators are: -- NOT ! -- AND && -- OR || - -### Syntax - -``` -LogicalExpr := LogicalExpr BinaryLogicalOp LogicalExpr - | UnaryLogicalOp LogicalExpr - | "(" LogicalExpr ")" - | RelationalExpr - | InExpr -``` - -## IN Operator - -The IN operator matches values in a field to any of the items in the constant array, which must be a comma-separated list of items. Data types between left and right side of the operator must match. - -### Syntax - -``` -InExpr := IDENTIFIER "in" ConstantArray -ConstantArray := "[" Constant+, "]" -``` - -## Order of Evaluation - -The order in which the Milvus evaluates predicate expressions follows the table below: -1. Expressions inside parentheses -2. Not operators -3. Or operators -4. And Operators - - -Learn more about [Boolean Expression Rules](boolean.md). diff --git a/site/en/userGuide/search/hybridsearch.md b/site/en/userGuide/search/hybridsearch.md index 4b1c54cf9..2efd0d034 100644 --- a/site/en/userGuide/search/hybridsearch.md +++ b/site/en/userGuide/search/hybridsearch.md @@ -8,315 +8,249 @@ summary: Conduct a Hybrid Search with Milvus. This topic describes how to conduct a hybrid search. 
-In addition to vectors, Milvus supports data types such as boolean, integers, floating-point numbers, and more. A collection in Milvus can hold multiple fields for accommodating different data features or properties. Milvus is a flexible vector database that pairs scalar filtering with powerful vector similarity search. +A hybrid search is essentially a vector search with boolean filtering. By specifying [boolean expressions](boolean.md) that filter the scalar fields or the primary key field, you can limit your search with certain conditions. -
-Parameters marked with * are specific to Python SDK, and those marked with ** are specific to Node.js SDK. -
- -A hybrid search is a vector similarity search, during which you can filter the scalar data by specifying a [boolean expression](boolean.md). - -1. Connect to the Milvus server: - -{{fragments/multiple_code.md}} - -```python -from pymilvus import connections -connections.connect("default", host='localhost', port='19530') -``` +The following example shows how to perform a hybrid search on the basis of a regular [vector search](search.md). Suppose you want to search for certain books based on their vectorized introductions, but you only want those within a specific range of word count. You can then specify the boolean expression to filter the `word_count` field in the search parameters. Milvus will search for similar vectors only among those entities that match the expression. -```javascript -import { MilvusClient } from "@zilliz/milvus2-sdk-node"; -const milvusClient = new MilvusClient("localhost:19530"); -``` - -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Parameter - DescriptionNote
alias*Alias for the Milvus serverData type: String
Mandatory
host*IP address of the Milvus serverMandatory
port*Port of the Milvus serverMandatory
address**Address of the Milvus server"server_IP:server_port"
Mandatory
-
+## Preparations -2. Prepare collection parameters and create a collection: +The following example code demonstrates the steps prior to a search. -{{fragments/multiple_code.md}} +If you work with your own dataset in an existing Milvus instance, you can move forward to the next step. ```python ->>> from pymilvus import Collection, FieldSchema, CollectionSchema, DataType ->>> collection_name = "test_collection_search" +>>> from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType +>>> connections.connect("default", host='localhost', port='19530') >>> schema = CollectionSchema([ -... FieldSchema("film_id", DataType.INT64, is_primary=True), -... FieldSchema("films", dtype=DataType.FLOAT_VECTOR, dim=2) -... ]) ->>> collection = Collection(collection_name, schema, using='default', shards_num=2) + FieldSchema("book_id", DataType.INT64, is_primary=True), + FieldSchema("word_count", DataType.INT64), + FieldSchema("book_intro", dtype=DataType.FLOAT_VECTOR, dim=2) + ]) +>>> collection = Collection("test_book_search", schema, using='default', shards_num=2) +>>> import random +>>> data = [ + [i for i in range(2000)], + [i for i in range(10000, 12000)], + [[random.random() for _ in range(2)] for _ in range(2000)], + ] +>>> collection.insert(data) +>>> index_params = { + "metric_type":"L2", + "index_type":"IVF_FLAT", + "params":{"nlist":1024} + } +>>> collection.create_index("book_intro", index_params=index_params) ``` ```javascript -const COLLECTION_NAME = "test_collection_search"; -milvusClient.collectionManager.createCollection({ - collection_name: COLLECTION_NAME, +const { MilvusClient } =require("@zilliz/milvus2-sdk-node"); +const milvusClient = new MilvusClient("localhost:19530"); +const params = { + collection_name: "test_book_search", fields: [ { - name: "films", - description: "vector field", - data_type: DataType.FloatVector, + name: "book_intro", + description: "", + data_type: 101, // DataType.FloatVector type_params: { dim: "2", }, }, - { 
- name: "film_id", - data_type: DataType.Int64, - autoID: false, + { + name: "book_id", + data_type: 5, // DataType.Int64 is_primary_key: true, description: "", }, + { + name: "word_count", + data_type: 5, //DataType.Int64 + description: "", + }, ], +}; +await milvusClient.collectionManager.createCollection(params); +const entities = Array.from({ length: 2000 }, (v,k) => ({ + "book_intro": Array.from({ length: 2 }, () => Math.random()), + "book_id": k, + "word_count": k+10000, +})); +await milvusClient.dataManager.insert({ + collection_name: "test_book_search", + fields_data: entities, +}); +const index_params = { + metric_type: "L2", + index_type: "IVF_FLAT", + params: JSON.stringify({ nlist: 1024 }), +}; +await milvusClient.indexManager.createIndex({ + collection_name: "test_book_search", + field_name: "book_intro", + extra_params: index_params, }); ``` -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Parameter - DescriptionNote
collection_nameName of the collection to createData type: String
field_nameName of the field in the collectionData type: String
SchemaSchema used to create a collection and the fields within. Refer to field schema and collection schema for detailed description 
descriptionDescription of the collectionData type: String
using*By specifying the srever alias here, you can decide in which Milvus server you create a collectionOptional
shards_num*Number of the shards for the collection to createOptional
-
+## Load collection -3. Insert random vectors to the newly created collection: +All CRUD operations within Milvus are executed in memory. Load the collection to memory before conducting a vector query. {{fragments/multiple_code.md}} ```python ->>> import random ->>> data = [ -... [i for i in range(10)], -... [[random.random() for _ in range(2)] for _ in range(10)], -... ] ->>> collection.insert(data) ->>> collection.num_entities -10 +>>> from pymilvus import Collection +>>> collection = Collection("test_book_search") # Get an existing collection. +>>> collection.load() ``` ```javascript -let id = 1; -const entities = Array.from({ length: 10 }, () => ({ - films: Array.from({ length: 2 }, () => Math.random() * 10), - film_id: id++, -})); - -await milvusClient.collectionManager.insert({ - collection_name: COLLECTION_NAME, - fields_data: entities, +await milvusClient.collectionManager.loadCollection({ + collection_name: "test_book_search", }); ``` -
- Detailed Description - - - - - - - - - - - - - - - - - - - - - - - - -
Parameter - DescriptionNote
dataData to insert into MilvusMandatory
partition_nameName of the partition to insert data intoOptional
timeout*Timeout (in seconds) to allow for RPC. Clients wait until server responds or error occurs when it is set to NoneOptional
-
-4. Load the collection to memory and conduct a vector similarity search: +
+In the current release, the volume of data to load must be under 70% of the total memory resources of all query nodes to reserve memory resources for the execution engine. +
+ +## Conduct a hybrid vector search + +By specifying the boolean expression, you can filter the scalar field of the entities during the vector search. The following example limits the scale of search to the vectors within a specified `word_count` value range. {{fragments/multiple_code.md}} ```python ->>> collection.load() >>> search_param = { -... "data": [[1.0, 1.0]], -... "anns_field": "films", -... "param": {"metric_type": "L2"}, +... "data": [[0.1, 0.2]], +... "anns_field": "book_intro", +... "param": {"metric_type": "L2", "params": {"nprobe": 10}}, ... "limit": 2, -... "expr": "film_id in [2,4,6,8]", +... "expr": "word_count <= 11000", ... } >>> res = collection.search(**search_param) ``` ```javascript -await milvusClient.collectionManager.loadCollection({ - collection_name: COLLECTION_NAME, -}); -await milvusClient.dataManager.search({ - collection_name: COLLECTION_NAME, - // partition_names: [], - expr: "film_id in [1,4,6,8]", - vectors: [entities[0].films], +const results = await milvusClient.dataManager.search({ + collection_name: "test_book_search", + expr: "word_count <= 11000", + vectors: [[0.1, 0.2]], search_params: { - anns_field: "films", - topk: "4", + anns_field: "book_intro", + topk: "2", metric_type: "L2", params: JSON.stringify({ nprobe: 10 }), }, - vector_type: 100, // float vector -> 100 + vector_type: 101, // DataType.FloatVector, }); ``` -
- Detailed Description - +
- - - - - - - - - - + + - - - + + + + + + - - - - - - - + + - - - + + - - - + + - - - + + - - - + + - - - + +
Parameter + Parameter DescriptionNote
collection_name**Name of the collection to load and searchMandatory
vectorsVectors to search with. Length of the data represents the number of query nq.MandatorydataVectors to search with.
anns_fieldName of the field to search onMandatoryanns_fieldName of the field to search on.
paramsSearch parameter(s) specific to the index. See Index Selection for more information.
params*Search parameter(s) specific to the indexFind more parameter details of different indexes in Index Selection.
Mandatory
limit*Number of the most similar results to returnMandatorylimitNumber of the most similar results to return.
exprBoolean expression used to filter attributeFind more expression details in Boolean Expression Rules.
Optional
exprBoolean expression used to filter attribute. See Boolean Expression Rules for more information.
partition_namesName of the partition to search onOptionalpartition_names (optional)List of names of the partition to search in.
output_fieldsName of the field to return (vector field not support in current release)Optionaloutput_fields (optional)Name of the field to return. Vector field is not supported in current release.
timeout*Timeout (in seconds) to allow for RPC. Clients wait until server responds or error occurs when it is set to NoneOptionaltimeout (optional)A duration of time in seconds to allow for RPC. Clients wait until server responds or error occurs when it is set to None.
vector_type**Pre-check of binary/float vectors. 100 for binary vectors and 101 for float vectorsMandatoryround_decimal (optional)Number of decimal places of returned distance.
-
-
-In current release, data to be load must be under 70% of the total memory resources of all query nodes to reserve memory resources for execution engine. -
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to search in.
search_paramsParameters (as an object) used for search.
vectorsVectors to search with.
vector_typePre-check of binary or float vectors. 100 for binary vectors and 101 for float vectors.
partition_names (optional)List of names of the partition to search in.
expr (optional)Boolean expression used to filter attribute. See Boolean Expression Rules for more information.
output_fields (optional)Name of the field to return. Vector field is not supported in current release.
-5. Check the returned results: +Check the returned results: {{fragments/multiple_code.md}} ```python ->>> assert len(res) == 1>>> hits = res[0]>>> assert len(hits) == 2>>> print(f"- Total hits: {len(hits)}, hits ids: {hits.ids} ")- Total hits: 2, hits ids: [2, 4]>>> print(f"- Top1 hit id: {hits[0].id}, distance: {hits[0].distance}, score: {hits[0].score} ")- Top1 hit id: 2, distance: 0.10143111646175385, score: 0.101431116461 +>>> assert len(res) == 1 +>>> hits = res[0] +>>> assert len(hits) == 2 +>>> print(f"- Total hits: {len(hits)}, hits ids: {hits.ids} ") +>>> print(f"- Top1 hit id: {hits[0].id}, distance: {hits[0].distance}, score: {hits[0].score} ") ``` ```javascript -// search result will be like:{ status: { error_code: 'Success', reason: '' }, results: [ { score: 0, id: '1' }, { score: 9.266796112060547, id: '4' }, { score: 28.263811111450195, id: '8' }, { score: 41.055686950683594, id: '6' } ]} +console.log(results.results) ``` +## What's next + +- Learn more basic operations of Milvus: + - [Search with Time Travel](timetravel.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) \ No newline at end of file diff --git a/site/en/userGuide/search/query.md b/site/en/userGuide/search/query.md new file mode 100644 index 000000000..6cce3024c --- /dev/null +++ b/site/en/userGuide/search/query.md @@ -0,0 +1,207 @@ +--- +id: query.md +related_key: query vectors +summary: Learn how to query vectors in Milvus. +--- + +# Conduct a Vector Query + +This topic describes how to conduct a vector query. + +Unlike a vector similarity search, a vector query retrieves vectors via scalar filtering based on [boolean expression](boolean.md). Milvus supports many data types in the scalar fields and a variety of boolean expressions. 
The boolean expression filters on scalar fields or the primary key field, and it retrieves all results that match the filters. + +The following example shows how to perform a vector query on a 2000-row dataset of book ID (primary key), word count (scalar field), and book introduction (vector field), simulating the situation where you query for certain books based on their IDs. + +## Preparations + +The following example code demonstrates the steps prior to a query. + +If you work with your own dataset in an existing Milvus server, you can move forward to the next step. + +```python +>>> from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType +>>> connections.connect("default", host='localhost', port='19530') +>>> schema = CollectionSchema([ + FieldSchema("book_id", DataType.INT64, is_primary=True), + FieldSchema("word_count", DataType.INT64), + FieldSchema("book_intro", dtype=DataType.FLOAT_VECTOR, dim=2) + ]) +>>> collection = Collection("test_book_search", schema, using='default', shards_num=2) +>>> import random +>>> data = [ + [i for i in range(2000)], + [i for i in range(10000, 12000)], + [[random.random() for _ in range(2)] for _ in range(2000)], + ] +>>> collection.insert(data) +>>> index_params = { + "metric_type":"L2", + "index_type":"IVF_FLAT", + "params":{"nlist":1024} + } +>>> collection.create_index("book_intro", index_params=index_params) +``` + +```javascript +const { MilvusClient } =require("@zilliz/milvus2-sdk-node"); +const milvusClient = new MilvusClient("localhost:19530"); +const params = { + collection_name: "test_book_search", + fields: [ + { + name: "book_intro", + description: "", + data_type: 101, // DataType.FloatVector + type_params: { + dim: "2", + }, + }, + { + name: "book_id", + data_type: 5, // DataType.Int64 + is_primary_key: true, + description: "", + }, + { + name: "word_count", + data_type: 5, //DataType.Int64 + description: "", + }, + ], +}; +await 
milvusClient.collectionManager.createCollection(params); +const entities = Array.from({ length: 2000 }, (v,k) => ({ + "book_intro": Array.from({ length: 2 }, () => Math.random()), + "book_id": k, + "word_count": k+10000, +})); +await milvusClient.dataManager.insert({ + collection_name: "test_book_search", + fields_data: entities, +}); +const index_params = { + metric_type: "L2", + index_type: "IVF_FLAT", + params: JSON.stringify({ nlist: 1024 }), +}; +await milvusClient.indexManager.createIndex({ + collection_name: "test_book_search", + field_name: "book_intro", + extra_params: index_params, +}); +``` + +## Load collection + +All CRUD operations within Milvus are executed in memory. Load the collection to memory before conducting a vector query. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("test_book_search") # Get an existing collection. +>>> collection.load() +``` + +```javascript +await milvusClient.collectionManager.loadCollection({ + collection_name: "test_book_search", +}); +``` + + +
+In the current release, the volume of data to load must be under 70% of the total memory resources of all query nodes to reserve memory resources for the execution engine. +
+ +## Conduct a vector query + +The following example filters the vectors with certain `book_id` values, and returns the `book_id` field and `book_intro` of the results. + +{{fragments/multiple_code.md}} + +```python +>>> res = collection.query(expr = "book_id in [2,4,6,8]", output_fields = ["book_id", "book_intro"]) +``` + +```javascript +const results = await milvusClient.dataManager.query({ + collection_name: "test_book_search", + expr: "book_id in [2,4,6,8]", + output_fields: ["book_id", "book_intro"], +}); +``` + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
exprBoolean expression used to filter attribute. Find more expression details in Boolean Expression Rules.
output_fields (optional)List of names of the field to return.
partition_names (optional)List of names of the partitions to query on.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to query.
exprBoolean expression used to filter attribute. Find more expression details in Boolean Expression Rules.
output_fields (optional)List of names of the field to return.
partition_names (optional)List of names of the partitions to query on.
+ +Check the returned results. + +{{fragments/multiple_code.md}} + +```python +>>> sorted_res = sorted(res, key=lambda k: k['book_id']) +>>> sorted_res +``` + +```javascript +console.log(results.data) +``` + +## What's next + +- Learn more basic operations of Milvus: + - [Conduct a vector search](search.md) + - [Conduct a hybrid search](hybridsearch.md) + - [Search with Time Travel](timetravel.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) diff --git a/site/en/userGuide/search/search.md b/site/en/userGuide/search/search.md index bf57a6426..be7dfa2e3 100644 --- a/site/en/userGuide/search/search.md +++ b/site/en/userGuide/search/search.md @@ -2,18 +2,121 @@ id: search.md related_key: search summary: Conduct a vector similarity search with Milvus. - --- # Conduct a Vector Similarity Search -This topic describes how to conduct a vector similarity search. +This topic describes how to search entities with Milvus. + +A vector similarity search in Milvus calculates the distance between query vector(s) and vectors in the collection with specified similarity metrics, and returns the most similar results. By specifying a [boolean expression](boolean.md) that filters the scalar field or the primary key field, you can perform a [hybrid search](hybridsearch.md) or even a search with [Time Travel](timetravel.md). + +The following example shows how to perform a vector similarity search on a 2000-row dataset of book ID (primary key), word count (scalar field), and book introduction (vector field), simulating the situation that you search for certain books based on their vectorized introductions. Milvus will return the most similar results according to the query vector and search parameters you have defined. 
+ +## Preparations + +The following example code demonstrates the steps prior to a search. + +If you work with your own dataset in an existing Milvus instance, you can move forward to the next step. + +```python +>>> from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType +>>> connections.connect("default", host='localhost', port='19530') +>>> schema = CollectionSchema([ + FieldSchema("book_id", DataType.INT64, is_primary=True), + FieldSchema("word_count", DataType.INT64), + FieldSchema("book_intro", dtype=DataType.FLOAT_VECTOR, dim=2) + ]) +>>> collection = Collection("test_book_search", schema, using='default', shards_num=2) +>>> import random +>>> data = [ + [i for i in range(2000)], + [i for i in range(10000, 12000)], + [[random.random() for _ in range(2)] for _ in range(2000)], + ] +>>> collection.insert(data) +>>> index_params = { + "metric_type":"L2", + "index_type":"IVF_FLAT", + "params":{"nlist":1024} + } +>>> collection.create_index("book_intro", index_params=index_params) +``` + +```javascript +const { MilvusClient } =require("@zilliz/milvus2-sdk-node"); +const milvusClient = new MilvusClient("localhost:19530"); +const params = { + collection_name: "test_book_search", + fields: [ + { + name: "book_intro", + description: "", + data_type: 101, // DataType.FloatVector + type_params: { + dim: "2", + }, + }, + { + name: "book_id", + data_type: 5, //DataType.Int64 + is_primary_key: true, + description: "", + }, + { + name: "word_count", + data_type: 5, //DataType.Int64 + description: "", + }, + ], +}; +await milvusClient.collectionManager.createCollection(params); +const entities = Array.from({ length: 2000 }, (v,k) => ({ + "book_intro": Array.from({ length: 2 }, () => Math.random()), + "book_id": k, + "word_count": k+10000, +})); +await milvusClient.dataManager.insert({ + collection_name: "test_book_search", + fields_data: entities, +}); +const index_params = { + metric_type: "L2", + index_type: "IVF_FLAT", + params: 
JSON.stringify({ nlist: 1024 }), +}; +await milvusClient.indexManager.createIndex({ + collection_name: "test_book_search", + field_name: "book_intro", + extra_params: index_params, +}); +``` + +## Load collection + +All CRUD operations within Milvus are executed in memory. Load the collection to memory before conducting a vector similarity search. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("test_book_search") # Get an existing collection. +>>> collection.load() +``` + +```javascript +await milvusClient.collectionManager.loadCollection({ + collection_name: "test_book_search", +}); +``` -
-Parameters marked with * are specific to Python SDK, and those marked with ** are specific to Node.js SDK. + +
+In the current release, the volume of data to load must be under 70% of the total memory resources of all query nodes to reserve memory resources for the execution engine.
-1. Create search parameters: +## Prepare search parameters + +Prepare the parameters that suit your search scenario. The following example defines that the search will calculate the distance with Euclidean distance, and retrieve vectors from ten closest clusters built by the IVF_FLAT index. {{fragments/multiple_code.md}} @@ -23,206 +126,181 @@ Parameters marked with * are specific to Python SDK, and those mark ```javascript const searchParams = { - anns_field: "example_field", - topk: "4", + anns_field: "book_intro", + topk: "10", metric_type: "L2", params: JSON.stringify({ nprobe: 10 }), }; ``` -
- Detailed Description - +
- - - - - - - - - + - - - - - - - - - - - - +
Parameter + Parameter DescriptionNote
metric_typeMetrics used to measure similarity of vectorsFind more options in Simlarity Metrics.
Mandatory
index_typeType of index used to accelerate the vector searchFind more options in Index Selection.
Mandatory
Metrics used to measure similarity of vectors. See Similarity Metrics for more information.
paramsSearch parameter(s) specific to the indexFind more parameter details of different indexes in Index Selection.
Mandatory
anns_field**Name of the field to search onMandatory
topk**Number of the most similar results to returnMandatorySearch parameter(s) specific to the index. See Index Selection for more information.
-
-2. Load the collection to memory before conducting a vector similarity search: - -{{fragments/multiple_code.md}} - -```python ->>> collection.load() -``` - -```javascript -await milvusClient.collectionManager.loadCollection({ - collection_name: COLLECTION_NAME, -}); -``` - -
- Detailed Description - +
- - + + + + - - - + + + + + + + + + +
Parameter + Parameter DescriptionNote
anns_fieldName of the field to search on.
collection_name**Name of the collection to loadMandatorytopkNumber of the most similar results to return.
metric_typeMetrics used to measure similarity of vectors. See Similarity Metrics for more information.
paramsSearch parameter(s) specific to the index. See Index Selection for more information.
-
-
-In current release, data to be load must be under 70% of the total memory resources of all query nodes to reserve memory resources for execution engine. -
-3. Search with newly created random vectors: +## Conduct a vector search -_Milvus returns the IDs of the most similar vectors and their distances._ +Search vectors with Milvus. To search in a specific [partition](manage_partition.md), specify the list of partition names. {{fragments/multiple_code.md}} ```python ->>> results = collection.search(vectors[:5], field_name, param=search_params, limit=10, expr=None) ->>> results[0].ids -[424363819726212428, 424363819726212436, ...] ->>> results[0].distances -[0.0, 1.0862197875976562, 1.1029295921325684, ...] +>>> results = collection.search(data=[[0.1, 0.2]], anns_field="book_intro", param=search_params, limit=10, expr=None) ``` ```javascript -await milvusClient.dataManager.search({ - collection_name: COLLECTION_NAME, - // partition_names: [], +const results = await milvusClient.dataManager.search({ + collection_name: "test_book_search", expr: "", - vectors: [[1, 2, 3, 4, 5, 6, 7, 8]], + vectors: [[0.1, 0.2]], search_params: searchParams, - vector_type: 100, // Float vector -> 100 + vector_type: 101, // DataType.FloatVector }); ``` -
- Detailed Description - +
- - - - - - - - - - + + - - + - - - - + + + - - - + + - - + - - - + + - - - + + + + + + + + + + + + +
Parameter + Parameter DescriptionNote
collection_name**Name of the collection to searchMandatory
vectorsVectors to search with. Length of the data represents the number of query nq.MandatorydataVectors to search with.
anns_fieldName of the field to search onMandatoryName of the field to search on.
params*Search parameter(s) specific to the indexFind more parameter details of different indexes in Index Selection.
Mandatory
paramsSearch parameter(s) specific to the index. See Index Selection for more information.
limit*Number of the most similar results to returnMandatorylimitNumber of the most similar results to return.
exprBoolean expression used to filter attributeFind more expression details in Boolean Expression Rules.
Optional
Boolean expression used to filter attribute. See Boolean Expression Rules for more information.
partition_namesName of the partition to search onOptionalpartition_names (optional)List of names of the partition to search in.
output_fieldsName of the field to return (vector field not support in current release)Optionaloutput_fields (optional)Name of the field to return. Vector field is not supported in current release.
timeout (optional)A duration of time in seconds to allow for RPC. Clients wait until server responds or error occurs when it is set to None.
round_decimal (optional)Number of decimal places of returned distance.
+ + + + + + + + + + + + + + + + + + + + - - - + + - - - + + + + + + - - - + +
ParameterDescription
collection_nameName of the collection to search in.
search_paramsParameters (as an object) used for search.
vectorsVectors to search with.
timeoutTimeout (in seconds) to allow for RPC. Clients wait until server responds or error occurs when it is set to None.Optionalvector_typePre-check of binary or float vectors. 100 for binary vectors and 101 for float vectors.
vector_type**Pre-check of binary/float vectors. 100 for binary vectors and 101 for float vectors.Mandatorypartition_names (optional)List of names of the partition to search in.
expr (optional)Boolean expression used to filter attribute. See Boolean Expression Rules for more information.
round_decimal**Number of decimal places of returned distanceData type: Integer
Optional
output_fields (optional)Name of the field to return. Vector field is not supported in current release.
-
-To search in a specific partition or field, specify the name of the partition and field. + +Check the primary key values of the most similar vectors and their distances. {{fragments/multiple_code.md}} ```python ->>> collection.search(vectors[:5], field_name, param=search_params, limit=10, expr=None, partition_names=[partition_name]) +>>> results[0].ids +>>> results[0].distances ``` ```javascript -await milvusClient.dataManager.search({ - collection_name: COLLECTION_NAME, - partition_names: [partition_name], - expr: "", - vectors: [[1, 2, 3, 4, 5, 6, 7, 8]], - search_params: searchParams, - vector_type: 100, // Float vector -> 100 -}); +console.log(results.results) ``` -4. Release the collections loaded in Milvus to reduce memory consumption when the search is completed: +Release the collection loaded in Milvus to reduce memory consumption when the search is completed. {{fragments/multiple_code.md}} @@ -231,26 +309,15 @@ await milvusClient.dataManager.search({ ``` ```javascript -await milvusClient.collectionManager.releaseCollection({ collection_name: COLLECTION_NAME,}); +await milvusClient.collectionManager.releaseCollection({ collection_name: "test_book_search",}); ``` -
- Detailed Description - - - - - - - - - - - - - - -
Parameter - DescriptionNote
collection_name**Name of the collection to releaseMandatory
-
+## What's next +- Learn more basic operations of Milvus: + - [Query vectors](query.md) + - [Conduct a hybrid search](hybridsearch.md) + - [Search with Time Travel](timetravel.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) \ No newline at end of file diff --git a/site/en/userGuide/search/timetravel.md b/site/en/userGuide/search/timetravel.md new file mode 100644 index 000000000..794eca925 --- /dev/null +++ b/site/en/userGuide/search/timetravel.md @@ -0,0 +1,117 @@ +--- +id: timetravel.md +related_key: Time Travel +summary: Learn how to search with Time Travel in Milvus. +--- + +# Search with Time Travel + +This topic describes how to use the Time Travel feature during vector search. + +Milvus maintains a timeline for all data insert and delete operations. It allows users to specify a timestamp in a search to retrieve a data view at a specified point in time, without spending tremendously on maintenance for data rollback. + +## Connect to the Milvus server + +```python +from pymilvus import connections +connections.connect("default", host='localhost', port='19530') +``` + +## Prepare parameters and create a collection + +```python +>>> from pymilvus import Collection, FieldSchema, CollectionSchema, DataType +>>> collection_name = "test_time_travel" +>>> schema = CollectionSchema([ +... FieldSchema("pk", DataType.INT64, is_primary=True), +... FieldSchema("example_field", dtype=DataType.FLOAT_VECTOR, dim=2) +... ]) +>>> collection = Collection(collection_name, schema) +``` + +## Insert first batch of data + +Insert random data to simulate the original data. + +```python +>>> import random +>>> data = [ +... [i for i in range(10)], +... [[random.random() for _ in range(2)] for _ in range(10)], +... 
] +>>> batch1 = collection.insert(data) +``` + +## Check the timestamp of the first data batch + +Check the timestamp of the first data batch for search with Time Travel. Data inserted within the same batch share an identical timestamp. + +```python +>>> batch1.timestamp +``` + +```python +428828271234252802 +``` + +
+ Milvus adopts a combination of a physical clock and a logical counter as a hybrid timestamp. The 64-bit timestamp consists of a 46-bit physical part (high-order bits) and an 18-bit logical part (low-order bits). The physical part is the number of milliseconds that have elapsed since January 1, 1970 (midnight UTC/GMT). +
+ + + +## Insert second batch of data + +Insert the second batch of data to simulate the dirty data, among which a piece of data with primary key value `19` and vector value `[1.0,1.0]` is appended as the target data to search with in the following step. + +```python +>>> data = [ +... [i for i in range(10, 20)], +... [[random.random() for _ in range(2)] for _ in range(9)], +... ] +>>> data[1].append([1.0,1.0]) +>>> batch2 = collection.insert(data) +``` + +## Search with Time Travel + +Load the collection and search the target data with the timestamp of the first data batch. With the timestamp specified, Milvus only retrieves the data view at the point of time the timestamp indicates. + +```python +>>> collection.load() +>>> search_param = { +... "data": [[1.0, 1.0]], +... "anns_field": "example_field", +... "param": {"metric_type": "L2"}, +... "limit": 10, +... "travel_timestamp": batch1.timestamp, +... } +>>> res = collection.search(**search_param) +>>> res[0].ids +``` + +As shown below, the target data itself and other data inserted later are not returned as results. + +```python +[8, 7, 4, 2, 5, 6, 9, 3, 0, 1] +``` + +If you do not specify the timestamp or specify it with the timestamp of the second data batch, Milvus will return the results from both batches. + +```python +>>> batch2.timestamp +428828283406123011 +>>> search_param = { +... "data": [[1.0, 1.0]], +... "anns_field": "example_field", +... "param": {"metric_type": "L2"}, +... "limit": 10, +... "travel_timestamp": batch2.timestamp, +... 
} +>>> res = collection.search(**search_param) +>>> res[0].ids +[19, 10, 8, 7, 4, 17, 2, 5, 13, 15] +``` + + + diff --git a/site/zh-CN/Variables.json b/site/zh-CN/Variables.json index 93300b83b..2b59750b9 100644 --- a/site/zh-CN/Variables.json +++ b/site/zh-CN/Variables.json @@ -1,7 +1,7 @@ { "milvus_release_version": "2.0.0-RC8", "milvus_python_sdk_version": "2.0.0rc8", - "milvus_node_sdk_version": "1.0.18", + "milvus_node_sdk_version": "1.0.19", "cpu_milvus_docker_image_version": "2.0.0-rc8", "gpu_milvus_docker_image_version": "" } diff --git a/site/zh-CN/getstarted/cluster/install_cluster-milvusoperator.md b/site/zh-CN/getstarted/cluster/install_cluster-milvusoperator.md index 013ba38a9..f8cc37bea 100644 --- a/site/zh-CN/getstarted/cluster/install_cluster-milvusoperator.md +++ b/site/zh-CN/getstarted/cluster/install_cluster-milvusoperator.md @@ -473,7 +473,7 @@ my-release-pulsar-zookeeper-1 1/1 Running 0 1 my-release-pulsar-zookeeper-2 1/1 Running 0 13m ``` -When the Milvus cluster is installed, you can learn how to [Connect to Milvus server](connect.md) +When the Milvus cluster is installed, you can learn how to [Connect to Milvus server](manage_connection.md) ## Uninstall the Milvus cluster @@ -504,7 +504,7 @@ If you use kind to install the K8s cluster, run $ kind delete cluster --name myk Having installed Milvus, you can: - Check [Hello Milvus](example_code.md) to run an example code with different SDKs to see what Milvus can do. - Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) - [Upgrade Milvus Using Helm Chart](upgrade.md). 
diff --git a/site/zh-CN/getstarted/install-node.md b/site/zh-CN/getstarted/install-node.md index 13e82497b..67a6b26d8 100644 --- a/site/zh-CN/getstarted/install-node.md +++ b/site/zh-CN/getstarted/install-node.md @@ -32,7 +32,7 @@ $ npm install @zilliz/milvus2-sdk-node Having installed Milvus Node.js SDK, You can: - Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) diff --git a/site/zh-CN/getstarted/install-pymilvus.md b/site/zh-CN/getstarted/install-pymilvus.md index c05d094cc..965a562f5 100644 --- a/site/zh-CN/getstarted/install-pymilvus.md +++ b/site/zh-CN/getstarted/install-pymilvus.md @@ -46,7 +46,7 @@ $ python -c "from pymilvus import Collection" Having installed PyMilvus, You can: - Learn the basic operations of Milvus: - - [Connect to Milvus server](connect.md) + - [Connect to Milvus server](manage_connection.md) - [Conduct a vector search](search.md) - [Conduct a hybrid search](hybridsearch.md) diff --git a/site/zh-CN/home/home.json b/site/zh-CN/home/home.json index 96e9bfded..fd6401e4f 100644 --- a/site/zh-CN/home/home.json +++ b/site/zh-CN/home/home.json @@ -28,7 +28,7 @@ "order": -1, "title": "欢迎阅读 Milvus 文档! 
", "desc": [ - "在 Milvus 文档页,你可以了解 什么是 Milvus,如何 安装, 使用, and 部署 Milvus,以及根据场景需求使用 Milvus 搭建应用系统的 教程。 你还可以在这里找到常见问题 FAQsAPI 参考。", + "在 Milvus 文档页,你可以了解 什么是 Milvus,如何 安装, 使用, and 部署 Milvus,以及根据场景需求使用 Milvus 搭建应用系统的 教程。 你还可以在这里找到常见问题 FAQsAPI 参考。", "你可以先浏览推荐阅读下的内容,或者在左上角的搜索框内输入关键字寻找所需文档。如未能找到所需信息,你可以向页面右下角的 MilMil 提问,或者通过任意文档页右上角的选项反馈文档问题。" ] }, @@ -37,16 +37,16 @@ "items": [{ "label": "基本操作", "list": [{ - "text": "创建 Collection、Partition", - "link": "create.md" + "text": "创建 Collection", + "link": "manage_collection.md" }, { "text": "插入数据", - "link": "insert.md" + "link": "manage_data.md" }, { "text": "创建索引", - "link": "build.md" + "link": "manage_index.md" }, { "text": "向量搜索", diff --git a/site/zh-CN/home/home.md b/site/zh-CN/home/home.md index caac8b693..647639288 100644 --- a/site/zh-CN/home/home.md +++ b/site/zh-CN/home/home.md @@ -4,7 +4,8 @@ id: home.md # 欢迎阅读 Milvus 文档! -在 Milvus 文档页,你可以了解 [什么是 Milvus](overview.md),如何 [安装](install_standalone-docker.md)、[使用](connect.md)、[部署](aws.md) Milvus,以及 [根据场景需求使用 Milvus 搭建应用系统的教程](image_similarity_search.md)。 你还可以在这里找到常见问题 [FAQs](performance_faq.md) 及 [API 参考](https://milvus.io/api-reference/pymilvus/v2.0.0rc8/api/collection.html)。 +在 Milvus 文档页,你可以了解 [什么是 Milvus](overview.md),如何 [安装](install_standalone-docker.md)、[使用](manage_connection.md)、[部署](aws.md) Milvus,以及 [根据场景需求使用 Milvus 搭建应用系统的教程](image_similarity_search.md)。 你还可以在这里找到常见问题 [FAQs](performance_faq.md) 及 [API 参考](https://milvus.io/api-reference/pymilvus/v2.0.0rc8/api/collection.html)。 + 你可以先浏览推荐阅读下的内容,或者在左上角的搜索框内输入关键字寻找所需文档。如未能找到所需信息,你可以向页面右下角的 _MilMil_![MilMil](../../../assets/icon_bird.svg) 提问,或者通过任意文档页右上角的选项反馈文档问题。 @@ -45,9 +46,9 @@ id: home.md

基本操作

-- [创建 Collection、Partition](create.md) -- [插入数据](insert.md) -- [创建索引](build.md) +- [创建 Collection](manage_collection.md) +- [插入数据](manage_data.md) +- [创建索引](manage_index.md) - [向量搜索](search.) - [结构化匹配](query.md)
diff --git a/site/zh-CN/menuStructure/cn.json b/site/zh-CN/menuStructure/cn.json index 738b7fa39..91b55edff 100644 --- a/site/zh-CN/menuStructure/cn.json +++ b/site/zh-CN/menuStructure/cn.json @@ -151,60 +151,52 @@ "isMenu": true }, { - "id": "connect.md", - "title": "连接服务器", + "id": "manage_connection.md", + "title": "管理 Milvus 连接", "label1": "userguide", "label2": "", "label3": "", "order": 0 }, { - "id": "create.md", - "title": "创建 Collection、Partition", + "id": "manage_collection.md", + "title": "管理 Collection", "label1": "userguide", "label2": "", "label3": "", "order": 1 }, { - "id": "insert.md", - "title": "插入数据", + "id": "manage_partition.md", + "title": "管理 Partition", "label1": "userguide", "label2": "", "label3": "", "order": 2 }, { - "id": "build.md", - "title": "创建索引", + "id": "manage_data.md", + "title": "管理数据", "label1": "userguide", "label2": "", "label3": "", "order": 3 }, { - "id": "drop.md", - "title": "删除 Collection、Partition 或索引", + "id": "manage_index.md", + "title": "管理索引", "label1": "userguide", "label2": "", "label3": "", "order": 4 }, - { - "id": "delete.md", - "title": "删除向量", - "label1": "userguide", - "label2": "", - "label3": "", - "order": 5 - }, { "id": "search", "title": "搜索与结构化匹配", "label1": "userguide", "label2": "", "label3": "", - "order": 6, + "order": 5, "isMenu": true }, { @@ -229,6 +221,14 @@ "label1": "userguide", "label2": "search", "label3": "", + "order": 2 + }, + { + "id": "timetravel.md", + "title": "使用 Time Travel 搜索", + "label1": "userguide", + "label2": "search", + "label3": "", "order": 3 }, { diff --git a/site/zh-CN/reference/schema/collection_schema.md b/site/zh-CN/reference/schema/collection_schema.md index dd4e36633..43a19bb03 100644 --- a/site/zh-CN/reference/schema/collection_schema.md +++ b/site/zh-CN/reference/schema/collection_schema.md @@ -5,7 +5,7 @@ summary: 学习如何在 Milvus 中定义 collection schema。 # Collection Schema -Collection schema 是 collection 的逻辑定义。通常你需要在定义 collection schema 和 [创建 
collection](create.md) 之前定义 [field schema](field_schema.md)。 +Collection schema 是 collection 的逻辑定义。通常你需要在定义 collection schema 和 [创建 collection](manage_collection.md) 之前定义 [field schema](field_schema.md)。 ## Collection schema 属性 diff --git a/site/zh-CN/reference/schema/field_schema.md b/site/zh-CN/reference/schema/field_schema.md index 62fb78205..98b8161a3 100644 --- a/site/zh-CN/reference/schema/field_schema.md +++ b/site/zh-CN/reference/schema/field_schema.md @@ -5,7 +5,7 @@ summary: Learn how to define a field schema in Milvus. # Field Schema -A field schema is the logical definition of a field. It is the first thing you need to define before defining a [collection schema](collection_schema.md) and [creating a collection](create.md). +A field schema is the logical definition of a field. It is the first thing you need to define before defining a [collection schema](collection_schema.md) and [creating a collection](manage_collection.md). Milvus 2.0 supports a primary key field, a scalar field, and a vector field in a collection. diff --git a/site/zh-CN/userGuide/build.md b/site/zh-CN/userGuide/build.md deleted file mode 100644 index fd0a99d3a..000000000 --- a/site/zh-CN/userGuide/build.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -id: build.md ---- - -# 创建索引 - -为提高向量搜索的效率,你可以为 collection 中的某一列 Field 创建索引。具体索引参数设置详见[向量索引](index.md)。 - -1. 准备相关参数: - -{{fragments/multiple_code.md}} - -```python ->>> index_param = { - "metric_type":"L2", - "index_type":"IVF_FLAT", - "params":{"nlist":1024} - } -``` - -```javascript -const index_param = { - metric_type: "L2", - index_type: "IVF_FLAT", - params: JSON.stringify({ nlist: 1024 }), -}; -``` - -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - - - - - -
参数 - 说明备注
metric_type用于评估向量相似性的计算方式可在 距离计算方式中查看其他选项。
必填项
index_type用于加速向量搜寻的索引类型可在选择索引中查看其他选项。
必填项
params建立索引的参数可在选择索引中查看不同索引的更多参数详细资讯。
必填项
-
- -2. 创建索引: - -{{fragments/multiple_code.md}} - -```python ->>> collection.create_index(field_name=field_name, index_params=index_param) -Status(code=0, message='') -``` - -```javascript -await milvusClient.indexManager.createIndex({ - collection_name: COLLECTION_NAME, - field_name: FIELD_NAME, - extra_params: index_param, -}); -``` - -3. 查看创建的索引相关信息: - -{{fragments/multiple_code.md}} - -```python ->>> collection.index().params -{'metric_type': 'L2', 'index_type': 'IVF_FLAT', 'params': {'nlist': 1024}} -``` - -```javascript -await milvusClient.indexManager.describeIndex({ - collection_name: COLLECTION_NAME, -}); -``` diff --git a/site/zh-CN/userGuide/connect.md b/site/zh-CN/userGuide/connect.md deleted file mode 100644 index 436f324cf..000000000 --- a/site/zh-CN/userGuide/connect.md +++ /dev/null @@ -1,92 +0,0 @@ ---- -id: connect.md ---- - -# 连接服务器 - -通过本章节文档,你将了解如何连接 Milvus 服务器。 - -如果你选择在 Python 交互式编程环境下学习基本操作,在命令行输入 `python3`。 - -## 连接 Milvus 服务器 - -{{fragments/multiple_code.md}} - -```python ->>> from pymilvus import connections ->>> connections.connect("default", host='localhost', port='19530') -``` - -```javascript -import { MilvusClient } from "@zilliz/milvus2-sdk-node"; -const milvusClient = new MilvusClient("localhost:19530"); -``` - -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
参数 - 说明备注
alias*Milvus 服务器的名称数据类型: String
必填项
host*Milvus 服务器的 IP必填项
port*Milvus 服务器的端口必填项
address**Milvus 服务器的地址"server_IP:server_port"
必填项
-
- -## 断开与服务器的连接 - -使用完 Milvus 的服务之后,可以断开与 Milvus 服务器的连接以释放资源: - -{{fragments/multiple_code.md}} - -```python ->>> connections.disconnect("default") -``` - -```javascript -await milvusClient.closeConnection(); -``` - -
- 详细资讯 - - - - - - - - - - - - - - -
参数 - 说明备注
alias*Milvus 服务器的名称数据类型: String
必填项
-
diff --git a/site/zh-CN/userGuide/create.md b/site/zh-CN/userGuide/create.md deleted file mode 100644 index f3c281e3e..000000000 --- a/site/zh-CN/userGuide/create.md +++ /dev/null @@ -1,241 +0,0 @@ ---- -id: create.md ---- - -# 创建 collection 或 partition - -通过本章节文档,你将了解如何在 Milvus 中创建 collection 和 partition。 - -## 创建 collection - -连接 Milvus 服务器后,可通过以下步骤创建 collection。 - -> 创建 collection 必须包含一列主键字段,目前主键字段只支持 int64 类型。 - -1. 准备 collection 参数,包括 collection 名字、collection 字段参数等。具体参数详见 [API 文档](https://milvus.io/cn/api-reference/pymilvus/v2.0.0rc5/api/collection.html)。 - -{{fragments/multiple_code.md}} - -```python ->>> collection_name = "example_collection" ->>> field_name = "example_field" ->>> from pymilvus import Collection, CollectionSchema, FieldSchema, DataType ->>> pk = FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True) ->>> field = FieldSchema(name=field_name, dtype=DataType.FLOAT_VECTOR, dim=8) ->>> schema = CollectionSchema(fields=[pk,field], description="example collection") -``` - -```javascript -const COLLECTION_NAME = "example_collection"; -const FIELD_NAME = "example_field"; - -const params = { - collection_name: COLLECTION_NAME, - fields: [ - { - name: FIELD_NAME, - description: "vector field", - data_type: DataType.FloatVector, - - type_params: { - dim: "8", - }, - }, - { - name: "age", - data_type: DataType.Int64, - autoID: true, - is_primary_key: true, - description: "", - }, - ], -}; -``` - -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
参数说明备注
collection_name要建立的 collection 名称数据类型: String
field_namecollection 中的 field 名称数据类型: String
Schema用于建立 collection 及其中的 field。详细说明请参考 field schema and collection schema 
descriptioncollection 的说明数据类型: String
-
- -2. 调用 Milvus 实例的 Collection() 方法创建 collection: - -{{fragments/multiple_code.md}} - -```python ->>> collection = Collection(name=collection_name, schema=schema, using='default', shards_num=2) - -# 根据 collection 名称获取指定 collection。 -collection=Collection(name=collection_name) -``` - -```javascript -await milvusClient.collectionManager.createCollection(params); -``` - -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - -
参数说明备注
using*在此处标明服务器名称,以指定要建立 collection 的 Milvus 服务器。选填项
shards_num*指定 collection 要建立的 shards 数目选填项
-
- -3. 调用 `milvus.has_collection` 查看 collection 是否创建成功: - -{{fragments/multiple_code.md}} - -```python ->>> import pymilvus ->>> pymilvus.utility.get_connection().has_collection(collection_name) -True -``` - -```javascript -await milvusClient.collectionManager.hasCollection({ - collection_name: COLLECTION_NAME, -}); -``` - -4. 调用 `milvus.list_collections()` 查看所有创建成功的 collection: - -{{fragments/multiple_code.md}} - -```python ->>> pymilvus.utility.get_connection().list_collections() -['example_collection'] -``` - -```javascript -await milvusClient.collectionManager.showCollections(); -``` - -5. 查看 collection 相关数据,例如行数: - -{{fragments/multiple_code.md}} - -```python ->>> collection.num_entities -0 -``` - -```javascript -await milvusClient.collectionManager.getCollectionStatistics({ - collection_name: COLLECTION_NAME, -}); -``` - -## 创建 partition - -随着一个 collection 的数据增加,查询性能会逐渐下降。如果只需要查询一部分数据,可以考虑将数据进行分区(partitioning)。给 partition 加上 partition name 后,搜索时就只需要搜索一部分数据,从而能够提升搜索性能。 - -{{fragments/multiple_code.md}} - -```python ->>> partition_name = "example_partition" ->>> partition = collection.create_partition(partition_name) -``` - -```javascript -await milvusClient.partitionManager.createPartition({ - collection_name: COLLECTION_NAME, - partition_name: "example_partition", -}); -``` - -
- 详细资讯 - - - - - - - - - - - - - - - -
参数说明备注
partition_name要建立的 partition 名称数据类型: String
-
- -Milvus 会在创建 collection 时创建一个默认的 partition,name 为 `_default`。在创建新 partition 后,便有两个 partition——一个的 partition name 为 `example_partition`,另一个的为 `_default` 。我们可以调用 `list_partitions()` 的方法查看一个 collection 中的所有 partition。 - -{{fragments/multiple_code.md}} - -```python ->>> collection.partitions -[{"name": "_default", "description": "", "num_entities": 0}, {"name": "example_partition", "description": "", "num_entities": 0}] -``` - -```javascript -await milvusClient.partitionManager.showPartitions({ - collection_name: COLLECTION_NAME, -}); -``` - -调用 `has_partition()` 查看 partition 是否创建成功: - -{{fragments/multiple_code.md}} - -```python ->>> collection.has_partition(partition_name) -True -``` - -```javascript -await milvusClient.partitionManager.hasPartition({ - collection_name: COLLECTION_NAME, - partition_name: "example_partition", -}); -``` diff --git a/site/zh-CN/userGuide/delete.md b/site/zh-CN/userGuide/delete.md deleted file mode 100644 index cc2beccc8..000000000 --- a/site/zh-CN/userGuide/delete.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -id: delete.md ---- -# 删除向量 - -
- -该功能正在开发中,将跟随 Milvus 2.0 稳定版一同发布。 - -
diff --git a/site/zh-CN/userGuide/drop.md b/site/zh-CN/userGuide/drop.md deleted file mode 100644 index d9f4ed402..000000000 --- a/site/zh-CN/userGuide/drop.md +++ /dev/null @@ -1,125 +0,0 @@ ---- -id: drop.md ---- - -# 删除操作 - -删除操作会影响已经插入 Milvus 系统的数据,请谨慎操作。 - -## 删除索引 - -调用 `drop_index()` 函数删除指定 collection 指定列的索引: - -
- 当前版本 Milvus 仅支持于向量 field 上创建或删除索引。未来版本 Milvus 将支持于标量 field 上创建或删除索引。 -
- -{{fragments/multiple_code.md}} - -```python ->>> collection.drop_index() -``` - -```javascript -await milvusClient.indexManager.dropIndex({ - collection_name: COLLECTION_NAME, -}); -``` - -
- 详细资讯 - - - - - - - - - - - - - - -
参数 - 说明备注
collection_name**要删除索引的 collection 名称必填项
-
- -## 删除 partition - -调用 `drop_partition()` 删除指定 partition 及其中的数据: - -{{fragments/multiple_code.md}} - -```python ->>> collection.drop_partition(partition_name=partition_name) -``` - -```javascript -await milvusClient.partitionManager.dropPartition({ - collection_name: COLLECTION_NAME, - partition_name: PARTITION_NAME, -}); -``` - -
- 详细资讯 - - - - - - - - - - - - - - - - - - - -
参数 - 说明备注
partition_name要删除的 partition 名称必填项
collection_name**要删除的 partition 所属 collection 名称必填项
-
- -## 删除 collection - -调用 `drop_collection()` 删除指定 collection: - -{{fragments/multiple_code.md}} - -```python ->>> collection.drop() -``` - -```javascript -await milvusClient.collectionManager.dropCollection({ - collection_name: COLLECTION_NAME, -}); -``` - -
- 详细资讯 - - - - - - - - - - - - - - -
参数 - 说明备注
collection_name**要删除的 collection 名称必填项
-
diff --git a/site/zh-CN/userGuide/insert.md b/site/zh-CN/userGuide/insert.md deleted file mode 100644 index b9dc2da75..000000000 --- a/site/zh-CN/userGuide/insert.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -id: insert.md ---- - -# 在集合中插入数据 - -你可以通过以下步骤在指定 collection 的指定 partition 中插入数据。 - -1.随机生成待插入的数据: - -{{fragments/multiple_code.md}} - -```python ->>> import random ->>> vectors = [[random.random() for _ in range(8)] for _ in range(10)] ->>> entities = [vectors] -``` - -```javascript -const entities = Array.from({ length: 10 }, () => ({ - [FIELD_NAME]: Array.from({ length: 8 }, () => Math.floor(Math.random() * 10)), -})); -``` - -2. 调用以上函数将随机生成的数据插入新创建的 collection 中。Milvus 会为每条插入的数据自动生成 ID,类似于关系型数据库中的 AutoID。 - -_Milvus 将返回 `MutationResult`,其中包含插入数据对应的主键列 `primary_keys`。_ - -{{fragments/multiple_code.md}} - -```python ->>> mr = collection.insert(entities) -# 输出 `MutationResult` 的主键列 ->>> mr.primary_keys -[425790736918318406, 425790736918318407, 425790736918318408, ...] -``` - -```javascript -await milvusClient.dataManager.insert({{ - collection_name: COLLECTION_NAME, - fields_data: entities, -}); -``` - -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - - - - - -
参数 - 说明备注
data要插入 Milvus 的数据必填项
collection_name**要将数据插入的 collection 名称必填项
partition_name要将数据插入的 partition 名称选填项
-
- -3. 调用 insert() 函数时指定 `partitiont_name` 可以将向量插入到指定的 Partition 中: - -{{fragments/multiple_code.md}} - -```python ->>> collection.insert(data=entities, partition_name=partition_name) -``` - -```javascript -await milvusClient.dataManager.insert({{ - collection_name: COLLECTION_NAME, - partition_name: partition_name - fields_data: entities, -}); -``` - -4. 插入的数据将存储在 Milvus 内存中。调用 `flush()` 函数将数据落盘: - -{{fragments/multiple_code.md}} - -```python ->>> pymilvus.utility.get_connection().flush([collection_name]) -``` - -```javascript -await milvusClient.dataManager.flush({ collection_names: [COLLECTION_NAME] }); -``` - -
- 详细资讯 - - - - - - - - - - - - - - -
参数 - 说明备注
collection_name要处理的 collection 名称必填项
-
diff --git a/site/zh-CN/userGuide/manage_collection.md b/site/zh-CN/userGuide/manage_collection.md new file mode 100644 index 000000000..92d42bb57 --- /dev/null +++ b/site/zh-CN/userGuide/manage_collection.md @@ -0,0 +1,271 @@ +--- +id: manage_collection.md +related_key: create collection +summary: Learn how to manage collections in Milvus. +--- + +# Manage Collections + +This topic describes how to manage collections in Milvus. + +A collection consists of one or more partitions. While creating a new collection, Milvus creates a default partition `_default`. See [Glossary - Collection](glossary.md#Collection) for more information. + +The following example is based on a two-shard collection named `example_collection` with an eight-dimension float vector field, and an INT64, `auto_id` enabled primary key field. + + +## Create a collection + +
+
    +
  • You can create collections only after connecting to Milvus server.
  • +
  • The collection to create must contain a primary key field. INT64 is the only supported data type for the primary key field in the current release of Milvus.
  • +
+
+ + +First, prepare necessary parameters, including field schema, collection schema, and collection name. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import CollectionSchema, FieldSchema, DataType +>>> pk = FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True) +>>> field = FieldSchema(name="example_field", dtype=DataType.FLOAT_VECTOR, dim=8) +>>> schema = CollectionSchema(fields=[pk,field], description="example collection") +>>> collection_name = "example_collection" +``` + +```javascript +const params = { + collection_name: "example_collection", + description: "example collection", + fields: [ + { + name: "example_field", + description: "", + data_type: 101, // DataType.FloatVector + type_params: { + dim: "8", + }, + }, + { + name: "pk", + data_type: 5, // DataType.Int64 + autoID: true, + is_primary_key: true, + description: "", + }, + ], +}; +``` + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
FieldSchemaSchema of the fields within the collection to create. Refer to Field Schema for more information.
CollectionSchemaSchema of the collection to create. Refer to Collection Schema for more information.
collection_nameName of the collection to create.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to create.
descriptionDescription of the collection to create.
fieldsSchema of the field and the collection to create. Refer to Field Schema and Collection Schema for more information.
data_typeData type of the field to create. Refer to data type reference number for more information.
+ +Then, create a collection with the parameters you created above. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection(name=collection_name, schema=schema, using='default', shards_num=2) +``` + +```javascript +await milvusClient.collectionManager.createCollection(params); +``` + + + + + + + + + + + + + + + + + + +
ParameterDescription
using (optional)By specifying the server alias here, you can choose in which Milvus server you create a collection.
shards_num (optional)Number of the shards for the collection to create.
+ + + + +## Verify if a collection exists + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import utility +>>> utility.has_collection("example_collection") +``` + +```javascript +await milvusClient.collectionManager.hasCollection({ + collection_name: "example_collection", +}); +``` + + + +## List all collections + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import utility +>>> utility.list_collections() +``` + +```javascript +await milvusClient.collectionManager.showCollections(); +``` + +## View collection statistics + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.num_entities +``` + +```javascript +await milvusClient.collectionManager.getCollectionStatistics({ collection_name: "example_collection",}); +``` + + +## Load a collection + +All CRUD operations within Milvus are executed in memory. Load the collection to memory before searching or deleting data. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.load() +``` + +```javascript +await milvusClient.collectionManager.loadCollection({ + collection_name: "example_collection", +}); +``` + + + + + + + + + + + + + + +
ParameterDescription
partition_name (optional)Name of the partition to load.
+ + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to load.
+ +## Drop a collection + +Remove a collection and the data within. + +
+The drop operation is irreversible. Dropping a collection deletes all data within it. +
+ + +{{fragments/multiple_code.md}} + +```python +>>> collection.drop("example_collection") +``` + +```javascript +await milvusClient.collectionManager.dropCollection({ collection_name: "example_collection",}); +``` + +## What's next + +- Learn more basic operations of Milvus: + - [Insert data into Milvus](manage_data.md) + - [Create a partition](manage_partition.md) + - [Build an index for vectors](manage_index.md) + - [Conduct a vector search](search.md) + - [Conduct a hybrid search](hybridsearch.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) + diff --git a/site/zh-CN/userGuide/manage_connection.md b/site/zh-CN/userGuide/manage_connection.md new file mode 100644 index 000000000..2dfe524fd --- /dev/null +++ b/site/zh-CN/userGuide/manage_connection.md @@ -0,0 +1,117 @@ +--- +id: manage_connection.md +related_key: connect Milvus +summary: Learn how to connect to a Milvus server. +--- + +# Manage Milvus Connections + +This topic describes how to connect to and disconnect from a Milvus server. + +
+ Make sure you are connected to the Milvus server before performing any operations. +
+ +Below example connects to a Milvus server with host as `localhost` and port as `19530` and disconnects from it. + + +## Connect to a Milvus server + +Construct a Milvus connection. Ensure to connect to Milvus server before any operations. + +{{fragments/multiple_code.md}} + +```python +# Run `python3` in your terminal to operate in the Python interactive mode. +>>> from pymilvus import connections +>>> connections.connect(alias="default", host='localhost', port='19530') +``` + +```javascript +import { MilvusClient } from "@zilliz/milvus2-sdk-node"; +const milvusClient = new MilvusClient("localhost:19530"); +``` + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
aliasAlias of the Milvus connection to construct.
hostIP address of the Milvus server.
portPort of the Milvus server.
+ + + + + + + + + + + + + + +
ParameterDescription
addressAddress of the Milvus connection to construct.
+ +## Disconnect from a Milvus server + +Disconnect from a Milvus server. + +{{fragments/multiple_code.md}} + +```python +>>> connections.disconnect("default") +``` + + +```javascript +await milvusClient.closeConnection(); +``` + + + + + + + + + + + + + + +
ParameterDescription
aliasAlias of the Milvus server to disconnect from.
+ +## What's next + +Having connected to a Milvus server, you can: + +- [Create a collection](manage_collection.md) +- [Manage data](manage_data.md) +- [Build a vector index](manage_index.md) +- [Conduct a vector search](search.md) +- [Conduct a hybrid search](hybridsearch.md) + +For advanced operations, check: + +- [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) +- [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) + diff --git a/site/zh-CN/userGuide/manage_data.md b/site/zh-CN/userGuide/manage_data.md new file mode 100644 index 000000000..f9ca2ad52 --- /dev/null +++ b/site/zh-CN/userGuide/manage_data.md @@ -0,0 +1,223 @@ +--- +id: manage_data.md +related_key: insert, delete +summary: Learn how to insert and delete data in Milvus. +--- + +# Manage Data + +This topic describes how to insert and delete data in Milvus. + +## Insert data + +First, prepare the data to insert. + +This topic inserts randomly generated 2,000 rows of eight-dimensional vector data as the example data. Real applications will likely use much higher dimensional vectors than this. You can prepare your own data to replace the example. + +{{fragments/multiple_code.md}} + +```python +>>> import random +>>> vectors = [[random.random() for _ in range(8)] for _ in range(2000)] +>>> entities = [vectors] +``` + +```javascript +const entities = Array.from({ length: 2000 }, () => ({ + ["example_field"]: Array.from({ length: 8 }, () => Math.random()), +})); +``` + +Insert the data to the collection. By specifying `partition_name`, you can decide to which partition to insert the data. + +With the collection schema `auto_id` enabled, Milvus automatically assigns an ID (primary key value) to each inserted data. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import collection +>>> collection = Collection("example_collection") # Get an existing collection. 
+>>> mr = collection.insert(entities) +``` + +```javascript +const mr = await milvusClient.dataManager.insert({ + collection_name: "example_collection", + fields_data: entities, +}); +``` + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to get.
dataData to insert into Milvus.
partition_name (optional)Name of the partition to insert data into.
+ + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to insert data into.
partition_name (optional)Name of the partition to insert data into.
fields_dataData to insert into Milvus.
+ + +After the data are inserted, Milvus returns `MutationResult` as an object. You can check the value of `MutationResult`, which contains the corresponding primary keys of the inserted data. + +{{fragments/multiple_code.md}} + +```python +>>> mr.primary_keys +``` + +```javascript +console.log(mr.IDs) +``` + +``` +[425790736918318406, 425790736918318407, 425790736918318408, ...] +``` + + + +## Delete entities + +Milvus supports deleting entities by primary key specified with boolean expression. + + +
+
    +
  • The delete operation is irreversible. Deleted entities cannot be retrieved again.
  • +
  • Frequent delete operations will impact the system performance.
  • +
+
+ +All CRUD operations within Milvus are executed in memory. Before deleting, load the collection that contains the entities you expect to delete to memory. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.load() +``` + +```javascript +await milvusClient.collectionManager.loadCollection({ + collection_name: "example_collection", +}); +``` + + + +Prepare the boolean expression that filters the entities to delete. See [Boolean Expression Rules](boolean.md) for more information. + +The following example filters data with primary key values of `425790736918318406` and `425790736918318407`. + +{{fragments/multiple_code.md}} + +```python +>>> expr = "pk in [425790736918318406,425790736918318407]" +``` + +```javascript +const expr = "pk in [425790736918318406,425790736918318407]"; +``` + + +Delete the entities with the boolean expression you created. By specifying `partition_name`, you can decide from which partition to delete the entities and thus save the resources. + +{{fragments/multiple_code.md}} + +```python +>>> collection.delete(expr) +``` + +```javascript +await milvusClient.dataManager.deleteEntities({ + collection_name: "example_collection", + expr: expr, +}); +``` + + + + + + + + + + + + + + + + + + + +
ParameterDescription
exprBoolean expression that specifies the entities to delete.
partition_name (optional)Name of the partition to delete entities from.
+ + + + +You can verify the delete operation by checking the number of entities after deleting. + +```python +>>> collection.num_entities +1998 +``` + +```javascript +const res = await collectionManager.getCollectionStatistics({ + collection_name: "example_collection", +}); +console.log(res.data.row_count); +``` + + +## What's next + +- Learn more basic operations of Milvus: + - [Build an index for vectors](manage_index.md) + - [Conduct a vector search](search.md) + - [Conduct a hybrid search](hybridsearch.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) + diff --git a/site/zh-CN/userGuide/manage_index.md b/site/zh-CN/userGuide/manage_index.md new file mode 100644 index 000000000..598452530 --- /dev/null +++ b/site/zh-CN/userGuide/manage_index.md @@ -0,0 +1,145 @@ +--- +id: manage_index.md +related_key: create index +summary: Learn how to build an index for vectors in Milvus. +--- + +# Manage Indexes + +This topic describes how to manage indexes in Milvus. See [Vector Index](index.md) and [Index Selection](index_selection.md) for more information. + +Vector indexes are an organizational unit of metadata used to accelerate [vector similarity search](search.md). Without index built on vectors, Milvus will perform a brute-force search by default. + +
+
    +
  • The current release of Milvus only supports building and dropping an index on a vector field. Future releases will support these operations on scalar fields.
  • +
  • By default, Milvus does not index a segment with less than 1,024 rows. To change this parameter, configure minSegmentSizeToEnableIndex in root_coord.yaml.
  • +
+ +## Build an index + +The following example builds a 1024-cluster IVF_FLAT index with Euclidean distance (L2) as the similarity metrics. You can choose the index and metric that suit your scenario. + +Prepare the index parameters. + +{{fragments/multiple_code.md}} + +```python +>>> index_params = { + "metric_type":"L2", + "index_type":"IVF_FLAT", + "params":{"nlist":1024} + } +``` + +```javascript +const index_params = { + metric_type: "L2", + index_type: "IVF_FLAT", + params: JSON.stringify({ nlist: 1024 }), +}; +``` + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
metric_typeType of metrics used to measure similarity of vectors. Find more options in Similarity Metrics.
index_typeType of index used to accelerate the vector search. Find more options in Index Selection.
paramsBuilding parameter(s) specific to the index. See Index Selection for more information.
+ + +Build the index by specifying the vector field name and index parameters. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.create_index(field_name="example_field", index_params=index_params) +``` + +```python +Status(code=0, message='') +``` + +```javascript +await milvusClient.indexManager.createIndex({ + collection_name: "example_collection", + field_name: "example_field", + extra_params: index_params, +}); +``` + + + +## View index details + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.index().params +``` + +```python +{'metric_type': 'L2', 'index_type': 'IVF_FLAT', 'params': {'nlist': 1024}} +``` + +```javascript +await milvusClient.indexManager.describeIndex({ + collection_name: "example_collection", +}); +``` + + +## Drop an index + +Drop the index if you are sure that you do not want to use it anymore. + +
+The drop operation is irreversible. Dropping an index removes all corresponding index files. +
+ + + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.drop_index() +``` + +```javascript +await milvusClient.indexManager.dropIndex({ + collection_name: "example_collection", +}); +``` + +## What's next + +- Learn more basic operations of Milvus: + - [Conduct a vector search](search.md) + - [Conduct a hybrid search](hybridsearch.md) + - [Search with Time Travel](timetravel.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) + diff --git a/site/zh-CN/userGuide/manage_partition.md b/site/zh-CN/userGuide/manage_partition.md new file mode 100644 index 000000000..1ada4a174 --- /dev/null +++ b/site/zh-CN/userGuide/manage_partition.md @@ -0,0 +1,142 @@ +--- +id: manage_partition.md +related_key: Partition +summary: Learn how to manage partitions in Milvus. + +--- + +# Manage Partitions + +This topic describes how to manage partitions in Milvus. + +Milvus allows you to divide the bulk of vector data into a small number of partitions. Search and other operations can then be limited to one partition to improve the performance. + +A collection consists of one or more partitions. While creating a new collection, Milvus creates a default partition `_default`. See [Glossary - Partition](glossary.md#Partition) for more information. + +The following example is based on a partition `example_partition` in the collection `example_collection`. + +## Create a partition + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. 
+>>> partition = collection.create_partition("example_partition") +``` + +```javascript +await milvusClient.partitionManager.createPartition({ + collection_name: "example_collection", + partition_name: "example_partition", +}); +``` + + + + + + + + + + + + + + + + + + +
ParameterDescription
partition_nameName of the partition to create.
description (optional)Description of the partition to create.
+ + + + + + + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to create a partition in.
partition_nameName of the partition to create.
+ +## List all partitions + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("example_collection") # Get an existing collection. +>>> collection.partitions +``` + +```javascript +await milvusClient.partitionManager.showPartitions({ + collection_name: "example_collection", +}); +``` + + + +## Verify if a partition exists + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection.has_partition("example_partition") +``` + +```javascript +await milvusClient.partitionManager.hasPartition({ + collection_name: "example_collection", + partition_name: "example_partition", +}); +``` + + +## Drop a partition + +Remove a partition. + +
+The drop operation is irreversible. Dropping a partition deletes all data within it. +
+ + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection.drop_partition("example_partition") +``` + +```javascript +await milvusClient.partitionManager.dropPartition({ + collection_name: "example_collection", + partition_name: "example_partition", +}); +``` + +## What's next + +- Learn more basic operations of Milvus: + - [Insert data into Milvus](manage_data.md) + - [Build an index for vectors](manage_index.md) + - [Conduct a vector search](search.md) + - [Conduct a hybrid search](hybridsearch.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) + diff --git a/site/zh-CN/userGuide/query.md b/site/zh-CN/userGuide/query.md deleted file mode 100644 index cec32987c..000000000 --- a/site/zh-CN/userGuide/query.md +++ /dev/null @@ -1,280 +0,0 @@ ---- -id: query.md -title: 结构化匹配 ---- - -# 结构化匹配 - -Milvus 除了支持存储向量数据外,还支持存储 bool、int、float 等类型的结构化数据,并且提供了结构化数据的匹配功能。结构化匹配是一个全量检索的过程,Milvus 会返回满足条件的所有数据。结构化匹配使用[布尔表达式(boolean expression)](https://milvus.io/cn/docs/v2.0.0/boolean.md)来表示匹配条件。 - -1. 连接至 Milvus 服务器: - -{{fragments/multiple_code.md}} - -```python ->>> from pymilvus import connections ->>> connections.connect("default", host='localhost', port='19530') -``` - -```javascript -import { MilvusClient } from "@zilliz/milvus2-sdk-node"; -const milvusClient = new MilvusClient("localhost:19530"); -``` - -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
参数 - 说明备注
alias*Milvus 服务器的名称数据类型: String
必填项
host*Milvus 服务器的 IP必填项
port*Milvus 服务器的端口必填项
address**Milvus 服务器的地址"server_IP:server_port"
必填项
-
- -2. 准备 collection 参数并创建 collection: - -{{fragments/multiple_code.md}} - -```python ->>> from pymilvus import Collection, FieldSchema, CollectionSchema, DataType ->>> collection_name = "test_collection_search" ->>> schema = CollectionSchema([ -... FieldSchema("film_id", DataType.INT64, is_primary=True), -... FieldSchema("film_date", DataType.INT64), -... FieldSchema("films", dtype=DataType.FLOAT_VECTOR, dim=2) -... ]) ->>> collection = Collection(collection_name, schema) -``` - -```javascript -const COLLECTION_NAME = "example_collection"; -const FIELD_NAME = "example_field"; - -const params = { - collection_name: COLLECTION_NAME, - fields: [ - { - name: "films", - description: "vector field", - data_type: DataType.FloatVector, - - type_params: { - dim: "8", - }, - }, - { - name: "film_id", - data_type: DataType.Int64, - autoID: false, - is_primary_key: true, - description: "", - }, - ], -}; - -await milvusClient.collectionManager.createCollection(params); -``` - -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
参数 - 说明备注
collection_name要建立的 collection 名称数据类型: String
field_namecollection 中的字段名称数据类型: String
Schema用于建立 collection 及其中字段。详细说明请参考 field schema and collection schema 
descriptioncollection 的说明数据类型: String
using*在此处标明服务器名称,以指定要建立 collection 的 Milvus 服务器。选填项
shards_num*指定 collection 要建立的 shards 数目选填项
-
- -3. 随机生成向量数据并插入新建 collection 中: - -{{fragments/multiple_code.md}} - -```python ->>> import random ->>> data = [ -... [i for i in range(10)], -... [1990 + i for i in range(10)], -... [[random.random() for _ in range(2)] for _ in range(10)], -... ] ->>> collection.insert(data) ->>> collection.num_entities -10 -``` - -```javascript -let id = 1; -const entities = Array.from({ length: 10 }, () => ({ - films: Array.from({ length: 2 }, () => Math.random() * 10), - film_id: id++, -})); - -await milvusClient.dataManager.insert({{ - collection_name: COLLECTION_NAME, - fields_data: entities, -}); -``` - -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - - - - - -
参数 - 说明备注
data要插入 Milvus 的数据必填项
partition_name要将数据插入的 partition 名称选填项
timeout*RPC 允许的时限(秒钟数)。设定成空值时,客户端会等待服务器回应或产生错误。选填项
-
- -4. 将 collection 加载到内存中并进行结构化匹配: - -{{fragments/multiple_code.md}} - -```python ->>> collection.load() ->>> expr = "film_id in [2,4,6,8]" ->>> output_fields = ["film_id", "film_date"] ->>> res = collection.query(expr, output_fields) -``` - -```javascript -await milvusClient.collectionManager.loadCollection({ - collection_name: COLLECTION_NAME, -}); - -await milvusClient.dataManager.query({ - collection_name: COLLECTION_NAME, - expr: "film_id in [2,4,6,8]", - output_fields: ["film_id"], -}); -``` - -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - - - - - -
参数 - 说明备注
collection_name**要载入并查询的 collection 名称必填项
expr筛选属性用的布林表达式布林表达式规则中查询其他表达式资讯。
选填项
output_fields要传回的 field 名称(向量 field 在目前版本不支持)必填项
-
- -
-在当前版本中,加载数据最大值不能超过所有 query node 内存总量的 70%,从而为执行引擎预留内存资源。 -
- -5. 检查返回结果: - -{{fragments/multiple_code.md}} - -```python ->>> sorted_res = sorted(res, key=lambda k: k['film_id']) ->>> sorted_res -[{'film_id': 2, 'film_date': 1992}, - {'film_id': 4, 'film_date': 1994}, - {'film_id': 6, 'film_date': 1996}, - {'film_id': 8, 'film_date': 1998}] -``` - -```javascript -// query result -[{ film_id: "2" }, { film_id: "4" }, { film_id: "6" }, { film_id: "8" }]; -``` diff --git a/site/zh-CN/userGuide/search/expression.md b/site/zh-CN/userGuide/search/expression.md deleted file mode 100644 index f28cb3316..000000000 --- a/site/zh-CN/userGuide/search/expression.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -id: expression.md ---- - - -# 表达式 - -Milvus 通过表达式搜索实现标量过滤。表达式是一种布尔值函数,取值为 `true` 或 `false`。 - -查看 [Python SDK API Reference](/api-reference/pymilvus/{{var.milvus_python_sdk_version}}/api/collection.html) 以了解表达式使用说明。 - -## 表达式语法 - -表达式可以为 NONE 或逻辑表达式。 - -``` -Expr := LogicalExpr | NIL -``` - -### 表达式运算符种类 - -| 运算符 | 描述 | 示例 | -| ---------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -| 关系运算符 | 关系运算符使用符号检验两个表达式之间的相等、不相等或相对次序。Milvus 支持的关系运算符包括 `>`、`>=`、`<` 、`<=`、`==` 以及 `!=`。 | | -| 逻辑运算符 | 逻辑运算符作用于一个或两个表达式。Milvus 支持的逻辑运算符包括:AND &&、OR \|\| 以及 NOT !。 | | -| IN 运算符 | 当关键字 IN 左侧的表达式包含在右侧项目列表中时,满足 IN 条件。 | | - -## 关系运算符 - -关系运算符是比较两个表达式的符号。 运算符两边的数据类型必须保持一致。 - -Milvus 支持的关系运算符包括: - -- 等于(==) -- 不等于(!=) -- 大于(>) -- 大于等于(>=) -- 小于(<) -- 小于等于(<=) - -## 逻辑运算符 - -逻辑运算符分为一元运算符(UnaryLogicalOp)和二元运算符(BinaryLogicalOp)。一元运算符仅作用于一个逻辑表达式,而二元运算符将一个逻辑表达式与另一个逻辑表达式进行比较。 - -Milvus 支持的逻辑运算符包括: - -- NOT ! 
-- AND && -- OR || - -### 语法 - -``` -LogicalExpr := LogicalExpr BinaryLogicalOp LogicalExpr - | UnaryLogicalOp LogicalExpr - | "(" LogicalExpr ")" - | RelationalExpr - | InExpr -``` - -## IN 运算符 -IN 运算符将 field 中的值与常量数组中的每一项进行比对,以判断该数组是否包含该值。数组内各项须用逗号分隔。运算符左侧和右侧数值的类型必须保持一致。 - -### 语法 - -``` -InExpr := IDENTIFIER "in" ConstantArray -ConstantArray := "[" Constant+, "]" -``` - -## 评估顺序 - -Milvus 评估表达式的顺序如下表所示: - -1. 括号内的表达式 -2. NOT 运算符 -3. OR 运算符 -4. AND 运算符 - - -详细了解 [布尔表达式语法规则](boolean.md)。 diff --git a/site/zh-CN/userGuide/search/hybridsearch.md b/site/zh-CN/userGuide/search/hybridsearch.md index 3a6123bf6..2efd0d034 100644 --- a/site/zh-CN/userGuide/search/hybridsearch.md +++ b/site/zh-CN/userGuide/search/hybridsearch.md @@ -1,307 +1,238 @@ --- id: hybridsearch.md +related_key: filter +summary: Conduct a Hybrid Search with Milvus. --- -# 混合搜索 +# Conduct a Hybrid Search -除了向量以外,Milvus 还支持布尔值、整型、浮点等数据类型。在 Milvus 中,一个 collection 可以包含多个字段来代表数据特征或属性。Milvus 是一款灵活的向量数据库,还支持在向量相似度检索过程中进行标量字段过滤。 +This topic describes how to conduct a hybrid search. -混合搜索是一种向量相似度检索。在混合搜索时,你可以通过使用[布尔表达式(boolean expression)](boolean.md)进行标量字段过滤。 +A hybrid search is essentially a vector search with boolean filtering. By specifying [boolean expressions](boolean.md) that filter the scalar fields or the primary key field, you can limit your search with certain conditions. -1. 连接至 Milvus 服务器: +The following example shows how to perform a hybrid search on the basis of a regular [vector search](search.md). Suppose you want to search for certain books based on their vectorized introductions, but you only want those within a specific range of word count. You can then specify the boolean expression to filter the `word_count` field in the search parameters. Milvus will search for similar vectors only among those entities that match the expression. 
-{{fragments/multiple_code.md}} +## Preparations -```python -from pymilvus import connections -connections.connect("default", host='localhost', port='19530') -``` +The following example code demonstrates the steps prior to a search. -```javascript -import { MilvusClient } from "@zilliz/milvus2-sdk-node"; -const milvusClient = new MilvusClient("localhost:19530"); -``` - -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
参数 - 说明备注
alias*Milvus 服务器的名称数据类型: String
必填项
host*Milvus 服务器的 IP必填项
port*Milvus 服务器的端口必填项
address**Milvus 服务器的地址"server_IP:server_port"
必填项
-
- -2. 准备 collection 参数并创建 collection: - -{{fragments/multiple_code.md}} +If you work with your own dataset in an existing Milvus instance, you can move forward to the next step. ```python ->>> from pymilvus import Collection, FieldSchema, CollectionSchema, DataType ->>> collection_name = "test_collection_search" +>>> from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType +>>> connections.connect("default", host='localhost', port='19530') >>> schema = CollectionSchema([ -... FieldSchema("film_id", DataType.INT64, is_primary=True), -... FieldSchema("films", dtype=DataType.FLOAT_VECTOR, dim=2) -... ]) ->>> collection = Collection(collection_name, schema) + FieldSchema("book_id", DataType.INT64, is_primary=True), + FieldSchema("word_count", DataType.INT64), + FieldSchema("book_intro", dtype=DataType.FLOAT_VECTOR, dim=2) + ]) +>>> collection = Collection("test_book_search", schema, using='default', shards_num=2) +>>> import random +>>> data = [ + [i for i in range(2000)], + [i for i in range(10000, 12000)], + [[random.random() for _ in range(2)] for _ in range(2000)], + ] +>>> collection.insert(data) +>>> index_params = { + "metric_type":"L2", + "index_type":"IVF_FLAT", + "params":{"nlist":1024} + } +>>> collection.create_index("book_intro", index_params=index_params) ``` ```javascript -const COLLECTION_NAME = 'test_collection_search' -milvusClient.collectionManager.createCollection({ - collection_name: COLLECTION_NAME, +const { MilvusClient } =require("@zilliz/milvus2-sdk-node"); +const milvusClient = new MilvusClient("localhost:19530"); +const params = { + collection_name: "test_book_search", fields: [ { - name: "films", - description: "vector field", - data_type: DataType.FloatVector, + name: "book_intro", + description: "", + data_type: 101, // DataType.FloatVector type_params: { - dim:"2 - } + dim: "2", + }, }, - { - name: "film_id", - data_type: DataType.Int64, - autoID: false, + { + name: "book_id", + data_type: 5, // DataType.Int64 
is_primary_key: true, description: "", }, + { + name: "word_count", + data_type: 5, //DataType.Int64 + description: "", + }, ], +}; +await milvusClient.collectionManager.createCollection(params); +const entities = Array.from({ length: 2000 }, (v,k) => ({ + "book_intro": Array.from({ length: 2 }, () => Math.random()), + "book_id": k, + "word_count": k+10000, +})); +await milvusClient.dataManager.insert({ + collection_name: "test_book_search", + fields_data: entities, +}); +const index_params = { + metric_type: "L2", + index_type: "IVF_FLAT", + params: JSON.stringify({ nlist: 1024 }), +}; +await milvusClient.indexManager.createIndex({ + collection_name: "test_book_search", + field_name: "book_intro", + extra_params: index_params, }); ``` -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
参数 - 说明备注
collection_name要建立的 collection 名称数据类型: String
field_namecollection 中的 field 名称数据类型: String
Schema用于建立 collection 及其中的 field。详细说明请参考 field schema and collection schema 
descriptioncollection 的说明数据类型: String
using*在此处标明服务器名称,以指定要建立 collection 的 Milvus 服务器。选填项
shards_num*指定 collection 要建立的 shards 数目选填项
-
+## Load collection -3. 随机生成向量数据并插入新建 collection 中: +All CRUD operations within Milvus are executed in memory. Load the collection to memory before conducting a vector query. {{fragments/multiple_code.md}} ```python ->>> import random ->>> data = [ -... [i for i in range(10)], -... [[random.random() for _ in range(2)] for _ in range(10)], -... ] ->>> collection.insert(data) ->>> collection.num_entities -10 +>>> from pymilvus import Collection +>>> collection = Collection("test_book_search") # Get an existing collection. +>>> collection.load() ``` ```javascript -let id = 1; -const entities = Array.from({ length: 10 }, () => ({ - films: Array.from({ length: 2 }, () => Math.random() * 10), - film_id: id++, -})); - -await milvusClient.collectionManager.insert({ - collection_name: COLLECTION_NAME, - fields_data: entities, +await milvusClient.collectionManager.loadCollection({ + collection_name: "test_book_search", }); ``` -
- 详细资讯 - - - - - - - - - - - - - - - - - - - - - - - - -
参数 - 说明备注
data要插入 Milvus 的数据必填项
partition_name要将数据插入的 partition 名称选填项
timeout*RPC 允许的时限(秒钟数)。设定成空值时,客户端会等待服务器回应或产生错误。选填项
-
-4. 将集合加载到内存中并进行向量相似度检索: +
+In the current release, the volume of data to load must be under 70% of the total memory resources of all query nodes, to reserve memory resources for the execution engine. 
+ +## Conduct a hybrid vector search + +By specifying the boolean expression, you can filter the scalar field of the entities during the vector search. The following example limits the scale of search to the vectors within a specified `word_count` value range. {{fragments/multiple_code.md}} ```python ->>> collection.load() >>> search_param = { -... "data": [[1.0, 1.0]], -... "anns_field": "films", -... "param": {"metric_type": "L2"}, +... "data": [[0.1, 0.2]], +... "anns_field": "book_intro", +... "param": {"metric_type": "L2", "params": {"nprobe": 10}}, ... "limit": 2, -... "expr": "film_id in [2,4,6,8]", +... "expr": "word_count <= 11000", ... } >>> res = collection.search(**search_param) ``` ```javascript -await milvusClient.collectionManager.loadCollection({ - collection_name: COLLECTION_NAME, -}); -await milvusClient.dataManager.search({ - collection_name: COLLECTION_NAME, - // partition_names: [], - expr: "film_id in [1,4,6,8]", - vectors: [entities[0].films], +const results = await milvusClient.dataManager.search({ + collection_name: "test_book_search", + expr: "word_count <= 11000", + vectors: [[0.1, 0.2]], search_params: { - anns_field: "films", - topk: "4", + anns_field: "book_intro", + topk: "2", metric_type: "L2", params: JSON.stringify({ nprobe: 10 }), }, - vector_type: 100, // float vector -> 100 + vector_type: 101, // DataType.FloatVector, }); ``` -
- 详细资讯 - +
- - + + - - - - + + + - - - + + - - - - + + + - - - - - - - + + - - - - + + + - - - - + + + + + + + + + + + + + + + + +
参数 - 说明备注ParameterDescription
collection_name**要载入并查询的 collection 名称必填项
dataVectors to search with.
vectors要查询的向量。数据的数目表示查询数量 nq必填项anns_fieldName of the field to search on.
anns_field要查询的字段名称必填项
paramsSearch parameter(s) specific to the index. See Index Selection for more information.
params*查询索引的参数可在选择索引中查看不同索引的更多参数详细资讯。
必填项
limit*传回多少条最接近的结果必填项limitNumber of the most similar results to return.
expr筛选属性用的布林表达式布林表达式规则中查询其他表达式资讯。
选填项
exprBoolean expression used to filter attribute. See Boolean Expression Rules for more information.
partition_names要查询的 partition 名称选填项
partition_names (optional)List of names of the partition to search in.
output_fields (optional)Name of the field to return. Vector field is not supported in current release.
timeout (optional)A duration of time in seconds to allow for RPC. Clients wait until server responds or error occurs when it is set to None.
round_decimal (optional)Number of decimal places of returned distance.
+ + + - - - + + + + - - - + + - - - + + + + + + + + + + + + + + + + + + + + + +
output_fields要传回的字段名称(向量字段在目前版本不支持)必填项ParameterDescription
timeout*RPC 允许的时限(秒钟数)。设定成空值时,客户端会等待服务器回应或产生错误。选填项collection_nameName of the collection to search in.
vector_type**预先检查二进制或浮点数向量。二进制为 100 而浮点数为 101必填项search_paramsParameters (as an object) used for search.
vectorsVectors to search with.
vector_typePre-check of binary or float vectors. 100 for binary vectors and 101 for float vectors.
partition_names (optional)List of names of the partition to search in.
expr (optional)Boolean expression used to filter attribute. See Boolean Expression Rules for more information.
+ output_fields (optional) + Name of the field to return. Vector field is not supported in current release. 
-
- -
-在当前版本中,加载数据最大值不能超过所有 query node 内存总量的 70%,从而为执行引擎预留内存资源。 -
-5. 检查返回结果: +Check the returned results: {{fragments/multiple_code.md}} @@ -310,20 +241,16 @@ await milvusClient.dataManager.search({ >>> hits = res[0] >>> assert len(hits) == 2 >>> print(f"- Total hits: {len(hits)}, hits ids: {hits.ids} ") -- Total hits: 2, hits ids: [2, 4] >>> print(f"- Top1 hit id: {hits[0].id}, distance: {hits[0].distance}, score: {hits[0].score} ") -- Top1 hit id: 2, distance: 0.10143111646175385, score: 0.101431116461 ``` ```javascript -// search result will be like: -{ - status: { error_code: 'Success', reason: '' }, - results: [ - { score: 0, id: '1' }, - { score: 9.266796112060547, id: '4' }, - { score: 28.263811111450195, id: '8' }, - { score: 41.055686950683594, id: '6' } - ] -} +console.log(results.results) ``` +## What's next + +- Learn more basic operations of Milvus: + - [Search with Time Travel](timetravel.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) \ No newline at end of file diff --git a/site/zh-CN/userGuide/search/query.md b/site/zh-CN/userGuide/search/query.md new file mode 100644 index 000000000..6cce3024c --- /dev/null +++ b/site/zh-CN/userGuide/search/query.md @@ -0,0 +1,207 @@ +--- +id: query.md +related_key: query vectors +summary: Learn how to query vectors in Milvus. +--- + +# Conduct a Vector Query + +This topic describes how to conduct a vector query. + +Unlike a vector similarity search, a vector query retrieves vectors via scalar filtering based on [boolean expression](boolean.md). Milvus supports many data types in the scalar fields and a variety of boolean expressions. The boolean expression filters on scalar fields or the primary key field, and it retrieves all results that match the filters. 
+ +The following example shows how to perform a vector query on a 2000-row dataset of book ID (primary key), word count (scalar field), and book introduction (vector field), simulating the situation where you query for certain books based on their IDs. + +## Preparations + +The following example code demonstrates the steps prior to a query. + +If you work with your own dataset in an existing Milvus server, you can move forward to the next step. + +```python +>>> from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType +>>> connections.connect("default", host='localhost', port='19530') +>>> schema = CollectionSchema([ + FieldSchema("book_id", DataType.INT64, is_primary=True), + FieldSchema("word_count", DataType.INT64), + FieldSchema("book_intro", dtype=DataType.FLOAT_VECTOR, dim=2) + ]) +>>> collection = Collection("test_book_search", schema, using='default', shards_num=2) +>>> import random +>>> data = [ + [i for i in range(2000)], + [i for i in range(10000, 12000)], + [[random.random() for _ in range(2)] for _ in range(2000)], + ] +>>> collection.insert(data) +>>> index_params = { + "metric_type":"L2", + "index_type":"IVF_FLAT", + "params":{"nlist":1024} + } +>>> collection.create_index("book_intro", index_params=index_params) +``` + +```javascript +const { MilvusClient } =require("@zilliz/milvus2-sdk-node"); +const milvusClient = new MilvusClient("localhost:19530"); +const params = { + collection_name: "test_book_search", + fields: [ + { + name: "book_intro", + description: "", + data_type: 101, // DataType.FloatVector + type_params: { + dim: "2", + }, + }, + { + name: "book_id", + data_type: 5, // DataType.Int64 + is_primary_key: true, + description: "", + }, + { + name: "word_count", + data_type: 5, //DataType.Int64 + description: "", + }, + ], +}; +await milvusClient.collectionManager.createCollection(params); +const entities = Array.from({ length: 2000 }, (v,k) => ({ + "book_intro": Array.from({ length: 2 }, () => Math.random()), 
+ "book_id": k, + "word_count": k+10000, +})); +await milvusClient.dataManager.insert({ + collection_name: "test_book_search", + fields_data: entities, +}); +const index_params = { + metric_type: "L2", + index_type: "IVF_FLAT", + params: JSON.stringify({ nlist: 1024 }), +}; +await milvusClient.indexManager.createIndex({ + collection_name: "test_book_search", + field_name: "book_intro", + extra_params: index_params, +}); +``` + +## Load collection + +All CRUD operations within Milvus are executed in memory. Load the collection to memory before conducting a vector query. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("test_book_search") # Get an existing collection. +>>> collection.load() +``` + +```javascript +await milvusClient.collectionManager.loadCollection({ + collection_name: "test_book_search", +}); +``` + + +
+In the current release, the volume of data to load must be under 70% of the total memory resources of all query nodes, to reserve memory resources for the execution engine. 
+ +## Conduct a vector query + +The following example filters the vectors with certain `book_id` values, and returns the `book_id` field and `book_intro` of the results. + +{{fragments/multiple_code.md}} + +```python +>>> res = collection.query(expr = "book_id in [2,4,6,8]", output_fields = ["book_id", "book_intro"]) +``` + +```javascript +const results = await milvusClient.dataManager.query({ + collection_name: "test_book_search", + expr: "book_id in [2,4,6,8]", + output_fields: ["book_id", "book_intro"], +}); +``` + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
exprBoolean expression used to filter attribute. Find more expression details in Boolean Expression Rules.
output_fields (optional)List of names of the field to return.
partition_names (optional)List of names of the partitions to query on.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterDescription
collection_nameName of the collection to query.
exprBoolean expression used to filter attribute. Find more expression details in Boolean Expression Rules.
output_fields (optional)List of names of the field to return.
partition_names (optional)List of names of the partitions to query on.
+ +Check the returned results. + +{{fragments/multiple_code.md}} + +```python +>>> sorted_res = sorted(res, key=lambda k: k['book_id']) +>>> sorted_res +``` + +```javascript +console.log(results.data) +``` + +## What's next + +- Learn more basic operations of Milvus: + - [Conduct a vector search](search.md) + - [Conduct a hybrid search](hybridsearch.md) + - [Search with Time Travel](timetravel.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) diff --git a/site/zh-CN/userGuide/search/search.md b/site/zh-CN/userGuide/search/search.md index 6cbc76f7a..be7dfa2e3 100644 --- a/site/zh-CN/userGuide/search/search.md +++ b/site/zh-CN/userGuide/search/search.md @@ -1,12 +1,122 @@ --- id: search.md +related_key: search +summary: Conduct a vector similarity search with Milvus. --- -# 查询向量 +# Conduct a Vector Similarity Search -通过本章节文档,你将了解如何在 Milvus 中进行相似性搜索。 +This topic describes how to search entities with Milvus. -1. 创建搜索参数: +A vector similarity search in Milvus calculates the distance between query vector(s) and vectors in the collection with specified similarity metrics, and returns the most similar results. By specifying a [boolean expression](boolean.md) that filters the scalar field or the primary key field, you can perform a [hybrid search](hybridsearch.md) or even a search with [Time Travel](timetravel.md). + +The following example shows how to perform a vector similarity search on a 2000-row dataset of book ID (primary key), word count (scalar field), and book introduction (vector field), simulating the situation that you search for certain books based on their vectorized introductions. Milvus will return the most similar results according to the query vector and search parameters you have defined. 
+ +## Preparations + +The following example code demonstrates the steps prior to a search. + +If you work with your own dataset in an existing Milvus instance, you can move forward to the next step. + +```python +>>> from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType +>>> connections.connect("default", host='localhost', port='19530') +>>> schema = CollectionSchema([ + FieldSchema("book_id", DataType.INT64, is_primary=True), + FieldSchema("word_count", DataType.INT64), + FieldSchema("book_intro", dtype=DataType.FLOAT_VECTOR, dim=2) + ]) +>>> collection = Collection("test_book_search", schema, using='default', shards_num=2) +>>> import random +>>> data = [ + [i for i in range(2000)], + [i for i in range(10000, 12000)], + [[random.random() for _ in range(2)] for _ in range(2000)], + ] +>>> collection.insert(data) +>>> index_params = { + "metric_type":"L2", + "index_type":"IVF_FLAT", + "params":{"nlist":1024} + } +>>> collection.create_index("book_intro", index_params=index_params) +``` + +```javascript +const { MilvusClient } =require("@zilliz/milvus2-sdk-node"); +const milvusClient = new MilvusClient("localhost:19530"); +const params = { + collection_name: "test_book_search", + fields: [ + { + name: "book_intro", + description: "", + data_type: 101, // DataType.FloatVector + type_params: { + dim: "2", + }, + }, + { + name: "book_id", + data_type: 5, //DataType.Int64 + is_primary_key: true, + description: "", + }, + { + name: "word_count", + data_type: 5, //DataType.Int64 + description: "", + }, + ], +}; +await milvusClient.collectionManager.createCollection(params); +const entities = Array.from({ length: 2000 }, (v,k) => ({ + "book_intro": Array.from({ length: 2 }, () => Math.random()), + "book_id": k, + "word_count": k+10000, +})); +await milvusClient.dataManager.insert({ + collection_name: "test_book_search", + fields_data: entities, +}); +const index_params = { + metric_type: "L2", + index_type: "IVF_FLAT", + params: 
JSON.stringify({ nlist: 1024 }), +}; +await milvusClient.indexManager.createIndex({ + collection_name: "test_book_search", + field_name: "book_intro", + extra_params: index_params, +}); +``` + +## Load collection + +All CRUD operations within Milvus are executed in memory. Load the collection to memory before conducting a vector similarity search. + +{{fragments/multiple_code.md}} + +```python +>>> from pymilvus import Collection +>>> collection = Collection("test_book_search") # Get an existing collection. +>>> collection.load() +``` + +```javascript +await milvusClient.collectionManager.loadCollection({ + collection_name: "test_book_search", +}); +``` + + +
+In the current release, the volume of data to load must be under 70% of the total memory resources of all query nodes, to reserve memory resources for the execution engine. 
+ +## Prepare search parameters + +Prepare the parameters that suit your search scenario. The following example defines that the search will calculate the distance with Euclidean distance, and retrieve vectors from ten closest clusters built by the IVF_FLAT index. {{fragments/multiple_code.md}} @@ -16,205 +126,181 @@ id: search.md ```javascript const searchParams = { - anns_field: "example_field", - topk: "4", + anns_field: "book_intro", + topk: "10", metric_type: "L2", params: JSON.stringify({ nprobe: 10 }), }; ``` -
- 详细资讯 - +
- - + + - - - - - - - + - + - - - - - - - - - - - - +
参数 - 说明备注ParameterDescription
metric_type用于评估向量相似性的计算方式可在 距离计算方式中查看其他选项。
必填项
index_type用于加速向量搜寻的索引类型可在选择索引中查看其他选项。
必填项
+ Metrics used to measure similarity of vectors. See Similarity Metrics for more information. 
params查询索引的参数可在选择索引中查看不同索引的更多参数详细资讯。
必填项
anns_field**要查询的字段名称必填项
topk**传回多少条最接近的结果必填项Search parameter(s) specific to the index. See Index Selection for more information.
-
- -2. 在查询向量前,将集合加载到内存中: - -{{fragments/multiple_code.md}} - -```python ->>> collection.load() -``` - -```javascript -await milvusClient.collectionManager.loadCollection({ - collection_name: COLLECTION_NAME, -}); -``` -
- 详细资讯 - +
- - + + + + + + - - - + + + + + + + + + +
参数 - 说明备注ParameterDescription
anns_fieldName of the field to search on.
collection_name**要载入的 collection 名称必填项topkNumber of the most similar results to return.
+ metric_type + Metrics used to measure similarity of vectors. See Similarity Metrics for more information. 
paramsSearch parameter(s) specific to the index. See Index Selection for more information.
-
-
-在当前版本中,加载数据最大值不能超过所有 query node 内存总量的 70%,从而为执行引擎预留内存资源。 -
-3. 创建随机向量作为 `query_records` 并调用 `search()` 进行搜索。 - _Milvus 将返回搜索结果的 ID 和距离:_ +## Conduct a vector search + +Search vectors with Milvus. To search in a specific [partition](manage_partition.md), specify the list of partition names. {{fragments/multiple_code.md}} ```python ->>> results = collection.search(vectors[:5], field_name, param=search_params, limit=10, expr=None) ->>> results[0].ids -[424363819726212428, 424363819726212436, ...] ->>> results[0].distances -[0.0, 1.0862197875976562, 1.1029295921325684, ...] +>>> results = collection.search(data=[[0.1, 0.2]], anns_field="book_intro", param=search_params, limit=10, expr=None) ``` ```javascript -await milvusClient.dataManager.search({ - collection_name: COLLECTION_NAME, - // partition_names: [], +const results = await milvusClient.dataManager.search({ + collection_name: "test_book_search", expr: "", - vectors: [[1, 2, 3, 4, 5, 6, 7, 8]], + vectors: [[0.1, 0.2]], search_params: searchParams, - vector_type: 100, // Float vector -> 100 + vector_type: 101, // DataType.FloatVector }); ``` -
- 详细资讯 - +
- - + + - - - - - - - - - + + + - - + - - - - + + + - - - + + - + - - + - - - - + + + - - - - + + + + + + + + + + + + +
参数 - 说明备注ParameterDescription
collection_name**要查询的 collection 名称必填项
vectors要查询的向量。数据的数目表示查询数量 nq必填项
dataVectors to search with.
anns_field要查询的字段名称必填项Name of the field to search on.
params*查询索引的参数可在选择索引中查看不同索引的更多参数详细资讯。
必填项
paramsSearch parameter(s) specific to the index. See Index Selection for more information.
limit*传回多少条最接近的结果必填项limitNumber of the most similar results to return.
expr筛选属性用的布林表达式布林表达式规则中查询其他表达式资讯。
选填项
Boolean expression used to filter attribute. See Boolean Expression Rules for more information.
partition_names要查询的 partition 名称选填项
partition_names (optional)List of names of the partition to search in.
output_fields要传回的字段名称(向量字段在目前版本不支持)必填项
output_fields (optional)Name of the field to return. Vector field is not supported in current release.
timeout (optional)A duration of time in seconds to allow for RPC. Clients wait until server responds or error occurs when it is set to None.
round_decimal (optional)Number of decimal places of returned distance.
+ + + - - - + + + + - - - + + - - - + + + + + + + + + + + + + + + + + + + + + +
timeoutRPC 允许的时限(秒钟数)。设定成空值时,客户端会等待伺服器回应或产生错误。选填项ParameterDescription
vector_type**预先检查二进制或浮点数向量。二进制为 100 而浮点数为 101必填项collection_nameName of the collection to search in.
round_decimal**小数点取至第几位数据类型: Integer
选填项
search_paramsParameters (as an object) used for search.
vectorsVectors to search with.
vector_typePre-check of binary or float vectors. 100 for binary vectors and 101 for float vectors.
partition_names (optional)List of names of the partition to search in.
expr (optional)Boolean expression used to filter attribute. See Boolean Expression Rules for more information.
output_fields (optional)Name of the field to return. Vector field is not supported in current release.
-
-如果要在指定分区或者指定列查询,则可以在调用 `search()` 时设置`partition_names` 和 `fields` 参数 + +Check the primary key values of the most similar vectors and their distances. {{fragments/multiple_code.md}} ```python ->>> collection.search(vectors[:5], field_name, param=search_params, limit=10, expr=None, partition_names=[partition_name]) +>>> results[0].ids +>>> results[0].distances ``` ```javascript -await milvusClient.dataManager.search({ - collection_name: COLLECTION_NAME, - partition_names: [partition_name], - expr: "", - vectors: [[1, 2, 3, 4, 5, 6, 7, 8]], - search_params: searchParams, - vector_type: 100, // Float vector -> 100 -}); +console.log(results.results) ``` -4. 查询完成后,可以调用 `release_collection()` 将 Milvus 中加载的 collection 从内存中释放,以减少内存消耗。查询其他 collection: +Release the collection loaded in Milvus to reduce memory consumption when the search is completed. {{fragments/multiple_code.md}} @@ -223,27 +309,15 @@ await milvusClient.dataManager.search({ ``` ```javascript -await milvusClient.collectionManager.releaseCollection({ - collection_name: COLLECTION_NAME, -}); +await milvusClient.collectionManager.releaseCollection({ collection_name: "test_book_search",}); ``` -
- 详细资讯 - - - - - - - - - - - - - - -
参数 - 说明备注
collection_name**要释放的 collection 名称必填项
-
+## What's next + +- Learn more basic operations of Milvus: + - [Query vectors](query.md) + - [Conduct a hybrid search](hybridsearch.md) + - [Search with Time Travel](timetravel.md) +- Explore API references for Milvus SDKs: + - [PyMilvus API reference](/api-reference/pymilvus/v{{var.milvus_python_sdk_version}}/tutorial.html) + - [Node.js API reference](/api-reference/node/v{{var.milvus_node_sdk_version}}/tutorial.html) \ No newline at end of file diff --git a/site/zh-CN/userGuide/search/timetravel.md b/site/zh-CN/userGuide/search/timetravel.md new file mode 100644 index 000000000..794eca925 --- /dev/null +++ b/site/zh-CN/userGuide/search/timetravel.md @@ -0,0 +1,117 @@ +--- +id: timetravel.md +related_key: Time Travel +summary: Learn how to search with Time Travel in Milvus. +--- + +# Search with Time Travel + +This topic describes how to use the Time Travel feature during vector search. + +Milvus maintains a timeline for all data insert and delete operations. It allows users to specify a timestamp in a search to retrieve a data view at a specified point in time, without spending tremendously on maintanence for data rollback. + +## Connect to the Milvus server + +```python +from pymilvus import connections +connections.connect("default", host='localhost', port='19530') +``` + +## Prepare parameters and create a collection + +```python +>>> from pymilvus import Collection, FieldSchema, CollectionSchema, DataType +>>> collection_name = "test_time_travel" +>>> schema = CollectionSchema([ +... FieldSchema("pk", DataType.INT64, is_primary=True), +... FieldSchema("example_field", dtype=DataType.FLOAT_VECTOR, dim=2) +... ]) +>>> collection = Collection(collection_name, schema) +``` + +## Insert first batch of data + +Insert random data to simulate the original data. + +```python +>>> import random +>>> data = [ +... [i for i in range(10)], +... [[random.random() for _ in range(2)] for _ in range(10)], +... 
] +>>> batch1 = collection.insert(data) +``` + +## Check the timestamp of the first data batch + +Check the timestamp of the first data batch for search with Time Travel. Data inserted within the same batch share an identical timestamp. + +```python +>>> batch1.timestamp +``` + +```python +428828271234252802 +``` + +
+ Milvus adopts a combination of a physical clock and a logical counter as a hybrid timestamp. The 64-bit timestamp consists of a 46-bit physical part (high-order bits) and an 18-bit logical part (low-order bits). The physical part is the number of milliseconds that have elapsed since January 1, 1970 (midnight UTC/GMT). +
+ + + +## Insert second batch of data + +Insert the second batch of data to simulate the dirty data, among which a piece of data with primary key value `19` and vector value `[1.0,1.0]` is appended as the target data to search with in the following step. + +```python +>>> data = [ +... [i for i in range(10, 20)], +... [[random.random() for _ in range(2)] for _ in range(9)], +... ] +>>> data[1].append([1.0,1.0]) +>>> batch2 = collection.insert(data) +``` + +## Search with Time Travel + +Load the collection and search the target data with the timestamp of the first data batch. With the timestamp specified, Milvus only retrieves the data view at the point of time the timestamp indicates. + +```python +>>> collection.load() +>>> search_param = { +... "data": [[1.0, 1.0]], +... "anns_field": "example_field", +... "param": {"metric_type": "L2"}, +... "limit": 10, +... "travel_timestamp": batch1.timestamp, +... } +>>> res = collection.search(**search_param) +>>> res[0].ids +``` + +As shown below, the target data itself and other data inserted later are not returned as results. + +```python +[8, 7, 4, 2, 5, 6, 9, 3, 0, 1] +``` + +If you do not specify the timestamp or specify it with the timestamp of the second data batch, Milvus will return the results from both batches. + +```python +>>> batch2.timestamp +428828283406123011 +>>> search_param = { +... "data": [[1.0, 1.0]], +... "anns_field": "example_field", +... "param": {"metric_type": "L2"}, +... "limit": 10, +... "travel_timestamp": batch2.timestamp, +... } +>>> res = collection.search(**search_param) +>>> res[0].ids +[19, 10, 8, 7, 4, 17, 2, 5, 13, 15] +``` + + +