diff --git a/localization/v2.4.x/site/en/userGuide/data-import/import-data.json b/localization/v2.4.x/site/en/userGuide/data-import/import-data.json index 2833481b7..f290c886f 100644 --- a/localization/v2.4.x/site/en/userGuide/data-import/import-data.json +++ b/localization/v2.4.x/site/en/userGuide/data-import/import-data.json @@ -1 +1 @@ -{"codeList":["from pymilvus import MilvusClient, DataType\n\nclient = MilvusClient(\"http://localhost:19530\")\n\nschema = MilvusClient.create_schema(\n auto_id=False,\n enable_dynamic_field=True\n)\n\nDIM = 512\n\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True),\nschema.add_field(field_name=\"bool\", datatype=DataType.BOOL),\nschema.add_field(field_name=\"int8\", datatype=DataType.INT8),\nschema.add_field(field_name=\"int16\", datatype=DataType.INT16),\nschema.add_field(field_name=\"int32\", datatype=DataType.INT32),\nschema.add_field(field_name=\"int64\", datatype=DataType.INT64),\nschema.add_field(field_name=\"float\", datatype=DataType.FLOAT),\nschema.add_field(field_name=\"double\", datatype=DataType.DOUBLE),\nschema.add_field(field_name=\"varchar\", datatype=DataType.VARCHAR, max_length=512),\nschema.add_field(field_name=\"json\", datatype=DataType.JSON),\nschema.add_field(field_name=\"array_str\", datatype=DataType.ARRAY, max_capacity=100, element_type=DataType.VARCHAR, max_length=128)\nschema.add_field(field_name=\"array_int\", datatype=DataType.ARRAY, max_capacity=100, element_type=DataType.INT64)\nschema.add_field(field_name=\"float_vector\", datatype=DataType.FLOAT_VECTOR, dim=DIM),\nschema.add_field(field_name=\"binary_vector\", datatype=DataType.BINARY_VECTOR, dim=DIM),\nschema.add_field(field_name=\"float16_vector\", datatype=DataType.FLOAT16_VECTOR, dim=DIM),\n# schema.add_field(field_name=\"bfloat16_vector\", datatype=DataType.BFLOAT16_VECTOR, dim=DIM),\nschema.add_field(field_name=\"sparse_vector\", datatype=DataType.SPARSE_FLOAT_VECTOR)\n\nschema.verify()\n\nclient.create_collection(\n collection_name=\"quick_setup\",\n schema=schema\n)\n","private static void createCollection() {\n MilvusClientV2 milvusClient = new MilvusClientV2(ConnectConfig.builder()\n .uri(\"http://localhost:19530\")\n .build());\n\n CreateCollectionReq.CollectionSchema schema = createSchema();\n CreateCollectionReq request = CreateCollectionReq.builder()\n .collectionName(\"quick_setup\")\n .collectionSchema(schema)\n .build();\n milvusClient.createCollection(request);\n System.out.println(\"Collection created\");\n}\n\npublic static void main(String[] args) throws Exception {\n createCollection();\n}\n","from pymilvus.bulk_writer import bulk_import\n\nurl = f\"http://127.0.0.1:19530\"\n\n# Bulk-insert data from a set of JSON files already uploaded to the MinIO server\nresp = bulk_import(\n url=url,\n collection_name=\"quick_setup\",\n files=[['a1e18323-a658-4d1b-95a7-9907a4391bcf/1.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/2.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/3.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/4.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/5.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/6.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/7.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/8.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/9.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/10.parquet']],\n)\n\njob_id = resp.json()['data']['jobId']\nprint(job_id)\n","private static String bulkImport(List> batchFiles) throws InterruptedException {\n MilvusImportRequest milvusImportRequest = MilvusImportRequest.builder()\n .collectionName(\"quick_setup\")\n .files(batchFiles)\n .build();\n String bulkImportResult = BulkImport.bulkImport(\"http://localhost:19530\", milvusImportRequest);\n System.out.println(bulkImportResult);\n\n JsonObject bulkImportObject = new Gson().fromJson(bulkImportResult, JsonObject.class);\n String jobId = bulkImportObject.getAsJsonObject(\"data\").get(\"jobId\").getAsString();\n System.out.println(\"Create a bulkInert task, job id: \" + jobId);\n return jobId;\n}\n\npublic static void main(String[] args) throws Exception {\n List> batchFiles = uploadData();\n String jobId = bulkImport(batchFiles);\n}\n","export MILVUS_URI=\"localhost:19530\"\n\ncurl --request POST \"http://${MILVUS_URI}/v2/vectordb/jobs/import/create\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"files\": [\n [\n \"/8ca44f28-47f7-40ba-9604-98918afe26d1/1.parquet\"\n ],\n [\n \"/8ca44f28-47f7-40ba-9604-98918afe26d1/2.parquet\"\n ]\n ],\n \"collectionName\": \"quick_setup\"\n}'\n","[\n \"/d1782fa1-6b65-4ff3-b05a-43a436342445/1.json\"\n],\n","[\n \"/a6fb2d1c-7b1b-427c-a8a3-178944e3b66d/1.parquet\"\n]\n\n","{\n \"code\": 200,\n \"data\": {\n \"jobId\": \"448707763884413158\"\n }\n}\n","import json\nfrom pymilvus.bulk_writer import get_import_progress\n\nurl = f\"http://127.0.0.1:19530\"\n\n# Get bulk-insert job progress\nresp = get_import_progress(\n url=url,\n job_id=\"453265736269038336\",\n)\n\nprint(json.dumps(resp.json(), indent=4))\n","private static void getImportProgress(String jobId) {\n while (true) {\n System.out.println(\"Wait 5 second to check bulkInsert job state...\");\n try {\n TimeUnit.SECONDS.sleep(5);\n } catch (InterruptedException e) {\n break;\n }\n\n MilvusDescribeImportRequest request = MilvusDescribeImportRequest.builder()\n .jobId(jobId)\n .build();\n String getImportProgressResult = BulkImport.getImportProgress(\"http://localhost:19530\", request);\n\n JsonObject getImportProgressObject = new Gson().fromJson(getImportProgressResult, JsonObject.class);\n String state = getImportProgressObject.getAsJsonObject(\"data\").get(\"state\").getAsString();\n String progress = getImportProgressObject.getAsJsonObject(\"data\").get(\"progress\").getAsString();\n if (\"Failed\".equals(state)) {\n String reason = getImportProgressObject.getAsJsonObject(\"data\").get(\"reason\").getAsString();\n System.out.printf(\"The job %s failed, reason: %s%n\", jobId, reason);\n break;\n } else if (\"Completed\".equals(state)) {\n System.out.printf(\"The job %s completed%n\", jobId);\n break;\n } else {\n System.out.printf(\"The job %s is running, state:%s progress:%s%n\", jobId, state, progress);\n }\n }\n}\n\npublic static void main(String[] args) throws Exception {\n List> batchFiles = uploadData();\n String jobId = bulkImport(batchFiles);\n getImportProgress(jobId);\n}\n","export MILVUS_URI=\"localhost:19530\"\n\ncurl --request POST \"http://${MILVUS_URI}/v2/vectordb/jobs/import/describe\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"jobId\": \"449839014328146739\"\n}'\n","{\n \"code\": 200,\n \"data\": {\n \"collectionName\": \"quick_setup\",\n \"completeTime\": \"2024-05-18T02:57:13Z\",\n \"details\": [\n {\n \"completeTime\": \"2024-05-18T02:57:11Z\",\n \"fileName\": \"id:449839014328146740 paths:\\\"/8ca44f28-47f7-40ba-9604-98918afe26d1/1.parquet\\\" \",\n \"fileSize\": 31567874,\n \"importedRows\": 100000,\n \"progress\": 100,\n \"state\": \"Completed\",\n \"totalRows\": 100000\n },\n {\n \"completeTime\": \"2024-05-18T02:57:11Z\",\n \"fileName\": \"id:449839014328146741 paths:\\\"/8ca44f28-47f7-40ba-9604-98918afe26d1/2.parquet\\\" \",\n \"fileSize\": 31517224,\n \"importedRows\": 100000,\n \"progress\": 100,\n \"state\": \"Completed\",\n \"totalRows\": 200000 \n }\n ],\n \"fileSize\": 63085098,\n \"importedRows\": 200000,\n \"jobId\": \"449839014328146739\",\n \"progress\": 100,\n \"state\": \"Completed\",\n \"totalRows\": 200000\n }\n}\n","import json\nfrom pymilvus.bulk_writer import list_import_jobs\n\nurl = f\"http://127.0.0.1:19530\"\n\n# List bulk-insert jobs\nresp = list_import_jobs(\n url=url,\n collection_name=\"quick_setup\",\n)\n\nprint(json.dumps(resp.json(), indent=4))\n","private static void listImportJobs() {\n MilvusListImportJobsRequest listImportJobsRequest = MilvusListImportJobsRequest.builder().collectionName(\"quick_setup\").build();\n String listImportJobsResult = BulkImport.listImportJobs(\"http://localhost:19530\", listImportJobsRequest);\n System.out.println(listImportJobsResult);\n}\n\npublic static void main(String[] args) throws Exception {\n listImportJobs();\n}\n","export MILVUS_URI=\"localhost:19530\"\n\ncurl --request POST \"http://${MILVUS_URI}/v2/vectordb/jobs/import/list\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"collectionName\": \"quick_setup\"\n}'\n","{\n \"code\": 200,\n \"data\": {\n \"records\": [\n {\n \"collectionName\": \"quick_setup\",\n \"jobId\": \"448761313698322011\",\n \"progress\": 50,\n \"state\": \"Importing\"\n }\n ]\n }\n}\n"],"headingContent":"Import data","anchorList":[{"label":"Import data","href":"Import-data","type":1,"isActive":false},{"label":"Before you start","href":"Before-you-start","type":2,"isActive":false},{"label":"Import data","href":"Import-data","type":2,"isActive":false},{"label":"Check import progress","href":"Check-import-progress","type":2,"isActive":false},{"label":"List Import Jobs","href":"List-Import-Jobs","type":2,"isActive":false},{"label":"Limitations","href":"Limitations","type":2,"isActive":false},{"label":"Constraints","href":"Constraints","type":2,"isActive":false},{"label":"Recommendations","href":"Recommendations","type":2,"isActive":false}]} \ No newline at end of file +{"codeList":["from pymilvus.bulk_writer import bulk_import\n\nurl = f\"http://127.0.0.1:19530\"\n\n# Bulk-insert data from a set of JSON files already uploaded to the MinIO server\nresp = bulk_import(\n url=url,\n collection_name=\"quick_setup\",\n files=[['a1e18323-a658-4d1b-95a7-9907a4391bcf/1.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/2.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/3.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/4.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/5.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/6.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/7.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/8.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/9.parquet'],\n ['a1e18323-a658-4d1b-95a7-9907a4391bcf/10.parquet']],\n)\n\njob_id = resp.json()['data']['jobId']\nprint(job_id)\n","private static String bulkImport(List> batchFiles) throws InterruptedException {\n MilvusImportRequest milvusImportRequest = MilvusImportRequest.builder()\n .collectionName(\"quick_setup\")\n .files(batchFiles)\n .build();\n String bulkImportResult = BulkImport.bulkImport(\"http://localhost:19530\", milvusImportRequest);\n System.out.println(bulkImportResult);\n\n JsonObject bulkImportObject = new Gson().fromJson(bulkImportResult, JsonObject.class);\n String jobId = bulkImportObject.getAsJsonObject(\"data\").get(\"jobId\").getAsString();\n System.out.println(\"Create a bulkInert task, job id: \" + jobId);\n return jobId;\n}\n\npublic static void main(String[] args) throws Exception {\n List> batchFiles = uploadData();\n String jobId = bulkImport(batchFiles);\n}\n","export MILVUS_URI=\"localhost:19530\"\n\ncurl --request POST \"http://${MILVUS_URI}/v2/vectordb/jobs/import/create\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"files\": [\n [\n \"/8ca44f28-47f7-40ba-9604-98918afe26d1/1.parquet\"\n ],\n [\n \"/8ca44f28-47f7-40ba-9604-98918afe26d1/2.parquet\"\n ]\n ],\n \"collectionName\": \"quick_setup\"\n}'\n","[\n \"/d1782fa1-6b65-4ff3-b05a-43a436342445/1.json\"\n],\n","[\n \"/a6fb2d1c-7b1b-427c-a8a3-178944e3b66d/1.parquet\"\n]\n\n","{\n \"code\": 200,\n \"data\": {\n \"jobId\": \"448707763884413158\"\n }\n}\n","import json\nfrom pymilvus.bulk_writer import get_import_progress\n\nurl = f\"http://127.0.0.1:19530\"\n\n# Get bulk-insert job progress\nresp = get_import_progress(\n url=url,\n job_id=\"453265736269038336\",\n)\n\nprint(json.dumps(resp.json(), indent=4))\n","private static void getImportProgress(String jobId) {\n while (true) {\n System.out.println(\"Wait 5 second to check bulkInsert job state...\");\n try {\n TimeUnit.SECONDS.sleep(5);\n } catch (InterruptedException e) {\n break;\n }\n\n MilvusDescribeImportRequest request = MilvusDescribeImportRequest.builder()\n .jobId(jobId)\n .build();\n String getImportProgressResult = BulkImport.getImportProgress(\"http://localhost:19530\", request);\n\n JsonObject getImportProgressObject = new Gson().fromJson(getImportProgressResult, JsonObject.class);\n String state = getImportProgressObject.getAsJsonObject(\"data\").get(\"state\").getAsString();\n String progress = getImportProgressObject.getAsJsonObject(\"data\").get(\"progress\").getAsString();\n if (\"Failed\".equals(state)) {\n String reason = getImportProgressObject.getAsJsonObject(\"data\").get(\"reason\").getAsString();\n System.out.printf(\"The job %s failed, reason: %s%n\", jobId, reason);\n break;\n } else if (\"Completed\".equals(state)) {\n System.out.printf(\"The job %s completed%n\", jobId);\n break;\n } else {\n System.out.printf(\"The job %s is running, state:%s progress:%s%n\", jobId, state, progress);\n }\n }\n}\n\npublic static void main(String[] args) throws Exception {\n List> batchFiles = uploadData();\n String jobId = bulkImport(batchFiles);\n getImportProgress(jobId);\n}\n","export MILVUS_URI=\"localhost:19530\"\n\ncurl --request POST \"http://${MILVUS_URI}/v2/vectordb/jobs/import/describe\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"jobId\": \"449839014328146739\"\n}'\n","{\n \"code\": 200,\n \"data\": {\n \"collectionName\": \"quick_setup\",\n \"completeTime\": \"2024-05-18T02:57:13Z\",\n \"details\": [\n {\n \"completeTime\": \"2024-05-18T02:57:11Z\",\n \"fileName\": \"id:449839014328146740 paths:\\\"/8ca44f28-47f7-40ba-9604-98918afe26d1/1.parquet\\\" \",\n \"fileSize\": 31567874,\n \"importedRows\": 100000,\n \"progress\": 100,\n \"state\": \"Completed\",\n \"totalRows\": 100000\n },\n {\n \"completeTime\": \"2024-05-18T02:57:11Z\",\n \"fileName\": \"id:449839014328146741 paths:\\\"/8ca44f28-47f7-40ba-9604-98918afe26d1/2.parquet\\\" \",\n \"fileSize\": 31517224,\n \"importedRows\": 100000,\n \"progress\": 100,\n \"state\": \"Completed\",\n \"totalRows\": 200000 \n }\n ],\n \"fileSize\": 63085098,\n \"importedRows\": 200000,\n \"jobId\": \"449839014328146739\",\n \"progress\": 100,\n \"state\": \"Completed\",\n \"totalRows\": 200000\n }\n}\n","import json\nfrom pymilvus.bulk_writer import list_import_jobs\n\nurl = f\"http://127.0.0.1:19530\"\n\n# List bulk-insert jobs\nresp = list_import_jobs(\n url=url,\n collection_name=\"quick_setup\",\n)\n\nprint(json.dumps(resp.json(), indent=4))\n","private static void listImportJobs() {\n MilvusListImportJobsRequest listImportJobsRequest = MilvusListImportJobsRequest.builder().collectionName(\"quick_setup\").build();\n String listImportJobsResult = BulkImport.listImportJobs(\"http://localhost:19530\", listImportJobsRequest);\n System.out.println(listImportJobsResult);\n}\n\npublic static void main(String[] args) throws Exception {\n listImportJobs();\n}\n","export MILVUS_URI=\"localhost:19530\"\n\ncurl --request POST \"http://${MILVUS_URI}/v2/vectordb/jobs/import/list\" \\\n--header \"Content-Type: application/json\" \\\n--data-raw '{\n \"collectionName\": \"quick_setup\"\n}'\n","{\n \"code\": 200,\n \"data\": {\n \"records\": [\n {\n \"collectionName\": \"quick_setup\",\n \"jobId\": \"448761313698322011\",\n \"progress\": 50,\n \"state\": \"Importing\"\n }\n ]\n }\n}\n"],"headingContent":"Import data","anchorList":[{"label":"Import data","href":"Import-data","type":1,"isActive":false},{"label":"Before you start","href":"Before-you-start","type":2,"isActive":false},{"label":"Import data","href":"Import-data","type":2,"isActive":false},{"label":"Check import progress","href":"Check-import-progress","type":2,"isActive":false},{"label":"List Import Jobs","href":"List-Import-Jobs","type":2,"isActive":false},{"label":"Limitations","href":"Limitations","type":2,"isActive":false},{"label":"Constraints","href":"Constraints","type":2,"isActive":false},{"label":"Recommendations","href":"Recommendations","type":2,"isActive":false}]} \ No newline at end of file diff --git a/localization/v2.4.x/site/en/userGuide/data-import/import-data.md b/localization/v2.4.x/site/en/userGuide/data-import/import-data.md index a9953e437..bd00d0ec2 100644 --- a/localization/v2.4.x/site/en/userGuide/data-import/import-data.md +++ b/localization/v2.4.x/site/en/userGuide/data-import/import-data.md @@ -46,64 +46,6 @@ summary: This page demonstrates the procedure to import the prepared data.

The following code snippet creates a simple collection with the given schema. For more information on parameters, refer to createCollection() in the SDK reference.

- -
from pymilvus import MilvusClient, DataType
-
-client = MilvusClient("http://localhost:19530")
-
-schema = MilvusClient.create_schema(
-    auto_id=False,
-    enable_dynamic_field=True
-)
-
-DIM = 512
-
-schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True),
-schema.add_field(field_name="bool", datatype=DataType.BOOL),
-schema.add_field(field_name="int8", datatype=DataType.INT8),
-schema.add_field(field_name="int16", datatype=DataType.INT16),
-schema.add_field(field_name="int32", datatype=DataType.INT32),
-schema.add_field(field_name="int64", datatype=DataType.INT64),
-schema.add_field(field_name="float", datatype=DataType.FLOAT),
-schema.add_field(field_name="double", datatype=DataType.DOUBLE),
-schema.add_field(field_name="varchar", datatype=DataType.VARCHAR, max_length=512),
-schema.add_field(field_name="json", datatype=DataType.JSON),
-schema.add_field(field_name="array_str", datatype=DataType.ARRAY, max_capacity=100, element_type=DataType.VARCHAR, max_length=128)
-schema.add_field(field_name="array_int", datatype=DataType.ARRAY, max_capacity=100, element_type=DataType.INT64)
-schema.add_field(field_name="float_vector", datatype=DataType.FLOAT_VECTOR, dim=DIM),
-schema.add_field(field_name="binary_vector", datatype=DataType.BINARY_VECTOR, dim=DIM),
-schema.add_field(field_name="float16_vector", datatype=DataType.FLOAT16_VECTOR, dim=DIM),
-# schema.add_field(field_name="bfloat16_vector", datatype=DataType.BFLOAT16_VECTOR, dim=DIM),
-schema.add_field(field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR)
-
-schema.verify()
-
-client.create_collection(
-    collection_name="quick_setup",
-    schema=schema
-)
-
-
private static void createCollection() {
-    MilvusClientV2 milvusClient = new MilvusClientV2(ConnectConfig.builder()
-            .uri("http://localhost:19530")
-            .build());
-
-    CreateCollectionReq.CollectionSchema schema = createSchema();
-    CreateCollectionReq request = CreateCollectionReq.builder()
-            .collectionName("quick_setup")
-            .collectionSchema(schema)
-            .build();
-    milvusClient.createCollection(request);
-    System.out.println("Collection created");
-}
-
-public static void main(String[] args) throws Exception {
-    createCollection();
-}
-

Import data