From 2e7cd20259822901d27f2c8eea542aab727ced45 Mon Sep 17 00:00:00 2001 From: yyang999 Date: Mon, 9 Oct 2023 21:49:13 -0400 Subject: [PATCH 01/13] Add weaviate.py and update various files --- evadb/third_party/vector_stores/weaviate.py | 207 ++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 evadb/third_party/vector_stores/weaviate.py diff --git a/evadb/third_party/vector_stores/weaviate.py b/evadb/third_party/vector_stores/weaviate.py new file mode 100644 index 000000000..a15c3528a --- /dev/null +++ b/evadb/third_party/vector_stores/weaviate.py @@ -0,0 +1,207 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from typing import List + +from evadb.configuration.configuration_manager import ConfigurationManager +from evadb.third_party.vector_stores.types import ( + FeaturePayload, + VectorIndexQuery, + VectorStore, +) +from evadb.utils.generic_utils import try_to_import_weaviate_client + +required_params = [] +_weaviate_init_done = False + + +class WeaviateVectorStore(VectorStore): + def __init__(self) -> None: + try_to_import_weaviate_client() + global _weaviate_init_done + + # Get the API key. 
+ self._api_key = ConfigurationManager().get_value( + "third_party", "WEAVIATE_API_KEY" + ) + + if not self._api_key: + self._api_key = os.environ.get("WEAVIATE_API_KEY") + + + assert ( + self._api_key + ), "Please set your Weaviate API key in evadb.yml file (third_party, weaviate_api_key) or environment variable (WEAVIATE_API_KEY). It can be found at the Details tab in WCS Dashboard." + + # Get the API Url. + self._api_url = ConfigurationManager().get_value( + "third_party", "WEAVIATE_API_URL" + ) + + if not self._api_url: + self._api_url = os.environ.get("WEAVIATE_API_URL") + + assert ( + self._api_url + ), "Please set your Weaviate API Url in evadb.yml file (third_party, weaviate_api_url) or environment variable (WEAVIATE_API_URL). It can be found at the Details tab in WCS Dashboard." + + + if not _weaviate_init_done: + # Initialize weaviate client + import weaviate + + client = weaviate.Client( + url=self._api_url, + auth_client_secret=weaviate.AuthApiKey(api_key=self._api_key), + ) + client.schema.get() + + _weaviate_init_done = True + + self._client = client + + def create_weaviate_class(self, class_name: str, vectorizer: str, module_config: dict, properties: list) -> None: + # In Weaviate, vector index creation and management is not explicitly done like Pinecone + # Need to typically define a property in the schema to hold vectors and insert data accordingly + + """ + Create a Weaviate class with the specified configuration. + + Args: + class_name (str): The name of the class to create, e.g., "Article". + vectorizer (str): The vectorizer module to use, e.g., "text2vec-cohere". 
+ module_config (dict): Configuration for vectorizer and generative module, e.g., + { + "text2vec-cohere": { + "model": "embed-multilingual-v2.0", + }, + } + properties (list): List of dictionaries specifying class properties, e.g., + [ + { + "name": "title", + "dataType": ["text"] + }, + { + "name": "body", + "dataType": ["text"] + }, + ] + + Returns: + None + """ + + # Define the class object with provided parameters + class_obj = { + "class": class_name, + "vectorizer": vectorizer, + "moduleConfig": module_config, + "properties": properties + } + + + # Call the Weaviate API to create the class + response = self._client.schema.create_class(class_obj) + + # Check the response for success or handle any errors + if response.status_code == 200: + print(f"Successfully created Weaviate class '{class_name}'") + else: + print(f"Failed to create Weaviate class '{class_name}'") + print(response.text) + + return None + + def delete_weaviate_class(self, class_name: str) -> None: + """ + Delete a Weaviate class and its data. + + Args: + class_name (str): The name of the Weaviate class to delete. + + Returns: + None + """ + # Call the Weaviate API to delete the class + response = self._client.schema.delete_class(class_name) + + # Check the response for success or handle any errors + if response.status_code == 200: + print(f"Successfully deleted Weaviate class '{class_name}'") + else: + print(f"Failed to delete Weaviate class '{class_name}'") + print(response.text) + + return None + + def add_to_weaviate_class(self, class_name: str, data_objects: List[dict]) -> None: + """ + Add objects to the specified Weaviate class. + + Args: + class_name (str): The name of the Weaviate class to add objects to. + data_objects (List[dict]): A list of dictionaries, where each dictionary contains property names and values. 
+ + Returns: + None + """ + # Iterate over each data object and add it to the Weaviate class + for data_object in data_objects: + self._client.data_object.create(data_object, class_name) + + return None + + def query_weaviate_class(self, class_name, properties_to_retrieve, query: VectorIndexQuery) -> List[dict]: + """ + Perform a similarity-based search in Weaviate. + + Args: + class_name (str): The name of the Weaviate class to perform the search on. + properties_to_retrieve (List[str]): A list of property names to retrieve. + query (VectorIndexQuery): A query object for similarity search, containing the query vector and top_k. + + Returns: + List[dict]: A list of dictionaries containing the retrieved properties. + """ + try: + # Define the similarity search query + response = ( + self._client.query + .get(class_name, properties_to_retrieve) + .with_near_vector({ + "vector": query.embedding + }) + .with_limit(query.top_k) + .with_additional(["distance"]) + .do() + ) + + # Check if the response contains data + data = response.get('data', {}) + if 'Get' not in data or class_name not in data['Get']: + print(f"No objects of class {class_name} found.") + return [] + + # Extract the results + results = data['Get'][class_name] + + return results + + except Exception as e: + print(f"Failed to query Weaviate class '{class_name}'") + print(e) + + return [] \ No newline at end of file From fb13606c3e860bfa5ea2a56ab93b8f5f3ee16b5b Mon Sep 17 00:00:00 2001 From: yyang999 Date: Mon, 9 Oct 2023 21:53:18 -0400 Subject: [PATCH 02/13] update various files --- evadb/catalog/catalog_type.py | 2 +- evadb/evadb.yml | 5 ++ evadb/executor/executor_utils.py | 2 + evadb/interfaces/relational/db.py | 2 +- evadb/parser/evadb.lark | 3 +- .../parser/lark_visitor/_create_statements.py | 2 + evadb/third_party/vector_stores/types.py | 75 ++++++++++++++++++- evadb/third_party/vector_stores/utils.py | 7 ++ evadb/utils/generic_utils.py | 14 ++++ script/formatting/spelling.txt | 2 + setup.py | 2 + 11 
files changed, 112 insertions(+), 4 deletions(-) diff --git a/evadb/catalog/catalog_type.py b/evadb/catalog/catalog_type.py index 4928807c2..5f785e126 100644 --- a/evadb/catalog/catalog_type.py +++ b/evadb/catalog/catalog_type.py @@ -117,7 +117,7 @@ class VectorStoreType(EvaDBEnum): PINECONE # noqa: F821 PGVECTOR # noqa: F821 CHROMADB # noqa: F821 - + WEAVIATE # noqa: F821 class VideoColumnName(EvaDBEnum): name # noqa: F821 diff --git a/evadb/evadb.yml b/evadb/evadb.yml index e7d6a3894..337bbdb51 100644 --- a/evadb/evadb.yml +++ b/evadb/evadb.yml @@ -27,3 +27,8 @@ third_party: OPENAI_KEY: "" PINECONE_API_KEY: "" PINECONE_ENV: "" + + # Weaviate Configuration + # Weaviate API key and url Can be obtained from cluster details on Weaviate Cloud Services (WCS) dashboard + WEAVIATE_API_KEY: "" + WEAVIATE_API_URL: "" \ No newline at end of file diff --git a/evadb/executor/executor_utils.py b/evadb/executor/executor_utils.py index 88d74ce3b..0dfa270b0 100644 --- a/evadb/executor/executor_utils.py +++ b/evadb/executor/executor_utils.py @@ -175,6 +175,8 @@ def handle_vector_store_params( return {"index_path": str(Path(index_path).parent)} elif vector_store_type == VectorStoreType.PINECONE: return {} + elif vector_store_type == VectorStoreType.WEAVIATE: + return {} else: raise ValueError("Unsupported vector store type: {}".format(vector_store_type)) diff --git a/evadb/interfaces/relational/db.py b/evadb/interfaces/relational/db.py index 1b3bd84ae..0e0aa8816 100644 --- a/evadb/interfaces/relational/db.py +++ b/evadb/interfaces/relational/db.py @@ -248,7 +248,7 @@ def create_vector_index( index_name (str): Name of the index. table_name (str): Name of the table. expr (str): Expression used to build the vector index. - using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB`. + using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `WEAVIATE`. Returns: EvaDBCursor: The EvaDBCursor object. 
diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index b8cc2fc56..22f32ef9d 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -53,7 +53,7 @@ function_metadata_key: uid function_metadata_value: string_literal | decimal_literal -vector_store_type: USING (FAISS | QDRANT | PINECONE | PGVECTOR | CHROMADB) +vector_store_type: USING (FAISS | QDRANT | PINECONE | PGVECTOR | CHROMADB | WEAVIATE) index_elem: ("(" uid_list ")" | "(" function_call ")") @@ -423,6 +423,7 @@ QDRANT: "QDRANT"i PINECONE: "PINECONE"i PGVECTOR: "PGVECTOR"i CHROMADB: "CHROMADB"i +WEAVIATE: "WEAVIATE"i // Computer vision tasks diff --git a/evadb/parser/lark_visitor/_create_statements.py b/evadb/parser/lark_visitor/_create_statements.py index 18e13ca3f..637a1181d 100644 --- a/evadb/parser/lark_visitor/_create_statements.py +++ b/evadb/parser/lark_visitor/_create_statements.py @@ -299,6 +299,8 @@ def vector_store_type(self, tree): vector_store_type = VectorStoreType.PGVECTOR elif str.upper(token) == "CHROMADB": vector_store_type = VectorStoreType.CHROMADB + elif str.upper(token) == "WEAVIATE": + vector_store_type = VectorStoreType.WEAVIATE return vector_store_type diff --git a/evadb/third_party/vector_stores/types.py b/evadb/third_party/vector_stores/types.py index cdfc10e26..d337ac894 100644 --- a/evadb/third_party/vector_stores/types.py +++ b/evadb/third_party/vector_stores/types.py @@ -14,6 +14,7 @@ # limitations under the License. from dataclasses import dataclass from typing import List +from uuid import uuid5, NAMESPACE_DNS @dataclass @@ -33,7 +34,6 @@ class VectorIndexQueryResult: similarities: List[float] ids: List[int] - class VectorStore: def create(self, vector_dim: int): """Create an index""" @@ -54,3 +54,76 @@ def query(self, query: VectorIndexQuery) -> VectorIndexQueryResult: def delete(self): """delete an index""" ... 
+ + def create_weaviate_class(self, class_name: str, vectorizer: str, module_config: dict, properties: list) -> None: + """ + Create a Weaviate class with the specified configuration. + + Args: + class_name (str): The name of the class to create, e.g., "Article". + vectorizer (str): The vectorizer module to use, e.g., "text2vec-cohere". + module_config (dict): Configuration for vectorizer and generative module, e.g., + { + "text2vec-cohere": { + "model": "embed-multilingual-v2.0", + }, + } + properties (list): List of dictionaries specifying class properties, e.g., + [ + { + "name": "title", + "dataType": ["text"] + }, + { + "name": "body", + "dataType": ["text"] + }, + ] + + Returns: + None + """ + # Implement the logic to create a Weaviate class with the given parameters. + ... + + def delete_weaviate_class(self, class_name: str) -> None: + """ + Delete a Weaviate class and its data. + + Args: + class_name (str): The name of the Weaviate class to delete. + + Returns: + None + """ + # Implement the logic to delete a Weaviate class and its data. + ... + + def add_to_weaviate_class(self, class_name: str, data_objects: List[dict]) -> None: + """ + Add objects to the specified Weaviate class. + + Args: + class_name (str): The name of the Weaviate class to add objects to. + data_objects (List[dict]): A list of dictionaries, where each dictionary contains property names and values. + + Returns: + None + """ + # Implement the logic to add payloads to a Weaviate class. + ... + + def query_weaviate_class(self, class_name, properties_to_retrieve, query: VectorIndexQuery) -> List[dict]: + """ + Perform a similarity-based search in Weaviate. + + Args: + class_name (str): The name of the Weaviate class to perform the search on. + properties_to_retrieve (List[str]): A list of property names to retrieve. + query (VectorIndexQuery): A query object for similarity search, containing the query vector and top_k. 
+ + Returns: + List[dict]: A list of dictionaries containing the retrieved properties. + """ + # Implement the logic to query a Weaviate class for similar vectors. + ... \ No newline at end of file diff --git a/evadb/third_party/vector_stores/utils.py b/evadb/third_party/vector_stores/utils.py index e47d24f1f..a6507530f 100644 --- a/evadb/third_party/vector_stores/utils.py +++ b/evadb/third_party/vector_stores/utils.py @@ -17,6 +17,7 @@ from evadb.third_party.vector_stores.faiss import FaissVectorStore from evadb.third_party.vector_stores.pinecone import PineconeVectorStore from evadb.third_party.vector_stores.qdrant import QdrantVectorStore +from evadb.third_party.vector_stores.weaviate import WeaviateVectorStore from evadb.utils.generic_utils import validate_kwargs @@ -49,5 +50,11 @@ def init_vector_store( validate_kwargs(kwargs, required_params, required_params) return ChromaDBVectorStore(index_name, **kwargs) + elif vector_store_type == VectorStoreType.WEAVIATE: + from evadb.third_party.vector_stores.weaviate import required_params + + validate_kwargs(kwargs, required_params, required_params) + return WeaviateVectorStore(index_name, **kwargs) + else: raise Exception(f"Vector store {vector_store_type} not supported") diff --git a/evadb/utils/generic_utils.py b/evadb/utils/generic_utils.py index 04b6add1f..2396de105 100644 --- a/evadb/utils/generic_utils.py +++ b/evadb/utils/generic_utils.py @@ -561,6 +561,14 @@ def try_to_import_chromadb_client(): Please install it with 'pip install chromadb`.""" ) +def try_to_import_weaviate_client(): + try: + import weaviate # noqa: F401 + except ImportError: + raise ValueError( + """Could not import weaviate python package. 
+ Please install it with 'pip install weaviate-client`.""" + ) def is_qdrant_available() -> bool: try: @@ -585,6 +593,12 @@ def is_chromadb_available() -> bool: except ValueError: # noqa: E722 return False +def is_weaviate_available() -> bool: + try: + try_to_import_weaviate_client() + return True + except ValueError: # noqa: E722 + return False ############################## ## UTILS diff --git a/script/formatting/spelling.txt b/script/formatting/spelling.txt index 35cdf845b..7ed382191 100644 --- a/script/formatting/spelling.txt +++ b/script/formatting/spelling.txt @@ -938,6 +938,7 @@ VisionEncoderDecoderModel WIP WMV WeakValueDictionary +WEAVIATE XdistTests Xeon XformExtractObjectToLinearFlow @@ -1746,6 +1747,7 @@ wal warmup wb weakref +weaviate westbrae wget whitespaces diff --git a/setup.py b/setup.py index 100646644..6685c293a 100644 --- a/setup.py +++ b/setup.py @@ -112,6 +112,8 @@ def read(path, encoding="utf-8"): chromadb_libs = ["chromadb"] +weaviate_libs = ["weaviate-client"] + postgres_libs = [ "psycopg2", ] From 331651805646bb4d8d8f0a541797d0889281b2c1 Mon Sep 17 00:00:00 2001 From: yyang999 Date: Tue, 10 Oct 2023 21:45:19 -0400 Subject: [PATCH 03/13] update weaviate.py --- evadb/third_party/vector_stores/weaviate.py | 22 ++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/evadb/third_party/vector_stores/weaviate.py b/evadb/third_party/vector_stores/weaviate.py index a15c3528a..0f91be39f 100644 --- a/evadb/third_party/vector_stores/weaviate.py +++ b/evadb/third_party/vector_stores/weaviate.py @@ -103,6 +103,9 @@ def create_weaviate_class(self, class_name: str, vectorizer: str, module_config: Returns: None """ + # Check if the class already exists + if self._client.schema.exists(class_name): + self._client.schema.delete_class(class_name) # Define the class object with provided parameters class_obj = { @@ -114,14 +117,14 @@ def create_weaviate_class(self, class_name: str, vectorizer: str, module_config: # Call the Weaviate 
API to create the class - response = self._client.schema.create_class(class_obj) + self._client.schema.create_class(class_obj) + # response = client.schema.get(class_name) # Check the response for success or handle any errors - if response.status_code == 200: + if self._client.schema.get(class_name)['class'] == class_name: print(f"Successfully created Weaviate class '{class_name}'") else: print(f"Failed to create Weaviate class '{class_name}'") - print(response.text) return None @@ -136,14 +139,15 @@ def delete_weaviate_class(self, class_name: str) -> None: None """ # Call the Weaviate API to delete the class - response = self._client.schema.delete_class(class_name) + self._client.schema.delete_class(class_name) - # Check the response for success or handle any errors - if response.status_code == 200: - print(f"Successfully deleted Weaviate class '{class_name}'") - else: + try: + # Attempt to retrieve the class, and if it results in an exception, + # consider the class as successfully deleted. 
+ self._client.schema.get(class_name) print(f"Failed to delete Weaviate class '{class_name}'") - print(response.text) + except Exception as e: + print(f"Successfully deleted Weaviate class '{class_name}'") return None From 1f3e6d8aed9afdbc8e2744e5c41e05ccff061b67 Mon Sep 17 00:00:00 2001 From: hunteritself <104769634+hunteritself@users.noreply.github.com> Date: Tue, 10 Oct 2023 21:48:22 -0400 Subject: [PATCH 04/13] add test file_cs6422_test_Yang Yang.ipynb --- .../vector_stores/cs6422_test_Yang Yang.ipynb | 474 ++++++++++++++++++ 1 file changed, 474 insertions(+) create mode 100644 evadb/third_party/vector_stores/cs6422_test_Yang Yang.ipynb diff --git a/evadb/third_party/vector_stores/cs6422_test_Yang Yang.ipynb b/evadb/third_party/vector_stores/cs6422_test_Yang Yang.ipynb new file mode 100644 index 000000000..9bddc33d5 --- /dev/null +++ b/evadb/third_party/vector_stores/cs6422_test_Yang Yang.ipynb @@ -0,0 +1,474 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "uP2JSULAuJpF" + }, + "source": [ + "## CS 6422 Integration of Weaviate with EvaDB Test" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- CS 6422 EVADB Assignment 1\n", + "- Code Repository Link: https://github.com/hunteritself/evadb/commits/staging\n", + "- Full name: Yang Yang\n", + "- Email: yyang999@gatech.edu\n", + "- GT SSO account name: yyang999\n", + "- GT ID: 903851095" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R07JHEMtuJpK" + }, + "source": [ + "You will need the Weaviate Python client. 
If you don't yet have it installed - you can do so with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -U weaviate-client" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZW0ZC5KJuJpM" + }, + "source": [ + "### Weaviate instance\n", + "\n", + "For this, you will need a working instance of Weaviate somewhere.\n", + "- Creating a free sandbox instance on Weaviate Cloud Services (https://console.weaviate.cloud/)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "47DC2hLxuJpN" + }, + "source": [ + "#### For using WCS\n", + "\n", + "NOTE: \n", + "- Before you do this, you need to create the instance in WCS and get the credentials. Please refer to the [WCS Quickstart guide](https://weaviate.io/developers/wcs/quickstart).\n", + "- Here I have simplified the initialization steps in weaviate.py.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "NRLMgUtcuJpN" + }, + "outputs": [], + "source": [ + "# For using WCS\n", + "import weaviate\n", + "import json\n", + "import os\n", + "\n", + "client = weaviate.Client(\n", + " url = \"https://cs6422-test-95rmw86w.weaviate.network\", # Replace with your endpoint\n", + " auth_client_secret=weaviate.AuthApiKey(api_key=\"AlONstEyNvdv3SBbmmzjTN0cmqqfu8762cs5\"), # Replace w/ your Weaviate instance API key\n", + " additional_headers = {\n", + " \"X-OpenAI-Api-Key\": \"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\" # Replace with your inference API key\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5HHvIL49uJpP" + }, + "source": [ + "### Create a class" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "oxeZ3Cr5uJpQ" + }, + "outputs": [], + "source": [ + "def create_weaviate_class(class_name: str, vectorizer: str, module_config: dict, properties: list) -> None:\n", + " # In Weaviate, vector index creation and 
management is not explicitly done like Pinecone\n", + " # Need to typically define a property in the schema to hold vectors and insert data accordingly\n", + "\n", + " \"\"\"\n", + " Create a Weaviate class with the specified configuration.\n", + "\n", + " Args:\n", + " class_name (str): The name of the class to create, e.g., \"Article\".\n", + " vectorizer (str): The vectorizer module to use, e.g., \"text2vec-cohere\".\n", + " module_config (dict): Configuration for vectorizer and generative module, e.g.,\n", + " {\n", + " \"text2vec-cohere\": {\n", + " \"model\": \"embed-multilingual-v2.0\",\n", + " },\n", + " }\n", + " properties (list): List of dictionaries specifying class properties, e.g.,\n", + " [\n", + " {\n", + " \"name\": \"title\",\n", + " \"dataType\": [\"text\"]\n", + " },\n", + " {\n", + " \"name\": \"body\",\n", + " \"dataType\": [\"text\"]\n", + " },\n", + " ]\n", + "\n", + " Returns:\n", + " None\n", + " \"\"\"\n", + " # Check if the class already exists\n", + " if client.schema.exists(class_name):\n", + " client.schema.delete_class(class_name)\n", + "\n", + " # Define the class object with provided parameters\n", + " class_obj = {\n", + " \"class\": class_name,\n", + " \"vectorizer\": vectorizer,\n", + " \"moduleConfig\": module_config,\n", + " \"properties\": properties\n", + " }\n", + "\n", + "\n", + " # Call the Weaviate API to create the class\n", + " client.schema.create_class(class_obj)\n", + "\n", + " # response = client.schema.get(class_name)\n", + " # Check the response for success or handle any errors\n", + " if client.schema.get(class_name)['class'] == class_name:\n", + " print(f\"Successfully created Weaviate class '{class_name}'\")\n", + " else:\n", + " print(f\"Failed to create Weaviate class '{class_name}'\")\n", + "\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Successfully created Weaviate class 
'Question'\n" + ] + } + ], + "source": [ + "def test_create_weaviate_class():\n", + " # Define the class configuration\n", + " class_name = \"Question\"\n", + " vectorizer = \"text2vec-openai\"\n", + " module_config = {\n", + " \"text2vec-openai\": {}\n", + " }\n", + " properties = [\n", + " {\n", + " \"name\": \"question\",\n", + " \"dataType\": [\"text\"]\n", + " },\n", + " {\n", + " \"name\": \"answer\",\n", + " \"dataType\": [\"text\"]\n", + " },\n", + " {\n", + " \"name\": \"category\",\n", + " \"dataType\": [\"text\"]\n", + " },\n", + " ]\n", + "\n", + " # Call the create_weaviate_class method\n", + " create_weaviate_class(class_name, vectorizer, module_config, properties)\n", + "\n", + "# Run the test\n", + "test_create_weaviate_class()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete a class" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def delete_weaviate_class(class_name: str) -> None:\n", + " \"\"\"\n", + " Delete a Weaviate class and its data.\n", + "\n", + " Args:\n", + " class_name (str): The name of the Weaviate class to delete.\n", + "\n", + " Returns:\n", + " None\n", + " \"\"\"\n", + " # Call the Weaviate API to delete the class\n", + " client.schema.delete_class(class_name)\n", + "\n", + " try:\n", + " # Attempt to retrieve the class, and if it results in an exception,\n", + " # consider the class as successfully deleted.\n", + " client.schema.get(class_name)\n", + " print(f\"Failed to delete Weaviate class '{class_name}'\")\n", + " except Exception as e:\n", + " print(f\"Successfully deleted Weaviate class '{class_name}'\")\n", + "\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Successfully deleted Weaviate class 'Question'\n" + ] + } + ], + "source": [ + "def test_delete_weaviate_class():\n", + " # Define the 
name of the class to be deleted\n", + " class_name = \"Question\"\n", + "\n", + " # Call the delete_weaviate_class method\n", + " delete_weaviate_class(class_name)\n", + "\n", + "# Run the test\n", + "test_delete_weaviate_class()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Successfully created Weaviate class 'Question'\n" + ] + } + ], + "source": [ + "test_create_weaviate_class()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AzG7IkCMuJpQ" + }, + "source": [ + "### Add objects" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List\n", + "\n", + "def add_to_weaviate_class(class_name: str, data_objects: List[dict]) -> None:\n", + " \"\"\"\n", + " Add objects to the specified Weaviate class.\n", + "\n", + " Args:\n", + " class_name (str): The name of the Weaviate class to add objects to.\n", + " data_objects (List[dict]): A list of dictionaries, where each dictionary contains property names and values.\n", + "\n", + " Returns:\n", + " None\n", + " \"\"\"\n", + " # Iterate over each data object and add it to the Weaviate class\n", + " for data_object in data_objects:\n", + " client.data_object.create(data_object, class_name)\n", + "\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def test_add_to_weaviate_class():\n", + " # Define the class name and data objects\n", + " class_name = \"Question\"\n", + " data_objects = [{\"Category\":\"SCIENCE\",\"Question\":\"This organ removes excess glucose from the blood & stores it as glycogen\",\"Answer\":\"Liver\"},{\"Category\":\"ANIMALS\",\"Question\":\"It's the only living mammal in the order Proboseidea\",\"Answer\":\"Elephant\"},{\"Category\":\"ANIMALS\",\"Question\":\"The gavial looks very much like a crocodile except for this 
bodily feature\",\"Answer\":\"the nose or snout\"},{\"Category\":\"ANIMALS\",\"Question\":\"Weighing around a ton, the eland is the largest species of this animal in Africa\",\"Answer\":\"Antelope\"},{\"Category\":\"ANIMALS\",\"Question\":\"Heaviest of all poisonous snakes is this North American rattlesnake\",\"Answer\":\"the diamondback rattler\"},{\"Category\":\"SCIENCE\",\"Question\":\"2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification\",\"Answer\":\"species\"},{\"Category\":\"SCIENCE\",\"Question\":\"A metal that is ductile can be pulled into this while cold & under pressure\",\"Answer\":\"wire\"},{\"Category\":\"SCIENCE\",\"Question\":\"In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance\",\"Answer\":\"DNA\"},{\"Category\":\"SCIENCE\",\"Question\":\"Changes in the tropospheric layer of this are what gives us weather\",\"Answer\":\"the atmosphere\"},{\"Category\":\"SCIENCE\",\"Question\":\"In 70-degree air, a plane traveling at about 1,130 feet per second breaks it\",\"Answer\":\"Sound barrier\"}]\n", + "\n", + " # Call the add_to_weaviate_class method\n", + " add_to_weaviate_class(class_name, data_objects)\n", + "\n", + "# Run the test\n", + "test_add_to_weaviate_class()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "swXTNGi_uJpR" + }, + "source": [ + "### Queries" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass\n", + "\n", + "@dataclass\n", + "class VectorIndexQuery:\n", + " embedding: list\n", + " top_k: int\n", + "\n", + "\n", + "def query_weaviate_class(class_name, properties_to_retrieve, query: VectorIndexQuery) -> List[dict]:\n", + " \"\"\"\n", + " Perform a similarity-based search in Weaviate.\n", + "\n", + " Args:\n", + " class_name (str): The name of the Weaviate class to perform the search on.\n", + " properties_to_retrieve 
(List[str]): A list of property names to retrieve.\n", + " query (VectorIndexQuery): A query object for similarity search, containing the query vector and top_k.\n", + "\n", + " Returns:\n", + " List[dict]: A list of dictionaries containing the retrieved properties.\n", + " \"\"\"\n", + " try:\n", + " # Define the similarity search query\n", + " response = (\n", + " client.query\n", + " .get(class_name, properties_to_retrieve)\n", + " .with_near_vector({\n", + " \"vector\": query.embedding\n", + " })\n", + " .with_limit(query.top_k)\n", + " .with_additional([\"distance\"])\n", + " .do()\n", + " )\n", + "\n", + " # Check if the response contains data\n", + " data = response.get('data', {})\n", + " if 'Get' not in data or class_name not in data['Get']:\n", + " print(f\"No objects of class {class_name} found.\")\n", + " return []\n", + "\n", + " # Extract the results\n", + " results = data['Get'][class_name]\n", + " print(results)\n", + "\n", + " return results\n", + "\n", + " except Exception as e:\n", + " print(f\"Failed to query Weaviate class '{class_name}'\")\n", + " print(e)\n", + "\n", + " return []" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'_additional': {'distance': 0.21892214}, 'question': 'The gavial looks very much like a crocodile except for this bodily feature'}, {'_additional': {'distance': 0.2226482}, 'question': \"It's the only living mammal in the order Proboseidea\"}, {'_additional': {'distance': 0.23982108}, 'question': 'Weighing around a ton, the eland is the largest species of this animal in Africa'}]\n", + "Query results:\n", + "{'_additional': {'distance': 0.21892214}, 'question': 'The gavial looks very much like a crocodile except for this bodily feature'}\n", + "{'_additional': {'distance': 0.2226482}, 'question': \"It's the only living mammal in the order Proboseidea\"}\n", + "{'_additional': {'distance': 0.23982108}, 
'question': 'Weighing around a ton, the eland is the largest species of this animal in Africa'}\n" + ] + } + ], + "source": [ + "def test_query_weaviate_class():\n", + " # Define the class name, properties to retrieve, and the query\n", + " class_name = \"Question\"\n", + " properties_to_retrieve = [\"question\"]\n", + " query = VectorIndexQuery(\n", + " embedding=[-0.0125526935, -0.021168863, -0.01076519, -0.02589537, -0.0070362035, 0.019870078, -0.010001986, -0.019120263, 0.00090044655, -0.017393013, 0.021302758, 0.010055545, 0.02937665, -0.003816019, 0.007692291, 0.012385325, 0.032750815, 0.020847514, 0.020311933, -0.022159688, -0.0009924996, 0.009399457, 0.0022226637, -0.029510546, 0.014393755, -0.007223657, 0.018276723, -0.03639277, -0.010001986, -0.022842556, 0.010363504, -0.020927852, -0.006929087, -0.022521207, -0.007652122, -0.011126708, 0.0279038, -0.01721895, 0.016482525, 0.002281243, -0.00169294, 0.009191919, -0.019655844, -0.022869334, -0.012412104, 0.0031967526, -0.0033457114, -0.01483561, -0.03173321, 0.004746592, 0.010095714, 0.007973471, -0.032134898, -0.023739655, -0.008040419, 0.018290112, -0.013637247, -0.008488968, 0.024623364, -0.039365247, -0.0032586793, 0.0009606995, -0.029510546, 0.0063265576, -0.019602288, 0.003081268, 0.013463182, -0.006601043, 0.019910246, -0.01542475, 0.0367409, -0.01193008, 0.012961075, -0.015625594, 0.0062462203, -0.0058646183, -0.0059248717, 0.01889264, 0.008127451, 0.0037155973, 0.037142586, -0.025373178, -0.005503101, 0.014982895, 0.035053816, -0.012432188, -0.017285896, 0.022936283, 0.0024620018, 0.016937768, -0.0062127467, 0.02154377, 0.0066378643, 0.029698, 0.0013071538, 0.0043850746, -0.008040419, 0.024797428, -0.012452273, -0.025132166, -0.0031900578, 0.0000019433794, -0.002378317, -0.008629559, 0.0126732, -0.0022494427, 0.0009623732, 0.0035582704, 0.017312676, -0.024569806, -0.008890655, 0.023056788, 0.014902558, -0.047104403, -0.009011161, -0.030447815, 0.017982153, -0.0042009684, -0.00654079, 0.00069249026, 
0.011936775, 0.023378137, 0.025105387, -0.009245478, 0.030929837, 0.00394322, 0.02123581, -0.0042545265, 0.0022578111, -0.017259117, 0.047157962, -0.00022029977, 0.03497348, -0.00072094303, -0.023605758, 0.036499888, -0.015384582, 0.011099929, -0.0139519, -0.03408977, 0.013155223, 0.030501373, -0.026698742, 0.004311432, -0.010236303, 0.011361024, 0.023793213, -0.00014874942, 0.0020352101, 0.0026829292, 0.00989487, 0.0074780583, 0.02734144, 0.003826061, 0.011722542, 0.00712993, -0.013992069, 0.0009406152, 0.010785274, -0.012325072, 0.01692438, 0.010617905, 0.016750315, -0.0070295087, 0.017687583, 0.038320865, 0.020485997, 0.005054551, -0.018812304, 0.0007201062, 0.0015381235, 0.0349467, 0.014728494, 0.050773136, -0.017901815, 0.0027716348, 0.0064704954, 0.026671965, -0.015063233, -0.013536825, 0.016696757, 0.008127451, 0.026966535, 0.029912233, -0.0031431946, 0.015156959, 0.012412104, -0.047907773, 0.022012403, -0.027006702, -0.0069491714, 0.010718327, 0.011976943, -0.008127451, -0.65212417, 0.00024289463, 0.0051214993, -0.013007938, 0.022373922, 0.0337952, -0.0026829292, -0.0110463705, -0.013034717, -0.0012167745, 0.010062239, -0.0023013272, 0.024409132, -0.009118277, -0.020191427, -0.01597372, 0.010115798, -0.030929837, -0.010932559, 0.010912475, -0.0009841312, 0.010571042, -0.008348378, -0.009104887, 0.02711382, 0.0036553445, -0.018263333, -0.030876279, 0.014594599, 0.037704945, -0.030126465, 0.014366977, 0.0055533117, 0.003487975, 0.044988856, 0.009881481, -0.012699978, 0.041132666, 0.01744657, 0.05417408, -0.004686339, 0.016121006, 0.0070495927, 0.015478308, -0.020593112, 0.0012376956, 0.027127208, -0.0051248465, 0.0005979267, 0.0063366, -0.008616169, 0.027877023, -0.00042679158, 0.008442105, 0.00069751136, 0.023806602, 0.029296314, -0.0047332025, 0.027877023, 0.0033005215, 0.014996285, -0.0061424514, 0.00451897, 0.015531867, -0.015317634, 0.044185482, 0.010196134, 0.007504837, 0.012405409, -0.030126465, 0.03821375, 0.0256008, -0.016710145, 0.0032804373, 
-0.013884953, 0.022775607, 0.030608488, -0.023431696, -0.008502358, 0.008683117, -0.0045490963, -0.0030143203, -0.024074392, 0.00874337, 0.009466405, -0.0072370465, -0.021383096, 0.001360712, 0.020298542, 0.0040168623, 0.008201093, 0.011106623, -0.03202778, 0.0046461704, -0.00088370964, -0.008957602, 0.0057575023, 0.00037407028, 0.017259117, -0.0482559, -0.0049507823, -0.024235068, -0.0014418861, 0.004425243, 0.023244241, 0.0107919695, -0.017058274, 0.0183035, 0.033339955, -0.009091497, 0.000118936776, 0.0031900578, -0.000044483608, -0.017058274, 0.001529755, -0.027984139, 0.02740839, -0.015344413, 0.015264076, -0.01719217, 0.010463926, -0.0067048124, 0.014942727, -0.00026653553, 0.02677908, -0.00036570182, -0.043194655, -0.022855945, -0.011294077, 0.005764197, 0.004910614, -0.0029724778, 0.0056637754, -0.01425986, -0.000008708432, 0.01866502, 0.031626094, 0.0050378144, 0.015451529, 0.009406152, -0.030742384, -0.0024318753, -0.029751558, -0.008348378, 0.0028519721, -0.008388547, -0.010611211, 0.0139519, -0.0006895613, -0.001230164, -0.0062462203, -0.013510046, 0.010617905, -0.010229609, 0.022213247, -0.00610563, -0.00568386, -0.0056503857, 0.02416812, -0.0076253433, 0.015183738, -0.005188447, -0.016080838, 0.013516741, 0.0062897364, -0.0068520973, 0.021396484, 0.007799407, -0.01721895, -0.025266062, 0.013791226, -0.017205559, -0.002068684, 0.032938268, 0.014661547, 0.023552202, -0.005827797, -0.008442105, -0.0074914475, 0.009111582, 0.016817262, -0.0050244248, -0.005871313, -0.008368462, 0.040329296, 0.008683117, 0.031518977, 0.026109602, -0.025815032, 0.011006202, -0.0034310697, 0.019575508, -0.013831395, -0.008676422, -0.008770149, -0.019990584, 0.008750064, 0.02851972, 0.0337952, 0.012666505, 0.021383096, -0.027448557, 0.0035448808, -0.016214734, 0.015197128, -0.027582452, -0.0138046155, -0.03899034, 0.008261346, 0.015478308, 0.017888425, 0.0153979715, 0.010658074, -0.011581952, 0.02530623, 0.017982153, -0.0059449556, 0.0054294583, 0.0022879376, -0.018758746, 
-0.0076119537, -0.027689569, 0.013463182, 0.011186961, -0.0063165156, 0.028412605, 0.011347636, 0.008709895, -0.003374164, -0.007919913, -0.025828423, 0.0033875536, -0.013831395, -0.0035716598, 0.010450536, -0.025172336, 0.003990083, -0.00093224674, 0.024047613, 0.008027029, -0.0029440252, 0.023458473, 0.016643198, -0.0326437, 0.019147042, 0.01925416, -0.0020151257, 0.0038628823, -0.026738912, 0.0008753412, -0.025105387, 0.0069491714, -0.02623011, 0.027033482, -0.0040737675, -0.021034967, 0.019468391, 0.0026042655, 0.03467891, 0.016107617, -0.0057139862, -0.011735932, 0.017687583, 0.011628816, 0.015090012, -0.006678033, -0.011715848, -0.01833028, 0.008040419, -0.01921399, -0.03267048, -0.005914829, 0.0014435598, -0.0030662047, 0.005479669, 0.01597372, -0.01454104, 0.023257632, 0.019722793, 0.0344379, 0.006929087, -0.043248214, 0.015853215, 0.012766927, -0.007417805, -0.018316891, -0.01163551, -0.017352844, -0.01978974, 0.015304244, -0.00005920687, 0.033580966, -0.0022343795, 0.0047800657, -0.007357552, 0.00033536615, 0.00887057, -0.025654359, 0.016388796, -0.011361024, 0.00019090556, 0.0060119033, -0.010075629, -0.0131485285, 0.01604067, -0.015531867, 0.0035616176, -0.017259117, 0.0035415334, 0.009265562, -0.0043348637, -0.005867966, -0.03283115, -0.004773371, -0.018410617, -0.0095400475, -0.006520706, -0.00414741, 0.031197628, 0.013690805, -0.008984381, -0.022320364, -0.012492441, -0.005724028, 0.09806499, 0.017272506, -0.00007704216, 0.00858939, 0.0030126465, -0.002835235, -0.023753043, -0.025587412, 0.016067449, 0.0024536331, 0.004719813, -0.02908208, 0.027743127, 0.0023414958, 0.0152908545, 0.00552988, -0.031974223, 0.0019582203, 0.010812053, -0.01952195, -0.00006171741, -0.02241409, 0.025252672, 0.013737668, 0.002356559, -0.03719614, 0.021637497, 0.033580966, 0.0044453274, -0.0074378895, -0.014715104, -0.01741979, -0.013489962, -0.003221858, 0.0038561875, -0.013121749, -0.012974464, 0.012619642, 0.053424265, -0.020459218, 0.011581952, 0.041962817, 
-0.00087032013, -0.0036988605, -0.0010025419, -0.020392269, 0.014902558, 0.021409875, 0.01771436, -0.006483885, 0.036633782, -0.00028808432, 0.011983639, 0.014326808, 0.024931323, 0.002629371, -0.01223804, -0.010972728, -0.011253908, 0.013831395, -0.01748674, -0.013777837, -0.0043449057, -0.009292341, -0.0015849868, -0.019455003, -0.031170849, -0.014393755, -0.03778528, -0.0028335615, -0.00785966, -0.027528895, -0.021008188, -0.03786562, -0.0008226199, -0.005539922, 0.011970249, -0.016937768, -0.0044553694, 0.015839826, -0.014929337, -0.011166876, 0.0031448682, -0.032402687, -0.011207045, -0.009432931, 0.0034059642, -0.00089124124, -0.009439626, -0.012840569, 0.013610467, 0.008877265, 0.006108978, 0.0021289368, 0.039124236, 0.0025557284, -0.004277958, 0.02822515, 0.022373922, -0.00888396, 0.032777593, -0.021610718, -0.010490704, -0.0017222296, -0.011113319, -0.024569806, 0.0024703701, 0.021155473, -0.004555791, -0.0060353354, 0.008241262, -0.03234913, -0.00048076818, -0.0069960346, 0.02910886, 0.013315897, -0.014728494, 0.01454104, -0.00567047, -0.0012602905, 0.0001736456, 0.005302258, -0.0000424961, 0.035589397, -0.01570593, 0.0107919695, 0.0051348885, -0.015331023, -0.0034193539, 0.003625218, -0.010477315, 0.024583196, -0.0030226887, -0.011776101, -0.040115062, -0.009091497, -0.003886314, 0.017888425, -0.03143864, -0.008629559, -0.005533227, -0.017138612, 0.01338954, -0.02681925, -0.006688075, -0.026538068, 0.0050210776, 0.011401193, 0.0076655117, 0.008576, -0.028171593, -0.0022025793, 0.005911482, 0.017205559, -0.02066006, -0.0413469, -0.016910989, 0.0097944485, 0.020807344, 0.030742384, 0.026738912, -0.011628816, 0.03350063, 0.011146792, -0.024556417, 0.019709403, -0.00712993, 0.012110839, -0.044694286, 0.02795736, 0.016777094, -0.0054729744, 0.025975708, 0.0109191695, 0.009821228, 0.012485746, 0.01571932, 0.0018661672, -0.014567819, -0.010972728, 0.0022394005, 0.01626829, 0.0014820547, -0.0030026045, 0.004120631, -0.023699487, 0.040918436, 0.0011640531, 
-0.0092856465, -0.0180491, 0.03459857, -0.013161918, -0.0036151758, -0.0073910262, 0.0028737301, -0.017968763, -0.016549472, -0.01355691, 0.0031616052, 0.0067516756, 0.0023096956, -0.0076789013, -0.009955123, 0.011233824, -0.0072906045, 0.016402187, 0.009727501, -0.0153979715, 0.020445827, -0.0042980425, -0.024556417, -0.048496913, -0.026886197, -0.047693543, 0.0007615301, -0.013925122, -0.010437147, 0.01483561, -0.0050277724, -0.022266805, 0.02793058, -0.015264076, 0.032563362, 0.00472316, 0.017526908, 0.021061746, -0.013818005, -0.021945456, 0.028573278, -0.0313583, 0.016469134, 0.00013180329, -0.000116426236, -0.0018477566, -0.03722292, -0.002868709, 0.001186648, -0.037463933, -0.046568822, 0.0128004, 0.015197128, 0.013054801, -0.017821478, -0.022320364, -0.022012403, 0.013289118, -0.0043516005, -0.0029808464, -0.01660303, -0.03786562, -0.024877766, -0.013356066, -0.006825318, 0.027582452, -0.0042545265, -0.0017063295, 0.024891155, -0.0049240035, -0.014500872, -0.016803874, 0.008127451, 0.022855945, -0.0014284966, -0.006339947, 0.01604067, 0.0026092867, 0.012057281, -0.008569306, 0.00007374708, 0.02766279, -0.025774864, 0.0047064233, -0.024676923, 0.013938512, -0.002286264, -0.011166876, -0.024074392, -0.018450785, -0.0049842563, 0.0035080595, 0.028305488, 0.033286396, -0.003054489, -0.003272069, -0.024502859, 0.021302758, -0.015558646, -0.006798539, 0.005667123, -0.01716539, 0.003325627, 0.00885718, -0.0047767186, -0.0073843314, -0.0038193662, -0.009352594, 0.0209948, 0.041507576, -0.036526665, -0.0022661798, -0.035401944, 0.012204566, -0.034759246, -0.008850486, -0.0009975208, -0.00022176426, -0.008629559, -0.015357803, -0.01455443, -0.0059416085, -0.01687082, 0.014487483, -0.0008845465, -0.0010284841, 0.02708704, 0.028653616, 0.0033189321, -0.025373178, 0.0036620393, 0.018772135, -0.0031130682, 0.0070495927, -0.00006830758, -0.017674193, 0.000969068, -0.018290112, -0.005546617, 0.0037658082, -0.00016872912, -0.024784038, -0.020860903, 0.02070023, 
0.0029138986, -0.036285654, -0.041159447, -0.022106132, -0.018651629, 0.03435756, -0.008194398, -0.020485997, 0.01660303, 0.026270278, 0.0079065235, 0.0015649025, -0.005807713, -0.012733453, -0.0042377897, -0.021891898, -0.0180491, -0.008783538, -0.017111832, 0.005493059, 0.011501615, -0.0025657706, -0.018946199, 0.006052072, -0.0120438915, 0.010644685, -0.005165015, 0.009881481, 0.02677908, -0.0035716598, 0.005449543, 0.021758003, -0.0072035724, 0.010745106, -0.012130924, -0.0011799532, 0.0036620393, -0.0034411119, 0.013028023, 0.045095973, -0.021396484, -0.01895959, 0.016281681, 0.0020050837, 0.008214483, 0.004632781, -0.030501373, -0.019709403, -0.021075137, -0.0027230978, -0.015183738, 0.0008828728, 0.015304244, -0.0034578487, -0.02940343, 0.015344413, 0.00785966, -0.0026260235, -0.008529137, 0.00442859, 0.0013900016, 0.0001500047, -0.024368962, -0.005580091, -0.017205559, -0.0285465, 0.0054729744, -0.0009422889, -0.0076722065, 0.02475726, -0.02241409, -0.016469134, -0.0064370213, 0.00018034037, 0.009044634, -0.0044486746, 0.000060462142, -0.014942727, 0.026658574, -0.0043181265, 0.030046128, -0.042043157, 0.016616419, -0.007170099, 0.02040566, -0.008227873, 0.025975708, -0.027877023, -0.022668492, 0.0051181517, -0.007116541, 0.016522693, -0.0025373178, -0.0018259985, -0.015906774, 0.013858174, -0.019843299, 0.0029942358, -0.01632185, -0.029831896, -0.024007445, -0.0045022327, -0.015946941, 0.030662047, 0.18091947, -0.016576251, 0.003936525, 0.039659817, -0.008160925, 0.021168863, 0.026002487, -0.0043248213, -0.008488968, 0.0125526935, -0.007839575, 0.024020836, -0.014500872, 0.008529137, 0.0011925059, -0.015652372, -0.00050880254, -0.0032017739, -0.006353337, -0.03438434, -0.013208781, -0.0023113694, -0.011608731, -0.015411361, 0.022842556, 0.0013423014, -0.0017356192, -0.005104762, 0.0062395255, 0.0056403438, 0.0061960095, -0.033018608, 0.0053591635, -0.02067345, -0.001453602, -0.013289118, -0.02851972, 0.028118035, 0.0052687842, 0.01338954, -0.0035314912, 
0.009673943, 0.009191919, 0.01281379, -0.013992069, 0.008134145, -0.004575875, 0.0015013022, -0.00028620142, 0.03550906, -0.0512016, 0.010477315, 0.008897349, 0.03347385, -0.02471709, 0.0011297425, 0.005851229, -0.019588897, 0.012037196, 0.010182745, 0.0065776114, 0.030233582, -0.01309497, 0.018839084, -0.024623364, 0.0072370465, -0.02241409, 0.03400943, -0.00069207186, -0.014674936, -0.0031833632, -0.024784038, -0.02645773, -0.012793706, -0.0008506542, -0.03583041, -0.012325072, 0.026966535, 0.01018944, 0.013356066, -0.02474387, -0.014326808, -0.007658817, 0.012827179, -0.02740839, -0.015277465, 0.021784782, -0.0015858236, -0.0018460829, 0.0004573365, -0.0057072914, -0.019588897, 0.0058411867, -0.002308022, -0.00066278223, 0.006460453, -0.00038369402, 0.018705187, -0.009078108, -0.020298542, -0.035991084, -0.047211517, 0.018571293, -0.0041775363, -0.008676422, -0.002138979, -0.007504837, -0.00078579865, 0.014621378, -0.0043850746, -0.01455443, -0.015906774, 0.0010176051, 0.006935782, 0.025199115, -0.0038093242, 0.013690805, -0.022253416, 0.036874793, -0.019053316, -0.0044821487, 0.0042377897, 0.005998514, 0.0064102425, 0.008080588, -0.028064476, -0.025239283, 0.0070295087, 0.023083568, -0.028653616, -0.010771885, -0.019280938, -0.005563354, -0.012579473, -0.005258742, 0.0012109166, 0.015531867, -0.017339455, 0.016241511, 0.0069424766, 0.015652372, 0.014380367, 0.006791844, -0.0023967277, 0.037945956, -0.0285465, 0.02128937, 0.0049942983, -0.029831896, -0.023819992, -0.016281681, -0.0031850368, 0.0029691304, -0.0038227136, 0.023645928, -0.036473107, -0.02153038, -0.025279451, -0.010242999, 0.018156216, -0.025413347, 0.0036218707, 0.005111457, -0.014487483, -0.0059784297, -0.013690805, -0.171279, -0.0037222921, 0.01626829, -0.010417062, -0.0007322405, -0.001834367, 0.008776844, -0.012867348, -0.005884703, -0.0027147292, 0.022306973, 0.0042244, -0.049300287, -0.0157461, 0.016054058, 0.002781677, 0.00197161, 0.007980166, -0.014366977, -0.0071834885, 0.021048358, 
-0.024971493, 0.017955374, -0.007692291, 0.0043683373, 0.018557902, 0.01570593, 0.0027063608, 0.0011791164, -0.03698191, -0.014875779, 0.008455494, 0.016536081, 0.009486489, -0.001415107, 0.002960762, -0.008368462, -0.021878509, -0.022454258, 0.004686339, 0.012392019, 0.04394447, 0.016121006, -0.0068085813, 0.014085797, -0.0022946324, 0.008509053, -0.0063868104, 0.022333752, -0.026591627, 0.006497274, -0.01454104, 0.0080337245, -0.0059014396, 0.01602728, 0.02651129, -0.010738411, 0.014567819, -0.010303251, -0.031010175, -0.03821375, -0.0056403438, -0.00006835988, -0.0011732584, -0.021945456, -0.011146792, -0.023498643, 0.021409875, -0.026712133, -0.004190926, 0.002542339, 0.0062462203, -0.004522317, -0.02967122, 0.008334989, 0.00029415145, -0.018544514, 0.022240026, -0.024261847, -0.021811562, -0.020566333, 0.0390439, -0.025466906, 0.014059017, 0.013476572, -0.007451279, -0.0101760505, -0.021918677, 0.004093852, -0.003772503, 0.034304, -0.029483767, -0.025574021, -0.015893385, -0.003407638, 0.030233582, 0.007799407, 0.00002280406, 0.021838339, -0.01633524, -0.006875529, -0.010229609, 0.0053256894, 0.02011109, -0.010885696, 0.04016862, 0.028760733, 0.015183738, 0.013061496, -0.0073307734, 0.0077324593, 0.007739154, 0.015344413, 0.03783884, -0.012124228, 0.0145276515, -0.00086027797, 0.0006744981, 0.035375167, -0.0044620642, 0.030903058, 0.01567915, -0.0053189946, -0.014045628, -0.018852472, 0.0035683124, -0.09554776, -0.013791226, -0.015116791, 0.0013891648, -0.026377395, 0.019147042, -0.008254652, 0.040623866, -0.01656286, 0.01948178, 0.01310836, -0.006025293, 0.005971735, -0.0051348885, 0.019843299, 0.02007092, -0.027421778, 0.00007709446, 0.0038896615, 0.005737418, 0.010095714, -0.0044988855, 0.011294077, -0.001899641, -0.01567915, 0.007216962, -0.02095463, 0.024797428, -0.0064805374, 0.010691548, 0.01208406, -0.012867348, -0.0057775867, -0.023110347, -0.019588897, 0.0060821986, -0.019374665, -0.0061391043, 0.031331524, -0.018490955, 0.004043641, 0.0032017739, 
-0.003973346, -0.014982895, 0.008696507, -0.025989097, 0.007156709, 0.013523435, -0.0041139363, -0.03055493, -0.02793058, -0.011106623, -0.02851972, 0.023753043, 0.04689017, 0.0035850494, 0.009834617, 0.0096003, 0.016147785, 0.019856688, 0.0031582578, 0.004666255, -0.00829482, 0.0395527, -0.01077858, -0.020512775, -0.020512775, 0.012057281, 0.027006702, -0.021999015, -0.009633774, 0.02878751, -0.026645185, -0.005057899, -0.016964547, 0.003315585, -0.02910886, -0.008107367, 0.0138046155, -0.023538811, -0.0028804247, -0.03491992, 0.0076789013, -0.03781206, 0.014032238, 0.019642456, 0.021798171, -0.0074780583, -0.01602728, -0.011909996, -0.015183738, 0.0031063734, 0.0016686714, -0.036553446, -0.0018594724, 0.015906774, -0.009225393, 0.006755023, 0.0065776114, 0.0139117325, -0.0045524435, 0.0051583205, -0.049166393, 0.018477565, -0.010182745, 0.0031398472, 0.022614934, 0.0048905294, 0.027234325, -0.005191794, 0.026966535, -0.0012477378, -0.029483767, 0.010303251, -0.0072370465, 0.015505088, -0.015183738, -0.009948429, 0.00054185797, -0.016844042, 0.0015339392, -0.008495663, 0.01105976, 0.008375158, 0.013992069, 0.00698934, 0.0035448808, 0.01427325, 0.0080538085, 0.005382595, -0.021677665, 0.004900572, 0.008977687, -0.034812804, 0.005998514, 0.024984881, 0.0032687215, -0.02795736, 0.009124972, 0.0022778956, 0.0038126716, 0.012646421, 0.0019180516, -0.0128004, 0.013034717, -0.046033237, -0.00021506949, -0.005104762, -0.010309946, 0.0054093744, 0.01632185, -0.005737418, 0.016937768, 0.010945949, -0.018129438, 0.0039532618, -0.0047432445, -0.04051675, 0.03703547, 0.007551701, 0.0031264576, 0.00073935365, 0.012887432, 0.00020000625, 0.003869577, -0.012961075, -0.010443841, 0.038481537, 0.0037089025, -0.013643941, 0.03639277, -0.040329296, -0.022293584, 0.004087157, 0.011709153, 0.014902558, -0.006122367, 0.007852965, 0.003081268, 0.018571293, -0.0077190697, 0.020927852, 0.021195643, -0.00010554723, -0.029055303, 0.006269652, 0.029885454, 0.0060554193, -0.0075583956, 
0.0008188541, 0.013041412, -0.006453758, -0.03467891, 0.005814408, 0.015090012, 0.010383588, 0.013818005, 0.018504344, -0.025761476, -0.011856438, 0.0052219206, 0.021423263, 0.00829482, -0.009881481, -0.01326234, -0.0038093242, -0.016951159, -0.005590133, -0.0067115068, -0.03483958, -0.010838833, 0.01717878, 0.038053073, -0.015612204, -0.004231095, 0.008027029, -0.008040419, 0.025667747, -0.005677165, -0.016455745, -0.028010918, 0.024342183, 0.010095714, 0.014219692, 0.016910989, -0.00083266204, 0.010182745, 0.021516992, 0.011950164, -0.03055493, -0.012104144, 0.004468759, -0.006969256, -0.014393755, -0.021342928, 0.0085626105, -0.015665762, 0.0021841687, -0.004234442, 0.030715605, -0.017138612, 0.042712633, 0.0062462203, 0.020057531, 0.008073892, -0.0326437, 0.01250583, 0.024342183, 0.04747931, -0.027020091, 0.0019414834, 0.015170349, -0.016362019, 0.02825193, -0.009566827, -0.039954387, -0.00697595, -0.023927107, -0.0285465, -0.000100316945, -0.024850987, 0.022963062, 0.002122242, 0.027877023, 0.0012870695, -0.018182995, -0.0079266075, 0.016174564, -0.010068934, -0.015090012, -0.0054227635, 0.0051516257, -0.013235561, -0.0075583956, -0.0131485285, 0.039365247, 0.0065575275, -0.011474836, 0.0028268667, -0.004425243, -0.0020703576, -0.010631295, -0.011702458, -0.0038394507, 0.0059784297, 0.032268792, 0.02244087, -0.023458473, -0.0053859423, -0.01925416],\n", + " top_k=3\n", + " )\n", + "\n", + " # Call the query_weaviate_class method\n", + " results = query_weaviate_class(class_name, properties_to_retrieve, query)\n", + "\n", + " if results:\n", + " print(\"Query results:\")\n", + " for result in results:\n", + " print(result)\n", + " else:\n", + " print(\"No results found for the query.\")\n", + "\n", + "# Run the test\n", + "test_query_weaviate_class()\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From b904f6f63174937b86427e53f9f361d0ad42a068 Mon Sep 17 00:00:00 2001 From: yyang999 Date: Fri, 17 Nov 2023 11:25:53 -0500 Subject: [PATCH 05/13] Remove .ipynb file --- .../vector_stores/cs6422_test_Yang Yang.ipynb | 474 ------------------ 1 file changed, 474 deletions(-) delete mode 100644 evadb/third_party/vector_stores/cs6422_test_Yang Yang.ipynb diff --git a/evadb/third_party/vector_stores/cs6422_test_Yang Yang.ipynb b/evadb/third_party/vector_stores/cs6422_test_Yang Yang.ipynb deleted file mode 100644 index 9bddc33d5..000000000 --- a/evadb/third_party/vector_stores/cs6422_test_Yang Yang.ipynb +++ /dev/null @@ -1,474 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "uP2JSULAuJpF" - }, - "source": [ - "## CS 6422 Integration of Weaviate with EvaDB Test" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- CS 6422 EVADB Assignment 1\n", - "- Code Repository Link: https://github.com/hunteritself/evadb/commits/staging\n", - "- Full name: Yang Yang\n", - "- Email: yyang999@gatech.edu\n", - "- GT SSO account name: yyang999\n", - "- GT ID: 903851095" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "R07JHEMtuJpK" - }, - "source": [ - "You will need the Weaviate Python client. 
If you don't yet have it installed - you can do so with:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -U weaviate-client" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZW0ZC5KJuJpM" - }, - "source": [ - "### Weaviate instance\n", - "\n", - "For this, you will need a working instance of Weaviate somewhere.\n", - "- Creating a free sandbox instance on Weaviate Cloud Services (https://console.weaviate.cloud/)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "47DC2hLxuJpN" - }, - "source": [ - "#### For using WCS\n", - "\n", - "NOTE: \n", - "- Before you do this, you need to create the instance in WCS and get the credentials. Please refer to the [WCS Quickstart guide](https://weaviate.io/developers/wcs/quickstart).\n", - "- Here I have simplified the initialization steps in weaviate.py.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "NRLMgUtcuJpN" - }, - "outputs": [], - "source": [ - "# For using WCS\n", - "import weaviate\n", - "import json\n", - "import os\n", - "\n", - "client = weaviate.Client(\n", - " url = \"https://cs6422-test-95rmw86w.weaviate.network\", # Replace with your endpoint\n", - " auth_client_secret=weaviate.AuthApiKey(api_key=\"AlONstEyNvdv3SBbmmzjTN0cmqqfu8762cs5\"), # Replace w/ your Weaviate instance API key\n", - " additional_headers = {\n", - " \"X-OpenAI-Api-Key\": \"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\" # Replace with your inference API key\n", - " }\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5HHvIL49uJpP" - }, - "source": [ - "### Create a class" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "oxeZ3Cr5uJpQ" - }, - "outputs": [], - "source": [ - "def create_weaviate_class(class_name: str, vectorizer: str, module_config: dict, properties: list) -> None:\n", - " # In Weaviate, vector index creation and 
management is not explicitly done like Pinecone\n", - " # Need to typically define a property in the schema to hold vectors and insert data accordingly\n", - "\n", - " \"\"\"\n", - " Create a Weaviate class with the specified configuration.\n", - "\n", - " Args:\n", - " class_name (str): The name of the class to create, e.g., \"Article\".\n", - " vectorizer (str): The vectorizer module to use, e.g., \"text2vec-cohere\".\n", - " module_config (dict): Configuration for vectorizer and generative module, e.g.,\n", - " {\n", - " \"text2vec-cohere\": {\n", - " \"model\": \"embed-multilingual-v2.0\",\n", - " },\n", - " }\n", - " properties (list): List of dictionaries specifying class properties, e.g.,\n", - " [\n", - " {\n", - " \"name\": \"title\",\n", - " \"dataType\": [\"text\"]\n", - " },\n", - " {\n", - " \"name\": \"body\",\n", - " \"dataType\": [\"text\"]\n", - " },\n", - " ]\n", - "\n", - " Returns:\n", - " None\n", - " \"\"\"\n", - " # Check if the class already exists\n", - " if client.schema.exists(class_name):\n", - " client.schema.delete_class(class_name)\n", - "\n", - " # Define the class object with provided parameters\n", - " class_obj = {\n", - " \"class\": class_name,\n", - " \"vectorizer\": vectorizer,\n", - " \"moduleConfig\": module_config,\n", - " \"properties\": properties\n", - " }\n", - "\n", - "\n", - " # Call the Weaviate API to create the class\n", - " client.schema.create_class(class_obj)\n", - "\n", - " # response = client.schema.get(class_name)\n", - " # Check the response for success or handle any errors\n", - " if client.schema.get(class_name)['class'] == class_name:\n", - " print(f\"Successfully created Weaviate class '{class_name}'\")\n", - " else:\n", - " print(f\"Failed to create Weaviate class '{class_name}'\")\n", - "\n", - " return None" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Successfully created Weaviate class 
'Question'\n" - ] - } - ], - "source": [ - "def test_create_weaviate_class():\n", - " # Define the class configuration\n", - " class_name = \"Question\"\n", - " vectorizer = \"text2vec-openai\"\n", - " module_config = {\n", - " \"text2vec-openai\": {}\n", - " }\n", - " properties = [\n", - " {\n", - " \"name\": \"question\",\n", - " \"dataType\": [\"text\"]\n", - " },\n", - " {\n", - " \"name\": \"answer\",\n", - " \"dataType\": [\"text\"]\n", - " },\n", - " {\n", - " \"name\": \"category\",\n", - " \"dataType\": [\"text\"]\n", - " },\n", - " ]\n", - "\n", - " # Call the create_weaviate_class method\n", - " create_weaviate_class(class_name, vectorizer, module_config, properties)\n", - "\n", - "# Run the test\n", - "test_create_weaviate_class()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete a class" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "def delete_weaviate_class(class_name: str) -> None:\n", - " \"\"\"\n", - " Delete a Weaviate class and its data.\n", - "\n", - " Args:\n", - " class_name (str): The name of the Weaviate class to delete.\n", - "\n", - " Returns:\n", - " None\n", - " \"\"\"\n", - " # Call the Weaviate API to delete the class\n", - " client.schema.delete_class(class_name)\n", - "\n", - " try:\n", - " # Attempt to retrieve the class, and if it results in an exception,\n", - " # consider the class as successfully deleted.\n", - " client.schema.get(class_name)\n", - " print(f\"Failed to delete Weaviate class '{class_name}'\")\n", - " except Exception as e:\n", - " print(f\"Successfully deleted Weaviate class '{class_name}'\")\n", - "\n", - " return None" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Successfully deleted Weaviate class 'Question'\n" - ] - } - ], - "source": [ - "def test_delete_weaviate_class():\n", - " # Define the 
name of the class to be deleted\n", - " class_name = \"Question\"\n", - "\n", - " # Call the delete_weaviate_class method\n", - " delete_weaviate_class(class_name)\n", - "\n", - "# Run the test\n", - "test_delete_weaviate_class()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Successfully created Weaviate class 'Question'\n" - ] - } - ], - "source": [ - "test_create_weaviate_class()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AzG7IkCMuJpQ" - }, - "source": [ - "### Add objects" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import List\n", - "\n", - "def add_to_weaviate_class(class_name: str, data_objects: List[dict]) -> None:\n", - " \"\"\"\n", - " Add objects to the specified Weaviate class.\n", - "\n", - " Args:\n", - " class_name (str): The name of the Weaviate class to add objects to.\n", - " data_objects (List[dict]): A list of dictionaries, where each dictionary contains property names and values.\n", - "\n", - " Returns:\n", - " None\n", - " \"\"\"\n", - " # Iterate over each data object and add it to the Weaviate class\n", - " for data_object in data_objects:\n", - " client.data_object.create(data_object, class_name)\n", - "\n", - " return None" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def test_add_to_weaviate_class():\n", - " # Define the class name and data objects\n", - " class_name = \"Question\"\n", - " data_objects = [{\"Category\":\"SCIENCE\",\"Question\":\"This organ removes excess glucose from the blood & stores it as glycogen\",\"Answer\":\"Liver\"},{\"Category\":\"ANIMALS\",\"Question\":\"It's the only living mammal in the order Proboseidea\",\"Answer\":\"Elephant\"},{\"Category\":\"ANIMALS\",\"Question\":\"The gavial looks very much like a crocodile except for this 
bodily feature\",\"Answer\":\"the nose or snout\"},{\"Category\":\"ANIMALS\",\"Question\":\"Weighing around a ton, the eland is the largest species of this animal in Africa\",\"Answer\":\"Antelope\"},{\"Category\":\"ANIMALS\",\"Question\":\"Heaviest of all poisonous snakes is this North American rattlesnake\",\"Answer\":\"the diamondback rattler\"},{\"Category\":\"SCIENCE\",\"Question\":\"2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification\",\"Answer\":\"species\"},{\"Category\":\"SCIENCE\",\"Question\":\"A metal that is ductile can be pulled into this while cold & under pressure\",\"Answer\":\"wire\"},{\"Category\":\"SCIENCE\",\"Question\":\"In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance\",\"Answer\":\"DNA\"},{\"Category\":\"SCIENCE\",\"Question\":\"Changes in the tropospheric layer of this are what gives us weather\",\"Answer\":\"the atmosphere\"},{\"Category\":\"SCIENCE\",\"Question\":\"In 70-degree air, a plane traveling at about 1,130 feet per second breaks it\",\"Answer\":\"Sound barrier\"}]\n", - "\n", - " # Call the add_to_weaviate_class method\n", - " add_to_weaviate_class(class_name, data_objects)\n", - "\n", - "# Run the test\n", - "test_add_to_weaviate_class()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "swXTNGi_uJpR" - }, - "source": [ - "### Queries" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "from dataclasses import dataclass\n", - "\n", - "@dataclass\n", - "class VectorIndexQuery:\n", - " embedding: list\n", - " top_k: int\n", - "\n", - "\n", - "def query_weaviate_class(class_name, properties_to_retrieve, query: VectorIndexQuery) -> List[dict]:\n", - " \"\"\"\n", - " Perform a similarity-based search in Weaviate.\n", - "\n", - " Args:\n", - " class_name (str): The name of the Weaviate class to perform the search on.\n", - " properties_to_retrieve 
(List[str]): A list of property names to retrieve.\n", - " query (VectorIndexQuery): A query object for similarity search, containing the query vector and top_k.\n", - "\n", - " Returns:\n", - " List[dict]: A list of dictionaries containing the retrieved properties.\n", - " \"\"\"\n", - " try:\n", - " # Define the similarity search query\n", - " response = (\n", - " client.query\n", - " .get(class_name, properties_to_retrieve)\n", - " .with_near_vector({\n", - " \"vector\": query.embedding\n", - " })\n", - " .with_limit(query.top_k)\n", - " .with_additional([\"distance\"])\n", - " .do()\n", - " )\n", - "\n", - " # Check if the response contains data\n", - " data = response.get('data', {})\n", - " if 'Get' not in data or class_name not in data['Get']:\n", - " print(f\"No objects of class {class_name} found.\")\n", - " return []\n", - "\n", - " # Extract the results\n", - " results = data['Get'][class_name]\n", - " print(results)\n", - "\n", - " return results\n", - "\n", - " except Exception as e:\n", - " print(f\"Failed to query Weaviate class '{class_name}'\")\n", - " print(e)\n", - "\n", - " return []" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'_additional': {'distance': 0.21892214}, 'question': 'The gavial looks very much like a crocodile except for this bodily feature'}, {'_additional': {'distance': 0.2226482}, 'question': \"It's the only living mammal in the order Proboseidea\"}, {'_additional': {'distance': 0.23982108}, 'question': 'Weighing around a ton, the eland is the largest species of this animal in Africa'}]\n", - "Query results:\n", - "{'_additional': {'distance': 0.21892214}, 'question': 'The gavial looks very much like a crocodile except for this bodily feature'}\n", - "{'_additional': {'distance': 0.2226482}, 'question': \"It's the only living mammal in the order Proboseidea\"}\n", - "{'_additional': {'distance': 0.23982108}, 
'question': 'Weighing around a ton, the eland is the largest species of this animal in Africa'}\n" - ] - } - ], - "source": [ - "def test_query_weaviate_class():\n", - " # Define the class name, properties to retrieve, and the query\n", - " class_name = \"Question\"\n", - " properties_to_retrieve = [\"question\"]\n", - " query = VectorIndexQuery(\n", - " embedding=[-0.0125526935, -0.021168863, -0.01076519, -0.02589537, -0.0070362035, 0.019870078, -0.010001986, -0.019120263, 0.00090044655, -0.017393013, 0.021302758, 0.010055545, 0.02937665, -0.003816019, 0.007692291, 0.012385325, 0.032750815, 0.020847514, 0.020311933, -0.022159688, -0.0009924996, 0.009399457, 0.0022226637, -0.029510546, 0.014393755, -0.007223657, 0.018276723, -0.03639277, -0.010001986, -0.022842556, 0.010363504, -0.020927852, -0.006929087, -0.022521207, -0.007652122, -0.011126708, 0.0279038, -0.01721895, 0.016482525, 0.002281243, -0.00169294, 0.009191919, -0.019655844, -0.022869334, -0.012412104, 0.0031967526, -0.0033457114, -0.01483561, -0.03173321, 0.004746592, 0.010095714, 0.007973471, -0.032134898, -0.023739655, -0.008040419, 0.018290112, -0.013637247, -0.008488968, 0.024623364, -0.039365247, -0.0032586793, 0.0009606995, -0.029510546, 0.0063265576, -0.019602288, 0.003081268, 0.013463182, -0.006601043, 0.019910246, -0.01542475, 0.0367409, -0.01193008, 0.012961075, -0.015625594, 0.0062462203, -0.0058646183, -0.0059248717, 0.01889264, 0.008127451, 0.0037155973, 0.037142586, -0.025373178, -0.005503101, 0.014982895, 0.035053816, -0.012432188, -0.017285896, 0.022936283, 0.0024620018, 0.016937768, -0.0062127467, 0.02154377, 0.0066378643, 0.029698, 0.0013071538, 0.0043850746, -0.008040419, 0.024797428, -0.012452273, -0.025132166, -0.0031900578, 0.0000019433794, -0.002378317, -0.008629559, 0.0126732, -0.0022494427, 0.0009623732, 0.0035582704, 0.017312676, -0.024569806, -0.008890655, 0.023056788, 0.014902558, -0.047104403, -0.009011161, -0.030447815, 0.017982153, -0.0042009684, -0.00654079, 0.00069249026, 
0.011936775, 0.023378137, 0.025105387, -0.009245478, 0.030929837, 0.00394322, 0.02123581, -0.0042545265, 0.0022578111, -0.017259117, 0.047157962, -0.00022029977, 0.03497348, -0.00072094303, -0.023605758, 0.036499888, -0.015384582, 0.011099929, -0.0139519, -0.03408977, 0.013155223, 0.030501373, -0.026698742, 0.004311432, -0.010236303, 0.011361024, 0.023793213, -0.00014874942, 0.0020352101, 0.0026829292, 0.00989487, 0.0074780583, 0.02734144, 0.003826061, 0.011722542, 0.00712993, -0.013992069, 0.0009406152, 0.010785274, -0.012325072, 0.01692438, 0.010617905, 0.016750315, -0.0070295087, 0.017687583, 0.038320865, 0.020485997, 0.005054551, -0.018812304, 0.0007201062, 0.0015381235, 0.0349467, 0.014728494, 0.050773136, -0.017901815, 0.0027716348, 0.0064704954, 0.026671965, -0.015063233, -0.013536825, 0.016696757, 0.008127451, 0.026966535, 0.029912233, -0.0031431946, 0.015156959, 0.012412104, -0.047907773, 0.022012403, -0.027006702, -0.0069491714, 0.010718327, 0.011976943, -0.008127451, -0.65212417, 0.00024289463, 0.0051214993, -0.013007938, 0.022373922, 0.0337952, -0.0026829292, -0.0110463705, -0.013034717, -0.0012167745, 0.010062239, -0.0023013272, 0.024409132, -0.009118277, -0.020191427, -0.01597372, 0.010115798, -0.030929837, -0.010932559, 0.010912475, -0.0009841312, 0.010571042, -0.008348378, -0.009104887, 0.02711382, 0.0036553445, -0.018263333, -0.030876279, 0.014594599, 0.037704945, -0.030126465, 0.014366977, 0.0055533117, 0.003487975, 0.044988856, 0.009881481, -0.012699978, 0.041132666, 0.01744657, 0.05417408, -0.004686339, 0.016121006, 0.0070495927, 0.015478308, -0.020593112, 0.0012376956, 0.027127208, -0.0051248465, 0.0005979267, 0.0063366, -0.008616169, 0.027877023, -0.00042679158, 0.008442105, 0.00069751136, 0.023806602, 0.029296314, -0.0047332025, 0.027877023, 0.0033005215, 0.014996285, -0.0061424514, 0.00451897, 0.015531867, -0.015317634, 0.044185482, 0.010196134, 0.007504837, 0.012405409, -0.030126465, 0.03821375, 0.0256008, -0.016710145, 0.0032804373, 
-0.013884953, 0.022775607, 0.030608488, -0.023431696, -0.008502358, 0.008683117, -0.0045490963, -0.0030143203, -0.024074392, 0.00874337, 0.009466405, -0.0072370465, -0.021383096, 0.001360712, 0.020298542, 0.0040168623, 0.008201093, 0.011106623, -0.03202778, 0.0046461704, -0.00088370964, -0.008957602, 0.0057575023, 0.00037407028, 0.017259117, -0.0482559, -0.0049507823, -0.024235068, -0.0014418861, 0.004425243, 0.023244241, 0.0107919695, -0.017058274, 0.0183035, 0.033339955, -0.009091497, 0.000118936776, 0.0031900578, -0.000044483608, -0.017058274, 0.001529755, -0.027984139, 0.02740839, -0.015344413, 0.015264076, -0.01719217, 0.010463926, -0.0067048124, 0.014942727, -0.00026653553, 0.02677908, -0.00036570182, -0.043194655, -0.022855945, -0.011294077, 0.005764197, 0.004910614, -0.0029724778, 0.0056637754, -0.01425986, -0.000008708432, 0.01866502, 0.031626094, 0.0050378144, 0.015451529, 0.009406152, -0.030742384, -0.0024318753, -0.029751558, -0.008348378, 0.0028519721, -0.008388547, -0.010611211, 0.0139519, -0.0006895613, -0.001230164, -0.0062462203, -0.013510046, 0.010617905, -0.010229609, 0.022213247, -0.00610563, -0.00568386, -0.0056503857, 0.02416812, -0.0076253433, 0.015183738, -0.005188447, -0.016080838, 0.013516741, 0.0062897364, -0.0068520973, 0.021396484, 0.007799407, -0.01721895, -0.025266062, 0.013791226, -0.017205559, -0.002068684, 0.032938268, 0.014661547, 0.023552202, -0.005827797, -0.008442105, -0.0074914475, 0.009111582, 0.016817262, -0.0050244248, -0.005871313, -0.008368462, 0.040329296, 0.008683117, 0.031518977, 0.026109602, -0.025815032, 0.011006202, -0.0034310697, 0.019575508, -0.013831395, -0.008676422, -0.008770149, -0.019990584, 0.008750064, 0.02851972, 0.0337952, 0.012666505, 0.021383096, -0.027448557, 0.0035448808, -0.016214734, 0.015197128, -0.027582452, -0.0138046155, -0.03899034, 0.008261346, 0.015478308, 0.017888425, 0.0153979715, 0.010658074, -0.011581952, 0.02530623, 0.017982153, -0.0059449556, 0.0054294583, 0.0022879376, -0.018758746, 
-0.0076119537, -0.027689569, 0.013463182, 0.011186961, -0.0063165156, 0.028412605, 0.011347636, 0.008709895, -0.003374164, -0.007919913, -0.025828423, 0.0033875536, -0.013831395, -0.0035716598, 0.010450536, -0.025172336, 0.003990083, -0.00093224674, 0.024047613, 0.008027029, -0.0029440252, 0.023458473, 0.016643198, -0.0326437, 0.019147042, 0.01925416, -0.0020151257, 0.0038628823, -0.026738912, 0.0008753412, -0.025105387, 0.0069491714, -0.02623011, 0.027033482, -0.0040737675, -0.021034967, 0.019468391, 0.0026042655, 0.03467891, 0.016107617, -0.0057139862, -0.011735932, 0.017687583, 0.011628816, 0.015090012, -0.006678033, -0.011715848, -0.01833028, 0.008040419, -0.01921399, -0.03267048, -0.005914829, 0.0014435598, -0.0030662047, 0.005479669, 0.01597372, -0.01454104, 0.023257632, 0.019722793, 0.0344379, 0.006929087, -0.043248214, 0.015853215, 0.012766927, -0.007417805, -0.018316891, -0.01163551, -0.017352844, -0.01978974, 0.015304244, -0.00005920687, 0.033580966, -0.0022343795, 0.0047800657, -0.007357552, 0.00033536615, 0.00887057, -0.025654359, 0.016388796, -0.011361024, 0.00019090556, 0.0060119033, -0.010075629, -0.0131485285, 0.01604067, -0.015531867, 0.0035616176, -0.017259117, 0.0035415334, 0.009265562, -0.0043348637, -0.005867966, -0.03283115, -0.004773371, -0.018410617, -0.0095400475, -0.006520706, -0.00414741, 0.031197628, 0.013690805, -0.008984381, -0.022320364, -0.012492441, -0.005724028, 0.09806499, 0.017272506, -0.00007704216, 0.00858939, 0.0030126465, -0.002835235, -0.023753043, -0.025587412, 0.016067449, 0.0024536331, 0.004719813, -0.02908208, 0.027743127, 0.0023414958, 0.0152908545, 0.00552988, -0.031974223, 0.0019582203, 0.010812053, -0.01952195, -0.00006171741, -0.02241409, 0.025252672, 0.013737668, 0.002356559, -0.03719614, 0.021637497, 0.033580966, 0.0044453274, -0.0074378895, -0.014715104, -0.01741979, -0.013489962, -0.003221858, 0.0038561875, -0.013121749, -0.012974464, 0.012619642, 0.053424265, -0.020459218, 0.011581952, 0.041962817, 
-0.00087032013, -0.0036988605, -0.0010025419, -0.020392269, 0.014902558, 0.021409875, 0.01771436, -0.006483885, 0.036633782, -0.00028808432, 0.011983639, 0.014326808, 0.024931323, 0.002629371, -0.01223804, -0.010972728, -0.011253908, 0.013831395, -0.01748674, -0.013777837, -0.0043449057, -0.009292341, -0.0015849868, -0.019455003, -0.031170849, -0.014393755, -0.03778528, -0.0028335615, -0.00785966, -0.027528895, -0.021008188, -0.03786562, -0.0008226199, -0.005539922, 0.011970249, -0.016937768, -0.0044553694, 0.015839826, -0.014929337, -0.011166876, 0.0031448682, -0.032402687, -0.011207045, -0.009432931, 0.0034059642, -0.00089124124, -0.009439626, -0.012840569, 0.013610467, 0.008877265, 0.006108978, 0.0021289368, 0.039124236, 0.0025557284, -0.004277958, 0.02822515, 0.022373922, -0.00888396, 0.032777593, -0.021610718, -0.010490704, -0.0017222296, -0.011113319, -0.024569806, 0.0024703701, 0.021155473, -0.004555791, -0.0060353354, 0.008241262, -0.03234913, -0.00048076818, -0.0069960346, 0.02910886, 0.013315897, -0.014728494, 0.01454104, -0.00567047, -0.0012602905, 0.0001736456, 0.005302258, -0.0000424961, 0.035589397, -0.01570593, 0.0107919695, 0.0051348885, -0.015331023, -0.0034193539, 0.003625218, -0.010477315, 0.024583196, -0.0030226887, -0.011776101, -0.040115062, -0.009091497, -0.003886314, 0.017888425, -0.03143864, -0.008629559, -0.005533227, -0.017138612, 0.01338954, -0.02681925, -0.006688075, -0.026538068, 0.0050210776, 0.011401193, 0.0076655117, 0.008576, -0.028171593, -0.0022025793, 0.005911482, 0.017205559, -0.02066006, -0.0413469, -0.016910989, 0.0097944485, 0.020807344, 0.030742384, 0.026738912, -0.011628816, 0.03350063, 0.011146792, -0.024556417, 0.019709403, -0.00712993, 0.012110839, -0.044694286, 0.02795736, 0.016777094, -0.0054729744, 0.025975708, 0.0109191695, 0.009821228, 0.012485746, 0.01571932, 0.0018661672, -0.014567819, -0.010972728, 0.0022394005, 0.01626829, 0.0014820547, -0.0030026045, 0.004120631, -0.023699487, 0.040918436, 0.0011640531, 
-0.0092856465, -0.0180491, 0.03459857, -0.013161918, -0.0036151758, -0.0073910262, 0.0028737301, -0.017968763, -0.016549472, -0.01355691, 0.0031616052, 0.0067516756, 0.0023096956, -0.0076789013, -0.009955123, 0.011233824, -0.0072906045, 0.016402187, 0.009727501, -0.0153979715, 0.020445827, -0.0042980425, -0.024556417, -0.048496913, -0.026886197, -0.047693543, 0.0007615301, -0.013925122, -0.010437147, 0.01483561, -0.0050277724, -0.022266805, 0.02793058, -0.015264076, 0.032563362, 0.00472316, 0.017526908, 0.021061746, -0.013818005, -0.021945456, 0.028573278, -0.0313583, 0.016469134, 0.00013180329, -0.000116426236, -0.0018477566, -0.03722292, -0.002868709, 0.001186648, -0.037463933, -0.046568822, 0.0128004, 0.015197128, 0.013054801, -0.017821478, -0.022320364, -0.022012403, 0.013289118, -0.0043516005, -0.0029808464, -0.01660303, -0.03786562, -0.024877766, -0.013356066, -0.006825318, 0.027582452, -0.0042545265, -0.0017063295, 0.024891155, -0.0049240035, -0.014500872, -0.016803874, 0.008127451, 0.022855945, -0.0014284966, -0.006339947, 0.01604067, 0.0026092867, 0.012057281, -0.008569306, 0.00007374708, 0.02766279, -0.025774864, 0.0047064233, -0.024676923, 0.013938512, -0.002286264, -0.011166876, -0.024074392, -0.018450785, -0.0049842563, 0.0035080595, 0.028305488, 0.033286396, -0.003054489, -0.003272069, -0.024502859, 0.021302758, -0.015558646, -0.006798539, 0.005667123, -0.01716539, 0.003325627, 0.00885718, -0.0047767186, -0.0073843314, -0.0038193662, -0.009352594, 0.0209948, 0.041507576, -0.036526665, -0.0022661798, -0.035401944, 0.012204566, -0.034759246, -0.008850486, -0.0009975208, -0.00022176426, -0.008629559, -0.015357803, -0.01455443, -0.0059416085, -0.01687082, 0.014487483, -0.0008845465, -0.0010284841, 0.02708704, 0.028653616, 0.0033189321, -0.025373178, 0.0036620393, 0.018772135, -0.0031130682, 0.0070495927, -0.00006830758, -0.017674193, 0.000969068, -0.018290112, -0.005546617, 0.0037658082, -0.00016872912, -0.024784038, -0.020860903, 0.02070023, 
0.0029138986, -0.036285654, -0.041159447, -0.022106132, -0.018651629, 0.03435756, -0.008194398, -0.020485997, 0.01660303, 0.026270278, 0.0079065235, 0.0015649025, -0.005807713, -0.012733453, -0.0042377897, -0.021891898, -0.0180491, -0.008783538, -0.017111832, 0.005493059, 0.011501615, -0.0025657706, -0.018946199, 0.006052072, -0.0120438915, 0.010644685, -0.005165015, 0.009881481, 0.02677908, -0.0035716598, 0.005449543, 0.021758003, -0.0072035724, 0.010745106, -0.012130924, -0.0011799532, 0.0036620393, -0.0034411119, 0.013028023, 0.045095973, -0.021396484, -0.01895959, 0.016281681, 0.0020050837, 0.008214483, 0.004632781, -0.030501373, -0.019709403, -0.021075137, -0.0027230978, -0.015183738, 0.0008828728, 0.015304244, -0.0034578487, -0.02940343, 0.015344413, 0.00785966, -0.0026260235, -0.008529137, 0.00442859, 0.0013900016, 0.0001500047, -0.024368962, -0.005580091, -0.017205559, -0.0285465, 0.0054729744, -0.0009422889, -0.0076722065, 0.02475726, -0.02241409, -0.016469134, -0.0064370213, 0.00018034037, 0.009044634, -0.0044486746, 0.000060462142, -0.014942727, 0.026658574, -0.0043181265, 0.030046128, -0.042043157, 0.016616419, -0.007170099, 0.02040566, -0.008227873, 0.025975708, -0.027877023, -0.022668492, 0.0051181517, -0.007116541, 0.016522693, -0.0025373178, -0.0018259985, -0.015906774, 0.013858174, -0.019843299, 0.0029942358, -0.01632185, -0.029831896, -0.024007445, -0.0045022327, -0.015946941, 0.030662047, 0.18091947, -0.016576251, 0.003936525, 0.039659817, -0.008160925, 0.021168863, 0.026002487, -0.0043248213, -0.008488968, 0.0125526935, -0.007839575, 0.024020836, -0.014500872, 0.008529137, 0.0011925059, -0.015652372, -0.00050880254, -0.0032017739, -0.006353337, -0.03438434, -0.013208781, -0.0023113694, -0.011608731, -0.015411361, 0.022842556, 0.0013423014, -0.0017356192, -0.005104762, 0.0062395255, 0.0056403438, 0.0061960095, -0.033018608, 0.0053591635, -0.02067345, -0.001453602, -0.013289118, -0.02851972, 0.028118035, 0.0052687842, 0.01338954, -0.0035314912, 
0.009673943, 0.009191919, 0.01281379, -0.013992069, 0.008134145, -0.004575875, 0.0015013022, -0.00028620142, 0.03550906, -0.0512016, 0.010477315, 0.008897349, 0.03347385, -0.02471709, 0.0011297425, 0.005851229, -0.019588897, 0.012037196, 0.010182745, 0.0065776114, 0.030233582, -0.01309497, 0.018839084, -0.024623364, 0.0072370465, -0.02241409, 0.03400943, -0.00069207186, -0.014674936, -0.0031833632, -0.024784038, -0.02645773, -0.012793706, -0.0008506542, -0.03583041, -0.012325072, 0.026966535, 0.01018944, 0.013356066, -0.02474387, -0.014326808, -0.007658817, 0.012827179, -0.02740839, -0.015277465, 0.021784782, -0.0015858236, -0.0018460829, 0.0004573365, -0.0057072914, -0.019588897, 0.0058411867, -0.002308022, -0.00066278223, 0.006460453, -0.00038369402, 0.018705187, -0.009078108, -0.020298542, -0.035991084, -0.047211517, 0.018571293, -0.0041775363, -0.008676422, -0.002138979, -0.007504837, -0.00078579865, 0.014621378, -0.0043850746, -0.01455443, -0.015906774, 0.0010176051, 0.006935782, 0.025199115, -0.0038093242, 0.013690805, -0.022253416, 0.036874793, -0.019053316, -0.0044821487, 0.0042377897, 0.005998514, 0.0064102425, 0.008080588, -0.028064476, -0.025239283, 0.0070295087, 0.023083568, -0.028653616, -0.010771885, -0.019280938, -0.005563354, -0.012579473, -0.005258742, 0.0012109166, 0.015531867, -0.017339455, 0.016241511, 0.0069424766, 0.015652372, 0.014380367, 0.006791844, -0.0023967277, 0.037945956, -0.0285465, 0.02128937, 0.0049942983, -0.029831896, -0.023819992, -0.016281681, -0.0031850368, 0.0029691304, -0.0038227136, 0.023645928, -0.036473107, -0.02153038, -0.025279451, -0.010242999, 0.018156216, -0.025413347, 0.0036218707, 0.005111457, -0.014487483, -0.0059784297, -0.013690805, -0.171279, -0.0037222921, 0.01626829, -0.010417062, -0.0007322405, -0.001834367, 0.008776844, -0.012867348, -0.005884703, -0.0027147292, 0.022306973, 0.0042244, -0.049300287, -0.0157461, 0.016054058, 0.002781677, 0.00197161, 0.007980166, -0.014366977, -0.0071834885, 0.021048358, 
-0.024971493, 0.017955374, -0.007692291, 0.0043683373, 0.018557902, 0.01570593, 0.0027063608, 0.0011791164, -0.03698191, -0.014875779, 0.008455494, 0.016536081, 0.009486489, -0.001415107, 0.002960762, -0.008368462, -0.021878509, -0.022454258, 0.004686339, 0.012392019, 0.04394447, 0.016121006, -0.0068085813, 0.014085797, -0.0022946324, 0.008509053, -0.0063868104, 0.022333752, -0.026591627, 0.006497274, -0.01454104, 0.0080337245, -0.0059014396, 0.01602728, 0.02651129, -0.010738411, 0.014567819, -0.010303251, -0.031010175, -0.03821375, -0.0056403438, -0.00006835988, -0.0011732584, -0.021945456, -0.011146792, -0.023498643, 0.021409875, -0.026712133, -0.004190926, 0.002542339, 0.0062462203, -0.004522317, -0.02967122, 0.008334989, 0.00029415145, -0.018544514, 0.022240026, -0.024261847, -0.021811562, -0.020566333, 0.0390439, -0.025466906, 0.014059017, 0.013476572, -0.007451279, -0.0101760505, -0.021918677, 0.004093852, -0.003772503, 0.034304, -0.029483767, -0.025574021, -0.015893385, -0.003407638, 0.030233582, 0.007799407, 0.00002280406, 0.021838339, -0.01633524, -0.006875529, -0.010229609, 0.0053256894, 0.02011109, -0.010885696, 0.04016862, 0.028760733, 0.015183738, 0.013061496, -0.0073307734, 0.0077324593, 0.007739154, 0.015344413, 0.03783884, -0.012124228, 0.0145276515, -0.00086027797, 0.0006744981, 0.035375167, -0.0044620642, 0.030903058, 0.01567915, -0.0053189946, -0.014045628, -0.018852472, 0.0035683124, -0.09554776, -0.013791226, -0.015116791, 0.0013891648, -0.026377395, 0.019147042, -0.008254652, 0.040623866, -0.01656286, 0.01948178, 0.01310836, -0.006025293, 0.005971735, -0.0051348885, 0.019843299, 0.02007092, -0.027421778, 0.00007709446, 0.0038896615, 0.005737418, 0.010095714, -0.0044988855, 0.011294077, -0.001899641, -0.01567915, 0.007216962, -0.02095463, 0.024797428, -0.0064805374, 0.010691548, 0.01208406, -0.012867348, -0.0057775867, -0.023110347, -0.019588897, 0.0060821986, -0.019374665, -0.0061391043, 0.031331524, -0.018490955, 0.004043641, 0.0032017739, 
-0.003973346, -0.014982895, 0.008696507, -0.025989097, 0.007156709, 0.013523435, -0.0041139363, -0.03055493, -0.02793058, -0.011106623, -0.02851972, 0.023753043, 0.04689017, 0.0035850494, 0.009834617, 0.0096003, 0.016147785, 0.019856688, 0.0031582578, 0.004666255, -0.00829482, 0.0395527, -0.01077858, -0.020512775, -0.020512775, 0.012057281, 0.027006702, -0.021999015, -0.009633774, 0.02878751, -0.026645185, -0.005057899, -0.016964547, 0.003315585, -0.02910886, -0.008107367, 0.0138046155, -0.023538811, -0.0028804247, -0.03491992, 0.0076789013, -0.03781206, 0.014032238, 0.019642456, 0.021798171, -0.0074780583, -0.01602728, -0.011909996, -0.015183738, 0.0031063734, 0.0016686714, -0.036553446, -0.0018594724, 0.015906774, -0.009225393, 0.006755023, 0.0065776114, 0.0139117325, -0.0045524435, 0.0051583205, -0.049166393, 0.018477565, -0.010182745, 0.0031398472, 0.022614934, 0.0048905294, 0.027234325, -0.005191794, 0.026966535, -0.0012477378, -0.029483767, 0.010303251, -0.0072370465, 0.015505088, -0.015183738, -0.009948429, 0.00054185797, -0.016844042, 0.0015339392, -0.008495663, 0.01105976, 0.008375158, 0.013992069, 0.00698934, 0.0035448808, 0.01427325, 0.0080538085, 0.005382595, -0.021677665, 0.004900572, 0.008977687, -0.034812804, 0.005998514, 0.024984881, 0.0032687215, -0.02795736, 0.009124972, 0.0022778956, 0.0038126716, 0.012646421, 0.0019180516, -0.0128004, 0.013034717, -0.046033237, -0.00021506949, -0.005104762, -0.010309946, 0.0054093744, 0.01632185, -0.005737418, 0.016937768, 0.010945949, -0.018129438, 0.0039532618, -0.0047432445, -0.04051675, 0.03703547, 0.007551701, 0.0031264576, 0.00073935365, 0.012887432, 0.00020000625, 0.003869577, -0.012961075, -0.010443841, 0.038481537, 0.0037089025, -0.013643941, 0.03639277, -0.040329296, -0.022293584, 0.004087157, 0.011709153, 0.014902558, -0.006122367, 0.007852965, 0.003081268, 0.018571293, -0.0077190697, 0.020927852, 0.021195643, -0.00010554723, -0.029055303, 0.006269652, 0.029885454, 0.0060554193, -0.0075583956, 
0.0008188541, 0.013041412, -0.006453758, -0.03467891, 0.005814408, 0.015090012, 0.010383588, 0.013818005, 0.018504344, -0.025761476, -0.011856438, 0.0052219206, 0.021423263, 0.00829482, -0.009881481, -0.01326234, -0.0038093242, -0.016951159, -0.005590133, -0.0067115068, -0.03483958, -0.010838833, 0.01717878, 0.038053073, -0.015612204, -0.004231095, 0.008027029, -0.008040419, 0.025667747, -0.005677165, -0.016455745, -0.028010918, 0.024342183, 0.010095714, 0.014219692, 0.016910989, -0.00083266204, 0.010182745, 0.021516992, 0.011950164, -0.03055493, -0.012104144, 0.004468759, -0.006969256, -0.014393755, -0.021342928, 0.0085626105, -0.015665762, 0.0021841687, -0.004234442, 0.030715605, -0.017138612, 0.042712633, 0.0062462203, 0.020057531, 0.008073892, -0.0326437, 0.01250583, 0.024342183, 0.04747931, -0.027020091, 0.0019414834, 0.015170349, -0.016362019, 0.02825193, -0.009566827, -0.039954387, -0.00697595, -0.023927107, -0.0285465, -0.000100316945, -0.024850987, 0.022963062, 0.002122242, 0.027877023, 0.0012870695, -0.018182995, -0.0079266075, 0.016174564, -0.010068934, -0.015090012, -0.0054227635, 0.0051516257, -0.013235561, -0.0075583956, -0.0131485285, 0.039365247, 0.0065575275, -0.011474836, 0.0028268667, -0.004425243, -0.0020703576, -0.010631295, -0.011702458, -0.0038394507, 0.0059784297, 0.032268792, 0.02244087, -0.023458473, -0.0053859423, -0.01925416],\n", - " top_k=3\n", - " )\n", - "\n", - " # Call the query_weaviate_class method\n", - " results = query_weaviate_class(class_name, properties_to_retrieve, query)\n", - "\n", - " if results:\n", - " print(\"Query results:\")\n", - " for result in results:\n", - " print(result)\n", - " else:\n", - " print(\"No results found for the query.\")\n", - "\n", - "# Run the test\n", - "test_query_weaviate_class()\n", - "\n" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} From 93669e1f53ec4c29b229f8c1c2ec3520e925b0a2 Mon Sep 17 00:00:00 2001 From: yyang999 Date: Fri, 17 Nov 2023 11:45:34 -0500 Subject: [PATCH 06/13] Update weaviate.py --- evadb/third_party/vector_stores/weaviate.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/evadb/third_party/vector_stores/weaviate.py b/evadb/third_party/vector_stores/weaviate.py index 50deecd6d..c574d489d 100644 --- a/evadb/third_party/vector_stores/weaviate.py +++ b/evadb/third_party/vector_stores/weaviate.py @@ -15,7 +15,6 @@ import os from typing import List -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.third_party.vector_stores.types import ( FeaturePayload, VectorIndexQuery, @@ -28,14 +27,12 @@ class WeaviateVectorStore(VectorStore): - def __init__(self) -> None: + def __init__(self, **kwargs) -> None: try_to_import_weaviate_client() global _weaviate_init_done # Get the API key. - self._api_key = ConfigurationManager().get_value( - "third_party", "WEAVIATE_API_KEY" - ) + self._api_key = kwargs.get("WEAVIATE_API_KEY") if not self._api_key: self._api_key = os.environ.get("WEAVIATE_API_KEY") @@ -47,9 +44,7 @@ def __init__(self) -> None: "environment variable (WEAVIATE_API_KEY). It can be found at the Details tab in WCS Dashboard." # Get the API Url. 
- self._api_url = ConfigurationManager().get_value( - "third_party", "WEAVIATE_API_URL" - ) + self._api_url = kwargs.get("WEAVIATE_API_URL") if not self._api_url: self._api_url = os.environ.get("WEAVIATE_API_URL") From d365c80f18207f7fe620bc2bb9544aa63ab761cb Mon Sep 17 00:00:00 2001 From: yyang999 Date: Fri, 17 Nov 2023 12:32:51 -0500 Subject: [PATCH 07/13] Optimize the code format --- evadb/evadb_config.py | 2 ++ evadb/third_party/vector_stores/types.py | 4 ++-- evadb/third_party/vector_stores/weaviate.py | 8 ++------ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/evadb/evadb_config.py b/evadb/evadb_config.py index 9c209c012..6117514b2 100644 --- a/evadb/evadb_config.py +++ b/evadb/evadb_config.py @@ -41,4 +41,6 @@ "MILVUS_PASSWORD": "", "MILVUS_DB_NAME": "", "MILVUS_TOKEN": "", + "WEAVIATE_API_KEY": "", + "WEAVIATE_API_URL": "", } diff --git a/evadb/third_party/vector_stores/types.py b/evadb/third_party/vector_stores/types.py index d337ac894..44b210642 100644 --- a/evadb/third_party/vector_stores/types.py +++ b/evadb/third_party/vector_stores/types.py @@ -14,7 +14,6 @@ # limitations under the License. from dataclasses import dataclass from typing import List -from uuid import uuid5, NAMESPACE_DNS @dataclass @@ -34,6 +33,7 @@ class VectorIndexQueryResult: similarities: List[float] ids: List[int] + class VectorStore: def create(self, vector_dim: int): """Create an index""" @@ -126,4 +126,4 @@ def query_weaviate_class(self, class_name, properties_to_retrieve, query: Vector List[dict]: A list of dictionaries containing the retrieved properties. """ # Implement the logic to query a Weaviate class for similar vectors. - ... \ No newline at end of file + ... 
diff --git a/evadb/third_party/vector_stores/weaviate.py b/evadb/third_party/vector_stores/weaviate.py index c574d489d..17348c9f4 100644 --- a/evadb/third_party/vector_stores/weaviate.py +++ b/evadb/third_party/vector_stores/weaviate.py @@ -16,7 +16,6 @@ from typing import List from evadb.third_party.vector_stores.types import ( - FeaturePayload, VectorIndexQuery, VectorStore, ) @@ -37,7 +36,6 @@ def __init__(self, **kwargs) -> None: if not self._api_key: self._api_key = os.environ.get("WEAVIATE_API_KEY") - assert ( self._api_key ), "Please set your Weaviate API key in evadb.yml file (third_party, weaviate_api_key) or " \ @@ -54,7 +52,6 @@ def __init__(self, **kwargs) -> None: ), "Please set your Weaviate API Url in evadb.yml file (third_party, weaviate_api_url) or " \ "environment variable (WEAVIATE_API_URL). It can be found at the Details tab in WCS Dashboard." - if not _weaviate_init_done: # Initialize weaviate client import weaviate @@ -112,7 +109,6 @@ def create_weaviate_class(self, class_name: str, vectorizer: str, module_config: "properties": properties } - # Call the Weaviate API to create the class self._client.schema.create_class(class_obj) @@ -143,7 +139,7 @@ def delete_weaviate_class(self, class_name: str) -> None: # consider the class as successfully deleted. 
self._client.schema.get(class_name) print(f"Failed to delete Weaviate class '{class_name}'") - except Exception as e: + except Exception: print(f"Successfully deleted Weaviate class '{class_name}'") return None @@ -206,4 +202,4 @@ def query_weaviate_class(self, class_name, properties_to_retrieve, query: Vector print(f"Failed to query Weaviate class '{class_name}'") print(e) - return [] \ No newline at end of file + return [] From a9749145b59978a6e7e78ea8dfe3bc567326c876 Mon Sep 17 00:00:00 2001 From: yyang999 Date: Fri, 17 Nov 2023 13:31:10 -0500 Subject: [PATCH 08/13] Optimize the code format --- evadb/third_party/vector_stores/weaviate.py | 68 ++++++--------------- 1 file changed, 18 insertions(+), 50 deletions(-) diff --git a/evadb/third_party/vector_stores/weaviate.py b/evadb/third_party/vector_stores/weaviate.py index 17348c9f4..6332e935a 100644 --- a/evadb/third_party/vector_stores/weaviate.py +++ b/evadb/third_party/vector_stores/weaviate.py @@ -112,15 +112,6 @@ def create_weaviate_class(self, class_name: str, vectorizer: str, module_config: # Call the Weaviate API to create the class self._client.schema.create_class(class_obj) - # response = client.schema.get(class_name) - # Check the response for success or handle any errors - if self._client.schema.get(class_name)['class'] == class_name: - print(f"Successfully created Weaviate class '{class_name}'") - else: - print(f"Failed to create Weaviate class '{class_name}'") - - return None - def delete_weaviate_class(self, class_name: str) -> None: """ Delete a Weaviate class and its data. @@ -134,16 +125,6 @@ def delete_weaviate_class(self, class_name: str) -> None: # Call the Weaviate API to delete the class self._client.schema.delete_class(class_name) - try: - # Attempt to retrieve the class, and if it results in an exception, - # consider the class as successfully deleted. 
- self._client.schema.get(class_name) - print(f"Failed to delete Weaviate class '{class_name}'") - except Exception: - print(f"Successfully deleted Weaviate class '{class_name}'") - - return None - def add_to_weaviate_class(self, class_name: str, data_objects: List[dict]) -> None: """ Add objects to the specified Weaviate class. @@ -160,8 +141,6 @@ def add_to_weaviate_class(self, class_name: str, data_objects: List[dict]) -> No for data_object in data_objects: self._client.data_object.create(data_object, class_name) - return None - def query_weaviate_class(self, class_name, properties_to_retrieve, query: VectorIndexQuery) -> List[dict]: """ Perform a similarity-based search in Weaviate. @@ -174,32 +153,21 @@ def query_weaviate_class(self, class_name, properties_to_retrieve, query: Vector Returns: List[dict]: A list of dictionaries containing the retrieved properties. """ - try: - # Define the similarity search query - response = ( - self._client.query - .get(class_name, properties_to_retrieve) - .with_near_vector({ - "vector": query.embedding - }) - .with_limit(query.top_k) - .with_additional(["distance"]) - .do() - ) - - # Check if the response contains data - data = response.get('data', {}) - if 'Get' not in data or class_name not in data['Get']: - print(f"No objects of class {class_name} found.") - return [] - - # Extract the results - results = data['Get'][class_name] - - return results - - except Exception as e: - print(f"Failed to query Weaviate class '{class_name}'") - print(e) - - return [] + # Define the similarity search query + response = ( + self._client.query + .get(class_name, properties_to_retrieve) + .with_near_vector({ + "vector": query.embedding + }) + .with_limit(query.top_k) + .with_additional(["distance"]) + .do() + ) + + data = response.get('data', {}) + + # Extract the results + results = data['Get'][class_name] + + return results From f9136ce35486428890247ea64b262533176b8c17 Mon Sep 17 00:00:00 2001 From: yyang999 Date: Sun, 19 Nov 2023 
23:08:38 -0500 Subject: [PATCH 09/13] commit local changes before merge --- docs/_toc.yml | 1 + .../reference/vector_databases/weaviate.rst | 31 ++++ evadb/executor/executor_utils.py | 2 +- evadb/third_party/vector_stores/types.py | 75 +--------- evadb/third_party/vector_stores/weaviate.py | 136 +++++------------- script/formatting/spelling.txt | 5 +- setup.py | 1 + .../integration_tests/long/test_similarity.py | 36 +++++ test/markers.py | 6 + 9 files changed, 120 insertions(+), 173 deletions(-) create mode 100644 docs/source/reference/vector_databases/weaviate.rst diff --git a/docs/_toc.yml b/docs/_toc.yml index ca191ce42..6efebf8e3 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -89,6 +89,7 @@ parts: - file: source/reference/vector_databases/pgvector - file: source/reference/vector_databases/pinecone - file: source/reference/vector_databases/milvus + - file: source/reference/vector_databases/weaviate - file: source/reference/ai/index title: AI Engines diff --git a/docs/source/reference/vector_databases/weaviate.rst b/docs/source/reference/vector_databases/weaviate.rst new file mode 100644 index 000000000..c964d7e87 --- /dev/null +++ b/docs/source/reference/vector_databases/weaviate.rst @@ -0,0 +1,31 @@ +Weaviate +========== + +Weaviate is an open-source vector database designed for scalability and rich querying capabilities. It allows for semantic search, automated vectorization, and supports large language model (LLM) integration. +The connection to Weaviate is based on the `weaviate-client `_ library. + +Dependency +---------- + +* weaviate-client + +Parameters +---------- + +To use Weaviate, you need an API key and a URL of your Weaviate instance. Here are the `instructions for setting up a Weaviate instance `_. After setting up your instance, you will find the API key and URL on the Details tab in Weaviate Cloud Services (WCS) dashboard. These details are essential for establishing a connection to the Weaviate server. 
+ +* `WEAVIATE_API_KEY` is the API key for your Weaviate instance. +* `WEAVIATE_API_URL` is the URL of your Weaviate instance. + +The above values can be set either via the ``SET`` statement or through the OS environment variables ``WEAVIATE_API_KEY`` and ``WEAVIATE_API_URL``. + +Create Collection +----------------- + +Weaviate uses collections (similar to 'classes') to store data. To create a collection in Weaviate, use the following SQL command in EvaDB: + +.. code-block:: sql + + CREATE INDEX collection_name ON table_name (data) USING WEAVIATE; + +This command creates a collection in Weaviate with the specified name, linked to the table in EvaDB. You can also specify vectorizer settings and other configurations for the collection as needed. \ No newline at end of file diff --git a/evadb/executor/executor_utils.py b/evadb/executor/executor_utils.py index 9d457c450..26f9d14f8 100644 --- a/evadb/executor/executor_utils.py +++ b/evadb/executor/executor_utils.py @@ -187,7 +187,7 @@ def handle_vector_store_params( } elif vector_store_type == VectorStoreType.WEAVIATE: # Weaviate Configuration - # Weaviate API key and url Can be obtained from cluster details on Weaviate Cloud Services (WCS) dashboard + # Weaviate API key and URL can be obtained from the cluster details on the Weaviate Cloud Services (WCS) dashboard return { "WEAVIATE_API_KEY": catalog().get_configuration_catalog_value( "WEAVIATE_API_KEY" diff --git a/evadb/third_party/vector_stores/types.py b/evadb/third_party/vector_stores/types.py index 44b210642..da665fe11 100644 --- a/evadb/third_party/vector_stores/types.py +++ b/evadb/third_party/vector_stores/types.py @@ -53,77 +53,4 @@ def query(self, query: VectorIndexQuery) -> VectorIndexQueryResult: def delete(self): """delete an index""" - ... - - def create_weaviate_class(self, class_name: str, vectorizer: str, module_config: dict, properties: list) -> None: - """ - Create a Weaviate class with the specified configuration.
- - Args: - class_name (str): The name of the class to create, e.g., "Article". - vectorizer (str): The vectorizer module to use, e.g., "text2vec-cohere". - module_config (dict): Configuration for vectorizer and generative module, e.g., - { - "text2vec-cohere": { - "model": "embed-multilingual-v2.0", - }, - } - properties (list): List of dictionaries specifying class properties, e.g., - [ - { - "name": "title", - "dataType": ["text"] - }, - { - "name": "body", - "dataType": ["text"] - }, - ] - - Returns: - None - """ - # Implement the logic to create a Weaviate class with the given parameters. - ... - - def delete_weaviate_class(self, class_name: str) -> None: - """ - Delete a Weaviate class and its data. - - Args: - class_name (str): The name of the Weaviate class to delete. - - Returns: - None - """ - # Implement the logic to delete a Weaviate class and its data. - ... - - def add_to_weaviate_class(self, class_name: str, data_objects: List[dict]) -> None: - """ - Add objects to the specified Weaviate class. - - Args: - class_name (str): The name of the Weaviate class to add objects to. - data_objects (List[dict]): A list of dictionaries, where each dictionary contains property names and values. - - Returns: - None - """ - # Implement the logic to add payloads to a Weaviate class. - ... - - def query_weaviate_class(self, class_name, properties_to_retrieve, query: VectorIndexQuery) -> List[dict]: - """ - Perform a similarity-based search in Weaviate. - - Args: - class_name (str): The name of the Weaviate class to perform the search on. - properties_to_retrieve (List[str]): A list of property names to retrieve. - query (VectorIndexQuery): A query object for similarity search, containing the query vector and top_k. - - Returns: - List[dict]: A list of dictionaries containing the retrieved properties. - """ - # Implement the logic to query a Weaviate class for similar vectors. - ... + ... 
\ No newline at end of file diff --git a/evadb/third_party/vector_stores/weaviate.py b/evadb/third_party/vector_stores/weaviate.py index 6332e935a..5868aee06 100644 --- a/evadb/third_party/vector_stores/weaviate.py +++ b/evadb/third_party/vector_stores/weaviate.py @@ -16,7 +16,9 @@ from typing import List from evadb.third_party.vector_stores.types import ( + FeaturePayload, VectorIndexQuery, + VectorIndexQueryResult, VectorStore, ) from evadb.utils.generic_utils import try_to_import_weaviate_client @@ -24,12 +26,13 @@ required_params = [] _weaviate_init_done = False - class WeaviateVectorStore(VectorStore): - def __init__(self, **kwargs) -> None: + def __init__(self, collection_name: str, **kwargs) -> None: try_to_import_weaviate_client() global _weaviate_init_done + self._collection_name = collection_name + # Get the API key. self._api_key = kwargs.get("WEAVIATE_API_KEY") @@ -38,8 +41,7 @@ def __init__(self, **kwargs) -> None: assert ( self._api_key - ), "Please set your Weaviate API key in evadb.yml file (third_party, weaviate_api_key) or " \ - "environment variable (WEAVIATE_API_KEY). It can be found at the Details tab in WCS Dashboard." + ), "Please set your `WEAVIATE_API_KEY` using set command or environment variable (WEAVIATE_API_KEY). It can be found at the Details tab in WCS Dashboard." # Get the API Url. self._api_url = kwargs.get("WEAVIATE_API_URL") @@ -49,8 +51,7 @@ def __init__(self, **kwargs) -> None: assert ( self._api_url - ), "Please set your Weaviate API Url in evadb.yml file (third_party, weaviate_api_url) or " \ - "environment variable (WEAVIATE_API_URL). It can be found at the Details tab in WCS Dashboard." + ), "Please set your `WEAVIATE_API_URL` using set command or environment variable (WEAVIATE_API_URL). It can be found at the Details tab in WCS Dashboard." 
if not _weaviate_init_done: # Initialize weaviate client @@ -66,108 +67,49 @@ def __init__(self, **kwargs) -> None: self._client = client - def create_weaviate_class(self, class_name: str, vectorizer: str, module_config: dict, properties: list) -> None: - # In Weaviate, vector index creation and management is not explicitly done like Pinecone - # Need to typically define a property in the schema to hold vectors and insert data accordingly - - """ - Create a Weaviate class with the specified configuration. - - Args: - class_name (str): The name of the class to create, e.g., "Article". - vectorizer (str): The vectorizer module to use, e.g., "text2vec-cohere". - module_config (dict): Configuration for vectorizer and generative module, e.g., - { - "text2vec-cohere": { - "model": "embed-multilingual-v2.0", - }, - } - properties (list): List of dictionaries specifying class properties, e.g., - [ - { - "name": "title", - "dataType": ["text"] - }, - { - "name": "body", - "dataType": ["text"] - }, - ] - - Returns: - None - """ - # Check if the class already exists - if self._client.schema.exists(class_name): - self._client.schema.delete_class(class_name) - - # Define the class object with provided parameters - class_obj = { - "class": class_name, - "vectorizer": vectorizer, - "moduleConfig": module_config, - "properties": properties + def create(self, vectorizer: str = 'text2vec-openai', properties: list = None, module_config: dict = None): + properties = properties or [] + module_config = module_config or {} + + collection_obj = { + 'class': self._collection_name, + 'properties': properties, + 'vectorizer': vectorizer, + 'moduleConfig': module_config } - # Call the Weaviate API to create the class - self._client.schema.create_class(class_obj) - - def delete_weaviate_class(self, class_name: str) -> None: - """ - Delete a Weaviate class and its data. - - Args: - class_name (str): The name of the Weaviate class to delete. 
- - Returns: - None - """ - # Call the Weaviate API to delete the class - self._client.schema.delete_class(class_name) - - def add_to_weaviate_class(self, class_name: str, data_objects: List[dict]) -> None: - """ - Add objects to the specified Weaviate class. - - Args: - class_name (str): The name of the Weaviate class to add objects to. - data_objects (List[dict]): A list of dictionaries, - where each dictionary contains property names and values. - - Returns: - None - """ - # Iterate over each data object and add it to the Weaviate class - for data_object in data_objects: - self._client.data_object.create(data_object, class_name) - - def query_weaviate_class(self, class_name, properties_to_retrieve, query: VectorIndexQuery) -> List[dict]: - """ - Perform a similarity-based search in Weaviate. - - Args: - class_name (str): The name of the Weaviate class to perform the search on. - properties_to_retrieve (List[str]): A list of property names to retrieve. - query (VectorIndexQuery): A query object for similarity search, containing the query vector and top_k. - - Returns: - List[dict]: A list of dictionaries containing the retrieved properties. 
- """ - # Define the similarity search query + if self._client.schema.exists(self._collection_name): + self._client.schema.delete_class(self._collection_name) + + self._client.schema.create_class(collection_obj) + + def add(self, payload: List[FeaturePayload]) -> None: + with self._client.batch as batch: + for item in payload: + data_object = { + "id": item.id, + "vector": item.embedding + } + batch.add_data_object(data_object, self._collection_name) + + def delete(self) -> None: + self._client.schema.delete_class(self._collection_name) + + def query(self, query: VectorIndexQuery) -> VectorIndexQueryResult: response = ( self._client.query - .get(class_name, properties_to_retrieve) + .get(self._collection_name, ['*']) .with_near_vector({ "vector": query.embedding }) .with_limit(query.top_k) - .with_additional(["distance"]) .do() ) data = response.get('data', {}) + results = data.get('Get', {}).get(self._collection_name, []) - # Extract the results - results = data['Get'][class_name] + similarities = [item['_additional']['distance'] for item in results] + ids = [item['id'] for item in results] - return results + return VectorIndexQueryResult(similarities, ids) \ No newline at end of file diff --git a/script/formatting/spelling.txt b/script/formatting/spelling.txt index e55954852..1dd5566ca 100644 --- a/script/formatting/spelling.txt +++ b/script/formatting/spelling.txt @@ -975,11 +975,13 @@ VideoFormat VideoStorageEngineTest VideoWriter VisionEncoderDecoderModel +WEAVIATE WH WIP WMV WeakValueDictionary -WEAVIATE +Weaviate +WeaviateVectorStore XGBoost XdistTests Xeon @@ -1732,6 +1734,7 @@ testRayErrorHandling testSimilarityFeatureTable testSimilarityImageDataset testSimilarityTable +testWeaviateIndexImageDataset testcase testcases testdeleteone diff --git a/setup.py b/setup.py index 8f3506323..12d6a98e8 100644 --- a/setup.py +++ b/setup.py @@ -174,6 +174,7 @@ def read(path, encoding="utf-8"): "pinecone": pinecone_libs, "chromadb": chromadb_libs, "milvus": milvus_libs, 
+ "weaviate": weaviate_libs, "postgres": postgres_libs, "ludwig": ludwig_libs, "sklearn": sklearn_libs, diff --git a/test/integration_tests/long/test_similarity.py b/test/integration_tests/long/test_similarity.py index 81d6054fe..fd8dbd02b 100644 --- a/test/integration_tests/long/test_similarity.py +++ b/test/integration_tests/long/test_similarity.py @@ -20,6 +20,7 @@ milvus_skip_marker, pinecone_skip_marker, qdrant_skip_marker, + weaviate_skip_marker, ) from test.util import ( create_sample_image, @@ -142,6 +143,13 @@ def setUp(self): # use default Milvus database for testing os.environ["MILVUS_DB_NAME"] = "default" + # Weaviate: WEAVIATE_API_KEY and WEAVIATE_API_URL + self.original_weaviate_key = os.environ.get("WEAVIATE_API_KEY") + self.original_weaviate_env = os.environ.get("WEAVIATE_API_URL") + + os.environ["WEAVIATE_API_KEY"] = "NM4adxLmhtJDF1dPXDiNhEGTN7hhGDpymmO0" + os.environ["WEAVIATE_API_URL"] = "https://cs6422-test2-zn83syib.weaviate.network" + def tearDown(self): shutdown_ray() @@ -580,3 +588,31 @@ def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_milvus( # Cleanup drop_query = "DROP INDEX testMilvusIndexImageDataset" execute_query_fetch_all(self.evadb, drop_query) + + @pytest.mark.skip(reason="Requires running Weaviate instance") + @weaviate_skip_marker + def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_weaviate(self): + for _ in range(2): + create_index_query = """CREATE INDEX testWeaviateIndexImageDataset + ON testSimilarityImageDataset (DummyFeatureExtractor(data)) + USING WEAVIATE;""" + execute_query_fetch_all(self.evadb, create_index_query) + + select_query = """SELECT _row_id FROM testSimilarityImageDataset + ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data)) + LIMIT 1;""".format( + self.img_path + ) + explain_batch = execute_query_fetch_all( + self.evadb, f"EXPLAIN {select_query}" + ) + self.assertTrue("VectorIndexScan" in explain_batch.frames[0][0]) + + res_batch = 
execute_query_fetch_all(self.evadb, select_query) + self.assertEqual( + res_batch.frames["testsimilarityimagedataset._row_id"][0], 5 + ) + + # Cleanup + drop_query = "DROP INDEX testWeaviateIndexImageDataset" + execute_query_fetch_all(self.evadb, drop_query) diff --git a/test/markers.py b/test/markers.py index 8273f5f0f..deefadb29 100644 --- a/test/markers.py +++ b/test/markers.py @@ -28,6 +28,7 @@ is_pinecone_available, is_qdrant_available, is_replicate_available, + is_weaviate_available, ) asyncio_skip_marker = pytest.mark.skipif( @@ -54,6 +55,11 @@ reason="Skipping since pymilvus is not installed", ) +weaviate_skip_marker = pytest.mark.skipif( + is_weaviate_available() is False, + reason="Skipping since weaviate is not installed", +) + windows_skip_marker = pytest.mark.skipif( sys.platform == "win32", reason="Test case not supported on Windows" ) From 07239d9cf422e37e14be265cafcc0491b3572815 Mon Sep 17 00:00:00 2001 From: yyang999 Date: Mon, 20 Nov 2023 09:43:53 -0500 Subject: [PATCH 10/13] Apply code formatting --- evadb/catalog/catalog_type.py | 1 + evadb/third_party/vector_stores/types.py | 2 +- evadb/third_party/vector_stores/weaviate.py | 38 ++++++++++----------- evadb/utils/generic_utils.py | 4 +++ 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/evadb/catalog/catalog_type.py b/evadb/catalog/catalog_type.py index c4f29553e..5da568779 100644 --- a/evadb/catalog/catalog_type.py +++ b/evadb/catalog/catalog_type.py @@ -120,6 +120,7 @@ class VectorStoreType(EvaDBEnum): WEAVIATE # noqa: F821 MILVUS # noqa: F821 + class VideoColumnName(EvaDBEnum): name # noqa: F821 id # noqa: F821 diff --git a/evadb/third_party/vector_stores/types.py b/evadb/third_party/vector_stores/types.py index da665fe11..cdfc10e26 100644 --- a/evadb/third_party/vector_stores/types.py +++ b/evadb/third_party/vector_stores/types.py @@ -53,4 +53,4 @@ def query(self, query: VectorIndexQuery) -> VectorIndexQueryResult: def delete(self): """delete an index""" - ... 
\ No newline at end of file + ... diff --git a/evadb/third_party/vector_stores/weaviate.py b/evadb/third_party/vector_stores/weaviate.py index 5868aee06..073d53031 100644 --- a/evadb/third_party/vector_stores/weaviate.py +++ b/evadb/third_party/vector_stores/weaviate.py @@ -26,6 +26,7 @@ required_params = [] _weaviate_init_done = False + class WeaviateVectorStore(VectorStore): def __init__(self, collection_name: str, **kwargs) -> None: try_to_import_weaviate_client() @@ -67,15 +68,20 @@ def __init__(self, collection_name: str, **kwargs) -> None: self._client = client - def create(self, vectorizer: str = 'text2vec-openai', properties: list = None, module_config: dict = None): + def create( + self, + vectorizer: str = "text2vec-openai", + properties: list = None, + module_config: dict = None, + ): properties = properties or [] module_config = module_config or {} collection_obj = { - 'class': self._collection_name, - 'properties': properties, - 'vectorizer': vectorizer, - 'moduleConfig': module_config + "class": self._collection_name, + "properties": properties, + "vectorizer": vectorizer, + "moduleConfig": module_config, } if self._client.schema.exists(self._collection_name): @@ -86,10 +92,7 @@ def create(self, vectorizer: str = 'text2vec-openai', properties: list = None, m def add(self, payload: List[FeaturePayload]) -> None: with self._client.batch as batch: for item in payload: - data_object = { - "id": item.id, - "vector": item.embedding - } + data_object = {"id": item.id, "vector": item.embedding} batch.add_data_object(data_object, self._collection_name) def delete(self) -> None: @@ -97,19 +100,16 @@ def delete(self) -> None: def query(self, query: VectorIndexQuery) -> VectorIndexQueryResult: response = ( - self._client.query - .get(self._collection_name, ['*']) - .with_near_vector({ - "vector": query.embedding - }) + self._client.query.get(self._collection_name, ["*"]) + .with_near_vector({"vector": query.embedding}) .with_limit(query.top_k) .do() ) - data = 
response.get('data', {}) - results = data.get('Get', {}).get(self._collection_name, []) + data = response.get("data", {}) + results = data.get("Get", {}).get(self._collection_name, []) - similarities = [item['_additional']['distance'] for item in results] - ids = [item['id'] for item in results] + similarities = [item["_additional"]["distance"] for item in results] + ids = [item["id"] for item in results] - return VectorIndexQueryResult(similarities, ids) \ No newline at end of file + return VectorIndexQueryResult(similarities, ids) diff --git a/evadb/utils/generic_utils.py b/evadb/utils/generic_utils.py index 1a9a441d8..426719f87 100644 --- a/evadb/utils/generic_utils.py +++ b/evadb/utils/generic_utils.py @@ -572,6 +572,7 @@ def try_to_import_chromadb_client(): Please install it with 'pip install chromadb`.""" ) + def try_to_import_weaviate_client(): try: import weaviate # noqa: F401 @@ -581,6 +582,7 @@ def try_to_import_weaviate_client(): Please install it with 'pip install weaviate-client`.""" ) + def try_to_import_milvus_client(): try: import pymilvus # noqa: F401 @@ -614,6 +616,7 @@ def is_chromadb_available() -> bool: except ValueError: # noqa: E722 return False + def is_weaviate_available() -> bool: try: try_to_import_weaviate_client() @@ -621,6 +624,7 @@ def is_weaviate_available() -> bool: except ValueError: # noqa: E722 return False + def is_milvus_available() -> bool: try: try_to_import_milvus_client() From 5043876b0b85451217ebc0a781267cb76d55f9fe Mon Sep 17 00:00:00 2001 From: yyang999 Date: Mon, 20 Nov 2023 12:30:11 -0500 Subject: [PATCH 11/13] Manually fix whitespace issue --- .../integration_tests/long/test_similarity.py | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/test/integration_tests/long/test_similarity.py b/test/integration_tests/long/test_similarity.py index fd8dbd02b..adefae04d 100644 --- a/test/integration_tests/long/test_similarity.py +++ b/test/integration_tests/long/test_similarity.py @@ -84,16 
+84,7 @@ def setUp(self): for i in range(5): storage_engine.write( base_table_catalog_entry, - Batch( - pd.DataFrame( - [ - { - "data_col": base_img, - "dummy": i, - } - ] - ) - ), + Batch(pd.DataFrame([{"data_col": base_img, "dummy": i, }])), ) storage_engine.write( feature_table_catalog_entry, @@ -143,12 +134,13 @@ def setUp(self): # use default Milvus database for testing os.environ["MILVUS_DB_NAME"] = "default" - # Weaviate: WEAVIATE_API_KEY and WEAVIATE_API_URL self.original_weaviate_key = os.environ.get("WEAVIATE_API_KEY") self.original_weaviate_env = os.environ.get("WEAVIATE_API_URL") os.environ["WEAVIATE_API_KEY"] = "NM4adxLmhtJDF1dPXDiNhEGTN7hhGDpymmO0" - os.environ["WEAVIATE_API_URL"] = "https://cs6422-test2-zn83syib.weaviate.network" + os.environ[ + "WEAVIATE_API_URL" + ] = "https://cs6422-test2-zn83syib.weaviate.network" def tearDown(self): shutdown_ray() @@ -561,9 +553,7 @@ def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_pinecone( @pytest.mark.skip(reason="Requires running local Milvus instance") @milvus_skip_marker - def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_milvus( - self, - ): + def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_milvus(self,): for _ in range(2): create_index_query = """CREATE INDEX testMilvusIndexImageDataset ON testSimilarityImageDataset (DummyFeatureExtractor(data)) @@ -591,7 +581,9 @@ def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_milvus( @pytest.mark.skip(reason="Requires running Weaviate instance") @weaviate_skip_marker - def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_weaviate(self): + def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_weaviate( + self, + ): for _ in range(2): create_index_query = """CREATE INDEX testWeaviateIndexImageDataset ON testSimilarityImageDataset (DummyFeatureExtractor(data)) From 98722b213703416beba19d9250297703a527c026 Mon Sep 17 00:00:00 2001 From: Andy Xu Date: Tue, 
21 Nov 2023 00:07:36 -0500 Subject: [PATCH 12/13] Fix linter --- test/integration_tests/long/test_similarity.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/test/integration_tests/long/test_similarity.py b/test/integration_tests/long/test_similarity.py index adefae04d..2a8d52cf8 100644 --- a/test/integration_tests/long/test_similarity.py +++ b/test/integration_tests/long/test_similarity.py @@ -84,7 +84,16 @@ def setUp(self): for i in range(5): storage_engine.write( base_table_catalog_entry, - Batch(pd.DataFrame([{"data_col": base_img, "dummy": i, }])), + Batch( + pd.DataFrame( + [ + { + "data_col": base_img, + "dummy": i, + } + ] + ) + ), ) storage_engine.write( feature_table_catalog_entry, @@ -553,7 +562,9 @@ def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_pinecone( @pytest.mark.skip(reason="Requires running local Milvus instance") @milvus_skip_marker - def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_milvus(self,): + def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_milvus( + self, + ): for _ in range(2): create_index_query = """CREATE INDEX testMilvusIndexImageDataset ON testSimilarityImageDataset (DummyFeatureExtractor(data)) From 2fac6524c9880451b8b45282aad808bd1db737e5 Mon Sep 17 00:00:00 2001 From: Andy Xu Date: Tue, 21 Nov 2023 00:12:27 -0500 Subject: [PATCH 13/13] Fix link --- docs/source/reference/databases/hackernews.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/reference/databases/hackernews.rst b/docs/source/reference/databases/hackernews.rst index d96112e81..cc5bc97db 100644 --- a/docs/source/reference/databases/hackernews.rst +++ b/docs/source/reference/databases/hackernews.rst @@ -18,7 +18,7 @@ Required: Optional: -* ``tags`` is the tag used for filtering the query results. Check `available tags `_ to see a list of available filter tags. +* ``tags`` is the tag used for filtering the query results. 
Check `available tags `_ to see a list of available filter tags. Create Connection -----------------