-
Notifications
You must be signed in to change notification settings - Fork 115
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #334 from Shreyanand/milvus
Add Milvus database compatibility with the RAG recipe
- Loading branch information
Showing
14 changed files
with
216 additions
and
111 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
81 changes: 81 additions & 0 deletions
81
recipes/natural_language_processing/rag/app/manage_vectordb.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
from langchain_community.vectorstores import Chroma | ||
from chromadb import HttpClient | ||
from chromadb.config import Settings | ||
import chromadb.utils.embedding_functions as embedding_functions | ||
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings | ||
from langchain_community.vectorstores import Milvus | ||
from pymilvus import MilvusClient | ||
from pymilvus import connections, utility | ||
|
||
class VectorDB:
    """Manage a Chroma or Milvus vector store for the RAG recipe.

    The instance stores the connection parameters and dispatches every
    operation on ``vector_vendor``, which must be ``"chromadb"`` or
    ``"milvus"``.

    Typical usage is ``connect()`` followed by ``populate_db(documents)``.
    ``clear_db()`` and the Chroma branch of ``populate_db()`` use the client
    created by ``connect()``.
    """

    # Vendors the dispatch branches below know how to handle.
    _SUPPORTED_VENDORS = ("chromadb", "milvus")

    def __init__(self, vector_vendor, host, port, collection_name, embedding_model):
        """Store the connection settings; no network access happens here.

        Args:
            vector_vendor: Backend name, ``"chromadb"`` or ``"milvus"``.
            host: Host of the vector database server.
            port: Port of the vector database server.
            collection_name: Collection to read from, populate, or drop.
            embedding_model: Model name passed to the sentence-transformer
                embedding classes.
        """
        self.vector_vendor = vector_vendor
        self.host = host
        self.port = port
        self.collection_name = collection_name
        self.embedding_model = embedding_model
        # Set by connect(); defined here so the attribute always exists.
        self.client = None

    def _require_supported_vendor(self):
        """Raise ``ValueError`` if ``vector_vendor`` is not a known backend."""
        if self.vector_vendor not in self._SUPPORTED_VENDORS:
            raise ValueError(
                f"Unsupported vector_vendor {self.vector_vendor!r}; "
                f"expected one of {self._SUPPORTED_VENDORS}"
            )

    def connect(self):
        """Create the vendor client, keep it on ``self.client`` and return it.

        Returns:
            A ``chromadb.HttpClient`` for ``"chromadb"`` or a
            ``pymilvus.MilvusClient`` for ``"milvus"``.

        Raises:
            ValueError: If ``vector_vendor`` is not supported.
        """
        # Fail early with a clear message instead of an AttributeError on
        # ``self.client`` when no branch matches.
        self._require_supported_vendor()
        print(f"Connecting to {self.host}:{self.port}...")
        if self.vector_vendor == "chromadb":
            self.client = HttpClient(
                host=self.host,
                port=self.port,
                settings=Settings(allow_reset=True),
            )
        elif self.vector_vendor == "milvus":
            self.client = MilvusClient(uri=f"http://{self.host}:{self.port}")
        return self.client

    def populate_db(self, documents):
        """Return a LangChain vector store, inserting ``documents`` if needed.

        For Chroma the documents are inserted only when the collection holds
        no entries; for Milvus only when the collection does not exist yet.
        Otherwise a store bound to the existing collection is returned.

        Args:
            documents: Documents handed to the store's ``from_documents``.

        Returns:
            A ``Chroma`` or ``Milvus`` vector store for ``collection_name``.

        Raises:
            ValueError: If ``vector_vendor`` is not supported.
        """
        # Validate before loading the embedding model so a misconfigured
        # vendor does not fail later with an UnboundLocalError on ``db``.
        self._require_supported_vendor()
        e = SentenceTransformerEmbeddings(model_name=self.embedding_model)
        print("Populating VectorDB with vectors...")
        if self.vector_vendor == "chromadb":
            embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=self.embedding_model
            )
            # Uses the client created by connect().
            collection = self.client.get_or_create_collection(
                self.collection_name,
                embedding_function=embedding_func,
            )
            if collection.count() < 1:
                db = Chroma.from_documents(
                    documents=documents,
                    embedding=e,
                    collection_name=self.collection_name,
                    client=self.client,
                )
                print("DB populated")
            else:
                db = Chroma(
                    client=self.client,
                    collection_name=self.collection_name,
                    embedding_function=e,
                )
                print("DB already populated")
        elif self.vector_vendor == "milvus":
            # ``utility.has_collection`` goes through the pymilvus connection
            # opened here rather than through ``self.client``.
            connections.connect(host=self.host, port=self.port)
            connection_args = {"host": self.host, "port": self.port}
            if not utility.has_collection(self.collection_name):
                print("Populating VectorDB with vectors...")
                db = Milvus.from_documents(
                    documents,
                    e,
                    collection_name=self.collection_name,
                    connection_args=connection_args,
                )
                print("DB populated")
            else:
                print("DB already populated")
                db = Milvus(
                    e,
                    collection_name=self.collection_name,
                    connection_args=connection_args,
                )
        return db

    def clear_db(self):
        """Drop the collection on a best-effort basis.

        Ordinary errors (for example the collection not existing) are reported
        on stdout and not raised. ``KeyboardInterrupt`` and ``SystemExit`` are
        allowed to propagate.
        """
        print("Clearing VectorDB...")
        try:
            if self.vector_vendor == "chromadb":
                self.client.delete_collection(self.collection_name)
            elif self.vector_vendor == "milvus":
                self.client.drop_collection(self.collection_name)
            print("Cleared DB")
        except Exception:
            # Narrowed from a bare ``except:`` so interrupts are not swallowed.
            print("Couldn't clear the collection possibly because it doesn't exist")
36 changes: 0 additions & 36 deletions
36
recipes/natural_language_processing/rag/app/populate_vectordb.py
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,4 +4,5 @@ chromadb | |
sentence-transformers | ||
streamlit | ||
pypdf | ||
pymilvus | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,10 @@ | ||
# Directory to store vector_dbs files | ||
This directory has make files and container files for open source vector databases. The built container images are used by recipes like `rag` to provide required database functions. | ||
|
||
[Chroma](https://www.trychroma.com/) is the open-source embedding database. | ||
## Chroma | ||
[Chroma](https://www.trychroma.com/) is an AI-native open-source embedding database. | ||
Chroma makes it easy to build LLM apps by making knowledge, facts, and skills | ||
pluggable for LLMs. | ||
|
||
chromadb is the AI-native open-source embedding database. | |
|
||
This container image is used by recipes like `rag` to provide required database | ||
functions. | ||
|
||
Use the included Makefile to build the container image. | ||
## Milvus | ||
[Milvus](https://milvus.io/) is an open-source vector database built to power embedding similarity search and AI applications. It is highly scalable and offers many production-ready features for search. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Base image: Milvus, pinned to a dated master build tag.
FROM docker.io/milvusdb/milvus:master-20240426-bed6363f
# COPY instead of ADD: this is a plain local file copy, so ADD's extra
# behaviours (remote URL fetch, automatic archive extraction) are not needed.
COPY embedEtcd.yaml /milvus/configs/embedEtcd.yaml
Oops, something went wrong.