From c151a502a7013d494014aba96a58b9fe515b0d94 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Thu, 21 Mar 2024 14:08:14 -0400 Subject: [PATCH 01/20] Refactor attributes from multivec_retriever for consistency. --- projects/Retriver-GUI/retriever_app.py | 14 +++---- src/grag/components/multivec_retriever.py | 45 +++++++++-------------- src/grag/rag/basic_rag.py | 2 +- src/tests/rag/basic_rag_test.py | 2 +- 4 files changed, 26 insertions(+), 37 deletions(-) diff --git a/projects/Retriver-GUI/retriever_app.py b/projects/Retriver-GUI/retriever_app.py index f55c0c6..9f4198c 100644 --- a/projects/Retriver-GUI/retriever_app.py +++ b/projects/Retriver-GUI/retriever_app.py @@ -46,7 +46,7 @@ def render_search_results(self): st.write(result.metadata) def check_connection(self): - response = self.app.retriever.client.test_connection() + response = self.app.retriever.vectordb.test_connection() if response: return True else: @@ -55,14 +55,14 @@ def check_connection(self): def render_stats(self): st.write(f''' **Chroma Client Details:** \n - Host Address : {self.app.retriever.client.host}:{self.app.retriever.client.port} \n - Collection Name : {self.app.retriever.client.collection_name} \n - Embeddings Type : {self.app.retriever.client.embedding_type} \n - Embeddings Model: {self.app.retriever.client.embedding_model} \n - Number of docs : {self.app.retriever.client.collection.count()} \n + Host Address : {self.app.retriever.vectordb.host}:{self.app.retriever.vectordb.port} \n + Collection Name : {self.app.retriever.vectordb.collection_name} \n + Embeddings Type : {self.app.retriever.vectordb.embedding_type} \n + Embeddings Model: {self.app.retriever.vectordb.embedding_model} \n + Number of docs : {self.app.retriever.vectordb.collection.count()} \n ''') if st.button('Check Connection'): - response = self.app.retriever.client.test_connection() + response = self.app.retriever.vectordb.test_connection() if response: st.write(':green[Connection Active]') else: diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py index 18ed752..b57a67f 100644 --- a/src/grag/components/multivec_retriever.py +++ b/src/grag/components/multivec_retriever.py @@ -2,9 +2,10 @@ import uuid from typing import List -from grag.components.chroma_client import ChromaClient from grag.components.text_splitter import TextSplitter from grag.components.utils import get_config +from grag.components.vectordb.base import VectorDB +from grag.components.vectordb.chroma_client import ChromaClient from langchain.retrievers.multi_vector import MultiVectorRetriever from langchain.storage import LocalFileStore from langchain_core.documents import Document @@ -20,7 +21,7 @@ class Retriever: Attributes: store_path: Path to the local file store id_key: A key prefix for identifying documents - client: ChromaClient class instance from components.chroma_client + vectordb: ChromaClient class instance from components.client store: langchain.storage.LocalFileStore object, stores the key value pairs of document id and parent file retriever: langchain.retrievers.multi_vector.MultiVectorRetriever class instance, langchain's multi-vector retriever splitter: TextSplitter class instance from components.text_splitter @@ -30,11 +31,11 @@ class Retriever: """ def __init__( - self, - store_path: str = multivec_retriever_conf["store_path"], - id_key: str = multivec_retriever_conf["id_key"], - namespace: str = multivec_retriever_conf["namespace"], - top_k=1, + self, + store_path: str = multivec_retriever_conf["store_path"], + 
id_key: str = multivec_retriever_conf["id_key"], + namespace: str = multivec_retriever_conf["namespace"], + top_k=1, ): """Args: store_path: Path to the local file store, defaults to argument from config file @@ -45,10 +46,10 @@ def __init__( self.store_path = store_path self.id_key = id_key self.namespace = uuid.UUID(namespace) - self.client = ChromaClient() + self.vectordb: VectorDB = ChromaClient() # TODO - change to init argument self.store = LocalFileStore(self.store_path) self.retriever = MultiVectorRetriever( - vectorstore=self.client.langchain_chroma, + vectorstore=self.vectordb.langchain_client, byte_store=self.store, id_key=self.id_key, ) @@ -113,7 +114,7 @@ def add_docs(self, docs: List[Document]): """ chunks = self.split_docs(docs) doc_ids = self.gen_doc_ids(docs) - self.client.add_docs(chunks) + self.vectordb.add_docs(chunks) self.retriever.docstore.mset(list(zip(doc_ids, docs))) async def aadd_docs(self, docs: List[Document]): @@ -129,11 +130,11 @@ async def aadd_docs(self, docs: List[Document]): """ chunks = self.split_docs(docs) doc_ids = self.gen_doc_ids(docs) - await asyncio.run(self.client.aadd_docs(chunks)) + await asyncio.run(self.vectordb.aadd_docs(chunks)) self.retriever.docstore.mset(list(zip(doc_ids))) def get_chunk(self, query: str, with_score=False, top_k=None): - """Returns the most (cosine) similar chunks from the vector database. + """Returns the most similar chunks from the vector database. Args: query: A query string @@ -144,14 +145,8 @@ def get_chunk(self, query: str, with_score=False, top_k=None): list of Documents """ - if with_score: - return self.client.langchain_chroma.similarity_search_with_relevance_scores( - query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs - ) - else: - return self.client.langchain_chroma.similarity_search( - query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs - ) + _top_k = top_k if top_k else self.retriever.search_kwargs['k'] + return self.vectordb.get_chunk(query=query, top_k=_top_k, with_score=with_score) async def aget_chunk(self, query: str, with_score=False, top_k=None): """Returns the most (cosine) similar chunks from the vector database, asynchronously. @@ -165,14 +160,8 @@ async def aget_chunk(self, query: str, with_score=False, top_k=None): list of Documents """ - if with_score: - return await self.client.langchain_chroma.asimilarity_search_with_relevance_scores( - query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs - ) - else: - return await self.client.langchain_chroma.asimilarity_search( - query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs - ) + _top_k = top_k if top_k else self.retriever.search_kwargs['k'] + return await self.vectordb.aget_chunk(query=query, top_k=_top_k, with_score=with_score) def get_doc(self, query: str): """Returns the parent document of the most (cosine) similar chunk from the vector database. 
diff --git a/src/grag/rag/basic_rag.py b/src/grag/rag/basic_rag.py index a99ecdd..9589920 100644 --- a/src/grag/rag/basic_rag.py +++ b/src/grag/rag/basic_rag.py @@ -4,7 +4,7 @@ from grag import prompts from grag.components.llm import LLM from grag.components.multivec_retriever import Retriever -from grag.components.prompt import FewShotPrompt, Prompt +from grag.components.prompt import Prompt, FewShotPrompt from grag.components.utils import get_config from importlib_resources import files from langchain_core.documents import Document diff --git a/src/tests/rag/basic_rag_test.py b/src/tests/rag/basic_rag_test.py index 06db25e..2249028 100644 --- a/src/tests/rag/basic_rag_test.py +++ b/src/tests/rag/basic_rag_test.py @@ -1,4 +1,4 @@ -from typing import List, Text +from typing import Text, List from grag.rag.basic_rag import BasicRAG From 5e72ad990c9926d850c00d35d30cb54021e30b86 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Thu, 21 Mar 2024 14:08:38 -0400 Subject: [PATCH 02/20] DeepLake client, vectordb --- pyproject.toml | 1 + src/grag/components/vectordb/__init__.py | 0 src/grag/components/vectordb/base.py | 64 +++++++ src/grag/components/vectordb/chroma_client.py | 170 ++++++++++++++++++ .../components/vectordb/deeplake_client.py | 132 ++++++++++++++ 5 files changed, 367 insertions(+) create mode 100644 src/grag/components/vectordb/__init__.py create mode 100644 src/grag/components/vectordb/base.py create mode 100644 src/grag/components/vectordb/chroma_client.py create mode 100644 src/grag/components/vectordb/deeplake_client.py diff --git a/pyproject.toml b/pyproject.toml index 897ab02..58c2fe0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "huggingface_hub>=0.20.2", "pydantic>=2.5.0", "rouge-score>=0.1.2", + "deeplake>=3.8.27" ] [project.urls] diff --git a/src/grag/components/vectordb/__init__.py b/src/grag/components/vectordb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/grag/components/vectordb/base.py b/src/grag/components/vectordb/base.py new file mode 100644 index 0000000..67bc5be --- /dev/null +++ b/src/grag/components/vectordb/base.py @@ -0,0 +1,64 @@ +from abc import ABC, abstractmethod +from typing import List + +from langchain_community.vectorstores.utils import filter_complex_metadata +from langchain_core.documents import Document + + +class VectorDB(ABC): + @abstractmethod + def add_docs(self, docs: List[Document], verbose: bool = True): + """Adds documents to the vector database. + + Args: + docs: List of Documents + verbose: Show progress bar + + Returns: + None + """ + ... + + @abstractmethod + async def aadd_docs(self, docs: List[Document], verbose: bool = True): + """Adds documents to the vector database (asynchronous). + + Args: + docs: List of Documents + verbose: Show progress bar + + Returns: + None + """ + ... + + @abstractmethod + def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): + """Returns the most similar chunks from the vector database. + + Args: + query: A query string + with_score: Outputs scores of returned chunks + top_k: Number of top similar chunks to return, if None defaults to self.top_k + + Returns: + list of Documents + """ + ... + + @abstractmethod + async def aget_chunk(self, query: str, with_score: bool = False, top_k: int = None): + """Returns the most similar chunks from the vector database. 
(asynchronous) + + Args: + query: A query string + with_score: Outputs scores of returned chunks + top_k: Number of top similar chunks to return, if None defaults to self.top_k + + Returns: + list of Documents + """ + ... + + def _filter_metadata(self, docs: List[Document]): + return filter_complex_metadata(docs, allowed_types=self.allowed_metadata_types) diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py new file mode 100644 index 0000000..53f5547 --- /dev/null +++ b/src/grag/components/vectordb/chroma_client.py @@ -0,0 +1,170 @@ +from typing import List + +import chromadb +from grag.components.embedding import Embedding +from grag.components.utils import get_config +from grag.components.vectordb.base import VectorDB +from langchain_community.vectorstores import Chroma +from langchain_core.documents import Document +from tqdm import tqdm +from tqdm.asyncio import tqdm as atqdm + +chroma_conf = get_config()["chroma"] + + +class ChromaClient(VectorDB): + """A class for connecting to a hosted Chroma Vectorstore collection. + + Attributes: + host : str + IP Address of hosted Chroma Vectorstore + port : str + port address of hosted Chroma Vectorstore + collection_name : str + name of the collection in the Chroma Vectorstore, each ChromaClient connects to a single collection + embedding_type : str + type of embedding used, supported 'sentence-transformers' and 'instructor-embedding' + embedding_model : str + model name of embedding used, should correspond to the embedding_type + embedding_function + a function of the embedding model, derived from the embedding_type and embedding_modelname + client: chromadb.HttpClient + Chroma API for client + collection + Chroma API for the collection + langchain_client: langchain_community.vectorstores.Chroma + LangChain wrapper for Chroma collection + """ + + def __init__( + self, + host=chroma_conf["host"], + port=chroma_conf["port"], + collection_name=chroma_conf["collection_name"], + embedding_type=chroma_conf["embedding_type"], + embedding_model=chroma_conf["embedding_model"], + ): + """Args: + host: IP Address of hosted Chroma Vectorstore, defaults to argument from config file + port: port address of hosted Chroma Vectorstore, defaults to argument from config file + collection_name: name of the collection in the Chroma Vectorstore, defaults to argument from config file + embedding_type: type of embedding used, supported 'sentence-transformers' and 'instructor-embedding', defaults to argument from config file + embedding_model: model name of embedding used, should correspond to the embedding_type, defaults to argument from config file + """ + self.host: str = host + self.port: str = port + self.collection_name: str = collection_name + self.embedding_type: str = embedding_type + self.embedding_model: str = embedding_model + + self.embedding_function = Embedding( + embedding_model=self.embedding_model, embedding_type=self.embedding_type + ).embedding_function + + self.client = chromadb.HttpClient(host=self.host, port=self.port) + self.collection = self.client.get_or_create_collection( + name=self.collection_name + ) + self.langchain_client = Chroma( + client=self.client, + collection_name=self.collection_name, + embedding_function=self.embedding_function, + ) + self.allowed_metadata_types = (str, int, float, bool) + + def test_connection(self, verbose=True): + """Tests connection with Chroma Vectorstore + + Args: + verbose: if True, prints connection status + + Returns: + A random integer if 
connection is alive else None + """ + response = self.client.heartbeat() + if verbose: + if response: + print(f"Connection to {self.host}/{self.port} is alive..") + else: + print(f"Connection to {self.host}/{self.port} is not alive !!") + return response + + def add_docs(self, docs: List[Document], verbose=True): + """Adds documents to chroma vectorstore + + Args: + docs: List of Documents + verbose: Show progress bar + + Returns: + None + """ + docs = self._filter_metadata(docs) + for doc in ( + tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs + ): + _id = self.langchain_client.add_documents([doc]) + + async def aadd_docs(self, docs: List[Document], verbose=True): + """Asynchronously adds documents to chroma vectorstore + + Args: + docs: List of Documents + verbose: Show progress bar + + Returns: + None + """ + docs = self._filter_metadata(docs) + if verbose: + for doc in atqdm( + docs, + desc=f"Adding documents to {self.collection_name}", + total=len(docs), + ): + await self.langchain_client.aadd_documents([doc]) + else: + for doc in docs: + await self.langchain_client.aadd_documents([doc]) + + def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): + """Returns the most similar chunks from the chroma database. + + Args: + query: A query string + with_score: Outputs scores of returned chunks + top_k: Number of top similar chunks to return, if None defaults to self.top_k + + Returns: + list of Documents + + """ + if with_score: + return self.langchain_client.similarity_search_with_relevance_scores( + query=query, **{"k": top_k} if top_k else 1 + ) + else: + return self.langchain_client.similarity_search( + query=query, **{"k": top_k} if top_k else 1 + ) + + async def aget_chunk(self, query: str, with_score=False, top_k=None): + """Returns the most (cosine) similar chunks from the vector database, asynchronously. + + Args: + query: A query string + with_score: Outputs scores of returned chunks + top_k: Number of top similar chunks to return, if None defaults to self.top_k + + Returns: + list of Documents + + """ + if with_score: + return await self.langchain_client.asimilarity_search_with_relevance_scores( + query=query, **{"k": top_k} if top_k else 1 + ) + else: + return await self.langchain_client.asimilarity_search( + query=query, **{"k": top_k} if top_k else 1 + ) diff --git a/src/grag/components/vectordb/deeplake_client.py b/src/grag/components/vectordb/deeplake_client.py new file mode 100644 index 0000000..75d6058 --- /dev/null +++ b/src/grag/components/vectordb/deeplake_client.py @@ -0,0 +1,132 @@ +from pathlib import Path +from typing import List, Union + +from deeplake.core.vectorstore import VectorStore +from grag.components.embedding import Embedding +from grag.components.utils import get_config +from grag.components.vectordb.base import VectorDB +from langchain_community.vectorstores import DeepLake +from langchain_core.documents import Document +from tqdm import tqdm +from tqdm.asyncio import tqdm as atqdm + +deeplake_conf = get_config()["deeplake"] + + +class DeepLakeClient(VectorDB): + """A class for connecting to a DeepLake Vectorstore + + Attributes: + store_path : str, Path + The path to store the DeepLake vectorstore. 
+ embedding_type : str + type of embedding used, supported 'sentence-transformers' and 'instructor-embedding' + embedding_model : str + model name of embedding used, should correspond to the embedding_type + embedding_function + a function of the embedding model, derived from the embedding_type and embedding_modelname + client: deeplake.core.vectorstore.VectorStore + DeepLake API + collection + Chroma API for the collection + langchain_client: langchain_community.vectorstores.DeepLake + LangChain wrapper for DeepLake API + """ + + def __init__(self, + store_path: Union[str, Path], + embedding_model: str, + embedding_type: str, + ): + self.store_path = Path(store_path) + self.embedding_type: str = embedding_type + self.embedding_model: str = embedding_model + + self.embedding_function = Embedding( + embedding_model=self.embedding_model, embedding_type=self.embedding_type + ).embedding_function + + self.client = VectorStore(path=self.store_path) + self.langchain_client = DeepLake(path=self.store_path, + embedding=self.embedding_function) + self.allowed_metadata_types = (str, int, float, bool) + + def add_docs(self, docs: List[Document], verbose=True): + """Adds documents to deeplake vectorstore + + Args: + docs: List of Documents + verbose: Show progress bar + + Returns: + None + """ + docs = self._filter_metadata(docs) + for doc in ( + tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs + ): + _id = self.langchain_chroma.add_documents([doc]) + + async def aadd_docs(self, docs: List[Document], verbose=True): + """Asynchronously adds documents to chroma vectorstore + + Args: + docs: List of Documents + verbose: Show progress bar + + Returns: + None + """ + docs = self._filter_metadata(docs) + if verbose: + for doc in atqdm( + docs, + desc=f"Adding documents to {self.collection_name}", + total=len(docs), + ): + await self.langchain_deeplake.aadd_documents([doc]) + else: + for doc in docs: + await self.langchain_deeplake.aadd_documents([doc]) + + def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): + """Returns the most similar chunks from the deeplake database. + + Args: + query: A query string + with_score: Outputs scores of returned chunks + top_k: Number of top similar chunks to return, if None defaults to self.top_k + + Returns: + list of Documents + + """ + if with_score: + return self.langchain_client.similarity_search_with_relevance_scores( + query=query, **{"k": top_k} if top_k else 1 + ) + else: + return self.langchain_client.similarity_search( + query=query, **{"k": top_k} if top_k else 1 + ) + + async def aget_chunk(self, query: str, with_score=False, top_k=None): + """Returns the most similar chunks from the deeplake database, asynchronously. 
+ + Args: + query: A query string + with_score: Outputs scores of returned chunks + top_k: Number of top similar chunks to return, if None defaults to self.top_k + + Returns: + list of Documents + + """ + if with_score: + return await self.langchain_client.asimilarity_search_with_relevance_scores( + query=query, **{"k": top_k} if top_k else 1 + ) + else: + return await self.langchain_client.asimilarity_search( + query=query, **{"k": top_k} if top_k else 1 + ) From 820702f3a8dfc9c73d1eaa3251942894e16a42b0 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Thu, 21 Mar 2024 16:35:10 -0400 Subject: [PATCH 03/20] Remove old chroma_client --- src/grag/components/chroma_client.py | 136 --------------------------- 1 file changed, 136 deletions(-) delete mode 100644 src/grag/components/chroma_client.py diff --git a/src/grag/components/chroma_client.py b/src/grag/components/chroma_client.py deleted file mode 100644 index 7efd7c3..0000000 --- a/src/grag/components/chroma_client.py +++ /dev/null @@ -1,136 +0,0 @@ -from typing import List - -import chromadb -from grag.components.embedding import Embedding -from grag.components.utils import get_config -from langchain_community.vectorstores import Chroma -from langchain_community.vectorstores.utils import filter_complex_metadata -from langchain_core.documents import Document -from tqdm import tqdm -from tqdm.asyncio import tqdm as atqdm - -chroma_conf = get_config()["chroma"] - - -class ChromaClient: - """A class for connecting to a hosted Chroma Vectorstore collection. - - Attributes: - host : str - IP Address of hosted Chroma Vectorstore - port : str - port address of hosted Chroma Vectorstore - collection_name : str - name of the collection in the Chroma Vectorstore, each ChromaClient connects to a single collection - embedding_type : str - type of embedding used, supported 'sentence-transformers' and 'instructor-embedding' - embedding_modelname : str - model name of embedding used, should correspond to the embedding_type - embedding_function - a function of the embedding model, derived from the embedding_type and embedding_modelname - chroma_client - Chroma API for client - collection - Chroma API for the collection - langchain_chroma - LangChain wrapper for Chroma collection - """ - - def __init__( - self, - host=chroma_conf["host"], - port=chroma_conf["port"], - collection_name=chroma_conf["collection_name"], - embedding_type=chroma_conf["embedding_type"], - embedding_model=chroma_conf["embedding_model"], - ): - """Args: - host: IP Address of hosted Chroma Vectorstore, defaults to argument from config file - port: port address of hosted Chroma Vectorstore, defaults to argument from config file - collection_name: name of the collection in the Chroma Vectorstore, defaults to argument from config file - embedding_type: type of embedding used, supported 'sentence-transformers' and 'instructor-embedding', defaults to argument from config file - embedding_model: model name of embedding used, should correspond to the embedding_type, defaults to argument from config file - """ - self.host: str = host - self.port: str = port - self.collection_name: str = collection_name - self.embedding_type: str = embedding_type - self.embedding_model: str = embedding_model - - self.embedding_function = Embedding( - embedding_model=self.embedding_model, embedding_type=self.embedding_type - ).embedding_function - - self.chroma_client = chromadb.HttpClient(host=self.host, port=self.port) - self.collection = self.chroma_client.get_or_create_collection( - 
name=self.collection_name - ) - self.langchain_chroma = Chroma( - client=self.chroma_client, - collection_name=self.collection_name, - embedding_function=self.embedding_function, - ) - self.allowed_metadata_types = (str, int, float, bool) - - def test_connection(self, verbose=True): - """Tests connection with Chroma Vectorstore - - Args: - verbose: if True, prints connection status - - Returns: - A random integer if connection is alive else None - """ - response = self.chroma_client.heartbeat() - if verbose: - if response: - print(f"Connection to {self.host}/{self.port} is alive..") - else: - print(f"Connection to {self.host}/{self.port} is not alive !!") - return response - - async def aadd_docs(self, docs: List[Document], verbose=True): - """Asynchronously adds documents to chroma vectorstore - - Args: - docs: List of Documents - verbose: Show progress bar - - Returns: - None - """ - docs = self._filter_metadata(docs) - # tasks = [self.langchain_chroma.aadd_documents([doc]) for doc in docs] - # if verbose: - # await tqdm_asyncio.gather(*tasks, desc=f'Adding to {self.collection_name}') - # else: - # await asyncio.gather(*tasks) - if verbose: - for doc in atqdm( - docs, - desc=f"Adding documents to {self.collection_name}", - total=len(docs), - ): - await self.langchain_chroma.aadd_documents([doc]) - else: - for doc in docs: - await self.langchain_chroma.aadd_documents([doc]) - - def add_docs(self, docs: List[Document], verbose=True): - """Adds documents to chroma vectorstore - - Args: - docs: List of Documents - verbose: Show progress bar - - Returns: - None - """ - docs = self._filter_metadata(docs) - for doc in ( - tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs - ): - _id = self.langchain_chroma.add_documents([doc]) - - def _filter_metadata(self, docs: List[Document]): - return filter_complex_metadata(docs, allowed_types=self.allowed_metadata_types) From 7729d32647b29cce059d462d891995d00e427038 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Thu, 21 Mar 2024 16:37:14 -0400 Subject: [PATCH 04/20] Bug fix: top_k --- src/grag/components/vectordb/chroma_client.py | 8 ++++---- src/grag/components/vectordb/deeplake_client.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py index 53f5547..3e73b04 100644 --- a/src/grag/components/vectordb/chroma_client.py +++ b/src/grag/components/vectordb/chroma_client.py @@ -141,11 +141,11 @@ def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): """ if with_score: return self.langchain_client.similarity_search_with_relevance_scores( - query=query, **{"k": top_k} if top_k else 1 + query=query, k=top_k if top_k else 1 ) else: return self.langchain_client.similarity_search( - query=query, **{"k": top_k} if top_k else 1 + query=query, k=top_k if top_k else 1 ) async def aget_chunk(self, query: str, with_score=False, top_k=None): @@ -162,9 +162,9 @@ async def aget_chunk(self, query: str, with_score=False, top_k=None): """ if with_score: return await self.langchain_client.asimilarity_search_with_relevance_scores( - query=query, **{"k": top_k} if top_k else 1 + query=query, k=top_k if top_k else 1 ) else: return await self.langchain_client.asimilarity_search( - query=query, **{"k": top_k} if top_k else 1 + query=query, k=top_k if top_k else 1 ) diff --git a/src/grag/components/vectordb/deeplake_client.py b/src/grag/components/vectordb/deeplake_client.py index 75d6058..bb88255 100644 --- 
a/src/grag/components/vectordb/deeplake_client.py +++ b/src/grag/components/vectordb/deeplake_client.py @@ -103,11 +103,11 @@ def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): """ if with_score: return self.langchain_client.similarity_search_with_relevance_scores( - query=query, **{"k": top_k} if top_k else 1 + query=query, k=top_k if top_k else 1 ) else: return self.langchain_client.similarity_search( - query=query, **{"k": top_k} if top_k else 1 + query=query, k=top_k if top_k else 1 ) async def aget_chunk(self, query: str, with_score=False, top_k=None): @@ -124,9 +124,9 @@ async def aget_chunk(self, query: str, with_score=False, top_k=None): """ if with_score: return await self.langchain_client.asimilarity_search_with_relevance_scores( - query=query, **{"k": top_k} if top_k else 1 + query=query, k=top_k if top_k else 1 ) else: return await self.langchain_client.asimilarity_search( - query=query, **{"k": top_k} if top_k else 1 + query=query, k=top_k if top_k else 1 ) From 2f05d98c37d6358c9140a33c09d78877b845557e Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Thu, 21 Mar 2024 16:37:47 -0400 Subject: [PATCH 05/20] Update chroma_client_test --- src/tests/components/vectordb/__init__.py | 0 .../{ => vectordb}/chroma_client_test.py | 79 +++++++++++++++---- 2 files changed, 64 insertions(+), 15 deletions(-) create mode 100644 src/tests/components/vectordb/__init__.py rename src/tests/components/{ => vectordb}/chroma_client_test.py (58%) diff --git a/src/tests/components/vectordb/__init__.py b/src/tests/components/vectordb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tests/components/chroma_client_test.py b/src/tests/components/vectordb/chroma_client_test.py similarity index 58% rename from src/tests/components/chroma_client_test.py rename to src/tests/components/vectordb/chroma_client_test.py index 1596dd3..6f0e925 100644 --- a/src/tests/components/chroma_client_test.py +++ b/src/tests/components/vectordb/chroma_client_test.py @@ -1,12 +1,13 @@ import asyncio -from grag.components.chroma_client import ChromaClient +import pytest +from grag.components.vectordb.chroma_client import ChromaClient from langchain_core.documents import Document def test_chroma_connection(): - client = ChromaClient() - response = client.test_connection() + chroma_client = ChromaClient() + response = chroma_client.test_connection() assert isinstance(response, int) @@ -45,13 +46,13 @@ def test_chroma_add_docs(): storm-clouds was split to the blinding zigzag of lightning, and the thunder rolled and boomed, like the Colorado in flood.""", ] - client = ChromaClient(collection_name="test") - if client.collection.count() > 0: - client.chroma_client.delete_collection("test") - client = ChromaClient(collection_name="test") + chroma_client = ChromaClient(collection_name="test") + if chroma_client.collection.count() > 0: + chroma_client.client.delete_collection("test") + chroma_client = ChromaClient(collection_name="test") docs = [Document(page_content=doc) for doc in docs] - client.add_docs(docs) - collection_count = client.collection.count() + chroma_client.add_docs(docs) + collection_count = chroma_client.collection.count() assert collection_count == len(docs) @@ -90,11 +91,59 @@ def test_chroma_aadd_docs(): storm-clouds was split to the blinding zigzag of lightning, and the thunder rolled and boomed, like the Colorado in flood.""", ] - client = ChromaClient(collection_name="test") - if client.collection.count() > 0: - client.chroma_client.delete_collection("test") - 
client = ChromaClient(collection_name="test") + chroma_client = ChromaClient(collection_name="test") + if chroma_client.collection.count() > 0: + chroma_client.client.delete_collection("test") + chroma_client = ChromaClient(collection_name="test") docs = [Document(page_content=doc) for doc in docs] loop = asyncio.get_event_loop() - loop.run_until_complete(client.aadd_docs(docs)) - assert client.collection.count() == len(docs) + loop.run_until_complete(chroma_client.aadd_docs(docs)) + assert chroma_client.collection.count() == len(docs) + + +chrome_get_chunk_params = [(1, False), (1, True), (2, False), (2, True)] + + +@pytest.mark.parametrize("top_k,with_score", chrome_get_chunk_params) +def test_chroma_get_chunk(top_k, with_score): + query = """Slone and Lucy never rode down so far as the stately monuments, though + these held memories as hauntingly sweet as others were poignantly + bitter. Lucy never rode the King again. But Slone rode him, learned to + love him. And Lucy did not race any more. When Slone tried to stir in + her the old spirit all the response he got was a wistful shake of head + or a laugh that hid the truth or an excuse that the strain on her + ankles from Joel Creech's lasso had never mended. The girl was + unutterably happy, but it was possible that she would never race a + horse again.""" + chroma_client = ChromaClient(collection_name="test") + retrieved_chunks = chroma_client.get_chunk(query=query, top_k=top_k, with_score=with_score) + assert len(retrieved_chunks) == top_k + if with_score: + assert all(isinstance(doc[0], Document) for doc in retrieved_chunks) + assert all(isinstance(doc[1], float) for doc in retrieved_chunks) + else: + assert all(isinstance(doc, Document) for doc in retrieved_chunks) + + +@pytest.mark.parametrize("top_k,with_score", chrome_get_chunk_params) +def test_chroma_aget_chunk(top_k, with_score): + query = """Slone and Lucy never rode down so far as the stately monuments, though + these held memories as hauntingly sweet as others were poignantly + bitter. Lucy never rode the King again. But Slone rode him, learned to + love him. And Lucy did not race any more. When Slone tried to stir in + her the old spirit all the response he got was a wistful shake of head + or a laugh that hid the truth or an excuse that the strain on her + ankles from Joel Creech's lasso had never mended. 
The girl was + unutterably happy, but it was possible that she would never race a + horse again.""" + chroma_client = ChromaClient(collection_name="test") + loop = asyncio.get_event_loop() + retrieved_chunks = loop.run_until_complete( + chroma_client.aget_chunk(query=query, top_k=top_k, with_score=with_score) + ) + assert len(retrieved_chunks) == top_k + if with_score: + assert all(isinstance(doc[0], Document) for doc in retrieved_chunks) + assert all(isinstance(doc[1], float) for doc in retrieved_chunks) + else: + assert all(isinstance(doc, Document) for doc in retrieved_chunks) From 41b2bcf4c89658ab7bd184864805457bc0752232 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Fri, 22 Mar 2024 16:02:30 -0400 Subject: [PATCH 06/20] Deeplake tests, typing --- src/grag/components/vectordb/base.py | 24 ++- src/grag/components/vectordb/chroma_client.py | 28 +++- .../components/vectordb/deeplake_client.py | 47 +++--- .../components/vectordb/chroma_client_test.py | 15 +- .../vectordb/deeplake_client_test.py | 144 ++++++++++++++++++ 5 files changed, 220 insertions(+), 38 deletions(-) create mode 100644 src/tests/components/vectordb/deeplake_client_test.py diff --git a/src/grag/components/vectordb/base.py b/src/grag/components/vectordb/base.py index 67bc5be..ab63fcb 100644 --- a/src/grag/components/vectordb/base.py +++ b/src/grag/components/vectordb/base.py @@ -1,13 +1,23 @@ from abc import ABC, abstractmethod -from typing import List +from typing import List, Tuple, Union from langchain_community.vectorstores.utils import filter_complex_metadata from langchain_core.documents import Document class VectorDB(ABC): + + @abstractmethod + def __len__(self) -> int: + """Number of chunks in the vector database.""" + ... + + @abstractmethod + def delete(self) -> None: + """Delete all chunks in the vector database.""" + @abstractmethod - def add_docs(self, docs: List[Document], verbose: bool = True): + def add_docs(self, docs: List[Document], verbose: bool = True) -> None: """Adds documents to the vector database. Args: @@ -20,7 +30,7 @@ def add_docs(self, docs: List[Document], verbose: bool = True): ... @abstractmethod - async def aadd_docs(self, docs: List[Document], verbose: bool = True): + async def aadd_docs(self, docs: List[Document], verbose: bool = True) -> None: """Adds documents to the vector database (asynchronous). Args: @@ -33,7 +43,8 @@ async def aadd_docs(self, docs: List[Document], verbose: bool = True): ... @abstractmethod - def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): + def get_chunk(self, query: str, with_score: bool = False, top_k: int = None) -> Union[ + List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the vector database. Args: @@ -47,7 +58,8 @@ def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): ... @abstractmethod - async def aget_chunk(self, query: str, with_score: bool = False, top_k: int = None): + async def aget_chunk(self, query: str, with_score: bool = False, top_k: int = None) -> Union[ + List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the vector database. (asynchronous) Args: @@ -60,5 +72,5 @@ async def aget_chunk(self, query: str, with_score: bool = False, top_k: int = No """ ... 
- def _filter_metadata(self, docs: List[Document]): + def _filter_metadata(self, docs: List[Document]) -> List[Document]: return filter_complex_metadata(docs, allowed_types=self.allowed_metadata_types) diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py index 3e73b04..e97323d 100644 --- a/src/grag/components/vectordb/chroma_client.py +++ b/src/grag/components/vectordb/chroma_client.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Tuple, Union import chromadb from grag.components.embedding import Embedding @@ -72,7 +72,21 @@ def __init__( ) self.allowed_metadata_types = (str, int, float, bool) - def test_connection(self, verbose=True): + def __len__(self) -> int: + return self.collection.count() + + def delete(self) -> None: + self.client.delete_collection(self.collection_name) + self.collection = self.client.get_or_create_collection( + name=self.collection_name + ) + self.langchain_client = Chroma( + client=self.client, + collection_name=self.collection_name, + embedding_function=self.embedding_function, + ) + + def test_connection(self, verbose=True) -> int: """Tests connection with Chroma Vectorstore Args: @@ -89,7 +103,7 @@ def test_connection(self, verbose=True): print(f"Connection to {self.host}/{self.port} is not alive !!") return response - def add_docs(self, docs: List[Document], verbose=True): + def add_docs(self, docs: List[Document], verbose=True) -> None: """Adds documents to chroma vectorstore Args: @@ -105,7 +119,7 @@ def add_docs(self, docs: List[Document], verbose=True): ): _id = self.langchain_client.add_documents([doc]) - async def aadd_docs(self, docs: List[Document], verbose=True): + async def aadd_docs(self, docs: List[Document], verbose=True) -> None: """Asynchronously adds documents to chroma vectorstore Args: @@ -127,7 +141,8 @@ async def aadd_docs(self, docs: List[Document], verbose=True): for doc in docs: await self.langchain_client.aadd_documents([doc]) - def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): + def get_chunk(self, query: str, with_score: bool = False, top_k: int = None) -> Union[ + List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the chroma database. Args: @@ -148,7 +163,8 @@ def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): query=query, k=top_k if top_k else 1 ) - async def aget_chunk(self, query: str, with_score=False, top_k=None): + async def aget_chunk(self, query: str, with_score=False, top_k=None) -> Union[ + List[Document], List[Tuple[Document, float]]]: """Returns the most (cosine) similar chunks from the vector database, asynchronously. 
Args: diff --git a/src/grag/components/vectordb/deeplake_client.py b/src/grag/components/vectordb/deeplake_client.py index bb88255..28fc606 100644 --- a/src/grag/components/vectordb/deeplake_client.py +++ b/src/grag/components/vectordb/deeplake_client.py @@ -1,7 +1,6 @@ from pathlib import Path -from typing import List, Union +from typing import List, Tuple, Union -from deeplake.core.vectorstore import VectorStore from grag.components.embedding import Embedding from grag.components.utils import get_config from grag.components.vectordb.base import VectorDB @@ -34,11 +33,15 @@ class DeepLakeClient(VectorDB): """ def __init__(self, - store_path: Union[str, Path], - embedding_model: str, - embedding_type: str, + collection_name: str = deeplake_conf["collection_name"], + store_path: Union[str, Path] = deeplake_conf["store_path"], + embedding_type: str = deeplake_conf["embedding_type"], + embedding_model: str = deeplake_conf["embedding_model"], + read_only: bool = False ): self.store_path = Path(store_path) + self.collection_name = collection_name + self.read_only = read_only self.embedding_type: str = embedding_type self.embedding_model: str = embedding_model @@ -46,12 +49,20 @@ def __init__(self, embedding_model=self.embedding_model, embedding_type=self.embedding_type ).embedding_function - self.client = VectorStore(path=self.store_path) - self.langchain_client = DeepLake(path=self.store_path, - embedding=self.embedding_function) + # self.client = VectorStore(path=self.store_path / self.collection_name) + self.langchain_client = DeepLake(dataset_path=str(self.store_path / self.collection_name), + embedding=self.embedding_function, + read_only=self.read_only) + self.client = self.langchain_client.vectorstore self.allowed_metadata_types = (str, int, float, bool) - def add_docs(self, docs: List[Document], verbose=True): + def __len__(self) -> int: + return self.client.__len__() + + def delete(self) -> None: + self.client.delete(delete_all=True) + + def add_docs(self, docs: List[Document], verbose=True) -> None: """Adds documents to deeplake vectorstore Args: @@ -65,9 +76,9 @@ def add_docs(self, docs: List[Document], verbose=True): for doc in ( tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs ): - _id = self.langchain_chroma.add_documents([doc]) + _id = self.langchain_client.add_documents([doc]) - async def aadd_docs(self, docs: List[Document], verbose=True): + async def aadd_docs(self, docs: List[Document], verbose=True) -> None: """Asynchronously adds documents to chroma vectorstore Args: @@ -84,12 +95,13 @@ async def aadd_docs(self, docs: List[Document], verbose=True): desc=f"Adding documents to {self.collection_name}", total=len(docs), ): - await self.langchain_deeplake.aadd_documents([doc]) + await self.langchain_client.aadd_documents([doc]) else: for doc in docs: - await self.langchain_deeplake.aadd_documents([doc]) + await self.langchain_client.aadd_documents([doc]) - def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): + def get_chunk(self, query: str, with_score: bool = False, top_k: int = None) -> Union[ + List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the deeplake database. 
Args: @@ -102,7 +114,7 @@ def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): """ if with_score: - return self.langchain_client.similarity_search_with_relevance_scores( + return self.langchain_client.similarity_search_with_score( query=query, k=top_k if top_k else 1 ) else: @@ -110,7 +122,8 @@ def get_chunk(self, query: str, with_score: bool = False, top_k: int = None): query=query, k=top_k if top_k else 1 ) - async def aget_chunk(self, query: str, with_score=False, top_k=None): + async def aget_chunk(self, query: str, with_score=False, top_k=None) -> Union[ + List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the deeplake database, asynchronously. Args: @@ -123,7 +136,7 @@ async def aget_chunk(self, query: str, with_score=False, top_k=None): """ if with_score: - return await self.langchain_client.asimilarity_search_with_relevance_scores( + return await self.langchain_client.asimilarity_search_with_score( query=query, k=top_k if top_k else 1 ) else: diff --git a/src/tests/components/vectordb/chroma_client_test.py b/src/tests/components/vectordb/chroma_client_test.py index 6f0e925..ecffa22 100644 --- a/src/tests/components/vectordb/chroma_client_test.py +++ b/src/tests/components/vectordb/chroma_client_test.py @@ -47,13 +47,11 @@ def test_chroma_add_docs(): thunder rolled and boomed, like the Colorado in flood.""", ] chroma_client = ChromaClient(collection_name="test") - if chroma_client.collection.count() > 0: - chroma_client.client.delete_collection("test") - chroma_client = ChromaClient(collection_name="test") + if len(chroma_client) > 0: + chroma_client.delete() docs = [Document(page_content=doc) for doc in docs] chroma_client.add_docs(docs) - collection_count = chroma_client.collection.count() - assert collection_count == len(docs) + assert len(chroma_client) == len(docs) def test_chroma_aadd_docs(): @@ -92,13 +90,12 @@ def test_chroma_aadd_docs(): thunder rolled and boomed, like the Colorado in flood.""", ] chroma_client = ChromaClient(collection_name="test") - if chroma_client.collection.count() > 0: - chroma_client.client.delete_collection("test") - chroma_client = ChromaClient(collection_name="test") + if len(chroma_client) > 0: + chroma_client.delete() docs = [Document(page_content=doc) for doc in docs] loop = asyncio.get_event_loop() loop.run_until_complete(chroma_client.aadd_docs(docs)) - assert chroma_client.collection.count() == len(docs) + assert len(chroma_client) == len(docs) chrome_get_chunk_params = [(1, False), (1, True), (2, False), (2, True)] diff --git a/src/tests/components/vectordb/deeplake_client_test.py b/src/tests/components/vectordb/deeplake_client_test.py new file mode 100644 index 0000000..921bd18 --- /dev/null +++ b/src/tests/components/vectordb/deeplake_client_test.py @@ -0,0 +1,144 @@ +import asyncio + +import pytest +from grag.components.vectordb.deeplake_client import DeepLakeClient +from langchain_core.documents import Document + + +def test_deeplake_add_docs(): + docs = [ + """And so on this rainbow day, with storms all around them, and blue sky + above, they rode only as far as the valley. But from there, before they + turned to go back, the monuments appeared close, and they loomed + grandly with the background of purple bank and creamy cloud and shafts + of golden lightning. They seemed like sentinels--guardians of a great + and beautiful love born under their lofty heights, in the lonely + silence of day, in the star-thrown shadow of night. They were like that + love. 
And they held Lucy and Slone, calling every day, giving a + nameless and tranquil content, binding them true to love, true to the + sage and the open, true to that wild upland home.""", + """Slone and Lucy never rode down so far as the stately monuments, though + these held memories as hauntingly sweet as others were poignantly + bitter. Lucy never rode the King again. But Slone rode him, learned to + love him. And Lucy did not race any more. When Slone tried to stir in + her the old spirit all the response he got was a wistful shake of head + or a laugh that hid the truth or an excuse that the strain on her + ankles from Joel Creech's lasso had never mended. The girl was + unutterably happy, but it was possible that she would never race a + horse again.""", + """Bostil wanted to be alone, to welcome the King, to lead him back to the + home corral, perhaps to hide from all eyes the change and the uplift + that would forever keep him from wronging another man. + + The late rains came and like magic, in a few days, the sage grew green + and lustrous and fresh, the gray turning to purple. + + Every morning the sun rose white and hot in a blue and cloudless sky. + And then soon the horizon line showed creamy clouds that rose and + spread and darkened. Every afternoon storms hung along the ramparts and + rainbows curved down beautiful and ethereal. The dim blackness of the + storm-clouds was split to the blinding zigzag of lightning, and the + thunder rolled and boomed, like the Colorado in flood.""", + ] + deeplake_client = DeepLakeClient(collection_name="test") + if len(deeplake_client) > 0: + deeplake_client.delete() + docs = [Document(page_content=doc) for doc in docs] + deeplake_client.add_docs(docs) + assert len(deeplake_client) == len(docs) + del (deeplake_client) + + +def test_chroma_aadd_docs(): + docs = [ + """And so on this rainbow day, with storms all around them, and blue sky + above, they rode only as far as the valley. But from there, before they + turned to go back, the monuments appeared close, and they loomed + grandly with the background of purple bank and creamy cloud and shafts + of golden lightning. They seemed like sentinels--guardians of a great + and beautiful love born under their lofty heights, in the lonely + silence of day, in the star-thrown shadow of night. They were like that + love. And they held Lucy and Slone, calling every day, giving a + nameless and tranquil content, binding them true to love, true to the + sage and the open, true to that wild upland home.""", + """Slone and Lucy never rode down so far as the stately monuments, though + these held memories as hauntingly sweet as others were poignantly + bitter. Lucy never rode the King again. But Slone rode him, learned to + love him. And Lucy did not race any more. When Slone tried to stir in + her the old spirit all the response he got was a wistful shake of head + or a laugh that hid the truth or an excuse that the strain on her + ankles from Joel Creech's lasso had never mended. The girl was + unutterably happy, but it was possible that she would never race a + horse again.""", + """Bostil wanted to be alone, to welcome the King, to lead him back to the + home corral, perhaps to hide from all eyes the change and the uplift + that would forever keep him from wronging another man. + + The late rains came and like magic, in a few days, the sage grew green + and lustrous and fresh, the gray turning to purple. + + Every morning the sun rose white and hot in a blue and cloudless sky. 
+ And then soon the horizon line showed creamy clouds that rose and + spread and darkened. Every afternoon storms hung along the ramparts and + rainbows curved down beautiful and ethereal. The dim blackness of the + storm-clouds was split to the blinding zigzag of lightning, and the + thunder rolled and boomed, like the Colorado in flood.""", + ] + deeplake_client = DeepLakeClient(collection_name="test") + if len(deeplake_client) > 0: + deeplake_client.delete() + docs = [Document(page_content=doc) for doc in docs] + loop = asyncio.get_event_loop() + loop.run_until_complete(deeplake_client.aadd_docs(docs)) + assert len(deeplake_client) == len(docs) + del (deeplake_client) + + +deeplake_get_chunk_params = [(1, False), (1, True), (2, False), (2, True)] + + +@pytest.mark.parametrize("top_k,with_score", deeplake_get_chunk_params) +def test_deeplake_get_chunk(top_k, with_score): + query = """Slone and Lucy never rode down so far as the stately monuments, though + these held memories as hauntingly sweet as others were poignantly + bitter. Lucy never rode the King again. But Slone rode him, learned to + love him. And Lucy did not race any more. When Slone tried to stir in + her the old spirit all the response he got was a wistful shake of head + or a laugh that hid the truth or an excuse that the strain on her + ankles from Joel Creech's lasso had never mended. The girl was + unutterably happy, but it was possible that she would never race a + horse again.""" + deeplake_client = DeepLakeClient(collection_name="test", read_only=True) + retrieved_chunks = deeplake_client.get_chunk(query=query, top_k=top_k, with_score=with_score) + assert len(retrieved_chunks) == top_k + if with_score: + assert all(isinstance(doc[0], Document) for doc in retrieved_chunks) + assert all(isinstance(doc[1], float) for doc in retrieved_chunks) + else: + assert all(isinstance(doc, Document) for doc in retrieved_chunks) + del (deeplake_client) + + +@pytest.mark.parametrize("top_k,with_score", deeplake_get_chunk_params) +def test_deeplake_aget_chunk(top_k, with_score): + query = """Slone and Lucy never rode down so far as the stately monuments, though + these held memories as hauntingly sweet as others were poignantly + bitter. Lucy never rode the King again. But Slone rode him, learned to + love him. And Lucy did not race any more. When Slone tried to stir in + her the old spirit all the response he got was a wistful shake of head + or a laugh that hid the truth or an excuse that the strain on her + ankles from Joel Creech's lasso had never mended. 
The girl was + unutterably happy, but it was possible that she would never race a + horse again.""" + deeplake_client = DeepLakeClient(collection_name="test", read_only=True) + loop = asyncio.get_event_loop() + retrieved_chunks = loop.run_until_complete( + deeplake_client.aget_chunk(query=query, top_k=top_k, with_score=with_score) + ) + assert len(retrieved_chunks) == top_k + if with_score: + assert all(isinstance(doc[0], Document) for doc in retrieved_chunks) + assert all(isinstance(doc[1], float) for doc in retrieved_chunks) + else: + assert all(isinstance(doc, Document) for doc in retrieved_chunks) + del (deeplake_client) From 428c634e73c7134b4507f35d8df94fef6477902e Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Fri, 22 Mar 2024 18:02:54 -0400 Subject: [PATCH 07/20] quantization --- src/config.ini | 5 ++- src/grag/quantize/quantize.py | 76 +++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 src/grag/quantize/quantize.py diff --git a/src/config.ini b/src/config.ini index 452ac04..74ab6c4 100644 --- a/src/config.ini +++ b/src/config.ini @@ -51,4 +51,7 @@ table_as_html : True data_path : ${root:root_path}/data [root] -root_path : /home/ubuntu/volume_2k/Capstone_5 \ No newline at end of file +root_path : /home/ubuntu/volume_2k/Capstone_5 + +[quantize] +llama_cpp_path : ${root:root_path} \ No newline at end of file diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py new file mode 100644 index 0000000..2728e13 --- /dev/null +++ b/src/grag/quantize/quantize.py @@ -0,0 +1,76 @@ +import os +import subprocess + +from grag.components.utils import get_config +from huggingface_hub import snapshot_download + +original_dir = os.getcwd() +config = get_config() +root_path = config['quantize']['llama_cpp_path'] + + +def get_llamacpp_repo(): + if os.path.exists(f"{root_path}/llama.cpp"): + subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True) + else: + subprocess.run( + [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], + check=True, shell=True) + + +def building_llama(): + os.chdir(f"{root_path}/llama.cpp/") + try: + subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) + print('Llama.cpp build successfull.') + except subprocess.CalledProcessError: + try: + subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['mkdir', 'build'], check=True) + subprocess.run( + ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', + 'Release'], shell=True, check=True) + print('Llama.cpp build successfull.') + except subprocess.CalledProcessError: + print("Unable to build, cannot find make or cmake.") + os.chdir(original_dir) + + +def fetch_model_repo(): + response = input("Do you want us to download the model? 
(yes/no) [Enter for yes]: ").strip().lower() + if response == "no": + print("Please copy the model folder to 'llama.cpp/models/' folder.") + elif response == "yes" or response == "": + repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ') + local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" + os.mkdir(local_dir) + snapshot_download(repo_id=repo_id, local_dir=local_dir, + local_dir_use_symlinks=False) + print(f"Model downloaded in {local_dir}") + + +def quantize_model(quantization): + os.chdir(f"{root_path}/llama.cpp/") + subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) + + model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" + quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" + subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") + os.chdir(original_dir) + + +if __name__ == "__main__": + get_llamacpp_repo() + building_llama() + fetch_model_repo() + + quantization = input("Enter quantization: ") + quantize_model(quantization) + # if len(sys.argv) < 2 or len(sys.argv) > 3: + # print("Usage: python script.py []") + # sys.exit(1) + # model_dir_path = sys.argv[1] + # quantization = sys.argv[2] if len(sys.argv) == 3 else None + # execute_commands(model_dir_path, quantization) From 016011747380dbd57fb45f674447a204eda7efc1 Mon Sep 17 00:00:00 2001 From: arjbingly Date: Fri, 22 Mar 2024 22:23:27 +0000 Subject: [PATCH 08/20] style fixes by ruff --- src/grag/components/multivec_retriever.py | 18 ++++--- src/grag/components/vectordb/base.py | 13 ++--- src/grag/components/vectordb/chroma_client.py | 30 ++++++----- .../components/vectordb/deeplake_client.py | 51 ++++++++++--------- .../components/vectordb/chroma_client_test.py | 4 +- .../vectordb/deeplake_client_test.py | 12 +++-- 6 files changed, 71 insertions(+), 57 deletions(-) diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py index b57a67f..98b3e6e 100644 --- a/src/grag/components/multivec_retriever.py +++ b/src/grag/components/multivec_retriever.py @@ -31,11 +31,11 @@ class Retriever: """ def __init__( - self, - store_path: str = multivec_retriever_conf["store_path"], - id_key: str = multivec_retriever_conf["id_key"], - namespace: str = multivec_retriever_conf["namespace"], - top_k=1, + self, + store_path: str = multivec_retriever_conf["store_path"], + id_key: str = multivec_retriever_conf["id_key"], + namespace: str = multivec_retriever_conf["namespace"], + top_k=1, ): """Args: store_path: Path to the local file store, defaults to argument from config file @@ -145,7 +145,7 @@ def get_chunk(self, query: str, with_score=False, top_k=None): list of Documents """ - _top_k = top_k if top_k else self.retriever.search_kwargs['k'] + _top_k = top_k if top_k else self.retriever.search_kwargs["k"] return self.vectordb.get_chunk(query=query, top_k=_top_k, with_score=with_score) async def aget_chunk(self, query: str, with_score=False, top_k=None): @@ -160,8 +160,10 @@ async def aget_chunk(self, query: str, with_score=False, top_k=None): list of Documents """ - _top_k = top_k if top_k else self.retriever.search_kwargs['k'] - return await self.vectordb.aget_chunk(query=query, top_k=_top_k, with_score=with_score) + _top_k = top_k if top_k else self.retriever.search_kwargs["k"] + return await self.vectordb.aget_chunk( + query=query, 
top_k=_top_k, with_score=with_score + ) def get_doc(self, query: str): """Returns the parent document of the most (cosine) similar chunk from the vector database. diff --git a/src/grag/components/vectordb/base.py b/src/grag/components/vectordb/base.py index ab63fcb..c474232 100644 --- a/src/grag/components/vectordb/base.py +++ b/src/grag/components/vectordb/base.py @@ -6,7 +6,6 @@ class VectorDB(ABC): - @abstractmethod def __len__(self) -> int: """Number of chunks in the vector database.""" @@ -19,7 +18,7 @@ def delete(self) -> None: @abstractmethod def add_docs(self, docs: List[Document], verbose: bool = True) -> None: """Adds documents to the vector database. - + Args: docs: List of Documents verbose: Show progress bar @@ -43,8 +42,9 @@ async def aadd_docs(self, docs: List[Document], verbose: bool = True) -> None: ... @abstractmethod - def get_chunk(self, query: str, with_score: bool = False, top_k: int = None) -> Union[ - List[Document], List[Tuple[Document, float]]]: + def get_chunk( + self, query: str, with_score: bool = False, top_k: int = None + ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the vector database. Args: @@ -58,8 +58,9 @@ def get_chunk(self, query: str, with_score: bool = False, top_k: int = None) -> ... @abstractmethod - async def aget_chunk(self, query: str, with_score: bool = False, top_k: int = None) -> Union[ - List[Document], List[Tuple[Document, float]]]: + async def aget_chunk( + self, query: str, with_score: bool = False, top_k: int = None + ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the vector database. (asynchronous) Args: diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py index e97323d..ef9091f 100644 --- a/src/grag/components/vectordb/chroma_client.py +++ b/src/grag/components/vectordb/chroma_client.py @@ -37,12 +37,12 @@ class ChromaClient(VectorDB): """ def __init__( - self, - host=chroma_conf["host"], - port=chroma_conf["port"], - collection_name=chroma_conf["collection_name"], - embedding_type=chroma_conf["embedding_type"], - embedding_model=chroma_conf["embedding_model"], + self, + host=chroma_conf["host"], + port=chroma_conf["port"], + collection_name=chroma_conf["collection_name"], + embedding_type=chroma_conf["embedding_type"], + embedding_model=chroma_conf["embedding_model"], ): """Args: host: IP Address of hosted Chroma Vectorstore, defaults to argument from config file @@ -115,7 +115,7 @@ def add_docs(self, docs: List[Document], verbose=True) -> None: """ docs = self._filter_metadata(docs) for doc in ( - tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs + tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs ): _id = self.langchain_client.add_documents([doc]) @@ -132,17 +132,18 @@ async def aadd_docs(self, docs: List[Document], verbose=True) -> None: docs = self._filter_metadata(docs) if verbose: for doc in atqdm( - docs, - desc=f"Adding documents to {self.collection_name}", - total=len(docs), + docs, + desc=f"Adding documents to {self.collection_name}", + total=len(docs), ): await self.langchain_client.aadd_documents([doc]) else: for doc in docs: await self.langchain_client.aadd_documents([doc]) - def get_chunk(self, query: str, with_score: bool = False, top_k: int = None) -> Union[ - List[Document], List[Tuple[Document, float]]]: + def get_chunk( + self, query: str, with_score: bool = False, top_k: int = None + ) -> Union[List[Document], 
List[Tuple[Document, float]]]: """Returns the most similar chunks from the chroma database. Args: @@ -163,8 +164,9 @@ def get_chunk(self, query: str, with_score: bool = False, top_k: int = None) -> query=query, k=top_k if top_k else 1 ) - async def aget_chunk(self, query: str, with_score=False, top_k=None) -> Union[ - List[Document], List[Tuple[Document, float]]]: + async def aget_chunk( + self, query: str, with_score=False, top_k=None + ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most (cosine) similar chunks from the vector database, asynchronously. Args: diff --git a/src/grag/components/vectordb/deeplake_client.py b/src/grag/components/vectordb/deeplake_client.py index 28fc606..3a389c3 100644 --- a/src/grag/components/vectordb/deeplake_client.py +++ b/src/grag/components/vectordb/deeplake_client.py @@ -14,7 +14,7 @@ class DeepLakeClient(VectorDB): """A class for connecting to a DeepLake Vectorstore - + Attributes: store_path : str, Path The path to store the DeepLake vectorstore. @@ -32,13 +32,14 @@ class DeepLakeClient(VectorDB): LangChain wrapper for DeepLake API """ - def __init__(self, - collection_name: str = deeplake_conf["collection_name"], - store_path: Union[str, Path] = deeplake_conf["store_path"], - embedding_type: str = deeplake_conf["embedding_type"], - embedding_model: str = deeplake_conf["embedding_model"], - read_only: bool = False - ): + def __init__( + self, + collection_name: str = deeplake_conf["collection_name"], + store_path: Union[str, Path] = deeplake_conf["store_path"], + embedding_type: str = deeplake_conf["embedding_type"], + embedding_model: str = deeplake_conf["embedding_model"], + read_only: bool = False, + ): self.store_path = Path(store_path) self.collection_name = collection_name self.read_only = read_only @@ -50,9 +51,11 @@ def __init__(self, ).embedding_function # self.client = VectorStore(path=self.store_path / self.collection_name) - self.langchain_client = DeepLake(dataset_path=str(self.store_path / self.collection_name), - embedding=self.embedding_function, - read_only=self.read_only) + self.langchain_client = DeepLake( + dataset_path=str(self.store_path / self.collection_name), + embedding=self.embedding_function, + read_only=self.read_only, + ) self.client = self.langchain_client.vectorstore self.allowed_metadata_types = (str, int, float, bool) @@ -64,44 +67,45 @@ def delete(self) -> None: def add_docs(self, docs: List[Document], verbose=True) -> None: """Adds documents to deeplake vectorstore - + Args: docs: List of Documents verbose: Show progress bar - + Returns: None """ docs = self._filter_metadata(docs) for doc in ( - tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs + tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs ): _id = self.langchain_client.add_documents([doc]) async def aadd_docs(self, docs: List[Document], verbose=True) -> None: """Asynchronously adds documents to chroma vectorstore - + Args: docs: List of Documents verbose: Show progress bar - + Returns: None """ docs = self._filter_metadata(docs) if verbose: for doc in atqdm( - docs, - desc=f"Adding documents to {self.collection_name}", - total=len(docs), + docs, + desc=f"Adding documents to {self.collection_name}", + total=len(docs), ): await self.langchain_client.aadd_documents([doc]) else: for doc in docs: await self.langchain_client.aadd_documents([doc]) - def get_chunk(self, query: str, with_score: bool = False, top_k: int = None) -> Union[ - List[Document], List[Tuple[Document, float]]]: + def 
get_chunk( + self, query: str, with_score: bool = False, top_k: int = None + ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the deeplake database. Args: @@ -122,8 +126,9 @@ def get_chunk(self, query: str, with_score: bool = False, top_k: int = None) -> query=query, k=top_k if top_k else 1 ) - async def aget_chunk(self, query: str, with_score=False, top_k=None) -> Union[ - List[Document], List[Tuple[Document, float]]]: + async def aget_chunk( + self, query: str, with_score=False, top_k=None + ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the deeplake database, asynchronously. Args: diff --git a/src/tests/components/vectordb/chroma_client_test.py b/src/tests/components/vectordb/chroma_client_test.py index ecffa22..c491dfd 100644 --- a/src/tests/components/vectordb/chroma_client_test.py +++ b/src/tests/components/vectordb/chroma_client_test.py @@ -113,7 +113,9 @@ def test_chroma_get_chunk(top_k, with_score): unutterably happy, but it was possible that she would never race a horse again.""" chroma_client = ChromaClient(collection_name="test") - retrieved_chunks = chroma_client.get_chunk(query=query, top_k=top_k, with_score=with_score) + retrieved_chunks = chroma_client.get_chunk( + query=query, top_k=top_k, with_score=with_score + ) assert len(retrieved_chunks) == top_k if with_score: assert all(isinstance(doc[0], Document) for doc in retrieved_chunks) diff --git a/src/tests/components/vectordb/deeplake_client_test.py b/src/tests/components/vectordb/deeplake_client_test.py index 921bd18..cea5e61 100644 --- a/src/tests/components/vectordb/deeplake_client_test.py +++ b/src/tests/components/vectordb/deeplake_client_test.py @@ -46,7 +46,7 @@ def test_deeplake_add_docs(): docs = [Document(page_content=doc) for doc in docs] deeplake_client.add_docs(docs) assert len(deeplake_client) == len(docs) - del (deeplake_client) + del deeplake_client def test_chroma_aadd_docs(): @@ -91,7 +91,7 @@ def test_chroma_aadd_docs(): loop = asyncio.get_event_loop() loop.run_until_complete(deeplake_client.aadd_docs(docs)) assert len(deeplake_client) == len(docs) - del (deeplake_client) + del deeplake_client deeplake_get_chunk_params = [(1, False), (1, True), (2, False), (2, True)] @@ -109,14 +109,16 @@ def test_deeplake_get_chunk(top_k, with_score): unutterably happy, but it was possible that she would never race a horse again.""" deeplake_client = DeepLakeClient(collection_name="test", read_only=True) - retrieved_chunks = deeplake_client.get_chunk(query=query, top_k=top_k, with_score=with_score) + retrieved_chunks = deeplake_client.get_chunk( + query=query, top_k=top_k, with_score=with_score + ) assert len(retrieved_chunks) == top_k if with_score: assert all(isinstance(doc[0], Document) for doc in retrieved_chunks) assert all(isinstance(doc[1], float) for doc in retrieved_chunks) else: assert all(isinstance(doc, Document) for doc in retrieved_chunks) - del (deeplake_client) + del deeplake_client @pytest.mark.parametrize("top_k,with_score", deeplake_get_chunk_params) @@ -141,4 +143,4 @@ def test_deeplake_aget_chunk(top_k, with_score): assert all(isinstance(doc[1], float) for doc in retrieved_chunks) else: assert all(isinstance(doc, Document) for doc in retrieved_chunks) - del (deeplake_client) + del deeplake_client From aec73771f90cdeeaa9d4fe128fc04fb5befb786e Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Fri, 22 Mar 2024 22:23:39 +0000 Subject: [PATCH 09/20] style fixes by ruff --- src/grag/quantize/quantize.py | 64 
+++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 2728e13..773e987 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -6,47 +6,73 @@ original_dir = os.getcwd() config = get_config() -root_path = config['quantize']['llama_cpp_path'] +root_path = config["quantize"]["llama_cpp_path"] def get_llamacpp_repo(): if os.path.exists(f"{root_path}/llama.cpp"): - subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True) + subprocess.run( + [f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True + ) else: subprocess.run( [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True) + check=True, + shell=True, + ) def building_llama(): os.chdir(f"{root_path}/llama.cpp/") try: - subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) - print('Llama.cpp build successfull.') + subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True) + print("Llama.cpp build successfull.") except subprocess.CalledProcessError: try: - subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['mkdir', 'build'], check=True) + subprocess.run(["which", "cmake"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["mkdir", "build"], check=True) subprocess.run( - ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', - 'Release'], shell=True, check=True) - print('Llama.cpp build successfull.') + [ + "cd", + "build", + "&&", + "cmake", + "..", + "-DLLAMA_CUBLAS=ON", + "&&", + "cmake", + "--build", + ".", + "--config", + "Release", + ], + shell=True, + check=True, + ) + print("Llama.cpp build successfull.") except subprocess.CalledProcessError: print("Unable to build, cannot find make or cmake.") os.chdir(original_dir) def fetch_model_repo(): - response = input("Do you want us to download the model? (yes/no) [Enter for yes]: ").strip().lower() + response = ( + input("Do you want us to download the model? 
(yes/no) [Enter for yes]: ") + .strip() + .lower() + ) if response == "no": print("Please copy the model folder to 'llama.cpp/models/' folder.") elif response == "yes" or response == "": - repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ') + repo_id = input( + "Please enter the repo_id for the model (you can check on https://huggingface.co/models): " + ) local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" os.mkdir(local_dir) - snapshot_download(repo_id=repo_id, local_dir=local_dir, - local_dir_use_symlinks=False) + snapshot_download( + repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False + ) print(f"Model downloaded in {local_dir}") @@ -55,8 +81,12 @@ def quantize_model(quantization): subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" - quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + quantized_model_file = ( + f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" + ) + subprocess.run( + ["llm_quantize", model_file, quantized_model_file, quantization], check=True + ) print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") os.chdir(original_dir) From 26235f561748e709cc4073cdcefddbcdc0e5e27d Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Sat, 23 Mar 2024 15:55:47 -0400 Subject: [PATCH 10/20] Update doc strings. --- src/grag/components/vectordb/base.py | 14 +++++-- src/grag/components/vectordb/chroma_client.py | 41 +++++++++++-------- .../components/vectordb/deeplake_client.py | 39 +++++++++++------- 3 files changed, 60 insertions(+), 34 deletions(-) diff --git a/src/grag/components/vectordb/base.py b/src/grag/components/vectordb/base.py index c474232..1146d77 100644 --- a/src/grag/components/vectordb/base.py +++ b/src/grag/components/vectordb/base.py @@ -1,3 +1,9 @@ +"""Abstract base class for vector database clients. + +This module provides: +- VectorDB +""" + from abc import ABC, abstractmethod from typing import List, Tuple, Union @@ -6,6 +12,8 @@ class VectorDB(ABC): + """Abstract base class for vector database clients.""" + @abstractmethod def __len__(self) -> int: """Number of chunks in the vector database.""" @@ -43,7 +51,7 @@ async def aadd_docs(self, docs: List[Document], verbose: bool = True) -> None: @abstractmethod def get_chunk( - self, query: str, with_score: bool = False, top_k: int = None + self, query: str, with_score: bool = False, top_k: int = None ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the vector database. @@ -59,9 +67,9 @@ def get_chunk( @abstractmethod async def aget_chunk( - self, query: str, with_score: bool = False, top_k: int = None + self, query: str, with_score: bool = False, top_k: int = None ) -> Union[List[Document], List[Tuple[Document, float]]]: - """Returns the most similar chunks from the vector database. (asynchronous) + """Returns the most similar chunks from the vector database (asynchronous). Args: query: A query string diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py index ef9091f..105882c 100644 --- a/src/grag/components/vectordb/chroma_client.py +++ b/src/grag/components/vectordb/chroma_client.py @@ -1,3 +1,8 @@ +"""Class for Chroma vector database. 
+ +This module provides: +- ChromaClient +""" from typing import List, Tuple, Union import chromadb @@ -37,14 +42,16 @@ class ChromaClient(VectorDB): """ def __init__( - self, - host=chroma_conf["host"], - port=chroma_conf["port"], - collection_name=chroma_conf["collection_name"], - embedding_type=chroma_conf["embedding_type"], - embedding_model=chroma_conf["embedding_model"], + self, + host=chroma_conf["host"], + port=chroma_conf["port"], + collection_name=chroma_conf["collection_name"], + embedding_type=chroma_conf["embedding_type"], + embedding_model=chroma_conf["embedding_model"], ): - """Args: + """Initialize a ChromaClient object. + + Args: host: IP Address of hosted Chroma Vectorstore, defaults to argument from config file port: port address of hosted Chroma Vectorstore, defaults to argument from config file collection_name: name of the collection in the Chroma Vectorstore, defaults to argument from config file @@ -73,9 +80,11 @@ def __init__( self.allowed_metadata_types = (str, int, float, bool) def __len__(self) -> int: + """Count the number of chunks in the database.""" return self.collection.count() def delete(self) -> None: + """Delete all the chunks in the database collection.""" self.client.delete_collection(self.collection_name) self.collection = self.client.get_or_create_collection( name=self.collection_name @@ -87,7 +96,7 @@ def delete(self) -> None: ) def test_connection(self, verbose=True) -> int: - """Tests connection with Chroma Vectorstore + """Tests connection with Chroma Vectorstore. Args: verbose: if True, prints connection status @@ -104,7 +113,7 @@ def test_connection(self, verbose=True) -> int: return response def add_docs(self, docs: List[Document], verbose=True) -> None: - """Adds documents to chroma vectorstore + """Adds documents to chroma vectorstore. Args: docs: List of Documents @@ -115,12 +124,12 @@ def add_docs(self, docs: List[Document], verbose=True) -> None: """ docs = self._filter_metadata(docs) for doc in ( - tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs + tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs ): _id = self.langchain_client.add_documents([doc]) async def aadd_docs(self, docs: List[Document], verbose=True) -> None: - """Asynchronously adds documents to chroma vectorstore + """Asynchronously adds documents to chroma vectorstore. Args: docs: List of Documents @@ -132,9 +141,9 @@ async def aadd_docs(self, docs: List[Document], verbose=True) -> None: docs = self._filter_metadata(docs) if verbose: for doc in atqdm( - docs, - desc=f"Adding documents to {self.collection_name}", - total=len(docs), + docs, + desc=f"Adding documents to {self.collection_name}", + total=len(docs), ): await self.langchain_client.aadd_documents([doc]) else: @@ -142,7 +151,7 @@ async def aadd_docs(self, docs: List[Document], verbose=True) -> None: await self.langchain_client.aadd_documents([doc]) def get_chunk( - self, query: str, with_score: bool = False, top_k: int = None + self, query: str, with_score: bool = False, top_k: int = None ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the chroma database. @@ -165,7 +174,7 @@ def get_chunk( ) async def aget_chunk( - self, query: str, with_score=False, top_k=None + self, query: str, with_score=False, top_k=None ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most (cosine) similar chunks from the vector database, asynchronously. 
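(For orientation, a minimal sketch of how this get_chunk / aget_chunk contract is consumed; the collection name and query text are illustrative, and a reachable, already-populated Chroma instance is assumed.)

    from grag.components.vectordb.chroma_client import ChromaClient

    client = ChromaClient(collection_name="test")  # illustrative collection name

    # Default form: a plain list of langchain Documents.
    chunks = client.get_chunk("wild horses in the desert", top_k=2)

    # with_score=True: a list of (Document, float) pairs, matching the Union return type above.
    for doc, score in client.get_chunk("wild horses in the desert", with_score=True, top_k=2):
        print(round(score, 3), doc.page_content[:60])
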
diff --git a/src/grag/components/vectordb/deeplake_client.py b/src/grag/components/vectordb/deeplake_client.py index 3a389c3..2cb0270 100644 --- a/src/grag/components/vectordb/deeplake_client.py +++ b/src/grag/components/vectordb/deeplake_client.py @@ -1,3 +1,9 @@ +"""Class for DeepLake vector database. + +This module provides: +- DeepLakeClient +""" + from pathlib import Path from typing import List, Tuple, Union @@ -13,7 +19,7 @@ class DeepLakeClient(VectorDB): - """A class for connecting to a DeepLake Vectorstore + """A class for connecting to a DeepLake Vectorstore. Attributes: store_path : str, Path @@ -33,13 +39,14 @@ class DeepLakeClient(VectorDB): """ def __init__( - self, - collection_name: str = deeplake_conf["collection_name"], - store_path: Union[str, Path] = deeplake_conf["store_path"], - embedding_type: str = deeplake_conf["embedding_type"], - embedding_model: str = deeplake_conf["embedding_model"], - read_only: bool = False, + self, + collection_name: str = deeplake_conf["collection_name"], + store_path: Union[str, Path] = deeplake_conf["store_path"], + embedding_type: str = deeplake_conf["embedding_type"], + embedding_model: str = deeplake_conf["embedding_model"], + read_only: bool = False, ): + """Initialize DeepLake client object.""" self.store_path = Path(store_path) self.collection_name = collection_name self.read_only = read_only @@ -60,13 +67,15 @@ def __init__( self.allowed_metadata_types = (str, int, float, bool) def __len__(self) -> int: + """Number of chunks in the vector database.""" return self.client.__len__() def delete(self) -> None: + """Delete all chunks in the vector database.""" self.client.delete(delete_all=True) def add_docs(self, docs: List[Document], verbose=True) -> None: - """Adds documents to deeplake vectorstore + """Adds documents to deeplake vectorstore. Args: docs: List of Documents @@ -77,12 +86,12 @@ def add_docs(self, docs: List[Document], verbose=True) -> None: """ docs = self._filter_metadata(docs) for doc in ( - tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs + tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs ): _id = self.langchain_client.add_documents([doc]) async def aadd_docs(self, docs: List[Document], verbose=True) -> None: - """Asynchronously adds documents to chroma vectorstore + """Asynchronously adds documents to chroma vectorstore. Args: docs: List of Documents @@ -94,9 +103,9 @@ async def aadd_docs(self, docs: List[Document], verbose=True) -> None: docs = self._filter_metadata(docs) if verbose: for doc in atqdm( - docs, - desc=f"Adding documents to {self.collection_name}", - total=len(docs), + docs, + desc=f"Adding documents to {self.collection_name}", + total=len(docs), ): await self.langchain_client.aadd_documents([doc]) else: @@ -104,7 +113,7 @@ async def aadd_docs(self, docs: List[Document], verbose=True) -> None: await self.langchain_client.aadd_documents([doc]) def get_chunk( - self, query: str, with_score: bool = False, top_k: int = None + self, query: str, with_score: bool = False, top_k: int = None ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the deeplake database. @@ -127,7 +136,7 @@ def get_chunk( ) async def aget_chunk( - self, query: str, with_score=False, top_k=None + self, query: str, with_score=False, top_k=None ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the deeplake database, asynchronously. 
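Because both clients subclass the same VectorDB abstract base, retrieval code can now be written once against the base type and the backend swapped freely. A minimal sketch, assuming both stores are reachable and already populated (the collection name is illustrative):

    from typing import List

    from grag.components.vectordb.base import VectorDB
    from grag.components.vectordb.chroma_client import ChromaClient
    from grag.components.vectordb.deeplake_client import DeepLakeClient
    from langchain_core.documents import Document

    def top_chunks(db: VectorDB, query: str, k: int = 2) -> List[Document]:
        # get_chunk is declared on the abstract base, so any implementation works here.
        return db.get_chunk(query=query, top_k=k)

    for db in (ChromaClient(collection_name="test"),
               DeepLakeClient(collection_name="test", read_only=True)):
        print(type(db).__name__, len(top_chunks(db, "storm over the valley")))
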
From 8e78f75a4dec4a9fcebec9fb89052b05c97f62c5 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sat, 23 Mar 2024 18:49:45 -0400 Subject: [PATCH 11/20] quantize file --- src/grag/quantize/__init__.py | 0 src/grag/quantize/quantize.py | 104 +++++++++++----------------------- src/grag/quantize/utils.py | 76 +++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 71 deletions(-) create mode 100644 src/grag/quantize/__init__.py create mode 100644 src/grag/quantize/utils.py diff --git a/src/grag/quantize/__init__.py b/src/grag/quantize/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 2728e13..02065a9 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -1,76 +1,38 @@ -import os -import subprocess - from grag.components.utils import get_config -from huggingface_hub import snapshot_download +from grag.quantize.utils import ( + building_llama, + fetch_model_repo, + get_llamacpp_repo, + quantize_model, +) -original_dir = os.getcwd() config = get_config() root_path = config['quantize']['llama_cpp_path'] - -def get_llamacpp_repo(): - if os.path.exists(f"{root_path}/llama.cpp"): - subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True) - else: - subprocess.run( - [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True) - - -def building_llama(): - os.chdir(f"{root_path}/llama.cpp/") - try: - subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) - print('Llama.cpp build successfull.') - except subprocess.CalledProcessError: - try: - subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['mkdir', 'build'], check=True) - subprocess.run( - ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', - 'Release'], shell=True, check=True) - print('Llama.cpp build successfull.') - except subprocess.CalledProcessError: - print("Unable to build, cannot find make or cmake.") - os.chdir(original_dir) - - -def fetch_model_repo(): - response = input("Do you want us to download the model? 
(yes/no) [Enter for yes]: ").strip().lower() - if response == "no": - print("Please copy the model folder to 'llama.cpp/models/' folder.") - elif response == "yes" or response == "": - repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ') - local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" - os.mkdir(local_dir) - snapshot_download(repo_id=repo_id, local_dir=local_dir, - local_dir_use_symlinks=False) - print(f"Model downloaded in {local_dir}") - - -def quantize_model(quantization): - os.chdir(f"{root_path}/llama.cpp/") - subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - - model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" - quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) - print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") - os.chdir(original_dir) - - -if __name__ == "__main__": - get_llamacpp_repo() - building_llama() - fetch_model_repo() - - quantization = input("Enter quantization: ") - quantize_model(quantization) - # if len(sys.argv) < 2 or len(sys.argv) > 3: - # print("Usage: python script.py []") - # sys.exit(1) - # model_dir_path = sys.argv[1] - # quantization = sys.argv[2] if len(sys.argv) == 3 else None - # execute_commands(model_dir_path, quantization) +user_input = input( + "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. Press Enter to use the default config path: ").strip() + +if user_input != "": + root_path = user_input + +res = get_llamacpp_repo(root_path) + +if "Already up to date." in res.stdout: + print("Repository is already up to date. Skipping build.") +else: + print("Updates found. Starting build...") + building_llama(root_path) + +response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower() +if response == "n": + print("Please copy the model folder to 'llama.cpp/models/' folder.") + _ = input("Enter if you have already copied the model:") + model_dir = input("Enter the model directory name: ") +elif response == "y" or response == "": + repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ').strip() + fetch_model_repo(repo_id, root_path) + model_dir = repo_id.split('/')[1] + +quantization = input( + "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : ") +quantize_model(model_dir, quantization, root_path) diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py new file mode 100644 index 0000000..3df3c1a --- /dev/null +++ b/src/grag/quantize/utils.py @@ -0,0 +1,76 @@ +import os +import subprocess +from pathlib import Path + +from huggingface_hub import snapshot_download + + +def get_llamacpp_repo(root_path: str) -> None: + """Clones or pulls the llama.cpp repository into the specified root path. + + Args: + root_path (str): The root directory where the llama.cpp repository will be cloned or updated. 
+ """ + if os.path.exists(f"{root_path}/llama.cpp"): + print(f"Repo exists at: {root_path}/llama.cpp") + res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True) + else: + + subprocess.run( + [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], + check=True, shell=True) + + +def building_llama(root_path: str) -> None: + """Attempts to build the llama.cpp project using make or cmake. + + Args: + root_path (str): The root directory where the llama.cpp project is located. + """ + os.chdir(f"{root_path}/llama.cpp/") + try: + subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) + print('Llama.cpp build successful.') + except subprocess.CalledProcessError: + try: + subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) + subprocess.run(['mkdir', 'build'], check=True) + subprocess.run( + ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', + 'Release'], shell=True, check=True) + print('Llama.cpp build successful.') + except subprocess.CalledProcessError: + print("Unable to build, cannot find make or cmake.") + finally: + os.chdir(Path(__file__).parent) # Assuming you want to return to the root path after operation + + +def fetch_model_repo(repo_id: str, root_path: str) -> None: + """Download model from huggingface.co/models. + + Args: + repo_id (str): Repository ID of the model to download. + root_path (str): The root path where the model should be downloaded or copied. + """ + local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" + os.mkdir(local_dir) + snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False) + print(f"Model downloaded in {local_dir}") + + +def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> None: + """Quantizes a specified model using a given quantization level. + + Args: + model_dir_path (str): The directory path of the model to be quantized. + quantization (str): The quantization level to apply. + root_path (str): The root directory path of the project. + """ + os.chdir(f"{root_path}/llama.cpp/") + subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) + model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" + quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" + subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") + os.chdir(Path(__file__).parent) # Return to the root path after operation From 11697c006bac3865203cc242c6841a024ec1f52a Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sat, 23 Mar 2024 18:55:11 -0400 Subject: [PATCH 12/20] Revert "Merge branch 'quantize' of https://github.com/arjbingly/Capstone_5 into quantize" This reverts commit 79ebf3ae4bbc634f075d51791b1442569b1cd03a, reversing changes made to 8e78f75a4dec4a9fcebec9fb89052b05c97f62c5. 
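(Independent of the revert above, the helpers split out into grag.quantize.utils can also be driven non-interactively. A minimal sketch against that module's API as introduced earlier in this series; the repo id and quantization level are examples only, and the downloaded model is assumed to sit under llama.cpp/models/ where quantize_model looks for it:)

    from grag.components.utils import get_config
    from grag.quantize.utils import (
        building_llama,
        fetch_model_repo,
        get_llamacpp_repo,
        quantize_model,
    )

    config = get_config()
    root_path = config["quantize"]["llama_cpp_path"]

    get_llamacpp_repo(root_path)                                # clone or update llama.cpp
    building_llama(root_path)                                   # build via make, falling back to cmake
    fetch_model_repo("meta-llama/Llama-2-7b-chat", root_path)   # example Hugging Face repo id
    quantize_model("Llama-2-7b-chat", "Q5_K_M", root_path)      # example quantization level
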
--- src/grag/quantize/quantize.py | 102 +--------------------------------- 1 file changed, 1 insertion(+), 101 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index c013b07..02065a9 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -7,70 +7,21 @@ ) config = get_config() -root_path = config["quantize"]["llama_cpp_path"] +root_path = config['quantize']['llama_cpp_path'] user_input = input( "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. Press Enter to use the default config path: ").strip() -<<<<<<< HEAD if user_input != "": root_path = user_input -======= -def get_llamacpp_repo(): - if os.path.exists(f"{root_path}/llama.cpp"): - subprocess.run( - [f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True - ) - else: - subprocess.run( - [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, - shell=True, - ) ->>>>>>> aec73771f90cdeeaa9d4fe128fc04fb5befb786e res = get_llamacpp_repo(root_path) -<<<<<<< HEAD if "Already up to date." in res.stdout: print("Repository is already up to date. Skipping build.") else: print("Updates found. Starting build...") building_llama(root_path) -======= -def building_llama(): - os.chdir(f"{root_path}/llama.cpp/") - try: - subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL) - subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True) - print("Llama.cpp build successfull.") - except subprocess.CalledProcessError: - try: - subprocess.run(["which", "cmake"], check=True, stdout=subprocess.DEVNULL) - subprocess.run(["mkdir", "build"], check=True) - subprocess.run( - [ - "cd", - "build", - "&&", - "cmake", - "..", - "-DLLAMA_CUBLAS=ON", - "&&", - "cmake", - "--build", - ".", - "--config", - "Release", - ], - shell=True, - check=True, - ) - print("Llama.cpp build successfull.") - except subprocess.CalledProcessError: - print("Unable to build, cannot find make or cmake.") - os.chdir(original_dir) ->>>>>>> aec73771f90cdeeaa9d4fe128fc04fb5befb786e response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower() if response == "n": @@ -82,57 +33,6 @@ def building_llama(): fetch_model_repo(repo_id, root_path) model_dir = repo_id.split('/')[1] -<<<<<<< HEAD quantization = input( "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : ") quantize_model(model_dir, quantization, root_path) -======= -def fetch_model_repo(): - response = ( - input("Do you want us to download the model? 
(yes/no) [Enter for yes]: ") - .strip() - .lower() - ) - if response == "no": - print("Please copy the model folder to 'llama.cpp/models/' folder.") - elif response == "yes" or response == "": - repo_id = input( - "Please enter the repo_id for the model (you can check on https://huggingface.co/models): " - ) - local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" - os.mkdir(local_dir) - snapshot_download( - repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False - ) - print(f"Model downloaded in {local_dir}") - - -def quantize_model(quantization): - os.chdir(f"{root_path}/llama.cpp/") - subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - - model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" - quantized_model_file = ( - f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - ) - subprocess.run( - ["llm_quantize", model_file, quantized_model_file, quantization], check=True - ) - print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") - os.chdir(original_dir) - - -if __name__ == "__main__": - get_llamacpp_repo() - building_llama() - fetch_model_repo() - - quantization = input("Enter quantization: ") - quantize_model(quantization) - # if len(sys.argv) < 2 or len(sys.argv) > 3: - # print("Usage: python script.py []") - # sys.exit(1) - # model_dir_path = sys.argv[1] - # quantization = sys.argv[2] if len(sys.argv) == 3 else None - # execute_commands(model_dir_path, quantization) ->>>>>>> aec73771f90cdeeaa9d4fe128fc04fb5befb786e From 1bb12163f584150286714344082d60280113f9d1 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sat, 23 Mar 2024 20:12:26 -0400 Subject: [PATCH 13/20] rectified quantization, issue with llama.cpp --- src/grag/quantize/quantize.py | 2 +- src/grag/quantize/utils.py | 17 +++++++++-------- src/tests/quantize/__init__.py | 0 src/tests/quantize/quantize_test.py | 0 4 files changed, 10 insertions(+), 9 deletions(-) create mode 100644 src/tests/quantize/__init__.py create mode 100644 src/tests/quantize/quantize_test.py diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 02065a9..8e42117 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -17,7 +17,7 @@ res = get_llamacpp_repo(root_path) -if "Already up to date." in res.stdout: +if "Already up to date." in str(res.stdout): print("Repository is already up to date. Skipping build.") else: print("Updates found. Starting build...") diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 3df3c1a..661fb65 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -15,10 +15,11 @@ def get_llamacpp_repo(root_path: str) -> None: print(f"Repo exists at: {root_path}/llama.cpp") res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True) else: - - subprocess.run( + res = subprocess.run( [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True) + check=True, shell=True, capture_output=True) + + return res def building_llama(root_path: str) -> None: @@ -53,9 +54,9 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None: repo_id (str): Repository ID of the model to download. root_path (str): The root path where the model should be downloaded or copied. 
""" - local_dir = f"{root_path}/llama.cpp/model/{repo_id.split('/')[1]}" - os.mkdir(local_dir) - snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False) + local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}" + os.makedirs(local_dir, exist_ok=True) + snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=auto, resume_download=True) print(f"Model downloaded in {local_dir}") @@ -69,8 +70,8 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> No """ os.chdir(f"{root_path}/llama.cpp/") subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - model_file = f"models/{model_dir_path}/ggml-model-f16.gguf" + model_file = f"models/{model_dir_path}/ggml-model-f32.gguf" quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llm_quantize", model_file, quantized_model_file, quantization], check=True) + subprocess.run(["quantize", model_file, quantized_model_file, quantization], check=True) print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") os.chdir(Path(__file__).parent) # Return to the root path after operation diff --git a/src/tests/quantize/__init__.py b/src/tests/quantize/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tests/quantize/quantize_test.py b/src/tests/quantize/quantize_test.py new file mode 100644 index 0000000..e69de29 From a7354ee7be3dadeff6a596e88a4b16e36cccbb69 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 15:00:14 -0400 Subject: [PATCH 14/20] issue in llama.cpp --- llm_quantize/quantize.py | 4 ++-- src/grag/quantize/quantize.py | 1 + src/grag/quantize/utils.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llm_quantize/quantize.py b/llm_quantize/quantize.py index 7fb1c24..708b6c8 100644 --- a/llm_quantize/quantize.py +++ b/llm_quantize/quantize.py @@ -1,6 +1,6 @@ +import os import subprocess import sys -import os def execute_commands(model_dir_path, quantization=None): @@ -13,7 +13,7 @@ def execute_commands(model_dir_path, quantization=None): if quantization: model_file = f"llama.cpp/models/{model_dir_path}/ggml-model-f16.gguf" quantized_model_file = f"llama.cpp/models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["llama.cpp/llm_quantize", model_file, quantized_model_file, quantization], check=True) + subprocess.run(["llama.cpp/quantize", model_file, quantized_model_file, quantization], check=True) else: print("llama.cpp doesn't exist, check readme how to clone.") diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 8e42117..d05990c 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -15,6 +15,7 @@ if user_input != "": root_path = user_input +# noinspection PyNoneFunctionAssignment res = get_llamacpp_repo(root_path) if "Already up to date." 
in str(res.stdout): diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 661fb65..7e2b92f 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -72,6 +72,6 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> No subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) model_file = f"models/{model_dir_path}/ggml-model-f32.gguf" quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["quantize", model_file, quantized_model_file, quantization], check=True) + subprocess.run(["./quantize", model_file, quantized_model_file, quantization], check=True) print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") os.chdir(Path(__file__).parent) # Return to the root path after operation From caebf0a3eb19e681c08804e67a3cc6ce8d8ade0a Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Sun, 24 Mar 2024 15:29:54 -0400 Subject: [PATCH 15/20] Config changes for deeplake --- src/config.ini | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/config.ini b/src/config.ini index 452ac04..eb3cab2 100644 --- a/src/config.ini +++ b/src/config.ini @@ -14,6 +14,12 @@ n_gpu_layers_cpp : -1 std_out : True base_dir : ${root:root_path}/models +[deeplake] +collection_name : arxiv +embedding_type : instructor-embedding +embedding_model : hkunlp/instructor-xl +store_path : ${data:data_path}/vectordb + [chroma] host : localhost port : 8000 @@ -51,4 +57,4 @@ table_as_html : True data_path : ${root:root_path}/data [root] -root_path : /home/ubuntu/volume_2k/Capstone_5 \ No newline at end of file +root_path : /home/ubuntu/volume_2k/Capstone_5 From 66c06d0fb8e53daf69b967c7c7c102976d5bfd48 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 17:05:47 -0400 Subject: [PATCH 16/20] modifications and corrections after testing --- src/grag/quantize/quantize.py | 5 ++-- src/grag/quantize/utils.py | 43 +++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index d05990c..68168a2 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -1,6 +1,6 @@ from grag.components.utils import get_config from grag.quantize.utils import ( - building_llama, + building_llamacpp, fetch_model_repo, get_llamacpp_repo, quantize_model, @@ -15,14 +15,13 @@ if user_input != "": root_path = user_input -# noinspection PyNoneFunctionAssignment res = get_llamacpp_repo(root_path) if "Already up to date." in str(res.stdout): print("Repository is already up to date. Skipping build.") else: print("Updates found. Starting build...") - building_llama(root_path) + building_llamacpp(root_path) response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower() if response == "n": diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 7e2b92f..8d9d5bc 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -1,28 +1,34 @@ import os import subprocess from pathlib import Path +from typing import Optional, Union +from grag.components.utils import get_config from huggingface_hub import snapshot_download +config = get_config() -def get_llamacpp_repo(root_path: str) -> None: + +def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess: """Clones or pulls the llama.cpp repository into the specified root path. 
Args: - root_path (str): The root directory where the llama.cpp repository will be cloned or updated. + root_path: The root directory where the llama.cpp repository will be cloned or updated. + + Returns: + A subprocess.CompletedProcess instance containing the result of the git operation. """ if os.path.exists(f"{root_path}/llama.cpp"): print(f"Repo exists at: {root_path}/llama.cpp") - res = subprocess.run([f"cd {root_path}/llama.cpp && git pull"], check=True, shell=True, capture_output=True) + res = subprocess.run(["git", "-C", f"{root_path}/llama.cpp", "pull"], check=True, capture_output=True) else: - res = subprocess.run( - [f"cd {root_path} && git clone https://github.com/ggerganov/llama.cpp.git"], - check=True, shell=True, capture_output=True) + res = subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", f"{root_path}/llama.cpp"], + check=True, capture_output=True) return res -def building_llama(root_path: str) -> None: +def building_llamacpp(root_path: str) -> None: """Attempts to build the llama.cpp project using make or cmake. Args: @@ -56,22 +62,31 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None: """ local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}" os.makedirs(local_dir, exist_ok=True) - snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=auto, resume_download=True) + snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks='auto', resume_download=True) print(f"Model downloaded in {local_dir}") -def quantize_model(model_dir_path: str, quantization: str, root_path: str) -> None: +def quantize_model(model_dir_path: str, quantization: str, root_path: str, + output_dir: Optional[Union[str, Path]] = None) -> None: """Quantizes a specified model using a given quantization level. Args: + output_dir (str, optional): Directory to save quantized model. Defaults to None model_dir_path (str): The directory path of the model to be quantized. quantization (str): The quantization level to apply. root_path (str): The root directory path of the project. 
""" os.chdir(f"{root_path}/llama.cpp/") - subprocess.run(["python3", "convert.py", f"models/{model_dir_path}/"], check=True) - model_file = f"models/{model_dir_path}/ggml-model-f32.gguf" - quantized_model_file = f"models/{model_dir_path.split('/')[-1]}/ggml-model-{quantization}.gguf" - subprocess.run(["./quantize", model_file, quantized_model_file, quantization], check=True) - print(f"Quantized model present at {root_path}/llama.cpp/{quantized_model_file}") + model_dir_path = Path(model_dir_path) + if output_dir is None: + output_dir = config['llm']['base_dir'] + + output_dir = Path(output_dir) / model_dir_path.name + os.makedirs(output_dir, exist_ok=True) + + subprocess.run(["python3", "convert.py", f"{model_dir_path}/"], check=True) + model_file = model_dir_path / "ggml-model-f32.gguf" + quantized_model_file = output_dir / f"ggml-model-{quantization}.gguf" + subprocess.run(["./quantize", str(model_file), str(quantized_model_file), quantization], check=True) + print(f"Quantized model present at {output_dir}") os.chdir(Path(__file__).parent) # Return to the root path after operation From f94114e4264eabe6952646062a6574cc954693e7 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Sun, 24 Mar 2024 17:50:26 -0400 Subject: [PATCH 17/20] Retriever update --- projects/Basic-RAG/BasicRAG_stuff.py | 8 +- src/grag/components/multivec_retriever.py | 24 +++-- src/grag/rag/basic_rag.py | 28 +++--- .../components/multivec_retriever_test.py | 89 +++++++++++++++++++ 4 files changed, 127 insertions(+), 22 deletions(-) diff --git a/projects/Basic-RAG/BasicRAG_stuff.py b/projects/Basic-RAG/BasicRAG_stuff.py index 4bfafc3..63edeab 100644 --- a/projects/Basic-RAG/BasicRAG_stuff.py +++ b/projects/Basic-RAG/BasicRAG_stuff.py @@ -1,6 +1,10 @@ -from grag.grag.rag import BasicRAG +from grag.components.multivec_retriever import Retriever +from grag.components.vectordb.deeplake_client import DeepLakeClient +from grag.rag.basic_rag import BasicRAG -rag = BasicRAG(doc_chain="stuff") +client = DeepLakeClient(collection_name="test") +retriever = Retriever(vectordb=client) +rag = BasicRAG(doc_chain="stuff", retriever=retriever) if __name__ == "__main__": while True: diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py index 98b3e6e..b0eeac7 100644 --- a/src/grag/components/multivec_retriever.py +++ b/src/grag/components/multivec_retriever.py @@ -1,11 +1,11 @@ import asyncio import uuid -from typing import List +from typing import Any, Dict, List, Optional from grag.components.text_splitter import TextSplitter from grag.components.utils import get_config from grag.components.vectordb.base import VectorDB -from grag.components.vectordb.chroma_client import ChromaClient +from grag.components.vectordb.deeplake_client import DeepLakeClient from langchain.retrievers.multi_vector import MultiVectorRetriever from langchain.storage import LocalFileStore from langchain_core.documents import Document @@ -31,11 +31,13 @@ class Retriever: """ def __init__( - self, - store_path: str = multivec_retriever_conf["store_path"], - id_key: str = multivec_retriever_conf["id_key"], - namespace: str = multivec_retriever_conf["namespace"], - top_k=1, + self, + vectordb: Optional[VectorDB] = None, + store_path: str = multivec_retriever_conf["store_path"], + id_key: str = multivec_retriever_conf["id_key"], + namespace: str = multivec_retriever_conf["namespace"], + top_k=1, + client_kwargs: Optional[Dict[str, Any]] = None ): """Args: store_path: Path to the local file store, defaults to argument from 
config file @@ -46,7 +48,13 @@ def __init__( self.store_path = store_path self.id_key = id_key self.namespace = uuid.UUID(namespace) - self.vectordb: VectorDB = ChromaClient() # TODO - change to init argument + if vectordb is None: + if client_kwargs is not None: + self.vectordb = DeepLakeClient(**client_kwargs) + else: + self.vectordb = DeepLakeClient() + else: + self.vectordb = vectordb self.store = LocalFileStore(self.store_path) self.retriever = MultiVectorRetriever( vectorstore=self.vectordb.langchain_client, diff --git a/src/grag/rag/basic_rag.py b/src/grag/rag/basic_rag.py index 9589920..1b344ea 100644 --- a/src/grag/rag/basic_rag.py +++ b/src/grag/rag/basic_rag.py @@ -1,10 +1,10 @@ import json -from typing import List, Union +from typing import List, Optional, Union from grag import prompts from grag.components.llm import LLM from grag.components.multivec_retriever import Retriever -from grag.components.prompt import Prompt, FewShotPrompt +from grag.components.prompt import FewShotPrompt, Prompt from grag.components.utils import get_config from importlib_resources import files from langchain_core.documents import Document @@ -14,18 +14,22 @@ class BasicRAG: def __init__( - self, - model_name=None, - doc_chain="stuff", - task="QA", - llm_kwargs=None, - retriever_kwargs=None, - custom_prompt: Union[Prompt, FewShotPrompt, None] = None, + self, + retriever: Optional[Retriever] = None, + model_name=None, + doc_chain="stuff", + task="QA", + llm_kwargs=None, + retriever_kwargs=None, + custom_prompt: Union[Prompt, FewShotPrompt, None] = None, ): - if retriever_kwargs is None: - self.retriever = Retriever() + if retriever is None: + if retriever_kwargs is None: + self.retriever = Retriever() + else: + self.retriever = Retriever(**retriever_kwargs) else: - self.retriever = Retriever(**retriever_kwargs) + self.retriever = retriever if llm_kwargs is None: self.llm_ = LLM() diff --git a/src/tests/components/multivec_retriever_test.py b/src/tests/components/multivec_retriever_test.py index 3ccb3fb..14dad0b 100644 --- a/src/tests/components/multivec_retriever_test.py +++ b/src/tests/components/multivec_retriever_test.py @@ -1,3 +1,92 @@ +import json + +from grag.components.multivec_retriever import Retriever +from langchain_core.documents import Document + +retriever = Retriever() # pass test collection + +doc = Document(page_content="Hello worlds", metadata={"source": "bars"}) + + +def test_retriver_id_gen(): + doc = Document(page_content="Hello world", metadata={"source": "bar"}) + id_ = retriever.id_gen(doc) + assert isinstance(id, str) + assert len(id_) == 32 + doc.page_content = doc.page_content + 'ABC' + id_1 = retriever.id_gen(doc) + assert id_ == id_1 + doc.metadata["source"] = "bars" + id_1 = retriever.id_gen(doc) + assert id_ != id_1 + + +def test_retriever_gen_doc_ids(): + docs = [Document(page_content="Hello world", metadata={"source": "bar"}), + Document(page_content="Hello", metadata={"source": "foo"})] + ids = retriever.gen_doc_ids(docs) + assert len(ids) == len(docs) + assert all(isinstance(id, str) for id in ids) + + +def test_retriever_split_docs(): + pass + + +def test_retriever_split_docs(): + pass + + +def test_retriever_add_docs(): + # small enough docs to not split. + docs = [Document(page_content= + """And so on this rainbow day, with storms all around them, and blue sky + above, they rode only as far as the valley. 
But from there, before they + turned to go back, the monuments appeared close, and they loomed + grandly with the background of purple bank and creamy cloud and shafts + of golden lightning. They seemed like sentinels--guardians of a great + and beautiful love born under their lofty heights, in the lonely + silence of day, in the star-thrown shadow of night. They were like that + love. And they held Lucy and Slone, calling every day, giving a + nameless and tranquil content, binding them true to love, true to the + sage and the open, true to that wild upland home""", metadata={"source": "test_doc_1"}), + Document(page_content= + """Slone and Lucy never rode down so far as the stately monuments, though + these held memories as hauntingly sweet as others were poignantly + bitter. Lucy never rode the King again. But Slone rode him, learned to + love him. And Lucy did not race any more. When Slone tried to stir in + her the old spirit all the response he got was a wistful shake of head + or a laugh that hid the truth or an excuse that the strain on her + ankles from Joel Creech's lasso had never mended. The girl was + unutterably happy, but it was possible that she would never race a + horse again.""", metadata={"source": "test_doc_2"}), + Document(page_content= + """Bostil wanted to be alone, to welcome the King, to lead him back to the + home corral, perhaps to hide from all eyes the change and the uplift + that would forever keep him from wronging another man. + + The late rains came and like magic, in a few days, the sage grew green + and lustrous and fresh, the gray turning to purple. + + Every morning the sun rose white and hot in a blue and cloudless sky. + And then soon the horizon line showed creamy clouds that rose and + spread and darkened. Every afternoon storms hung along the ramparts and + rainbows curved down beautiful and ethereal. The dim blackness of the + storm-clouds was split to the blinding zigzag of lightning, and the + thunder rolled and boomed, like the Colorado in flood.""", metadata={"source": "test_doc_3"}) + ] + ids = retriever.gen_doc_ids(docs) + retriever.add_docs(docs) + retrieved = retriever.store.mget(ids) + assert len(retrieved) == len(ids) + for i, doc in enumerate(docs): + retrieved_doc = json.loads(retrieved[i].decode()) + assert doc.metadata == retrieved_doc.metadata + + +def test_retriever_aadd_docs(): + pass + # # add code folder to sys path # import os # from pathlib import Path From b90a8823d39215226b123553802efff0e9dd26d5 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 17:52:50 -0400 Subject: [PATCH 18/20] quantizations all tests passed --- src/grag/quantize/quantize.py | 72 +++++++++++++---------- src/grag/quantize/utils.py | 89 +++++++++++++++++++++-------- src/tests/quantize/quantize_test.py | 37 ++++++++++++ 3 files changed, 146 insertions(+), 52 deletions(-) diff --git a/src/grag/quantize/quantize.py b/src/grag/quantize/quantize.py index 68168a2..64fba47 100644 --- a/src/grag/quantize/quantize.py +++ b/src/grag/quantize/quantize.py @@ -1,3 +1,7 @@ +"""Interactive file for quantizing models.""" + +from pathlib import Path + from grag.components.utils import get_config from grag.quantize.utils import ( building_llamacpp, @@ -7,32 +11,42 @@ ) config = get_config() -root_path = config['quantize']['llama_cpp_path'] - -user_input = input( - "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. 
Press Enter to use the default config path: ").strip() - -if user_input != "": - root_path = user_input - -res = get_llamacpp_repo(root_path) - -if "Already up to date." in str(res.stdout): - print("Repository is already up to date. Skipping build.") -else: - print("Updates found. Starting build...") - building_llamacpp(root_path) - -response = input("Do you want us to download the model? (y/n) [Enter for yes]: ").strip().lower() -if response == "n": - print("Please copy the model folder to 'llama.cpp/models/' folder.") - _ = input("Enter if you have already copied the model:") - model_dir = input("Enter the model directory name: ") -elif response == "y" or response == "": - repo_id = input('Please enter the repo_id for the model (you can check on https://huggingface.co/models): ').strip() - fetch_model_repo(repo_id, root_path) - model_dir = repo_id.split('/')[1] - -quantization = input( - "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : ") -quantize_model(model_dir, quantization, root_path) +root_path = Path(config["quantize"]["llama_cpp_path"]) + +if __name__ == "__main__": + user_input = input( + "Enter the path to the llama_cpp cloned repo, or where you'd like to clone it. Press Enter to use the default config path: " + ).strip() + + if user_input != "": + root_path = Path(user_input) + + res = get_llamacpp_repo(root_path) + + if "Already up to date." in str(res.stdout): + print("Repository is already up to date. Skipping build.") + else: + print("Updates found. Starting build...") + building_llamacpp(root_path) + + response = ( + input("Do you want us to download the model? (y/n) [Enter for yes]: ") + .strip() + .lower() + ) + if response == "n": + print("Please copy the model folder to 'llama.cpp/models/' folder.") + _ = input("Enter if you have already copied the model:") + model_dir = Path(input("Enter the model directory name: ")) + elif response == "y" or response == "": + repo_id = input( + "Please enter the repo_id for the model (you can check on https://huggingface.co/models): " + ).strip() + fetch_model_repo(repo_id, root_path) + # model_dir = repo_id.split('/')[1] + model_dir = root_path / "llama.cpp" / "models" / repo_id.split("/")[1] + + quantization = input( + "Enter quantization, recommended - Q5_K_M or Q4_K_M for more check https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19 : " + ) + quantize_model(model_dir, quantization, root_path) diff --git a/src/grag/quantize/utils.py b/src/grag/quantize/utils.py index 8d9d5bc..bc1d280 100644 --- a/src/grag/quantize/utils.py +++ b/src/grag/quantize/utils.py @@ -1,3 +1,5 @@ +"""Utility functions for quantization.""" + import os import subprocess from pathlib import Path @@ -9,7 +11,7 @@ config = get_config() -def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess: +def get_llamacpp_repo(root_path: Union[str, Path]) -> subprocess.CompletedProcess: """Clones or pulls the llama.cpp repository into the specified root path. 
Args: @@ -20,15 +22,27 @@ def get_llamacpp_repo(root_path: str) -> subprocess.CompletedProcess: """ if os.path.exists(f"{root_path}/llama.cpp"): print(f"Repo exists at: {root_path}/llama.cpp") - res = subprocess.run(["git", "-C", f"{root_path}/llama.cpp", "pull"], check=True, capture_output=True) + res = subprocess.run( + ["git", "-C", f"{root_path}/llama.cpp", "pull"], + check=True, + capture_output=True, + ) else: - res = subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git", f"{root_path}/llama.cpp"], - check=True, capture_output=True) + res = subprocess.run( + [ + "git", + "clone", + "https://github.com/ggerganov/llama.cpp.git", + f"{root_path}/llama.cpp", + ], + check=True, + capture_output=True, + ) return res -def building_llamacpp(root_path: str) -> None: +def building_llamacpp(root_path: Union[str, Path]) -> None: """Attempts to build the llama.cpp project using make or cmake. Args: @@ -36,24 +50,41 @@ def building_llamacpp(root_path: str) -> None: """ os.chdir(f"{root_path}/llama.cpp/") try: - subprocess.run(['which', 'make'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['make', 'LLAMA_CUBLAS=1'], check=True) - print('Llama.cpp build successful.') + subprocess.run(["which", "make"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["make", "LLAMA_CUBLAS=1"], check=True) + print("Llama.cpp build successful.") except subprocess.CalledProcessError: try: - subprocess.run(['which', 'cmake'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['mkdir', 'build'], check=True) + subprocess.run(["which", "cmake"], check=True, stdout=subprocess.DEVNULL) + subprocess.run(["mkdir", "build"], check=True) subprocess.run( - ['cd', 'build', '&&', 'cmake', '..', '-DLLAMA_CUBLAS=ON', '&&', 'cmake', '--build', '.', '--config', - 'Release'], shell=True, check=True) - print('Llama.cpp build successful.') + [ + "cd", + "build", + "&&", + "cmake", + "..", + "-DLLAMA_CUBLAS=ON", + "&&", + "cmake", + "--build", + ".", + "--config", + "Release", + ], + shell=True, + check=True, + ) + print("Llama.cpp build successful.") except subprocess.CalledProcessError: print("Unable to build, cannot find make or cmake.") finally: - os.chdir(Path(__file__).parent) # Assuming you want to return to the root path after operation + os.chdir( + Path(__file__).parent + ) # Assuming you want to return to the root path after operation -def fetch_model_repo(repo_id: str, root_path: str) -> None: +def fetch_model_repo(repo_id: str, root_path: Union[str, Path]) -> None: """Download model from huggingface.co/models. Args: @@ -62,24 +93,33 @@ def fetch_model_repo(repo_id: str, root_path: str) -> None: """ local_dir = f"{root_path}/llama.cpp/models/{repo_id.split('/')[1]}" os.makedirs(local_dir, exist_ok=True) - snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks='auto', resume_download=True) + snapshot_download( + repo_id=repo_id, + local_dir=local_dir, + local_dir_use_symlinks="auto", + resume_download=True, + ) print(f"Model downloaded in {local_dir}") -def quantize_model(model_dir_path: str, quantization: str, root_path: str, - output_dir: Optional[Union[str, Path]] = None) -> None: +def quantize_model( + model_dir_path: Union[str, Path], + quantization: str, + root_path: Union[str, Path], + output_dir: Optional[Union[str, Path]] = None, +) -> None: """Quantizes a specified model using a given quantization level. Args: - output_dir (str, optional): Directory to save quantized model. 
Defaults to None - model_dir_path (str): The directory path of the model to be quantized. + output_dir (str, Path, optional): Directory to save quantized model. Defaults to None + model_dir_path (str, Path): The directory path of the model to be quantized. quantization (str): The quantization level to apply. - root_path (str): The root directory path of the project. + root_path (str, Path): The root directory path of the project. """ os.chdir(f"{root_path}/llama.cpp/") model_dir_path = Path(model_dir_path) if output_dir is None: - output_dir = config['llm']['base_dir'] + output_dir = config["llm"]["base_dir"] output_dir = Path(output_dir) / model_dir_path.name os.makedirs(output_dir, exist_ok=True) @@ -87,6 +127,9 @@ def quantize_model(model_dir_path: str, quantization: str, root_path: str, subprocess.run(["python3", "convert.py", f"{model_dir_path}/"], check=True) model_file = model_dir_path / "ggml-model-f32.gguf" quantized_model_file = output_dir / f"ggml-model-{quantization}.gguf" - subprocess.run(["./quantize", str(model_file), str(quantized_model_file), quantization], check=True) + subprocess.run( + ["./quantize", str(model_file), str(quantized_model_file), quantization], + check=True, + ) print(f"Quantized model present at {output_dir}") os.chdir(Path(__file__).parent) # Return to the root path after operation diff --git a/src/tests/quantize/quantize_test.py b/src/tests/quantize/quantize_test.py index e69de29..f7b3c51 100644 --- a/src/tests/quantize/quantize_test.py +++ b/src/tests/quantize/quantize_test.py @@ -0,0 +1,37 @@ +import os +from pathlib import Path + +from grag.quantize.utils import ( + building_llamacpp, + fetch_model_repo, + get_llamacpp_repo, + quantize_model, +) + +root_path = Path(__file__).parent / 'test_data' +os.makedirs(root_path, exist_ok=True) + + +def test_get_llamacpp_repo(): + get_llamacpp_repo(root_path) + repo_path = root_path / 'llama.cpp' / '.git' + assert os.path.exists(repo_path) + + +def test_build_llamacpp(): + building_llamacpp(root_path) + bin_path = root_path / 'llama.cpp' / 'quantize' + assert os.path.exists(bin_path) + + +def test_fetch_model_repo(): + fetch_model_repo('meta-llama/Llama-2-7b-chat', root_path) + model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' + assert os.path.exists(model_dir_path) + + +def test_quantize_model(): + model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' + quantize_model(model_dir_path, 'Q3_K_M', root_path, output_dir=model_dir_path.parent) + gguf_file_path = model_dir_path / "ggml-model-Q3_K_M.gguf" + assert os.path.exists(gguf_file_path) From 14ca30db4b806996307c0e1de482e482c06b2826 Mon Sep 17 00:00:00 2001 From: sanchitvj Date: Sun, 24 Mar 2024 21:57:48 +0000 Subject: [PATCH 19/20] style fixes by ruff --- src/tests/quantize/quantize_test.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/tests/quantize/quantize_test.py b/src/tests/quantize/quantize_test.py index f7b3c51..af0e9dd 100644 --- a/src/tests/quantize/quantize_test.py +++ b/src/tests/quantize/quantize_test.py @@ -8,30 +8,32 @@ quantize_model, ) -root_path = Path(__file__).parent / 'test_data' +root_path = Path(__file__).parent / "test_data" os.makedirs(root_path, exist_ok=True) def test_get_llamacpp_repo(): get_llamacpp_repo(root_path) - repo_path = root_path / 'llama.cpp' / '.git' + repo_path = root_path / "llama.cpp" / ".git" assert os.path.exists(repo_path) def test_build_llamacpp(): building_llamacpp(root_path) - bin_path = root_path / 'llama.cpp' / 'quantize' + 
bin_path = root_path / "llama.cpp" / "quantize" assert os.path.exists(bin_path) def test_fetch_model_repo(): - fetch_model_repo('meta-llama/Llama-2-7b-chat', root_path) - model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' + fetch_model_repo("meta-llama/Llama-2-7b-chat", root_path) + model_dir_path = root_path / "llama.cpp" / "models" / "Llama-2-7b-chat" assert os.path.exists(model_dir_path) def test_quantize_model(): - model_dir_path = root_path / 'llama.cpp' / 'models' / 'Llama-2-7b-chat' - quantize_model(model_dir_path, 'Q3_K_M', root_path, output_dir=model_dir_path.parent) + model_dir_path = root_path / "llama.cpp" / "models" / "Llama-2-7b-chat" + quantize_model( + model_dir_path, "Q3_K_M", root_path, output_dir=model_dir_path.parent + ) gguf_file_path = model_dir_path / "ggml-model-Q3_K_M.gguf" assert os.path.exists(gguf_file_path) From 454bb5d4639ea212757307020439df87f0638196 Mon Sep 17 00:00:00 2001 From: arjbingly Date: Sun, 24 Mar 2024 21:57:50 +0000 Subject: [PATCH 20/20] style fixes by ruff --- src/grag/components/multivec_retriever.py | 14 +++++----- src/grag/components/vectordb/base.py | 4 +-- src/grag/components/vectordb/chroma_client.py | 27 ++++++++++--------- .../components/vectordb/deeplake_client.py | 24 ++++++++--------- src/grag/rag/basic_rag.py | 16 +++++------ 5 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py index 9062e69..9fd8664 100644 --- a/src/grag/components/multivec_retriever.py +++ b/src/grag/components/multivec_retriever.py @@ -39,13 +39,13 @@ class Retriever: """ def __init__( - self, - vectordb: Optional[VectorDB] = None, - store_path: str = multivec_retriever_conf["store_path"], - id_key: str = multivec_retriever_conf["id_key"], - namespace: str = multivec_retriever_conf["namespace"], - top_k=1, - client_kwargs: Optional[Dict[str, Any]] = None + self, + vectordb: Optional[VectorDB] = None, + store_path: str = multivec_retriever_conf["store_path"], + id_key: str = multivec_retriever_conf["id_key"], + namespace: str = multivec_retriever_conf["namespace"], + top_k=1, + client_kwargs: Optional[Dict[str, Any]] = None, ): """Initialize the Retriever. diff --git a/src/grag/components/vectordb/base.py b/src/grag/components/vectordb/base.py index 1146d77..b0b0623 100644 --- a/src/grag/components/vectordb/base.py +++ b/src/grag/components/vectordb/base.py @@ -51,7 +51,7 @@ async def aadd_docs(self, docs: List[Document], verbose: bool = True) -> None: @abstractmethod def get_chunk( - self, query: str, with_score: bool = False, top_k: int = None + self, query: str, with_score: bool = False, top_k: int = None ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the vector database. @@ -67,7 +67,7 @@ def get_chunk( @abstractmethod async def aget_chunk( - self, query: str, with_score: bool = False, top_k: int = None + self, query: str, with_score: bool = False, top_k: int = None ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the vector database (asynchronous). 
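The hunks above finish moving Retriever onto the abstract VectorDB interface: the backend is now injected through the new vectordb argument instead of being hard-wired to Chroma. A minimal usage sketch of the refactored constructor (an editor's illustration, not part of any patch in this series; the collection name and sample text are placeholders, and any argument left out falls back to the config file read by get_config()):

    from grag.components.multivec_retriever import Retriever
    from grag.components.vectordb.deeplake_client import DeepLakeClient
    from langchain_core.documents import Document

    # Any backend that implements the VectorDB interface can be injected;
    # DeepLakeClient (see the deeplake_client.py diff below) is the local-disk option.
    client = DeepLakeClient(collection_name="demo_collection")  # placeholder collection name
    retriever = Retriever(vectordb=client, top_k=3)

    retriever.add_docs([Document(page_content="Example text.", metadata={"source": "demo"})])
    chunks = retriever.get_chunk("Example text.")                   # list of Documents
    scored = retriever.get_chunk("Example text.", with_score=True)  # list of (Document, score) pairs
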
diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py index 105882c..cac8ab3 100644 --- a/src/grag/components/vectordb/chroma_client.py +++ b/src/grag/components/vectordb/chroma_client.py @@ -3,6 +3,7 @@ This module provides: - ChromaClient """ + from typing import List, Tuple, Union import chromadb @@ -42,15 +43,15 @@ class ChromaClient(VectorDB): """ def __init__( - self, - host=chroma_conf["host"], - port=chroma_conf["port"], - collection_name=chroma_conf["collection_name"], - embedding_type=chroma_conf["embedding_type"], - embedding_model=chroma_conf["embedding_model"], + self, + host=chroma_conf["host"], + port=chroma_conf["port"], + collection_name=chroma_conf["collection_name"], + embedding_type=chroma_conf["embedding_type"], + embedding_model=chroma_conf["embedding_model"], ): """Initialize a ChromaClient object. - + Args: host: IP Address of hosted Chroma Vectorstore, defaults to argument from config file port: port address of hosted Chroma Vectorstore, defaults to argument from config file @@ -124,7 +125,7 @@ def add_docs(self, docs: List[Document], verbose=True) -> None: """ docs = self._filter_metadata(docs) for doc in ( - tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs + tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs ): _id = self.langchain_client.add_documents([doc]) @@ -141,9 +142,9 @@ async def aadd_docs(self, docs: List[Document], verbose=True) -> None: docs = self._filter_metadata(docs) if verbose: for doc in atqdm( - docs, - desc=f"Adding documents to {self.collection_name}", - total=len(docs), + docs, + desc=f"Adding documents to {self.collection_name}", + total=len(docs), ): await self.langchain_client.aadd_documents([doc]) else: @@ -151,7 +152,7 @@ async def aadd_docs(self, docs: List[Document], verbose=True) -> None: await self.langchain_client.aadd_documents([doc]) def get_chunk( - self, query: str, with_score: bool = False, top_k: int = None + self, query: str, with_score: bool = False, top_k: int = None ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the chroma database. @@ -174,7 +175,7 @@ def get_chunk( ) async def aget_chunk( - self, query: str, with_score=False, top_k=None + self, query: str, with_score=False, top_k=None ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most (cosine) similar chunks from the vector database, asynchronously. 
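The ChromaClient above exposes the same interface synchronously and asynchronously. A short sketch of driving it directly (again an illustration only; host, port and collection name are assumed values that would normally come from the config file, and a Chroma server must be reachable at that address):

    import asyncio

    from grag.components.vectordb.chroma_client import ChromaClient
    from langchain_core.documents import Document

    client = ChromaClient(host="localhost", port=8000, collection_name="demo")  # assumed connection details
    docs = [Document(page_content="Some text to index.", metadata={"source": "demo"})]
    client.add_docs(docs)  # synchronous path, shows a tqdm progress bar while adding

    async def query_async():
        # aget_chunk mirrors get_chunk but awaits the underlying similarity search
        return await client.aget_chunk("text to index", top_k=2)

    hits = asyncio.run(query_async())
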
diff --git a/src/grag/components/vectordb/deeplake_client.py b/src/grag/components/vectordb/deeplake_client.py index 2cb0270..f0d5ba5 100644 --- a/src/grag/components/vectordb/deeplake_client.py +++ b/src/grag/components/vectordb/deeplake_client.py @@ -39,12 +39,12 @@ class DeepLakeClient(VectorDB): """ def __init__( - self, - collection_name: str = deeplake_conf["collection_name"], - store_path: Union[str, Path] = deeplake_conf["store_path"], - embedding_type: str = deeplake_conf["embedding_type"], - embedding_model: str = deeplake_conf["embedding_model"], - read_only: bool = False, + self, + collection_name: str = deeplake_conf["collection_name"], + store_path: Union[str, Path] = deeplake_conf["store_path"], + embedding_type: str = deeplake_conf["embedding_type"], + embedding_model: str = deeplake_conf["embedding_model"], + read_only: bool = False, ): """Initialize DeepLake client object.""" self.store_path = Path(store_path) @@ -86,7 +86,7 @@ def add_docs(self, docs: List[Document], verbose=True) -> None: """ docs = self._filter_metadata(docs) for doc in ( - tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs + tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs ): _id = self.langchain_client.add_documents([doc]) @@ -103,9 +103,9 @@ async def aadd_docs(self, docs: List[Document], verbose=True) -> None: docs = self._filter_metadata(docs) if verbose: for doc in atqdm( - docs, - desc=f"Adding documents to {self.collection_name}", - total=len(docs), + docs, + desc=f"Adding documents to {self.collection_name}", + total=len(docs), ): await self.langchain_client.aadd_documents([doc]) else: @@ -113,7 +113,7 @@ async def aadd_docs(self, docs: List[Document], verbose=True) -> None: await self.langchain_client.aadd_documents([doc]) def get_chunk( - self, query: str, with_score: bool = False, top_k: int = None + self, query: str, with_score: bool = False, top_k: int = None ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the deeplake database. @@ -136,7 +136,7 @@ def get_chunk( ) async def aget_chunk( - self, query: str, with_score=False, top_k=None + self, query: str, with_score=False, top_k=None ) -> Union[List[Document], List[Tuple[Document, float]]]: """Returns the most similar chunks from the deeplake database, asynchronously. diff --git a/src/grag/rag/basic_rag.py b/src/grag/rag/basic_rag.py index cdad471..da461b6 100644 --- a/src/grag/rag/basic_rag.py +++ b/src/grag/rag/basic_rag.py @@ -31,14 +31,14 @@ class BasicRAG: """ def __init__( - self, - retriever: Optional[Retriever] = None, - model_name=None, - doc_chain="stuff", - task="QA", - llm_kwargs=None, - retriever_kwargs=None, - custom_prompt: Union[Prompt, FewShotPrompt, None] = None, + self, + retriever: Optional[Retriever] = None, + model_name=None, + doc_chain="stuff", + task="QA", + llm_kwargs=None, + retriever_kwargs=None, + custom_prompt: Union[Prompt, FewShotPrompt, None] = None, ): if retriever is None: if retriever_kwargs is None: