Commit
Merge pull request #39 from arjbingly/sanchitvj-patch-2
Update ruff_commit.yml
arjbingly authored Mar 20, 2024
2 parents e91ea84 + b378f56 commit c07329c
Showing 17 changed files with 357 additions and 292 deletions.
7 changes: 3 additions & 4 deletions .github/workflows/ruff_commit.yml
@@ -3,14 +3,13 @@ on: push

jobs:
lint:
runs-on: ubuntu_latest
runs-on: self-hosted
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- run: pip install ruff
- run: |
ruff check src/
ruff fix src/
# - run: ruff check src/
- run: ruff format src/
- uses: stefanzweifel/git-auto-commit-action@v4
with:
commit_message: 'style fixes by ruff'
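
The updated workflow step can be mirrored locally. The snippet below is a minimal sketch, assuming `ruff` is installed in the current environment (e.g. via `pip install ruff`); it reproduces the workflow's `ruff format src/` command:

```python
# Local equivalent of the updated CI formatting step; assumes `ruff` is on PATH
# after `pip install ruff`.
import subprocess

subprocess.run(["ruff", "format", "src/"], check=True)
```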
47 changes: 30 additions & 17 deletions src/grag/components/chroma_client.py
@@ -10,7 +10,7 @@
from grag.components.embedding import Embedding
from grag.components.utils import get_config

chroma_conf = get_config()['chroma']
chroma_conf = get_config()["chroma"]


class ChromaClient:
@@ -37,12 +37,14 @@ class ChromaClient:
LangChain wrapper for Chroma collection
"""

def __init__(self,
host=chroma_conf['host'],
port=chroma_conf['port'],
collection_name=chroma_conf['collection_name'],
embedding_type=chroma_conf['embedding_type'],
embedding_model=chroma_conf['embedding_model']):
def __init__(
self,
host=chroma_conf["host"],
port=chroma_conf["port"],
collection_name=chroma_conf["collection_name"],
embedding_type=chroma_conf["embedding_type"],
embedding_model=chroma_conf["embedding_model"],
):
"""Args:
host: IP Address of hosted Chroma Vectorstore, defaults to argument from config file
port: port address of hosted Chroma Vectorstore, defaults to argument from config file
@@ -56,14 +58,19 @@ def __init__(self,
self.embedding_type: str = embedding_type
self.embedding_model: str = embedding_model

self.embedding_function = Embedding(embedding_model=self.embedding_model,
embedding_type=self.embedding_type).embedding_function
self.embedding_function = Embedding(
embedding_model=self.embedding_model, embedding_type=self.embedding_type
).embedding_function

self.chroma_client = chromadb.HttpClient(host=self.host, port=self.port)
self.collection = self.chroma_client.get_or_create_collection(name=self.collection_name)
self.langchain_chroma = Chroma(client=self.chroma_client,
collection_name=self.collection_name,
embedding_function=self.embedding_function, )
self.collection = self.chroma_client.get_or_create_collection(
name=self.collection_name
)
self.langchain_chroma = Chroma(
client=self.chroma_client,
collection_name=self.collection_name,
embedding_function=self.embedding_function,
)
self.allowed_metadata_types = (str, int, float, bool)

def test_connection(self, verbose=True):
@@ -78,9 +85,9 @@ def test_connection(self, verbose=True):
response = self.chroma_client.heartbeat()
if verbose:
if response:
print(f'Connection to {self.host}/{self.port} is alive..')
print(f"Connection to {self.host}/{self.port} is alive..")
else:
print(f'Connection to {self.host}/{self.port} is not alive !!')
print(f"Connection to {self.host}/{self.port} is not alive !!")
return response

async def aadd_docs(self, docs: List[Document], verbose=True):
@@ -100,7 +107,11 @@ async def aadd_docs(self, docs: List[Document], verbose=True):
# else:
# await asyncio.gather(*tasks)
if verbose:
for doc in atqdm(docs, desc=f'Adding documents to {self.collection_name}', total=len(docs)):
for doc in atqdm(
docs,
desc=f"Adding documents to {self.collection_name}",
total=len(docs),
):
await self.langchain_chroma.aadd_documents([doc])
else:
for doc in docs:
@@ -117,7 +128,9 @@ def add_docs(self, docs: List[Document], verbose=True):
None
"""
docs = self._filter_metadata(docs)
for doc in (tqdm(docs, desc=f'Adding to {self.collection_name}:') if verbose else docs):
for doc in (
tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs
):
_id = self.langchain_chroma.add_documents([doc])

def _filter_metadata(self, docs: List[Document]):
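
For context, a minimal usage sketch of the reformatted `ChromaClient` follows. It assumes a Chroma server is reachable at the host and port set in the config file (all constructor arguments default to `get_config()["chroma"]` values); the document and the `Document` import path are illustrative, not taken from this diff:

```python
from grag.components.chroma_client import ChromaClient
from langchain_core.documents import Document  # import path assumed

client = ChromaClient()  # host, port, collection and embedding settings default from config
if client.test_connection():  # heartbeat check against the hosted Chroma instance
    docs = [Document(page_content="hello world", metadata={"source": "demo.txt"})]
    client.add_docs(docs)  # filters unsupported metadata types, then adds with a progress bar
```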
16 changes: 10 additions & 6 deletions src/grag/components/embedding.py
@@ -20,11 +20,15 @@ def __init__(self, embedding_type: str, embedding_model: str):
self.embedding_type = embedding_type
self.embedding_model = embedding_model
match self.embedding_type:
case 'sentence-transformers':
self.embedding_function = SentenceTransformerEmbeddings(model_name=self.embedding_model)
case 'instructor-embedding':
self.embedding_instruction = 'Represent the document for retrival'
self.embedding_function = HuggingFaceInstructEmbeddings(model_name=self.embedding_model)
case "sentence-transformers":
self.embedding_function = SentenceTransformerEmbeddings(
model_name=self.embedding_model
)
case "instructor-embedding":
self.embedding_instruction = "Represent the document for retrival"
self.embedding_function = HuggingFaceInstructEmbeddings(
model_name=self.embedding_model
)
self.embedding_function.embed_instruction = self.embedding_instruction
case _:
raise Exception('embedding_type is invalid')
raise Exception("embedding_type is invalid")
104 changes: 56 additions & 48 deletions src/grag/components/llm.py
@@ -16,7 +16,7 @@

from .utils import get_config

llm_conf = get_config()['llm']
llm_conf = get_config()["llm"]

print("CUDA: ", torch.cuda.is_available())

@@ -35,20 +35,21 @@ class LLM:
n_gpu_layers (int): Number of GPU layers for CPP.
"""

def __init__(self,
model_name=llm_conf["model_name"],
device_map=llm_conf["device_map"],
task=llm_conf["task"],
max_new_tokens=llm_conf["max_new_tokens"],
temperature=llm_conf["temperature"],
n_batch=llm_conf["n_batch_gpu_cpp"],
n_ctx=llm_conf["n_ctx_cpp"],
n_gpu_layers=llm_conf["n_gpu_layers_cpp"],
std_out=llm_conf["std_out"],
base_dir=llm_conf["base_dir"],
quantization=llm_conf["quantization"],
pipeline=llm_conf["pipeline"],
):
def __init__(
self,
model_name=llm_conf["model_name"],
device_map=llm_conf["device_map"],
task=llm_conf["task"],
max_new_tokens=llm_conf["max_new_tokens"],
temperature=llm_conf["temperature"],
n_batch=llm_conf["n_batch_gpu_cpp"],
n_ctx=llm_conf["n_ctx_cpp"],
n_gpu_layers=llm_conf["n_gpu_layers_cpp"],
std_out=llm_conf["std_out"],
base_dir=llm_conf["base_dir"],
quantization=llm_conf["quantization"],
pipeline=llm_conf["pipeline"],
):
self.base_dir = Path(base_dir)
self._model_name = model_name
self.quantization = quantization
@@ -74,7 +75,8 @@ def model_name(self):
def model_path(self):
"""Sets the model name."""
return str(
self.base_dir / self.model_name / f'ggml-model-{self.quantization}.gguf')
self.base_dir / self.model_name / f"ggml-model-{self.quantization}.gguf"
)

@model_name.setter
def model_name(self, value):
@@ -92,46 +94,54 @@ def hf_pipeline(self, is_local=False):
else:
hf_model = self.model_name
match self.quantization:
case 'Q8':
case "Q8":
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
case 'Q4':
case "Q4":
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
case _:
raise ValueError(
f'{self.quantization} is not a valid quantization. Non-local hf_pipeline takes only Q4 and Q8.')
f"{self.quantization} is not a valid quantization. Non-local hf_pipeline takes only Q4 and Q8."
)

try:
# Try to load the model without passing the token
tokenizer = AutoTokenizer.from_pretrained(hf_model)
model = AutoModelForCausalLM.from_pretrained(hf_model,
quantization_config=quantization_config,
device_map=self.device_map,
torch_dtype=torch.float16, )
model = AutoModelForCausalLM.from_pretrained(
hf_model,
quantization_config=quantization_config,
device_map=self.device_map,
torch_dtype=torch.float16,
)
except OSError: # LocalTokenNotFoundError:
# If loading fails due to an auth token error, then load the token and retry
load_dotenv()
auth_token = os.getenv("AUTH_TOKEN")
if not auth_token:
raise ValueError("Authentication token not provided.")
tokenizer = AutoTokenizer.from_pretrained(hf_model, token=True)
model = AutoModelForCausalLM.from_pretrained(hf_model,
quantization_config=quantization_config,
device_map=self.device_map,
torch_dtype=torch.float16,
token=True)

pipe = pipeline(self.task,
model=model,
tokenizer=tokenizer,
torch_dtype=torch.bfloat16,
device_map=self.device_map,
max_new_tokens=self.max_new_tokens,
do_sample=True,
top_k=10,
num_return_sequences=1,
eos_token_id=tokenizer.eos_token_id
)
llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': self.temperature})
model = AutoModelForCausalLM.from_pretrained(
hf_model,
quantization_config=quantization_config,
device_map=self.device_map,
torch_dtype=torch.float16,
token=True,
)

pipe = pipeline(
self.task,
model=model,
tokenizer=tokenizer,
torch_dtype=torch.bfloat16,
device_map=self.device_map,
max_new_tokens=self.max_new_tokens,
do_sample=True,
top_k=10,
num_return_sequences=1,
eos_token_id=tokenizer.eos_token_id,
)
llm = HuggingFacePipeline(
pipeline=pipe, model_kwargs={"temperature": self.temperature}
)
return llm

def llama_cpp(self):
@@ -149,11 +159,9 @@ def llama_cpp(self):
)
return llm

def load_model(self,
model_name=None,
pipeline=None,
quantization=None,
is_local=None):
def load_model(
self, model_name=None, pipeline=None, quantization=None, is_local=None
):
"""Loads the model based on the specified pipeline and model name.
Args:
@@ -172,7 +180,7 @@ def load_model(self,
is_local = False

match self.pipeline:
case 'llama_cpp':
case "llama_cpp":
return self.llama_cpp()
case 'hf':
case "hf":
return self.hf_pipeline(is_local=is_local)
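
A minimal usage sketch of the `LLM` class above; constructor defaults come from the `llm` section of the config file, and `load_model` dispatches on the configured pipeline:

```python
from grag.components.llm import LLM

llm_loader = LLM()  # model_name, quantization, pipeline, etc. default from config
# 'llama_cpp' loads a local ggml-model-<quantization>.gguf file;
# 'hf' builds a transformers pipeline with Q4/Q8 BitsAndBytes quantization.
llm = llm_loader.load_model()
```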
31 changes: 14 additions & 17 deletions src/grag/components/multivec_retriever.py
@@ -10,7 +10,7 @@
from grag.components.text_splitter import TextSplitter
from grag.components.utils import get_config

multivec_retriever_conf = get_config()['multivec_retriever']
multivec_retriever_conf = get_config()["multivec_retriever"]


class Retriever:
@@ -30,11 +30,13 @@ class Retriever:
"""

def __init__(self,
store_path: str = multivec_retriever_conf['store_path'],
id_key: str = multivec_retriever_conf['id_key'],
namespace: str = multivec_retriever_conf['namespace'],
top_k=1):
def __init__(
self,
store_path: str = multivec_retriever_conf["store_path"],
id_key: str = multivec_retriever_conf["id_key"],
namespace: str = multivec_retriever_conf["namespace"],
top_k=1,
):
"""Args:
store_path: Path to the local file store, defaults to argument from config file
id_key: A key prefix for identifying documents, defaults to argument from config file
@@ -53,7 +55,7 @@ def __init__(self,
)
self.splitter = TextSplitter()
self.top_k: int = top_k
self.retriever.search_kwargs = {'k': self.top_k}
self.retriever.search_kwargs = {"k": self.top_k}

def id_gen(self, doc: Document) -> str:
"""Takes a document and returns a unique id (uuid5) using the namespace and document source.
@@ -65,7 +67,7 @@ def id_gen(self, doc: Document) -> str:
Returns:
string of hexadecimal uuid
"""
return uuid.uuid5(self.namespace, doc.metadata['source']).hex
return uuid.uuid5(self.namespace, doc.metadata["source"]).hex

def gen_doc_ids(self, docs: List[Document]) -> List[str]:
"""Takes a list of documents and produces a list of unique id, refer id_gen method for more details.
@@ -144,15 +146,12 @@ def get_chunk(self, query: str, with_score=False, top_k=None):
"""
if with_score:

return self.client.langchain_chroma.similarity_search_with_relevance_scores(
query=query,
**{'k': top_k} if top_k else self.retriever.search_kwargs
query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs
)
else:
return self.client.langchain_chroma.similarity_search(
query=query,
**{'k': top_k} if top_k else self.retriever.search_kwargs
query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs
)

async def aget_chunk(self, query: str, with_score=False, top_k=None):
@@ -169,13 +168,11 @@ async def aget_chunk(self, query: str, with_score=False, top_k=None):
"""
if with_score:
return await self.client.langchain_chroma.asimilarity_search_with_relevance_scores(
query=query,
**{'k': top_k} if top_k else self.retriever.search_kwargs
query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs
)
else:
return await self.client.langchain_chroma.asimilarity_search(
query=query,
**{'k': top_k} if top_k else self.retriever.search_kwargs
query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs
)

def get_doc(self, query: str):
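
And a short sketch of the `Retriever` above; it assumes a running Chroma instance behind the configured client, and the query strings are arbitrary examples:

```python
from grag.components.multivec_retriever import Retriever

retriever = Retriever(top_k=3)  # store_path, id_key and namespace default from config
chunks = retriever.get_chunk("What is retrieval augmented generation?")
scored = retriever.get_chunk("What is retrieval augmented generation?", with_score=True)
```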