From 8a85720c6c443949af014543ed30be6d763e5ca3 Mon Sep 17 00:00:00 2001
From: Arjun Bingly
Date: Sun, 28 Apr 2024 12:53:22 -0400
Subject: [PATCH 1/5] Add defaults

---
 config_Def.ini                                | 68 +++++++++++++++++++
 src/grag/components/llm.py                    | 18 ++---
 src/grag/components/multivec_retriever.py     | 38 +++++------
 src/grag/components/parse_pdf.py              | 18 ++---
 src/grag/components/text_splitter.py          |  4 +-
 src/grag/components/vectordb/chroma_client.py | 10 +--
 .../components/vectordb/deeplake_client.py    |  8 +--
 7 files changed, 117 insertions(+), 47 deletions(-)
 create mode 100644 config_Def.ini

diff --git a/config_Def.ini b/config_Def.ini
new file mode 100644
index 0000000..d39e345
--- /dev/null
+++ b/config_Def.ini
@@ -0,0 +1,68 @@
+[llm]
+model_name : Llama-2-13b-chat
+# meta-llama/Llama-2-70b-chat-hf Mixtral-8x7B-Instruct-v0.1
+quantization : Q5_K_M
+pipeline : llama_cpp
+device_map : auto
+task : text-generation
+max_new_tokens : 1024
+temperature : 0.1
+n_batch : 1024
+n_ctx : 6000
+n_gpu_layers : -1
+# The number of layers to put on the GPU. Mixtral-18, gemma-20
+std_out : True
+base_dir : ${root:root_path}/models
+
+[chroma_client]
+host : localhost
+port : 8000
+collection_name : arxiv
+# embedding_type : sentence-transformers
+# embedding_model : "all-mpnet-base-v2"
+embedding_type : instructor-embedding
+embedding_model : hkunlp/instructor-xl
+;store_path : ${data:data_path}/vectordb
+;allow_reset : True
+
+[deeplake_client]
+collection_name : arxiv
+# embedding_type : sentence-transformers
+# embedding_model : "all-mpnet-base-v2"
+embedding_type : instructor-embedding
+embedding_model : hkunlp/instructor-xl
+store_path : ${data:data_path}/vectordb
+
+[text_splitter]
+chunk_size : 5000
+chunk_overlap : 400
+
+[multivec_retriever]
+# store_path: data/docs
+store_path : ${data:data_path}/doc_store
+# namespace: UUID(8c9040b0-b5cd-4d7c-bc2e-737da1b24ebf)
+namespace : 8c9040b0b5cd4d7cbc2e737da1b24ebf
+id_key : doc_id
+top_k : 3
+
+[parse_pdf]
+single_text_out : True
+strategy : hi_res
+infer_table_structure : True
+extract_images : True
+image_output_dir : None
+add_captions_to_text : True
+add_captions_to_blocks : True
+table_as_html : True
+
+[data]
+data_path : ${root:root_path}/data
+
+[env]
+env_path : ${root:root_path}/.env
+
+[root]
+root_path : /home/ubuntu/volume_2k/Capstone_5
+
+[quantize]
+llama_cpp_path : ${root:root_path}
diff --git a/src/grag/components/llm.py b/src/grag/components/llm.py
index 1786941..2799b8b 100644
--- a/src/grag/components/llm.py
+++ b/src/grag/components/llm.py
@@ -36,17 +36,17 @@ class LLM:
     def __init__(
         self,
         model_name: str,
-        device_map: str,
-        task: str,
-        max_new_tokens: str,
-        temperature: str,
-        n_batch: str,
-        n_ctx: str,
-        n_gpu_layers: str,
-        std_out: Union[bool, str],
-        base_dir: str,
         quantization: str,
         pipeline: str,
+        device_map: str = 'auto',
+        task: str = 'text-generation',
+        max_new_tokens: Union[str, int] = 1024,
+        temperature: Union[str, float] = 0.1,
+        n_batch: Union[str, int] = 1024,
+        n_ctx: Union[str, int] = 6000,
+        n_gpu_layers: Union[str, int] = -1,
+        std_out: Union[bool, str] = True,
+        base_dir: Union[str, Path] = Path('models'),
         callbacks=None,
     ):
         """Initialize the LLM class using the given parameters."""
diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py
index 3f85fe9..6ba7b1e 100644
--- a/src/grag/components/multivec_retriever.py
+++ b/src/grag/components/multivec_retriever.py
@@ -43,13 +43,13 @@ class Retriever:
     """

     def __init__(
-        self,
-        store_path: Union[str, Path],
-        top_k: str,
-        id_key: str,
-        vectordb: Optional[VectorDB] = None,
-        namespace: Optional[str] = None,
-        client_kwargs: Optional[Dict[str, Any]] = None,
+            self,
+            vectordb: Optional[VectorDB] = None,
+            store_path: Union[str, Path] = Path('data/doc_store'),
+            top_k: Union[str, int] = 3,
+            id_key: str = 'doc_id',
+            namespace: str = '71e4b558187b270922923569301f1039',
+            client_kwargs: Optional[Dict[str, Any]] = None,
     ):
         """Initialize the Retriever.

@@ -240,12 +240,12 @@ def get_docs_from_chunks(self, chunks: List[Document], one_to_one=False):
         return [d for d in docs if d is not None]

     def ingest(
-        self,
-        dir_path: Union[str, Path],
-        glob_pattern: str = "**/*.pdf",
-        dry_run: bool = False,
-        verbose: bool = True,
-        parser_kwargs: Optional[Dict[str, Any]] = None,
+            self,
+            dir_path: Union[str, Path],
+            glob_pattern: str = "**/*.pdf",
+            dry_run: bool = False,
+            verbose: bool = True,
+            parser_kwargs: Optional[Dict[str, Any]] = None,
     ):
         """Ingests the files in directory.

@@ -282,12 +282,12 @@ def ingest(
             print(f"DRY RUN: found - {filepath.relative_to(dir_path)}")

     async def aingest(
-        self,
-        dir_path: Union[str, Path],
-        glob_pattern: str = "**/*.pdf",
-        dry_run: bool = False,
-        verbose: bool = True,
-        parser_kwargs: Optional[Dict[str, Any]] = None,
+            self,
+            dir_path: Union[str, Path],
+            glob_pattern: str = "**/*.pdf",
+            dry_run: bool = False,
+            verbose: bool = True,
+            parser_kwargs: Optional[Dict[str, Any]] = None,
     ):
         """Asynchronously ingests the files in directory.
diff --git a/src/grag/components/parse_pdf.py b/src/grag/components/parse_pdf.py
index 3636098..13fe28b 100644
--- a/src/grag/components/parse_pdf.py
+++ b/src/grag/components/parse_pdf.py
@@ -5,6 +5,8 @@
 - ParsePDF
 """

+from typing import Optional
+
 from grag.components.utils import configure_args
 from langchain_core.documents import Document
 from unstructured.partition.pdf import partition_pdf
@@ -28,14 +30,14 @@ class ParsePDF:

     def __init__(
         self,
-        single_text_out,
-        strategy,
-        infer_table_structure,
-        extract_images,
-        image_output_dir,
-        add_captions_to_text,
-        add_captions_to_blocks,
-        table_as_html,
+        single_text_out: bool = True,
+        strategy: str = "hi_res",
+        infer_table_structure: bool = True,
+        extract_images: bool = True,
+        image_output_dir: Optional[str] = None,
+        add_captions_to_text: bool = True,
+        add_captions_to_blocks: bool = True,
+        table_as_html: bool = False,
     ):
         """Initialize instance variables with parameters."""
         self.strategy = strategy
diff --git a/src/grag/components/text_splitter.py b/src/grag/components/text_splitter.py
index 025ec72..94a059e 100644
--- a/src/grag/components/text_splitter.py
+++ b/src/grag/components/text_splitter.py
@@ -22,8 +22,8 @@ class TextSplitter:

     def __init__(
         self,
-        chunk_size: Union[int, str],
-        chunk_overlap: Union[int, str]
+        chunk_size: Union[int, str] = 2000,
+        chunk_overlap: Union[int, str] = 400
     ):
         """Initialize TextSplitter."""
         self.text_splitter = RecursiveCharacterTextSplitter(
diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py
index 7047171..758749c 100644
--- a/src/grag/components/vectordb/chroma_client.py
+++ b/src/grag/components/vectordb/chroma_client.py
@@ -44,11 +44,11 @@ class ChromaClient(VectorDB):

     def __init__(
         self,
-        host: str,
-        port: str,
-        collection_name: str,
-        embedding_type: str,
-        embedding_model: str,
+        host: str = 'localhost',
+        port: Union[str, int] = 8000,
+        collection_name: str = 'grag',
+        embedding_type: str = 'instructor-embedding',
+        embedding_model: str = 'hkunlp/instructor-xl',
     ):
         """Initialize a ChromaClient object.
diff --git a/src/grag/components/vectordb/deeplake_client.py b/src/grag/components/vectordb/deeplake_client.py
index 5c9cbf9..5d4b832 100644
--- a/src/grag/components/vectordb/deeplake_client.py
+++ b/src/grag/components/vectordb/deeplake_client.py
@@ -40,10 +40,10 @@ class DeepLakeClient(VectorDB):

     def __init__(
         self,
-        collection_name: str,
-        store_path: Union[str, Path],
-        embedding_type: str,
-        embedding_model: str,
+        collection_name: str = 'grag',
+        store_path: Union[str, Path] = Path('data/vectordb'),
+        embedding_type: str = 'instructor-embedding',
+        embedding_model: str = 'hkunlp/instructor-xl',
         read_only: bool = False,
     ):
         """Initialize DeepLake client object."""
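[Note on patch 1] With this patch, none of the component constructors require a config file any more; every tunable argument carries an in-code default. A minimal sketch of the resulting call sites, using only the signatures from the hunks above (the wiring is illustrative, not part of the patch):

    from grag.components.text_splitter import TextSplitter
    from grag.components.vectordb.deeplake_client import DeepLakeClient
    from grag.components.multivec_retriever import Retriever

    splitter = TextSplitter()                # chunk_size=2000, chunk_overlap=400
    client = DeepLakeClient()                # data/vectordb, instructor-xl embeddings
    retriever = Retriever(vectordb=client)   # top_k=3, id_key='doc_id'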
From bccad302efd340f32ad09d77c85da7daa7ee5a34 Mon Sep 17 00:00:00 2001
From: Arjun Bingly
Date: Sun, 28 Apr 2024 17:55:01 -0400
Subject: [PATCH 2/5] Fix doc strings

---
 src/grag/components/embedding.py              |  2 +-
 src/grag/components/multivec_retriever.py     | 17 +++++++-------
 src/grag/components/parse_pdf.py              | 10 +++++----
 src/grag/components/prompt.py                 |  6 ++---
 src/grag/components/text_splitter.py          |  5 ++---
 src/grag/components/utils.py                  | 13 ++++++-----
 src/grag/components/vectordb/base.py          |  6 ++---
 src/grag/components/vectordb/chroma_client.py | 16 ++++++++------
 .../components/vectordb/deeplake_client.py    | 22 ++++++++++++++-----
 9 files changed, 57 insertions(+), 40 deletions(-)

diff --git a/src/grag/components/embedding.py b/src/grag/components/embedding.py
index a29e2e2..8b24d45 100644
--- a/src/grag/components/embedding.py
+++ b/src/grag/components/embedding.py
@@ -2,7 +2,7 @@

 This module provides:

-- Embedding
+- Embedding
 """

 from langchain_community.embeddings import HuggingFaceInstructEmbeddings
diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py
index 6ba7b1e..40f9775 100644
--- a/src/grag/components/multivec_retriever.py
+++ b/src/grag/components/multivec_retriever.py
@@ -2,7 +2,7 @@

 This module provides:

-- Retriever
+- Retriever
 """

 import uuid
@@ -30,9 +30,10 @@ class Retriever:
         linked document, chunk, etc.

     Attributes:
+        vectordb: ChromaClient class instance from components.client
+            (optional; if the user provides it, store_path, id_key and namespace are not considered)
         store_path: Path to the local file store
         id_key: A key prefix for identifying documents
-        vectordb: ChromaClient class instance from components.client
         store: langchain.storage.LocalFileStore object, stores the key value pairs of document id and parent file
         retriever: langchain.retrievers.multi_vector.MultiVectorRetriever class instance, langchain's multi-vector retriever
@@ -55,11 +56,11 @@ def __init__(

         Args:
             vectordb: Vector DB client instance
-            store_path: Path to the local file store, defaults to argument from config file
-            id_key: A key prefix for identifying documents, defaults to argument from config file
-            namespace: A namespace for producing unique id, defaults to argument from congig file
-            top_k: Number of top chunks to return from similarity search, defaults to 1
-            client_kwargs: kwargs to pass to the vectordb client
+            store_path: Path to the local file store, defaults to data/doc_store
+            id_key: A key prefix for identifying documents, defaults to 'doc_id'
+            namespace: A namespace for producing unique id
+            top_k: Number of top chunks to return from similarity search, defaults to 3
+            client_kwargs: kwargs to pass to the vectordb client constructor, optional, defaults to None
         """
         self.store_path = store_path
         self.id_key = id_key
@@ -89,7 +90,7 @@ def __init__(
     def id_gen(self, doc: Document) -> str:
         """Takes a document and returns a unique id (uuid5) using the namespace and document source.

-    This ensures that a single document always gets the same unique id.
+        This ensures that a single document always gets the same unique id.

         Args:
             doc: langchain_core.documents.Document
diff --git a/src/grag/components/parse_pdf.py b/src/grag/components/parse_pdf.py
index 13fe28b..5ec4089 100644
--- a/src/grag/components/parse_pdf.py
+++ b/src/grag/components/parse_pdf.py
@@ -2,7 +2,7 @@

 This module provides:

-- ParsePDF
+- ParsePDF
 """

 from typing import Optional
@@ -19,13 +19,14 @@ class ParsePDF:
     Attributes:
         single_text_out (bool): Whether to combine all text elements into a single output document.
         strategy (str): The strategy for PDF partitioning; default is "hi_res" for better accuracy.
-        extract_image_block_types (list): Elements to be extracted as image blocks.
         infer_table_structure (bool): Whether to extract tables during partitioning.
         extract_images (bool): Whether to extract images.
         image_output_dir (str): Directory to save extracted images, if any.
         add_captions_to_text (bool): Whether to include figure captions in text output. Default is True.
         add_captions_to_blocks (bool): Whether to add captions to table and image blocks. Default is True.
-        add_caption_first (bool): Whether to place captions before their corresponding image or table in the output. Default is True.
+        add_caption_first (bool): Whether to place captions before their corresponding image or table in the output.
+            Default is True.
+        table_as_html (bool): Whether to add table elements as HTML. Default is False.
""" def __init__( @@ -37,6 +38,7 @@ def __init__( image_output_dir: Optional[str] = None, add_captions_to_text: bool = True, add_captions_to_blocks: bool = True, + add_caption_first: bool = True, table_as_html: bool = False, ): """Initialize instance variables with parameters.""" @@ -53,7 +55,7 @@ def __init__( self.add_captions_to_blocks = add_captions_to_blocks self.image_output_dir = image_output_dir self.single_text_out = single_text_out - self.add_caption_first = True + self.add_caption_first = add_caption_first self.table_as_html = table_as_html def partition(self, path: str): diff --git a/src/grag/components/prompt.py b/src/grag/components/prompt.py index 7cfd40b..4c43e1a 100644 --- a/src/grag/components/prompt.py +++ b/src/grag/components/prompt.py @@ -2,9 +2,9 @@ This module provides: -- Prompt - for generic prompts +— Prompt: for generic prompts -- FewShotPrompt - for few-shot prompts +— FewShotPrompt: for few-shot prompts """ import json @@ -86,7 +86,7 @@ def __init__(self, **kwargs): ) def save( - self, filepath: Union[Path, str, None], overwrite=False + self, filepath: Union[Path, str, None], overwrite=False ) -> Union[None, ValueError]: """Saves the prompt class into a json file.""" dump = self.model_dump_json(indent=2, exclude_defaults=True, exclude_none=True) diff --git a/src/grag/components/text_splitter.py b/src/grag/components/text_splitter.py index 94a059e..7b8275c 100644 --- a/src/grag/components/text_splitter.py +++ b/src/grag/components/text_splitter.py @@ -2,7 +2,7 @@ This module provides: -- TextSplitter +— TextSplitter """ from typing import Union @@ -25,11 +25,10 @@ def __init__( chunk_size: Union[int, str] = 2000, chunk_overlap: Union[int, str] = 400 ): - """Initialize TextSplitter.""" + """Initialize TextSplitter using chunk_size and chunk_overlap.""" self.text_splitter = RecursiveCharacterTextSplitter( chunk_size=int(chunk_size), chunk_overlap=int(chunk_overlap), length_function=len, is_separator_regex=False, ) - """Initialize TextSplitter using chunk_size and chunk_overlap""" diff --git a/src/grag/components/utils.py b/src/grag/components/utils.py index 991550c..233d79c 100644 --- a/src/grag/components/utils.py +++ b/src/grag/components/utils.py @@ -2,13 +2,15 @@ This module provides: -- stuff_docs: concats langchain documents into string +— stuff_docs: concats langchain documents into string -- load_prompt: loads json prompt to langchain prompt +— load_prompt: loads json prompt to langchain prompt -- find_config_path: finds the path of the 'config.ini' file by traversing up the directory tree from the current path. +— find_config_path: finds the path of the 'config.ini' file by traversing up the directory tree from the current path. -- get_config: retrieves and parses the configuration settings from the 'config.ini' file. +— get_config: retrieves and parses the configuration settings from the 'config.ini' file. + +— configure_args: a decorator to configure class instantiation arguments from a 'config.ini' file. """ import os @@ -38,7 +40,7 @@ def find_config_path(current_path: Path): """Finds the path of the 'config.ini' file by traversing up the directory tree from the current path. This function starts at the current path and moves up the directory tree until it finds a file named 'config.ini'. - If 'config.ini' is not found by the time the root of the directory tree is reached, a FileNotFoundError is raised. + If 'config.ini' is not found by the time the root of the directory tree is reached, None is returned. 
     Args:
         current_path (Path): The starting point for the search, typically the location of the script being executed.
@@ -61,6 +63,7 @@ def get_config(load_env=False):
     This function locates the 'config.ini' file by calling `find_config_path` using the script's current location.
     It initializes a `ConfigParser` object to read the configuration settings from the located 'config.ini' file.
     Optionally, it can also load environment variables from a `.env` file specified in the config.
+    If a config file cannot be read, a default dictionary is returned.

     Args:
         load_env (bool): If True, load environment variables from the path specified in the 'config.ini'. Defaults to False.
diff --git a/src/grag/components/vectordb/base.py b/src/grag/components/vectordb/base.py
index 420a1b7..ee11c07 100644
--- a/src/grag/components/vectordb/base.py
+++ b/src/grag/components/vectordb/base.py
@@ -2,7 +2,7 @@

 This module provides:

-- VectorDB
+- VectorDB
 """

 from abc import ABC, abstractmethod
@@ -56,7 +56,7 @@ async def aadd_docs(self, docs: List[Document], verbose: bool = True) -> None:

     @abstractmethod
     def get_chunk(
-        self, query: str, with_score: bool = False, top_k: Optional[int] = None
+            self, query: str, with_score: bool = False, top_k: Optional[int] = None
     ) -> Union[List[Document], List[Tuple[Document, float]]]:
         """Returns the most similar chunks from the vector database.

@@ -72,7 +72,7 @@ def get_chunk(

     @abstractmethod
     async def aget_chunk(
-        self, query: str, with_score: bool = False, top_k: Optional[int] = None
+            self, query: str, with_score: bool = False, top_k: Optional[int] = None
     ) -> Union[List[Document], List[Tuple[Document, float]]]:
         """Returns the most similar chunks from the vector database (asynchronous).
diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py
index 758749c..ddb80f2 100644
--- a/src/grag/components/vectordb/chroma_client.py
+++ b/src/grag/components/vectordb/chroma_client.py
@@ -2,7 +2,7 @@

 This module provides:

-- ChromaClient
+- ChromaClient
 """

 from typing import List, Optional, Tuple, Union
@@ -24,7 +24,7 @@ class ChromaClient(VectorDB):
     Attributes:
         host : str
             IP Address of hosted Chroma Vectorstore
-        port : str
+        port : str or int
             port address of hosted Chroma Vectorstore
         collection_name : str
             name of the collection in the Chroma Vectorstore, each ChromaClient connects to a single collection
@@ -53,11 +53,13 @@ def __init__(
         """Initialize a ChromaClient object.

         Args:
-            host: IP Address of hosted Chroma Vectorstore, defaults to argument from config file
-            port: port address of hosted Chroma Vectorstore, defaults to argument from config file
-            collection_name: name of the collection in the Chroma Vectorstore, defaults to argument from config file
-            embedding_type: type of embedding used, supported 'sentence-transformers' and 'instructor-embedding', defaults to argument from config file
-            embedding_model: model name of embedding used, should correspond to the embedding_type, defaults to argument from config file
+            host: IP Address of hosted Chroma Vectorstore, defaults to localhost
+            port: port address of hosted Chroma Vectorstore, defaults to 8000
+            collection_name: name of the collection in the Chroma Vectorstore, defaults to 'grag'
+            embedding_type: type of embedding used, supported 'sentence-transformers' and 'instructor-embedding',
+                defaults to instructor-embedding
+            embedding_model: model name of embedding used, should correspond to the embedding_type,
+                defaults to hkunlp/instructor-xl.
""" self.host = host self.port = port diff --git a/src/grag/components/vectordb/deeplake_client.py b/src/grag/components/vectordb/deeplake_client.py index 5d4b832..1eb57e1 100644 --- a/src/grag/components/vectordb/deeplake_client.py +++ b/src/grag/components/vectordb/deeplake_client.py @@ -2,7 +2,7 @@ This module provides: -- DeepLakeClient +— DeepLakeClient """ from pathlib import Path @@ -32,21 +32,31 @@ class DeepLakeClient(VectorDB): a function of the embedding model, derived from the embedding_type and embedding_modelname client: deeplake.core.vectorstore.VectorStore DeepLake API - collection - Chroma API for the collection + collection_name: str + The name of the collection where the vectors are stored. langchain_client: langchain_community.vectorstores.DeepLake - LangChain wrapper for DeepLake API + LangChain wrapper for DeepLake API. """ def __init__( self, - collection_name: str = 'grag', store_path: Union[str, Path] = Path('data/vectordb'), + collection_name: str = 'grag', embedding_type: str = 'instructor-embedding', embedding_model: str = 'kunlp/instructor-xl', read_only: bool = False, ): - """Initialize DeepLake client object.""" + """Initialize a DeepLakeClient object. + + Args: + store_path: path to the deeplake vectorstore, defaults to 'data/vectordb' + collection_name: name of the collection in the DeepLake Vectorstore, defaults to 'grag' + embedding_type: type of embedding used, supported 'sentence-transformers' and 'instructor-embedding', + defaults to instructor-embedding + embedding_model: model name of embedding used, should correspond to the embedding_type, + defaults to hkunlp/instructor-xl + read_only: flag indicating whether the client is read-only, defaults to False. + """ self.store_path = Path(store_path) self.collection_name = collection_name self.read_only = read_only From 911165dc5db2e1c8f5d818f279eea13a0c2097a7 Mon Sep 17 00:00:00 2001 From: Arjun Bingly Date: Sun, 28 Apr 2024 23:02:26 -0400 Subject: [PATCH 3/5] Add config.ini with defaults --- default_config.ini | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 default_config.ini diff --git a/default_config.ini b/default_config.ini new file mode 100644 index 0000000..5428c80 --- /dev/null +++ b/default_config.ini @@ -0,0 +1,62 @@ +; This is the default config.ini file generated by GRAG +; All values are same as package defaults +; Values that do not have a default value are commented out + +[llm] +;model_name : Llama-2-13b-chat +;quantization : Q5_K_M +;pipeline : llama_cpp +device_map : auto +task : text-generation +max_new_tokens : 1024 +temperature : 0.1 +n_batch : 1024 +n_ctx : 6000 +n_gpu_layers : -1 +std_out : True +base_dir : ${root:root_path}/models + +[chroma_client] +host : localhost +port : 8000 +collection_name : grag +embedding_type : instructor-embedding +embedding_model : hkunlp/instructor-xl + +[deeplake_client] +collection_name : grag +embedding_type : instructor-embedding +embedding_model : hkunlp/instructor-xl +store_path : ${data:data_path}/vectordb + +[text_splitter] +chunk_size : 2000 +chunk_overlap : 400 + +[multivec_retriever] +store_path : ${data:data_path}/doc_store +top_k : 3 +id_key : doc_id +namespace : 71e4b558187b270922923569301f1039 + +[parse_pdf] +single_text_out : True +strategy : hi_res +infer_table_structure : True +extract_images : True +image_output_dir : None +add_captions_to_text : True +add_captions_to_blocks : True +table_as_html : False + +[data] +data_path : ${root:root_path}/data + +[env] +env_path : 
From 911165dc5db2e1c8f5d818f279eea13a0c2097a7 Mon Sep 17 00:00:00 2001
From: Arjun Bingly
Date: Sun, 28 Apr 2024 23:02:26 -0400
Subject: [PATCH 3/5] Add config.ini with defaults

---
 default_config.ini | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 default_config.ini

diff --git a/default_config.ini b/default_config.ini
new file mode 100644
index 0000000..5428c80
--- /dev/null
+++ b/default_config.ini
@@ -0,0 +1,62 @@
+; This is the default config.ini file generated by GRAG
+; All values are the same as the package defaults
+; Values that do not have a default value are commented out
+
+[llm]
+;model_name : Llama-2-13b-chat
+;quantization : Q5_K_M
+;pipeline : llama_cpp
+device_map : auto
+task : text-generation
+max_new_tokens : 1024
+temperature : 0.1
+n_batch : 1024
+n_ctx : 6000
+n_gpu_layers : -1
+std_out : True
+base_dir : ${root:root_path}/models
+
+[chroma_client]
+host : localhost
+port : 8000
+collection_name : grag
+embedding_type : instructor-embedding
+embedding_model : hkunlp/instructor-xl
+
+[deeplake_client]
+collection_name : grag
+embedding_type : instructor-embedding
+embedding_model : hkunlp/instructor-xl
+store_path : ${data:data_path}/vectordb
+
+[text_splitter]
+chunk_size : 2000
+chunk_overlap : 400
+
+[multivec_retriever]
+store_path : ${data:data_path}/doc_store
+top_k : 3
+id_key : doc_id
+namespace : 71e4b558187b270922923569301f1039
+
+[parse_pdf]
+single_text_out : True
+strategy : hi_res
+infer_table_structure : True
+extract_images : True
+image_output_dir : None
+add_captions_to_text : True
+add_captions_to_blocks : True
+table_as_html : False
+
+[data]
+data_path : ${root:root_path}/data
+
+[env]
+env_path : ${root:root_path}/.env
+
+[root]
+root_path : ~/Capstone_5 ; check if ~ works
+
+[quantize]
+llama_cpp_path : ${root:root_path}
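[Note on patch 3] The ${section:key} references in default_config.ini use configparser's ExtendedInterpolation syntax, and the ':' key-value delimiter is accepted by configparser out of the box. Two caveats are visible in the file itself: configparser strips full-line ';' comments but, by default, not inline ones (which is why patch 4 below moves '; check if ~ works' onto its own line), and it never expands '~' (hence that note). A sketch of reading the file with the standard library, assuming the patch 4 fix is applied:

    from configparser import ConfigParser, ExtendedInterpolation
    from pathlib import Path

    config = ConfigParser(interpolation=ExtendedInterpolation())
    config.read('default_config.ini')
    # ${root:root_path}/data resolves on access; '~' still needs expanding by hand
    data_path = Path(config['data']['data_path']).expanduser()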
""" def __init__( - self, - model_name: str, - quantization: str, - pipeline: str, - device_map: str = 'auto', - task: str = 'text-generation', - max_new_tokens: str = '1024', - temperature: Union[str, int] = 0.1, - n_batch: Union[str, int] = 1024, - n_ctx: Union[str, int] = 6000, - n_gpu_layers: Union[str, int] = -1, - std_out: Union[bool, str] = True, - base_dir: Union[str, Path] = Path('models'), - callbacks=None, + self, + model_name: str, + quantization: str, + pipeline: str, + device_map: str = 'auto', + task: str = 'text-generation', + max_new_tokens: str = '1024', + temperature: Union[str, int] = 0.1, + n_batch: Union[str, int] = 1024, + n_ctx: Union[str, int] = 6000, + n_gpu_layers: Union[str, int] = -1, + std_out: Union[bool, str] = True, + base_dir: Union[str, Path] = Path('models'), + callbacks=None, ): - """Initialize the LLM class using the given parameters.""" + """Initialize the LLM class using the given parameters. + + Args: + model_name (str): Specifies the model name. + quantization (str): Sets the model's quantization configuration. + pipeline (str): Determines which pipeline to use for model operations. + device_map (str, optional): Device configuration for model deployment. + task (str, optional): Defines the specific task or use-case of the model. + max_new_tokens (int, optional): Limits the number of tokens generated in one operation. + temperature (float, optional): Controls the generation randomness. + n_batch (int, optional): Adjusts batch processing size. + n_ctx (int, optional): Configures the context size used in model operations. + n_gpu_layers (int, optional): Sets the depth of computation in GPU layers. + std_out (bool or str, optional): Manages standard output settings. + base_dir (str or Path, optional): Specifies the directory for storing model files. + callbacks (list, optional): Provides custom callback functions for runtime. + """ self.base_dir = Path(base_dir) self._model_name = model_name self.quantization = quantization @@ -159,8 +184,8 @@ def llama_cpp(self): return llm def load_model( - self, model_name: Optional[str] = None, pipeline: Optional[str] = None, quantization: Optional[str] = None, - is_local: Optional[bool] = None + self, model_name: Optional[str] = None, pipeline: Optional[str] = None, quantization: Optional[str] = None, + is_local: Optional[bool] = None ): """Loads the model based on the specified pipeline and model name. From 1cfe5cfaef43a01d56661a13fe0a658a597d8d23 Mon Sep 17 00:00:00 2001 From: Sanchit Vijay Date: Tue, 30 Apr 2024 16:24:47 -0400 Subject: [PATCH 5/5] Delete config_Def.ini --- config_Def.ini | 68 -------------------------------------------------- 1 file changed, 68 deletions(-) delete mode 100644 config_Def.ini diff --git a/config_Def.ini b/config_Def.ini deleted file mode 100644 index d39e345..0000000 --- a/config_Def.ini +++ /dev/null @@ -1,68 +0,0 @@ -[llm] -model_name : Llama-2-13b-chat -# meta-llama/Llama-2-70b-chat-hf Mixtral-8x7B-Instruct-v0.1 -quantization : Q5_K_M -pipeline : llama_cpp -device_map : auto -task : text-generation -max_new_tokens : 1024 -temperature : 0.1 -n_batch : 1024 -n_ctx : 6000 -n_gpu_layers : -1 -# The number of layers to put on the GPU. 
From 1cfe5cfaef43a01d56661a13fe0a658a597d8d23 Mon Sep 17 00:00:00 2001
From: Sanchit Vijay
Date: Tue, 30 Apr 2024 16:24:47 -0400
Subject: [PATCH 5/5] Delete config_Def.ini

---
 config_Def.ini | 68 --------------------------------------------------
 1 file changed, 68 deletions(-)
 delete mode 100644 config_Def.ini

diff --git a/config_Def.ini b/config_Def.ini
deleted file mode 100644
index d39e345..0000000
--- a/config_Def.ini
+++ /dev/null
@@ -1,68 +0,0 @@
-[llm]
-model_name : Llama-2-13b-chat
-# meta-llama/Llama-2-70b-chat-hf Mixtral-8x7B-Instruct-v0.1
-quantization : Q5_K_M
-pipeline : llama_cpp
-device_map : auto
-task : text-generation
-max_new_tokens : 1024
-temperature : 0.1
-n_batch : 1024
-n_ctx : 6000
-n_gpu_layers : -1
-# The number of layers to put on the GPU. Mixtral-18, gemma-20
-std_out : True
-base_dir : ${root:root_path}/models
-
-[chroma_client]
-host : localhost
-port : 8000
-collection_name : arxiv
-# embedding_type : sentence-transformers
-# embedding_model : "all-mpnet-base-v2"
-embedding_type : instructor-embedding
-embedding_model : hkunlp/instructor-xl
-;store_path : ${data:data_path}/vectordb
-;allow_reset : True
-
-[deeplake_client]
-collection_name : arxiv
-# embedding_type : sentence-transformers
-# embedding_model : "all-mpnet-base-v2"
-embedding_type : instructor-embedding
-embedding_model : hkunlp/instructor-xl
-store_path : ${data:data_path}/vectordb
-
-[text_splitter]
-chunk_size : 5000
-chunk_overlap : 400
-
-[multivec_retriever]
-# store_path: data/docs
-store_path : ${data:data_path}/doc_store
-# namespace: UUID(8c9040b0-b5cd-4d7c-bc2e-737da1b24ebf)
-namespace : 8c9040b0b5cd4d7cbc2e737da1b24ebf
-id_key : doc_id
-top_k : 3
-
-[parse_pdf]
-single_text_out : True
-strategy : hi_res
-infer_table_structure : True
-extract_images : True
-image_output_dir : None
-add_captions_to_text : True
-add_captions_to_blocks : True
-table_as_html : True
-
-[data]
-data_path : ${root:root_path}/data
-
-[env]
-env_path : ${root:root_path}/.env
-
-[root]
-root_path : /home/ubuntu/volume_2k/Capstone_5
-
-[quantize]
-llama_cpp_path : ${root:root_path}
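[Note on patch 5] With config_Def.ini deleted, default_config.ini plus the in-code defaults from patch 1 are the single source of truth. A closing sketch of the ingestion path, using only signatures shown in this series; it assumes the Retriever falls back to a default vector store when no client is passed, which is not shown in these hunks:

    from grag.components.multivec_retriever import Retriever

    retriever = Retriever()  # assumes a default vectordb is created internally
    retriever.ingest('data/pdf', glob_pattern='**/*.pdf', dry_run=True)
    # dry_run only lists matches: 'DRY RUN: found - <relative path>'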