Commit
Merge pull request #39 from arjbingly/sanchitvj-patch-2
Update ruff_commit.yml
arjbingly authored Mar 20, 2024
2 parents e91ea84 + b378f56 commit c07329c
Showing 17 changed files with 357 additions and 292 deletions.
7 changes: 3 additions & 4 deletions .github/workflows/ruff_commit.yml
@@ -3,14 +3,13 @@ on: push

jobs:
lint:
runs-on: ubuntu_latest
runs-on: self-hosted
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- run: pip install ruff
- run: |
ruff check src/
ruff fix src/
# - run: ruff check src/
- run: ruff format src/
- uses: stefanzweifel/git-auto-commit-action@v4
with:
commit_message: 'style fixes by ruff'
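
The updated workflow step can be mirrored locally. The snippet below is a minimal sketch, assuming `ruff` is installed in the current environment (e.g. via `pip install ruff`); it reproduces the workflow's `ruff format src/` command:

```python
# Local equivalent of the updated CI formatting step; assumes `ruff` is on PATH
# after `pip install ruff`.
import subprocess

subprocess.run(["ruff", "format", "src/"], check=True)
```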
47 changes: 30 additions & 17 deletions src/grag/components/chroma_client.py
@@ -10,7 +10,7 @@
from grag.components.embedding import Embedding
from grag.components.utils import get_config

chroma_conf = get_config()['chroma']
chroma_conf = get_config()["chroma"]


class ChromaClient:
@@ -37,12 +37,14 @@ class ChromaClient:
LangChain wrapper for Chroma collection
"""

def __init__(self,
host=chroma_conf['host'],
port=chroma_conf['port'],
collection_name=chroma_conf['collection_name'],
embedding_type=chroma_conf['embedding_type'],
embedding_model=chroma_conf['embedding_model']):
def __init__(
self,
host=chroma_conf["host"],
port=chroma_conf["port"],
collection_name=chroma_conf["collection_name"],
embedding_type=chroma_conf["embedding_type"],
embedding_model=chroma_conf["embedding_model"],
):
"""Args:
host: IP Address of hosted Chroma Vectorstore, defaults to argument from config file
port: port address of hosted Chroma Vectorstore, defaults to argument from config file
@@ -56,14 +58,19 @@ def __init__(self,
self.embedding_type: str = embedding_type
self.embedding_model: str = embedding_model

self.embedding_function = Embedding(embedding_model=self.embedding_model,
embedding_type=self.embedding_type).embedding_function
self.embedding_function = Embedding(
embedding_model=self.embedding_model, embedding_type=self.embedding_type
).embedding_function

self.chroma_client = chromadb.HttpClient(host=self.host, port=self.port)
self.collection = self.chroma_client.get_or_create_collection(name=self.collection_name)
self.langchain_chroma = Chroma(client=self.chroma_client,
collection_name=self.collection_name,
embedding_function=self.embedding_function, )
self.collection = self.chroma_client.get_or_create_collection(
name=self.collection_name
)
self.langchain_chroma = Chroma(
client=self.chroma_client,
collection_name=self.collection_name,
embedding_function=self.embedding_function,
)
self.allowed_metadata_types = (str, int, float, bool)

def test_connection(self, verbose=True):
@@ -78,9 +85,9 @@ def test_connection(self, verbose=True):
response = self.chroma_client.heartbeat()
if verbose:
if response:
print(f'Connection to {self.host}/{self.port} is alive..')
print(f"Connection to {self.host}/{self.port} is alive..")
else:
print(f'Connection to {self.host}/{self.port} is not alive !!')
print(f"Connection to {self.host}/{self.port} is not alive !!")
return response

async def aadd_docs(self, docs: List[Document], verbose=True):
@@ -100,7 +107,11 @@ async def aadd_docs(self, docs: List[Document], verbose=True):
# else:
# await asyncio.gather(*tasks)
if verbose:
for doc in atqdm(docs, desc=f'Adding documents to {self.collection_name}', total=len(docs)):
for doc in atqdm(
docs,
desc=f"Adding documents to {self.collection_name}",
total=len(docs),
):
await self.langchain_chroma.aadd_documents([doc])
else:
for doc in docs:
@@ -117,7 +128,9 @@ def add_docs(self, docs: List[Document], verbose=True):
None
"""
docs = self._filter_metadata(docs)
for doc in (tqdm(docs, desc=f'Adding to {self.collection_name}:') if verbose else docs):
for doc in (
tqdm(docs, desc=f"Adding to {self.collection_name}:") if verbose else docs
):
_id = self.langchain_chroma.add_documents([doc])

def _filter_metadata(self, docs: List[Document]):
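
For context, a minimal usage sketch of the reformatted `ChromaClient` follows. It assumes a Chroma server is reachable at the host and port set in the config file (all constructor arguments default to `get_config()["chroma"]` values); the document and the `Document` import path are illustrative, not taken from this diff:

```python
from grag.components.chroma_client import ChromaClient
from langchain_core.documents import Document  # import path assumed

client = ChromaClient()  # host, port, collection and embedding settings default from config
if client.test_connection():  # heartbeat check against the hosted Chroma instance
    docs = [Document(page_content="hello world", metadata={"source": "demo.txt"})]
    client.add_docs(docs)  # filters unsupported metadata types, then adds with a progress bar
```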
16 changes: 10 additions & 6 deletions src/grag/components/embedding.py
@@ -20,11 +20,15 @@ def __init__(self, embedding_type: str, embedding_model: str):
self.embedding_type = embedding_type
self.embedding_model = embedding_model
match self.embedding_type:
case 'sentence-transformers':
self.embedding_function = SentenceTransformerEmbeddings(model_name=self.embedding_model)
case 'instructor-embedding':
self.embedding_instruction = 'Represent the document for retrival'
self.embedding_function = HuggingFaceInstructEmbeddings(model_name=self.embedding_model)
case "sentence-transformers":
self.embedding_function = SentenceTransformerEmbeddings(
model_name=self.embedding_model
)
case "instructor-embedding":
self.embedding_instruction = "Represent the document for retrival"
self.embedding_function = HuggingFaceInstructEmbeddings(
model_name=self.embedding_model
)
self.embedding_function.embed_instruction = self.embedding_instruction
case _:
raise Exception('embedding_type is invalid')
raise Exception("embedding_type is invalid")
104 changes: 56 additions & 48 deletions src/grag/components/llm.py
@@ -16,7 +16,7 @@

from .utils import get_config

llm_conf = get_config()['llm']
llm_conf = get_config()["llm"]

print("CUDA: ", torch.cuda.is_available())

@@ -35,20 +35,21 @@ class LLM:
n_gpu_layers (int): Number of GPU layers for CPP.
"""

def __init__(self,
model_name=llm_conf["model_name"],
device_map=llm_conf["device_map"],
task=llm_conf["task"],
max_new_tokens=llm_conf["max_new_tokens"],
temperature=llm_conf["temperature"],
n_batch=llm_conf["n_batch_gpu_cpp"],
n_ctx=llm_conf["n_ctx_cpp"],
n_gpu_layers=llm_conf["n_gpu_layers_cpp"],
std_out=llm_conf["std_out"],
base_dir=llm_conf["base_dir"],
quantization=llm_conf["quantization"],
pipeline=llm_conf["pipeline"],
):
def __init__(
self,
model_name=llm_conf["model_name"],
device_map=llm_conf["device_map"],
task=llm_conf["task"],
max_new_tokens=llm_conf["max_new_tokens"],
temperature=llm_conf["temperature"],
n_batch=llm_conf["n_batch_gpu_cpp"],
n_ctx=llm_conf["n_ctx_cpp"],
n_gpu_layers=llm_conf["n_gpu_layers_cpp"],
std_out=llm_conf["std_out"],
base_dir=llm_conf["base_dir"],
quantization=llm_conf["quantization"],
pipeline=llm_conf["pipeline"],
):
self.base_dir = Path(base_dir)
self._model_name = model_name
self.quantization = quantization
@@ -74,7 +75,8 @@ def model_name(self):
def model_path(self):
"""Sets the model name."""
return str(
self.base_dir / self.model_name / f'ggml-model-{self.quantization}.gguf')
self.base_dir / self.model_name / f"ggml-model-{self.quantization}.gguf"
)

@model_name.setter
def model_name(self, value):
@@ -92,46 +94,54 @@ def hf_pipeline(self, is_local=False):
else:
hf_model = self.model_name
match self.quantization:
case 'Q8':
case "Q8":
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
case 'Q4':
case "Q4":
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
case _:
raise ValueError(
f'{self.quantization} is not a valid quantization. Non-local hf_pipeline takes only Q4 and Q8.')
f"{self.quantization} is not a valid quantization. Non-local hf_pipeline takes only Q4 and Q8."
)

try:
# Try to load the model without passing the token
tokenizer = AutoTokenizer.from_pretrained(hf_model)
model = AutoModelForCausalLM.from_pretrained(hf_model,
quantization_config=quantization_config,
device_map=self.device_map,
torch_dtype=torch.float16, )
model = AutoModelForCausalLM.from_pretrained(
hf_model,
quantization_config=quantization_config,
device_map=self.device_map,
torch_dtype=torch.float16,
)
except OSError: # LocalTokenNotFoundError:
# If loading fails due to an auth token error, then load the token and retry
load_dotenv()
auth_token = os.getenv("AUTH_TOKEN")
if not auth_token:
raise ValueError("Authentication token not provided.")
tokenizer = AutoTokenizer.from_pretrained(hf_model, token=True)
model = AutoModelForCausalLM.from_pretrained(hf_model,
quantization_config=quantization_config,
device_map=self.device_map,
torch_dtype=torch.float16,
token=True)

pipe = pipeline(self.task,
model=model,
tokenizer=tokenizer,
torch_dtype=torch.bfloat16,
device_map=self.device_map,
max_new_tokens=self.max_new_tokens,
do_sample=True,
top_k=10,
num_return_sequences=1,
eos_token_id=tokenizer.eos_token_id
)
llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': self.temperature})
model = AutoModelForCausalLM.from_pretrained(
hf_model,
quantization_config=quantization_config,
device_map=self.device_map,
torch_dtype=torch.float16,
token=True,
)

pipe = pipeline(
self.task,
model=model,
tokenizer=tokenizer,
torch_dtype=torch.bfloat16,
device_map=self.device_map,
max_new_tokens=self.max_new_tokens,
do_sample=True,
top_k=10,
num_return_sequences=1,
eos_token_id=tokenizer.eos_token_id,
)
llm = HuggingFacePipeline(
pipeline=pipe, model_kwargs={"temperature": self.temperature}
)
return llm

def llama_cpp(self):
@@ -149,11 +159,9 @@ def llama_cpp(self):
)
return llm

def load_model(self,
model_name=None,
pipeline=None,
quantization=None,
is_local=None):
def load_model(
self, model_name=None, pipeline=None, quantization=None, is_local=None
):
"""Loads the model based on the specified pipeline and model name.
Args:
@@ -172,7 +180,7 @@ def load_model(self,
is_local = False

match self.pipeline:
case 'llama_cpp':
case "llama_cpp":
return self.llama_cpp()
case 'hf':
case "hf":
return self.hf_pipeline(is_local=is_local)
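
A minimal usage sketch of the `LLM` class above; constructor defaults come from the `llm` section of the config file, and `load_model` dispatches on the configured pipeline:

```python
from grag.components.llm import LLM

llm_loader = LLM()  # model_name, quantization, pipeline, etc. default from config
# 'llama_cpp' loads a local ggml-model-<quantization>.gguf file;
# 'hf' builds a transformers pipeline with Q4/Q8 BitsAndBytes quantization.
llm = llm_loader.load_model()
```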
31 changes: 14 additions & 17 deletions src/grag/components/multivec_retriever.py
@@ -10,7 +10,7 @@
from grag.components.text_splitter import TextSplitter
from grag.components.utils import get_config

multivec_retriever_conf = get_config()['multivec_retriever']
multivec_retriever_conf = get_config()["multivec_retriever"]


class Retriever:
@@ -30,11 +30,13 @@ class Retriever:
"""

def __init__(self,
store_path: str = multivec_retriever_conf['store_path'],
id_key: str = multivec_retriever_conf['id_key'],
namespace: str = multivec_retriever_conf['namespace'],
top_k=1):
def __init__(
self,
store_path: str = multivec_retriever_conf["store_path"],
id_key: str = multivec_retriever_conf["id_key"],
namespace: str = multivec_retriever_conf["namespace"],
top_k=1,
):
"""Args:
store_path: Path to the local file store, defaults to argument from config file
id_key: A key prefix for identifying documents, defaults to argument from config file
@@ -53,7 +55,7 @@ def __init__(self,
)
self.splitter = TextSplitter()
self.top_k: int = top_k
self.retriever.search_kwargs = {'k': self.top_k}
self.retriever.search_kwargs = {"k": self.top_k}

def id_gen(self, doc: Document) -> str:
"""Takes a document and returns a unique id (uuid5) using the namespace and document source.
@@ -65,7 +67,7 @@ def id_gen(self, doc: Document) -> str:
Returns:
string of hexadecimal uuid
"""
return uuid.uuid5(self.namespace, doc.metadata['source']).hex
return uuid.uuid5(self.namespace, doc.metadata["source"]).hex

def gen_doc_ids(self, docs: List[Document]) -> List[str]:
"""Takes a list of documents and produces a list of unique id, refer id_gen method for more details.
@@ -144,15 +146,12 @@ def get_chunk(self, query: str, with_score=False, top_k=None):
"""
if with_score:

return self.client.langchain_chroma.similarity_search_with_relevance_scores(
query=query,
**{'k': top_k} if top_k else self.retriever.search_kwargs
query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs
)
else:
return self.client.langchain_chroma.similarity_search(
query=query,
**{'k': top_k} if top_k else self.retriever.search_kwargs
query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs
)

async def aget_chunk(self, query: str, with_score=False, top_k=None):
@@ -169,13 +168,11 @@ async def aget_chunk(self, query: str, with_score=False, top_k=None):
"""
if with_score:
return await self.client.langchain_chroma.asimilarity_search_with_relevance_scores(
query=query,
**{'k': top_k} if top_k else self.retriever.search_kwargs
query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs
)
else:
return await self.client.langchain_chroma.asimilarity_search(
query=query,
**{'k': top_k} if top_k else self.retriever.search_kwargs
query=query, **{"k": top_k} if top_k else self.retriever.search_kwargs
)

def get_doc(self, query: str):
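
And a short sketch of the `Retriever` above; it assumes a running Chroma instance behind the configured client, and the query strings are arbitrary examples:

```python
from grag.components.multivec_retriever import Retriever

retriever = Retriever(top_k=3)  # store_path, id_key and namespace default from config
chunks = retriever.get_chunk("What is retrieval augmented generation?")
scored = retriever.get_chunk("What is retrieval augmented generation?", with_score=True)
```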