diff --git a/README.md b/README.md index 17f69b9..451b10c 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,17 @@ + + ![Supported python versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue) [![PEP8](https://img.shields.io/badge/code%20style-pep8-black.svg)](https://www.python.org/dev/peps/pep-0008/) [![License](https://img.shields.io/badge/License-MIT%202.0-blue.svg)](LICENSE) [![Run Pytest](https://github.com/samadpls/BestRAG/actions/workflows/pytest.yml/badge.svg?branch=main)](https://github.com/samadpls/BestRAG/actions/workflows/pytest.yml) - +![GitHub stars](https://img.shields.io/github/stars/samadpls/BestRAG?color=red&label=stars&logoColor=black&style=social) + Welcome to **BestRAG**! This Python library enables you to efficiently store and retrieve embeddings using a hybrid Retrieval-Augmented Generation (RAG) approach. It combines dense, sparse, and late interaction embeddings to provide a robust solution for handling large datasets. +--- + ## 🚀 Installation To install **BestRAG**, simply run: @@ -54,4 +59,4 @@ This project is licensed under the [MIT License](LICENSE). --- -Created by [samadpls](https://github.com/samadpls) 🎉 \ No newline at end of file +Created by [samadpls](https://github.com/samadpls) 🎉 diff --git a/bestrag/best_rag.py b/bestrag/best_rag.py index 77ff6da..062a960 100644 --- a/bestrag/best_rag.py +++ b/bestrag/best_rag.py @@ -28,10 +28,10 @@ class BestRAG: interaction model to use. Defaults to "BAAI/bge-small-en-v1.5". """ - def __init__(self, - url: str, - api_key: str, - collection_name: str, + def __init__(self, + url: str, + api_key: str, + collection_name: str, late_interaction_model_name: Optional[str] = "BAAI/bge-small-en-v1.5" ): self.collection_name = collection_name @@ -57,9 +57,9 @@ def _create_or_use_collection(self): collection_name=self.collection_name, vectors_config={ "dense-vector": models.VectorParams( - size=384, + size=384, distance=Distance.COSINE - ), + ), "output-token-embeddings": models.VectorParams( size=384, distance=Distance.COSINE, @@ -180,6 +180,7 @@ def store_pdf_embeddings(self, pdf_path: str): collection_name=self.collection_name, points=[point] ) + print( f"Stored embedding for page {page_num + 1} in collection '{self.collection_name}'.") diff --git a/setup.py b/setup.py index 4ce21ae..0a2f26c 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="bestrag", - version="0.1.3", + version="0.2.0", description="BestRAG (Best Retrieval Augmented) is a library for storing and" " searching document embeddings in a Qdrant vector database. It uses a " "hybrid embedding technique combining dense, late interaction and sparse representations for better performance.", @@ -13,7 +13,7 @@ author_email="abdulsamadsid1@gmail.com", long_description=long_description, long_description_content_type="text/markdown", - url="https://github.com/samadpls/bestrag", + url="https://github.com/samadpls/bestRAG", packages=find_packages(), install_requires=[ "fastembed==0.4.1",