Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Oracle DB as vector store #995

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions application/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ class Settings(BaseSettings):
QDRANT_PATH: Optional[str] = None
QDRANT_DISTANCE_FUNC: str = "Cosine"

# ORACLE_URI="your_username/your_password@localhost:1521/docsgpt"
ORACLE_URI: Optional[str] = None # Oracle DB connection string

BRAVE_SEARCH_API_KEY: Optional[str] = None

FLASK_DEBUG_MODE: bool = False
Expand Down
1 change: 1 addition & 0 deletions application/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ javalang==0.13.0
langchain==0.1.4
langchain-openai==0.0.5
openapi3_parser==1.1.16
oracledb==2.2.1
pandas==2.2.0
pydantic_settings==2.1.0
pymongo==4.6.3
Expand Down
101 changes: 101 additions & 0 deletions application/vectorstore/oracledb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# oracledb.py
import json
from array import array

from application.core.settings import settings
from application.vectorstore.base import BaseVectorStore
from application.vectorstore.document_class import Document


class OracleDBVectorStore(BaseVectorStore):
    """Vector store that keeps texts, embeddings and metadata in an Oracle table.

    Connection details come from ``settings.ORACLE_URI``; the embedding model
    comes from ``settings.EMBEDDINGS_NAME``.

    NOTE(review): the table DDL is not part of this module, so the following
    schema is assumed and must be confirmed against the real table:

    * ``{text_key}``      -- VARCHAR2 or CLOB holding the document text
    * ``{embedding_key}`` -- VECTOR column (Oracle Database 23ai)
    * ``METADATA``        -- column that accepts JSON text

    Table and column names are interpolated into SQL because identifiers
    cannot be bound; they must come from trusted configuration, never from
    user input. All *values* are passed as bind variables.
    """

    def __init__(
        self,
        embeddings_key: str = "embeddings",
        table: str = "documents",
        text_key: str = "text",
        embedding_key: str = "embedding",
        database: str = "docsgpt",
    ):
        """Open the Oracle connection and load the embedding model.

        Args:
            embeddings_key: Key forwarded to ``_get_embeddings``.
            table: Name of the table holding the documents.
            text_key: Name of the column holding the document text.
            embedding_key: Name of the column holding the embedding.
            database: Accepted for interface compatibility; not used here
                because the target database is part of ``ORACLE_URI``.

        Raises:
            ValueError: If ``settings.ORACLE_URI`` is not configured.
            ImportError: If the ``oracledb`` package is not installed.
        """
        self._table = table
        self._text_key = text_key
        self._embedding_key = embedding_key
        self._embeddings_key = embeddings_key
        self._oracle_uri = settings.ORACLE_URI

        # Fail early with a clear message instead of an opaque driver error.
        if not self._oracle_uri:
            raise ValueError(
                "ORACLE_URI is not set; it is required to use the Oracle vector store."
            )

        self._embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)

        try:
            import oracledb
        except ImportError as err:
            raise ImportError(
                "Could not import oracledb python package. "
                "Please install it with `pip install oracledb`."
            ) from err

        self._connection = oracledb.connect(self._oracle_uri)
        self._cursor = self._connection.cursor()

    @staticmethod
    def _read_value(value):
        """Return the content of a LOB-like value, or the value unchanged."""
        reader = getattr(value, "read", None)
        return reader() if callable(reader) else value

    @classmethod
    def _decode_metadata(cls, value):
        """Turn a fetched METADATA value into a dict where possible.

        ``None`` becomes an empty dict and JSON text is parsed. Anything that
        is not valid JSON text (or is already decoded by the driver) is
        returned as fetched so rows written by other tools stay readable.
        """
        value = cls._read_value(value)
        if value is None:
            return {}
        if isinstance(value, (str, bytes)):
            try:
                return json.loads(value)
            except ValueError:
                return value
        return value

    def search(self, question, k=2, *args, **kwargs):
        """Return the ``k`` documents closest to ``question``.

        Args:
            question: Query text; it is embedded with the configured model.
            k: Maximum number of documents to return.

        Returns:
            A list of ``Document`` objects ordered from nearest to farthest.
        """
        k = int(k)
        if k <= 0:
            return []

        # Bind the vector as a float32 array; the driver maps it to VECTOR.
        query_vector = array("f", self._embedding.embed_query(question))

        # NOTE(review): COSINE is assumed as the metric -- confirm it matches
        # how the embeddings/index were built.
        query = f"""
            SELECT {self._text_key}, METADATA
            FROM {self._table}
            ORDER BY VECTOR_DISTANCE({self._embedding_key}, :query_vector, COSINE)
            FETCH FIRST :k ROWS ONLY
        """

        self._cursor.execute(query, {"query_vector": query_vector, "k": k})
        return [
            Document(self._read_value(text), self._decode_metadata(metadata))
            for text, metadata in self._cursor.fetchall()
        ]

    def _insert_texts(self, texts, metadatas):
        """Embed and insert one batch of texts.

        Args:
            texts: Sequence of document texts.
            metadatas: Sequence of metadata dicts, aligned with ``texts``.

        Returns:
            The ROWIDs (as strings) of the inserted rows, in input order.
        """
        if not texts:
            return []

        embeddings = self._embedding.embed_documents(texts)
        rows = [
            (text, array("f", embedding), json.dumps(metadata))
            for text, metadata, embedding in zip(texts, metadatas, embeddings)
        ]

        query = f"""
            INSERT INTO {self._table} ({self._text_key}, {self._embedding_key}, METADATA)
            VALUES (:1, :2, :3)
            RETURNING ROWID INTO :4
        """

        # A dedicated cursor keeps the out-bind setup away from the shared one.
        with self._connection.cursor() as cursor:
            row_ids = cursor.var(str, arraysize=len(rows))
            cursor.setinputsizes(None, None, None, row_ids)
            try:
                cursor.executemany(query, rows)
            except Exception:
                # Do not let a half-inserted batch ride along with a later commit.
                self._connection.rollback()
                raise
            self._connection.commit()
            # With DML RETURNING each slot holds a list of returned values.
            return [row_ids.getvalue(i)[0] for i in range(len(rows))]

    def add_texts(
        self,
        texts,
        metadatas=None,
        ids=None,
        refresh_indices=True,
        create_index_if_not_exists=True,
        bulk_kwargs=None,
        **kwargs,
    ):
        """Insert texts (and optional metadata) in batches of 100.

        Args:
            texts: Iterable of document texts.
            metadatas: Optional iterable of metadata dicts, one per text.
                When omitted or empty, every text gets an empty dict.
            ids, refresh_indices, create_index_if_not_exists, bulk_kwargs,
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The identifiers returned by ``_insert_texts`` for all batches.

        Raises:
            ValueError: If ``metadatas`` is given but its length differs
                from ``texts``.
        """
        batch_size = 100

        # Materialise first: the inputs may be one-shot iterators.
        texts = list(texts)
        metadatas = list(metadatas) if metadatas else [{} for _ in texts]
        if len(metadatas) != len(texts):
            raise ValueError(
                f"Got {len(texts)} texts but {len(metadatas)} metadatas; "
                "they must have the same length."
            )

        result_ids = []
        for start in range(0, len(texts), batch_size):
            stop = start + batch_size
            result_ids.extend(
                self._insert_texts(texts[start:stop], metadatas[start:stop])
            )
        return result_ids

    def delete_index(self, *args, **kwargs):
        """Delete every row from the backing table and commit."""
        query = f"DELETE FROM {self._table} WHERE 1=1"
        self._cursor.execute(query)
        self._connection.commit()
2 changes: 2 additions & 0 deletions application/vectorstore/vector_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from application.vectorstore.elasticsearch import ElasticsearchStore
from application.vectorstore.mongodb import MongoDBVectorStore
from application.vectorstore.qdrant import QdrantStore
from application.vectorstore.oracledb import OracleDBVectorStore


class VectorCreator:
Expand All @@ -10,6 +11,7 @@ class VectorCreator:
"elasticsearch": ElasticsearchStore,
"mongodb": MongoDBVectorStore,
"qdrant": QdrantStore,
"oracledb": OracleDBVectorStore,
}

@classmethod
Expand Down
Loading