Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Community][fix] Fix Azure cosmos db no SQL similarity search with score and mmr #28479

Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from __future__ import annotations

import uuid
import warnings
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Callable

import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore

from langchain_community.vectorstores.utils import maximal_marginal_relevance

if TYPE_CHECKING:

Check failure on line 14 in libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py

View workflow job for this annotation

GitHub Actions / cd libs/community / make lint #3.13

Ruff (I001)

langchain_community/vectorstores/azure_cosmos_db_no_sql.py:1:1: I001 Import block is un-sorted or un-formatted

Check failure on line 14 in libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py

View workflow job for this annotation

GitHub Actions / cd libs/community / make lint #3.9

Ruff (I001)

langchain_community/vectorstores/azure_cosmos_db_no_sql.py:1:1: I001 Import block is un-sorted or un-formatted
from azure.cosmos.cosmos_client import CosmosClient


Expand Down Expand Up @@ -121,6 +121,8 @@
self._embedding_key = self._vector_embedding_policy["vectorEmbeddings"][0][
"path"
][1:]
self._distance_strategy = self._vector_embedding_policy[
'vectorEmbeddings'][0]['distanceFunction']

def add_texts(
self,
Expand Down Expand Up @@ -260,6 +262,28 @@
raise ValueError("No document ids provided to delete.")
self._container.delete_item(document_id, partition_key=document_id)

def _select_relevance_score_fn(self) -> Callable[[float], float]:
    """Return the relevance-score function for the configured distance metric.

    The 'correct' relevance function may differ depending on a few things,
    including:
    - the distance / similarity metric used by the VectorStore
    - the scale of your embeddings (OpenAI's are unit normed. Many others
      are not!)
    - embedding dimensionality
    - etc.

    The choice here is driven solely by ``self._distance_strategy``.

    Returns:
        The scoring callable stored on this instance for the configured
        distance function.

    Raises:
        ValueError: If ``self._distance_strategy`` is not a recognised
            distance function.
    """
    strategy = self._distance_strategy
    if strategy == "cosine":
        return self._cosine_relevance_score_fn
    if strategy == "euclidean":
        return self._euclidean_relevance_score_fn
    # Azure Cosmos DB names this metric "dotproduct" (no space); the spaced
    # spelling is still accepted so existing callers keep working.
    if strategy in ("dotproduct", "dot product"):
        return self._max_inner_product_relevance_score_fn
    raise ValueError(
        f"Unknown distance strategy {strategy!r}, must be cosine, dotproduct,"
        " or euclidean"
    )

def _similarity_search_with_score(
self,
embeddings: List[float],
Expand All @@ -274,7 +298,7 @@
query += "TOP @limit "

query += (
"c.id, c[@embeddingKey], c.text, c.metadata, "
f"c.id, c[@embeddingKey] as embeddingKey, c.text, c.metadata, "

Check failure on line 301 in libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py

View workflow job for this annotation

GitHub Actions / cd libs/community / make lint #3.13

Ruff (F541)

langchain_community/vectorstores/azure_cosmos_db_no_sql.py:301:13: F541 f-string without any placeholders

Check failure on line 301 in libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py

View workflow job for this annotation

GitHub Actions / cd libs/community / make lint #3.9

Ruff (F541)

langchain_community/vectorstores/azure_cosmos_db_no_sql.py:301:13: F541 f-string without any placeholders
"VectorDistance(c[@embeddingKey], @embeddings) AS SimilarityScore FROM c"
)

Expand Down Expand Up @@ -305,7 +329,7 @@
metadata = item["metadata"]
score = item["SimilarityScore"]
if with_embedding:
metadata[self._embedding_key] = item[self._embedding_key]
metadata[self._embedding_key] = item["embeddingKey"]
docs_and_scores.append(
(Document(page_content=text, metadata=metadata), score)
)
Expand Down
Loading