Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

couchbase: Add document id to vector search results #27622

Merged
merged 3 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion libs/partners/couchbase/langchain_couchbase/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,12 +559,13 @@ def similarity_search_with_score_by_vector(
# Parse the results
for row in search_iter.rows():
text = row.fields.pop(self._text_key, "")
id = row.id

# Format the metadata from Couchbase
metadata = self._format_metadata(row.fields)

score = row.score
doc = Document(page_content=text, metadata=metadata)
doc = Document(id=id, page_content=text, metadata=metadata)
docs_with_score.append((doc, score))

except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion libs/partners/couchbase/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "langchain-couchbase"
version = "0.2.0"
version = "0.2.1"
description = "An integration package connecting Couchbase and LangChain"
authors = []
readme = "README.md"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ def test_add_texts_with_ids_and_metadatas(self, cluster: Any) -> None:
time.sleep(SLEEP_DURATION)

output = vectorstore.similarity_search("foo", k=1)
assert output[0].id == "a"
assert output[0].page_content == "foo"
assert output[0].metadata["a"] == 1

Expand Down Expand Up @@ -364,3 +365,32 @@ def test_hybrid_search(self, cluster: Any) -> None:

assert result == hybrid_result
assert score <= hybrid_score

def test_id_in_results(self, cluster: Any) -> None:
    """Check that a search hit exposes the id returned when its text was added."""
    sample_texts = ["foo", "bar", "baz"]
    sample_metadatas = [{"a": 1}, {"b": 2}, {"c": 3}]

    store = CouchbaseVectorStore(
        cluster=cluster,
        embedding=ConsistentFakeEmbeddings(),
        index_name=INDEX_NAME,
        bucket_name=BUCKET_NAME,
        scope_name=SCOPE_NAME,
        collection_name=COLLECTION_NAME,
    )

    # add_texts hands back one id per inserted text.
    inserted_ids = store.add_texts(sample_texts, metadatas=sample_metadatas)
    assert len(inserted_ids) == len(sample_texts)

    # Pause so the newly added documents have time to be indexed.
    time.sleep(SLEEP_DURATION)

    hits = store.similarity_search("foo", k=1)
    top_hit = hits[0]
    assert top_hit.id == inserted_ids[0]
Loading