Skip to content

Commit

Permalink
Merge branch 'main' into mongodb-keyword-search
Browse files Browse the repository at this point in the history
  • Loading branch information
vblagoje authored Nov 25, 2024
2 parents aa4ae2b + ad60688 commit aaa59ae
Show file tree
Hide file tree
Showing 74 changed files with 1,921 additions and 180 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pgvector.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
python-version: ["3.9", "3.10", "3.11"]
services:
pgvector:
image: ankane/pgvector:latest
image: pgvector/pgvector:pg17
env:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
| [google-ai-haystack](integrations/google_ai/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/google-ai-haystack.svg)](https://pypi.org/project/google-ai-haystack) | [![Test / google-ai](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_ai.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_ai.yml) |
| [google-vertex-haystack](integrations/google_vertex/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/google-vertex-haystack.svg)](https://pypi.org/project/google-vertex-haystack) | [![Test / google-vertex](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_vertex.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_vertex.yml) |
| [instructor-embedders-haystack](integrations/instructor_embedders/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/instructor-embedders-haystack.svg)](https://pypi.org/project/instructor-embedders-haystack) | [![Test / instructor-embedders](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml) |
| [jina-haystack](integrations/jina/) | Embedder, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/jina-haystack.svg)](https://pypi.org/project/jina-haystack) | [![Test / jina](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml) |
| [jina-haystack](integrations/jina/) | Connector, Embedder, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/jina-haystack.svg)](https://pypi.org/project/jina-haystack) | [![Test / jina](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml) |
| [langfuse-haystack](integrations/langfuse/) | Tracer | [![PyPI - Version](https://img.shields.io/pypi/v/langfuse-haystack.svg?color=orange)](https://pypi.org/project/langfuse-haystack) | [![Test / langfuse](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/langfuse.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/langfuse.yml) |
| [llama-cpp-haystack](integrations/llama_cpp/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/llama-cpp-haystack.svg?color=orange)](https://pypi.org/project/llama-cpp-haystack) | [![Test / llama-cpp](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/llama_cpp.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/llama_cpp.yml) |
| [mistral-haystack](integrations/mistral/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/mistral-haystack.svg)](https://pypi.org/project/mistral-haystack) | [![Test / mistral](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mistral.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mistral.yml) |
Expand Down Expand Up @@ -85,3 +85,8 @@ GitHub. The GitHub Actions workflow will take care of the rest.
git push --tags origin
```
3. Wait for the CI to do its magic
> [!IMPORTANT]
> When releasing a new integration version, always tag a commit that includes the changes for that integration
> (usually the PR merge commit). If you tag a commit that doesn't include changes for the integration being released,
> the generated changelog will be incorrect.
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
from .document_embedder import AmazonBedrockDocumentEmbedder
from .text_embedder import AmazonBedrockTextEmbedder

__all__ = ["AmazonBedrockTextEmbedder", "AmazonBedrockDocumentEmbedder"]
__all__ = ["AmazonBedrockDocumentEmbedder", "AmazonBedrockTextEmbedder"]
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def run(self, documents: List[Document]):
- `documents`: The `Document`s with the `embedding` field populated.
:raises AmazonBedrockInferenceError: If the inference fails.
"""
if not isinstance(documents, list) or documents and not isinstance(documents[0], Document):
if not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)):
msg = (
"AmazonBedrockDocumentEmbedder expects a list of Documents as input."
"In case you want to embed a string, please use the AmazonBedrockTextEmbedder."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
from .chat.chat_generator import AmazonBedrockChatGenerator
from .generator import AmazonBedrockGenerator

__all__ = ["AmazonBedrockGenerator", "AmazonBedrockChatGenerator"]
__all__ = ["AmazonBedrockChatGenerator", "AmazonBedrockGenerator"]
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@ def prepare_body(self, messages: List[ChatMessage], **inference_kwargs) -> Dict[
stop_sequences = inference_kwargs.get("stop_sequences", []) + inference_kwargs.pop("stop_words", [])
if stop_sequences:
inference_kwargs["stop_sequences"] = stop_sequences
# pop stream kwarg from inference_kwargs as Anthropic does not support it (if provided)
inference_kwargs.pop("stream", None)
params = self._get_params(inference_kwargs, default_params, self.ALLOWED_PARAMS)
body = {**self.prepare_chat_messages(messages=messages), **params}
return body
Expand Down Expand Up @@ -384,6 +386,10 @@ def prepare_body(self, messages: List[ChatMessage], **inference_kwargs) -> Dict[
stop_words = inference_kwargs.pop("stop_words", [])
if stop_words:
inference_kwargs["stop"] = stop_words

# pop stream kwarg from inference_kwargs as Mistral does not support it (if provided)
inference_kwargs.pop("stream", None)

params = self._get_params(inference_kwargs, default_params, self.ALLOWED_PARAMS)
body = {"prompt": self.prepare_chat_messages(messages=messages), **params}
return body
Expand Down
2 changes: 1 addition & 1 deletion integrations/amazon_bedrock/tests/test_chat_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)

KLASS = "haystack_integrations.components.generators.amazon_bedrock.chat.chat_generator.AmazonBedrockChatGenerator"
MODELS_TO_TEST = ["anthropic.claude-3-sonnet-20240229-v1:0", "anthropic.claude-v2:1", "meta.llama2-13b-chat-v1"]
MODELS_TO_TEST = ["anthropic.claude-3-sonnet-20240229-v1:0", "anthropic.claude-v2:1"]
MODELS_TO_TEST_WITH_TOOLS = ["anthropic.claude-3-haiku-20240307-v1:0"]
MISTRAL_MODELS = [
"mistral.mistral-7b-instruct-v0:2",
Expand Down
40 changes: 38 additions & 2 deletions integrations/anthropic/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,29 @@
# Changelog

## [unreleased]

### ⚙️ CI

- Adopt uv as installer (#1142)

### 🧹 Chores

- Update ruff linting scripts and settings (#1105)

### 🌀 Miscellaneous

- Add AnthropicVertexChatGenerator component (#1192)

## [integrations/anthropic-v1.1.0] - 2024-09-20

### 🚀 Features

- Add Anthropic prompt caching support, add example (#1006)

### 🌀 Miscellaneous

- Chore: Update Anthropic example, use ChatPromptBuilder properly (#978)

## [integrations/anthropic-v1.0.0] - 2024-08-12

### 🐛 Bug Fixes
Expand All @@ -20,25 +38,43 @@

- Do not retry tests in `hatch run test` command (#954)


## [integrations/anthropic-v0.4.1] - 2024-07-17

### ⚙️ Miscellaneous Tasks
### 🧹 Chores

- Update ruff invocation to include check parameter (#853)

### 🌀 Miscellaneous

- Ci: install `pytest-rerunfailures` where needed; add retry config to `test-cov` script (#845)
- Add meta deprecation warning (#910)

## [integrations/anthropic-v0.4.0] - 2024-06-21

### 🚀 Features

- Update Anthropic/Cohere for tools use (#790)
- Update Anthropic default models, pydocs (#839)

### ⚙️ Miscellaneous Tasks
### ⚙️ CI

- Retry tests to reduce flakiness (#836)

### 🌀 Miscellaneous

- Remove references to Python 3.7 (#601)
- Chore: add license classifiers (#680)
- Chore: change the pydoc renderer class (#718)
- Docs: add missing api references (#728)

## [integrations/anthropic-v0.2.0] - 2024-03-15

### 🌀 Miscellaneous

- Docs: Replace amazon-bedrock with anthropic in readme (#584)
- Chore: Use the correct sonnet model name (#587)

## [integrations/anthropic-v0.1.0] - 2024-03-15

### 🚀 Features
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
from .chat.vertex_chat_generator import AnthropicVertexChatGenerator
from .generator import AnthropicGenerator

__all__ = ["AnthropicGenerator", "AnthropicChatGenerator", "AnthropicVertexChatGenerator"]
__all__ = ["AnthropicChatGenerator", "AnthropicGenerator", "AnthropicVertexChatGenerator"]
82 changes: 70 additions & 12 deletions integrations/astra/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,29 @@
# Changelog

## [integrations/astra-v0.9.4] - 2024-11-25

### 🌀 Miscellaneous

- Fix: Astra - fix embedding retrieval top-k limit (#1210)

## [integrations/astra-v0.10.0] - 2024-10-22

### 🚀 Features

- Update astradb integration for latest client library (#1145)

### ⚙️ Miscellaneous Tasks
### ⚙️ CI

- Update ruff linting scripts and settings (#1105)
- Adopt uv as installer (#1142)

### 🧹 Chores

- Update ruff linting scripts and settings (#1105)

### 🌀 Miscellaneous

- Fix: #1047 Remove count_documents from delete_documents (#1049)

## [integrations/astra-v0.9.3] - 2024-09-12

### 🐛 Bug Fixes
Expand All @@ -22,8 +35,13 @@

- Do not retry tests in `hatch run test` command (#954)


## [integrations/astra-v0.9.2] - 2024-07-22

### 🌀 Miscellaneous

- Normalize logical filter conditions (#874)

## [integrations/astra-v0.9.1] - 2024-07-15

### 🚀 Features
Expand All @@ -37,27 +55,48 @@
- Fix typing checks
- `Astra` - Fallback to default filter policy when deserializing retrievers without the init parameter (#896)

### ⚙️ Miscellaneous Tasks
### ⚙️ CI

- Retry tests to reduce flakiness (#836)

### 🌀 Miscellaneous

- Ci: install `pytest-rerunfailures` where needed; add retry config to `test-cov` script (#845)
- Fix: Incorrect astra not equal operator (#868)
- Chore: Minor retriever pydoc fix (#884)

## [integrations/astra-v0.7.0] - 2024-05-15

### 🐛 Bug Fixes

- Make unit tests pass (#720)

### 🌀 Miscellaneous

- Chore: change the pydoc renderer class (#718)
- [Astra DB] Explicit projection when reading from Astra DB (#733)

## [integrations/astra-v0.6.0] - 2024-04-24

### 🐛 Bug Fixes

- Pass namespace in the docstore init (#683)

### 🌀 Miscellaneous

- Chore: add license classifiers (#680)
- Bug fix for document_store.py (#618)

## [integrations/astra-v0.5.1] - 2024-04-09

### 🐛 Bug Fixes

- Fix haystack-ai pin (#649)
- Fix `haystack-ai` pins (#649)

### 🌀 Miscellaneous

- Remove references to Python 3.7 (#601)
- Make Document Stores initially skip `SparseEmbedding` (#606)

## [integrations/astra-v0.5.0] - 2024-03-18

Expand All @@ -67,9 +106,15 @@
- Small consistency improvements (#536)
- Disable-class-def (#556)

### 🌀 Miscellaneous

- Fix example code for Astra DB pipeline (#481)
- Make tests show coverage (#566)
- Astra DB: Add integration usage tracking (#568)

## [integrations/astra-v0.4.2] - 2024-02-21

### FIX
### 🌀 Miscellaneous

- Proper name for the sort param (#454)

Expand All @@ -78,30 +123,43 @@
### 🐛 Bug Fixes

- Fix order of API docs (#447)

This PR will also push the docs to Readme
- Fix integration tests (#450)
- Astra: fix integration tests (#450)

## [integrations/astra-v0.4.0] - 2024-02-20

### 📚 Documentation

- Update category slug (#442)

### 🌀 Miscellaneous

- Update the Astra DB Integration to fit latest conventions (#428)

## [integrations/astra-v0.3.0] - 2024-02-15

## [integrations/astra-v0.2.0] - 2024-02-13
### 🌀 Miscellaneous

### Astra
- Model_name_or_path > model (#418)
- [Astra] Change authentication parameters (#423)

- Generate api docs (#327)
## [integrations/astra-v0.2.0] - 2024-02-13

### Refact
### 🌀 Miscellaneous

- [**breaking**] Change import paths (#277)
- Generate api docs (#327)
- Astra: rename retriever (#399)

## [integrations/astra-v0.1.1] - 2024-01-18

### 🌀 Miscellaneous

- Update the import paths for beta5 (#235)

## [integrations/astra-v0.1.0] - 2024-01-11

### 🌀 Miscellaneous

- Adding AstraDB as a DocumentStore (#144)

<!-- generated by git-cliff -->
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def _format_query_response(responses, include_metadata, include_values):
return QueryResponse(final_res)

def _query(self, vector, top_k, filters=None):
query = {"sort": {"$vector": vector}, "options": {"limit": top_k, "includeSimilarity": True}}
query = {"sort": {"$vector": vector}, "limit": top_k, "includeSimilarity": True}

if filters is not None:
query["filter"] = filters
Expand All @@ -222,6 +222,7 @@ def find_documents(self, find_query):
filter=find_query.get("filter"),
sort=find_query.get("sort"),
limit=find_query.get("limit"),
include_similarity=find_query.get("includeSimilarity"),
projection={"*": 1},
)

Expand Down
48 changes: 48 additions & 0 deletions integrations/astra/tests/test_embedding_retrieval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import os

import pytest
from haystack import Document
from haystack.document_stores.types import DuplicatePolicy

from haystack_integrations.document_stores.astra import AstraDocumentStore


@pytest.mark.integration
@pytest.mark.skipif(
    not os.environ.get("ASTRA_DB_APPLICATION_TOKEN"),
    reason="ASTRA_DB_APPLICATION_TOKEN env var not set",
)
@pytest.mark.skipif(
    not os.environ.get("ASTRA_DB_API_ENDPOINT"),
    reason="ASTRA_DB_API_ENDPOINT env var not set",
)
class TestEmbeddingRetrieval:
    """Integration tests for embedding-based search on ``AstraDocumentStore``.

    The whole class is skipped when either ``ASTRA_DB_APPLICATION_TOKEN`` or
    ``ASTRA_DB_API_ENDPOINT`` is unset or empty.
    """

    @pytest.fixture
    def document_store(self) -> AstraDocumentStore:
        """Build a store on the ``haystack_integration`` collection (768-dim, overwrite on duplicates)."""
        store = AstraDocumentStore(
            collection_name="haystack_integration",
            duplicates_policy=DuplicatePolicy.OVERWRITE,
            embedding_dimension=768,
        )
        return store

    @pytest.fixture(autouse=True)
    def run_before_and_after_tests(self, document_store: AstraDocumentStore):
        """Empty the document store before each test and check that nothing is left in it."""
        # There is no ``yield`` here, so this clean-up only runs as test set-up.
        document_store.delete_documents(delete_all=True)
        assert document_store.count_documents() == 0

    def test_search_with_top_k(self, document_store):
        """Searching with ``top_k`` returns exactly ``top_k`` documents, each carrying a score."""
        top_k = 2
        query_embedding = [0.1] * 768
        common_embedding = [0.8] * 768

        # Write more documents than ``top_k`` so the limit is what bounds the result size.
        documents = [
            Document(content=f"This is document number {i}", embedding=common_embedding)
            for i in range(3)
        ]
        document_store.write_documents(documents)

        result = document_store.search(query_embedding, top_k)

        assert top_k == len(result)
        for retrieved in result:
            assert retrieved.score is not None
Loading

0 comments on commit aaa59ae

Please sign in to comment.