From f03073f995fa39a88f8ca4b14438ee6a6aa3c892 Mon Sep 17 00:00:00 2001 From: Rashmi Pawar <168514198+raspawar@users.noreply.github.com> Date: Mon, 12 Aug 2024 14:30:16 +0530 Subject: [PATCH 1/6] Add default model for NVIDIA HayStack local NIM endpoints (#915) * initial embedder code * default model code * docs: update model docstring * tests: add userwarning * docs: literal lint fix * review changes * remove pydantic dependency * move backend, nim_backend under utils * move is_hosted to warm_up * test cases, docstring fix * error message updation Co-authored-by: Madeesh Kannan * move is_hosted code to util * remove backend code * update import for is_hosted * remove util and move code to utils * fix api key issue for failing test cases * Update integrations/nvidia/tests/conftest.py --------- Co-authored-by: Madeesh Kannan --- integrations/nvidia/pyproject.toml | 4 +- .../embedders/nvidia/_nim_backend.py | 52 ----------- .../components/embedders/nvidia/backend.py | 29 ------- .../embedders/nvidia/document_embedder.py | 36 ++++++-- .../embedders/nvidia/text_embedder.py | 36 ++++++-- .../components/generators/nvidia/backend.py | 29 ------- .../components/generators/nvidia/generator.py | 38 ++++++-- .../utils/nvidia/__init__.py | 5 +- .../nvidia/nim_backend.py} | 57 +++++++++++- .../utils/nvidia/utils.py | 8 ++ integrations/nvidia/tests/__init__.py | 3 + integrations/nvidia/tests/conftest.py | 44 ++++++++++ .../nvidia/tests/test_document_embedder.py | 86 ++++++++++++++----- integrations/nvidia/tests/test_generator.py | 55 ++++++++++++ .../nvidia/tests/test_text_embedder.py | 48 +++++++---- 15 files changed, 358 insertions(+), 172 deletions(-) delete mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py rename integrations/nvidia/src/haystack_integrations/{components/generators/nvidia/_nim_backend.py => utils/nvidia/nim_backend.py} (61%) create mode 100644 integrations/nvidia/tests/conftest.py diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml index 504077b4e..f35485e9c 100644 --- a/integrations/nvidia/pyproject.toml +++ b/integrations/nvidia/pyproject.toml @@ -42,7 +42,7 @@ root = "../.." 
git_describe_command = 'git describe --tags --match="integrations/nvidia-v[0-9]*"' [tool.hatch.envs.default] -dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"] +dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools", "requests_mock"] [tool.hatch.envs.default.scripts] test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" @@ -147,6 +147,8 @@ module = [ "haystack_integrations.*", "pytest.*", "numpy.*", + "requests_mock.*", + "pydantic.*" ] ignore_missing_imports = true diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py deleted file mode 100644 index ee25df7fd..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py +++ /dev/null @@ -1,52 +0,0 @@ -from typing import Any, Dict, List, Optional, Tuple - -import requests -from haystack.utils import Secret - -from .backend import EmbedderBackend - -REQUEST_TIMEOUT = 60 - - -class NimBackend(EmbedderBackend): - def __init__( - self, - model: str, - api_url: str, - api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), - model_kwargs: Optional[Dict[str, Any]] = None, - ): - headers = { - "Content-Type": "application/json", - "accept": "application/json", - } - - if api_key: - headers["authorization"] = f"Bearer {api_key.resolve_value()}" - - self.session = requests.Session() - self.session.headers.update(headers) - - self.model = model - self.api_url = api_url - self.model_kwargs = model_kwargs or {} - - def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: - url = f"{self.api_url}/embeddings" - - res = self.session.post( - url, - json={ - "model": self.model, - "input": texts, - **self.model_kwargs, - }, - timeout=REQUEST_TIMEOUT, - ) - res.raise_for_status() - - data = res.json() - # Sort the embeddings by index, we don't know whether they're out of order or not - embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])] - - return embeddings, {"usage": data["usage"]} diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py deleted file mode 100644 index 09e9b7c80..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py +++ /dev/null @@ -1,29 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Tuple - - -class EmbedderBackend(ABC): - def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None): - """ - Initialize the backend. - - :param model: - The name of the model to use. - :param model_kwargs: - Additional keyword arguments to pass to the model. - """ - self.model_name = model - self.model_kwargs = model_kwargs or {} - - @abstractmethod - def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: - """ - Invoke the backend and embed the given texts. - - :param texts: - Texts to embed. - :return: - Vector representation of the texts and - metadata returned by the service. 
- """ - pass diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 4cc805c01..f5d1747b8 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -1,12 +1,11 @@ +import warnings from typing import Any, Dict, List, Optional, Tuple, Union from haystack import Document, component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import url_validation +from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation from tqdm import tqdm -from ._nim_backend import NimBackend -from .backend import EmbedderBackend from .truncate import EmbeddingTruncateMode _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia" @@ -34,7 +33,7 @@ class NvidiaDocumentEmbedder: def __init__( self, - model: str = "NV-Embed-QA", + model: Optional[str] = None, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), api_url: str = _DEFAULT_API_URL, prefix: str = "", @@ -50,6 +49,8 @@ def __init__( :param model: Embedding model to use. + If no specific model along with locally hosted API URL is provided, + the system defaults to the available model found using /models API. :param api_key: API key for the NVIDIA NIM. :param api_url: @@ -87,9 +88,31 @@ def __init__( truncate = EmbeddingTruncateMode.from_str(truncate) self.truncate = truncate - self.backend: Optional[EmbedderBackend] = None + self.backend: Optional[Any] = None self._initialized = False + if is_hosted(api_url) and not self.model: # manually set default model + self.model = "NV-Embed-QA" + + def default_model(self): + """Set default model in local NIM mode.""" + valid_models = [ + model.id for model in self.backend.models() if not model.base_model or model.base_model == model.id + ] + name = next(iter(valid_models), None) + if name: + warnings.warn( + f"Default model is set as: {name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + UserWarning, + stacklevel=2, + ) + self.model = self.backend.model = name + else: + error_message = "No locally hosted model was found." + raise ValueError(error_message) + def warm_up(self): """ Initializes the component. @@ -109,6 +132,9 @@ def warm_up(self): self._initialized = True + if not self.model: + self.default_model() + def to_dict(self) -> Dict[str, Any]: """ Serializes the component to a dictionary. 
diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index e1a8c36dd..1c4a7c5c9 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -1,11 +1,10 @@ +import warnings from typing import Any, Dict, List, Optional, Union from haystack import component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import url_validation +from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation -from ._nim_backend import NimBackend -from .backend import EmbedderBackend from .truncate import EmbeddingTruncateMode _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia" @@ -35,7 +34,7 @@ class NvidiaTextEmbedder: def __init__( self, - model: str = "NV-Embed-QA", + model: Optional[str] = None, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), api_url: str = _DEFAULT_API_URL, prefix: str = "", @@ -47,6 +46,8 @@ def __init__( :param model: Embedding model to use. + If no specific model along with locally hosted API URL is provided, + the system defaults to the available model found using /models API. :param api_key: API key for the NVIDIA NIM. :param api_url: @@ -71,9 +72,31 @@ def __init__( truncate = EmbeddingTruncateMode.from_str(truncate) self.truncate = truncate - self.backend: Optional[EmbedderBackend] = None + self.backend: Optional[Any] = None self._initialized = False + if is_hosted(api_url) and not self.model: # manually set default model + self.model = "NV-Embed-QA" + + def default_model(self): + """Set default model in local NIM mode.""" + valid_models = [ + model.id for model in self.backend.models() if not model.base_model or model.base_model == model.id + ] + name = next(iter(valid_models), None) + if name: + warnings.warn( + f"Default model is set as: {name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + UserWarning, + stacklevel=2, + ) + self.model = self.backend.model = name + else: + error_message = "No locally hosted model was found." + raise ValueError(error_message) + def warm_up(self): """ Initializes the component. @@ -93,6 +116,9 @@ def warm_up(self): self._initialized = True + if not self.model: + self.default_model() + def to_dict(self) -> Dict[str, Any]: """ Serializes the component to a dictionary. diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py deleted file mode 100644 index d14199daf..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py +++ /dev/null @@ -1,29 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Tuple - - -class GeneratorBackend(ABC): - def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None): - """ - Initialize the backend. - - :param model: - The name of the model to use. - :param model_kwargs: - Additional keyword arguments to pass to the model. 
- """ - self.model_name = model - self.model_kwargs = model_kwargs or {} - - @abstractmethod - def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: - """ - Invoke the backend and prompt the model. - - :param prompt: - Prompt text. - :return: - Vector representation of the generated texts related - metadata returned by the service. - """ - pass diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index f11ef8aaf..a286400ab 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -1,14 +1,12 @@ # SPDX-FileCopyrightText: 2024-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +import warnings from typing import Any, Dict, List, Optional from haystack import component, default_from_dict, default_to_dict from haystack.utils.auth import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import url_validation - -from ._nim_backend import NimBackend -from .backend import GeneratorBackend +from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation _DEFAULT_API_URL = "https://integrate.api.nvidia.com/v1" @@ -45,7 +43,7 @@ class NvidiaGenerator: def __init__( self, - model: str, + model: Optional[str] = None, api_url: str = _DEFAULT_API_URL, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), model_arguments: Optional[Dict[str, Any]] = None, @@ -55,6 +53,10 @@ def __init__( :param model: Name of the model to use for text generation. + See the [NVIDIA NIMs](https://ai.nvidia.com) + for more information on the supported models. + `Note`: If no specific model along with locally hosted API URL is provided, + the system defaults to the available model found using /models API. Check supported models at [NVIDIA NIM](https://ai.nvidia.com). :param api_key: API key for the NVIDIA NIM. Set it as the `NVIDIA_API_KEY` environment @@ -72,7 +74,28 @@ def __init__( self._api_key = api_key self._model_arguments = model_arguments or {} - self._backend: Optional[GeneratorBackend] = None + self._backend: Optional[Any] = None + + self.is_hosted = is_hosted(api_url) + + def default_model(self): + """Set default model in local NIM mode.""" + valid_models = [ + model.id for model in self._backend.models() if not model.base_model or model.base_model == model.id + ] + name = next(iter(valid_models), None) + if name: + warnings.warn( + f"Default model is set as: {name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + UserWarning, + stacklevel=2, + ) + self._model = self._backend.model_name = name + else: + error_message = "No locally hosted model was found." + raise ValueError(error_message) def warm_up(self): """ @@ -91,6 +114,9 @@ def warm_up(self): model_kwargs=self._model_arguments, ) + if not self.is_hosted and not self._model: + self.default_model() + def to_dict(self) -> Dict[str, Any]: """ Serializes the component to a dictionary. 
diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py index 9863e4a38..da301d29d 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py @@ -1,3 +1,4 @@ -from .utils import url_validation +from .nim_backend import Model, NimBackend +from .utils import is_hosted, url_validation -__all__ = ["url_validation"] +__all__ = ["NimBackend", "Model", "is_hosted", "url_validation"] diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py similarity index 61% rename from integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py rename to integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py index 5253b3254..f69862f0e 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py @@ -1,14 +1,29 @@ +from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Tuple import requests from haystack.utils import Secret -from .backend import GeneratorBackend - REQUEST_TIMEOUT = 60 -class NimBackend(GeneratorBackend): +@dataclass +class Model: + """ + Model information. + + id: unique identifier for the model, passed as model parameter for requests + aliases: list of aliases for the model + base_model: root model for the model + All aliases are deprecated and will trigger a warning when used. + """ + + id: str + aliases: Optional[List[str]] = field(default_factory=list) + base_model: Optional[str] = None + + +class NimBackend: def __init__( self, model: str, @@ -31,6 +46,26 @@ def __init__( self.api_url = api_url self.model_kwargs = model_kwargs or {} + def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: + url = f"{self.api_url}/embeddings" + + res = self.session.post( + url, + json={ + "model": self.model, + "input": texts, + **self.model_kwargs, + }, + timeout=REQUEST_TIMEOUT, + ) + res.raise_for_status() + + data = res.json() + # Sort the embeddings by index, we don't know whether they're out of order or not + embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])] + + return embeddings, {"usage": data["usage"]} + def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: # We're using the chat completion endpoint as the NIM API doesn't support # the /completions endpoint. So both the non-chat and chat generator will use this. @@ -78,3 +113,19 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: meta.append(choice_meta) return replies, meta + + def models(self) -> List[Model]: + url = f"{self.api_url}/models" + + res = self.session.get( + url, + timeout=REQUEST_TIMEOUT, + ) + res.raise_for_status() + + data = res.json()["data"] + models = [Model(element["id"]) for element in data if "id" in element] + if not models: + msg = f"No hosted model were found at URL '{url}'." 
+ raise ValueError(msg) + return models diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py index 4f8e14b09..7d4dfc3b4 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py @@ -37,3 +37,11 @@ def url_validation(api_url: str, default_api_url: str, allowed_paths: List[str]) base_url = urlunparse((result.scheme, result.netloc, "v1", "", "", "")) return base_url + + +def is_hosted(api_url: str): + """""" + return urlparse(api_url).netloc in [ + "integrate.api.nvidia.com", + "ai.api.nvidia.com", + ] diff --git a/integrations/nvidia/tests/__init__.py b/integrations/nvidia/tests/__init__.py index e873bc332..47611e0b9 100644 --- a/integrations/nvidia/tests/__init__.py +++ b/integrations/nvidia/tests/__init__.py @@ -1,3 +1,6 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +from .conftest import MockBackend + +__all__ = ["MockBackend"] diff --git a/integrations/nvidia/tests/conftest.py b/integrations/nvidia/tests/conftest.py new file mode 100644 index 000000000..794c994ff --- /dev/null +++ b/integrations/nvidia/tests/conftest.py @@ -0,0 +1,44 @@ +from typing import Any, Dict, List, Optional, Tuple + +import pytest +from haystack.utils import Secret +from haystack_integrations.utils.nvidia import Model, NimBackend +from requests_mock import Mocker + + +class MockBackend(NimBackend): + def __init__(self, model: str, api_key: Optional[Secret] = None, model_kwargs: Optional[Dict[str, Any]] = None): + api_key = api_key or Secret.from_env_var("NVIDIA_API_KEY") + super().__init__(model, api_url="", api_key=api_key, model_kwargs=model_kwargs or {}) + + def embed(self, texts): + inputs = texts + data = [[0.1, 0.2, 0.3] for i in range(len(inputs))] + return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}} + + def models(self): + return [Model(id="aa")] + + def generate(self) -> Tuple[List[str], List[Dict[str, Any]]]: + return ( + ["This is a mocked response."], + [{"role": "assistant", "usage": {"prompt_tokens": 5, "total_tokens": 10, "completion_tokens": 5}}], + ) + + +@pytest.fixture +def mock_local_models(requests_mock: Mocker) -> None: + requests_mock.get( + "http://localhost:8080/v1/models", + json={ + "data": [ + { + "id": "model1", + "object": "model", + "created": 1234567890, + "owned_by": "OWNER", + "root": "model1", + }, + ] + }, + ) diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index 856ae4652..6562a0ea9 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -4,17 +4,8 @@ from haystack import Document from haystack.utils import Secret from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaDocumentEmbedder -from haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend - -class MockBackend(EmbedderBackend): - def __init__(self, model, model_kwargs): - super().__init__(model, model_kwargs) - - def embed(self, texts): - inputs = texts - data = [[0.1, 0.2, 0.3] for i in range(len(inputs))] - return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}} +from . 
import MockBackend class TestNvidiaDocumentEmbedder: @@ -185,14 +176,18 @@ def test_prepare_texts_to_embed_w_suffix(self): def test_embed_batch(self): texts = ["text 1", "text 2", "text 3", "text 4", "text 5"] - + model = "playground_nvolveqa_40k" + api_key = Secret.from_token("fake-api-key") embedder = NvidiaDocumentEmbedder( - "playground_nvolveqa_40k", - api_key=Secret.from_token("fake-api-key"), + model, + api_key=api_key, ) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend( + model=model, + api_key=api_key, + ) embeddings, metadata = embedder._embed_batch(texts_to_embed=texts, batch_size=2) @@ -205,15 +200,55 @@ def test_embed_batch(self): assert metadata == {"usage": {"prompt_tokens": 3 * 4, "total_tokens": 3 * 4}} - def test_run(self): + @pytest.mark.usefixtures("mock_local_models") + def test_run_default_model(self): docs = [ Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), ] + api_key = Secret.from_token("fake-api-key") + + embedder = NvidiaDocumentEmbedder( + api_key=api_key, + model=None, + api_url="http://localhost:8080/v1", + prefix="prefix ", + suffix=" suffix", + meta_fields_to_embed=["topic"], + embedding_separator=" | ", + ) + + with pytest.warns(UserWarning) as record: + embedder.warm_up() + assert len(record) == 1 + assert "Default model is set as:" in str(record[0].message) + assert embedder.model == "model1" + + embedder.backend = MockBackend(model=embedder.model, api_key=api_key) + + result = embedder.run(documents=docs) + documents_with_embeddings = result["documents"] + metadata = result["meta"] + + assert isinstance(documents_with_embeddings, list) + assert len(documents_with_embeddings) == len(docs) + for doc in documents_with_embeddings: + assert isinstance(doc, Document) + assert isinstance(doc.embedding, list) + assert len(doc.embedding) == 3 + assert all(isinstance(x, float) for x in doc.embedding) + assert metadata == {"usage": {"prompt_tokens": 4, "total_tokens": 4}} + + def test_run(self): + docs = [ + Document(content="I love cheese", meta={"topic": "Cuisine"}), + Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), + ] + api_key = Secret.from_token("fake-api-key") model = "playground_nvolveqa_40k" embedder = NvidiaDocumentEmbedder( - api_key=Secret.from_token("fake-api-key"), + api_key=api_key, model=model, prefix="prefix ", suffix=" suffix", @@ -222,7 +257,7 @@ def test_run(self): ) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) result = embedder.run(documents=docs) @@ -243,9 +278,10 @@ def test_run_custom_batch_size(self): Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), ] + api_key = Secret.from_token("fake-api-key") model = "playground_nvolveqa_40k" embedder = NvidiaDocumentEmbedder( - api_key=Secret.from_token("fake-api-key"), + api_key=api_key, model=model, prefix="prefix ", suffix=" suffix", @@ -255,7 +291,7 @@ def test_run_custom_batch_size(self): ) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) result = embedder.run(documents=docs) @@ -273,10 +309,12 @@ def test_run_custom_batch_size(self): assert metadata == {"usage": {"prompt_tokens": 2 * 4, "total_tokens": 2 * 4}} def test_run_wrong_input_format(self): - 
embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) + model = "playground_nvolveqa_40k" + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaDocumentEmbedder(model, api_key=api_key) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) string_input = "text" list_integers_input = [1, 2, 3] @@ -288,10 +326,12 @@ def test_run_wrong_input_format(self): embedder.run(documents=list_integers_input) def test_run_on_empty_list(self): - embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) + model = "playground_nvolveqa_40k" + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaDocumentEmbedder(model, api_key=api_key) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) empty_list_input = [] result = embedder.run(documents=empty_list_input) diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index 3ddeebe88..9fff9c2e8 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -6,6 +6,35 @@ import pytest from haystack.utils import Secret from haystack_integrations.components.generators.nvidia import NvidiaGenerator +from requests_mock import Mocker + + +@pytest.fixture +def mock_local_chat_completion(requests_mock: Mocker) -> None: + requests_mock.post( + "http://localhost:8080/v1/chat/completions", + json={ + "choices": [ + { + "message": {"content": "Hello!", "role": "system"}, + "usage": {"prompt_tokens": 3, "total_tokens": 5, "completion_tokens": 9}, + "finish_reason": "stop", + "index": 0, + }, + { + "message": {"content": "How are you?", "role": "system"}, + "usage": {"prompt_tokens": 3, "total_tokens": 5, "completion_tokens": 9}, + "finish_reason": "stop", + "index": 1, + }, + ], + "usage": { + "prompt_tokens": 3, + "total_tokens": 5, + "completion_tokens": 9, + }, + }, + ) class TestNvidiaGenerator: @@ -116,6 +145,32 @@ def test_run_integration_with_nim_backend(self): assert result["replies"] assert result["meta"] + @pytest.mark.integration + @pytest.mark.usefixtures("mock_local_models") + @pytest.mark.usefixtures("mock_local_chat_completion") + def test_run_integration_with_default_model_nim_backend(self): + model = None + url = "http://localhost:8080/v1" + generator = NvidiaGenerator( + model=model, + api_url=url, + api_key=None, + model_arguments={ + "temperature": 0.2, + }, + ) + with pytest.warns(UserWarning) as record: + generator.warm_up() + assert len(record) == 1 + assert "Default model is set as:" in str(record[0].message) + assert generator._model == "model1" + assert not generator.is_hosted + + result = generator.run(prompt="What is the answer?") + + assert result["replies"] + assert result["meta"] + @pytest.mark.skipif( not os.environ.get("NVIDIA_API_KEY", None), reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.", diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index 42d60dee2..7c0a7000d 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -3,17 +3,8 @@ import pytest from haystack.utils import Secret from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaTextEmbedder -from 
haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend - -class MockBackend(EmbedderBackend): - def __init__(self, model, model_kwargs): - super().__init__(model, model_kwargs) - - def embed(self, texts): - inputs = texts - data = [[0.1, 0.2, 0.3] for i in range(len(inputs))] - return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}} +from . import MockBackend class TestNvidiaTextEmbedder: @@ -22,7 +13,6 @@ def test_init_default(self, monkeypatch): embedder = NvidiaTextEmbedder() assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY") - assert embedder.model == "NV-Embed-QA" assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia" assert embedder.prefix == "" assert embedder.suffix == "" @@ -106,13 +96,36 @@ def from_dict(self, monkeypatch): assert component.suffix == "suffix" assert component.truncate == "START" + @pytest.mark.usefixtures("mock_local_models") + def test_run_default_model(self): + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaTextEmbedder(api_url="http://localhost:8080/v1", api_key=api_key) + + assert embedder.model is None + + with pytest.warns(UserWarning) as record: + embedder.warm_up() + + assert len(record) == 1 + assert "Default model is set as:" in str(record[0].message) + assert embedder.model == "model1" + + embedder.backend = MockBackend(model=embedder.model, api_key=api_key) + + result = embedder.run(text="The food was delicious") + + assert len(result["embedding"]) == 3 + assert all(isinstance(x, float) for x in result["embedding"]) + assert result["meta"] == { + "usage": {"prompt_tokens": 4, "total_tokens": 4}, + } + def test_run(self): - embedder = NvidiaTextEmbedder( - "playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"), prefix="prefix ", suffix=" suffix" - ) + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=api_key, prefix="prefix ", suffix=" suffix") embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model="playground_nvolveqa_40k", api_key=api_key) result = embedder.run(text="The food was delicious") @@ -123,9 +136,10 @@ def test_run(self): } def test_run_wrong_input_format(self): - embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=api_key) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model="playground_nvolveqa_40k", api_key=api_key) list_integers_input = [1, 2, 3] From 7d90a58f1e77e776d5682ccbb50c87d71eee99c4 Mon Sep 17 00:00:00 2001 From: tstadel <60758086+tstadel@users.noreply.github.com> Date: Mon, 12 Aug 2024 15:36:58 +0200 Subject: [PATCH 2/6] fix: support streaming_callback param in amazon bedrock generators (#927) * fix: support streaming_callback param in amazon bedrock generators * fix chat generator merge * reformat --------- Co-authored-by: Thomas Stadelmann --- .../generators/amazon_bedrock/adapters.py | 93 ++++----- .../amazon_bedrock/chat/adapters.py | 55 +++--- .../amazon_bedrock/chat/chat_generator.py | 53 +++--- .../generators/amazon_bedrock/generator.py | 91 ++++----- .../generators/amazon_bedrock/handlers.py | 33 ---- .../tests/test_chat_generator.py | 9 - .../amazon_bedrock/tests/test_generator.py | 179 ++++++++---------- 7 files changed, 217 insertions(+), 296 deletions(-) diff --git 
a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py index 7c7fdd7ce..8b5c2b530 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py @@ -1,8 +1,8 @@ import json from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional -from .handlers import TokenStreamingHandler +from haystack.dataclasses import StreamingChunk class BedrockModelAdapter(ABC): @@ -39,22 +39,24 @@ def get_responses(self, response_body: Dict[str, Any]) -> List[str]: responses = [completion.lstrip() for completion in completions] return responses - def get_stream_responses(self, stream, stream_handler: TokenStreamingHandler) -> List[str]: + def get_stream_responses(self, stream, streaming_callback: Callable[[StreamingChunk], None]) -> List[str]: """ Extracts the responses from the Amazon Bedrock streaming response. :param stream: The streaming response from the Amazon Bedrock request. - :param stream_handler: The handler for the streaming response. + :param streaming_callback: The handler for the streaming response. :returns: A list of string responses. """ - tokens: List[str] = [] + streaming_chunks: List[StreamingChunk] = [] for event in stream: chunk = event.get("chunk") if chunk: decoded_chunk = json.loads(chunk["bytes"].decode("utf-8")) - token = self._extract_token_from_stream(decoded_chunk) - tokens.append(stream_handler(token, event_data=decoded_chunk)) - responses = ["".join(tokens).lstrip()] + streaming_chunk: StreamingChunk = self._build_streaming_chunk(decoded_chunk) + streaming_chunks.append(streaming_chunk) + streaming_callback(streaming_chunk) + + responses = ["".join(streaming_chunk.content for streaming_chunk in streaming_chunks).lstrip()] return responses def _get_params(self, inference_kwargs: Dict[str, Any], default_params: Dict[str, Any]) -> Dict[str, Any]: @@ -84,12 +86,12 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L """ @abstractmethod - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ @@ -150,17 +152,17 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L return [response_body["completion"]] - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. 
""" if self.use_messages_api: - return chunk.get("delta", {}).get("text", "") + return StreamingChunk(content=chunk.get("delta", {}).get("text", ""), meta=chunk) - return chunk.get("completion", "") + return StreamingChunk(content=chunk.get("completion", ""), meta=chunk) class MistralAdapter(BedrockModelAdapter): @@ -199,17 +201,18 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L """ return [output.get("text", "") for output in response_body.get("outputs", [])] - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ + content = "" chunk_list = chunk.get("outputs", []) if chunk_list: - return chunk_list[0].get("text", "") - return "" + content = chunk_list[0].get("text", "") + return StreamingChunk(content=content, meta=chunk) class CohereCommandAdapter(BedrockModelAdapter): @@ -254,14 +257,14 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L responses = [generation["text"] for generation in response_body["generations"]] return responses - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ - return chunk.get("text", "") + return StreamingChunk(content=chunk.get("text", ""), meta=chunk) class CohereCommandRAdapter(BedrockModelAdapter): @@ -313,15 +316,15 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L responses = [response_body["text"]] return responses - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ token: str = chunk.get("text", "") - return token + return StreamingChunk(content=token, meta=chunk) class AI21LabsJurassic2Adapter(BedrockModelAdapter): @@ -357,7 +360,7 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L responses = [completion["data"]["text"] for completion in response_body["completions"]] return responses - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: msg = "Streaming is not supported for AI21 Jurassic 2 models." raise NotImplementedError(msg) @@ -398,14 +401,14 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L responses = [result["outputText"] for result in response_body["results"]] return responses - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. 
- :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ - return chunk.get("outputText", "") + return StreamingChunk(content=chunk.get("outputText", ""), meta=chunk) class MetaLlamaAdapter(BedrockModelAdapter): @@ -442,11 +445,11 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L """ return [response_body["generation"]] - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ - return chunk.get("generation", "") + return StreamingChunk(content=chunk.get("generation", ""), meta=chunk) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py index 162100934..67e833f73 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py @@ -48,19 +48,18 @@ def get_responses(self, response_body: Dict[str, Any]) -> List[ChatMessage]: return self._extract_messages_from_response(response_body) def get_stream_responses( - self, stream: EventStream, stream_handler: Callable[[StreamingChunk], None] + self, stream: EventStream, streaming_callback: Callable[[StreamingChunk], None] ) -> List[ChatMessage]: - tokens: List[str] = [] + streaming_chunks: List[StreamingChunk] = [] last_decoded_chunk: Dict[str, Any] = {} for event in stream: chunk = event.get("chunk") if chunk: last_decoded_chunk = json.loads(chunk["bytes"].decode("utf-8")) - token = self._extract_token_from_stream(last_decoded_chunk) - stream_chunk = StreamingChunk(content=token) # don't extract meta, we care about tokens only - stream_handler(stream_chunk) # callback the stream handler with StreamingChunk - tokens.append(token) - responses = ["".join(tokens).lstrip()] + streaming_chunk = self._build_streaming_chunk(last_decoded_chunk) + streaming_callback(streaming_chunk) # callback the stream handler with StreamingChunk + streaming_chunks.append(streaming_chunk) + responses = ["".join(chunk.content for chunk in streaming_chunks).lstrip()] return [ChatMessage.from_assistant(response, meta=last_decoded_chunk) for response in responses] @staticmethod @@ -142,12 +141,12 @@ def _extract_messages_from_response(self, response_body: Dict[str, Any]) -> List """ @abstractmethod - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: The extracted token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. 
""" @@ -252,16 +251,16 @@ def _extract_messages_from_response(self, response_body: Dict[str, Any]) -> List messages.append(ChatMessage.from_assistant(content["text"], meta=meta)) return messages - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: The extracted token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ if chunk.get("type") == "content_block_delta" and chunk.get("delta", {}).get("type") == "text_delta": - return chunk.get("delta", {}).get("text", "") - return "" + return StreamingChunk(content=chunk.get("delta", {}).get("text", ""), meta=chunk) + return StreamingChunk(content="", meta=chunk) def _to_anthropic_message(self, m: ChatMessage) -> Dict[str, Any]: """ @@ -425,17 +424,17 @@ def _extract_messages_from_response(self, response_body: Dict[str, Any]) -> List messages.append(ChatMessage.from_assistant(response["text"], meta=meta)) return messages - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: The extracted token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ response_chunk = chunk.get("outputs", []) if response_chunk: - return response_chunk[0].get("text", "") - return "" + return StreamingChunk(content=response_chunk[0].get("text", ""), meta=chunk) + return StreamingChunk(content="", meta=chunk) class MetaLlama2ChatAdapter(BedrockModelChatAdapter): @@ -543,11 +542,11 @@ def _extract_messages_from_response(self, response_body: Dict[str, Any]) -> List metadata = {k: v for (k, v) in response_body.items() if k != message_tag} return [ChatMessage.from_assistant(response_body[message_tag], meta=metadata)] - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: The extracted token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. 
""" - return chunk.get("generation", "") + return StreamingChunk(content=chunk.get("generation", ""), meta=chunk) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py index 7485a96c5..206cb0b9a 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py @@ -156,18 +156,29 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: stacklevel=2, ) - def invoke(self, *args, **kwargs): + @component.output_types(replies=List[ChatMessage]) + def run( + self, + messages: List[ChatMessage], + streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, + generation_kwargs: Optional[Dict[str, Any]] = None, + ): """ - Invokes the Amazon Bedrock LLM with the given parameters. The parameters are passed to the Amazon Bedrock - client. + Generates a list of `ChatMessage` response to the given messages using the Amazon Bedrock LLM. - :param args: The positional arguments passed to the generator. - :param kwargs: The keyword arguments passed to the generator. - :returns: List of `ChatMessage` generated by LLM. + :param messages: The messages to generate a response to. + :param streaming_callback: + A callback function that is called when a new token is received from the stream. + :param generation_kwargs: Additional generation keyword arguments passed to the model. + :returns: A dictionary with the following keys: + - `replies`: The generated List of `ChatMessage` objects. """ + generation_kwargs = generation_kwargs or {} + generation_kwargs = generation_kwargs.copy() + + streaming_callback = streaming_callback or self.streaming_callback + generation_kwargs["stream"] = streaming_callback is not None - kwargs = kwargs.copy() - messages: List[ChatMessage] = kwargs.pop("messages", []) # check if the prompt is a list of ChatMessage objects if not ( isinstance(messages, list) @@ -177,39 +188,29 @@ def invoke(self, *args, **kwargs): msg = f"The model {self.model} requires a list of ChatMessage objects as a prompt." 
raise ValueError(msg) - body = self.model_adapter.prepare_body(messages=messages, **{"stop_words": self.stop_words, **kwargs}) + body = self.model_adapter.prepare_body( + messages=messages, **{"stop_words": self.stop_words, **generation_kwargs} + ) try: - if self.streaming_callback: + if streaming_callback: response = self.client.invoke_model_with_response_stream( body=json.dumps(body), modelId=self.model, accept="application/json", contentType="application/json" ) response_stream = response["body"] - responses = self.model_adapter.get_stream_responses( - stream=response_stream, stream_handler=self.streaming_callback + replies = self.model_adapter.get_stream_responses( + stream=response_stream, streaming_callback=streaming_callback ) else: response = self.client.invoke_model( body=json.dumps(body), modelId=self.model, accept="application/json", contentType="application/json" ) response_body = json.loads(response.get("body").read().decode("utf-8")) - responses = self.model_adapter.get_responses(response_body=response_body) + replies = self.model_adapter.get_responses(response_body=response_body) except ClientError as exception: msg = f"Could not inference Amazon Bedrock model {self.model} due: {exception}" raise AmazonBedrockInferenceError(msg) from exception - return responses - - @component.output_types(replies=List[ChatMessage]) - def run(self, messages: List[ChatMessage], generation_kwargs: Optional[Dict[str, Any]] = None): - """ - Generates a list of `ChatMessage` responses to the given messages using the Amazon Bedrock LLM. - - :param messages: The messages to generate a response to. - :param generation_kwargs: Additional generation keyword arguments passed to the model. - :returns: A dictionary with the following keys: - - `replies`: The generated list of `ChatMessage` objects. 
- """ - return {"replies": self.invoke(messages=messages, **(generation_kwargs or {}))} + return {"replies": replies} @classmethod def get_model_adapter(cls, model: str) -> Optional[Type[BedrockModelChatAdapter]]: diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py index b15000aa2..6ef0a4765 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py @@ -1,11 +1,12 @@ import json import logging import re -from typing import Any, ClassVar, Dict, List, Optional, Type, Union +from typing import Any, Callable, ClassVar, Dict, List, Optional, Type from botocore.exceptions import ClientError from haystack import component, default_from_dict, default_to_dict -from haystack.utils.auth import Secret, deserialize_secrets_inplace +from haystack.dataclasses import StreamingChunk +from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable from haystack_integrations.common.amazon_bedrock.errors import ( AmazonBedrockConfigurationError, @@ -25,8 +26,6 @@ ) from .handlers import ( DefaultPromptHandler, - DefaultTokenStreamingHandler, - TokenStreamingHandler, ) logger = logging.getLogger(__name__) @@ -87,6 +86,7 @@ def __init__( aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008 max_length: Optional[int] = 100, truncate: Optional[bool] = True, + streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, **kwargs, ): """ @@ -100,6 +100,8 @@ def __init__( :param aws_profile_name: The AWS profile name. :param max_length: The maximum length of the generated text. :param truncate: Whether to truncate the prompt or not. + :param streaming_callback: A callback function that is called when a new token is received from the stream. + The callback function accepts StreamingChunk as an argument. :param kwargs: Additional keyword arguments to be passed to the model. These arguments are specific to the model. You can find them in the model's documentation. :raises ValueError: If the model name is empty or None. @@ -117,6 +119,7 @@ def __init__( self.aws_session_token = aws_session_token self.aws_region_name = aws_region_name self.aws_profile_name = aws_profile_name + self.streaming_callback = streaming_callback self.kwargs = kwargs def resolve_secret(secret: Optional[Secret]) -> Optional[str]: @@ -158,7 +161,7 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: raise AmazonBedrockConfigurationError(msg) self.model_adapter = model_adapter_cls(model_kwargs=model_input_kwargs, max_length=self.max_length) - def _ensure_token_limit(self, prompt: Union[str, List[Dict[str, str]]]) -> Union[str, List[Dict[str, str]]]: + def _ensure_token_limit(self, prompt: str) -> str: """ Ensures that the prompt and answer token lengths together are within the model_max_length specified during the initialization of the component. @@ -166,14 +169,6 @@ def _ensure_token_limit(self, prompt: Union[str, List[Dict[str, str]]]) -> Union :param prompt: The prompt to be sent to the model. :returns: The resized prompt. 
""" - # the prompt for this model will be of the type str - if isinstance(prompt, List): - msg = ( - "AmazonBedrockGenerator only supports a string as a prompt, " - "while currently, the prompt is of type List." - ) - raise ValueError(msg) - resize_info = self.prompt_handler(prompt) if resize_info["prompt_length"] != resize_info["new_prompt_length"]: logger.warning( @@ -187,31 +182,36 @@ def _ensure_token_limit(self, prompt: Union[str, List[Dict[str, str]]]) -> Union ) return str(resize_info["resized_prompt"]) - def invoke(self, *args, **kwargs): + @component.output_types(replies=List[str]) + def run( + self, + prompt: str, + streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, + generation_kwargs: Optional[Dict[str, Any]] = None, + ): """ - Invokes the model with the given prompt. + Generates a list of string response to the given prompt. - :param args: Additional positional arguments passed to the generator. - :param kwargs: Additional keyword arguments passed to the generator. - :returns: A list of generated responses (strings). + :param prompt: The prompt to generate a response for. + :param streaming_callback: + A callback function that is called when a new token is received from the stream. + :param generation_kwargs: Additional keyword arguments passed to the generator. + :returns: A dictionary with the following keys: + - `replies`: A list of generated responses. + :raises ValueError: If the prompt is empty or None. + :raises AmazonBedrockInferenceError: If the model cannot be invoked. """ - kwargs = kwargs.copy() - prompt: str = kwargs.pop("prompt", None) - stream: bool = kwargs.get("stream", self.model_adapter.model_kwargs.get("stream", False)) - - if not prompt or not isinstance(prompt, (str, list)): - msg = ( - f"The model {self.model} requires a valid prompt, but currently, it has no prompt. " - f"Make sure to provide a prompt in the format that the model expects." - ) - raise ValueError(msg) + generation_kwargs = generation_kwargs or {} + generation_kwargs = generation_kwargs.copy() + streaming_callback = streaming_callback or self.streaming_callback + generation_kwargs["stream"] = streaming_callback is not None if self.truncate: prompt = self._ensure_token_limit(prompt) - body = self.model_adapter.prepare_body(prompt=prompt, **kwargs) + body = self.model_adapter.prepare_body(prompt=prompt, **generation_kwargs) try: - if stream: + if streaming_callback: response = self.client.invoke_model_with_response_stream( body=json.dumps(body), modelId=self.model, @@ -219,11 +219,9 @@ def invoke(self, *args, **kwargs): contentType="application/json", ) response_stream = response["body"] - handler: TokenStreamingHandler = kwargs.get( - "stream_handler", - self.model_adapter.model_kwargs.get("stream_handler", DefaultTokenStreamingHandler()), + replies = self.model_adapter.get_stream_responses( + stream=response_stream, streaming_callback=streaming_callback ) - responses = self.model_adapter.get_stream_responses(stream=response_stream, stream_handler=handler) else: response = self.client.invoke_model( body=json.dumps(body), @@ -232,7 +230,7 @@ def invoke(self, *args, **kwargs): contentType="application/json", ) response_body = json.loads(response.get("body").read().decode("utf-8")) - responses = self.model_adapter.get_responses(response_body=response_body) + replies = self.model_adapter.get_responses(response_body=response_body) except ClientError as exception: msg = ( f"Could not connect to Amazon Bedrock model {self.model}. 
" @@ -241,22 +239,7 @@ def invoke(self, *args, **kwargs): ) raise AmazonBedrockInferenceError(msg) from exception - return responses - - @component.output_types(replies=List[str]) - def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None): - """ - Generates a list of string response to the given prompt. - - :param prompt: Instructions for the model. - :param generation_kwargs: Additional keyword arguments to customize text generation. - These arguments are specific to the model. You can find them in the model's documentation. - :returns: A dictionary with the following keys: - - `replies`: A list of generated responses. - :raises ValueError: If the prompt is empty or None. - :raises AmazonBedrockInferenceError: If the model cannot be invoked. - """ - return {"replies": self.invoke(prompt=prompt, **(generation_kwargs or {}))} + return {"replies": replies} @classmethod def get_model_adapter(cls, model: str) -> Optional[Type[BedrockModelAdapter]]: @@ -278,6 +261,7 @@ def to_dict(self) -> Dict[str, Any]: :returns: Dictionary with serialized data. """ + callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None return default_to_dict( self, aws_access_key_id=self.aws_access_key_id.to_dict() if self.aws_access_key_id else None, @@ -288,6 +272,7 @@ def to_dict(self) -> Dict[str, Any]: model=self.model, max_length=self.max_length, truncate=self.truncate, + streaming_callback=callback_name, **self.kwargs, ) @@ -305,4 +290,8 @@ def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockGenerator": data["init_parameters"], ["aws_access_key_id", "aws_secret_access_key", "aws_session_token", "aws_region_name", "aws_profile_name"], ) + init_params = data.get("init_parameters", {}) + serialized_callback_handler = init_params.get("streaming_callback") + if serialized_callback_handler: + data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler) return default_from_dict(cls, data) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py index f4dc1aa4f..07db2742f 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py @@ -1,4 +1,3 @@ -from abc import ABC, abstractmethod from typing import Dict, Union from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast @@ -61,35 +60,3 @@ def __call__(self, prompt: str, **kwargs) -> Dict[str, Union[str, int]]: "model_max_length": self.model_max_length, "max_length": self.max_length, } - - -class TokenStreamingHandler(ABC): - """ - TokenStreamingHandler implementations handle the streaming of tokens from the stream. - """ - - DONE_MARKER = "[DONE]" - - @abstractmethod - def __call__(self, token_received: str, **kwargs) -> str: - """ - This callback method is called when a new token is received from the stream. - - :param token_received: The token received from the stream. - :param kwargs: Additional keyword arguments passed to the handler. - :returns: The token to be sent to the stream. 
- """ - pass - - -class DefaultTokenStreamingHandler(TokenStreamingHandler): - def __call__(self, token_received, **kwargs) -> str: - """ - This callback method is called when a new token is received from the stream. - - :param token_received: The token received from the stream. - :param kwargs: Additional keyword arguments passed to the handler. - :returns: The token to be sent to the stream. - """ - print(token_received, flush=True, end="") # noqa: T201 - return token_received diff --git a/integrations/amazon_bedrock/tests/test_chat_generator.py b/integrations/amazon_bedrock/tests/test_chat_generator.py index 3e62b56ea..79a04d52b 100644 --- a/integrations/amazon_bedrock/tests/test_chat_generator.py +++ b/integrations/amazon_bedrock/tests/test_chat_generator.py @@ -121,15 +121,6 @@ def test_constructor_with_empty_model(): AmazonBedrockChatGenerator(model="") -def test_invoke_with_no_kwargs(mock_boto3_session): - """ - Test invoke raises an error if no messages are provided - """ - layer = AmazonBedrockChatGenerator(model="anthropic.claude-v2") - with pytest.raises(ValueError, match="The model anthropic.claude-v2 requires"): - layer.invoke() - - @pytest.mark.parametrize( "model, expected_model_adapter", [ diff --git a/integrations/amazon_bedrock/tests/test_generator.py b/integrations/amazon_bedrock/tests/test_generator.py index 65463caae..f0233888c 100644 --- a/integrations/amazon_bedrock/tests/test_generator.py +++ b/integrations/amazon_bedrock/tests/test_generator.py @@ -2,6 +2,7 @@ from unittest.mock import MagicMock, call, patch import pytest +from haystack.dataclasses import StreamingChunk from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockGenerator from haystack_integrations.components.generators.amazon_bedrock.adapters import ( @@ -34,6 +35,7 @@ def test_to_dict(mock_boto3_session): "max_length": 99, "truncate": False, "temperature": 10, + "streaming_callback": None, }, } @@ -120,15 +122,6 @@ def test_constructor_with_empty_model(): AmazonBedrockGenerator(model="") -def test_invoke_with_no_kwargs(mock_boto3_session): - """ - Test invoke raises an error if no prompt is provided - """ - layer = AmazonBedrockGenerator(model="anthropic.claude-v2") - with pytest.raises(ValueError, match="The model anthropic.claude-v2 requires a valid prompt."): - layer.invoke() - - def test_short_prompt_is_not_truncated(mock_boto3_session): """ Test that a short prompt is not truncated @@ -224,13 +217,13 @@ def test_long_prompt_is_not_truncated_when_truncate_false(mock_boto3_session): generator.model_adapter.get_responses = MagicMock(return_value=["response"]) # Invoke the generator - generator.invoke(prompt=long_prompt_text) + generator.run(prompt=long_prompt_text) # Ensure _ensure_token_limit was not called mock_ensure_token_limit.assert_not_called(), # Check the prompt passed to prepare_body - generator.model_adapter.prepare_body.assert_called_with(prompt=long_prompt_text) + generator.model_adapter.prepare_body.assert_called_with(prompt=long_prompt_text, stream=False) @pytest.mark.parametrize( @@ -407,7 +400,7 @@ def test_get_responses_leading_whitespace(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"delta": {"text": " This"}}'}}, @@ -417,35 +410,31 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"delta": {"text": " response."}}'}}, ] - stream_handler_mock.side_effect = 
lambda token_received, **kwargs: token_received - adapter = AnthropicClaudeAdapter(model_kwargs={}, max_length=99) expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"delta": {"text": " This"}}), - call(" is", event_data={"delta": {"text": " is"}}), - call(" a", event_data={"delta": {"text": " a"}}), - call(" single", event_data={"delta": {"text": " single"}}), - call(" response.", event_data={"delta": {"text": " response."}}), + call(StreamingChunk(content=" This", meta={"delta": {"text": " This"}})), + call(StreamingChunk(content=" is", meta={"delta": {"text": " is"}})), + call(StreamingChunk(content=" a", meta={"delta": {"text": " a"}})), + call(StreamingChunk(content=" single", meta={"delta": {"text": " single"}})), + call(StreamingChunk(content=" response.", meta={"delta": {"text": " response."}})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = AnthropicClaudeAdapter(model_kwargs={}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() class TestAnthropicClaudeAdapterNoMessagesAPI: @@ -553,7 +542,7 @@ def test_get_responses_leading_whitespace(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"completion": " This"}'}}, @@ -563,35 +552,31 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"completion": " response."}'}}, ] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = AnthropicClaudeAdapter(model_kwargs={"use_messages_api": False}, max_length=99) expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"completion": " This"}), - call(" is", event_data={"completion": " is"}), - call(" a", event_data={"completion": " a"}), - call(" single", event_data={"completion": " single"}), - call(" response.", event_data={"completion": " response."}), + call(StreamingChunk(content=" This", meta={"completion": " This"})), + call(StreamingChunk(content=" is", meta={"completion": " is"})), + call(StreamingChunk(content=" a", meta={"completion": " a"})), + call(StreamingChunk(content=" single", meta={"completion": " single"})), + call(StreamingChunk(content=" response.", meta={"completion": " response."})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() 
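
Taken together, the generator and adapter changes above replace the old TokenStreamingHandler plumbing with Haystack's StreamingChunk callback. A hypothetical usage sketch of the new run() signature follows; it assumes valid AWS credentials and access to the named Bedrock model, which are not provided by this patch.

from haystack.dataclasses import StreamingChunk
from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockGenerator

def on_chunk(chunk: StreamingChunk) -> None:
    # chunk.content carries the token text; chunk.meta carries the raw stream event.
    print(chunk.content, end="", flush=True)

generator = AmazonBedrockGenerator(model="anthropic.claude-v2", streaming_callback=on_chunk)
result = generator.run(prompt="In one sentence, what is Amazon Bedrock?")
print()
print(result["replies"])
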
stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = AnthropicClaudeAdapter(model_kwargs={"use_messages_api": False}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() class TestMistralAdapter: @@ -686,7 +671,7 @@ def test_get_responses(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"outputs": [{"text": " This"}]}'}}, @@ -696,35 +681,33 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"outputs": [{"text": " response."}]}'}}, ] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = MistralAdapter(model_kwargs={}, max_length=99) expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"outputs": [{"text": " This"}]}), - call(" is", event_data={"outputs": [{"text": " is"}]}), - call(" a", event_data={"outputs": [{"text": " a"}]}), - call(" single", event_data={"outputs": [{"text": " single"}]}), - call(" response.", event_data={"outputs": [{"text": " response."}]}), + call(StreamingChunk(content=" This", meta={"outputs": [{"text": " This"}]})), + call(StreamingChunk(content=" is", meta={"outputs": [{"text": " is"}]})), + call(StreamingChunk(content=" a", meta={"outputs": [{"text": " a"}]})), + call(StreamingChunk(content=" single", meta={"outputs": [{"text": " single"}]})), + call(StreamingChunk(content=" response.", meta={"outputs": [{"text": " response."}]})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received + streaming_callback_mock.side_effect = lambda token_received, **kwargs: token_received adapter = MistralAdapter(model_kwargs={}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() class TestCohereCommandAdapter: @@ -881,7 +864,7 @@ def test_get_responses_multiple_responses(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"text": " This"}'}}, @@ -892,36 +875,32 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"finish_reason": "MAX_TOKENS", "is_finished": true}'}}, ] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = CohereCommandAdapter(model_kwargs={}, max_length=99) 
expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"text": " This"}), - call(" is", event_data={"text": " is"}), - call(" a", event_data={"text": " a"}), - call(" single", event_data={"text": " single"}), - call(" response.", event_data={"text": " response."}), - call("", event_data={"finish_reason": "MAX_TOKENS", "is_finished": True}), + call(StreamingChunk(content=" This", meta={"text": " This"})), + call(StreamingChunk(content=" is", meta={"text": " is"})), + call(StreamingChunk(content=" a", meta={"text": " a"})), + call(StreamingChunk(content=" single", meta={"text": " single"})), + call(StreamingChunk(content=" response.", meta={"text": " response."})), + call(StreamingChunk(content="", meta={"finish_reason": "MAX_TOKENS", "is_finished": True})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = CohereCommandAdapter(model_kwargs={}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() class TestCohereCommandRAdapter: @@ -1025,11 +1004,11 @@ def test_extract_completions_from_response(self) -> None: completions = adapter._extract_completions_from_response(response_body=response_body) assert completions == ["response"] - def test_extract_token_from_stream(self) -> None: + def test_build_chunk(self) -> None: adapter = CohereCommandRAdapter(model_kwargs={}, max_length=100) chunk = {"text": "response_token"} - token = adapter._extract_token_from_stream(chunk=chunk) - assert token == "response_token" + streaming_chunk = adapter._build_streaming_chunk(chunk=chunk) + assert streaming_chunk == StreamingChunk(content="response_token", meta=chunk) class TestAI21LabsJurassic2Adapter: @@ -1288,7 +1267,7 @@ def test_get_responses_multiple_responses(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"outputText": " This"}'}}, @@ -1298,35 +1277,31 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"outputText": " response."}'}}, ] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = AmazonTitanAdapter(model_kwargs={}, max_length=99) expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"outputText": " This"}), - call(" is", event_data={"outputText": " is"}), - call(" a", event_data={"outputText": " a"}), - call(" single", event_data={"outputText": " single"}), - call(" 
response.", event_data={"outputText": " response."}), + call(StreamingChunk(content=" This", meta={"outputText": " This"})), + call(StreamingChunk(content=" is", meta={"outputText": " is"})), + call(StreamingChunk(content=" a", meta={"outputText": " a"})), + call(StreamingChunk(content=" single", meta={"outputText": " single"})), + call(StreamingChunk(content=" response.", meta={"outputText": " response."})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = AmazonTitanAdapter(model_kwargs={}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() class TestMetaLlamaAdapter: @@ -1417,7 +1392,7 @@ def test_get_responses_leading_whitespace(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"generation": " This"}'}}, @@ -1427,32 +1402,28 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"generation": " response."}'}}, ] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = MetaLlamaAdapter(model_kwargs={}, max_length=99) expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"generation": " This"}), - call(" is", event_data={"generation": " is"}), - call(" a", event_data={"generation": " a"}), - call(" single", event_data={"generation": " single"}), - call(" response.", event_data={"generation": " response."}), + call(StreamingChunk(content=" This", meta={"generation": " This"})), + call(StreamingChunk(content=" is", meta={"generation": " is"})), + call(StreamingChunk(content=" a", meta={"generation": " a"})), + call(StreamingChunk(content=" single", meta={"generation": " single"})), + call(StreamingChunk(content=" response.", meta={"generation": " response."})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = MetaLlamaAdapter(model_kwargs={}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() From 93d2c6824207f0e29928fc82516b2327ff0d54d2 Mon Sep 17 00:00:00 2001 From: HaystackBot Date: Mon, 12 Aug 2024 13:41:07 +0000 Subject: [PATCH 3/6] Update the changelog --- integrations/amazon_bedrock/CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git 
a/integrations/amazon_bedrock/CHANGELOG.md b/integrations/amazon_bedrock/CHANGELOG.md index f1e8b69d0..33a1598a8 100644 --- a/integrations/amazon_bedrock/CHANGELOG.md +++ b/integrations/amazon_bedrock/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## [integrations/amazon_bedrock-v0.10.0] - 2024-08-12 + +### ๐Ÿ› Bug Fixes + +- Support streaming_callback param in amazon bedrock generators (#927) + +### Docs + +- Update AmazonBedrockChatGenerator docstrings (#949) +- Update AmazonBedrockGenerator docstrings (#956) + ## [integrations/amazon_bedrock-v0.9.3] - 2024-07-17 ### ๐Ÿš€ Features From a8b2de9d86621aba2243e2d6e350e082c087700d Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Mon, 12 Aug 2024 17:19:02 +0200 Subject: [PATCH 4/6] test: do not retry tests in `hatch run test` command (#954) * do not retry tests in hatch run test command * fix * hatch config improvements --- .github/workflows/amazon_bedrock.yml | 6 +++--- .github/workflows/amazon_sagemaker.yml | 4 ++-- .github/workflows/anthropic.yml | 4 ++-- .github/workflows/astra.yml | 4 ++-- .github/workflows/chroma.yml | 4 ++-- .github/workflows/cohere.yml | 4 ++-- .github/workflows/deepeval.yml | 4 ++-- .github/workflows/elasticsearch.yml | 4 ++-- .github/workflows/fastembed.yml | 4 ++-- .github/workflows/google_ai.yml | 4 ++-- .github/workflows/google_vertex.yml | 4 ++-- .github/workflows/instructor_embedders.yml | 4 ++-- .github/workflows/jina.yml | 4 ++-- .github/workflows/langfuse.yml | 4 ++-- .github/workflows/llama_cpp.yml | 4 ++-- .github/workflows/mistral.yml | 4 ++-- .github/workflows/mongodb_atlas.yml | 4 ++-- .github/workflows/nvidia.yml | 4 ++-- .github/workflows/ollama.yml | 4 ++-- .github/workflows/opensearch.yml | 4 ++-- .github/workflows/optimum.yml | 4 ++-- .github/workflows/pgvector.yml | 4 ++-- .github/workflows/pinecone.yml | 4 ++-- .github/workflows/qdrant.yml | 4 ++-- .github/workflows/ragas.yml | 4 ++-- .github/workflows/unstructured.yml | 4 ++-- .github/workflows/weaviate.yml | 4 ++-- integrations/amazon_bedrock/pyproject.toml | 6 ++++-- integrations/amazon_sagemaker/pyproject.toml | 6 ++++-- integrations/anthropic/pyproject.toml | 6 ++++-- integrations/astra/pyproject.toml | 6 ++++-- integrations/chroma/pyproject.toml | 6 ++++-- integrations/cohere/pyproject.toml | 6 ++++-- integrations/deepeval/pyproject.toml | 6 ++++-- integrations/elasticsearch/pyproject.toml | 6 ++++-- integrations/fastembed/pyproject.toml | 6 ++++-- integrations/google_ai/pyproject.toml | 6 ++++-- integrations/google_vertex/pyproject.toml | 6 ++++-- integrations/instructor_embedders/pyproject.toml | 6 ++++-- integrations/jina/pyproject.toml | 6 ++++-- integrations/langfuse/pyproject.toml | 6 ++++-- integrations/llama_cpp/pyproject.toml | 6 ++++-- integrations/mistral/pyproject.toml | 6 ++++-- integrations/mongodb_atlas/pyproject.toml | 6 ++++-- integrations/nvidia/pyproject.toml | 6 ++++-- integrations/ollama/pyproject.toml | 6 ++++-- integrations/opensearch/pyproject.toml | 6 ++++-- integrations/optimum/pyproject.toml | 6 ++++-- integrations/pgvector/pyproject.toml | 6 ++++-- integrations/pinecone/pyproject.toml | 6 ++++-- integrations/qdrant/pyproject.toml | 6 ++++-- integrations/ragas/pyproject.toml | 6 ++++-- integrations/unstructured/pyproject.toml | 6 ++++-- integrations/weaviate/pyproject.toml | 6 ++++-- 54 files changed, 163 insertions(+), 109 deletions(-) diff --git a/.github/workflows/amazon_bedrock.yml b/.github/workflows/amazon_bedrock.yml index eff39e2af..2057d4bdf 100644 --- a/.github/workflows/amazon_bedrock.yml +++ 
b/.github/workflows/amazon_bedrock.yml @@ -63,7 +63,7 @@ jobs: run: hatch run docs - name: Run unit tests - run: hatch run cov -m "not integration" + run: hatch run cov-retry -m "not integration" # Do not authenticate on pull requests from forks - name: AWS authentication @@ -76,13 +76,13 @@ jobs: # Do not run integration tests on pull requests from forks - name: Run integration tests if: github.event.pull_request.head.repo.full_name == github.repository - run: hatch run cov -m "integration" + run: hatch run cov-retry -m "integration" - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/amazon_sagemaker.yml b/.github/workflows/amazon_sagemaker.yml index 8ebbf6c65..ed0a571e6 100644 --- a/.github/workflows/amazon_sagemaker.yml +++ b/.github/workflows/amazon_sagemaker.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/anthropic.yml b/.github/workflows/anthropic.yml index 858b7be35..c4cdeb2d1 100644 --- a/.github/workflows/anthropic.yml +++ b/.github/workflows/anthropic.yml @@ -54,13 +54,13 @@ jobs: run: hatch run lint:all - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/astra.yml b/.github/workflows/astra.yml index a264fdfd5..dcfc00c75 100644 --- a/.github/workflows/astra.yml +++ b/.github/workflows/astra.yml @@ -61,13 +61,13 @@ jobs: env: ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/chroma.yml b/.github/workflows/chroma.yml index 496ddaa03..26b6287bd 100644 --- a/.github/workflows/chroma.yml +++ b/.github/workflows/chroma.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly 
failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/cohere.yml b/.github/workflows/cohere.yml index 05b84af4b..00a8ee2ed 100644 --- a/.github/workflows/cohere.yml +++ b/.github/workflows/cohere.yml @@ -58,13 +58,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/deepeval.yml b/.github/workflows/deepeval.yml index 0048528bc..23de1a3f4 100644 --- a/.github/workflows/deepeval.yml +++ b/.github/workflows/deepeval.yml @@ -58,13 +58,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/elasticsearch.yml b/.github/workflows/elasticsearch.yml index 4cf34b301..476e832b5 100644 --- a/.github/workflows/elasticsearch.yml +++ b/.github/workflows/elasticsearch.yml @@ -55,13 +55,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 32b276466..e389bf3a4 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -42,13 +42,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/google_ai.yml b/.github/workflows/google_ai.yml index 046b92463..1b4b2e496 100644 --- a/.github/workflows/google_ai.yml +++ b/.github/workflows/google_ai.yml @@ -58,13 +58,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/google_vertex.yml b/.github/workflows/google_vertex.yml index b4006924c..78ba5694b 100644 --- a/.github/workflows/google_vertex.yml +++ b/.github/workflows/google_vertex.yml @@ -57,13 +57,13 @@ jobs: 
run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/instructor_embedders.yml b/.github/workflows/instructor_embedders.yml index 9d84066a7..f12f4d696 100644 --- a/.github/workflows/instructor_embedders.yml +++ b/.github/workflows/instructor_embedders.yml @@ -35,13 +35,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/jina.yml b/.github/workflows/jina.yml index cd0a55ad4..00af6eb45 100644 --- a/.github/workflows/jina.yml +++ b/.github/workflows/jina.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/langfuse.yml b/.github/workflows/langfuse.yml index edbec4840..8a10cf241 100644 --- a/.github/workflows/langfuse.yml +++ b/.github/workflows/langfuse.yml @@ -60,13 +60,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/llama_cpp.yml b/.github/workflows/llama_cpp.yml index ec86ab268..a9480ca96 100644 --- a/.github/workflows/llama_cpp.yml +++ b/.github/workflows/llama_cpp.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/mistral.yml b/.github/workflows/mistral.yml index efc8ed065..e62008906 100644 --- a/.github/workflows/mistral.yml +++ b/.github/workflows/mistral.yml @@ -58,13 +58,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m 
"not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/mongodb_atlas.yml b/.github/workflows/mongodb_atlas.yml index 9c4f17ba0..3d1ad5101 100644 --- a/.github/workflows/mongodb_atlas.yml +++ b/.github/workflows/mongodb_atlas.yml @@ -54,13 +54,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/nvidia.yml b/.github/workflows/nvidia.yml index 1ae3a0f1a..0d39a4d91 100644 --- a/.github/workflows/nvidia.yml +++ b/.github/workflows/nvidia.yml @@ -55,7 +55,7 @@ jobs: run: hatch run lint:all - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Generate docs if: matrix.python-version == '3.9' && runner.os == 'Linux' @@ -65,7 +65,7 @@ jobs: if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/ollama.yml b/.github/workflows/ollama.yml index 7a6465e16..43af485b7 100644 --- a/.github/workflows/ollama.yml +++ b/.github/workflows/ollama.yml @@ -75,13 +75,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/opensearch.yml b/.github/workflows/opensearch.yml index 46ce2e6a5..48169a75f 100644 --- a/.github/workflows/opensearch.yml +++ b/.github/workflows/opensearch.yml @@ -55,13 +55,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/optimum.yml b/.github/workflows/optimum.yml index fe0421d59..c33baa7f8 100644 --- a/.github/workflows/optimum.yml +++ b/.github/workflows/optimum.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/pgvector.yml b/.github/workflows/pgvector.yml index 
59422298a..0fe20e037 100644 --- a/.github/workflows/pgvector.yml +++ b/.github/workflows/pgvector.yml @@ -61,13 +61,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/pinecone.yml b/.github/workflows/pinecone.yml index 8f963854d..9e143005b 100644 --- a/.github/workflows/pinecone.yml +++ b/.github/workflows/pinecone.yml @@ -63,13 +63,13 @@ jobs: - name: Run tests env: INDEX_NAME: ${{ matrix.INDEX_NAME }} - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/qdrant.yml b/.github/workflows/qdrant.yml index 5e17c16cb..116225b2d 100644 --- a/.github/workflows/qdrant.yml +++ b/.github/workflows/qdrant.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/ragas.yml b/.github/workflows/ragas.yml index de53abd94..c4757e704 100644 --- a/.github/workflows/ragas.yml +++ b/.github/workflows/ragas.yml @@ -58,13 +58,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/unstructured.yml b/.github/workflows/unstructured.yml index c1eb0b4ea..e4b640275 100644 --- a/.github/workflows/unstructured.yml +++ b/.github/workflows/unstructured.yml @@ -69,13 +69,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/weaviate.yml b/.github/workflows/weaviate.yml index 8e1bbbc4f..5e29eafe7 100644 --- a/.github/workflows/weaviate.yml +++ b/.github/workflows/weaviate.yml @@ -54,13 +54,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: 
github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/integrations/amazon_bedrock/pyproject.toml b/integrations/amazon_bedrock/pyproject.toml index 2a4f88960..f4a410dbd 100644 --- a/integrations/amazon_bedrock/pyproject.toml +++ b/integrations/amazon_bedrock/pyproject.toml @@ -49,10 +49,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11", "3.12"] diff --git a/integrations/amazon_sagemaker/pyproject.toml b/integrations/amazon_sagemaker/pyproject.toml index 40a2a1b16..f8050bb48 100644 --- a/integrations/amazon_sagemaker/pyproject.toml +++ b/integrations/amazon_sagemaker/pyproject.toml @@ -52,10 +52,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] diff --git a/integrations/anthropic/pyproject.toml b/integrations/anthropic/pyproject.toml index e5cfdcbce..3f8c9812b 100644 --- a/integrations/anthropic/pyproject.toml +++ b/integrations/anthropic/pyproject.toml @@ -49,10 +49,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11", "3.12"] diff --git a/integrations/astra/pyproject.toml b/integrations/astra/pyproject.toml index 9bbf8e07b..7d543ddc9 100644 --- a/integrations/astra/pyproject.toml +++ b/integrations/astra/pyproject.toml @@ -48,10 +48,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown 
pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11"] diff --git a/integrations/chroma/pyproject.toml b/integrations/chroma/pyproject.toml index 12a4a4b2b..b4591e041 100644 --- a/integrations/chroma/pyproject.toml +++ b/integrations/chroma/pyproject.toml @@ -49,10 +49,12 @@ dependencies = [ "databind-core<4.5.0", # FIXME: the latest 4.5.0 causes loops in pip resolver ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/cohere/pyproject.toml b/integrations/cohere/pyproject.toml index 4d1ab36dd..04fe15585 100644 --- a/integrations/cohere/pyproject.toml +++ b/integrations/cohere/pyproject.toml @@ -43,10 +43,12 @@ git_describe_command = 'git describe --tags --match="integrations/cohere-v[0-9]* [tool.hatch.envs.default] dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/deepeval/pyproject.toml b/integrations/deepeval/pyproject.toml index 4e93c6c41..44d89cb11 100644 --- a/integrations/deepeval/pyproject.toml +++ b/integrations/deepeval/pyproject.toml @@ -43,10 +43,12 @@ git_describe_command = 'git describe --tags --match="integrations/deepeval-v[0-9 [tool.hatch.envs.default] dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/elasticsearch/pyproject.toml b/integrations/elasticsearch/pyproject.toml index 87bf5a167..cb9281030 100644 --- a/integrations/elasticsearch/pyproject.toml +++ b/integrations/elasticsearch/pyproject.toml @@ -49,10 +49,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] 
+cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/fastembed/pyproject.toml b/integrations/fastembed/pyproject.toml index b0a367ee4..9afd344c9 100644 --- a/integrations/fastembed/pyproject.toml +++ b/integrations/fastembed/pyproject.toml @@ -50,10 +50,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/google_ai/pyproject.toml b/integrations/google_ai/pyproject.toml index 3a2382477..db958a487 100644 --- a/integrations/google_ai/pyproject.toml +++ b/integrations/google_ai/pyproject.toml @@ -48,10 +48,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11"] diff --git a/integrations/google_vertex/pyproject.toml b/integrations/google_vertex/pyproject.toml index 81d22c21b..747bbecbf 100644 --- a/integrations/google_vertex/pyproject.toml +++ b/integrations/google_vertex/pyproject.toml @@ -48,10 +48,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11"] diff --git a/integrations/instructor_embedders/pyproject.toml b/integrations/instructor_embedders/pyproject.toml index 47afee31f..0543a9a88 100644 --- a/integrations/instructor_embedders/pyproject.toml +++ b/integrations/instructor_embedders/pyproject.toml @@ -71,10 +71,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.test.matrix]] diff --git a/integrations/jina/pyproject.toml b/integrations/jina/pyproject.toml 
index 494d7d1ea..fa2fd50ed 100644 --- a/integrations/jina/pyproject.toml +++ b/integrations/jina/pyproject.toml @@ -45,10 +45,12 @@ git_describe_command = 'git describe --tags --match="integrations/jina-v[0-9]*"' [tool.hatch.envs.default] dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] diff --git a/integrations/langfuse/pyproject.toml b/integrations/langfuse/pyproject.toml index 41b1c1da9..cf7b85b64 100644 --- a/integrations/langfuse/pyproject.toml +++ b/integrations/langfuse/pyproject.toml @@ -49,10 +49,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] diff --git a/integrations/llama_cpp/pyproject.toml b/integrations/llama_cpp/pyproject.toml index 8c539cc08..2efb15d53 100644 --- a/integrations/llama_cpp/pyproject.toml +++ b/integrations/llama_cpp/pyproject.toml @@ -53,10 +53,12 @@ dependencies = [ "transformers[sentencepiece]", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11", "3.12"] diff --git a/integrations/mistral/pyproject.toml b/integrations/mistral/pyproject.toml index abef1d747..bcb4f5999 100644 --- a/integrations/mistral/pyproject.toml +++ b/integrations/mistral/pyproject.toml @@ -43,10 +43,12 @@ git_describe_command = 'git describe --tags --match="integrations/mistral-v[0-9] [tool.hatch.envs.default] dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/mongodb_atlas/pyproject.toml b/integrations/mongodb_atlas/pyproject.toml index 9fdfa3385..170f6e94d 100644 --- 
a/integrations/mongodb_atlas/pyproject.toml
+++ b/integrations/mongodb_atlas/pyproject.toml
@@ -50,10 +50,12 @@ dependencies = [
   "haystack-pydoc-tools",
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml
index f35485e9c..af1d806ef 100644
--- a/integrations/nvidia/pyproject.toml
+++ b/integrations/nvidia/pyproject.toml
@@ -44,10 +44,12 @@ git_describe_command = 'git describe --tags --match="integrations/nvidia-v[0-9]*
 [tool.hatch.envs.default]
 dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools", "requests_mock"]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/ollama/pyproject.toml b/integrations/ollama/pyproject.toml
index 3b7119dfd..5187de31f 100644
--- a/integrations/ollama/pyproject.toml
+++ b/integrations/ollama/pyproject.toml
@@ -53,10 +53,12 @@ dependencies = [
   "haystack-pydoc-tools",
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/opensearch/pyproject.toml b/integrations/opensearch/pyproject.toml
index 842b46415..b7e5e3da6 100644
--- a/integrations/opensearch/pyproject.toml
+++ b/integrations/opensearch/pyproject.toml
@@ -50,10 +50,12 @@ dependencies = [
   "boto3",
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
diff --git a/integrations/optimum/pyproject.toml b/integrations/optimum/pyproject.toml
index d5d00c2cf..2e0fb26a4 100644
--- a/integrations/optimum/pyproject.toml
+++ b/integrations/optimum/pyproject.toml
@@ -63,10 +63,12 @@ dependencies = [
   "setuptools", # FIXME: the latest 4.5.0 causes loops in pip resolver
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/pgvector/pyproject.toml b/integrations/pgvector/pyproject.toml
index 10d00606f..7f31a5203 100644
--- a/integrations/pgvector/pyproject.toml
+++ b/integrations/pgvector/pyproject.toml
@@ -50,10 +50,12 @@ dependencies = [
   "haystack-pydoc-tools",
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/pinecone/pyproject.toml b/integrations/pinecone/pyproject.toml
index ab764589b..866385dd3 100644
--- a/integrations/pinecone/pyproject.toml
+++ b/integrations/pinecone/pyproject.toml
@@ -54,10 +54,12 @@ dependencies = [
 [tool.hatch.envs.default.scripts]
 # Pinecone tests are slow (require HTTP requests), so we run them in parallel
 # with pytest-xdist (https://pytest-xdist.readthedocs.io/en/stable/distribution.html)
-test = "pytest -n auto --maxprocesses=2 --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest -n auto --maxprocesses=2 --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest -n auto --maxprocesses=2 -x {args:tests}"
+test-cov = "coverage run -m pytest -n auto --maxprocesses=2 {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/qdrant/pyproject.toml b/integrations/qdrant/pyproject.toml
index d43926417..8b9c44cc7 100644
--- a/integrations/qdrant/pyproject.toml
+++ b/integrations/qdrant/pyproject.toml
@@ -46,10 +46,12 @@ git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*
 [tool.hatch.envs.default]
 dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/ragas/pyproject.toml b/integrations/ragas/pyproject.toml
index 3a32d27a7..edc33eee1 100644
--- a/integrations/ragas/pyproject.toml
+++ b/integrations/ragas/pyproject.toml
@@ -43,10 +43,12 @@ git_describe_command = 'git describe --tags --match="integrations/ragas-v[0-9]*"
 [tool.hatch.envs.default]
 dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools", "pytest-asyncio"]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/unstructured/pyproject.toml b/integrations/unstructured/pyproject.toml
index d00f60e3a..b5de6c66a 100644
--- a/integrations/unstructured/pyproject.toml
+++ b/integrations/unstructured/pyproject.toml
@@ -48,10 +48,12 @@ dependencies = [
   "haystack-pydoc-tools",
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/weaviate/pyproject.toml b/integrations/weaviate/pyproject.toml
index b0aa15143..14b60fe12 100644
--- a/integrations/weaviate/pyproject.toml
+++ b/integrations/weaviate/pyproject.toml
@@ -49,10 +49,12 @@ git_describe_command = 'git describe --tags --match="integrations/weaviate-v[0-9
 [tool.hatch.envs.default]
 dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "ipython"]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
Batista" Date: Mon, 12 Aug 2024 17:46:03 +0200 Subject: [PATCH 5/6] refactor: change meta data fields (#911) * initial import * formatting * fixing tests * removing warnings * linting issues * fixes due to conflicts --- .../amazon_bedrock/chat/chat_generator.py | 13 ++++++------- .../generators/anthropic/chat/chat_generator.py | 14 +++++++------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py index 206cb0b9a..f7bb0ba23 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py @@ -1,7 +1,6 @@ import json import logging import re -import warnings from typing import Any, Callable, ClassVar, Dict, List, Optional, Type from botocore.exceptions import ClientError @@ -150,12 +149,6 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: self.stop_words = stop_words or [] self.streaming_callback = streaming_callback - warnings.warn( - "The `meta` output of the AmazonBedrockChatGenerator will change in the next release to be inline with " - "OpenAI `meta`output keys.", - stacklevel=2, - ) - @component.output_types(replies=List[ChatMessage]) def run( self, @@ -210,6 +203,12 @@ def run( msg = f"Could not inference Amazon Bedrock model {self.model} due: {exception}" raise AmazonBedrockInferenceError(msg) from exception + # rename the meta key to be inline with OpenAI meta output keys + for response in replies: + if response.meta is not None and "usage" in response.meta: + response.meta["usage"]["prompt_tokens"] = response.meta["usage"].pop("input_tokens") + response.meta["usage"]["completion_tokens"] = response.meta["usage"].pop("output_tokens") + return {"replies": replies} @classmethod diff --git a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py index 06b3dc353..9954f08c5 100644 --- a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py +++ b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py @@ -1,6 +1,5 @@ import dataclasses import json -import warnings from typing import Any, Callable, ClassVar, Dict, List, Optional, Union from haystack import component, default_from_dict, default_to_dict, logging @@ -115,12 +114,6 @@ def __init__( self.client = Anthropic(api_key=self.api_key.resolve_value()) self.ignore_tools_thinking_messages = ignore_tools_thinking_messages - warnings.warn( - "The `meta` output of the AnthropicChatGenerator will change in the next release to be inline with " - "OpenAI `meta`output keys.", - stacklevel=2, - ) - def _get_telemetry_data(self) -> Dict[str, Any]: """ Data that is sent to Posthog for usage analytics. 
diff --git a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py
index 06b3dc353..9954f08c5 100644
--- a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py
+++ b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py
@@ -1,6 +1,5 @@
 import dataclasses
 import json
-import warnings
 from typing import Any, Callable, ClassVar, Dict, List, Optional, Union
 
 from haystack import component, default_from_dict, default_to_dict, logging
@@ -115,12 +114,6 @@ def __init__(
         self.client = Anthropic(api_key=self.api_key.resolve_value())
         self.ignore_tools_thinking_messages = ignore_tools_thinking_messages
 
-        warnings.warn(
-            "The `meta` output of the AnthropicChatGenerator will change in the next release to be inline with "
-            "OpenAI `meta`output keys.",
-            stacklevel=2,
-        )
-
     def _get_telemetry_data(self) -> Dict[str, Any]:
         """
         Data that is sent to Posthog for usage analytics.
@@ -220,6 +213,7 @@ def run(self, messages: List[ChatMessage], generation_kwargs: Optional[Dict[str,
                     # capture stop reason and stop sequence
                     delta = stream_event
             completions = [self._connect_chunks(chunks, start_event, delta)]
+        # if streaming is disabled, the response is an Anthropic Message
         elif isinstance(response, Message):
             has_tools_msgs = any(isinstance(content_block, ToolUseBlock) for content_block in response.content)
@@ -227,6 +221,12 @@ def run(self, messages: List[ChatMessage], generation_kwargs: Optional[Dict[str,
                 response.content = [block for block in response.content if isinstance(block, ToolUseBlock)]
             completions = [self._build_message(content_block, response) for content_block in response.content]
 
+        # rename the meta key to be inline with OpenAI meta output keys
+        for response in completions:
+            if response.meta is not None and "usage" in response.meta:
+                response.meta["usage"]["prompt_tokens"] = response.meta["usage"].pop("input_tokens")
+                response.meta["usage"]["completion_tokens"] = response.meta["usage"].pop("output_tokens")
+
         return {"replies": completions}
 
     def _build_message(self, content_block: Union[TextBlock, ToolUseBlock], message: Message) -> ChatMessage:

From 0451e6f43ad5731887ab0aa2aa8e4de9020913e6 Mon Sep 17 00:00:00 2001
From: HaystackBot
Date: Mon, 12 Aug 2024 15:56:05 +0000
Subject: [PATCH 6/6] Update the changelog

---
 integrations/amazon_bedrock/CHANGELOG.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/integrations/amazon_bedrock/CHANGELOG.md b/integrations/amazon_bedrock/CHANGELOG.md
index 33a1598a8..d347c08d9 100644
--- a/integrations/amazon_bedrock/CHANGELOG.md
+++ b/integrations/amazon_bedrock/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Changelog
 
+## [integrations/amazon_bedrock-v1.0.0] - 2024-08-12
+
+### 🚜 Refactor
+
+- Change meta data fields (#911)
+
+### 🧪 Testing
+
+- Do not retry tests in `hatch run test` command (#954)
+
 ## [integrations/amazon_bedrock-v0.10.0] - 2024-08-12
 
 ### 🐛 Bug Fixes