From f03073f995fa39a88f8ca4b14438ee6a6aa3c892 Mon Sep 17 00:00:00 2001 From: Rashmi Pawar <168514198+raspawar@users.noreply.github.com> Date: Mon, 12 Aug 2024 14:30:16 +0530 Subject: [PATCH 1/6] Add default model for NVIDIA HayStack local NIM endpoints (#915) * initial embedder code * default model code * docs: update model docstring * tests: add userwarning * docs: literal lint fix * review changes * remove pydantic dependency * move backend, nim_backend under utils * move is_hosted to warm_up * test cases, docstring fix * error message updation Co-authored-by: Madeesh Kannan * move is_hosted code to util * remove backend code * update import for is_hosted * remove util and move code to utils * fix api key issue for failing test cases * Update integrations/nvidia/tests/conftest.py --------- Co-authored-by: Madeesh Kannan --- integrations/nvidia/pyproject.toml | 4 +- .../embedders/nvidia/_nim_backend.py | 52 ----------- .../components/embedders/nvidia/backend.py | 29 ------- .../embedders/nvidia/document_embedder.py | 36 ++++++-- .../embedders/nvidia/text_embedder.py | 36 ++++++-- .../components/generators/nvidia/backend.py | 29 ------- .../components/generators/nvidia/generator.py | 38 ++++++-- .../utils/nvidia/__init__.py | 5 +- .../nvidia/nim_backend.py} | 57 +++++++++++- .../utils/nvidia/utils.py | 8 ++ integrations/nvidia/tests/__init__.py | 3 + integrations/nvidia/tests/conftest.py | 44 ++++++++++ .../nvidia/tests/test_document_embedder.py | 86 ++++++++++++++----- integrations/nvidia/tests/test_generator.py | 55 ++++++++++++ .../nvidia/tests/test_text_embedder.py | 48 +++++++---- 15 files changed, 358 insertions(+), 172 deletions(-) delete mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py delete mode 100644 integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py rename integrations/nvidia/src/haystack_integrations/{components/generators/nvidia/_nim_backend.py => utils/nvidia/nim_backend.py} (61%) create mode 100644 integrations/nvidia/tests/conftest.py diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml index 504077b4e..f35485e9c 100644 --- a/integrations/nvidia/pyproject.toml +++ b/integrations/nvidia/pyproject.toml @@ -42,7 +42,7 @@ root = "../.." 
git_describe_command = 'git describe --tags --match="integrations/nvidia-v[0-9]*"' [tool.hatch.envs.default] -dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"] +dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools", "requests_mock"] [tool.hatch.envs.default.scripts] test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" @@ -147,6 +147,8 @@ module = [ "haystack_integrations.*", "pytest.*", "numpy.*", + "requests_mock.*", + "pydantic.*" ] ignore_missing_imports = true diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py deleted file mode 100644 index ee25df7fd..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py +++ /dev/null @@ -1,52 +0,0 @@ -from typing import Any, Dict, List, Optional, Tuple - -import requests -from haystack.utils import Secret - -from .backend import EmbedderBackend - -REQUEST_TIMEOUT = 60 - - -class NimBackend(EmbedderBackend): - def __init__( - self, - model: str, - api_url: str, - api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), - model_kwargs: Optional[Dict[str, Any]] = None, - ): - headers = { - "Content-Type": "application/json", - "accept": "application/json", - } - - if api_key: - headers["authorization"] = f"Bearer {api_key.resolve_value()}" - - self.session = requests.Session() - self.session.headers.update(headers) - - self.model = model - self.api_url = api_url - self.model_kwargs = model_kwargs or {} - - def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: - url = f"{self.api_url}/embeddings" - - res = self.session.post( - url, - json={ - "model": self.model, - "input": texts, - **self.model_kwargs, - }, - timeout=REQUEST_TIMEOUT, - ) - res.raise_for_status() - - data = res.json() - # Sort the embeddings by index, we don't know whether they're out of order or not - embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])] - - return embeddings, {"usage": data["usage"]} diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py deleted file mode 100644 index 09e9b7c80..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py +++ /dev/null @@ -1,29 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Tuple - - -class EmbedderBackend(ABC): - def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None): - """ - Initialize the backend. - - :param model: - The name of the model to use. - :param model_kwargs: - Additional keyword arguments to pass to the model. - """ - self.model_name = model - self.model_kwargs = model_kwargs or {} - - @abstractmethod - def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: - """ - Invoke the backend and embed the given texts. - - :param texts: - Texts to embed. - :return: - Vector representation of the texts and - metadata returned by the service. 
- """ - pass diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py index 4cc805c01..f5d1747b8 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py @@ -1,12 +1,11 @@ +import warnings from typing import Any, Dict, List, Optional, Tuple, Union from haystack import Document, component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import url_validation +from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation from tqdm import tqdm -from ._nim_backend import NimBackend -from .backend import EmbedderBackend from .truncate import EmbeddingTruncateMode _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia" @@ -34,7 +33,7 @@ class NvidiaDocumentEmbedder: def __init__( self, - model: str = "NV-Embed-QA", + model: Optional[str] = None, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), api_url: str = _DEFAULT_API_URL, prefix: str = "", @@ -50,6 +49,8 @@ def __init__( :param model: Embedding model to use. + If no specific model along with locally hosted API URL is provided, + the system defaults to the available model found using /models API. :param api_key: API key for the NVIDIA NIM. :param api_url: @@ -87,9 +88,31 @@ def __init__( truncate = EmbeddingTruncateMode.from_str(truncate) self.truncate = truncate - self.backend: Optional[EmbedderBackend] = None + self.backend: Optional[Any] = None self._initialized = False + if is_hosted(api_url) and not self.model: # manually set default model + self.model = "NV-Embed-QA" + + def default_model(self): + """Set default model in local NIM mode.""" + valid_models = [ + model.id for model in self.backend.models() if not model.base_model or model.base_model == model.id + ] + name = next(iter(valid_models), None) + if name: + warnings.warn( + f"Default model is set as: {name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + UserWarning, + stacklevel=2, + ) + self.model = self.backend.model = name + else: + error_message = "No locally hosted model was found." + raise ValueError(error_message) + def warm_up(self): """ Initializes the component. @@ -109,6 +132,9 @@ def warm_up(self): self._initialized = True + if not self.model: + self.default_model() + def to_dict(self) -> Dict[str, Any]: """ Serializes the component to a dictionary. 
diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py index e1a8c36dd..1c4a7c5c9 100644 --- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py +++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py @@ -1,11 +1,10 @@ +import warnings from typing import Any, Dict, List, Optional, Union from haystack import component, default_from_dict, default_to_dict from haystack.utils import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import url_validation +from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation -from ._nim_backend import NimBackend -from .backend import EmbedderBackend from .truncate import EmbeddingTruncateMode _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia" @@ -35,7 +34,7 @@ class NvidiaTextEmbedder: def __init__( self, - model: str = "NV-Embed-QA", + model: Optional[str] = None, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), api_url: str = _DEFAULT_API_URL, prefix: str = "", @@ -47,6 +46,8 @@ def __init__( :param model: Embedding model to use. + If no specific model along with locally hosted API URL is provided, + the system defaults to the available model found using /models API. :param api_key: API key for the NVIDIA NIM. :param api_url: @@ -71,9 +72,31 @@ def __init__( truncate = EmbeddingTruncateMode.from_str(truncate) self.truncate = truncate - self.backend: Optional[EmbedderBackend] = None + self.backend: Optional[Any] = None self._initialized = False + if is_hosted(api_url) and not self.model: # manually set default model + self.model = "NV-Embed-QA" + + def default_model(self): + """Set default model in local NIM mode.""" + valid_models = [ + model.id for model in self.backend.models() if not model.base_model or model.base_model == model.id + ] + name = next(iter(valid_models), None) + if name: + warnings.warn( + f"Default model is set as: {name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + UserWarning, + stacklevel=2, + ) + self.model = self.backend.model = name + else: + error_message = "No locally hosted model was found." + raise ValueError(error_message) + def warm_up(self): """ Initializes the component. @@ -93,6 +116,9 @@ def warm_up(self): self._initialized = True + if not self.model: + self.default_model() + def to_dict(self) -> Dict[str, Any]: """ Serializes the component to a dictionary. diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py deleted file mode 100644 index d14199daf..000000000 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py +++ /dev/null @@ -1,29 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Tuple - - -class GeneratorBackend(ABC): - def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None): - """ - Initialize the backend. - - :param model: - The name of the model to use. - :param model_kwargs: - Additional keyword arguments to pass to the model. 
- """ - self.model_name = model - self.model_kwargs = model_kwargs or {} - - @abstractmethod - def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: - """ - Invoke the backend and prompt the model. - - :param prompt: - Prompt text. - :return: - Vector representation of the generated texts related - metadata returned by the service. - """ - pass diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index f11ef8aaf..a286400ab 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -1,14 +1,12 @@ # SPDX-FileCopyrightText: 2024-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +import warnings from typing import Any, Dict, List, Optional from haystack import component, default_from_dict, default_to_dict from haystack.utils.auth import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import url_validation - -from ._nim_backend import NimBackend -from .backend import GeneratorBackend +from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation _DEFAULT_API_URL = "https://integrate.api.nvidia.com/v1" @@ -45,7 +43,7 @@ class NvidiaGenerator: def __init__( self, - model: str, + model: Optional[str] = None, api_url: str = _DEFAULT_API_URL, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), model_arguments: Optional[Dict[str, Any]] = None, @@ -55,6 +53,10 @@ def __init__( :param model: Name of the model to use for text generation. + See the [NVIDIA NIMs](https://ai.nvidia.com) + for more information on the supported models. + `Note`: If no specific model along with locally hosted API URL is provided, + the system defaults to the available model found using /models API. Check supported models at [NVIDIA NIM](https://ai.nvidia.com). :param api_key: API key for the NVIDIA NIM. Set it as the `NVIDIA_API_KEY` environment @@ -72,7 +74,28 @@ def __init__( self._api_key = api_key self._model_arguments = model_arguments or {} - self._backend: Optional[GeneratorBackend] = None + self._backend: Optional[Any] = None + + self.is_hosted = is_hosted(api_url) + + def default_model(self): + """Set default model in local NIM mode.""" + valid_models = [ + model.id for model in self._backend.models() if not model.base_model or model.base_model == model.id + ] + name = next(iter(valid_models), None) + if name: + warnings.warn( + f"Default model is set as: {name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + UserWarning, + stacklevel=2, + ) + self._model = self._backend.model_name = name + else: + error_message = "No locally hosted model was found." + raise ValueError(error_message) def warm_up(self): """ @@ -91,6 +114,9 @@ def warm_up(self): model_kwargs=self._model_arguments, ) + if not self.is_hosted and not self._model: + self.default_model() + def to_dict(self) -> Dict[str, Any]: """ Serializes the component to a dictionary. 
diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py index 9863e4a38..da301d29d 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py @@ -1,3 +1,4 @@ -from .utils import url_validation +from .nim_backend import Model, NimBackend +from .utils import is_hosted, url_validation -__all__ = ["url_validation"] +__all__ = ["NimBackend", "Model", "is_hosted", "url_validation"] diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py similarity index 61% rename from integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py rename to integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py index 5253b3254..f69862f0e 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py @@ -1,14 +1,29 @@ +from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Tuple import requests from haystack.utils import Secret -from .backend import GeneratorBackend - REQUEST_TIMEOUT = 60 -class NimBackend(GeneratorBackend): +@dataclass +class Model: + """ + Model information. + + id: unique identifier for the model, passed as model parameter for requests + aliases: list of aliases for the model + base_model: root model for the model + All aliases are deprecated and will trigger a warning when used. + """ + + id: str + aliases: Optional[List[str]] = field(default_factory=list) + base_model: Optional[str] = None + + +class NimBackend: def __init__( self, model: str, @@ -31,6 +46,26 @@ def __init__( self.api_url = api_url self.model_kwargs = model_kwargs or {} + def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]: + url = f"{self.api_url}/embeddings" + + res = self.session.post( + url, + json={ + "model": self.model, + "input": texts, + **self.model_kwargs, + }, + timeout=REQUEST_TIMEOUT, + ) + res.raise_for_status() + + data = res.json() + # Sort the embeddings by index, we don't know whether they're out of order or not + embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])] + + return embeddings, {"usage": data["usage"]} + def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: # We're using the chat completion endpoint as the NIM API doesn't support # the /completions endpoint. So both the non-chat and chat generator will use this. @@ -78,3 +113,19 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: meta.append(choice_meta) return replies, meta + + def models(self) -> List[Model]: + url = f"{self.api_url}/models" + + res = self.session.get( + url, + timeout=REQUEST_TIMEOUT, + ) + res.raise_for_status() + + data = res.json()["data"] + models = [Model(element["id"]) for element in data if "id" in element] + if not models: + msg = f"No hosted model were found at URL '{url}'." 
+ raise ValueError(msg) + return models diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py index 4f8e14b09..7d4dfc3b4 100644 --- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py +++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py @@ -37,3 +37,11 @@ def url_validation(api_url: str, default_api_url: str, allowed_paths: List[str]) base_url = urlunparse((result.scheme, result.netloc, "v1", "", "", "")) return base_url + + +def is_hosted(api_url: str): + """""" + return urlparse(api_url).netloc in [ + "integrate.api.nvidia.com", + "ai.api.nvidia.com", + ] diff --git a/integrations/nvidia/tests/__init__.py b/integrations/nvidia/tests/__init__.py index e873bc332..47611e0b9 100644 --- a/integrations/nvidia/tests/__init__.py +++ b/integrations/nvidia/tests/__init__.py @@ -1,3 +1,6 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +from .conftest import MockBackend + +__all__ = ["MockBackend"] diff --git a/integrations/nvidia/tests/conftest.py b/integrations/nvidia/tests/conftest.py new file mode 100644 index 000000000..794c994ff --- /dev/null +++ b/integrations/nvidia/tests/conftest.py @@ -0,0 +1,44 @@ +from typing import Any, Dict, List, Optional, Tuple + +import pytest +from haystack.utils import Secret +from haystack_integrations.utils.nvidia import Model, NimBackend +from requests_mock import Mocker + + +class MockBackend(NimBackend): + def __init__(self, model: str, api_key: Optional[Secret] = None, model_kwargs: Optional[Dict[str, Any]] = None): + api_key = api_key or Secret.from_env_var("NVIDIA_API_KEY") + super().__init__(model, api_url="", api_key=api_key, model_kwargs=model_kwargs or {}) + + def embed(self, texts): + inputs = texts + data = [[0.1, 0.2, 0.3] for i in range(len(inputs))] + return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}} + + def models(self): + return [Model(id="aa")] + + def generate(self) -> Tuple[List[str], List[Dict[str, Any]]]: + return ( + ["This is a mocked response."], + [{"role": "assistant", "usage": {"prompt_tokens": 5, "total_tokens": 10, "completion_tokens": 5}}], + ) + + +@pytest.fixture +def mock_local_models(requests_mock: Mocker) -> None: + requests_mock.get( + "http://localhost:8080/v1/models", + json={ + "data": [ + { + "id": "model1", + "object": "model", + "created": 1234567890, + "owned_by": "OWNER", + "root": "model1", + }, + ] + }, + ) diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py index 856ae4652..6562a0ea9 100644 --- a/integrations/nvidia/tests/test_document_embedder.py +++ b/integrations/nvidia/tests/test_document_embedder.py @@ -4,17 +4,8 @@ from haystack import Document from haystack.utils import Secret from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaDocumentEmbedder -from haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend - -class MockBackend(EmbedderBackend): - def __init__(self, model, model_kwargs): - super().__init__(model, model_kwargs) - - def embed(self, texts): - inputs = texts - data = [[0.1, 0.2, 0.3] for i in range(len(inputs))] - return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}} +from . 
import MockBackend class TestNvidiaDocumentEmbedder: @@ -185,14 +176,18 @@ def test_prepare_texts_to_embed_w_suffix(self): def test_embed_batch(self): texts = ["text 1", "text 2", "text 3", "text 4", "text 5"] - + model = "playground_nvolveqa_40k" + api_key = Secret.from_token("fake-api-key") embedder = NvidiaDocumentEmbedder( - "playground_nvolveqa_40k", - api_key=Secret.from_token("fake-api-key"), + model, + api_key=api_key, ) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend( + model=model, + api_key=api_key, + ) embeddings, metadata = embedder._embed_batch(texts_to_embed=texts, batch_size=2) @@ -205,15 +200,55 @@ def test_embed_batch(self): assert metadata == {"usage": {"prompt_tokens": 3 * 4, "total_tokens": 3 * 4}} - def test_run(self): + @pytest.mark.usefixtures("mock_local_models") + def test_run_default_model(self): docs = [ Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), ] + api_key = Secret.from_token("fake-api-key") + + embedder = NvidiaDocumentEmbedder( + api_key=api_key, + model=None, + api_url="http://localhost:8080/v1", + prefix="prefix ", + suffix=" suffix", + meta_fields_to_embed=["topic"], + embedding_separator=" | ", + ) + + with pytest.warns(UserWarning) as record: + embedder.warm_up() + assert len(record) == 1 + assert "Default model is set as:" in str(record[0].message) + assert embedder.model == "model1" + + embedder.backend = MockBackend(model=embedder.model, api_key=api_key) + + result = embedder.run(documents=docs) + documents_with_embeddings = result["documents"] + metadata = result["meta"] + + assert isinstance(documents_with_embeddings, list) + assert len(documents_with_embeddings) == len(docs) + for doc in documents_with_embeddings: + assert isinstance(doc, Document) + assert isinstance(doc.embedding, list) + assert len(doc.embedding) == 3 + assert all(isinstance(x, float) for x in doc.embedding) + assert metadata == {"usage": {"prompt_tokens": 4, "total_tokens": 4}} + + def test_run(self): + docs = [ + Document(content="I love cheese", meta={"topic": "Cuisine"}), + Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), + ] + api_key = Secret.from_token("fake-api-key") model = "playground_nvolveqa_40k" embedder = NvidiaDocumentEmbedder( - api_key=Secret.from_token("fake-api-key"), + api_key=api_key, model=model, prefix="prefix ", suffix=" suffix", @@ -222,7 +257,7 @@ def test_run(self): ) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) result = embedder.run(documents=docs) @@ -243,9 +278,10 @@ def test_run_custom_batch_size(self): Document(content="I love cheese", meta={"topic": "Cuisine"}), Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}), ] + api_key = Secret.from_token("fake-api-key") model = "playground_nvolveqa_40k" embedder = NvidiaDocumentEmbedder( - api_key=Secret.from_token("fake-api-key"), + api_key=api_key, model=model, prefix="prefix ", suffix=" suffix", @@ -255,7 +291,7 @@ def test_run_custom_batch_size(self): ) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) result = embedder.run(documents=docs) @@ -273,10 +309,12 @@ def test_run_custom_batch_size(self): assert metadata == {"usage": {"prompt_tokens": 2 * 4, "total_tokens": 2 * 4}} def test_run_wrong_input_format(self): - 
embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) + model = "playground_nvolveqa_40k" + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaDocumentEmbedder(model, api_key=api_key) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) string_input = "text" list_integers_input = [1, 2, 3] @@ -288,10 +326,12 @@ def test_run_wrong_input_format(self): embedder.run(documents=list_integers_input) def test_run_on_empty_list(self): - embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) + model = "playground_nvolveqa_40k" + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaDocumentEmbedder(model, api_key=api_key) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) empty_list_input = [] result = embedder.run(documents=empty_list_input) diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index 3ddeebe88..9fff9c2e8 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -6,6 +6,35 @@ import pytest from haystack.utils import Secret from haystack_integrations.components.generators.nvidia import NvidiaGenerator +from requests_mock import Mocker + + +@pytest.fixture +def mock_local_chat_completion(requests_mock: Mocker) -> None: + requests_mock.post( + "http://localhost:8080/v1/chat/completions", + json={ + "choices": [ + { + "message": {"content": "Hello!", "role": "system"}, + "usage": {"prompt_tokens": 3, "total_tokens": 5, "completion_tokens": 9}, + "finish_reason": "stop", + "index": 0, + }, + { + "message": {"content": "How are you?", "role": "system"}, + "usage": {"prompt_tokens": 3, "total_tokens": 5, "completion_tokens": 9}, + "finish_reason": "stop", + "index": 1, + }, + ], + "usage": { + "prompt_tokens": 3, + "total_tokens": 5, + "completion_tokens": 9, + }, + }, + ) class TestNvidiaGenerator: @@ -116,6 +145,32 @@ def test_run_integration_with_nim_backend(self): assert result["replies"] assert result["meta"] + @pytest.mark.integration + @pytest.mark.usefixtures("mock_local_models") + @pytest.mark.usefixtures("mock_local_chat_completion") + def test_run_integration_with_default_model_nim_backend(self): + model = None + url = "http://localhost:8080/v1" + generator = NvidiaGenerator( + model=model, + api_url=url, + api_key=None, + model_arguments={ + "temperature": 0.2, + }, + ) + with pytest.warns(UserWarning) as record: + generator.warm_up() + assert len(record) == 1 + assert "Default model is set as:" in str(record[0].message) + assert generator._model == "model1" + assert not generator.is_hosted + + result = generator.run(prompt="What is the answer?") + + assert result["replies"] + assert result["meta"] + @pytest.mark.skipif( not os.environ.get("NVIDIA_API_KEY", None), reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.", diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index 42d60dee2..7c0a7000d 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -3,17 +3,8 @@ import pytest from haystack.utils import Secret from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaTextEmbedder -from 
haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend - -class MockBackend(EmbedderBackend): - def __init__(self, model, model_kwargs): - super().__init__(model, model_kwargs) - - def embed(self, texts): - inputs = texts - data = [[0.1, 0.2, 0.3] for i in range(len(inputs))] - return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}} +from . import MockBackend class TestNvidiaTextEmbedder: @@ -22,7 +13,6 @@ def test_init_default(self, monkeypatch): embedder = NvidiaTextEmbedder() assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY") - assert embedder.model == "NV-Embed-QA" assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia" assert embedder.prefix == "" assert embedder.suffix == "" @@ -106,13 +96,36 @@ def from_dict(self, monkeypatch): assert component.suffix == "suffix" assert component.truncate == "START" + @pytest.mark.usefixtures("mock_local_models") + def test_run_default_model(self): + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaTextEmbedder(api_url="http://localhost:8080/v1", api_key=api_key) + + assert embedder.model is None + + with pytest.warns(UserWarning) as record: + embedder.warm_up() + + assert len(record) == 1 + assert "Default model is set as:" in str(record[0].message) + assert embedder.model == "model1" + + embedder.backend = MockBackend(model=embedder.model, api_key=api_key) + + result = embedder.run(text="The food was delicious") + + assert len(result["embedding"]) == 3 + assert all(isinstance(x, float) for x in result["embedding"]) + assert result["meta"] == { + "usage": {"prompt_tokens": 4, "total_tokens": 4}, + } + def test_run(self): - embedder = NvidiaTextEmbedder( - "playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"), prefix="prefix ", suffix=" suffix" - ) + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=api_key, prefix="prefix ", suffix=" suffix") embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model="playground_nvolveqa_40k", api_key=api_key) result = embedder.run(text="The food was delicious") @@ -123,9 +136,10 @@ def test_run(self): } def test_run_wrong_input_format(self): - embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=api_key) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model="playground_nvolveqa_40k", api_key=api_key) list_integers_input = [1, 2, 3] From 7d90a58f1e77e776d5682ccbb50c87d71eee99c4 Mon Sep 17 00:00:00 2001 From: tstadel <60758086+tstadel@users.noreply.github.com> Date: Mon, 12 Aug 2024 15:36:58 +0200 Subject: [PATCH 2/6] fix: support streaming_callback param in amazon bedrock generators (#927) * fix: support streaming_callback param in amazon bedrock generators * fix chat generator merge * reformat --------- Co-authored-by: Thomas Stadelmann --- .../generators/amazon_bedrock/adapters.py | 93 ++++----- .../amazon_bedrock/chat/adapters.py | 55 +++--- .../amazon_bedrock/chat/chat_generator.py | 53 +++--- .../generators/amazon_bedrock/generator.py | 91 ++++----- .../generators/amazon_bedrock/handlers.py | 33 ---- .../tests/test_chat_generator.py | 9 - .../amazon_bedrock/tests/test_generator.py | 179 ++++++++---------- 7 files changed, 217 insertions(+), 296 deletions(-) diff --git 
a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py index 7c7fdd7ce..8b5c2b530 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py @@ -1,8 +1,8 @@ import json from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional -from .handlers import TokenStreamingHandler +from haystack.dataclasses import StreamingChunk class BedrockModelAdapter(ABC): @@ -39,22 +39,24 @@ def get_responses(self, response_body: Dict[str, Any]) -> List[str]: responses = [completion.lstrip() for completion in completions] return responses - def get_stream_responses(self, stream, stream_handler: TokenStreamingHandler) -> List[str]: + def get_stream_responses(self, stream, streaming_callback: Callable[[StreamingChunk], None]) -> List[str]: """ Extracts the responses from the Amazon Bedrock streaming response. :param stream: The streaming response from the Amazon Bedrock request. - :param stream_handler: The handler for the streaming response. + :param streaming_callback: The handler for the streaming response. :returns: A list of string responses. """ - tokens: List[str] = [] + streaming_chunks: List[StreamingChunk] = [] for event in stream: chunk = event.get("chunk") if chunk: decoded_chunk = json.loads(chunk["bytes"].decode("utf-8")) - token = self._extract_token_from_stream(decoded_chunk) - tokens.append(stream_handler(token, event_data=decoded_chunk)) - responses = ["".join(tokens).lstrip()] + streaming_chunk: StreamingChunk = self._build_streaming_chunk(decoded_chunk) + streaming_chunks.append(streaming_chunk) + streaming_callback(streaming_chunk) + + responses = ["".join(streaming_chunk.content for streaming_chunk in streaming_chunks).lstrip()] return responses def _get_params(self, inference_kwargs: Dict[str, Any], default_params: Dict[str, Any]) -> Dict[str, Any]: @@ -84,12 +86,12 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L """ @abstractmethod - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ @@ -150,17 +152,17 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L return [response_body["completion"]] - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. 
""" if self.use_messages_api: - return chunk.get("delta", {}).get("text", "") + return StreamingChunk(content=chunk.get("delta", {}).get("text", ""), meta=chunk) - return chunk.get("completion", "") + return StreamingChunk(content=chunk.get("completion", ""), meta=chunk) class MistralAdapter(BedrockModelAdapter): @@ -199,17 +201,18 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L """ return [output.get("text", "") for output in response_body.get("outputs", [])] - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ + content = "" chunk_list = chunk.get("outputs", []) if chunk_list: - return chunk_list[0].get("text", "") - return "" + content = chunk_list[0].get("text", "") + return StreamingChunk(content=content, meta=chunk) class CohereCommandAdapter(BedrockModelAdapter): @@ -254,14 +257,14 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L responses = [generation["text"] for generation in response_body["generations"]] return responses - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ - return chunk.get("text", "") + return StreamingChunk(content=chunk.get("text", ""), meta=chunk) class CohereCommandRAdapter(BedrockModelAdapter): @@ -313,15 +316,15 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L responses = [response_body["text"]] return responses - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ token: str = chunk.get("text", "") - return token + return StreamingChunk(content=token, meta=chunk) class AI21LabsJurassic2Adapter(BedrockModelAdapter): @@ -357,7 +360,7 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L responses = [completion["data"]["text"] for completion in response_body["completions"]] return responses - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: msg = "Streaming is not supported for AI21 Jurassic 2 models." raise NotImplementedError(msg) @@ -398,14 +401,14 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L responses = [result["outputText"] for result in response_body["results"]] return responses - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. 
- :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ - return chunk.get("outputText", "") + return StreamingChunk(content=chunk.get("outputText", ""), meta=chunk) class MetaLlamaAdapter(BedrockModelAdapter): @@ -442,11 +445,11 @@ def _extract_completions_from_response(self, response_body: Dict[str, Any]) -> L """ return [response_body["generation"]] - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: A string token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ - return chunk.get("generation", "") + return StreamingChunk(content=chunk.get("generation", ""), meta=chunk) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py index 162100934..67e833f73 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/adapters.py @@ -48,19 +48,18 @@ def get_responses(self, response_body: Dict[str, Any]) -> List[ChatMessage]: return self._extract_messages_from_response(response_body) def get_stream_responses( - self, stream: EventStream, stream_handler: Callable[[StreamingChunk], None] + self, stream: EventStream, streaming_callback: Callable[[StreamingChunk], None] ) -> List[ChatMessage]: - tokens: List[str] = [] + streaming_chunks: List[StreamingChunk] = [] last_decoded_chunk: Dict[str, Any] = {} for event in stream: chunk = event.get("chunk") if chunk: last_decoded_chunk = json.loads(chunk["bytes"].decode("utf-8")) - token = self._extract_token_from_stream(last_decoded_chunk) - stream_chunk = StreamingChunk(content=token) # don't extract meta, we care about tokens only - stream_handler(stream_chunk) # callback the stream handler with StreamingChunk - tokens.append(token) - responses = ["".join(tokens).lstrip()] + streaming_chunk = self._build_streaming_chunk(last_decoded_chunk) + streaming_callback(streaming_chunk) # callback the stream handler with StreamingChunk + streaming_chunks.append(streaming_chunk) + responses = ["".join(chunk.content for chunk in streaming_chunks).lstrip()] return [ChatMessage.from_assistant(response, meta=last_decoded_chunk) for response in responses] @staticmethod @@ -142,12 +141,12 @@ def _extract_messages_from_response(self, response_body: Dict[str, Any]) -> List """ @abstractmethod - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: The extracted token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. 
""" @@ -252,16 +251,16 @@ def _extract_messages_from_response(self, response_body: Dict[str, Any]) -> List messages.append(ChatMessage.from_assistant(content["text"], meta=meta)) return messages - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: The extracted token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ if chunk.get("type") == "content_block_delta" and chunk.get("delta", {}).get("type") == "text_delta": - return chunk.get("delta", {}).get("text", "") - return "" + return StreamingChunk(content=chunk.get("delta", {}).get("text", ""), meta=chunk) + return StreamingChunk(content="", meta=chunk) def _to_anthropic_message(self, m: ChatMessage) -> Dict[str, Any]: """ @@ -425,17 +424,17 @@ def _extract_messages_from_response(self, response_body: Dict[str, Any]) -> List messages.append(ChatMessage.from_assistant(response["text"], meta=meta)) return messages - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: The extracted token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. """ response_chunk = chunk.get("outputs", []) if response_chunk: - return response_chunk[0].get("text", "") - return "" + return StreamingChunk(content=response_chunk[0].get("text", ""), meta=chunk) + return StreamingChunk(content="", meta=chunk) class MetaLlama2ChatAdapter(BedrockModelChatAdapter): @@ -543,11 +542,11 @@ def _extract_messages_from_response(self, response_body: Dict[str, Any]) -> List metadata = {k: v for (k, v) in response_body.items() if k != message_tag} return [ChatMessage.from_assistant(response_body[message_tag], meta=metadata)] - def _extract_token_from_stream(self, chunk: Dict[str, Any]) -> str: + def _build_streaming_chunk(self, chunk: Dict[str, Any]) -> StreamingChunk: """ - Extracts the token from a streaming chunk. + Extracts the content and meta from a streaming chunk. - :param chunk: The streaming chunk. - :returns: The extracted token. + :param chunk: The streaming chunk as dict. + :returns: A StreamingChunk object. 
""" - return chunk.get("generation", "") + return StreamingChunk(content=chunk.get("generation", ""), meta=chunk) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py index 7485a96c5..206cb0b9a 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py @@ -156,18 +156,29 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: stacklevel=2, ) - def invoke(self, *args, **kwargs): + @component.output_types(replies=List[ChatMessage]) + def run( + self, + messages: List[ChatMessage], + streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, + generation_kwargs: Optional[Dict[str, Any]] = None, + ): """ - Invokes the Amazon Bedrock LLM with the given parameters. The parameters are passed to the Amazon Bedrock - client. + Generates a list of `ChatMessage` response to the given messages using the Amazon Bedrock LLM. - :param args: The positional arguments passed to the generator. - :param kwargs: The keyword arguments passed to the generator. - :returns: List of `ChatMessage` generated by LLM. + :param messages: The messages to generate a response to. + :param streaming_callback: + A callback function that is called when a new token is received from the stream. + :param generation_kwargs: Additional generation keyword arguments passed to the model. + :returns: A dictionary with the following keys: + - `replies`: The generated List of `ChatMessage` objects. """ + generation_kwargs = generation_kwargs or {} + generation_kwargs = generation_kwargs.copy() + + streaming_callback = streaming_callback or self.streaming_callback + generation_kwargs["stream"] = streaming_callback is not None - kwargs = kwargs.copy() - messages: List[ChatMessage] = kwargs.pop("messages", []) # check if the prompt is a list of ChatMessage objects if not ( isinstance(messages, list) @@ -177,39 +188,29 @@ def invoke(self, *args, **kwargs): msg = f"The model {self.model} requires a list of ChatMessage objects as a prompt." 
raise ValueError(msg) - body = self.model_adapter.prepare_body(messages=messages, **{"stop_words": self.stop_words, **kwargs}) + body = self.model_adapter.prepare_body( + messages=messages, **{"stop_words": self.stop_words, **generation_kwargs} + ) try: - if self.streaming_callback: + if streaming_callback: response = self.client.invoke_model_with_response_stream( body=json.dumps(body), modelId=self.model, accept="application/json", contentType="application/json" ) response_stream = response["body"] - responses = self.model_adapter.get_stream_responses( - stream=response_stream, stream_handler=self.streaming_callback + replies = self.model_adapter.get_stream_responses( + stream=response_stream, streaming_callback=streaming_callback ) else: response = self.client.invoke_model( body=json.dumps(body), modelId=self.model, accept="application/json", contentType="application/json" ) response_body = json.loads(response.get("body").read().decode("utf-8")) - responses = self.model_adapter.get_responses(response_body=response_body) + replies = self.model_adapter.get_responses(response_body=response_body) except ClientError as exception: msg = f"Could not inference Amazon Bedrock model {self.model} due: {exception}" raise AmazonBedrockInferenceError(msg) from exception - return responses - - @component.output_types(replies=List[ChatMessage]) - def run(self, messages: List[ChatMessage], generation_kwargs: Optional[Dict[str, Any]] = None): - """ - Generates a list of `ChatMessage` responses to the given messages using the Amazon Bedrock LLM. - - :param messages: The messages to generate a response to. - :param generation_kwargs: Additional generation keyword arguments passed to the model. - :returns: A dictionary with the following keys: - - `replies`: The generated list of `ChatMessage` objects. 
- """ - return {"replies": self.invoke(messages=messages, **(generation_kwargs or {}))} + return {"replies": replies} @classmethod def get_model_adapter(cls, model: str) -> Optional[Type[BedrockModelChatAdapter]]: diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py index b15000aa2..6ef0a4765 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py @@ -1,11 +1,12 @@ import json import logging import re -from typing import Any, ClassVar, Dict, List, Optional, Type, Union +from typing import Any, Callable, ClassVar, Dict, List, Optional, Type from botocore.exceptions import ClientError from haystack import component, default_from_dict, default_to_dict -from haystack.utils.auth import Secret, deserialize_secrets_inplace +from haystack.dataclasses import StreamingChunk +from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable from haystack_integrations.common.amazon_bedrock.errors import ( AmazonBedrockConfigurationError, @@ -25,8 +26,6 @@ ) from .handlers import ( DefaultPromptHandler, - DefaultTokenStreamingHandler, - TokenStreamingHandler, ) logger = logging.getLogger(__name__) @@ -87,6 +86,7 @@ def __init__( aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008 max_length: Optional[int] = 100, truncate: Optional[bool] = True, + streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, **kwargs, ): """ @@ -100,6 +100,8 @@ def __init__( :param aws_profile_name: The AWS profile name. :param max_length: The maximum length of the generated text. :param truncate: Whether to truncate the prompt or not. + :param streaming_callback: A callback function that is called when a new token is received from the stream. + The callback function accepts StreamingChunk as an argument. :param kwargs: Additional keyword arguments to be passed to the model. These arguments are specific to the model. You can find them in the model's documentation. :raises ValueError: If the model name is empty or None. @@ -117,6 +119,7 @@ def __init__( self.aws_session_token = aws_session_token self.aws_region_name = aws_region_name self.aws_profile_name = aws_profile_name + self.streaming_callback = streaming_callback self.kwargs = kwargs def resolve_secret(secret: Optional[Secret]) -> Optional[str]: @@ -158,7 +161,7 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: raise AmazonBedrockConfigurationError(msg) self.model_adapter = model_adapter_cls(model_kwargs=model_input_kwargs, max_length=self.max_length) - def _ensure_token_limit(self, prompt: Union[str, List[Dict[str, str]]]) -> Union[str, List[Dict[str, str]]]: + def _ensure_token_limit(self, prompt: str) -> str: """ Ensures that the prompt and answer token lengths together are within the model_max_length specified during the initialization of the component. @@ -166,14 +169,6 @@ def _ensure_token_limit(self, prompt: Union[str, List[Dict[str, str]]]) -> Union :param prompt: The prompt to be sent to the model. :returns: The resized prompt. 
""" - # the prompt for this model will be of the type str - if isinstance(prompt, List): - msg = ( - "AmazonBedrockGenerator only supports a string as a prompt, " - "while currently, the prompt is of type List." - ) - raise ValueError(msg) - resize_info = self.prompt_handler(prompt) if resize_info["prompt_length"] != resize_info["new_prompt_length"]: logger.warning( @@ -187,31 +182,36 @@ def _ensure_token_limit(self, prompt: Union[str, List[Dict[str, str]]]) -> Union ) return str(resize_info["resized_prompt"]) - def invoke(self, *args, **kwargs): + @component.output_types(replies=List[str]) + def run( + self, + prompt: str, + streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, + generation_kwargs: Optional[Dict[str, Any]] = None, + ): """ - Invokes the model with the given prompt. + Generates a list of string response to the given prompt. - :param args: Additional positional arguments passed to the generator. - :param kwargs: Additional keyword arguments passed to the generator. - :returns: A list of generated responses (strings). + :param prompt: The prompt to generate a response for. + :param streaming_callback: + A callback function that is called when a new token is received from the stream. + :param generation_kwargs: Additional keyword arguments passed to the generator. + :returns: A dictionary with the following keys: + - `replies`: A list of generated responses. + :raises ValueError: If the prompt is empty or None. + :raises AmazonBedrockInferenceError: If the model cannot be invoked. """ - kwargs = kwargs.copy() - prompt: str = kwargs.pop("prompt", None) - stream: bool = kwargs.get("stream", self.model_adapter.model_kwargs.get("stream", False)) - - if not prompt or not isinstance(prompt, (str, list)): - msg = ( - f"The model {self.model} requires a valid prompt, but currently, it has no prompt. " - f"Make sure to provide a prompt in the format that the model expects." - ) - raise ValueError(msg) + generation_kwargs = generation_kwargs or {} + generation_kwargs = generation_kwargs.copy() + streaming_callback = streaming_callback or self.streaming_callback + generation_kwargs["stream"] = streaming_callback is not None if self.truncate: prompt = self._ensure_token_limit(prompt) - body = self.model_adapter.prepare_body(prompt=prompt, **kwargs) + body = self.model_adapter.prepare_body(prompt=prompt, **generation_kwargs) try: - if stream: + if streaming_callback: response = self.client.invoke_model_with_response_stream( body=json.dumps(body), modelId=self.model, @@ -219,11 +219,9 @@ def invoke(self, *args, **kwargs): contentType="application/json", ) response_stream = response["body"] - handler: TokenStreamingHandler = kwargs.get( - "stream_handler", - self.model_adapter.model_kwargs.get("stream_handler", DefaultTokenStreamingHandler()), + replies = self.model_adapter.get_stream_responses( + stream=response_stream, streaming_callback=streaming_callback ) - responses = self.model_adapter.get_stream_responses(stream=response_stream, stream_handler=handler) else: response = self.client.invoke_model( body=json.dumps(body), @@ -232,7 +230,7 @@ def invoke(self, *args, **kwargs): contentType="application/json", ) response_body = json.loads(response.get("body").read().decode("utf-8")) - responses = self.model_adapter.get_responses(response_body=response_body) + replies = self.model_adapter.get_responses(response_body=response_body) except ClientError as exception: msg = ( f"Could not connect to Amazon Bedrock model {self.model}. 
" @@ -241,22 +239,7 @@ def invoke(self, *args, **kwargs): ) raise AmazonBedrockInferenceError(msg) from exception - return responses - - @component.output_types(replies=List[str]) - def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None): - """ - Generates a list of string response to the given prompt. - - :param prompt: Instructions for the model. - :param generation_kwargs: Additional keyword arguments to customize text generation. - These arguments are specific to the model. You can find them in the model's documentation. - :returns: A dictionary with the following keys: - - `replies`: A list of generated responses. - :raises ValueError: If the prompt is empty or None. - :raises AmazonBedrockInferenceError: If the model cannot be invoked. - """ - return {"replies": self.invoke(prompt=prompt, **(generation_kwargs or {}))} + return {"replies": replies} @classmethod def get_model_adapter(cls, model: str) -> Optional[Type[BedrockModelAdapter]]: @@ -278,6 +261,7 @@ def to_dict(self) -> Dict[str, Any]: :returns: Dictionary with serialized data. """ + callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None return default_to_dict( self, aws_access_key_id=self.aws_access_key_id.to_dict() if self.aws_access_key_id else None, @@ -288,6 +272,7 @@ def to_dict(self) -> Dict[str, Any]: model=self.model, max_length=self.max_length, truncate=self.truncate, + streaming_callback=callback_name, **self.kwargs, ) @@ -305,4 +290,8 @@ def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockGenerator": data["init_parameters"], ["aws_access_key_id", "aws_secret_access_key", "aws_session_token", "aws_region_name", "aws_profile_name"], ) + init_params = data.get("init_parameters", {}) + serialized_callback_handler = init_params.get("streaming_callback") + if serialized_callback_handler: + data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler) return default_from_dict(cls, data) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py index f4dc1aa4f..07db2742f 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/handlers.py @@ -1,4 +1,3 @@ -from abc import ABC, abstractmethod from typing import Dict, Union from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast @@ -61,35 +60,3 @@ def __call__(self, prompt: str, **kwargs) -> Dict[str, Union[str, int]]: "model_max_length": self.model_max_length, "max_length": self.max_length, } - - -class TokenStreamingHandler(ABC): - """ - TokenStreamingHandler implementations handle the streaming of tokens from the stream. - """ - - DONE_MARKER = "[DONE]" - - @abstractmethod - def __call__(self, token_received: str, **kwargs) -> str: - """ - This callback method is called when a new token is received from the stream. - - :param token_received: The token received from the stream. - :param kwargs: Additional keyword arguments passed to the handler. - :returns: The token to be sent to the stream. 
- """ - pass - - -class DefaultTokenStreamingHandler(TokenStreamingHandler): - def __call__(self, token_received, **kwargs) -> str: - """ - This callback method is called when a new token is received from the stream. - - :param token_received: The token received from the stream. - :param kwargs: Additional keyword arguments passed to the handler. - :returns: The token to be sent to the stream. - """ - print(token_received, flush=True, end="") # noqa: T201 - return token_received diff --git a/integrations/amazon_bedrock/tests/test_chat_generator.py b/integrations/amazon_bedrock/tests/test_chat_generator.py index 3e62b56ea..79a04d52b 100644 --- a/integrations/amazon_bedrock/tests/test_chat_generator.py +++ b/integrations/amazon_bedrock/tests/test_chat_generator.py @@ -121,15 +121,6 @@ def test_constructor_with_empty_model(): AmazonBedrockChatGenerator(model="") -def test_invoke_with_no_kwargs(mock_boto3_session): - """ - Test invoke raises an error if no messages are provided - """ - layer = AmazonBedrockChatGenerator(model="anthropic.claude-v2") - with pytest.raises(ValueError, match="The model anthropic.claude-v2 requires"): - layer.invoke() - - @pytest.mark.parametrize( "model, expected_model_adapter", [ diff --git a/integrations/amazon_bedrock/tests/test_generator.py b/integrations/amazon_bedrock/tests/test_generator.py index 65463caae..f0233888c 100644 --- a/integrations/amazon_bedrock/tests/test_generator.py +++ b/integrations/amazon_bedrock/tests/test_generator.py @@ -2,6 +2,7 @@ from unittest.mock import MagicMock, call, patch import pytest +from haystack.dataclasses import StreamingChunk from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockGenerator from haystack_integrations.components.generators.amazon_bedrock.adapters import ( @@ -34,6 +35,7 @@ def test_to_dict(mock_boto3_session): "max_length": 99, "truncate": False, "temperature": 10, + "streaming_callback": None, }, } @@ -120,15 +122,6 @@ def test_constructor_with_empty_model(): AmazonBedrockGenerator(model="") -def test_invoke_with_no_kwargs(mock_boto3_session): - """ - Test invoke raises an error if no prompt is provided - """ - layer = AmazonBedrockGenerator(model="anthropic.claude-v2") - with pytest.raises(ValueError, match="The model anthropic.claude-v2 requires a valid prompt."): - layer.invoke() - - def test_short_prompt_is_not_truncated(mock_boto3_session): """ Test that a short prompt is not truncated @@ -224,13 +217,13 @@ def test_long_prompt_is_not_truncated_when_truncate_false(mock_boto3_session): generator.model_adapter.get_responses = MagicMock(return_value=["response"]) # Invoke the generator - generator.invoke(prompt=long_prompt_text) + generator.run(prompt=long_prompt_text) # Ensure _ensure_token_limit was not called mock_ensure_token_limit.assert_not_called(), # Check the prompt passed to prepare_body - generator.model_adapter.prepare_body.assert_called_with(prompt=long_prompt_text) + generator.model_adapter.prepare_body.assert_called_with(prompt=long_prompt_text, stream=False) @pytest.mark.parametrize( @@ -407,7 +400,7 @@ def test_get_responses_leading_whitespace(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"delta": {"text": " This"}}'}}, @@ -417,35 +410,31 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"delta": {"text": " response."}}'}}, ] - stream_handler_mock.side_effect = 
lambda token_received, **kwargs: token_received - adapter = AnthropicClaudeAdapter(model_kwargs={}, max_length=99) expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"delta": {"text": " This"}}), - call(" is", event_data={"delta": {"text": " is"}}), - call(" a", event_data={"delta": {"text": " a"}}), - call(" single", event_data={"delta": {"text": " single"}}), - call(" response.", event_data={"delta": {"text": " response."}}), + call(StreamingChunk(content=" This", meta={"delta": {"text": " This"}})), + call(StreamingChunk(content=" is", meta={"delta": {"text": " is"}})), + call(StreamingChunk(content=" a", meta={"delta": {"text": " a"}})), + call(StreamingChunk(content=" single", meta={"delta": {"text": " single"}})), + call(StreamingChunk(content=" response.", meta={"delta": {"text": " response."}})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = AnthropicClaudeAdapter(model_kwargs={}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() class TestAnthropicClaudeAdapterNoMessagesAPI: @@ -553,7 +542,7 @@ def test_get_responses_leading_whitespace(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"completion": " This"}'}}, @@ -563,35 +552,31 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"completion": " response."}'}}, ] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = AnthropicClaudeAdapter(model_kwargs={"use_messages_api": False}, max_length=99) expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"completion": " This"}), - call(" is", event_data={"completion": " is"}), - call(" a", event_data={"completion": " a"}), - call(" single", event_data={"completion": " single"}), - call(" response.", event_data={"completion": " response."}), + call(StreamingChunk(content=" This", meta={"completion": " This"})), + call(StreamingChunk(content=" is", meta={"completion": " is"})), + call(StreamingChunk(content=" a", meta={"completion": " a"})), + call(StreamingChunk(content=" single", meta={"completion": " single"})), + call(StreamingChunk(content=" response.", meta={"completion": " response."})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() 
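
Taken together, the generator and adapter changes above replace the old TokenStreamingHandler plumbing with Haystack's StreamingChunk callback. A hypothetical usage sketch of the new run() signature follows; it assumes valid AWS credentials and access to the named Bedrock model, which are not provided by this patch.

from haystack.dataclasses import StreamingChunk
from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockGenerator

def on_chunk(chunk: StreamingChunk) -> None:
    # chunk.content carries the token text; chunk.meta carries the raw stream event.
    print(chunk.content, end="", flush=True)

generator = AmazonBedrockGenerator(model="anthropic.claude-v2", streaming_callback=on_chunk)
result = generator.run(prompt="In one sentence, what is Amazon Bedrock?")
print()
print(result["replies"])
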
stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = AnthropicClaudeAdapter(model_kwargs={"use_messages_api": False}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() class TestMistralAdapter: @@ -686,7 +671,7 @@ def test_get_responses(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"outputs": [{"text": " This"}]}'}}, @@ -696,35 +681,33 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"outputs": [{"text": " response."}]}'}}, ] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = MistralAdapter(model_kwargs={}, max_length=99) expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"outputs": [{"text": " This"}]}), - call(" is", event_data={"outputs": [{"text": " is"}]}), - call(" a", event_data={"outputs": [{"text": " a"}]}), - call(" single", event_data={"outputs": [{"text": " single"}]}), - call(" response.", event_data={"outputs": [{"text": " response."}]}), + call(StreamingChunk(content=" This", meta={"outputs": [{"text": " This"}]})), + call(StreamingChunk(content=" is", meta={"outputs": [{"text": " is"}]})), + call(StreamingChunk(content=" a", meta={"outputs": [{"text": " a"}]})), + call(StreamingChunk(content=" single", meta={"outputs": [{"text": " single"}]})), + call(StreamingChunk(content=" response.", meta={"outputs": [{"text": " response."}]})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received + streaming_callback_mock.side_effect = lambda token_received, **kwargs: token_received adapter = MistralAdapter(model_kwargs={}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() class TestCohereCommandAdapter: @@ -881,7 +864,7 @@ def test_get_responses_multiple_responses(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"text": " This"}'}}, @@ -892,36 +875,32 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"finish_reason": "MAX_TOKENS", "is_finished": true}'}}, ] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = CohereCommandAdapter(model_kwargs={}, max_length=99) 
expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"text": " This"}), - call(" is", event_data={"text": " is"}), - call(" a", event_data={"text": " a"}), - call(" single", event_data={"text": " single"}), - call(" response.", event_data={"text": " response."}), - call("", event_data={"finish_reason": "MAX_TOKENS", "is_finished": True}), + call(StreamingChunk(content=" This", meta={"text": " This"})), + call(StreamingChunk(content=" is", meta={"text": " is"})), + call(StreamingChunk(content=" a", meta={"text": " a"})), + call(StreamingChunk(content=" single", meta={"text": " single"})), + call(StreamingChunk(content=" response.", meta={"text": " response."})), + call(StreamingChunk(content="", meta={"finish_reason": "MAX_TOKENS", "is_finished": True})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = CohereCommandAdapter(model_kwargs={}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() class TestCohereCommandRAdapter: @@ -1025,11 +1004,11 @@ def test_extract_completions_from_response(self) -> None: completions = adapter._extract_completions_from_response(response_body=response_body) assert completions == ["response"] - def test_extract_token_from_stream(self) -> None: + def test_build_chunk(self) -> None: adapter = CohereCommandRAdapter(model_kwargs={}, max_length=100) chunk = {"text": "response_token"} - token = adapter._extract_token_from_stream(chunk=chunk) - assert token == "response_token" + streaming_chunk = adapter._build_streaming_chunk(chunk=chunk) + assert streaming_chunk == StreamingChunk(content="response_token", meta=chunk) class TestAI21LabsJurassic2Adapter: @@ -1288,7 +1267,7 @@ def test_get_responses_multiple_responses(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"outputText": " This"}'}}, @@ -1298,35 +1277,31 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"outputText": " response."}'}}, ] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = AmazonTitanAdapter(model_kwargs={}, max_length=99) expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"outputText": " This"}), - call(" is", event_data={"outputText": " is"}), - call(" a", event_data={"outputText": " a"}), - call(" single", event_data={"outputText": " single"}), - call(" 
response.", event_data={"outputText": " response."}), + call(StreamingChunk(content=" This", meta={"outputText": " This"})), + call(StreamingChunk(content=" is", meta={"outputText": " is"})), + call(StreamingChunk(content=" a", meta={"outputText": " a"})), + call(StreamingChunk(content=" single", meta={"outputText": " single"})), + call(StreamingChunk(content=" response.", meta={"outputText": " response."})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = AmazonTitanAdapter(model_kwargs={}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() class TestMetaLlamaAdapter: @@ -1417,7 +1392,7 @@ def test_get_responses_leading_whitespace(self) -> None: def test_get_stream_responses(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [ {"chunk": {"bytes": b'{"generation": " This"}'}}, @@ -1427,32 +1402,28 @@ def test_get_stream_responses(self) -> None: {"chunk": {"bytes": b'{"generation": " response."}'}}, ] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = MetaLlamaAdapter(model_kwargs={}, max_length=99) expected_responses = ["This is a single response."] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_has_calls( + streaming_callback_mock.assert_has_calls( [ - call(" This", event_data={"generation": " This"}), - call(" is", event_data={"generation": " is"}), - call(" a", event_data={"generation": " a"}), - call(" single", event_data={"generation": " single"}), - call(" response.", event_data={"generation": " response."}), + call(StreamingChunk(content=" This", meta={"generation": " This"})), + call(StreamingChunk(content=" is", meta={"generation": " is"})), + call(StreamingChunk(content=" a", meta={"generation": " a"})), + call(StreamingChunk(content=" single", meta={"generation": " single"})), + call(StreamingChunk(content=" response.", meta={"generation": " response."})), ] ) def test_get_stream_responses_empty(self) -> None: stream_mock = MagicMock() - stream_handler_mock = MagicMock() + streaming_callback_mock = MagicMock() stream_mock.__iter__.return_value = [] - stream_handler_mock.side_effect = lambda token_received, **kwargs: token_received - adapter = MetaLlamaAdapter(model_kwargs={}, max_length=99) expected_responses = [""] - assert adapter.get_stream_responses(stream_mock, stream_handler_mock) == expected_responses + assert adapter.get_stream_responses(stream_mock, streaming_callback_mock) == expected_responses - stream_handler_mock.assert_not_called() + streaming_callback_mock.assert_not_called() From 93d2c6824207f0e29928fc82516b2327ff0d54d2 Mon Sep 17 00:00:00 2001 From: HaystackBot Date: Mon, 12 Aug 2024 13:41:07 +0000 Subject: [PATCH 3/6] Update the changelog --- integrations/amazon_bedrock/CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git 
a/integrations/amazon_bedrock/CHANGELOG.md b/integrations/amazon_bedrock/CHANGELOG.md index f1e8b69d0..33a1598a8 100644 --- a/integrations/amazon_bedrock/CHANGELOG.md +++ b/integrations/amazon_bedrock/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## [integrations/amazon_bedrock-v0.10.0] - 2024-08-12 + +### ๐Ÿ› Bug Fixes + +- Support streaming_callback param in amazon bedrock generators (#927) + +### Docs + +- Update AmazonBedrockChatGenerator docstrings (#949) +- Update AmazonBedrockGenerator docstrings (#956) + ## [integrations/amazon_bedrock-v0.9.3] - 2024-07-17 ### ๐Ÿš€ Features From a8b2de9d86621aba2243e2d6e350e082c087700d Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Mon, 12 Aug 2024 17:19:02 +0200 Subject: [PATCH 4/6] test: do not retry tests in `hatch run test` command (#954) * do not retry tests in hatch run test command * fix * hatch config improvements --- .github/workflows/amazon_bedrock.yml | 6 +++--- .github/workflows/amazon_sagemaker.yml | 4 ++-- .github/workflows/anthropic.yml | 4 ++-- .github/workflows/astra.yml | 4 ++-- .github/workflows/chroma.yml | 4 ++-- .github/workflows/cohere.yml | 4 ++-- .github/workflows/deepeval.yml | 4 ++-- .github/workflows/elasticsearch.yml | 4 ++-- .github/workflows/fastembed.yml | 4 ++-- .github/workflows/google_ai.yml | 4 ++-- .github/workflows/google_vertex.yml | 4 ++-- .github/workflows/instructor_embedders.yml | 4 ++-- .github/workflows/jina.yml | 4 ++-- .github/workflows/langfuse.yml | 4 ++-- .github/workflows/llama_cpp.yml | 4 ++-- .github/workflows/mistral.yml | 4 ++-- .github/workflows/mongodb_atlas.yml | 4 ++-- .github/workflows/nvidia.yml | 4 ++-- .github/workflows/ollama.yml | 4 ++-- .github/workflows/opensearch.yml | 4 ++-- .github/workflows/optimum.yml | 4 ++-- .github/workflows/pgvector.yml | 4 ++-- .github/workflows/pinecone.yml | 4 ++-- .github/workflows/qdrant.yml | 4 ++-- .github/workflows/ragas.yml | 4 ++-- .github/workflows/unstructured.yml | 4 ++-- .github/workflows/weaviate.yml | 4 ++-- integrations/amazon_bedrock/pyproject.toml | 6 ++++-- integrations/amazon_sagemaker/pyproject.toml | 6 ++++-- integrations/anthropic/pyproject.toml | 6 ++++-- integrations/astra/pyproject.toml | 6 ++++-- integrations/chroma/pyproject.toml | 6 ++++-- integrations/cohere/pyproject.toml | 6 ++++-- integrations/deepeval/pyproject.toml | 6 ++++-- integrations/elasticsearch/pyproject.toml | 6 ++++-- integrations/fastembed/pyproject.toml | 6 ++++-- integrations/google_ai/pyproject.toml | 6 ++++-- integrations/google_vertex/pyproject.toml | 6 ++++-- integrations/instructor_embedders/pyproject.toml | 6 ++++-- integrations/jina/pyproject.toml | 6 ++++-- integrations/langfuse/pyproject.toml | 6 ++++-- integrations/llama_cpp/pyproject.toml | 6 ++++-- integrations/mistral/pyproject.toml | 6 ++++-- integrations/mongodb_atlas/pyproject.toml | 6 ++++-- integrations/nvidia/pyproject.toml | 6 ++++-- integrations/ollama/pyproject.toml | 6 ++++-- integrations/opensearch/pyproject.toml | 6 ++++-- integrations/optimum/pyproject.toml | 6 ++++-- integrations/pgvector/pyproject.toml | 6 ++++-- integrations/pinecone/pyproject.toml | 6 ++++-- integrations/qdrant/pyproject.toml | 6 ++++-- integrations/ragas/pyproject.toml | 6 ++++-- integrations/unstructured/pyproject.toml | 6 ++++-- integrations/weaviate/pyproject.toml | 6 ++++-- 54 files changed, 163 insertions(+), 109 deletions(-) diff --git a/.github/workflows/amazon_bedrock.yml b/.github/workflows/amazon_bedrock.yml index eff39e2af..2057d4bdf 100644 --- a/.github/workflows/amazon_bedrock.yml +++ 
b/.github/workflows/amazon_bedrock.yml @@ -63,7 +63,7 @@ jobs: run: hatch run docs - name: Run unit tests - run: hatch run cov -m "not integration" + run: hatch run cov-retry -m "not integration" # Do not authenticate on pull requests from forks - name: AWS authentication @@ -76,13 +76,13 @@ jobs: # Do not run integration tests on pull requests from forks - name: Run integration tests if: github.event.pull_request.head.repo.full_name == github.repository - run: hatch run cov -m "integration" + run: hatch run cov-retry -m "integration" - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/amazon_sagemaker.yml b/.github/workflows/amazon_sagemaker.yml index 8ebbf6c65..ed0a571e6 100644 --- a/.github/workflows/amazon_sagemaker.yml +++ b/.github/workflows/amazon_sagemaker.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/anthropic.yml b/.github/workflows/anthropic.yml index 858b7be35..c4cdeb2d1 100644 --- a/.github/workflows/anthropic.yml +++ b/.github/workflows/anthropic.yml @@ -54,13 +54,13 @@ jobs: run: hatch run lint:all - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/astra.yml b/.github/workflows/astra.yml index a264fdfd5..dcfc00c75 100644 --- a/.github/workflows/astra.yml +++ b/.github/workflows/astra.yml @@ -61,13 +61,13 @@ jobs: env: ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/chroma.yml b/.github/workflows/chroma.yml index 496ddaa03..26b6287bd 100644 --- a/.github/workflows/chroma.yml +++ b/.github/workflows/chroma.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly 
failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/cohere.yml b/.github/workflows/cohere.yml index 05b84af4b..00a8ee2ed 100644 --- a/.github/workflows/cohere.yml +++ b/.github/workflows/cohere.yml @@ -58,13 +58,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/deepeval.yml b/.github/workflows/deepeval.yml index 0048528bc..23de1a3f4 100644 --- a/.github/workflows/deepeval.yml +++ b/.github/workflows/deepeval.yml @@ -58,13 +58,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/elasticsearch.yml b/.github/workflows/elasticsearch.yml index 4cf34b301..476e832b5 100644 --- a/.github/workflows/elasticsearch.yml +++ b/.github/workflows/elasticsearch.yml @@ -55,13 +55,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/fastembed.yml b/.github/workflows/fastembed.yml index 32b276466..e389bf3a4 100644 --- a/.github/workflows/fastembed.yml +++ b/.github/workflows/fastembed.yml @@ -42,13 +42,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/google_ai.yml b/.github/workflows/google_ai.yml index 046b92463..1b4b2e496 100644 --- a/.github/workflows/google_ai.yml +++ b/.github/workflows/google_ai.yml @@ -58,13 +58,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/google_vertex.yml b/.github/workflows/google_vertex.yml index b4006924c..78ba5694b 100644 --- a/.github/workflows/google_vertex.yml +++ b/.github/workflows/google_vertex.yml @@ -57,13 +57,13 @@ jobs: 
run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/instructor_embedders.yml b/.github/workflows/instructor_embedders.yml index 9d84066a7..f12f4d696 100644 --- a/.github/workflows/instructor_embedders.yml +++ b/.github/workflows/instructor_embedders.yml @@ -35,13 +35,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/jina.yml b/.github/workflows/jina.yml index cd0a55ad4..00af6eb45 100644 --- a/.github/workflows/jina.yml +++ b/.github/workflows/jina.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/langfuse.yml b/.github/workflows/langfuse.yml index edbec4840..8a10cf241 100644 --- a/.github/workflows/langfuse.yml +++ b/.github/workflows/langfuse.yml @@ -60,13 +60,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/llama_cpp.yml b/.github/workflows/llama_cpp.yml index ec86ab268..a9480ca96 100644 --- a/.github/workflows/llama_cpp.yml +++ b/.github/workflows/llama_cpp.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/mistral.yml b/.github/workflows/mistral.yml index efc8ed065..e62008906 100644 --- a/.github/workflows/mistral.yml +++ b/.github/workflows/mistral.yml @@ -58,13 +58,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m 
"not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/mongodb_atlas.yml b/.github/workflows/mongodb_atlas.yml index 9c4f17ba0..3d1ad5101 100644 --- a/.github/workflows/mongodb_atlas.yml +++ b/.github/workflows/mongodb_atlas.yml @@ -54,13 +54,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/nvidia.yml b/.github/workflows/nvidia.yml index 1ae3a0f1a..0d39a4d91 100644 --- a/.github/workflows/nvidia.yml +++ b/.github/workflows/nvidia.yml @@ -55,7 +55,7 @@ jobs: run: hatch run lint:all - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Generate docs if: matrix.python-version == '3.9' && runner.os == 'Linux' @@ -65,7 +65,7 @@ jobs: if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/ollama.yml b/.github/workflows/ollama.yml index 7a6465e16..43af485b7 100644 --- a/.github/workflows/ollama.yml +++ b/.github/workflows/ollama.yml @@ -75,13 +75,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/opensearch.yml b/.github/workflows/opensearch.yml index 46ce2e6a5..48169a75f 100644 --- a/.github/workflows/opensearch.yml +++ b/.github/workflows/opensearch.yml @@ -55,13 +55,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/optimum.yml b/.github/workflows/optimum.yml index fe0421d59..c33baa7f8 100644 --- a/.github/workflows/optimum.yml +++ b/.github/workflows/optimum.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/pgvector.yml b/.github/workflows/pgvector.yml index 
59422298a..0fe20e037 100644 --- a/.github/workflows/pgvector.yml +++ b/.github/workflows/pgvector.yml @@ -61,13 +61,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/pinecone.yml b/.github/workflows/pinecone.yml index 8f963854d..9e143005b 100644 --- a/.github/workflows/pinecone.yml +++ b/.github/workflows/pinecone.yml @@ -63,13 +63,13 @@ jobs: - name: Run tests env: INDEX_NAME: ${{ matrix.INDEX_NAME }} - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/qdrant.yml b/.github/workflows/qdrant.yml index 5e17c16cb..116225b2d 100644 --- a/.github/workflows/qdrant.yml +++ b/.github/workflows/qdrant.yml @@ -57,13 +57,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/ragas.yml b/.github/workflows/ragas.yml index de53abd94..c4757e704 100644 --- a/.github/workflows/ragas.yml +++ b/.github/workflows/ragas.yml @@ -58,13 +58,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/unstructured.yml b/.github/workflows/unstructured.yml index c1eb0b4ea..e4b640275 100644 --- a/.github/workflows/unstructured.yml +++ b/.github/workflows/unstructured.yml @@ -69,13 +69,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/.github/workflows/weaviate.yml b/.github/workflows/weaviate.yml index 8e1bbbc4f..5e29eafe7 100644 --- a/.github/workflows/weaviate.yml +++ b/.github/workflows/weaviate.yml @@ -54,13 +54,13 @@ jobs: run: hatch run docs - name: Run tests - run: hatch run cov + run: hatch run cov-retry - name: Nightly - run unit tests with Haystack main branch if: 
github.event_name == 'schedule' run: | hatch run pip install git+https://github.com/deepset-ai/haystack.git - hatch run test -m "not integration" + hatch run cov-retry -m "not integration" - name: Send event to Datadog for nightly failures if: failure() && github.event_name == 'schedule' diff --git a/integrations/amazon_bedrock/pyproject.toml b/integrations/amazon_bedrock/pyproject.toml index 2a4f88960..f4a410dbd 100644 --- a/integrations/amazon_bedrock/pyproject.toml +++ b/integrations/amazon_bedrock/pyproject.toml @@ -49,10 +49,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11", "3.12"] diff --git a/integrations/amazon_sagemaker/pyproject.toml b/integrations/amazon_sagemaker/pyproject.toml index 40a2a1b16..f8050bb48 100644 --- a/integrations/amazon_sagemaker/pyproject.toml +++ b/integrations/amazon_sagemaker/pyproject.toml @@ -52,10 +52,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] diff --git a/integrations/anthropic/pyproject.toml b/integrations/anthropic/pyproject.toml index e5cfdcbce..3f8c9812b 100644 --- a/integrations/anthropic/pyproject.toml +++ b/integrations/anthropic/pyproject.toml @@ -49,10 +49,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11", "3.12"] diff --git a/integrations/astra/pyproject.toml b/integrations/astra/pyproject.toml index 9bbf8e07b..7d543ddc9 100644 --- a/integrations/astra/pyproject.toml +++ b/integrations/astra/pyproject.toml @@ -48,10 +48,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown 
pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11"] diff --git a/integrations/chroma/pyproject.toml b/integrations/chroma/pyproject.toml index 12a4a4b2b..b4591e041 100644 --- a/integrations/chroma/pyproject.toml +++ b/integrations/chroma/pyproject.toml @@ -49,10 +49,12 @@ dependencies = [ "databind-core<4.5.0", # FIXME: the latest 4.5.0 causes loops in pip resolver ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/cohere/pyproject.toml b/integrations/cohere/pyproject.toml index 4d1ab36dd..04fe15585 100644 --- a/integrations/cohere/pyproject.toml +++ b/integrations/cohere/pyproject.toml @@ -43,10 +43,12 @@ git_describe_command = 'git describe --tags --match="integrations/cohere-v[0-9]* [tool.hatch.envs.default] dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/deepeval/pyproject.toml b/integrations/deepeval/pyproject.toml index 4e93c6c41..44d89cb11 100644 --- a/integrations/deepeval/pyproject.toml +++ b/integrations/deepeval/pyproject.toml @@ -43,10 +43,12 @@ git_describe_command = 'git describe --tags --match="integrations/deepeval-v[0-9 [tool.hatch.envs.default] dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/elasticsearch/pyproject.toml b/integrations/elasticsearch/pyproject.toml index 87bf5a167..cb9281030 100644 --- a/integrations/elasticsearch/pyproject.toml +++ b/integrations/elasticsearch/pyproject.toml @@ -49,10 +49,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] 
+cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/fastembed/pyproject.toml b/integrations/fastembed/pyproject.toml index b0a367ee4..9afd344c9 100644 --- a/integrations/fastembed/pyproject.toml +++ b/integrations/fastembed/pyproject.toml @@ -50,10 +50,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/google_ai/pyproject.toml b/integrations/google_ai/pyproject.toml index 3a2382477..db958a487 100644 --- a/integrations/google_ai/pyproject.toml +++ b/integrations/google_ai/pyproject.toml @@ -48,10 +48,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11"] diff --git a/integrations/google_vertex/pyproject.toml b/integrations/google_vertex/pyproject.toml index 81d22c21b..747bbecbf 100644 --- a/integrations/google_vertex/pyproject.toml +++ b/integrations/google_vertex/pyproject.toml @@ -48,10 +48,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11"] diff --git a/integrations/instructor_embedders/pyproject.toml b/integrations/instructor_embedders/pyproject.toml index 47afee31f..0543a9a88 100644 --- a/integrations/instructor_embedders/pyproject.toml +++ b/integrations/instructor_embedders/pyproject.toml @@ -71,10 +71,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.test.matrix]] diff --git a/integrations/jina/pyproject.toml b/integrations/jina/pyproject.toml 
index 494d7d1ea..fa2fd50ed 100644 --- a/integrations/jina/pyproject.toml +++ b/integrations/jina/pyproject.toml @@ -45,10 +45,12 @@ git_describe_command = 'git describe --tags --match="integrations/jina-v[0-9]*"' [tool.hatch.envs.default] dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] diff --git a/integrations/langfuse/pyproject.toml b/integrations/langfuse/pyproject.toml index 41b1c1da9..cf7b85b64 100644 --- a/integrations/langfuse/pyproject.toml +++ b/integrations/langfuse/pyproject.toml @@ -49,10 +49,12 @@ dependencies = [ "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] diff --git a/integrations/llama_cpp/pyproject.toml b/integrations/llama_cpp/pyproject.toml index 8c539cc08..2efb15d53 100644 --- a/integrations/llama_cpp/pyproject.toml +++ b/integrations/llama_cpp/pyproject.toml @@ -53,10 +53,12 @@ dependencies = [ "transformers[sentencepiece]", ] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11", "3.12"] diff --git a/integrations/mistral/pyproject.toml b/integrations/mistral/pyproject.toml index abef1d747..bcb4f5999 100644 --- a/integrations/mistral/pyproject.toml +++ b/integrations/mistral/pyproject.toml @@ -43,10 +43,12 @@ git_describe_command = 'git describe --tags --match="integrations/mistral-v[0-9] [tool.hatch.envs.default] dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"] [tool.hatch.envs.default.scripts] -test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}" -test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}" +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] [[tool.hatch.envs.all.matrix]] diff --git a/integrations/mongodb_atlas/pyproject.toml b/integrations/mongodb_atlas/pyproject.toml index 9fdfa3385..170f6e94d 100644 --- 
a/integrations/mongodb_atlas/pyproject.toml
+++ b/integrations/mongodb_atlas/pyproject.toml
@@ -50,10 +50,12 @@ dependencies = [
   "haystack-pydoc-tools",
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml
index f35485e9c..af1d806ef 100644
--- a/integrations/nvidia/pyproject.toml
+++ b/integrations/nvidia/pyproject.toml
@@ -44,10 +44,12 @@ git_describe_command = 'git describe --tags --match="integrations/nvidia-v[0-9]*
 [tool.hatch.envs.default]
 dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools", "requests_mock"]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/ollama/pyproject.toml b/integrations/ollama/pyproject.toml
index 3b7119dfd..5187de31f 100644
--- a/integrations/ollama/pyproject.toml
+++ b/integrations/ollama/pyproject.toml
@@ -53,10 +53,12 @@ dependencies = [
   "haystack-pydoc-tools",
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/opensearch/pyproject.toml b/integrations/opensearch/pyproject.toml
index 842b46415..b7e5e3da6 100644
--- a/integrations/opensearch/pyproject.toml
+++ b/integrations/opensearch/pyproject.toml
@@ -50,10 +50,12 @@ dependencies = [
   "boto3",
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
diff --git a/integrations/optimum/pyproject.toml b/integrations/optimum/pyproject.toml
index d5d00c2cf..2e0fb26a4 100644
--- a/integrations/optimum/pyproject.toml
+++ b/integrations/optimum/pyproject.toml
@@ -63,10 +63,12 @@ dependencies = [
   "setuptools", # FIXME: the latest 4.5.0 causes loops in pip resolver
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/pgvector/pyproject.toml b/integrations/pgvector/pyproject.toml
index 10d00606f..7f31a5203 100644
--- a/integrations/pgvector/pyproject.toml
+++ b/integrations/pgvector/pyproject.toml
@@ -50,10 +50,12 @@ dependencies = [
   "haystack-pydoc-tools",
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/pinecone/pyproject.toml b/integrations/pinecone/pyproject.toml
index ab764589b..866385dd3 100644
--- a/integrations/pinecone/pyproject.toml
+++ b/integrations/pinecone/pyproject.toml
@@ -54,10 +54,12 @@ dependencies = [
 [tool.hatch.envs.default.scripts]
 # Pinecone tests are slow (require HTTP requests), so we run them in parallel
 # with pytest-xdist (https://pytest-xdist.readthedocs.io/en/stable/distribution.html)
-test = "pytest -n auto --maxprocesses=2 --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest -n auto --maxprocesses=2 --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest -n auto --maxprocesses=2 -x {args:tests}"
+test-cov = "coverage run -m pytest -n auto --maxprocesses=2 {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/qdrant/pyproject.toml b/integrations/qdrant/pyproject.toml
index d43926417..8b9c44cc7 100644
--- a/integrations/qdrant/pyproject.toml
+++ b/integrations/qdrant/pyproject.toml
@@ -46,10 +46,12 @@ git_describe_command = 'git describe --tags --match="integrations/qdrant-v[0-9]*
 [tool.hatch.envs.default]
 dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/ragas/pyproject.toml b/integrations/ragas/pyproject.toml
index 3a32d27a7..edc33eee1 100644
--- a/integrations/ragas/pyproject.toml
+++ b/integrations/ragas/pyproject.toml
@@ -43,10 +43,12 @@ git_describe_command = 'git describe --tags --match="integrations/ragas-v[0-9]*"
 [tool.hatch.envs.default]
 dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools", "pytest-asyncio"]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/unstructured/pyproject.toml b/integrations/unstructured/pyproject.toml
index d00f60e3a..b5de6c66a 100644
--- a/integrations/unstructured/pyproject.toml
+++ b/integrations/unstructured/pyproject.toml
@@ -48,10 +48,12 @@ dependencies = [
   "haystack-pydoc-tools",
 ]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
diff --git a/integrations/weaviate/pyproject.toml b/integrations/weaviate/pyproject.toml
index b0aa15143..14b60fe12 100644
--- a/integrations/weaviate/pyproject.toml
+++ b/integrations/weaviate/pyproject.toml
@@ -49,10 +49,12 @@ git_describe_command = 'git describe --tags --match="integrations/weaviate-v[0-9
 [tool.hatch.envs.default]
 dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "ipython"]
 [tool.hatch.envs.default.scripts]
-test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
-test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x"
 cov-report = ["- coverage combine", "coverage report"]
 cov = ["test-cov", "cov-report"]
+cov-retry = ["test-cov-retry", "cov-report"]
 docs = ["pydoc-markdown pydoc/config.yml"]
 [[tool.hatch.envs.all.matrix]]
Batista" Date: Mon, 12 Aug 2024 17:46:03 +0200 Subject: [PATCH 5/6] refactor: change meta data fields (#911) * initial import * formatting * fixing tests * removing warnings * linting issues * fixes due to conflicts --- .../amazon_bedrock/chat/chat_generator.py | 13 ++++++------- .../generators/anthropic/chat/chat_generator.py | 14 +++++++------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py index 206cb0b9a..f7bb0ba23 100644 --- a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +++ b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py @@ -1,7 +1,6 @@ import json import logging import re -import warnings from typing import Any, Callable, ClassVar, Dict, List, Optional, Type from botocore.exceptions import ClientError @@ -150,12 +149,6 @@ def resolve_secret(secret: Optional[Secret]) -> Optional[str]: self.stop_words = stop_words or [] self.streaming_callback = streaming_callback - warnings.warn( - "The `meta` output of the AmazonBedrockChatGenerator will change in the next release to be inline with " - "OpenAI `meta`output keys.", - stacklevel=2, - ) - @component.output_types(replies=List[ChatMessage]) def run( self, @@ -210,6 +203,12 @@ def run( msg = f"Could not inference Amazon Bedrock model {self.model} due: {exception}" raise AmazonBedrockInferenceError(msg) from exception + # rename the meta key to be inline with OpenAI meta output keys + for response in replies: + if response.meta is not None and "usage" in response.meta: + response.meta["usage"]["prompt_tokens"] = response.meta["usage"].pop("input_tokens") + response.meta["usage"]["completion_tokens"] = response.meta["usage"].pop("output_tokens") + return {"replies": replies} @classmethod diff --git a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py index 06b3dc353..9954f08c5 100644 --- a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py +++ b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py @@ -1,6 +1,5 @@ import dataclasses import json -import warnings from typing import Any, Callable, ClassVar, Dict, List, Optional, Union from haystack import component, default_from_dict, default_to_dict, logging @@ -115,12 +114,6 @@ def __init__( self.client = Anthropic(api_key=self.api_key.resolve_value()) self.ignore_tools_thinking_messages = ignore_tools_thinking_messages - warnings.warn( - "The `meta` output of the AnthropicChatGenerator will change in the next release to be inline with " - "OpenAI `meta`output keys.", - stacklevel=2, - ) - def _get_telemetry_data(self) -> Dict[str, Any]: """ Data that is sent to Posthog for usage analytics. 
diff --git a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py
index 06b3dc353..9954f08c5 100644
--- a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py
+++ b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py
@@ -1,6 +1,5 @@
 import dataclasses
 import json
-import warnings
 from typing import Any, Callable, ClassVar, Dict, List, Optional, Union
 
 from haystack import component, default_from_dict, default_to_dict, logging
@@ -115,12 +114,6 @@ def __init__(
         self.client = Anthropic(api_key=self.api_key.resolve_value())
         self.ignore_tools_thinking_messages = ignore_tools_thinking_messages
 
-        warnings.warn(
-            "The `meta` output of the AnthropicChatGenerator will change in the next release to be inline with "
-            "OpenAI `meta`output keys.",
-            stacklevel=2,
-        )
-
     def _get_telemetry_data(self) -> Dict[str, Any]:
         """
         Data that is sent to Posthog for usage analytics.
@@ -220,6 +213,7 @@ def run(self, messages: List[ChatMessage], generation_kwargs: Optional[Dict[str,
                     # capture stop reason and stop sequence
                     delta = stream_event
             completions = [self._connect_chunks(chunks, start_event, delta)]
+        # if streaming is disabled, the response is an Anthropic Message
         elif isinstance(response, Message):
             has_tools_msgs = any(isinstance(content_block, ToolUseBlock) for content_block in response.content)
@@ -227,6 +221,12 @@ def run(self, messages: List[ChatMessage], generation_kwargs: Optional[Dict[str,
                 response.content = [block for block in response.content if isinstance(block, ToolUseBlock)]
             completions = [self._build_message(content_block, response) for content_block in response.content]
 
+        # rename the meta key to be inline with OpenAI meta output keys
+        for response in completions:
+            if response.meta is not None and "usage" in response.meta:
+                response.meta["usage"]["prompt_tokens"] = response.meta["usage"].pop("input_tokens")
+                response.meta["usage"]["completion_tokens"] = response.meta["usage"].pop("output_tokens")
+
         return {"replies": completions}
 
     def _build_message(self, content_block: Union[TextBlock, ToolUseBlock], message: Message) -> ChatMessage:

From 0451e6f43ad5731887ab0aa2aa8e4de9020913e6 Mon Sep 17 00:00:00 2001
From: HaystackBot
Date: Mon, 12 Aug 2024 15:56:05 +0000
Subject: [PATCH 6/6] Update the changelog

---
 integrations/amazon_bedrock/CHANGELOG.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/integrations/amazon_bedrock/CHANGELOG.md b/integrations/amazon_bedrock/CHANGELOG.md
index 33a1598a8..d347c08d9 100644
--- a/integrations/amazon_bedrock/CHANGELOG.md
+++ b/integrations/amazon_bedrock/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Changelog
 
+## [integrations/amazon_bedrock-v1.0.0] - 2024-08-12
+
+### 🚜 Refactor
+
+- Change meta data fields (#911)
+
+### 🧪 Testing
+
+- Do not retry tests in `hatch run test` command (#954)
+
 ## [integrations/amazon_bedrock-v0.10.0] - 2024-08-12
 
 ### 🐛 Bug Fixes