Merge branch 'main' into azure_converter
vblagoje authored Apr 8, 2024
2 parents ca9c8b9 + be77e85 commit 8347a14
Showing 87 changed files with 4,345 additions and 322 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/e2e.yml
@@ -36,3 +36,23 @@ jobs:
 
       - name: Run tests
         run: hatch run test:e2e
+
+      - name: Send event to Datadog
+        if: failure() && github.event_name == 'schedule'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "error"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
4 changes: 2 additions & 2 deletions .github/workflows/linting.yml
@@ -27,7 +27,7 @@ jobs:
 
       - name: Get changed files
         id: files
-        uses: tj-actions/changed-files@v43
+        uses: tj-actions/changed-files@v44
         with:
           files: |
             **/*.py
@@ -59,7 +59,7 @@ jobs:
 
       - name: Get changed files
         id: files
-        uses: tj-actions/changed-files@v43
+        uses: tj-actions/changed-files@v44
         with:
           files: |
             haystack/**/*.py
2 changes: 1 addition & 1 deletion .github/workflows/project.yml
@@ -10,7 +10,7 @@ jobs:
     name: Add new issues to project for triage
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/add-to-project@v0.6.1
+      - uses: actions/add-to-project@v1.0.0
        with:
          project-url: https://github.com/orgs/deepset-ai/projects/5
          github-token: ${{ secrets.GH_PROJECT_PAT }}
2 changes: 1 addition & 1 deletion .github/workflows/release_notes.yml
@@ -27,7 +27,7 @@ jobs:
 
       - name: Get release note files
        id: changed-files
-        uses: tj-actions/changed-files@v43
+        uses: tj-actions/changed-files@v44
        with:
          files: releasenotes/notes/*.yaml
 
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
@@ -28,6 +28,7 @@ env:
   AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
   AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  HF_API_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }}
   PYTHON_VERSION: "3.8"
   HATCH_VERSION: "1.9.3"
 
24 changes: 9 additions & 15 deletions docker/README.md
@@ -1,25 +1,19 @@
 <p align="center">
-  <a href="https://www.deepset.ai/haystack/"><img src="https://raw.githubusercontent.com/deepset-ai/haystack/main/docs/img/haystack_logo_colored.png" alt="Haystack"></a>
+  <a href="https://haystack.deepset.ai/"><img src="https://raw.githubusercontent.com/deepset-ai/.github/main/haystack-logo-colored.png" alt="Haystack by deepset"></a>
 </p>
 
-Haystack is an end-to-end framework that enables you to build powerful and production-ready
-pipelines for different search use cases. The Docker image comes with a web service
-configured to serve Haystack's `rest_api` to ease pipeline deployments in containerized
-environments.
+[Haystack](https://github.com/deepset-ai/haystack) is an end-to-end LLM framework that allows you to build applications powered by LLMs, Transformer models, vector search and more. Whether you want to perform retrieval-augmented generation (RAG), document search, question answering or answer generation, Haystack can orchestrate state-of-the-art embedding models and LLMs into pipelines to build end-to-end NLP applications and solve your use case.
 
-To start the Docker container binding the TCP port `8000` locally, run:
-```sh
-docker run -p 8000:8000 deepset/haystack
-```
+## Haystack 2.0
 
-If you need the container to access other services available in the host, run:
-```sh
-docker run -p 8000:8000 --network="host" deepset/haystack
-```
+For the latest version of Haystack there's only one image available:
+
+- `haystack:base-<version>` contains a working Python environment with Haystack preinstalled. This image is expected to
+  be derived `FROM`.
 
-## Image Variants
+## Haystack 1.x image variants
 
-The Docker image comes in six variants:
+The Docker image for Haystack 1.x comes in six variants:
 - `haystack:gpu-<version>` contains Haystack dependencies as well as what's needed to run the REST API and UI. It comes with the CUDA runtime and is capable of running on GPUs.
 - `haystack:cpu-remote-inference-<version>` is a slimmed down version of the CPU image with the REST API and UI. It is specifically designed for PromptNode inferencing using remotely hosted models, such as Hugging Face Inference, OpenAI, Cohere, Anthropic, and similar.
 - `haystack:cpu-<version>` contains Haystack dependencies as well as what's needed to run the REST API and UI. It has no support for GPU so must be run on CPU.
2 changes: 2 additions & 0 deletions docs/pydoc/config/embedders_api.yml
@@ -7,6 +7,8 @@ loaders:
       "azure_text_embedder",
       "hugging_face_tei_document_embedder",
       "hugging_face_tei_text_embedder",
+      "hugging_face_api_document_embedder",
+      "hugging_face_api_text_embedder",
       "openai_document_embedder",
       "openai_text_embedder",
       "sentence_transformers_document_embedder",
2 changes: 2 additions & 0 deletions docs/pydoc/config/generators_api.yml
@@ -6,10 +6,12 @@ loaders:
       "azure",
       "hugging_face_local",
       "hugging_face_tgi",
+      "hugging_face_api",
       "openai",
       "chat/azure",
       "chat/hugging_face_local",
       "chat/hugging_face_tgi",
+      "chat/hugging_face_api",
       "chat/openai",
     ]
   ignore_when_discovered: ["__init__"]
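The two `hugging_face_api` entries registered above document the new Hugging Face API generator components. A minimal usage sketch, assuming the Haystack 2.x component name `HuggingFaceAPIGenerator` and its `api_type`/`api_params`/`token` arguments (inferred from the module names, not shown in this diff):

```python
from haystack.components.generators import HuggingFaceAPIGenerator
from haystack.utils import Secret

# Assumed configuration: a generator backed by the HF Serverless Inference
# API; the model name is only an illustration.
generator = HuggingFaceAPIGenerator(
    api_type="serverless_inference_api",
    api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
    token=Secret.from_env_var("HF_API_TOKEN"),
)

result = generator.run(prompt="What is the capital of France?")
print(result["replies"][0])
```

The `chat/hugging_face_api` entry presumably documents the matching chat generator, which consumes a list of `ChatMessage` objects instead of a plain prompt string.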
1 change: 1 addition & 0 deletions docs/pydoc/config/routers_api.yml
@@ -7,6 +7,7 @@ loaders:
       "file_type_router",
       "metadata_router",
       "text_language_router",
+      "zero_shot_text_router",
     ]
   ignore_when_discovered: ["__init__"]
 processors:
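The new `zero_shot_text_router` entry documents a router that classifies incoming text against caller-supplied labels. A minimal sketch, assuming a `TransformersZeroShotTextRouter` component with a `labels` argument and per-label output sockets (names inferred from the module name, not confirmed by this diff):

```python
from haystack.components.routers import TransformersZeroShotTextRouter

# Assumed component name and signature: the router emits the input text on
# the output socket named after the best-matching label.
router = TransformersZeroShotTextRouter(labels=["query", "passage"])
router.warm_up()  # load the underlying zero-shot classification model

result = router.run(text="What is the capital of France?")
print(result)  # e.g. {"query": "What is the capital of France?"}
```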
27 changes: 0 additions & 27 deletions examples/retrievers/in_memory_bm25_documentsearch.py

This file was deleted.

84 changes: 35 additions & 49 deletions haystack/components/builders/dynamic_chat_prompt_builder.py
@@ -12,9 +12,9 @@
 class DynamicChatPromptBuilder:
     """
     DynamicChatPromptBuilder is designed to construct dynamic prompts from a list of `ChatMessage` instances. It
-    integrates with Jinja2 templating for dynamic prompt generation. It assumes that the last user message in the list
-    contains a template and renders it with variables provided to the constructor. Additional template variables
-    can be fed into the pipeline `run` method and will be merged before rendering the template.
+    integrates with Jinja2 templating for dynamic prompt generation. It considers any user or system message in the list
+    potentially containing a template and renders it with variables provided to the constructor. Additional template
+    variables can be fed into the component/pipeline `run` method and will be merged before rendering the template.
 
     Usage example:
     ```python
@@ -34,11 +34,12 @@ class DynamicChatPromptBuilder:
     pipe.connect("prompt_builder.prompt", "llm.messages")
 
     location = "Berlin"
-
-    system_message = ChatMessage.from_system("You are a helpful assistant giving out valuable information to tourists.")
+    language = "English"
+    system_message = ChatMessage.from_system("You are an assistant giving information to tourists in {{language}}")
     messages = [system_message, ChatMessage.from_user("Tell me about {{location}}")]
 
-    res = pipe.run(data={"prompt_builder": {"template_variables": {"location": location}, "prompt_source": messages}})
+    res = pipe.run(data={"prompt_builder": {"template_variables": {"location": location, "language": language},
+                                            "prompt_source": messages}})
     print(res)
 
     >> {'llm': {'replies': [ChatMessage(content="Berlin is the capital city of Germany and one of the most vibrant
@@ -91,48 +92,22 @@ def __init__(self, runtime_variables: Optional[List[str]] = None):
     def run(self, prompt_source: List[ChatMessage], template_variables: Optional[Dict[str, Any]] = None, **kwargs):
         """
         Executes the dynamic prompt building process by processing a list of `ChatMessage` instances.
 
-        The last user message is treated as a template and rendered with the variables provided to the constructor.
-        You can provide additional template variables directly to this method, which are then merged with the variables
-        provided to the constructor.
+        Any user message or system message is inspected for templates and rendered with the variables provided to the
+        constructor. You can provide additional template variables directly to this method, which are then merged with
+        the variables provided to the constructor.
 
         :param prompt_source:
-            A list of `ChatMessage` instances. We make an assumption that the last user message has
-            the template for the chat prompt
+            A list of `ChatMessage` instances. All user and system messages are treated as potentially having templates
+            and are rendered with the provided template variables - if templates are found.
         :param template_variables:
             A dictionary of template variables. Template variables provided at initialization are required
             to resolve pipeline variables, and these are additional variables users can provide directly to this method.
         :param kwargs:
-            Additional keyword arguments, typically resolved from a pipeline, which are merged with the provided template variables.
+            Additional keyword arguments, typically resolved from a pipeline, which are merged with the provided
+            template variables.
         :returns: A dictionary with the following keys:
-            - `prompt`: The updated list of `ChatMessage` instances after rendering the string template.
-        """
-        kwargs = kwargs or {}
-        template_variables = template_variables or {}
-        template_variables_combined = {**kwargs, **template_variables}
-        if not template_variables_combined:
-            logger.warning(
-                "The DynamicChatPromptBuilder run method requires template variables, but none were provided. "
-                "Please provide an appropriate template variable to enable correct prompt generation."
-            )
-        result: List[ChatMessage] = self._process_chat_messages(prompt_source, template_variables_combined)
-        return {"prompt": result}
-
-    def _process_chat_messages(self, prompt_source: List[ChatMessage], template_variables: Dict[str, Any]):
-        """
-        Processes a list of :class:`ChatMessage` instances to generate a chat prompt.
-
-        It takes the last user message in the list, treats it as a template, and renders it with the provided
-        template variables. The resulting message replaces the last user message in the list, forming a complete,
-        templated chat prompt.
-
-        :param prompt_source:
-            A list of `ChatMessage` instances to be processed. The last message is expected
-            to be from a user and is treated as a template.
-        :param template_variables:
-            A dictionary of template variables used for rendering the last user message.
-        :returns:
-            A list of `ChatMessage` instances, where the last user message has been replaced with its
+            - `prompt`: The updated list of `ChatMessage` instances after rendering the found templates.
         :raises ValueError:
             If `chat_messages` is empty or contains elements that are not instances of `ChatMessage`.
         :raises ValueError:
@@ -150,17 +125,28 @@ def _process_chat_messages(self, prompt_source: List[ChatMessage], template_variables: Dict[str, Any]):
                 f"are ChatMessage instances."
             )
 
-        last_message: ChatMessage = prompt_source[-1]
-        if last_message.is_from(ChatRole.USER):
-            template = self._validate_template(last_message.content, set(template_variables.keys()))
-            templated_user_message = ChatMessage.from_user(template.render(template_variables))
-            return prompt_source[:-1] + [templated_user_message]
-        else:
+        kwargs = kwargs or {}
+        template_variables = template_variables or {}
+        template_variables = {**kwargs, **template_variables}
+        if not template_variables:
             logger.warning(
-                "DynamicChatPromptBuilder was not provided with a user message as the last message in "
-                "chat conversation, no templating will be applied."
+                "The DynamicChatPromptBuilder run method requires template variables, but none were provided. "
+                "Please provide an appropriate template variable to enable correct prompt generation."
             )
-            return prompt_source
+
+        processed_messages = []
+        for message in prompt_source:
+            if message.is_from(ChatRole.USER) or message.is_from(ChatRole.SYSTEM):
+                template = self._validate_template(message.content, set(template_variables.keys()))
+                rendered_content = template.render(template_variables)
+                rendered_message = (
+                    ChatMessage.from_user(rendered_content)
+                    if message.is_from(ChatRole.USER)
+                    else ChatMessage.from_system(rendered_content)
+                )
+                processed_messages.append(rendered_message)
+            else:
+                processed_messages.append(message)
+        return {"prompt": processed_messages}
 
     def _validate_template(self, template_text: str, provided_variables: Set[str]):
         """
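In short, templating is no longer limited to the last user message: every system and user message is now rendered, and other messages pass through untouched. A standalone sketch of the new behavior, based on the `run` signature shown in this diff (import paths assumed from the file location):

```python
from haystack.components.builders import DynamicChatPromptBuilder
from haystack.dataclasses import ChatMessage

builder = DynamicChatPromptBuilder()
messages = [
    ChatMessage.from_system("You are an assistant answering in {{language}}."),
    ChatMessage.from_user("Tell me about {{location}}."),
]

# Both the system and the user template are rendered; assistant messages
# in the list would be appended unchanged.
result = builder.run(
    prompt_source=messages,
    template_variables={"language": "English", "location": "Berlin"},
)
print(result["prompt"])
```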
4 changes: 3 additions & 1 deletion haystack/components/builders/prompt_builder.py
@@ -9,7 +9,9 @@
 class PromptBuilder:
     """
     PromptBuilder is a component that renders a prompt from a template string using Jinja2 templates.
-    The template variables found in the template string are used as input types for the component and are all required.
+    The template variables found in the template string are used as input types for the component and are all optional.
+    If a template variable is not provided as an input, it will be replaced with an empty string in the rendered prompt.
+
     Usage example:
     ```python
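A short sketch of what the relaxed contract means in practice, assuming the standard `PromptBuilder(template=...)` constructor:

```python
from haystack.components.builders import PromptBuilder

builder = PromptBuilder(template="Answer in {{language}}: {{question}}")

# `language` is omitted here; per the new docstring it is optional and
# renders as an empty string instead of raising an error.
result = builder.run(question="What is Haystack?")
print(result["prompt"])  # "Answer in : What is Haystack?"
```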