From fee5178dd0d1b3f535922dda59e41f762a41ab4d Mon Sep 17 00:00:00 2001 From: shadeMe Date: Mon, 25 Nov 2024 15:30:50 +0100 Subject: [PATCH 1/2] feat: Add support for the new `ChatMessage` data class in `ChatPromptBuilder` --- docs/pydoc/config/builders_api.yml | 27 + .../components/builders/__init__.py | 7 + .../builders/chat_prompt_builder.py | 270 +++++++ .../dataclasses/chat_message.py | 33 +- test/components/builders/__init__.py | 3 + .../builders/test_chat_prompt_builder.py | 704 ++++++++++++++++++ 6 files changed, 1038 insertions(+), 6 deletions(-) create mode 100644 docs/pydoc/config/builders_api.yml create mode 100644 haystack_experimental/components/builders/__init__.py create mode 100644 haystack_experimental/components/builders/chat_prompt_builder.py create mode 100644 test/components/builders/__init__.py create mode 100644 test/components/builders/test_chat_prompt_builder.py diff --git a/docs/pydoc/config/builders_api.yml b/docs/pydoc/config/builders_api.yml new file mode 100644 index 00000000..0462d135 --- /dev/null +++ b/docs/pydoc/config/builders_api.yml @@ -0,0 +1,27 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../../../] + modules: ["haystack_experimental.components.builders.chat_prompt_builder"] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmeCoreRenderer + excerpt: Extract the output of a Generator to an Answer format, and build prompts. 
+ category_slug: experiments-api + title: Builders + slug: experimental-builders-api + order: 160 + markdown: + descriptive_class_title: false + classdef_code_block: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: experimental_builders_api.md diff --git a/haystack_experimental/components/builders/__init__.py b/haystack_experimental/components/builders/__init__.py new file mode 100644 index 00000000..e7aeffa1 --- /dev/null +++ b/haystack_experimental/components/builders/__init__.py @@ -0,0 +1,7 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from .chat_prompt_builder import ChatPromptBuilder + +__all__ = ["ChatPromptBuilder"] diff --git a/haystack_experimental/components/builders/chat_prompt_builder.py b/haystack_experimental/components/builders/chat_prompt_builder.py new file mode 100644 index 00000000..d1c6f7b0 --- /dev/null +++ b/haystack_experimental/components/builders/chat_prompt_builder.py @@ -0,0 +1,270 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from copy import deepcopy +from typing import Any, Dict, List, Optional, Set + +from haystack import component, default_from_dict, default_to_dict, logging +from jinja2 import meta +from jinja2.sandbox import SandboxedEnvironment + +from haystack_experimental.dataclasses.chat_message import ( + ChatMessage, + ChatRole, + TextContent, +) + +logger = logging.getLogger(__name__) + + +@component +class ChatPromptBuilder: + """ + Renders a chat prompt from a template string using Jinja2 syntax. + + It constructs prompts using static or dynamic templates, which you can update for each pipeline run. + + Template variables in the template are optional unless specified otherwise. + If an optional variable isn't provided, it defaults to an empty string. Use `variables` and `required_variables` + to define input types and required variables. 
+ + ### Usage examples + + #### With static prompt template + + ```python + template = [ChatMessage.from_user("Translate to {{ target_language }}. Context: {{ snippet }}; Translation:")] + builder = ChatPromptBuilder(template=template) + builder.run(target_language="spanish", snippet="I can't speak spanish.") + ``` + + #### Overriding static template at runtime + + ```python + template = [ChatMessage.from_user("Translate to {{ target_language }}. Context: {{ snippet }}; Translation:")] + builder = ChatPromptBuilder(template=template) + builder.run(target_language="spanish", snippet="I can't speak spanish.") + + msg = "Translate to {{ target_language }} and summarize. Context: {{ snippet }}; Summary:" + summary_template = [ChatMessage.from_user(msg)] + builder.run(target_language="spanish", snippet="I can't speak spanish.", template=summary_template) + ``` + + #### With dynamic prompt template + + ```python + from haystack.components.builders import ChatPromptBuilder + from haystack.components.generators.chat import OpenAIChatGenerator + from haystack.dataclasses import ChatMessage + from haystack import Pipeline + from haystack.utils import Secret + + # no parameter init, we don't use any runtime template variables + prompt_builder = ChatPromptBuilder() + llm = OpenAIChatGenerator(api_key=Secret.from_token(""), model="gpt-4o-mini") + + pipe = Pipeline() + pipe.add_component("prompt_builder", prompt_builder) + pipe.add_component("llm", llm) + pipe.connect("prompt_builder.prompt", "llm.messages") + + location = "Berlin" + language = "English" + system_message = ChatMessage.from_system("You are an assistant giving information to tourists in {{language}}") + messages = [system_message, ChatMessage.from_user("Tell me about {{location}}")] + + res = pipe.run(data={"prompt_builder": {"template_variables": {"location": location, "language": language}, + "template": messages}}) + print(res) + + >> {'llm': {'replies': [ChatMessage(content="Berlin is the capital city of 
Germany and one of the most vibrant + and diverse cities in Europe. Here are some key things to know...Enjoy your time exploring the vibrant and dynamic + capital of Germany!", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-4o-mini', + 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 27, 'completion_tokens': 681, 'total_tokens': + 708}})]}} + + + messages = [system_message, ChatMessage.from_user("What's the weather forecast for {{location}} in the next + {{day_count}} days?")] + + res = pipe.run(data={"prompt_builder": {"template_variables": {"location": location, "day_count": "5"}, + "template": messages}}) + + print(res) + >> {'llm': {'replies': [ChatMessage(content="Here is the weather forecast for Berlin in the next 5 + days:\\n\\nDay 1: Mostly cloudy with a high of 22°C (72°F) and...so it's always a good idea to check for updates + closer to your visit.", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-4o-mini', + 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 37, 'completion_tokens': 201, + 'total_tokens': 238}})]}} + ``` + + """ + + def __init__( + self, + template: Optional[List[ChatMessage]] = None, + required_variables: Optional[List[str]] = None, + variables: Optional[List[str]] = None, + ): + """ + Constructs a ChatPromptBuilder component. + + :param template: + A list of `ChatMessage` objects. The component looks for Jinja2 template syntax and + renders the prompt with the provided variables. Provide the template in either + the `init` method or the `run` method. + :param required_variables: + List variables that must be provided as input to ChatPromptBuilder. + If a variable listed as required is not provided, an exception is raised. Optional. + :param variables: + List input variables to use in prompt templates instead of the ones inferred from the + `template` parameter. For example, to use more variables during prompt engineering than the ones present + in the default template, you can provide them here. 
+ """ + self._variables = variables + self._required_variables = required_variables + self.required_variables = required_variables or [] + self.template = template + variables = variables or [] + self._env = SandboxedEnvironment() + if template and not variables: + for message in template: + if message.is_from(ChatRole.USER) or message.is_from(ChatRole.SYSTEM): + # infer variables from template + if message.text is None: + raise ValueError( + f"The {self.__class__.__name__} requires a non-empty list of ChatMessage" + " instances with text content." + ) + ast = self._env.parse(message.text) + template_variables = meta.find_undeclared_variables(ast) + variables += list(template_variables) + + # set up inputs + for var in variables: + if var in self.required_variables: + component.set_input_type(self, var, Any) + else: + component.set_input_type(self, var, Any, "") + + @component.output_types(prompt=List[ChatMessage]) + def run( + self, + template: Optional[List[ChatMessage]] = None, + template_variables: Optional[Dict[str, Any]] = None, + **kwargs, + ): + """ + Renders the prompt template with the provided variables. + + It applies the template variables to render the final prompt. You can provide variables with pipeline kwargs. + To overwrite the default template, you can set the `template` parameter. + To overwrite pipeline kwargs, you can set the `template_variables` parameter. + + :param template: + An optional list of `ChatMessage` objects to overwrite ChatPromptBuilder's default template. + If `None`, the default template provided at initialization is used. + :param template_variables: + An optional dictionary of template variables to overwrite the pipeline variables. + :param kwargs: + Pipeline variables used for rendering the prompt. + + :returns: A dictionary with the following keys: + - `prompt`: The updated list of `ChatMessage` objects after rendering the templates. 
+ :raises ValueError: + If `template` is empty or contains elements that are not `ChatMessage` instances with text content. + """ + kwargs = kwargs or {} + template_variables = template_variables or {} + template_variables_combined = {**kwargs, **template_variables} + + if template is None: + template = self.template + + if not template: + raise ValueError( + f"The {self.__class__.__name__} requires a non-empty list of ChatMessage instances. " + f"Please provide a valid list of ChatMessage instances to render the prompt." + ) + + if not all(isinstance(message, ChatMessage) and message.text is not None for message in template): + raise ValueError( + f"The {self.__class__.__name__} expects a list containing only ChatMessage instances " + f"with text content. The provided list contains other types. Please ensure that all " + "elements in the list are ChatMessage instances." + ) + + processed_messages = [] + for message in template: + if message.is_from(ChatRole.USER) or message.is_from(ChatRole.SYSTEM): + self._validate_variables(set(template_variables_combined.keys())) + + assert message.text is not None + compiled_template = self._env.from_string(message.text) + rendered_content = compiled_template.render(template_variables_combined) + + rendered_message = ChatMessage( + message.role, + [TextContent(rendered_content)], + deepcopy(message.meta), + ) + processed_messages.append(rendered_message) + else: + processed_messages.append(message) + + return {"prompt": processed_messages} + + def _validate_variables(self, provided_variables: Set[str]): + """ + Checks if all the required template variables are provided. + + :param provided_variables: + A set of provided template variables. + :raises ValueError: + If any of the required template variables is not provided. 
+ """ + missing_variables = [var for var in self.required_variables if var not in provided_variables] + if missing_variables: + missing_vars_str = ", ".join(missing_variables) + raise ValueError( + f"Missing required input variables in ChatPromptBuilder: {missing_vars_str}. " + f"Required variables: {self.required_variables}. Provided variables: {provided_variables}." + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Returns a dictionary representation of the component. + + :returns: + Serialized dictionary representation of the component. + """ + if self.template is not None: + template = [m.to_dict() for m in self.template] + else: + template = None + + return default_to_dict( + self, + template=template, + variables=self._variables, + required_variables=self._required_variables, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "ChatPromptBuilder": + """ + Deserialize this component from a dictionary. + + :param data: + The dictionary to deserialize and create the component. + + :returns: + The deserialized component. + """ + init_parameters = data["init_parameters"] + template = init_parameters.get("template") + if template: + init_parameters["template"] = [ChatMessage.from_dict(d) for d in template] + + return default_from_dict(cls, data) diff --git a/haystack_experimental/dataclasses/chat_message.py b/haystack_experimental/dataclasses/chat_message.py index 9f139af1..8490483a 100644 --- a/haystack_experimental/dataclasses/chat_message.py +++ b/haystack_experimental/dataclasses/chat_message.py @@ -158,24 +158,34 @@ def is_from(self, role: ChatRole) -> bool: return self._role == role @classmethod - def from_user(cls, text: str) -> "ChatMessage": + def from_user( + cls, + text: str, + meta: Optional[Dict[str, Any]] = None, + ) -> "ChatMessage": """ Create a message from the user. :param text: The text content of the message. + :param meta: Additional metadata associated with the message. :returns: A new ChatMessage instance. 
""" - return cls(_role=ChatRole.USER, _content=[TextContent(text=text)]) + return cls(_role=ChatRole.USER, _content=[TextContent(text=text)], _meta=meta or {}) @classmethod - def from_system(cls, text: str) -> "ChatMessage": + def from_system( + cls, + text: str, + meta: Optional[Dict[str, Any]] = None, + ) -> "ChatMessage": """ Create a message from the system. :param text: The text content of the message. + :param meta: Additional metadata associated with the message. :returns: A new ChatMessage instance. """ - return cls(_role=ChatRole.SYSTEM, _content=[TextContent(text=text)]) + return cls(_role=ChatRole.SYSTEM, _content=[TextContent(text=text)], _meta=meta or {}) @classmethod def from_assistant( @@ -201,16 +211,27 @@ def from_assistant( return cls(_role=ChatRole.ASSISTANT, _content=content, _meta=meta or {}) @classmethod - def from_tool(cls, tool_result: str, origin: ToolCall, error: bool = False) -> "ChatMessage": + def from_tool( + cls, + tool_result: str, + origin: ToolCall, + error: bool = False, + meta: Optional[Dict[str, Any]] = None, + ) -> "ChatMessage": """ Create a message from a Tool. :param tool_result: The result of the Tool invocation. :param origin: The Tool call that produced this result. :param error: Whether the Tool invocation resulted in an error. + :param meta: Additional metadata associated with the message. :returns: A new ChatMessage instance. 
""" - return cls(_role=ChatRole.TOOL, _content=[ToolCallResult(result=tool_result, origin=origin, error=error)]) + return cls( + _role=ChatRole.TOOL, + _content=[ToolCallResult(result=tool_result, origin=origin, error=error)], + _meta=meta or {}, + ) def to_dict(self) -> Dict[str, Any]: """ diff --git a/test/components/builders/__init__.py b/test/components/builders/__init__.py new file mode 100644 index 00000000..c1764a6e --- /dev/null +++ b/test/components/builders/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/test/components/builders/test_chat_prompt_builder.py b/test/components/builders/test_chat_prompt_builder.py new file mode 100644 index 00000000..703b2702 --- /dev/null +++ b/test/components/builders/test_chat_prompt_builder.py @@ -0,0 +1,704 @@ +from typing import Any, Dict, List, Optional +from jinja2 import TemplateSyntaxError +import pytest + +from haystack_experimental.components.builders.chat_prompt_builder import ( + ChatPromptBuilder, +) +from haystack import component +from haystack.core.pipeline.pipeline import Pipeline +from haystack_experimental.dataclasses.chat_message import ChatMessage, ChatRole +from haystack.dataclasses.document import Document + + +class TestChatPromptBuilder: + def test_init(self): + builder = ChatPromptBuilder( + template=[ + ChatMessage.from_user(text="This is a {{ variable }}"), + ChatMessage.from_system(text="This is a {{ variable2 }}"), + ] + ) + assert builder.required_variables == [] + assert builder.template[0].text == "This is a {{ variable }}" + assert builder.template[1].text == "This is a {{ variable2 }}" + assert builder._variables is None + assert builder._required_variables is None + + # we have inputs that contain: template, template_variables + inferred variables + inputs = builder.__haystack_input__._sockets_dict + assert set(inputs.keys()) == { + "template", + "template_variables", + "variable", + "variable2", + } + 
assert inputs["template"].type == Optional[List[ChatMessage]] + assert inputs["template_variables"].type == Optional[Dict[str, Any]] + assert inputs["variable"].type == Any + assert inputs["variable2"].type == Any + + # response is always prompt + outputs = builder.__haystack_output__._sockets_dict + assert set(outputs.keys()) == {"prompt"} + assert outputs["prompt"].type == List[ChatMessage] + + def test_init_without_template(self): + variables = ["var1", "var2"] + builder = ChatPromptBuilder(variables=variables) + assert builder.template is None + assert builder.required_variables == [] + assert builder._variables == variables + assert builder._required_variables is None + + # we have inputs that contain: template, template_variables + variables + inputs = builder.__haystack_input__._sockets_dict + assert set(inputs.keys()) == {"template", "template_variables", "var1", "var2"} + assert inputs["template"].type == Optional[List[ChatMessage]] + assert inputs["template_variables"].type == Optional[Dict[str, Any]] + assert inputs["var1"].type == Any + assert inputs["var2"].type == Any + + # response is always prompt + outputs = builder.__haystack_output__._sockets_dict + assert set(outputs.keys()) == {"prompt"} + assert outputs["prompt"].type == List[ChatMessage] + + def test_init_with_required_variables(self): + builder = ChatPromptBuilder( + template=[ChatMessage.from_user("This is a {{ variable }}")], + required_variables=["variable"], + ) + assert builder.required_variables == ["variable"] + assert builder.template[0].text == "This is a {{ variable }}" + assert builder._variables is None + assert builder._required_variables == ["variable"] + + # we have inputs that contain: template, template_variables + inferred variables + inputs = builder.__haystack_input__._sockets_dict + assert set(inputs.keys()) == {"template", "template_variables", "variable"} + assert inputs["template"].type == Optional[List[ChatMessage]] + assert inputs["template_variables"].type == 
Optional[Dict[str, Any]] + assert inputs["variable"].type == Any + + # response is always prompt + outputs = builder.__haystack_output__._sockets_dict + assert set(outputs.keys()) == {"prompt"} + assert outputs["prompt"].type == List[ChatMessage] + + def test_init_with_custom_variables(self): + variables = ["var1", "var2", "var3"] + template = [ChatMessage.from_user("Hello, {{ var1 }}, {{ var2 }}!")] + builder = ChatPromptBuilder(template=template, variables=variables) + assert builder.required_variables == [] + assert builder._variables == variables + assert builder.template[0].text == "Hello, {{ var1 }}, {{ var2 }}!" + assert builder._required_variables is None + + # we have inputs that contain: template, template_variables + variables + inputs = builder.__haystack_input__._sockets_dict + assert set(inputs.keys()) == { + "template", + "template_variables", + "var1", + "var2", + "var3", + } + assert inputs["template"].type == Optional[List[ChatMessage]] + assert inputs["template_variables"].type == Optional[Dict[str, Any]] + assert inputs["var1"].type == Any + assert inputs["var2"].type == Any + assert inputs["var3"].type == Any + + # response is always prompt + outputs = builder.__haystack_output__._sockets_dict + assert set(outputs.keys()) == {"prompt"} + assert outputs["prompt"].type == List[ChatMessage] + + def test_run(self): + builder = ChatPromptBuilder( + template=[ChatMessage.from_user("This is a {{ variable }}")] + ) + res = builder.run(variable="test") + assert res == {"prompt": [ChatMessage.from_user("This is a test")]} + + def test_run_template_variable(self): + builder = ChatPromptBuilder( + template=[ChatMessage.from_user("This is a {{ variable }}")] + ) + res = builder.run(template_variables={"variable": "test"}) + assert res == {"prompt": [ChatMessage.from_user("This is a test")]} + + def test_run_template_variable_overrides_variable(self): + builder = ChatPromptBuilder( + template=[ChatMessage.from_user("This is a {{ variable }}")] + ) + res = 
builder.run( + template_variables={"variable": "test_from_template_var"}, variable="test" + ) + assert res == { + "prompt": [ChatMessage.from_user("This is a test_from_template_var")] + } + + def test_run_without_input(self): + builder = ChatPromptBuilder( + template=[ChatMessage.from_user("This is a template without input")] + ) + res = builder.run() + assert res == { + "prompt": [ChatMessage.from_user("This is a template without input")] + } + + def test_run_with_missing_input(self): + builder = ChatPromptBuilder( + template=[ChatMessage.from_user("This is a {{ variable }}")] + ) + res = builder.run() + assert res == {"prompt": [ChatMessage.from_user("This is a ")]} + + def test_run_with_missing_required_input(self): + builder = ChatPromptBuilder( + template=[ChatMessage.from_user("This is a {{ foo }}, not a {{ bar }}")], + required_variables=["foo", "bar"], + ) + with pytest.raises(ValueError, match="foo"): + builder.run(bar="bar") + with pytest.raises(ValueError, match="bar"): + builder.run(foo="foo") + with pytest.raises(ValueError, match="foo, bar"): + builder.run() + + def test_run_with_variables(self): + variables = ["var1", "var2", "var3"] + template = [ChatMessage.from_user("Hello, {{ name }}! {{ var1 }}")] + + builder = ChatPromptBuilder(template=template, variables=variables) + + template_variables = {"name": "John"} + expected_result = { + "prompt": [ChatMessage.from_user("Hello, John! How are you?")] + } + + assert ( + builder.run(template_variables=template_variables, var1="How are you?") + == expected_result + ) + + def test_run_with_variables_and_runtime_template(self): + variables = ["var1", "var2", "var3"] + + builder = ChatPromptBuilder(variables=variables) + + template = [ChatMessage.from_user("Hello, {{ name }}! {{ var1 }}")] + template_variables = {"name": "John"} + expected_result = { + "prompt": [ChatMessage.from_user("Hello, John! 
How are you?")] + } + + assert ( + builder.run( + template=template, + template_variables=template_variables, + var1="How are you?", + ) + == expected_result + ) + + def test_run_overwriting_default_template(self): + default_template = [ChatMessage.from_user("Hello, {{ name }}!")] + + builder = ChatPromptBuilder(template=default_template) + + template = [ChatMessage.from_user("Hello, {{ var1 }}{{ name }}!")] + expected_result = {"prompt": [ChatMessage.from_user("Hello, John!")]} + + assert builder.run(template, name="John") == expected_result + + def test_run_overwriting_default_template_with_template_variables(self): + default_template = [ChatMessage.from_user("Hello, {{ name }}!")] + + builder = ChatPromptBuilder(template=default_template) + + template = [ChatMessage.from_user("Hello, {{ var1 }} {{ name }}!")] + template_variables = {"var1": "Big"} + expected_result = {"prompt": [ChatMessage.from_user("Hello, Big John!")]} + + assert builder.run(template, template_variables, name="John") == expected_result + + def test_run_overwriting_default_template_with_variables(self): + variables = ["var1", "var2", "name"] + default_template = [ChatMessage.from_user("Hello, {{ name }}!")] + + builder = ChatPromptBuilder(template=default_template, variables=variables) + + template = [ChatMessage.from_user("Hello, {{ var1 }} {{ name }}!")] + expected_result = {"prompt": [ChatMessage.from_user("Hello, Big John!")]} + + assert builder.run(template, name="John", var1="Big") == expected_result + + def test_run_with_meta(self): + """ + Test that the ChatPromptBuilder correctly handles meta data. + It should render the message and copy the meta data from the original message. 
+ """ + m = ChatMessage.from_user( + text="This is a {{ variable }}", + meta={"test": "test"}, + ) + builder = ChatPromptBuilder(template=[m]) + res = builder.run(variable="test") + res_msg = ChatMessage.from_user( + text="This is a test", + meta={"test": "test"}, + ) + assert res == {"prompt": [res_msg]} + + def test_run_with_invalid_template(self): + builder = ChatPromptBuilder() + + template = [ChatMessage.from_user("Hello, {{ name }!")] + template_variables = {"name": "John"} + with pytest.raises(TemplateSyntaxError): + builder.run(template, template_variables) + + def test_init_with_invalid_template(self): + template = [ChatMessage.from_user("Hello, {{ name }!")] + with pytest.raises(TemplateSyntaxError): + ChatPromptBuilder(template) + + def test_run_without_template(self): + prompt_builder = ChatPromptBuilder() + with pytest.raises( + ValueError, + match="The ChatPromptBuilder requires a non-empty list of ChatMessage instances", + ): + prompt_builder.run() + + def test_run_with_empty_chat_message_list(self): + prompt_builder = ChatPromptBuilder(template=[], variables=["documents"]) + with pytest.raises( + ValueError, + match="The ChatPromptBuilder requires a non-empty list of ChatMessage instances", + ): + prompt_builder.run() + + def test_chat_message_list_with_mixed_object_list(self): + prompt_builder = ChatPromptBuilder( + template=[ChatMessage.from_user("Hello"), "there world"], + variables=["documents"], + ) + with pytest.raises( + ValueError, + match="The ChatPromptBuilder expects a list containing only ChatMessage instances", + ): + prompt_builder.run() + + def test_provided_template_variables(self): + prompt_builder = ChatPromptBuilder( + variables=["documents"], required_variables=["city"] + ) + + # both variables are provided + prompt_builder._validate_variables({"name", "city"}) + + # provided variables are a superset of the required variables + prompt_builder._validate_variables({"name", "city", "age"}) + + with pytest.raises(ValueError): + 
prompt_builder._validate_variables({"name"}) + + def test_example_in_pipeline(self): + default_template = [ + ChatMessage.from_user( + "Here is the document: {{documents[0].text}} \\n Answer: {{query}}" + ) + ] + prompt_builder = ChatPromptBuilder( + template=default_template, variables=["documents"] + ) + + @component + class DocumentProducer: + @component.output_types(documents=List[Document]) + def run(self, doc_input: str): + return {"documents": [Document(content=doc_input)]} + + pipe = Pipeline() + pipe.add_component("doc_producer", DocumentProducer()) + pipe.add_component("prompt_builder", prompt_builder) + pipe.connect("doc_producer.documents", "prompt_builder.documents") + + template = [ + ChatMessage.from_user( + "Here is the document: {{documents[0].content}} \n Query: {{query}}" + ) + ] + result = pipe.run( + data={ + "doc_producer": {"doc_input": "Hello world, I live in Berlin"}, + "prompt_builder": { + "template": template, + "template_variables": {"query": "Where does the speaker live?"}, + }, + } + ) + + assert result == { + "prompt_builder": { + "prompt": [ + ChatMessage.from_user( + "Here is the document: Hello world, I live in Berlin \n Query: Where does the speaker live?" + ) + ] + } + } + + def test_example_in_pipeline_simple(self): + default_template = [ + ChatMessage.from_user("This is the default prompt:\n Query: {{query}}") + ] + prompt_builder = ChatPromptBuilder(template=default_template) + + pipe = Pipeline() + pipe.add_component("prompt_builder", prompt_builder) + + # using the default prompt + result = pipe.run(data={"query": "Where does the speaker live?"}) + expected_default = { + "prompt_builder": { + "prompt": [ + ChatMessage.from_user( + "This is the default prompt:\n Query: Where does the speaker live?" 
+ ) + ] + } + } + assert result == expected_default + + # using the dynamic prompt + result = pipe.run( + data={ + "query": "Where does the speaker live?", + "template": [ + ChatMessage.from_user( + "This is the dynamic prompt:\n Query: {{query}}" + ) + ], + } + ) + expected_dynamic = { + "prompt_builder": { + "prompt": [ + ChatMessage.from_user( + "This is the dynamic prompt:\n Query: Where does the speaker live?" + ) + ] + } + } + assert result == expected_dynamic + + +class TestChatPromptBuilderDynamic: + def test_multiple_templated_chat_messages(self): + prompt_builder = ChatPromptBuilder() + language = "French" + location = "Berlin" + messages = [ + ChatMessage.from_system( + "Write your response in this language:{{language}}" + ), + ChatMessage.from_user("Tell me about {{location}}"), + ] + + result = prompt_builder.run( + template_variables={"language": language, "location": location}, + template=messages, + ) + assert result["prompt"] == [ + ChatMessage.from_system("Write your response in this language:French"), + ChatMessage.from_user("Tell me about Berlin"), + ], "The templated messages should match the expected output." + + def test_multiple_templated_chat_messages_in_place(self): + prompt_builder = ChatPromptBuilder() + language = "French" + location = "Berlin" + messages = [ + ChatMessage.from_system( + "Write your response ins this language:{{language}}" + ), + ChatMessage.from_user("Tell me about {{location}}"), + ] + + res = prompt_builder.run( + template_variables={"language": language, "location": location}, + template=messages, + ) + assert res == { + "prompt": [ + ChatMessage.from_system("Write your response ins this language:French"), + ChatMessage.from_user("Tell me about Berlin"), + ] + }, "The templated messages should match the expected output." 
+ + def test_some_templated_chat_messages(self): + prompt_builder = ChatPromptBuilder() + language = "English" + location = "Paris" + messages = [ + ChatMessage.from_system( + "Please, respond in the following language: {{language}}." + ), + ChatMessage.from_user("I would like to learn more about {{location}}."), + ChatMessage.from_assistant("Yes, I can help you with that {{subject}}"), + ChatMessage.from_user("Ok so do so please, be elaborate."), + ] + + result = prompt_builder.run( + template_variables={"language": language, "location": location}, + template=messages, + ) + + expected_messages = [ + ChatMessage.from_system( + "Please, respond in the following language: English." + ), + ChatMessage.from_user("I would like to learn more about Paris."), + ChatMessage.from_assistant( + "Yes, I can help you with that {{subject}}" + ), # assistant message should not be templated + ChatMessage.from_user("Ok so do so please, be elaborate."), + ] + + assert ( + result["prompt"] == expected_messages + ), "The templated messages should match the expected output." + + def test_example_in_pipeline(self): + prompt_builder = ChatPromptBuilder() + + pipe = Pipeline() + pipe.add_component("prompt_builder", prompt_builder) + + location = "Berlin" + system_message = ChatMessage.from_system( + "You are a helpful assistant giving out valuable information to tourists." + ) + messages = [system_message, ChatMessage.from_user("Tell me about {{location}}")] + + res = pipe.run( + data={ + "prompt_builder": { + "template_variables": {"location": location}, + "template": messages, + } + } + ) + assert res == { + "prompt_builder": { + "prompt": [ + ChatMessage.from_system( + "You are a helpful assistant giving out valuable information to tourists." + ), + ChatMessage.from_user("Tell me about Berlin"), + ] + } + } + + messages = [ + system_message, + ChatMessage.from_user( + "What's the weather forecast for {{location}} in the next {{day_count}} days?" 
+ ), + ] + + res = pipe.run( + data={ + "prompt_builder": { + "template_variables": {"location": location, "day_count": "5"}, + "template": messages, + } + } + ) + assert res == { + "prompt_builder": { + "prompt": [ + ChatMessage.from_system( + "You are a helpful assistant giving out valuable information to tourists." + ), + ChatMessage.from_user( + "What's the weather forecast for Berlin in the next 5 days?" + ), + ] + } + } + + def test_example_in_pipeline_with_multiple_templated_messages(self): + # no parameter init, we don't use any runtime template variables + prompt_builder = ChatPromptBuilder() + + pipe = Pipeline() + pipe.add_component("prompt_builder", prompt_builder) + + location = "Berlin" + system_message = ChatMessage.from_system( + "You are a helpful assistant giving out valuable information to tourists in {{language}}." + ) + messages = [system_message, ChatMessage.from_user("Tell me about {{location}}")] + + res = pipe.run( + data={ + "prompt_builder": { + "template_variables": {"location": location, "language": "German"}, + "template": messages, + } + } + ) + assert res == { + "prompt_builder": { + "prompt": [ + ChatMessage.from_system( + "You are a helpful assistant giving out valuable information to tourists in German." + ), + ChatMessage.from_user("Tell me about Berlin"), + ] + } + } + + messages = [ + system_message, + ChatMessage.from_user( + "What's the weather forecast for {{location}} in the next {{day_count}} days?" + ), + ] + + res = pipe.run( + data={ + "prompt_builder": { + "template_variables": { + "location": location, + "day_count": "5", + "language": "English", + }, + "template": messages, + } + } + ) + assert res == { + "prompt_builder": { + "prompt": [ + ChatMessage.from_system( + "You are a helpful assistant giving out valuable information to tourists in English." + ), + ChatMessage.from_user( + "What's the weather forecast for Berlin in the next 5 days?" 
+ ), + ] + } + } + + def test_pipeline_complex(self): + @component + class ValueProducer: + def __init__(self, value_to_produce: str): + self.value_to_produce = value_to_produce + + @component.output_types(value_output=str) + def run(self): + return {"value_output": self.value_to_produce} + + pipe = Pipeline() + pipe.add_component( + "prompt_builder", ChatPromptBuilder(variables=["value_output"]) + ) + pipe.add_component("value_producer", ValueProducer(value_to_produce="Berlin")) + pipe.connect("value_producer.value_output", "prompt_builder") + + messages = [ + ChatMessage.from_system("You give valuable information to tourists."), + ChatMessage.from_user("Tell me about {{value_output}}"), + ] + + res = pipe.run(data={"template": messages}) + assert res == { + "prompt_builder": { + "prompt": [ + ChatMessage.from_system( + "You give valuable information to tourists." + ), + ChatMessage.from_user("Tell me about Berlin"), + ] + } + } + + def test_to_dict(self): + component = ChatPromptBuilder( + template=[ + ChatMessage.from_user("text and {var}"), + ChatMessage.from_assistant("content {required_var}"), + ], + variables=["var", "required_var"], + required_variables=["required_var"], + ) + + assert component.to_dict() == { + "type": "haystack_experimental.components.builders.chat_prompt_builder.ChatPromptBuilder", + "init_parameters": { + "template": [ + { + "_content": [{"text": "text and {var}"}], + "_role": "user", + "_meta": {}, + }, + { + "_content": [{"text": "content {required_var}"}], + "_role": "assistant", + "_meta": {}, + }, + ], + "variables": ["var", "required_var"], + "required_variables": ["required_var"], + }, + } + + def test_from_dict(self): + component = ChatPromptBuilder.from_dict( + data={ + "type": "haystack_experimental.components.builders.chat_prompt_builder.ChatPromptBuilder", + "init_parameters": { + "template": [ + { + "_content": [{"text": "text and {var}"}], + "_role": "user", + "_meta": {}, + }, + { + "_content": [{"text": "content 
{required_var}"}], + "_role": "assistant", + "_meta": {}, + }, + ], + "variables": ["var", "required_var"], + "required_variables": ["required_var"], + }, + } + ) + + assert component.template == [ + ChatMessage.from_user("text and {var}"), + ChatMessage.from_assistant("content {required_var}"), + ] + assert component._variables == ["var", "required_var"] + assert component._required_variables == ["required_var"] + + def test_from_dict_template_none(self): + component = ChatPromptBuilder.from_dict( + data={ + "type": "haystack_experimental.components.builders.chat_prompt_builder.ChatPromptBuilder", + "init_parameters": {"template": None}, + } + ) + + assert component.template is None + assert component._variables is None + assert component._required_variables is None From ba09cec467419f2968607162b5156d434b97868e Mon Sep 17 00:00:00 2001 From: shadeMe Date: Mon, 25 Nov 2024 16:51:20 +0100 Subject: [PATCH 2/2] fix: Cherry-pick upstream changes from https://github.com/deepset-ai/haystack/pull/8572 --- .../builders/chat_prompt_builder.py | 22 ++++++++++++------- .../builders/test_chat_prompt_builder.py | 12 ++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/haystack_experimental/components/builders/chat_prompt_builder.py b/haystack_experimental/components/builders/chat_prompt_builder.py index d1c6f7b0..38bc1816 100644 --- a/haystack_experimental/components/builders/chat_prompt_builder.py +++ b/haystack_experimental/components/builders/chat_prompt_builder.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from copy import deepcopy -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, List, Literal, Optional, Set, Union from haystack import component, default_from_dict, default_to_dict, logging from jinja2 import meta @@ -104,7 +104,7 @@ class ChatPromptBuilder: def __init__( self, template: Optional[List[ChatMessage]] = None, - required_variables: Optional[List[str]] = None, + required_variables: 
Optional[Union[List[str], Literal["*"]]] = None, variables: Optional[List[str]] = None, ): """ @@ -116,7 +116,8 @@ def __init__( the `init` method` or the `run` method. :param required_variables: List variables that must be provided as input to ChatPromptBuilder. - If a variable listed as required is not provided, an exception is raised. Optional. + If a variable listed as required is not provided, an exception is raised. + If set to "*", all variables found in the prompt are required. :param variables: List input variables to use in prompt templates instead of the ones inferred from the `template` parameter. For example, to use more variables during prompt engineering than the ones present @@ -131,7 +132,7 @@ def __init__( if template and not variables: for message in template: if message.is_from(ChatRole.USER) or message.is_from(ChatRole.SYSTEM): - # infere variables from template + # infer variables from template if message.text is None: raise ValueError( f"The {self.__class__.__name__} requires a non-empty list of ChatMessage" @@ -140,10 +141,11 @@ def __init__( ast = self._env.parse(message.text) template_variables = meta.find_undeclared_variables(ast) variables += list(template_variables) + self.variables = variables # setup inputs - for var in variables: - if var in self.required_variables: + for var in self.variables: + if self.required_variables == "*" or var in self.required_variables: component.set_input_type(self, var, Any) else: component.set_input_type(self, var, Any, "") @@ -224,12 +226,16 @@ def _validate_variables(self, provided_variables: Set[str]): :raises ValueError: If no template is provided or if all the required template variables are not provided. 
""" - missing_variables = [var for var in self.required_variables if var not in provided_variables] + if self.required_variables == "*": + required_variables = sorted(self.variables) + else: + required_variables = self.required_variables + missing_variables = [var for var in required_variables if var not in provided_variables] if missing_variables: missing_vars_str = ", ".join(missing_variables) raise ValueError( f"Missing required input variables in ChatPromptBuilder: {missing_vars_str}. " - f"Required variables: {self.required_variables}. Provided variables: {provided_variables}." + f"Required variables: {required_variables}. Provided variables: {provided_variables}." ) def to_dict(self) -> Dict[str, Any]: diff --git a/test/components/builders/test_chat_prompt_builder.py b/test/components/builders/test_chat_prompt_builder.py index 703b2702..cf4791dd 100644 --- a/test/components/builders/test_chat_prompt_builder.py +++ b/test/components/builders/test_chat_prompt_builder.py @@ -140,6 +140,18 @@ def test_run_template_variable_overrides_variable(self): "prompt": [ChatMessage.from_user("This is a test_from_template_var")] } + def test_run_with_missing_required_input_using_star(self): + builder = ChatPromptBuilder( + template=[ChatMessage.from_user("This is a {{ foo }}, not a {{ bar }}")], + required_variables="*", + ) + with pytest.raises(ValueError, match="foo"): + builder.run(bar="bar") + with pytest.raises(ValueError, match="bar"): + builder.run(foo="foo") + with pytest.raises(ValueError, match="bar, foo"): + builder.run() + def test_run_without_input(self): builder = ChatPromptBuilder( template=[ChatMessage.from_user("This is a template without input")]