diff --git a/integrations/anthropic/example/prompt_caching.py b/integrations/anthropic/example/prompt_caching.py
new file mode 100644
index 000000000..618090f8e
--- /dev/null
+++ b/integrations/anthropic/example/prompt_caching.py
@@ -0,0 +1,63 @@
+# To run this example, you will need to set an `ANTHROPIC_API_KEY` environment variable.
+
+from haystack import Pipeline
+from haystack.components.builders import ChatPromptBuilder
+from haystack.components.converters import HTMLToDocument
+from haystack.components.fetchers import LinkContentFetcher
+from haystack.components.generators.utils import print_streaming_chunk
+from haystack.dataclasses import ChatMessage
+from haystack.utils import Secret
+
+from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator
+
+msg = ChatMessage.from_system(
+    "You are a prompt expert who answers questions based on the given documents.\n"
+    "Here are the documents:\n"
+    "{% for d in documents %} \n"
+    "    {{d.content}} \n"
+    "{% endfor %}"
+)
+
+fetch_pipeline = Pipeline()
+fetch_pipeline.add_component("fetcher", LinkContentFetcher())
+fetch_pipeline.add_component("converter", HTMLToDocument())
+fetch_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=[msg], variables=["documents"]))
+
+fetch_pipeline.connect("fetcher", "converter")
+fetch_pipeline.connect("converter", "prompt_builder")
+
+result = fetch_pipeline.run(
+    data={
+        "fetcher": {"urls": ["https://ar5iv.labs.arxiv.org/html/2310.04406"]},
+    }
+)
+
+# Now we have our document fetched as a ChatMessage
+final_prompt_msg = result["prompt_builder"]["prompt"][0]
+
+# We add a cache control header to the prompt message
+final_prompt_msg.meta["cache_control"] = {"type": "ephemeral"}
+
+
+# Build the QA pipeline
+qa_pipeline = Pipeline()
+qa_pipeline.add_component("llm", AnthropicChatGenerator(
+    api_key=Secret.from_env_var("ANTHROPIC_API_KEY"),
+    streaming_callback=print_streaming_chunk,
+    generation_kwargs={"extra_headers": {"anthropic-beta": "prompt-caching-2024-07-31"}},
+))
+
+questions = ["Why is Monte-Carlo Tree Search used in LATS?",
+             "Summarize LATS selection, expansion, evaluation, simulation, backpropagation, and reflection."]
+
+# Answer the questions using prompt caching: the entire document is cached once, and each question runs against it
+for question in questions:
+    print("Question: " + question)
+    qa_pipeline.run(
+        data={
+            "llm": {"messages": [final_prompt_msg,
+                                 ChatMessage.from_user("Given these documents, answer the question: " + question)]},
+        }
+    )
+    print("\n")
+
diff --git a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py
index 9954f08c5..9fc5066fd 100644
--- a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py
+++ b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py
@@ -72,6 +72,7 @@ class AnthropicChatGenerator:
         "temperature",
         "top_p",
         "top_k",
+        "extra_headers",
     ]
 
     def __init__(
@@ -101,6 +102,7 @@ def __init__(
             - `temperature`: The temperature to use for sampling.
             - `top_p`: The top_p value to use for nucleus sampling.
             - `top_k`: The top_k value to use for top-k sampling.
+            - `extra_headers`: A dictionary of extra headers to pass to the model (e.g. to enable beta features).
         :param ignore_tools_thinking_messages: Anthropic's approach to tools (function calling) resolution involves
             "chain of thought" messages before returning the actual function names and parameters in a message. If
             `ignore_tools_thinking_messages` is `True`, the generator will drop so-called thinking messages when tool
@@ -260,6 +262,7 @@ def _convert_to_anthropic_format(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
         for m in messages:
             message_dict = dataclasses.asdict(m)
             filtered_message = {k: v for k, v in message_dict.items() if k in {"role", "content"} and v}
+            filtered_message.update(m.meta or {})
             anthropic_formatted_messages.append(filtered_message)
         return anthropic_formatted_messages
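
Note (not part of the patch): a minimal sketch of how the two changes combine when calling the generator directly, without the fetching pipeline. The beta header value is taken from the example above; the document placeholder and question are illustrative only, and the reply attribute access assumes the Haystack version this integration targets.

from haystack.dataclasses import ChatMessage
from haystack.utils import Secret

from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator

# Enable the prompt-caching beta via extra_headers, now whitelisted in ALLOWED_PARAMS.
llm = AnthropicChatGenerator(
    api_key=Secret.from_env_var("ANTHROPIC_API_KEY"),
    generation_kwargs={"extra_headers": {"anthropic-beta": "prompt-caching-2024-07-31"}},
)

# Mark the large, reusable part of the prompt for caching via the message meta.
docs_msg = ChatMessage.from_system("Here are the documents: <large document corpus>")
docs_msg.meta["cache_control"] = {"type": "ephemeral"}

result = llm.run(messages=[docs_msg, ChatMessage.from_user("Summarize the documents.")])
print(result["replies"][0].content)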
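
Likewise, a sketch of what the updated `_convert_to_anthropic_format` emits for a cache-marked message; the snippet mirrors the loop body in the diff, and the printed dict is inferred from that logic rather than captured output.

import dataclasses

from haystack.dataclasses import ChatMessage

msg = ChatMessage.from_user("Given these documents, answer the question: ...")
msg.meta["cache_control"] = {"type": "ephemeral"}

# Keep "role" and "content", then merge the meta keys on top, as the diff does.
message_dict = dataclasses.asdict(msg)
filtered_message = {k: v for k, v in message_dict.items() if k in {"role", "content"} and v}
filtered_message.update(msg.meta or {})
print(filtered_message)
# roughly: {"role": "user", "content": "Given these documents, ...", "cache_control": {"type": "ephemeral"}}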