Commit 2db1616

Add prompt caching, add example

vblagoje committed Aug 19, 2024
1 parent 55c65af commit 2db1616

Showing 2 changed files with 66 additions and 0 deletions.
63 changes: 63 additions & 0 deletions integrations/anthropic/example/prompt_caching.py
@@ -0,0 +1,63 @@
# To run this example, you will need to set an `ANTHROPIC_API_KEY` environment variable.

from haystack import Pipeline
from haystack.components.builders import ChatPromptBuilder
from haystack.components.converters import HTMLToDocument
from haystack.components.fetchers import LinkContentFetcher
from haystack.components.generators.utils import print_streaming_chunk
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret

from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator

msg = ChatMessage.from_system(
    "You are a prompt expert who answers questions based on the given documents.\n"
    "Here are the documents:\n"
    "{% for d in documents %} \n"
    "  {{d.content}} \n"
    "{% endfor %}"
)

fetch_pipeline = Pipeline()
fetch_pipeline.add_component("fetcher", LinkContentFetcher())
fetch_pipeline.add_component("converter", HTMLToDocument())
fetch_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=[msg], variables=["documents"]))

fetch_pipeline.connect("fetcher", "converter")
fetch_pipeline.connect("converter", "prompt_builder")

result = fetch_pipeline.run(
    data={
        "fetcher": {"urls": ["https://ar5iv.labs.arxiv.org/html/2310.04406"]},
    }
)

# The fetched document is now rendered into a system ChatMessage
final_prompt_msg = result["prompt_builder"]["prompt"][0]

# Mark this message for caching by setting cache_control in its metadata
final_prompt_msg.meta["cache_control"] = {"type": "ephemeral"}


# Build QA pipeline
qa_pipeline = Pipeline()
qa_pipeline.add_component(
    "llm",
    AnthropicChatGenerator(
        api_key=Secret.from_env_var("ANTHROPIC_API_KEY"),
        streaming_callback=print_streaming_chunk,
        generation_kwargs={"extra_headers": {"anthropic-beta": "prompt-caching-2024-07-31"}},
    ),
)

questions = [
    "Why is Monte-Carlo Tree Search used in LATS?",
    "Summarize LATS selection, expansion, evaluation, simulation, backpropagation, and reflection",
]

# Answer the questions using prompt caching (i.e., the entire document is cached once and each question is run against it)
for question in questions:
    print("Question: " + question)
    qa_pipeline.run(
        data={
            "llm": {
                "messages": [
                    final_prompt_msg,
                    ChatMessage.from_user("Given these documents, answer the question: " + question),
                ]
            },
        }
    )
    print("\n")
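
Beyond the committed example, a quick way to confirm that caching is kicking in is to inspect the usage metadata Anthropic returns. The sketch below is an assumption, not part of this commit: it presumes the generator copies Anthropic's `usage` block, including the beta counters `cache_creation_input_tokens` and `cache_read_input_tokens`, into the reply's `meta`.

# Hedged sketch (not part of the commit): check the cache usage counters.
# Assumes replies carry Anthropic's `usage` block in their meta; on the first
# call `cache_creation_input_tokens` should be > 0 (cache write), and on later
# calls `cache_read_input_tokens` should be > 0 (cache hit).
check = qa_pipeline.run(
    data={"llm": {"messages": [final_prompt_msg, ChatMessage.from_user("What is LATS?")]}}
)
usage = check["llm"]["replies"][0].meta.get("usage", {})
print("cache_creation_input_tokens:", usage.get("cache_creation_input_tokens"))
print("cache_read_input_tokens:", usage.get("cache_read_input_tokens"))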

3 changes: 3 additions & 0 deletions in the module defining `AnthropicChatGenerator`
@@ -72,6 +72,7 @@ class AnthropicChatGenerator:
"temperature",
"top_p",
"top_k",
"extra_headers",
]

def __init__(
@@ -101,6 +102,7 @@ def __init__(
- `temperature`: The temperature to use for sampling.
- `top_p`: The top_p value to use for nucleus sampling.
- `top_k`: The top_k value to use for top-k sampling.
- `extra_headers`: A dictionary of extra headers sent with the API request (e.g., to enable beta features).
:param ignore_tools_thinking_messages: Anthropic's approach to tools (function calling) resolution involves
"chain of thought" messages before returning the actual function names and parameters in a message. If
`ignore_tools_thinking_messages` is `True`, the generator will drop so-called thinking messages when tool
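
The `extra_headers` addition above is what lets callers opt into the prompt-caching beta. A minimal sketch, assuming (as with other Haystack chat generators) that `generation_kwargs` can also be supplied per `run` call and are merged with those given at construction time:

# Hedged sketch: enabling the prompt-caching beta header per run() call.
# Assumes run() accepts generation_kwargs and merges them with the init-time ones.
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret

from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator

generator = AnthropicChatGenerator(api_key=Secret.from_env_var("ANTHROPIC_API_KEY"))
reply = generator.run(
    messages=[ChatMessage.from_user("Hello")],
    generation_kwargs={"extra_headers": {"anthropic-beta": "prompt-caching-2024-07-31"}},
)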
@@ -260,6 +262,7 @@ def _convert_to_anthropic_format(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
for m in messages:
message_dict = dataclasses.asdict(m)
filtered_message = {k: v for k, v in message_dict.items() if k in {"role", "content"} and v}
filtered_message.update(m.meta or {})
anthropic_formatted_messages.append(filtered_message)
return anthropic_formatted_messages
