Commit 2db1616

Add prompt caching, add example

vblagoje committed Aug 19, 2024
1 parent 55c65af commit 2db1616

Showing 2 changed files with 66 additions and 0 deletions.
63 changes: 63 additions & 0 deletions integrations/anthropic/example/prompt_caching.py
@@ -0,0 +1,63 @@
# To run this example, you will need to set an `ANTHROPIC_API_KEY` environment variable.

from haystack import Pipeline
from haystack.components.builders import ChatPromptBuilder
from haystack.components.converters import HTMLToDocument
from haystack.components.fetchers import LinkContentFetcher
from haystack.components.generators.utils import print_streaming_chunk
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret

from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator

msg = ChatMessage.from_system(
    "You are a prompt expert who answers questions based on the given documents.\n"
    "Here are the documents:\n"
    "{% for d in documents %} \n"
    "  {{d.content}} \n"
    "{% endfor %}"
)

fetch_pipeline = Pipeline()
fetch_pipeline.add_component("fetcher", LinkContentFetcher())
fetch_pipeline.add_component("converter", HTMLToDocument())
fetch_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=[msg], variables=["documents"]))

fetch_pipeline.connect("fetcher", "converter")
fetch_pipeline.connect("converter", "prompt_builder")

result = fetch_pipeline.run(
    data={
        "fetcher": {"urls": ["https://ar5iv.labs.arxiv.org/html/2310.04406"]},
    }
)

# The fetched document is now rendered into a system ChatMessage
final_prompt_msg = result["prompt_builder"]["prompt"][0]

# Mark this message for caching by setting cache_control in its metadata
final_prompt_msg.meta["cache_control"] = {"type": "ephemeral"}


# Build QA pipeline
qa_pipeline = Pipeline()
qa_pipeline.add_component(
    "llm",
    AnthropicChatGenerator(
        api_key=Secret.from_env_var("ANTHROPIC_API_KEY"),
        streaming_callback=print_streaming_chunk,
        generation_kwargs={"extra_headers": {"anthropic-beta": "prompt-caching-2024-07-31"}},
    ),
)

questions = [
    "Why is Monte-Carlo Tree Search used in LATS?",
    "Summarize LATS selection, expansion, evaluation, simulation, backpropagation, and reflection",
]

# Answer the questions using prompt caching (i.e., the entire document is cached once and each question is run against it)
for question in questions:
    print("Question: " + question)
    qa_pipeline.run(
        data={
            "llm": {
                "messages": [
                    final_prompt_msg,
                    ChatMessage.from_user("Given these documents, answer the question: " + question),
                ]
            },
        }
    )
    print("\n")
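
Beyond the committed example, a quick way to confirm that caching is kicking in is to inspect the usage metadata Anthropic returns. The sketch below is an assumption, not part of this commit: it presumes the generator copies Anthropic's `usage` block, including the beta counters `cache_creation_input_tokens` and `cache_read_input_tokens`, into the reply's `meta`.

# Hedged sketch (not part of the commit): check the cache usage counters.
# Assumes replies carry Anthropic's `usage` block in their meta; on the first
# call `cache_creation_input_tokens` should be > 0 (cache write), and on later
# calls `cache_read_input_tokens` should be > 0 (cache hit).
check = qa_pipeline.run(
    data={"llm": {"messages": [final_prompt_msg, ChatMessage.from_user("What is LATS?")]}}
)
usage = check["llm"]["replies"][0].meta.get("usage", {})
print("cache_creation_input_tokens:", usage.get("cache_creation_input_tokens"))
print("cache_read_input_tokens:", usage.get("cache_read_input_tokens"))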

3 changes: 3 additions & 0 deletions in the module defining `AnthropicChatGenerator`
@@ -72,6 +72,7 @@ class AnthropicChatGenerator:
"temperature",
"top_p",
"top_k",
"extra_headers",
]

def __init__(
@@ -101,6 +102,7 @@ def __init__(
- `temperature`: The temperature to use for sampling.
- `top_p`: The top_p value to use for nucleus sampling.
- `top_k`: The top_k value to use for top-k sampling.
- `extra_headers`: A dictionary of extra headers sent with the API request (e.g., to enable beta features).
:param ignore_tools_thinking_messages: Anthropic's approach to tools (function calling) resolution involves
"chain of thought" messages before returning the actual function names and parameters in a message. If
`ignore_tools_thinking_messages` is `True`, the generator will drop so-called thinking messages when tool
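
The `extra_headers` addition above is what lets callers opt into the prompt-caching beta. A minimal sketch, assuming (as with other Haystack chat generators) that `generation_kwargs` can also be supplied per `run` call and are merged with those given at construction time:

# Hedged sketch: enabling the prompt-caching beta header per run() call.
# Assumes run() accepts generation_kwargs and merges them with the init-time ones.
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret

from haystack_integrations.components.generators.anthropic import AnthropicChatGenerator

generator = AnthropicChatGenerator(api_key=Secret.from_env_var("ANTHROPIC_API_KEY"))
reply = generator.run(
    messages=[ChatMessage.from_user("Hello")],
    generation_kwargs={"extra_headers": {"anthropic-beta": "prompt-caching-2024-07-31"}},
)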
@@ -260,6 +262,7 @@ def _convert_to_anthropic_format(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
for m in messages:
message_dict = dataclasses.asdict(m)
filtered_message = {k: v for k, v in message_dict.items() if k in {"role", "content"} and v}
filtered_message.update(m.meta or {})
anthropic_formatted_messages.append(filtered_message)
return anthropic_formatted_messages
