diff --git a/plugins/anthropic/src/claude.test.ts b/plugins/anthropic/src/claude.test.ts index 9ab56a6..98c19f4 100644 --- a/plugins/anthropic/src/claude.test.ts +++ b/plugins/anthropic/src/claude.test.ts @@ -813,6 +813,40 @@ describe('toAnthropicRequestBody', () => { 'Only text output format is supported for Claude models currently' ); }); + + it('should apply system prompt caching when enabled', () => { + const request: GenerateRequest = { + messages: [ + { role: 'system', content: [{ text: 'You are a helpful assistant' }] }, + { role: 'user', content: [{ text: 'Hi' }] }, + ], + output: { format: 'text' }, + }; + + // Test with caching enabled + const outputWithCaching = toAnthropicRequestBody( + 'claude-3-haiku', + request, + false, + true + ); + expect(outputWithCaching.system).toEqual([ + { + type: 'text', + text: 'You are a helpful assistant', + cache_control: { type: 'ephemeral' }, + }, + ]); + + // Test with caching disabled + const outputWithoutCaching = toAnthropicRequestBody( + 'claude-3-haiku', + request, + false, + false + ); + expect(outputWithoutCaching.system).toBe('You are a helpful assistant'); + }); }); describe('claudeRunner', () => { diff --git a/plugins/anthropic/src/claude.ts b/plugins/anthropic/src/claude.ts index c587a23..9e63262 100644 --- a/plugins/anthropic/src/claude.ts +++ b/plugins/anthropic/src/claude.ts @@ -420,20 +420,31 @@ export function fromAnthropicResponse(response: Message): GenerateResponseData { * @param modelName The name of the Anthropic model to use. * @param request The Genkit GenerateRequest to convert. * @param stream Whether to stream the response. + * @param cacheSystemPrompt Whether to cache the system prompt. * @returns The converted Anthropic API request body. * @throws An error if the specified model is not supported or if an unsupported output format is requested. 
*/ export function toAnthropicRequestBody( modelName: string, request: GenerateRequest, - stream?: boolean + stream?: boolean, + cacheSystemPrompt?: boolean ): MessageCreateParams { const model = SUPPORTED_CLAUDE_MODELS[modelName]; if (!model) throw new Error(`Unsupported model: ${modelName}`); const { system, messages } = toAnthropicMessages(request.messages); const mappedModelName = request.config?.version ?? model.version ?? modelName; const body: MessageCreateParams = { - system, + system: cacheSystemPrompt && system + ? [ + { + type: 'text', + text: system, + // @ts-expect-error cache_control is in beta + cache_control: { type: 'ephemeral' }, + }, + ] + : system, messages, tools: request.tools?.map(toAnthropicTool), max_tokens: request.config?.maxOutputTokens ?? 4096, @@ -463,15 +474,25 @@ export function toAnthropicRequestBody( * Creates the runner used by Genkit to interact with the Claude model. * @param name The name of the Claude model. * @param client The Anthropic client instance. + * @param cacheSystemPrompt Whether to cache the system prompt. * @returns The runner that Genkit will call when the model is invoked. 
*/ -export function claudeRunner(name: string, client: Anthropic) { +export function claudeRunner( + name: string, + client: Anthropic, + cacheSystemPrompt?: boolean +) { return async ( request: GenerateRequest, streamingCallback?: StreamingCallback ): Promise => { let response: Message; - const body = toAnthropicRequestBody(name, request, !!streamingCallback); + const body = toAnthropicRequestBody( + name, + request, + !!streamingCallback, + cacheSystemPrompt + ); if (streamingCallback) { const stream = client.messages.stream(body); for await (const chunk of stream) { @@ -497,7 +518,8 @@ export function claudeRunner(name: string, client: Anthropic) { export function claudeModel( ai: Genkit, name: string, - client: Anthropic + client: Anthropic, + cacheSystemPrompt?: boolean ): ModelAction { const modelId = `anthropic/${name}`; const model = SUPPORTED_CLAUDE_MODELS[name]; @@ -509,6 +531,6 @@ export function claudeModel( ...model.info, configSchema: model.configSchema, }, - claudeRunner(name, client) + claudeRunner(name, client, cacheSystemPrompt) ); } diff --git a/plugins/anthropic/src/index.ts b/plugins/anthropic/src/index.ts index 4bce186..90bd63f 100644 --- a/plugins/anthropic/src/index.ts +++ b/plugins/anthropic/src/index.ts @@ -31,6 +31,7 @@ export { claude35Sonnet, claude3Opus, claude3Sonnet, claude3Haiku }; export interface PluginOptions { apiKey?: string; + cacheSystemPrompt?: boolean; } /** @@ -48,7 +49,7 @@ export interface PluginOptions { * - anthropic: The main plugin function to interact with the Anthropic AI. * * Usage: - * To use the Claude models, initialize the anthropic plugin inside `configureGenkit` and pass the configuration options. If no API key is provided in the options, the environment variable `ANTHROPIC_API_KEY` must be set. + * To use the Claude models, initialize the anthropic plugin inside `configureGenkit` and pass the configuration options. 
If no API key is provided in the options, the environment variable `ANTHROPIC_API_KEY` must be set. If you want to cache the system prompt, set `cacheSystemPrompt` to `true`. **Note:** Prompt caching is in beta and may change. To learn more, see https://docs.anthropic.com/en/docs/prompt-caching. * * Example: * ``` * @@ -56,7 +57,7 @@ export interface PluginOptions { * * export default configureGenkit({ * plugins: [ - * anthropic({ apiKey: 'your-api-key' }) + * anthropic({ apiKey: 'your-api-key', cacheSystemPrompt: false }) * ... // other plugins * ] * }); @@ -71,10 +72,14 @@ export const anthropic = (options?: PluginOptions) => 'Please pass in the API key or set the ANTHROPIC_API_KEY environment variable' ); } - const client = new Anthropic({ apiKey }); + const defaultHeaders: Record<string, string> = {}; + if (options?.cacheSystemPrompt === true) { + defaultHeaders['anthropic-beta'] = 'prompt-caching-2024-07-31'; + } + const client = new Anthropic({ apiKey, defaultHeaders }); for (const name of Object.keys(SUPPORTED_CLAUDE_MODELS)) { - claudeModel(ai, name, client, options?.cacheSystemPrompt); } });