From ac64f641c33cf733df2928e0d08e8e1bdc3b523d Mon Sep 17 00:00:00 2001
From: jacoblee93
Date: Sun, 15 Dec 2024 08:43:33 -0800
Subject: [PATCH] Add cache usage metadata to Anthropic streaming responses

---
 .../src/tests/chat_models.int.test.ts                | 10 ++++++++++
 .../langchain-anthropic/src/utils/message_outputs.ts | 12 +++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/libs/langchain-anthropic/src/tests/chat_models.int.test.ts b/libs/langchain-anthropic/src/tests/chat_models.int.test.ts
index d9240f2f03aa..c0e11b8c4fdf 100644
--- a/libs/langchain-anthropic/src/tests/chat_models.int.test.ts
+++ b/libs/langchain-anthropic/src/tests/chat_models.int.test.ts
@@ -721,6 +721,16 @@ test("system prompt caching", async () => {
   expect(res2.response_metadata.usage.cache_read_input_tokens).toBeGreaterThan(
     0
   );
+  const stream = await model.stream(messages);
+  let agg = undefined;
+  for await (const chunk of stream) {
+    agg = agg === undefined ? chunk : concat(agg, chunk);
+  }
+  expect(agg).toBeDefined();
+  expect(agg!.response_metadata.usage.cache_creation_input_tokens).toBe(0);
+  expect(agg!.response_metadata.usage.cache_read_input_tokens).toBeGreaterThan(
+    0
+  );
 });
 
 // TODO: Add proper test with long tool content
diff --git a/libs/langchain-anthropic/src/utils/message_outputs.ts b/libs/langchain-anthropic/src/utils/message_outputs.ts
index fd34dba87bf7..a30fcb5c401c 100644
--- a/libs/langchain-anthropic/src/utils/message_outputs.ts
+++ b/libs/langchain-anthropic/src/utils/message_outputs.ts
@@ -30,16 +30,22 @@ export function _makeMessageChunkFromAnthropicEvent(
         filteredAdditionalKwargs[key] = value;
       }
     }
+    const { input_tokens = 0, output_tokens = 0, ...rest } = usage ?? {};
     const usageMetadata: UsageMetadata = {
-      input_tokens: usage.input_tokens,
-      output_tokens: usage.output_tokens,
-      total_tokens: usage.input_tokens + usage.output_tokens,
+      input_tokens,
+      output_tokens,
+      total_tokens: input_tokens + output_tokens,
     };
     return {
       chunk: new AIMessageChunk({
         content: fields.coerceContentToString ? "" : [],
         additional_kwargs: filteredAdditionalKwargs,
         usage_metadata: fields.streamUsage ? usageMetadata : undefined,
+        response_metadata: {
+          usage: {
+            ...rest,
+          },
+        },
         id: data.message.id,
       }),
     };
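-- 
Not part of the patch, a reviewer sketch of how the change surfaces to
callers. The model name, beta header, and prompt contents below are
illustrative placeholders; messages mirrors the shape used by the
"system prompt caching" integration test:

  import { ChatAnthropic } from "@langchain/anthropic";
  import { HumanMessage, SystemMessage } from "@langchain/core/messages";
  import { concat } from "@langchain/core/utils/stream";

  const model = new ChatAnthropic({
    model: "claude-3-haiku-20240307",
    clientOptions: {
      // Anthropic's prompt caching beta header, as in the test above.
      defaultHeaders: { "anthropic-beta": "prompt-caching-2024-07-31" },
    },
  });

  // Placeholder standing in for a system prompt long enough to be cached.
  const longSystemPrompt = "You are a helpful assistant. ".repeat(300);
  const messages = [
    new SystemMessage({
      content: [
        {
          type: "text",
          text: longSystemPrompt,
          cache_control: { type: "ephemeral" },
        },
      ],
    }),
    new HumanMessage("What types of output parsers does LangChain have?"),
  ];

  // Aggregate the streamed chunks; with this patch, the cache counters from
  // the message_start event ride along on response_metadata.usage.
  let agg;
  for await (const chunk of await model.stream(messages)) {
    agg = agg === undefined ? chunk : concat(agg, chunk);
  }
  console.log(agg?.response_metadata.usage);
  // On a warm cache, expect a nonzero cache_read_input_tokens here.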
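A design note visible in the diff: usage_metadata on streamed chunks stays
gated behind the streamUsage flag, while the passthrough fields on
response_metadata.usage (cache_creation_input_tokens, cache_read_input_tokens,
and whatever else Anthropic reports beyond input/output tokens) are attached
unconditionally, matching how the non-streaming path already exposes
response_metadata.usage in the existing assertions.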