diff --git a/src/apps/chat/AppChat.tsx b/src/apps/chat/AppChat.tsx
index 841a7d941..96ae435ec 100644
--- a/src/apps/chat/AppChat.tsx
+++ b/src/apps/chat/AppChat.tsx
@@ -277,7 +277,7 @@ export function AppChat() {
     const conversation = getConversation(conversationId);
     if (!conversation) return;
 
-    const imaginedPrompt = await imaginePromptFromText(messageText) || 'An error sign.';
+    const imaginedPrompt = await imaginePromptFromText(messageText, conversationId) || 'An error sign.';
     await handleExecuteAndOutcome('generate-image', conversationId, [
       ...conversation.messages,
       createDMessage('user', imaginedPrompt),
diff --git a/src/apps/chat/editors/chat-stream.ts b/src/apps/chat/editors/chat-stream.ts
index f51290c59..fb84f3d2f 100644
--- a/src/apps/chat/editors/chat-stream.ts
+++ b/src/apps/chat/editors/chat-stream.ts
@@ -2,7 +2,7 @@ import type { DLLMId } from '~/modules/llms/store-llms';
 import type { StreamingClientUpdate } from '~/modules/llms/vendors/unifiedStreamingClient';
 import { autoSuggestions } from '~/modules/aifn/autosuggestions/autoSuggestions';
 import { conversationAutoTitle } from '~/modules/aifn/autotitle/autoTitle';
-import { llmStreamingChatGenerate, VChatContextRef, VChatContextName, VChatMessageIn } from '~/modules/llms/llm.client';
+import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
 import { speakText } from '~/modules/elevenlabs/elevenlabs.client';
 
 import type { DMessage } from '~/common/state/store-chats';
@@ -63,7 +63,7 @@ type StreamMessageStatus = { outcome: StreamMessageOutcome, errorMessage?: strin
 export async function streamAssistantMessage(
   llmId: DLLMId,
   messagesHistory: VChatMessageIn[],
-  contextName: VChatContextName,
+  contextName: VChatStreamContextName,
   contextRef: VChatContextRef,
   throttleUnits: number, // 0: disable, 1: default throttle (12Hz), 2+ reduce the message frequency with the square root
   autoSpeak: ChatAutoSpeakType,
diff --git a/src/modules/aifn/autosuggestions/autoSuggestions.ts b/src/modules/aifn/autosuggestions/autoSuggestions.ts
index 74c7953a6..d73d737b0 100644
--- a/src/modules/aifn/autosuggestions/autoSuggestions.ts
+++ b/src/modules/aifn/autosuggestions/autoSuggestions.ts
@@ -1,4 +1,4 @@
-import { llmChatGenerateOrThrow, VChatFunctionIn } from '~/modules/llms/llm.client';
+import { llmChatGenerateOrThrow, VChatFunctionIn, VChatMessageIn } from '~/modules/llms/llm.client';
 import { useModelsStore } from '~/modules/llms/store-llms';
 import { useChatStore } from '~/common/state/store-chats';
@@ -83,13 +83,18 @@ export function autoSuggestions(conversationId: string, assistantMessageId: stri
 
   // Follow-up: Auto-Diagrams
   if (suggestDiagrams) {
-    llmChatGenerateOrThrow(funcLLMId, [
-      { role: 'system', content: systemMessage.text },
-      { role: 'user', content: userMessage.text },
-      { role: 'assistant', content: assistantMessageText },
-    ], [suggestPlantUMLFn], 'draw_plantuml_diagram',
+    const instructions: VChatMessageIn[] = [
+      { role: 'system', content: systemMessage.text },
+      { role: 'user', content: userMessage.text },
+      { role: 'assistant', content: assistantMessageText },
+    ];
+    llmChatGenerateOrThrow(
+      funcLLMId,
+      instructions,
+      'chat-followup-diagram', conversationId,
+      [suggestPlantUMLFn], 'draw_plantuml_diagram',
     ).then(chatResponse => {
-
+      // cheap way to check if the function was supported
       if (!('function_arguments' in chatResponse)) return;
diff --git a/src/modules/aifn/autotitle/autoTitle.ts b/src/modules/aifn/autotitle/autoTitle.ts
index 69793bcba..f4f0983f6 100644
--- a/src/modules/aifn/autotitle/autoTitle.ts
+++ b/src/modules/aifn/autotitle/autoTitle.ts
@@ -1,5 +1,5 @@
 import { getFastLLMId } from '~/modules/llms/store-llms';
-import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
+import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
 import { useChatStore } from '~/common/state/store-chats';
@@ -34,21 +34,23 @@ export async function conversationAutoTitle(conversationId: string, forceReplace
   try {
 
     // LLM chat-generate call
+    const instructions: VChatMessageIn[] = [
+      { role: 'system', content: `You are an AI conversation titles assistant who specializes in creating expressive yet few-words chat titles.` },
+      {
+        role: 'user', content:
+          'Analyze the given short conversation (every line is truncated) and extract a concise chat title that ' +
+          'summarizes the conversation in as little as a couple of words.\n' +
+          'Only respond with the lowercase short title and nothing else.\n' +
+          '\n' +
+          '```\n' +
+          historyLines.join('\n') +
+          '```\n',
+      },
+    ];
     const chatResponse = await llmChatGenerateOrThrow(
       fastLLMId,
-      [
-        { role: 'system', content: `You are an AI conversation titles assistant who specializes in creating expressive yet few-words chat titles.` },
-        {
-          role: 'user', content:
-            'Analyze the given short conversation (every line is truncated) and extract a concise chat title that ' +
-            'summarizes the conversation in as little as a couple of words.\n' +
-            'Only respond with the lowercase short title and nothing else.\n' +
-            '\n' +
-            '```\n' +
-            historyLines.join('\n') +
-            '```\n',
-        },
-      ],
+      instructions,
+      'chat-ai-title', conversationId,
       null, null,
     );
diff --git a/src/modules/aifn/imagine/imaginePromptFromText.ts b/src/modules/aifn/imagine/imaginePromptFromText.ts
index 436147b78..ce3034afa 100644
--- a/src/modules/aifn/imagine/imaginePromptFromText.ts
+++ b/src/modules/aifn/imagine/imaginePromptFromText.ts
@@ -1,5 +1,5 @@
 import { getFastLLMId } from '~/modules/llms/store-llms';
-import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
+import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
 
 
 const simpleImagineSystemPrompt =
@@ -10,14 +10,15 @@ Provide output as a lowercase prompt and nothing else.`;
 /**
  * Creates a caption for a drawing or photo given some description - used to elevate the quality of the imaging
  */
-export async function imaginePromptFromText(messageText: string): Promise<string | null> {
+export async function imaginePromptFromText(messageText: string, contextRef: string): Promise<string | null> {
   const fastLLMId = getFastLLMId();
   if (!fastLLMId) return null;
   try {
-    const chatResponse = await llmChatGenerateOrThrow(fastLLMId, [
+    const instructions: VChatMessageIn[] = [
       { role: 'system', content: simpleImagineSystemPrompt },
      { role: 'user', content: 'Write a prompt, based on the following input.\n\n```\n' + messageText.slice(0, 1000) + '\n```\n' },
-    ], null, null);
+    ];
+    const chatResponse = await llmChatGenerateOrThrow(fastLLMId, instructions, 'draw-expand-prompt', contextRef, null, null);
     return chatResponse.content?.trim() ?? null;
   } catch (error: any) {
     console.error('imaginePromptFromText: fetch request error:', error);
diff --git a/src/modules/aifn/react/react.ts b/src/modules/aifn/react/react.ts
index 9a41e1df9..db2052ee7 100644
--- a/src/modules/aifn/react/react.ts
+++ b/src/modules/aifn/react/react.ts
@@ -132,7 +132,7 @@ export class Agent {
     S.messages.push({ role: 'user', content: prompt });
     let content: string;
     try {
-      content = (await llmChatGenerateOrThrow(llmId, S.messages, null, null, 500)).content;
+      content = (await llmChatGenerateOrThrow(llmId, S.messages, 'chat-react-turn', null, null, null, 500)).content;
     } catch (error: any) {
       content = `Error in llmChatGenerateOrThrow: ${error}`;
     }
diff --git a/src/modules/aifn/summarize/summerize.ts b/src/modules/aifn/summarize/summerize.ts
index 00e042525..c58339279 100644
--- a/src/modules/aifn/summarize/summerize.ts
+++ b/src/modules/aifn/summarize/summerize.ts
@@ -1,5 +1,5 @@
 import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms';
-import { llmChatGenerateOrThrow } from '~/modules/llms/llm.client';
+import { llmChatGenerateOrThrow, VChatMessageIn } from '~/modules/llms/llm.client';
 
 
 // prompt to be tried when doing recursive summerization.
@@ -80,10 +80,11 @@ async function cleanUpContent(chunk: string, llmId: DLLMId, _ignored_was_targetW
   const autoResponseTokensSize = contextTokens ? Math.floor(contextTokens * outputTokenShare) : null;
 
   try {
-    const chatResponse = await llmChatGenerateOrThrow(llmId, [
+    const instructions: VChatMessageIn[] = [
       { role: 'system', content: cleanupPrompt },
       { role: 'user', content: chunk },
-    ], null, null, autoResponseTokensSize ?? undefined);
+    ];
+    const chatResponse = await llmChatGenerateOrThrow(llmId, instructions, 'chat-ai-summarize', null, null, null, autoResponseTokensSize ?? undefined);
     return chatResponse?.content ?? '';
   } catch (error: any) {
     return '';
diff --git a/src/modules/aifn/useLLMChain.ts b/src/modules/aifn/useLLMChain.ts
index 1e7a654ee..4572a66d7 100644
--- a/src/modules/aifn/useLLMChain.ts
+++ b/src/modules/aifn/useLLMChain.ts
@@ -1,7 +1,7 @@
 import * as React from 'react';
 
 import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms';
-import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
+import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
 
 
 // set to true to log to the console
@@ -20,7 +20,7 @@ export interface LLMChainStep {
 /**
  * React hook to manage a chain of LLM transformations.
  */
-export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess: (output: string, input: string) => void, contextName: VChatContextName, contextRef: VChatContextRef) {
+export function useLLMChain(steps: LLMChainStep[], llmId: DLLMId | undefined, chainInput: string | undefined, onSuccess: (output: string, input: string) => void, contextName: VChatStreamContextName, contextRef: VChatContextRef) {
 
   // state
   const [chain, setChain] = React.useState(null);
diff --git a/src/modules/aifn/useStreamChatText.ts b/src/modules/aifn/useStreamChatText.ts
index 11b856f7e..fffacd5c8 100644
--- a/src/modules/aifn/useStreamChatText.ts
+++ b/src/modules/aifn/useStreamChatText.ts
@@ -1,7 +1,7 @@
 import * as React from 'react';
 
 import type { DLLMId } from '~/modules/llms/store-llms';
-import { llmStreamingChatGenerate, VChatContextName, VChatContextRef, VChatMessageIn } from '~/modules/llms/llm.client';
+import { llmStreamingChatGenerate, VChatContextRef, VChatMessageIn, VChatStreamContextName } from '~/modules/llms/llm.client';
 
 
 export function useStreamChatText() {
@@ -13,7 +13,7 @@ export function useStreamChatText() {
   const abortControllerRef = React.useRef(null);
 
 
-  const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[], contextName: VChatContextName, contextRef: VChatContextRef) => {
+  const startStreaming = React.useCallback(async (llmId: DLLMId, prompt: VChatMessageIn[], contextName: VChatStreamContextName, contextRef: VChatContextRef) => {
     setStreamError(null);
     setPartialText(null);
     setText(null);
diff --git a/src/modules/llms/llm.client.ts b/src/modules/llms/llm.client.ts
index 580e4b8cb..f8cd37fb1 100644
--- a/src/modules/llms/llm.client.ts
+++ b/src/modules/llms/llm.client.ts
@@ -2,7 +2,7 @@ import { sendGAEvent } from '@next/third-parties/google';
 
 import { hasGoogleAnalytics } from '~/common/components/GoogleAnalytics';
 
-import type { ModelDescriptionSchema } from './server/llm.server.types';
+import type { GenerateContextNameSchema, ModelDescriptionSchema, StreamingContextNameSchema } from './server/llm.server.types';
 import type { OpenAIWire } from './server/openai/openai.wiretypes';
 import type { StreamingClientUpdate } from './vendors/unifiedStreamingClient';
 import { DLLM, DLLMId, DModelSource, DModelSourceId, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, useModelsStore } from './store-llms';
@@ -21,14 +21,8 @@ export interface VChatMessageIn {
 
 export type VChatFunctionIn = OpenAIWire.ChatCompletion.RequestFunctionDef;
 
-export type VChatContextName =
-  | 'conversation'
-  | 'ai-diagram'
-  | 'ai-flattener'
-  | 'beam-scatter'
-  | 'beam-gather'
-  | 'call'
-  | 'persona-extract';
+export type VChatStreamContextName = StreamingContextNameSchema;
+export type VChatGenerateContextName = GenerateContextNameSchema;
 export type VChatContextRef = string;
 
 export interface VChatMessageOut {
@@ -122,7 +116,10 @@ function modelDescriptionToDLLMOpenAIOptions(model: M
 export async function llmChatGenerateOrThrow(
   llmId: DLLMId,
   messages: VChatMessageIn[],
-  functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
+  contextName: VChatGenerateContextName,
+  contextRef: VChatContextRef | null,
+  functions: VChatFunctionIn[] | null,
+  forceFunctionName: string | null,
   maxTokens?: number,
 ): Promise {
@@ -146,14 +143,14 @@ export async function llmChatGenerateOrThrow
   await new Promise(resolve => setTimeout(resolve, delay));
 
   // execute via the vendor
-  return await vendor.rpcChatGenerateOrThrow(access, options, messages, functions, forceFunctionName, maxTokens);
+  return await vendor.rpcChatGenerateOrThrow(access, options, messages, contextName, contextRef, functions, forceFunctionName, maxTokens);
 }
 
 
 export async function llmStreamingChatGenerate(
   llmId: DLLMId,
   messages: VChatMessageIn[],
-  contextName: VChatContextName,
+  contextName: VChatStreamContextName,
   contextRef: VChatContextRef,
   functions: VChatFunctionIn[] | null, forceFunctionName: string | null,
diff --git a/src/modules/llms/server/anthropic/anthropic.router.ts b/src/modules/llms/server/anthropic/anthropic.router.ts
index 0fbc764f9..25f467a81 100644
--- a/src/modules/llms/server/anthropic/anthropic.router.ts
+++ b/src/modules/llms/server/anthropic/anthropic.router.ts
@@ -8,7 +8,7 @@ import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
 import { fixupHost } from '~/common/util/urlUtils';
 
 import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
-import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema } from '../llm.server.types';
+import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema } from '../llm.server.types';
 
 import { AnthropicWireMessagesRequest, anthropicWireMessagesRequestSchema, AnthropicWireMessagesResponse, anthropicWireMessagesResponseSchema } from './anthropic.wiretypes';
 import { hardcodedAnthropicModels } from './anthropic.models';
@@ -158,7 +158,11 @@ const listModelsInputSchema = z.object({
 
 const chatGenerateInputSchema = z.object({
   access: anthropicAccessSchema,
-  model: openAIModelSchema, history: openAIHistorySchema,
+  model: openAIModelSchema,
+  history: openAIHistorySchema,
+  // functions: openAIFunctionsSchema.optional(),
+  // forceFunctionName: z.string().optional(),
+  context: llmsGenerateContextSchema.optional(),
 });
diff --git a/src/modules/llms/server/gemini/gemini.router.ts b/src/modules/llms/server/gemini/gemini.router.ts
index e398b5b6d..2e4cd0c46 100644
--- a/src/modules/llms/server/gemini/gemini.router.ts
+++ b/src/modules/llms/server/gemini/gemini.router.ts
@@ -8,7 +8,7 @@ import { createTRPCRouter, publicProcedure } from '~/server/api/trpc.server';
 import { fetchJsonOrTRPCError } from '~/server/api/trpc.router.fetchers';
 import { fixupHost } from '~/common/util/urlUtils';
 
-import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema } from '../llm.server.types';
+import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema } from '../llm.server.types';
 import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
 
@@ -120,8 +120,11 @@ const accessOnlySchema = z.object({
 
 const chatGenerateInputSchema = z.object({
   access: geminiAccessSchema,
-  model: openAIModelSchema, history: openAIHistorySchema,
-  // functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
+  model: openAIModelSchema,
+  history: openAIHistorySchema,
+  // functions: openAIFunctionsSchema.optional(),
+  // forceFunctionName: z.string().optional(),
+  context: llmsGenerateContextSchema.optional(),
 });
diff --git a/src/modules/llms/server/llm.server.streaming.ts b/src/modules/llms/server/llm.server.streaming.ts
index 03c788782..a85aa1a6f 100644
--- a/src/modules/llms/server/llm.server.streaming.ts
+++ b/src/modules/llms/server/llm.server.streaming.ts
@@ -22,6 +22,9 @@ import type { OpenAIWire } from './openai/openai.wiretypes';
 import { openAIAccess, openAIAccessSchema, openAIChatCompletionPayload, openAIHistorySchema, openAIModelSchema } from './openai/openai.router';
 
 
+import { llmsStreamingContextSchema } from './llm.server.types';
+
+
 // configuration
 const USER_SYMBOL_MAX_TOKENS = '🧱';
 const USER_SYMBOL_PROMPT_BLOCKED = '🚫';
@@ -46,17 +49,14 @@ type MuxingFormat = 'sse' | 'json-nl';
 */
 type AIStreamParser = (data: string, eventType?: string) => { text: string, close: boolean };
 
 
-const streamingContextSchema = z.object({
-  method: z.literal('chat-stream'),
-  name: z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']),
-  ref: z.string(),
-});
 
 const chatStreamingInputSchema = z.object({
   access: z.union([anthropicAccessSchema, geminiAccessSchema, ollamaAccessSchema, openAIAccessSchema]),
   model: openAIModelSchema,
   history: openAIHistorySchema,
-  context: streamingContextSchema,
+  // NOTE: made it optional for now as we have some old requests without it
+  // 2024-07-07: remove .optional()
+  context: llmsStreamingContextSchema.optional(),
 });
 export type ChatStreamingInputSchema = z.infer<typeof chatStreamingInputSchema>;
diff --git a/src/modules/llms/server/llm.server.types.ts b/src/modules/llms/server/llm.server.types.ts
index 351d7339d..dc037f570 100644
--- a/src/modules/llms/server/llm.server.types.ts
+++ b/src/modules/llms/server/llm.server.types.ts
@@ -46,6 +46,25 @@ export const llmsListModelsOutputSchema = z.object({
 });
 
 
+// Chat Generation Input (some parts of)
+
+const generateContextNameSchema = z.enum(['chat-ai-title', 'chat-ai-summarize', 'chat-followup-diagram', 'chat-react-turn', 'draw-expand-prompt']);
+export type GenerateContextNameSchema = z.infer<typeof generateContextNameSchema>;
+export const llmsGenerateContextSchema = z.object({
+  method: z.literal('chat-generate'),
+  name: generateContextNameSchema,
+  ref: z.string(),
+});
+
+const streamingContextNameSchema = z.enum(['conversation', 'ai-diagram', 'ai-flattener', 'call', 'beam-scatter', 'beam-gather', 'persona-extract']);
+export type StreamingContextNameSchema = z.infer<typeof streamingContextNameSchema>;
+export const llmsStreamingContextSchema = z.object({
+  method: z.literal('chat-stream'),
+  name: streamingContextNameSchema,
+  ref: z.string(),
+});
+
+
 // (non-streaming) Chat Generation Output
 
 export const llmsChatGenerateOutputSchema = z.object({
diff --git a/src/modules/llms/server/ollama/ollama.router.ts b/src/modules/llms/server/ollama/ollama.router.ts
index f5482a438..e8097d06c 100644
--- a/src/modules/llms/server/ollama/ollama.router.ts
+++ b/src/modules/llms/server/ollama/ollama.router.ts
@@ -11,7 +11,7 @@ import { capitalizeFirstLetter } from '~/common/util/textUtils';
 import { fixupHost } from '~/common/util/urlUtils';
 
 import { OpenAIHistorySchema, openAIHistorySchema, OpenAIModelSchema, openAIModelSchema } from '../openai/openai.router';
-import { llmsChatGenerateOutputSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
+import { llmsChatGenerateOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
 
 import { OLLAMA_BASE_MODELS, OLLAMA_PREV_UPDATE } from './ollama.models';
 import { WireOllamaChatCompletionInput, wireOllamaChunkedOutputSchema, wireOllamaListModelsSchema, wireOllamaModelInfoSchema } from './ollama.wiretypes';
@@ -117,8 +117,11 @@ const adminPullModelSchema = z.object({
 
 const chatGenerateInputSchema = z.object({
   access: ollamaAccessSchema,
-  model: openAIModelSchema, history: openAIHistorySchema,
-  // functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
+  model: openAIModelSchema,
+  history: openAIHistorySchema,
+  // functions: openAIFunctionsSchema.optional(),
+  // forceFunctionName: z.string().optional(),
+  context: llmsGenerateContextSchema.optional(),
 });
 
 const listPullableOutputSchema = z.object({
diff --git a/src/modules/llms/server/openai/openai.router.ts b/src/modules/llms/server/openai/openai.router.ts
index 76cb96392..6a3a8f659 100644
--- a/src/modules/llms/server/openai/openai.router.ts
+++ b/src/modules/llms/server/openai/openai.router.ts
@@ -12,7 +12,7 @@ import { fixupHost } from '~/common/util/urlUtils';
 
 import { OpenAIWire, WireOpenAICreateImageOutput, wireOpenAICreateImageOutputSchema, WireOpenAICreateImageRequest } from './openai.wiretypes';
 import { azureModelToModelDescription, groqModelSortFn, groqModelToModelDescription, lmStudioModelToModelDescription, localAIModelToModelDescription, mistralModelsSort, mistralModelToModelDescription, oobaboogaModelToModelDescription, openAIModelFilter, openAIModelToModelDescription, openRouterModelFamilySortFn, openRouterModelToModelDescription, perplexityAIModelDescriptions, perplexityAIModelSort, togetherAIModelsToModelDescriptions } from './models.data';
-import { llmsChatGenerateWithFunctionsOutputSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
+import { llmsChatGenerateWithFunctionsOutputSchema, llmsGenerateContextSchema, llmsListModelsOutputSchema, ModelDescriptionSchema } from '../llm.server.types';
 import { wilreLocalAIModelsApplyOutputSchema, wireLocalAIModelsAvailableOutputSchema, wireLocalAIModelsListOutputSchema } from './localai.wiretypes';
 
@@ -72,8 +72,11 @@ const listModelsInputSchema = z.object({
 
 const chatGenerateWithFunctionsInputSchema = z.object({
   access: openAIAccessSchema,
-  model: openAIModelSchema, history: openAIHistorySchema,
-  functions: openAIFunctionsSchema.optional(), forceFunctionName: z.string().optional(),
+  model: openAIModelSchema,
+  history: openAIHistorySchema,
+  functions: openAIFunctionsSchema.optional(),
+  forceFunctionName: z.string().optional(),
+  context: llmsGenerateContextSchema.optional(),
 });
 
 const createImagesInputSchema = z.object({
diff --git a/src/modules/llms/vendors/IModelVendor.ts b/src/modules/llms/vendors/IModelVendor.ts
index ff6962480..2dd0f8714 100644
--- a/src/modules/llms/vendors/IModelVendor.ts
+++ b/src/modules/llms/vendors/IModelVendor.ts
@@ -8,7 +8,7 @@ import type { DLLM, DLLMId, DModelSourceId } from '../store-llms';
 import type { ModelDescriptionSchema } from '../server/llm.server.types';
 import type { ModelVendorId } from './vendors.registry';
 import type { StreamingClientUpdate } from './unifiedStreamingClient';
-import type { VChatContextName, VChatContextRef, VChatFunctionIn, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut } from '../llm.client';
+import type { VChatContextRef, VChatFunctionIn, VChatGenerateContextName, VChatMessageIn, VChatMessageOrFunctionCallOut, VChatMessageOut, VChatStreamContextName } from '../llm.client';
 
 
 export interface IModelVendor> {
@@ -44,6 +44,7 @@ export interface IModelVendor
 Promise;
@@ -53,7 +54,7 @@ export interface IModelVendor
 void,
diff --git a/src/modules/llms/vendors/anthropic/anthropic.vendor.ts b/src/modules/llms/vendors/anthropic/anthropic.vendor.ts
index 6bcb79c48..71a9d7cb1 100644
--- a/src/modules/llms/vendors/anthropic/anthropic.vendor.ts
+++ b/src/modules/llms/vendors/anthropic/anthropic.vendor.ts
@@ -3,7 +3,7 @@ import { apiAsync } from '~/common/util/trpc.client';
 
 import type { AnthropicAccessSchema } from '../../server/anthropic/anthropic.router';
 import type { IModelVendor } from '../IModelVendor';
-import type { VChatMessageOut } from '../../llm.client';
+import type { VChatContextRef, VChatGenerateContextName, VChatMessageOut } from '../../llm.client';
 
 import { unifiedStreamingClient } from '../unifiedStreamingClient';
 
 import { FALLBACK_LLM_RESPONSE_TOKENS, FALLBACK_LLM_TEMPERATURE, LLMOptionsOpenAI } from '../openai/openai.vendor';
@@ -47,7 +47,7 @@ export const ModelVendorAnthropic: IModelVendor
 await apiAsync.llmAnthropic.listModels.query({ access }),
 
   // Chat Generate (non-streaming) with Functions
-  rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
+  rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
     if (functions?.length || forceFunctionName)
       throw new Error('Anthropic does not support functions');
@@ -61,6 +61,11 @@ export const ModelVendorAnthropic: IModelVendor
 await apiAsync.llmGemini.listModels.query({ access }),
 
   // Chat Generate (non-streaming) with Functions
-  rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
+  rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
     if (functions?.length || forceFunctionName)
       throw new Error('Gemini does not support functions');
@@ -74,6 +74,11 @@ export const ModelVendorGemini: IModelVendor
 await apiAsync.llmOllama.listModels.query({ access }),
 
   // Chat Generate (non-streaming) with Functions
-  rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
+  rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
     if (functions?.length || forceFunctionName)
       throw new Error('Ollama does not support functions');
@@ -56,6 +56,11 @@ export const ModelVendorOllama: IModelVendor
 await apiAsync.llmOpenAI.listModels.query({ access }),
 
   // Chat Generate (non-streaming) with Functions
-  rpcChatGenerateOrThrow: async (access, llmOptions, messages, functions, forceFunctionName, maxTokens) => {
+  rpcChatGenerateOrThrow: async (access, llmOptions, messages, contextName: VChatGenerateContextName, contextRef: VChatContextRef | null, functions, forceFunctionName, maxTokens) => {
     const { llmRef, llmTemperature, llmResponseTokens } = llmOptions;
     try {
       return await apiAsync.llmOpenAI.chatGenerateWithFunctions.mutate({
@@ -73,6 +73,11 @@ export const ModelVendorOpenAI: IModelVendor
 void,