From 2e4532593f65e0f525ffc43c2f7b5eff7ba41435 Mon Sep 17 00:00:00 2001
From: Enrico Ros
Date: Tue, 7 May 2024 00:58:02 -0700
Subject: [PATCH] Toggle JSON mode, Fixes #515

---
 src/modules/llms/server/llm.server.streaming.ts  |  2 +-
 src/modules/llms/server/ollama/ollama.router.ts  |  6 ++++--
 .../llms/server/ollama/ollama.wiretypes.ts       |  8 ++++++--
 .../llms/vendors/ollama/OllamaSourceSetup.tsx    | 15 +++++++++++++--
 src/modules/llms/vendors/ollama/ollama.vendor.ts |  4 +++-
 5 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/src/modules/llms/server/llm.server.streaming.ts b/src/modules/llms/server/llm.server.streaming.ts
index ba1f7002c..88b7b4e2e 100644
--- a/src/modules/llms/server/llm.server.streaming.ts
+++ b/src/modules/llms/server/llm.server.streaming.ts
@@ -513,7 +513,7 @@ function _prepareRequestData(access: ChatStreamingInputSchema['access'], model:
     case 'ollama':
       return {
         ...ollamaAccess(access, OLLAMA_PATH_CHAT),
-        body: ollamaChatCompletionPayload(model, history, true),
+        body: ollamaChatCompletionPayload(model, history, access.ollamaJson, true),
         vendorMuxingFormat: 'json-nl',
         vendorStreamParser: createStreamParserOllama(),
       };
diff --git a/src/modules/llms/server/ollama/ollama.router.ts b/src/modules/llms/server/ollama/ollama.router.ts
index c275b3ae9..fcc0999c8 100644
--- a/src/modules/llms/server/ollama/ollama.router.ts
+++ b/src/modules/llms/server/ollama/ollama.router.ts
@@ -40,12 +40,13 @@ export function ollamaAccess(access: OllamaAccessSchema, apiPath: string): { hea
 }
 
 
-export const ollamaChatCompletionPayload = (model: OpenAIModelSchema, history: OpenAIHistorySchema, stream: boolean): WireOllamaChatCompletionInput => ({
+export const ollamaChatCompletionPayload = (model: OpenAIModelSchema, history: OpenAIHistorySchema, jsonOutput: boolean, stream: boolean): WireOllamaChatCompletionInput => ({
   model: model.id,
   messages: history,
   options: {
     ...(model.temperature !== undefined && { temperature: model.temperature }),
   },
+  ...(jsonOutput && { format: 'json' }),
   // n: ...
   // functions: ...
   // function_call: ...
@@ -101,6 +102,7 @@ async function ollamaPOST(access:
 export const ollamaAccessSchema = z.object({
   dialect: z.enum(['ollama']),
   ollamaHost: z.string().trim(),
+  ollamaJson: z.boolean(),
 });
 export type OllamaAccessSchema = z.infer<typeof ollamaAccessSchema>;
 
@@ -250,7 +252,7 @@ export const llmOllamaRouter = createTRPCRouter({
     .output(llmsChatGenerateOutputSchema)
     .mutation(async ({ input: { access, history, model } }) => {
 
-      const wireGeneration = await ollamaPOST(access, ollamaChatCompletionPayload(model, history, false), OLLAMA_PATH_CHAT);
+      const wireGeneration = await ollamaPOST(access, ollamaChatCompletionPayload(model, history, access.ollamaJson, false), OLLAMA_PATH_CHAT);
       const generation = wireOllamaChunkedOutputSchema.parse(wireGeneration);
 
       if ('error' in generation)
diff --git a/src/modules/llms/server/ollama/ollama.wiretypes.ts b/src/modules/llms/server/ollama/ollama.wiretypes.ts
index a7626dba1..9a600e993 100644
--- a/src/modules/llms/server/ollama/ollama.wiretypes.ts
+++ b/src/modules/llms/server/ollama/ollama.wiretypes.ts
@@ -46,12 +46,13 @@ const wireOllamaChatCompletionInputSchema = z.object({
   messages: z.array(z.object({
     role: z.enum(['assistant', 'system', 'user']),
     content: z.string(),
+    images: z.array(z.string()).optional(), // base64 encoded images
   })),
 
   // optional
   format: z.enum(['json']).optional(),
   options: z.object({
-    // https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md
+    // https://github.com/ollama/ollama/blob/main/docs/modelfile.md
     // Maximum number of tokens to predict when generating text.
     num_predict: z.number().int().optional(),
     // Sets the random number seed to use for generation
@@ -63,8 +64,11 @@
     // Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text. (Default 0.9)
     top_p: z.number().positive().optional(),
   }).optional(),
-  template: z.string().optional(), // overrides what is defined in the Modelfile
   stream: z.boolean().optional(), // default: true
+  keep_alive: z.string().optional(), // e.g. '5m'
+
+  // Note: not used anymore as of 2024-05-07?
+  // template: z.string().optional(), // overrides what is defined in the Modelfile
 
   // Future Improvements?
   // n: z.number().int().optional(), // number of completions to generate
diff --git a/src/modules/llms/vendors/ollama/OllamaSourceSetup.tsx b/src/modules/llms/vendors/ollama/OllamaSourceSetup.tsx
index 2496df6f5..52476f873 100644
--- a/src/modules/llms/vendors/ollama/OllamaSourceSetup.tsx
+++ b/src/modules/llms/vendors/ollama/OllamaSourceSetup.tsx
@@ -2,6 +2,7 @@ import * as React from 'react';
 
 import { Button } from '@mui/joy';
 
+import { FormSwitchControl } from '~/common/components/forms/FormSwitchControl';
 import { FormTextField } from '~/common/components/forms/FormTextField';
 import { InlineError } from '~/common/components/InlineError';
 import { Link } from '~/common/components/Link';
@@ -26,7 +27,7 @@ export function OllamaSourceSetup(props: { sourceId: DModelSourceId }) {
     useSourceSetup(props.sourceId, ModelVendorOllama);
 
   // derived state
-  const { ollamaHost } = access;
+  const { ollamaHost, ollamaJson } = access;
 
   const hostValid = !!asValidURL(ollamaHost);
   const hostError = !!ollamaHost && !hostValid;
@@ -41,13 +42,23 @@ export function OllamaSourceSetup(props: { sourceId: DModelSourceId }) {
     <FormTextField
       …
-      description={<Link …>information</Link>}
+      description={<Link …>Information</Link>}
       placeholder='http://127.0.0.1:11434'
       isError={hostError}
       value={ollamaHost || ''}
       onChange={text => updateSetup({ ollamaHost: text })}
     />
 
+    <FormSwitchControl
+      …
+      description={<Link …>Information</Link>}
+      checked={ollamaJson}
+      onChange={on => {
+        updateSetup({ ollamaJson: on });
+        refetch();
+      }}
+    />
+
diff --git a/src/modules/llms/vendors/ollama/ollama.vendor.ts b/src/modules/llms/vendors/ollama/ollama.vendor.ts
--- a/src/modules/llms/vendors/ollama/ollama.vendor.ts
+++ b/src/modules/llms/vendors/ollama/ollama.vendor.ts
@@ … @@
   ({
     dialect: 'ollama',
     ollamaHost: partialSetup?.ollamaHost || '',
+    ollamaJson: partialSetup?.ollamaJson || false,
   }),
 
   // List Models
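
Usage sketch (not part of the patch): with the toggle on, the payload gains `format: 'json'`, which makes Ollama constrain generation to well-formed JSON. A minimal TypeScript illustration of the resulting wire request, assuming a local Ollama at the default host; the model id and prompt are placeholders, and the direct fetch stands in for the server's ollamaPOST helper:

    // What reaches Ollama's /api/chat once ollamaJson is true.
    const res = await fetch('http://127.0.0.1:11434/api/chat', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: 'llama3',                 // placeholder model id
        messages: [{ role: 'user', content: 'List three colors.' }],
        options: { temperature: 0.5 },
        format: 'json',                  // the field this patch toggles on
        stream: false,                   // the streaming path passes true instead
      }),
    });
    const data = await res.json();
    console.log(data.message?.content);  // a JSON string, e.g. '{"colors":["red","green","blue"]}'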