diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts
index 6f045588ce..73637d2e75 100644
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@@ -119,9 +119,7 @@ export type ModelSettingParams = {
   embedding?: boolean
   n_parallel?: number
   cpu_threads?: number
-  system_prompt?: string
-  user_prompt?: string
-  ai_prompt?: string
+  prompt_template?: string
 }
 
 /**
diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts
index 047581dbed..4bfc63af76 100644
--- a/extensions/inference-nitro-extension/src/module.ts
+++ b/extensions/inference-nitro-extension/src/module.ts
@@ -46,9 +46,19 @@ async function initModel(wrapper: any): Promise<any> {
   } else {
     // Gather system information for CPU physical cores and memory
     const nitroResourceProbe = await getResourcesInfo();
-    console.log(
-      "Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
-    );
+
+    // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
+    if (wrapper.model.settings.prompt_template) {
+      const promptTemplate = wrapper.model.settings.prompt_template;
+      const prompt = promptTemplateConverter(promptTemplate);
+      if (prompt.error) {
+        return Promise.resolve({ error: prompt.error });
+      }
+      wrapper.model.settings.system_prompt = prompt.system_prompt;
+      wrapper.model.settings.user_prompt = prompt.user_prompt;
+      wrapper.model.settings.ai_prompt = prompt.ai_prompt;
+    }
+
     const settings = {
       llama_model_path: currentModelFile,
       ...wrapper.model.settings,
@@ -74,12 +84,53 @@ async function initModel(wrapper: any): Promise<any> {
   }
 }
 
+function promptTemplateConverter(promptTemplate) {
+  // Split the string using the markers
+  const systemMarker = "{system_message}";
+  const promptMarker = "{prompt}";
+
+  if (
+    promptTemplate.includes(systemMarker) &&
+    promptTemplate.includes(promptMarker)
+  ) {
+    // Find the indices of the markers
+    const systemIndex = promptTemplate.indexOf(systemMarker);
+    const promptIndex = promptTemplate.indexOf(promptMarker);
+
+    // Extract the parts of the string
+    const system_prompt = promptTemplate.substring(0, systemIndex);
+    const user_prompt = promptTemplate.substring(
+      systemIndex + systemMarker.length,
+      promptIndex
+    );
+    const ai_prompt = promptTemplate.substring(
+      promptIndex + promptMarker.length
+    );
+
+    // Return the split parts
+    return { system_prompt, user_prompt, ai_prompt };
+  } else if (promptTemplate.includes(promptMarker)) {
+    // Extract the parts of the string for the case where only promptMarker is present
+    const promptIndex = promptTemplate.indexOf(promptMarker);
+    const user_prompt = promptTemplate.substring(0, promptIndex);
+    const ai_prompt = promptTemplate.substring(
+      promptIndex + promptMarker.length
+    );
+    const system_prompt = "";
+
+    // Return the split parts
+    return { system_prompt, user_prompt, ai_prompt };
+  }
+
+  // Return an error if none of the conditions are met
+  return { error: "Cannot split prompt template" };
+}
+
 /**
  * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
  * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
  */
 function loadLLMModel(settings): Promise<Response> {
-  // Load model config
   return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
     method: "POST",
     headers: {
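For reference, a standalone TypeScript sketch of what the converter above returns for the two template shapes used by the model files below (expected values traced by hand through the marker logic; illustrative only, not part of the patch — it assumes promptTemplateConverter is in scope):

    // Both markers present (ChatML-style templates such as openhermes-neural-7b):
    const full = promptTemplateConverter(
      "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
    );
    // => system_prompt: "<|im_start|>system\n"
    //    user_prompt:   "<|im_end|>\n<|im_start|>user\n"
    //    ai_prompt:     "<|im_end|>\n<|im_start|>assistant"

    // Only {prompt} present (e.g. mistral-ins-7b-q5):
    const userOnly = promptTemplateConverter("[INST]{prompt}\n[/INST]");
    // => system_prompt: "", user_prompt: "[INST]", ai_prompt: "\n[/INST]"

    // Neither marker present:
    const invalid = promptTemplateConverter("no placeholders here");
    // => { error: "Cannot split prompt template" }

Note that indexOf matches only the first occurrence of each marker, so templates are assumed to contain each placeholder at most once, with {system_message} appearing before {prompt}.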
diff --git a/models/capybara-34b/model.json b/models/capybara-34b/model.json
index e4263f9572..e337191a91 100644
--- a/models/capybara-34b/model.json
+++ b/models/capybara-34b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "USER:\n",
-    "ai_prompt": "ASSISTANT:\n"
+    "prompt_template": "USER:\n{prompt}\nASSISTANT:"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/deepseek-coder-1.3b/model.json b/models/deepseek-coder-1.3b/model.json
index 70f86ec896..77fa2d1f77 100644
--- a/models/deepseek-coder-1.3b/model.json
+++ b/models/deepseek-coder-1.3b/model.json
@@ -9,9 +9,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "### Instruction:\n",
-    "ai_prompt": "### Response:\n"
+    "prompt_template": "### Instruction:\n{prompt}\n### Response:"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/deepseek-coder-34b/model.json b/models/deepseek-coder-34b/model.json
index 4024193575..80b224691b 100644
--- a/models/deepseek-coder-34b/model.json
+++ b/models/deepseek-coder-34b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "### Instruction:\n",
-    "ai_prompt": "### Response:\n"
+    "prompt_template": "### Instruction:\n{prompt}\n### Response:"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/llama2-chat-70b-q4/model.json b/models/llama2-chat-70b-q4/model.json
index 07886aed59..abe44ddad5 100644
--- a/models/llama2-chat-70b-q4/model.json
+++ b/models/llama2-chat-70b-q4/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "[INST] <<SYS>>\n",
-    "user_prompt": "<</SYS>>\n",
-    "ai_prompt": "[/INST]"
+    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/llama2-chat-7b-q4/model.json b/models/llama2-chat-7b-q4/model.json
index 2d1a67236e..9ec6014296 100644
--- a/models/llama2-chat-7b-q4/model.json
+++ b/models/llama2-chat-7b-q4/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "[INST] <<SYS>>\n",
-    "user_prompt": "<</SYS>>\n",
-    "ai_prompt": "[/INST]"
+    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/llama2-chat-7b-q5/model.json b/models/llama2-chat-7b-q5/model.json
index 96c652ab2f..1e999003f5 100644
--- a/models/llama2-chat-7b-q5/model.json
+++ b/models/llama2-chat-7b-q5/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "[INST] <<SYS>>\n",
-    "user_prompt": "<</SYS>>\n",
-    "ai_prompt": "[/INST]"
+    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/lzlv-70b/model.json b/models/lzlv-70b/model.json
index ca6af617ec..cb2387432a 100644
--- a/models/lzlv-70b/model.json
+++ b/models/lzlv-70b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "USER:\n",
-    "ai_prompt": "ASSISTANT:\n"
+    "prompt_template": "USER:\n{prompt}\nASSISTANT:"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/mistral-ins-7b-q4/model.json b/models/mistral-ins-7b-q4/model.json
index 9fc86f1bea..9f2c22a03e 100644
--- a/models/mistral-ins-7b-q4/model.json
+++ b/models/mistral-ins-7b-q4/model.json
@@ -10,7 +10,8 @@
     "ctx_len": 2048,
     "system_prompt": "",
     "user_prompt": "[INST]",
-    "ai_prompt": "[/INST]"
+    "ai_prompt": "[/INST]",
+    "prompt_template": "[INST]{prompt}\n[/INST]"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/mistral-ins-7b-q5/model.json b/models/mistral-ins-7b-q5/model.json
index 291f2ad0ef..328ba2926e 100644
--- a/models/mistral-ins-7b-q5/model.json
+++ b/models/mistral-ins-7b-q5/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "[INST]",
-    "ai_prompt": "[/INST]"
+    "prompt_template": "[INST]{prompt}\n[/INST]"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/neural-chat-7b/model.json b/models/neural-chat-7b/model.json
index 1d62714693..20c14c5073 100644
--- a/models/neural-chat-7b/model.json
+++ b/models/neural-chat-7b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "### System:\n",
-    "user_prompt": "### User:\n",
-    "ai_prompt": "### Assistant:\n"
+    "prompt_template": "### System:\n{system_message}### User:\n{prompt}### Assistant:"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/noromaid-20b/model.json b/models/noromaid-20b/model.json
index 5c937a8313..961c4bd25f 100644
--- a/models/noromaid-20b/model.json
+++ b/models/noromaid-20b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "### Instruction:\n",
-    "ai_prompt": "### Response:\n"
+    "prompt_template": "### Instruction:{prompt}\n### Response:"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/openhermes-neural-7b/model.json b/models/openhermes-neural-7b/model.json
index 8dcb51ad7f..bc4a4e3154 100644
--- a/models/openhermes-neural-7b/model.json
+++ b/models/openhermes-neural-7b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "<|im_start|>system\n",
-    "user_prompt": "<|im_end|>\n<|im_start|>user\n",
-    "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/openorca-13b/model.json b/models/openorca-13b/model.json
index 42c8bd96e7..3478ad5285 100644
--- a/models/openorca-13b/model.json
+++ b/models/openorca-13b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "<|im_start|>system\n",
-    "user_prompt": "<|im_end|>\n<|im_start|>user\n",
-    "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
   },
   "parameters": {
     "max_tokens": 2048
diff --git a/models/phind-34b/model.json b/models/phind-34b/model.json
index eb3c1a18af..d3fc6664ab 100644
--- a/models/phind-34b/model.json
+++ b/models/phind-34b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "### System Prompt\n",
-    "user_prompt": "### User Message\n",
-    "ai_prompt": "### Assistant\n"
+    "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant"
   },
   "parameters": {
     "max_tokens": 2048
"ai_prompt": "<|im_end|>\n<|im_start|>assistant\n" + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" }, "parameters": { "max_tokens": 2048 diff --git a/models/starling-7b/model.json b/models/starling-7b/model.json index 52b03f8b1b..cdb06c2c72 100644 --- a/models/starling-7b/model.json +++ b/models/starling-7b/model.json @@ -8,9 +8,7 @@ "format": "gguf", "settings": { "ctx_len": 2048, - "system_prompt": "", - "user_prompt": "GPT4 User: ", - "ai_prompt": "<|end_of_turn|>\nGPT4 Assistant: " + "prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:" }, "parameters": { "max_tokens": 2048 diff --git a/models/tiefighter-13b/model.json b/models/tiefighter-13b/model.json index 20075777c1..3f5abe3e3b 100644 --- a/models/tiefighter-13b/model.json +++ b/models/tiefighter-13b/model.json @@ -8,9 +8,7 @@ "format": "gguf", "settings": { "ctx_len": 2048, - "system_prompt": "", - "user_prompt": "### Instruction: ", - "ai_prompt": "\n### Response: " + "prompt_template": "### Instruction:\n{prompt}\n### Response:" }, "parameters": { "max_tokens": 2048 diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json index bb6aeaf5c6..d924be0460 100644 --- a/models/tinyllama-1.1b/model.json +++ b/models/tinyllama-1.1b/model.json @@ -8,9 +8,7 @@ "format": "gguf", "settings": { "ctx_len": 2048, - "system_prompt": "<|system|>\n", - "user_prompt": "<|user|>\n", - "ai_prompt": "<|assistant|>\n" + "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>" }, "parameters": { "max_tokens": 2048 diff --git a/models/wizardcoder-13b/model.json b/models/wizardcoder-13b/model.json index 40f2750372..774c1a4241 100644 --- a/models/wizardcoder-13b/model.json +++ b/models/wizardcoder-13b/model.json @@ -8,9 +8,7 @@ "format": "gguf", "settings": { "ctx_len": 2048, - "system_prompt": "", - "user_prompt": "### Instruction:\n", - "ai_prompt": "### Response:\n" + "prompt_template": "### Instruction:\n{prompt}\n### Response:" }, "parameters": { "max_tokens": 2048 diff --git a/models/yi-34b/model.json b/models/yi-34b/model.json index ab111591cf..6a570b4de4 100644 --- a/models/yi-34b/model.json +++ b/models/yi-34b/model.json @@ -8,9 +8,7 @@ "format": "gguf", "settings": { "ctx_len": 2048, - "system_prompt": "<|im_start|>system\n", - "user_prompt": "<|im_end|>\n<|im_start|>user\n", - "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n" + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" }, "parameters": { "max_tokens": 2048 diff --git a/models/zephyr-beta-7b/model.json b/models/zephyr-beta-7b/model.json index 4993366fd5..b12f03c67b 100644 --- a/models/zephyr-beta-7b/model.json +++ b/models/zephyr-beta-7b/model.json @@ -8,9 +8,7 @@ "format": "gguf", "settings": { "ctx_len": 2048, - "system_prompt": "<|system|>\n", - "user_prompt": "\n<|user|>\n", - "ai_prompt": "\n<|assistant|>\n" + "prompt_template": "<|system|>\n{system_message}\n<|user|>\n{prompt}\n<|assistant|>" }, "parameters": { "max_tokens": 2048 diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 15084278ca..dc9e6e33f5 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -1,6 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { EventName, events } from '@janhq/core' -import { Model, ModelSettingParams } from '@janhq/core' +import { Model } from '@janhq/core' import { atom, useAtom } from 'jotai' import { 
toaster } from '@/containers/Toast'
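By construction the split is lossless: concatenating the three parts around concrete messages reproduces the original template with its placeholders filled in. A minimal recombination sketch (a hypothetical helper for illustration, not code from this patch):

    // Hypothetical helper (illustration only): wrap concrete messages in the
    // split prefixes, mirroring how a template's placeholders get filled.
    function renderPrompt(
      parts: { system_prompt: string; user_prompt: string; ai_prompt: string },
      systemMessage: string,
      userMessage: string
    ): string {
      return (
        parts.system_prompt +
        systemMessage +
        parts.user_prompt +
        userMessage +
        parts.ai_prompt
      );
    }

With zephyr-beta-7b's template, for example, renderPrompt(promptTemplateConverter(template), system, user) yields "<|system|>\n" + system + "\n<|user|>\n" + user + "\n<|assistant|>", i.e. the original prompt_template with {system_message} and {prompt} replaced.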