feat: Inference Nitro with Prompt Template #952

Merged: 3 commits, Dec 13, 2023
4 changes: 1 addition & 3 deletions core/src/types/model/modelEntity.ts
@@ -119,9 +119,7 @@ export type ModelSettingParams = {
   embedding?: boolean
   n_parallel?: number
   cpu_threads?: number
-  system_prompt?: string
-  user_prompt?: string
-  ai_prompt?: string
+  prompt_template?: string
 }

 /**
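With this change, a model's settings carry one template string instead of three per-role fragments. A minimal TypeScript sketch of the new shape (the literal values are illustrative, borrowed from the model files below; ModelSettingParams is the type edited above):

import { ModelSettingParams } from "@janhq/core"

const settings: ModelSettingParams = {
  ctx_len: 2048,
  prompt_template: "### Instruction:\n{prompt}\n### Response:",
}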
59 changes: 55 additions & 4 deletions extensions/inference-nitro-extension/src/module.ts
@@ -46,9 +46,19 @@ async function initModel(wrapper: any): Promise<ModelOperationResponse> {
  } else {
    // Gather system information for CPU physical cores and memory
    const nitroResourceProbe = await getResourcesInfo();
    console.log(
      "Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
    );

    // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
    if (wrapper.model.settings.prompt_template) {
      const promptTemplate = wrapper.model.settings.prompt_template;
      const prompt = promptTemplateConverter(promptTemplate);
      if (prompt.error) {
        return Promise.resolve({ error: prompt.error });
      }
      wrapper.model.settings.system_prompt = prompt.system_prompt;
      wrapper.model.settings.user_prompt = prompt.user_prompt;
      wrapper.model.settings.ai_prompt = prompt.ai_prompt;
    }

    const settings = {
      llama_model_path: currentModelFile,
      ...wrapper.model.settings,
@@ -74,12 +84,53 @@
  }
}

function promptTemplateConverter(promptTemplate) {
  // Split the string using the markers
  const systemMarker = "{system_message}";
  const promptMarker = "{prompt}";

  if (
    promptTemplate.includes(systemMarker) &&
    promptTemplate.includes(promptMarker)
  ) {
    // Find the indices of the markers
    const systemIndex = promptTemplate.indexOf(systemMarker);
    const promptIndex = promptTemplate.indexOf(promptMarker);

    // Extract the parts of the string
    const system_prompt = promptTemplate.substring(0, systemIndex);
    const user_prompt = promptTemplate.substring(
      systemIndex + systemMarker.length,
      promptIndex
    );
    const ai_prompt = promptTemplate.substring(
      promptIndex + promptMarker.length
    );

    // Return the split parts
    return { system_prompt, user_prompt, ai_prompt };
  } else if (promptTemplate.includes(promptMarker)) {
    // Extract the parts of the string for the case where only promptMarker is present
    const promptIndex = promptTemplate.indexOf(promptMarker);
    const user_prompt = promptTemplate.substring(0, promptIndex);
    const ai_prompt = promptTemplate.substring(
      promptIndex + promptMarker.length
    );
    const system_prompt = "";

    // Return the split parts
    return { system_prompt, user_prompt, ai_prompt };
  }

  // Return an error if none of the conditions are met
  return { error: "Cannot split prompt template" };
}

/**
 * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
 * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
 */
function loadLLMModel(settings): Promise<Response> {
  // Load model config
  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
    method: "POST",
    headers: {
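For illustration, a sketch of what promptTemplateConverter returns for the template shapes used by the model files below (the inputs are templates from this PR; the outputs follow directly from the substring logic above):

// Both markers present (the ChatML-style models below):
promptTemplateConverter(
  "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
);
// => {
//      system_prompt: "<|im_start|>system\n",
//      user_prompt: "<|im_end|>\n<|im_start|>user\n",
//      ai_prompt: "<|im_end|>\n<|im_start|>assistant",
//    }

// Only {prompt} present (e.g. capybara-34b below):
promptTemplateConverter("USER:\n{prompt}\nASSISTANT:");
// => { system_prompt: "", user_prompt: "USER:\n", ai_prompt: "\nASSISTANT:" }

// No {prompt} marker: initModel surfaces this as an error response
promptTemplateConverter("no prompt marker here");
// => { error: "Cannot split prompt template" }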
4 changes: 1 addition & 3 deletions models/capybara-34b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "USER:\n",
-    "ai_prompt": "ASSISTANT:\n"
+    "prompt_template": "USER:\n{prompt}\nASSISTANT:"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/deepseek-coder-1.3b/model.json
@@ -9,9 +9,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "### Instruction:\n",
-    "ai_prompt": "### Response:\n"
+    "prompt_template": "### Instruction:\n{prompt}\n### Response:"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/deepseek-coder-34b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "### Instruction:\n",
-    "ai_prompt": "### Response:\n"
+    "prompt_template": "### Instruction:\n{prompt}\n### Response:"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/llama2-chat-70b-q4/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "[INST] <<SYS>>\n",
-    "user_prompt": "<</SYS>>\n",
-    "ai_prompt": "[/INST]"
+    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/llama2-chat-7b-q4/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "[INST] <<SYS>>\n",
-    "user_prompt": "<</SYS>>\n",
-    "ai_prompt": "[/INST]"
+    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/llama2-chat-7b-q5/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "[INST] <<SYS>>\n",
-    "user_prompt": "<</SYS>>\n",
-    "ai_prompt": "[/INST]"
+    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/lzlv-70b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "USER:\n",
-    "ai_prompt": "ASSISTANT:\n"
+    "prompt_template": "USER:\n{prompt}\nASSISTANT:"
   },
   "parameters": {
     "max_tokens": 2048
3 changes: 2 additions & 1 deletion models/mistral-ins-7b-q4/model.json
@@ -10,7 +10,8 @@
     "ctx_len": 2048,
     "system_prompt": "",
     "user_prompt": "<s>[INST]",
-    "ai_prompt": "[/INST]"
+    "ai_prompt": "[/INST]",
+    "prompt_template": "<s>[INST]{prompt}\n[/INST]"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/mistral-ins-7b-q5/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "<s>[INST]",
-    "ai_prompt": "[/INST]"
+    "prompt_template": "<s>[INST]{prompt}\n[/INST]"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/neural-chat-7b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "### System:\n",
-    "user_prompt": "### User:\n",
-    "ai_prompt": "### Assistant:\n"
+    "prompt_template": "### System:\n{system_message}### User:\n{prompt}### Assistant:"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/noromaid-20b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "### Instruction:\n",
-    "ai_prompt": "### Response:\n"
+    "prompt_template": "### Instruction:{prompt}\n### Response:"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/openhermes-neural-7b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "<|im_start|>system\n",
-    "user_prompt": "<|im_end|>\n<|im_start|>user\n",
-    "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/openorca-13b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "<|im_start|>system\n",
-    "user_prompt": "<|im_end|>\n<|im_start|>user\n",
-    "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/phind-34b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "### System Prompt\n",
-    "user_prompt": "### User Message\n",
-    "ai_prompt": "### Assistant\n"
+    "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/rocket-3b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 4096,
-    "system_prompt": "<|im_start|>system\n",
-    "user_prompt": "<|im_end|>\n<|im_start|>user\n",
-    "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/starling-7b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "GPT4 User: ",
-    "ai_prompt": "<|end_of_turn|>\nGPT4 Assistant: "
+    "prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/tiefighter-13b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "### Instruction: ",
-    "ai_prompt": "\n### Response: "
+    "prompt_template": "### Instruction:\n{prompt}\n### Response:"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/tinyllama-1.1b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "<|system|>\n",
-    "user_prompt": "<|user|>\n",
-    "ai_prompt": "<|assistant|>\n"
+    "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/wizardcoder-13b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "",
-    "user_prompt": "### Instruction:\n",
-    "ai_prompt": "### Response:\n"
+    "prompt_template": "### Instruction:\n{prompt}\n### Response:"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/yi-34b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "<|im_start|>system\n",
-    "user_prompt": "<|im_end|>\n<|im_start|>user\n",
-    "ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
+    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
   },
   "parameters": {
     "max_tokens": 2048
4 changes: 1 addition & 3 deletions models/zephyr-beta-7b/model.json
@@ -8,9 +8,7 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 2048,
-    "system_prompt": "<|system|>\n",
-    "user_prompt": "</s>\n<|user|>\n",
-    "ai_prompt": "</s>\n<|assistant|>\n"
+    "prompt_template": "<|system|>\n{system_message}</s>\n<|user|>\n{prompt}</s>\n<|assistant|>"
   },
   "parameters": {
     "max_tokens": 2048
2 changes: 1 addition & 1 deletion web/hooks/useActiveModel.ts
@@ -1,6 +1,6 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 import { EventName, events } from '@janhq/core'
-import { Model, ModelSettingParams } from '@janhq/core'
+import { Model } from '@janhq/core'
 import { atom, useAtom } from 'jotai'

 import { toaster } from '@/containers/Toast'