From cd4f72dd31a409bd2aa955c69504da9526a570ec Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 12 Nov 2024 13:46:13 +0700 Subject: [PATCH 1/5] chore: add qwen2.5-coder 14B and 32B models --- .../inference-cortex-extension/package.json | 2 +- .../qwen2.5-coder-14b-instruct/model.json | 36 +++++++++++++++++++ .../qwen2.5-coder-32b-instruct/model.json | 36 +++++++++++++++++++ .../rollup.config.ts | 4 +++ 4 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json create mode 100644 extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json index 5a9fc56e98..d262ad5ec0 100644 --- a/extensions/inference-cortex-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.20", + "version": "1.0.21", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. 
See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json new file mode 100644 index 0000000000..a445ee2db9 --- /dev/null +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json @@ -0,0 +1,36 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-coder-14b-instruct", + "object": "model", + "name": "Qwen2.5 Coder 14B Instruct Q4", + "version": "1.0", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": ["<|endoftext|>", "<|im_end|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["14B", "Featured"], + "size": 8990000000 + }, + "engine": "llama-cpp" + } + \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json new file mode 100644 index 0000000000..cffdf03df8 --- /dev/null +++ 
b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json @@ -0,0 +1,36 @@ +{ + "sources": [ + { + "filename": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", + "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf" + } + ], + "id": "qwen2.5-coder-32b-instruct", + "object": "model", + "name": "Qwen2.5 Coder 32B Instruct Q4", + "version": "1.0", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significant improvements in code generation, code reasoning and code fixing.", + "format": "gguf", + "settings": { + "ctx_len": 32768, + "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "llama_model_path": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", + "ngl": 29 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 32768, + "stop": ["<|endoftext|>", "<|im_end|>"], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "QwenLM", + "tags": ["32B", "Featured"], + "size": 19900000000 + }, + "engine": "llama-cpp" + } + \ No newline at end of file diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts index ea873990b9..d7dc8d40a3 100644 --- a/extensions/inference-cortex-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -49,6 +49,8 @@ const llama321bJson = require('./resources/models/llama3.2-1b-instruct/model.jso const llama323bJson = require('./resources/models/llama3.2-3b-instruct/model.json') const qwen257bJson = require('./resources/models/qwen2.5-7b-instruct/model.json') const qwen25coder7bJson = require('./resources/models/qwen2.5-coder-7b-instruct/model.json') +const qwen25coder14bJson = require('./resources/models/qwen2.5-coder-14b-instruct/model.json') +const 
qwen25coder32bJson = require('./resources/models/qwen2.5-coder-32b-instruct/model.json') const qwen2514bJson = require('./resources/models/qwen2.5-14b-instruct/model.json') const qwen2532bJson = require('./resources/models/qwen2.5-32b-instruct/model.json') const qwen2572bJson = require('./resources/models/qwen2.5-72b-instruct/model.json') @@ -108,6 +110,8 @@ export default [ llama323bJson, qwen257bJson, qwen25coder7bJson, + qwen25coder14bJson, + qwen25coder32bJson, qwen2514bJson, qwen2532bJson, qwen2572bJson, From 818f4757caeccc366b3355076accdf5e854d2378 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 12 Nov 2024 15:10:13 +0700 Subject: [PATCH 2/5] fix: reload window on gpu update instead of relaunch --- web/screens/Settings/Advanced/index.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx index 0b1438c477..ccd3d76985 100644 --- a/web/screens/Settings/Advanced/index.tsx +++ b/web/screens/Settings/Advanced/index.tsx @@ -307,7 +307,7 @@ const Advanced = () => { } // Stop any running model to apply the changes if (e.target.checked !== gpuEnabled) - stopModel().then(() => window.core?.api?.relaunch()) + stopModel().then(() => window.location.reload()) }} /> } From e87d25de85eb4c5c249fc16d6f0c4cfc8d15ad8b Mon Sep 17 00:00:00 2001 From: Faisal Amir Date: Tue, 12 Nov 2024 15:30:18 +0700 Subject: [PATCH 3/5] fix: calculate base layout with system monitor panel (#3999) --- .../BottomPanel/SystemMonitor/index.tsx | 3 +-- .../Thread/ThreadCenterPanel/index.tsx | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx index 14055b5352..3dfdff2f9e 100644 --- a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx +++ b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx @@ -79,7 +79,7 @@ const SystemMonitor = () => { 
{showSystemMonitorPanel && (
{ {ramUtilitized}%
- {gpus.length > 0 && (
{gpus.map((gpu, index) => { diff --git a/web/screens/Thread/ThreadCenterPanel/index.tsx b/web/screens/Thread/ThreadCenterPanel/index.tsx index 3f74181f76..1f23e9dc5b 100644 --- a/web/screens/Thread/ThreadCenterPanel/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/index.tsx @@ -147,6 +147,20 @@ const ThreadCenterPanel = () => { const showSystemMonitorPanel = useAtomValue(showSystemMonitorPanelAtom) + const [height, setHeight] = useState(0) + + useEffect(() => { + if (showSystemMonitorPanel) { + const element = document.querySelector('.system-monitor-panel') + + if (element) { + setHeight(element.clientHeight) // You can also use offsetHeight if needed + } + } else { + setHeight(0) + } + }, [showSystemMonitorPanel]) + return (
{ )}
{activeThread ? (
From 1bd58e13276b9dc075f05d5f29de669d5f1d1411 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 12 Nov 2024 15:46:14 +0700 Subject: [PATCH 4/5] fix: shift model events subscription to cortex --- .../rollup.config.ts | 1 + .../src/@types/global.d.ts | 1 + .../inference-cortex-extension/src/index.ts | 67 ++++++++++++++++++- extensions/model-extension/src/cortex.ts | 55 +-------------- 4 files changed, 68 insertions(+), 56 deletions(-) diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts index d7dc8d40a3..34ad9295d6 100644 --- a/extensions/inference-cortex-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -119,6 +119,7 @@ export default [ NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), + CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), }), // Allow json resolution json(), diff --git a/extensions/inference-cortex-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts index 64ae5a6e78..48dbcd7806 100644 --- a/extensions/inference-cortex-extension/src/@types/global.d.ts +++ b/extensions/inference-cortex-extension/src/@types/global.d.ts @@ -1,5 +1,6 @@ declare const NODE: string declare const CORTEX_API_URL: string +declare const CORTEX_SOCKET_URL: string declare const DEFAULT_SETTINGS: Array declare const MODELS: Array diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index d070ff9a39..44ec423dac 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -16,17 +16,29 @@ import { getJanDataFolderPath, extractModelLoadParams, fs, + events, + ModelEvent } from '@janhq/core' import PQueue from 'p-queue' import ky from 'ky' +/** + * Event subscription types of 
Downloader + */ +enum DownloadTypes { + DownloadUpdated = 'onFileDownloadUpdate', + DownloadError = 'onFileDownloadError', + DownloadSuccess = 'onFileDownloadSuccess', + DownloadStopped = 'onFileDownloadStopped', + DownloadStarted = 'onFileDownloadStarted', +} + /** * A class that implements the InferenceExtension interface from the @janhq/core package. * The class provides methods for initializing and stopping a model, and for making inference requests. * It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ export default class JanInferenceCortexExtension extends LocalOAIEngine { - // DEPRECATED nodeModule: string = 'node' queue = new PQueue({ concurrency: 1 }) @@ -38,6 +50,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { */ inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions` + /** + * Socket instance of events subscription + */ + socket?: WebSocket = undefined + /** * Subscribes to events emitted by the @janhq/core package. 
*/ @@ -55,6 +72,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { this.queue.add(() => this.healthz()) + this.subscribeToEvents() + window.addEventListener('beforeunload', () => { this.clean() }) @@ -138,7 +157,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { methods: ['get'], }, }) - .then(() => {}) + .then(() => { }) } /** @@ -154,6 +173,50 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { // Do nothing }) } + + /** + * Subscribe to cortex.cpp websocket events + */ + subscribeToEvents() { + this.queue.add( + () => + new Promise((resolve) => { + this.socket = new WebSocket(`${CORTEX_SOCKET_URL}/events`) + + this.socket.addEventListener('message', (event) => { + const data = JSON.parse(event.data) + const transferred = data.task.items.reduce( + (acc: number, cur: any) => acc + cur.downloadedBytes, + 0 + ) + const total = data.task.items.reduce( + (acc: number, cur: any) => acc + cur.bytes, + 0 + ) + const percent = total > 0 ? 
transferred / total : 0 + + events.emit(DownloadTypes[data.type as keyof typeof DownloadTypes], { + modelId: data.task.id, + percent: percent, + size: { + transferred: transferred, + total: total, + }, + }) + // Update models list from Hub + if (data.type === DownloadTypes.DownloadSuccess) { + // Delay for the state update from cortex.cpp + // Just to be sure + setTimeout(() => { + events.emit(ModelEvent.OnModelsUpdate, {}) + }, 500) + } + }) + resolve() + }) + ) + } + } /// Legacy diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts index b7111c8597..7a65e8e3fe 100644 --- a/extensions/model-extension/src/cortex.ts +++ b/extensions/model-extension/src/cortex.ts @@ -1,6 +1,6 @@ import PQueue from 'p-queue' import ky from 'ky' -import { events, extractModelLoadParams, Model, ModelEvent } from '@janhq/core' +import { extractModelLoadParams, Model } from '@janhq/core' import { extractInferenceParams } from '@janhq/core' /** * cortex.cpp Model APIs interface @@ -24,21 +24,11 @@ type ModelList = { data: any[] } -enum DownloadTypes { - DownloadUpdated = 'onFileDownloadUpdate', - DownloadError = 'onFileDownloadError', - DownloadSuccess = 'onFileDownloadSuccess', - DownloadStopped = 'onFileDownloadStopped', - DownloadStarted = 'onFileDownloadStarted', -} - export class CortexAPI implements ICortexAPI { queue = new PQueue({ concurrency: 1 }) - socket?: WebSocket = undefined constructor() { this.queue.add(() => this.healthz()) - this.subscribeToEvents() } /** @@ -172,49 +162,6 @@ export class CortexAPI implements ICortexAPI { .then(() => {}) } - /** - * Subscribe to cortex.cpp websocket events - */ - subscribeToEvents() { - this.queue.add( - () => - new Promise((resolve) => { - this.socket = new WebSocket(`${SOCKET_URL}/events`) - - this.socket.addEventListener('message', (event) => { - const data = JSON.parse(event.data) - const transferred = data.task.items.reduce( - (acc, cur) => acc + cur.downloadedBytes, - 0 - ) - const total = 
data.task.items.reduce( - (acc, cur) => acc + cur.bytes, - 0 - ) - const percent = total > 0 ? transferred / total : 0 - - events.emit(DownloadTypes[data.type], { - modelId: data.task.id, - percent: percent, - size: { - transferred: transferred, - total: total, - }, - }) - // Update models list from Hub - if (data.type === DownloadTypes.DownloadSuccess) { - // Delay for the state update from cortex.cpp - // Just to be sure - setTimeout(() => { - events.emit(ModelEvent.OnModelsUpdate, {}) - }, 500) - } - }) - resolve() - }) - ) - } - /** * TRansform model to the expected format (e.g. parameters, settings, metadata) * @param model From 23cbeeedc3b16c0a0ebae48c555f8d2a41321719 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 12 Nov 2024 15:59:09 +0700 Subject: [PATCH 5/5] fix: GPU settings are not being persisted properly --- web/screens/Settings/Advanced/index.tsx | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx index ccd3d76985..150f70398b 100644 --- a/web/screens/Settings/Advanced/index.tsx +++ b/web/screens/Settings/Advanced/index.tsx @@ -189,7 +189,7 @@ const Advanced = () => { * @param gpuId * @returns */ - const handleGPUChange = (gpuId: string) => { + const handleGPUChange = async (gpuId: string) => { let updatedGpusInUse = [...gpusInUse] if (updatedGpusInUse.includes(gpuId)) { updatedGpusInUse = updatedGpusInUse.filter((id) => id !== gpuId) @@ -208,7 +208,7 @@ const Advanced = () => { updatedGpusInUse.push(gpuId) } setGpusInUse(updatedGpusInUse) - saveSettings({ gpusInUse: updatedGpusInUse }) + await saveSettings({ gpusInUse: updatedGpusInUse }) window.core?.api?.relaunch() } @@ -306,8 +306,13 @@ const Advanced = () => { }) } // Stop any running model to apply the changes - if (e.target.checked !== gpuEnabled) - stopModel().then(() => window.location.reload()) + if (e.target.checked !== gpuEnabled) { + stopModel().finally(() => { + setTimeout(() => 
{ + window.location.reload() + }, 300) + }) + } }} /> }