From ba13676e5ffb8cde118f62f84d829af5d463a2cc Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 15 May 2024 14:24:25 +0700 Subject: [PATCH] feat: cortex onboarding (#565) --- cortex-js/package.json | 3 + cortex-js/src/command.module.ts | 6 + .../commanders/basic-command.commander.ts | 9 +- .../infrastructure/commanders/init.command.ts | 140 ++++++++++++++++++ .../commanders/inquirer/init.questions.ts | 39 +++++ .../commanders/start.command.ts | 8 + .../providers/cortex/cortex.provider.ts | 1 + .../src/usecases/cortex/cortex.usecases.ts | 47 +++--- cortex-js/tsconfig.json | 1 + 9 files changed, 226 insertions(+), 28 deletions(-) create mode 100644 cortex-js/src/infrastructure/commanders/init.command.ts create mode 100644 cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts diff --git a/cortex-js/package.json b/cortex-js/package.json index f5519d13a..3b3989381 100644 --- a/cortex-js/package.json +++ b/cortex-js/package.json @@ -33,10 +33,12 @@ "@nestjs/mapped-types": "*", "@nestjs/platform-express": "^10.0.0", "@nestjs/swagger": "^7.3.1", + "@terascope/fetch-github-release": "^0.8.8", "axios": "^1.6.8", "class-transformer": "^0.5.1", "class-validator": "^0.14.1", "cli-progress": "^3.12.0", + "decompress": "^4.2.1", "nest-commander": "^3.13.0", "readline": "^1.3.0", "reflect-metadata": "^0.2.0", @@ -52,6 +54,7 @@ "@nestjs/testing": "^10.0.0", "@nestjs/typeorm": "^10.0.2", "@types/cli-progress": "^3.11.5", + "@types/decompress": "^4.2.7", "@types/express": "^4.17.17", "@types/jest": "^29.5.2", "@types/node": "^20.12.9", diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts index f3e4efd42..d1c0619f1 100644 --- a/cortex-js/src/command.module.ts +++ b/cortex-js/src/command.module.ts @@ -11,6 +11,9 @@ import { ModelsCommand } from './infrastructure/commanders/models.command'; import { StartCommand } from './infrastructure/commanders/start.command'; import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module'; import { ChatModule } from './usecases/chat/chat.module'; +import { InitCommand } from './infrastructure/commanders/init.command'; +import { HttpModule } from '@nestjs/axios'; +import { CreateInitQuestions } from './infrastructure/commanders/inquirer/init.questions'; @Module({ imports: [ @@ -24,6 +27,7 @@ import { ChatModule } from './usecases/chat/chat.module'; CortexModule, ChatModule, ExtensionModule, + HttpModule, ], providers: [ BasicCommand, @@ -32,6 +36,8 @@ import { ChatModule } from './usecases/chat/chat.module'; ServeCommand, InferenceCommand, StartCommand, + InitCommand, + CreateInitQuestions, ], }) export class CommandModule {} diff --git a/cortex-js/src/infrastructure/commanders/basic-command.commander.ts b/cortex-js/src/infrastructure/commanders/basic-command.commander.ts index ea0c71237..a5434c910 100644 --- a/cortex-js/src/infrastructure/commanders/basic-command.commander.ts +++ b/cortex-js/src/infrastructure/commanders/basic-command.commander.ts @@ -5,9 +5,16 @@ import { InferenceCommand } from './inference.command'; import { ModelsCommand } from './models.command'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; import { defaultCortexJsHost, defaultCortexJsPort } from 'constant'; +import { InitCommand } from './init.command'; @RootCommand({ - subCommands: [ModelsCommand, PullCommand, ServeCommand, InferenceCommand], + subCommands: [ + ModelsCommand, + PullCommand, + ServeCommand, + InferenceCommand, + InitCommand, + ], }) export class BasicCommand extends CommandRunner { constructor(private readonly cortexUsecases: CortexUsecases) { diff --git a/cortex-js/src/infrastructure/commanders/init.command.ts b/cortex-js/src/infrastructure/commanders/init.command.ts new file mode 100644 index 000000000..5f916663f --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/init.command.ts @@ -0,0 +1,140 @@ +import { createWriteStream, existsSync, rmSync } from 'fs'; +import { CommandRunner, SubCommand, InquirerService } from 'nest-commander'; +import { resolve } from 'path'; +import { HttpService } from '@nestjs/axios'; +import { Presets, SingleBar } from 'cli-progress'; +import decompress from 'decompress'; + +@SubCommand({ name: 'init', aliases: ['setup'] }) +export class InitCommand extends CommandRunner { + CORTEX_RELEASES_URL = 'https://api.github.com/repos/janhq/cortex/releases'; + + constructor( + private readonly httpService: HttpService, + private readonly inquirerService: InquirerService, + ) { + super(); + } + + async run(input: string[], options?: any): Promise { + options = await this.inquirerService.ask('create-init-questions', options); + const version = input[0] ?? 'latest'; + + await this.download(this.parseEngineFileName(options), version); + } + + download = async ( + engineFileName: string, + version: string = 'latest', + ): Promise => { + const res = await this.httpService + .get( + this.CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`, + { + headers: { + 'X-GitHub-Api-Version': '2022-11-28', + Accept: 'application/vnd.github+json', + }, + }, + ) + .toPromise(); + + if (!res?.data) { + console.log('Failed to fetch releases'); + process.exit(1); + } + + let release = res?.data; + if (Array.isArray(res?.data)) { + release = Array(res?.data)[0].find( + (e) => e.name === version.replace('v', ''), + ); + } + const toDownloadAsset = release.assets.find((s: any) => + s.name.includes(engineFileName), + ); + + if (!toDownloadAsset) { + console.log(`Could not find engine file ${engineFileName}`); + process.exit(1); + } + + console.log(`Downloading engine file ${engineFileName}`); + const engineDir = resolve(this.rootDir(), 'cortex-cpp'); + if (existsSync(engineDir)) rmSync(engineDir, { recursive: true }); + + const download = await this.httpService + .get(toDownloadAsset.browser_download_url, { + responseType: 'stream', + }) + .toPromise(); + if (!download) { + throw new Error('Failed to download model'); + } + + const destination = resolve(this.rootDir(), toDownloadAsset.name); + + await new Promise((resolve, reject) => { + const writer = createWriteStream(destination); + let receivedBytes = 0; + const totalBytes = download.headers['content-length']; + + writer.on('finish', () => { + bar.stop(); + resolve(true); + }); + + writer.on('error', (error) => { + bar.stop(); + reject(error); + }); + + const bar = new SingleBar({}, Presets.shades_classic); + bar.start(100, 0); + + download.data.on('data', (chunk: any) => { + receivedBytes += chunk.length; + bar.update(Math.floor((receivedBytes / totalBytes) * 100)); + }); + + download.data.pipe(writer); + }); + + try { + await decompress( + resolve(this.rootDir(), destination), + resolve(this.rootDir()), + ); + } catch (e) { + console.log(e); + process.exit(1); + } + process.exit(0); + }; + + parseEngineFileName = (options: { + runMode?: 'CPU' | 'GPU'; + gpuType?: 'Nvidia' | 'Others (Vulkan)'; + instructions?: 'AVX' | 'AVX2' | 'AVX-512' | undefined; + cudaVersion?: '11' | '12'; + }) => { + const platform = + process.platform === 'win32' + ? 'windows' + : process.platform === 'darwin' + ? 'mac' + : process.platform; + const arch = process.arch === 'arm64' ? process.arch : 'amd64'; + const cudaVersion = + options.runMode === 'GPU' + ? options.gpuType === 'Nvidia' + ? '-cuda-' + (options.cudaVersion === '11' ? '11.7' : '12.2') + : '-vulkan' + : ''; + const instructions = options.instructions ? `-${options.instructions}` : ''; + const engineName = `${platform}-${arch}${instructions.toLowerCase()}${cudaVersion}`; + return `${engineName}.tar.gz`; + }; + + rootDir = () => resolve(__dirname, `../../../`); +} diff --git a/cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts b/cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts new file mode 100644 index 000000000..2aadbe490 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts @@ -0,0 +1,39 @@ +import { Question, QuestionSet } from 'nest-commander'; + +@QuestionSet({ name: 'create-init-questions' }) +export class CreateInitQuestions { + @Question({ + type: 'list', + message: 'Select run mode', + name: 'runMode', + default: 'CPU', + choices: ['CPU', 'GPU'], + when: () => process.platform !== 'darwin', + }) + parseRunMode(val: string) { + return val; + } + + @Question({ + type: 'list', + message: 'Select GPU type', + name: 'gpuType', + default: 'Nvidia', + choices: ['Nvidia', 'Others (Vulkan)'], + when: (answers: any) => answers.runMode === 'GPU', + }) + parseGPUType(val: string) { + return val; + } + + @Question({ + type: 'list', + message: 'Select CPU instructions set', + name: 'instructions', + choices: ['AVX2', 'AVX', 'AVX-512'], + when: () => process.platform !== 'darwin', + }) + parseContent(val: string) { + return val; + } +} diff --git a/cortex-js/src/infrastructure/commanders/start.command.ts b/cortex-js/src/infrastructure/commanders/start.command.ts index a6b3a3429..734d2d108 100644 --- a/cortex-js/src/infrastructure/commanders/start.command.ts +++ b/cortex-js/src/infrastructure/commanders/start.command.ts @@ -2,6 +2,8 @@ import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CommandRunner, SubCommand } from 'nest-commander'; import { LoadModelDto } from '../dtos/models/load-model.dto'; +import { resolve } from 'path'; +import { existsSync } from 'fs'; @SubCommand({ name: 'start', aliases: ['run'] }) export class StartCommand extends CommandRunner { @@ -26,6 +28,10 @@ export class StartCommand extends CommandRunner { } private async startCortex() { + if (!existsSync(resolve(this.rootDir(), 'cortex-cpp'))) { + console.log('Please init the cortex by running cortex init command!'); + process.exit(0); + } const host = '127.0.0.1'; const port = '3928'; return this.cortexUsecases.startCortex(host, port); @@ -45,4 +51,6 @@ export class StartCommand extends CommandRunner { const loadModelDto: LoadModelDto = { modelId, settings }; return this.modelsUsecases.startModel(loadModelDto); } + + rootDir = () => resolve(__dirname, `../../../`); } diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts index f1256f072..43b78410f 100644 --- a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts +++ b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts @@ -46,6 +46,7 @@ export default class CortexProvider extends OAIEngineExtension { const cpuThreadCount = 1; // TODO: NamH Math.max(1, nitroResourceProbe.numCpuPhysicalCore); const modelSettings = { // This is critical and requires real CPU physical core count (or performance core) + model: model.id, cpu_threads: cpuThreadCount, ...model.settings, llama_model_path: modelBinaryLocalPath, diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index 846af2ada..e08b226d6 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -1,8 +1,6 @@ -import { Injectable, InternalServerErrorException } from '@nestjs/common'; -import { ConfigService } from '@nestjs/config'; +import { Injectable } from '@nestjs/common'; import { ChildProcess, spawn } from 'child_process'; import { join } from 'path'; -import { existsSync } from 'fs'; import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto'; import { HttpService } from '@nestjs/axios'; @@ -10,10 +8,7 @@ import { HttpService } from '@nestjs/axios'; export class CortexUsecases { private cortexProcess: ChildProcess | undefined; - constructor( - private readonly configService: ConfigService, - private readonly httpService: HttpService, - ) {} + constructor(private readonly httpService: HttpService) {} async startCortex( host: string, @@ -26,29 +21,27 @@ export class CortexUsecases { }; } - const binaryPath = this.configService.get('CORTEX_BINARY_PATH'); - if (!binaryPath || !existsSync(binaryPath)) { - throw new InternalServerErrorException('Cortex binary not found'); - } - const args: string[] = ['1', host, port]; // go up one level to get the binary folder, have to also work on windows - const binaryFolder = join(binaryPath, '..'); - - this.cortexProcess = spawn(binaryPath, args, { - detached: false, - cwd: binaryFolder, - stdio: 'inherit', - env: { - ...process.env, - // TODO: NamH need to get below information - // CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, - // // Vulkan - Support 1 device at a time for now - // ...(executableOptions.vkVisibleDevices?.length > 0 && { - // GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], - // }), + // const binaryFolder = join(binaryPath, '..'); + this.cortexProcess = spawn( + join(__dirname, '../../../cortex-cpp/cortex-cpp'), + args, + { + detached: false, + cwd: join(__dirname, '../../../cortex-cpp'), + stdio: 'inherit', + env: { + ...process.env, + // TODO: NamH need to get below information + // CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, + // // Vulkan - Support 1 device at a time for now + // ...(executableOptions.vkVisibleDevices?.length > 0 && { + // GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], + // }), + }, }, - }); + ); this.registerCortexEvents(); diff --git a/cortex-js/tsconfig.json b/cortex-js/tsconfig.json index b7b0011be..f52125fca 100644 --- a/cortex-js/tsconfig.json +++ b/cortex-js/tsconfig.json @@ -18,6 +18,7 @@ "strictBindCallApply": true, "forceConsistentCasingInFileNames": true, "noFallthroughCasesInSwitch": true, + "esModuleInterop": true, "paths": { "@/*": ["src/*"] }