From f49820ff04fe69e367765f1d7e7fc42517fe8695 Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 17 Jul 2024 15:39:38 +0700 Subject: [PATCH] feat: add engine init endpoint (#888) --- .../src/domain/abstracts/engine.abstract.ts | 2 + .../domain/abstracts/extension.abstract.ts | 3 + .../src/domain/config/config.interface.ts | 1 - .../src/domain/models/download.interface.ts | 1 + cortex-js/src/extensions/anthropic.engine.ts | 40 +-- cortex-js/src/extensions/groq.engine.ts | 1 + cortex-js/src/extensions/mistral.engine.ts | 1 + cortex-js/src/extensions/openai.engine.ts | 1 + .../engines/engines-init.command.ts | 9 +- .../commanders/models/model-pull.command.ts | 6 +- .../commanders/models/model-start.command.ts | 4 +- .../commanders/shortcuts/run.command.ts | 4 +- .../commanders/test/helpers.command.spec.ts | 1 - .../commanders/test/models.command.spec.ts | 1 - .../usecases/cli.usecases.module.ts | 7 +- .../commanders/usecases/init.cli.usecases.ts | 305 ------------------ .../controllers/engines.controller.ts | 31 +- .../controllers/events.controller.ts | 2 +- .../dtos/engines/engines.dto.ts | 10 +- .../extensions/extension.repository.ts | 19 ++ .../file-manager/file-manager.service.ts | 1 - .../src/usecases/engines/engines.module.ts | 11 +- .../src/usecases/engines/engines.usecase.ts | 248 +++++++++++++- .../src/usecases/models/models.usecases.ts | 4 +- cortex-js/src/utils/huggingface.ts | 2 +- cortex-js/src/utils/normalize-model-id.ts | 2 +- 26 files changed, 362 insertions(+), 355 deletions(-) delete mode 100644 cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts diff --git a/cortex-js/src/domain/abstracts/engine.abstract.ts b/cortex-js/src/domain/abstracts/engine.abstract.ts index 92335fc63..7c4b33ae6 100644 --- a/cortex-js/src/domain/abstracts/engine.abstract.ts +++ b/cortex-js/src/domain/abstracts/engine.abstract.ts @@ -10,6 +10,8 @@ export abstract class EngineExtension extends Extension { transformResponse?: Function; + initalized: boolean = 
false; + abstract inference( dto: any, headers: Record, diff --git a/cortex-js/src/domain/abstracts/extension.abstract.ts b/cortex-js/src/domain/abstracts/extension.abstract.ts index d016920ed..c3f677edf 100644 --- a/cortex-js/src/domain/abstracts/extension.abstract.ts +++ b/cortex-js/src/domain/abstracts/extension.abstract.ts @@ -21,6 +21,9 @@ export abstract class Extension { /** @type {string} Extension's version. */ version?: string; + /** @type {boolean} Whether the extension is initialized or not. */ + initalized: boolean; + /** * Called when the extension is loaded. * Any initialization logic for the extension should be put here. diff --git a/cortex-js/src/domain/config/config.interface.ts b/cortex-js/src/domain/config/config.interface.ts index 7f2875abb..f7378dd35 100644 --- a/cortex-js/src/domain/config/config.interface.ts +++ b/cortex-js/src/domain/config/config.interface.ts @@ -1,6 +1,5 @@ export interface Config { dataFolderPath: string; - initialized: boolean; cortexCppHost: string; cortexCppPort: number; } diff --git a/cortex-js/src/domain/models/download.interface.ts b/cortex-js/src/domain/models/download.interface.ts index ecf67cf76..a0c99473a 100644 --- a/cortex-js/src/domain/models/download.interface.ts +++ b/cortex-js/src/domain/models/download.interface.ts @@ -69,4 +69,5 @@ export interface DownloadStateEvent { export enum DownloadType { Model = 'model', Miscelanous = 'miscelanous', + Engine = 'engine' } diff --git a/cortex-js/src/extensions/anthropic.engine.ts b/cortex-js/src/extensions/anthropic.engine.ts index 4e00dabb9..268b3337a 100644 --- a/cortex-js/src/extensions/anthropic.engine.ts +++ b/cortex-js/src/extensions/anthropic.engine.ts @@ -16,6 +16,7 @@ export default class AnthropicEngineExtension extends OAIEngineExtension { productName = 'Anthropic Inference Engine'; description = 'This extension enables Anthropic chat completion API calls'; version = '0.0.1'; + initalized = true; apiKey?: string; constructor( @@ -39,42 +40,45 @@ export 
default class AnthropicEngineExtension extends OAIEngineExtension { this.apiKey = configs?.apiKey; } - override async inference(dto: any, headers: Record): Promise { - headers['x-api-key'] = this.apiKey as string - headers['Content-Type'] = 'application/json' - headers['anthropic-version'] = '2023-06-01' - return super.inference(dto, headers) + override async inference( + dto: any, + headers: Record, + ): Promise { + headers['x-api-key'] = this.apiKey as string; + headers['Content-Type'] = 'application/json'; + headers['anthropic-version'] = '2023-06-01'; + return super.inference(dto, headers); } transformPayload = (data: any): any => { return _.pick(data, ['messages', 'model', 'stream', 'max_tokens']); - } + }; transformResponse = (data: any): string => { // handling stream response if (typeof data === 'string' && data.trim().length === 0) { - return ''; + return ''; } if (typeof data === 'string' && data.startsWith('event: ')) { - return '' + return ''; } if (typeof data === 'string' && data.startsWith('data: ')) { data = data.replace('data: ', ''); const parsedData = JSON.parse(data); if (parsedData.type !== 'content_block_delta') { - return '' + return ''; } const text = parsedData.delta?.text; //convert to have this format data.choices[0]?.delta?.content return JSON.stringify({ choices: [ - { - delta: { - content: text - } - } - ] - }) + { + delta: { + content: text, + }, + }, + ], + }); } // non-stream response if (data.content && data.content.length > 0 && data.content[0].text) { @@ -88,8 +92,8 @@ export default class AnthropicEngineExtension extends OAIEngineExtension { ], }); } - + console.error('Invalid response format:', data); return ''; - } + }; } diff --git a/cortex-js/src/extensions/groq.engine.ts b/cortex-js/src/extensions/groq.engine.ts index 9a37d4875..a23b87752 100644 --- a/cortex-js/src/extensions/groq.engine.ts +++ b/cortex-js/src/extensions/groq.engine.ts @@ -14,6 +14,7 @@ export default class GroqEngineExtension extends OAIEngineExtension { 
productName = 'Groq Inference Engine'; description = 'This extension enables fast Groq chat completion API calls'; version = '0.0.1'; + initalized = true; apiKey?: string; constructor( diff --git a/cortex-js/src/extensions/mistral.engine.ts b/cortex-js/src/extensions/mistral.engine.ts index bcf934121..44f8aa095 100644 --- a/cortex-js/src/extensions/mistral.engine.ts +++ b/cortex-js/src/extensions/mistral.engine.ts @@ -14,6 +14,7 @@ export default class MistralEngineExtension extends OAIEngineExtension { productName = 'Mistral Inference Engine'; description = 'This extension enables Mistral chat completion API calls'; version = '0.0.1'; + initalized = true; apiKey?: string; constructor( diff --git a/cortex-js/src/extensions/openai.engine.ts b/cortex-js/src/extensions/openai.engine.ts index 90ca9b1cc..5917d20c1 100644 --- a/cortex-js/src/extensions/openai.engine.ts +++ b/cortex-js/src/extensions/openai.engine.ts @@ -14,6 +14,7 @@ export default class OpenAIEngineExtension extends OAIEngineExtension { productName = 'OpenAI Inference Engine'; description = 'This extension enables OpenAI chat completion API calls'; version = '0.0.1'; + initalized = true; apiKey?: string; constructor( diff --git a/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts b/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts index 7ed136c9e..65b758075 100644 --- a/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts +++ b/cortex-js/src/infrastructure/commanders/engines/engines-init.command.ts @@ -1,10 +1,10 @@ import { CommandRunner, Option, SubCommand } from 'nest-commander'; import { SetCommandContext } from '../decorators/CommandContext'; import { ContextService } from '@/infrastructure/services/context/context.service'; -import { InitCliUsecases } from '../usecases/init.cli.usecases'; import { Engines } from '../types/engine.interface'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; import { FileManagerService } 
from '@/infrastructure/services/file-manager/file-manager.service'; +import { EnginesUsecases } from '@/usecases/engines/engines.usecase'; @SubCommand({ name: ' init', @@ -16,7 +16,7 @@ import { FileManagerService } from '@/infrastructure/services/file-manager/file- @SetCommandContext() export class EnginesInitCommand extends CommandRunner { constructor( - private readonly initUsecases: InitCliUsecases, + private readonly engineUsecases: EnginesUsecases, private readonly cortexUsecases: CortexUsecases, private readonly fileManagerService: FileManagerService, readonly contextService: ContextService, @@ -31,7 +31,7 @@ export class EnginesInitCommand extends CommandRunner { const engine = passedParams[0]; const params = passedParams.includes(Engines.llamaCPP) ? { - ...(await this.initUsecases.defaultInstallationOptions()), + ...(await this.engineUsecases.defaultInstallationOptions()), ...options, } : {}; @@ -43,7 +43,8 @@ export class EnginesInitCommand extends CommandRunner { if (await this.cortexUsecases.healthCheck(host, port)) { await this.cortexUsecases.stopCortex(); } - return this.initUsecases + console.log(`Installing engine ${engine}...`); + return this.engineUsecases .installEngine( params, engine.includes('@') ? 
engine.split('@')[1] : 'latest', diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts index a37ab8632..ad9021b08 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -12,9 +12,9 @@ import { ContextService } from '@/infrastructure/services/context/context.servic import { existsSync } from 'fs'; import { join } from 'node:path'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; -import { InitCliUsecases } from '../usecases/init.cli.usecases'; import { checkModelCompatibility } from '@/utils/model-check'; import { Engines } from '../types/engine.interface'; +import { EnginesUsecases } from '@/usecases/engines/engines.usecase'; @SubCommand({ name: 'pull', @@ -28,7 +28,7 @@ import { Engines } from '../types/engine.interface'; export class ModelPullCommand extends CommandRunner { constructor( private readonly modelsCliUsecases: ModelsCliUsecases, - private readonly initUsecases: InitCliUsecases, + private readonly engineUsecases: EnginesUsecases, private readonly fileService: FileManagerService, readonly contextService: ContextService, private readonly telemetryUsecases: TelemetryUsecases, @@ -60,7 +60,7 @@ export class ModelPullCommand extends CommandRunner { !existsSync(join(await this.fileService.getCortexCppEnginePath(), engine)) ) { console.log('\n'); - await this.initUsecases.installEngine(undefined, 'latest', engine); + await this.engineUsecases.installEngine(undefined, 'latest', engine); } this.telemetryUsecases.sendEvent( [ diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts index a74787015..e3aae3e0b 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts +++ 
b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts @@ -10,12 +10,12 @@ import { ModelsCliUsecases } from '@commanders/usecases/models.cli.usecases'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; import { SetCommandContext } from '../decorators/CommandContext'; import { ContextService } from '@/infrastructure/services/context/context.service'; -import { InitCliUsecases } from '../usecases/init.cli.usecases'; import { createReadStream, existsSync, statSync, watchFile } from 'node:fs'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; import { join } from 'node:path'; import { Engines } from '../types/engine.interface'; import { checkModelCompatibility } from '@/utils/model-check'; +import { EnginesUsecases } from '@/usecases/engines/engines.usecase'; type ModelStartOptions = { attach: boolean; @@ -36,7 +36,7 @@ export class ModelStartCommand extends CommandRunner { private readonly inquirerService: InquirerService, private readonly cortexUsecases: CortexUsecases, private readonly modelsCliUsecases: ModelsCliUsecases, - private readonly initUsecases: InitCliUsecases, + private readonly initUsecases: EnginesUsecases, private readonly fileService: FileManagerService, readonly contextService: ContextService, ) { diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts index 84e3fd92a..2142e6004 100644 --- a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -13,9 +13,9 @@ import { ModelNotFoundException } from '@/infrastructure/exception/model-not-fou import { existsSync } from 'fs'; import { join } from 'path'; import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; -import { InitCliUsecases } from '../usecases/init.cli.usecases'; import { Engines } from 
'../types/engine.interface'; import { checkModelCompatibility } from '@/utils/model-check'; +import { EnginesUsecases } from '@/usecases/engines/engines.usecase'; type RunOptions = { threadId?: string; @@ -38,7 +38,7 @@ export class RunCommand extends CommandRunner { private readonly chatCliUsecases: ChatCliUsecases, private readonly inquirerService: InquirerService, private readonly fileService: FileManagerService, - private readonly initUsecases: InitCliUsecases, + private readonly initUsecases: EnginesUsecases, ) { super(); } diff --git a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts index 933d2f979..b07b070e9 100644 --- a/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts +++ b/cortex-js/src/infrastructure/commanders/test/helpers.command.spec.ts @@ -35,7 +35,6 @@ beforeAll( // Attempt to create test folder await fileService.writeConfigFile({ dataFolderPath: join(__dirname, 'test_data'), - initialized: false, cortexCppHost: 'localhost', cortexCppPort: 3929 }); diff --git a/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts b/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts index a40f49dc0..f1a6f6879 100644 --- a/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts +++ b/cortex-js/src/infrastructure/commanders/test/models.command.spec.ts @@ -23,7 +23,6 @@ beforeAll( // Attempt to create test folder await fileService.writeConfigFile({ dataFolderPath: join(__dirname, 'test_data'), - initialized: false, cortexCppHost: 'localhost', cortexCppPort: 3929, }); diff --git a/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts index 8f33fecd6..7cc8b81f5 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts @@ -1,5 
+1,4 @@ import { Module } from '@nestjs/common'; -import { InitCliUsecases } from './init.cli.usecases'; import { HttpModule } from '@nestjs/axios'; import { ModelsCliUsecases } from './models.cli.usecases'; import { ModelsModule } from '@/usecases/models/models.module'; @@ -13,6 +12,8 @@ import { FileManagerModule } from '@/infrastructure/services/file-manager/file-m import { PSCliUsecases } from './ps.cli.usecases'; import { BenchmarkCliUsecases } from './benchmark.cli.usecases'; import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; +import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; +import { EnginesModule } from '@/usecases/engines/engines.module'; @Module({ imports: [ @@ -25,16 +26,16 @@ import { TelemetryModule } from '@/usecases/telemetry/telemetry.module'; MessagesModule, FileManagerModule, TelemetryModule, + DownloadManagerModule, + EnginesModule, ], providers: [ - InitCliUsecases, ModelsCliUsecases, ChatCliUsecases, PSCliUsecases, BenchmarkCliUsecases, ], exports: [ - InitCliUsecases, ModelsCliUsecases, ChatCliUsecases, PSCliUsecases, diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts deleted file mode 100644 index be4437820..000000000 --- a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts +++ /dev/null @@ -1,305 +0,0 @@ -import { - cpSync, - createWriteStream, - existsSync, - mkdirSync, - readdirSync, - rmSync, -} from 'fs'; -import { join } from 'path'; -import { HttpService } from '@nestjs/axios'; -import { Presets, SingleBar } from 'cli-progress'; -import decompress from 'decompress'; -import { exit } from 'node:process'; -import { InitOptions } from '@commanders/types/init-options.interface'; -import { Injectable } from '@nestjs/common'; -import { firstValueFrom } from 'rxjs'; -import { FileManagerService } from 
'@/infrastructure/services/file-manager/file-manager.service'; -import { rm } from 'fs/promises'; -import { - CORTEX_ENGINE_RELEASES_URL, - CUDA_DOWNLOAD_URL, -} from '@/infrastructure/constants/cortex'; -import { checkNvidiaGPUExist } from '@/utils/cuda'; -import { Engines } from '../types/engine.interface'; - -import { cpuInfo } from 'cpu-instructions'; -import ora from 'ora'; - -@Injectable() -export class InitCliUsecases { - constructor( - private readonly httpService: HttpService, - private readonly fileManagerService: FileManagerService, - ) {} - - /** - * Default installation options base on the system - * @returns - */ - defaultInstallationOptions = async (): Promise => { - let options: InitOptions = {}; - - // Skip check if darwin - if (process.platform === 'darwin') { - return options; - } - // If Nvidia Driver is installed -> GPU - options.runMode = (await checkNvidiaGPUExist()) ? 'GPU' : 'CPU'; - options.gpuType = 'Nvidia'; - //CPU Instructions detection - options.instructions = await this.detectInstructions(); - return options; - }; - - /** - * Install Engine and Dependencies with given options - * @param engineFileName - * @param version - */ - installEngine = async ( - options?: InitOptions, - version: string = 'latest', - engine: string = 'default', - force: boolean = false, - ): Promise => { - // Use default option if not defined - if (!options && engine === Engines.llamaCPP) { - options = await this.defaultInstallationOptions(); - } - const configs = await this.fileManagerService.getConfig(); - - const engineSpinner = ora('Installing engine...').start(); - // Ship Llama.cpp engine by default - if ( - !existsSync( - join(await this.fileManagerService.getCortexCppEnginePath(), engine), - ) || - force - ) { - const isVulkan = - engine === Engines.llamaCPP && - (options?.vulkan || - (options?.runMode === 'GPU' && options?.gpuType !== 'Nvidia')); - await this.installAcceleratedEngine(version, engine, [ - process.platform === 'win32' - ? 
'-windows' - : process.platform === 'darwin' - ? '-mac' - : '-linux', - // CPU Instructions - CPU | GPU Non-Vulkan - options?.instructions && !isVulkan - ? `-${options?.instructions?.toLowerCase()}` - : '', - // Cuda - options?.runMode === 'GPU' && options?.gpuType === 'Nvidia' && !isVulkan - ? `cuda-${options.cudaVersion ?? '12'}` - : '', - // Vulkan - isVulkan ? '-vulkan' : '', - - // Arch - engine !== Engines.tensorrtLLM - ? process.arch === 'arm64' - ? '-arm64' - : '-amd64' - : '', - ]); - } - - if ( - (engine === Engines.llamaCPP || engine === Engines.tensorrtLLM) && - options?.runMode === 'GPU' && - options?.gpuType === 'Nvidia' && - !options?.vulkan - ) - await this.installCudaToolkitDependency(options?.cudaVersion); - - configs.initialized = true; - await this.fileManagerService.writeConfigFile(configs); - engineSpinner.succeed('Engine installed'); - }; - - /** - * Install CUDA Toolkit dependency (dll/so files) - * @param options - */ - private installCudaToolkitDependency = async (cudaVersion?: string) => { - const platform = process.platform === 'win32' ? 'windows' : 'linux'; - - const dataFolderPath = await this.fileManagerService.getDataFolderPath(); - const url = CUDA_DOWNLOAD_URL.replace( - '', - cudaVersion === '11' ? 
'11.7' : '12.3', - ).replace('', platform); - const destination = join(dataFolderPath, 'cuda-toolkit.tar.gz'); - - console.log('Downloading CUDA Toolkit dependency...'); - const download = await firstValueFrom( - this.httpService.get(url, { - responseType: 'stream', - }), - ); - - if (!download) { - console.log('Failed to download dependency'); - process.exit(1); - } - - await new Promise((resolve, reject) => { - const writer = createWriteStream(destination); - let receivedBytes = 0; - const totalBytes = download.headers['content-length']; - - writer.on('finish', () => { - bar.stop(); - resolve(true); - }); - - writer.on('error', (error) => { - bar.stop(); - reject(error); - }); - - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - - download.data.on('data', (chunk: any) => { - receivedBytes += chunk.length; - bar.update(Math.floor((receivedBytes / totalBytes) * 100)); - }); - - download.data.pipe(writer); - }); - - try { - await decompress( - destination, - await this.fileManagerService.getCortexCppEnginePath(), - ); - } catch (e) { - console.log(e); - exit(1); - } - await rm(destination, { force: true }); - }; - - private detectInstructions = (): Promise< - 'AVX' | 'AVX2' | 'AVX512' | undefined - > => { - const cpuInstruction = cpuInfo.cpuInfo()[0] ?? 'AVX'; - console.log(cpuInstruction, 'CPU instructions detected'); - return Promise.resolve(cpuInstruction); - }; - - /** - * Download and install accelerated engine - * @param version - * @param engineFileName - */ - private async installAcceleratedEngine( - version: string = 'latest', - engine: string = Engines.llamaCPP, - matchers: string[] = [], - ) { - const checkingIndicator = ora('Fetching engine repo...').start(); - const res = await firstValueFrom( - this.httpService.get( - CORTEX_ENGINE_RELEASES_URL(engine) + - `${version === 'latest' ? 
'/latest' : ''}`, - { - headers: { - 'X-GitHub-Api-Version': '2022-11-28', - Accept: 'application/vnd.github+json', - }, - }, - ), - ); - - if (!res?.data) { - console.log('Failed to fetch releases'); - exit(1); - } - - let release = res?.data; - if (Array.isArray(res?.data)) { - release = Array(res?.data)[0].find( - (e) => e.name === version.replace('v', ''), - ); - } - // Find the asset for the current platform - const toDownloadAsset = release.assets - .sort((a: any, b: any) => a.name.length - b.name.length) - .find((asset: any) => - matchers.every((matcher) => asset.name.includes(matcher)), - ); - - if (!toDownloadAsset) { - console.log( - `Could not find engine file for platform ${process.platform}`, - ); - exit(1); - } - - checkingIndicator.succeed('Engine repo fetched'); - - const engineDir = await this.fileManagerService.getCortexCppEnginePath(); - - if (!existsSync(engineDir)) mkdirSync(engineDir, { recursive: true }); - - const download = await firstValueFrom( - this.httpService.get(toDownloadAsset.browser_download_url, { - responseType: 'stream', - }), - ); - if (!download) { - console.log('Failed to download model'); - process.exit(1); - } - - const destination = join(engineDir, toDownloadAsset.name); - - await new Promise((resolve, reject) => { - const writer = createWriteStream(destination); - let receivedBytes = 0; - const totalBytes = download.headers['content-length']; - - writer.on('finish', () => { - bar.stop(); - resolve(true); - }); - - writer.on('error', (error) => { - bar.stop(); - reject(error); - }); - - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - - download.data.on('data', (chunk: any) => { - receivedBytes += chunk.length; - bar.update(Math.floor((receivedBytes / totalBytes) * 100)); - }); - - download.data.pipe(writer); - }); - - const decompressIndicator = ora('Decompressing engine...').start(); - try { - await decompress(destination, engineDir); - } catch (e) { - console.error('Error decompressing 
file', e); - exit(1); - } - await rm(destination, { force: true }); - - // Copy the additional files to the cortex-cpp directory - for (const file of readdirSync(join(engineDir, engine))) { - if (!file.includes('engine')) { - await cpSync(join(engineDir, engine, file), join(engineDir, file)); - } - } - decompressIndicator.succeed('Engine decompressed'); - } -} diff --git a/cortex-js/src/infrastructure/controllers/engines.controller.ts b/cortex-js/src/infrastructure/controllers/engines.controller.ts index ad754189d..aabae3a58 100644 --- a/cortex-js/src/infrastructure/controllers/engines.controller.ts +++ b/cortex-js/src/infrastructure/controllers/engines.controller.ts @@ -4,17 +4,22 @@ import { Param, HttpCode, UseInterceptors, + Post, } from '@nestjs/common'; import { ApiOperation, ApiParam, ApiTags, ApiResponse } from '@nestjs/swagger'; import { TransformInterceptor } from '../interceptors/transform.interceptor'; import { EnginesUsecases } from '@/usecases/engines/engines.usecase'; import { EngineDto } from '../dtos/engines/engines.dto'; +import { CommonResponseDto } from '../dtos/common/common-response.dto'; @ApiTags('Engines') @Controller('engines') @UseInterceptors(TransformInterceptor) export class EnginesController { - constructor(private readonly enginesUsecases: EnginesUsecases) {} + constructor( + private readonly enginesUsecases: EnginesUsecases, + private readonly initUsescases: EnginesUsecases, + ) {} @HttpCode(200) @ApiResponse({ @@ -52,4 +57,28 @@ export class EnginesController { findOne(@Param('name') name: string) { return this.enginesUsecases.getEngine(name); } + + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'Ok', + type: CommonResponseDto, + }) + @ApiOperation({ + summary: 'Initialize an engine', + description: + 'Initializes an engine instance with the given name. 
It will download the engine if it is not available locally.', + }) + @ApiParam({ + name: 'name', + required: true, + description: 'The unique identifier of the engine.', + }) + @Post(':name(*)/init') + initialize(@Param('name') name: string) { + this.initUsescases.installEngine(undefined, 'latest', name, true); + return { + message: 'Engine initialization started successfully.', + }; + } } diff --git a/cortex-js/src/infrastructure/controllers/events.controller.ts b/cortex-js/src/infrastructure/controllers/events.controller.ts index ade7b1dab..037aebcb0 100644 --- a/cortex-js/src/infrastructure/controllers/events.controller.ts +++ b/cortex-js/src/infrastructure/controllers/events.controller.ts @@ -61,7 +61,7 @@ export class EventsController { 'download.event', ).pipe( map((downloadState) => ({ data: downloadState })), - throttleTime(1000), + throttleTime(1000, undefined, { trailing: true }), ); return merge( diff --git a/cortex-js/src/infrastructure/dtos/engines/engines.dto.ts b/cortex-js/src/infrastructure/dtos/engines/engines.dto.ts index b043b6c98..4737be534 100644 --- a/cortex-js/src/infrastructure/dtos/engines/engines.dto.ts +++ b/cortex-js/src/infrastructure/dtos/engines/engines.dto.ts @@ -1,6 +1,6 @@ import { Extension } from '@/domain/abstracts/extension.abstract'; import { ApiProperty } from '@nestjs/swagger'; -import { IsOptional, IsString } from 'class-validator'; +import { IsBoolean, IsOptional, IsString } from 'class-validator'; export class EngineDto implements Partial { @ApiProperty({ @@ -38,4 +38,12 @@ export class EngineDto implements Partial { @IsString() @IsOptional() version?: string; + + @ApiProperty({ + type: String, + example: true, + description: 'Whether the engine is initialized or not.', + }) + @IsBoolean() + initalized?: boolean; } diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts index 8eae51cea..d8ad85994 100644 --- 
a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts +++ b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts @@ -49,17 +49,36 @@ export class ExtensionRepositoryImpl implements ExtensionRepository { this.fileManagerService, ); llamaCPPEngine.name = Engines.llamaCPP; + llamaCPPEngine.initalized = existsSync( + join( + await this.fileManagerService.getCortexCppEnginePath(), + Engines.llamaCPP, + ), + ); + const onnxEngine = new CortexProvider( this.httpService, this.fileManagerService, ); onnxEngine.name = Engines.onnx; + onnxEngine.initalized = existsSync( + join( + await this.fileManagerService.getCortexCppEnginePath(), + Engines.onnx, + ), + ); const tensorrtLLMEngine = new CortexProvider( this.httpService, this.fileManagerService, ); tensorrtLLMEngine.name = Engines.tensorrtLLM; + tensorrtLLMEngine.initalized = existsSync( + join( + await this.fileManagerService.getCortexCppEnginePath(), + Engines.tensorrtLLM, + ), + ); await llamaCPPEngine.onLoad(); await onnxEngine.onLoad(); diff --git a/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts b/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts index 0354c1a2a..f4fa2537c 100644 --- a/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts +++ b/cortex-js/src/infrastructure/services/file-manager/file-manager.service.ts @@ -107,7 +107,6 @@ export class FileManagerService { return { dataFolderPath, - initialized: false, cortexCppHost: defaultCortexCppHost, cortexCppPort: defaultCortexCppPort, }; diff --git a/cortex-js/src/usecases/engines/engines.module.ts b/cortex-js/src/usecases/engines/engines.module.ts index 40f15df8c..3a14929a0 100644 --- a/cortex-js/src/usecases/engines/engines.module.ts +++ b/cortex-js/src/usecases/engines/engines.module.ts @@ -2,9 +2,18 @@ import { Module } from '@nestjs/common'; import { ConfigsModule } from '../configs/configs.module'; import { EnginesUsecases } from 
'./engines.usecase'; import { ExtensionModule } from '@/infrastructure/repositories/extensions/extension.module'; +import { HttpModule } from '@nestjs/axios'; +import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module'; +import { DownloadManagerModule } from '@/infrastructure/services/download-manager/download-manager.module'; @Module({ - imports: [ConfigsModule, ExtensionModule], + imports: [ + ConfigsModule, + ExtensionModule, + HttpModule, + FileManagerModule, + DownloadManagerModule, + ], controllers: [], providers: [EnginesUsecases], exports: [EnginesUsecases], diff --git a/cortex-js/src/usecases/engines/engines.usecase.ts b/cortex-js/src/usecases/engines/engines.usecase.ts index 1609822a2..db36408d9 100644 --- a/cortex-js/src/usecases/engines/engines.usecase.ts +++ b/cortex-js/src/usecases/engines/engines.usecase.ts @@ -1,20 +1,46 @@ import { Injectable } from '@nestjs/common'; import { ExtensionRepository } from '@/domain/repositories/extension.interface'; +import { cpSync, existsSync, mkdirSync, readdirSync } from 'fs'; +import { join } from 'path'; +import { HttpService } from '@nestjs/axios'; +import decompress from 'decompress'; +import { exit } from 'node:process'; +import { InitOptions } from '@commanders/types/init-options.interface'; +import { firstValueFrom } from 'rxjs'; +import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service'; +import { rm } from 'fs/promises'; +import { + CORTEX_ENGINE_RELEASES_URL, + CUDA_DOWNLOAD_URL, +} from '@/infrastructure/constants/cortex'; +import { checkNvidiaGPUExist } from '@/utils/cuda'; + +import { cpuInfo } from 'cpu-instructions'; +import { DownloadManagerService } from '@/infrastructure/services/download-manager/download-manager.service'; +import { DownloadType } from '@/domain/models/download.interface'; +import { Engines } from '@/infrastructure/commanders/types/engine.interface'; + @Injectable() export class EnginesUsecases { - 
constructor(private readonly extensionRepository: ExtensionRepository) {} + constructor( + private readonly httpService: HttpService, + private readonly fileManagerService: FileManagerService, + private readonly downloadManagerService: DownloadManagerService, + private readonly extensionRepository: ExtensionRepository, + ) {} /** * Get the engines * @returns Cortex supported Engines */ async getEngines() { - return (await this.extensionRepository.findAll()).map((e) => ({ - name: e.name, - description: e.description, - version: e.version, - productName: e.productName, + return (await this.extensionRepository.findAll()).map((engine) => ({ + name: engine.name, + description: engine.description, + version: engine.version, + productName: engine.productName, + initialized: engine.initalized, })); } @@ -31,8 +57,218 @@ export class EnginesUsecases { description: engine.description, version: engine.version, productName: engine.productName, + initialized: engine.initalized, } : undefined, ); } + + /** + * Default installation options base on the system + * @returns + */ + defaultInstallationOptions = async (): Promise<InitOptions> => { + let options: InitOptions = {}; + + // Skip check if darwin + if (process.platform === 'darwin') { + return options; + } + // If Nvidia Driver is installed -> GPU + options.runMode = (await checkNvidiaGPUExist()) ? 
'GPU' : 'CPU'; + options.gpuType = 'Nvidia'; + //CPU Instructions detection + options.instructions = await this.detectInstructions(); + return options; + }; + + /** + * Install Engine and Dependencies with given options + * @param engineFileName + * @param version + */ + installEngine = async ( + options?: InitOptions, + version: string = 'latest', + engine: string = 'default', + force: boolean = false, + ): Promise<void> => { + // Use default option if not defined + if (!options && engine === Engines.llamaCPP) { + options = await this.defaultInstallationOptions(); + } + const configs = await this.fileManagerService.getConfig(); + + // Ship Llama.cpp engine by default + if ( + !existsSync( + join(await this.fileManagerService.getCortexCppEnginePath(), engine), + ) || + force + ) { + const isVulkan = + engine === Engines.llamaCPP && + (options?.vulkan || + (options?.runMode === 'GPU' && options?.gpuType !== 'Nvidia')); + await this.installAcceleratedEngine(version, engine, [ + process.platform === 'win32' + ? '-windows' + : process.platform === 'darwin' + ? '-mac' + : '-linux', + // CPU Instructions - CPU | GPU Non-Vulkan + options?.instructions && !isVulkan + ? `-${options?.instructions?.toLowerCase()}` + : '', + // Cuda + options?.runMode === 'GPU' && options?.gpuType === 'Nvidia' && !isVulkan + ? `cuda-${options.cudaVersion ?? '12'}` + : '', + // Vulkan + isVulkan ? '-vulkan' : '', + + // Arch + engine !== Engines.tensorrtLLM + ? process.arch === 'arm64' + ? 
'-arm64' + : '-amd64' + : '', + ]); + } + + if ( + (engine === Engines.llamaCPP || engine === Engines.tensorrtLLM) && + options?.runMode === 'GPU' && + options?.gpuType === 'Nvidia' && + !options?.vulkan + ) + await this.installCudaToolkitDependency(options?.cudaVersion); + + // Update states + await this.extensionRepository.findOne(engine).then((e) => { + if (e) e.initalized = true; + }); + }; + + /** + * Install CUDA Toolkit dependency (dll/so files) + * @param options + */ + private installCudaToolkitDependency = async (cudaVersion?: string) => { + const platform = process.platform === 'win32' ? 'windows' : 'linux'; + + const dataFolderPath = await this.fileManagerService.getDataFolderPath(); + const url = CUDA_DOWNLOAD_URL.replace( + '<version>', + cudaVersion === '11' ? '11.7' : '12.3', + ).replace('<platform>', platform); + const destination = join(dataFolderPath, 'cuda-toolkit.tar.gz'); + + console.log('Downloading CUDA Toolkit dependency...'); + + await this.downloadManagerService.submitDownloadRequest( + url, + 'Cuda Toolkit Dependencies', + DownloadType.Engine, + { [url]: destination }, + async () => { + try { + await decompress( + destination, + await this.fileManagerService.getCortexCppEnginePath(), + ); + } catch (e) { + console.log(e); + exit(1); + } + await rm(destination, { force: true }); + }, + ); + }; + + private detectInstructions = (): Promise< + 'AVX' | 'AVX2' | 'AVX512' | undefined + > => { + const cpuInstruction = cpuInfo.cpuInfo()[0] ?? 'AVX'; + console.log(cpuInstruction, 'CPU instructions detected'); + return Promise.resolve(cpuInstruction); + }; + + /** + * Download and install accelerated engine + * @param version + * @param engineFileName + */ + private async installAcceleratedEngine( + version: string = 'latest', + engine: string = Engines.llamaCPP, + matchers: string[] = [], + ) { + const res = await firstValueFrom( + this.httpService.get( + CORTEX_ENGINE_RELEASES_URL(engine) + + `${version === 'latest' ? 
'/latest' : ''}`, + { + headers: { + 'X-GitHub-Api-Version': '2022-11-28', + Accept: 'application/vnd.github+json', + }, + }, + ), + ); + + if (!res?.data) { + console.log('Failed to fetch releases'); + exit(1); + } + + let release = res?.data; + if (Array.isArray(res?.data)) { + release = Array(res?.data)[0].find( + (e) => e.name === version.replace('v', ''), + ); + } + // Find the asset for the current platform + const toDownloadAsset = release.assets + .sort((a: any, b: any) => a.name.length - b.name.length) + .find((asset: any) => + matchers.every((matcher) => asset.name.includes(matcher)), + ); + + if (!toDownloadAsset) { + console.log( + `Could not find engine file for platform ${process.platform}`, + ); + exit(1); + } + + const engineDir = await this.fileManagerService.getCortexCppEnginePath(); + + if (!existsSync(engineDir)) mkdirSync(engineDir, { recursive: true }); + + const destination = join(engineDir, toDownloadAsset.name); + + await this.downloadManagerService.submitDownloadRequest( + toDownloadAsset.browser_download_url, + engine, + DownloadType.Engine, + { [toDownloadAsset.browser_download_url]: destination }, + async () => { + try { + await decompress(destination, engineDir); + } catch (e) { + console.error('Error decompressing file', e); + exit(1); + } + await rm(destination, { force: true }); + + // Copy the additional files to the cortex-cpp directory + for (const file of readdirSync(join(engineDir, engine))) { + if (!file.includes('engine')) { + await cpSync(join(engineDir, engine, file), join(engineDir, file)); + } + } + }, + ); + } } diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index 9c3824a17..ebaab9424 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -393,7 +393,7 @@ export class ModelsUsecases { llamaModelFile(model.files), ); model.files = [fileUrl]; - model.name = modelId.replace(':default', ''); + 
model.name = modelId.replace(':main', ''); } else if (model.engine === Engines.llamaCPP) { model.files = [ join( @@ -424,7 +424,7 @@ export class ModelsUsecases { ); await this.update(modelId, { files: [fileUrl], - name: modelId.replace(':default', ''), + name: modelId.replace(':main', ''), }); } } diff --git a/cortex-js/src/utils/huggingface.ts b/cortex-js/src/utils/huggingface.ts index 099e1aacf..85d1afe94 100644 --- a/cortex-js/src/utils/huggingface.ts +++ b/cortex-js/src/utils/huggingface.ts @@ -116,7 +116,7 @@ export async function fetchJanRepoData( ): Promise { const repo = modelId.split(':')[0]; const tree = await parseModelHubEngineBranch( - modelId.split(':')[1] ?? (!modelId.includes('/') ? 'default' : ''), + modelId.split(':')[1] ?? (!modelId.includes('/') ? 'main' : ''), ); const url = getRepoModelsUrl( `${!modelId.includes('/') ? 'cortexso/' : ''}${repo}`, diff --git a/cortex-js/src/utils/normalize-model-id.ts b/cortex-js/src/utils/normalize-model-id.ts index f5d9e0b51..239e43b0e 100644 --- a/cortex-js/src/utils/normalize-model-id.ts +++ b/cortex-js/src/utils/normalize-model-id.ts @@ -2,7 +2,7 @@ import { ModelArtifact } from '@/domain/models/model.interface'; import { getGpuInfo } from './cuda'; export const normalizeModelId = (modelId: string): string => { - return modelId.replace(':default', '').replace(/[:/]/g, '-'); + return modelId.replace(':main', '').replace(/[:/]/g, '-'); }; export const isLocalModel = (