diff --git a/cortex-js/.env.development b/cortex-js/.env.development new file mode 100644 index 000000000..e69de29bb diff --git a/cortex-js/.env.example b/cortex-js/.env.example index 51944b2db..d0666607c 100644 --- a/cortex-js/.env.example +++ b/cortex-js/.env.example @@ -1,3 +1,2 @@ EXTENSIONS_PATH= CORTEX_MODELS_DIR= -CORTEX_BINARY_PATH= \ No newline at end of file diff --git a/cortex-js/package.json b/cortex-js/package.json index f5681e447..a66b5b6da 100644 --- a/cortex-js/package.json +++ b/cortex-js/package.json @@ -25,6 +25,7 @@ "typeorm": "typeorm-ts-node-esm" }, "dependencies": { + "@huggingface/gguf": "^0.1.5", "@nestjs/axios": "^3.0.2", "@nestjs/common": "^10.0.0", "@nestjs/config": "^3.2.2", diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts index 6aed1aff4..4627852f1 100644 --- a/cortex-js/src/command.module.ts +++ b/cortex-js/src/command.module.ts @@ -1,24 +1,26 @@ import { Module } from '@nestjs/common'; -import { BasicCommand } from './infrastructure/commanders/basic-command.commander'; import { ModelsModule } from './usecases/models/models.module'; import { DatabaseModule } from './infrastructure/database/database.module'; import { ConfigModule } from '@nestjs/config'; import { CortexModule } from './usecases/cortex/cortex.module'; import { ServeCommand } from './infrastructure/commanders/serve.command'; -import { ChatCommand } from './infrastructure/commanders/chat.command'; import { ModelsCommand } from './infrastructure/commanders/models.command'; -import { StartCommand } from './infrastructure/commanders/start.command'; import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module'; import { ChatModule } from './usecases/chat/chat.module'; import { InitCommand } from './infrastructure/commanders/init.command'; import { HttpModule } from '@nestjs/axios'; -import { CreateInitQuestions } from './infrastructure/commanders/inquirer/init.questions'; +import { InitRunModeQuestions } from './infrastructure/commanders/questions/init.questions'; +import { ModelListCommand } from './infrastructure/commanders/models/model-list.command'; +import { ModelPullCommand } from './infrastructure/commanders/models/model-pull.command'; +import { CortexCommand } from './infrastructure/commanders/cortex-command.commander'; +import { ChatCommand } from './infrastructure/commanders/chat.command'; import { ModelStartCommand } from './infrastructure/commanders/models/model-start.command'; import { ModelStopCommand } from './infrastructure/commanders/models/model-stop.command'; -import { ModelListCommand } from './infrastructure/commanders/models/model-list.command'; import { ModelGetCommand } from './infrastructure/commanders/models/model-get.command'; import { ModelRemoveCommand } from './infrastructure/commanders/models/model-remove.command'; -import { ModelPullCommand } from './infrastructure/commanders/models/model-pull.command'; +import { RunCommand } from './infrastructure/commanders/shortcuts/run.command'; +import { InitCudaQuestions } from './infrastructure/commanders/questions/cuda.questions'; +import { CliUsecasesModule } from './infrastructure/commanders/usecases/cli.usecases.module'; @Module({ imports: [ @@ -33,15 +35,18 @@ import { ModelPullCommand } from './infrastructure/commanders/models/model-pull. 
ChatModule, ExtensionModule, HttpModule, + CliUsecasesModule ], providers: [ - BasicCommand, + CortexCommand, ModelsCommand, ServeCommand, ChatCommand, - StartCommand, InitCommand, - CreateInitQuestions, + + // Questions + InitRunModeQuestions, + InitCudaQuestions, // Model commands ModelStartCommand, @@ -50,6 +55,9 @@ import { ModelPullCommand } from './infrastructure/commanders/models/model-pull. ModelGetCommand, ModelRemoveCommand, ModelPullCommand, + + // Shortcuts + RunCommand, ], }) export class CommandModule {} diff --git a/cortex-js/src/domain/models/huggingface.interface.ts b/cortex-js/src/domain/models/huggingface.interface.ts new file mode 100644 index 000000000..86cb6a955 --- /dev/null +++ b/cortex-js/src/domain/models/huggingface.interface.ts @@ -0,0 +1,66 @@ +export interface HuggingFaceRepoData { + id: string; + modelId: string; + modelUrl?: string; + author: string; + sha: string; + downloads: number; + lastModified: string; + private: boolean; + disabled: boolean; + gated: boolean; + pipeline_tag: 'text-generation'; + tags: Array<'transformers' | 'pytorch' | 'safetensors' | string>; + cardData: Record; + siblings: { + rfilename: string; + downloadUrl?: string; + fileSize?: number; + quantization?: Quantization; + stopWord?: string; + }[]; + createdAt: string; +} + +const CardDataKeys = [ + 'base_model', + 'datasets', + 'inference', + 'language', + 'library_name', + 'license', + 'model_creator', + 'model_name', + 'model_type', + 'pipeline_tag', + 'prompt_template', + 'quantized_by', + 'tags', +] as const; +export type CardDataKeysTuple = typeof CardDataKeys; +export type CardDataKeys = CardDataKeysTuple[number]; + +export const AllQuantizations = [ + 'Q3_K_S', + 'Q3_K_M', + 'Q3_K_L', + 'Q4_K_S', + 'Q4_K_M', + 'Q5_K_S', + 'Q5_K_M', + 'Q4_0', + 'Q4_1', + 'Q5_0', + 'Q5_1', + 'IQ2_XXS', + 'IQ2_XS', + 'Q2_K', + 'Q2_K_S', + 'Q6_K', + 'Q8_0', + 'F16', + 'F32', + 'COPY', +]; +export type QuantizationsTuple = typeof AllQuantizations; +export type Quantization = QuantizationsTuple[number]; diff --git a/cortex-js/src/infrastructure/commanders/basic-command.commander.ts b/cortex-js/src/infrastructure/commanders/basic-command.commander.ts deleted file mode 100644 index fd2c39832..000000000 --- a/cortex-js/src/infrastructure/commanders/basic-command.commander.ts +++ /dev/null @@ -1,61 +0,0 @@ -import { RootCommand, CommandRunner, Option } from 'nest-commander'; -import { ServeCommand } from './serve.command'; -import { ChatCommand } from './chat.command'; -import { ModelsCommand } from './models.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { defaultCortexJsHost, defaultCortexJsPort } from 'constant'; -import { InitCommand } from './init.command'; -import { StartCommand } from './start.command'; - -@RootCommand({ - subCommands: [ - ModelsCommand, - ServeCommand, - ChatCommand, - InitCommand, - StartCommand, - ], -}) -export class BasicCommand extends CommandRunner { - constructor(private readonly cortexUsecases: CortexUsecases) { - super(); - } - - async run(input: string[], options?: any): Promise { - const command = input[0]; - - switch (command) { - case 'start': - const host = options?.host || 'localhost'; - const port = options?.port || 3928; - return this.cortexUsecases - .startCortex(host, port) - .then((e) => console.log(e)); - - case 'stop': - return this.cortexUsecases - .stopCortex(defaultCortexJsHost, defaultCortexJsPort) - .then((e) => console.log(e)); - - default: - console.error(`Command ${command} is not supported`); - return; - } 
- } - - @Option({ - flags: '--host ', - description: 'Host to serve the application', - }) - parseHost(value: string) { - return value; - } - - @Option({ - flags: '--port ', - description: 'Port to serve the application', - }) - parsePort(value: string) { - return parseInt(value, 10); - } -} diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts b/cortex-js/src/infrastructure/commanders/chat.command.ts index 37025ca34..8efacb093 100644 --- a/cortex-js/src/infrastructure/commanders/chat.command.ts +++ b/cortex-js/src/infrastructure/commanders/chat.command.ts @@ -1,15 +1,41 @@ import { ChatUsecases } from '@/usecases/chat/chat.usecases'; -import { CommandRunner, SubCommand } from 'nest-commander'; +import { CommandRunner, SubCommand, Option } from 'nest-commander'; import { ChatCliUsecases } from './usecases/chat.cli.usecases'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { exit } from 'node:process'; -@SubCommand({ name: 'chat' }) +type ChatOptions = { + model?: string; +}; + +@SubCommand({ name: 'chat', description: 'Start a chat with a model' }) export class ChatCommand extends CommandRunner { - constructor(private readonly chatUsecases: ChatUsecases) { + constructor( + private readonly chatUsecases: ChatUsecases, + private readonly cortexUsecases: CortexUsecases, + ) { super(); } - async run(input: string[]): Promise { - const chatCliService = new ChatCliUsecases(this.chatUsecases); - return chatCliService.run(input); + async run(_input: string[], option: ChatOptions): Promise { + const modelId = option.model; + if (!modelId) { + console.error('Model ID is required'); + exit(1); + } + + const chatCliUsecases = new ChatCliUsecases( + this.chatUsecases, + this.cortexUsecases, + ); + return chatCliUsecases.chat(modelId); + } + + @Option({ + flags: '--model ', + description: 'Model Id to start chat with', + }) + parseModelId(value: string) { + return value; } } diff --git a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts new file mode 100644 index 000000000..05aa30271 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts @@ -0,0 +1,20 @@ +import { RootCommand, CommandRunner } from 'nest-commander'; +import { ServeCommand } from './serve.command'; +import { ChatCommand } from './chat.command'; +import { ModelsCommand } from './models.command'; +import { InitCommand } from './init.command'; +import { RunCommand } from './shortcuts/run.command'; + +@RootCommand({ + subCommands: [ + ModelsCommand, + ServeCommand, + ChatCommand, + InitCommand, + RunCommand, + ], + description: 'Cortex CLI', +}) +export class CortexCommand extends CommandRunner { + async run(): Promise {} +} diff --git a/cortex-js/src/infrastructure/commanders/init.command.ts b/cortex-js/src/infrastructure/commanders/init.command.ts index 9340da330..adf8eba4b 100644 --- a/cortex-js/src/infrastructure/commanders/init.command.ts +++ b/cortex-js/src/infrastructure/commanders/init.command.ts @@ -1,141 +1,37 @@ -import { createWriteStream, existsSync, rmSync } from 'fs'; -import { CommandRunner, SubCommand, InquirerService } from 'nest-commander'; -import { resolve } from 'path'; -import { HttpService } from '@nestjs/axios'; -import { Presets, SingleBar } from 'cli-progress'; -import decompress from 'decompress'; -import { exit } from 'node:process'; - -@SubCommand({ name: 'init', aliases: ['setup'] }) +import { CommandRunner, InquirerService, SubCommand } from 
'nest-commander'; +import { InitCliUsecases } from './usecases/init.cli.usecases'; +import { InitOptions } from './types/init-options.interface'; + +@SubCommand({ + name: 'init', + aliases: ['setup'], + description: "Init settings and download cortex's dependencies", +}) export class InitCommand extends CommandRunner { - CORTEX_RELEASES_URL = 'https://api.github.com/repos/janhq/cortex/releases'; - constructor( - private readonly httpService: HttpService, private readonly inquirerService: InquirerService, + private readonly initUsecases: InitCliUsecases, ) { super(); } - async run(input: string[], options?: any): Promise { - options = await this.inquirerService.ask('create-init-questions', options); - const version = input[0] ?? 'latest'; - - await this.download(this.parseEngineFileName(options), version); - } - - download = async ( - engineFileName: string, - version: string = 'latest', - ): Promise => { - const res = await this.httpService - .get( - this.CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`, - { - headers: { - 'X-GitHub-Api-Version': '2022-11-28', - Accept: 'application/vnd.github+json', - }, - }, - ) - .toPromise(); - - if (!res?.data) { - console.log('Failed to fetch releases'); - exit(1); - } - - let release = res?.data; - if (Array.isArray(res?.data)) { - release = Array(res?.data)[0].find( - (e) => e.name === version.replace('v', ''), - ); - } - const toDownloadAsset = release.assets.find((s: any) => - s.name.includes(engineFileName), + async run(input: string[], options?: InitOptions): Promise { + options = await this.inquirerService.ask( + 'init-run-mode-questions', + options, ); - if (!toDownloadAsset) { - console.log(`Could not find engine file ${engineFileName}`); - exit(1); - } - - console.log(`Downloading engine file ${engineFileName}`); - const engineDir = resolve(this.rootDir(), 'cortex-cpp'); - if (existsSync(engineDir)) rmSync(engineDir, { recursive: true }); - - const download = await this.httpService - .get(toDownloadAsset.browser_download_url, { - responseType: 'stream', - }) - .toPromise(); - if (!download) { - throw new Error('Failed to download model'); + if (options.runMode === 'GPU' && !(await this.initUsecases.cudaVersion())) { + options = await this.inquirerService.ask('init-cuda-questions', options); } - const destination = resolve(this.rootDir(), toDownloadAsset.name); - - await new Promise((resolve, reject) => { - const writer = createWriteStream(destination); - let receivedBytes = 0; - const totalBytes = download.headers['content-length']; - - writer.on('finish', () => { - bar.stop(); - resolve(true); - }); - - writer.on('error', (error) => { - bar.stop(); - reject(error); - }); - - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - - download.data.on('data', (chunk: any) => { - receivedBytes += chunk.length; - bar.update(Math.floor((receivedBytes / totalBytes) * 100)); - }); + const version = input[0] ?? 
'latest'; - download.data.pipe(writer); - }); + const engineFileName = this.initUsecases.parseEngineFileName(options); + await this.initUsecases.installEngine(engineFileName, version); - try { - await decompress( - resolve(this.rootDir(), destination), - resolve(this.rootDir()), - ); - } catch (e) { - console.log(e); - exit(1); + if (options.installCuda === 'Yes') { + await this.initUsecases.installCudaToolkitDependency(options); } - exit(0); - }; - - parseEngineFileName = (options: { - runMode?: 'CPU' | 'GPU'; - gpuType?: 'Nvidia' | 'Others (Vulkan)'; - instructions?: 'AVX' | 'AVX2' | 'AVX-512' | undefined; - cudaVersion?: '11' | '12'; - }) => { - const platform = - process.platform === 'win32' - ? 'windows' - : process.platform === 'darwin' - ? 'mac' - : process.platform; - const arch = process.arch === 'arm64' ? process.arch : 'amd64'; - const cudaVersion = - options.runMode === 'GPU' - ? options.gpuType === 'Nvidia' - ? '-cuda-' + (options.cudaVersion === '11' ? '11-7' : '12-0') - : '-vulkan' - : ''; - const instructions = options.instructions ? `-${options.instructions}` : ''; - const engineName = `${platform}-${arch}${instructions.toLowerCase()}${cudaVersion}`; - return `${engineName}.tar.gz`; - }; - - rootDir = () => resolve(__dirname, `../../../`); + } } diff --git a/cortex-js/src/infrastructure/commanders/models.command.ts b/cortex-js/src/infrastructure/commanders/models.command.ts index 4b0f12d73..631c55774 100644 --- a/cortex-js/src/infrastructure/commanders/models.command.ts +++ b/cortex-js/src/infrastructure/commanders/models.command.ts @@ -16,6 +16,7 @@ import { ModelRemoveCommand } from './models/model-remove.command'; ModelGetCommand, ModelRemoveCommand, ], + description: 'Subcommands for managing models', }) export class ModelsCommand extends CommandRunner { async run(): Promise {} diff --git a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts index 295d68379..850e738c0 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts @@ -3,9 +3,9 @@ import { CommandRunner, SubCommand } from 'nest-commander'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; import { exit } from 'node:process'; -@SubCommand({ name: 'get' }) +@SubCommand({ name: 'get', description: 'Get a model by ID.' 
}) export class ModelGetCommand extends CommandRunner { - constructor(private readonly modelsUsecases: ModelsUsecases) { + constructor(private readonly modelsCliUsecases: ModelsCliUsecases) { super(); } @@ -15,8 +15,7 @@ export class ModelGetCommand extends CommandRunner { exit(1); } - const modelsCliUsecases = new ModelsCliUsecases(this.modelsUsecases); - const models = await modelsCliUsecases.getModel(input[0]); + const models = await this.modelsCliUsecases.getModel(input[0]); console.log(models); } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts index 61280f058..a0c17fcfe 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts @@ -2,15 +2,14 @@ import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CommandRunner, SubCommand } from 'nest-commander'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; -@SubCommand({ name: 'list' }) +@SubCommand({ name: 'list', description: 'List all models locally.' }) export class ModelListCommand extends CommandRunner { - constructor(private readonly modelsUsecases: ModelsUsecases) { + constructor(private readonly modelsCliUsecases: ModelsCliUsecases) { super(); } async run(): Promise { - const modelsCliUsecases = new ModelsCliUsecases(this.modelsUsecases); - const models = await modelsCliUsecases.listAllModels(); + const models = await this.modelsCliUsecases.listAllModels(); console.log(models); } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts index 68afaeb81..eae732704 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -1,12 +1,15 @@ -import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CommandRunner, SubCommand } from 'nest-commander'; import { Presets, SingleBar } from 'cli-progress'; import { exit } from 'node:process'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; -@SubCommand({ name: 'pull', aliases: ['download'] }) +@SubCommand({ + name: 'pull', + aliases: ['download'], + description: 'Download a model. 
Working with HuggingFace model id.', +}) export class ModelPullCommand extends CommandRunner { - constructor(private readonly modelsUsecases: ModelsUsecases) { + constructor(private readonly modelsCliUsecases: ModelsCliUsecases) { super(); } @@ -21,7 +24,7 @@ export class ModelPullCommand extends CommandRunner { const callback = (progress: number) => { bar.update(progress); }; - await new ModelsCliUsecases(this.modelsUsecases).pullModel( + await this.modelsCliUsecases.pullModel( input[0], callback, ); diff --git a/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts b/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts index 35d9c2d2c..531f0f893 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts @@ -1,11 +1,10 @@ -import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CommandRunner, SubCommand } from 'nest-commander'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; import { exit } from 'node:process'; -@SubCommand({ name: 'remove' }) +@SubCommand({ name: 'remove', description: 'Remove a model by ID locally.' }) export class ModelRemoveCommand extends CommandRunner { - constructor(private readonly modelsUsecases: ModelsUsecases) { + constructor(private readonly modelsCliUsecases: ModelsCliUsecases) { super(); } @@ -15,8 +14,7 @@ export class ModelRemoveCommand extends CommandRunner { exit(1); } - const modelsCliUsecases = new ModelsCliUsecases(this.modelsUsecases); - const result = await modelsCliUsecases.removeModel(input[0]); + const result = await this.modelsCliUsecases.removeModel(input[0]); console.log(result); } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts index 1d254cae3..b3108ff3e 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts @@ -1,11 +1,14 @@ -import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CommandRunner, SubCommand } from 'nest-commander'; import { exit } from 'node:process'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -@SubCommand({ name: 'start' }) +@SubCommand({ name: 'start', description: 'Start a model by ID.' 
}) export class ModelStartCommand extends CommandRunner { - constructor(private readonly modelsUsecases: ModelsUsecases) { + constructor( + private readonly cortexUsecases: CortexUsecases, + private readonly modelsCliUsecases: ModelsCliUsecases, + ) { super(); } @@ -15,7 +18,7 @@ export class ModelStartCommand extends CommandRunner { exit(1); } - const modelsCliUsecases = new ModelsCliUsecases(this.modelsUsecases); - await modelsCliUsecases.startModel(input[0]); + await this.cortexUsecases.startCortex(); + await this.modelsCliUsecases.startModel(input[0]); } } diff --git a/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts b/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts index 8ac43647f..b9a4b112b 100644 --- a/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts +++ b/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts @@ -1,11 +1,14 @@ -import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CommandRunner, SubCommand } from 'nest-commander'; import { exit } from 'node:process'; import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -@SubCommand({ name: 'stop' }) +@SubCommand({ name: 'stop', description: 'Stop a model by ID.' }) export class ModelStopCommand extends CommandRunner { - constructor(private readonly modelsUsecases: ModelsUsecases) { + constructor( + private readonly cortexUsecases: CortexUsecases, + private readonly modelsCliUsecases: ModelsCliUsecases, + ) { super(); } @@ -15,7 +18,7 @@ export class ModelStopCommand extends CommandRunner { exit(1); } - const modelsCliUsecases = new ModelsCliUsecases(this.modelsUsecases); - await modelsCliUsecases.stopModel(input[0]); + await this.modelsCliUsecases.stopModel(input[0]); + await this.cortexUsecases.stopCortex(); } } diff --git a/cortex-js/src/infrastructure/commanders/questions/cuda.questions.ts b/cortex-js/src/infrastructure/commanders/questions/cuda.questions.ts new file mode 100644 index 000000000..2309c3d00 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/questions/cuda.questions.ts @@ -0,0 +1,17 @@ +import { Question, QuestionSet } from 'nest-commander'; +import { platform } from 'node:process'; + +@QuestionSet({ name: 'init-cuda-questions' }) +export class InitCudaQuestions { + @Question({ + type: 'list', + message: 'Do you want to install additional dependencies for CUDA Toolkit?', + name: 'installCuda', + default: 'Yes', + choices: ['Yes', 'No, I want to use my own CUDA Toolkit'], + when: () => platform !== 'darwin', + }) + parseRunMode(val: string) { + return val; + } +} diff --git a/cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts b/cortex-js/src/infrastructure/commanders/questions/init.questions.ts similarity index 86% rename from cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts rename to cortex-js/src/infrastructure/commanders/questions/init.questions.ts index 04df43158..ee4675320 100644 --- a/cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts +++ b/cortex-js/src/infrastructure/commanders/questions/init.questions.ts @@ -1,8 +1,8 @@ import { Question, QuestionSet } from 'nest-commander'; import { platform } from 'node:process'; -@QuestionSet({ name: 'create-init-questions' }) -export class CreateInitQuestions { +@QuestionSet({ name: 'init-run-mode-questions' }) +export class InitRunModeQuestions { @Question({ type: 'list', message: 'Select run mode', @@ -31,7 +31,7 @@ export class 
CreateInitQuestions { type: 'list', message: 'Select CPU instructions set', name: 'instructions', - choices: ['AVX2', 'AVX', 'AVX-512'], + choices: ['AVX2', 'AVX', 'AVX512'], when: () => platform !== 'darwin', }) parseContent(val: string) { diff --git a/cortex-js/src/infrastructure/commanders/serve.command.ts b/cortex-js/src/infrastructure/commanders/serve.command.ts index d0f63a33e..6af783c76 100644 --- a/cortex-js/src/infrastructure/commanders/serve.command.ts +++ b/cortex-js/src/infrastructure/commanders/serve.command.ts @@ -8,7 +8,10 @@ type ServeOptions = { port?: number; }; -@SubCommand({ name: 'serve' }) +@SubCommand({ + name: 'serve', + description: 'Providing API endpoint for Cortex backend', +}) export class ServeCommand extends CommandRunner { constructor() { super(); diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts new file mode 100644 index 000000000..d12786519 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -0,0 +1,48 @@ +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { ModelsUsecases } from '@/usecases/models/models.usecases'; +import { CommandRunner, SubCommand, Option } from 'nest-commander'; +import { exit } from 'node:process'; +import { ChatUsecases } from '@/usecases/chat/chat.usecases'; +import { ChatCliUsecases } from '../usecases/chat.cli.usecases'; + +type RunOptions = { + model?: string; +}; + +@SubCommand({ + name: 'run', + description: 'EXPERIMENTAL: Shortcut to start a model and chat', +}) +export class RunCommand extends CommandRunner { + constructor( + private readonly modelsUsecases: ModelsUsecases, + private readonly cortexUsecases: CortexUsecases, + private readonly chatUsecases: ChatUsecases, + ) { + super(); + } + + async run(_input: string[], option: RunOptions): Promise { + const modelId = option.model; + if (!modelId) { + console.error('Model ID is required'); + exit(1); + } + + await this.cortexUsecases.startCortex(); + await this.modelsUsecases.startModel(modelId); + const chatCliUsecases = new ChatCliUsecases( + this.chatUsecases, + this.cortexUsecases, + ); + await chatCliUsecases.chat(modelId); + } + + @Option({ + flags: '--model ', + description: 'Model Id to start chat with', + }) + parseModelId(value: string) { + return value; + } +} diff --git a/cortex-js/src/infrastructure/commanders/start.command.ts b/cortex-js/src/infrastructure/commanders/start.command.ts deleted file mode 100644 index e0f4b5fef..000000000 --- a/cortex-js/src/infrastructure/commanders/start.command.ts +++ /dev/null @@ -1,62 +0,0 @@ -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { ModelsUsecases } from '@/usecases/models/models.usecases'; -import { CommandRunner, SubCommand } from 'nest-commander'; -import { resolve } from 'path'; -import { existsSync } from 'fs'; -import { Model } from '@/domain/models/model.interface'; -import { exit } from 'node:process'; -import { ChatUsecases } from '@/usecases/chat/chat.usecases'; -import { ChatCliUsecases } from './usecases/chat.cli.usecases'; - -@SubCommand({ name: 'start', aliases: ['run'] }) -export class StartCommand extends CommandRunner { - constructor( - private readonly modelsUsecases: ModelsUsecases, - private readonly cortexUsecases: CortexUsecases, - private readonly chatUsecases: ChatUsecases, - ) { - super(); - } - - async run(input: string[]): Promise { - if (input.length === 0) { - console.error('Model ID is required'); - 
exit(1); - } - - const modelId = input[0]; - const model = await this.getModelOrStop(modelId); - - return this.startCortex() - .then(() => this.startModel(model.id)) - .then(() => { - const chatCliUsecases = new ChatCliUsecases(this.chatUsecases); - return chatCliUsecases.run(input); - }) - .then(console.log) - .catch(console.error); - } - - private async startCortex() { - if (!existsSync(resolve(this.rootDir(), 'cortex-cpp'))) { - console.log('Please init the cortex by running cortex init command!'); - exit(0); - } - return this.cortexUsecases.startCortex(); - } - - private async startModel(modelId: string) { - return this.modelsUsecases.startModel(modelId); - } - - private async getModelOrStop(modelId: string): Promise { - const model = await this.modelsUsecases.findOne(modelId); - if (!model) { - console.debug('Model not found'); - exit(1); - } - return model; - } - - rootDir = () => resolve(__dirname, `../../../`); -} diff --git a/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts b/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts new file mode 100644 index 000000000..f65e4c7cd --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts @@ -0,0 +1,7 @@ +export interface InitOptions { + runMode?: 'CPU' | 'GPU'; + gpuType?: 'Nvidia' | 'Others (Vulkan)'; + instructions?: 'AVX' | 'AVX2' | 'AVX512' | undefined; + cudaVersion?: '11' | '12'; + installCuda?: 'Yes' | string +} \ No newline at end of file diff --git a/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts index fe0a62b4b..9f7409cca 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts @@ -5,6 +5,7 @@ import * as readline from 'node:readline/promises'; import { ChatStreamEvent } from '@/domain/abstracts/oai.abstract'; import { ChatCompletionMessage } from '@/infrastructure/dtos/chat/chat-completion-message.dto'; import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-completion.dto'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; // TODO: make this class injectable export class ChatCliUsecases { @@ -12,15 +13,12 @@ export class ChatCliUsecases { private userIndicator = '>> '; private exitMessage = 'Bye!'; - constructor(private readonly chatUsecases: ChatUsecases) {} + constructor( + private readonly chatUsecases: ChatUsecases, + private readonly cortexUsecases: CortexUsecases, + ) {} - async run(input: string[]): Promise { - if (input.length == 0) { - console.error('Please provide a model id.'); - exit(1); - } - - const modelId = input[0]; + async chat(modelId: string): Promise { console.log(`Inorder to exit, type '${this.exitClause}'.`); const messages: ChatCompletionMessage[] = []; @@ -32,8 +30,10 @@ export class ChatCliUsecases { rl.prompt(); rl.on('close', () => { - console.log(this.exitMessage); - exit(0); + this.cortexUsecases.stopCortex().then(() => { + console.log(this.exitMessage); + exit(0); + }); }); rl.on('line', (userInput: string) => { diff --git a/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts new file mode 100644 index 000000000..a01216c8c --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/usecases/cli.usecases.module.ts @@ -0,0 +1,13 @@ +import { Module } from "@nestjs/common"; +import { 
InitCliUsecases } from "./init.cli.usecases"; +import { HttpModule } from "@nestjs/axios"; +import { ModelsCliUsecases } from "./models.cli.usecases"; +import { ModelsModule } from "@/usecases/models/models.module"; + +@Module({ + imports: [HttpModule, ModelsModule], + controllers: [], + providers: [InitCliUsecases, ModelsCliUsecases], + exports: [InitCliUsecases, ModelsCliUsecases], + }) + export class CliUsecasesModule {} \ No newline at end of file diff --git a/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts new file mode 100644 index 000000000..0b9ba2e8e --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/usecases/init.cli.usecases.ts @@ -0,0 +1,219 @@ +import { createWriteStream, existsSync, rmSync } from 'fs'; +import { resolve, delimiter, join } from 'path'; +import { HttpService } from '@nestjs/axios'; +import { Presets, SingleBar } from 'cli-progress'; +import decompress from 'decompress'; +import { exit } from 'node:process'; +import { InitOptions } from '../types/init-options.interface'; +import { Injectable } from '@nestjs/common'; + +@Injectable() +export class InitCliUsecases { + CORTEX_RELEASES_URL = 'https://api.github.com/repos/janhq/cortex/releases'; + CUDA_DOWNLOAD_URL = 'https://catalog.jan.ai/dist/cuda-dependencies///cuda.tar.gz' + + constructor( + private readonly httpService: HttpService, + ) { + } + + installEngine = async ( + engineFileName: string, + version: string = 'latest', + ): Promise => { + const res = await this.httpService + .get( + this.CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`, + { + headers: { + 'X-GitHub-Api-Version': '2022-11-28', + Accept: 'application/vnd.github+json', + }, + }, + ) + .toPromise(); + + if (!res?.data) { + console.log('Failed to fetch releases'); + exit(1); + } + + let release = res?.data; + if (Array.isArray(res?.data)) { + release = Array(res?.data)[0].find( + (e) => e.name === version.replace('v', ''), + ); + } + const toDownloadAsset = release.assets.find((s: any) => + s.name.includes(engineFileName), + ); + + if (!toDownloadAsset) { + console.log(`Could not find engine file ${engineFileName}`); + exit(1); + } + + console.log(`Downloading engine file ${engineFileName}`); + const engineDir = resolve(this.rootDir(), 'cortex-cpp'); + if (existsSync(engineDir)) rmSync(engineDir, { recursive: true }); + + const download = await this.httpService + .get(toDownloadAsset.browser_download_url, { + responseType: 'stream', + }) + .toPromise(); + if (!download) { + console.log('Failed to download model'); + process.exit(1) + } + + const destination = resolve(this.rootDir(), toDownloadAsset.name); + + await new Promise((resolve, reject) => { + const writer = createWriteStream(destination); + let receivedBytes = 0; + const totalBytes = download.headers['content-length']; + + writer.on('finish', () => { + bar.stop(); + resolve(true); + }); + + writer.on('error', (error) => { + bar.stop(); + reject(error); + }); + + const bar = new SingleBar({}, Presets.shades_classic); + bar.start(100, 0); + + download.data.on('data', (chunk: any) => { + receivedBytes += chunk.length; + bar.update(Math.floor((receivedBytes / totalBytes) * 100)); + }); + + download.data.pipe(writer); + }); + + try { + await decompress( + resolve(this.rootDir(), destination), + resolve(this.rootDir()), + ); + } catch (e) { + console.log(e); + exit(1); + } + }; + + parseEngineFileName = (options: InitOptions) => { + const platform = + process.platform === 
'win32' + ? 'windows' + : process.platform === 'darwin' + ? 'mac' + : process.platform; + const arch = process.arch === 'arm64' ? process.arch : 'amd64'; + const cudaVersion = + options.runMode === 'GPU' + ? options.gpuType === 'Nvidia' + ? '-cuda-' + (options.cudaVersion === '11' ? '11-7' : '12-0') + : '-vulkan' + : ''; + const instructions = options.instructions ? `-${options.instructions}` : ''; + const engineName = `${platform}-${arch}${instructions.toLowerCase()}${cudaVersion}`; + return `${engineName}.tar.gz`; + }; + + rootDir = () => resolve(__dirname, `../../../`); + + cudaVersion = async () => { + let filesCuda12: string[] + let filesCuda11: string[] + let paths: string[] + + if (process.platform === 'win32') { + filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll'] + filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll'] + paths = process.env.PATH ? process.env.PATH.split(delimiter) : [] + } else { + filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12'] + filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11'] + paths = process.env.LD_LIBRARY_PATH + ? process.env.LD_LIBRARY_PATH.split(delimiter) + : [] + paths.push('/usr/lib/x86_64-linux-gnu/') + } + + if (filesCuda12.every( + (file) => existsSync(file) || this.checkFileExistenceInPaths(file, paths) + )) return '12' + + + if (filesCuda11.every( + (file) => existsSync(file) || this.checkFileExistenceInPaths(file, paths) + )) return '11' + + return undefined // No CUDA Toolkit found + } + + checkFileExistenceInPaths = (file: string, paths: string[]): boolean => { + return paths.some((p) => existsSync(join(p, file))) + } + + installCudaToolkitDependency = async (options: InitOptions) => { + const platform = process.platform === 'win32' ? 'windows' : 'linux' + + const url = this.CUDA_DOWNLOAD_URL + .replace('', options.cudaVersion === '11' ? 
'11.7' : '12.0') + .replace('', platform) + const destination = resolve(this.rootDir(), 'cuda-toolkit.tar.gz'); + + const download = await this.httpService + .get(url, { + responseType: 'stream', + }) + .toPromise(); + + if (!download) { + console.log('Failed to download dependency'); + process.exit(1) + } + + await new Promise((resolve, reject) => { + const writer = createWriteStream(destination); + let receivedBytes = 0; + const totalBytes = download.headers['content-length']; + + writer.on('finish', () => { + bar.stop(); + resolve(true); + }); + + writer.on('error', (error) => { + bar.stop(); + reject(error); + }); + + const bar = new SingleBar({}, Presets.shades_classic); + bar.start(100, 0); + + download.data.on('data', (chunk: any) => { + receivedBytes += chunk.length; + bar.update(Math.floor((receivedBytes / totalBytes) * 100)); + }); + + download.data.pipe(writer); + }); + + try { + await decompress( + resolve(this.rootDir(), destination), + resolve(this.rootDir(), 'cortex-cpp'), + ); + } catch (e) { + console.log(e); + exit(1); + } + } +} \ No newline at end of file diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts index 3445b708a..cdb3110cf 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts @@ -2,6 +2,10 @@ import { exit } from 'node:process'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { Model, ModelFormat } from '@/domain/models/model.interface'; import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; +import { HuggingFaceRepoData } from '@/domain/models/huggingface.interface'; +import { gguf } from '@huggingface/gguf'; +import { InquirerService } from 'nest-commander'; +import { Inject, Injectable } from '@nestjs/common'; const AllQuantizations = [ 'Q3_K_S', @@ -26,9 +30,13 @@ const AllQuantizations = [ 'COPY', ]; -// TODO: make this class injectable +@Injectable() export class ModelsCliUsecases { - constructor(private readonly modelsUsecases: ModelsUsecases) {} + constructor( + private readonly modelsUsecases: ModelsUsecases, + @Inject(InquirerService) + private readonly inquirerService: InquirerService, + ) {} async startModel(modelId: string): Promise { await this.getModelOrStop(modelId); @@ -73,18 +81,27 @@ export class ModelsCliUsecases { private async pullHuggingFaceModel(modelId: string) { const data = await this.fetchHuggingFaceRepoData(modelId); + const { quantization } = await this.inquirerService.inquirer.prompt({ + type: 'list', + name: 'quantization', + message: 'Select quantization', + choices: data.siblings.map((e) => e.quantization).filter((e) => !!e), + }); - // TODO: add select options - const sibling = data.siblings.filter( - (e: any) => e.quantization == 'Q5_K_M', - )[0]; + const sibling = data.siblings.filter((e) => !!e.quantization).find( + (e: any) => e.quantization === quantization, + ); if (!sibling) throw 'No expected quantization found'; + const stopWords: string[] = []; + if (sibling.stopWord) { + stopWords.push(sibling.stopWord); + } const model: CreateModelDto = { sources: [ { - url: sibling.downloadUrl, + url: sibling?.downloadUrl ?? 
'', }, ], id: modelId, @@ -93,10 +110,12 @@ export class ModelsCliUsecases { format: ModelFormat.GGUF, description: '', settings: {}, - parameters: {}, + parameters: { + stop: stopWords, + }, metadata: { author: data.author, - size: sibling.fileSize, + size: sibling.fileSize ?? 0, tags: [], }, engine: 'cortex', @@ -109,11 +128,13 @@ export class ModelsCliUsecases { const sanitizedUrl = this.toHuggingFaceUrl(repoId); const res = await fetch(sanitizedUrl); - const data = await res.json(); - if (data['error'] != null) { - throw new Error(data['error']); + const response = await res.json(); + if (response['error'] != null) { + throw new Error(response['error']); } + const data = response as HuggingFaceRepoData; + if (data.tags.indexOf('gguf') === -1) { throw `${repoId} is not supported. Only GGUF models are supported.`; } @@ -125,6 +146,21 @@ export class ModelsCliUsecases { for (let i = 0; i < data.siblings.length; i++) { const downloadUrl = `https://huggingface.co/${paths[2]}/${paths[3]}/resolve/main/${data.siblings[i].rfilename}`; data.siblings[i].downloadUrl = downloadUrl; + + if (downloadUrl.endsWith('.gguf')) { + // getting stop word + let stopWord = ''; + try { + const { metadata } = await gguf(downloadUrl); + // @ts-expect-error "tokenizer.ggml.eos_token_id" + const index = metadata['tokenizer.ggml.eos_token_id']; + // @ts-expect-error "tokenizer.ggml.tokens" + stopWord = metadata['tokenizer.ggml.tokens'][index] ?? ''; + data.siblings[i].stopWord = stopWord; + } catch (err) { + console.log('Failed to get stop word: ', err); + } + } } AllQuantizations.forEach((quantization) => { diff --git a/cortex-js/src/infrastructure/database/sqlite-database.providers.ts b/cortex-js/src/infrastructure/database/sqlite-database.providers.ts index 84700ff49..9c14ee965 100644 --- a/cortex-js/src/infrastructure/database/sqlite-database.providers.ts +++ b/cortex-js/src/infrastructure/database/sqlite-database.providers.ts @@ -1,13 +1,15 @@ import { databaseFile } from 'constant'; +import { resolve } from 'path'; import { DataSource } from 'typeorm'; export const sqliteDatabaseProviders = [ { provide: 'DATA_SOURCE', useFactory: async () => { + const sqlitePath = resolve(__dirname, `../../../${databaseFile}`); const dataSource = new DataSource({ type: 'sqlite', - database: databaseFile, + database: sqlitePath, synchronize: process.env.NODE_ENV !== 'production', entities: [__dirname + '/../**/*.entity{.ts,.js}'], }); diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts index 562e989b2..21fce643f 100644 --- a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts +++ b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts @@ -3,7 +3,6 @@ import { OAIEngineExtension } from '@/domain/abstracts/oai.abstract'; import { PromptTemplate } from '@/domain/models/prompt-template.interface'; import { join, resolve } from 'path'; import { Model, ModelSettingParams } from '@/domain/models/model.interface'; -import { ConfigService } from '@nestjs/config'; import { HttpService } from '@nestjs/axios'; import { defaultCortexCppHost, defaultCortexCppPort } from 'constant'; import { readdirSync } from 'node:fs'; @@ -21,20 +20,17 @@ export default class CortexProvider extends OAIEngineExtension { private loadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/loadmodel`; private unloadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/unloadmodel`; - 
constructor( - private readonly configService: ConfigService, - protected readonly httpService: HttpService, - ) { + constructor(protected readonly httpService: HttpService) { super(httpService); } + modelDir = () => resolve(__dirname, `../../../models`); + override async loadModel( model: Model, settings?: ModelSettingParams, ): Promise { - const modelsContainerDir = - this.configService.get('CORTEX_MODELS_DIR') ?? - resolve('./models'); + const modelsContainerDir = this.modelDir(); const modelFolderFullPath = join(modelsContainerDir, model.id); const ggufFiles = readdirSync(modelFolderFullPath).filter((file) => { @@ -47,7 +43,7 @@ export default class CortexProvider extends OAIEngineExtension { const modelBinaryLocalPath = join(modelFolderFullPath, ggufFiles[0]); - const cpuThreadCount = 1; // TODO: NamH Math.max(1, nitroResourceProbe.numCpuPhysicalCore); + const cpuThreadCount = 1; // TODO: Math.max(1, nitroResourceProbe.numCpuPhysicalCore); const modelSettings = { // This is critical and requires real CPU physical core count (or performance core) model: model.id, diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index ad56f37af..1decf6b97 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -4,6 +4,7 @@ import { join } from 'path'; import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto'; import { HttpService } from '@nestjs/axios'; import { defaultCortexCppHost, defaultCortexCppPort } from 'constant'; +import { existsSync } from 'node:fs'; @Injectable() export class CortexUsecases { @@ -23,26 +24,30 @@ export class CortexUsecases { } const args: string[] = ['1', host, `${port}`]; + const cortexCppPath = join( + __dirname, + '../../../cortex-cpp/cortex-cpp' + + `${process.platform === 'win32' ? 
'.exe' : ''}`, + ); + + if (!existsSync(cortexCppPath)) { + throw new Error('Cortex binary not found'); + } // go up one level to get the binary folder, have to also work on windows - this.cortexProcess = spawn( - join(__dirname, '../../../cortex-cpp/cortex-cpp'), - args, - { - detached: false, - cwd: join(__dirname, '../../../cortex-cpp'), - stdio: 'inherit', - env: { - ...process.env, - // TODO: NamH need to get below information - CUDA_VISIBLE_DEVICES: '0', - // // Vulkan - Support 1 device at a time for now - // ...(executableOptions.vkVisibleDevices?.length > 0 && { - // GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], - // }), - }, + this.cortexProcess = spawn(cortexCppPath, args, { + detached: false, + cwd: join(__dirname, '../../../cortex-cpp'), + stdio: 'inherit', + env: { + ...process.env, + CUDA_VISIBLE_DEVICES: '0', + // // Vulkan - Support 1 device at a time for now + // ...(executableOptions.vkVisibleDevices?.length > 0 && { + // GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], + // }), }, - ); + }); this.registerCortexEvents(); @@ -73,7 +78,7 @@ export class CortexUsecases { .delete(`http://${host}:${port}/processmanager/destroy`) .toPromise(); } catch (err) { - console.error(err); + console.error(err.response.data); } finally { this.cortexProcess?.kill(); return { diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index 0a468ebb6..d7843d7d8 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -5,7 +5,7 @@ import { BadRequestException, Inject, Injectable } from '@nestjs/common'; import { Repository } from 'typeorm'; import { Model, ModelFormat } from '@/domain/models/model.interface'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; -import { join, basename } from 'path'; +import { join, basename, resolve } from 'path'; import { promises, createWriteStream, @@ -14,7 +14,6 @@ import { rmdirSync, } from 'fs'; import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; -import { ConfigService } from '@nestjs/config'; import { ExtensionRepository } from '@/domain/repositories/extension.interface'; import { EngineExtension } from '@/domain/abstracts/engine.abstract'; import { HttpService } from '@nestjs/axios'; @@ -26,7 +25,6 @@ export class ModelsUsecases { @Inject('MODEL_REPOSITORY') private readonly modelRepository: Repository, private readonly extensionRepository: ExtensionRepository, - private readonly configService: ConfigService, private readonly httpService: HttpService, ) {} @@ -65,8 +63,7 @@ export class ModelsUsecases { } async remove(id: string) { - const modelsContainerDir = - this.configService.get('CORTEX_MODELS_DIR') ?? './models'; + const modelsContainerDir = this.modelDir(); if (!existsSync(modelsContainerDir)) { return; @@ -150,6 +147,8 @@ export class ModelsUsecases { }); } + modelDir = () => resolve(__dirname, `../../../models`); + async downloadModel(modelId: string, callback?: (progress: number) => void) { const model = await this.getModelOrThrow(modelId); @@ -165,8 +164,7 @@ export class ModelsUsecases { } const fileName = basename(downloadUrl); - const modelsContainerDir = - this.configService.get('CORTEX_MODELS_DIR') ?? './models'; + const modelsContainerDir = this.modelDir(); if (!existsSync(modelsContainerDir)) { mkdirSync(modelsContainerDir, { recursive: true });
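
Note on the new pull flow (not part of the patch): ModelsCliUsecases.pullHuggingFaceModel now prompts for a quantization and derives a stop word by reading GGUF metadata remotely through the new @huggingface/gguf dependency ('tokenizer.ggml.eos_token_id' indexing into 'tokenizer.ggml.tokens'). Below is a minimal standalone TypeScript sketch of that lookup, assuming a reachable .gguf URL; the function name and the placeholder URL in the usage comment are illustrative and do not appear in the PR.

import { gguf } from '@huggingface/gguf';

// Resolve a model's stop word the same way the patch does: read the GGUF
// metadata over HTTP and look up the EOS token in the tokenizer vocabulary.
async function resolveStopWord(ggufUrl: string): Promise<string | undefined> {
  try {
    const { metadata } = await gguf(ggufUrl);
    // The metadata keys are model-dependent, so cast before indexing
    // (the patch uses @ts-expect-error for the same reason).
    const meta = metadata as Record<string, unknown>;
    const eosId = meta['tokenizer.ggml.eos_token_id'] as number | undefined;
    const tokens = meta['tokenizer.ggml.tokens'] as string[] | undefined;
    if (eosId === undefined || !tokens) return undefined;
    return tokens[eosId];
  } catch (err) {
    // Mirror the patch's behaviour: log and fall back to no stop word.
    console.log('Failed to get stop word: ', err);
    return undefined;
  }
}

// Hypothetical usage (placeholder URL):
// resolveStopWord('https://huggingface.co/<owner>/<repo>/resolve/main/model-Q4_K_M.gguf')
//   .then((stop) => console.log('stop word:', stop));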