diff --git a/.github/workflows/cortex-cpp-build.yml b/.github/workflows/cortex-cpp-build.yml index e75be1c33..80bf677de 100644 --- a/.github/workflows/cortex-cpp-build.yml +++ b/.github/workflows/cortex-cpp-build.yml @@ -162,6 +162,11 @@ jobs: with: submodules: recursive + - uses: actions/setup-dotnet@v3 + if: runner.os == 'Windows' + with: + dotnet-version: "8.0.x" + - name: Install choco on Windows if: runner.os == 'Windows' run: | @@ -189,13 +194,12 @@ jobs: run: | cd cortex-cpp make pre-package - + - name: Code Signing run: | cd cortex-cpp make codesign CODE_SIGN=true AZURE_KEY_VAULT_URI="${{ secrets.AZURE_KEY_VAULT_URI }}" AZURE_CLIENT_ID="${{ secrets.AZURE_CLIENT_ID }}" AZURE_TENANT_ID="${{ secrets.AZURE_TENANT_ID }}" AZURE_CLIENT_SECRET="${{ secrets.AZURE_CLIENT_SECRET }}" AZURE_CERT_NAME="${{ secrets.AZURE_CERT_NAME }}" DEVELOPER_ID="${{ secrets.DEVELOPER_ID }}" - - name: Package run: | cd cortex-cpp diff --git a/cortex-cpp/Makefile b/cortex-cpp/Makefile index 80614f14d..98486f023 100644 --- a/cortex-cpp/Makefile +++ b/cortex-cpp/Makefile @@ -62,7 +62,8 @@ endif ifeq ($(OS),Windows_NT) @powershell -Command "dotnet tool install --global AzureSignTool;" - @powershell -Command "Get-ChildItem -Path .\cortex-cpp -Recurse | ForEach-Object { & 'azuresigntool.exe' sign -kvu '$(AZURE_KEY_VAULT_URI)' -kvi '$(AZURE_CLIENT_ID)' -kvt '$(AZURE_TENANT_ID)' -kvs '$(AZURE_CLIENT_SECRET)' -kvc '$(AZURE_CERT_NAME)' -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v '$_.FullName' };" + @powershell -Command 'azuresigntool.exe sign -kvu "$(AZURE_KEY_VAULT_URI)" -kvi "$(AZURE_CLIENT_ID)" -kvt "$(AZURE_TENANT_ID)" -kvs "$(AZURE_CLIENT_SECRET)" -kvc "$(AZURE_CERT_NAME)" -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\cortex-cpp\cortex-cpp.exe";' + @powershell -Command 'azuresigntool.exe sign -kvu "$(AZURE_KEY_VAULT_URI)" -kvi "$(AZURE_CLIENT_ID)" -kvt "$(AZURE_TENANT_ID)" -kvs "$(AZURE_CLIENT_SECRET)" -kvc "$(AZURE_CERT_NAME)" -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\cortex-cpp\engines\cortex.llamacpp\engine.dll";' else ifeq ($(shell uname -s),Linux) @echo "Skipping Code Sign for linux" @exit 0 @@ -102,4 +103,5 @@ ifeq ($(OS),Windows_NT) else ifeq ($(shell uname -s),Linux) @rm -rf build; rm -rf build-deps; rm -rf cortex-cpp; rm -rf cortex-cpp.tar.gz; else - @rm -rf build; rm -rf build-deps; rm -rf cortex-cpp; rm -rf cortex-cpp.tar.gz; \ No newline at end of file + @rm -rf build; rm -rf build-deps; rm -rf cortex-cpp; rm -rf cortex-cpp.tar.gz; +endif \ No newline at end of file diff --git a/cortex-js/constant.ts b/cortex-js/constant.ts index 8f1ba5008..b9a983657 100644 --- a/cortex-js/constant.ts +++ b/cortex-js/constant.ts @@ -4,3 +4,6 @@ export const databaseFile = `${databaseName}.db`; export const defaultCortexJsHost = 'localhost'; export const defaultCortexJsPort = 7331; + +export const defaultCortexCppHost = '127.0.0.1'; +export const defaultCortexCppPort = 3928; diff --git a/cortex-js/src/app.module.ts b/cortex-js/src/app.module.ts index 748ca9bb9..3120e90c1 100644 --- a/cortex-js/src/app.module.ts +++ b/cortex-js/src/app.module.ts @@ -6,20 +6,19 @@ import { DevtoolsModule } from '@nestjs/devtools-integration'; import { DatabaseModule } from './infrastructure/database/database.module'; import { ChatModule } from './usecases/chat/chat.module'; import { AssistantsModule } from './usecases/assistants/assistants.module'; -import { InferenceSettingsModule } from './usecases/inference-settings/inference-settings.module'; import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module'; import { CortexModule } from './usecases/cortex/cortex.module'; import { ConfigModule } from '@nestjs/config'; +import { env } from 'node:process'; @Module({ imports: [ DevtoolsModule.register({ - http: process.env.NODE_ENV !== 'production', + http: env.NODE_ENV !== 'production', }), ConfigModule.forRoot({ isGlobal: true, - envFilePath: - process.env.NODE_ENV === 'production' ? '.env' : '.env.development', + envFilePath: env.NODE_ENV !== 'production' ? '.env.development' : '.env', }), DatabaseModule, MessagesModule, @@ -27,7 +26,6 @@ import { ConfigModule } from '@nestjs/config'; ModelsModule, ChatModule, AssistantsModule, - InferenceSettingsModule, CortexModule, ExtensionModule, ], diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts index d1c0619f1..6aed1aff4 100644 --- a/cortex-js/src/command.module.ts +++ b/cortex-js/src/command.module.ts @@ -5,8 +5,7 @@ import { DatabaseModule } from './infrastructure/database/database.module'; import { ConfigModule } from '@nestjs/config'; import { CortexModule } from './usecases/cortex/cortex.module'; import { ServeCommand } from './infrastructure/commanders/serve.command'; -import { PullCommand } from './infrastructure/commanders/pull.command'; -import { InferenceCommand } from './infrastructure/commanders/inference.command'; +import { ChatCommand } from './infrastructure/commanders/chat.command'; import { ModelsCommand } from './infrastructure/commanders/models.command'; import { StartCommand } from './infrastructure/commanders/start.command'; import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module'; @@ -14,6 +13,12 @@ import { ChatModule } from './usecases/chat/chat.module'; import { InitCommand } from './infrastructure/commanders/init.command'; import { HttpModule } from '@nestjs/axios'; import { CreateInitQuestions } from './infrastructure/commanders/inquirer/init.questions'; +import { ModelStartCommand } from './infrastructure/commanders/models/model-start.command'; +import { ModelStopCommand } from './infrastructure/commanders/models/model-stop.command'; +import { ModelListCommand } from './infrastructure/commanders/models/model-list.command'; +import { ModelGetCommand } from './infrastructure/commanders/models/model-get.command'; +import { ModelRemoveCommand } from './infrastructure/commanders/models/model-remove.command'; +import { ModelPullCommand } from './infrastructure/commanders/models/model-pull.command'; @Module({ imports: [ @@ -32,12 +37,19 @@ import { CreateInitQuestions } from './infrastructure/commanders/inquirer/init.q providers: [ BasicCommand, ModelsCommand, - PullCommand, ServeCommand, - InferenceCommand, + ChatCommand, StartCommand, InitCommand, CreateInitQuestions, + + // Model commands + ModelStartCommand, + ModelStopCommand, + ModelListCommand, + ModelGetCommand, + ModelRemoveCommand, + ModelPullCommand, ], }) export class CommandModule {} diff --git a/cortex-js/src/domain/abstracts/engine.abstract.ts b/cortex-js/src/domain/abstracts/engine.abstract.ts index 596f5eadf..f21f6664b 100644 --- a/cortex-js/src/domain/abstracts/engine.abstract.ts +++ b/cortex-js/src/domain/abstracts/engine.abstract.ts @@ -1,4 +1,5 @@ -import { Model } from '../models/model.interface'; +/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */ +import { Model, ModelSettingParams } from '../models/model.interface'; import { Extension } from './extension.abstract'; export abstract class EngineExtension extends Extension { @@ -6,7 +7,10 @@ export abstract class EngineExtension extends Extension { abstract inference(completion: any, req: any, stream: any, res?: any): void; - async loadModel(model: Model): Promise {} + async loadModel( + model: Model, + settingParams?: ModelSettingParams, + ): Promise {} async unloadModel(modelId: string): Promise {} } diff --git a/cortex-js/src/infrastructure/commanders/basic-command.commander.ts b/cortex-js/src/infrastructure/commanders/basic-command.commander.ts index a5434c910..fd2c39832 100644 --- a/cortex-js/src/infrastructure/commanders/basic-command.commander.ts +++ b/cortex-js/src/infrastructure/commanders/basic-command.commander.ts @@ -1,19 +1,19 @@ import { RootCommand, CommandRunner, Option } from 'nest-commander'; -import { PullCommand } from './pull.command'; import { ServeCommand } from './serve.command'; -import { InferenceCommand } from './inference.command'; +import { ChatCommand } from './chat.command'; import { ModelsCommand } from './models.command'; import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; import { defaultCortexJsHost, defaultCortexJsPort } from 'constant'; import { InitCommand } from './init.command'; +import { StartCommand } from './start.command'; @RootCommand({ subCommands: [ ModelsCommand, - PullCommand, ServeCommand, - InferenceCommand, + ChatCommand, InitCommand, + StartCommand, ], }) export class BasicCommand extends CommandRunner { @@ -26,15 +26,17 @@ export class BasicCommand extends CommandRunner { switch (command) { case 'start': - const host = options?.host || defaultCortexJsHost; - const port = options?.port || defaultCortexJsPort; + const host = options?.host || 'localhost'; + const port = options?.port || 3928; return this.cortexUsecases .startCortex(host, port) .then((e) => console.log(e)); + case 'stop': return this.cortexUsecases .stopCortex(defaultCortexJsHost, defaultCortexJsPort) .then((e) => console.log(e)); + default: console.error(`Command ${command} is not supported`); return; diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts b/cortex-js/src/infrastructure/commanders/chat.command.ts new file mode 100644 index 000000000..37025ca34 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/chat.command.ts @@ -0,0 +1,15 @@ +import { ChatUsecases } from '@/usecases/chat/chat.usecases'; +import { CommandRunner, SubCommand } from 'nest-commander'; +import { ChatCliUsecases } from './usecases/chat.cli.usecases'; + +@SubCommand({ name: 'chat' }) +export class ChatCommand extends CommandRunner { + constructor(private readonly chatUsecases: ChatUsecases) { + super(); + } + + async run(input: string[]): Promise { + const chatCliService = new ChatCliUsecases(this.chatUsecases); + return chatCliService.run(input); + } +} diff --git a/cortex-js/src/infrastructure/commanders/init.command.ts b/cortex-js/src/infrastructure/commanders/init.command.ts index 5f916663f..9340da330 100644 --- a/cortex-js/src/infrastructure/commanders/init.command.ts +++ b/cortex-js/src/infrastructure/commanders/init.command.ts @@ -4,6 +4,7 @@ import { resolve } from 'path'; import { HttpService } from '@nestjs/axios'; import { Presets, SingleBar } from 'cli-progress'; import decompress from 'decompress'; +import { exit } from 'node:process'; @SubCommand({ name: 'init', aliases: ['setup'] }) export class InitCommand extends CommandRunner { @@ -41,7 +42,7 @@ export class InitCommand extends CommandRunner { if (!res?.data) { console.log('Failed to fetch releases'); - process.exit(1); + exit(1); } let release = res?.data; @@ -56,7 +57,7 @@ export class InitCommand extends CommandRunner { if (!toDownloadAsset) { console.log(`Could not find engine file ${engineFileName}`); - process.exit(1); + exit(1); } console.log(`Downloading engine file ${engineFileName}`); @@ -107,9 +108,9 @@ export class InitCommand extends CommandRunner { ); } catch (e) { console.log(e); - process.exit(1); + exit(1); } - process.exit(0); + exit(0); }; parseEngineFileName = (options: { @@ -128,7 +129,7 @@ export class InitCommand extends CommandRunner { const cudaVersion = options.runMode === 'GPU' ? options.gpuType === 'Nvidia' - ? '-cuda-' + (options.cudaVersion === '11' ? '11.7' : '12.2') + ? '-cuda-' + (options.cudaVersion === '11' ? '11-7' : '12-0') : '-vulkan' : ''; const instructions = options.instructions ? `-${options.instructions}` : ''; diff --git a/cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts b/cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts index 2aadbe490..04df43158 100644 --- a/cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts +++ b/cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts @@ -1,4 +1,5 @@ import { Question, QuestionSet } from 'nest-commander'; +import { platform } from 'node:process'; @QuestionSet({ name: 'create-init-questions' }) export class CreateInitQuestions { @@ -8,7 +9,7 @@ export class CreateInitQuestions { name: 'runMode', default: 'CPU', choices: ['CPU', 'GPU'], - when: () => process.platform !== 'darwin', + when: () => platform !== 'darwin', }) parseRunMode(val: string) { return val; @@ -31,7 +32,7 @@ export class CreateInitQuestions { message: 'Select CPU instructions set', name: 'instructions', choices: ['AVX2', 'AVX', 'AVX-512'], - when: () => process.platform !== 'darwin', + when: () => platform !== 'darwin', }) parseContent(val: string) { return val; diff --git a/cortex-js/src/infrastructure/commanders/models.command.ts b/cortex-js/src/infrastructure/commanders/models.command.ts index e20d27970..4b0f12d73 100644 --- a/cortex-js/src/infrastructure/commanders/models.command.ts +++ b/cortex-js/src/infrastructure/commanders/models.command.ts @@ -1,52 +1,22 @@ -import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CommandRunner, SubCommand } from 'nest-commander'; -import { PullCommand } from './pull.command'; -import { StartCommand } from './start.command'; +import { ModelStartCommand } from './models/model-start.command'; +import { ModelGetCommand } from './models/model-get.command'; +import { ModelListCommand } from './models/model-list.command'; +import { ModelStopCommand } from './models/model-stop.command'; +import { ModelPullCommand } from './models/model-pull.command'; +import { ModelRemoveCommand } from './models/model-remove.command'; -@SubCommand({ name: 'models', subCommands: [PullCommand, StartCommand] }) +@SubCommand({ + name: 'models', + subCommands: [ + ModelPullCommand, + ModelStartCommand, + ModelStopCommand, + ModelListCommand, + ModelGetCommand, + ModelRemoveCommand, + ], +}) export class ModelsCommand extends CommandRunner { - constructor(private readonly modelsUsecases: ModelsUsecases) { - super(); - } - - async run(input: string[]): Promise { - const command = input[0]; - const modelId = input[1]; - - if (command !== 'list') { - if (!modelId) { - console.log('Model ID is required'); - return; - } - } - - switch (command) { - case 'list': - this.modelsUsecases.findAll().then(console.log); - return; - case 'get': - this.modelsUsecases.findOne(modelId).then(console.log); - return; - case 'remove': - this.modelsUsecases.remove(modelId).then(console.log); - return; - - case 'stop': - return this.modelsUsecases - .stopModel(modelId) - .then(console.log) - .catch(console.error); - - case 'stats': - case 'fetch': - case 'build': { - console.log('Command is not supported yet'); - return; - } - - default: - console.error(`Command ${command} is not supported`); - return; - } - } + async run(): Promise {} } diff --git a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts new file mode 100644 index 000000000..295d68379 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts @@ -0,0 +1,22 @@ +import { ModelsUsecases } from '@/usecases/models/models.usecases'; +import { CommandRunner, SubCommand } from 'nest-commander'; +import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; +import { exit } from 'node:process'; + +@SubCommand({ name: 'get' }) +export class ModelGetCommand extends CommandRunner { + constructor(private readonly modelsUsecases: ModelsUsecases) { + super(); + } + + async run(input: string[]): Promise { + if (input.length === 0) { + console.error('Model ID is required'); + exit(1); + } + + const modelsCliUsecases = new ModelsCliUsecases(this.modelsUsecases); + const models = await modelsCliUsecases.getModel(input[0]); + console.log(models); + } +} diff --git a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts new file mode 100644 index 000000000..61280f058 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts @@ -0,0 +1,16 @@ +import { ModelsUsecases } from '@/usecases/models/models.usecases'; +import { CommandRunner, SubCommand } from 'nest-commander'; +import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; + +@SubCommand({ name: 'list' }) +export class ModelListCommand extends CommandRunner { + constructor(private readonly modelsUsecases: ModelsUsecases) { + super(); + } + + async run(): Promise { + const modelsCliUsecases = new ModelsCliUsecases(this.modelsUsecases); + const models = await modelsCliUsecases.listAllModels(); + console.log(models); + } +} diff --git a/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts new file mode 100644 index 000000000..68afaeb81 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/models/model-pull.command.ts @@ -0,0 +1,31 @@ +import { ModelsUsecases } from '@/usecases/models/models.usecases'; +import { CommandRunner, SubCommand } from 'nest-commander'; +import { Presets, SingleBar } from 'cli-progress'; +import { exit } from 'node:process'; +import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; + +@SubCommand({ name: 'pull', aliases: ['download'] }) +export class ModelPullCommand extends CommandRunner { + constructor(private readonly modelsUsecases: ModelsUsecases) { + super(); + } + + async run(input: string[]) { + if (input.length < 1) { + console.error('Model ID is required'); + exit(1); + } + + const bar = new SingleBar({}, Presets.shades_classic); + bar.start(100, 0); + const callback = (progress: number) => { + bar.update(progress); + }; + await new ModelsCliUsecases(this.modelsUsecases).pullModel( + input[0], + callback, + ); + console.log('\nDownload complete!'); + exit(0); + } +} diff --git a/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts b/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts new file mode 100644 index 000000000..35d9c2d2c --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/models/model-remove.command.ts @@ -0,0 +1,22 @@ +import { ModelsUsecases } from '@/usecases/models/models.usecases'; +import { CommandRunner, SubCommand } from 'nest-commander'; +import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; +import { exit } from 'node:process'; + +@SubCommand({ name: 'remove' }) +export class ModelRemoveCommand extends CommandRunner { + constructor(private readonly modelsUsecases: ModelsUsecases) { + super(); + } + + async run(input: string[]): Promise { + if (input.length === 0) { + console.error('Model ID is required'); + exit(1); + } + + const modelsCliUsecases = new ModelsCliUsecases(this.modelsUsecases); + const result = await modelsCliUsecases.removeModel(input[0]); + console.log(result); + } +} diff --git a/cortex-js/src/infrastructure/commanders/models/model-start.command.ts b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts new file mode 100644 index 000000000..1d254cae3 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/models/model-start.command.ts @@ -0,0 +1,21 @@ +import { ModelsUsecases } from '@/usecases/models/models.usecases'; +import { CommandRunner, SubCommand } from 'nest-commander'; +import { exit } from 'node:process'; +import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; + +@SubCommand({ name: 'start' }) +export class ModelStartCommand extends CommandRunner { + constructor(private readonly modelsUsecases: ModelsUsecases) { + super(); + } + + async run(input: string[]): Promise { + if (input.length === 0) { + console.error('Model ID is required'); + exit(1); + } + + const modelsCliUsecases = new ModelsCliUsecases(this.modelsUsecases); + await modelsCliUsecases.startModel(input[0]); + } +} diff --git a/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts b/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts new file mode 100644 index 000000000..8ac43647f --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/models/model-stop.command.ts @@ -0,0 +1,21 @@ +import { ModelsUsecases } from '@/usecases/models/models.usecases'; +import { CommandRunner, SubCommand } from 'nest-commander'; +import { exit } from 'node:process'; +import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; + +@SubCommand({ name: 'stop' }) +export class ModelStopCommand extends CommandRunner { + constructor(private readonly modelsUsecases: ModelsUsecases) { + super(); + } + + async run(input: string[]): Promise { + if (input.length === 0) { + console.error('Model ID is required'); + exit(1); + } + + const modelsCliUsecases = new ModelsCliUsecases(this.modelsUsecases); + await modelsCliUsecases.stopModel(input[0]); + } +} diff --git a/cortex-js/src/infrastructure/commanders/start.command.ts b/cortex-js/src/infrastructure/commanders/start.command.ts index 734d2d108..e0f4b5fef 100644 --- a/cortex-js/src/infrastructure/commanders/start.command.ts +++ b/cortex-js/src/infrastructure/commanders/start.command.ts @@ -1,28 +1,38 @@ import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CommandRunner, SubCommand } from 'nest-commander'; -import { LoadModelDto } from '../dtos/models/load-model.dto'; import { resolve } from 'path'; import { existsSync } from 'fs'; +import { Model } from '@/domain/models/model.interface'; +import { exit } from 'node:process'; +import { ChatUsecases } from '@/usecases/chat/chat.usecases'; +import { ChatCliUsecases } from './usecases/chat.cli.usecases'; @SubCommand({ name: 'start', aliases: ['run'] }) export class StartCommand extends CommandRunner { constructor( private readonly modelsUsecases: ModelsUsecases, private readonly cortexUsecases: CortexUsecases, + private readonly chatUsecases: ChatUsecases, ) { super(); } async run(input: string[]): Promise { + if (input.length === 0) { + console.error('Model ID is required'); + exit(1); + } + const modelId = input[0]; + const model = await this.getModelOrStop(modelId); - if (!modelId) { - console.log('Model ID is required'); - return; - } return this.startCortex() - .then(() => this.startModel(modelId)) + .then(() => this.startModel(model.id)) + .then(() => { + const chatCliUsecases = new ChatCliUsecases(this.chatUsecases); + return chatCliUsecases.run(input); + }) .then(console.log) .catch(console.error); } @@ -30,26 +40,22 @@ export class StartCommand extends CommandRunner { private async startCortex() { if (!existsSync(resolve(this.rootDir(), 'cortex-cpp'))) { console.log('Please init the cortex by running cortex init command!'); - process.exit(0); + exit(0); } - const host = '127.0.0.1'; - const port = '3928'; - return this.cortexUsecases.startCortex(host, port); + return this.cortexUsecases.startCortex(); } + private async startModel(modelId: string) { - const settings = { - cpu_threads: 10, - ctx_len: 2048, - embedding: false, - prompt_template: - '{system_message}\n### Instruction: {prompt}\n### Response:', - system_prompt: '', - user_prompt: '\n### Instruction: ', - ai_prompt: '\n### Response:', - ngl: 100, - }; - const loadModelDto: LoadModelDto = { modelId, settings }; - return this.modelsUsecases.startModel(loadModelDto); + return this.modelsUsecases.startModel(modelId); + } + + private async getModelOrStop(modelId: string): Promise { + const model = await this.modelsUsecases.findOne(modelId); + if (!model) { + console.debug('Model not found'); + exit(1); + } + return model; } rootDir = () => resolve(__dirname, `../../../`); diff --git a/cortex-js/src/infrastructure/commanders/inference.command.ts b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts similarity index 70% rename from cortex-js/src/infrastructure/commanders/inference.command.ts rename to cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts index fc94058df..fe0a62b4b 100644 --- a/cortex-js/src/infrastructure/commanders/inference.command.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts @@ -1,36 +1,39 @@ import { ChatUsecases } from '@/usecases/chat/chat.usecases'; -import { CommandRunner, SubCommand } from 'nest-commander'; -import { CreateChatCompletionDto } from '../dtos/chat/create-chat-completion.dto'; import { ChatCompletionRole } from '@/domain/models/message.interface'; -import { stdout } from 'process'; +import { exit, stdin, stdout } from 'node:process'; import * as readline from 'node:readline/promises'; import { ChatStreamEvent } from '@/domain/abstracts/oai.abstract'; -import { ChatCompletionMessage } from '../dtos/chat/chat-completion-message.dto'; +import { ChatCompletionMessage } from '@/infrastructure/dtos/chat/chat-completion-message.dto'; +import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-completion.dto'; -@SubCommand({ name: 'chat' }) -export class InferenceCommand extends CommandRunner { - exitClause = 'exit()'; - userIndicator = '>> '; - exitMessage = 'Bye!'; +// TODO: make this class injectable +export class ChatCliUsecases { + private exitClause = 'exit()'; + private userIndicator = '>> '; + private exitMessage = 'Bye!'; - constructor(private readonly chatUsecases: ChatUsecases) { - super(); - } + constructor(private readonly chatUsecases: ChatUsecases) {} + + async run(input: string[]): Promise { + if (input.length == 0) { + console.error('Please provide a model id.'); + exit(1); + } - async run(): Promise { + const modelId = input[0]; console.log(`Inorder to exit, type '${this.exitClause}'.`); const messages: ChatCompletionMessage[] = []; const rl = readline.createInterface({ - input: process.stdin, - output: process.stdout, + input: stdin, + output: stdout, prompt: this.userIndicator, }); rl.prompt(); rl.on('close', () => { console.log(this.exitMessage); - process.exit(0); + exit(0); }); rl.on('line', (userInput: string) => { @@ -46,7 +49,7 @@ export class InferenceCommand extends CommandRunner { const chatDto: CreateChatCompletionDto = { messages, - model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', + model: modelId, stream: true, max_tokens: 2048, stop: [], diff --git a/cortex-js/src/infrastructure/commanders/pull.command.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts similarity index 69% rename from cortex-js/src/infrastructure/commanders/pull.command.ts rename to cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts index 825b859b2..3445b708a 100644 --- a/cortex-js/src/infrastructure/commanders/pull.command.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts @@ -1,8 +1,7 @@ +import { exit } from 'node:process'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; -import { CommandRunner, SubCommand } from 'nest-commander'; -import { CreateModelDto } from '../dtos/models/create-model.dto'; -import { ModelFormat } from '@/domain/models/model.interface'; -import { Presets, SingleBar } from 'cli-progress'; +import { Model, ModelFormat } from '@/domain/models/model.interface'; +import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; const AllQuantizations = [ 'Q3_K_S', @@ -27,32 +26,52 @@ const AllQuantizations = [ 'COPY', ]; -@SubCommand({ name: 'pull', aliases: ['download'] }) -export class PullCommand extends CommandRunner { - constructor(private readonly modelsUsecases: ModelsUsecases) { - super(); +// TODO: make this class injectable +export class ModelsCliUsecases { + constructor(private readonly modelsUsecases: ModelsUsecases) {} + + async startModel(modelId: string): Promise { + await this.getModelOrStop(modelId); + await this.modelsUsecases.startModel(modelId); + } + + async stopModel(modelId: string): Promise { + await this.getModelOrStop(modelId); + await this.modelsUsecases.stopModel(modelId); } - async run(input: string[]): Promise { - if (input.length < 1) { - return Promise.reject('Model ID is required'); + private async getModelOrStop(modelId: string): Promise { + const model = await this.modelsUsecases.findOne(modelId); + if (!model) { + console.debug('Model not found'); + exit(1); } + return model; + } + + async listAllModels(): Promise { + return this.modelsUsecases.findAll(); + } - const modelId = input[0]; + async getModel(modelId: string): Promise { + const model = await this.getModelOrStop(modelId); + return model; + } + + async removeModel(modelId: string) { + await this.getModelOrStop(modelId); + return this.modelsUsecases.remove(modelId); + } + + async pullModel(modelId: string, callback: (progress: number) => void) { if (modelId.includes('/')) { await this.pullHuggingFaceModel(modelId); } - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - await this.modelsUsecases.downloadModel({ modelId }, (progress) => { - bar.update(progress); - }); - console.log('\nDownload complete!'); - process.exit(0); + await this.modelsUsecases.downloadModel(modelId, callback); } - async pullHuggingFaceModel(modelId: string) { + private async pullHuggingFaceModel(modelId: string) { const data = await this.fetchHuggingFaceRepoData(modelId); // TODO: add select options diff --git a/cortex-js/src/infrastructure/controllers/inference-settings.controller.spec.ts b/cortex-js/src/infrastructure/controllers/inference-settings.controller.spec.ts deleted file mode 100644 index 05097ddae..000000000 --- a/cortex-js/src/infrastructure/controllers/inference-settings.controller.spec.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { InferenceSettingsController } from './inference-settings.controller'; -import { InferenceSettingsUsecases } from '@/usecases/inference-settings/inference-settings.usecases'; - -describe('InferenceSettingsController', () => { - let controller: InferenceSettingsController; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - controllers: [InferenceSettingsController], - providers: [InferenceSettingsUsecases], - }).compile(); - - controller = module.get( - InferenceSettingsController, - ); - }); - - it('should be defined', () => { - expect(controller).toBeDefined(); - }); -}); diff --git a/cortex-js/src/infrastructure/controllers/inference-settings.controller.ts b/cortex-js/src/infrastructure/controllers/inference-settings.controller.ts deleted file mode 100644 index 805a4c1b3..000000000 --- a/cortex-js/src/infrastructure/controllers/inference-settings.controller.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { - Controller, - Get, - Post, - Body, - Patch, - Param, - Delete, -} from '@nestjs/common'; -import { InferenceSettingsUsecases } from '@/usecases/inference-settings/inference-settings.usecases'; -import { CreateInferenceSettingDto } from '@/infrastructure/dtos/inference-settings/create-inference-setting.dto'; -import { UpdateInferenceSettingDto } from '@/infrastructure/dtos/inference-settings/update-inference-setting.dto'; -import { ApiTags } from '@nestjs/swagger'; - -@ApiTags('Inference Settings') -@Controller('inference-settings') -export class InferenceSettingsController { - constructor( - private readonly inferenceSettingsService: InferenceSettingsUsecases, - ) {} - - @Post() - create(@Body() createInferenceSettingDto: CreateInferenceSettingDto) { - return this.inferenceSettingsService.create(createInferenceSettingDto); - } - - @Get() - findAll() { - return this.inferenceSettingsService.findAll(); - } - - @Get(':id') - findOne(@Param('id') id: string) { - return this.inferenceSettingsService.findOne(id); - } - - @Patch(':id') - update( - @Param('id') id: string, - @Body() updateInferenceSettingDto: UpdateInferenceSettingDto, - ) { - return this.inferenceSettingsService.update(id, updateInferenceSettingDto); - } - - @Delete(':id') - remove(@Param('id') id: string) { - return this.inferenceSettingsService.remove(id); - } -} diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts index 907349fc3..2983d1c31 100644 --- a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ b/cortex-js/src/infrastructure/controllers/models.controller.ts @@ -12,53 +12,66 @@ import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; import { ApiResponse, ApiTags } from '@nestjs/swagger'; -import { LoadModelSuccessDto } from '@/infrastructure/dtos/models/load-model-success.dto'; -import { LoadModelDto } from '@/infrastructure/dtos/models/load-model.dto'; -import { DownloadModelDto } from '@/infrastructure/dtos/models/download-model.dto'; +import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; +import { ModelSettingParamsDto } from '../dtos/models/model-setting-params.dto'; @ApiTags('Models') @Controller('models') export class ModelsController { - constructor(private readonly modelsService: ModelsUsecases) {} + constructor(private readonly modelsUsecases: ModelsUsecases) {} @Post() create(@Body() createModelDto: CreateModelDto) { - return this.modelsService.create(createModelDto); + return this.modelsUsecases.create(createModelDto); } @HttpCode(200) @ApiResponse({ status: 200, - description: 'The model has been loaded successfully.', - type: LoadModelSuccessDto, + description: 'The model has been started successfully.', + type: StartModelSuccessDto, }) - @Post('load') - load(@Body() loadModelDto: LoadModelDto) { - return this.modelsService.startModel(loadModelDto); + @Post(':modelId/start') + startModel( + @Param('modelId') modelId: string, + @Body() settings: ModelSettingParamsDto, + ) { + return this.modelsUsecases.startModel(modelId, settings); } - @Post('download') - downloadModel(@Body() downloadModelDto: DownloadModelDto) { - return this.modelsService.downloadModel(downloadModelDto); + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'The model has been stopped successfully.', + type: StartModelSuccessDto, + }) + @Post(':modelId/stop') + stopModel(@Param('modelId') modelId: string) { + return this.modelsUsecases.stopModel(modelId); + } + + @Get('download/:modelId') + downloadModel(@Param('modelId') modelId: string) { + return this.modelsUsecases.downloadModel(modelId); } @Get() findAll() { - return this.modelsService.findAll(); + return this.modelsUsecases.findAll(); } @Get(':id') findOne(@Param('id') id: string) { - return this.modelsService.findOne(id); + return this.modelsUsecases.findOne(id); } @Patch(':id') update(@Param('id') id: string, @Body() updateModelDto: UpdateModelDto) { - return this.modelsService.update(id, updateModelDto); + return this.modelsUsecases.update(id, updateModelDto); } @Delete(':id') remove(@Param('id') id: string) { - return this.modelsService.remove(id); + return this.modelsUsecases.remove(id); } } diff --git a/cortex-js/src/infrastructure/database/database.module.ts b/cortex-js/src/infrastructure/database/database.module.ts index ace7da1c9..016066219 100644 --- a/cortex-js/src/infrastructure/database/database.module.ts +++ b/cortex-js/src/infrastructure/database/database.module.ts @@ -4,7 +4,6 @@ import { sqliteDatabaseProviders } from './sqlite-database.providers'; import { modelProviders } from './providers/model.providers'; import { assistantProviders } from './providers/assistant.providers'; import { messageProviders } from './providers/message.providers'; -import { inferenceSettingProviders } from './providers/inference-setting.providers'; @Module({ providers: [ @@ -13,14 +12,12 @@ import { inferenceSettingProviders } from './providers/inference-setting.provide ...modelProviders, ...assistantProviders, ...messageProviders, - ...inferenceSettingProviders, ], exports: [ ...threadProviders, ...modelProviders, ...assistantProviders, ...messageProviders, - ...inferenceSettingProviders, ], }) export class DatabaseModule {} diff --git a/cortex-js/src/infrastructure/database/providers/inference-setting.providers.ts b/cortex-js/src/infrastructure/database/providers/inference-setting.providers.ts deleted file mode 100644 index 621d25fd8..000000000 --- a/cortex-js/src/infrastructure/database/providers/inference-setting.providers.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { InferenceSettingEntity } from '@/infrastructure/entities/inference-setting.entity'; -import { DataSource } from 'typeorm'; - -export const inferenceSettingProviders = [ - { - provide: 'INFERENCE_SETTING_REPOSITORY', - useFactory: (dataSource: DataSource) => - dataSource.getRepository(InferenceSettingEntity), - inject: ['DATA_SOURCE'], - }, -]; diff --git a/cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts b/cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts index 12b9e6d2f..4745a3b3d 100644 --- a/cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts +++ b/cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts @@ -1,10 +1,24 @@ -import { IsIP, IsString } from 'class-validator'; +import { ApiProperty } from '@nestjs/swagger'; +import { IsIP, IsNumber, IsString, Max, Min } from 'class-validator'; +import { defaultCortexCppHost, defaultCortexCppPort } from 'constant'; export class StartCortexDto { + @ApiProperty({ + name: 'host', + description: 'Cortexcpp host', + default: defaultCortexCppHost, + }) @IsString() @IsIP() host: string; - @IsString() - port: string; + @ApiProperty({ + name: 'port', + description: 'Cortexcpp port', + default: defaultCortexCppPort, + }) + @IsNumber() + @Min(0) + @Max(65535) + port: number; } diff --git a/cortex-js/src/infrastructure/dtos/inference-settings/controller-props.dto.ts b/cortex-js/src/infrastructure/dtos/inference-settings/controller-props.dto.ts deleted file mode 100644 index 15c4601f3..000000000 --- a/cortex-js/src/infrastructure/dtos/inference-settings/controller-props.dto.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { IsOptional, IsString } from 'class-validator'; -import { ControllerProps } from '@/domain/models/inference-setting.interface'; - -export class ControllerPropsDto implements ControllerProps { - @IsString() - placeholder: string; - - @IsString() - value: string; - - @IsOptional() - @IsString() - type?: string; -} diff --git a/cortex-js/src/infrastructure/dtos/inference-settings/create-inference-setting.dto.ts b/cortex-js/src/infrastructure/dtos/inference-settings/create-inference-setting.dto.ts deleted file mode 100644 index ba34ec81c..000000000 --- a/cortex-js/src/infrastructure/dtos/inference-settings/create-inference-setting.dto.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { Type } from 'class-transformer'; -import { IsArray, IsString, ValidateNested } from 'class-validator'; -import { InferenceSetting } from '@/domain/models/inference-setting.interface'; -import { InferenceSettingDocumentDto } from './inference-setting-document.dto'; - -export class CreateInferenceSettingDto implements Partial { - @IsString() - inferenceId: string; - - @IsArray() - @ValidateNested({ each: true }) - @Type(() => InferenceSettingDocumentDto) - settings: InferenceSettingDocumentDto[]; -} diff --git a/cortex-js/src/infrastructure/dtos/inference-settings/inference-setting-document.dto.ts b/cortex-js/src/infrastructure/dtos/inference-settings/inference-setting-document.dto.ts deleted file mode 100644 index bca718c6a..000000000 --- a/cortex-js/src/infrastructure/dtos/inference-settings/inference-setting-document.dto.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { IsString, ValidateNested } from 'class-validator'; -import { InferenceSettingDocument } from '@/domain/models/inference-setting.interface'; -import { ControllerPropsDto } from './controller-props.dto'; - -export class InferenceSettingDocumentDto implements InferenceSettingDocument { - @IsString() - key: string; - - @IsString() - extensionName: string; - - @IsString() - title: string; - - @IsString() - description: string; - - @IsString() - controllerType: string; - - @ValidateNested() - controllerProps: ControllerPropsDto; -} diff --git a/cortex-js/src/infrastructure/dtos/inference-settings/update-inference-setting.dto.ts b/cortex-js/src/infrastructure/dtos/inference-settings/update-inference-setting.dto.ts deleted file mode 100644 index 026dffded..000000000 --- a/cortex-js/src/infrastructure/dtos/inference-settings/update-inference-setting.dto.ts +++ /dev/null @@ -1,4 +0,0 @@ -import { PartialType } from '@nestjs/swagger'; -import { CreateInferenceSettingDto } from './create-inference-setting.dto'; - -export class UpdateInferenceSettingDto extends PartialType(CreateInferenceSettingDto) {} diff --git a/cortex-js/src/infrastructure/dtos/messages/thread-content.dto.ts b/cortex-js/src/infrastructure/dtos/messages/thread-content.dto.ts index 8e062893d..a8d6b3337 100644 --- a/cortex-js/src/infrastructure/dtos/messages/thread-content.dto.ts +++ b/cortex-js/src/infrastructure/dtos/messages/thread-content.dto.ts @@ -1,8 +1,5 @@ import { IsEnum, ValidateNested } from 'class-validator'; -import { - ContentType, - ThreadContent, -} from '@/domain/models/message.interface'; +import { ContentType, ThreadContent } from '@/domain/models/message.interface'; import { ContentValueDto } from './content-value.dto'; export class ThreadContentDto implements ThreadContent { diff --git a/cortex-js/src/infrastructure/dtos/models/download-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/download-model.dto.ts deleted file mode 100644 index 46ea3c12d..000000000 --- a/cortex-js/src/infrastructure/dtos/models/download-model.dto.ts +++ /dev/null @@ -1,6 +0,0 @@ -import { IsString } from 'class-validator'; - -export class DownloadModelDto { - @IsString() - modelId: string; -} diff --git a/cortex-js/src/infrastructure/dtos/models/load-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/load-model.dto.ts deleted file mode 100644 index 5aaa07194..000000000 --- a/cortex-js/src/infrastructure/dtos/models/load-model.dto.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { IsOptional, IsString, ValidateNested } from 'class-validator'; -import { ModelSettingParamsDto } from './model-setting-params.dto'; - -export class LoadModelDto { - @IsString() - modelId: string; - - @IsOptional() - @ValidateNested() - settings?: ModelSettingParamsDto; -} diff --git a/cortex-js/src/infrastructure/dtos/models/load-model-success.dto.ts b/cortex-js/src/infrastructure/dtos/models/start-model-success.dto.ts similarity index 76% rename from cortex-js/src/infrastructure/dtos/models/load-model-success.dto.ts rename to cortex-js/src/infrastructure/dtos/models/start-model-success.dto.ts index e604e80b9..01c0bba05 100644 --- a/cortex-js/src/infrastructure/dtos/models/load-model-success.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/start-model-success.dto.ts @@ -1,6 +1,6 @@ import { IsString } from 'class-validator'; -export class LoadModelSuccessDto { +export class StartModelSuccessDto { @IsString() message: string; diff --git a/cortex-js/src/infrastructure/entities/inference-setting.entity.ts b/cortex-js/src/infrastructure/entities/inference-setting.entity.ts deleted file mode 100644 index 9b6d0cb41..000000000 --- a/cortex-js/src/infrastructure/entities/inference-setting.entity.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { - InferenceSetting, - InferenceSettingDocument, -} from '@/domain/models/inference-setting.interface'; -import { Column, Entity, PrimaryColumn } from 'typeorm'; - -@Entity('inference_setting') -export class InferenceSettingEntity implements InferenceSetting { - @PrimaryColumn() - inferenceId: string; - - @Column({ type: 'simple-json' }) - settings: InferenceSettingDocument[]; -} diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts index 43b78410f..562e989b2 100644 --- a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts +++ b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts @@ -1,26 +1,25 @@ import { Injectable } from '@nestjs/common'; import { OAIEngineExtension } from '@/domain/abstracts/oai.abstract'; import { PromptTemplate } from '@/domain/models/prompt-template.interface'; -import { basename, join, resolve } from 'path'; -import { Model } from '@/domain/models/model.interface'; +import { join, resolve } from 'path'; +import { Model, ModelSettingParams } from '@/domain/models/model.interface'; import { ConfigService } from '@nestjs/config'; import { HttpService } from '@nestjs/axios'; +import { defaultCortexCppHost, defaultCortexCppPort } from 'constant'; +import { readdirSync } from 'node:fs'; /** * A class that implements the InferenceExtension interface from the @janhq/core package. * The class provides methods for initializing and stopping a model, and for making inference requests. * It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ -const LOCAL_HOST = '127.0.0.1'; -const NITRO_DEFAULT_PORT = 3928; -const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`; -const LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`; -const UNLOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/unloadmodel`; - @Injectable() export default class CortexProvider extends OAIEngineExtension { provider: string = 'cortex'; - apiUrl = 'http://127.0.0.1:3928/inferences/server/chat_completion'; + apiUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/chat_completion`; + + private loadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/loadmodel`; + private unloadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/unloadmodel`; constructor( private readonly configService: ConfigService, @@ -29,19 +28,24 @@ export default class CortexProvider extends OAIEngineExtension { super(httpService); } - override async loadModel(model: Model): Promise { + override async loadModel( + model: Model, + settings?: ModelSettingParams, + ): Promise { const modelsContainerDir = this.configService.get('CORTEX_MODELS_DIR') ?? resolve('./models'); const modelFolderFullPath = join(modelsContainerDir, model.id); - //TODO: recheck this - const modelBinaryLocalPath = join( - modelFolderFullPath, - basename(model.sources[0].url), - ); + const ggufFiles = readdirSync(modelFolderFullPath).filter((file) => { + return file.endsWith('.gguf'); + }); + + if (ggufFiles.length === 0) { + throw new Error('Model binary not found'); + } - // TODO: NamH check if the binary is there + const modelBinaryLocalPath = join(modelFolderFullPath, ggufFiles[0]); const cpuThreadCount = 1; // TODO: NamH Math.max(1, nitroResourceProbe.numCpuPhysicalCore); const modelSettings = { @@ -49,6 +53,7 @@ export default class CortexProvider extends OAIEngineExtension { model: model.id, cpu_threads: cpuThreadCount, ...model.settings, + ...settings, llama_model_path: modelBinaryLocalPath, ...(model.settings.mmproj && { mmproj: join(modelFolderFullPath, model.settings.mmproj), @@ -67,12 +72,12 @@ export default class CortexProvider extends OAIEngineExtension { modelSettings.ai_prompt = prompt.ai_prompt; } - await this.httpService.post(LOAD_MODEL_URL, modelSettings).toPromise(); + await this.httpService.post(this.loadModelUrl, modelSettings).toPromise(); } override async unloadModel(modelId: string): Promise { await this.httpService - .post(UNLOAD_MODEL_URL, { model: modelId }) + .post(this.unloadModelUrl, { model: modelId }) .toPromise(); } diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index e08b226d6..ad56f37af 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -3,6 +3,7 @@ import { ChildProcess, spawn } from 'child_process'; import { join } from 'path'; import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto'; import { HttpService } from '@nestjs/axios'; +import { defaultCortexCppHost, defaultCortexCppPort } from 'constant'; @Injectable() export class CortexUsecases { @@ -11,8 +12,8 @@ export class CortexUsecases { constructor(private readonly httpService: HttpService) {} async startCortex( - host: string, - port: string, + host: string = defaultCortexCppHost, + port: number = defaultCortexCppPort, ): Promise { if (this.cortexProcess) { return { @@ -21,9 +22,9 @@ export class CortexUsecases { }; } - const args: string[] = ['1', host, port]; + const args: string[] = ['1', host, `${port}`]; + // go up one level to get the binary folder, have to also work on windows - // const binaryFolder = join(binaryPath, '..'); this.cortexProcess = spawn( join(__dirname, '../../../cortex-cpp/cortex-cpp'), args, @@ -34,7 +35,7 @@ export class CortexUsecases { env: { ...process.env, // TODO: NamH need to get below information - // CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, + CUDA_VISIBLE_DEVICES: '0', // // Vulkan - Support 1 device at a time for now // ...(executableOptions.vkVisibleDevices?.length > 0 && { // GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], diff --git a/cortex-js/src/usecases/inference-settings/inference-settings.module.ts b/cortex-js/src/usecases/inference-settings/inference-settings.module.ts deleted file mode 100644 index d7ca6d05e..000000000 --- a/cortex-js/src/usecases/inference-settings/inference-settings.module.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { Module } from '@nestjs/common'; -import { InferenceSettingsUsecases } from './inference-settings.usecases'; -import { InferenceSettingsController } from '@/infrastructure/controllers/inference-settings.controller'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; - -@Module({ - imports: [DatabaseModule], - controllers: [InferenceSettingsController], - providers: [InferenceSettingsUsecases], - exports: [InferenceSettingsUsecases], -}) -export class InferenceSettingsModule {} diff --git a/cortex-js/src/usecases/inference-settings/inference-settings.usecases.spec.ts b/cortex-js/src/usecases/inference-settings/inference-settings.usecases.spec.ts deleted file mode 100644 index a47dd23b2..000000000 --- a/cortex-js/src/usecases/inference-settings/inference-settings.usecases.spec.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { InferenceSettingsUsecases } from './inference-settings.usecases'; - -describe('InferenceSettingsService', () => { - let service: InferenceSettingsUsecases; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - providers: [InferenceSettingsUsecases], - }).compile(); - - service = module.get(InferenceSettingsUsecases); - }); - - it('should be defined', () => { - expect(service).toBeDefined(); - }); -}); diff --git a/cortex-js/src/usecases/inference-settings/inference-settings.usecases.ts b/cortex-js/src/usecases/inference-settings/inference-settings.usecases.ts deleted file mode 100644 index 8c512aa72..000000000 --- a/cortex-js/src/usecases/inference-settings/inference-settings.usecases.ts +++ /dev/null @@ -1,38 +0,0 @@ -import { Inject, Injectable } from '@nestjs/common'; -import { CreateInferenceSettingDto } from '@/infrastructure/dtos/inference-settings/create-inference-setting.dto'; -import { UpdateInferenceSettingDto } from '@/infrastructure/dtos/inference-settings/update-inference-setting.dto'; -import { Repository } from 'typeorm'; -import { InferenceSettingEntity } from '@/infrastructure/entities/inference-setting.entity'; - -@Injectable() -export class InferenceSettingsUsecases { - constructor( - @Inject('INFERENCE_SETTING_REPOSITORY') - private inferenceSettingRepository: Repository, - ) {} - - create(createInferenceSettingDto: CreateInferenceSettingDto) { - return this.inferenceSettingRepository.insert(createInferenceSettingDto); - } - - findAll() { - return this.inferenceSettingRepository.find(); - } - - findOne(id: string) { - return this.inferenceSettingRepository.findOne({ - where: { inferenceId: id }, - }); - } - - update(id: string, updateInferenceSettingDto: UpdateInferenceSettingDto) { - return this.inferenceSettingRepository.update( - id, - updateInferenceSettingDto, - ); - } - - remove(id: string) { - return this.inferenceSettingRepository.delete(id); - } -} diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index c76beeb3a..0a468ebb6 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -13,13 +13,12 @@ import { mkdirSync, rmdirSync, } from 'fs'; -import { LoadModelSuccessDto } from '@/infrastructure/dtos/models/load-model-success.dto'; -import { LoadModelDto } from '@/infrastructure/dtos/models/load-model.dto'; -import { DownloadModelDto } from '@/infrastructure/dtos/models/download-model.dto'; +import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; import { ConfigService } from '@nestjs/config'; import { ExtensionRepository } from '@/domain/repositories/extension.interface'; import { EngineExtension } from '@/domain/abstracts/engine.abstract'; import { HttpService } from '@nestjs/axios'; +import { ModelSettingParamsDto } from '@/infrastructure/dtos/models/model-setting-params.dto'; @Injectable() export class ModelsUsecases { @@ -86,8 +85,11 @@ export class ModelsUsecases { }); } - async startModel(loadModelDto: LoadModelDto): Promise { - const model = await this.getModelOrThrow(loadModelDto.modelId); + async startModel( + modelId: string, + settings?: ModelSettingParamsDto, + ): Promise { + const model = await this.getModelOrThrow(modelId); const extensions = (await this.extensionRepository.findAll()) ?? []; const engine = extensions.find((e: any) => e.provider === model?.engine) as | EngineExtension @@ -96,27 +98,28 @@ export class ModelsUsecases { if (!engine) { return { message: 'No extension handler found for model', - modelId: loadModelDto.modelId, + modelId: modelId, }; } return engine - .loadModel(model) + .loadModel(model, settings) .then(() => { return { message: 'Model loaded successfully', - modelId: loadModelDto.modelId, + modelId: modelId, }; }) .catch((err) => { console.error(err); return { message: 'Model failed to load', - modelId: loadModelDto.modelId, + modelId: modelId, }; }); } - async stopModel(modelId: string): Promise { + + async stopModel(modelId: string): Promise { const model = await this.getModelOrThrow(modelId); const extensions = (await this.extensionRepository.findAll()) ?? []; const engine = extensions.find((e: any) => e.provider === model?.engine) as @@ -147,11 +150,8 @@ export class ModelsUsecases { }); } - async downloadModel( - downloadModelDto: DownloadModelDto, - callback?: (progress: number) => void, - ) { - const model = await this.getModelOrThrow(downloadModelDto.modelId); + async downloadModel(modelId: string, callback?: (progress: number) => void) { + const model = await this.getModelOrThrow(modelId); if (model.format === ModelFormat.API) { throw new BadRequestException('Cannot download remote model'); @@ -169,7 +169,7 @@ export class ModelsUsecases { this.configService.get('CORTEX_MODELS_DIR') ?? './models'; if (!existsSync(modelsContainerDir)) { - await mkdirSync(modelsContainerDir, { recursive: true }); + mkdirSync(modelsContainerDir, { recursive: true }); } const modelFolder = join(modelsContainerDir, model.id);