From ab128f5dd2e0ec1af3d7df9e49d45b0a1d9ba5c0 Mon Sep 17 00:00:00 2001 From: Mark Nguyen Date: Mon, 26 Aug 2024 11:06:09 +0700 Subject: [PATCH] feat: support to start model by file path (#1026) --- cortex-js/package.json | 4 +-- .../src/domain/models/model.interface.ts | 5 +++ .../domain/repositories/model.interface.ts | 8 ++++- .../infrastructure/commanders/run.command.ts | 17 +++++++--- .../controllers/models.controller.ts | 31 ++++++++++++++++++ .../repositories/models/model.repository.ts | 32 +++++++++++++++++++ .../src/usecases/models/models.usecases.ts | 26 +++++++++------ 7 files changed, 107 insertions(+), 16 deletions(-) diff --git a/cortex-js/package.json b/cortex-js/package.json index 8acdac4d5..3cabf3381 100644 --- a/cortex-js/package.json +++ b/cortex-js/package.json @@ -13,7 +13,7 @@ "scripts": { "dev": "nest dev", "compile": "npx ncc build ./dist/src/command.js -o command", - "build": "nest build", + "build": "yarn add sqlite3 --build-from-source && nest build", "build:binary": "yarn build && yarn compile && npx -q patch-package && run-script-os", "build:binary:windows": "npx @yao-pkg/pkg . --targets node20-win", "build:binary:linux": "npx @yao-pkg/pkg . --targets node20-linux", @@ -58,7 +58,7 @@ "class-transformer": "^0.5.1", "class-validator": "^0.14.1", "cli-progress": "^3.12.0", - "cortex-cpp": "0.5.0-44", + "cortex-cpp": "0.5.0-46", "decompress": "^4.2.1", "hyllama": "^0.2.2", "js-yaml": "^4.1.0", diff --git a/cortex-js/src/domain/models/model.interface.ts b/cortex-js/src/domain/models/model.interface.ts index dac406634..51861b768 100644 --- a/cortex-js/src/domain/models/model.interface.ts +++ b/cortex-js/src/domain/models/model.interface.ts @@ -126,6 +126,11 @@ export interface ModelSettingParams { * Model type we want to use: llm or embedding, default value is llm (latest llama.cpp update) */ model_type?: string; + + /** + * The model path. + */ + model_path?: string; } /** diff --git a/cortex-js/src/domain/repositories/model.interface.ts b/cortex-js/src/domain/repositories/model.interface.ts index 97cc1b779..193af9a52 100644 --- a/cortex-js/src/domain/repositories/model.interface.ts +++ b/cortex-js/src/domain/repositories/model.interface.ts @@ -1,4 +1,10 @@ import { Model } from '@/domain/models/model.interface'; import { Repository } from './repository.interface'; -export abstract class ModelRepository extends Repository {} +export abstract class ModelRepository extends Repository { + abstract loadModelByFile( + modelId: string, + filePath: string, + modelFile: string, + ): Promise; +} diff --git a/cortex-js/src/infrastructure/commanders/run.command.ts b/cortex-js/src/infrastructure/commanders/run.command.ts index a0dcdcb84..3d30b6a16 100644 --- a/cortex-js/src/infrastructure/commanders/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/run.command.ts @@ -5,7 +5,10 @@ import ora from 'ora'; import { existsSync } from 'fs'; import { join } from 'path'; import { Engines } from './types/engine.interface'; -import { checkModelCompatibility, checkRequiredVersion } from '@/utils/model-check'; +import { + checkModelCompatibility, + checkRequiredVersion, +} from '@/utils/model-check'; import { BaseCommand } from './base.command'; import { isRemoteEngine } from '@/utils/normalize-model-id'; import { ChatClient } from './services/chat-client'; @@ -98,9 +101,15 @@ export class RunCommand extends BaseCommand { await this.cortex.engines.init(engine); await downloadProgress(this.cortex, undefined, DownloadType.Engine); } - const { version: engineVersion } = await this.cortex.engines.retrieve(engine); - if(existingModel.engine_version && !checkRequiredVersion(existingModel.engine_version, engineVersion)) { - console.log(`Model engine version ${existingModel.engine_version} is not compatible with engine version ${engineVersion}`); + const { version: engineVersion } = + await this.cortex.engines.retrieve(engine); + if ( + existingModel.engine_version && + !checkRequiredVersion(existingModel.engine_version, engineVersion) + ) { + console.log( + `Model engine version ${existingModel.engine_version} is not compatible with engine version ${engineVersion}`, + ); process.exit(1); } diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts index c4e73e40f..a2f2ac80c 100644 --- a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ b/cortex-js/src/infrastructure/controllers/models.controller.ts @@ -75,6 +75,37 @@ export class ModelsController { return this.modelsUsecases.startModel(modelId, params); } + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'The model has been successfully started.', + type: StartModelSuccessDto, + }) + @ApiOperation({ + summary: 'Start model by file path', + description: + 'Starts a model operation defined by a model `id` with a file path.', + }) + @ApiParam({ + name: 'modelId', + required: true, + description: 'The unique identifier of the model.', + }) + @Post(':modelId(*)/start-by-file') + startModelByFilePath( + @Param('modelId') modelId: string, + @Body() + params: ModelSettingsDto & { filePath: string; metadataPath: string }, + ) { + const { filePath, metadataPath, ...settings } = params; + return this.modelsUsecases.startModel( + modelId, + settings, + filePath, + metadataPath, + ); + } + @HttpCode(200) @ApiResponse({ status: 200, diff --git a/cortex-js/src/infrastructure/repositories/models/model.repository.ts b/cortex-js/src/infrastructure/repositories/models/model.repository.ts index c2625d3c9..c2f9fdc04 100644 --- a/cortex-js/src/infrastructure/repositories/models/model.repository.ts +++ b/cortex-js/src/infrastructure/repositories/models/model.repository.ts @@ -159,4 +159,36 @@ export class ModelRepositoryImpl implements ModelRepository { this.loaded = true; return Array.from(this.models.values()); } + + /** + * Load a model by file + * This would load a model from a file + * @returns the model + */ + async loadModelByFile( + modelId: string, + modelPath: string, + modelFile: string, + ): Promise { + const checkExists = await this.findOne(modelId); + if (checkExists) return checkExists; + if (!existsSync(modelPath)) return null; + + const model = readFileSync(modelPath, 'utf8'); + const yamlObject = load(model) as Model; + const fileName = basename(modelId); + const modelObject = { + ...yamlObject, + model: modelId, + llama_model_path: modelFile, + model_path: modelFile, + files: [modelFile], + }; + if (modelObject) { + this.fileModel.set(modelId, fileName); + this.models.set(modelId, modelObject); + } + this.loaded = true; + return modelObject; + } } diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index 260c966c1..e5a9a4b37 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -16,13 +16,8 @@ import { TelemetryUsecases } from '../telemetry/telemetry.usecases'; import { TelemetrySource } from '@/domain/telemetry/telemetry.interface'; import { ModelRepository } from '@/domain/repositories/model.interface'; import { ModelParameterParser } from '@/utils/model-parameter.parser'; -import { - HuggingFaceRepoSibling, -} from '@/domain/models/huggingface.interface'; -import { - fetchJanRepoData, - getHFModelMetadata, -} from '@/utils/huggingface'; +import { HuggingFaceRepoSibling } from '@/domain/models/huggingface.interface'; +import { fetchJanRepoData, getHFModelMetadata } from '@/utils/huggingface'; import { DownloadStatus, DownloadType, @@ -162,8 +157,22 @@ export class ModelsUsecases { async startModel( modelId: string, settings?: ModelSettingParams, + filePath?: string, + metadataPath?: string, ): Promise { - const model = await this.getModelOrThrow(modelId); + let model: Model | null; + if (filePath) { + model = await this.modelRepository.loadModelByFile( + modelId, + metadataPath!, + filePath, + ); + if (!existsSync(filePath) || !model) { + throw new ModelNotFoundException(model?.id ?? filePath); + } + } else { + model = await this.getModelOrThrow(modelId); + } const engine = (await this.extensionRepository.findOne( model!.engine ?? Engines.llamaCPP, )) as EngineExtension | undefined; @@ -209,7 +218,6 @@ export class ModelsUsecases { ...parser.parseModelEngineSettings(model), ...parser.parseModelEngineSettings(settings ?? {}), }; - return engine .loadModel(model, loadModelSettings) .catch((e) => {