diff --git a/cortex-cpp/addon.cc b/cortex-cpp/addon.cc
index 0184baeff..3669d71e0 100644
--- a/cortex-cpp/addon.cc
+++ b/cortex-cpp/addon.cc
@@ -24,10 +24,9 @@ static Napi::Env* s_env = nullptr;
 
-void start() {
+void start(const int port = 3929) {
   int thread_num = 1;
   std::string host = "127.0.0.1";
-  int port = 3929;
   std::string uploads_folder_path;
   int logical_cores = std::thread::hardware_concurrency();
   int drogon_thread_num = std::max(thread_num, logical_cores);
@@ -66,7 +65,11 @@ Napi::Value Start(const Napi::CallbackInfo& info) {
   // Register exitCallback with atexit
   std::atexit(exitCallback);
-  start();
+
+  Napi::Number jsParam = info[0].As<Napi::Number>();
+  int port = jsParam.Int32Value();
+
+  start(port);
 
   return env.Undefined();
 }
@@ -82,4 +85,4 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
   return exports;
 }
 
-NODE_API_MODULE(cortex-cpp, Init)
\ No newline at end of file
+NODE_API_MODULE(cortex-cpp, Init)
diff --git a/cortex-cpp/binding/index.d.ts b/cortex-cpp/binding/index.d.ts
index 9d600d31d..a98421ac9 100644
--- a/cortex-cpp/binding/index.d.ts
+++ b/cortex-cpp/binding/index.d.ts
@@ -2,6 +2,6 @@
 /// <reference types="node" />
 
 declare module "cortex-cpp" {
-  export function start();
+  export function start(port?: number);
   export function stop();
 }
diff --git a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
index 9dde645e6..533143828 100644
--- a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
+++ b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
@@ -13,7 +13,11 @@ import { BenchmarkCommand } from './benchmark.command';
 import chalk from 'chalk';
 import { ContextService } from '../services/context/context.service';
 import { EnginesCommand } from './engines.command';
-import { defaultCortexJsHost, defaultCortexJsPort } from '../constants/cortex';
+import {
+  defaultCortexCppPort,
+  defaultCortexJsHost,
+  defaultCortexJsPort,
+} from '../constants/cortex';
 import { getApp } from '@/app';
 import { fileManagerService } from '../services/file-manager/file-manager.service';
 import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
@@ -28,6 +32,7 @@ type ServeOptions = {
   dataFolder?: string;
   version?: boolean;
   name?: string;
+  enginePort?: string;
 };
 
 @RootCommand({
@@ -53,6 +58,7 @@ export class CortexCommand extends CommandRunner {
   port: number;
   configHost: string;
   configPort: number;
+  enginePort: number;
   constructor(
     readonly contextService: ContextService,
     readonly cortexUseCases: CortexUsecases,
@@ -70,12 +76,14 @@ export class CortexCommand extends CommandRunner {
           ...fileManagerService.defaultConfig(),
           apiServerHost: options?.address || defaultCortexJsHost,
           apiServerPort: options?.port || defaultCortexJsPort,
+          cortexCppPort: Number(options?.enginePort) || defaultCortexCppPort,
         });
       }
     }
     const {
       apiServerHost: configApiServerHost,
       apiServerPort: configApiServerPort,
+      cortexCppPort: configCortexCppPort,
     } = await fileManagerService.getConfig();
 
     this.configHost = configApiServerHost || defaultCortexJsHost;
@@ -83,6 +91,10 @@
     this.host = options?.address || configApiServerHost || defaultCortexJsHost;
     this.port = options?.port || configApiServerPort || defaultCortexJsPort;
+    this.enginePort =
+      Number(options?.enginePort) ||
+      configCortexCppPort ||
+      defaultCortexCppPort;
     const showLogs = options?.logs || false;
     const showVersion = options?.version || false;
     const dataFolderPath = options?.dataFolder;
@@ -140,6 +152,7 @@ export class CortexCommand extends CommandRunner {
         apiServerHost: this.host,
         apiServerPort: this.port,
         dataFolderPath: dataFolderPath || config.dataFolderPath,
+        cortexCppPort: this.enginePort,
       });
       if (!attach) process.exit(0);
     } catch (e) {
@@ -178,7 +191,7 @@ export class CortexCommand extends CommandRunner {
   }
 
   @Option({
-    flags: '--dataFolder <dataFolderPath>',
+    flags: '-df, --dataFolder <dataFolderPath>',
     description: 'Set the data folder directory',
   })
   parseDataFolder(value: string) {
@@ -192,6 +205,7 @@ export class CortexCommand extends CommandRunner {
   parseVersion() {
     return true;
   }
+
   @Option({
     flags: '-n, --name <name>',
     description: 'Name of the process',
@@ -200,4 +214,12 @@ export class CortexCommand extends CommandRunner {
     fileManagerService.setConfigProfile(value);
     return value;
   }
+
+  @Option({
+    flags: '-ep, --engine-port <port>',
+    description: 'Port to serve the engine',
+  })
+  parseEnginePort(value: string) {
+    return value;
+  }
 }
diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts
index fd45b1fcd..810aa8b92 100644
--- a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts
+++ b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts
@@ -165,6 +165,12 @@ export default class CortexProvider extends OAIEngineExtension {
     return { error: 'Cannot split prompt template' };
   };
 
+  public setUrls(host: string, port: number): void {
+    this.apiUrl = `http://${host}:${port}/inferences/server/chat_completion`;
+    this.loadModelUrl = `http://${host}:${port}/inferences/server/loadmodel`;
+    this.unloadModelUrl = `http://${host}:${port}/inferences/server/unloadmodel`;
+  }
+
   private persistEngineVersion = async () => {
     const versionFilePath = join(
       await fileManagerService.getCortexCppEnginePath(),
diff --git a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts
index ce43b068e..86ddef9d9 100644
--- a/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts
+++ b/cortex-js/src/infrastructure/repositories/extensions/extension.repository.ts
@@ -85,7 +85,10 @@ export class ExtensionRepositoryImpl implements ExtensionRepository {
   }
 
   private async loadCoreExtensions() {
+    const { cortexCppPort, cortexCppHost } =
+      await fileManagerService.getConfig();
     const llamaCPPEngine = new LlamaCPPProvider(this.httpService);
+    llamaCPPEngine.setUrls(cortexCppHost, cortexCppPort);
     llamaCPPEngine.status = existsSync(
       join(await fileManagerService.getCortexCppEnginePath(), Engines.llamaCPP),
     )
@@ -93,6 +96,7 @@
       ? EngineStatus.READY
       : EngineStatus.NOT_INITIALIZED;
 
     const onnxEngine = new Onnxprovider(this.httpService);
+    onnxEngine.setUrls(cortexCppHost, cortexCppPort);
     onnxEngine.status = existsSync(
       join(await fileManagerService.getCortexCppEnginePath(), Engines.onnx),
@@ -103,6 +107,7 @@
       ? EngineStatus.READY
       : EngineStatus.NOT_INITIALIZED;
 
     const tensorrtLLMEngine = new TensorrtLLMProvider(this.httpService);
+    tensorrtLLMEngine.setUrls(cortexCppHost, cortexCppPort);
     tensorrtLLMEngine.status = existsSync(
       join(
diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts
index 3e75af1aa..758c5892b 100644
--- a/cortex-js/src/usecases/cortex/cortex.usecases.ts
+++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts
@@ -62,6 +62,7 @@ export class CortexUsecases implements BeforeApplicationShutdown {
         delimiter,
         engineDir,
       ),
+      CORTEX_CPP_PORT: port.toString(),
       // // Vulkan - Support 1 device at a time for now
       // ...(executableOptions.vkVisibleDevices?.length > 0 && {
       //   GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
diff --git a/cortex-js/src/utils/cortex-cpp.ts b/cortex-js/src/utils/cortex-cpp.ts
index 909304cfc..b62870712 100644
--- a/cortex-js/src/utils/cortex-cpp.ts
+++ b/cortex-js/src/utils/cortex-cpp.ts
@@ -1,3 +1,8 @@
 import * as cortexCPP from 'cortex-cpp';
 
-cortexCPP.start();
+const port = process.env.CORTEX_CPP_PORT
+  ? parseInt(process.env.CORTEX_CPP_PORT)
+  : 3929;
+// eslint-disable-next-line @typescript-eslint/ban-ts-comment
+// @ts-expect-error
+cortexCPP.start(port);
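
For reference (not part of the patch): a minimal TypeScript sketch of the port-resolution order this change introduces (the -ep/--engine-port flag, then the cortexCppPort config value, then the 3929 default) and of how the chosen port reaches the addon through the CORTEX_CPP_PORT environment variable. `readConfig` below is a hypothetical stand-in for fileManagerService.getConfig().

// Sketch only. `readConfig` stands in for fileManagerService.getConfig();
// the precedence mirrors CortexCommand.run() and the default mirrors addon.cc.
const DEFAULT_CORTEX_CPP_PORT = 3929;

interface CortexConfig {
  cortexCppPort?: number;
}

async function readConfig(): Promise<CortexConfig> {
  // Placeholder for the real config lookup.
  return {};
}

async function resolveEnginePort(enginePortFlag?: string): Promise<number> {
  const { cortexCppPort } = await readConfig();
  // CLI flag wins, then the persisted config value, then the built-in default.
  return Number(enginePortFlag) || cortexCppPort || DEFAULT_CORTEX_CPP_PORT;
}

async function main() {
  const port = await resolveEnginePort(process.argv[2]);
  // cortex.usecases.ts exports the value to the spawned engine process, and
  // cortex-js/src/utils/cortex-cpp.ts reads it back before calling start(port).
  process.env.CORTEX_CPP_PORT = port.toString();
  console.log(`cortex-cpp will listen on port ${port}`);
}

main();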