diff --git a/.github/workflows/build.yml b/.github/workflows/cortex-cpp-build.yml similarity index 81% rename from .github/workflows/build.yml rename to .github/workflows/cortex-cpp-build.yml index 716dfd679..69915a034 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/cortex-cpp-build.yml @@ -1,4 +1,4 @@ -name: CI +name: CI Cortex CPP on: push: @@ -25,7 +25,8 @@ jobs: steps: - name: Extract tag name without v prefix id: get_version - run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}" + run: | + echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}" env: GITHUB_REF: ${{ github.ref }} - name: Create Draft Release @@ -91,7 +92,7 @@ jobs: - os: "mac" name: "arm64" - runs-on: "mac-silicon" + runs-on: "macos-latest" cmake-flags: "-DMAC_ARM64=ON" run-e2e: true @@ -161,16 +162,54 @@ jobs: with: submodules: recursive + - uses: actions/setup-dotnet@v3 + if: runner.os == 'Windows' + with: + dotnet-version: "8.0.x" + - name: Install choco on Windows if: runner.os == 'Windows' run: | choco install make -y + - name: Get Cer for code signing + if: runner.os == 'macOS' + run: base64 -d <<< "$CODE_SIGN_P12_BASE64" > /tmp/codesign.p12 + shell: bash + env: + CODE_SIGN_P12_BASE64: ${{ secrets.CODE_SIGN_P12_BASE64 }} + + - uses: apple-actions/import-codesign-certs@v2 + if: runner.os == 'macOS' + with: + p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }} + p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }} + - name: Build run: | cd cortex-cpp make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" + - name: Pre-package + run: | + cd cortex-cpp + make pre-package + + - name: Code Signing macOS + if: runner.os == 'macOS' + run: | + cd cortex-cpp + make codesign CODE_SIGN=true DEVELOPER_ID="${{ secrets.DEVELOPER_ID }}" + + - name: Code Signing Windows + if: runner.os == 'Windows' + shell: cmd + run: | + cd cortex-cpp + set 
PATH=%PATH%;%USERPROFILE%\.dotnet\tools + make codesign CODE_SIGN=true AZURE_KEY_VAULT_URI="${{ secrets.AZURE_KEY_VAULT_URI }}" AZURE_CLIENT_ID="${{ secrets.AZURE_CLIENT_ID }}" AZURE_TENANT_ID="${{ secrets.AZURE_TENANT_ID }}" AZURE_CLIENT_SECRET="${{ secrets.AZURE_CLIENT_SECRET }}" AZURE_CERT_NAME="${{ secrets.AZURE_CERT_NAME }}" + + - name: Package run: | cd cortex-cpp diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml similarity index 97% rename from .github/workflows/quality-gate.yml rename to .github/workflows/cortex-cpp-quality-gate.yml index 909ab7e77..33c8a4533 100644 --- a/.github/workflows/quality-gate.yml +++ b/.github/workflows/cortex-cpp-quality-gate.yml @@ -1,4 +1,4 @@ -name: CI Quality Gate +name: CI Quality Gate Cortex CPP on: pull_request: @@ -145,6 +145,11 @@ jobs: cd cortex-cpp make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" + - name: Pre-package + run: | + cd cortex-cpp + make pre-package + - name: Package run: | cd cortex-cpp diff --git a/.github/workflows/cortex-js.yml b/.github/workflows/cortex-js.yml new file mode 100644 index 000000000..31175b1be --- /dev/null +++ b/.github/workflows/cortex-js.yml @@ -0,0 +1,44 @@ +name: Publish cortex js Package to npmjs +on: + push: + tags: ["v[0-9]+.[0-9]+.[0-9]+-cortex-js"] + paths: + [ + "cortex-js/**", + ] +jobs: + build-and-publish-plugins: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: "0" + + - name: Install jq + uses: dcarbone/install-jq-action@v2.0.1 + + - name: "Update version by tag" + run: | + cd cortex-js + # Remove the v prefix + tag_version=${GITHUB_REF#refs/tags/v} + # Remove the -cortex-js suffix + new_version=${tag_version%-cortex-js} + + # Replace the old version with the new version in package.json + jq --arg version "$new_version" '.version = $version' ./package.json > /tmp/package.json && mv /tmp/package.json ./package.json + + # Print the new version + echo "Updated package.json version to: 
$new_version" + + # Setup .npmrc file to publish to npm + - uses: actions/setup-node@v3 + with: + node-version: "20.x" + registry-url: "https://registry.npmjs.org" + - run: yarn install && yarn build + working-directory: ./cortex-js + - run: npm publish --access public + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + working-directory: ./cortex-js diff --git a/cortex-cpp/Makefile b/cortex-cpp/Makefile index 9f4c98d1b..98486f023 100644 --- a/cortex-cpp/Makefile +++ b/cortex-cpp/Makefile @@ -6,6 +6,13 @@ CMAKE_EXTRA_FLAGS ?= "" RUN_TESTS ?= false LLM_MODEL_URL ?= "https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" EMBEDDING_MODEL_URL ?= "https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf" +CODE_SIGN ?= false +AZURE_KEY_VAULT_URI ?= xxxx +AZURE_CLIENT_ID ?= xxxx +AZURE_TENANT_ID ?= xxxx +AZURE_CLIENT_SECRET ?= xxxx +AZURE_CERT_NAME ?= xxxx +DEVELOPER_ID ?= xxxx # Default target, does nothing all: @@ -29,24 +36,47 @@ else make -j4; endif -package: +pre-package: ifeq ($(OS),Windows_NT) - @powershell -Command "mkdir -p cortex-cpp\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex-cpp\engines\cortex.llamacpp\;" - @powershell -Command "cp build\Release\cortex-cpp.exe .\cortex-cpp\;" - @powershell -Command "cp build-deps\_install\bin\zlib.dll .\cortex-cpp\;" - @powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex-cpp\;" - @powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex-cpp\;" - @powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex-cpp\;" - @powershell -Command "7z a -ttar temp.tar cortex-cpp\\*; 7z a -tgzip cortex-cpp.tar.gz temp.tar;" + @powershell -Command "mkdir -p cortex-cpp\engines\cortex.llamacpp\; cp -r build\engines\cortex.llamacpp\engine.dll cortex-cpp\engines\cortex.llamacpp\;" + @powershell -Command "cp -r build\Release\cortex-cpp.exe .\cortex-cpp\;" + @powershell -Command "cp -r build-deps\_install\bin\zlib.dll .\cortex-cpp\;" + 
@powershell -Command "cp -r ..\.github\patches\windows\msvcp140.dll .\cortex-cpp\;" + @powershell -Command "cp -r ..\.github\patches\windows\vcruntime140_1.dll .\cortex-cpp\;" + @powershell -Command "cp -r ..\.github\patches\windows\vcruntime140.dll .\cortex-cpp\;" else ifeq ($(shell uname -s),Linux) @mkdir -p cortex-cpp/engines/cortex.llamacpp; \ cp build/engines/cortex.llamacpp/libengine.so cortex-cpp/engines/cortex.llamacpp/; \ - cp build/cortex-cpp cortex-cpp/; \ - tar -czvf cortex-cpp.tar.gz cortex-cpp; + cp build/cortex-cpp cortex-cpp/; else @mkdir -p cortex-cpp/engines/cortex.llamacpp; \ cp build/engines/cortex.llamacpp/libengine.dylib cortex-cpp/engines/cortex.llamacpp/; \ - cp build/cortex-cpp cortex-cpp/; \ + cp build/cortex-cpp cortex-cpp/; +endif + +codesign: +ifeq ($(CODE_SIGN),false) + @echo "Skipping Code Sign" + @exit 0 +endif + +ifeq ($(OS),Windows_NT) + @powershell -Command "dotnet tool install --global AzureSignTool;" + @powershell -Command 'azuresigntool.exe sign -kvu "$(AZURE_KEY_VAULT_URI)" -kvi "$(AZURE_CLIENT_ID)" -kvt "$(AZURE_TENANT_ID)" -kvs "$(AZURE_CLIENT_SECRET)" -kvc "$(AZURE_CERT_NAME)" -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\cortex-cpp\cortex-cpp.exe";' + @powershell -Command 'azuresigntool.exe sign -kvu "$(AZURE_KEY_VAULT_URI)" -kvi "$(AZURE_CLIENT_ID)" -kvt "$(AZURE_TENANT_ID)" -kvs "$(AZURE_CLIENT_SECRET)" -kvc "$(AZURE_CERT_NAME)" -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\cortex-cpp\engines\cortex.llamacpp\engine.dll";' +else ifeq ($(shell uname -s),Linux) + @echo "Skipping Code Sign for linux" + @exit 0 +else + find "cortex-cpp" -type f -exec codesign --force -s "$(DEVELOPER_ID)" --options=runtime {} \; +endif + +package: +ifeq ($(OS),Windows_NT) + @powershell -Command "7z a -ttar temp.tar cortex-cpp\*; 7z a -tgzip cortex-cpp.tar.gz temp.tar;" +else ifeq ($(shell uname -s),Linux) + tar -czvf cortex-cpp.tar.gz cortex-cpp; +else tar -czvf cortex-cpp.tar.gz cortex-cpp; endif @@ -65,4 +95,13 @@ 
else @cd cortex-cpp; \ chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./cortex-cpp $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \ rm -rf uploads/; +endif + +clean: +ifeq ($(OS),Windows_NT) + @powershell -Command "rm -rf build; rm -rf build-deps; rm -rf cortex-cpp; rm -rf cortex-cpp.tar.gz;" +else ifeq ($(shell uname -s),Linux) + @rm -rf build; rm -rf build-deps; rm -rf cortex-cpp; rm -rf cortex-cpp.tar.gz; +else + @rm -rf build; rm -rf build-deps; rm -rf cortex-cpp; rm -rf cortex-cpp.tar.gz; endif \ No newline at end of file diff --git a/cortex-cpp/engines/cortex.llamacpp/engine.cmake b/cortex-cpp/engines/cortex.llamacpp/engine.cmake index c273d7e38..635fb9945 100644 --- a/cortex-cpp/engines/cortex.llamacpp/engine.cmake +++ b/cortex-cpp/engines/cortex.llamacpp/engine.cmake @@ -1,6 +1,7 @@ # cortex.llamacpp release version -set(VERSION 0.1.2) +set(VERSION 0.1.4) set(ENGINE_VERSION v${VERSION}) +add_compile_definitions(CORTEX_LLAMACPP_VERSION="${VERSION}") # MESSAGE("ENGINE_VERSION=" ${ENGINE_VERSION}) diff --git a/cortex-cpp/main.cc b/cortex-cpp/main.cc index 53c65cd37..12cabeb0c 100644 --- a/cortex-cpp/main.cc +++ b/cortex-cpp/main.cc @@ -51,6 +51,10 @@ int main(int argc, char* argv[]) { #else LOG_INFO << "cortex-cpp version: undefined"; #endif +#ifdef CORTEX_LLAMACPP_VERSION + LOG_INFO << "cortex.llamacpp version: " << CORTEX_LLAMACPP_VERSION; +#endif + LOG_INFO << "Server started, listening at: " << host << ":" << port; LOG_INFO << "Please load your model"; drogon::app().addListener(host, port); diff --git a/cortex-js/.env.development b/cortex-js/.env.development new file mode 100644 index 000000000..e69de29bb diff --git a/cortex-js/.env.example b/cortex-js/.env.example index 51944b2db..d0666607c 100644 --- a/cortex-js/.env.example +++ b/cortex-js/.env.example @@ -1,3 +1,2 @@ EXTENSIONS_PATH= CORTEX_MODELS_DIR= -CORTEX_BINARY_PATH= \ No newline at end of file diff --git a/cortex-js/constant.ts 
b/cortex-js/constant.ts index 8f1ba5008..b9a983657 100644 --- a/cortex-js/constant.ts +++ b/cortex-js/constant.ts @@ -4,3 +4,6 @@ export const databaseFile = `${databaseName}.db`; export const defaultCortexJsHost = 'localhost'; export const defaultCortexJsPort = 7331; + +export const defaultCortexCppHost = '127.0.0.1'; +export const defaultCortexCppPort = 3928; diff --git a/cortex-js/package.json b/cortex-js/package.json index f5519d13a..a66b5b6da 100644 --- a/cortex-js/package.json +++ b/cortex-js/package.json @@ -1,5 +1,5 @@ { - "name": "cortex-js", + "name": "@janhq/cortex", "version": "0.0.1", "description": "", "author": "", @@ -25,6 +25,7 @@ "typeorm": "typeorm-ts-node-esm" }, "dependencies": { + "@huggingface/gguf": "^0.1.5", "@nestjs/axios": "^3.0.2", "@nestjs/common": "^10.0.0", "@nestjs/config": "^3.2.2", @@ -33,10 +34,12 @@ "@nestjs/mapped-types": "*", "@nestjs/platform-express": "^10.0.0", "@nestjs/swagger": "^7.3.1", + "@terascope/fetch-github-release": "^0.8.8", "axios": "^1.6.8", "class-transformer": "^0.5.1", "class-validator": "^0.14.1", "cli-progress": "^3.12.0", + "decompress": "^4.2.1", "nest-commander": "^3.13.0", "readline": "^1.3.0", "reflect-metadata": "^0.2.0", @@ -52,6 +55,7 @@ "@nestjs/testing": "^10.0.0", "@nestjs/typeorm": "^10.0.2", "@types/cli-progress": "^3.11.5", + "@types/decompress": "^4.2.7", "@types/express": "^4.17.17", "@types/jest": "^29.5.2", "@types/node": "^20.12.9", diff --git a/cortex-js/src/app.module.ts b/cortex-js/src/app.module.ts index 748ca9bb9..3120e90c1 100644 --- a/cortex-js/src/app.module.ts +++ b/cortex-js/src/app.module.ts @@ -6,20 +6,19 @@ import { DevtoolsModule } from '@nestjs/devtools-integration'; import { DatabaseModule } from './infrastructure/database/database.module'; import { ChatModule } from './usecases/chat/chat.module'; import { AssistantsModule } from './usecases/assistants/assistants.module'; -import { InferenceSettingsModule } from './usecases/inference-settings/inference-settings.module'; 
import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module'; import { CortexModule } from './usecases/cortex/cortex.module'; import { ConfigModule } from '@nestjs/config'; +import { env } from 'node:process'; @Module({ imports: [ DevtoolsModule.register({ - http: process.env.NODE_ENV !== 'production', + http: env.NODE_ENV !== 'production', }), ConfigModule.forRoot({ isGlobal: true, - envFilePath: - process.env.NODE_ENV === 'production' ? '.env' : '.env.development', + envFilePath: env.NODE_ENV !== 'production' ? '.env.development' : '.env', }), DatabaseModule, MessagesModule, @@ -27,7 +26,6 @@ import { ConfigModule } from '@nestjs/config'; ModelsModule, ChatModule, AssistantsModule, - InferenceSettingsModule, CortexModule, ExtensionModule, ], diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts index fe23ca16c..d15758746 100644 --- a/cortex-js/src/command.module.ts +++ b/cortex-js/src/command.module.ts @@ -1,14 +1,26 @@ import { Module } from '@nestjs/common'; -import { BasicCommand } from './infrastructure/commanders/basic-command.commander'; import { ModelsModule } from './usecases/models/models.module'; import { DatabaseModule } from './infrastructure/database/database.module'; import { ConfigModule } from '@nestjs/config'; import { CortexModule } from './usecases/cortex/cortex.module'; import { ServeCommand } from './infrastructure/commanders/serve.command'; -import { PullCommand } from './infrastructure/commanders/pull.command'; -import { InferenceCommand } from './infrastructure/commanders/inference.command'; import { ModelsCommand } from './infrastructure/commanders/models.command'; -import { StartCommand } from './infrastructure/commanders/start.command'; +import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module'; +import { ChatModule } from './usecases/chat/chat.module'; +import { InitCommand } from './infrastructure/commanders/init.command'; +import { HttpModule } 
from '@nestjs/axios'; +import { InitRunModeQuestions } from './infrastructure/commanders/questions/init.questions'; +import { ModelListCommand } from './infrastructure/commanders/models/model-list.command'; +import { ModelPullCommand } from './infrastructure/commanders/models/model-pull.command'; +import { CortexCommand } from './infrastructure/commanders/cortex-command.commander'; +import { ChatCommand } from './infrastructure/commanders/chat.command'; +import { ModelStartCommand } from './infrastructure/commanders/models/model-start.command'; +import { ModelStopCommand } from './infrastructure/commanders/models/model-stop.command'; +import { ModelGetCommand } from './infrastructure/commanders/models/model-get.command'; +import { ModelRemoveCommand } from './infrastructure/commanders/models/model-remove.command'; +import { RunCommand } from './infrastructure/commanders/shortcuts/run.command'; +import { InitCudaQuestions } from './infrastructure/commanders/questions/cuda.questions'; +import { CliUsecasesModule } from './infrastructure/commanders/usecases/cli.usecases.module'; @Module({ imports: [ @@ -20,14 +32,32 @@ import { StartCommand } from './infrastructure/commanders/start.command'; DatabaseModule, ModelsModule, CortexModule, + ChatModule, + ExtensionModule, + HttpModule, + CliUsecasesModule, ], providers: [ - BasicCommand, + CortexCommand, ModelsCommand, - PullCommand, ServeCommand, - InferenceCommand, - StartCommand, + ChatCommand, + InitCommand, + + // Questions + InitRunModeQuestions, + InitCudaQuestions, + + // Model commands + ModelStartCommand, + ModelStopCommand, + ModelListCommand, + ModelGetCommand, + ModelRemoveCommand, + ModelPullCommand, + + // Shortcuts + RunCommand, ], }) export class CommandModule {} diff --git a/cortex-js/src/domain/abstracts/engine.abstract.ts b/cortex-js/src/domain/abstracts/engine.abstract.ts index 564faa2a1..f21f6664b 100644 --- a/cortex-js/src/domain/abstracts/engine.abstract.ts +++ 
b/cortex-js/src/domain/abstracts/engine.abstract.ts @@ -1,8 +1,16 @@ +/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */ +import { Model, ModelSettingParams } from '../models/model.interface'; import { Extension } from './extension.abstract'; export abstract class EngineExtension extends Extension { abstract provider: string; - abstract inference(completion: any, req: any, res: any): void; - abstract loadModel(loadModel: any): Promise; - abstract unloadModel(modelId: string): Promise; + + abstract inference(completion: any, req: any, stream: any, res?: any): void; + + async loadModel( + model: Model, + settingParams?: ModelSettingParams, + ): Promise {} + + async unloadModel(modelId: string): Promise {} } diff --git a/cortex-js/src/domain/abstracts/oai.abstract.ts b/cortex-js/src/domain/abstracts/oai.abstract.ts index 96748449d..2923c4277 100644 --- a/cortex-js/src/domain/abstracts/oai.abstract.ts +++ b/cortex-js/src/domain/abstracts/oai.abstract.ts @@ -1,6 +1,12 @@ -/* eslint-disable @typescript-eslint/no-unused-vars */ import { HttpService } from '@nestjs/axios'; import { EngineExtension } from './engine.abstract'; +import { stdout } from 'process'; + +export type ChatStreamEvent = { + type: 'data' | 'error' | 'end'; + data?: any; + error?: any; +}; export abstract class OAIEngineExtension extends EngineExtension { abstract apiUrl: string; @@ -9,44 +15,120 @@ export abstract class OAIEngineExtension extends EngineExtension { super(); } - async inference( + inference( createChatDto: any, headers: Record, - res: any, + writableStream: WritableStream, + res?: any, ) { if (createChatDto.stream === true) { - const response = await this.httpService - .post(this.apiUrl, createChatDto, { - headers: { - 'Content-Type': headers['content-type'] ?? 
'application/json', - Authorization: headers['authorization'], - }, - responseType: 'stream', - }) - .toPromise(); - - res.writeHead(200, { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - Connection: 'keep-alive', - 'Access-Control-Allow-Origin': '*', - }); + if (res) { + res.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + Connection: 'keep-alive', + 'Access-Control-Allow-Origin': '*', + }); + this.httpService + .post(this.apiUrl, createChatDto, { + headers: { + 'Content-Type': headers['content-type'] ?? 'application/json', + Authorization: headers['authorization'], + }, + responseType: 'stream', + }) + .toPromise() + .then((response) => { + response?.data.pipe(res); + }); + } else { + const decoder = new TextDecoder('utf-8'); + const defaultWriter = writableStream.getWriter(); + defaultWriter.ready.then(() => { + this.httpService + .post(this.apiUrl, createChatDto, { + headers: { + 'Content-Type': headers['content-type'] ?? 'application/json', + Authorization: headers['authorization'], + }, + responseType: 'stream', + }) + .subscribe({ + next: (response) => { + response.data.on('data', (chunk: any) => { + let content = ''; + const text = decoder.decode(chunk); + const lines = text.trim().split('\n'); + let cachedLines = ''; + for (const line of lines) { + try { + const toParse = cachedLines + line; + if (!line.includes('data: [DONE]')) { + const data = JSON.parse(toParse.replace('data: ', '')); + content += data.choices[0]?.delta?.content ?? 
''; + + if (content.startsWith('assistant: ')) { + content = content.replace('assistant: ', ''); + } + + if (content !== '') { + defaultWriter.write({ + type: 'data', + data: content, + }); + } + } + } catch { + cachedLines = line; + } + } + }); - response?.data.pipe(res); + response.data.on('error', (error: any) => { + defaultWriter.write({ + type: 'error', + error, + }); + }); + + response.data.on('end', () => { + // stdout.write('Stream end'); + defaultWriter.write({ + type: 'end', + }); + }); + }, + + error: (error) => { + stdout.write('Stream error: ' + error); + }, + }); + }); + } } else { - const response = await this.httpService - .post(this.apiUrl, createChatDto, { - headers: { - 'Content-Type': headers['content-type'] ?? 'application/json', - Authorization: headers['authorization'], - }, - }) - .toPromise(); - - res.json(response?.data); + const defaultWriter = writableStream.getWriter(); + defaultWriter.ready.then(() => { + this.httpService + .post(this.apiUrl, createChatDto, { + headers: { + 'Content-Type': headers['content-type'] ?? 
'application/json', + Authorization: headers['authorization'], + }, + }) + .toPromise() + .then((response) => { + defaultWriter.write({ + type: 'data', + data: response?.data, + }); + }) + .catch((error: any) => { + defaultWriter.write({ + type: 'error', + error, + }); + }); + }); } } - - async loadModel(_loadModel: any): Promise {} - async unloadModel(_modelId: string): Promise {} } diff --git a/cortex-js/src/domain/models/huggingface.interface.ts b/cortex-js/src/domain/models/huggingface.interface.ts new file mode 100644 index 000000000..cfb1dc5a3 --- /dev/null +++ b/cortex-js/src/domain/models/huggingface.interface.ts @@ -0,0 +1,65 @@ +export interface HuggingFaceRepoData { + id: string; + modelId: string; + modelUrl?: string; + author: string; + sha: string; + downloads: number; + lastModified: string; + private: boolean; + disabled: boolean; + gated: boolean; + pipeline_tag: 'text-generation'; + tags: Array<'transformers' | 'pytorch' | 'safetensors' | string>; + cardData: Record; + siblings: { + rfilename: string; + downloadUrl?: string; + fileSize?: number; + quantization?: Quantization; + }[]; + createdAt: string; +} + +const CardDataKeys = [ + 'base_model', + 'datasets', + 'inference', + 'language', + 'library_name', + 'license', + 'model_creator', + 'model_name', + 'model_type', + 'pipeline_tag', + 'prompt_template', + 'quantized_by', + 'tags', +] as const; +export type CardDataKeysTuple = typeof CardDataKeys; +export type CardDataKeys = CardDataKeysTuple[number]; + +export const AllQuantizations = [ + 'Q3_K_S', + 'Q3_K_M', + 'Q3_K_L', + 'Q4_K_S', + 'Q4_K_M', + 'Q5_K_S', + 'Q5_K_M', + 'Q4_0', + 'Q4_1', + 'Q5_0', + 'Q5_1', + 'IQ2_XXS', + 'IQ2_XS', + 'Q2_K', + 'Q2_K_S', + 'Q6_K', + 'Q8_0', + 'F16', + 'F32', + 'COPY', +]; +export type QuantizationsTuple = typeof AllQuantizations; +export type Quantization = QuantizationsTuple[number]; diff --git a/cortex-js/src/infrastructure/commanders/basic-command.commander.ts 
b/cortex-js/src/infrastructure/commanders/basic-command.commander.ts deleted file mode 100644 index ea0c71237..000000000 --- a/cortex-js/src/infrastructure/commanders/basic-command.commander.ts +++ /dev/null @@ -1,52 +0,0 @@ -import { RootCommand, CommandRunner, Option } from 'nest-commander'; -import { PullCommand } from './pull.command'; -import { ServeCommand } from './serve.command'; -import { InferenceCommand } from './inference.command'; -import { ModelsCommand } from './models.command'; -import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; -import { defaultCortexJsHost, defaultCortexJsPort } from 'constant'; - -@RootCommand({ - subCommands: [ModelsCommand, PullCommand, ServeCommand, InferenceCommand], -}) -export class BasicCommand extends CommandRunner { - constructor(private readonly cortexUsecases: CortexUsecases) { - super(); - } - - async run(input: string[], options?: any): Promise { - const command = input[0]; - - switch (command) { - case 'start': - const host = options?.host || defaultCortexJsHost; - const port = options?.port || defaultCortexJsPort; - return this.cortexUsecases - .startCortex(host, port) - .then((e) => console.log(e)); - case 'stop': - return this.cortexUsecases - .stopCortex(defaultCortexJsHost, defaultCortexJsPort) - .then((e) => console.log(e)); - default: - console.error(`Command ${command} is not supported`); - return; - } - } - - @Option({ - flags: '--host ', - description: 'Host to serve the application', - }) - parseHost(value: string) { - return value; - } - - @Option({ - flags: '--port ', - description: 'Port to serve the application', - }) - parsePort(value: string) { - return parseInt(value, 10); - } -} diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts b/cortex-js/src/infrastructure/commanders/chat.command.ts new file mode 100644 index 000000000..8efacb093 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/chat.command.ts @@ -0,0 +1,41 @@ +import { ChatUsecases } from 
'@/usecases/chat/chat.usecases'; +import { CommandRunner, SubCommand, Option } from 'nest-commander'; +import { ChatCliUsecases } from './usecases/chat.cli.usecases'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; +import { exit } from 'node:process'; + +type ChatOptions = { + model?: string; +}; + +@SubCommand({ name: 'chat', description: 'Start a chat with a model' }) +export class ChatCommand extends CommandRunner { + constructor( + private readonly chatUsecases: ChatUsecases, + private readonly cortexUsecases: CortexUsecases, + ) { + super(); + } + + async run(_input: string[], option: ChatOptions): Promise { + const modelId = option.model; + if (!modelId) { + console.error('Model ID is required'); + exit(1); + } + + const chatCliUsecases = new ChatCliUsecases( + this.chatUsecases, + this.cortexUsecases, + ); + return chatCliUsecases.chat(modelId); + } + + @Option({ + flags: '--model ', + description: 'Model Id to start chat with', + }) + parseModelId(value: string) { + return value; + } +} diff --git a/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts new file mode 100644 index 000000000..05aa30271 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/cortex-command.commander.ts @@ -0,0 +1,20 @@ +import { RootCommand, CommandRunner } from 'nest-commander'; +import { ServeCommand } from './serve.command'; +import { ChatCommand } from './chat.command'; +import { ModelsCommand } from './models.command'; +import { InitCommand } from './init.command'; +import { RunCommand } from './shortcuts/run.command'; + +@RootCommand({ + subCommands: [ + ModelsCommand, + ServeCommand, + ChatCommand, + InitCommand, + RunCommand, + ], + description: 'Cortex CLI', +}) +export class CortexCommand extends CommandRunner { + async run(): Promise {} +} diff --git a/cortex-js/src/infrastructure/commanders/inference.command.ts 
b/cortex-js/src/infrastructure/commanders/inference.command.ts deleted file mode 100644 index b5eba3988..000000000 --- a/cortex-js/src/infrastructure/commanders/inference.command.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { CommandRunner, SubCommand } from 'nest-commander'; - -@SubCommand({ name: 'chat' }) -export class InferenceCommand extends CommandRunner { - constructor() { - super(); - } - - async run(_input: string[]): Promise { - const lineByLine = require('readline'); - const lbl = lineByLine.createInterface({ - input: process.stdin, - output: process.stdout, - }); - lbl.on('line', (userInput: string) => { - if (userInput.trim() === 'exit()') { - lbl.close(); - return; - } - - console.log('Result:', userInput); - console.log('Enter another equation or type "exit()" to quit.'); - }); - } -} diff --git a/cortex-js/src/infrastructure/commanders/init.command.ts b/cortex-js/src/infrastructure/commanders/init.command.ts new file mode 100644 index 000000000..adf8eba4b --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/init.command.ts @@ -0,0 +1,37 @@ +import { CommandRunner, InquirerService, SubCommand } from 'nest-commander'; +import { InitCliUsecases } from './usecases/init.cli.usecases'; +import { InitOptions } from './types/init-options.interface'; + +@SubCommand({ + name: 'init', + aliases: ['setup'], + description: "Init settings and download cortex's dependencies", +}) +export class InitCommand extends CommandRunner { + constructor( + private readonly inquirerService: InquirerService, + private readonly initUsecases: InitCliUsecases, + ) { + super(); + } + + async run(input: string[], options?: InitOptions): Promise { + options = await this.inquirerService.ask( + 'init-run-mode-questions', + options, + ); + + if (options.runMode === 'GPU' && !(await this.initUsecases.cudaVersion())) { + options = await this.inquirerService.ask('init-cuda-questions', options); + } + + const version = input[0] ?? 
'latest'; + + const engineFileName = this.initUsecases.parseEngineFileName(options); + await this.initUsecases.installEngine(engineFileName, version); + + if (options.installCuda === 'Yes') { + await this.initUsecases.installCudaToolkitDependency(options); + } + } +} diff --git a/cortex-js/src/infrastructure/commanders/models.command.ts b/cortex-js/src/infrastructure/commanders/models.command.ts index e20d27970..631c55774 100644 --- a/cortex-js/src/infrastructure/commanders/models.command.ts +++ b/cortex-js/src/infrastructure/commanders/models.command.ts @@ -1,52 +1,23 @@ -import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CommandRunner, SubCommand } from 'nest-commander'; -import { PullCommand } from './pull.command'; -import { StartCommand } from './start.command'; +import { ModelStartCommand } from './models/model-start.command'; +import { ModelGetCommand } from './models/model-get.command'; +import { ModelListCommand } from './models/model-list.command'; +import { ModelStopCommand } from './models/model-stop.command'; +import { ModelPullCommand } from './models/model-pull.command'; +import { ModelRemoveCommand } from './models/model-remove.command'; -@SubCommand({ name: 'models', subCommands: [PullCommand, StartCommand] }) +@SubCommand({ + name: 'models', + subCommands: [ + ModelPullCommand, + ModelStartCommand, + ModelStopCommand, + ModelListCommand, + ModelGetCommand, + ModelRemoveCommand, + ], + description: 'Subcommands for managing models', +}) export class ModelsCommand extends CommandRunner { - constructor(private readonly modelsUsecases: ModelsUsecases) { - super(); - } - - async run(input: string[]): Promise { - const command = input[0]; - const modelId = input[1]; - - if (command !== 'list') { - if (!modelId) { - console.log('Model ID is required'); - return; - } - } - - switch (command) { - case 'list': - this.modelsUsecases.findAll().then(console.log); - return; - case 'get': - 
this.modelsUsecases.findOne(modelId).then(console.log); - return; - case 'remove': - this.modelsUsecases.remove(modelId).then(console.log); - return; - - case 'stop': - return this.modelsUsecases - .stopModel(modelId) - .then(console.log) - .catch(console.error); - - case 'stats': - case 'fetch': - case 'build': { - console.log('Command is not supported yet'); - return; - } - - default: - console.error(`Command ${command} is not supported`); - return; - } - } + async run(): Promise {} } diff --git a/cortex-js/src/infrastructure/commanders/models/model-get.command.ts b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts new file mode 100644 index 000000000..15136adc6 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/models/model-get.command.ts @@ -0,0 +1,20 @@ +import { CommandRunner, SubCommand } from 'nest-commander'; +import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; +import { exit } from 'node:process'; + +@SubCommand({ name: 'get', description: 'Get a model by ID.' }) +export class ModelGetCommand extends CommandRunner { + constructor(private readonly modelsCliUsecases: ModelsCliUsecases) { + super(); + } + + async run(input: string[]): Promise { + if (input.length === 0) { + console.error('Model ID is required'); + exit(1); + } + + const models = await this.modelsCliUsecases.getModel(input[0]); + console.log(models); + } +} diff --git a/cortex-js/src/infrastructure/commanders/models/model-list.command.ts b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts new file mode 100644 index 000000000..6e491fc8d --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/models/model-list.command.ts @@ -0,0 +1,14 @@ +import { CommandRunner, SubCommand } from 'nest-commander'; +import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; + +@SubCommand({ name: 'list', description: 'List all models locally.' 
import { CommandRunner, SubCommand } from 'nest-commander';
import { exit } from 'node:process';
import { ModelsCliUsecases } from '../usecases/models.cli.usecases';

/**
 * CLI subcommand `pull` (alias `download`): downloads a model, then ends the
 * process.
 */
@SubCommand({
  name: 'pull',
  aliases: ['download'],
  description: 'Download a model. Working with HuggingFace model id.',
})
export class ModelPullCommand extends CommandRunner {
  constructor(private readonly modelsCliUsecases: ModelsCliUsecases) {
    super();
  }

  /**
   * @param input Positional CLI arguments; the first one is the model ID.
   */
  async run(input: string[]) {
    const missingModelId = input.length === 0;
    if (missingModelId) {
      console.error('Model ID is required');
      exit(1);
    }

    const [modelId] = input;
    await this.modelsCliUsecases.pullModel(modelId);

    // Exit explicitly once the download has finished.
    console.log('\nDownload complete!');
    exit(0);
  }
}
import { CommandRunner, SubCommand } from 'nest-commander';
import { exit } from 'node:process';
import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';

/**
 * CLI subcommand `start`: brings Cortex up and then starts the given model.
 */
@SubCommand({ name: 'start', description: 'Start a model by ID.' })
export class ModelStartCommand extends CommandRunner {
  constructor(
    private readonly cortexUsecases: CortexUsecases,
    private readonly modelsCliUsecases: ModelsCliUsecases,
  ) {
    super();
  }

  /**
   * @param input Positional CLI arguments; `input[0]` is used as the model ID.
   *              When no argument is given, an error is printed and the
   *              process exits with status 1.
   */
  async run(input: string[]): Promise<void> {
    if (input.length === 0) {
      console.error('Model ID is required');
      exit(1);
    }

    // Order matters: startCortex() is awaited before the model is started.
    await this.cortexUsecases.startCortex();
    await this.modelsCliUsecases.startModel(input[0]);
  }
}
description: 'Stop a model by ID.' }) +export class ModelStopCommand extends CommandRunner { + constructor( + private readonly cortexUsecases: CortexUsecases, + private readonly modelsCliUsecases: ModelsCliUsecases, + ) { + super(); + } + + async run(input: string[]): Promise { + if (input.length === 0) { + console.error('Model ID is required'); + exit(1); + } + + await this.modelsCliUsecases.stopModel(input[0]); + await this.cortexUsecases.stopCortex(); + } +} diff --git a/cortex-js/src/infrastructure/commanders/pull.command.ts b/cortex-js/src/infrastructure/commanders/pull.command.ts deleted file mode 100644 index 825b859b2..000000000 --- a/cortex-js/src/infrastructure/commanders/pull.command.ts +++ /dev/null @@ -1,143 +0,0 @@ -import { ModelsUsecases } from '@/usecases/models/models.usecases'; -import { CommandRunner, SubCommand } from 'nest-commander'; -import { CreateModelDto } from '../dtos/models/create-model.dto'; -import { ModelFormat } from '@/domain/models/model.interface'; -import { Presets, SingleBar } from 'cli-progress'; - -const AllQuantizations = [ - 'Q3_K_S', - 'Q3_K_M', - 'Q3_K_L', - 'Q4_K_S', - 'Q4_K_M', - 'Q5_K_S', - 'Q5_K_M', - 'Q4_0', - 'Q4_1', - 'Q5_0', - 'Q5_1', - 'IQ2_XXS', - 'IQ2_XS', - 'Q2_K', - 'Q2_K_S', - 'Q6_K', - 'Q8_0', - 'F16', - 'F32', - 'COPY', -]; - -@SubCommand({ name: 'pull', aliases: ['download'] }) -export class PullCommand extends CommandRunner { - constructor(private readonly modelsUsecases: ModelsUsecases) { - super(); - } - - async run(input: string[]): Promise { - if (input.length < 1) { - return Promise.reject('Model ID is required'); - } - - const modelId = input[0]; - if (modelId.includes('/')) { - await this.pullHuggingFaceModel(modelId); - } - - const bar = new SingleBar({}, Presets.shades_classic); - bar.start(100, 0); - await this.modelsUsecases.downloadModel({ modelId }, (progress) => { - bar.update(progress); - }); - console.log('\nDownload complete!'); - process.exit(0); - } - - async 
pullHuggingFaceModel(modelId: string) { - const data = await this.fetchHuggingFaceRepoData(modelId); - - // TODO: add select options - const sibling = data.siblings.filter( - (e: any) => e.quantization == 'Q5_K_M', - )[0]; - - if (!sibling) throw 'No expected quantization found'; - - const model: CreateModelDto = { - sources: [ - { - url: sibling.downloadUrl, - }, - ], - id: modelId, - name: modelId, - version: '', - format: ModelFormat.GGUF, - description: '', - settings: {}, - parameters: {}, - metadata: { - author: data.author, - size: sibling.fileSize, - tags: [], - }, - engine: 'cortex', - }; - if (!(await this.modelsUsecases.findOne(modelId))) - await this.modelsUsecases.create(model); - } - - private async fetchHuggingFaceRepoData(repoId: string) { - const sanitizedUrl = this.toHuggingFaceUrl(repoId); - - const res = await fetch(sanitizedUrl); - const data = await res.json(); - if (data['error'] != null) { - throw new Error(data['error']); - } - - if (data.tags.indexOf('gguf') === -1) { - throw `${repoId} is not supported. 
Only GGUF models are supported.`; - } - - // fetching file sizes - const url = new URL(sanitizedUrl); - const paths = url.pathname.split('/').filter((e) => e.trim().length > 0); - - for (let i = 0; i < data.siblings.length; i++) { - const downloadUrl = `https://huggingface.co/${paths[2]}/${paths[3]}/resolve/main/${data.siblings[i].rfilename}`; - data.siblings[i].downloadUrl = downloadUrl; - } - - AllQuantizations.forEach((quantization) => { - data.siblings.forEach((sibling: any) => { - if (!sibling.quantization && sibling.rfilename.includes(quantization)) { - sibling.quantization = quantization; - } - }); - }); - - data.modelUrl = `https://huggingface.co/${paths[2]}/${paths[3]}`; - return data; - } - - private toHuggingFaceUrl(repoId: string): string { - try { - const url = new URL(`https://huggingface.co/${repoId}`); - if (url.host !== 'huggingface.co') { - throw `Invalid Hugging Face repo URL: ${repoId}`; - } - - const paths = url.pathname.split('/').filter((e) => e.trim().length > 0); - if (paths.length < 2) { - throw `Invalid Hugging Face repo URL: ${repoId}`; - } - - return `${url.origin}/api/models/${paths[0]}/${paths[1]}`; - } catch (err) { - if (repoId.startsWith('https')) { - throw new Error(`Cannot parse url: ${repoId}`); - } - throw err; - } - } -} diff --git a/cortex-js/src/infrastructure/commanders/questions/cuda.questions.ts b/cortex-js/src/infrastructure/commanders/questions/cuda.questions.ts new file mode 100644 index 000000000..2309c3d00 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/questions/cuda.questions.ts @@ -0,0 +1,17 @@ +import { Question, QuestionSet } from 'nest-commander'; +import { platform } from 'node:process'; + +@QuestionSet({ name: 'init-cuda-questions' }) +export class InitCudaQuestions { + @Question({ + type: 'list', + message: 'Do you want to install additional dependencies for CUDA Toolkit?', + name: 'installCuda', + default: 'Yes', + choices: ['Yes', 'No, I want to use my own CUDA Toolkit'], + when: () => platform 
import { Question, QuestionSet } from 'nest-commander';
import { platform } from 'node:process';

/** True on every platform except macOS (`darwin`). */
const notOnMac = (): boolean => platform !== 'darwin';

/**
 * Interactive question set `init-run-mode-questions`: run mode, GPU type and
 * CPU instruction set. Each parser returns the selected answer unchanged.
 */
@QuestionSet({ name: 'init-run-mode-questions' })
export class InitRunModeQuestions {
  // Skipped on macOS.
  @Question({
    type: 'list',
    message: 'Select run mode',
    name: 'runMode',
    default: 'CPU',
    choices: ['CPU', 'GPU'],
    when: notOnMac,
  })
  parseRunMode(val: string) {
    return val;
  }

  // Asked only when the run-mode answer was 'GPU'.
  @Question({
    type: 'list',
    message: 'Select GPU type',
    name: 'gpuType',
    default: 'Nvidia',
    choices: ['Nvidia', 'Others (Vulkan)'],
    when: (answers: any) => {
      const isGpuMode = answers.runMode === 'GPU';
      return isGpuMode;
    },
  })
  parseGPUType(val: string) {
    return val;
  }

  // Skipped on macOS.
  @Question({
    type: 'list',
    message: 'Select CPU instructions set',
    name: 'instructions',
    choices: ['AVX2', 'AVX', 'AVX512'],
    when: notOnMac,
  })
  parseContent(val: string) {
    return val;
  }
}
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
import { ModelsUsecases } from '@/usecases/models/models.usecases';
import { CommandRunner, SubCommand, Option } from 'nest-commander';
import { exit } from 'node:process';
import { ChatUsecases } from '@/usecases/chat/chat.usecases';
import { ChatCliUsecases } from '../usecases/chat.cli.usecases';

type RunOptions = {
  model?: string;
};

/**
 * CLI subcommand `run`: starts Cortex, starts the requested model and then
 * opens an interactive chat with it.
 */
@SubCommand({
  name: 'run',
  description: 'EXPERIMENTAL: Shortcut to start a model and chat',
})
export class RunCommand extends CommandRunner {
  constructor(
    private readonly modelsUsecases: ModelsUsecases,
    private readonly cortexUsecases: CortexUsecases,
    private readonly chatUsecases: ChatUsecases,
  ) {
    super();
  }

  /**
   * @param _input Positional CLI arguments (unused).
   * @param option Parsed options; `option.model` is the model ID. When it is
   *               missing, an error is printed and the process exits with
   *               status 1.
   */
  async run(_input: string[], option: RunOptions): Promise<void> {
    const modelId = option.model;
    if (!modelId) {
      console.error('Model ID is required');
      exit(1);
    }

    await this.cortexUsecases.startCortex();
    await this.modelsUsecases.startModel(modelId);

    // ChatCliUsecases is not registered as a provider here, so it is built
    // by hand from the injected use cases.
    const chatCliUsecases = new ChatCliUsecases(
      this.chatUsecases,
      this.cortexUsecases,
    );
    await chatCliUsecases.chat(modelId);
  }

  // The `<model_id>` placeholder makes the option take a value. With a bare
  // `--model` the option is a boolean switch and the model ID never arrives.
  @Option({
    flags: '--model <model_id>',
    description: 'Model Id to start chat with',
  })
  parseModelId(value: string) {
    return value;
  }
}
class StartCommand extends CommandRunner { - constructor( - private readonly modelsUsecases: ModelsUsecases, - private readonly cortexUsecases: CortexUsecases, - ) { - super(); - } - - async run(input: string[]): Promise { - const modelId = input[0]; - - if (!modelId) { - console.log('Model ID is required'); - return; - } - return this.startCortex() - .then(() => this.startModel(modelId)) - .then(console.log) - .catch(console.error); - } - - private async startCortex() { - const host = '127.0.0.1'; - const port = '3928'; - return this.cortexUsecases.startCortex(host, port); - } - private async startModel(modelId: string) { - const settings = { - cpu_threads: 10, - ctx_len: 2048, - embedding: false, - prompt_template: - '{system_message}\n### Instruction: {prompt}\n### Response:', - system_prompt: '', - user_prompt: '\n### Instruction: ', - ai_prompt: '\n### Response:', - ngl: 100, - }; - const loadModelDto: LoadModelDto = { modelId, settings }; - return this.modelsUsecases.startModel(loadModelDto); - } -} diff --git a/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts b/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts new file mode 100644 index 000000000..24d460bbb --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/types/init-options.interface.ts @@ -0,0 +1,7 @@ +export interface InitOptions { + runMode?: 'CPU' | 'GPU'; + gpuType?: 'Nvidia' | 'Others (Vulkan)'; + instructions?: 'AVX' | 'AVX2' | 'AVX512' | undefined; + cudaVersion?: '11' | '12'; + installCuda?: 'Yes' | string; +} diff --git a/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts new file mode 100644 index 000000000..9f7409cca --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/usecases/chat.cli.usecases.ts @@ -0,0 +1,84 @@ +import { ChatUsecases } from '@/usecases/chat/chat.usecases'; +import { ChatCompletionRole } from 
'@/domain/models/message.interface'; +import { exit, stdin, stdout } from 'node:process'; +import * as readline from 'node:readline/promises'; +import { ChatStreamEvent } from '@/domain/abstracts/oai.abstract'; +import { ChatCompletionMessage } from '@/infrastructure/dtos/chat/chat-completion-message.dto'; +import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-completion.dto'; +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; + +// TODO: make this class injectable +export class ChatCliUsecases { + private exitClause = 'exit()'; + private userIndicator = '>> '; + private exitMessage = 'Bye!'; + + constructor( + private readonly chatUsecases: ChatUsecases, + private readonly cortexUsecases: CortexUsecases, + ) {} + + async chat(modelId: string): Promise { + console.log(`Inorder to exit, type '${this.exitClause}'.`); + const messages: ChatCompletionMessage[] = []; + + const rl = readline.createInterface({ + input: stdin, + output: stdout, + prompt: this.userIndicator, + }); + rl.prompt(); + + rl.on('close', () => { + this.cortexUsecases.stopCortex().then(() => { + console.log(this.exitMessage); + exit(0); + }); + }); + + rl.on('line', (userInput: string) => { + if (userInput.trim() === this.exitClause) { + rl.close(); + return; + } + + messages.push({ + content: userInput, + role: ChatCompletionRole.User, + }); + + const chatDto: CreateChatCompletionDto = { + messages, + model: modelId, + stream: true, + max_tokens: 2048, + stop: [], + frequency_penalty: 0.7, + presence_penalty: 0.7, + temperature: 0.7, + top_p: 0.7, + }; + + let llmFullResponse = ''; + const writableStream = new WritableStream({ + write(chunk) { + if (chunk.type === 'data') { + stdout.write(chunk.data ?? ''); + llmFullResponse += chunk.data ?? 
import { createWriteStream, existsSync, rmSync } from 'fs';
import { resolve, delimiter, join } from 'path';
import { HttpService } from '@nestjs/axios';
import { Presets, SingleBar } from 'cli-progress';
import decompress from 'decompress';
import { exit } from 'node:process';
import { InitOptions } from '../types/init-options.interface';
import { Injectable } from '@nestjs/common';

/**
 * Use cases behind the CLI init flow: download and unpack the cortex-cpp
 * engine, detect an installed CUDA toolkit, and download the CUDA runtime
 * dependencies.
 */
@Injectable()
export class InitCliUsecases {
  CORTEX_RELEASES_URL = 'https://api.github.com/repos/janhq/cortex/releases';
  // `<version>` and `<platform>` are substituted in
  // installCudaToolkitDependency().
  // NOTE(review): the placeholder names and their order in the path are
  // reconstructed — confirm against the catalog layout.
  CUDA_DOWNLOAD_URL =
    'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz';

  constructor(private readonly httpService: HttpService) {}

  /**
   * Downloads the release asset whose name contains `engineFileName` and
   * unpacks it into the root directory. An existing `cortex-cpp` directory is
   * removed first. Any failure is printed and ends the process with status 1.
   *
   * @param engineFileName Asset name fragment, see parseEngineFileName().
   * @param version Release name to install, or 'latest' (default).
   */
  installEngine = async (
    engineFileName: string,
    version: string = 'latest',
  ): Promise<void> => {
    const res = await this.httpService
      .get(
        this.CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`,
        {
          headers: {
            'X-GitHub-Api-Version': '2022-11-28',
            Accept: 'application/vnd.github+json',
          },
        },
      )
      .toPromise();

    if (!res?.data) {
      console.log('Failed to fetch releases');
      exit(1);
    }

    // `/releases` returns a list and `/releases/latest` a single release.
    const release = Array.isArray(res.data)
      ? res.data.find((e: any) => e.name === version.replace('v', ''))
      : res.data;

    // Without this guard an unknown version crashes on `release.assets`.
    if (!release) {
      console.log(`Could not find release ${version}`);
      exit(1);
    }

    const toDownloadAsset = release.assets?.find((s: any) =>
      s.name.includes(engineFileName),
    );

    if (!toDownloadAsset) {
      console.log(`Could not find engine file ${engineFileName}`);
      exit(1);
    }

    console.log(`Downloading engine file ${engineFileName}`);
    const engineDir = resolve(this.rootDir(), 'cortex-cpp');
    if (existsSync(engineDir)) rmSync(engineDir, { recursive: true });

    const destination = resolve(this.rootDir(), toDownloadAsset.name);
    await this.downloadWithProgress(
      toDownloadAsset.browser_download_url,
      destination,
      'Failed to download engine',
    );

    try {
      await decompress(destination, this.rootDir());
    } catch (e) {
      console.error('Error decompressing file', e);
      exit(1);
    }
  };

  /**
   * Builds the engine archive name
   * `<platform>-<arch>[-<instructions>][-cuda-<ver>|-vulkan].tar.gz`
   * from the current process platform/arch and the init answers.
   */
  parseEngineFileName = (options: InitOptions) => {
    const platform =
      process.platform === 'win32'
        ? 'windows'
        : process.platform === 'darwin'
          ? 'mac'
          : process.platform;
    const arch = process.arch === 'arm64' ? process.arch : 'amd64';
    const cudaVersion =
      options.runMode === 'GPU'
        ? options.gpuType === 'Nvidia'
          ? '-cuda-' + (options.cudaVersion === '11' ? '11-7' : '12-0')
          : '-vulkan'
        : '';
    const instructions = options.instructions ? `-${options.instructions}` : '';
    const engineName = `${platform}-${arch}${instructions.toLowerCase()}${cudaVersion}`;
    return `${engineName}.tar.gz`;
  };

  /** Directory four levels above this file. */
  rootDir = () => resolve(__dirname, `../../../../`);

  /**
   * Detects an installed CUDA toolkit by looking for its runtime libraries.
   * Each file is looked up as given (relative to the working directory) and
   * in PATH (Windows) or LD_LIBRARY_PATH plus `/usr/lib/x86_64-linux-gnu/`
   * (elsewhere).
   *
   * @returns '12' or '11' when every library of that version is found
   *          (12 is checked first), otherwise undefined.
   */
  cudaVersion = async () => {
    let filesCuda12: string[];
    let filesCuda11: string[];
    let paths: string[];

    if (process.platform === 'win32') {
      filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll'];
      filesCuda11 = [
        'cublas64_11.dll',
        'cudart64_110.dll',
        'cublasLt64_11.dll',
      ];
      paths = process.env.PATH ? process.env.PATH.split(delimiter) : [];
    } else {
      filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12'];
      filesCuda11 = [
        'libcudart.so.11.0',
        'libcublas.so.11',
        'libcublasLt.so.11',
      ];
      paths = process.env.LD_LIBRARY_PATH
        ? process.env.LD_LIBRARY_PATH.split(delimiter)
        : [];
      paths.push('/usr/lib/x86_64-linux-gnu/');
    }

    const allPresent = (files: string[]): boolean =>
      files.every(
        (file) =>
          existsSync(file) || this.checkFileExistenceInPaths(file, paths),
      );

    if (allPresent(filesCuda12)) return '12';
    if (allPresent(filesCuda11)) return '11';

    return undefined; // No CUDA Toolkit found
  };

  /** True when `file` exists in at least one of the given directories. */
  checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
    return paths.some((p) => existsSync(join(p, file)));
  };

  /**
   * Downloads the CUDA dependency archive for `options.cudaVersion`
   * ('11' maps to 11.7, anything else to 12.0) and unpacks it into the
   * `cortex-cpp` directory. Failures end the process with status 1.
   */
  installCudaToolkitDependency = async (options: InitOptions) => {
    const platform = process.platform === 'win32' ? 'windows' : 'linux';

    const url = this.CUDA_DOWNLOAD_URL.replace(
      '<version>',
      options.cudaVersion === '11' ? '11.7' : '12.0',
    ).replace('<platform>', platform);
    const destination = resolve(this.rootDir(), 'cuda-toolkit.tar.gz');

    await this.downloadWithProgress(
      url,
      destination,
      'Failed to download dependency',
    );

    try {
      await decompress(destination, resolve(this.rootDir(), 'cortex-cpp'));
    } catch (e) {
      console.log(e);
      exit(1);
    }
  };

  /**
   * Streams `url` into `destination` while drawing a percentage progress bar.
   * Prints `failureMessage` and exits with status 1 when the request yields
   * no response. Rejects when either the download or the file stream fails.
   */
  private async downloadWithProgress(
    url: string,
    destination: string,
    failureMessage: string,
  ): Promise<void> {
    const download = await this.httpService
      .get(url, {
        responseType: 'stream',
      })
      .toPromise();

    if (!download) {
      console.log(failureMessage);
      exit(1);
    }

    await new Promise<void>((done, fail) => {
      const writer = createWriteStream(destination);
      const bar = new SingleBar({}, Presets.shades_classic);
      // The header may be absent; NaN then disables progress updates below.
      const totalBytes = Number(download.headers['content-length']);
      let receivedBytes = 0;

      writer.on('finish', () => {
        bar.stop();
        done();
      });

      writer.on('error', (error) => {
        bar.stop();
        fail(error);
      });

      // pipe() does not forward source errors to the writer, so an aborted
      // download would otherwise leave this promise pending forever.
      download.data.on('error', (error: Error) => {
        bar.stop();
        writer.destroy();
        fail(error);
      });

      bar.start(100, 0);

      download.data.on('data', (chunk: any) => {
        receivedBytes += chunk.length;
        if (totalBytes > 0) {
          bar.update(Math.floor((receivedBytes / totalBytes) * 100));
        }
      });

      download.data.pipe(writer);
    });
  }
}
import { exit } from 'node:process';
import { ModelsUsecases } from '@/usecases/models/models.usecases';
import { Model, ModelFormat } from '@/domain/models/model.interface';
import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto';
import { HuggingFaceRepoData } from '@/domain/models/huggingface.interface';
import { gguf } from '@huggingface/gguf';
import { InquirerService } from 'nest-commander';
import { Inject, Injectable } from '@nestjs/common';
import { Presets, SingleBar } from 'cli-progress';

// Quantization labels recognised inside repository file names.
const AllQuantizations = [
  'Q3_K_S',
  'Q3_K_M',
  'Q3_K_L',
  'Q4_K_S',
  'Q4_K_M',
  'Q5_K_S',
  'Q5_K_M',
  'Q4_0',
  'Q4_1',
  'Q5_0',
  'Q5_1',
  'IQ2_XXS',
  'IQ2_XS',
  'Q2_K',
  'Q2_K_S',
  'Q6_K',
  'Q8_0',
  'F16',
  'F32',
  'COPY',
];

/**
 * Model operations used by the CLI commands. Lookups that miss end the
 * process instead of returning an error to the caller.
 */
@Injectable()
export class ModelsCliUsecases {
  constructor(
    private readonly modelsUsecases: ModelsUsecases,
    @Inject(InquirerService)
    private readonly inquirerService: InquirerService,
  ) {}

  /** Starts the model; exits with status 1 when the ID is unknown. */
  async startModel(modelId: string): Promise<void> {
    await this.getModelOrStop(modelId);
    await this.modelsUsecases.startModel(modelId);
  }

  /** Stops the model; exits with status 1 when the ID is unknown. */
  async stopModel(modelId: string): Promise<void> {
    await this.getModelOrStop(modelId);
    await this.modelsUsecases.stopModel(modelId);
  }

  /**
   * Returns the stored model, or prints 'Model not found' and exits with
   * status 1.
   * NOTE(review): the `Model` element type is reconstructed from the file's
   * imports — confirm it matches what `findOne` returns.
   */
  private async getModelOrStop(modelId: string): Promise<Model> {
    const model = await this.modelsUsecases.findOne(modelId);
    if (!model) {
      console.debug('Model not found');
      exit(1);
    }
    return model;
  }

  /** Returns every stored model. */
  async listAllModels(): Promise<Model[]> {
    return this.modelsUsecases.findAll();
  }

  /** Returns one model; exits with status 1 when the ID is unknown. */
  async getModel(modelId: string): Promise<Model> {
    const model = await this.getModelOrStop(modelId);
    return model;
  }

  /** Removes one model; exits with status 1 when the ID is unknown. */
  async removeModel(modelId: string) {
    await this.getModelOrStop(modelId);
    return this.modelsUsecases.remove(modelId);
  }

  /**
   * Downloads a model while showing a progress bar. An ID containing '/' is
   * treated as a Hugging Face repository and registered first.
   */
  async pullModel(modelId: string) {
    if (modelId.includes('/')) {
      await this.pullHuggingFaceModel(modelId);
    }

    const bar = new SingleBar({}, Presets.shades_classic);
    bar.start(100, 0);
    const callback = (progress: number) => {
      bar.update(progress);
    };
    try {
      await this.modelsUsecases.downloadModel(modelId, callback);
    } finally {
      // Stop the bar whether or not the download succeeded.
      bar.stop();
    }
  }

  /**
   * Asks which quantization to use, reads the stop token from the GGUF
   * metadata (best effort) and creates the model record if none exists yet.
   */
  private async pullHuggingFaceModel(modelId: string) {
    const data = await this.fetchHuggingFaceRepoData(modelId);

    // Several files can share one quantization; offer each label once.
    const choices = Array.from(
      new Set(
        data.siblings.map((e) => e.quantization).filter((e) => e != null),
      ),
    );
    // An empty list cannot be prompted for.
    if (choices.length === 0) throw 'No expected quantization found';

    const { quantization } = await this.inquirerService.inquirer.prompt({
      type: 'list',
      name: 'quantization',
      message: 'Select quantization',
      choices,
    });

    const sibling = data.siblings
      .filter((e) => !!e.quantization)
      .find((e: any) => e.quantization === quantization);

    if (!sibling) throw 'No expected quantization found';

    let stopWord = '';
    try {
      const { metadata } = await gguf(sibling.downloadUrl!);
      // @ts-expect-error "tokenizer.ggml.eos_token_id"
      const index = metadata['tokenizer.ggml.eos_token_id'];
      // @ts-expect-error "tokenizer.ggml.tokens"
      stopWord = metadata['tokenizer.ggml.tokens'][index] ?? '';
    } catch (err) {
      // The stop word is optional; the model is still created without it.
      console.log('Failed to get stop word: ', err);
    }

    const stopWords: string[] = [];
    if (stopWord.length > 0) {
      stopWords.push(stopWord);
    }

    const model: CreateModelDto = {
      sources: [
        {
          url: sibling?.downloadUrl ?? '',
        },
      ],
      id: modelId,
      name: modelId,
      version: '',
      format: ModelFormat.GGUF,
      description: '',
      settings: {},
      parameters: {
        stop: stopWords,
      },
      metadata: {
        author: data.author,
        size: sibling.fileSize ?? 0,
        tags: [],
      },
      engine: 'cortex',
    };
    if (!(await this.modelsUsecases.findOne(modelId)))
      await this.modelsUsecases.create(model);
  }

  /**
   * Fetches repository metadata from the Hugging Face API and adds a
   * `downloadUrl` to every file, a `quantization` label where the file name
   * contains one, and a `modelUrl` for the repository.
   *
   * Throws when the API reports an error or the repo is not tagged `gguf`.
   */
  private async fetchHuggingFaceRepoData(repoId: string) {
    const sanitizedUrl = this.toHuggingFaceUrl(repoId);

    const res = await fetch(sanitizedUrl);
    const response = await res.json();
    if (response['error'] != null) {
      throw new Error(response['error']);
    }

    const data = response as HuggingFaceRepoData;

    if (data.tags.indexOf('gguf') === -1) {
      throw `${repoId} is not supported. Only GGUF models are supported.`;
    }

    // The API URL path is /api/models/<owner>/<repo>, so the owner and repo
    // are segments 2 and 3.
    const url = new URL(sanitizedUrl);
    const paths = url.pathname.split('/').filter((e) => e.trim().length > 0);

    for (let i = 0; i < data.siblings.length; i++) {
      const downloadUrl = `https://huggingface.co/${paths[2]}/${paths[3]}/resolve/main/${data.siblings[i].rfilename}`;
      data.siblings[i].downloadUrl = downloadUrl;
    }

    // Try longer labels first: 'Q2_K' is a substring of 'Q2_K_S', so matching
    // in list order would tag Q2_K_S files as Q2_K.
    const longestFirst = [...AllQuantizations].sort(
      (a, b) => b.length - a.length,
    );
    data.siblings.forEach((sibling: any) => {
      if (sibling.quantization) return;
      const match = longestFirst.find((quantization) =>
        sibling.rfilename.includes(quantization),
      );
      if (match) sibling.quantization = match;
    });

    data.modelUrl = `https://huggingface.co/${paths[2]}/${paths[3]}`;
    return data;
  }

  /**
   * Turns `<owner>/<repo>` into the Hugging Face API URL for that model.
   * Throws when the result does not point at huggingface.co or has fewer than
   * two path segments.
   */
  private toHuggingFaceUrl(repoId: string): string {
    try {
      const url = new URL(`https://huggingface.co/${repoId}`);
      if (url.host !== 'huggingface.co') {
        throw `Invalid Hugging Face repo URL: ${repoId}`;
      }

      const paths = url.pathname.split('/').filter((e) => e.trim().length > 0);
      if (paths.length < 2) {
        throw `Invalid Hugging Face repo URL: ${repoId}`;
      }

      return `${url.origin}/api/models/${paths[0]}/${paths[1]}`;
    } catch (err) {
      if (repoId.startsWith('https')) {
        throw new Error(`Cannot parse url: ${repoId}`);
      }
      throw err;
    }
  }
}
--- a/cortex-js/src/infrastructure/controllers/chat.controller.ts +++ b/cortex-js/src/infrastructure/controllers/chat.controller.ts @@ -3,6 +3,7 @@ import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat- import { ChatUsecases } from '@/usecases/chat/chat.usecases'; import { Response } from 'express'; import { ApiTags } from '@nestjs/swagger'; +import { ChatStreamEvent } from '@/domain/abstracts/oai.abstract'; @ApiTags('Inference') @Controller('chat') @@ -15,6 +16,23 @@ export class ChatController { @Body() createChatDto: CreateChatCompletionDto, @Res() res: Response, ) { - this.chatService.createChatCompletions(createChatDto, headers, res); + const writableStream = new WritableStream({ + write(chunk) { + if (chunk.type === 'data') { + res.json(chunk.data ?? {}); + } else if (chunk.type === 'error') { + res.json(chunk.error ?? {}); + } else { + console.log('\n'); + } + }, + }); + + this.chatService.createChatCompletions( + createChatDto, + headers, + writableStream, + res, + ); } } diff --git a/cortex-js/src/infrastructure/controllers/inference-settings.controller.spec.ts b/cortex-js/src/infrastructure/controllers/inference-settings.controller.spec.ts deleted file mode 100644 index 05097ddae..000000000 --- a/cortex-js/src/infrastructure/controllers/inference-settings.controller.spec.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { InferenceSettingsController } from './inference-settings.controller'; -import { InferenceSettingsUsecases } from '@/usecases/inference-settings/inference-settings.usecases'; - -describe('InferenceSettingsController', () => { - let controller: InferenceSettingsController; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - controllers: [InferenceSettingsController], - providers: [InferenceSettingsUsecases], - }).compile(); - - controller = module.get( - InferenceSettingsController, - ); - }); - - it('should be defined', () 
=> { - expect(controller).toBeDefined(); - }); -}); diff --git a/cortex-js/src/infrastructure/controllers/inference-settings.controller.ts b/cortex-js/src/infrastructure/controllers/inference-settings.controller.ts deleted file mode 100644 index 805a4c1b3..000000000 --- a/cortex-js/src/infrastructure/controllers/inference-settings.controller.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { - Controller, - Get, - Post, - Body, - Patch, - Param, - Delete, -} from '@nestjs/common'; -import { InferenceSettingsUsecases } from '@/usecases/inference-settings/inference-settings.usecases'; -import { CreateInferenceSettingDto } from '@/infrastructure/dtos/inference-settings/create-inference-setting.dto'; -import { UpdateInferenceSettingDto } from '@/infrastructure/dtos/inference-settings/update-inference-setting.dto'; -import { ApiTags } from '@nestjs/swagger'; - -@ApiTags('Inference Settings') -@Controller('inference-settings') -export class InferenceSettingsController { - constructor( - private readonly inferenceSettingsService: InferenceSettingsUsecases, - ) {} - - @Post() - create(@Body() createInferenceSettingDto: CreateInferenceSettingDto) { - return this.inferenceSettingsService.create(createInferenceSettingDto); - } - - @Get() - findAll() { - return this.inferenceSettingsService.findAll(); - } - - @Get(':id') - findOne(@Param('id') id: string) { - return this.inferenceSettingsService.findOne(id); - } - - @Patch(':id') - update( - @Param('id') id: string, - @Body() updateInferenceSettingDto: UpdateInferenceSettingDto, - ) { - return this.inferenceSettingsService.update(id, updateInferenceSettingDto); - } - - @Delete(':id') - remove(@Param('id') id: string) { - return this.inferenceSettingsService.remove(id); - } -} diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts index 907349fc3..2983d1c31 100644 --- a/cortex-js/src/infrastructure/controllers/models.controller.ts +++ 
b/cortex-js/src/infrastructure/controllers/models.controller.ts @@ -12,53 +12,66 @@ import { ModelsUsecases } from '@/usecases/models/models.usecases'; import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; import { ApiResponse, ApiTags } from '@nestjs/swagger'; -import { LoadModelSuccessDto } from '@/infrastructure/dtos/models/load-model-success.dto'; -import { LoadModelDto } from '@/infrastructure/dtos/models/load-model.dto'; -import { DownloadModelDto } from '@/infrastructure/dtos/models/download-model.dto'; +import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; +import { ModelSettingParamsDto } from '../dtos/models/model-setting-params.dto'; @ApiTags('Models') @Controller('models') export class ModelsController { - constructor(private readonly modelsService: ModelsUsecases) {} + constructor(private readonly modelsUsecases: ModelsUsecases) {} @Post() create(@Body() createModelDto: CreateModelDto) { - return this.modelsService.create(createModelDto); + return this.modelsUsecases.create(createModelDto); } @HttpCode(200) @ApiResponse({ status: 200, - description: 'The model has been loaded successfully.', - type: LoadModelSuccessDto, + description: 'The model has been started successfully.', + type: StartModelSuccessDto, }) - @Post('load') - load(@Body() loadModelDto: LoadModelDto) { - return this.modelsService.startModel(loadModelDto); + @Post(':modelId/start') + startModel( + @Param('modelId') modelId: string, + @Body() settings: ModelSettingParamsDto, + ) { + return this.modelsUsecases.startModel(modelId, settings); } - @Post('download') - downloadModel(@Body() downloadModelDto: DownloadModelDto) { - return this.modelsService.downloadModel(downloadModelDto); + @HttpCode(200) + @ApiResponse({ + status: 200, + description: 'The model has been stopped successfully.', + type: StartModelSuccessDto, + }) + 
@Post(':modelId/stop') + stopModel(@Param('modelId') modelId: string) { + return this.modelsUsecases.stopModel(modelId); + } + + @Get('download/:modelId') + downloadModel(@Param('modelId') modelId: string) { + return this.modelsUsecases.downloadModel(modelId); } @Get() findAll() { - return this.modelsService.findAll(); + return this.modelsUsecases.findAll(); } @Get(':id') findOne(@Param('id') id: string) { - return this.modelsService.findOne(id); + return this.modelsUsecases.findOne(id); } @Patch(':id') update(@Param('id') id: string, @Body() updateModelDto: UpdateModelDto) { - return this.modelsService.update(id, updateModelDto); + return this.modelsUsecases.update(id, updateModelDto); } @Delete(':id') remove(@Param('id') id: string) { - return this.modelsService.remove(id); + return this.modelsUsecases.remove(id); } } diff --git a/cortex-js/src/infrastructure/database/database.module.ts b/cortex-js/src/infrastructure/database/database.module.ts index ace7da1c9..016066219 100644 --- a/cortex-js/src/infrastructure/database/database.module.ts +++ b/cortex-js/src/infrastructure/database/database.module.ts @@ -4,7 +4,6 @@ import { sqliteDatabaseProviders } from './sqlite-database.providers'; import { modelProviders } from './providers/model.providers'; import { assistantProviders } from './providers/assistant.providers'; import { messageProviders } from './providers/message.providers'; -import { inferenceSettingProviders } from './providers/inference-setting.providers'; @Module({ providers: [ @@ -13,14 +12,12 @@ import { inferenceSettingProviders } from './providers/inference-setting.provide ...modelProviders, ...assistantProviders, ...messageProviders, - ...inferenceSettingProviders, ], exports: [ ...threadProviders, ...modelProviders, ...assistantProviders, ...messageProviders, - ...inferenceSettingProviders, ], }) export class DatabaseModule {} diff --git a/cortex-js/src/infrastructure/database/providers/inference-setting.providers.ts 
b/cortex-js/src/infrastructure/database/providers/inference-setting.providers.ts deleted file mode 100644 index 621d25fd8..000000000 --- a/cortex-js/src/infrastructure/database/providers/inference-setting.providers.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { InferenceSettingEntity } from '@/infrastructure/entities/inference-setting.entity'; -import { DataSource } from 'typeorm'; - -export const inferenceSettingProviders = [ - { - provide: 'INFERENCE_SETTING_REPOSITORY', - useFactory: (dataSource: DataSource) => - dataSource.getRepository(InferenceSettingEntity), - inject: ['DATA_SOURCE'], - }, -]; diff --git a/cortex-js/src/infrastructure/database/sqlite-database.providers.ts b/cortex-js/src/infrastructure/database/sqlite-database.providers.ts index 84700ff49..9c14ee965 100644 --- a/cortex-js/src/infrastructure/database/sqlite-database.providers.ts +++ b/cortex-js/src/infrastructure/database/sqlite-database.providers.ts @@ -1,13 +1,15 @@ import { databaseFile } from 'constant'; +import { resolve } from 'path'; import { DataSource } from 'typeorm'; export const sqliteDatabaseProviders = [ { provide: 'DATA_SOURCE', useFactory: async () => { + const sqlitePath = resolve(__dirname, `../../../${databaseFile}`); const dataSource = new DataSource({ type: 'sqlite', - database: databaseFile, + database: sqlitePath, synchronize: process.env.NODE_ENV !== 'production', entities: [__dirname + '/../**/*.entity{.ts,.js}'], }); diff --git a/cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts b/cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts index 12b9e6d2f..4745a3b3d 100644 --- a/cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts +++ b/cortex-js/src/infrastructure/dtos/cortex/start-cortex.dto.ts @@ -1,10 +1,24 @@ -import { IsIP, IsString } from 'class-validator'; +import { ApiProperty } from '@nestjs/swagger'; +import { IsIP, IsNumber, IsString, Max, Min } from 'class-validator'; +import { defaultCortexCppHost, defaultCortexCppPort } from 
'constant'; export class StartCortexDto { + @ApiProperty({ + name: 'host', + description: 'Cortexcpp host', + default: defaultCortexCppHost, + }) @IsString() @IsIP() host: string; - @IsString() - port: string; + @ApiProperty({ + name: 'port', + description: 'Cortexcpp port', + default: defaultCortexCppPort, + }) + @IsNumber() + @Min(0) + @Max(65535) + port: number; } diff --git a/cortex-js/src/infrastructure/dtos/inference-settings/controller-props.dto.ts b/cortex-js/src/infrastructure/dtos/inference-settings/controller-props.dto.ts deleted file mode 100644 index 15c4601f3..000000000 --- a/cortex-js/src/infrastructure/dtos/inference-settings/controller-props.dto.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { IsOptional, IsString } from 'class-validator'; -import { ControllerProps } from '@/domain/models/inference-setting.interface'; - -export class ControllerPropsDto implements ControllerProps { - @IsString() - placeholder: string; - - @IsString() - value: string; - - @IsOptional() - @IsString() - type?: string; -} diff --git a/cortex-js/src/infrastructure/dtos/inference-settings/create-inference-setting.dto.ts b/cortex-js/src/infrastructure/dtos/inference-settings/create-inference-setting.dto.ts deleted file mode 100644 index ba34ec81c..000000000 --- a/cortex-js/src/infrastructure/dtos/inference-settings/create-inference-setting.dto.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { Type } from 'class-transformer'; -import { IsArray, IsString, ValidateNested } from 'class-validator'; -import { InferenceSetting } from '@/domain/models/inference-setting.interface'; -import { InferenceSettingDocumentDto } from './inference-setting-document.dto'; - -export class CreateInferenceSettingDto implements Partial { - @IsString() - inferenceId: string; - - @IsArray() - @ValidateNested({ each: true }) - @Type(() => InferenceSettingDocumentDto) - settings: InferenceSettingDocumentDto[]; -} diff --git a/cortex-js/src/infrastructure/dtos/inference-settings/inference-setting-document.dto.ts 
b/cortex-js/src/infrastructure/dtos/inference-settings/inference-setting-document.dto.ts deleted file mode 100644 index bca718c6a..000000000 --- a/cortex-js/src/infrastructure/dtos/inference-settings/inference-setting-document.dto.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { IsString, ValidateNested } from 'class-validator'; -import { InferenceSettingDocument } from '@/domain/models/inference-setting.interface'; -import { ControllerPropsDto } from './controller-props.dto'; - -export class InferenceSettingDocumentDto implements InferenceSettingDocument { - @IsString() - key: string; - - @IsString() - extensionName: string; - - @IsString() - title: string; - - @IsString() - description: string; - - @IsString() - controllerType: string; - - @ValidateNested() - controllerProps: ControllerPropsDto; -} diff --git a/cortex-js/src/infrastructure/dtos/inference-settings/update-inference-setting.dto.ts b/cortex-js/src/infrastructure/dtos/inference-settings/update-inference-setting.dto.ts deleted file mode 100644 index 026dffded..000000000 --- a/cortex-js/src/infrastructure/dtos/inference-settings/update-inference-setting.dto.ts +++ /dev/null @@ -1,4 +0,0 @@ -import { PartialType } from '@nestjs/swagger'; -import { CreateInferenceSettingDto } from './create-inference-setting.dto'; - -export class UpdateInferenceSettingDto extends PartialType(CreateInferenceSettingDto) {} diff --git a/cortex-js/src/infrastructure/dtos/messages/thread-content.dto.ts b/cortex-js/src/infrastructure/dtos/messages/thread-content.dto.ts index 8e062893d..a8d6b3337 100644 --- a/cortex-js/src/infrastructure/dtos/messages/thread-content.dto.ts +++ b/cortex-js/src/infrastructure/dtos/messages/thread-content.dto.ts @@ -1,8 +1,5 @@ import { IsEnum, ValidateNested } from 'class-validator'; -import { - ContentType, - ThreadContent, -} from '@/domain/models/message.interface'; +import { ContentType, ThreadContent } from '@/domain/models/message.interface'; import { ContentValueDto } from './content-value.dto'; 
export class ThreadContentDto implements ThreadContent { diff --git a/cortex-js/src/infrastructure/dtos/models/download-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/download-model.dto.ts deleted file mode 100644 index 46ea3c12d..000000000 --- a/cortex-js/src/infrastructure/dtos/models/download-model.dto.ts +++ /dev/null @@ -1,6 +0,0 @@ -import { IsString } from 'class-validator'; - -export class DownloadModelDto { - @IsString() - modelId: string; -} diff --git a/cortex-js/src/infrastructure/dtos/models/load-model.dto.ts b/cortex-js/src/infrastructure/dtos/models/load-model.dto.ts deleted file mode 100644 index 5aaa07194..000000000 --- a/cortex-js/src/infrastructure/dtos/models/load-model.dto.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { IsOptional, IsString, ValidateNested } from 'class-validator'; -import { ModelSettingParamsDto } from './model-setting-params.dto'; - -export class LoadModelDto { - @IsString() - modelId: string; - - @IsOptional() - @ValidateNested() - settings?: ModelSettingParamsDto; -} diff --git a/cortex-js/src/infrastructure/dtos/models/load-model-success.dto.ts b/cortex-js/src/infrastructure/dtos/models/start-model-success.dto.ts similarity index 76% rename from cortex-js/src/infrastructure/dtos/models/load-model-success.dto.ts rename to cortex-js/src/infrastructure/dtos/models/start-model-success.dto.ts index e604e80b9..01c0bba05 100644 --- a/cortex-js/src/infrastructure/dtos/models/load-model-success.dto.ts +++ b/cortex-js/src/infrastructure/dtos/models/start-model-success.dto.ts @@ -1,6 +1,6 @@ import { IsString } from 'class-validator'; -export class LoadModelSuccessDto { +export class StartModelSuccessDto { @IsString() message: string; diff --git a/cortex-js/src/infrastructure/entities/inference-setting.entity.ts b/cortex-js/src/infrastructure/entities/inference-setting.entity.ts deleted file mode 100644 index 9b6d0cb41..000000000 --- a/cortex-js/src/infrastructure/entities/inference-setting.entity.ts +++ /dev/null @@ -1,14 +0,0 
@@ -import { - InferenceSetting, - InferenceSettingDocument, -} from '@/domain/models/inference-setting.interface'; -import { Column, Entity, PrimaryColumn } from 'typeorm'; - -@Entity('inference_setting') -export class InferenceSettingEntity implements InferenceSetting { - @PrimaryColumn() - inferenceId: string; - - @Column({ type: 'simple-json' }) - settings: InferenceSettingDocument[]; -} diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts index f1256f072..941f1b860 100644 --- a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts +++ b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts @@ -1,53 +1,55 @@ import { Injectable } from '@nestjs/common'; import { OAIEngineExtension } from '@/domain/abstracts/oai.abstract'; import { PromptTemplate } from '@/domain/models/prompt-template.interface'; -import { basename, join, resolve } from 'path'; -import { Model } from '@/domain/models/model.interface'; -import { ConfigService } from '@nestjs/config'; +import { join, resolve } from 'path'; +import { Model, ModelSettingParams } from '@/domain/models/model.interface'; import { HttpService } from '@nestjs/axios'; +import { defaultCortexCppHost, defaultCortexCppPort } from 'constant'; +import { readdirSync } from 'node:fs'; /** * A class that implements the InferenceExtension interface from the @janhq/core package. * The class provides methods for initializing and stopping a model, and for making inference requests. * It also subscribes to events emitted by the @janhq/core package and handles new message requests. 
*/ -const LOCAL_HOST = '127.0.0.1'; -const NITRO_DEFAULT_PORT = 3928; -const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`; -const LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`; -const UNLOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/unloadmodel`; - @Injectable() export default class CortexProvider extends OAIEngineExtension { provider: string = 'cortex'; - apiUrl = 'http://127.0.0.1:3928/inferences/server/chat_completion'; + apiUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/chat_completion`; + + private loadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/loadmodel`; + private unloadModelUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/unloadmodel`; - constructor( - private readonly configService: ConfigService, - protected readonly httpService: HttpService, - ) { + constructor(protected readonly httpService: HttpService) { super(httpService); } - override async loadModel(model: Model): Promise { - const modelsContainerDir = - this.configService.get('CORTEX_MODELS_DIR') ?? 
- resolve('./models'); + modelDir = () => resolve(__dirname, `../../../../models`); + + override async loadModel( + model: Model, + settings?: ModelSettingParams, + ): Promise { + const modelsContainerDir = this.modelDir(); const modelFolderFullPath = join(modelsContainerDir, model.id); - //TODO: recheck this - const modelBinaryLocalPath = join( - modelFolderFullPath, - basename(model.sources[0].url), - ); + const ggufFiles = readdirSync(modelFolderFullPath).filter((file) => { + return file.endsWith('.gguf'); + }); + + if (ggufFiles.length === 0) { + throw new Error('Model binary not found'); + } - // TODO: NamH check if the binary is there + const modelBinaryLocalPath = join(modelFolderFullPath, ggufFiles[0]); - const cpuThreadCount = 1; // TODO: NamH Math.max(1, nitroResourceProbe.numCpuPhysicalCore); + const cpuThreadCount = 1; // TODO: Math.max(1, nitroResourceProbe.numCpuPhysicalCore); const modelSettings = { // This is critical and requires real CPU physical core count (or performance core) + model: model.id, cpu_threads: cpuThreadCount, ...model.settings, + ...settings, llama_model_path: modelBinaryLocalPath, ...(model.settings.mmproj && { mmproj: join(modelFolderFullPath, model.settings.mmproj), @@ -66,12 +68,12 @@ export default class CortexProvider extends OAIEngineExtension { modelSettings.ai_prompt = prompt.ai_prompt; } - await this.httpService.post(LOAD_MODEL_URL, modelSettings).toPromise(); + await this.httpService.post(this.loadModelUrl, modelSettings).toPromise(); } override async unloadModel(modelId: string): Promise { await this.httpService - .post(UNLOAD_MODEL_URL, { model: modelId }) + .post(this.unloadModelUrl, { model: modelId }) .toPromise(); } diff --git a/cortex-js/src/usecases/chat/chat.module.ts b/cortex-js/src/usecases/chat/chat.module.ts index 1f7c70090..e69b10b73 100644 --- a/cortex-js/src/usecases/chat/chat.module.ts +++ b/cortex-js/src/usecases/chat/chat.module.ts @@ -8,5 +8,6 @@ import { ExtensionModule } from 
'@/infrastructure/repositories/extensions/extens imports: [DatabaseModule, ExtensionModule], controllers: [ChatController], providers: [ChatUsecases], + exports: [ChatUsecases], }) export class ChatModule {} diff --git a/cortex-js/src/usecases/chat/chat.usecases.ts b/cortex-js/src/usecases/chat/chat.usecases.ts index f4c338b0a..6386e57d8 100644 --- a/cortex-js/src/usecases/chat/chat.usecases.ts +++ b/cortex-js/src/usecases/chat/chat.usecases.ts @@ -1,10 +1,10 @@ import { Inject, Injectable } from '@nestjs/common'; import { CreateChatCompletionDto } from '@/infrastructure/dtos/chat/create-chat-completion.dto'; -import { Response } from 'express'; import { ExtensionRepository } from '@/domain/repositories/extension.interface'; import { Repository } from 'typeorm'; import { ModelEntity } from '@/infrastructure/entities/model.entity'; import { EngineExtension } from '@/domain/abstracts/engine.abstract'; +import { ChatStreamEvent } from '@/domain/abstracts/oai.abstract'; @Injectable() export class ChatUsecases { @@ -17,7 +17,8 @@ export class ChatUsecases { async createChatCompletions( createChatDto: CreateChatCompletionDto, headers: Record, - res: Response, + stream: WritableStream, + res?: any, ) { const extensions = (await this.extensionRepository.findAll()) ?? 
[]; const model = await this.modelRepository.findOne({ @@ -26,6 +27,6 @@ export class ChatUsecases { const engine = extensions.find((e: any) => e.provider === model?.engine) as | EngineExtension | undefined; - await engine?.inference(createChatDto, headers, res); + engine?.inference(createChatDto, headers, stream, res); } } diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index 846af2ada..1decf6b97 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -1,23 +1,20 @@ -import { Injectable, InternalServerErrorException } from '@nestjs/common'; -import { ConfigService } from '@nestjs/config'; +import { Injectable } from '@nestjs/common'; import { ChildProcess, spawn } from 'child_process'; import { join } from 'path'; -import { existsSync } from 'fs'; import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto'; import { HttpService } from '@nestjs/axios'; +import { defaultCortexCppHost, defaultCortexCppPort } from 'constant'; +import { existsSync } from 'node:fs'; @Injectable() export class CortexUsecases { private cortexProcess: ChildProcess | undefined; - constructor( - private readonly configService: ConfigService, - private readonly httpService: HttpService, - ) {} + constructor(private readonly httpService: HttpService) {} async startCortex( - host: string, - port: string, + host: string = defaultCortexCppHost, + port: number = defaultCortexCppPort, ): Promise { if (this.cortexProcess) { return { @@ -26,23 +23,25 @@ export class CortexUsecases { }; } - const binaryPath = this.configService.get('CORTEX_BINARY_PATH'); - if (!binaryPath || !existsSync(binaryPath)) { - throw new InternalServerErrorException('Cortex binary not found'); + const args: string[] = ['1', host, `${port}`]; + const cortexCppPath = join( + __dirname, + '../../../cortex-cpp/cortex-cpp' + + `${process.platform === 
'win32' ? '.exe' : ''}`, + ); + + if (!existsSync(cortexCppPath)) { + throw new Error('Cortex binary not found'); } - const args: string[] = ['1', host, port]; // go up one level to get the binary folder, have to also work on windows - const binaryFolder = join(binaryPath, '..'); - - this.cortexProcess = spawn(binaryPath, args, { + this.cortexProcess = spawn(cortexCppPath, args, { detached: false, - cwd: binaryFolder, + cwd: join(__dirname, '../../../cortex-cpp'), stdio: 'inherit', env: { ...process.env, - // TODO: NamH need to get below information - // CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, + CUDA_VISIBLE_DEVICES: '0', // // Vulkan - Support 1 device at a time for now // ...(executableOptions.vkVisibleDevices?.length > 0 && { // GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], @@ -79,7 +78,7 @@ export class CortexUsecases { .delete(`http://${host}:${port}/processmanager/destroy`) .toPromise(); } catch (err) { - console.error(err); + console.error(err.response.data); } finally { this.cortexProcess?.kill(); return { diff --git a/cortex-js/src/usecases/inference-settings/inference-settings.module.ts b/cortex-js/src/usecases/inference-settings/inference-settings.module.ts deleted file mode 100644 index d7ca6d05e..000000000 --- a/cortex-js/src/usecases/inference-settings/inference-settings.module.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { Module } from '@nestjs/common'; -import { InferenceSettingsUsecases } from './inference-settings.usecases'; -import { InferenceSettingsController } from '@/infrastructure/controllers/inference-settings.controller'; -import { DatabaseModule } from '@/infrastructure/database/database.module'; - -@Module({ - imports: [DatabaseModule], - controllers: [InferenceSettingsController], - providers: [InferenceSettingsUsecases], - exports: [InferenceSettingsUsecases], -}) -export class InferenceSettingsModule {} diff --git a/cortex-js/src/usecases/inference-settings/inference-settings.usecases.spec.ts 
b/cortex-js/src/usecases/inference-settings/inference-settings.usecases.spec.ts deleted file mode 100644 index a47dd23b2..000000000 --- a/cortex-js/src/usecases/inference-settings/inference-settings.usecases.spec.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import { InferenceSettingsUsecases } from './inference-settings.usecases'; - -describe('InferenceSettingsService', () => { - let service: InferenceSettingsUsecases; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - providers: [InferenceSettingsUsecases], - }).compile(); - - service = module.get(InferenceSettingsUsecases); - }); - - it('should be defined', () => { - expect(service).toBeDefined(); - }); -}); diff --git a/cortex-js/src/usecases/inference-settings/inference-settings.usecases.ts b/cortex-js/src/usecases/inference-settings/inference-settings.usecases.ts deleted file mode 100644 index 8c512aa72..000000000 --- a/cortex-js/src/usecases/inference-settings/inference-settings.usecases.ts +++ /dev/null @@ -1,38 +0,0 @@ -import { Inject, Injectable } from '@nestjs/common'; -import { CreateInferenceSettingDto } from '@/infrastructure/dtos/inference-settings/create-inference-setting.dto'; -import { UpdateInferenceSettingDto } from '@/infrastructure/dtos/inference-settings/update-inference-setting.dto'; -import { Repository } from 'typeorm'; -import { InferenceSettingEntity } from '@/infrastructure/entities/inference-setting.entity'; - -@Injectable() -export class InferenceSettingsUsecases { - constructor( - @Inject('INFERENCE_SETTING_REPOSITORY') - private inferenceSettingRepository: Repository, - ) {} - - create(createInferenceSettingDto: CreateInferenceSettingDto) { - return this.inferenceSettingRepository.insert(createInferenceSettingDto); - } - - findAll() { - return this.inferenceSettingRepository.find(); - } - - findOne(id: string) { - return this.inferenceSettingRepository.findOne({ - where: { inferenceId: id }, - 
}); - } - - update(id: string, updateInferenceSettingDto: UpdateInferenceSettingDto) { - return this.inferenceSettingRepository.update( - id, - updateInferenceSettingDto, - ); - } - - remove(id: string) { - return this.inferenceSettingRepository.delete(id); - } -} diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index c76beeb3a..d7843d7d8 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -5,7 +5,7 @@ import { BadRequestException, Inject, Injectable } from '@nestjs/common'; import { Repository } from 'typeorm'; import { Model, ModelFormat } from '@/domain/models/model.interface'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; -import { join, basename } from 'path'; +import { join, basename, resolve } from 'path'; import { promises, createWriteStream, @@ -13,13 +13,11 @@ import { mkdirSync, rmdirSync, } from 'fs'; -import { LoadModelSuccessDto } from '@/infrastructure/dtos/models/load-model-success.dto'; -import { LoadModelDto } from '@/infrastructure/dtos/models/load-model.dto'; -import { DownloadModelDto } from '@/infrastructure/dtos/models/download-model.dto'; -import { ConfigService } from '@nestjs/config'; +import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto'; import { ExtensionRepository } from '@/domain/repositories/extension.interface'; import { EngineExtension } from '@/domain/abstracts/engine.abstract'; import { HttpService } from '@nestjs/axios'; +import { ModelSettingParamsDto } from '@/infrastructure/dtos/models/model-setting-params.dto'; @Injectable() export class ModelsUsecases { @@ -27,7 +25,6 @@ export class ModelsUsecases { @Inject('MODEL_REPOSITORY') private readonly modelRepository: Repository, private readonly extensionRepository: ExtensionRepository, - private readonly configService: ConfigService, private readonly httpService: 
HttpService, ) {} @@ -66,8 +63,7 @@ export class ModelsUsecases { } async remove(id: string) { - const modelsContainerDir = - this.configService.get('CORTEX_MODELS_DIR') ?? './models'; + const modelsContainerDir = this.modelDir(); if (!existsSync(modelsContainerDir)) { return; @@ -86,8 +82,11 @@ export class ModelsUsecases { }); } - async startModel(loadModelDto: LoadModelDto): Promise { - const model = await this.getModelOrThrow(loadModelDto.modelId); + async startModel( + modelId: string, + settings?: ModelSettingParamsDto, + ): Promise { + const model = await this.getModelOrThrow(modelId); const extensions = (await this.extensionRepository.findAll()) ?? []; const engine = extensions.find((e: any) => e.provider === model?.engine) as | EngineExtension @@ -96,27 +95,28 @@ export class ModelsUsecases { if (!engine) { return { message: 'No extension handler found for model', - modelId: loadModelDto.modelId, + modelId: modelId, }; } return engine - .loadModel(model) + .loadModel(model, settings) .then(() => { return { message: 'Model loaded successfully', - modelId: loadModelDto.modelId, + modelId: modelId, }; }) .catch((err) => { console.error(err); return { message: 'Model failed to load', - modelId: loadModelDto.modelId, + modelId: modelId, }; }); } - async stopModel(modelId: string): Promise { + + async stopModel(modelId: string): Promise { const model = await this.getModelOrThrow(modelId); const extensions = (await this.extensionRepository.findAll()) ?? 
[]; const engine = extensions.find((e: any) => e.provider === model?.engine) as @@ -147,11 +147,10 @@ export class ModelsUsecases { }); } - async downloadModel( - downloadModelDto: DownloadModelDto, - callback?: (progress: number) => void, - ) { - const model = await this.getModelOrThrow(downloadModelDto.modelId); + modelDir = () => resolve(__dirname, `../../../models`); + + async downloadModel(modelId: string, callback?: (progress: number) => void) { + const model = await this.getModelOrThrow(modelId); if (model.format === ModelFormat.API) { throw new BadRequestException('Cannot download remote model'); @@ -165,11 +164,10 @@ export class ModelsUsecases { } const fileName = basename(downloadUrl); - const modelsContainerDir = - this.configService.get('CORTEX_MODELS_DIR') ?? './models'; + const modelsContainerDir = this.modelDir(); if (!existsSync(modelsContainerDir)) { - await mkdirSync(modelsContainerDir, { recursive: true }); + mkdirSync(modelsContainerDir, { recursive: true }); } const modelFolder = join(modelsContainerDir, model.id); diff --git a/cortex-js/tsconfig.json b/cortex-js/tsconfig.json index b7b0011be..f52125fca 100644 --- a/cortex-js/tsconfig.json +++ b/cortex-js/tsconfig.json @@ -18,6 +18,7 @@ "strictBindCallApply": true, "forceConsistentCasingInFileNames": true, "noFallthroughCasesInSwitch": true, + "esModuleInterop": true, "paths": { "@/*": ["src/*"] } diff --git a/package.json b/package.json deleted file mode 100644 index a782f0925..000000000 --- a/package.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "name": "@janhq/cortex", - "version": "1.0.0", - "license": "AGPL-3.0", - "scripts": { - "preinstall": "npm pre-install script; platform specific (MacOS / Windows / Linux)", - "dev": "cd cortex-js && yarn start", - "build": "cd cortex-js && yarn build" - }, - "dependencies": { - "express": "^4.17.1", - "typeorm": "^0.2.37", - "pg": "^8.7.1", - "dotenv": "^10.0.0" - }, - "devDependencies": { - "@types/express": "^4.17.13", - "@types/node": "^14.14.33", - 
"typescript": "^4F3.5" - } -}