Skip to content

Commit

Permalink
feat: cortex onboarding (#565)
Browse files Browse the repository at this point in the history
  • Loading branch information
louis-jan authored May 15, 2024
1 parent 952d8a2 commit ba13676
Show file tree
Hide file tree
Showing 9 changed files with 226 additions and 28 deletions.
3 changes: 3 additions & 0 deletions cortex-js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@
"@nestjs/mapped-types": "*",
"@nestjs/platform-express": "^10.0.0",
"@nestjs/swagger": "^7.3.1",
"@terascope/fetch-github-release": "^0.8.8",
"axios": "^1.6.8",
"class-transformer": "^0.5.1",
"class-validator": "^0.14.1",
"cli-progress": "^3.12.0",
"decompress": "^4.2.1",
"nest-commander": "^3.13.0",
"readline": "^1.3.0",
"reflect-metadata": "^0.2.0",
Expand All @@ -52,6 +54,7 @@
"@nestjs/testing": "^10.0.0",
"@nestjs/typeorm": "^10.0.2",
"@types/cli-progress": "^3.11.5",
"@types/decompress": "^4.2.7",
"@types/express": "^4.17.17",
"@types/jest": "^29.5.2",
"@types/node": "^20.12.9",
Expand Down
6 changes: 6 additions & 0 deletions cortex-js/src/command.module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import { ModelsCommand } from './infrastructure/commanders/models.command';
import { StartCommand } from './infrastructure/commanders/start.command';
import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module';
import { ChatModule } from './usecases/chat/chat.module';
import { InitCommand } from './infrastructure/commanders/init.command';
import { HttpModule } from '@nestjs/axios';
import { CreateInitQuestions } from './infrastructure/commanders/inquirer/init.questions';

@Module({
imports: [
Expand All @@ -24,6 +27,7 @@ import { ChatModule } from './usecases/chat/chat.module';
CortexModule,
ChatModule,
ExtensionModule,
HttpModule,
],
providers: [
BasicCommand,
Expand All @@ -32,6 +36,8 @@ import { ChatModule } from './usecases/chat/chat.module';
ServeCommand,
InferenceCommand,
StartCommand,
InitCommand,
CreateInitQuestions,
],
})
export class CommandModule {}
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,16 @@ import { InferenceCommand } from './inference.command';
import { ModelsCommand } from './models.command';
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
import { defaultCortexJsHost, defaultCortexJsPort } from 'constant';
import { InitCommand } from './init.command';

@RootCommand({
subCommands: [ModelsCommand, PullCommand, ServeCommand, InferenceCommand],
subCommands: [
ModelsCommand,
PullCommand,
ServeCommand,
InferenceCommand,
InitCommand,
],
})
export class BasicCommand extends CommandRunner {
constructor(private readonly cortexUsecases: CortexUsecases) {
Expand Down
140 changes: 140 additions & 0 deletions cortex-js/src/infrastructure/commanders/init.command.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import { createWriteStream, existsSync, rmSync } from 'fs';
import { CommandRunner, SubCommand, InquirerService } from 'nest-commander';
import { resolve } from 'path';
import { HttpService } from '@nestjs/axios';
import { Presets, SingleBar } from 'cli-progress';
import decompress from 'decompress';

/**
 * `cortex init` (alias `cortex setup`).
 *
 * Asks the user which engine variant they want (see the
 * 'create-init-questions' question set), looks up the matching release asset
 * on GitHub, downloads it into the package root with a progress bar and
 * unpacks it there.
 */
@SubCommand({ name: 'init', aliases: ['setup'] })
export class InitCommand extends CommandRunner {
  CORTEX_RELEASES_URL = 'https://api.github.com/repos/janhq/cortex/releases';

  constructor(
    private readonly httpService: HttpService,
    private readonly inquirerService: InquirerService,
  ) {
    super();
  }

  /**
   * Command entry point.
   *
   * @param input positional CLI arguments; `input[0]` is an optional release
   *   version and defaults to 'latest'.
   * @param options pre-supplied answers; anything missing is asked
   *   interactively.
   */
  async run(input: string[], options?: any): Promise<void> {
    options = await this.inquirerService.ask('create-init-questions', options);
    const version = input[0] ?? 'latest';

    await this.download(this.parseEngineFileName(options), version);
  }

  /**
   * Downloads and unpacks the engine archive whose asset name contains
   * `engineFileName` from the requested release.
   *
   * This method terminates the process: exit code 0 on success, 1 when the
   * release or asset cannot be found or when decompression fails. HTTP and
   * stream failures are propagated as rejections.
   *
   * @param engineFileName asset name suffix, as built by `parseEngineFileName`.
   * @param version release version (with or without a leading 'v'), or
   *   'latest'.
   */
  download = async (
    engineFileName: string,
    version: string = 'latest',
  ): Promise<any> => {
    const res = await this.httpService
      .get(
        this.CORTEX_RELEASES_URL + `${version === 'latest' ? '/latest' : ''}`,
        {
          headers: {
            'X-GitHub-Api-Version': '2022-11-28',
            Accept: 'application/vnd.github+json',
          },
        },
      )
      .toPromise();

    if (!res?.data) {
      console.log('Failed to fetch releases');
      process.exit(1);
    }

    // '/latest' answers with a single release object, the bare endpoint with
    // a list of releases that has to be searched for the requested version.
    let release = res.data;
    if (Array.isArray(res.data)) {
      // Strip only a *leading* 'v'; a plain replace('v', '') would also
      // mangle versions such as '0.4.0-dev'.
      const wanted = version.replace(/^v/, '');
      release = res.data.find(
        (candidate: { name?: string; tag_name?: string }) =>
          candidate.name?.replace(/^v/, '') === wanted ||
          candidate.tag_name?.replace(/^v/, '') === wanted,
      );
    }

    if (!release || !Array.isArray(release.assets)) {
      console.log(`Could not find release ${version}`);
      process.exit(1);
    }

    const toDownloadAsset = release.assets.find((s: any) =>
      s.name.includes(engineFileName),
    );

    if (!toDownloadAsset) {
      console.log(`Could not find engine file ${engineFileName}`);
      process.exit(1);
    }

    console.log(`Downloading engine file ${engineFileName}`);

    const download = await this.httpService
      .get(toDownloadAsset.browser_download_url, {
        responseType: 'stream',
      })
      .toPromise();
    if (!download) {
      throw new Error('Failed to download engine file');
    }

    const destination = resolve(this.rootDir(), toDownloadAsset.name);

    await new Promise<void>((resolveDownload, rejectDownload) => {
      const writer = createWriteStream(destination);
      // Header values are strings and the header may be absent altogether.
      const totalBytes = Number(download.headers['content-length']);
      let receivedBytes = 0;

      const bar = new SingleBar({}, Presets.shades_classic);
      bar.start(100, 0);

      const fail = (error: Error) => {
        bar.stop();
        writer.destroy();
        rejectDownload(error);
      };

      writer.on('finish', () => {
        bar.stop();
        resolveDownload();
      });
      writer.on('error', fail);
      // pipe() does not forward source errors to the destination, so a
      // dropped connection must be handled here or the promise never settles.
      download.data.on('error', fail);

      download.data.on('data', (chunk: { length: number }) => {
        receivedBytes += chunk.length;
        if (totalBytes > 0) {
          bar.update(
            Math.min(100, Math.floor((receivedBytes / totalBytes) * 100)),
          );
        }
      });

      download.data.pipe(writer);
    });

    try {
      // Drop the previous engine only once the new archive is fully on disk,
      // so a failed download does not destroy a working installation.
      const engineDir = resolve(this.rootDir(), 'cortex-cpp');
      if (existsSync(engineDir)) rmSync(engineDir, { recursive: true });

      await decompress(destination, this.rootDir());
    } catch (e) {
      console.log(e);
      process.exit(1);
    }
    process.exit(0);
  };

  /**
   * Builds the asset file name for the current machine and the user's
   * answers: `<platform>-<arch>[-<instructions>][-cuda-<ver>|-vulkan].tar.gz`.
   *
   * - platform: 'windows' for win32, 'mac' for darwin, otherwise
   *   `process.platform` as is.
   * - arch: 'arm64' when running on arm64, 'amd64' for everything else.
   * - GPU suffix: only in GPU run mode; Nvidia maps to CUDA 11.7 when
   *   `cudaVersion` is '11' and to 12.2 otherwise, any other GPU type maps to
   *   Vulkan.
   */
  parseEngineFileName = (options: {
    runMode?: 'CPU' | 'GPU';
    gpuType?: 'Nvidia' | 'Others (Vulkan)';
    instructions?: 'AVX' | 'AVX2' | 'AVX-512' | undefined;
    cudaVersion?: '11' | '12';
  }) => {
    const platform =
      process.platform === 'win32'
        ? 'windows'
        : process.platform === 'darwin'
          ? 'mac'
          : process.platform;
    const arch = process.arch === 'arm64' ? process.arch : 'amd64';
    const cudaVersion =
      options.runMode === 'GPU'
        ? options.gpuType === 'Nvidia'
          ? '-cuda-' + (options.cudaVersion === '11' ? '11.7' : '12.2')
          : '-vulkan'
        : '';
    const instructions = options.instructions ? `-${options.instructions}` : '';
    const engineName = `${platform}-${arch}${instructions.toLowerCase()}${cudaVersion}`;
    return `${engineName}.tar.gz`;
  };

  /** Package root: three directory levels above this file's directory. */
  rootDir = () => resolve(__dirname, `../../../`);
}
39 changes: 39 additions & 0 deletions cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { Question, QuestionSet } from 'nest-commander';

// The run-mode and instruction-set prompts are skipped on macOS.
const isNotDarwin = (): boolean => process.platform !== 'darwin';

/**
 * Interactive prompts behind the 'create-init-questions' question set.
 *
 * Each decorated method receives the raw answer for its prompt and hands it
 * back unchanged; the answers end up keyed by the `name` given in the
 * decorator ('runMode', 'gpuType', 'instructions').
 */
@QuestionSet({ name: 'create-init-questions' })
export class CreateInitQuestions {
  /** CPU or GPU execution; not asked on macOS. */
  @Question({
    name: 'runMode',
    type: 'list',
    message: 'Select run mode',
    choices: ['CPU', 'GPU'],
    default: 'CPU',
    when: () => isNotDarwin(),
  })
  parseRunMode(val: string) {
    return val;
  }

  /** GPU vendor; only asked once 'GPU' was picked as run mode. */
  @Question({
    name: 'gpuType',
    type: 'list',
    message: 'Select GPU type',
    choices: ['Nvidia', 'Others (Vulkan)'],
    default: 'Nvidia',
    when: (answers: any) => answers.runMode === 'GPU',
  })
  parseGPUType(val: string) {
    return val;
  }

  /** CPU instruction set; not asked on macOS. */
  @Question({
    name: 'instructions',
    type: 'list',
    message: 'Select CPU instructions set',
    choices: ['AVX2', 'AVX', 'AVX-512'],
    when: () => isNotDarwin(),
  })
  parseContent(val: string) {
    return val;
  }
}
8 changes: 8 additions & 0 deletions cortex-js/src/infrastructure/commanders/start.command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
import { ModelsUsecases } from '@/usecases/models/models.usecases';
import { CommandRunner, SubCommand } from 'nest-commander';
import { LoadModelDto } from '../dtos/models/load-model.dto';
import { resolve } from 'path';
import { existsSync } from 'fs';

@SubCommand({ name: 'start', aliases: ['run'] })
export class StartCommand extends CommandRunner {
Expand All @@ -26,6 +28,10 @@ export class StartCommand extends CommandRunner {
}

private async startCortex() {
if (!existsSync(resolve(this.rootDir(), 'cortex-cpp'))) {
console.log('Please init the cortex by running cortex init command!');
process.exit(0);
}
const host = '127.0.0.1';
const port = '3928';
return this.cortexUsecases.startCortex(host, port);
Expand All @@ -45,4 +51,6 @@ export class StartCommand extends CommandRunner {
const loadModelDto: LoadModelDto = { modelId, settings };
return this.modelsUsecases.startModel(loadModelDto);
}

rootDir = () => resolve(__dirname, `../../../`);
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export default class CortexProvider extends OAIEngineExtension {
const cpuThreadCount = 1; // TODO: NamH Math.max(1, nitroResourceProbe.numCpuPhysicalCore);
const modelSettings = {
// This is critical and requires real CPU physical core count (or performance core)
model: model.id,
cpu_threads: cpuThreadCount,
...model.settings,
llama_model_path: modelBinaryLocalPath,
Expand Down
47 changes: 20 additions & 27 deletions cortex-js/src/usecases/cortex/cortex.usecases.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
import { Injectable, InternalServerErrorException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { Injectable } from '@nestjs/common';
import { ChildProcess, spawn } from 'child_process';
import { join } from 'path';
import { existsSync } from 'fs';
import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto';
import { HttpService } from '@nestjs/axios';

@Injectable()
export class CortexUsecases {
private cortexProcess: ChildProcess | undefined;

constructor(
private readonly configService: ConfigService,
private readonly httpService: HttpService,
) {}
constructor(private readonly httpService: HttpService) {}

async startCortex(
host: string,
Expand All @@ -26,29 +21,27 @@ export class CortexUsecases {
};
}

const binaryPath = this.configService.get<string>('CORTEX_BINARY_PATH');
if (!binaryPath || !existsSync(binaryPath)) {
throw new InternalServerErrorException('Cortex binary not found');
}

const args: string[] = ['1', host, port];
// go up one level to get the binary folder, have to also work on windows
const binaryFolder = join(binaryPath, '..');

this.cortexProcess = spawn(binaryPath, args, {
detached: false,
cwd: binaryFolder,
stdio: 'inherit',
env: {
...process.env,
// TODO: NamH need to get below information
// CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
// // Vulkan - Support 1 device at a time for now
// ...(executableOptions.vkVisibleDevices?.length > 0 && {
// GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
// }),
// const binaryFolder = join(binaryPath, '..');
this.cortexProcess = spawn(
join(__dirname, '../../../cortex-cpp/cortex-cpp'),
args,
{
detached: false,
cwd: join(__dirname, '../../../cortex-cpp'),
stdio: 'inherit',
env: {
...process.env,
// TODO: NamH need to get below information
// CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
// // Vulkan - Support 1 device at a time for now
// ...(executableOptions.vkVisibleDevices?.length > 0 && {
// GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
// }),
},
},
});
);

this.registerCortexEvents();

Expand Down
1 change: 1 addition & 0 deletions cortex-js/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"strictBindCallApply": true,
"forceConsistentCasingInFileNames": true,
"noFallthroughCasesInSwitch": true,
"esModuleInterop": true,
"paths": {
"@/*": ["src/*"]
}
Expand Down

0 comments on commit ba13676

Please sign in to comment.