From 050fe6e41f79ee9962103d3b305509e87cdafdea Mon Sep 17 00:00:00 2001 From: James Date: Mon, 20 May 2024 14:41:38 +0700 Subject: [PATCH] feat: add model settings and prompt template from hf Signed-off-by: James --- cortex-js/src/command.module.ts | 2 + .../infrastructure/commanders/chat.command.ts | 2 +- .../commanders/models.command.ts | 2 + .../commanders/models/model-update.command.ts | 90 ++++++++++++ .../commanders/prompt-constants.ts | 37 +++++ .../commanders/shortcuts/run.command.ts | 9 +- .../usecases/models.cli.usecases.ts | 69 ++++++++- .../utils/model-parameter.parser.ts | 133 ++++++++++++++++++ .../src/usecases/cortex/cortex.usecases.ts | 3 +- .../src/usecases/models/models.usecases.ts | 39 ++++- 10 files changed, 379 insertions(+), 7 deletions(-) create mode 100644 cortex-js/src/infrastructure/commanders/models/model-update.command.ts create mode 100644 cortex-js/src/infrastructure/commanders/prompt-constants.ts create mode 100644 cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts index d15758746..204bf1887 100644 --- a/cortex-js/src/command.module.ts +++ b/cortex-js/src/command.module.ts @@ -21,6 +21,7 @@ import { ModelRemoveCommand } from './infrastructure/commanders/models/model-rem import { RunCommand } from './infrastructure/commanders/shortcuts/run.command'; import { InitCudaQuestions } from './infrastructure/commanders/questions/cuda.questions'; import { CliUsecasesModule } from './infrastructure/commanders/usecases/cli.usecases.module'; +import { ModelUpdateCommand } from './infrastructure/commanders/models/model-update.command'; @Module({ imports: [ @@ -55,6 +56,7 @@ import { CliUsecasesModule } from './infrastructure/commanders/usecases/cli.usec ModelGetCommand, ModelRemoveCommand, ModelPullCommand, + ModelUpdateCommand, // Shortcuts RunCommand, diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts 
b/cortex-js/src/infrastructure/commanders/chat.command.ts index 8efacb093..faffa3ede 100644 --- a/cortex-js/src/infrastructure/commanders/chat.command.ts +++ b/cortex-js/src/infrastructure/commanders/chat.command.ts @@ -32,7 +32,7 @@ export class ChatCommand extends CommandRunner { } @Option({ - flags: '--model ', + flags: '-m, --model ', description: 'Model Id to start chat with', }) parseModelId(value: string) { diff --git a/cortex-js/src/infrastructure/commanders/models.command.ts b/cortex-js/src/infrastructure/commanders/models.command.ts index 631c55774..aafe50fc9 100644 --- a/cortex-js/src/infrastructure/commanders/models.command.ts +++ b/cortex-js/src/infrastructure/commanders/models.command.ts @@ -5,6 +5,7 @@ import { ModelListCommand } from './models/model-list.command'; import { ModelStopCommand } from './models/model-stop.command'; import { ModelPullCommand } from './models/model-pull.command'; import { ModelRemoveCommand } from './models/model-remove.command'; +import { ModelUpdateCommand } from './models/model-update.command'; @SubCommand({ name: 'models', @@ -15,6 +16,7 @@ import { ModelRemoveCommand } from './models/model-remove.command'; ModelListCommand, ModelGetCommand, ModelRemoveCommand, + ModelUpdateCommand, ], description: 'Subcommands for managing models', }) diff --git a/cortex-js/src/infrastructure/commanders/models/model-update.command.ts b/cortex-js/src/infrastructure/commanders/models/model-update.command.ts new file mode 100644 index 000000000..6f583e64c --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/models/model-update.command.ts @@ -0,0 +1,90 @@ +import { CommandRunner, SubCommand, Option } from 'nest-commander'; +import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; +import { exit } from 'node:process'; +import { ModelParameterParser } from '../utils/model-parameter.parser'; +import { + ModelRuntimeParams, + ModelSettingParams, +} from '@/domain/models/model.interface'; + +type UpdateOptions = { + model?: 
string; + options?: string[]; +}; + +@SubCommand({ name: 'update', description: 'Update configuration of a model.' }) +export class ModelUpdateCommand extends CommandRunner { + constructor(private readonly modelsCliUsecases: ModelsCliUsecases) { + super(); + } + + async run(_input: string[], option: UpdateOptions): Promise<void> { + const modelId = option.model; + if (!modelId) { + console.error('Model Id is required'); + exit(1); + } + + const options = option.options; + if (!options || options.length === 0) { + console.log('Nothing to update'); + exit(0); + } + + const parser = new ModelParameterParser(); + const settingParams: ModelSettingParams = {}; + const runtimeParams: ModelRuntimeParams = {}; + + options.forEach((option) => { + const [key, stringValue] = option.split('='); + if (parser.isModelSettingParam(key)) { + const value = parser.parse(key, stringValue); + // @ts-expect-error did the check so it's safe + settingParams[key] = value; + } else if (parser.isModelRuntimeParam(key)) { + const value = parser.parse(key, stringValue); + // @ts-expect-error did the check so it's safe + runtimeParams[key] = value; + } + }); + + if (Object.keys(settingParams).length > 0) { + const updatedSettingParams = + await this.modelsCliUsecases.updateModelSettingParams( + modelId, + settingParams, + ); + console.log( + 'Updated setting params! New setting params:', + updatedSettingParams, + ); + } + + if (Object.keys(runtimeParams).length > 0) { + await this.modelsCliUsecases.updateModelRuntimeParams( + modelId, + runtimeParams, + ); + console.log('Updated runtime params! New runtime params:', runtimeParams); + } + } + + @Option({ + flags: '-m, --model <model_id>', + required: true, + description: 'Model Id to update', + }) + parseModelId(value: string) { + return value; + } + + @Option({ + flags: '-c, --options <options...>', + description: + 'Specify the options to update the model. Syntax: -c option1=value1 option2=value2. 
For example: cortex models update -c max_tokens=100 temperature=0.5', + }) + parseOptions(option: string, optionsAccumulator: string[] = []): string[] { + optionsAccumulator.push(option); + return optionsAccumulator; + } +} diff --git a/cortex-js/src/infrastructure/commanders/prompt-constants.ts b/cortex-js/src/infrastructure/commanders/prompt-constants.ts new file mode 100644 index 000000000..969b24f43 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/prompt-constants.ts @@ -0,0 +1,37 @@ +//// HF Chat template +export const OPEN_CHAT_3_5_JINJA = ``; + +export const ZEPHYR_JINJA = `{% for message in messages %} +{% if message['role'] == 'user' %} +{{ '<|user|> +' + message['content'] + eos_token }} +{% elif message['role'] == 'system' %} +{{ '<|system|> +' + message['content'] + eos_token }} +{% elif message['role'] == 'assistant' %} +{{ '<|assistant|> +' + message['content'] + eos_token }} +{% endif %} +{% if loop.last and add_generation_prompt %} +{{ '<|assistant|>' }} +{% endif %} +{% endfor %}`; + +//// Corresponding prompt template +export const OPEN_CHAT_3_5 = `GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:`; + +export const ZEPHYR = `<|system|> +{system_message} +<|user|> +{prompt} +<|assistant|> +`; + +export const COMMAND_R = `<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{response} +`; + +// getting from https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF +export const LLAMA_2 = `[INST] <<SYS>> +{system_message} +<</SYS>> +{prompt}[/INST]`; diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts index d12786519..e0f9b0152 100644 --- a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -4,6 +4,7 @@ import { CommandRunner, SubCommand, 
Option } from 'nest-commander'; import { exit } from 'node:process'; import { ChatUsecases } from '@/usecases/chat/chat.usecases'; import { ChatCliUsecases } from '../usecases/chat.cli.usecases'; +import { defaultCortexCppHost, defaultCortexCppPort } from 'constant'; type RunOptions = { model?: string; @@ -29,7 +30,11 @@ export class RunCommand extends CommandRunner { exit(1); } - await this.cortexUsecases.startCortex(); + await this.cortexUsecases.startCortex( + defaultCortexCppHost, + defaultCortexCppPort, + false, + ); await this.modelsUsecases.startModel(modelId); const chatCliUsecases = new ChatCliUsecases( this.chatUsecases, @@ -39,7 +44,7 @@ export class RunCommand extends CommandRunner { } @Option({ - flags: '--model ', + flags: '-m, --model ', description: 'Model Id to start chat with', }) parseModelId(value: string) { diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts index 509abd565..e3e242997 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts @@ -1,12 +1,24 @@ import { exit } from 'node:process'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; -import { Model, ModelFormat } from '@/domain/models/model.interface'; +import { + Model, + ModelFormat, + ModelRuntimeParams, + ModelSettingParams, +} from '@/domain/models/model.interface'; import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; import { HuggingFaceRepoData } from '@/domain/models/huggingface.interface'; import { gguf } from '@huggingface/gguf'; import { InquirerService } from 'nest-commander'; import { Inject, Injectable } from '@nestjs/common'; import { Presets, SingleBar } from 'cli-progress'; +import { + LLAMA_2, + OPEN_CHAT_3_5, + OPEN_CHAT_3_5_JINJA, + ZEPHYR, + ZEPHYR_JINJA, +} from '../prompt-constants'; const AllQuantizations = 
[ 'Q3_K_S', @@ -49,6 +61,20 @@ export class ModelsCliUsecases { await this.modelsUsecases.stopModel(modelId); } + async updateModelSettingParams( + modelId: string, + settingParams: ModelSettingParams, + ): Promise { + return this.modelsUsecases.updateModelSettingParams(modelId, settingParams); + } + + async updateModelRuntimeParams( + modelId: string, + runtimeParams: ModelRuntimeParams, + ): Promise { + return this.modelsUsecases.updateModelRuntimeParams(modelId, runtimeParams); + } + private async getModelOrStop(modelId: string): Promise { const model = await this.modelsUsecases.findOne(modelId); if (!model) { @@ -103,10 +129,16 @@ export class ModelsCliUsecases { if (!sibling) throw 'No expected quantization found'; let stopWord = ''; + let promptTemplate = LLAMA_2; + try { const { metadata } = await gguf(sibling.downloadUrl!); // @ts-expect-error "tokenizer.ggml.eos_token_id" const index = metadata['tokenizer.ggml.eos_token_id']; + // @ts-expect-error "tokenizer.ggml.eos_token_id" + const hfChatTemplate = metadata['tokenizer.chat_template']; + promptTemplate = this.guessPromptTemplateFromHuggingFace(hfChatTemplate); + // @ts-expect-error "tokenizer.ggml.tokens" stopWord = metadata['tokenizer.ggml.tokens'][index] ?? ''; } catch (err) { @@ -129,7 +161,9 @@ export class ModelsCliUsecases { version: '', format: ModelFormat.GGUF, description: '', - settings: {}, + settings: { + prompt_template: promptTemplate, + }, parameters: { stop: stopWords, }, @@ -144,6 +178,37 @@ export class ModelsCliUsecases { await this.modelsUsecases.create(model); } + // TODO: move this to somewhere else, should be reused by API as well. Maybe in a separate service / provider? + private guessPromptTemplateFromHuggingFace(jinjaCode?: string): string { + if (!jinjaCode) { + console.log('No jinja code provided. Returning default LLAMA_2'); + return LLAMA_2; + } + + if (typeof jinjaCode !== 'string') { + console.log( + `Invalid jinja code provided (type is ${typeof jinjaCode}). 
Returning default LLAMA_2`, + ); + return LLAMA_2; + } + + switch (jinjaCode) { + case ZEPHYR_JINJA: + return ZEPHYR; + + case OPEN_CHAT_3_5_JINJA: + return OPEN_CHAT_3_5; + + default: + console.log( + 'Unknown jinja code:', + jinjaCode, + 'Returning default LLAMA_2', + ); + return LLAMA_2; + } + } + private async fetchHuggingFaceRepoData(repoId: string) { const sanitizedUrl = this.toHuggingFaceUrl(repoId); diff --git a/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts b/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts new file mode 100644 index 000000000..c8ca62650 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts @@ -0,0 +1,133 @@ +// Make this class injectable +export class ModelParameterParser { + private modelSettingParamTypes: { [key: string]: string } = { + ctx_len: 'number', + ngl: 'number', + embedding: 'boolean', + n_parallel: 'number', + cpu_threads: 'number', + prompt_template: 'string', + system_prompt: 'string', + ai_prompt: 'string', + user_prompt: 'string', + llama_model_path: 'string', + mmproj: 'string', + cont_batching: 'boolean', + vision_model: 'boolean', + text_model: 'boolean', + }; + + private modelRuntimeParamTypes: { [key: string]: string } = { + temperature: 'number', + token_limit: 'number', + top_k: 'number', + top_p: 'number', + stream: 'boolean', + max_tokens: 'number', + stop: 'string[]', + frequency_penalty: 'number', + presence_penalty: 'number', + }; + + isModelSettingParam(key: string): boolean { + return key in this.modelSettingParamTypes; + } + + isModelRuntimeParam(key: string): boolean { + return key in this.modelRuntimeParamTypes; + } + + parse(key: string, value: string): boolean | number | string | string[] { + if (this.isModelSettingParam(key)) { + return this.parseModelSettingParams(key, value); + } + + if (this.isModelRuntimeParam(key)) { + return this.parseModelRuntimeParams(key, value); + } + + throw new Error(`Invalid setting key: 
${key}`); + } + + private parseModelSettingParams( + key: string, + value: string, + ): boolean | number | string | string[] { + const settingType = this.modelSettingParamTypes[key]; + if (!settingType) { + throw new Error(`Invalid setting key: ${key}`); + } + + switch (settingType) { + case 'string': + return value; + + case 'number': + return this.toNumber(value); + + case 'string[]': + return this.toStringArray(value); + + case 'boolean': + return this.toBoolean(value); + + default: + throw new Error('Invalid setting type'); + } + } + + private parseModelRuntimeParams( + key: string, + value: string, + ): boolean | number | string | string[] { + const settingType = this.modelRuntimeParamTypes[key]; + if (!settingType) { + throw new Error(`Invalid setting key: ${key}`); + } + + switch (settingType) { + case 'string': + return value; + + case 'number': + return this.toNumber(value); + + case 'string[]': + return this.toStringArray(value); + + case 'boolean': + return this.toBoolean(value); + + default: + throw new Error('Invalid setting type'); + } + } + + private toNumber(str: string): number { + const num = parseFloat(str.trim()); + if (isNaN(num)) { + throw new Error(`Invalid number value: ${str}`); + } + return num; + } + + private toStringArray(str: string, delimiter: string = ','): string[] { + return str.split(delimiter).map((s) => s.trim()); + } + + private toBoolean(str: string): boolean { + const normalizedStr = str.trim().toLowerCase(); + switch (normalizedStr) { + case '1': + case 'true': + return true; + + case '0': + case 'false': + return false; + + default: + throw new Error(`Invalid boolean value: ${str}`); + } + } +} diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index 1decf6b97..035aa1486 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -15,6 +15,7 @@ export class CortexUsecases { async startCortex( host: 
string = defaultCortexCppHost, port: number = defaultCortexCppPort, + verbose: boolean = false, ): Promise { if (this.cortexProcess) { return { @@ -38,7 +39,7 @@ export class CortexUsecases { this.cortexProcess = spawn(cortexCppPath, args, { detached: false, cwd: join(__dirname, '../../../cortex-cpp'), - stdio: 'inherit', + stdio: verbose ? 'inherit' : undefined, env: { ...process.env, CUDA_VISIBLE_DEVICES: '0', diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index d7843d7d8..3f960497a 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -3,7 +3,12 @@ import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; import { ModelEntity } from '@/infrastructure/entities/model.entity'; import { BadRequestException, Inject, Injectable } from '@nestjs/common'; import { Repository } from 'typeorm'; -import { Model, ModelFormat } from '@/domain/models/model.interface'; +import { + Model, + ModelFormat, + ModelRuntimeParams, + ModelSettingParams, +} from '@/domain/models/model.interface'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; import { join, basename, resolve } from 'path'; import { @@ -62,6 +67,38 @@ export class ModelsUsecases { return this.modelRepository.update(id, updateModelDto); } + async updateModelSettingParams( + id: string, + settingParams: ModelSettingParams, + ): Promise { + const model = await this.getModelOrThrow(id); + const currentSettingParams = model.settings; + const updateDto: UpdateModelDto = { + settings: { + ...currentSettingParams, + ...settingParams, + }, + }; + await this.update(id, updateDto); + return updateDto.settings ?? 
{}; + } + + async updateModelRuntimeParams( + id: string, + runtimeParams: ModelRuntimeParams, + ): Promise { + const model = await this.getModelOrThrow(id); + const currentRuntimeParams = model.parameters; + const updateDto: UpdateModelDto = { + parameters: { + ...currentRuntimeParams, + ...runtimeParams, + }, + }; + await this.update(id, updateDto); + return updateDto.parameters ?? {}; + } + async remove(id: string) { const modelsContainerDir = this.modelDir();