From 050fe6e41f79ee9962103d3b305509e87cdafdea Mon Sep 17 00:00:00 2001 From: James Date: Mon, 20 May 2024 14:41:38 +0700 Subject: [PATCH] feat: add model settings and prompt template from hf Signed-off-by: James --- cortex-js/src/command.module.ts | 2 + .../infrastructure/commanders/chat.command.ts | 2 +- .../commanders/models.command.ts | 2 + .../commanders/models/model-update.command.ts | 90 ++++++++++++ .../commanders/prompt-constants.ts | 37 +++++ .../commanders/shortcuts/run.command.ts | 9 +- .../usecases/models.cli.usecases.ts | 69 ++++++++- .../utils/model-parameter.parser.ts | 133 ++++++++++++++++++ .../src/usecases/cortex/cortex.usecases.ts | 3 +- .../src/usecases/models/models.usecases.ts | 39 ++++- 10 files changed, 379 insertions(+), 7 deletions(-) create mode 100644 cortex-js/src/infrastructure/commanders/models/model-update.command.ts create mode 100644 cortex-js/src/infrastructure/commanders/prompt-constants.ts create mode 100644 cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts index d15758746..204bf1887 100644 --- a/cortex-js/src/command.module.ts +++ b/cortex-js/src/command.module.ts @@ -21,6 +21,7 @@ import { ModelRemoveCommand } from './infrastructure/commanders/models/model-rem import { RunCommand } from './infrastructure/commanders/shortcuts/run.command'; import { InitCudaQuestions } from './infrastructure/commanders/questions/cuda.questions'; import { CliUsecasesModule } from './infrastructure/commanders/usecases/cli.usecases.module'; +import { ModelUpdateCommand } from './infrastructure/commanders/models/model-update.command'; @Module({ imports: [ @@ -55,6 +56,7 @@ import { CliUsecasesModule } from './infrastructure/commanders/usecases/cli.usec ModelGetCommand, ModelRemoveCommand, ModelPullCommand, + ModelUpdateCommand, // Shortcuts RunCommand, diff --git a/cortex-js/src/infrastructure/commanders/chat.command.ts 
b/cortex-js/src/infrastructure/commanders/chat.command.ts index 8efacb093..faffa3ede 100644 --- a/cortex-js/src/infrastructure/commanders/chat.command.ts +++ b/cortex-js/src/infrastructure/commanders/chat.command.ts @@ -32,7 +32,7 @@ export class ChatCommand extends CommandRunner { } @Option({ - flags: '--model ', + flags: '-m, --model ', description: 'Model Id to start chat with', }) parseModelId(value: string) { diff --git a/cortex-js/src/infrastructure/commanders/models.command.ts b/cortex-js/src/infrastructure/commanders/models.command.ts index 631c55774..aafe50fc9 100644 --- a/cortex-js/src/infrastructure/commanders/models.command.ts +++ b/cortex-js/src/infrastructure/commanders/models.command.ts @@ -5,6 +5,7 @@ import { ModelListCommand } from './models/model-list.command'; import { ModelStopCommand } from './models/model-stop.command'; import { ModelPullCommand } from './models/model-pull.command'; import { ModelRemoveCommand } from './models/model-remove.command'; +import { ModelUpdateCommand } from './models/model-update.command'; @SubCommand({ name: 'models', @@ -15,6 +16,7 @@ import { ModelRemoveCommand } from './models/model-remove.command'; ModelListCommand, ModelGetCommand, ModelRemoveCommand, + ModelUpdateCommand, ], description: 'Subcommands for managing models', }) diff --git a/cortex-js/src/infrastructure/commanders/models/model-update.command.ts b/cortex-js/src/infrastructure/commanders/models/model-update.command.ts new file mode 100644 index 000000000..6f583e64c --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/models/model-update.command.ts @@ -0,0 +1,90 @@ +import { CommandRunner, SubCommand, Option } from 'nest-commander'; +import { ModelsCliUsecases } from '../usecases/models.cli.usecases'; +import { exit } from 'node:process'; +import { ModelParameterParser } from '../utils/model-parameter.parser'; +import { + ModelRuntimeParams, + ModelSettingParams, +} from '@/domain/models/model.interface'; + +type UpdateOptions = { + model?: 
string; + options?: string[]; +}; + +@SubCommand({ name: 'update', description: 'Update configuration of a model.' }) +export class ModelUpdateCommand extends CommandRunner { + constructor(private readonly modelsCliUsecases: ModelsCliUsecases) { + super(); + } + + async run(_input: string[], option: UpdateOptions): Promise<void> { + const modelId = option.model; + if (!modelId) { + console.error('Model Id is required'); + exit(1); + } + + const options = option.options; + if (!options || options.length === 0) { + console.log('Nothing to update'); + exit(0); + } + + const parser = new ModelParameterParser(); + const settingParams: ModelSettingParams = {}; + const runtimeParams: ModelRuntimeParams = {}; + + options.forEach((option) => { + const [key, stringValue] = option.split('='); + if (parser.isModelSettingParam(key)) { + const value = parser.parse(key, stringValue); + // @ts-expect-error did the check so it's safe + settingParams[key] = value; + } else if (parser.isModelRuntimeParam(key)) { + const value = parser.parse(key, stringValue); + // @ts-expect-error did the check so it's safe + runtimeParams[key] = value; + } + }); + + if (Object.keys(settingParams).length > 0) { + const updatedSettingParams = + await this.modelsCliUsecases.updateModelSettingParams( + modelId, + settingParams, + ); + console.log( + 'Updated setting params! New setting params:', + updatedSettingParams, + ); + } + + if (Object.keys(runtimeParams).length > 0) { + await this.modelsCliUsecases.updateModelRuntimeParams( + modelId, + runtimeParams, + ); + console.log('Updated runtime params! New runtime params:', runtimeParams); + } + } + + @Option({ + flags: '-m, --model <model_id>', + required: true, + description: 'Model Id to update', + }) + parseModelId(value: string) { + return value; + } + + @Option({ + flags: '-c, --options <options...>', + description: + 'Specify the options to update the model. Syntax: -c option1=value1 option2=value2. 
For example: cortex models update -c max_tokens=100 temperature=0.5', + }) + parseOptions(option: string, optionsAccumulator: string[] = []): string[] { + optionsAccumulator.push(option); + return optionsAccumulator; + } +} diff --git a/cortex-js/src/infrastructure/commanders/prompt-constants.ts b/cortex-js/src/infrastructure/commanders/prompt-constants.ts new file mode 100644 index 000000000..969b24f43 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/prompt-constants.ts @@ -0,0 +1,37 @@ +//// HF Chat template +export const OPEN_CHAT_3_5_JINJA = ``; + +export const ZEPHYR_JINJA = `{% for message in messages %} +{% if message['role'] == 'user' %} +{{ '<|user|> +' + message['content'] + eos_token }} +{% elif message['role'] == 'system' %} +{{ '<|system|> +' + message['content'] + eos_token }} +{% elif message['role'] == 'assistant' %} +{{ '<|assistant|> +' + message['content'] + eos_token }} +{% endif %} +{% if loop.last and add_generation_prompt %} +{{ '<|assistant|>' }} +{% endif %} +{% endfor %}`; + +//// Corresponding prompt template +export const OPEN_CHAT_3_5 = `GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:`; + +export const ZEPHYR = `<|system|> +{system_message} +<|user|> +{prompt} +<|assistant|> +`; + +export const COMMAND_R = `<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{response} +`; + +// getting from https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF +export const LLAMA_2 = `[INST] <<SYS>> +{system_message} +<</SYS>> +{prompt}[/INST]`; diff --git a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts index d12786519..e0f9b0152 100644 --- a/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts +++ b/cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts @@ -4,6 +4,7 @@ import { CommandRunner, SubCommand, 
Option } from 'nest-commander'; import { exit } from 'node:process'; import { ChatUsecases } from '@/usecases/chat/chat.usecases'; import { ChatCliUsecases } from '../usecases/chat.cli.usecases'; +import { defaultCortexCppHost, defaultCortexCppPort } from 'constant'; type RunOptions = { model?: string; @@ -29,7 +30,11 @@ export class RunCommand extends CommandRunner { exit(1); } - await this.cortexUsecases.startCortex(); + await this.cortexUsecases.startCortex( + defaultCortexCppHost, + defaultCortexCppPort, + false, + ); await this.modelsUsecases.startModel(modelId); const chatCliUsecases = new ChatCliUsecases( this.chatUsecases, @@ -39,7 +44,7 @@ export class RunCommand extends CommandRunner { } @Option({ - flags: '--model ', + flags: '-m, --model ', description: 'Model Id to start chat with', }) parseModelId(value: string) { diff --git a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts index 509abd565..e3e242997 100644 --- a/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts +++ b/cortex-js/src/infrastructure/commanders/usecases/models.cli.usecases.ts @@ -1,12 +1,24 @@ import { exit } from 'node:process'; import { ModelsUsecases } from '@/usecases/models/models.usecases'; -import { Model, ModelFormat } from '@/domain/models/model.interface'; +import { + Model, + ModelFormat, + ModelRuntimeParams, + ModelSettingParams, +} from '@/domain/models/model.interface'; import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto'; import { HuggingFaceRepoData } from '@/domain/models/huggingface.interface'; import { gguf } from '@huggingface/gguf'; import { InquirerService } from 'nest-commander'; import { Inject, Injectable } from '@nestjs/common'; import { Presets, SingleBar } from 'cli-progress'; +import { + LLAMA_2, + OPEN_CHAT_3_5, + OPEN_CHAT_3_5_JINJA, + ZEPHYR, + ZEPHYR_JINJA, +} from '../prompt-constants'; const AllQuantizations = 
[ 'Q3_K_S', @@ -49,6 +61,20 @@ export class ModelsCliUsecases { await this.modelsUsecases.stopModel(modelId); } + async updateModelSettingParams( + modelId: string, + settingParams: ModelSettingParams, + ): Promise { + return this.modelsUsecases.updateModelSettingParams(modelId, settingParams); + } + + async updateModelRuntimeParams( + modelId: string, + runtimeParams: ModelRuntimeParams, + ): Promise { + return this.modelsUsecases.updateModelRuntimeParams(modelId, runtimeParams); + } + private async getModelOrStop(modelId: string): Promise { const model = await this.modelsUsecases.findOne(modelId); if (!model) { @@ -103,10 +129,16 @@ export class ModelsCliUsecases { if (!sibling) throw 'No expected quantization found'; let stopWord = ''; + let promptTemplate = LLAMA_2; + try { const { metadata } = await gguf(sibling.downloadUrl!); // @ts-expect-error "tokenizer.ggml.eos_token_id" const index = metadata['tokenizer.ggml.eos_token_id']; + // @ts-expect-error "tokenizer.ggml.eos_token_id" + const hfChatTemplate = metadata['tokenizer.chat_template']; + promptTemplate = this.guessPromptTemplateFromHuggingFace(hfChatTemplate); + // @ts-expect-error "tokenizer.ggml.tokens" stopWord = metadata['tokenizer.ggml.tokens'][index] ?? ''; } catch (err) { @@ -129,7 +161,9 @@ export class ModelsCliUsecases { version: '', format: ModelFormat.GGUF, description: '', - settings: {}, + settings: { + prompt_template: promptTemplate, + }, parameters: { stop: stopWords, }, @@ -144,6 +178,37 @@ export class ModelsCliUsecases { await this.modelsUsecases.create(model); } + // TODO: move this to somewhere else, should be reused by API as well. Maybe in a separate service / provider? + private guessPromptTemplateFromHuggingFace(jinjaCode?: string): string { + if (!jinjaCode) { + console.log('No jinja code provided. Returning default LLAMA_2'); + return LLAMA_2; + } + + if (typeof jinjaCode !== 'string') { + console.log( + `Invalid jinja code provided (type is ${typeof jinjaCode}). 
Returning default LLAMA_2`, + ); + return LLAMA_2; + } + + switch (jinjaCode) { + case ZEPHYR_JINJA: + return ZEPHYR; + + case OPEN_CHAT_3_5_JINJA: + return OPEN_CHAT_3_5; + + default: + console.log( + 'Unknown jinja code:', + jinjaCode, + 'Returning default LLAMA_2', + ); + return LLAMA_2; + } + } + private async fetchHuggingFaceRepoData(repoId: string) { const sanitizedUrl = this.toHuggingFaceUrl(repoId); diff --git a/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts b/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts new file mode 100644 index 000000000..c8ca62650 --- /dev/null +++ b/cortex-js/src/infrastructure/commanders/utils/model-parameter.parser.ts @@ -0,0 +1,133 @@ +// Make this class injectable +export class ModelParameterParser { + private modelSettingParamTypes: { [key: string]: string } = { + ctx_len: 'number', + ngl: 'number', + embedding: 'boolean', + n_parallel: 'number', + cpu_threads: 'number', + prompt_template: 'string', + system_prompt: 'string', + ai_prompt: 'string', + user_prompt: 'string', + llama_model_path: 'string', + mmproj: 'string', + cont_batching: 'boolean', + vision_model: 'boolean', + text_model: 'boolean', + }; + + private modelRuntimeParamTypes: { [key: string]: string } = { + temperature: 'number', + token_limit: 'number', + top_k: 'number', + top_p: 'number', + stream: 'boolean', + max_tokens: 'number', + stop: 'string[]', + frequency_penalty: 'number', + presence_penalty: 'number', + }; + + isModelSettingParam(key: string): boolean { + return key in this.modelSettingParamTypes; + } + + isModelRuntimeParam(key: string): boolean { + return key in this.modelRuntimeParamTypes; + } + + parse(key: string, value: string): boolean | number | string | string[] { + if (this.isModelSettingParam(key)) { + return this.parseModelSettingParams(key, value); + } + + if (this.isModelRuntimeParam(key)) { + return this.parseModelRuntimeParams(key, value); + } + + throw new Error(`Invalid setting key: 
${key}`); + } + + private parseModelSettingParams( + key: string, + value: string, + ): boolean | number | string | string[] { + const settingType = this.modelSettingParamTypes[key]; + if (!settingType) { + throw new Error(`Invalid setting key: ${key}`); + } + + switch (settingType) { + case 'string': + return value; + + case 'number': + return this.toNumber(value); + + case 'string[]': + return this.toStringArray(value); + + case 'boolean': + return this.toBoolean(value); + + default: + throw new Error('Invalid setting type'); + } + } + + private parseModelRuntimeParams( + key: string, + value: string, + ): boolean | number | string | string[] { + const settingType = this.modelRuntimeParamTypes[key]; + if (!settingType) { + throw new Error(`Invalid setting key: ${key}`); + } + + switch (settingType) { + case 'string': + return value; + + case 'number': + return this.toNumber(value); + + case 'string[]': + return this.toStringArray(value); + + case 'boolean': + return this.toBoolean(value); + + default: + throw new Error('Invalid setting type'); + } + } + + private toNumber(str: string): number { + const num = parseFloat(str.trim()); + if (isNaN(num)) { + throw new Error(`Invalid number value: ${str}`); + } + return num; + } + + private toStringArray(str: string, delimiter: string = ','): string[] { + return str.split(delimiter).map((s) => s.trim()); + } + + private toBoolean(str: string): boolean { + const normalizedStr = str.trim().toLowerCase(); + switch (normalizedStr) { + case '1': + case 'true': + return true; + + case '0': + case 'false': + return false; + + default: + throw new Error(`Invalid boolean value: ${str}`); + } + } +} diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts index 1decf6b97..035aa1486 100644 --- a/cortex-js/src/usecases/cortex/cortex.usecases.ts +++ b/cortex-js/src/usecases/cortex/cortex.usecases.ts @@ -15,6 +15,7 @@ export class CortexUsecases { async startCortex( host: 
string = defaultCortexCppHost, port: number = defaultCortexCppPort, + verbose: boolean = false, ): Promise { if (this.cortexProcess) { return { @@ -38,7 +39,7 @@ export class CortexUsecases { this.cortexProcess = spawn(cortexCppPath, args, { detached: false, cwd: join(__dirname, '../../../cortex-cpp'), - stdio: 'inherit', + stdio: verbose ? 'inherit' : undefined, env: { ...process.env, CUDA_VISIBLE_DEVICES: '0', diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts index d7843d7d8..3f960497a 100644 --- a/cortex-js/src/usecases/models/models.usecases.ts +++ b/cortex-js/src/usecases/models/models.usecases.ts @@ -3,7 +3,12 @@ import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto'; import { ModelEntity } from '@/infrastructure/entities/model.entity'; import { BadRequestException, Inject, Injectable } from '@nestjs/common'; import { Repository } from 'typeorm'; -import { Model, ModelFormat } from '@/domain/models/model.interface'; +import { + Model, + ModelFormat, + ModelRuntimeParams, + ModelSettingParams, +} from '@/domain/models/model.interface'; import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception'; import { join, basename, resolve } from 'path'; import { @@ -62,6 +67,38 @@ export class ModelsUsecases { return this.modelRepository.update(id, updateModelDto); } + async updateModelSettingParams( + id: string, + settingParams: ModelSettingParams, + ): Promise { + const model = await this.getModelOrThrow(id); + const currentSettingParams = model.settings; + const updateDto: UpdateModelDto = { + settings: { + ...currentSettingParams, + ...settingParams, + }, + }; + await this.update(id, updateDto); + return updateDto.settings ?? 
{}; + } + + async updateModelRuntimeParams( + id: string, + runtimeParams: ModelRuntimeParams, + ): Promise { + const model = await this.getModelOrThrow(id); + const currentRuntimeParams = model.parameters; + const updateDto: UpdateModelDto = { + parameters: { + ...currentRuntimeParams, + ...runtimeParams, + }, + }; + await this.update(id, updateDto); + return updateDto.parameters ?? {}; + } + async remove(id: string) { const modelsContainerDir = this.modelDir();