Commit 8e44992: Merge branch 'dev' into feat/remote-engine

vansangpfiev authored Dec 4, 2024
2 parents a7e4659 + 2b74824
Showing 9 changed files with 75 additions and 30 deletions.
7 changes: 4 additions & 3 deletions docs/docs/cli/models/index.mdx
@@ -159,9 +159,10 @@ This command uses a `model_id` from the model that you have downloaded or availa

| Option | Description | Required | Default value | Example |
|---------------------------|---------------------------------------------------------------------------|----------|----------------------------------------------|------------------------|
-| `model_id` | The identifier of the model you want to start. | Yes | `Prompt to select from the available models` | `mistral` |
-| `--gpus` | List of GPUs to use. | No | - | `[0,1]` |
-| `-h`, `--help` | Display help information for the command. | No | - | `-h` |
+| `model_id` | The identifier of the model you want to start. | Yes | `Prompt to select from the available models` | `mistral` |
+| `--gpus` | List of GPUs to use. | No | - | `[0,1]` |
+| `--ctx_len` | Maximum context length for inference. | No | `min(8192, max_model_context_length)` | `1024` |
+| `-h`, `--help` | Display help information for the command. | No | - | `-h` |

## `cortex models stop`
:::info
1 change: 1 addition & 0 deletions docs/docs/cli/models/start.md
@@ -33,6 +33,7 @@ cortex models start [model_id]:[engine] [options]
|---------------------------|----------------------------------------------------------|----------|----------------------------------------------|-------------------|
| `model_id` | The identifier of the model you want to start. | No | `Prompt to select from the available models` | `mistral` |
| `--gpus` | List of GPUs to use. | No | - | `[0,1]` |
+| `--ctx_len` | Maximum context length for inference. | No | `min(8192, max_model_context_length)` | `1024` |
| `-h`, `--help` | Display help information for the command. | No | - | `-h` |


5 changes: 3 additions & 2 deletions docs/docs/cli/run.mdx
@@ -36,7 +36,8 @@ You can use the `--verbose` flag to display more detailed output of the internal

| Option | Description | Required | Default value | Example |
|-----------------------------|-----------------------------------------------------------------------------|----------|----------------------------------------------|------------------------|
-| `model_id` | The identifier of the model you want to chat with. | Yes | - | `mistral` |
-| `--gpus` | List of GPUs to use. | No | - | `[0,1]` |
+| `model_id` | The identifier of the model you want to chat with. | Yes | - | `mistral` |
+| `--gpus` | List of GPUs to use. | No | - | `[0,1]` |
+| `--ctx_len` | Maximum context length for inference. | No | `min(8192, max_model_context_length)` | `1024` |
| `-h`, `--help` | Display help information for the command. | No | - | `-h` |
<!-- | `-t`, `--thread <thread_id>` | Specify the Thread ID. Defaults to creating a new thread if none specified. | No | - | `-t jan_1717650808` | | `-c` | -->
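Taken together, the three documentation tables above describe the same contract: when `--ctx_len` is omitted, the effective context length defaults to `min(8192, max_model_context_length)`, so a model whose maximum context is 4096 starts at 4096 while a 32768-token model starts at 8192. An explicit value such as `cortex run mistral --ctx_len 1024` takes precedence but, as the engine changes below show, is still clamped to the model's maximum.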
39 changes: 26 additions & 13 deletions docs/static/openapi/cortex.json
@@ -2316,8 +2316,18 @@
"default": [],
"type": "array",
"items": {
"type": "object"
}
"type": "array",
"properties": {
"type": {
"type": "string",
"enum": ["function"]
},
"function": {
"$ref": "#/components/schemas/Function"
}
},
"required": ["type", "function"]
},
},
"metadata": {
"type": "object",
@@ -2378,7 +2388,7 @@
"nullable": true
},
"tools": {
"type": "object"
"type": "array"
},
"metadata": {
"type": "object",
@@ -2961,17 +2971,20 @@
          }
        },
        "tools": {
-         "type": "object",
-         "properties": {
-           "type": {
-             "type": "string",
-             "enum": ["function"]
-           },
-           "function": {
-             "$ref": "#/components/schemas/Function"
-           }
-         },
-         "required": ["type", "function"]
+         "type": "array",
+         "items": {
+           "type": "object",
+           "properties": {
+             "type": {
+               "type": "string",
+               "enum": ["function"]
+             },
+             "function": {
+               "$ref": "#/components/schemas/Function"
+             }
+           },
+           "required": ["type", "function"]
+         }
        },
        "tool_choice": {
          "anyOf": [
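The schema fix above turns `tools` from a single object into an array of `{type, function}` entries, matching what the chat-completion endpoint actually accepts. Below is a minimal sketch of building such a payload with jsoncpp (the `Json::Value` library the engine already uses); the `get_weather` function is a hypothetical example, not part of the schema:

```cpp
#include <iostream>
#include <json/json.h>

int main() {
  // One tool entry shaped as the corrected schema requires:
  // {"type": "function", "function": {...}}
  Json::Value tool;
  tool["type"] = "function";
  tool["function"]["name"] = "get_weather";  // hypothetical function
  tool["function"]["description"] = "Look up the weather for a city";

  // "tools" is now an array of such entries, not a bare object.
  Json::Value tools(Json::arrayValue);
  tools.append(tool);

  std::cout << tools.toStyledString() << std::endl;
  return 0;
}
```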
18 changes: 11 additions & 7 deletions engine/cli/command_line_parser.cc
@@ -163,16 +163,18 @@ void CommandLineParser::SetupCommonCommands() {
  run_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
                 " run [options] [model_id]");
  run_cmd->add_option("model_id", cml_data_.model_id, "");
- run_cmd->add_option("--gpus", hw_activate_opts_["gpus"],
+ run_cmd->add_option("--gpus", run_settings_["gpus"],
                      "List of GPU to activate, for example [0, 1]");
+ run_cmd->add_option("--ctx_len", run_settings_["ctx_len"],
+                     "Maximum context length for inference");
  run_cmd->add_flag("-d,--detach", cml_data_.run_detach, "Detached mode");
  run_cmd->callback([this, run_cmd] {
    if (std::exchange(executed_, true))
      return;
    commands::RunCmd rc(cml_data_.config.apiServerHost,
                        std::stoi(cml_data_.config.apiServerPort),
                        cml_data_.model_id, download_service_);
-   rc.Exec(cml_data_.run_detach, hw_activate_opts_);
+   rc.Exec(cml_data_.run_detach, run_settings_);
  });
}

@@ -203,8 +205,10 @@ void CommandLineParser::SetupModelCommands() {
  model_start_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
                         " models start [model_id]");
  model_start_cmd->add_option("model_id", cml_data_.model_id, "");
- model_start_cmd->add_option("--gpus", hw_activate_opts_["gpus"],
+ model_start_cmd->add_option("--gpus", run_settings_["gpus"],
                              "List of GPU to activate, for example [0, 1]");
+ model_start_cmd->add_option("--ctx_len", run_settings_["ctx_len"],
+                             "Maximum context length for inference");
  model_start_cmd->group(kSubcommands);
  model_start_cmd->callback([this, model_start_cmd]() {
    if (std::exchange(executed_, true))
@@ -216,7 +220,7 @@
    };
    commands::ModelStartCmd().Exec(cml_data_.config.apiServerHost,
                                   std::stoi(cml_data_.config.apiServerPort),
-                                  cml_data_.model_id, hw_activate_opts_);
+                                  cml_data_.model_id, run_settings_);
  });

auto stop_model_cmd =
@@ -562,7 +566,7 @@ void CommandLineParser::SetupHardwareCommands() {
  hw_activate_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
                         " hardware activate --gpus [list_gpu]");
  hw_activate_cmd->group(kSubcommands);
- hw_activate_cmd->add_option("--gpus", hw_activate_opts_["gpus"],
+ hw_activate_cmd->add_option("--gpus", run_settings_["gpus"],
                              "List of GPU to activate, for example [0, 1]");
  hw_activate_cmd->callback([this, hw_activate_cmd]() {
    if (std::exchange(executed_, true))
@@ -572,14 +576,14 @@
      return;
    }

-   if (hw_activate_opts_["gpus"].empty()) {
+   if (run_settings_["gpus"].empty()) {
      CLI_LOG("[list_gpu] is required\n");
      CLI_LOG(hw_activate_cmd->help());
      return;
    }
    commands::HardwareActivateCmd().Exec(
        cml_data_.config.apiServerHost,
-       std::stoi(cml_data_.config.apiServerPort), hw_activate_opts_);
+       std::stoi(cml_data_.config.apiServerPort), run_settings_);
  });
}

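The rename from `hw_activate_opts_` to `run_settings_` reflects that the map now carries `ctx_len` alongside `gpus`. Each option is bound by reference into the string map, so values arrive as raw text and are parsed later. A self-contained sketch of that binding pattern, assuming the CLI11-style API the parser appears to use:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>
#include <CLI/CLI.hpp>

int main(int argc, char** argv) {
  CLI::App app{"cortex-like CLI sketch"};
  std::unordered_map<std::string, std::string> run_settings;

  auto* run_cmd = app.add_subcommand("run", "Run a model");
  // operator[] default-constructs the map entry; the parser writes into
  // that reference when the flag is supplied, as command_line_parser.cc does.
  run_cmd->add_option("--gpus", run_settings["gpus"],
                      "List of GPU to activate, for example [0, 1]");
  run_cmd->add_option("--ctx_len", run_settings["ctx_len"],
                      "Maximum context length for inference");

  CLI11_PARSE(app, argc, argv);

  // Values stay strings here; numeric conversion happens downstream
  // (see ModelStartCmd::UpdateConfig below).
  std::cout << "ctx_len = " << run_settings["ctx_len"] << std::endl;
  return 0;
}
```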
2 changes: 1 addition & 1 deletion engine/cli/command_line_parser.h
@@ -79,5 +79,5 @@ class CommandLineParser {
  std::unordered_map<std::string, std::string> config_update_opts_;
  bool executed_ = false;
  commands::HarwareOptions hw_opts_;
- std::unordered_map<std::string, std::string> hw_activate_opts_;
+ std::unordered_map<std::string, std::string> run_settings_;
};
19 changes: 17 additions & 2 deletions engine/cli/commands/model_start_cmd.cc
@@ -30,8 +30,8 @@ bool ModelStartCmd::Exec(

  //
  bool should_activate_hw = false;
- for (auto const& [_, v] : options) {
-   if (!v.empty()) {
+ for (auto const& [k, v] : options) {
+   if (k == "gpus" && !v.empty()) {
      should_activate_hw = true;
      break;
    }
@@ -57,6 +57,9 @@

  Json::Value json_data;
  json_data["model"] = model_id.value();
+ for (auto const& [k, v] : options) {
+   UpdateConfig(json_data, k, v);
+ }
  auto data_str = json_data.toStyledString();
  auto res = curl_utils::SimplePostJson(url.ToFullPath(), data_str);
  if (res.has_error()) {
@@ -75,4 +78,16 @@
  }
  return true;
}
+
+bool ModelStartCmd::UpdateConfig(Json::Value& data, const std::string& key,
+                                 const std::string& value) {
+  if (key == "ctx_len" && !value.empty()) {
+    try {
+      data["ctx_len"] = std::stoi(value);
+    } catch (const std::exception& e) {
+      CLI_LOG("Failed to parse numeric value for " << key << ": " << e.what());
+    }
+  }
+  return true;
+}
}; // namespace commands
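With the new loop in `Exec`, every run setting passes through `UpdateConfig` before the POST, so `--ctx_len 1024` reaches the server as the integer `1024` rather than a string, and a malformed value is logged instead of aborting the start. A standalone sketch of that conversion step (the helper mirrors `UpdateConfig` but is not the engine's code):

```cpp
#include <iostream>
#include <string>
#include <unordered_map>
#include <json/json.h>

// Mirrors ModelStartCmd::UpdateConfig: ctx_len is the only numeric
// setting today, and parse failures are reported, not fatal.
void UpdateConfigSketch(Json::Value& data, const std::string& key,
                        const std::string& value) {
  if (key == "ctx_len" && !value.empty()) {
    try {
      data["ctx_len"] = std::stoi(value);
    } catch (const std::exception& e) {
      std::cerr << "Failed to parse numeric value for " << key << ": "
                << e.what() << std::endl;
    }
  }
}

int main() {
  std::unordered_map<std::string, std::string> options{{"gpus", "[0,1]"},
                                                       {"ctx_len", "1024"}};
  Json::Value body;
  body["model"] = "mistral";
  for (const auto& [k, v] : options) {
    UpdateConfigSketch(body, k, v);
  }
  std::cout << body.toStyledString();  // {"ctx_len": 1024, "model": "mistral"}
  return 0;
}
```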
4 changes: 4 additions & 0 deletions engine/cli/commands/model_start_cmd.h
@@ -2,6 +2,7 @@

#include <string>
#include <unordered_map>
#include "json/json.h"

namespace commands {

@@ -10,5 +11,8 @@ class ModelStartCmd {
  bool Exec(const std::string& host, int port, const std::string& model_handle,
            const std::unordered_map<std::string, std::string>& options,
            bool print_success_log = true);
+ private:
+  bool UpdateConfig(Json::Value& data, const std::string& key,
+                    const std::string& value);
};
} // namespace commands
10 changes: 8 additions & 2 deletions engine/services/model_service.cc
@@ -704,6 +704,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
  config::YamlHandler yaml_handler;

  try {
+   constexpr const int kDefautlContextLength = 8192;
+   int max_model_context_length = kDefautlContextLength;
    Json::Value json_data;
    // Currently we don't support download vision models, so we need to bypass check
    if (!params_override.bypass_model_check()) {
@@ -777,6 +779,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
json_data["system_prompt"] = mc.system_template;
json_data["user_prompt"] = mc.user_template;
json_data["ai_prompt"] = mc.ai_template;
json_data["ctx_len"] = std::min(kDefautlContextLength, mc.ctx_len);
max_model_context_length = mc.ctx_len;
} else {
bypass_stop_check_set_.insert(model_handle);
}
@@ -798,12 +802,14 @@
    ASSIGN_IF_PRESENT(json_data, params_override, cache_enabled);
    ASSIGN_IF_PRESENT(json_data, params_override, ngl);
    ASSIGN_IF_PRESENT(json_data, params_override, n_parallel);
-   ASSIGN_IF_PRESENT(json_data, params_override, ctx_len);
    ASSIGN_IF_PRESENT(json_data, params_override, cache_type);
    ASSIGN_IF_PRESENT(json_data, params_override, mmproj);
    ASSIGN_IF_PRESENT(json_data, params_override, model_path);
#undef ASSIGN_IF_PRESENT
-
+   if (params_override.ctx_len) {
+     json_data["ctx_len"] =
+         std::min(params_override.ctx_len.value(), max_model_context_length);
+   }
    CTL_INF(json_data.toStyledString());
    auto may_fallback_res = MayFallbackToCpu(json_data["model_path"].asString(),
                                             json_data["ngl"].asInt(),
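The service now resolves `ctx_len` in two steps: the default is `min(8192, model maximum)`, and a user-supplied override is clamped to the model maximum. A small sketch with worked values (the function name is hypothetical; it condenses the logic from `StartModel` above):

```cpp
#include <algorithm>
#include <iostream>
#include <optional>

constexpr int kDefaultContextLength = 8192;

// Condensed from ModelService::StartModel: pick the default, then clamp overrides.
int ResolveCtxLen(int max_model_context_length, std::optional<int> override_ctx) {
  int ctx = std::min(kDefaultContextLength, max_model_context_length);
  if (override_ctx.has_value()) {
    ctx = std::min(override_ctx.value(), max_model_context_length);
  }
  return ctx;
}

int main() {
  std::cout << ResolveCtxLen(4096, std::nullopt) << '\n';   // 4096: small model caps the default
  std::cout << ResolveCtxLen(32768, std::nullopt) << '\n';  // 8192: default applies
  std::cout << ResolveCtxLen(32768, 1024) << '\n';          // 1024: --ctx_len honored
  std::cout << ResolveCtxLen(4096, 16384) << '\n';          // 4096: override clamped to model max
  return 0;
}
```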
