Skip to content

Commit

Permalink
feat: model sources (#1777)
Browse files Browse the repository at this point in the history
* feat: prioritize GPUs

* fix: migrate db

* fix: add priority

* fix: db

* fix: more

* feat: model sources

* feat: support delete API

* feat: cli: support models sources add

* feat: cli: model source delete

* feat: cli: add model source list

* feat: sync cortex.db

* chore: cleanup

* feat: add metadata for model

* fix: migration

* chore: unit tests: cleanup

* fix: add metadata

* fix: pull model

* chore: unit tests: update

* chore: add e2e tests for models sources

* chore: add API docs

* chore: rename

---------

Co-authored-by: vansangpfiev <[email protected]>
  • Loading branch information
vansangpfiev and sangjanai authored Dec 12, 2024
1 parent 8dde05c commit f473b0b
Show file tree
Hide file tree
Showing 23 changed files with 1,269 additions and 271 deletions.
99 changes: 99 additions & 0 deletions docs/static/openapi/cortex.json
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,105 @@
"tags": ["Pulling Models"]
}
},
"/v1/models/sources": {
"post": {
"summary": "Add a model source",
"description": "User can add a Huggingface Organization or Repository",
"requestBody": {
"required": false,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"source": {
"type": "string",
"description": "The url of model source to add",
"example": "https://huggingface.co/cortexso/tinyllama"
}
}
}
}
}
},
"responses": {
"200": {
                        "description": "Model source added successfully",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"message": {
"type": "string",
"example": "Added model source"
}
}
}
}
}
}
},
"tags": ["Pulling Models"]
},
"delete": {
"summary": "Remove a model source",
"description": "User can remove a Huggingface Organization or Repository",
"requestBody": {
"required": false,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"source": {
"type": "string",
"description": "The url of model source to remove",
"example": "https://huggingface.co/cortexso/tinyllama"
}
}
}
}
}
},
"responses": {
"200": {
                        "description": "Model source removed successfully",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"message": {
"type": "string",
                                    "description": "Confirmation message returned after the model source is removed",
"example": "Removed model source successfully!"
}
}
}
}
}
},
"400": {
"description": "Bad request",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"description": "Error message describing the issue with the request"
}
}
}
}
}
}
},
"tags": ["Pulling Models"]
}
},
"/v1/threads": {
"post": {
"operationId": "ThreadsController_create",
Expand Down
76 changes: 75 additions & 1 deletion engine/cli/command_line_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#include "commands/model_import_cmd.h"
#include "commands/model_list_cmd.h"
#include "commands/model_pull_cmd.h"
#include "commands/model_source_add_cmd.h"
#include "commands/model_source_del_cmd.h"
#include "commands/model_source_list_cmd.h"
#include "commands/model_start_cmd.h"
#include "commands/model_stop_cmd.h"
#include "commands/model_upd_cmd.h"
Expand Down Expand Up @@ -253,6 +256,8 @@ void CommandLineParser::SetupModelCommands() {
"Display cpu mode");
list_models_cmd->add_flag("--gpu_mode", cml_data_.display_gpu_mode,
"Display gpu mode");
list_models_cmd->add_flag("--available", cml_data_.display_available_model,
"Display available models to download");
list_models_cmd->group(kSubcommands);
list_models_cmd->callback([this]() {
if (std::exchange(executed_, true))
Expand All @@ -261,7 +266,8 @@ void CommandLineParser::SetupModelCommands() {
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort), cml_data_.filter,
cml_data_.display_engine, cml_data_.display_version,
cml_data_.display_cpu_mode, cml_data_.display_gpu_mode);
cml_data_.display_cpu_mode, cml_data_.display_gpu_mode,
cml_data_.display_available_model);
});

auto get_models_cmd =
Expand Down Expand Up @@ -329,6 +335,74 @@ void CommandLineParser::SetupModelCommands() {
std::stoi(cml_data_.config.apiServerPort),
cml_data_.model_id, cml_data_.model_path);
});

auto model_source_cmd = models_cmd->add_subcommand(
"sources", "Subcommands for managing model sources");
model_source_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources [options] [subcommand]");
model_source_cmd->group(kSubcommands);

model_source_cmd->callback([this, model_source_cmd] {
if (std::exchange(executed_, true))
return;
if (model_source_cmd->get_subcommands().empty()) {
CLI_LOG(model_source_cmd->help());
}
});

auto model_src_add_cmd =
model_source_cmd->add_subcommand("add", "Add a model source");
model_src_add_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources add [model_source]");
model_src_add_cmd->group(kSubcommands);
model_src_add_cmd->add_option("source", cml_data_.model_src, "");
model_src_add_cmd->callback([&]() {
if (std::exchange(executed_, true))
return;
if (cml_data_.model_src.empty()) {
CLI_LOG("[model_source] is required\n");
CLI_LOG(model_src_add_cmd->help());
return;
};

commands::ModelSourceAddCmd().Exec(
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort), cml_data_.model_src);
});

auto model_src_del_cmd =
model_source_cmd->add_subcommand("remove", "Remove a model source");
model_src_del_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources remove [model_source]");
model_src_del_cmd->group(kSubcommands);
model_src_del_cmd->add_option("source", cml_data_.model_src, "");
model_src_del_cmd->callback([&]() {
if (std::exchange(executed_, true))
return;
if (cml_data_.model_src.empty()) {
CLI_LOG("[model_source] is required\n");
CLI_LOG(model_src_del_cmd->help());
return;
};

commands::ModelSourceDelCmd().Exec(
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort), cml_data_.model_src);
});

auto model_src_list_cmd =
model_source_cmd->add_subcommand("list", "List all model sources");
model_src_list_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources list");
model_src_list_cmd->group(kSubcommands);
model_src_list_cmd->callback([&]() {
if (std::exchange(executed_, true))
return;

commands::ModelSourceListCmd().Exec(
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort));
});
}

void CommandLineParser::SetupConfigsCommands() {
Expand Down
2 changes: 2 additions & 0 deletions engine/cli/command_line_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class CommandLineParser {
bool display_version = false;
bool display_cpu_mode = false;
bool display_gpu_mode = false;
bool display_available_model = false;
std::string filter = "";
std::string log_level = "INFO";

Expand All @@ -74,6 +75,7 @@ class CommandLineParser {
int port;
config_yaml_utils::CortexConfig config;
std::unordered_map<std::string, std::string> model_update_options;
std::string model_src;
};
CmlData cml_data_;
std::unordered_map<std::string, std::string> config_update_opts_;
Expand Down
78 changes: 50 additions & 28 deletions engine/cli/commands/model_list_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ using Row_t =
void ModelListCmd::Exec(const std::string& host, int port,
const std::string& filter, bool display_engine,
bool display_version, bool display_cpu_mode,
bool display_gpu_mode) {
bool display_gpu_mode, bool available) {
// Start server if server is not started yet
if (!commands::IsServerAlive(host, port)) {
CLI_LOG("Starting server ...");
Expand Down Expand Up @@ -73,40 +73,62 @@ void ModelListCmd::Exec(const std::string& host, int port,
continue;
}

count += 1;
if (available) {
if (v["status"].asString() != "downloadable") {
continue;
}

std::vector<std::string> row = {std::to_string(count),
v["model"].asString()};
if (display_engine) {
row.push_back(v["engine"].asString());
}
if (display_version) {
row.push_back(v["version"].asString());
}
count += 1;

if (auto& r = v["recommendation"]; !r.isNull()) {
if (display_cpu_mode) {
if (!r["cpu_mode"].isNull()) {
row.push_back("RAM: " + r["cpu_mode"]["ram"].asString() + " MiB");
}
std::vector<std::string> row = {std::to_string(count),
v["model"].asString()};
if (display_engine) {
row.push_back(v["engine"].asString());
}
if (display_version) {
row.push_back(v["version"].asString());
}
table.add_row({row.begin(), row.end()});
} else {
if (v["status"].asString() == "downloadable") {
continue;
}

count += 1;

std::vector<std::string> row = {std::to_string(count),
v["model"].asString()};
if (display_engine) {
row.push_back(v["engine"].asString());
}
if (display_version) {
row.push_back(v["version"].asString());
}

if (display_gpu_mode) {
if (!r["gpu_mode"].isNull()) {
std::string s;
s += "ngl: " + r["gpu_mode"][0]["ngl"].asString() + " - ";
s += "context: " + r["gpu_mode"][0]["context_length"].asString() +
" - ";
s += "RAM: " + r["gpu_mode"][0]["ram"].asString() + " MiB - ";
s += "VRAM: " + r["gpu_mode"][0]["vram"].asString() + " MiB - ";
s += "recommended ngl: " +
r["gpu_mode"][0]["recommend_ngl"].asString();
row.push_back(s);
if (auto& r = v["recommendation"]; !r.isNull()) {
if (display_cpu_mode) {
if (!r["cpu_mode"].isNull()) {
row.push_back("RAM: " + r["cpu_mode"]["ram"].asString() + " MiB");
}
}

if (display_gpu_mode) {
if (!r["gpu_mode"].isNull()) {
std::string s;
s += "ngl: " + r["gpu_mode"][0]["ngl"].asString() + " - ";
s += "context: " + r["gpu_mode"][0]["context_length"].asString() +
" - ";
s += "RAM: " + r["gpu_mode"][0]["ram"].asString() + " MiB - ";
s += "VRAM: " + r["gpu_mode"][0]["vram"].asString() + " MiB - ";
s += "recommended ngl: " +
r["gpu_mode"][0]["recommend_ngl"].asString();
row.push_back(s);
}
}
}
}

table.add_row({row.begin(), row.end()});
table.add_row({row.begin(), row.end()});
}
}
}

Expand Down
3 changes: 2 additions & 1 deletion engine/cli/commands/model_list_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class ModelListCmd {
public:
void Exec(const std::string& host, int port, const std::string& filter,
bool display_engine = false, bool display_version = false,
bool display_cpu_mode = false, bool display_gpu_mode = false);
bool display_cpu_mode = false, bool display_gpu_mode = false,
bool available = false);
};
} // namespace commands
38 changes: 38 additions & 0 deletions engine/cli/commands/model_source_add_cmd.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include "model_source_add_cmd.h"
#include "server_start_cmd.h"
#include "utils/json_helper.h"
#include "utils/logging_utils.h"

namespace commands {

// Registers a new model source (e.g. a Hugging Face organization or
// repository URL) by POSTing it to the server's /v1/models/sources endpoint.
//
// @param host          API server host.
// @param port          API server port.
// @param model_source  URL of the model source to add.
// @return true on success; false if the server could not be started or the
//         request failed.
bool ModelSourceAddCmd::Exec(const std::string& host, int port,
                             const std::string& model_source) {
  // The command talks to the HTTP API, so make sure the server is up first.
  if (!commands::IsServerAlive(host, port)) {
    CLI_LOG("Starting server ...");
    commands::ServerStartCmd ssc;
    if (!ssc.Exec(host, port)) {
      return false;
    }
  }

  auto url = url_parser::Url{
      .protocol = "http",
      .host = host + ":" + std::to_string(port),
      .pathParams = {"v1", "models", "sources"},
  };

  Json::Value json_data;
  json_data["source"] = model_source;

  auto data_str = json_data.toStyledString();
  auto res = curl_utils::SimplePostJson(url.ToFullPath(), data_str);
  if (res.has_error()) {
    // NOTE(review): assumes the error payload carries a "message" field; the
    // OpenAPI spec documents an "error" field for 400 responses — confirm
    // against the server's actual error body.
    auto root = json_helper::ParseJsonString(res.error());
    CLI_LOG(root["message"].asString());
    return false;
  }

  CLI_LOG("Added model source: " << model_source);
  return true;
}

}  // namespace commands
12 changes: 12 additions & 0 deletions engine/cli/commands/model_source_add_cmd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

#include <string>

namespace commands {

// CLI command that registers a new model source (a Hugging Face organization
// or repository URL) with the cortex API server.
class ModelSourceAddCmd {
 public:
  // Sends the add-source request to the server at host:port.
  // Returns true on success, false otherwise.
  bool Exec(const std::string& host, int port,
            const std::string& model_source);
};
}  // namespace commands
Loading

0 comments on commit f473b0b

Please sign in to comment.