Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: model sources #1777

Merged
merged 30 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
0526471
feat: prioritize GPUs
sangjanai Dec 4, 2024
df6d174
fix: migrate db
sangjanai Dec 4, 2024
80bd6b4
fix: add priority
sangjanai Dec 4, 2024
2cc0260
Merge branch 'dev' of https://github.com/janhq/cortex.cpp into feat/p…
sangjanai Dec 5, 2024
50b774e
fix: db
sangjanai Dec 5, 2024
fb581da
fix: more
sangjanai Dec 5, 2024
bd08898
Merge branch 'dev' of https://github.com/janhq/cortex.cpp into feat/p…
sangjanai Dec 5, 2024
a66173f
feat: model sources
vansangpfiev Dec 5, 2024
aebc317
feat: support delete API
sangjanai Dec 6, 2024
1eac21d
feat: cli: support models sources add
sangjanai Dec 6, 2024
9160a09
feat: cli: model source delete
sangjanai Dec 6, 2024
4c49f06
feat: cli: add model source list
sangjanai Dec 6, 2024
34d766f
feat: sync cortex.db
sangjanai Dec 9, 2024
875e488
chore: cleanup
sangjanai Dec 9, 2024
afafe33
feat: add metadata for model
sangjanai Dec 9, 2024
a2c2c40
Merge branch 'dev' of https://github.com/janhq/cortex.cpp into feat/m…
sangjanai Dec 9, 2024
349bd5f
Merge branch 'dev' of https://github.com/janhq/cortex.cpp into feat/p…
sangjanai Dec 9, 2024
f9f6002
fix: migration
sangjanai Dec 9, 2024
1fcb689
chore: unit tests: cleanup
sangjanai Dec 9, 2024
14654a5
Merge branch 'feat/prioritize-gpus' of https://github.com/janhq/corte…
sangjanai Dec 9, 2024
3707a18
fix: add metadata
sangjanai Dec 9, 2024
5cda8f3
fix: pull model
sangjanai Dec 9, 2024
1181426
chore: unit tests: update
sangjanai Dec 9, 2024
b85f05c
Merge branch 'dev' of https://github.com/janhq/cortex.cpp into feat/m…
sangjanai Dec 10, 2024
67636c8
chore: add e2e tests for models sources
sangjanai Dec 10, 2024
fd29df3
Merge branch 'dev' of github.com:janhq/nitro into feat/model-sources
vansangpfiev Dec 10, 2024
e8037ab
Merge branch 'feat/model-sources' of github.com:janhq/nitro into feat…
vansangpfiev Dec 10, 2024
53664a9
chore: add API docs
vansangpfiev Dec 11, 2024
92f0597
chore: rename
vansangpfiev Dec 11, 2024
563cae5
Merge branch 'dev' of github.com:janhq/nitro into feat/model-sources
vansangpfiev Dec 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions docs/static/openapi/cortex.json
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,105 @@
"tags": ["Pulling Models"]
}
},
"/v1/models/sources": {
"post": {
"summary": "Add a model source",
"description": "User can add a Huggingface Organization or Repository",
"requestBody": {
"required": false,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"source": {
"type": "string",
"description": "The url of model source to add",
"example": "https://huggingface.co/cortexso/tinyllama"
}
}
}
}
}
},
"responses": {
"200": {
"description": "Successful installation",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"message": {
"type": "string",
"example": "Added model source"
}
}
}
}
}
}
},
"tags": ["Pulling Models"]
},
"delete": {
"summary": "Remove a model source",
"description": "User can remove a Huggingface Organization or Repository",
"requestBody": {
"required": false,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"source": {
"type": "string",
"description": "The url of model source to remove",
"example": "https://huggingface.co/cortexso/tinyllama"
}
}
}
}
}
},
"responses": {
"200": {
"description": "Successful uninstallation",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"message": {
"type": "string",
"description": "Confirmation message indicating the model source was removed",
"example": "Removed model source successfully!"
}
}
}
}
}
},
"400": {
"description": "Bad request",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"description": "Error message describing the issue with the request"
}
}
}
}
}
}
},
"tags": ["Pulling Models"]
}
},
"/v1/threads": {
"post": {
"operationId": "ThreadsController_create",
Expand Down
76 changes: 75 additions & 1 deletion engine/cli/command_line_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#include "commands/model_import_cmd.h"
#include "commands/model_list_cmd.h"
#include "commands/model_pull_cmd.h"
#include "commands/model_source_add_cmd.h"
#include "commands/model_source_del_cmd.h"
#include "commands/model_source_list_cmd.h"
#include "commands/model_start_cmd.h"
#include "commands/model_stop_cmd.h"
#include "commands/model_upd_cmd.h"
Expand Down Expand Up @@ -253,6 +256,8 @@ void CommandLineParser::SetupModelCommands() {
"Display cpu mode");
list_models_cmd->add_flag("--gpu_mode", cml_data_.display_gpu_mode,
"Display gpu mode");
list_models_cmd->add_flag("--available", cml_data_.display_available_model,
"Display available models to download");
list_models_cmd->group(kSubcommands);
list_models_cmd->callback([this]() {
if (std::exchange(executed_, true))
Expand All @@ -261,7 +266,8 @@ void CommandLineParser::SetupModelCommands() {
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort), cml_data_.filter,
cml_data_.display_engine, cml_data_.display_version,
cml_data_.display_cpu_mode, cml_data_.display_gpu_mode);
cml_data_.display_cpu_mode, cml_data_.display_gpu_mode,
cml_data_.display_available_model);
});

auto get_models_cmd =
Expand Down Expand Up @@ -329,6 +335,74 @@ void CommandLineParser::SetupModelCommands() {
std::stoi(cml_data_.config.apiServerPort),
cml_data_.model_id, cml_data_.model_path);
});

auto model_source_cmd = models_cmd->add_subcommand(
"sources", "Subcommands for managing model sources");
model_source_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources [options] [subcommand]");
model_source_cmd->group(kSubcommands);

model_source_cmd->callback([this, model_source_cmd] {
if (std::exchange(executed_, true))
return;
if (model_source_cmd->get_subcommands().empty()) {
CLI_LOG(model_source_cmd->help());
}
});

auto model_src_add_cmd =
model_source_cmd->add_subcommand("add", "Add a model source");
model_src_add_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources add [model_source]");
model_src_add_cmd->group(kSubcommands);
model_src_add_cmd->add_option("source", cml_data_.model_src, "");
model_src_add_cmd->callback([&]() {
if (std::exchange(executed_, true))
return;
if (cml_data_.model_src.empty()) {
CLI_LOG("[model_source] is required\n");
CLI_LOG(model_src_add_cmd->help());
return;
};

commands::ModelSourceAddCmd().Exec(
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort), cml_data_.model_src);
});

auto model_src_del_cmd =
model_source_cmd->add_subcommand("remove", "Remove a model source");
model_src_del_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources remove [model_source]");
model_src_del_cmd->group(kSubcommands);
model_src_del_cmd->add_option("source", cml_data_.model_src, "");
model_src_del_cmd->callback([&]() {
if (std::exchange(executed_, true))
return;
if (cml_data_.model_src.empty()) {
CLI_LOG("[model_source] is required\n");
CLI_LOG(model_src_del_cmd->help());
return;
};

commands::ModelSourceDelCmd().Exec(
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort), cml_data_.model_src);
});

auto model_src_list_cmd =
model_source_cmd->add_subcommand("list", "List all model sources");
model_src_list_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources list");
model_src_list_cmd->group(kSubcommands);
model_src_list_cmd->callback([&]() {
if (std::exchange(executed_, true))
return;

commands::ModelSourceListCmd().Exec(
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort));
});
}

void CommandLineParser::SetupConfigsCommands() {
Expand Down
2 changes: 2 additions & 0 deletions engine/cli/command_line_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class CommandLineParser {
bool display_version = false;
bool display_cpu_mode = false;
bool display_gpu_mode = false;
bool display_available_model = false;
std::string filter = "";
std::string log_level = "INFO";

Expand All @@ -74,6 +75,7 @@ class CommandLineParser {
int port;
config_yaml_utils::CortexConfig config;
std::unordered_map<std::string, std::string> model_update_options;
std::string model_src;
};
CmlData cml_data_;
std::unordered_map<std::string, std::string> config_update_opts_;
Expand Down
78 changes: 50 additions & 28 deletions engine/cli/commands/model_list_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ using Row_t =
void ModelListCmd::Exec(const std::string& host, int port,
const std::string& filter, bool display_engine,
bool display_version, bool display_cpu_mode,
bool display_gpu_mode) {
bool display_gpu_mode, bool available) {
// Start server if server is not started yet
if (!commands::IsServerAlive(host, port)) {
CLI_LOG("Starting server ...");
Expand Down Expand Up @@ -73,40 +73,62 @@ void ModelListCmd::Exec(const std::string& host, int port,
continue;
}

count += 1;
if (available) {
if (v["status"].asString() != "downloadable") {
continue;
}

std::vector<std::string> row = {std::to_string(count),
v["model"].asString()};
if (display_engine) {
row.push_back(v["engine"].asString());
}
if (display_version) {
row.push_back(v["version"].asString());
}
count += 1;

if (auto& r = v["recommendation"]; !r.isNull()) {
if (display_cpu_mode) {
if (!r["cpu_mode"].isNull()) {
row.push_back("RAM: " + r["cpu_mode"]["ram"].asString() + " MiB");
}
std::vector<std::string> row = {std::to_string(count),
v["model"].asString()};
if (display_engine) {
row.push_back(v["engine"].asString());
}
if (display_version) {
row.push_back(v["version"].asString());
}
table.add_row({row.begin(), row.end()});
} else {
if (v["status"].asString() == "downloadable") {
continue;
}

count += 1;

std::vector<std::string> row = {std::to_string(count),
v["model"].asString()};
if (display_engine) {
row.push_back(v["engine"].asString());
}
if (display_version) {
row.push_back(v["version"].asString());
}

if (display_gpu_mode) {
if (!r["gpu_mode"].isNull()) {
std::string s;
s += "ngl: " + r["gpu_mode"][0]["ngl"].asString() + " - ";
s += "context: " + r["gpu_mode"][0]["context_length"].asString() +
" - ";
s += "RAM: " + r["gpu_mode"][0]["ram"].asString() + " MiB - ";
s += "VRAM: " + r["gpu_mode"][0]["vram"].asString() + " MiB - ";
s += "recommended ngl: " +
r["gpu_mode"][0]["recommend_ngl"].asString();
row.push_back(s);
if (auto& r = v["recommendation"]; !r.isNull()) {
if (display_cpu_mode) {
if (!r["cpu_mode"].isNull()) {
row.push_back("RAM: " + r["cpu_mode"]["ram"].asString() + " MiB");
}
}

if (display_gpu_mode) {
if (!r["gpu_mode"].isNull()) {
std::string s;
s += "ngl: " + r["gpu_mode"][0]["ngl"].asString() + " - ";
s += "context: " + r["gpu_mode"][0]["context_length"].asString() +
" - ";
s += "RAM: " + r["gpu_mode"][0]["ram"].asString() + " MiB - ";
s += "VRAM: " + r["gpu_mode"][0]["vram"].asString() + " MiB - ";
s += "recommended ngl: " +
r["gpu_mode"][0]["recommend_ngl"].asString();
row.push_back(s);
}
}
}
}

table.add_row({row.begin(), row.end()});
table.add_row({row.begin(), row.end()});
}
}
}

Expand Down
3 changes: 2 additions & 1 deletion engine/cli/commands/model_list_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class ModelListCmd {
public:
void Exec(const std::string& host, int port, const std::string& filter,
bool display_engine = false, bool display_version = false,
bool display_cpu_mode = false, bool display_gpu_mode = false);
bool display_cpu_mode = false, bool display_gpu_mode = false,
bool available = false);
};
} // namespace commands
38 changes: 38 additions & 0 deletions engine/cli/commands/model_source_add_cmd.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include "model_source_add_cmd.h"
#include "server_start_cmd.h"
#include "utils/json_helper.h"
#include "utils/logging_utils.h"
namespace commands {

// Registers a new model source (a Huggingface organization or repository
// URL) with the cortex server via POST /v1/models/sources.
//
// @param host          API server host (e.g. "127.0.0.1").
// @param port          API server port.
// @param model_source  URL of the model source to add.
// @return true on success; false if the server could not be started or the
//         request failed (the server-reported message is logged).
bool ModelSourceAddCmd::Exec(const std::string& host, int port,
                             const std::string& model_source) {
  // The command is implemented as an HTTP call against the API server, so
  // start the server first if it is not already running.
  if (!commands::IsServerAlive(host, port)) {
    CLI_LOG("Starting server ...");
    commands::ServerStartCmd ssc;
    if (!ssc.Exec(host, port)) {
      return false;
    }
  }

  auto url = url_parser::Url{
      .protocol = "http",
      .host = host + ":" + std::to_string(port),
      .pathParams = {"v1", "models", "sources"},
  };

  Json::Value json_data;
  json_data["source"] = model_source;

  auto data_str = json_data.toStyledString();
  auto res = curl_utils::SimplePostJson(url.ToFullPath(), data_str);
  if (res.has_error()) {
    // On failure the server responds with a JSON body; surface its
    // "message" field to the user. NOTE(review): assumes the error payload
    // always carries "message" — confirm against the server handler.
    auto root = json_helper::ParseJsonString(res.error());
    CLI_LOG(root["message"].asString());
    return false;
  }

  CLI_LOG("Added model source: " << model_source);
  return true;
}

}  // namespace commands
12 changes: 12 additions & 0 deletions engine/cli/commands/model_source_add_cmd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

#include <string>
#include <unordered_map>

namespace commands {

// CLI command that adds a model source (a Huggingface organization or
// repository URL) by POSTing to the server's /v1/models/sources endpoint.
class ModelSourceAddCmd {
public:
// Starts the API server if it is not already running, then registers
// `model_source`. Returns true on success, false if the server could not
// be started or the request failed.
bool Exec(const std::string& host, int port, const std::string& model_source);
};
} // namespace commands
Loading
Loading