diff --git a/engine/commands/engine_get_cmd.cc b/engine/commands/engine_get_cmd.cc new file mode 100644 index 000000000..17ffeaf66 --- /dev/null +++ b/engine/commands/engine_get_cmd.cc @@ -0,0 +1,28 @@ +#include "engine_get_cmd.h" +#include +#include +#include "services/engine_service.h" +#include "utils/logging_utils.h" + +namespace commands { + +void EngineGetCmd::Exec() const { + CTL_INF("[EngineGetCmd] engine: " << engine_); + + auto engine_service = EngineService(); + try { + auto status = engine_service.GetEngineInfo(engine_); + tabulate::Table table; + table.add_row({"name", "description", "version", "product name", "status"}); + table.format().font_color(tabulate::Color::green); + table.add_row({status.name, status.description, status.version, + status.product_name, status.status}); + std::cout << table << std::endl; + } catch (const std::runtime_error& e) { + std::cerr << "Engine " << engine_ << " is not supported!" << "\n"; + } catch (const std::exception& e) { + std::cerr << "Failed to get engine info for " << engine_ << ": " << e.what() + << "\n"; + } +} +}; // namespace commands diff --git a/engine/commands/engine_get_cmd.h b/engine/commands/engine_get_cmd.h new file mode 100644 index 000000000..505ee5120 --- /dev/null +++ b/engine/commands/engine_get_cmd.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace commands { +class EngineGetCmd { + public: + EngineGetCmd(const std::string& engine) : engine_{engine} {}; + + void Exec() const; + + private: + std::string engine_; +}; +} // namespace commands diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc index e0b05804f..b3ab8bae5 100644 --- a/engine/commands/engine_init_cmd.cc +++ b/engine/commands/engine_init_cmd.cc @@ -29,7 +29,7 @@ bool EngineInitCmd::Exec() const { if (system_info.arch == system_info_utils::kUnsupported || system_info.os == system_info_utils::kUnsupported) { CTL_ERR("Unsupported OS or architecture: " << system_info.os << ", " - << system_info.arch); + << system_info.arch); return false; } CTL_INF("OS: " << system_info.os << ", Arch: " << system_info.arch); @@ -192,9 +192,10 @@ bool EngineInitCmd::Exec() const { // cuda driver version should be greater than toolkit version to ensure compatibility if (semantic_version_utils::CompareSemanticVersion( cuda_driver_version, suitable_toolkit_version) < 0) { - CTL_ERR("Your Cuda driver version " << cuda_driver_version - << " is not compatible with cuda toolkit version " - << suitable_toolkit_version); + CTL_ERR("Your Cuda driver version " + << cuda_driver_version + << " is not compatible with cuda toolkit version " + << suitable_toolkit_version); return false; } diff --git a/engine/commands/engine_list_cmd.cc b/engine/commands/engine_list_cmd.cc index d76d885d7..21220e7e4 100644 --- a/engine/commands/engine_list_cmd.cc +++ b/engine/commands/engine_list_cmd.cc @@ -1,65 +1,24 @@ -// clang-format off -#include "utils/cortex_utils.h" -// clang-format on #include "engine_list_cmd.h" -#include #include -#include -#include "trantor/utils/Logger.h" +#include "services/engine_service.h" namespace commands { bool EngineListCmd::Exec() { + auto engine_service = EngineService(); + auto status_list = engine_service.GetEngineInfoList(); + tabulate::Table table; + table.format().font_color(tabulate::Color::green); table.add_row( {"(Index)", "name", "description", "version", "product name", "status"}); - table.format().font_color(tabulate::Color::green); -#ifdef _WIN32 - if (std::filesystem::exists(std::filesystem::current_path().string() + - 
cortex_utils::kOnnxLibPath)) { - table.add_row({"1", "cortex.onnx", - "This extension enables chat completion API calls using the " - "Onnx engine", - "0.0.1", "Onnx Inference Engine", "ready"}); - } else { - table.add_row({"1", "cortex.onnx", - "This extension enables chat completion API calls using the " - "Onnx engine", - "0.0.1", "Onnx Inference Engine", "not_initialized"}); + for (int i = 0; i < status_list.size(); i++) { + auto status = status_list[i]; + std::string index = std::to_string(i + 1); + table.add_row({index, status.name, status.description, status.version, + status.product_name, status.status}); } -#else - table.add_row( - {"1", "cortex.onnx", - "This extension enables chat completion API calls using the Onnx engine", - "0.0.1", "Onnx Inference Engine", "not_supported"}); -#endif - // lllamacpp - if (std::filesystem::exists(std::filesystem::current_path().string() + - cortex_utils::kLlamaLibPath)) { - table.add_row({"2", "cortex.llamacpp", - "This extension enables chat completion API calls using the " - "LlamaCPP engine", - "0.0.1", "LlamaCPP Inference Engine", "ready"}); - } else { - table.add_row({"2", "cortex.llamacpp", - "This extension enables chat completion API calls using the " - "LlamaCPP engine", - "0.0.1", "LlamaCPP Inference Engine", "not_initialized"}); - } - // tensorrt llm - if (std::filesystem::exists(std::filesystem::current_path().string() + - cortex_utils::kTensorrtLlmPath)) { - table.add_row({"3", "cortex.tensorrt-llm", - "This extension enables chat completion API calls using the " - "TensorrtLLM engine", - "0.0.1", "TensorrtLLM Inference Engine", "ready"}); - } else { - table.add_row({"3", "cortex.tensorrt-llm", - "This extension enables chat completion API calls using the " - "TensorrtLLM engine", - "0.0.1", "TensorrtLLM Inference Engine", "not_initialized"}); - } for (int i = 0; i < 6; i++) { table[0][i] .format() @@ -77,5 +36,4 @@ bool EngineListCmd::Exec() { std::cout << table << std::endl; return true; } - }; // namespace commands diff --git a/engine/commands/engine_list_cmd.h b/engine/commands/engine_list_cmd.h index 2193572b7..2369f47ce 100644 --- a/engine/commands/engine_list_cmd.h +++ b/engine/commands/engine_list_cmd.h @@ -1,11 +1,9 @@ #pragma once -#include - namespace commands { class EngineListCmd { public: - bool Exec() ; + bool Exec(); }; -} // namespace commands \ No newline at end of file +} // namespace commands diff --git a/engine/commands/engine_uninstall_cmd.cc b/engine/commands/engine_uninstall_cmd.cc new file mode 100644 index 000000000..4ea41cac9 --- /dev/null +++ b/engine/commands/engine_uninstall_cmd.cc @@ -0,0 +1,21 @@ +#include "engine_uninstall_cmd.h" +#include "services/engine_service.h" +#include "utils/logging_utils.h" + +namespace commands { + +EngineUninstallCmd::EngineUninstallCmd(std::string engine) + : engine_{std::move(engine)} {} + +void EngineUninstallCmd::Exec() const { + CTL_INF("Uninstall engine " + engine_); + auto engine_service = EngineService(); + + try { + engine_service.UninstallEngine(engine_); + CLI_LOG("Engine " << engine_ << " uninstalled successfully!") + } catch (const std::exception& e) { + CLI_LOG("Failed to uninstall engine " << engine_ << ": " << e.what()); + } +} +}; // namespace commands diff --git a/engine/commands/engine_uninstall_cmd.h b/engine/commands/engine_uninstall_cmd.h new file mode 100644 index 000000000..94c1016e7 --- /dev/null +++ b/engine/commands/engine_uninstall_cmd.h @@ -0,0 +1,18 @@ +#pragma once +#include +#include + +namespace commands { +class EngineUninstallCmd { + 
public: + EngineUninstallCmd(std::string engine); + + void Exec() const; + + private: + std::string engine_; + + static constexpr std::array supportedEngines_ = { + "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"}; +}; +} // namespace commands diff --git a/engine/commands/model_pull_cmd.cc b/engine/commands/model_pull_cmd.cc index b058bd305..f64ad0737 100644 --- a/engine/commands/model_pull_cmd.cc +++ b/engine/commands/model_pull_cmd.cc @@ -1,10 +1,9 @@ #include "model_pull_cmd.h" #include #include "services/download_service.h" -#include "trantor/utils/Logger.h" #include "utils/cortexso_parser.h" -#include "utils/model_callback_utils.h" #include "utils/logging_utils.h" +#include "utils/model_callback_utils.h" namespace commands { ModelPullCmd::ModelPullCmd(std::string model_handle, std::string branch) @@ -24,4 +23,4 @@ bool ModelPullCmd::Exec() { } } -}; // namespace commands \ No newline at end of file +}; // namespace commands diff --git a/engine/commands/model_start_cmd.cc b/engine/commands/model_start_cmd.cc index db64c7ee3..83d051891 100644 --- a/engine/commands/model_start_cmd.cc +++ b/engine/commands/model_start_cmd.cc @@ -43,4 +43,4 @@ bool ModelStartCmd::Exec() { return true; } -}; // namespace commands \ No newline at end of file +}; // namespace commands diff --git a/engine/commands/model_start_cmd.h b/engine/commands/model_start_cmd.h index 809f71c83..26daf9d0e 100644 --- a/engine/commands/model_start_cmd.h +++ b/engine/commands/model_start_cmd.h @@ -1,13 +1,13 @@ #pragma once #include -#include #include "config/model_config.h" namespace commands { -class ModelStartCmd{ +class ModelStartCmd { public: - explicit ModelStartCmd(std::string host, int port, const config::ModelConfig& mc); + explicit ModelStartCmd(std::string host, int port, + const config::ModelConfig& mc); bool Exec(); private: @@ -15,4 +15,4 @@ class ModelStartCmd{ int port_; const config::ModelConfig& mc_; }; -} // namespace commands \ No newline at end of file +} // namespace commands diff --git a/engine/commands/run_cmd.cc b/engine/commands/run_cmd.cc index 7d3734805..5070c3937 100644 --- a/engine/commands/run_cmd.cc +++ b/engine/commands/run_cmd.cc @@ -3,7 +3,6 @@ #include "cmd_info.h" #include "config/yaml_config.h" #include "engine_init_cmd.h" -#include "httplib.h" #include "model_pull_cmd.h" #include "model_start_cmd.h" #include "trantor/utils/Logger.h" @@ -37,7 +36,7 @@ void RunCmd::Exec() { if (!eic.Exec()) { LOG_INFO << "Failed to install engine"; return; - } + } } } @@ -95,4 +94,4 @@ bool RunCmd::IsEngineExisted(const std::string& e) { return false; } -}; // namespace commands \ No newline at end of file +}; // namespace commands diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 12e1db88c..e90f99873 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -1,16 +1,19 @@ #include "command_line_parser.h" #include "commands/chat_cmd.h" #include "commands/cmd_info.h" +#include "commands/engine_get_cmd.h" #include "commands/engine_init_cmd.h" #include "commands/engine_list_cmd.h" +#include "commands/engine_uninstall_cmd.h" #include "commands/model_get_cmd.h" #include "commands/model_list_cmd.h" #include "commands/model_pull_cmd.h" #include "commands/model_start_cmd.h" -#include "commands/run_cmd.h" #include "commands/model_stop_cmd.h" +#include "commands/run_cmd.h" #include "commands/server_stop_cmd.h" #include "config/yaml_config.h" +#include "services/engine_service.h" #include 
"utils/cortex_utils.h" #include "utils/logging_utils.h" @@ -124,11 +127,11 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { command.Exec(); }); - auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine"); + EngineManagement(engines_cmd, "cortex.llamacpp", version); + EngineManagement(engines_cmd, "cortex.onnx", version); + EngineManagement(engines_cmd, "cortex.tensorrt-llm", version); - EngineInstall(engines_cmd, "cortex.llamacpp", version); - EngineInstall(engines_cmd, "cortex.onnx", version); - EngineInstall(engines_cmd, "cortex.tensorrt-llm", version); + EngineGet(engines_cmd); } { @@ -157,9 +160,9 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) { return true; } -void CommandLineParser::EngineInstall(CLI::App* parent, - const std::string& engine_name, - std::string& version) { +void CommandLineParser::EngineManagement(CLI::App* parent, + const std::string& engine_name, + std::string& version) { auto engine_cmd = parent->add_subcommand(engine_name, "Manage " + engine_name + " engine"); @@ -172,4 +175,27 @@ void CommandLineParser::EngineInstall(CLI::App* parent, commands::EngineInitCmd eic(engine_name, version); eic.Exec(); }); -} \ No newline at end of file + + auto uninstall_desc{"Uninstall " + engine_name + " engine"}; + auto uninstall_cmd = engine_cmd->add_subcommand("uninstall", uninstall_desc); + uninstall_cmd->callback([engine_name] { + commands::EngineUninstallCmd cmd(engine_name); + cmd.Exec(); + }); +} + +void CommandLineParser::EngineGet(CLI::App* parent) { + auto get_cmd = parent->add_subcommand("get", "Get an engine info"); + auto engine_service = EngineService(); + + for (auto& engine : engine_service.kSupportEngines) { + std::string engine_name{engine}; + std::string desc = "Get " + engine_name + " status"; + + auto engine_get_cmd = get_cmd->add_subcommand(engine_name, desc); + engine_get_cmd->callback([engine_name] { + commands::EngineGetCmd cmd(engine_name); + cmd.Exec(); + }); + } +} diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h index b6695346e..a15aa0529 100644 --- a/engine/controllers/command_line_parser.h +++ b/engine/controllers/command_line_parser.h @@ -1,6 +1,5 @@ #pragma once -#include #include "CLI/CLI.hpp" class CommandLineParser { @@ -9,8 +8,10 @@ class CommandLineParser { bool SetupCommand(int argc, char** argv); private: - void EngineInstall(CLI::App* parent, const std::string& engine_name, - std::string& version); + void EngineManagement(CLI::App* parent, const std::string& engine_name, + std::string& version); + + void EngineGet(CLI::App* parent); CLI::App app_; -}; \ No newline at end of file +}; diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 9668be441..dd7ba3036 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -1,11 +1,17 @@ #include "engines.h" +#include +#include +#include +#include +#include "services/engine_service.h" #include "utils/archive_utils.h" -#include "utils/file_manager_utils.h" +#include "utils/cortex_utils.h" #include "utils/system_info_utils.h" -void Engines::InitEngine(const HttpRequestPtr& req, - std::function&& callback, - const std::string& engine) const { +void Engines::InstallEngine( + const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) const { LOG_DEBUG << "InitEngine, Engine: " << engine; if (engine.empty()) { Json::Value res; @@ -114,62 +120,88 @@ void Engines::InitEngine(const HttpRequestPtr& req, void Engines::ListEngine( const 
HttpRequestPtr& req, std::function&& callback) const { + auto engine_service = EngineService(); + auto status_list = engine_service.GetEngineInfoList(); + Json::Value ret; ret["object"] = "list"; Json::Value data(Json::arrayValue); - Json::Value obj_onnx, obj_llamacpp, obj_tensorrt; - obj_onnx["name"] = "cortex.onnx"; - obj_onnx["description"] = - "This extension enables chat completion API calls using the Onnx engine"; - obj_onnx["version"] = "0.0.1"; - obj_onnx["productName"] = "Onnx Inference Engine"; - - obj_llamacpp["name"] = "cortex.llamacpp"; - obj_llamacpp["description"] = - "This extension enables chat completion API calls using the LlamaCPP " - "engine"; - obj_llamacpp["version"] = "0.0.1"; - obj_llamacpp["productName"] = "LlamaCPP Inference Engine"; - - obj_tensorrt["name"] = "cortex.tensorrt-llm"; - obj_tensorrt["description"] = - "This extension enables chat completion API calls using the TensorrtLLM " - "engine"; - obj_tensorrt["version"] = "0.0.1"; - obj_tensorrt["productName"] = "TensorrtLLM Inference Engine"; - -#ifdef _WIN32 - if (std::filesystem::exists(std::filesystem::current_path().string() + - cortex_utils::kOnnxLibPath)) { - obj_onnx["status"] = "ready"; - } else { - obj_onnx["status"] = "not_initialized"; - } -#else - obj_onnx["status"] = "not_supported"; -#endif - // lllamacpp - if (std::filesystem::exists(std::filesystem::current_path().string() + - cortex_utils::kLlamaLibPath)) { - - obj_llamacpp["status"] = "ready"; - } else { - obj_llamacpp["status"] = "not_initialized"; - } - // tensorrt llm - if (std::filesystem::exists(std::filesystem::current_path().string() + - cortex_utils::kTensorrtLlmPath)) { - obj_tensorrt["status"] = "ready"; - } else { - obj_tensorrt["status"] = "not_initialized"; + for (auto& status : status_list) { + Json::Value ret; + ret["name"] = status.name; + ret["description"] = status.description; + ret["version"] = status.version; + ret["productName"] = status.product_name; + ret["status"] = status.status; + + data.append(std::move(ret)); } - data.append(std::move(obj_onnx)); - data.append(std::move(obj_llamacpp)); - data.append(std::move(obj_tensorrt)); ret["data"] = data; ret["result"] = "OK"; auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); resp->setStatusCode(k200OK); callback(resp); -} \ No newline at end of file +} + +void Engines::GetEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) const { + auto engine_service = EngineService(); + try { + auto status = engine_service.GetEngineInfo(engine); + Json::Value ret; + ret["name"] = status.name; + ret["description"] = status.description; + ret["version"] = status.version; + ret["productName"] = status.product_name; + ret["status"] = status.status; + + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); + } catch (const std::runtime_error e) { + Json::Value ret; + ret["message"] = e.what(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + } catch (const std::exception& e) { + Json::Value ret; + ret["message"] = e.what(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k500InternalServerError); + callback(resp); + } +} + +void Engines::UninstallEngine( + const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) const { + LOG_INFO << "[Http] Uninstall engine " << engine; + auto engine_service = EngineService(); + + Json::Value ret; + try { + // TODO: 
Unload the model which is currently running on engine_ + // TODO: Unload engine if is loaded + engine_service.UninstallEngine(engine); + + ret["message"] = "Engine " + engine + " uninstalled successfully!"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k200OK); + callback(resp); + } catch (const std::runtime_error& e) { + CLI_LOG("Runtime exception"); + ret["message"] = "Engine " + engine + " is not installed!"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + } catch (const std::exception& e) { + ret["message"] = "Engine " + engine + " failed to uninstall: " + e.what(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + } +} diff --git a/engine/controllers/engines.h b/engine/controllers/engines.h index ccc526efe..91127e5e0 100644 --- a/engine/controllers/engines.h +++ b/engine/controllers/engines.h @@ -1,24 +1,33 @@ #pragma once #include +#include #include -#include "services/download_service.h" -#include "utils/cortex_utils.h" #include "utils/cortexso_parser.h" -#include "utils/http_util.h" using namespace drogon; class Engines : public drogon::HttpController { public: METHOD_LIST_BEGIN - METHOD_ADD(Engines::InitEngine, "/{1}/init", Post); - METHOD_ADD(Engines::ListEngine, "/list", Get); + METHOD_ADD(Engines::InstallEngine, "/{1}/init", Post); + METHOD_ADD(Engines::UninstallEngine, "/{1}", Delete); + METHOD_ADD(Engines::ListEngine, "", Get); + METHOD_ADD(Engines::GetEngine, "/{1}", Get); METHOD_LIST_END - void InitEngine(const HttpRequestPtr& req, - std::function&& callback, - const std::string& engine) const; + void InstallEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) const; + void ListEngine(const HttpRequestPtr& req, std::function&& callback) const; + + void GetEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) const; + + void UninstallEngine(const HttpRequestPtr& req, + std::function&& callback, + const std::string& engine) const; }; diff --git a/engine/services/download_service.cc b/engine/services/download_service.cc index 5f261918f..ffb25f02d 100644 --- a/engine/services/download_service.cc +++ b/engine/services/download_service.cc @@ -37,8 +37,7 @@ void DownloadService::StartDownloadItem( auto containerFolderPath{file_manager_utils::GetContainerFolderPath( file_manager_utils::downloadTypeToString(item.type))}; - CTL_INF("Container folder path: " << containerFolderPath.string() - << "\n"); + CTL_INF("Container folder path: " << containerFolderPath.string() << "\n"); auto itemFolderPath{containerFolderPath / std::filesystem::path(downloadId)}; CTL_INF("itemFolderPath: " << itemFolderPath.string()); @@ -82,8 +81,8 @@ void DownloadService::StartDownloadItem( } if (current == total) { outputFile.flush(); - CLI_LOG("Done download: " - << static_cast(total) / 1024 / 1024 << " MiB"); + CLI_LOG("Done download: " << static_cast(total) / 1024 / 1024 + << " MiB"); if (callback.has_value()) { auto need_parse_gguf = item.path.find("cortexso") == std::string::npos; @@ -93,4 +92,4 @@ void DownloadService::StartDownloadItem( } return true; }); -} \ No newline at end of file +} diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc new file mode 100644 index 000000000..836a3c10c --- /dev/null +++ b/engine/services/engine_service.cc @@ -0,0 +1,87 @@ +#include "engine_service.h" +#include +#include 
"algorithm" +#include "utils/file_manager_utils.h" + +EngineInfo EngineService::GetEngineInfo(const std::string& engine) const { + // if engine is not found in kSupportEngine, throw runtime error + if (std::find(kSupportEngines.begin(), kSupportEngines.end(), engine) == + kSupportEngines.end()) { + // TODO: create a custom exception class + throw std::runtime_error("Engine " + engine + " is not supported!"); + } + + auto engine_status_list = GetEngineInfoList(); + + return *std::find_if( + engine_status_list.begin(), engine_status_list.end(), + [&engine](const EngineInfo& e) { return e.name == engine; }); +} + +std::vector EngineService::GetEngineInfoList() const { + auto ecp = file_manager_utils::GetEnginesContainerPath(); + + std::string onnx_status{"not_supported"}; + std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp") + ? "ready" + : "not_initialized"; + std::string tensorrt_status{"not_supported"}; + +#ifdef _WIN32 + onnx_status = std::filesystem::exists(ecp / "cortex.onnx") + ? "ready" + : "not_initialized"; + tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm") + ? "ready" + : "not_initialized"; +#elif defined(__linux__) + tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm") + ? "ready" + : "not_initialized"; +#endif + std::vector engines = { + {.name = "cortex.onnx", + .description = "This extension enables chat completion API calls using " + "the Onnx engine", + .version = "0.0.1", + .product_name = "Onnx Inference Engine", + .status = onnx_status}, + {.name = "cortex.llamacpp", + .description = "This extension enables chat completion API calls using " + "the LlamaCPP engine", + .version = "0.0.1", + .product_name = "LlamaCPP Inference Engine", + .status = llamacpp_status}, + {.name = "cortex.tensorrt-llm", + .description = "This extension enables chat completion API calls using " + "the TensorrtLLM engine", + .version = "0.0.1", + .product_name = "TensorrtLLM Inference Engine", + .status = tensorrt_status}, + }; + + return engines; +} + +void EngineService::UninstallEngine(const std::string& engine) { + CTL_INF("Uninstall engine " + engine); + + // TODO: Unload the model which is currently running on engine_ + + // TODO: Unload engine if is loaded + + auto ecp = file_manager_utils::GetEnginesContainerPath(); + auto engine_path = ecp / engine; + + if (!std::filesystem::exists(engine_path)) { + throw std::runtime_error("Engine " + engine + " is not installed!"); + } + + try { + std::filesystem::remove_all(engine_path); + CTL_INF("Engine " << engine << " uninstalled successfully!"); + } catch (const std::exception& e) { + CTL_ERR("Failed to uninstall engine " << engine << ": " << e.what()); + throw; + } +} diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h new file mode 100644 index 000000000..3a9a91876 --- /dev/null +++ b/engine/services/engine_service.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include + +struct EngineInfo { + std::string name; + std::string description; + std::string version; + std::string product_name; + std::string status; +}; + +class EngineService { + public: + const std::vector kSupportEngines = { + "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"}; + + EngineInfo GetEngineInfo(const std::string& engine) const; + + std::vector GetEngineInfoList() const; + + void InstallEngine(const std::string& engine); + + void UninstallEngine(const std::string& engine); +}; diff --git a/engine/utils/command_executor.h b/engine/utils/command_executor.h index 
9ba13025a..86b8e6b79 100644 --- a/engine/utils/command_executor.h +++ b/engine/utils/command_executor.h @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -46,4 +45,4 @@ class CommandExecutor { private: std::unique_ptr m_pipe{nullptr, PCLOSE}; -}; \ No newline at end of file +}; diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h new file mode 100644 index 000000000..ece860065 --- /dev/null +++ b/engine/utils/config_yaml_utils.h @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include "utils/logging_utils.h" +#include "yaml-cpp/yaml.h" +#include "yaml_config.h" + +namespace config_yaml_utils { +struct CortexConfig { + std::string dataFolderPath; + std::string host; + std::string port; +}; + +inline CortexConfig FromYaml(const std::string& path, + const std::string& variant) { + std::filesystem::path config_file_path{path}; + if (!std::filesystem::exists(config_file_path)) { + throw std::runtime_error("File not found: " + path); + } + + try { + auto node = YAML::LoadFile(config_file_path.string()); + CortexConfig config = { + .dataFolderPath = node["dataFolderPath"].as(), + .host = node["host"].as(), + .port = node["port"].as(), + }; + return config; + } catch (const YAML::BadFile& e) { + CTL_ERR("Failed to read file: " << e.what()); + throw; + } +} + +inline void DumpYamlConfig(const CortexConfig& config, + const std::string& path) { + std::filesystem::path config_file_path{path}; + if (!std::filesystem::exists(config_file_path)) { + throw std::runtime_error("File not found: " + path); + } + + try { + std::ofstream out_file(config_file_path); + if (!out_file) { + throw std::runtime_error("Failed to open output file."); + } + YAML::Node node; + node["dataFolderPath"] = config.dataFolderPath; + node["host"] = config.host; + node["port"] = config.port; + + out_file << node; + out_file.close(); + } catch (const std::exception& e) { + CTL_ERR("Error writing to file: " << e.what()); + throw; + } +} +} // namespace config_yaml_utils diff --git a/engine/utils/cortex_utils.h b/engine/utils/cortex_utils.h index 777cd6d84..32dea9321 100644 --- a/engine/utils/cortex_utils.h +++ b/engine/utils/cortex_utils.h @@ -1,21 +1,21 @@ #pragma once #include #include +#include #include #include #include #include -#include -#include #include +#include #include -#include +#include // Include platform-specific headers #ifdef _WIN32 +#include #include #include -#include #define mkdir _mkdir #else #include @@ -23,7 +23,6 @@ #endif #if __APPLE__ -#include #include #endif @@ -36,7 +35,7 @@ constexpr static auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm"; inline std::string models_folder = "./models"; inline std::string logs_folder = "./logs"; inline std::string logs_base_name = "./logs/cortex"; -inline size_t log_file_size_limit = 20000000; // ~20 mb +inline size_t log_file_size_limit = 20000000; // ~20 mb inline std::string extractBase64(const std::string& input) { std::regex pattern("base64,(.*)"); @@ -273,7 +272,8 @@ inline drogon::HttpResponsePtr CreateCortexHttpResponse() { return resp; } -inline drogon::HttpResponsePtr CreateCortexHttpJsonResponse(const Json::Value& data) { +inline drogon::HttpResponsePtr CreateCortexHttpJsonResponse( + const Json::Value& data) { auto resp = drogon::HttpResponse::newHttpJsonResponse(data); #ifdef ALLOW_ALL_CORS LOG_INFO << "Respond for all cors!"; @@ -342,4 +342,4 @@ inline std::string GetCurrentPath() { } #endif -} // namespace cortex_utils \ No newline at end of file +} // namespace cortex_utils diff --git 
a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h index a3c2d39c6..71a3a5add 100644 --- a/engine/utils/file_manager_utils.h +++ b/engine/utils/file_manager_utils.h @@ -1,8 +1,9 @@ #pragma once -#include "logging_utils.h" #include #include #include +#include "logging_utils.h" +#include "services/download_service.h" #if defined(__APPLE__) && defined(__MACH__) #include @@ -13,6 +14,7 @@ #endif namespace file_manager_utils { +constexpr std::string_view kCortexConfigurationFileName = ".cortexrc"; inline std::filesystem::path GetExecutableFolderContainerPath() { #if defined(__APPLE__) && defined(__MACH__) @@ -27,7 +29,6 @@ inline std::filesystem::path GetExecutableFolderContainerPath() { return std::filesystem::current_path(); } #elif defined(__linux__) - // TODO: haven't tested char buffer[1024]; ssize_t len = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1); if (len != -1) { @@ -39,7 +40,6 @@ inline std::filesystem::path GetExecutableFolderContainerPath() { return std::filesystem::current_path(); } #elif defined(_WIN32) - // TODO: haven't tested char buffer[MAX_PATH]; GetModuleFileNameA(NULL, buffer, MAX_PATH); CTL_INF("Executable path: " << buffer); @@ -50,15 +50,84 @@ inline std::filesystem::path GetExecutableFolderContainerPath() { #endif } +inline std::filesystem::path GetHomeDirectoryPath() { +#ifdef _WIN32 + const char* homeDir = std::getenv("USERPROFILE"); + if (!homeDir) { + // Fallback if USERPROFILE is not set + const char* homeDrive = std::getenv("HOMEDRIVE"); + const char* homePath = std::getenv("HOMEPATH"); + if (homeDrive && homePath) { + return std::filesystem::path(homeDrive) / std::filesystem::path(homePath); + } else { + throw std::runtime_error("Cannot determine the home directory"); + } + } +#else + const char* homeDir = std::getenv("HOME"); + if (!homeDir) { + throw std::runtime_error("Cannot determine the home directory"); + } +#endif + return std::filesystem::path(homeDir); +} + +inline std::filesystem::path GetConfigurationPath() { + auto home_path = GetHomeDirectoryPath(); + auto configuration_path = home_path / kCortexConfigurationFileName; + return configuration_path; +} + +inline std::filesystem::path GetCortexPath() { + // TODO: We will need to support user to move the data folder to other place. + // TODO: get the variant of cortex. As discussed, we will have: prod, beta, nightly + // currently we will store cortex data at ~/.cortex + + auto home_path = GetHomeDirectoryPath(); + auto cortex_path = home_path / ".cortex"; + if (!std::filesystem::exists(cortex_path)) { + CTL_INF("Cortex home folder not found. Create one: " + + cortex_path.string()); + std::filesystem::create_directory(cortex_path); + } + return cortex_path; +} + +inline std::filesystem::path GetModelsContainerPath() { + auto cortex_path = GetCortexPath(); + auto models_container_path = cortex_path / "models"; + + if (!std::filesystem::exists(models_container_path)) { + CTL_INF("Model container folder not found. Create one: " + << models_container_path.string()); + std::filesystem::create_directory(models_container_path); + } + + return models_container_path; +} + +inline std::filesystem::path GetEnginesContainerPath() { + auto cortex_path = GetCortexPath(); + auto engines_container_path = cortex_path / "engines"; + + if (!std::filesystem::exists(engines_container_path)) { + CTL_INF("Engine container folder not found. 
Create one: " + << engines_container_path.string()); + std::filesystem::create_directory(engines_container_path); + } + + return engines_container_path; +} + inline std::filesystem::path GetContainerFolderPath( const std::string_view type) { const auto current_path{GetExecutableFolderContainerPath()}; auto container_folder_path = std::filesystem::path{}; if (type == "Model") { - container_folder_path = current_path / "models"; + container_folder_path = GetModelsContainerPath(); } else if (type == "Engine") { - container_folder_path = current_path / "engines"; + container_folder_path = GetEnginesContainerPath(); } else if (type == "CudaToolkit") { container_folder_path = current_path; } else { @@ -88,4 +157,4 @@ inline std::string downloadTypeToString(DownloadType type) { } } -} // namespace file_manager_utils \ No newline at end of file +} // namespace file_manager_utils diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index e57725301..ede7915d2 100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -2,6 +2,7 @@ #include #include +#include #include #include "utils/command_executor.h" #include "utils/logging_utils.h" @@ -292,7 +293,9 @@ inline std::vector GetGpuInfoListVulkan() { gpuInfoList.push_back(gpuInfo); ++iter; } - } catch (const std::exception& e) {} + } catch (const std::exception& e) { + LOG_ERROR << "Error: " << e.what(); + } return gpuInfoList; }
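For reviewers, a minimal usage sketch of the new EngineService API added in engine/services/engine_service.h; the standalone main() wrapper is illustrative only and not part of this patch, it simply mirrors how engine_get_cmd.cc and engine_uninstall_cmd.cc consume the service:

#include <iostream>
#include "services/engine_service.h"

int main() {
  EngineService engine_service;

  // List every known engine with its install status
  // ("ready", "not_initialized", or "not_supported").
  for (const auto& info : engine_service.GetEngineInfoList()) {
    std::cout << info.name << ": " << info.status << "\n";
  }

  // Look up a single engine; GetEngineInfo throws std::runtime_error
  // for names that are not in kSupportEngines.
  try {
    auto info = engine_service.GetEngineInfo("cortex.llamacpp");
    std::cout << info.product_name << " " << info.version << "\n";
  } catch (const std::runtime_error& e) {
    std::cerr << e.what() << "\n";
  }

  // UninstallEngine removes the engine directory under the engines
  // container path and throws if that directory does not exist.
  try {
    engine_service.UninstallEngine("cortex.llamacpp");
  } catch (const std::exception& e) {
    std::cerr << e.what() << "\n";
  }
  return 0;
}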
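Taken together, a quick reference for the user-facing surface this change adds. Engine names come from EngineService::kSupportEngines; the `cortex` binary name and the /engines route prefix that Drogon derives from the Engines controller class name are assumptions, not confirmed by this diff:

# CLI, wired up in CommandLineParser::EngineManagement / CommandLineParser::EngineGet
cortex engines cortex.llamacpp uninstall     # commands::EngineUninstallCmd
cortex engines get cortex.llamacpp           # commands::EngineGetCmd

# HTTP, from the Engines controller METHOD_LIST
POST   /engines/{engine}/init    # InstallEngine (renamed from InitEngine)
GET    /engines                  # ListEngine
GET    /engines/{engine}         # GetEngine
DELETE /engines/{engine}         # UninstallEngine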
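And a sketch of a round trip through the new config_yaml_utils helpers, using placeholder values; note that FromYaml currently ignores its variant parameter and DumpYamlConfig requires the target file to already exist:

#include <string>
#include "utils/config_yaml_utils.h"

void UpdatePort(const std::string& config_path) {
  // Reads dataFolderPath/host/port from an existing YAML file;
  // throws std::runtime_error if the file is missing.
  auto cfg = config_yaml_utils::FromYaml(config_path, /*variant=*/"prod");

  cfg.port = "8080";  // placeholder value for illustration only

  // Writes the three keys back; also throws if the file is missing.
  config_yaml_utils::DumpYamlConfig(cfg, config_path);
}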