diff --git a/engine/commands/engine_get_cmd.cc b/engine/commands/engine_get_cmd.cc
index 15868d121..17ffeaf66 100644
--- a/engine/commands/engine_get_cmd.cc
+++ b/engine/commands/engine_get_cmd.cc
@@ -1,64 +1,28 @@
 #include "engine_get_cmd.h"
 #include <iostream>
 #include <tabulate/table.hpp>
-#include "utils/file_manager_utils.h"
+#include "services/engine_service.h"
 #include "utils/logging_utils.h"
 
 namespace commands {
 
 void EngineGetCmd::Exec() const {
-  CTL_INF("[EneingeGetCmd] engine: " << engine_);
+  CTL_INF("[EngineGetCmd] engine: " << engine_);
 
-  auto ecp = file_manager_utils::GetEnginesContainerPath();
-  std::string onnx_status{"not_supported"};
-  std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp")
-                                    ? "ready"
-                                    : "not_initialized";
-  std::string tensorrt_status{"not_supported"};
-
-#ifdef _WIN32
-  onnx_status = std::filesystem::exists(ecp / "cortex.onnx")
-                    ? "ready"
-                    : "not_initialized";
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? "ready"
-                        : "not_initialized";
-#elif defined(__linux__)
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? "ready"
-                        : "not_initialized";
-#endif
-  std::vector<EngineInfo> engines = {
-      {.name = "cortex.onnx",
-       .description = "This extension enables chat completion API calls using "
-                      "the Onnx engine",
-       .version = "0.0.1",
-       .product_name = "Onnx Inference Engine",
-       .status = onnx_status},
-      {.name = "cortex.llamacpp",
-       .description = "This extension enables chat completion API calls using "
-                      "the LlamaCPP engine",
-       .version = "0.0.1",
-       .product_name = "LlamaCPP Inference Engine",
-       .status = llamacpp_status},
-      {.name = "cortex.tensorrt-llm",
-       .description = "This extension enables chat completion API calls using "
-                      "the TensorrtLLM engine",
-       .version = "0.0.1",
-       .product_name = "TensorrtLLM Inference Engine",
-       .status = tensorrt_status},
-  };
-
-  tabulate::Table table;
-  table.add_row({"name", "description", "version", "product name", "status"});
-  table.format().font_color(tabulate::Color::green);
-  for (auto& engine : engines) {
-    if (engine.name == engine_) {
-      table.add_row({engine.name, engine.description, engine.version,
-                     engine.product_name, engine.status});
-    }
+  auto engine_service = EngineService();
+  try {
+    auto status = engine_service.GetEngineInfo(engine_);
+    tabulate::Table table;
+    table.add_row({"name", "description", "version", "product name", "status"});
+    table.format().font_color(tabulate::Color::green);
+    table.add_row({status.name, status.description, status.version,
+                   status.product_name, status.status});
+    std::cout << table << std::endl;
+  } catch (const std::runtime_error& e) {
+    std::cerr << "Engine " << engine_ << " is not supported!"
<< "\n"; + } catch (const std::exception& e) { + std::cerr << "Failed to get engine info for " << engine_ << ": " << e.what() + << "\n"; } - - std::cout << table << std::endl; } }; // namespace commands diff --git a/engine/commands/engine_get_cmd.h b/engine/commands/engine_get_cmd.h index cbd2cd876..505ee5120 100644 --- a/engine/commands/engine_get_cmd.h +++ b/engine/commands/engine_get_cmd.h @@ -1,16 +1,9 @@ #pragma once + #include namespace commands { class EngineGetCmd { - struct EngineInfo { - std::string name; - std::string description; - std::string version; - std::string product_name; - std::string status; - }; - public: EngineGetCmd(const std::string& engine) : engine_{engine} {}; @@ -19,5 +12,4 @@ class EngineGetCmd { private: std::string engine_; }; - } // namespace commands diff --git a/engine/commands/engine_list_cmd.cc b/engine/commands/engine_list_cmd.cc index 039b2a0aa..21220e7e4 100644 --- a/engine/commands/engine_list_cmd.cc +++ b/engine/commands/engine_list_cmd.cc @@ -1,49 +1,23 @@ #include "engine_list_cmd.h" -#include #include -#include "utils/file_manager_utils.h" +#include "services/engine_service.h" namespace commands { bool EngineListCmd::Exec() { - auto ecp = file_manager_utils::GetEnginesContainerPath(); - std::string onnx_status{"not_supported"}; - std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp") - ? "ready" - : "not_initialized"; - std::string tensorrt_status{"not_supported"}; -#ifdef _WIN32 - onnx_status = std::filesystem::exists(ecp / "cortex.onnx") - ? "ready" - : "not_initialized"; - tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm") - ? "ready" - : "not_initialized"; -#elif defined(__linux__) - tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm") - ? 
"ready" - : "not_initialized"; -#endif + auto engine_service = EngineService(); + auto status_list = engine_service.GetEngineInfoList(); tabulate::Table table; - table.add_row( - {"(Index)", "name", "description", "version", "product name", "status"}); table.format().font_color(tabulate::Color::green); table.add_row( - {"1", "cortex.onnx", - "This extension enables chat completion API calls using the Onnx engine", - "0.0.1", "Onnx Inference Engine", onnx_status}); - - table.add_row({"2", "cortex.llamacpp", - "This extension enables chat completion API calls using the " - "LlamaCPP engine", - "0.0.1", "LlamaCPP Inference Engine", llamacpp_status}); - - // tensorrt llm - table.add_row({"3", "cortex.tensorrt-llm", - "This extension enables chat completion API calls using the " - "TensorrtLLM engine", - "0.0.1", "TensorrtLLM Inference Engine", tensorrt_status}); + {"(Index)", "name", "description", "version", "product name", "status"}); + for (int i = 0; i < status_list.size(); i++) { + auto status = status_list[i]; + std::string index = std::to_string(i + 1); + table.add_row({index, status.name, status.description, status.version, + status.product_name, status.status}); + } for (int i = 0; i < 6; i++) { table[0][i] @@ -62,5 +36,4 @@ bool EngineListCmd::Exec() { std::cout << table << std::endl; return true; } - }; // namespace commands diff --git a/engine/controllers/command_line_parser.cc b/engine/controllers/command_line_parser.cc index 1be2386ae..e90f99873 100644 --- a/engine/controllers/command_line_parser.cc +++ b/engine/controllers/command_line_parser.cc @@ -13,6 +13,7 @@ #include "commands/run_cmd.h" #include "commands/server_stop_cmd.h" #include "config/yaml_config.h" +#include "services/engine_service.h" #include "utils/cortex_utils.h" #include "utils/logging_utils.h" @@ -185,11 +186,13 @@ void CommandLineParser::EngineManagement(CLI::App* parent, void CommandLineParser::EngineGet(CLI::App* parent) { auto get_cmd = parent->add_subcommand("get", "Get an engine info"); + auto engine_service = EngineService(); - for (auto& engine : supportedEngines_) { + for (auto& engine : engine_service.kSupportEngines) { std::string engine_name{engine}; std::string desc = "Get " + engine_name + " status"; - auto engine_get_cmd = get_cmd->add_subcommand(engine, desc); + + auto engine_get_cmd = get_cmd->add_subcommand(engine_name, desc); engine_get_cmd->callback([engine_name] { commands::EngineGetCmd cmd(engine_name); cmd.Exec(); diff --git a/engine/controllers/command_line_parser.h b/engine/controllers/command_line_parser.h index 718b94f25..a15aa0529 100644 --- a/engine/controllers/command_line_parser.h +++ b/engine/controllers/command_line_parser.h @@ -1,6 +1,5 @@ #pragma once -#include #include "CLI/CLI.hpp" class CommandLineParser { @@ -15,8 +14,4 @@ class CommandLineParser { void EngineGet(CLI::App* parent); CLI::App app_; - - // TODO: move this one to somewhere else - static constexpr std::array supportedEngines_ = { - "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"}; }; diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index b51651b93..c509ff40c 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -1,11 +1,17 @@ #include "engines.h" +#include +#include +#include +#include +#include "services/engine_service.h" #include "utils/archive_utils.h" #include "utils/cortex_utils.h" #include "utils/system_info_utils.h" -void Engines::InitEngine(const HttpRequestPtr& req, - std::function&& callback, - const std::string& engine) const { +void 
+void Engines::InstallEngine(
+    const HttpRequestPtr& req,
+    std::function<void(const HttpResponsePtr&)>&& callback,
+    const std::string& engine) const {
   LOG_DEBUG << "InitEngine, Engine: " << engine;
   if (engine.empty()) {
     Json::Value res;
@@ -114,62 +120,84 @@ void Engines::InitEngine(const HttpRequestPtr& req,
 void Engines::ListEngine(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback) const {
+  auto engine_service = EngineService();
+  auto status_list = engine_service.GetEngineInfoList();
+
   Json::Value ret;
   ret["object"] = "list";
   Json::Value data(Json::arrayValue);
-  Json::Value obj_onnx, obj_llamacpp, obj_tensorrt;
-  obj_onnx["name"] = "cortex.onnx";
-  obj_onnx["description"] =
-      "This extension enables chat completion API calls using the Onnx engine";
-  obj_onnx["version"] = "0.0.1";
-  obj_onnx["productName"] = "Onnx Inference Engine";
-
-  obj_llamacpp["name"] = "cortex.llamacpp";
-  obj_llamacpp["description"] =
-      "This extension enables chat completion API calls using the LlamaCPP "
-      "engine";
-  obj_llamacpp["version"] = "0.0.1";
-  obj_llamacpp["productName"] = "LlamaCPP Inference Engine";
-
-  obj_tensorrt["name"] = "cortex.tensorrt-llm";
-  obj_tensorrt["description"] =
-      "This extension enables chat completion API calls using the TensorrtLLM "
-      "engine";
-  obj_tensorrt["version"] = "0.0.1";
-  obj_tensorrt["productName"] = "TensorrtLLM Inference Engine";
-
-#ifdef _WIN32
-  if (std::filesystem::exists(std::filesystem::current_path().string() +
-                              cortex_utils::kOnnxLibPath)) {
-    obj_onnx["status"] = "ready";
-  } else {
-    obj_onnx["status"] = "not_initialized";
-  }
-#else
-  obj_onnx["status"] = "not_supported";
-#endif
-  // lllamacpp
-  if (std::filesystem::exists(std::filesystem::current_path().string() +
-                              cortex_utils::kLlamaLibPath)) {
-
-    obj_llamacpp["status"] = "ready";
-  } else {
-    obj_llamacpp["status"] = "not_initialized";
-  }
-  // tensorrt llm
-  if (std::filesystem::exists(std::filesystem::current_path().string() +
-                              cortex_utils::kTensorrtLlmPath)) {
-    obj_tensorrt["status"] = "ready";
-  } else {
-    obj_tensorrt["status"] = "not_initialized";
+  for (auto& status : status_list) {
+    Json::Value ret;
+    ret["name"] = status.name;
+    ret["description"] = status.description;
+    ret["version"] = status.version;
+    ret["productName"] = status.product_name;
+    ret["status"] = status.status;
+
+    data.append(std::move(ret));
   }
-  data.append(std::move(obj_onnx));
-  data.append(std::move(obj_llamacpp));
-  data.append(std::move(obj_tensorrt));
   ret["data"] = data;
   ret["result"] = "OK";
   auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
   resp->setStatusCode(k200OK);
   callback(resp);
 }
+
+void Engines::GetEngine(const HttpRequestPtr& req,
+                        std::function<void(const HttpResponsePtr&)>&& callback,
+                        const std::string& engine) const {
+  auto engine_service = EngineService();
+  try {
+    auto status = engine_service.GetEngineInfo(engine);
+    Json::Value ret;
+    ret["name"] = status.name;
+    ret["description"] = status.description;
+    ret["version"] = status.version;
+    ret["productName"] = status.product_name;
+    ret["status"] = status.status;
+
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k200OK);
+    callback(resp);
+  } catch (const std::runtime_error& e) {
+    Json::Value ret;
+    ret["message"] = e.what();
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k400BadRequest);
+    callback(resp);
+  } catch (const std::exception& e) {
+    Json::Value ret;
+    ret["message"] = e.what();
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k500InternalServerError);
+    callback(resp);
+  }
+}
+
+void Engines::UninstallEngine(
+    const HttpRequestPtr& req,
+    std::function<void(const HttpResponsePtr&)>&& callback,
+    const std::string& engine) const {
+  LOG_INFO << "[Http] Uninstall engine " << engine;
+  // TODO: think of a way to prevent code duplication. This should be shared with cmd as well
+
+  // TODO: Unload the model which is currently running on engine_
+
+  // TODO: Unload engine if is loaded
+
+  // auto ecp = file_manager_utils::GetEnginesContainerPath();
+  // auto engine_path = ecp / engine;
+  // if (!std::filesystem::exists(engine_path)) {
+  //   ("Engine " << engine_ << " is not installed!");
+  //   return;
+  // }
+  //
+  // // remove
+  // try {
+  //   std::filesystem::remove_all(engine_path);
+  //   CTL_INF("Engine " << engine_ << " uninstalled successfully!");
+  // } catch (const std::exception& e) {
+  //   CTL_ERR("Failed to uninstall engine " << engine_ + ": " << e.what());
+  // }
+}
diff --git a/engine/controllers/engines.h b/engine/controllers/engines.h
index 2195d150c..91127e5e0 100644
--- a/engine/controllers/engines.h
+++ b/engine/controllers/engines.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include
+#include
 #include
 #include "utils/cortexso_parser.h"
@@ -9,13 +10,24 @@ using namespace drogon;
 
 class Engines : public drogon::HttpController<Engines> {
  public:
  METHOD_LIST_BEGIN
-  METHOD_ADD(Engines::InitEngine, "/{1}/init", Post);
-  METHOD_ADD(Engines::ListEngine, "/list", Get);
+  METHOD_ADD(Engines::InstallEngine, "/{1}/init", Post);
+  METHOD_ADD(Engines::UninstallEngine, "/{1}", Delete);
+  METHOD_ADD(Engines::ListEngine, "", Get);
+  METHOD_ADD(Engines::GetEngine, "/{1}", Get);
  METHOD_LIST_END
 
-  void InitEngine(const HttpRequestPtr& req,
-                  std::function<void(const HttpResponsePtr&)>&& callback,
-                  const std::string& engine) const;
+  void InstallEngine(const HttpRequestPtr& req,
+                     std::function<void(const HttpResponsePtr&)>&& callback,
+                     const std::string& engine) const;
+
   void ListEngine(const HttpRequestPtr& req,
                   std::function<void(const HttpResponsePtr&)>&& callback) const;
+
+  void GetEngine(const HttpRequestPtr& req,
+                 std::function<void(const HttpResponsePtr&)>&& callback,
+                 const std::string& engine) const;
+
+  void UninstallEngine(const HttpRequestPtr& req,
+                       std::function<void(const HttpResponsePtr&)>&& callback,
+                       const std::string& engine) const;
 };
diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc
new file mode 100644
index 000000000..097180616
--- /dev/null
+++ b/engine/services/engine_service.cc
@@ -0,0 +1,63 @@
+#include "engine_service.h"
+#include "algorithm"
+#include "utils/file_manager_utils.h"
+
+EngineInfo EngineService::GetEngineInfo(const std::string& engine) const {
+  // if engine is not found in kSupportEngine, throw runtime error
+  if (std::find(kSupportEngines.begin(), kSupportEngines.end(), engine) ==
+      kSupportEngines.end()) {
+    // TODO: create a custom exception class
+    throw std::runtime_error("Engine " + engine + " is not supported!");
+  }
+
+  auto engine_status_list = GetEngineInfoList();
+
+  return *std::find_if(
+      engine_status_list.begin(), engine_status_list.end(),
+      [&engine](const EngineInfo& e) { return e.name == engine; });
+}
+
+std::vector<EngineInfo> EngineService::GetEngineInfoList() const {
+  auto ecp = file_manager_utils::GetEnginesContainerPath();
+
+  std::string onnx_status{"not_supported"};
+  std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp")
+                                    ? "ready"
+                                    : "not_initialized";
+  std::string tensorrt_status{"not_supported"};
+
+#ifdef _WIN32
+  onnx_status = std::filesystem::exists(ecp / "cortex.onnx")
+                    ? "ready"
+                    : "not_initialized";
+  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
"ready" + : "not_initialized"; +#elif defined(__linux__) + tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm") + ? "ready" + : "not_initialized"; +#endif + std::vector engines = { + {.name = "cortex.onnx", + .description = "This extension enables chat completion API calls using " + "the Onnx engine", + .version = "0.0.1", + .product_name = "Onnx Inference Engine", + .status = onnx_status}, + {.name = "cortex.llamacpp", + .description = "This extension enables chat completion API calls using " + "the LlamaCPP engine", + .version = "0.0.1", + .product_name = "LlamaCPP Inference Engine", + .status = llamacpp_status}, + {.name = "cortex.tensorrt-llm", + .description = "This extension enables chat completion API calls using " + "the TensorrtLLM engine", + .version = "0.0.1", + .product_name = "TensorrtLLM Inference Engine", + .status = tensorrt_status}, + }; + + return engines; +} diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h new file mode 100644 index 000000000..3a9a91876 --- /dev/null +++ b/engine/services/engine_service.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include + +struct EngineInfo { + std::string name; + std::string description; + std::string version; + std::string product_name; + std::string status; +}; + +class EngineService { + public: + const std::vector kSupportEngines = { + "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"}; + + EngineInfo GetEngineInfo(const std::string& engine) const; + + std::vector GetEngineInfoList() const; + + void InstallEngine(const std::string& engine); + + void UninstallEngine(const std::string& engine); +};