From 8139f23dc84d037f6df8a55e1cd30253edf90275 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 2 Dec 2024 15:13:34 +0700 Subject: [PATCH] feat: update engine interface --- engine/controllers/engines.cc | 5 +- engine/cortex-common/EngineI.h | 15 +++ engine/services/engine_service.cc | 193 ++++++++---------------------- engine/services/engine_service.h | 2 + engine/utils/config_yaml_utils.h | 11 +- engine/utils/file_manager_utils.h | 1 + 6 files changed, 81 insertions(+), 146 deletions(-) diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 9e110bd66..8a5e5010b 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -23,10 +23,9 @@ std::string NormalizeEngine(const std::string& engine) { void Engines::ListEngine( const HttpRequestPtr& req, std::function&& callback) const { - std::vector supported_engines{kLlamaEngine, kOnnxEngine, - kTrtLlmEngine}; Json::Value ret; - for (const auto& engine : supported_engines) { + auto engine_names = engine_service_->GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { auto installed_engines = engine_service_->GetInstalledEngineVariants(engine); if (installed_engines.has_error()) { diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 95ce605de..6b362b395 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -7,8 +8,22 @@ #include "trantor/utils/Logger.h" class EngineI { public: + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + virtual ~EngineI() {} + virtual void Load(EngineLoadOption opts) = 0; + // cortex.llamacpp interface virtual void HandleChatCompletion( std::shared_ptr json_body, diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index c52e32ef0..9d33d4d76 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -179,6 +179,7 @@ cpp::result EngineService::UninstallEngineVariant( const std::string& engine, const std::optional version, const std::optional variant) { auto ne = NormalizeEngine(engine); + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -272,6 +273,7 @@ cpp::result EngineService::DownloadEngine( if (selected_variant == std::nullopt) { return cpp::fail("Failed to find a suitable variant for " + engine); } + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(engine)) { CTL_INF("Engine " << engine << " is already loaded, unloading it"); auto unload_res = UnloadEngine(engine); @@ -503,6 +505,7 @@ EngineService::SetDefaultEngineVariant(const std::string& engine, " is not installed yet!"); } + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -631,7 +634,6 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const { } bool EngineService::IsEngineLoaded(const std::string& engine) { - std::lock_guard lock(engines_mutex_); auto ne = NormalizeEngine(engine); return engines_.find(ne) != engines_.end(); } @@ -651,6 +653,7 @@ cpp::result EngineService::LoadEngine( const std::string& engine_name) { auto ne = NormalizeEngine(engine_name); + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded"); return {}; @@ -672,6 +675,7 @@ cpp::result EngineService::LoadEngine( auto user_defined_engine_path = getenv("ENGINE_PATH"); #endif + auto custom_engine_path = user_defined_engine_path != nullptr; CTL_DBG("user defined engine path: " << user_defined_engine_path); const std::filesystem::path engine_dir_path = [&] { if (user_defined_engine_path != nullptr) { @@ -685,8 +689,6 @@ cpp::result EngineService::LoadEngine( } }(); - CTL_DBG("Engine path: " << engine_dir_path.string()); - if (!std::filesystem::exists(engine_dir_path)) { CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!"); return cpp::fail("Directory " + engine_dir_path.string() + @@ -696,164 +698,65 @@ cpp::result EngineService::LoadEngine( CTL_INF("Engine path: " << engine_dir_path.string()); try { -#if defined(_WIN32) - // TODO(?) If we only allow to load an engine at a time, the logic is simpler. - // We would like to support running multiple engines at the same time. Therefore, - // the adding/removing dll directory logic is quite complicated: - // 1. If llamacpp is loaded and new requested engine is tensorrt-llm: - // Unload the llamacpp dll directory then load the tensorrt-llm - // 2. If tensorrt-llm is loaded and new requested engine is llamacpp: - // Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful) - // 3. Add dll directory if met other conditions - - auto add_dll = [this](const std::string& e_type, - const std::filesystem::path& p) { - if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) { - CTL_DBG("Added dll directory: " << p.string()); - engines_[e_type].cookie = cookie; - } else { - CTL_WRN("Could not add dll directory: " << p.string()); - } - - auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type); - if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str()); - cuda_cookie != 0) { - CTL_DBG("Added cuda dll directory: " << p.string()); - engines_[e_type].cuda_cookie = cuda_cookie; - } else { - CTL_WRN("Could not add cuda dll directory: " << p.string()); - } + auto dylib = + std::make_unique(engine_dir_path.string(), "engine"); + + auto config = file_manager_utils::GetCortexConfig(); + + auto log_path = + std::filesystem::path(config.logFolderPath) / + std::filesystem::path( + config.logLlamaCppPath); // for now seems like we use same log path + + // init + auto func = dylib->get_function("get_engine"); + auto engine_obj = func(); + auto load_opts = EngineI::EngineLoadOption{ + .engine_path = engine_dir_path, + .cuda_path = file_manager_utils::GetCudaToolkitPath(ne), + .custom_engine_path = custom_engine_path, + .log_path = log_path, + .max_log_lines = config.maxLogLines, + .log_level = logging_utils_helper::global_log_level, }; + engine_obj->Load(load_opts); -#if defined(_WIN32) - if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH")); -#else - if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); -#endif - should_use_dll_search_path) { - if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && - should_use_dll_search_path) { - - { - std::lock_guard lock(engines_mutex_); - // Remove llamacpp dll directory - if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) { - CTL_WRN("Could not remove dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed dll directory: " << kLlamaRepo); - } - if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed cuda dll directory: " << kLlamaRepo); - } - } - - add_dll(ne, engine_dir_path); - } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) { - // Do nothing - } else { - add_dll(ne, engine_dir_path); - } - } -#endif - { - std::lock_guard lock(engines_mutex_); - engines_[ne].dl = std::make_unique( - engine_dir_path.string(), "engine"); - } -#if defined(__linux__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - CTL_INF("llamacpp_path: " << llamacpp_path); - // tensorrt is not supported for now - // auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); - - auto new_v = llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif + engines_[ne].engine = engine_obj; + engines_[ne].dl = std::move(dylib); + CTL_DBG("Engine loaded: " + << ne); // TODO: output more information like version and variant + return {}; } catch (const cortex_cpp::dylib::load_error& e) { CTL_ERR("Could not load engine: " << e.what()); - { - std::lock_guard lock(engines_mutex_); - engines_.erase(ne); - } + engines_.erase(ne); return cpp::fail("Could not load engine " + ne + ": " + e.what()); } - - { - std::lock_guard lock(engines_mutex_); - auto func = engines_[ne].dl->get_function("get_engine"); - engines_[ne].engine = func(); - - auto& en = std::get(engines_[ne].engine); - if (ne == kLlamaRepo) { //fix for llamacpp engine first - auto config = file_manager_utils::GetCortexConfig(); - if (en->IsSupported("SetFileLogger")) { - en->SetFileLogger(config.maxLogLines, - (std::filesystem::path(config.logFolderPath) / - std::filesystem::path(config.logLlamaCppPath)) - .string()); - } else { - CTL_WRN("Method SetFileLogger is not supported yet"); - } - if (en->IsSupported("SetLogLevel")) { - en->SetLogLevel(logging_utils_helper::global_log_level); - } else { - CTL_WRN("Method SetLogLevel is not supported yet"); - } - } - CTL_DBG("loaded engine: " << ne); - } - return {}; } cpp::result EngineService::UnloadEngine( const std::string& engine) { auto ne = NormalizeEngine(engine); - { - std::lock_guard lock(engines_mutex_); - if (!IsEngineLoaded(ne)) { - return cpp::fail("Engine " + ne + " is not loaded yet!"); - } - EngineI* e = std::get(engines_[ne].engine); - delete e; + LOG_INFO << "Unloading engine " << ne; -#if defined(_WIN32) - if (!RemoveDllDirectory(engines_[ne].cookie)) { - CTL_WRN("Could not remove dll directory: " << ne); - } else { - CTL_DBG("Removed dll directory: " << ne); - } - if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << ne); - } else { - CTL_DBG("Removed cuda dll directory: " << ne); - } -#endif - engines_.erase(ne); + std::lock_guard lock(engines_mutex_); + if (!IsEngineLoaded(ne)) { + return cpp::fail("Engine " + ne + " is not loaded yet!"); } - CTL_DBG("Unloaded engine " + ne); + EngineI* e = std::get(engines_[ne].engine); + delete e; + engines_.erase(ne); + CTL_DBG("Engine unloaded: " + ne); return {}; } std::vector EngineService::GetLoadedEngines() { - { - std::lock_guard lock(engines_mutex_); - std::vector loaded_engines; - for (const auto& [key, value] : engines_) { - loaded_engines.push_back(value.engine); - } - return loaded_engines; + std::lock_guard lock(engines_mutex_); + std::vector loaded_engines; + for (const auto& [key, value] : engines_) { + loaded_engines.push_back(value.engine); } + return loaded_engines; } cpp::result @@ -899,6 +802,7 @@ cpp::result EngineService::UpdateEngine( CTL_INF("Default variant: " << default_variant->variant << ", version: " + default_variant->version); + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -955,3 +859,8 @@ cpp::result EngineService::UpdateEngine( .from = default_variant->version, .to = latest_version->tag_name}; } + +cpp::result, std::string> +EngineService::GetSupportedEngineNames() { + return file_manager_utils::GetCortexConfig().supportedEngines; +} diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 47d7c272f..61af7aec6 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -123,6 +123,8 @@ class EngineService : public EngineServiceI { cpp::result UpdateEngine( const std::string& engine); + cpp::result, std::string> GetSupportedEngineNames(); + private: cpp::result DownloadEngine( const std::string& engine, const std::string& version = "latest", diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index 73c990996..7ba72a157 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -5,6 +5,7 @@ #include #include #include +#include "utils/engine_constants.h" #include "utils/logging_utils.h" #include "utils/result.hpp" #include "yaml-cpp/yaml.h" @@ -22,6 +23,8 @@ constexpr const auto kDefaultCorsEnabled = true; const std::vector kDefaultEnabledOrigins{ "http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"}; constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1"; +const std::vector kDefaultSupportedEngines{ + kLlamaEngine, kOnnxEngine, kTrtLlmEngine}; struct CortexConfig { std::string logFolderPath; @@ -59,6 +62,7 @@ struct CortexConfig { bool verifyPeerSsl; bool verifyHostSsl; + std::vector supportedEngines; }; class CortexConfigMgr { @@ -117,6 +121,7 @@ class CortexConfigMgr { node["noProxy"] = config.noProxy; node["verifyPeerSsl"] = config.verifyPeerSsl; node["verifyHostSsl"] = config.verifyHostSsl; + node["supportedEngines"] = config.supportedEngines; out_file << node; out_file.close(); @@ -151,7 +156,7 @@ class CortexConfigMgr { !node["proxyUsername"] || !node["proxyPassword"] || !node["verifyPeerSsl"] || !node["verifyHostSsl"] || !node["verifyProxySsl"] || !node["verifyProxyHostSsl"] || - !node["noProxy"]); + !node["noProxy"] || !node["supportedEngines"]); CortexConfig config = { .logFolderPath = node["logFolderPath"] @@ -235,6 +240,10 @@ class CortexConfigMgr { .verifyHostSsl = node["verifyHostSsl"] ? node["verifyHostSsl"].as() : default_cfg.verifyHostSsl, + .supportedEngines = + node["supportedEngines"] + ? node["supportedEngines"].as>() + : default_cfg.supportedEngines, }; if (should_update_config) { l.unlock(); diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h index 72310385c..0b3c1ca6a 100644 --- a/engine/utils/file_manager_utils.h +++ b/engine/utils/file_manager_utils.h @@ -202,6 +202,7 @@ inline config_yaml_utils::CortexConfig GetDefaultConfig() { .noProxy = config_yaml_utils::kDefaultNoProxy, .verifyPeerSsl = true, .verifyHostSsl = true, + .supportedEngines = config_yaml_utils::kDefaultSupportedEngines, }; }