diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx index 84000767b..6bb966f60 100644 --- a/docs/docs/engines/engine-extension.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -22,12 +22,32 @@ First, create an engine that implements the `EngineI.h` interface. Here's the in ```cpp class EngineI { public: - struct EngineLoadOption{}; - struct EngineUnloadOption{}; + struct RegisterLibraryOption { + std::vector paths; + }; + + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + + struct EngineUnloadOption { + bool unload_dll; + }; virtual ~EngineI() {} + virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; + virtual void Load(EngineLoadOption opts) = 0; + virtual void Unload(EngineUnloadOption opts) = 0; // Cortex.llamacpp interface methods @@ -65,7 +85,71 @@ class EngineI { }; ``` -Note that Cortex will call `Load` before loading any models and `Unload` when stopping the engine. +#### Lifecycle Management + +##### RegisterLibraryPath + +```cpp +virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; +``` + +This method is called during engine initialization to set up dynamic library search paths. For example, in Linux, we still have to use `LD_LIBRARY_PATH` to add CUDA dependencies to the search path. + +**Parameters:** + +- `opts.paths`: Vector of filesystem paths that the engine should register + +**Implementation Requirements:** + +- Register provided paths for dynamic library loading +- Handle invalid paths gracefully +- Thread-safe implementation +- No exceptions should escape the method + +##### Load + +```cpp +virtual void Load(EngineLoadOption opts) = 0; +``` + +Initializes the engine with the provided configuration options. 
+ +**Parameters:** + +- `engine_path`: Base path for engine files +- `cuda_path`: Path to CUDA installation +- `custom_engine_path`: Flag for using custom engine location +- `log_path`: Location for log files +- `max_log_lines`: Maximum number of lines per log file +- `log_level`: Logging verbosity level + +**Implementation Requirements:** + +- Validate all paths before use +- Initialize engine components +- Set up logging configuration +- Handle missing dependencies gracefully +- Clean initialization state in case of failures + +##### Unload + +```cpp +virtual void Unload(EngineUnloadOption opts) = 0; +``` + +Performs cleanup and shutdown of the engine. + +**Parameters:** + +- `unload_dll`: Boolean flag indicating whether to unload dynamic libraries + +**Implementation Requirements:** + +- Clean up all allocated resources +- Close file handles and connections +- Release memory +- Ensure proper shutdown of running models +- Handle cleanup in a thread-safe manner ### 2. Create a Dynamic Library @@ -98,7 +182,7 @@ To test your engine locally: 1. Create a directory structure following this hierarchy: -``` +```bash engines/ └── cortex.llamacpp/ └── mac-arm64/ @@ -107,12 +191,12 @@ engines/ └── version.txt ``` -2. Configure your engine: +1. Configure your engine: - Edit the `~/.cortexrc` file to register your engine name - Add your model with the appropriate engine field in `model.yaml` -3. Testing: +2. 
Testing: - Start the engine - Load your model - Verify functionality diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc index ba4f7bd82..3d52f3d25 100644 --- a/engine/cli/commands/server_start_cmd.cc +++ b/engine/cli/commands/server_start_cmd.cc @@ -1,9 +1,12 @@ #include "server_start_cmd.h" #include "commands/cortex_upd_cmd.h" +#include "services/engine_service.h" #include "utils/cortex_utils.h" -#include "utils/engine_constants.h" #include "utils/file_manager_utils.h" + +#if defined(_WIN32) || defined(_WIN64) #include "utils/widechar_conv.h" +#endif namespace commands { @@ -108,22 +111,9 @@ bool ServerStartCmd::Exec(const std::string& host, int port, std::cerr << "Could not start server: " << std::endl; return false; } else if (pid == 0) { - // No need to configure LD_LIBRARY_PATH for macOS -#if !defined(__APPLE__) || !defined(__MACH__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); + // Some engines require the lib search path to be registered before the process is created + EngineService().RegisterEngineLibPath(); - auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif std::string p = cortex_utils::GetCurrentPath() + "/" + exe; execl(p.c_str(), exe.c_str(), "--start-server", "--config_file_path", get_config_file_path().c_str(), "--data_folder_path", diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 9e110bd66..8a5e5010b 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -23,10 +23,9 @@ std::string NormalizeEngine(const std::string& engine) { void Engines::ListEngine( const HttpRequestPtr& req, 
std::function&& callback) const { - std::vector supported_engines{kLlamaEngine, kOnnxEngine, - kTrtLlmEngine}; Json::Value ret; - for (const auto& engine : supported_engines) { + auto engine_names = engine_service_->GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { auto installed_engines = engine_service_->GetInstalledEngineVariants(engine); if (installed_engines.has_error()) { diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 95ce605de..da2f5a5ab 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -7,8 +8,37 @@ #include "trantor/utils/Logger.h" class EngineI { public: + struct RegisterLibraryOption { + std::vector paths; + }; + + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + + struct EngineUnloadOption { + bool unload_dll; + }; + virtual ~EngineI() {} + /** + * Called before starting the process, to register dependency search paths. 
+ */ + virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; + + virtual void Load(EngineLoadOption opts) = 0; + + virtual void Unload(EngineUnloadOption opts) = 0; + // cortex.llamacpp interface virtual void HandleChatCompletion( std::shared_ptr json_body, diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index c52e32ef0..0df4a8ccb 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include "algorithm" #include "utils/archive_utils.h" #include "utils/engine_constants.h" @@ -179,6 +180,7 @@ cpp::result EngineService::UninstallEngineVariant( const std::string& engine, const std::optional version, const std::optional variant) { auto ne = NormalizeEngine(engine); + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -272,6 +274,7 @@ cpp::result EngineService::DownloadEngine( if (selected_variant == std::nullopt) { return cpp::fail("Failed to find a suitable variant for " + engine); } + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(engine)) { CTL_INF("Engine " << engine << " is already loaded, unloading it"); auto unload_res = UnloadEngine(engine); @@ -503,6 +506,7 @@ EngineService::SetDefaultEngineVariant(const std::string& engine, " is not installed yet!"); } + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -631,7 +635,6 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const { } bool EngineService::IsEngineLoaded(const std::string& engine) { - std::lock_guard lock(engines_mutex_); auto ne = NormalizeEngine(engine); return engines_.find(ne) != engines_.end(); } @@ -647,16 +650,43 @@ cpp::result EngineService::GetLoadedEngine( return engines_[ne].engine; } -cpp::result 
EngineService::LoadEngine( - const std::string& engine_name) { - auto ne = NormalizeEngine(engine_name); - - if (IsEngineLoaded(ne)) { - CTL_INF("Engine " << ne << " is already loaded"); - return {}; +void EngineService::RegisterEngineLibPath() { + auto engine_names = GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { + auto ne = NormalizeEngine(engine); + try { + auto engine_dir_path_res = GetEngineDirPath(engine); + if (engine_dir_path_res.has_error()) { + CTL_ERR( + "Could not get engine dir path: " << engine_dir_path_res.error()); + continue; + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + auto dylib = std::make_unique(engine_dir_path.string(), + "engine"); + + auto cuda_path = file_manager_utils::GetCudaToolkitPath(ne); + // init + auto func = dylib->get_function("get_engine"); + auto engine = func(); + std::vector paths{}; + auto register_opts = EngineI::RegisterLibraryOption{ + .paths = paths, + }; + engine->RegisterLibraryPath(register_opts); + delete engine; + CTL_DBG("Register lib path for: " << engine); + } catch (const std::exception& e) { + CTL_WRN("Failed to registering engine lib path: " << e.what()); + } } +} - CTL_INF("Loading engine: " << ne); +cpp::result, std::string> +EngineService::GetEngineDirPath(const std::string& engine_name) { + auto ne = NormalizeEngine(engine_name); auto selected_engine_variant = GetDefaultEngineVariant(ne); @@ -672,6 +702,7 @@ cpp::result EngineService::LoadEngine( auto user_defined_engine_path = getenv("ENGINE_PATH"); #endif + auto custom_engine_path = user_defined_engine_path != nullptr; CTL_DBG("user defined engine path: " << user_defined_engine_path); const std::filesystem::path engine_dir_path = [&] { if (user_defined_engine_path != nullptr) { @@ -685,175 +716,99 @@ cpp::result EngineService::LoadEngine( } }(); - CTL_DBG("Engine path: " << engine_dir_path.string()); - if 
(!std::filesystem::exists(engine_dir_path)) { CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!"); return cpp::fail("Directory " + engine_dir_path.string() + " is not exist!"); } - CTL_INF("Engine path: " << engine_dir_path.string()); + CTL_INF("Engine path: " << engine_dir_path.string() + << ", custom_engine_path: " << custom_engine_path); + return std::make_pair(engine_dir_path, custom_engine_path); +} - try { -#if defined(_WIN32) - // TODO(?) If we only allow to load an engine at a time, the logic is simpler. - // We would like to support running multiple engines at the same time. Therefore, - // the adding/removing dll directory logic is quite complicated: - // 1. If llamacpp is loaded and new requested engine is tensorrt-llm: - // Unload the llamacpp dll directory then load the tensorrt-llm - // 2. If tensorrt-llm is loaded and new requested engine is llamacpp: - // Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful) - // 3. Add dll directory if met other conditions - - auto add_dll = [this](const std::string& e_type, - const std::filesystem::path& p) { - if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) { - CTL_DBG("Added dll directory: " << p.string()); - engines_[e_type].cookie = cookie; - } else { - CTL_WRN("Could not add dll directory: " << p.string()); - } +cpp::result EngineService::LoadEngine( + const std::string& engine_name) { + auto ne = NormalizeEngine(engine_name); - auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type); - if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str()); - cuda_cookie != 0) { - CTL_DBG("Added cuda dll directory: " << p.string()); - engines_[e_type].cuda_cookie = cuda_cookie; - } else { - CTL_WRN("Could not add cuda dll directory: " << p.string()); - } - }; + std::lock_guard lock(engines_mutex_); + if (IsEngineLoaded(ne)) { + CTL_INF("Engine " << ne << " is already loaded"); + return {}; + } -#if defined(_WIN32) - if (bool should_use_dll_search_path = 
!(_wgetenv(L"ENGINE_PATH")); -#else - if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); -#endif - should_use_dll_search_path) { - if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && - should_use_dll_search_path) { - - { - std::lock_guard lock(engines_mutex_); - // Remove llamacpp dll directory - if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) { - CTL_WRN("Could not remove dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed dll directory: " << kLlamaRepo); - } - if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed cuda dll directory: " << kLlamaRepo); - } - } + CTL_INF("Loading engine: " << ne); - add_dll(ne, engine_dir_path); - } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) { - // Do nothing - } else { - add_dll(ne, engine_dir_path); - } - } -#endif - { - std::lock_guard lock(engines_mutex_); - engines_[ne].dl = std::make_unique( - engine_dir_path.string(), "engine"); - } -#if defined(__linux__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - CTL_INF("llamacpp_path: " << llamacpp_path); - // tensorrt is not supported for now - // auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); - - auto new_v = llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif + auto engine_dir_path_res = GetEngineDirPath(ne); + if (engine_dir_path_res.has_error()) { + return cpp::fail(engine_dir_path_res.error()); + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + try { + auto dylib = + std::make_unique(engine_dir_path.string(), "engine"); + + auto config = 
file_manager_utils::GetCortexConfig(); + + auto log_path = + std::filesystem::path(config.logFolderPath) / + std::filesystem::path( + config.logLlamaCppPath); // for now seems like we use same log path + + // init + auto func = dylib->get_function("get_engine"); + auto engine_obj = func(); + auto load_opts = EngineI::EngineLoadOption{ + .engine_path = engine_dir_path, + .cuda_path = file_manager_utils::GetCudaToolkitPath(ne), + .custom_engine_path = custom_engine_path, + .log_path = log_path, + .max_log_lines = config.maxLogLines, + .log_level = logging_utils_helper::global_log_level, + }; + engine_obj->Load(load_opts); + + engines_[ne].engine = engine_obj; + engines_[ne].dl = std::move(dylib); + CTL_DBG("Engine loaded: " << ne); + return {}; } catch (const cortex_cpp::dylib::load_error& e) { CTL_ERR("Could not load engine: " << e.what()); - { - std::lock_guard lock(engines_mutex_); - engines_.erase(ne); - } + engines_.erase(ne); return cpp::fail("Could not load engine " + ne + ": " + e.what()); } - - { - std::lock_guard lock(engines_mutex_); - auto func = engines_[ne].dl->get_function("get_engine"); - engines_[ne].engine = func(); - - auto& en = std::get(engines_[ne].engine); - if (ne == kLlamaRepo) { //fix for llamacpp engine first - auto config = file_manager_utils::GetCortexConfig(); - if (en->IsSupported("SetFileLogger")) { - en->SetFileLogger(config.maxLogLines, - (std::filesystem::path(config.logFolderPath) / - std::filesystem::path(config.logLlamaCppPath)) - .string()); - } else { - CTL_WRN("Method SetFileLogger is not supported yet"); - } - if (en->IsSupported("SetLogLevel")) { - en->SetLogLevel(logging_utils_helper::global_log_level); - } else { - CTL_WRN("Method SetLogLevel is not supported yet"); - } - } - CTL_DBG("loaded engine: " << ne); - } - return {}; } cpp::result EngineService::UnloadEngine( const std::string& engine) { auto ne = NormalizeEngine(engine); - { - std::lock_guard lock(engines_mutex_); - if (!IsEngineLoaded(ne)) { - return 
cpp::fail("Engine " + ne + " is not loaded yet!"); - } - EngineI* e = std::get(engines_[ne].engine); - delete e; + LOG_INFO << "Unloading engine " << ne; -#if defined(_WIN32) - if (!RemoveDllDirectory(engines_[ne].cookie)) { - CTL_WRN("Could not remove dll directory: " << ne); - } else { - CTL_DBG("Removed dll directory: " << ne); - } - if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << ne); - } else { - CTL_DBG("Removed cuda dll directory: " << ne); - } -#endif - engines_.erase(ne); + std::lock_guard lock(engines_mutex_); + if (!IsEngineLoaded(ne)) { + return cpp::fail("Engine " + ne + " is not loaded yet!"); } - CTL_DBG("Unloaded engine " + ne); + auto* e = std::get(engines_[ne].engine); + auto unload_opts = EngineI::EngineUnloadOption{ + .unload_dll = true, + }; + e->Unload(unload_opts); + delete e; + engines_.erase(ne); + CTL_DBG("Engine unloaded: " + ne); return {}; } std::vector EngineService::GetLoadedEngines() { - { - std::lock_guard lock(engines_mutex_); - std::vector loaded_engines; - for (const auto& [key, value] : engines_) { - loaded_engines.push_back(value.engine); - } - return loaded_engines; + std::lock_guard lock(engines_mutex_); + std::vector loaded_engines; + for (const auto& [key, value] : engines_) { + loaded_engines.push_back(value.engine); } + return loaded_engines; } cpp::result @@ -899,6 +854,7 @@ cpp::result EngineService::UpdateEngine( CTL_INF("Default variant: " << default_variant->variant << ", version: " + default_variant->version); + std::lock_guard lock(engines_mutex_); if (IsEngineLoaded(ne)) { CTL_INF("Engine " << ne << " is already loaded, unloading it"); auto unload_res = UnloadEngine(ne); @@ -955,3 +911,8 @@ cpp::result EngineService::UpdateEngine( .from = default_variant->version, .to = latest_version->tag_name}; } + +cpp::result, std::string> +EngineService::GetSupportedEngineNames() { + return file_manager_utils::GetCortexConfig().supportedEngines; +} diff --git 
a/engine/services/engine_service.h b/engine/services/engine_service.h index 47d7c272f..5437cb496 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -65,6 +65,9 @@ class EngineService : public EngineServiceI { .cuda_driver_version = system_info_utils::GetDriverAndCudaVersion().second} {} + // just for initialize supported engines + EngineService() {}; + std::vector GetEngineInfoList() const; /** @@ -123,6 +126,10 @@ class EngineService : public EngineServiceI { cpp::result UpdateEngine( const std::string& engine); + cpp::result, std::string> GetSupportedEngineNames(); + + void RegisterEngineLibPath(); + private: cpp::result DownloadEngine( const std::string& engine, const std::string& version = "latest", @@ -134,6 +141,9 @@ class EngineService : public EngineServiceI { std::string GetMatchedVariant(const std::string& engine, const std::vector& variants); + cpp::result, std::string> + GetEngineDirPath(const std::string& engine_name); + cpp::result IsEngineVariantReady( const std::string& engine, const std::string& version, const std::string& variant); diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 681ca7578..a5890eab9 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -5,11 +5,11 @@ #if defined(_WIN32) || defined(_WIN64) #include #include +#include "utils/widechar_conv.h" #endif #include "cli/commands/cortex_upd_cmd.h" #include "database/hardware.h" #include "utils/cortex_utils.h" -#include "utils/widechar_conv.h" namespace services { diff --git a/engine/utils/config_yaml_utils.cc b/engine/utils/config_yaml_utils.cc index 4d6f47ebe..3c5e6b727 100644 --- a/engine/utils/config_yaml_utils.cc +++ b/engine/utils/config_yaml_utils.cc @@ -42,6 +42,7 @@ cpp::result CortexConfigMgr::DumpYamlConfig( node["noProxy"] = config.noProxy; node["verifyPeerSsl"] = config.verifyPeerSsl; node["verifyHostSsl"] = config.verifyHostSsl; + node["supportedEngines"] = 
config.supportedEngines; out_file << node; out_file.close(); diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index aa1b4027e..caaa4dacf 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -5,6 +5,7 @@ #include #include #include +#include "utils/engine_constants.h" #include "utils/logging_utils.h" #include "utils/result.hpp" #include "yaml-cpp/yaml.h" @@ -22,6 +23,8 @@ constexpr const auto kDefaultCorsEnabled = true; const std::vector kDefaultEnabledOrigins{ "http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"}; constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1"; +const std::vector kDefaultSupportedEngines{ + kLlamaEngine, kOnnxEngine, kTrtLlmEngine}; struct CortexConfig { std::string logFolderPath; @@ -59,6 +62,7 @@ struct CortexConfig { bool verifyPeerSsl; bool verifyHostSsl; + std::vector supportedEngines; }; class CortexConfigMgr { @@ -82,5 +86,4 @@ class CortexConfigMgr { CortexConfig FromYaml(const std::string& path, const CortexConfig& default_cfg); }; - } // namespace config_yaml_utils diff --git a/engine/utils/file_manager_utils.cc b/engine/utils/file_manager_utils.cc index 11128a275..4f2a68804 100644 --- a/engine/utils/file_manager_utils.cc +++ b/engine/utils/file_manager_utils.cc @@ -185,6 +185,7 @@ config_yaml_utils::CortexConfig GetDefaultConfig() { .noProxy = config_yaml_utils::kDefaultNoProxy, .verifyPeerSsl = true, .verifyHostSsl = true, + .supportedEngines = config_yaml_utils::kDefaultSupportedEngines, }; }