diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx index 8a62cd813..6bb966f60 100644 --- a/docs/docs/engines/engine-extension.mdx +++ b/docs/docs/engines/engine-extension.mdx @@ -1,89 +1,210 @@ --- -title: Building Engine Extensions +title: Adding a Third-Party Engine to Cortex description: Cortex supports Engine Extensions to integrate both local inference engines, and Remote APIs. --- -:::info -🚧 Cortex is currently under development, and this page is a stub for future development. -::: - - +We welcome suggestions and contributions to improve this integration process. Please feel free to submit issues or pull requests through our repository. diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc index ba4f7bd82..3d52f3d25 100644 --- a/engine/cli/commands/server_start_cmd.cc +++ b/engine/cli/commands/server_start_cmd.cc @@ -1,9 +1,12 @@ #include "server_start_cmd.h" #include "commands/cortex_upd_cmd.h" +#include "services/engine_service.h" #include "utils/cortex_utils.h" -#include "utils/engine_constants.h" #include "utils/file_manager_utils.h" + +#if defined(_WIN32) || defined(_WIN64) #include "utils/widechar_conv.h" +#endif namespace commands { @@ -108,22 +111,9 @@ bool ServerStartCmd::Exec(const std::string& host, int port, std::cerr << "Could not start server: " << std::endl; return false; } else if (pid == 0) { - // No need to configure LD_LIBRARY_PATH for macOS -#if !defined(__APPLE__) || !defined(__MACH__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); + // Some engines require adding library search paths before the process is created + EngineService().RegisterEngineLibPath(); - auto new_v = trt_path.string() + ":" + 
llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif std::string p = cortex_utils::GetCurrentPath() + "/" + exe; execl(p.c_str(), exe.c_str(), "--start-server", "--config_file_path", get_config_file_path().c_str(), "--data_folder_path", diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index 3d3c0c037..1d0223d9a 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -23,10 +23,9 @@ std::string NormalizeEngine(const std::string& engine) { void Engines::ListEngine( const HttpRequestPtr& req, std::function&& callback) const { - std::vector supported_engines{kLlamaEngine, kOnnxEngine, - kTrtLlmEngine}; Json::Value ret; - for (const auto& engine : supported_engines) { + auto engine_names = engine_service_->GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { auto installed_engines = engine_service_->GetInstalledEngineVariants(engine); if (installed_engines.has_error()) { diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 51e19c124..11866a708 100644 --- a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -7,8 +8,37 @@ #include "trantor/utils/Logger.h" class EngineI { public: + struct RegisterLibraryOption { + std::vector paths; + }; + + struct EngineLoadOption { + // engine + std::filesystem::path engine_path; + std::filesystem::path cuda_path; + bool custom_engine_path; + + // logging + std::filesystem::path log_path; + int max_log_lines; + trantor::Logger::LogLevel log_level; + }; + + struct EngineUnloadOption { + bool unload_dll; + }; + virtual ~EngineI() {} + /** + * Being called before starting process to register dependencies search paths. 
+ */ + virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0; + + virtual void Load(EngineLoadOption opts) = 0; + + virtual void Unload(EngineUnloadOption opts) = 0; + // cortex.llamacpp interface virtual void HandleChatCompletion( std::shared_ptr json_body, diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index fe5317c7d..4f2122f6b 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "algorithm" #include "database/engines.h" @@ -17,6 +18,7 @@ #include "utils/semantic_version_utils.h" #include "utils/system_info_utils.h" #include "utils/url_parser.h" + namespace { std::string GetSuitableCudaVersion(const std::string& engine, const std::string& cuda_driver_version) { @@ -701,6 +703,87 @@ cpp::result EngineService::LoadEngine( CTL_INF("Loading engine: " << ne); + auto engine_dir_path_res = GetEngineDirPath(ne); + if (engine_dir_path_res.has_error()) { + return cpp::fail(engine_dir_path_res.error()); + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + try { + auto dylib = + std::make_unique(engine_dir_path.string(), "engine"); + + auto config = file_manager_utils::GetCortexConfig(); + + auto log_path = + std::filesystem::path(config.logFolderPath) / + std::filesystem::path( + config.logLlamaCppPath); // for now seems like we use same log path + + // init + auto func = dylib->get_function("get_engine"); + auto engine_obj = func(); + auto load_opts = EngineI::EngineLoadOption{ + .engine_path = engine_dir_path, + .cuda_path = file_manager_utils::GetCudaToolkitPath(ne), + .custom_engine_path = custom_engine_path, + .log_path = log_path, + .max_log_lines = config.maxLogLines, + .log_level = logging_utils_helper::global_log_level, + }; + engine_obj->Load(load_opts); + + engines_[ne].engine = engine_obj; + engines_[ne].dl = std::move(dylib); + + 
CTL_DBG("Engine loaded: " << ne); + return {}; + } catch (const cortex_cpp::dylib::load_error& e) { + CTL_ERR("Could not load engine: " << e.what()); + engines_.erase(ne); + return cpp::fail("Could not load engine " + ne + ": " + e.what()); + } +} + +void EngineService::RegisterEngineLibPath() { + auto engine_names = GetSupportedEngineNames().value(); + for (const auto& engine : engine_names) { + auto ne = NormalizeEngine(engine); + try { + auto engine_dir_path_res = GetEngineDirPath(engine); + if (engine_dir_path_res.has_error()) { + CTL_ERR( + "Could not get engine dir path: " << engine_dir_path_res.error()); + continue; + } + auto engine_dir_path = engine_dir_path_res.value().first; + auto custom_engine_path = engine_dir_path_res.value().second; + + auto dylib = std::make_unique(engine_dir_path.string(), + "engine"); + + auto cuda_path = file_manager_utils::GetCudaToolkitPath(ne); + // init + auto func = dylib->get_function("get_engine"); + auto engine = func(); + std::vector paths{}; + auto register_opts = EngineI::RegisterLibraryOption{ + .paths = paths, + }; + engine->RegisterLibraryPath(register_opts); + CTL_DBG("Register lib path for: " << ne); + delete engine; + } catch (const std::exception& e) { + CTL_WRN("Failed to register engine lib path: " << e.what()); + } + } +} + +cpp::result, std::string> +EngineService::GetEngineDirPath(const std::string& engine_name) { + auto ne = NormalizeEngine(engine_name); + auto selected_engine_variant = GetDefaultEngineVariant(ne); if (selected_engine_variant.has_error()) { @@ -715,6 +798,7 @@ cpp::result EngineService::LoadEngine( auto user_defined_engine_path = getenv("ENGINE_PATH"); #endif + auto custom_engine_path = user_defined_engine_path != nullptr; CTL_DBG("user defined engine path: " << user_defined_engine_path); const std::filesystem::path engine_dir_path = [&] { if (user_defined_engine_path != nullptr) { @@ -728,157 +812,38 @@ cpp::result EngineService::LoadEngine( } }(); - CTL_DBG("Engine path: " << 
engine_dir_path.string()); - if (!std::filesystem::exists(engine_dir_path)) { CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!"); return cpp::fail("Directory " + engine_dir_path.string() + " is not exist!"); } - CTL_INF("Engine path: " << engine_dir_path.string()); - - try { -#if defined(_WIN32) - // TODO(?) If we only allow to load an engine at a time, the logic is simpler. - // We would like to support running multiple engines at the same time. Therefore, - // the adding/removing dll directory logic is quite complicated: - // 1. If llamacpp is loaded and new requested engine is tensorrt-llm: - // Unload the llamacpp dll directory then load the tensorrt-llm - // 2. If tensorrt-llm is loaded and new requested engine is llamacpp: - // Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful) - // 3. Add dll directory if met other conditions - - auto add_dll = [this](const std::string& e_type, - const std::filesystem::path& p) { - if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) { - CTL_DBG("Added dll directory: " << p.string()); - engines_[e_type].cookie = cookie; - } else { - CTL_WRN("Could not add dll directory: " << p.string()); - } - - auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type); - if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str()); - cuda_cookie != 0) { - CTL_DBG("Added cuda dll directory: " << p.string()); - engines_[e_type].cuda_cookie = cuda_cookie; - } else { - CTL_WRN("Could not add cuda dll directory: " << p.string()); - } - }; - -#if defined(_WIN32) - if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH")); -#else - if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH")); -#endif - should_use_dll_search_path) { - if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo && - should_use_dll_search_path) { - - { - - // Remove llamacpp dll directory - if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) { - CTL_WRN("Could not remove dll directory: " << kLlamaRepo); 
- } else { - CTL_DBG("Removed dll directory: " << kLlamaRepo); - } - if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo); - } else { - CTL_DBG("Removed cuda dll directory: " << kLlamaRepo); - } - } - - add_dll(ne, engine_dir_path); - } else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) { - // Do nothing - } else { - add_dll(ne, engine_dir_path); - } - } -#endif - engines_[ne].dl = - std::make_unique(engine_dir_path.string(), "engine"); -#if defined(__linux__) - const char* name = "LD_LIBRARY_PATH"; - auto data = getenv(name); - std::string v; - if (auto g = getenv(name); g) { - v += g; - } - CTL_INF("LD_LIBRARY_PATH: " << v); - auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo); - CTL_INF("llamacpp_path: " << llamacpp_path); - // tensorrt is not supported for now - // auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo); - - auto new_v = llamacpp_path.string() + ":" + v; - setenv(name, new_v.c_str(), true); - CTL_INF("LD_LIBRARY_PATH: " << getenv(name)); -#endif - - } catch (const cortex_cpp::dylib::load_error& e) { - CTL_ERR("Could not load engine: " << e.what()); - engines_.erase(ne); - return cpp::fail("Could not load engine " + ne + ": " + e.what()); - } - - auto func = engines_[ne].dl->get_function("get_engine"); - engines_[ne].engine = func(); - - auto& en = std::get(engines_[ne].engine); - if (ne == kLlamaRepo) { //fix for llamacpp engine first - auto config = file_manager_utils::GetCortexConfig(); - if (en->IsSupported("SetFileLogger")) { - en->SetFileLogger(config.maxLogLines, - (std::filesystem::path(config.logFolderPath) / - std::filesystem::path(config.logLlamaCppPath)) - .string()); - } else { - CTL_WRN("Method SetFileLogger is not supported yet"); - } - if (en->IsSupported("SetLogLevel")) { - en->SetLogLevel(logging_utils_helper::global_log_level); - } else { - CTL_WRN("Method SetLogLevel is not supported yet"); - } - } - 
CTL_DBG("loaded engine: " << ne); - return {}; + CTL_INF("Engine path: " << engine_dir_path.string() + << ", custom_engine_path: " << custom_engine_path); + return std::make_pair(engine_dir_path, custom_engine_path); } cpp::result EngineService::UnloadEngine( const std::string& engine) { auto ne = NormalizeEngine(engine); std::lock_guard lock(engines_mutex_); - { - if (!IsEngineLoaded(ne)) { - return cpp::fail("Engine " + ne + " is not loaded yet!"); - } - if (std::holds_alternative(engines_[ne].engine)) { - delete std::get(engines_[ne].engine); - } else { - delete std::get(engines_[ne].engine); - } - -#if defined(_WIN32) - if (!RemoveDllDirectory(engines_[ne].cookie)) { - CTL_WRN("Could not remove dll directory: " << ne); - } else { - CTL_DBG("Removed dll directory: " << ne); - } - if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) { - CTL_WRN("Could not remove cuda dll directory: " << ne); - } else { - CTL_DBG("Removed cuda dll directory: " << ne); - } -#endif + if (!IsEngineLoaded(ne)) { + return cpp::fail("Engine " + ne + " is not loaded yet!"); + } + if (std::holds_alternative(engines_[ne].engine)) { + LOG_INFO << "Unloading engine " << ne; + auto* e = std::get(engines_[ne].engine); + auto unload_opts = EngineI::EngineUnloadOption{ + .unload_dll = true, + }; + e->Unload(unload_opts); + delete e; engines_.erase(ne); + } else { + delete std::get(engines_[ne].engine); } - CTL_DBG("Unloaded engine " + ne); + + CTL_DBG("Engine unloaded: " + ne); return {}; } @@ -1097,4 +1062,9 @@ cpp::result EngineService::GetRemoteModels( } else { return res; } -} \ No newline at end of file +} + +cpp::result, std::string> +EngineService::GetSupportedEngineNames() { + return file_manager_utils::GetCortexConfig().supportedEngines; +} diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index ab274825d..8299655f2 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -13,7 +13,6 @@ #include "cortex-common/cortexpythoni.h" 
#include "cortex-common/remote_enginei.h" #include "database/engines.h" -#include "extensions/remote-engine/remote_engine.h" #include "services/download_service.h" #include "utils/cpuid/cpu_info.h" #include "utils/dylib.h" @@ -75,6 +74,9 @@ class EngineService : public EngineServiceI { .cuda_driver_version = system_info_utils::GetDriverAndCudaVersion().second} {} + // just for initialize supported engines + EngineService() {}; + std::vector GetEngineInfoList() const; /** @@ -148,6 +150,9 @@ class EngineService : public EngineServiceI { cpp::result GetRemoteModels( const std::string& engine_name); + cpp::result, std::string> GetSupportedEngineNames(); + + void RegisterEngineLibPath(); private: bool IsEngineLoaded(const std::string& engine); @@ -162,7 +167,10 @@ class EngineService : public EngineServiceI { std::string GetMatchedVariant(const std::string& engine, const std::vector& variants); + cpp::result, std::string> + GetEngineDirPath(const std::string& engine_name); + cpp::result IsEngineVariantReady( const std::string& engine, const std::string& version, const std::string& variant); -}; \ No newline at end of file +}; diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 25be78873..97ddacb97 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -5,11 +5,11 @@ #if defined(_WIN32) || defined(_WIN64) #include #include +#include "utils/widechar_conv.h" #endif #include "cli/commands/cortex_upd_cmd.h" #include "database/hardware.h" #include "utils/cortex_utils.h" -#include "utils/widechar_conv.h" namespace services { diff --git a/engine/utils/config_yaml_utils.cc b/engine/utils/config_yaml_utils.cc index ed6437256..c7a696df4 100644 --- a/engine/utils/config_yaml_utils.cc +++ b/engine/utils/config_yaml_utils.cc @@ -49,6 +49,7 @@ cpp::result CortexConfigMgr::DumpYamlConfig( node["verifyHostSsl"] = config.verifyHostSsl; node["sslCertPath"] = config.sslCertPath; node["sslKeyPath"] = 
config.sslKeyPath; + node["supportedEngines"] = config.supportedEngines; out_file << node; out_file.close(); diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index d36cc48e0..f9925ea86 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -3,6 +3,7 @@ #include #include #include +#include "utils/engine_constants.h" #include "utils/result.hpp" namespace config_yaml_utils { @@ -18,6 +19,8 @@ constexpr const auto kDefaultCorsEnabled = true; const std::vector kDefaultEnabledOrigins{ "http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"}; constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1"; +const std::vector kDefaultSupportedEngines{ + kLlamaEngine, kOnnxEngine, kTrtLlmEngine}; struct CortexConfig { std::string logFolderPath; @@ -57,6 +60,7 @@ struct CortexConfig { bool verifyHostSsl; std::string sslCertPath; std::string sslKeyPath; + std::vector supportedEngines; }; class CortexConfigMgr { @@ -80,5 +84,4 @@ class CortexConfigMgr { CortexConfig FromYaml(const std::string& path, const CortexConfig& default_cfg); }; - } // namespace config_yaml_utils diff --git a/engine/utils/file_manager_utils.cc b/engine/utils/file_manager_utils.cc index ca3d0c07b..338abadac 100644 --- a/engine/utils/file_manager_utils.cc +++ b/engine/utils/file_manager_utils.cc @@ -187,6 +187,7 @@ config_yaml_utils::CortexConfig GetDefaultConfig() { .verifyHostSsl = true, .sslCertPath = "", .sslKeyPath = "", + .supportedEngines = config_yaml_utils::kDefaultSupportedEngines, }; }