Skip to content

Commit

Permalink
feat: update engine interface
Browse files Browse the repository at this point in the history
  • Loading branch information
namchuai committed Dec 2, 2024
1 parent 1641500 commit 4f97bd4
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 144 deletions.
5 changes: 2 additions & 3 deletions engine/controllers/engines.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@ std::string NormalizeEngine(const std::string& engine) {
void Engines::ListEngine(
const HttpRequestPtr& req,
std::function<void(const HttpResponsePtr&)>&& callback) const {
std::vector<std::string> supported_engines{kLlamaEngine, kOnnxEngine,
kTrtLlmEngine};
Json::Value ret;
for (const auto& engine : supported_engines) {
auto engine_names = engine_service_->GetSupportedEngineNames().value();
for (const auto& engine : engine_names) {
auto installed_engines =
engine_service_->GetInstalledEngineVariants(engine);
if (installed_engines.has_error()) {
Expand Down
10 changes: 10 additions & 0 deletions engine/cortex-common/EngineI.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
#pragma once

#include <filesystem>
#include <functional>
#include <memory>

#include "json/value.h"
#include "trantor/utils/Logger.h"
class EngineI {
public:
struct EngineLoadOption {
std::filesystem::path engine_path;
std::filesystem::path
cuda_path; // TODO: make this more generic. Here just to test for now
bool custom_engine_path;
};

virtual ~EngineI() {}

virtual void Load(EngineLoadOption opts) = 0;

// cortex.llamacpp interface
virtual void HandleChatCompletion(
std::shared_ptr<Json::Value> json_body,
Expand Down
198 changes: 58 additions & 140 deletions engine/services/engine_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ cpp::result<bool, std::string> EngineService::UninstallEngineVariant(
const std::string& engine, const std::optional<std::string> version,
const std::optional<std::string> variant) {
auto ne = NormalizeEngine(engine);
std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(ne)) {
CTL_INF("Engine " << ne << " is already loaded, unloading it");
auto unload_res = UnloadEngine(ne);
Expand Down Expand Up @@ -272,6 +273,7 @@ cpp::result<void, std::string> EngineService::DownloadEngine(
if (selected_variant == std::nullopt) {
return cpp::fail("Failed to find a suitable variant for " + engine);
}
std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(engine)) {
CTL_INF("Engine " << engine << " is already loaded, unloading it");
auto unload_res = UnloadEngine(engine);
Expand Down Expand Up @@ -503,6 +505,7 @@ EngineService::SetDefaultEngineVariant(const std::string& engine,
" is not installed yet!");
}

std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(ne)) {
CTL_INF("Engine " << ne << " is already loaded, unloading it");
auto unload_res = UnloadEngine(ne);
Expand Down Expand Up @@ -631,7 +634,6 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const {
}

// Returns true if `engine` (after name normalization) currently has an
// entry in the loaded-engine map.
//
// Precondition: the caller must already hold engines_mutex_. Every call
// site visible in this file (LoadEngine, UnloadEngine, DownloadEngine,
// SetDefaultEngineVariant, UpdateEngine, UninstallEngineVariant) acquires
// the mutex before calling this helper; taking the same non-recursive
// std::mutex again here would deadlock, so this function must not lock.
bool EngineService::IsEngineLoaded(const std::string& engine) {
  auto ne = NormalizeEngine(engine);
  return engines_.find(ne) != engines_.end();
}
Expand All @@ -651,6 +653,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
const std::string& engine_name) {
auto ne = NormalizeEngine(engine_name);

std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(ne)) {
CTL_INF("Engine " << ne << " is already loaded");
return {};
Expand All @@ -672,6 +675,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
auto user_defined_engine_path = getenv("ENGINE_PATH");
#endif

auto custom_engine_path = user_defined_engine_path != nullptr;
CTL_DBG("user defined engine path: " << user_defined_engine_path);
const std::filesystem::path engine_dir_path = [&] {
if (user_defined_engine_path != nullptr) {
Expand All @@ -685,8 +689,6 @@ cpp::result<void, std::string> EngineService::LoadEngine(
}
}();

CTL_DBG("Engine path: " << engine_dir_path.string());

if (!std::filesystem::exists(engine_dir_path)) {
CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!");
return cpp::fail("Directory " + engine_dir_path.string() +
Expand All @@ -696,164 +698,74 @@ cpp::result<void, std::string> EngineService::LoadEngine(
CTL_INF("Engine path: " << engine_dir_path.string());

try {
#if defined(_WIN32)
// TODO(?) If we only allow to load an engine at a time, the logic is simpler.
// We would like to support running multiple engines at the same time. Therefore,
// the adding/removing dll directory logic is quite complicated:
// 1. If llamacpp is loaded and new requested engine is tensorrt-llm:
// Unload the llamacpp dll directory then load the tensorrt-llm
// 2. If tensorrt-llm is loaded and new requested engine is llamacpp:
// Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful)
// 3. Add dll directory if met other conditions

auto add_dll = [this](const std::string& e_type,
const std::filesystem::path& p) {
if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) {
CTL_DBG("Added dll directory: " << p.string());
engines_[e_type].cookie = cookie;
} else {
CTL_WRN("Could not add dll directory: " << p.string());
}

auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type);
if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str());
cuda_cookie != 0) {
CTL_DBG("Added cuda dll directory: " << p.string());
engines_[e_type].cuda_cookie = cuda_cookie;
} else {
CTL_WRN("Could not add cuda dll directory: " << p.string());
}
auto dylib =
std::make_unique<cortex_cpp::dylib>(engine_dir_path.string(), "engine");

// init
auto func = dylib->get_function<EngineI*()>("get_engine");
auto engine_obj = func();
auto load_opts = EngineI::EngineLoadOption{
.engine_path = engine_dir_path,
.cuda_path = file_manager_utils::GetCudaToolkitPath(ne),
.custom_engine_path = custom_engine_path,
};
engine_obj->Load(load_opts);

#if defined(_WIN32)
if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH"));
#else
if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH"));
#endif
should_use_dll_search_path) {
if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo &&
should_use_dll_search_path) {

{
std::lock_guard<std::mutex> lock(engines_mutex_);
// Remove llamacpp dll directory
if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) {
CTL_WRN("Could not remove dll directory: " << kLlamaRepo);
} else {
CTL_DBG("Removed dll directory: " << kLlamaRepo);
}
if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) {
CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo);
} else {
CTL_DBG("Removed cuda dll directory: " << kLlamaRepo);
}
}

add_dll(ne, engine_dir_path);
} else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) {
// Do nothing
} else {
add_dll(ne, engine_dir_path);
}
}
#endif
{
std::lock_guard<std::mutex> lock(engines_mutex_);
engines_[ne].dl = std::make_unique<cortex_cpp::dylib>(
engine_dir_path.string(), "engine");
}
#if defined(__linux__)
const char* name = "LD_LIBRARY_PATH";
auto data = getenv(name);
std::string v;
if (auto g = getenv(name); g) {
v += g;
}
CTL_INF("LD_LIBRARY_PATH: " << v);
auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo);
CTL_INF("llamacpp_path: " << llamacpp_path);
// tensorrt is not supported for now
// auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo);

auto new_v = llamacpp_path.string() + ":" + v;
setenv(name, new_v.c_str(), true);
CTL_INF("LD_LIBRARY_PATH: " << getenv(name));
#endif
engines_[ne].engine = engine_obj;
engines_[ne].dl = std::move(dylib);

CTL_DBG("Engine loaded: "
<< ne); // TODO: output more information like version and variant
} catch (const cortex_cpp::dylib::load_error& e) {
CTL_ERR("Could not load engine: " << e.what());
{
std::lock_guard<std::mutex> lock(engines_mutex_);
engines_.erase(ne);
}
engines_.erase(ne);
return cpp::fail("Could not load engine " + ne + ": " + e.what());
}

{
std::lock_guard<std::mutex> lock(engines_mutex_);
auto func = engines_[ne].dl->get_function<EngineI*()>("get_engine");
engines_[ne].engine = func();

auto& en = std::get<EngineI*>(engines_[ne].engine);
if (ne == kLlamaRepo) { //fix for llamacpp engine first
auto config = file_manager_utils::GetCortexConfig();
if (en->IsSupported("SetFileLogger")) {
en->SetFileLogger(config.maxLogLines,
(std::filesystem::path(config.logFolderPath) /
std::filesystem::path(config.logLlamaCppPath))
.string());
} else {
CTL_WRN("Method SetFileLogger is not supported yet");
}
if (en->IsSupported("SetLogLevel")) {
en->SetLogLevel(logging_utils_helper::global_log_level);
} else {
CTL_WRN("Method SetLogLevel is not supported yet");
}
}
CTL_DBG("loaded engine: " << ne);
}
// TODO: namh recheck this if can be moved to cortex.llamacpp
// if (ne == kLlamaRepo) { //fix for llamacpp engine first
// auto config = file_manager_utils::GetCortexConfig();
// if (en->IsSupported("SetFileLogger")) {
// en->SetFileLogger(config.maxLogLines,
// (std::filesystem::path(config.logFolderPath) /
// std::filesystem::path(config.logLlamaCppPath))
// .string());
// } else {
// CTL_WRN("Method SetFileLogger is not supported yet");
// }
// if (en->IsSupported("SetLogLevel")) {
// en->SetLogLevel(logging_utils_helper::global_log_level);
// } else {
// CTL_WRN("Method SetLogLevel is not supported yet");
// }
// }

return {};
}

// Unloads a previously loaded engine.
//
// Destroys the engine object that was produced by the dynamic library's
// get_engine() factory, then erases the map entry (which also releases the
// dylib handle). Returns an error if the engine is not currently loaded.
//
// NOTE(review): the original span interleaved the pre- and post-change diff
// lines (duplicate `lock`/`e` declarations, duplicate erase, unbalanced
// braces); this is the deduplicated version.
cpp::result<void, std::string> EngineService::UnloadEngine(
    const std::string& engine) {
  auto ne = NormalizeEngine(engine);
  std::lock_guard<std::mutex> lock(engines_mutex_);
  // Check the map directly while holding the lock; calling a helper that
  // re-locks the same non-recursive mutex would deadlock.
  if (engines_.find(ne) == engines_.end()) {
    return cpp::fail("Engine " + ne + " is not loaded yet!");
  }
  // The engine object was allocated inside the dynamic library; delete it
  // before the library handle is released by the erase below.
  EngineI* e = std::get<EngineI*>(engines_[ne].engine);
  delete e;
  engines_.erase(ne);
  CTL_DBG("Engine unloaded: " + ne);
  return {};
}

std::vector<EngineV> EngineService::GetLoadedEngines() {
{
std::lock_guard<std::mutex> lock(engines_mutex_);
std::vector<EngineV> loaded_engines;
for (const auto& [key, value] : engines_) {
loaded_engines.push_back(value.engine);
}
return loaded_engines;
std::lock_guard<std::mutex> lock(engines_mutex_);
std::vector<EngineV> loaded_engines;
for (const auto& [key, value] : engines_) {
loaded_engines.push_back(value.engine);
}
return loaded_engines;
}

cpp::result<github_release_utils::GitHubRelease, std::string>
Expand Down Expand Up @@ -899,6 +811,7 @@ cpp::result<EngineUpdateResult, std::string> EngineService::UpdateEngine(
CTL_INF("Default variant: " << default_variant->variant
<< ", version: " + default_variant->version);

std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(ne)) {
CTL_INF("Engine " << ne << " is already loaded, unloading it");
auto unload_res = UnloadEngine(ne);
Expand Down Expand Up @@ -955,3 +868,8 @@ cpp::result<EngineUpdateResult, std::string> EngineService::UpdateEngine(
.from = default_variant->version,
.to = latest_version->tag_name};
}

// Returns the engine names this build supports, as listed under the
// `supportedEngines` key of the cortex configuration.
cpp::result<std::vector<std::string>, std::string>
EngineService::GetSupportedEngineNames() {
  auto cortex_config = file_manager_utils::GetCortexConfig();
  return cortex_config.supportedEngines;
}
2 changes: 2 additions & 0 deletions engine/services/engine_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ class EngineService : public EngineServiceI {
cpp::result<EngineUpdateResult, std::string> UpdateEngine(
const std::string& engine);

cpp::result<std::vector<std::string>, std::string> GetSupportedEngineNames();

private:
cpp::result<void, std::string> DownloadEngine(
const std::string& engine, const std::string& version = "latest",
Expand Down
11 changes: 10 additions & 1 deletion engine/utils/config_yaml_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <iostream>
#include <mutex>
#include <string>
#include "utils/engine_constants.h"
#include "utils/logging_utils.h"
#include "utils/result.hpp"
#include "yaml-cpp/yaml.h"
Expand All @@ -22,6 +23,8 @@ constexpr const auto kDefaultCorsEnabled = true;
const std::vector<std::string> kDefaultEnabledOrigins{
"http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"};
constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1";
const std::vector<std::string> kDefaultSupportedEngines{
kLlamaEngine, kOnnxEngine, kTrtLlmEngine};

struct CortexConfig {
std::string logFolderPath;
Expand Down Expand Up @@ -59,6 +62,7 @@ struct CortexConfig {

bool verifyPeerSsl;
bool verifyHostSsl;
std::vector<std::string> supportedEngines;
};

class CortexConfigMgr {
Expand Down Expand Up @@ -117,6 +121,7 @@ class CortexConfigMgr {
node["noProxy"] = config.noProxy;
node["verifyPeerSsl"] = config.verifyPeerSsl;
node["verifyHostSsl"] = config.verifyHostSsl;
node["supportedEngines"] = config.supportedEngines;

out_file << node;
out_file.close();
Expand Down Expand Up @@ -151,7 +156,7 @@ class CortexConfigMgr {
!node["proxyUsername"] || !node["proxyPassword"] ||
!node["verifyPeerSsl"] || !node["verifyHostSsl"] ||
!node["verifyProxySsl"] || !node["verifyProxyHostSsl"] ||
!node["noProxy"]);
!node["noProxy"] || !node["supportedEngines"]);

CortexConfig config = {
.logFolderPath = node["logFolderPath"]
Expand Down Expand Up @@ -235,6 +240,10 @@ class CortexConfigMgr {
.verifyHostSsl = node["verifyHostSsl"]
? node["verifyHostSsl"].as<bool>()
: default_cfg.verifyHostSsl,
.supportedEngines =
node["supportedEngines"]
? node["supportedEngines"].as<std::vector<std::string>>()
: default_cfg.supportedEngines,
};
if (should_update_config) {
l.unlock();
Expand Down
1 change: 1 addition & 0 deletions engine/utils/file_manager_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ inline config_yaml_utils::CortexConfig GetDefaultConfig() {
.noProxy = config_yaml_utils::kDefaultNoProxy,
.verifyPeerSsl = true,
.verifyHostSsl = true,
.supportedEngines = config_yaml_utils::kDefaultSupportedEngines,
};
}

Expand Down

0 comments on commit 4f97bd4

Please sign in to comment.