Commit

feat: update engine interface

namchuai committed Dec 2, 2024
1 parent 1641500 commit 5336334
Showing 2 changed files with 58 additions and 137 deletions.
10 changes: 10 additions & 0 deletions engine/cortex-common/EngineI.h
@@ -1,14 +1,24 @@
#pragma once

#include <filesystem>
#include <functional>
#include <memory>

#include "json/value.h"
#include "trantor/utils/Logger.h"
class EngineI {
 public:
  struct EngineLoadOption {
    std::filesystem::path engine_path;
    std::filesystem::path
        cuda_path;  // TODO: make this more generic. Here just to test for now
    bool custom_engine_path;
  };

  virtual ~EngineI() {}

  virtual void Load(EngineLoadOption opts) = 0;

  // cortex.llamacpp interface
  virtual void HandleChatCompletion(
      std::shared_ptr<Json::Value> json_body,
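The header above adds `EngineLoadOption` and a pure-virtual `Load(EngineLoadOption)` so the service can hand path information to the engine instead of managing it itself. Below is a minimal sketch of consuming those options on the engine side; the function name, the `main` driver, the include path, and the literal paths are assumptions for illustration, and only the struct fields and the `Load` signature come from `EngineI.h`.

```cpp
// Hypothetical sketch, not part of this commit: how engine-side code might
// consume the new EngineLoadOption. Only the struct fields come from
// EngineI.h; the include path, function name, and paths are assumptions.
#include <iostream>

#include "cortex-common/EngineI.h"  // assumed include path

// A real engine would override Load(EngineLoadOption) and do something like
// this with the fields, e.g. extend its own library search path.
void DescribeLoadOptions(const EngineI::EngineLoadOption& opts) {
  std::cout << "engine_path: " << opts.engine_path.string() << "\n"
            << "cuda_path: " << opts.cuda_path.string() << "\n"
            << "custom_engine_path: " << std::boolalpha
            << opts.custom_engine_path << "\n";
}

int main() {
  EngineI::EngineLoadOption opts{
      .engine_path = "/opt/cortex/engines/cortex.llamacpp",  // hypothetical
      .cuda_path = "/opt/cortex/cuda",                       // hypothetical
      .custom_engine_path = false,
  };
  DescribeLoadOptions(opts);
  return 0;
}
```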
185 changes: 48 additions & 137 deletions engine/services/engine_service.cc
@@ -651,6 +651,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
const std::string& engine_name) {
auto ne = NormalizeEngine(engine_name);

// std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(ne)) {
CTL_INF("Engine " << ne << " is already loaded");
return {};
@@ -672,6 +673,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
auto user_defined_engine_path = getenv("ENGINE_PATH");
#endif

auto custom_engine_path = user_defined_engine_path != nullptr;
CTL_DBG("user defined engine path: " << user_defined_engine_path);
const std::filesystem::path engine_dir_path = [&] {
if (user_defined_engine_path != nullptr) {
@@ -685,8 +687,6 @@ cpp::result<void, std::string> EngineService::LoadEngine(
}
}();

CTL_DBG("Engine path: " << engine_dir_path.string());

if (!std::filesystem::exists(engine_dir_path)) {
CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!");
return cpp::fail("Directory " + engine_dir_path.string() +
@@ -696,90 +696,20 @@ cpp::result<void, std::string> EngineService::LoadEngine(
CTL_INF("Engine path: " << engine_dir_path.string());

try {
#if defined(_WIN32)
// TODO(?) If we only allow to load an engine at a time, the logic is simpler.
// We would like to support running multiple engines at the same time. Therefore,
// the adding/removing dll directory logic is quite complicated:
// 1. If llamacpp is loaded and new requested engine is tensorrt-llm:
// Unload the llamacpp dll directory then load the tensorrt-llm
// 2. If tensorrt-llm is loaded and new requested engine is llamacpp:
// Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful)
// 3. Add dll directory if met other conditions

auto add_dll = [this](const std::string& e_type,
const std::filesystem::path& p) {
if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) {
CTL_DBG("Added dll directory: " << p.string());
engines_[e_type].cookie = cookie;
} else {
CTL_WRN("Could not add dll directory: " << p.string());
}

auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type);
if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str());
cuda_cookie != 0) {
CTL_DBG("Added cuda dll directory: " << p.string());
engines_[e_type].cuda_cookie = cuda_cookie;
} else {
CTL_WRN("Could not add cuda dll directory: " << p.string());
}
auto dylib =
std::make_unique<cortex_cpp::dylib>(engine_dir_path.string(), "engine");

// init
auto func = dylib->get_function<EngineI*()>("get_engine");
auto engine_obj = func();
engines_[ne].engine = engine_obj;
auto load_opts = EngineI::EngineLoadOption{
.engine_path = engine_dir_path,
.cuda_path = file_manager_utils::GetCudaToolkitPath(ne),
.custom_engine_path = custom_engine_path,
};

#if defined(_WIN32)
if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH"));
#else
if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH"));
#endif
should_use_dll_search_path) {
if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo &&
should_use_dll_search_path) {

{
std::lock_guard<std::mutex> lock(engines_mutex_);
// Remove llamacpp dll directory
if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) {
CTL_WRN("Could not remove dll directory: " << kLlamaRepo);
} else {
CTL_DBG("Removed dll directory: " << kLlamaRepo);
}
if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) {
CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo);
} else {
CTL_DBG("Removed cuda dll directory: " << kLlamaRepo);
}
}

add_dll(ne, engine_dir_path);
} else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) {
// Do nothing
} else {
add_dll(ne, engine_dir_path);
}
}
#endif
{
std::lock_guard<std::mutex> lock(engines_mutex_);
engines_[ne].dl = std::make_unique<cortex_cpp::dylib>(
engine_dir_path.string(), "engine");
}
#if defined(__linux__)
const char* name = "LD_LIBRARY_PATH";
auto data = getenv(name);
std::string v;
if (auto g = getenv(name); g) {
v += g;
}
CTL_INF("LD_LIBRARY_PATH: " << v);
auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo);
CTL_INF("llamacpp_path: " << llamacpp_path);
// tensorrt is not supported for now
// auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo);

auto new_v = llamacpp_path.string() + ":" + v;
setenv(name, new_v.c_str(), true);
CTL_INF("LD_LIBRARY_PATH: " << getenv(name));
#endif

engine_obj->Load(load_opts);
engines_[ne].dl = std::move(dylib);
} catch (const cortex_cpp::dylib::load_error& e) {
CTL_ERR("Could not load engine: " << e.what());
{
@@ -789,71 +719,52 @@ cpp::result<void, std::string> EngineService::LoadEngine(
return cpp::fail("Could not load engine " + ne + ": " + e.what());
}

{
std::lock_guard<std::mutex> lock(engines_mutex_);
auto func = engines_[ne].dl->get_function<EngineI*()>("get_engine");
engines_[ne].engine = func();

auto& en = std::get<EngineI*>(engines_[ne].engine);
if (ne == kLlamaRepo) { //fix for llamacpp engine first
auto config = file_manager_utils::GetCortexConfig();
if (en->IsSupported("SetFileLogger")) {
en->SetFileLogger(config.maxLogLines,
(std::filesystem::path(config.logFolderPath) /
std::filesystem::path(config.logLlamaCppPath))
.string());
} else {
CTL_WRN("Method SetFileLogger is not supported yet");
}
if (en->IsSupported("SetLogLevel")) {
en->SetLogLevel(logging_utils_helper::global_log_level);
} else {
CTL_WRN("Method SetLogLevel is not supported yet");
}
}
CTL_DBG("loaded engine: " << ne);
}
auto& en = std::get<EngineI*>(engines_[ne].engine);
// TODO: namh recheck whether this can be moved to cortex.llamacpp
// if (ne == kLlamaRepo) { //fix for llamacpp engine first
// auto config = file_manager_utils::GetCortexConfig();
// if (en->IsSupported("SetFileLogger")) {
// en->SetFileLogger(config.maxLogLines,
// (std::filesystem::path(config.logFolderPath) /
// std::filesystem::path(config.logLlamaCppPath))
// .string());
// } else {
// CTL_WRN("Method SetFileLogger is not supported yet");
// }
// if (en->IsSupported("SetLogLevel")) {
// en->SetLogLevel(logging_utils_helper::global_log_level);
// } else {
// CTL_WRN("Method SetLogLevel is not supported yet");
// }
// }
CTL_DBG("loaded engine: " << ne);

return {};
}
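Since the rendered diff interleaves the removed Windows/Linux path handling with the new code, the load path introduced by this commit is easier to read in isolation. The sketch below is assembled from the added lines above; locking, the ENGINE_PATH resolution, and error handling are omitted, so it is a reading aid rather than a drop-in replacement.

```cpp
// Condensed view of LoadEngine's new core, taken from the added lines above.
auto dylib =
    std::make_unique<cortex_cpp::dylib>(engine_dir_path.string(), "engine");

auto func = dylib->get_function<EngineI*()>("get_engine");
auto engine_obj = func();
engines_[ne].engine = engine_obj;

// Path handling now travels to the engine through EngineLoadOption.
auto load_opts = EngineI::EngineLoadOption{
    .engine_path = engine_dir_path,
    .cuda_path = file_manager_utils::GetCudaToolkitPath(ne),
    .custom_engine_path = custom_engine_path,
};
engine_obj->Load(load_opts);
engines_[ne].dl = std::move(dylib);
```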

cpp::result<void, std::string> EngineService::UnloadEngine(
const std::string& engine) {
auto ne = NormalizeEngine(engine);
{
std::lock_guard<std::mutex> lock(engines_mutex_);
if (!IsEngineLoaded(ne)) {
return cpp::fail("Engine " + ne + " is not loaded yet!");
}
EngineI* e = std::get<EngineI*>(engines_[ne].engine);
delete e;
LOG_INFO << "Unloading engine " << ne;

#if defined(_WIN32)
if (!RemoveDllDirectory(engines_[ne].cookie)) {
CTL_WRN("Could not remove dll directory: " << ne);
} else {
CTL_DBG("Removed dll directory: " << ne);
}
if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) {
CTL_WRN("Could not remove cuda dll directory: " << ne);
} else {
CTL_DBG("Removed cuda dll directory: " << ne);
}
#endif
engines_.erase(ne);
std::lock_guard<std::mutex> lock(engines_mutex_);
if (!IsEngineLoaded(ne)) {
return cpp::fail("Engine " + ne + " is not loaded yet!");
}
CTL_DBG("Unloaded engine " + ne);
EngineI* e = std::get<EngineI*>(engines_[ne].engine);
delete e;
engines_.erase(ne);
CTL_DBG("Engine unloaded: " + ne);
return {};
}

std::vector<EngineV> EngineService::GetLoadedEngines() {
{
std::lock_guard<std::mutex> lock(engines_mutex_);
std::vector<EngineV> loaded_engines;
for (const auto& [key, value] : engines_) {
loaded_engines.push_back(value.engine);
}
return loaded_engines;
std::lock_guard<std::mutex> lock(engines_mutex_);
std::vector<EngineV> loaded_engines;
for (const auto& [key, value] : engines_) {
loaded_engines.push_back(value.engine);
}
return loaded_engines;
}

cpp::result<github_release_utils::GitHubRelease, std::string>
