feat: update engine interface

janhq · Dec 4, 2024 · bd1bf91 · bd1bf91
1 parent 21cde00
commit bd1bf91
Show file tree

Hide file tree

Showing 10 changed files with 257 additions and 178 deletions.
diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx
@@ -22,12 +22,32 @@ First, create an engine that implements the `EngineI.h` interface. Here's the in
 ```cpp
 class EngineI {
  public:
-  struct EngineLoadOption{};
-  struct EngineUnloadOption{};
+  struct RegisterLibraryOption {
+    std::vector<std::filesystem::path> paths;
+  };
+
+  struct EngineLoadOption {
+    // engine
+    std::filesystem::path engine_path;
+    std::filesystem::path cuda_path;
+    bool custom_engine_path;
+
+    // logging
+    std::filesystem::path log_path;
+    int max_log_lines;
+    trantor::Logger::LogLevel log_level;
+  };
+
+  struct EngineUnloadOption {
+    bool unload_dll;
+  };
 
   virtual ~EngineI() {}
 
+  virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0;
+
   virtual void Load(EngineLoadOption opts) = 0;
+
   virtual void Unload(EngineUnloadOption opts) = 0;
 
   // Cortex.llamacpp interface methods
@@ -65,7 +85,71 @@ class EngineI {
 };
 ```
 
-Note that Cortex will call `Load` before loading any models and `Unload` when stopping the engine.
+#### Lifecycle Management
+
+##### RegisterLibraryPath
+
+```cpp
+virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0;
+```
+
+This method is called during engine initialization to set up dynamic library search paths. For example, on Linux, `LD_LIBRARY_PATH` must still be used to add CUDA dependencies to the search path.
+
+**Parameters:**
+
+- `opts.paths`: Vector of filesystem paths that the engine should register
+
+**Implementation Requirements:**
+
+- Register provided paths for dynamic library loading
+- Handle invalid paths gracefully
+- Thread-safe implementation
+- No exceptions should escape the method
+
+##### Load
+
+```cpp
+virtual void Load(EngineLoadOption opts) = 0;
+```
+
+Initializes the engine with the provided configuration options.
+
+**Parameters:**
+
+- `engine_path`: Base path for engine files
+- `cuda_path`: Path to CUDA installation
+- `custom_engine_path`: Flag for using custom engine location
+- `log_path`: Location for log files
+- `max_log_lines`: Maximum number of lines per log file
+- `log_level`: Logging verbosity level
+
+**Implementation Requirements:**
+
+- Validate all paths before use
+- Initialize engine components
+- Set up logging configuration
+- Handle missing dependencies gracefully
+- Clean up any partially initialized state if initialization fails
+
+##### Unload
+
+```cpp
+virtual void Unload(EngineUnloadOption opts) = 0;
+```
+
+Performs cleanup and shutdown of the engine.
+
+**Parameters:**
+
+- `unload_dll`: Boolean flag indicating whether to unload dynamic libraries
+
+**Implementation Requirements:**
+
+- Clean up all allocated resources
+- Close file handles and connections
+- Release memory
+- Ensure proper shutdown of running models
+- Handle cleanup in a thread-safe manner
 
 ### 2. Create a Dynamic Library
 
@@ -98,7 +182,7 @@ To test your engine locally:
 
 1. Create a directory structure following this hierarchy:
 
-```
+```bash
 engines/
 └── cortex.llamacpp/
     └── mac-arm64/
@@ -107,12 +191,12 @@ engines/
             └── version.txt
 ```
 
-2. Configure your engine:
+1. Configure your engine:
 
    - Edit the `~/.cortexrc` file to register your engine name
    - Add your model with the appropriate engine field in `model.yaml`
 
-3. Testing:
+2. Testing:
    - Start the engine
    - Load your model
    - Verify functionality

diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc
@@ -1,9 +1,12 @@
 #include "server_start_cmd.h"
 #include "commands/cortex_upd_cmd.h"
+#include "services/engine_service.h"
 #include "utils/cortex_utils.h"
-#include "utils/engine_constants.h"
 #include "utils/file_manager_utils.h"
+
+#if defined(_WIN32) || defined(_WIN64)
 #include "utils/widechar_conv.h"
+#endif
 
 namespace commands {
 
@@ -108,22 +111,9 @@ bool ServerStartCmd::Exec(const std::string& host, int port,
     std::cerr << "Could not start server: " << std::endl;
     return false;
   } else if (pid == 0) {
-    // No need to configure LD_LIBRARY_PATH for macOS
-#if !defined(__APPLE__) || !defined(__MACH__)
-    const char* name = "LD_LIBRARY_PATH";
-    auto data = getenv(name);
-    std::string v;
-    if (auto g = getenv(name); g) {
-      v += g;
-    }
-    CTL_INF("LD_LIBRARY_PATH: " << v);
-    auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo);
-    auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo);
+    // Some engines require library search paths to be registered before the process is created
+    EngineService().RegisterEngineLibPath();
 
-    auto new_v = trt_path.string() + ":" + llamacpp_path.string() + ":" + v;
-    setenv(name, new_v.c_str(), true);
-    CTL_INF("LD_LIBRARY_PATH: " << getenv(name));
-#endif
     std::string p = cortex_utils::GetCurrentPath() + "/" + exe;
     execl(p.c_str(), exe.c_str(), "--start-server", "--config_file_path",
           get_config_file_path().c_str(), "--data_folder_path",

diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc
@@ -23,10 +23,9 @@ std::string NormalizeEngine(const std::string& engine) {
 void Engines::ListEngine(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback) const {
-  std::vector<std::string> supported_engines{kLlamaEngine, kOnnxEngine,
-                                             kTrtLlmEngine};
   Json::Value ret;
-  for (const auto& engine : supported_engines) {
+  auto engine_names = engine_service_->GetSupportedEngineNames().value();
+  for (const auto& engine : engine_names) {
     auto installed_engines =
         engine_service_->GetInstalledEngineVariants(engine);
     if (installed_engines.has_error()) {

diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h
@@ -1,14 +1,44 @@
 #pragma once
 
+#include <filesystem>
 #include <functional>
 #include <memory>
 
 #include "json/value.h"
 #include "trantor/utils/Logger.h"
 class EngineI {
  public:
+  struct RegisterLibraryOption {
+    std::vector<std::filesystem::path> paths;
+  };
+
+  struct EngineLoadOption {
+    // engine
+    std::filesystem::path engine_path;
+    std::filesystem::path cuda_path;
+    bool custom_engine_path;
+
+    // logging
+    std::filesystem::path log_path;
+    int max_log_lines;
+    trantor::Logger::LogLevel log_level;
+  };
+
+  struct EngineUnloadOption {
+    bool unload_dll;
+  };
+
   virtual ~EngineI() {}
 
+  /**
+   * Called before the process starts to register dependency search paths.
+   */
+  virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0;
+
+  virtual void Load(EngineLoadOption opts) = 0;
+
+  virtual void Unload(EngineUnloadOption opts) = 0;
+
   // cortex.llamacpp interface
   virtual void HandleChatCompletion(
       std::shared_ptr<Json::Value> json_body,