Commit

feat: update engine interface

namchuai committed Dec 2, 2024
1 parent 1641500 commit 5336334
Showing 2 changed files with 58 additions and 137 deletions.
10 changes: 10 additions & 0 deletions engine/cortex-common/EngineI.h
@@ -1,14 +1,24 @@
#pragma once

#include <filesystem>
#include <functional>
#include <memory>

#include "json/value.h"
#include "trantor/utils/Logger.h"
class EngineI {
 public:
  struct EngineLoadOption {
    std::filesystem::path engine_path;
    std::filesystem::path
        cuda_path;  // TODO: make this more generic. Here just to test for now
    bool custom_engine_path;
  };

  virtual ~EngineI() {}

  virtual void Load(EngineLoadOption opts) = 0;

  // cortex.llamacpp interface
  virtual void HandleChatCompletion(
      std::shared_ptr<Json::Value> json_body,
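The header above adds `EngineLoadOption` and a pure-virtual `Load(EngineLoadOption)` so the service can hand path information to the engine instead of managing it itself. Below is a minimal sketch of consuming those options on the engine side; the function name, the `main` driver, the include path, and the literal paths are assumptions for illustration, and only the struct fields and the `Load` signature come from `EngineI.h`.

```cpp
// Hypothetical sketch, not part of this commit: how engine-side code might
// consume the new EngineLoadOption. Only the struct fields come from
// EngineI.h; the include path, function name, and paths are assumptions.
#include <iostream>

#include "cortex-common/EngineI.h"  // assumed include path

// A real engine would override Load(EngineLoadOption) and do something like
// this with the fields, e.g. extend its own library search path.
void DescribeLoadOptions(const EngineI::EngineLoadOption& opts) {
  std::cout << "engine_path: " << opts.engine_path.string() << "\n"
            << "cuda_path: " << opts.cuda_path.string() << "\n"
            << "custom_engine_path: " << std::boolalpha
            << opts.custom_engine_path << "\n";
}

int main() {
  EngineI::EngineLoadOption opts{
      .engine_path = "/opt/cortex/engines/cortex.llamacpp",  // hypothetical
      .cuda_path = "/opt/cortex/cuda",                       // hypothetical
      .custom_engine_path = false,
  };
  DescribeLoadOptions(opts);
  return 0;
}
```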
185 changes: 48 additions & 137 deletions engine/services/engine_service.cc
@@ -651,6 +651,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
const std::string& engine_name) {
auto ne = NormalizeEngine(engine_name);

// std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(ne)) {
CTL_INF("Engine " << ne << " is already loaded");
return {};
@@ -672,6 +673,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
auto user_defined_engine_path = getenv("ENGINE_PATH");
#endif

auto custom_engine_path = user_defined_engine_path != nullptr;
CTL_DBG("user defined engine path: " << user_defined_engine_path);
const std::filesystem::path engine_dir_path = [&] {
if (user_defined_engine_path != nullptr) {
@@ -685,8 +687,6 @@ cpp::result<void, std::string> EngineService::LoadEngine(
}
}();

CTL_DBG("Engine path: " << engine_dir_path.string());

if (!std::filesystem::exists(engine_dir_path)) {
CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!");
return cpp::fail("Directory " + engine_dir_path.string() +
@@ -696,90 +696,20 @@ cpp::result<void, std::string> EngineService::LoadEngine(
CTL_INF("Engine path: " << engine_dir_path.string());

try {
#if defined(_WIN32)
// TODO(?) If we only allow to load an engine at a time, the logic is simpler.
// We would like to support running multiple engines at the same time. Therefore,
// the adding/removing dll directory logic is quite complicated:
// 1. If llamacpp is loaded and new requested engine is tensorrt-llm:
// Unload the llamacpp dll directory then load the tensorrt-llm
// 2. If tensorrt-llm is loaded and new requested engine is llamacpp:
// Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful)
// 3. Add dll directory if met other conditions

auto add_dll = [this](const std::string& e_type,
const std::filesystem::path& p) {
if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) {
CTL_DBG("Added dll directory: " << p.string());
engines_[e_type].cookie = cookie;
} else {
CTL_WRN("Could not add dll directory: " << p.string());
}

auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type);
if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str());
cuda_cookie != 0) {
CTL_DBG("Added cuda dll directory: " << p.string());
engines_[e_type].cuda_cookie = cuda_cookie;
} else {
CTL_WRN("Could not add cuda dll directory: " << p.string());
}
auto dylib =
std::make_unique<cortex_cpp::dylib>(engine_dir_path.string(), "engine");

// init
auto func = dylib->get_function<EngineI*()>("get_engine");
auto engine_obj = func();
engines_[ne].engine = engine_obj;
auto load_opts = EngineI::EngineLoadOption{
.engine_path = engine_dir_path,
.cuda_path = file_manager_utils::GetCudaToolkitPath(ne),
.custom_engine_path = custom_engine_path,
};

#if defined(_WIN32)
if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH"));
#else
if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH"));
#endif
should_use_dll_search_path) {
if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo &&
should_use_dll_search_path) {

{
std::lock_guard<std::mutex> lock(engines_mutex_);
// Remove llamacpp dll directory
if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) {
CTL_WRN("Could not remove dll directory: " << kLlamaRepo);
} else {
CTL_DBG("Removed dll directory: " << kLlamaRepo);
}
if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) {
CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo);
} else {
CTL_DBG("Removed cuda dll directory: " << kLlamaRepo);
}
}

add_dll(ne, engine_dir_path);
} else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) {
// Do nothing
} else {
add_dll(ne, engine_dir_path);
}
}
#endif
{
std::lock_guard<std::mutex> lock(engines_mutex_);
engines_[ne].dl = std::make_unique<cortex_cpp::dylib>(
engine_dir_path.string(), "engine");
}
#if defined(__linux__)
const char* name = "LD_LIBRARY_PATH";
auto data = getenv(name);
std::string v;
if (auto g = getenv(name); g) {
v += g;
}
CTL_INF("LD_LIBRARY_PATH: " << v);
auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo);
CTL_INF("llamacpp_path: " << llamacpp_path);
// tensorrt is not supported for now
// auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo);

auto new_v = llamacpp_path.string() + ":" + v;
setenv(name, new_v.c_str(), true);
CTL_INF("LD_LIBRARY_PATH: " << getenv(name));
#endif

engine_obj->Load(load_opts);
engines_[ne].dl = std::move(dylib);
} catch (const cortex_cpp::dylib::load_error& e) {
CTL_ERR("Could not load engine: " << e.what());
{
@@ -789,71 +719,52 @@ cpp::result<void, std::string> EngineService::LoadEngine(
return cpp::fail("Could not load engine " + ne + ": " + e.what());
}

{
std::lock_guard<std::mutex> lock(engines_mutex_);
auto func = engines_[ne].dl->get_function<EngineI*()>("get_engine");
engines_[ne].engine = func();

auto& en = std::get<EngineI*>(engines_[ne].engine);
if (ne == kLlamaRepo) { //fix for llamacpp engine first
auto config = file_manager_utils::GetCortexConfig();
if (en->IsSupported("SetFileLogger")) {
en->SetFileLogger(config.maxLogLines,
(std::filesystem::path(config.logFolderPath) /
std::filesystem::path(config.logLlamaCppPath))
.string());
} else {
CTL_WRN("Method SetFileLogger is not supported yet");
}
if (en->IsSupported("SetLogLevel")) {
en->SetLogLevel(logging_utils_helper::global_log_level);
} else {
CTL_WRN("Method SetLogLevel is not supported yet");
}
}
CTL_DBG("loaded engine: " << ne);
}
auto& en = std::get<EngineI*>(engines_[ne].engine);
// TODO: namh recheck whether this can be moved to cortex.llamacpp
// if (ne == kLlamaRepo) { //fix for llamacpp engine first
// auto config = file_manager_utils::GetCortexConfig();
// if (en->IsSupported("SetFileLogger")) {
// en->SetFileLogger(config.maxLogLines,
// (std::filesystem::path(config.logFolderPath) /
// std::filesystem::path(config.logLlamaCppPath))
// .string());
// } else {
// CTL_WRN("Method SetFileLogger is not supported yet");
// }
// if (en->IsSupported("SetLogLevel")) {
// en->SetLogLevel(logging_utils_helper::global_log_level);
// } else {
// CTL_WRN("Method SetLogLevel is not supported yet");
// }
// }
CTL_DBG("loaded engine: " << ne);

return {};
}
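Since the rendered diff interleaves the removed Windows/Linux path handling with the new code, the load path introduced by this commit is easier to read in isolation. The sketch below is assembled from the added lines above; locking, the ENGINE_PATH resolution, and error handling are omitted, so it is a reading aid rather than a drop-in replacement.

```cpp
// Condensed view of LoadEngine's new core, taken from the added lines above.
auto dylib =
    std::make_unique<cortex_cpp::dylib>(engine_dir_path.string(), "engine");

auto func = dylib->get_function<EngineI*()>("get_engine");
auto engine_obj = func();
engines_[ne].engine = engine_obj;

// Path handling now travels to the engine through EngineLoadOption.
auto load_opts = EngineI::EngineLoadOption{
    .engine_path = engine_dir_path,
    .cuda_path = file_manager_utils::GetCudaToolkitPath(ne),
    .custom_engine_path = custom_engine_path,
};
engine_obj->Load(load_opts);
engines_[ne].dl = std::move(dylib);
```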

cpp::result<void, std::string> EngineService::UnloadEngine(
const std::string& engine) {
auto ne = NormalizeEngine(engine);
{
std::lock_guard<std::mutex> lock(engines_mutex_);
if (!IsEngineLoaded(ne)) {
return cpp::fail("Engine " + ne + " is not loaded yet!");
}
EngineI* e = std::get<EngineI*>(engines_[ne].engine);
delete e;
LOG_INFO << "Unloading engine " << ne;

#if defined(_WIN32)
if (!RemoveDllDirectory(engines_[ne].cookie)) {
CTL_WRN("Could not remove dll directory: " << ne);
} else {
CTL_DBG("Removed dll directory: " << ne);
}
if (!RemoveDllDirectory(engines_[ne].cuda_cookie)) {
CTL_WRN("Could not remove cuda dll directory: " << ne);
} else {
CTL_DBG("Removed cuda dll directory: " << ne);
}
#endif
engines_.erase(ne);
std::lock_guard<std::mutex> lock(engines_mutex_);
if (!IsEngineLoaded(ne)) {
return cpp::fail("Engine " + ne + " is not loaded yet!");
}
CTL_DBG("Unloaded engine " + ne);
EngineI* e = std::get<EngineI*>(engines_[ne].engine);
delete e;
engines_.erase(ne);
CTL_DBG("Engine unloaded: " + ne);
return {};
}

std::vector<EngineV> EngineService::GetLoadedEngines() {
{
std::lock_guard<std::mutex> lock(engines_mutex_);
std::vector<EngineV> loaded_engines;
for (const auto& [key, value] : engines_) {
loaded_engines.push_back(value.engine);
}
return loaded_engines;
std::lock_guard<std::mutex> lock(engines_mutex_);
std::vector<EngineV> loaded_engines;
for (const auto& [key, value] : engines_) {
loaded_engines.push_back(value.engine);
}
return loaded_engines;
}

cpp::result<github_release_utils::GitHubRelease, std::string>
