Skip to content

Commit

Permalink
feat: update engine interface
Browse files Browse the repository at this point in the history
  • Loading branch information
namchuai committed Dec 2, 2024
1 parent 1641500 commit 4f97bd4
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 144 deletions.
5 changes: 2 additions & 3 deletions engine/controllers/engines.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@ std::string NormalizeEngine(const std::string& engine) {
void Engines::ListEngine(
const HttpRequestPtr& req,
std::function<void(const HttpResponsePtr&)>&& callback) const {
std::vector<std::string> supported_engines{kLlamaEngine, kOnnxEngine,
kTrtLlmEngine};
Json::Value ret;
for (const auto& engine : supported_engines) {
auto engine_names = engine_service_->GetSupportedEngineNames().value();
for (const auto& engine : engine_names) {
auto installed_engines =
engine_service_->GetInstalledEngineVariants(engine);
if (installed_engines.has_error()) {
Expand Down
10 changes: 10 additions & 0 deletions engine/cortex-common/EngineI.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
#pragma once

#include <filesystem>
#include <functional>
#include <memory>

#include "json/value.h"
#include "trantor/utils/Logger.h"
class EngineI {
public:
struct EngineLoadOption {
std::filesystem::path engine_path;
std::filesystem::path
cuda_path; // TODO: make this more generic. Here just to test for now
bool custom_engine_path;
};

virtual ~EngineI() {}

virtual void Load(EngineLoadOption opts) = 0;

// cortex.llamacpp interface
virtual void HandleChatCompletion(
std::shared_ptr<Json::Value> json_body,
Expand Down
198 changes: 58 additions & 140 deletions engine/services/engine_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ cpp::result<bool, std::string> EngineService::UninstallEngineVariant(
const std::string& engine, const std::optional<std::string> version,
const std::optional<std::string> variant) {
auto ne = NormalizeEngine(engine);
std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(ne)) {
CTL_INF("Engine " << ne << " is already loaded, unloading it");
auto unload_res = UnloadEngine(ne);
Expand Down Expand Up @@ -272,6 +273,7 @@ cpp::result<void, std::string> EngineService::DownloadEngine(
if (selected_variant == std::nullopt) {
return cpp::fail("Failed to find a suitable variant for " + engine);
}
std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(engine)) {
CTL_INF("Engine " << engine << " is already loaded, unloading it");
auto unload_res = UnloadEngine(engine);
Expand Down Expand Up @@ -503,6 +505,7 @@ EngineService::SetDefaultEngineVariant(const std::string& engine,
" is not installed yet!");
}

std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(ne)) {
CTL_INF("Engine " << ne << " is already loaded, unloading it");
auto unload_res = UnloadEngine(ne);
Expand Down Expand Up @@ -631,7 +634,6 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const {
}

// Returns true if `engine` (after name normalization) currently has an
// entry in the loaded-engine map.
//
// Precondition: the caller must already hold engines_mutex_. Every call
// site visible in this file (LoadEngine, UnloadEngine, DownloadEngine,
// SetDefaultEngineVariant, UpdateEngine, UninstallEngineVariant) acquires
// the mutex before calling this helper; taking the same non-recursive
// std::mutex again here would deadlock, so this function must not lock.
bool EngineService::IsEngineLoaded(const std::string& engine) {
  auto ne = NormalizeEngine(engine);
  return engines_.find(ne) != engines_.end();
}
Expand All @@ -651,6 +653,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
const std::string& engine_name) {
auto ne = NormalizeEngine(engine_name);

std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(ne)) {
CTL_INF("Engine " << ne << " is already loaded");
return {};
Expand All @@ -672,6 +675,7 @@ cpp::result<void, std::string> EngineService::LoadEngine(
auto user_defined_engine_path = getenv("ENGINE_PATH");
#endif

auto custom_engine_path = user_defined_engine_path != nullptr;
CTL_DBG("user defined engine path: " << user_defined_engine_path);
const std::filesystem::path engine_dir_path = [&] {
if (user_defined_engine_path != nullptr) {
Expand All @@ -685,8 +689,6 @@ cpp::result<void, std::string> EngineService::LoadEngine(
}
}();

CTL_DBG("Engine path: " << engine_dir_path.string());

if (!std::filesystem::exists(engine_dir_path)) {
CTL_ERR("Directory " + engine_dir_path.string() + " is not exist!");
return cpp::fail("Directory " + engine_dir_path.string() +
Expand All @@ -696,164 +698,74 @@ cpp::result<void, std::string> EngineService::LoadEngine(
CTL_INF("Engine path: " << engine_dir_path.string());

try {
#if defined(_WIN32)
// TODO(?) If we only allow to load an engine at a time, the logic is simpler.
// We would like to support running multiple engines at the same time. Therefore,
// the adding/removing dll directory logic is quite complicated:
// 1. If llamacpp is loaded and new requested engine is tensorrt-llm:
// Unload the llamacpp dll directory then load the tensorrt-llm
// 2. If tensorrt-llm is loaded and new requested engine is llamacpp:
// Do nothing, llamacpp can re-use tensorrt-llm dependencies (need to be tested careful)
// 3. Add dll directory if met other conditions

auto add_dll = [this](const std::string& e_type,
const std::filesystem::path& p) {
if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) {
CTL_DBG("Added dll directory: " << p.string());
engines_[e_type].cookie = cookie;
} else {
CTL_WRN("Could not add dll directory: " << p.string());
}

auto cuda_path = file_manager_utils::GetCudaToolkitPath(e_type);
if (auto cuda_cookie = AddDllDirectory(cuda_path.c_str());
cuda_cookie != 0) {
CTL_DBG("Added cuda dll directory: " << p.string());
engines_[e_type].cuda_cookie = cuda_cookie;
} else {
CTL_WRN("Could not add cuda dll directory: " << p.string());
}
auto dylib =
std::make_unique<cortex_cpp::dylib>(engine_dir_path.string(), "engine");

// init
auto func = dylib->get_function<EngineI*()>("get_engine");
auto engine_obj = func();
auto load_opts = EngineI::EngineLoadOption{
.engine_path = engine_dir_path,
.cuda_path = file_manager_utils::GetCudaToolkitPath(ne),
.custom_engine_path = custom_engine_path,
};
engine_obj->Load(load_opts);

#if defined(_WIN32)
if (bool should_use_dll_search_path = !(_wgetenv(L"ENGINE_PATH"));
#else
if (bool should_use_dll_search_path = !(getenv("ENGINE_PATH"));
#endif
should_use_dll_search_path) {
if (IsEngineLoaded(kLlamaRepo) && ne == kTrtLlmRepo &&
should_use_dll_search_path) {

{
std::lock_guard<std::mutex> lock(engines_mutex_);
// Remove llamacpp dll directory
if (!RemoveDllDirectory(engines_[kLlamaRepo].cookie)) {
CTL_WRN("Could not remove dll directory: " << kLlamaRepo);
} else {
CTL_DBG("Removed dll directory: " << kLlamaRepo);
}
if (!RemoveDllDirectory(engines_[kLlamaRepo].cuda_cookie)) {
CTL_WRN("Could not remove cuda dll directory: " << kLlamaRepo);
} else {
CTL_DBG("Removed cuda dll directory: " << kLlamaRepo);
}
}

add_dll(ne, engine_dir_path);
} else if (IsEngineLoaded(kTrtLlmRepo) && ne == kLlamaRepo) {
// Do nothing
} else {
add_dll(ne, engine_dir_path);
}
}
#endif
{
std::lock_guard<std::mutex> lock(engines_mutex_);
engines_[ne].dl = std::make_unique<cortex_cpp::dylib>(
engine_dir_path.string(), "engine");
}
#if defined(__linux__)
const char* name = "LD_LIBRARY_PATH";
auto data = getenv(name);
std::string v;
if (auto g = getenv(name); g) {
v += g;
}
CTL_INF("LD_LIBRARY_PATH: " << v);
auto llamacpp_path = file_manager_utils::GetCudaToolkitPath(kLlamaRepo);
CTL_INF("llamacpp_path: " << llamacpp_path);
// tensorrt is not supported for now
// auto trt_path = file_manager_utils::GetCudaToolkitPath(kTrtLlmRepo);

auto new_v = llamacpp_path.string() + ":" + v;
setenv(name, new_v.c_str(), true);
CTL_INF("LD_LIBRARY_PATH: " << getenv(name));
#endif
engines_[ne].engine = engine_obj;
engines_[ne].dl = std::move(dylib);

CTL_DBG("Engine loaded: "
<< ne); // TODO: output more information like version and variant
} catch (const cortex_cpp::dylib::load_error& e) {
CTL_ERR("Could not load engine: " << e.what());
{
std::lock_guard<std::mutex> lock(engines_mutex_);
engines_.erase(ne);
}
engines_.erase(ne);
return cpp::fail("Could not load engine " + ne + ": " + e.what());
}

{
std::lock_guard<std::mutex> lock(engines_mutex_);
auto func = engines_[ne].dl->get_function<EngineI*()>("get_engine");
engines_[ne].engine = func();

auto& en = std::get<EngineI*>(engines_[ne].engine);
if (ne == kLlamaRepo) { //fix for llamacpp engine first
auto config = file_manager_utils::GetCortexConfig();
if (en->IsSupported("SetFileLogger")) {
en->SetFileLogger(config.maxLogLines,
(std::filesystem::path(config.logFolderPath) /
std::filesystem::path(config.logLlamaCppPath))
.string());
} else {
CTL_WRN("Method SetFileLogger is not supported yet");
}
if (en->IsSupported("SetLogLevel")) {
en->SetLogLevel(logging_utils_helper::global_log_level);
} else {
CTL_WRN("Method SetLogLevel is not supported yet");
}
}
CTL_DBG("loaded engine: " << ne);
}
// TODO: namh recheck this if can be moved to cortex.llamacpp
// if (ne == kLlamaRepo) { //fix for llamacpp engine first
// auto config = file_manager_utils::GetCortexConfig();
// if (en->IsSupported("SetFileLogger")) {
// en->SetFileLogger(config.maxLogLines,
// (std::filesystem::path(config.logFolderPath) /
// std::filesystem::path(config.logLlamaCppPath))
// .string());
// } else {
// CTL_WRN("Method SetFileLogger is not supported yet");
// }
// if (en->IsSupported("SetLogLevel")) {
// en->SetLogLevel(logging_utils_helper::global_log_level);
// } else {
// CTL_WRN("Method SetLogLevel is not supported yet");
// }
// }

return {};
}

// Unloads a previously loaded engine.
//
// Destroys the engine object that was produced by the dynamic library's
// get_engine() factory, then erases the map entry (which also releases the
// dylib handle). Returns an error if the engine is not currently loaded.
//
// NOTE(review): the original span interleaved the pre- and post-change diff
// lines (duplicate `lock`/`e` declarations, duplicate erase, unbalanced
// braces); this is the deduplicated version.
cpp::result<void, std::string> EngineService::UnloadEngine(
    const std::string& engine) {
  auto ne = NormalizeEngine(engine);
  std::lock_guard<std::mutex> lock(engines_mutex_);
  // Check the map directly while holding the lock; calling a helper that
  // re-locks the same non-recursive mutex would deadlock.
  if (engines_.find(ne) == engines_.end()) {
    return cpp::fail("Engine " + ne + " is not loaded yet!");
  }
  // The engine object was allocated inside the dynamic library; delete it
  // before the library handle is released by the erase below.
  EngineI* e = std::get<EngineI*>(engines_[ne].engine);
  delete e;
  engines_.erase(ne);
  CTL_DBG("Engine unloaded: " + ne);
  return {};
}

std::vector<EngineV> EngineService::GetLoadedEngines() {
{
std::lock_guard<std::mutex> lock(engines_mutex_);
std::vector<EngineV> loaded_engines;
for (const auto& [key, value] : engines_) {
loaded_engines.push_back(value.engine);
}
return loaded_engines;
std::lock_guard<std::mutex> lock(engines_mutex_);
std::vector<EngineV> loaded_engines;
for (const auto& [key, value] : engines_) {
loaded_engines.push_back(value.engine);
}
return loaded_engines;
}

cpp::result<github_release_utils::GitHubRelease, std::string>
Expand Down Expand Up @@ -899,6 +811,7 @@ cpp::result<EngineUpdateResult, std::string> EngineService::UpdateEngine(
CTL_INF("Default variant: " << default_variant->variant
<< ", version: " + default_variant->version);

std::lock_guard<std::mutex> lock(engines_mutex_);
if (IsEngineLoaded(ne)) {
CTL_INF("Engine " << ne << " is already loaded, unloading it");
auto unload_res = UnloadEngine(ne);
Expand Down Expand Up @@ -955,3 +868,8 @@ cpp::result<EngineUpdateResult, std::string> EngineService::UpdateEngine(
.from = default_variant->version,
.to = latest_version->tag_name};
}

// Returns the engine names this build supports, as listed under the
// `supportedEngines` key of the cortex configuration.
cpp::result<std::vector<std::string>, std::string>
EngineService::GetSupportedEngineNames() {
  auto cortex_config = file_manager_utils::GetCortexConfig();
  return cortex_config.supportedEngines;
}
2 changes: 2 additions & 0 deletions engine/services/engine_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ class EngineService : public EngineServiceI {
cpp::result<EngineUpdateResult, std::string> UpdateEngine(
const std::string& engine);

cpp::result<std::vector<std::string>, std::string> GetSupportedEngineNames();

private:
cpp::result<void, std::string> DownloadEngine(
const std::string& engine, const std::string& version = "latest",
Expand Down
11 changes: 10 additions & 1 deletion engine/utils/config_yaml_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <iostream>
#include <mutex>
#include <string>
#include "utils/engine_constants.h"
#include "utils/logging_utils.h"
#include "utils/result.hpp"
#include "yaml-cpp/yaml.h"
Expand All @@ -22,6 +23,8 @@ constexpr const auto kDefaultCorsEnabled = true;
const std::vector<std::string> kDefaultEnabledOrigins{
"http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"};
constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1";
const std::vector<std::string> kDefaultSupportedEngines{
kLlamaEngine, kOnnxEngine, kTrtLlmEngine};

struct CortexConfig {
std::string logFolderPath;
Expand Down Expand Up @@ -59,6 +62,7 @@ struct CortexConfig {

bool verifyPeerSsl;
bool verifyHostSsl;
std::vector<std::string> supportedEngines;
};

class CortexConfigMgr {
Expand Down Expand Up @@ -117,6 +121,7 @@ class CortexConfigMgr {
node["noProxy"] = config.noProxy;
node["verifyPeerSsl"] = config.verifyPeerSsl;
node["verifyHostSsl"] = config.verifyHostSsl;
node["supportedEngines"] = config.supportedEngines;

out_file << node;
out_file.close();
Expand Down Expand Up @@ -151,7 +156,7 @@ class CortexConfigMgr {
!node["proxyUsername"] || !node["proxyPassword"] ||
!node["verifyPeerSsl"] || !node["verifyHostSsl"] ||
!node["verifyProxySsl"] || !node["verifyProxyHostSsl"] ||
!node["noProxy"]);
!node["noProxy"] || !node["supportedEngines"]);

CortexConfig config = {
.logFolderPath = node["logFolderPath"]
Expand Down Expand Up @@ -235,6 +240,10 @@ class CortexConfigMgr {
.verifyHostSsl = node["verifyHostSsl"]
? node["verifyHostSsl"].as<bool>()
: default_cfg.verifyHostSsl,
.supportedEngines =
node["supportedEngines"]
? node["supportedEngines"].as<std::vector<std::string>>()
: default_cfg.supportedEngines,
};
if (should_update_config) {
l.unlock();
Expand Down
1 change: 1 addition & 0 deletions engine/utils/file_manager_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ inline config_yaml_utils::CortexConfig GetDefaultConfig() {
.noProxy = config_yaml_utils::kDefaultNoProxy,
.verifyPeerSsl = true,
.verifyHostSsl = true,
.supportedEngines = config_yaml_utils::kDefaultSupportedEngines,
};
}

Expand Down

0 comments on commit 4f97bd4

Please sign in to comment.