feat: cortex.onnx (#660)
vansangpfiev authored Jun 14, 2024
1 parent 5b7e6dc commit fa82078
Showing 3 changed files with 25 additions and 8 deletions.
29 changes: 21 additions & 8 deletions cortex-cpp/controllers/server.cc
@@ -16,6 +16,7 @@ namespace inferences {
 namespace {
 constexpr static auto kLlamaEngine = "cortex.llamacpp";
 constexpr static auto kPythonRuntimeEngine = "cortex.python";
+constexpr static auto kOnnxEngine = "cortex.onnx";
 }  // namespace
 
 server::server(){
@@ -32,7 +33,7 @@ void server::ChatCompletion(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback) {
   auto engine_type =
-      (*(req->getJsonObject())).get("engine", kLlamaEngine).asString();
+      (*(req->getJsonObject())).get("engine", cur_engine_type_).asString();
   if (!IsEngineLoaded(engine_type)) {
     Json::Value res;
     res["message"] = "Engine is not loaded yet";
@@ -91,7 +92,7 @@ void server::UnloadModel(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback) {
   auto engine_type =
-      (*(req->getJsonObject())).get("engine", kLlamaEngine).asString();
+      (*(req->getJsonObject())).get("engine", cur_engine_type_).asString();
   if (!IsEngineLoaded(engine_type)) {
     Json::Value res;
     res["message"] = "Engine is not loaded yet";
@@ -118,7 +119,7 @@ void server::ModelStatus(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback) {
   auto engine_type =
-      (*(req->getJsonObject())).get("engine", kLlamaEngine).asString();
+      (*(req->getJsonObject())).get("engine", cur_engine_type_).asString();
   if (!IsEngineLoaded(engine_type)) {
     Json::Value res;
     res["message"] = "Engine is not loaded yet";
@@ -144,8 +145,7 @@ void server::ModelStatus(
 
 void server::GetModels(const HttpRequestPtr& req,
                        std::function<void(const HttpResponsePtr&)>&& callback) {
-  // TODO(sang) need to change this when we support Tensorrt-llm
-  if (!IsEngineLoaded(kLlamaEngine)) {
+  if (!IsEngineLoaded(cur_engine_type_)) {
     Json::Value res;
     res["message"] = "Engine is not loaded yet";
     auto resp = cortex_utils::nitroHttpJsonResponse(res);
@@ -156,7 +156,7 @@ void server::GetModels(const HttpRequestPtr& req,
   }
 
   LOG_TRACE << "Start to get models";
-  auto& en = std::get<EngineI*>(engines_[kLlamaEngine].engine);
+  auto& en = std::get<EngineI*>(engines_[cur_engine_type_].engine);
   if (en->IsSupported("GetModels")) {
     en->GetModels(
         req->getJsonObject(),
@@ -257,11 +257,13 @@ void server::LoadModel(const HttpRequestPtr& req,
 
   // We have not loaded engine yet, should load it before using it
   if (engines_.find(engine_type) == engines_.end()) {
-    // TODO(sang) we cannot run cortex.llamacpp and cortex.tensorrt-llm at the same time.
-    // So need an unload engine machanism to handle.
+    // We only use single engine so unload all engines before load new engine
+    UnloadEngines();
     auto get_engine_path = [](std::string_view e) {
       if (e == kLlamaEngine) {
         return cortex_utils::kLlamaLibPath;
+      } else if(e == kOnnxEngine) {
+        return cortex_utils::kOnnxLibPath;
       }
       return cortex_utils::kLlamaLibPath;
     };
@@ -292,6 +294,7 @@ void server::LoadModel(const HttpRequestPtr& req,
     callback(resp);
     return;
   }
+  cur_engine_type_ = engine_type;
 
   auto func =
       engines_[engine_type].dl->get_function<EngineI*()>("get_engine");
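
The get_engine lookup above is the plugin seam: each engine ships as a shared library exporting a get_engine factory that returns an EngineI*. cortex-cpp resolves the symbol through its dl wrapper; the sketch below shows the same pattern with a plain POSIX loader, where EngineI is a stand-in declaration and the error handling is illustrative:

    #include <dlfcn.h>
    #include <stdexcept>
    #include <string>

    struct EngineI;  // opaque engine interface, defined elsewhere

    EngineI* LoadEngineFromLibrary(const std::string& lib_path) {
      void* handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
      if (!handle) throw std::runtime_error(dlerror());

      // The library is expected to export: EngineI* get_engine();
      using GetEngineFn = EngineI* (*)();
      auto get_engine =
          reinterpret_cast<GetEngineFn>(dlsym(handle, "get_engine"));
      if (!get_engine) throw std::runtime_error(dlerror());

      return get_engine();  // caller keeps `handle` alive while engine is used
    }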
@@ -358,4 +361,14 @@ bool server::IsEngineLoaded(const std::string& e) {
   return engines_.find(e) != engines_.end();
 }
 
+void server::UnloadEngines() {
+  // We unload all engines except python engine
+  for (auto it = engines_.begin(); it != engines_.end();) {
+    if (it->first != kPythonRuntimeEngine) {
+      it = engines_.erase(it);
+    } else
+      it++;
+  }
+}
+
 }  // namespace inferences
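
The new UnloadEngines() walks the engine map and drops everything except the Python runtime, relying on the fact that unordered_map::erase(it) returns an iterator to the next element (erasing invalidates the current one). A standalone demonstration of that idiom, with illustrative entries:

    #include <iostream>
    #include <string>
    #include <unordered_map>

    int main() {
      std::unordered_map<std::string, int> engines{
          {"cortex.llamacpp", 1}, {"cortex.python", 2}, {"cortex.onnx", 3}};

      for (auto it = engines.begin(); it != engines.end();) {
        if (it->first != "cortex.python")
          it = engines.erase(it);  // erase returns the next valid iterator
        else
          ++it;
      }

      for (const auto& [name, id] : engines)
        std::cout << name << "\n";  // only "cortex.python" remains
    }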
3 changes: 3 additions & 0 deletions cortex-cpp/controllers/server.h
@@ -99,6 +99,8 @@ class server : public drogon::HttpController<server>,
                           SyncQueue& q);
   bool IsEngineLoaded(const std::string& e);
 
+  void UnloadEngines();
+
  private:
   struct SyncQueue {
     void push(std::pair<Json::Value, Json::Value>&& p) {
@@ -145,5 +147,6 @@ class server : public drogon::HttpController<server>,
     EngineV engine;
   };
   std::unordered_map<std::string, EngineInfo> engines_;
+  std::string cur_engine_type_;
 };
 };  // namespace inferences
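
The std::get<EngineI*> call in server.cc implies EngineV is a std::variant over engine interface pointers. A hypothetical reconstruction of these types, for orientation only; the real definitions live elsewhere in cortex-cpp and may differ:

    #include <memory>
    #include <string>
    #include <unordered_map>
    #include <variant>

    struct EngineI {};              // C++ engine interface (llamacpp, onnx)
    struct CortexPythonEngineI {};  // assumed alternative for cortex.python
    class DynamicLibrary {};        // stand-in for the shared-library wrapper

    using EngineV = std::variant<EngineI*, CortexPythonEngineI*>;

    struct EngineInfo {
      std::unique_ptr<DynamicLibrary> dl;  // keeps the library mapped
      EngineV engine;
    };

    // As in server.h: loaded engines keyed by name, plus the default
    // engine for requests that do not name one.
    std::unordered_map<std::string, EngineInfo> engines_;
    std::string cur_engine_type_;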
1 change: 1 addition & 0 deletions cortex-cpp/utils/cortex_utils.h
@@ -27,6 +27,7 @@
 namespace cortex_utils {
 constexpr static auto kLlamaLibPath = "/engines/cortex.llamacpp";
 constexpr static auto kPythonRuntimeLibPath = "/engines/cortex.python";
+constexpr static auto kOnnxLibPath = "/engines/cortex.onnx";
 
 inline std::string models_folder = "./models";
 
