Commit f29148b

update

namchuai committed Sep 5, 2024
1 parent f3cb69a commit f29148b

Showing 9 changed files with 218 additions and 161 deletions.
68 changes: 16 additions & 52 deletions engine/commands/engine_get_cmd.cc
@@ -1,64 +1,28 @@
 #include "engine_get_cmd.h"
 #include <iostream>
 #include <tabulate/table.hpp>
-#include "utils/file_manager_utils.h"
+#include "services/engine_service.h"
 #include "utils/logging_utils.h"

 namespace commands {

 void EngineGetCmd::Exec() const {
-  CTL_INF("[EneingeGetCmd] engine: " << engine_);
+  CTL_INF("[EngineGetCmd] engine: " << engine_);

-  auto ecp = file_manager_utils::GetEnginesContainerPath();
-  std::string onnx_status{"not_supported"};
-  std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp")
-                                    ? "ready"
-                                    : "not_initialized";
-  std::string tensorrt_status{"not_supported"};
-
-#ifdef _WIN32
-  onnx_status = std::filesystem::exists(ecp / "cortex.onnx")
-                    ? "ready"
-                    : "not_initialized";
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? "ready"
-                        : "not_initialized";
-#elif defined(__linux__)
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? "ready"
-                        : "not_initialized";
-#endif
-  std::vector<EngineInfo> engines = {
-      {.name = "cortex.onnx",
-       .description = "This extension enables chat completion API calls using "
-                      "the Onnx engine",
-       .version = "0.0.1",
-       .product_name = "Onnx Inference Engine",
-       .status = onnx_status},
-      {.name = "cortex.llamacpp",
-       .description = "This extension enables chat completion API calls using "
-                      "the LlamaCPP engine",
-       .version = "0.0.1",
-       .product_name = "LlamaCPP Inference Engine",
-       .status = llamacpp_status},
-      {.name = "cortex.tensorrt-llm",
-       .description = "This extension enables chat completion API calls using "
-                      "the TensorrtLLM engine",
-       .version = "0.0.1",
-       .product_name = "TensorrtLLM Inference Engine",
-       .status = tensorrt_status},
-  };
-
-  tabulate::Table table;
-  table.add_row({"name", "description", "version", "product name", "status"});
-  table.format().font_color(tabulate::Color::green);
-  for (auto& engine : engines) {
-    if (engine.name == engine_) {
-      table.add_row({engine.name, engine.description, engine.version,
-                     engine.product_name, engine.status});
-    }
+  auto engine_service = EngineService();
+  try {
+    auto status = engine_service.GetEngineInfo(engine_);
+    tabulate::Table table;
+    table.add_row({"name", "description", "version", "product name", "status"});
+    table.format().font_color(tabulate::Color::green);
+    table.add_row({status.name, status.description, status.version,
+                   status.product_name, status.status});
+    std::cout << table << std::endl;
+  } catch (const std::runtime_error& e) {
+    std::cerr << "Engine " << engine_ << " is not supported!" << "\n";
+  } catch (const std::exception& e) {
+    std::cerr << "Failed to get engine info for " << engine_ << ": " << e.what()
+              << "\n";
   }
-
-  std::cout << table << std::endl;
 }
 };  // namespace commands
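Note: the new services/engine_service.h and .cc are among the changed files that did not load in this view. Inferring only from the call sites in this commit — GetEngineInfo(engine_) returning an object with name/description/version/product_name/status fields and throwing std::runtime_error for an unsupported engine, GetEngineInfoList() returning an indexable list, and a kSupportEngines member used in the CLI parser below — the interface is presumably something like this sketch (everything beyond those call sites is an assumption):

// Hypothetical sketch of engine/services/engine_service.h, inferred from the
// call sites in this diff; the actual file is not visible in this view.
#pragma once

#include <array>
#include <string>
#include <vector>

class EngineService {
 public:
  // Mirrors the struct deleted from engine_get_cmd.h in this commit.
  struct EngineInfo {
    std::string name;
    std::string description;
    std::string version;
    std::string product_name;
    std::string status;  // e.g. "ready", "not_initialized", "not_supported"
  };

  // Names the CLI iterates over in CommandLineParser::EngineGet.
  static constexpr std::array<const char*, 3> kSupportEngines = {
      "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"};

  // Throws std::runtime_error when `engine` is not a supported engine.
  EngineInfo GetEngineInfo(const std::string& engine) const;

  // One EngineInfo per supported engine, in a stable order.
  std::vector<EngineInfo> GetEngineInfoList() const;
};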
10 changes: 1 addition & 9 deletions engine/commands/engine_get_cmd.h
Original file line number Diff line number Diff line change
@@ -1,16 +1,9 @@
#pragma once

#include <string>

namespace commands {
class EngineGetCmd {
struct EngineInfo {
std::string name;
std::string description;
std::string version;
std::string product_name;
std::string status;
};

public:
EngineGetCmd(const std::string& engine) : engine_{engine} {};

Expand All @@ -19,5 +12,4 @@ class EngineGetCmd {
private:
std::string engine_;
};

} // namespace commands
47 changes: 10 additions & 37 deletions engine/commands/engine_list_cmd.cc
@@ -1,49 +1,23 @@
 #include "engine_list_cmd.h"
 #include <filesystem>
 #include <tabulate/table.hpp>
-#include "utils/file_manager_utils.h"
+#include "services/engine_service.h"

 namespace commands {

 bool EngineListCmd::Exec() {
-  auto ecp = file_manager_utils::GetEnginesContainerPath();
-  std::string onnx_status{"not_supported"};
-  std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp")
-                                    ? "ready"
-                                    : "not_initialized";
-  std::string tensorrt_status{"not_supported"};
-#ifdef _WIN32
-  onnx_status = std::filesystem::exists(ecp / "cortex.onnx")
-                    ? "ready"
-                    : "not_initialized";
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? "ready"
-                        : "not_initialized";
-#elif defined(__linux__)
-  tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
-                        ? "ready"
-                        : "not_initialized";
-#endif
+  auto engine_service = EngineService();
+  auto status_list = engine_service.GetEngineInfoList();

   tabulate::Table table;
   table.add_row(
-      {"(Index)", "name", "description", "version", "product name", "status"});
-  table.format().font_color(tabulate::Color::green);
-  table.add_row(
-      {"1", "cortex.onnx",
-       "This extension enables chat completion API calls using the Onnx engine",
-       "0.0.1", "Onnx Inference Engine", onnx_status});
-
-  table.add_row({"2", "cortex.llamacpp",
-                 "This extension enables chat completion API calls using the "
-                 "LlamaCPP engine",
-                 "0.0.1", "LlamaCPP Inference Engine", llamacpp_status});
-
-  // tensorrt llm
-  table.add_row({"3", "cortex.tensorrt-llm",
-                 "This extension enables chat completion API calls using the "
-                 "TensorrtLLM engine",
-                 "0.0.1", "TensorrtLLM Inference Engine", tensorrt_status});
+      {"(Index)", "name", "description", "version", "product name", "status"});
+  for (int i = 0; i < status_list.size(); i++) {
+    auto status = status_list[i];
+    std::string index = std::to_string(i + 1);
+    table.add_row({index, status.name, status.description, status.version,
+                   status.product_name, status.status});
+  }

   for (int i = 0; i < 6; i++) {
     table[0][i]
@@ -62,5 +36,4 @@ bool EngineListCmd::Exec() {
   std::cout << table << std::endl;
   return true;
 }
-
 };  // namespace commands
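For illustration, here is roughly what the refactored list command would print on a Linux machine that has only cortex.llamacpp installed — names, versions, and statuses taken from the hard-coded rows deleted above; the actual layout and coloring come from tabulate and from whatever GetEngineInfoList() returns:

(Index)  name                 description                                          version  product name                  status
1        cortex.onnx          This extension enables chat completion API call...  0.0.1    Onnx Inference Engine         not_supported
2        cortex.llamacpp      This extension enables chat completion API call...  0.0.1    LlamaCPP Inference Engine     ready
3        cortex.tensorrt-llm  This extension enables chat completion API call...  0.0.1    TensorrtLLM Inference Engine  not_initialized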
7 changes: 5 additions & 2 deletions engine/controllers/command_line_parser.cc
@@ -13,6 +13,7 @@
 #include "commands/run_cmd.h"
 #include "commands/server_stop_cmd.h"
 #include "config/yaml_config.h"
+#include "services/engine_service.h"
 #include "utils/cortex_utils.h"
 #include "utils/logging_utils.h"

@@ -185,11 +186,13 @@ void CommandLineParser::EngineManagement(CLI::App* parent,

 void CommandLineParser::EngineGet(CLI::App* parent) {
   auto get_cmd = parent->add_subcommand("get", "Get an engine info");
+  auto engine_service = EngineService();

-  for (auto& engine : supportedEngines_) {
+  for (auto& engine : engine_service.kSupportEngines) {
     std::string engine_name{engine};
     std::string desc = "Get " + engine_name + " status";
-    auto engine_get_cmd = get_cmd->add_subcommand(engine, desc);
+
+    auto engine_get_cmd = get_cmd->add_subcommand(engine_name, desc);
     engine_get_cmd->callback([engine_name] {
       commands::EngineGetCmd cmd(engine_name);
       cmd.Exec();
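The std::string engine_name{engine} copy matters here: engine walks the const char* entries of kSupportEngines, and each callback captures its own owned engine_name by value, so the name is still valid when the subcommand fires. Assuming the get group is mounted under an engines command and the binary is invoked as cortex (neither is visible in this excerpt), the generated subcommands would be exercised like:

  cortex engines get cortex.llamacpp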
5 changes: 0 additions & 5 deletions engine/controllers/command_line_parser.h
@@ -1,6 +1,5 @@
 #pragma once

-#include <array>
 #include "CLI/CLI.hpp"

 class CommandLineParser {
@@ -15,8 +14,4 @@ class CommandLineParser {
   void EngineGet(CLI::App* parent);

   CLI::App app_;
-
-  // TODO: move this one to somewhere else
-  static constexpr std::array<const char*, 3> supportedEngines_ = {
-      "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"};
 };
130 changes: 79 additions & 51 deletions engine/controllers/engines.cc
@@ -1,11 +1,17 @@
 #include "engines.h"
+#include <filesystem>
+#include <sstream>
+#include <stdexcept>
+#include <utility>
+#include "services/engine_service.h"
 #include "utils/archive_utils.h"
 #include "utils/cortex_utils.h"
 #include "utils/system_info_utils.h"

-void Engines::InitEngine(const HttpRequestPtr& req,
-                         std::function<void(const HttpResponsePtr&)>&& callback,
-                         const std::string& engine) const {
+void Engines::InstallEngine(
+    const HttpRequestPtr& req,
+    std::function<void(const HttpResponsePtr&)>&& callback,
+    const std::string& engine) const {
   LOG_DEBUG << "InitEngine, Engine: " << engine;
   if (engine.empty()) {
     Json::Value res;
@@ -114,62 +120,84 @@ void Engines::InitEngine(const HttpRequestPtr& req,
 void Engines::ListEngine(
     const HttpRequestPtr& req,
     std::function<void(const HttpResponsePtr&)>&& callback) const {
+  auto engine_service = EngineService();
+  auto status_list = engine_service.GetEngineInfoList();
+
   Json::Value ret;
   ret["object"] = "list";
   Json::Value data(Json::arrayValue);
-  Json::Value obj_onnx, obj_llamacpp, obj_tensorrt;
-  obj_onnx["name"] = "cortex.onnx";
-  obj_onnx["description"] =
-      "This extension enables chat completion API calls using the Onnx engine";
-  obj_onnx["version"] = "0.0.1";
-  obj_onnx["productName"] = "Onnx Inference Engine";
-
-  obj_llamacpp["name"] = "cortex.llamacpp";
-  obj_llamacpp["description"] =
-      "This extension enables chat completion API calls using the LlamaCPP "
-      "engine";
-  obj_llamacpp["version"] = "0.0.1";
-  obj_llamacpp["productName"] = "LlamaCPP Inference Engine";
-
-  obj_tensorrt["name"] = "cortex.tensorrt-llm";
-  obj_tensorrt["description"] =
-      "This extension enables chat completion API calls using the TensorrtLLM "
-      "engine";
-  obj_tensorrt["version"] = "0.0.1";
-  obj_tensorrt["productName"] = "TensorrtLLM Inference Engine";
-
-#ifdef _WIN32
-  if (std::filesystem::exists(std::filesystem::current_path().string() +
-                              cortex_utils::kOnnxLibPath)) {
-    obj_onnx["status"] = "ready";
-  } else {
-    obj_onnx["status"] = "not_initialized";
-  }
-#else
-  obj_onnx["status"] = "not_supported";
-#endif
-  // lllamacpp
-  if (std::filesystem::exists(std::filesystem::current_path().string() +
-                              cortex_utils::kLlamaLibPath)) {
-
-    obj_llamacpp["status"] = "ready";
-  } else {
-    obj_llamacpp["status"] = "not_initialized";
-  }
-  // tensorrt llm
-  if (std::filesystem::exists(std::filesystem::current_path().string() +
-                              cortex_utils::kTensorrtLlmPath)) {
-    obj_tensorrt["status"] = "ready";
-  } else {
-    obj_tensorrt["status"] = "not_initialized";
+  for (auto& status : status_list) {
+    Json::Value ret;
+    ret["name"] = status.name;
+    ret["description"] = status.description;
+    ret["version"] = status.version;
+    ret["productName"] = status.product_name;
+    ret["status"] = status.status;
+
+    data.append(std::move(ret));
   }

-  data.append(std::move(obj_onnx));
-  data.append(std::move(obj_llamacpp));
-  data.append(std::move(obj_tensorrt));
   ret["data"] = data;
   ret["result"] = "OK";
   auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
   resp->setStatusCode(k200OK);
   callback(resp);
 }

+void Engines::GetEngine(const HttpRequestPtr& req,
+                        std::function<void(const HttpResponsePtr&)>&& callback,
+                        const std::string& engine) const {
+  auto engine_service = EngineService();
+  try {
+    auto status = engine_service.GetEngineInfo(engine);
+    Json::Value ret;
+    ret["name"] = status.name;
+    ret["description"] = status.description;
+    ret["version"] = status.version;
+    ret["productName"] = status.product_name;
+    ret["status"] = status.status;
+
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k200OK);
+    callback(resp);
+  } catch (const std::runtime_error& e) {
+    Json::Value ret;
+    ret["message"] = e.what();
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k400BadRequest);
+    callback(resp);
+  } catch (const std::exception& e) {
+    Json::Value ret;
+    ret["message"] = e.what();
+    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+    resp->setStatusCode(k500InternalServerError);
+    callback(resp);
+  }
+}
+
+void Engines::UninstallEngine(
+    const HttpRequestPtr& req,
+    std::function<void(const HttpResponsePtr&)>&& callback,
+    const std::string& engine) const {
+  LOG_INFO << "[Http] Uninstall engine " << engine;
+  // TODO: think of a way to prevent code duplication. This should be shared with cmd as well
+
+  // TODO: Unload the model which is currently running on engine_
+
+  // TODO: Unload engine if is loaded
+
+  // auto ecp = file_manager_utils::GetEnginesContainerPath();
+  // auto engine_path = ecp / engine;
+  // if (!std::filesystem::exists(engine_path)) {
+  //   ("Engine " << engine_ << " is not installed!");
+  //   return;
+  // }
+  //
+  // // remove
+  // try {
+  //   std::filesystem::remove_all(engine_path);
+  //   CTL_INF("Engine " << engine_ << " uninstalled successfully!");
+  // } catch (const std::exception& e) {
+  //   CTL_ERR("Failed to uninstall engine " << engine_ + ": " << e.what());
+  // }
+}
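The commented-out body above sketches where UninstallEngine is headed. Filled in as an HTTP handler — with the stray engine_ references corrected to the engine parameter, utils/file_manager_utils.h included, and a JSON response mirroring GetEngine (the response shape is an assumption, not part of this commit) — it might look like:

// Hypothetical completion of Engines::UninstallEngine, based on the
// commented-out body above; not part of this commit.
void Engines::UninstallEngine(
    const HttpRequestPtr& req,
    std::function<void(const HttpResponsePtr&)>&& callback,
    const std::string& engine) const {
  LOG_INFO << "[Http] Uninstall engine " << engine;
  auto ecp = file_manager_utils::GetEnginesContainerPath();
  auto engine_path = ecp / engine;

  Json::Value ret;
  if (!std::filesystem::exists(engine_path)) {
    ret["message"] = "Engine " + engine + " is not installed!";
    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
    resp->setStatusCode(k400BadRequest);
    callback(resp);
    return;
  }

  try {
    std::filesystem::remove_all(engine_path);
    ret["message"] = "Engine " + engine + " uninstalled successfully!";
    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
    resp->setStatusCode(k200OK);
    callback(resp);
  } catch (const std::exception& e) {
    ret["message"] = "Failed to uninstall engine " + engine + ": " + e.what();
    auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
    resp->setStatusCode(k500InternalServerError);
    callback(resp);
  }
}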
(The remaining 3 changed files did not load in this view.)