Commit

update

namchuai committed Sep 5, 2024
1 parent 5715cb0 commit c08df8a
Showing 10 changed files with 123 additions and 34 deletions.
60 changes: 59 additions & 1 deletion engine/commands/engine_get_cmd.cc
@@ -1,6 +1,64 @@
#include "engine_get_cmd.h"
#include <filesystem>
#include <iostream>
#include <tabulate/table.hpp>
#include "utils/file_manager_utils.h"
#include "utils/logging_utils.h"

namespace commands {

-void EngineGetCmd::Exec() const {}
+void EngineGetCmd::Exec() const {
CTL_INF("[EngineGetCmd] engine: " << engine_);

auto ecp = file_manager_utils::GetEnginesContainerPath();
std::string onnx_status{"not_supported"};
std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp")
? "ready"
: "not_initialized";
std::string tensorrt_status{"not_supported"};

#ifdef _WIN32
onnx_status = std::filesystem::exists(ecp / "cortex.onnx")
? "ready"
: "not_initialized";
tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
? "ready"
: "not_initialized";
#elif defined(__linux__)
tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
? "ready"
: "not_initialized";
#endif
std::vector<EngineInfo> engines = {
{.name = "cortex.onnx",
.description = "This extension enables chat completion API calls using "
"the Onnx engine",
.version = "0.0.1",
.product_name = "Onnx Inference Engine",
.status = onnx_status},
{.name = "cortex.llamacpp",
.description = "This extension enables chat completion API calls using "
"the LlamaCPP engine",
.version = "0.0.1",
.product_name = "LlamaCPP Inference Engine",
.status = llamacpp_status},
{.name = "cortex.tensorrt-llm",
.description = "This extension enables chat completion API calls using "
"the TensorrtLLM engine",
.version = "0.0.1",
.product_name = "TensorrtLLM Inference Engine",
.status = tensorrt_status},
};

tabulate::Table table;
table.add_row({"name", "description", "version", "product name", "status"});
table.format().font_color(tabulate::Color::green);
for (auto& engine : engines) {
if (engine.name == engine_) {
table.add_row({engine.name, engine.description, engine.version,
engine.product_name, engine.status});
}
}

std::cout << table << std::endl;
}
}; // namespace commands
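The status detection in Exec() duplicates, almost line for line, the checks in engine_list_cmd.cc below, and the designated initializers (.name = ...) used for EngineInfo require C++20 or a compiler extension. A follow-up could factor the duplicated checks into a shared helper along these lines — a sketch only; GetEngineStatus, its location, and its signature are hypothetical, not part of this commit:

#include <filesystem>
#include <string>

// Hypothetical shared helper returning the same three status strings the
// commands currently compute inline.
inline std::string GetEngineStatus(const std::filesystem::path& engines_path,
                                   const std::string& engine_name,
                                   bool supported_on_this_platform) {
  if (!supported_on_this_platform)
    return "not_supported";
  return std::filesystem::exists(engines_path / engine_name)
             ? "ready"
             : "not_initialized";
}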
14 changes: 14 additions & 0 deletions engine/commands/engine_get_cmd.h
@@ -1,9 +1,23 @@
#pragma once
#include <string>

namespace commands {
class EngineGetCmd {
struct EngineInfo {
std::string name;
std::string description;
std::string version;
std::string product_name;
std::string status;
};

public:
EngineGetCmd(const std::string& engine) : engine_{engine} {}

void Exec() const;

private:
std::string engine_;
};

} // namespace commands
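For reference, the new command is constructed with an engine name and executed immediately; a call site looks like this (mirroring the callback wired up in command_line_parser.cc below):

commands::EngineGetCmd cmd("cortex.llamacpp");
cmd.Exec();  // prints a one-row table with the engine's version and status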
13 changes: 5 additions & 8 deletions engine/commands/engine_list_cmd.cc
@@ -1,6 +1,3 @@
-// clang-format off
-#include "utils/cortex_utils.h"
-// clang-format on
#include "engine_list_cmd.h"
#include <filesystem>
#include <tabulate/table.hpp>
@@ -14,16 +11,16 @@ bool EngineListCmd::Exec() {
std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp")
? "ready"
: "not_initialized";
-std::string tenssorrt_status{"not_supported"};
+std::string tensorrt_status{"not_supported"};
#ifdef _WIN32
onnx_status = std::filesystem::exists(ecp / "cortex.onnx")
? "ready"
: "not_initialized";
-tenssort_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
+tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
? "ready"
: "not_initialized";
-#elif __linux__
-tenssort_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
+#elif defined(__linux__)
+tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
? "ready"
: "not_initialized";
#endif
@@ -46,7 +43,7 @@ bool EngineListCmd::Exec() {
table.add_row({"3", "cortex.tensorrt-llm",
"This extension enables chat completion API calls using the "
"TensorrtLLM engine",
"0.0.1", "TensorrtLLM Inference Engine", tenssorrt_status});
"0.0.1", "TensorrtLLM Inference Engine", tensorrt_status});

for (int i = 0; i < 6; i++) {
table[0][i]
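Beyond the spelling cleanup, this hunk fixes what would otherwise be a compile error on Windows and Linux builds: as shown above, the old code declared tenssorrt_status but assigned to tenssort_status, an identifier that appears nowhere else.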
29 changes: 22 additions & 7 deletions engine/controllers/command_line_parser.cc
@@ -1,6 +1,7 @@
#include "command_line_parser.h"
#include "commands/chat_cmd.h"
#include "commands/cmd_info.h"
#include "commands/engine_get_cmd.h"
#include "commands/engine_init_cmd.h"
#include "commands/engine_list_cmd.h"
#include "commands/engine_uninstall_cmd.h"
@@ -125,11 +126,11 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
command.Exec();
});

-auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine");
+EngineManagement(engines_cmd, "cortex.llamacpp", version);
+EngineManagement(engines_cmd, "cortex.onnx", version);
+EngineManagement(engines_cmd, "cortex.tensorrt-llm", version);

-EngineInstall(engines_cmd, "cortex.llamacpp", version);
-EngineInstall(engines_cmd, "cortex.onnx", version);
-EngineInstall(engines_cmd, "cortex.tensorrt-llm", version);
+EngineGet(engines_cmd);
}

{
@@ -158,9 +159,9 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
return true;
}

-void CommandLineParser::EngineInstall(CLI::App* parent,
-const std::string& engine_name,
-std::string& version) {
+void CommandLineParser::EngineManagement(CLI::App* parent,
+const std::string& engine_name,
+std::string& version) {
auto engine_cmd =
parent->add_subcommand(engine_name, "Manage " + engine_name + " engine");

@@ -181,3 +182,17 @@ void CommandLineParser::EngineInstall(CLI::App* parent,
cmd.Exec();
});
}

void CommandLineParser::EngineGet(CLI::App* parent) {
auto get_cmd = parent->add_subcommand("get", "Get engine info");

for (auto& engine : supportedEngines_) {
std::string engine_name{engine};
std::string desc = "Get " + engine_name + " status";
auto engine_get_cmd = get_cmd->add_subcommand(engine, desc);
engine_get_cmd->callback([engine_name] {
commands::EngineGetCmd cmd(engine_name);
cmd.Exec();
});
}
}
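Note that each callback captures engine_name by value; a by-reference capture would dangle, since the lambda runs later, during argument parsing, after the loop variable has gone out of scope. With this wiring in place, invoking the binary as, say, cortex engines get cortex.llamacpp prints the status table for that engine (invocation shown for illustration).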
10 changes: 8 additions & 2 deletions engine/controllers/command_line_parser.h
@@ -8,8 +8,14 @@ class CommandLineParser {
bool SetupCommand(int argc, char** argv);

private:
-void EngineInstall(CLI::App* parent, const std::string& engine_name,
-std::string& version);
+void EngineManagement(CLI::App* parent, const std::string& engine_name,
+std::string& version);

void EngineGet(CLI::App* parent);

CLI::App app_;

// TODO: move this one to somewhere else
static constexpr std::array<const char*, 3> supportedEngines_ = {
"cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"};
};
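The TODO above flags that the engine list belongs elsewhere. One option — a hypothetical shared header, not part of this commit — is a small constants header that the parser, the list command, and the controllers can all include:

// engine/utils/engine_constants.h — hypothetical file and namespace
#pragma once
#include <array>

namespace cortex_utils {
inline constexpr std::array<const char*, 3> kSupportedEngines = {
    "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"};
}  // namespace cortex_utils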
4 changes: 2 additions & 2 deletions engine/controllers/engines.cc
@@ -1,6 +1,6 @@
#include "engines.h"
#include "utils/archive_utils.h"
#include "utils/file_manager_utils.h"
#include "utils/cortex_utils.h"
#include "utils/system_info_utils.h"

void Engines::InitEngine(const HttpRequestPtr& req,
@@ -172,4 +172,4 @@ void Engines::ListEngine(
auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
resp->setStatusCode(k200OK);
callback(resp);
-}
\ No newline at end of file
+}
3 changes: 0 additions & 3 deletions engine/controllers/engines.h
@@ -2,10 +2,7 @@

#include <drogon/HttpController.h>
#include <trantor/utils/Logger.h>
#include "services/download_service.h"
#include "utils/cortex_utils.h"
#include "utils/cortexso_parser.h"
#include "utils/http_util.h"

using namespace drogon;

3 changes: 1 addition & 2 deletions engine/utils/command_executor.h
@@ -1,6 +1,5 @@
#include <array>
#include <cstdio>
-#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
@@ -46,4 +45,4 @@

private:
std::unique_ptr<FILE, decltype(&PCLOSE)> m_pipe{nullptr, PCLOSE};
-};
\ No newline at end of file
+};
16 changes: 8 additions & 8 deletions engine/utils/cortex_utils.h
@@ -1,29 +1,28 @@
#pragma once
#include <drogon/HttpClient.h>
#include <drogon/HttpResponse.h>
+#include <sys/stat.h>
#include <algorithm>
#include <fstream>
#include <iostream>
-#include <ostream>
-#include <regex>
-#include <vector>
#include <random>
+#include <regex>
#include <string>
-#include <sys/stat.h>
+#include <vector>

// Include platform-specific headers
#ifdef _WIN32
+#include <direct.h>
#include <windows.h>
#include <winsock2.h>
-#include <direct.h>
#define mkdir _mkdir
#else
#include <dirent.h>
#include <unistd.h>
#endif

#if __APPLE__
#include <limits.h>
#include <mach-o/dyld.h>
#endif

@@ -36,7 +35,7 @@ constexpr static auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm";
inline std::string models_folder = "./models";
inline std::string logs_folder = "./logs";
inline std::string logs_base_name = "./logs/cortex";
-inline size_t log_file_size_limit = 20000000; // ~20 mb
+inline size_t log_file_size_limit = 20000000;  // ~20 mb

inline std::string extractBase64(const std::string& input) {
std::regex pattern("base64,(.*)");
@@ -273,7 +272,8 @@ inline drogon::HttpResponsePtr CreateCortexHttpResponse() {
return resp;
}

-inline drogon::HttpResponsePtr CreateCortexHttpJsonResponse(const Json::Value& data) {
+inline drogon::HttpResponsePtr CreateCortexHttpJsonResponse(
+const Json::Value& data) {
auto resp = drogon::HttpResponse::newHttpJsonResponse(data);
#ifdef ALLOW_ALL_CORS
LOG_INFO << "Respond for all cors!";
@@ -342,4 +342,4 @@ inline std::string GetCurrentPath() {
}
#endif

-} // namespace cortex_utils
\ No newline at end of file
+} // namespace cortex_utils
5 changes: 4 additions & 1 deletion engine/utils/system_info_utils.h
@@ -2,6 +2,7 @@

#include <trantor/utils/Logger.h>
#include <regex>
#include <sstream>
#include <vector>
#include "utils/command_executor.h"
#include "utils/logging_utils.h"
@@ -292,7 +293,9 @@ inline std::vector<GpuInfo> GetGpuInfoListVulkan() {
gpuInfoList.push_back(gpuInfo);
++iter;
}
-} catch (const std::exception& e) {}
+} catch (const std::exception& e) {
+LOG_ERROR << "Error: " << e.what();
+}

return gpuInfoList;
}
