Commit

update

namchuai committed Sep 5, 2024
1 parent 5715cb0 commit c08df8a
Showing 10 changed files with 123 additions and 34 deletions.
60 changes: 59 additions & 1 deletion engine/commands/engine_get_cmd.cc
@@ -1,6 +1,64 @@
#include "engine_get_cmd.h"
#include <filesystem>
#include <iostream>
#include <tabulate/table.hpp>
#include "utils/file_manager_utils.h"
#include "utils/logging_utils.h"

namespace commands {

-void EngineGetCmd::Exec() const {}
+void EngineGetCmd::Exec() const {
CTL_INF("[EngineGetCmd] engine: " << engine_);

auto ecp = file_manager_utils::GetEnginesContainerPath();
std::string onnx_status{"not_supported"};
std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp")
? "ready"
: "not_initialized";
std::string tensorrt_status{"not_supported"};

#ifdef _WIN32
onnx_status = std::filesystem::exists(ecp / "cortex.onnx")
? "ready"
: "not_initialized";
tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
? "ready"
: "not_initialized";
#elif defined(__linux__)
tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
? "ready"
: "not_initialized";
#endif
std::vector<EngineInfo> engines = {
{.name = "cortex.onnx",
.description = "This extension enables chat completion API calls using "
"the Onnx engine",
.version = "0.0.1",
.product_name = "Onnx Inference Engine",
.status = onnx_status},
{.name = "cortex.llamacpp",
.description = "This extension enables chat completion API calls using "
"the LlamaCPP engine",
.version = "0.0.1",
.product_name = "LlamaCPP Inference Engine",
.status = llamacpp_status},
{.name = "cortex.tensorrt-llm",
.description = "This extension enables chat completion API calls using "
"the TensorrtLLM engine",
.version = "0.0.1",
.product_name = "TensorrtLLM Inference Engine",
.status = tensorrt_status},
};

tabulate::Table table;
table.add_row({"name", "description", "version", "product name", "status"});
table.format().font_color(tabulate::Color::green);
for (auto& engine : engines) {
if (engine.name == engine_) {
table.add_row({engine.name, engine.description, engine.version,
engine.product_name, engine.status});
}
}

std::cout << table << std::endl;
}
}; // namespace commands
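The status detection in Exec() duplicates, almost line for line, the checks in engine_list_cmd.cc below, and the designated initializers (.name = ...) used for EngineInfo require C++20 or a compiler extension. A follow-up could factor the duplicated checks into a shared helper along these lines — a sketch only; GetEngineStatus, its location, and its signature are hypothetical, not part of this commit:

#include <filesystem>
#include <string>

// Hypothetical shared helper returning the same three status strings the
// commands currently compute inline.
inline std::string GetEngineStatus(const std::filesystem::path& engines_path,
                                   const std::string& engine_name,
                                   bool supported_on_this_platform) {
  if (!supported_on_this_platform)
    return "not_supported";
  return std::filesystem::exists(engines_path / engine_name)
             ? "ready"
             : "not_initialized";
}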
14 changes: 14 additions & 0 deletions engine/commands/engine_get_cmd.h
@@ -1,9 +1,23 @@
#pragma once
#include <string>

namespace commands {
class EngineGetCmd {
struct EngineInfo {
std::string name;
std::string description;
std::string version;
std::string product_name;
std::string status;
};

public:
EngineGetCmd(const std::string& engine) : engine_{engine} {}

void Exec() const;

private:
std::string engine_;
};

} // namespace commands
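For reference, the new command is constructed with an engine name and executed immediately; a call site looks like this (mirroring the callback wired up in command_line_parser.cc below):

commands::EngineGetCmd cmd("cortex.llamacpp");
cmd.Exec();  // prints a one-row table with the engine's version and status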
13 changes: 5 additions & 8 deletions engine/commands/engine_list_cmd.cc
@@ -1,6 +1,3 @@
-// clang-format off
-#include "utils/cortex_utils.h"
-// clang-format on
#include "engine_list_cmd.h"
#include <filesystem>
#include <tabulate/table.hpp>
@@ -14,16 +11,16 @@ bool EngineListCmd::Exec() {
std::string llamacpp_status = std::filesystem::exists(ecp / "cortex.llamacpp")
? "ready"
: "not_initialized";
-std::string tenssorrt_status{"not_supported"};
+std::string tensorrt_status{"not_supported"};
#ifdef _WIN32
onnx_status = std::filesystem::exists(ecp / "cortex.onnx")
? "ready"
: "not_initialized";
-tenssort_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
+tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
? "ready"
: "not_initialized";
-#elif __linux__
-tenssort_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
+#elif defined(__linux__)
+tensorrt_status = std::filesystem::exists(ecp / "cortex.tensorrt-llm")
? "ready"
: "not_initialized";
#endif
@@ -46,7 +43,7 @@ bool EngineListCmd::Exec() {
table.add_row({"3", "cortex.tensorrt-llm",
"This extension enables chat completion API calls using the "
"TensorrtLLM engine",
"0.0.1", "TensorrtLLM Inference Engine", tenssorrt_status});
"0.0.1", "TensorrtLLM Inference Engine", tensorrt_status});

for (int i = 0; i < 6; i++) {
table[0][i]
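Beyond the spelling cleanup, this hunk fixes what would otherwise be a compile error on Windows and Linux builds: as shown above, the old code declared tenssorrt_status but assigned to tenssort_status, an identifier that appears nowhere else.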
29 changes: 22 additions & 7 deletions engine/controllers/command_line_parser.cc
@@ -1,6 +1,7 @@
#include "command_line_parser.h"
#include "commands/chat_cmd.h"
#include "commands/cmd_info.h"
#include "commands/engine_get_cmd.h"
#include "commands/engine_init_cmd.h"
#include "commands/engine_list_cmd.h"
#include "commands/engine_uninstall_cmd.h"
@@ -125,11 +126,11 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
command.Exec();
});

-auto get_engine_cmd = engines_cmd->add_subcommand("get", "Get an engine");
+EngineManagement(engines_cmd, "cortex.llamacpp", version);
+EngineManagement(engines_cmd, "cortex.onnx", version);
+EngineManagement(engines_cmd, "cortex.tensorrt-llm", version);

-EngineInstall(engines_cmd, "cortex.llamacpp", version);
-EngineInstall(engines_cmd, "cortex.onnx", version);
-EngineInstall(engines_cmd, "cortex.tensorrt-llm", version);
+EngineGet(engines_cmd);
}

{
@@ -158,9 +159,9 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
return true;
}

-void CommandLineParser::EngineInstall(CLI::App* parent,
-const std::string& engine_name,
-std::string& version) {
+void CommandLineParser::EngineManagement(CLI::App* parent,
+const std::string& engine_name,
+std::string& version) {
auto engine_cmd =
parent->add_subcommand(engine_name, "Manage " + engine_name + " engine");

@@ -181,3 +182,17 @@ void CommandLineParser::EngineInstall(CLI::App* parent,
cmd.Exec();
});
}

void CommandLineParser::EngineGet(CLI::App* parent) {
auto get_cmd = parent->add_subcommand("get", "Get engine info");

for (auto& engine : supportedEngines_) {
std::string engine_name{engine};
std::string desc = "Get " + engine_name + " status";
auto engine_get_cmd = get_cmd->add_subcommand(engine, desc);
engine_get_cmd->callback([engine_name] {
commands::EngineGetCmd cmd(engine_name);
cmd.Exec();
});
}
}
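Note that each callback captures engine_name by value; a by-reference capture would dangle, since the lambda runs later, during argument parsing, after the loop variable has gone out of scope. With this wiring in place, invoking the binary as, say, cortex engines get cortex.llamacpp prints the status table for that engine (invocation shown for illustration).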
10 changes: 8 additions & 2 deletions engine/controllers/command_line_parser.h
@@ -8,8 +8,14 @@ class CommandLineParser {
bool SetupCommand(int argc, char** argv);

private:
-void EngineInstall(CLI::App* parent, const std::string& engine_name,
-std::string& version);
+void EngineManagement(CLI::App* parent, const std::string& engine_name,
+std::string& version);

void EngineGet(CLI::App* parent);

CLI::App app_;

// TODO: move this one to somewhere else
static constexpr std::array<const char*, 3> supportedEngines_ = {
"cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"};
};
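The TODO above flags that the engine list belongs elsewhere. One option — a hypothetical shared header, not part of this commit — is a small constants header that the parser, the list command, and the controllers can all include:

// engine/utils/engine_constants.h — hypothetical file and namespace
#pragma once
#include <array>

namespace cortex_utils {
inline constexpr std::array<const char*, 3> kSupportedEngines = {
    "cortex.llamacpp", "cortex.onnx", "cortex.tensorrt-llm"};
}  // namespace cortex_utils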
4 changes: 2 additions & 2 deletions engine/controllers/engines.cc
@@ -1,6 +1,6 @@
#include "engines.h"
#include "utils/archive_utils.h"
#include "utils/file_manager_utils.h"
#include "utils/cortex_utils.h"
#include "utils/system_info_utils.h"

void Engines::InitEngine(const HttpRequestPtr& req,
@@ -172,4 +172,4 @@ void Engines::ListEngine(
auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
resp->setStatusCode(k200OK);
callback(resp);
-}
\ No newline at end of file
+}
3 changes: 0 additions & 3 deletions engine/controllers/engines.h
@@ -2,10 +2,7 @@

#include <drogon/HttpController.h>
#include <trantor/utils/Logger.h>
#include "services/download_service.h"
#include "utils/cortex_utils.h"
#include "utils/cortexso_parser.h"
#include "utils/http_util.h"

using namespace drogon;

3 changes: 1 addition & 2 deletions engine/utils/command_executor.h
@@ -1,6 +1,5 @@
#include <array>
#include <cstdio>
-#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
@@ -46,4 +45,4 @@

private:
std::unique_ptr<FILE, decltype(&PCLOSE)> m_pipe{nullptr, PCLOSE};
-};
\ No newline at end of file
+};
16 changes: 8 additions & 8 deletions engine/utils/cortex_utils.h
@@ -1,29 +1,28 @@
#pragma once
#include <drogon/HttpClient.h>
#include <drogon/HttpResponse.h>
+#include <sys/stat.h>
#include <algorithm>
#include <fstream>
#include <iostream>
-#include <ostream>
-#include <regex>
-#include <vector>
#include <random>
+#include <regex>
#include <string>
-#include <sys/stat.h>
+#include <vector>

// Include platform-specific headers
#ifdef _WIN32
+#include <direct.h>
#include <windows.h>
#include <winsock2.h>
-#include <direct.h>
#define mkdir _mkdir
#else
#include <dirent.h>
#include <unistd.h>
#endif

#if __APPLE__
#include <limits.h>
#include <mach-o/dyld.h>
#endif

@@ -36,7 +35,7 @@ constexpr static auto kTensorrtLlmPath = "/engines/cortex.tensorrt-llm";
inline std::string models_folder = "./models";
inline std::string logs_folder = "./logs";
inline std::string logs_base_name = "./logs/cortex";
-inline size_t log_file_size_limit = 20000000; // ~20 mb
+inline size_t log_file_size_limit = 20000000;  // ~20 mb

inline std::string extractBase64(const std::string& input) {
std::regex pattern("base64,(.*)");
@@ -273,7 +272,8 @@ inline drogon::HttpResponsePtr CreateCortexHttpResponse() {
return resp;
}

-inline drogon::HttpResponsePtr CreateCortexHttpJsonResponse(const Json::Value& data) {
+inline drogon::HttpResponsePtr CreateCortexHttpJsonResponse(
+const Json::Value& data) {
auto resp = drogon::HttpResponse::newHttpJsonResponse(data);
#ifdef ALLOW_ALL_CORS
LOG_INFO << "Respond for all cors!";
@@ -342,4 +342,4 @@ inline std::string GetCurrentPath() {
}
#endif

-} // namespace cortex_utils
\ No newline at end of file
+} // namespace cortex_utils
5 changes: 4 additions & 1 deletion engine/utils/system_info_utils.h
@@ -2,6 +2,7 @@

#include <trantor/utils/Logger.h>
#include <regex>
#include <sstream>
#include <vector>
#include "utils/command_executor.h"
#include "utils/logging_utils.h"
@@ -292,7 +293,9 @@ inline std::vector<GpuInfo> GetGpuInfoListVulkan() {
gpuInfoList.push_back(gpuInfo);
++iter;
}
-} catch (const std::exception& e) {}
+} catch (const std::exception& e) {
+LOG_ERROR << "Error: " << e.what();
+}

return gpuInfoList;
}
