Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat background process server #1043

Merged
merged 11 commits into from
Aug 29, 2024
5 changes: 4 additions & 1 deletion engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ endif()

add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}")

# add_subdirectory(test)
option(CMAKE_BUILD_TEST "Enable testing" OFF)
if(CMAKE_BUILD_TEST)
add_subdirectory(test)
endif()

find_package(jsoncpp CONFIG REQUIRED)
find_package(Drogon CONFIG REQUIRED)
Expand Down
6 changes: 6 additions & 0 deletions engine/commands/model_get_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "utils/cortex_utils.h"

namespace commands {

ModelGetCmd::ModelGetCmd(std::string model_handle)
: model_handle_(std::move(model_handle)) {}

Expand All @@ -17,6 +18,7 @@ void ModelGetCmd::Exec() {
// Iterate through directory
for (const auto& entry :
std::filesystem::directory_iterator(cortex_utils::models_folder)) {

if (entry.is_regular_file() && entry.path().stem() == model_handle_ &&
entry.path().extension() == ".yaml") {
try {
Expand Down Expand Up @@ -60,6 +62,7 @@ void ModelGetCmd::Exec() {
if (!std::isnan(static_cast<double>(model_config.max_tokens)))
std::cout << "max_tokens: " << model_config.max_tokens << "\n";
if (!std::isnan(static_cast<double>(model_config.stream)))

std::cout << "stream: " << std::boolalpha << model_config.stream
<< "\n";
if (!std::isnan(static_cast<double>(model_config.ngl)))
Expand All @@ -71,6 +74,7 @@ void ModelGetCmd::Exec() {
if (!model_config.engine.empty())
std::cout << "engine: " << model_config.engine << "\n";
if (!model_config.prompt_template.empty())

std::cout << "prompt_template: " << model_config.prompt_template
<< "\n";
if (!model_config.system_template.empty())
Expand All @@ -86,6 +90,7 @@ void ModelGetCmd::Exec() {
if (!model_config.gpu_arch.empty())
std::cout << "gpu_arch: " << model_config.gpu_arch << "\n";
if (!model_config.quantization_method.empty())

std::cout << "quantization_method: "
<< model_config.quantization_method << "\n";
if (!model_config.precision.empty())
Expand All @@ -96,6 +101,7 @@ void ModelGetCmd::Exec() {

// Print non-null strings
if (!model_config.trtllm_version.empty())

std::cout << "trtllm_version: " << model_config.trtllm_version
<< "\n";
if (!std::isnan(static_cast<double>(model_config.text_model)))
Expand Down
2 changes: 2 additions & 0 deletions engine/commands/model_get_cmd.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#pragma once


#include <cmath> // For std::isnan
#include <string>
namespace commands {

class ModelGetCmd {
public:

ModelGetCmd(std::string model_handle);
void Exec();

Expand Down
2 changes: 1 addition & 1 deletion engine/controllers/command_line_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#include "commands/engine_init_cmd.h"
#include "commands/model_list_cmd.h"
#include "commands/model_get_cmd.h"

#include "commands/model_pull_cmd.h"
#include "commands/start_model_cmd.h"
#include "commands/stop_model_cmd.h"
Expand Down Expand Up @@ -140,6 +139,7 @@ void CommandLineParser::EngineInstall(CLI::App* parent,
"install", "Install " + engine_name + " engine");
install_cmd->add_option("-v, --version", version,
"Engine version. Default will be latest");

install_cmd->callback([engine_name, &version] {
commands::EngineInitCmd eic(engine_name, version);
eic.Exec();
Expand Down
160 changes: 108 additions & 52 deletions engine/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
#if defined(__APPLE__) && defined(__MACH__)
#include <libgen.h> // for dirname()
#include <mach-o/dyld.h>
#include <sys/types.h>
#elif defined(__linux__)
#include <libgen.h> // for dirname()
#include <sys/types.h>
#include <unistd.h> // for readlink()
#elif defined(_WIN32)
#include <windows.h>
Expand All @@ -20,8 +22,104 @@
#error "Unsupported platform!"
#endif


// Runs the cortex-cpp HTTP server in the current process.
// Sets up file-based logging, then starts the drogon event loop on a fixed
// host/port. Blocks until the drogon application is stopped.
void RunServer() {
  // Create logs/ folder and redirect trantor logging to a size-limited file.
  std::filesystem::create_directory(cortex_utils::logs_folder);
  trantor::AsyncFileLogger asyncFileLogger;
  asyncFileLogger.setFileName(cortex_utils::logs_base_name);
  asyncFileLogger.startLogging();
  trantor::Logger::setOutputFunction(
      [&](const char* msg, const uint64_t len) {
        asyncFileLogger.output(msg, len);
      },
      [&]() { asyncFileLogger.flush(); });
  asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);

  // Fixed listen settings for the background server.
  // TODO(review): these were argv-configurable before this refactor; restore
  // configurability (thread count, host, port) via CLI options.
  int thread_num = 1;
  std::string host = "127.0.0.1";
  int port = 3928;

  // Use at least one drogon worker thread per logical core.
  int logical_cores = std::thread::hardware_concurrency();
  int drogon_thread_num = std::max(thread_num, logical_cores);
#ifdef CORTEX_CPP_VERSION
  LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
#else
  LOG_INFO << "cortex.cpp version: undefined";
#endif

  LOG_INFO << "Server started, listening at: " << host << ":" << port;
  LOG_INFO << "Please load your model";
  drogon::app().addListener(host, port);
  drogon::app().setThreadNum(drogon_thread_num);
  LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();

  // Blocks here until the drogon event loop is quit.
  drogon::app().run();
}

// Spawns the server as a background process and returns in the parent.
// Windows: launches a new copy of this binary with --start-server via
// CreateProcess. Unix: fork()s; the child runs RunServer() (which blocks
// until the server stops) while the parent returns immediately.
void ForkProcess() {
#if defined(_WIN32) || defined(_WIN64)
  // Windows-specific code to create a new process
  STARTUPINFO si;
  PROCESS_INFORMATION pi;

  ZeroMemory(&si, sizeof(si));
  si.cb = sizeof(si);
  ZeroMemory(&pi, sizeof(pi));
  std::string cmds =
      cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server";
  // Create child process
  if (!CreateProcess(
          NULL,  // No module name (use command line)
          const_cast<char*>(cmds.c_str()),  // Command line (this binary)
          NULL,    // Process handle not inheritable
          NULL,    // Thread handle not inheritable
          FALSE,   // Set handle inheritance to FALSE
          0,       // No creation flags
          NULL,    // Use parent's environment block
          NULL,    // Use parent's starting directory
          &si,     // Pointer to STARTUPINFO structure
          &pi))    // Pointer to PROCESS_INFORMATION structure
  {
    std::cout << "Could not start server: " << GetLastError() << std::endl;
  } else {
    std::cout << "Server started" << std::endl;
    // Close our copies of the child's handles so they are not leaked;
    // the child process keeps running independently.
    CloseHandle(pi.hProcess);
    CloseHandle(pi.hThread);
  }

#else
  // Unix-like system-specific code to fork a child process
  pid_t pid = fork();

  if (pid < 0) {
    // Fork failed — previously this printed a dangling "...: " with no detail.
    std::cerr << "Could not start server (fork failed)" << std::endl;
    return;
  } else if (pid == 0) {
    // Child process: blocks inside RunServer() until the server stops.
    RunServer();
  } else {
    // Parent process
    std::cout << "Server started" << std::endl;
  }
#endif
}

int main(int argc, char* argv[]) {

// Check if this process is for python execution
if (argc > 1) {
if (strcmp(argv[1], "--run_python_file") == 0) {
Expand All @@ -44,58 +142,16 @@ int main(int argc, char* argv[]) {
}

if (argc > 1) {
CommandLineParser clp;
clp.SetupCommand(argc, argv);
return 0;
}

// Create logs/ folder and setup log to file
std::filesystem::create_directory(cortex_utils::logs_folder);
trantor::AsyncFileLogger asyncFileLogger;
asyncFileLogger.setFileName(cortex_utils::logs_base_name);
asyncFileLogger.startLogging();
trantor::Logger::setOutputFunction(
[&](const char* msg, const uint64_t len) {
asyncFileLogger.output(msg, len);
},
[&]() { asyncFileLogger.flush(); });
asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);

int thread_num = 1;
std::string host = "127.0.0.1";
int port = 3928;

// Number of cortex.cpp threads
if (argc > 1) {
thread_num = std::atoi(argv[1]);
}

// Check for host argument
if (argc > 2) {
host = argv[2];
}

// Check for port argument
if (argc > 3) {
port = std::atoi(argv[3]); // Convert string argument to int
if (strcmp(argv[1], "--start-server") == 0) {
RunServer();
return 0;
} else {
CommandLineParser clp;
clp.SetupCommand(argc, argv);
return 0;
}
}

int logical_cores = std::thread::hardware_concurrency();
int drogon_thread_num = std::max(thread_num, logical_cores);
// cortex_utils::nitro_logo();
#ifdef CORTEX_CPP_VERSION
LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
#else
LOG_INFO << "cortex.cpp version: undefined";
#endif

LOG_INFO << "Server started, listening at: " << host << ":" << port;
LOG_INFO << "Please load your model";
drogon::app().addListener(host, port);
drogon::app().setThreadNum(drogon_thread_num);
LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();

drogon::app().run();

ForkProcess();
return 0;
}
2 changes: 1 addition & 1 deletion engine/test/components/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ add_executable(${PROJECT_NAME} ${SRCS})
find_package(Drogon CONFIG REQUIRED)
find_package(GTest CONFIG REQUIRED)

target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gmock
target_link_libraries(${PROJECT_NAME} PRIVATE Drogon::Drogon GTest::gtest GTest::gtest_main
${CMAKE_THREAD_LIBS_INIT})
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../)

Expand Down
Loading