Skip to content

Commit

Permalink
Merge pull request #1044 from janhq/feat/download-cuda-toolkit
Browse files Browse the repository at this point in the history
feat: download cuda toolkit
  • Loading branch information
namchuai authored Aug 29, 2024
2 parents 8fdff72 + 45103c0 commit 2d09448
Show file tree
Hide file tree
Showing 8 changed files with 291 additions and 54 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# cortex-js
.DS_Store
cortex-js/cortex.db
dist
*.lock
Expand All @@ -18,4 +19,4 @@ cortex-js/package-lock.json
.vscode
cortex-js/command
cortex-js/src/infrastructure/commanders/test/test_data
**/vcpkg_installed
**/vcpkg_installed
60 changes: 57 additions & 3 deletions engine/commands/engine_init_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "utils/archive_utils.h"
#include "utils/system_info_utils.h"
// clang-format on
#include "utils/cuda_toolkit_utils.h"
#include "utils/engine_matcher_utils.h"

namespace commands {
Expand Down Expand Up @@ -103,9 +104,10 @@ bool EngineInitCmd::Exec() const {
.path = path,
}}};

DownloadService().AddDownloadTask(downloadTask, [](const std::string&
absolute_path,
bool unused) {
DownloadService download_service;
download_service.AddDownloadTask(downloadTask, [](const std::string&
absolute_path,
bool unused) {
// try to unzip the downloaded file
std::filesystem::path downloadedEnginePath{absolute_path};
LOG_INFO << "Downloaded engine path: "
Expand All @@ -125,6 +127,58 @@ bool EngineInitCmd::Exec() const {
}
LOG_INFO << "Finished!";
});
if (system_info.os == "mac" || engineName_ == "cortex.onnx") {
return false;
}
// download cuda toolkit
const std::string jan_host = "https://catalog.jan.ai";
const std::string cuda_toolkit_file_name = "cuda.tar.gz";
const std::string download_id = "cuda";

auto gpu_driver_version = system_info_utils::GetDriverVersion();

auto cuda_runtime_version =
cuda_toolkit_utils::GetCompatibleCudaToolkitVersion(
gpu_driver_version, system_info.os, engineName_);

std::ostringstream cuda_toolkit_path;
cuda_toolkit_path << "dist/cuda-dependencies/" << 11.7 << "/"
<< system_info.os << "/"
<< cuda_toolkit_file_name;

LOG_DEBUG << "Cuda toolkit download url: " << jan_host
<< cuda_toolkit_path.str();

auto downloadCudaToolkitTask = DownloadTask{
.id = download_id,
.type = DownloadType::CudaToolkit,
.error = std::nullopt,
.items = {DownloadItem{
.id = download_id,
.host = jan_host,
.fileName = cuda_toolkit_file_name,
.type = DownloadType::CudaToolkit,
.path = cuda_toolkit_path.str(),
}},
};

download_service.AddDownloadTask(
downloadCudaToolkitTask,
[](const std::string& absolute_path, bool unused) {
LOG_DEBUG << "Downloaded cuda path: " << absolute_path;
// try to unzip the downloaded file
std::filesystem::path downloaded_path{absolute_path};

archive_utils::ExtractArchive(
absolute_path,
downloaded_path.parent_path().parent_path().string());

try {
std::filesystem::remove(absolute_path);
} catch (std::exception& e) {
LOG_ERROR << "Error removing downloaded file: " << e.what();
}
});

return true;
}
Expand Down
98 changes: 50 additions & 48 deletions engine/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,53 +22,53 @@
#error "Unsupported platform!"
#endif

void RunServer(){
void RunServer() {
// Create logs/ folder and setup log to file
std::filesystem::create_directory(cortex_utils::logs_folder);
trantor::AsyncFileLogger asyncFileLogger;
asyncFileLogger.setFileName(cortex_utils::logs_base_name);
asyncFileLogger.startLogging();
trantor::Logger::setOutputFunction(
[&](const char* msg, const uint64_t len) {
asyncFileLogger.output(msg, len);
},
[&]() { asyncFileLogger.flush(); });
asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);
// Number of cortex.cpp threads
// if (argc > 1) {
// thread_num = std::atoi(argv[1]);
// }
std::filesystem::create_directory(cortex_utils::logs_folder);
trantor::AsyncFileLogger asyncFileLogger;
asyncFileLogger.setFileName(cortex_utils::logs_base_name);
asyncFileLogger.startLogging();
trantor::Logger::setOutputFunction(
[&](const char* msg, const uint64_t len) {
asyncFileLogger.output(msg, len);
},
[&]() { asyncFileLogger.flush(); });
asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);
// Number of cortex.cpp threads
// if (argc > 1) {
// thread_num = std::atoi(argv[1]);
// }

// // Check for host argument
// if (argc > 2) {
// host = argv[2];
// }
// // Check for host argument
// if (argc > 2) {
// host = argv[2];
// }

// // Check for port argument
// if (argc > 3) {
// port = std::atoi(argv[3]); // Convert string argument to int
// }
int thread_num = 1;
std::string host = "127.0.0.1";
int port = 3928;
// // Check for port argument
// if (argc > 3) {
// port = std::atoi(argv[3]); // Convert string argument to int
// }
int thread_num = 1;
std::string host = "127.0.0.1";
int port = 3928;

int logical_cores = std::thread::hardware_concurrency();
int drogon_thread_num = std::max(thread_num, logical_cores);
// cortex_utils::nitro_logo();
int logical_cores = std::thread::hardware_concurrency();
int drogon_thread_num = std::max(thread_num, logical_cores);
// cortex_utils::nitro_logo();
#ifdef CORTEX_CPP_VERSION
LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
#else
LOG_INFO << "cortex.cpp version: undefined";
LOG_INFO << "cortex.cpp version: undefined";
#endif

LOG_INFO << "Server started, listening at: " << host << ":" << port;
LOG_INFO << "Please load your model";
drogon::app().addListener(host, port);
drogon::app().setThreadNum(drogon_thread_num);
LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();
LOG_INFO << "Server started, listening at: " << host << ":" << port;
LOG_INFO << "Please load your model";
drogon::app().addListener(host, port);
drogon::app().setThreadNum(drogon_thread_num);
LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();

drogon::app().run();
// return 0;
drogon::app().run();
// return 0;
}

void ForkProcess() {
Expand All @@ -80,19 +80,21 @@ void ForkProcess() {
ZeroMemory(&si, sizeof(si));
si.cb = sizeof(si);
ZeroMemory(&pi, sizeof(pi));
std::string cmds = cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server";
std::string cmds =
cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server";
// Create child process
if (!CreateProcess(
NULL, // No module name (use command line)
const_cast<char*>(cmds.c_str()), // Command line (replace with your actual executable)
NULL, // Process handle not inheritable
NULL, // Thread handle not inheritable
FALSE, // Set handle inheritance to FALSE
0, // No creation flags
NULL, // Use parent's environment block
NULL, // Use parent's starting directory
&si, // Pointer to STARTUPINFO structure
&pi)) // Pointer to PROCESS_INFORMATION structure
const_cast<char*>(
cmds.c_str()), // Command line (replace with your actual executable)
NULL, // Process handle not inheritable
NULL, // Thread handle not inheritable
FALSE, // Set handle inheritance to FALSE
0, // No creation flags
NULL, // Use parent's environment block
NULL, // Use parent's starting directory
&si, // Pointer to STARTUPINFO structure
&pi)) // Pointer to PROCESS_INFORMATION structure
{
std::cout << "Could not start server: " << GetLastError() << std::endl;
} else {
Expand Down
2 changes: 1 addition & 1 deletion engine/services/download_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <optional>
#include <vector>

enum class DownloadType { Model, Engine, Miscellaneous };
enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit };

enum class DownloadStatus {
Pending,
Expand Down
63 changes: 63 additions & 0 deletions engine/utils/cuda_toolkit_utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#pragma once

#include <stdexcept>
#include <string>

#include "utils/semantic_version_utils.h"

namespace cuda_toolkit_utils {
/**
 * Picks the CUDA toolkit version to use for a given GPU driver.
 *
 * The minimum driver versions below are based on:
 * https://docs.nvidia.com/deeplearning/cudnn/latest/reference/support-matrix.html#f1
 *
 * @param driver_semantic_version Installed GPU driver version, compared with
 *        semantic_version_utils::CompareSemanticVersion.
 * @param os Target OS; only "windows" and "linux" are accepted.
 * @param engine Engine name. "cortex.tensorrt-llm" only accepts CUDA 12.4;
 *        every other engine may fall back to CUDA 11.7.
 * @return "12.4" or "11.7".
 * @throws std::runtime_error if the OS is not supported or the driver is older
 *         than the minimum required for the selected engine.
 */
inline std::string GetCompatibleCudaToolkitVersion(
    const std::string& driver_semantic_version, const std::string& os,
    const std::string& engine) {
  // Minimum driver version required by each CUDA toolkit on the given OS.
  const char* min_driver_for_cuda_12 = nullptr;
  const char* min_driver_for_cuda_11 = nullptr;
  if (os == "windows") {
    min_driver_for_cuda_12 = "527.41";
    min_driver_for_cuda_11 = "452.39";
  } else if (os == "linux") {
    min_driver_for_cuda_12 = "525.60.13";
    min_driver_for_cuda_11 = "450.80.02";
  } else {
    throw std::runtime_error("Unsupported OS");
  }

  const auto driver_at_least = [&driver_semantic_version](const char* minimum) {
    return semantic_version_utils::CompareSemanticVersion(
               driver_semantic_version, minimum) >= 0;
  };

  if (driver_at_least(min_driver_for_cuda_12)) {
    return "12.4";
  }

  // cortex.tensorrt-llm needs at least CUDA 12.4, so it has no 11.7 fallback.
  const bool requires_cuda_12 = (engine == "cortex.tensorrt-llm");
  if (!requires_cuda_12 && driver_at_least(min_driver_for_cuda_11)) {
    return "11.7";
  }

  // Report the lowest driver version that would have been accepted.
  const char* min_supported_driver =
      requires_cuda_12 ? min_driver_for_cuda_12 : min_driver_for_cuda_11;
  throw std::runtime_error(
      std::string("GPU driver version not supported. Minimum "
                  "required driver version is ") +
      min_supported_driver);
}
}  // namespace cuda_toolkit_utils
50 changes: 49 additions & 1 deletion engine/utils/file_manager_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,63 @@
#include <string>
#include <string_view>

#if defined(__APPLE__) && defined(__MACH__)
#include <mach-o/dyld.h>
#elif defined(__linux__)
#include <unistd.h>
#elif defined(_WIN32)
#include <windows.h>
#endif

namespace file_manager_utils {

/**
 * Returns the folder that contains the currently running executable.
 *
 * The executable path is looked up with the platform API
 * (_NSGetExecutablePath on macOS, readlink("/proc/self/exe") on Linux,
 * GetModuleFileNameA on Windows). If the lookup fails, or the platform is not
 * one of those three, the current working directory is returned instead.
 */
inline std::filesystem::path GetExecutableFolderContainerPath() {
#if defined(__APPLE__) && defined(__MACH__)
  char buffer[1024];
  uint32_t size = sizeof(buffer);

  if (_NSGetExecutablePath(buffer, &size) == 0) {
    LOG_INFO << "Executable path: " << buffer;
    return std::filesystem::path{buffer}.parent_path();
  } else {
    LOG_ERROR << "Failed to get executable path";
    return std::filesystem::current_path();
  }
#elif defined(__linux__)
  // TODO: haven't tested
  char buffer[1024];
  // readlink does not null-terminate, so one byte is reserved for '\0'.
  ssize_t len = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1);
  if (len != -1) {
    buffer[len] = '\0';
    LOG_INFO << "Executable path: " << buffer;
    return std::filesystem::path{buffer}.parent_path();
  } else {
    LOG_ERROR << "Failed to get executable path";
    return std::filesystem::current_path();
  }
#elif defined(_WIN32)
  // TODO: haven't tested
  char buffer[MAX_PATH];
  // GetModuleFileNameA returns 0 on failure (buffer is left untouched) and
  // the buffer size when the path had to be truncated; neither result is a
  // usable path, so fall back like the other platforms do.
  const DWORD len = GetModuleFileNameA(NULL, buffer, MAX_PATH);
  if (len == 0 || len >= MAX_PATH) {
    LOG_ERROR << "Failed to get executable path";
    return std::filesystem::current_path();
  }
  LOG_INFO << "Executable path: " << buffer;
  return std::filesystem::path{buffer}.parent_path();
#else
  LOG_ERROR << "Unsupported platform!";
  return std::filesystem::current_path();
#endif
}

inline std::filesystem::path GetContainerFolderPath(
const std::string_view type) {
const auto current_path{std::filesystem::current_path()};
const auto current_path{GetExecutableFolderContainerPath()};
auto container_folder_path = std::filesystem::path{};

if (type == "Model") {
container_folder_path = current_path / "models";
} else if (type == "Engine") {
container_folder_path = current_path / "engines";
} else if (type == "CudaToolkit") {
container_folder_path = current_path;
} else {
container_folder_path = current_path / "misc";
}
Expand All @@ -35,6 +81,8 @@ inline std::string downloadTypeToString(DownloadType type) {
return "Engine";
case DownloadType::Miscellaneous:
return "Misc";
case DownloadType::CudaToolkit:
return "CudaToolkit";
default:
return "UNKNOWN";
}
Expand Down
Loading

0 comments on commit 2d09448

Please sign in to comment.