diff --git a/.gitignore b/.gitignore index 237fb5b33..6b785abe9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # cortex-js +.DS_Store cortex-js/cortex.db dist *.lock @@ -18,4 +19,4 @@ cortex-js/package-lock.json .vscode cortex-js/command cortex-js/src/infrastructure/commanders/test/test_data -**/vcpkg_installed \ No newline at end of file +**/vcpkg_installed diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc index 5a1165e23..0f22bd57f 100644 --- a/engine/commands/engine_init_cmd.cc +++ b/engine/commands/engine_init_cmd.cc @@ -7,6 +7,7 @@ #include "utils/archive_utils.h" #include "utils/system_info_utils.h" // clang-format on +#include "utils/cuda_toolkit_utils.h" #include "utils/engine_matcher_utils.h" namespace commands { @@ -103,9 +104,10 @@ bool EngineInitCmd::Exec() const { .path = path, }}}; - DownloadService().AddDownloadTask(downloadTask, [](const std::string& - absolute_path, - bool unused) { + DownloadService download_service; + download_service.AddDownloadTask(downloadTask, [](const std::string& + absolute_path, + bool unused) { // try to unzip the downloaded file std::filesystem::path downloadedEnginePath{absolute_path}; LOG_INFO << "Downloaded engine path: " @@ -125,6 +127,58 @@ bool EngineInitCmd::Exec() const { } LOG_INFO << "Finished!"; }); + if (system_info.os == "mac" || engineName_ == "cortex.onnx") { + return false; + } + // download cuda toolkit + const std::string jan_host = "https://catalog.jan.ai"; + const std::string cuda_toolkit_file_name = "cuda.tar.gz"; + const std::string download_id = "cuda"; + + auto gpu_driver_version = system_info_utils::GetDriverVersion(); + + auto cuda_runtime_version = + cuda_toolkit_utils::GetCompatibleCudaToolkitVersion( + gpu_driver_version, system_info.os, engineName_); + + std::ostringstream cuda_toolkit_path; + cuda_toolkit_path << "dist/cuda-dependencies/" << 11.7 << "/" + << system_info.os << "/" + << cuda_toolkit_file_name; + + LOG_DEBUG << "Cuda toolkit download url: " << jan_host + << cuda_toolkit_path.str(); + + auto downloadCudaToolkitTask = DownloadTask{ + .id = download_id, + .type = DownloadType::CudaToolkit, + .error = std::nullopt, + .items = {DownloadItem{ + .id = download_id, + .host = jan_host, + .fileName = cuda_toolkit_file_name, + .type = DownloadType::CudaToolkit, + .path = cuda_toolkit_path.str(), + }}, + }; + + download_service.AddDownloadTask( + downloadCudaToolkitTask, + [](const std::string& absolute_path, bool unused) { + LOG_DEBUG << "Downloaded cuda path: " << absolute_path; + // try to unzip the downloaded file + std::filesystem::path downloaded_path{absolute_path}; + + archive_utils::ExtractArchive( + absolute_path, + downloaded_path.parent_path().parent_path().string()); + + try { + std::filesystem::remove(absolute_path); + } catch (std::exception& e) { + LOG_ERROR << "Error removing downloaded file: " << e.what(); + } + }); return true; } diff --git a/engine/main.cc b/engine/main.cc index 193f84c60..4b966b3f6 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -22,53 +22,53 @@ #error "Unsupported platform!" #endif -void RunServer(){ +void RunServer() { // Create logs/ folder and setup log to file - std::filesystem::create_directory(cortex_utils::logs_folder); - trantor::AsyncFileLogger asyncFileLogger; - asyncFileLogger.setFileName(cortex_utils::logs_base_name); - asyncFileLogger.startLogging(); - trantor::Logger::setOutputFunction( - [&](const char* msg, const uint64_t len) { - asyncFileLogger.output(msg, len); - }, - [&]() { asyncFileLogger.flush(); }); - asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); - // Number of cortex.cpp threads - // if (argc > 1) { - // thread_num = std::atoi(argv[1]); - // } + std::filesystem::create_directory(cortex_utils::logs_folder); + trantor::AsyncFileLogger asyncFileLogger; + asyncFileLogger.setFileName(cortex_utils::logs_base_name); + asyncFileLogger.startLogging(); + trantor::Logger::setOutputFunction( + [&](const char* msg, const uint64_t len) { + asyncFileLogger.output(msg, len); + }, + [&]() { asyncFileLogger.flush(); }); + asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit); + // Number of cortex.cpp threads + // if (argc > 1) { + // thread_num = std::atoi(argv[1]); + // } - // // Check for host argument - // if (argc > 2) { - // host = argv[2]; - // } + // // Check for host argument + // if (argc > 2) { + // host = argv[2]; + // } - // // Check for port argument - // if (argc > 3) { - // port = std::atoi(argv[3]); // Convert string argument to int - // } - int thread_num = 1; - std::string host = "127.0.0.1"; - int port = 3928; + // // Check for port argument + // if (argc > 3) { + // port = std::atoi(argv[3]); // Convert string argument to int + // } + int thread_num = 1; + std::string host = "127.0.0.1"; + int port = 3928; - int logical_cores = std::thread::hardware_concurrency(); - int drogon_thread_num = std::max(thread_num, logical_cores); - // cortex_utils::nitro_logo(); + int logical_cores = std::thread::hardware_concurrency(); + int drogon_thread_num = std::max(thread_num, logical_cores); + // cortex_utils::nitro_logo(); #ifdef CORTEX_CPP_VERSION - LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; + LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION; #else - LOG_INFO << "cortex.cpp version: undefined"; + LOG_INFO << "cortex.cpp version: undefined"; #endif - LOG_INFO << "Server started, listening at: " << host << ":" << port; - LOG_INFO << "Please load your model"; - drogon::app().addListener(host, port); - drogon::app().setThreadNum(drogon_thread_num); - LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); + LOG_INFO << "Server started, listening at: " << host << ":" << port; + LOG_INFO << "Please load your model"; + drogon::app().addListener(host, port); + drogon::app().setThreadNum(drogon_thread_num); + LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum(); - drogon::app().run(); - // return 0; + drogon::app().run(); + // return 0; } void ForkProcess() { @@ -80,19 +80,21 @@ void ForkProcess() { ZeroMemory(&si, sizeof(si)); si.cb = sizeof(si); ZeroMemory(&pi, sizeof(pi)); - std::string cmds = cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server"; + std::string cmds = + cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server"; // Create child process if (!CreateProcess( NULL, // No module name (use command line) - const_cast(cmds.c_str()), // Command line (replace with your actual executable) - NULL, // Process handle not inheritable - NULL, // Thread handle not inheritable - FALSE, // Set handle inheritance to FALSE - 0, // No creation flags - NULL, // Use parent's environment block - NULL, // Use parent's starting directory - &si, // Pointer to STARTUPINFO structure - &pi)) // Pointer to PROCESS_INFORMATION structure + const_cast( + cmds.c_str()), // Command line (replace with your actual executable) + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + FALSE, // Set handle inheritance to FALSE + 0, // No creation flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, // Pointer to STARTUPINFO structure + &pi)) // Pointer to PROCESS_INFORMATION structure { std::cout << "Could not start server: " << GetLastError() << std::endl; } else { diff --git a/engine/services/download_service.h b/engine/services/download_service.h index 4efe653bf..a8f7f109b 100644 --- a/engine/services/download_service.h +++ b/engine/services/download_service.h @@ -4,7 +4,7 @@ #include #include -enum class DownloadType { Model, Engine, Miscellaneous }; +enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit }; enum class DownloadStatus { Pending, diff --git a/engine/utils/cuda_toolkit_utils.h b/engine/utils/cuda_toolkit_utils.h new file mode 100644 index 000000000..748af1bd3 --- /dev/null +++ b/engine/utils/cuda_toolkit_utils.h @@ -0,0 +1,63 @@ +#include +#include "utils/semantic_version_utils.h" + +namespace cuda_toolkit_utils { +// those semantic versions are based on: https://docs.nvidia.com/deeplearning/cudnn/latest/reference/support-matrix.html#f1 +inline std::string GetCompatibleCudaToolkitVersion( + const std::string& driver_semantic_version, const std::string& os, + const std::string& engine) { + + if (engine == "cortex.tensorrt-llm") { + // if the engine is cortex.tensorrt-llm, the minimum required CUDA version is 12.4 + if (os == "windows") { + if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "527.41") >= 0) { + return "12.4"; + } else { + throw std::runtime_error( + "GPU driver version not supported. Minimum " + "required driver version is 527.41"); + } + } else if (os == "linux") { + if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "525.60.13") >= 0) { + return "12.4"; + } else { + throw std::runtime_error( + "GPU driver version not supported. Minimum required driver version " + "is 525.60.13"); + } + } else { + throw std::runtime_error("Unsupported OS"); + } + } + + if (os == "windows") { + if (semantic_version_utils::CompareSemanticVersion(driver_semantic_version, + "527.41") >= 0) { + return "12.4"; + } else if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "452.39") >= 0) { + return "11.7"; + } else { + throw std::runtime_error( + "GPU driver version not supported. Minimum " + "required driver version is 452.39"); + } + } else if (os == "linux") { + if (semantic_version_utils::CompareSemanticVersion(driver_semantic_version, + "525.60.13") >= 0) { + return "12.4"; + } else if (semantic_version_utils::CompareSemanticVersion( + driver_semantic_version, "450.80.02") >= 0) { + return "11.7"; + } else { + throw std::runtime_error( + "GPU driver version not supported. Minimum " + "required driver version is 450.80.02"); + } + } else { + throw std::runtime_error("Unsupported OS"); + } +} +} // namespace cuda_toolkit_utils \ No newline at end of file diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h index 77c6b74a6..334116fe7 100644 --- a/engine/utils/file_manager_utils.h +++ b/engine/utils/file_manager_utils.h @@ -4,17 +4,63 @@ #include #include +#if defined(__APPLE__) && defined(__MACH__) +#include +#elif defined(__linux__) +#include +#elif defined(_WIN32) +#include +#endif + namespace file_manager_utils { +inline std::filesystem::path GetExecutableFolderContainerPath() { +#if defined(__APPLE__) && defined(__MACH__) + char buffer[1024]; + uint32_t size = sizeof(buffer); + + if (_NSGetExecutablePath(buffer, &size) == 0) { + LOG_INFO << "Executable path: " << buffer; + return std::filesystem::path{buffer}.parent_path(); + } else { + LOG_ERROR << "Failed to get executable path"; + return std::filesystem::current_path(); + } +#elif defined(__linux__) + // TODO: haven't tested + char buffer[1024]; + ssize_t len = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1); + if (len != -1) { + buffer[len] = '\0'; + LOG_INFO << "Executable path: " << buffer; + return std::filesystem::path{buffer}.parent_path(); + } else { + LOG_ERROR << "Failed to get executable path"; + return std::filesystem::current_path(); + } +#elif defined(_WIN32) + // TODO: haven't tested + char buffer[MAX_PATH]; + GetModuleFileNameA(NULL, buffer, MAX_PATH); + LOG_INFO << "Executable path: " << buffer; + return std::filesystem::path{buffer}.parent_path(); +#else + LOG_ERROR << "Unsupported platform!"; + return std::filesystem::current_path(); +#endif +} + inline std::filesystem::path GetContainerFolderPath( const std::string_view type) { - const auto current_path{std::filesystem::current_path()}; + const auto current_path{GetExecutableFolderContainerPath()}; auto container_folder_path = std::filesystem::path{}; if (type == "Model") { container_folder_path = current_path / "models"; } else if (type == "Engine") { container_folder_path = current_path / "engines"; + } else if (type == "CudaToolkit") { + container_folder_path = current_path; } else { container_folder_path = current_path / "misc"; } @@ -35,6 +81,8 @@ inline std::string downloadTypeToString(DownloadType type) { return "Engine"; case DownloadType::Miscellaneous: return "Misc"; + case DownloadType::CudaToolkit: + return "CudaToolkit"; default: return "UNKNOWN"; } diff --git a/engine/utils/semantic_version_utils.h b/engine/utils/semantic_version_utils.h new file mode 100644 index 000000000..ea9244dc1 --- /dev/null +++ b/engine/utils/semantic_version_utils.h @@ -0,0 +1,34 @@ +#include +#include + +namespace semantic_version_utils { +inline std::vector SplitVersion(const std::string& version) { + std::vector parts; + std::stringstream ss(version); + std::string part; + + while (std::getline(ss, part, '.')) { + parts.push_back(std::stoi(part)); + } + + while (parts.size() < 3) { + parts.push_back(0); + } + + return parts; +} + +inline int CompareSemanticVersion(const std::string& version1, + const std::string& version2) { + std::vector v1 = SplitVersion(version1); + std::vector v2 = SplitVersion(version2); + + for (size_t i = 0; i < 3; ++i) { + if (v1[i] < v2[i]) + return -1; + if (v1[i] > v2[i]) + return 1; + } + return 0; +} +} // namespace semantic_version_utils \ No newline at end of file diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index d13935295..16a9570b7 100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -12,6 +12,7 @@ namespace system_info_utils { constexpr static auto kUnsupported{"Unsupported"}; constexpr static auto kCudaVersionRegex{R"(CUDA Version:\s*([\d\.]+))"}; +constexpr static auto kDriverVersionRegex{R"(Driver Version:\s*(\d+\.\d+))"}; constexpr static auto kGpuQueryCommand{ "nvidia-smi --query-gpu=index,memory.total,name,compute_cap " "--format=csv,noheader,nounits"}; @@ -177,6 +178,31 @@ inline bool IsNvidiaSmiAvailable() { #endif } +inline std::string GetDriverVersion() { + if (!IsNvidiaSmiAvailable()) { + LOG_INFO << "nvidia-smi is not available!"; + return ""; + } + try { + CommandExecutor cmd("nvidia-smi"); + auto output = cmd.execute(); + + const std::regex driver_version_reg(kDriverVersionRegex); + std::smatch match; + + if (std::regex_search(output, match, driver_version_reg)) { + LOG_INFO << "Gpu Driver Version: " << match[1].str(); + return match[1].str(); + } else { + LOG_ERROR << "Gpu Driver not found!"; + return ""; + } + } catch (const std::exception& e) { + LOG_ERROR << "Error: " << e.what(); + return ""; + } +} + inline std::string GetCudaVersion() { if (!IsNvidiaSmiAvailable()) { LOG_INFO << "nvidia-smi is not available!"; @@ -207,6 +233,9 @@ struct GpuInfo { std::string vram; std::string name; std::string arch; + // nvidia driver version. Haven't checked for AMD GPU. + std::optional driver_version; + std::optional cuda_driver_version; std::optional compute_cap; }; @@ -271,6 +300,10 @@ inline std::vector GetGpuInfoList() { std::vector gpuInfoList; try { + // TODO: improve by parsing both in one command execution + auto driver_version = GetDriverVersion(); + auto cuda_version = GetCudaVersion(); + CommandExecutor cmd(kGpuQueryCommand); auto output = cmd.execute(); @@ -285,6 +318,8 @@ inline std::vector GetGpuInfoList() { match[2].str(), // vram match[3].str(), // name GetGpuArch(match[3].str()), // arch + driver_version, // driver_version + cuda_version, // cuda_driver_version match[4].str() // compute_cap }; gpuInfoList.push_back(gpuInfo);