From 045762c14ffc8638c064aa500aa4febcf948f32c Mon Sep 17 00:00:00 2001 From: James Date: Mon, 2 Dec 2024 11:20:39 +0700 Subject: [PATCH 1/2] fix: floating point for models endpoint --- engine/controllers/models.cc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index c51bb3b77..2e60b4779 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -12,6 +12,14 @@ #include "utils/logging_utils.h" #include "utils/string_utils.h" +namespace { +std::string ToJsonStringWithPrecision(Json::Value& input, int precision = 2) { + Json::StreamWriterBuilder wbuilder; + wbuilder.settings_["precision"] = 2; + return Json::writeString(wbuilder, input); +} +} // namespace + void Models::PullModel(const HttpRequestPtr& req, std::function&& callback) { if (!http_util::HasFieldInReq(req, callback, "model")) { @@ -178,9 +186,11 @@ void Models::ListModel( << model_entry.path_to_model_yaml << ", error: " << e.what(); } } + ret["data"] = data; ret["result"] = "OK"; - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + auto ret_str = ToJsonStringWithPrecision(ret); + auto resp = cortex_utils::CreateCortexHttpTextAsJsonResponse(ret_str); resp->setStatusCode(k200OK); callback(resp); } else { From f40a3770b533389b2b7bb4b5d14aa2b658bb7395 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Mon, 2 Dec 2024 12:58:21 +0700 Subject: [PATCH 2/2] fix: get driver version and cuda version at a single command (#1754) Co-authored-by: vansangpfiev --- engine/cli/commands/engine_install_cmd.cc | 6 ++- engine/cli/commands/engine_install_cmd.h | 3 +- engine/cli/commands/engine_update_cmd.cc | 3 +- engine/cli/commands/server_start_cmd.cc | 2 +- engine/services/engine_service.h | 3 +- engine/services/hardware_service.cc | 4 +- engine/utils/hardware/gpu_info.h | 3 +- engine/utils/system_info_utils.h | 54 +++++++++-------------- 8 files changed, 36 insertions(+), 42 deletions(-) diff --git a/engine/cli/commands/engine_install_cmd.cc b/engine/cli/commands/engine_install_cmd.cc index 477e38ee2..21cd9f042 100644 --- a/engine/cli/commands/engine_install_cmd.cc +++ b/engine/cli/commands/engine_install_cmd.cc @@ -37,7 +37,8 @@ bool EngineInstallCmd::Exec(const std::string& engine, dp.Connect(host_, port_); // engine can be small, so need to start ws first auto dp_res = std::async(std::launch::deferred, [&dp] { - bool need_cuda_download = !system_info_utils::GetCudaVersion().empty(); + bool need_cuda_download = + !system_info_utils::GetDriverAndCudaVersion().second.empty(); if (need_cuda_download) { return dp.Handle({DownloadType::Engine, DownloadType::CudaToolkit}); } else { @@ -149,7 +150,8 @@ bool EngineInstallCmd::Exec(const std::string& engine, dp.Connect(host_, port_); // engine can be small, so need to start ws first auto dp_res = std::async(std::launch::deferred, [&dp] { - bool need_cuda_download = !system_info_utils::GetCudaVersion().empty(); + bool need_cuda_download = + !system_info_utils::GetDriverAndCudaVersion().second.empty(); if (need_cuda_download) { return dp.Handle({DownloadType::Engine, DownloadType::CudaToolkit}); } else { diff --git a/engine/cli/commands/engine_install_cmd.h b/engine/cli/commands/engine_install_cmd.h index deb9197e1..d50776dc4 100644 --- a/engine/cli/commands/engine_install_cmd.h +++ b/engine/cli/commands/engine_install_cmd.h @@ -14,7 +14,8 @@ class EngineInstallCmd { port_(port), show_menu_(show_menu), hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(), - .cuda_driver_version = system_info_utils::GetCudaVersion()} {}; + .cuda_driver_version = + system_info_utils::GetDriverAndCudaVersion().second} {}; bool Exec(const std::string& engine, const std::string& version = "latest", const std::string& src = ""); diff --git a/engine/cli/commands/engine_update_cmd.cc b/engine/cli/commands/engine_update_cmd.cc index 9717ddb15..a86106ed2 100644 --- a/engine/cli/commands/engine_update_cmd.cc +++ b/engine/cli/commands/engine_update_cmd.cc @@ -25,7 +25,8 @@ bool EngineUpdateCmd::Exec(const std::string& host, int port, dp.Connect(host, port); // engine can be small, so need to start ws first auto dp_res = std::async(std::launch::deferred, [&dp] { - bool need_cuda_download = !system_info_utils::GetCudaVersion().empty(); + bool need_cuda_download = + !system_info_utils::GetDriverAndCudaVersion().second.empty(); if (need_cuda_download) { return dp.Handle({DownloadType::Engine, DownloadType::CudaToolkit}); } else { diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc index 5ba972463..9e910220f 100644 --- a/engine/cli/commands/server_start_cmd.cc +++ b/engine/cli/commands/server_start_cmd.cc @@ -8,7 +8,7 @@ namespace commands { namespace { bool TryConnectToServer(const std::string& host, int port) { - constexpr const auto kMaxRetry = 3u; + constexpr const auto kMaxRetry = 4u; auto count = 0u; // Check if server is started while (true) { diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index dee8a530b..78dc8773e 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -60,7 +60,8 @@ class EngineService : public EngineServiceI { explicit EngineService(std::shared_ptr download_service) : download_service_{download_service}, hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(), - .cuda_driver_version = system_info_utils::GetCudaVersion()} {} + .cuda_driver_version = + system_info_utils::GetDriverAndCudaVersion().second} {} std::vector GetEngineInfoList() const; diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc index 16ae234b4..a6ceb556f 100644 --- a/engine/services/hardware_service.cc +++ b/engine/services/hardware_service.cc @@ -16,7 +16,7 @@ namespace services { namespace { bool TryConnectToServer(const std::string& host, int port) { - constexpr const auto kMaxRetry = 3u; + constexpr const auto kMaxRetry = 4u; auto count = 0u; // Check if server is started while (true) { @@ -292,7 +292,7 @@ void HardwareService::UpdateHardwareInfos() { } #if defined(_WIN32) || defined(_WIN64) || defined(__linux__) - if (system_info_utils::IsNvidiaSmiAvailable()) { + if (!gpus.empty()) { const char* value = std::getenv("CUDA_VISIBLE_DEVICES"); if (value) { LOG_INFO << "CUDA_VISIBLE_DEVICES: " << value; diff --git a/engine/utils/hardware/gpu_info.h b/engine/utils/hardware/gpu_info.h index bbd4a49d6..1e10589a9 100644 --- a/engine/utils/hardware/gpu_info.h +++ b/engine/utils/hardware/gpu_info.h @@ -11,12 +11,11 @@ inline std::vector GetGPUInfo() { // Only support for nvidia for now // auto gpus = hwinfo::getAllGPUs(); auto nvidia_gpus = system_info_utils::GetGpuInfoList(); - auto cuda_version = system_info_utils::GetCudaVersion(); for (auto& n : nvidia_gpus) { res.emplace_back( GPU{.id = n.id, .name = n.name, - .version = cuda_version, + .version = nvidia_gpus[0].cuda_driver_version.value_or("unknown"), .add_info = NvidiaAddInfo{ .driver_version = n.driver_version.value_or("unknown"), diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index 013069699..f2fab10cb 100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -19,7 +19,8 @@ constexpr static auto kUnsupported{"Unsupported"}; constexpr static auto kCudaVersionRegex{R"(CUDA Version:\s*([\d\.]+))"}; constexpr static auto kDriverVersionRegex{R"(Driver Version:\s*(\d+\.\d+))"}; constexpr static auto kGpuQueryCommand{ - "nvidia-smi --query-gpu=index,memory.total,memory.free,name,compute_cap,uuid " + "nvidia-smi " + "--query-gpu=index,memory.total,memory.free,name,compute_cap,uuid " "--format=csv,noheader,nounits"}; constexpr static auto kGpuInfoRegex{ R"((\d+),\s*(\d+),\s*(\d+),\s*([^,]+),\s*([\d\.]+),\s*([^\n,]+))"}; @@ -100,53 +101,42 @@ inline bool IsNvidiaSmiAvailable() { #endif } -inline std::string GetDriverVersion() { +inline std::pair GetDriverAndCudaVersion() { if (!IsNvidiaSmiAvailable()) { CTL_INF("nvidia-smi is not available!"); - return ""; + return {}; } try { + std::string driver_version; + std::string cuda_version; CommandExecutor cmd("nvidia-smi"); auto output = cmd.execute(); const std::regex driver_version_reg(kDriverVersionRegex); - std::smatch match; + std::smatch driver_match; - if (std::regex_search(output, match, driver_version_reg)) { - LOG_INFO << "Gpu Driver Version: " << match[1].str(); - return match[1].str(); + if (std::regex_search(output, driver_match, driver_version_reg)) { + LOG_INFO << "Gpu Driver Version: " << driver_match[1].str(); + driver_version = driver_match[1].str(); } else { LOG_ERROR << "Gpu Driver not found!"; - return ""; + return {}; } - } catch (const std::exception& e) { - LOG_ERROR << "Error: " << e.what(); - return ""; - } -} - -inline std::string GetCudaVersion() { - if (!IsNvidiaSmiAvailable()) { - CTL_INF("nvidia-smi is not available!"); - return ""; - } - try { - CommandExecutor cmd("nvidia-smi"); - auto output = cmd.execute(); const std::regex cuda_version_reg(kCudaVersionRegex); - std::smatch match; + std::smatch cuda_match; - if (std::regex_search(output, match, cuda_version_reg)) { - LOG_INFO << "CUDA Version: " << match[1].str(); - return match[1].str(); + if (std::regex_search(output, cuda_match, cuda_version_reg)) { + LOG_INFO << "CUDA Version: " << cuda_match[1].str(); + cuda_version = cuda_match[1].str(); } else { LOG_ERROR << "CUDA Version not found!"; - return ""; + return {}; } + return std::pair(driver_version, cuda_version); } catch (const std::exception& e) { LOG_ERROR << "Error: " << e.what(); - return ""; + return {}; } } @@ -227,9 +217,9 @@ inline std::vector GetGpuInfoList() { if (!IsNvidiaSmiAvailable()) return gpuInfoList; try { - // TODO: improve by parsing both in one command execution - auto driver_version = GetDriverVersion(); - auto cuda_version = GetCudaVersion(); + auto [driver_version, cuda_version] = GetDriverAndCudaVersion(); + if (driver_version.empty() || cuda_version.empty()) + return gpuInfoList; CommandExecutor cmd(kGpuQueryCommand); auto output = cmd.execute(); @@ -249,7 +239,7 @@ inline std::vector GetGpuInfoList() { driver_version, // driver_version cuda_version, // cuda_driver_version match[5].str(), // compute_cap - match[6].str() // uuid + match[6].str() // uuid }; gpuInfoList.push_back(gpuInfo); search_start = match.suffix().first;