diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc index b3ab8bae5..7b0153f69 100644 --- a/engine/commands/engine_init_cmd.cc +++ b/engine/commands/engine_init_cmd.cc @@ -75,7 +75,9 @@ bool EngineInitCmd::Exec() const { matched_variant = engine_matcher_utils::ValidateOnnx( variants, system_info.os, system_info.arch); } else if (engineName_ == "cortex.llamacpp") { - auto suitable_avx = engine_matcher_utils::GetSuitableAvxVariant(); + cortex::cpuid::CpuInfo cpu_info; + auto suitable_avx = + engine_matcher_utils::GetSuitableAvxVariant(cpu_info); matched_variant = engine_matcher_utils::Validate( variants, system_info.os, system_info.arch, suitable_avx, cuda_driver_version); diff --git a/engine/test/components/test_engine_matcher_utils.cc b/engine/test/components/test_engine_matcher_utils.cc new file mode 100644 index 000000000..d724c3fde --- /dev/null +++ b/engine/test/components/test_engine_matcher_utils.cc @@ -0,0 +1,137 @@ +#include +#include +#include "gtest/gtest.h" +#include "utils/engine_matcher_utils.h" + +class EngineMatcherUtilsTestSuite : public ::testing::Test { + protected: + const std::vector cortex_llamacpp_variants{ + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-vulkan.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-mac-arm64.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-11-7.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-12-0.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx.tar.gz", + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-vulkan.tar.gz", + }; + + const std::vector cortex_tensorrt_variants{ + "cortex.tensorrt-llm-0.0.9-linux-cuda-12-4.tar.gz", + "cortex.tensorrt-llm-0.0.9-windows-cuda-12-4.tar.gz"}; + + const std::vector cortex_onnx_variants{ + "cortex.onnx-0.1.7-windows-amd64.tar.gz"}; +}; + +TEST_F(EngineMatcherUtilsTestSuite, TestValidateOnnx) { + + { + auto expect_matched_variant = cortex_onnx_variants[0]; + auto result = engine_matcher_utils::ValidateOnnx(cortex_onnx_variants, + "windows", "amd64"); + + EXPECT_EQ(result, expect_matched_variant); + } + + { + // should return an empty variant because no variant matched + auto expect_matched_variant{""}; + auto windows_arm_result = engine_matcher_utils::ValidateOnnx( + cortex_onnx_variants, "windows", "arm"); + auto mac_arm64_result = engine_matcher_utils::ValidateOnnx( + cortex_onnx_variants, "mac", "arm64"); + + EXPECT_EQ(windows_arm_result, expect_matched_variant); + EXPECT_EQ(mac_arm64_result, expect_matched_variant); + } +} + +TEST_F(EngineMatcherUtilsTestSuite, TestValidateTensorrt) { + + { + auto windows_expect_matched_variant{cortex_tensorrt_variants[1]}; + auto linux_expect_matched_variant{cortex_tensorrt_variants[0]}; + auto windows{"windows"}; + auto linux{"linux"}; + auto cuda_version{"12.4"}; + auto windows_result = engine_matcher_utils::ValidateTensorrtLlm( + cortex_tensorrt_variants, windows, cuda_version); + auto linux_result = engine_matcher_utils::ValidateTensorrtLlm( + cortex_tensorrt_variants, linux, cuda_version); + + EXPECT_EQ(windows_result, windows_expect_matched_variant); + EXPECT_EQ(linux_result, linux_expect_matched_variant); + } + + { // macos is not supported + auto os = "mac"; + auto cuda_version{"12.4"}; + + auto result = engine_matcher_utils::ValidateTensorrtLlm( + cortex_tensorrt_variants, os, cuda_version); + EXPECT_EQ(result, ""); + } +} + +TEST_F(EngineMatcherUtilsTestSuite, TestValidate) { + { + auto os{"windows"}; + auto cpu_arch{"amd64"}; + auto suitable_avx{"avx2"}; + auto cuda_version{"12.4"}; + + auto variant = engine_matcher_utils::Validate( + cortex_llamacpp_variants, os, cpu_arch, suitable_avx, cuda_version); + + EXPECT_EQ( + variant, + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-12-0.tar.gz"); + } + + { + auto os{"mac"}; + auto cpu_arch{"amd64"}; + auto suitable_avx{""}; + auto cuda_version{""}; + + auto variant = engine_matcher_utils::Validate( + cortex_llamacpp_variants, os, cpu_arch, suitable_avx, cuda_version); + + EXPECT_EQ(variant, "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz"); + } + + { + auto os{"windows"}; + auto cpu_arch{"amd64"}; + auto suitable_avx{"avx2"}; + auto cuda_version{"10"}; + + auto variant = engine_matcher_utils::Validate( + cortex_llamacpp_variants, os, cpu_arch, suitable_avx, cuda_version); + + // fallback to no cuda version + EXPECT_EQ(variant, + "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz"); + } +} diff --git a/engine/test/components/test_system_info_utils.cc b/engine/test/components/test_system_info_utils.cc new file mode 100644 index 000000000..da38bde5d --- /dev/null +++ b/engine/test/components/test_system_info_utils.cc @@ -0,0 +1,90 @@ +#include +#include "gtest/gtest.h" + +class SystemInfoUtilsTestSuite : public ::testing::Test { + protected: + const std::string nvidia_smi_sample_output = R"( +Sun Aug 25 22:29:25 2024 ++-----------------------------------------------------------------------------------------+ +| NVIDIA-SMI 560.70 Driver Version: 560.70 CUDA Version: 12.6 | +|-----------------------------------------+------------------------+----------------------+ +| GPU Name Driver-Model | Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | +| | | MIG M. | +|=========================================+========================+======================| +| 0 NVIDIA GeForce RTX 4090 WDDM | 00000000:01:00.0 Off | Off | +| 0% 24C P8 10W / 500W | 395MiB / 24564MiB | 19% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ + ++-----------------------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=========================================================================================| +| 0 N/A N/A 3984 C+G ...5n1h2txyewy\ShellExperienceHost.exe N/A | +| 0 N/A N/A 7904 C+G ...ekyb3d8bbwe\PhoneExperienceHost.exe N/A | +| 0 N/A N/A 8240 C+G ...__8wekyb3d8bbwe\WindowsTerminal.exe N/A | +| 0 N/A N/A 8904 C+G C:\Windows\explorer.exe N/A | +| 0 N/A N/A 9304 C+G ...siveControlPanel\SystemSettings.exe N/A | +| 0 N/A N/A 9944 C+G ...nt.CBS_cw5n1h2txyewy\SearchHost.exe N/A | +| 0 N/A N/A 11140 C+G ...2txyewy\StartMenuExperienceHost.exe N/A | ++-----------------------------------------------------------------------------------------+ +)"; + + const std::string vulkan_sample_output = R"( +========== +VULKANINFO +========== + +Vulkan Instance Version: 1.3.280 + + +Instance Extensions: count = 19 +------------------------------- +VK_EXT_debug_report : extension revision 10 +VK_EXT_debug_utils : extension revision 2 +VK_EXT_direct_mode_display : extension revision 1 +VK_EXT_surface_maintenance1 : extension revision 1 +VK_EXT_swapchain_colorspace : extension revision 4 +VK_KHR_device_group_creation : extension revision 1 +VK_KHR_display : extension revision 23 +VK_KHR_external_fence_capabilities : extension revision 1 +VK_KHR_external_memory_capabilities : extension revision 1 +VK_KHR_external_semaphore_capabilities : extension revision 1 +VK_KHR_get_display_properties2 : extension revision 1 +VK_KHR_get_physical_device_properties2 : extension revision 2 +VK_KHR_get_surface_capabilities2 : extension revision 1 +VK_KHR_portability_enumeration : extension revision 1 +VK_KHR_surface : extension revision 25 +VK_KHR_surface_protected_capabilities : extension revision 1 +VK_KHR_win32_surface : extension revision 6 +VK_LUNARG_direct_driver_loading : extension revision 1 +VK_NV_external_memory_capabilities : extension revision 1 + +Instance Layers: count = 1 +-------------------------- +VK_LAYER_NV_optimus NVIDIA Optimus layer 1.3.280 version 1 + +Devices: +======== +GPU0: + apiVersion = 1.3.280 + driverVersion = 560.70.0.0 + vendorID = 0x10de + deviceID = 0x2684 + deviceType = PHYSICAL_DEVICE_TYPE_DISCRETE_GPU + deviceName = NVIDIA GeForce RTX 4090 + driverID = DRIVER_ID_NVIDIA_PROPRIETARY + driverName = NVIDIA + driverInfo = 560.70 + conformanceVersion = 1.3.8.2 + deviceUUID = 11deafdf-9f15-e857-2a87-8acc153fc9f7 + driverUUID = 10f251d9-d3c0-5001-bf67-24bb06423040 +)"; + + const std::string gpu_query_list_sample_output = R"( +0, 46068, NVIDIA RTX A6000, 8.6 +1, 46068, NVIDIA RTX A6000, 8.6 +)"; +}; diff --git a/engine/utils/engine_matcher_utils.h b/engine/utils/engine_matcher_utils.h index 77baf1f72..287304f02 100644 --- a/engine/utils/engine_matcher_utils.h +++ b/engine/utils/engine_matcher_utils.h @@ -8,49 +8,10 @@ #include "utils/logging_utils.h" namespace engine_matcher_utils { -// for testing purpose -const std::vector cortex_llamacpp_variants{ - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-vulkan.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-mac-arm64.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-11-7.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-12-0.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx.tar.gz", - "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-vulkan.tar.gz", -}; -const std::vector cortex_onnx_variants{ - "cortex.onnx-0.1.7-windows-amd64.tar.gz"}; - -const std::vector cortex_tensorrt_variants{ - "cortex.tensorrt-llm-0.0.9-linux-cuda-12-4.tar.gz", - "cortex.tensorrt-llm-0.0.9-windows-cuda-12-4.tar.gz"}; - -inline std::string GetSuitableAvxVariant() { - cortex::cpuid::CpuInfo cpu_info; - +inline std::string GetSuitableAvxVariant(cortex::cpuid::CpuInfo& cpu_info) { CTL_INF("GetSuitableAvxVariant:" << "\n" << cpu_info.to_string()); + // prioritize avx512 > avx2 > avx > noavx if (cpu_info.has_avx512_f()) return "avx512"; if (cpu_info.has_avx2()) @@ -151,10 +112,8 @@ inline std::string Validate(const std::vector& variants, const std::string& os, const std::string& cpu_arch, const std::string& suitable_avx, const std::string& cuda_version) { - - // Early return if the OS is unsupported + // Early return if the OS is not supported if (os != "mac" && os != "windows" && os != "linux") { - // TODO: throw is better return ""; } diff --git a/engine/utils/http_util.h b/engine/utils/http_util.h index 471ef3b27..422d2c543 100644 --- a/engine/utils/http_util.h +++ b/engine/utils/http_util.h @@ -1,6 +1,7 @@ #pragma once #include +#include "utils/cortex_utils.h" using namespace drogon; @@ -21,4 +22,4 @@ inline bool HasFieldInReq(const HttpRequestPtr& req, return true; } -} // namespace http_util \ No newline at end of file +} // namespace http_util diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h index ede7915d2..9cdcc8f05 100644 --- a/engine/utils/system_info_utils.h +++ b/engine/utils/system_info_utils.h @@ -79,91 +79,6 @@ inline SystemInfo GetSystemInfo() { return SystemInfo{os.str(), arch.str()}; } -constexpr auto vulkan_sample_output = R"( -========== -VULKANINFO -========== - -Vulkan Instance Version: 1.3.280 - - -Instance Extensions: count = 19 -------------------------------- -VK_EXT_debug_report : extension revision 10 -VK_EXT_debug_utils : extension revision 2 -VK_EXT_direct_mode_display : extension revision 1 -VK_EXT_surface_maintenance1 : extension revision 1 -VK_EXT_swapchain_colorspace : extension revision 4 -VK_KHR_device_group_creation : extension revision 1 -VK_KHR_display : extension revision 23 -VK_KHR_external_fence_capabilities : extension revision 1 -VK_KHR_external_memory_capabilities : extension revision 1 -VK_KHR_external_semaphore_capabilities : extension revision 1 -VK_KHR_get_display_properties2 : extension revision 1 -VK_KHR_get_physical_device_properties2 : extension revision 2 -VK_KHR_get_surface_capabilities2 : extension revision 1 -VK_KHR_portability_enumeration : extension revision 1 -VK_KHR_surface : extension revision 25 -VK_KHR_surface_protected_capabilities : extension revision 1 -VK_KHR_win32_surface : extension revision 6 -VK_LUNARG_direct_driver_loading : extension revision 1 -VK_NV_external_memory_capabilities : extension revision 1 - -Instance Layers: count = 1 --------------------------- -VK_LAYER_NV_optimus NVIDIA Optimus layer 1.3.280 version 1 - -Devices: -======== -GPU0: - apiVersion = 1.3.280 - driverVersion = 560.70.0.0 - vendorID = 0x10de - deviceID = 0x2684 - deviceType = PHYSICAL_DEVICE_TYPE_DISCRETE_GPU - deviceName = NVIDIA GeForce RTX 4090 - driverID = DRIVER_ID_NVIDIA_PROPRIETARY - driverName = NVIDIA - driverInfo = 560.70 - conformanceVersion = 1.3.8.2 - deviceUUID = 11deafdf-9f15-e857-2a87-8acc153fc9f7 - driverUUID = 10f251d9-d3c0-5001-bf67-24bb06423040 -)"; - -constexpr auto gpu_query_list_sample_output = R"( -0, 46068, NVIDIA RTX A6000, 8.6 -1, 46068, NVIDIA RTX A6000, 8.6 -)"; - -constexpr auto nvidia_smi_sample_output = R"( -Sun Aug 25 22:29:25 2024 -+-----------------------------------------------------------------------------------------+ -| NVIDIA-SMI 560.70 Driver Version: 560.70 CUDA Version: 12.6 | -|-----------------------------------------+------------------------+----------------------+ -| GPU Name Driver-Model | Bus-Id Disp.A | Volatile Uncorr. ECC | -| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | -| | | MIG M. | -|=========================================+========================+======================| -| 0 NVIDIA GeForce RTX 4090 WDDM | 00000000:01:00.0 Off | Off | -| 0% 24C P8 10W / 500W | 395MiB / 24564MiB | 19% Default | -| | | N/A | -+-----------------------------------------+------------------------+----------------------+ - -+-----------------------------------------------------------------------------------------+ -| Processes: | -| GPU GI CI PID Type Process name GPU Memory | -| ID ID Usage | -|=========================================================================================| -| 0 N/A N/A 3984 C+G ...5n1h2txyewy\ShellExperienceHost.exe N/A | -| 0 N/A N/A 7904 C+G ...ekyb3d8bbwe\PhoneExperienceHost.exe N/A | -| 0 N/A N/A 8240 C+G ...__8wekyb3d8bbwe\WindowsTerminal.exe N/A | -| 0 N/A N/A 8904 C+G C:\Windows\explorer.exe N/A | -| 0 N/A N/A 9304 C+G ...siveControlPanel\SystemSettings.exe N/A | -| 0 N/A N/A 9944 C+G ...nt.CBS_cw5n1h2txyewy\SearchHost.exe N/A | -| 0 N/A N/A 11140 C+G ...2txyewy\StartMenuExperienceHost.exe N/A | -+-----------------------------------------------------------------------------------------+ -)"; - inline bool IsNvidiaSmiAvailable() { #ifdef _WIN32 // Check if nvidia-smi.exe exists in the PATH on Windows diff --git a/engine/vcpkg.json b/engine/vcpkg.json index 74f89965a..e7a74d2dc 100644 --- a/engine/vcpkg.json +++ b/engine/vcpkg.json @@ -1,19 +1,19 @@ { - "dependencies": [ - "cli11", - { - "name": "cpp-httplib", - "features": [ - "openssl" - ] - }, - "drogon", - "jinja2cpp", - "jsoncpp", - "minizip", - "nlohmann-json", - "yaml-cpp", - "libarchive", - "tabulate" - ] - } \ No newline at end of file + "dependencies": [ + "gtest", + "cli11", + { + "name": "cpp-httplib", + "features": ["openssl"] + }, + "drogon", + "jinja2cpp", + "jsoncpp", + "minizip", + "nlohmann-json", + "yaml-cpp", + "libarchive", + "tabulate" + ] +} +