From 50abbea0675b01c1959e96e32e85404a8ba03516 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Wed, 29 May 2024 18:54:16 +0700 Subject: [PATCH] feat: cpu instruction set detection (#570) --- cortex-cpp/CMakeLists.txt | 21 +- cortex-cpp/Makefile | 12 +- cortex-cpp/controllers/server.cc | 11 + cortex-cpp/utils/cpuid/cpu_info.cc | 190 ++++++++++++++++++ cortex-cpp/utils/cpuid/cpu_info.h | 131 ++++++++++++ cortex-cpp/utils/cpuid/cpu_validation.cc | 60 ++++++ cortex-cpp/utils/cpuid/cpu_validation.h | 8 + cortex-cpp/utils/cpuid/detail/cpu_info_impl.h | 74 +++++++ .../utils/cpuid/detail/extract_x86_flags.h | 51 +++++ cortex-cpp/utils/cpuid/detail/init_gcc_x86.h | 65 ++++++ .../utils/cpuid/detail/init_ios_clang_arm.h | 23 +++ .../utils/cpuid/detail/init_linux_gcc_arm.h | 52 +++++ cortex-cpp/utils/cpuid/detail/init_msvc_arm.h | 19 ++ cortex-cpp/utils/cpuid/detail/init_msvc_x86.h | 43 ++++ cortex-cpp/utils/cpuid/detail/init_unknown.h | 13 ++ cortex-cpp/utils/cpuid/platform.h | 157 +++++++++++++++ 16 files changed, 923 insertions(+), 7 deletions(-) create mode 100644 cortex-cpp/utils/cpuid/cpu_info.cc create mode 100644 cortex-cpp/utils/cpuid/cpu_info.h create mode 100644 cortex-cpp/utils/cpuid/cpu_validation.cc create mode 100644 cortex-cpp/utils/cpuid/cpu_validation.h create mode 100644 cortex-cpp/utils/cpuid/detail/cpu_info_impl.h create mode 100644 cortex-cpp/utils/cpuid/detail/extract_x86_flags.h create mode 100644 cortex-cpp/utils/cpuid/detail/init_gcc_x86.h create mode 100644 cortex-cpp/utils/cpuid/detail/init_ios_clang_arm.h create mode 100644 cortex-cpp/utils/cpuid/detail/init_linux_gcc_arm.h create mode 100644 cortex-cpp/utils/cpuid/detail/init_msvc_arm.h create mode 100644 cortex-cpp/utils/cpuid/detail/init_msvc_x86.h create mode 100644 cortex-cpp/utils/cpuid/detail/init_unknown.h create mode 100644 cortex-cpp/utils/cpuid/platform.h diff --git a/cortex-cpp/CMakeLists.txt b/cortex-cpp/CMakeLists.txt index 4e2092afe..7351e38f5 100644 --- a/cortex-cpp/CMakeLists.txt +++ b/cortex-cpp/CMakeLists.txt @@ -59,9 +59,28 @@ endif() add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}") +if(LLAMA_CUDA) + add_compile_definitions(CORTEX_CUDA) +endif() + +if(LLAMA_AVX512) + add_compile_definitions(CORTEX_AVX512) +endif() + +if(LLAMA_AVX2) + add_compile_definitions(CORTEX_AVX2) +endif() + +if(LLAMA_VULKAN) + add_compile_definitions(CORTEX_VULKAN) +endif() + add_subdirectory(test) -add_executable(${PROJECT_NAME} main.cc) +add_executable(${PROJECT_NAME} main.cc + ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc + ${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_validation.cc +) # ############################################################################## # If you include the drogon source code locally in your project, use this method diff --git a/cortex-cpp/Makefile b/cortex-cpp/Makefile index 20cd4da7f..185675ad4 100644 --- a/cortex-cpp/Makefile +++ b/cortex-cpp/Makefile @@ -39,12 +39,12 @@ endif pre-package: ifeq ($(OS),Windows_NT) - @powershell -Command "mkdir -p cortex-cpp\engines\cortex.llamacpp\; cp -r build\engines\cortex.llamacpp\engine.dll cortex-cpp\engines\cortex.llamacpp\;" - @powershell -Command "cp -r build\Release\cortex-cpp.exe .\cortex-cpp\;" - @powershell -Command "cp -r build-deps\_install\bin\zlib.dll .\cortex-cpp\;" - @powershell -Command "cp -r ..\.github\patches\windows\msvcp140.dll .\cortex-cpp\;" - @powershell -Command "cp -r ..\.github\patches\windows\vcruntime140_1.dll .\cortex-cpp\;" - @powershell -Command "cp -r ..\.github\patches\windows\vcruntime140.dll .\cortex-cpp\;" + @powershell -Command "mkdir -p cortex-cpp\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex-cpp\engines\cortex.llamacpp\;" + @powershell -Command "cp build\Release\cortex-cpp.exe .\cortex-cpp\;" + @powershell -Command "cp build-deps\_install\bin\zlib.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex-cpp\;" else ifeq ($(shell uname -s),Linux) @mkdir -p cortex-cpp/engines/cortex.llamacpp; \ cp build/engines/cortex.llamacpp/libengine.so cortex-cpp/engines/cortex.llamacpp/; \ diff --git a/cortex-cpp/controllers/server.cc b/cortex-cpp/controllers/server.cc index 6f703aac4..8bdab4757 100644 --- a/cortex-cpp/controllers/server.cc +++ b/cortex-cpp/controllers/server.cc @@ -6,6 +6,8 @@ #include "trantor/utils/Logger.h" #include "utils/cortex_utils.h" +#include "utils/cpuid/cpu_info.h" +#include "utils/cpuid/cpu_validation.h" #include "utils/logging_utils.h" using namespace inferences; @@ -265,6 +267,15 @@ void server::LoadModel(const HttpRequestPtr& req, }; try { + if (engine_type == kLlamaEngine) { + cortex::cpuid::CpuInfo cpu_info; + LOG_INFO << "CPU instruction set: " << cpu_info.to_string(); + if (auto [res, err] = cortex::cpuid::llamacpp::IsValidInstructions(); + !res) { + LOG_WARN << err; + } + } + std::string abs_path = cortex_utils::GetCurrentPath() + get_engine_path(engine_type); engines_[engine_type].dl = diff --git a/cortex-cpp/utils/cpuid/cpu_info.cc b/cortex-cpp/utils/cpuid/cpu_info.cc new file mode 100644 index 000000000..538221536 --- /dev/null +++ b/cortex-cpp/utils/cpuid/cpu_info.cc @@ -0,0 +1,190 @@ +// Copyright (c) 2013 Steinwurf ApS +// All Rights Reserved +// Inspired by https://github.com/steinwurf/cpuid +#include "platform.h" + +#include "cpu_info.h" +#include "detail/cpu_info_impl.h" + +#if defined(PLATFORM_GCC_COMPATIBLE_X86) +#include "detail/init_gcc_x86.h" +#elif defined(PLATFORM_MSVC_X86) && !defined(PLATFORM_WINDOWS_PHONE) +#include "detail/init_msvc_x86.h" +#elif defined(PLATFORM_MSVC_ARM) +#include "detail/init_msvc_arm.h" +#elif defined(PLATFORM_CLANG_ARM) && defined(PLATFORM_IOS) +#include "detail/init_ios_clang_arm.h" +#elif defined(PLATFORM_GCC_COMPATIBLE_ARM) && defined(PLATFORM_LINUX) +#include "detail/init_linux_gcc_arm.h" +#else +#include "detail/init_unknown.h" +#endif + +namespace cortex::cpuid { + +CpuInfo::CpuInfo() : impl(new Impl()) { + init_cpuinfo(*impl); +} + +CpuInfo::~CpuInfo() {} + +// x86 member functions +bool CpuInfo::has_fpu() const { + return impl->has_fpu; +} + +bool CpuInfo::has_mmx() const { + return impl->has_mmx; +} + +bool CpuInfo::has_sse() const { + return impl->has_sse; +} + +bool CpuInfo::has_sse2() const { + return impl->has_sse2; +} + +bool CpuInfo::has_sse3() const { + return impl->has_sse3; +} + +bool CpuInfo::has_ssse3() const { + return impl->has_ssse3; +} + +bool CpuInfo::has_sse4_1() const { + return impl->has_sse4_1; +} + +bool CpuInfo::has_sse4_2() const { + return impl->has_sse4_2; +} + +bool CpuInfo::has_pclmulqdq() const { + return impl->has_pclmulqdq; +} + +bool CpuInfo::has_avx() const { + return impl->has_avx; +} + +bool CpuInfo::has_avx2() const { + return impl->has_avx2; +} + +bool CpuInfo::has_avx512_f() const { + return impl->has_avx512_f; +} + +bool CpuInfo::has_avx512_dq() const { + return impl->has_avx512_dq; +} + +bool CpuInfo::has_avx512_ifma() const { + return impl->has_avx512_ifma; +} + +bool CpuInfo::has_avx512_pf() const { + return impl->has_avx512_pf; +} + +bool CpuInfo::has_avx512_er() const { + return impl->has_avx512_er; +} + +bool CpuInfo::has_avx512_cd() const { + return impl->has_avx512_cd; +} + +bool CpuInfo::has_avx512_bw() const { + return impl->has_avx512_bw; +} + +bool CpuInfo::has_avx512_vl() const { + return impl->has_avx512_vl; +} + +bool CpuInfo::has_avx512_vbmi() const { + return impl->has_avx512_vbmi; +} + +bool CpuInfo::has_avx512_vbmi2() const { + return impl->has_avx512_vbmi2; +} + +bool CpuInfo::has_avx512_vnni() const { + return impl->has_avx512_vnni; +} + +bool CpuInfo::has_avx512_bitalg() const { + return impl->has_avx512_bitalg; +} + +bool CpuInfo::has_avx512_vpopcntdq() const { + return impl->has_avx512_vpopcntdq; +} + +bool CpuInfo::has_avx512_4vnniw() const { + return impl->has_avx512_4vnniw; +} + +bool CpuInfo::has_avx512_4fmaps() const { + return impl->has_avx512_4fmaps; +} + +bool CpuInfo::has_avx512_vp2intersect() const { + return impl->has_avx512_vp2intersect; +} + +bool CpuInfo::has_f16c() const { + return impl->has_f16c; +} + +bool CpuInfo::has_aes() const { + return impl->has_aes; +} + +// ARM member functions +bool CpuInfo::has_neon() const { + return impl->has_neon; +} + +std::string CpuInfo::to_string() { + std::string s; + auto get = [](bool flag) -> std::string { + return flag ? "1" : "0"; + }; + s += "fpu = " + get(impl->has_fpu) + "| "; + s += "mmx = " + get(impl->has_mmx) + "| "; + s += "sse = " + get(impl->has_sse) + "| "; + s += "sse2 = " + get(impl->has_sse2) + "| "; + s += "sse3 = " + get(impl->has_sse3) + "| "; + s += "ssse3 = " + get(impl->has_ssse3) + "| "; + s += "sse4_1 = " + get(impl->has_sse4_1) + "| "; + s += "sse4_2 = " + get(impl->has_sse4_2) + "| "; + s += "pclmulqdq = " + get(impl->has_pclmulqdq) + "| "; + s += "avx = " + get(impl->has_avx) + "| "; + s += "avx2 = " + get(impl->has_avx2) + "| "; + s += "avx512_f = " + get(impl->has_avx512_f) + "| "; + s += "avx512_dq = " + get(impl->has_avx512_dq) + "| "; + s += "avx512_ifma = " + get(impl->has_avx512_ifma) + "| "; + s += "avx512_pf = " + get(impl->has_avx512_pf) + "| "; + s += "avx512_er = " + get(impl->has_avx512_er) + "| "; + s += "avx512_cd = " + get(impl->has_avx512_cd) + "| "; + s += "avx512_bw = " + get(impl->has_avx512_bw) + "| "; + s += "has_avx512_vl = " + get(impl->has_avx512_vl) + "| "; + s += "has_avx512_vbmi = " + get(impl->has_avx512_vbmi) + "| "; + s += "has_avx512_vbmi2 = " + get(impl->has_avx512_vbmi2) + "| "; + s += "avx512_vnni = " + get(impl->has_avx512_vnni) + "| "; + s += "avx512_bitalg = " + get(impl->has_avx512_bitalg) + "| "; + s += "avx512_vpopcntdq = " + get(impl->has_avx512_vpopcntdq) + "| "; + s += "avx512_4vnniw = " + get(impl->has_avx512_4vnniw) + "| "; + s += "avx512_4fmaps = " + get(impl->has_avx512_4fmaps) + "| "; + s += "avx512_vp2intersect = " + get(impl->has_avx512_vp2intersect) + "| "; + s += "aes = " + get(impl->has_aes) + "| "; + s += "f16c = " + get(impl->has_f16c) + "|"; + return s; +} + +} // namespace cpuid diff --git a/cortex-cpp/utils/cpuid/cpu_info.h b/cortex-cpp/utils/cpuid/cpu_info.h new file mode 100644 index 000000000..384d0d6f0 --- /dev/null +++ b/cortex-cpp/utils/cpuid/cpu_info.h @@ -0,0 +1,131 @@ +// Copyright (c) 2013 Steinwurf ApS +// All Rights Reserved +// Inspired by https://github.com/steinwurf/cpuid +#pragma once + +#include +#include + +namespace cortex::cpuid { +/// The CpuInfo object extract information about which, if any, additional +/// instructions are supported by the CPU. +class CpuInfo { + public: + /// Constructor for feature detection with default values + CpuInfo(); + + /// Destructor + ~CpuInfo(); + + /// Return true if the CPU supports x87 Floating-Point Unit + bool has_fpu() const; + + /// Return true if the CPU supports MMX + bool has_mmx() const; + + /// Return true if the CPU supports Streaming SIMD Extensions + bool has_sse() const; + + /// Return true if the CPU supports Streaming SIMD Extensions 2 + bool has_sse2() const; + + /// Return true if the CPU supports Streaming SIMD Extensions 3 + bool has_sse3() const; + + /// Return true if the CPU supports Supplemental Streaming SIMD Extensions 3 + bool has_ssse3() const; + + /// Return true if the CPU supports Streaming SIMD Extensions 4.1 + bool has_sse4_1() const; + + /// Return true if the CPU supports Streaming SIMD Extensions 4.2 + bool has_sse4_2() const; + + /// Return true if the CPU supports carry-less multiplication of two 64-bit + /// polynomials over the finite field GF(2) + bool has_pclmulqdq() const; + + /// Return true if the CPU supports Advanced Vector Extensions + bool has_avx() const; + + /// Return true if the CPU supports Advanced Vector Extensions 2 + bool has_avx2() const; + + /// Return true if the CPU supports AVX-512 Foundation + bool has_avx512_f() const; + + /// Return true if the CPU supports AVX-512 Doubleword and Quadword + /// Instructions + bool has_avx512_dq() const; + + /// Return true if the CPU supports AVX-512 Integer Fused Multiply Add + bool has_avx512_ifma() const; + + /// Return true if the CPU supports AVX-512 Prefetch Instructions + bool has_avx512_pf() const; + + /// Return true if the CPU supports AVX-512 Exponential and Reciprocal + /// Instructions + bool has_avx512_er() const; + + /// Return true if the CPU supports AVX-512 Conflict Detection Instructions + bool has_avx512_cd() const; + + /// Return true if the CPU supports AVX-512 Byte and Word Instructions + bool has_avx512_bw() const; + + /// Return true if the CPU supports AVX-512 Vector Length Extensions + bool has_avx512_vl() const; + + /// Return true if the CPU supports AVX-512 Vector Byte Manipulation + /// Instructions + bool has_avx512_vbmi() const; + + /// Return true if the CPU supports AVX-512 Vector Byte Manipulation + /// Instructions 2 + bool has_avx512_vbmi2() const; + + /// Return true if the CPU supports AVX-512 Vector Neural Network + /// Instructions + bool has_avx512_vnni() const; + + /// Return true if the CPU supports AVX-512 Bit Algorithms + bool has_avx512_bitalg() const; + + /// Return true if the CPU supports Vector population count instruction + bool has_avx512_vpopcntdq() const; + + /// Return true if the CPU supports AVX-512 Vector Neural Network + /// Instructions Word variable precision + bool has_avx512_4vnniw() const; + + /// Return true if the CPU supports AVX-512 Fused Multiply Accumulation + /// Packed Single precision + bool has_avx512_4fmaps() const; + + /// Return true if the CPU supports AVX-512 Vector Pair Intersection to a + /// Pair of Mask Registers + bool has_avx512_vp2intersect() const; + + /// Return true if the CPU supports converting between half-precision and + /// standard IEEE single-precision floating-point formats + bool has_f16c() const; + + /// Return true if the CPU supports Advanced Encryption Standard instruction + /// set + bool has_aes() const; + + /// Return true if the CPU supports ARM Advanced SIMD + bool has_neon() const; + + std::string to_string(); + + public: + /// Private implementation + struct Impl; + + private: + /// Pimpl pointer + std::unique_ptr impl; +}; +} // namespace cortex::cpuid \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/cpu_validation.cc b/cortex-cpp/utils/cpuid/cpu_validation.cc new file mode 100644 index 000000000..14968f8d6 --- /dev/null +++ b/cortex-cpp/utils/cpuid/cpu_validation.cc @@ -0,0 +1,60 @@ +#include "cpu_validation.h" +#include "cpu_info.h" + +namespace cortex::cpuid::llamacpp { + +// TODO implement Result for better perf +std::pair IsValidInstructions() { + cpuid::CpuInfo info; +#if defined(_WIN32) +#if defined(CORTEX_AVX512) + auto res = info.has_avx512_f() || info.has_avx512_dq() || + info.has_avx512_ifma() || info.has_avx512_pf() || + info.has_avx512_er() || info.has_avx512_cd() || + info.has_avx512_bw() || info.has_avx512_vl() || + info.has_avx512_vbmi() || info.has_avx512_vbmi2() || + info.has_avx512_vnni() || info.has_avx512_bitalg() || + info.has_avx512_vpopcntdq() || info.has_avx512_4vnniw() || + info.has_avx512_4fmaps() || info.has_avx512_vp2intersect(); + return res ? std::make_pair(true, "") + : std::make_pair(false, "System does not support AVX512"); +#elif defined(CORTEX_AVX2) + return info.has_avx2() + ? std::make_pair(true, "") + : std::make_pair(false, "System does not support AVX2"); +#elif defined(CORTEX_VULKAN) + return std::make_pair(true, ""); +#else + return info.has_avx() ? std::make_pair(true, "") + : std::make_pair(false, "System does not support AVX"); +#endif +#elif defined(__APPLE__) + return std::make_pair(true, ""); +#else +#if defined(CORTEX_CUDA) + return std::make_pair(true, ""); +#elif defined(CORTEX_AVX512) + auto res = info.has_avx512_f() || info.has_avx512_dq() || + info.has_avx512_ifma() || info.has_avx512_pf() || + info.has_avx512_er() || info.has_avx512_cd() || + info.has_avx512_bw() || info.has_avx512_vl() || + info.has_avx512_vbmi() || info.has_avx512_vbmi2() || + info.has_avx512_vnni() || info.has_avx512_bitalg() || + info.has_avx512_vpopcntdq() || info.has_avx512_4vnniw() || + info.has_avx512_4fmaps() || info.has_avx512_vp2intersect(); + return res ? std::make_pair(true, "") + : std::make_pair(false, "System does not support AVX512"); +#elif defined(CORTEX_AVX2) + return info.has_avx2() + ? std::make_pair(true, "") + : std::make_pair(false, "System does not support AVX2"); +#elif defined(CORTEX_VULKAN) + return std::make_pair(true, ""); +#else + return info.has_avx() ? std::make_pair(true, "") + : std::make_pair(false, "System does not support AVX"); +#endif +#endif + return std::make_pair(true, ""); +} +} // namespace cortex::cpuid::llamacpp \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/cpu_validation.h b/cortex-cpp/utils/cpuid/cpu_validation.h new file mode 100644 index 000000000..d50d22b8e --- /dev/null +++ b/cortex-cpp/utils/cpuid/cpu_validation.h @@ -0,0 +1,8 @@ +// Inspired by https://github.com/steinwurf/cpuid +#pragma once +#include +#include + +namespace cortex::cpuid::llamacpp { +std::pair IsValidInstructions(); +} // namespace cortex::cpuid::llamacpp \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/detail/cpu_info_impl.h b/cortex-cpp/utils/cpuid/detail/cpu_info_impl.h new file mode 100644 index 000000000..50c62a89e --- /dev/null +++ b/cortex-cpp/utils/cpuid/detail/cpu_info_impl.h @@ -0,0 +1,74 @@ +// Copyright (c) 2013 Steinwurf ApS +// All Rights Reserved +// Inspired by https://github.com/steinwurf/cpuid +#pragma once + +#include "../cpu_info.h" + +namespace cortex::cpuid { + +struct CpuInfo::Impl { + Impl() + : has_fpu(false), + has_mmx(false), + has_sse(false), + has_sse2(false), + has_sse3(false), + has_ssse3(false), + has_sse4_1(false), + has_sse4_2(false), + has_pclmulqdq(false), + has_avx(false), + has_avx2(false), + has_avx512_f(false), + has_avx512_dq(false), + has_avx512_ifma(false), + has_avx512_pf(false), + has_avx512_er(false), + has_avx512_cd(false), + has_avx512_bw(false), + has_avx512_vl(false), + has_avx512_vbmi(false), + has_avx512_vbmi2(false), + has_avx512_vnni(false), + has_avx512_bitalg(false), + has_avx512_vpopcntdq(false), + has_avx512_4vnniw(false), + has_avx512_4fmaps(false), + has_avx512_vp2intersect(false), + has_f16c(false), + has_aes(false), + has_neon(false) {} + + bool has_fpu; + bool has_mmx; + bool has_sse; + bool has_sse2; + bool has_sse3; + bool has_ssse3; + bool has_sse4_1; + bool has_sse4_2; + bool has_pclmulqdq; + bool has_avx; + bool has_avx2; + bool has_avx512_f; + bool has_avx512_dq; + bool has_avx512_ifma; + bool has_avx512_pf; + bool has_avx512_er; + bool has_avx512_cd; + bool has_avx512_bw; + bool has_avx512_vl; + bool has_avx512_vbmi; + bool has_avx512_vbmi2; + bool has_avx512_vnni; + bool has_avx512_bitalg; + bool has_avx512_vpopcntdq; + bool has_avx512_4vnniw; + bool has_avx512_4fmaps; + bool has_avx512_vp2intersect; + bool has_f16c; + bool has_aes; + bool has_neon; +}; +} // namespace cortex::cpuid \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/detail/extract_x86_flags.h b/cortex-cpp/utils/cpuid/detail/extract_x86_flags.h new file mode 100644 index 000000000..334373e97 --- /dev/null +++ b/cortex-cpp/utils/cpuid/detail/extract_x86_flags.h @@ -0,0 +1,51 @@ +// Copyright (c) 2013 Steinwurf ApS +// All Rights Reserved +// Inspired by https://github.com/steinwurf/cpuid +#pragma once + +#include + +#include "cpu_info_impl.h" + +namespace cortex::cpuid { + +void extract_x86_flags(CpuInfo::Impl& info, uint32_t ecx, uint32_t edx) { + // Instruction set flags + + info.has_fpu = (edx & (1 << 0)) != 0; + info.has_mmx = (edx & (1 << 23)) != 0; + info.has_sse = (edx & (1 << 25)) != 0; + info.has_sse2 = (edx & (1 << 26)) != 0; + info.has_sse3 = (ecx & (1 << 0)) != 0; + info.has_ssse3 = (ecx & (1 << 9)) != 0; + info.has_sse4_1 = (ecx & (1 << 19)) != 0; + info.has_sse4_2 = (ecx & (1 << 20)) != 0; + info.has_pclmulqdq = (ecx & (1 << 1)) != 0; + info.has_avx = (ecx & (1 << 28)) != 0; + info.has_aes = (ecx & (1 << 25)) != 0; + info.has_f16c = (ecx & (1 << 29)) != 0; +} + +void extract_x86_extended_flags(CpuInfo::Impl& info, uint32_t ebx, uint32_t ecx, + uint32_t edx) { + // Extended instruction set flags + + info.has_avx2 = (ebx & (1 << 5)) != 0; + info.has_avx512_f = (ebx & (1 << 16)) != 0; + info.has_avx512_dq = (ebx & (1 << 17)) != 0; + info.has_avx512_ifma = (ebx & (1 << 21)) != 0; + info.has_avx512_pf = (ebx & (1 << 26)) != 0; + info.has_avx512_er = (ebx & (1 << 27)) != 0; + info.has_avx512_cd = (ebx & (1 << 28)) != 0; + info.has_avx512_bw = (ebx & (1 << 30)) != 0; + info.has_avx512_vl = (ebx & (1 << 31)) != 0; + info.has_avx512_vbmi = (ecx & (1 << 1)) != 0; + info.has_avx512_vbmi2 = (ecx & (1 << 6)) != 0; + info.has_avx512_vnni = (ecx & (1 << 11)) != 0; + info.has_avx512_bitalg = (ecx & (1 << 12)) != 0; + info.has_avx512_vpopcntdq = (ecx & (1 << 14)) != 0; + info.has_avx512_4vnniw = (edx & (1 << 2)) != 0; + info.has_avx512_4fmaps = (edx & (1 << 3)) != 0; + info.has_avx512_vp2intersect = (edx & (1 << 8)) != 0; +} +} // namespace cortex::cpuid \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/detail/init_gcc_x86.h b/cortex-cpp/utils/cpuid/detail/init_gcc_x86.h new file mode 100644 index 000000000..9e2d526b9 --- /dev/null +++ b/cortex-cpp/utils/cpuid/detail/init_gcc_x86.h @@ -0,0 +1,65 @@ +// Copyright (c) 2013 Steinwurf ApS +// All Rights Reserved +// Inspired by https://github.com/steinwurf/cpuid +#pragma once + +#include + +#include "cpu_info_impl.h" +#include "extract_x86_flags.h" + +namespace cortex::cpuid { + +// Reference for this code is Intel's recommendation for detecting AVX2 +// on Haswell located here: http://goo.gl/c6IkGX +void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd) { + uint32_t ebx = 0, edx = 0; + +#if defined(__i386__) && defined(__PIC__) + // If PIC used under 32-bit, EBX cannot be clobbered + // EBX is saved to EDI and later restored + __asm__( + "movl %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "=D"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx)); +#else + __asm__("cpuid;" : "+b"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx)); +#endif + + abcd[0] = eax; + abcd[1] = ebx; + abcd[2] = ecx; + abcd[3] = edx; +} + +/// @todo Document +void init_cpuinfo(CpuInfo::Impl& info) { + // Note: We need to capture these 4 registers, otherwise we get + // a segmentation fault on 32-bit Linux + uint32_t output[4]; + + // The register information per input can be extracted from here: + // http://en.wikipedia.org/wiki/CPUID + + // CPUID should be called with EAX=0 first, as this will return the + // maximum supported EAX input value for future calls + run_cpuid(0, 0, output); + uint32_t maximum_index = output[0]; + + // Set registers for basic flag extraction + // All CPUs should support index=1 + if (maximum_index >= 1U) { + run_cpuid(1, 0, output); + extract_x86_flags(info, output[2], output[3]); + } + + // Set registers for extended flags extraction using index=7 + // This operation is not supported on older CPUs, so it should be skipped + // to avoid incorrect results + if (maximum_index >= 7U) { + run_cpuid(7, 0, output); + extract_x86_extended_flags(info, output[1], output[2], output[3]); + } +} +} // namespace cortex::cpuid \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/detail/init_ios_clang_arm.h b/cortex-cpp/utils/cpuid/detail/init_ios_clang_arm.h new file mode 100644 index 000000000..f31344ee4 --- /dev/null +++ b/cortex-cpp/utils/cpuid/detail/init_ios_clang_arm.h @@ -0,0 +1,23 @@ +// Copyright (c) 2013 Steinwurf ApS +// All Rights Reserved +// Inspired by https://github.com/steinwurf/cpuid +#pragma once + +#include "cpu_info_impl.h" + +namespace cortex::cpuid { + +void init_cpuinfo(CpuInfo::Impl& info) { + // The __ARM_NEON__ macro will be defined by the Apple Clang compiler + // when targeting ARMv7 processors that have NEON. + // The compiler guarantees this capability, so there is no benefit + // in doing a runtime check. More info in this SO answer: + // http://stackoverflow.com/a/1601234 + +#if defined __ARM_NEON__ + info.has_neon = true; +#else + info.has_neon = false; +#endif +} +} // namespace cortex::cpuid \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/detail/init_linux_gcc_arm.h b/cortex-cpp/utils/cpuid/detail/init_linux_gcc_arm.h new file mode 100644 index 000000000..f10d360fd --- /dev/null +++ b/cortex-cpp/utils/cpuid/detail/init_linux_gcc_arm.h @@ -0,0 +1,52 @@ +// Copyright (c) 2013 Steinwurf ApS +// All Rights Reserved +// Inspired by https://github.com/steinwurf/cpuid +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +#include "cpu_info_impl.h" + +namespace cortex::cpuid { + +void init_cpuinfo(CpuInfo::Impl& info) { +#if defined(__aarch64__) + // The Advanced SIMD (NEON) instruction set is required on AArch64 + // (64-bit ARM). Note that /proc/cpuinfo will display "asimd" instead of + // "neon" in the Features list on a 64-bit ARM CPU. + info.m_has_neon = true; +#else + // Runtime detection of NEON is necessary on 32-bit ARM CPUs + // + // Follow recommendation from Cortex-A Series Programmer's guide + // in Section 20.1.7 Detecting NEON. The guide is available at + // Steinwurf's Google drive: steinwurf/technical/experimental/cpuid + + auto cpufile = open("/proc/self/auxv", O_RDONLY); + assert(cpufile); + + Elf32_auxv_t auxv; + + if (cpufile >= 0) { + const auto size_auxv_t = sizeof(Elf32_auxv_t); + while (read(cpufile, &auxv, size_auxv_t) == size_auxv_t) { + if (auxv.a_type == AT_HWCAP) { + info.has_neon = (auxv.a_un.a_val & 4096) != 0; + break; + } + } + + close(cpufile); + } else { + info.has_neon = false; + } +#endif +} +} // namespace cortex::cpuid \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/detail/init_msvc_arm.h b/cortex-cpp/utils/cpuid/detail/init_msvc_arm.h new file mode 100644 index 000000000..6580bd23a --- /dev/null +++ b/cortex-cpp/utils/cpuid/detail/init_msvc_arm.h @@ -0,0 +1,19 @@ +// Copyright (c) 2013 Steinwurf ApS +// All Rights Reserved +// Inspired by https://github.com/steinwurf/cpuid +#pragma once + +#include "cpu_info_impl.h" + +namespace cortex::cpuid { +void init_cpuinfo(CpuInfo::Impl& info) { + // Visual Studio 2012 (and above) guarantees the NEON capability when + // compiling for Windows Phone 8 (and above) + +#if defined(PLATFORM_WINDOWS_PHONE) + info.has_neon = true; +#else + info.has_neon = false; +#endif +} +} // namespace cortex::cpuid \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/detail/init_msvc_x86.h b/cortex-cpp/utils/cpuid/detail/init_msvc_x86.h new file mode 100644 index 000000000..1d0b23f73 --- /dev/null +++ b/cortex-cpp/utils/cpuid/detail/init_msvc_x86.h @@ -0,0 +1,43 @@ +// Copyright (c) 2013 Steinwurf ApS +// All Rights Reserved +// Inspired by https://github.com/steinwurf/cpuid +#pragma once + +#include + +#include "cpu_info_impl.h" +#include "extract_x86_flags.h" + +namespace cortex::cpuid { + +void init_cpuinfo(CpuInfo::Impl& info) { + int registers[4]; + + /// According to the msvc docs eax, ebx, ecx and edx are + /// stored (in that order) in the array passed to the __cpuid + /// function. + + // The register information per input can be extracted from here: + // http://en.wikipedia.org/wiki/CPUID + + // CPUID should be called with EAX=0 first, as this will return the + // maximum supported EAX input value for future calls + __cpuid(registers, 0); + uint32_t maximum_eax = registers[0]; + + // Set registers for basic flag extraction, eax=1 + // All CPUs should support index=1 + if (maximum_eax >= 1U) { + __cpuid(registers, 1); + extract_x86_flags(info, registers[2], registers[3]); + } + + // Set registers for extended flags extraction, eax=7 and ecx=0 + // This operation is not supported on older CPUs, so it should be skipped + // to avoid incorrect results + if (maximum_eax >= 7U) { + __cpuidex(registers, 7, 0); + extract_x86_extended_flags(info, registers[1], registers[2], registers[3]); + } +} +} // namespace cortex::cpuid \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/detail/init_unknown.h b/cortex-cpp/utils/cpuid/detail/init_unknown.h new file mode 100644 index 000000000..eaa354312 --- /dev/null +++ b/cortex-cpp/utils/cpuid/detail/init_unknown.h @@ -0,0 +1,13 @@ +// Copyright (c) 2013 Steinwurf ApS +// All Rights Reserved +// Inspired by https://github.com/steinwurf/cpuid +#pragma once + +#include "cpu_info_impl.h" + +namespace cortex::cpuid { + +void init_cpuinfo(CpuInfo::Impl& info) { + (void)info; +} +} // namespace cortex::cpuid \ No newline at end of file diff --git a/cortex-cpp/utils/cpuid/platform.h b/cortex-cpp/utils/cpuid/platform.h new file mode 100644 index 000000000..8a8cf9f67 --- /dev/null +++ b/cortex-cpp/utils/cpuid/platform.h @@ -0,0 +1,157 @@ +// Copyright (c) 2013 Steinwurf ApS +// All Rights Reserved +// Inspired by https://github.com/steinwurf/cpuid +#pragma once + +// Here we create a number of defines to make it easy to choose between +// different compilers, operatings systems and CPU architectures. +// Some information about the defines used can be found here: +// http://sourceforge.net/p/predef/wiki/Architectures/ + +// Detect operating systems +#if defined(__linux__) +#define PLATFORM_LINUX 1 +#if defined(__ANDROID__) +#define PLATFORM_ANDROID 1 +#endif +#elif defined(_WIN32) +#define PLATFORM_WINDOWS 1 +#if defined(WINAPI_FAMILY) +#include +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP) +#define PLATFORM_WINDOWS_PHONE 1 +#endif +#endif +#elif defined(__APPLE__) +// Detect iOS before MacOSX (__MACH__ is also defined for iOS) +#if defined(IPHONE) +#define PLATFORM_IOS 1 +#elif defined(__MACH__) +#define PLATFORM_MAC 1 +#endif +#elif defined(__EMSCRIPTEN__) +#define PLATFORM_EMSCRIPTEN 1 +#else +#error "Unable to determine operating system" +#endif + +// Detect compilers and CPU architectures +// Note: clang also defines __GNUC__ since it aims to be compatible with GCC. +// Therefore we need to check for __clang__ or __llvm__ first. +#if defined(__clang__) || defined(__llvm__) +#define PLATFORM_CLANG 1 +#define PLATFORM_GCC_COMPATIBLE 1 +#if defined(__i386__) || defined(__x86_64__) +#define PLATFORM_X86 1 +#define PLATFORM_CLANG_X86 1 +#define PLATFORM_GCC_COMPATIBLE_X86 1 +#elif defined(__arm__) || defined(__arm64__) || defined(__aarch64__) +#define PLATFORM_ARM 1 +#define PLATFORM_CLANG_ARM 1 +#define PLATFORM_GCC_COMPATIBLE_ARM 1 +#elif defined(__mips__) +#define PLATFORM_MIPS 1 +#define PLATFORM_CLANG_MIPS 1 +#define PLATFORM_GCC_COMPATIBLE_MIPS 1 +#elif defined(__asmjs__) +#define PLATFORM_ASMJS 1 +#define PLATFORM_CLANG_ASMJS 1 +#define PLATFORM_GCC_COMPATIBLE_ASMJS 1 +#endif +#elif defined(__GNUC__) +#define PLATFORM_GCC 1 +#define PLATFORM_GCC_COMPATIBLE 1 +#if defined(__i386__) || defined(__x86_64__) +#define PLATFORM_X86 1 +#define PLATFORM_GCC_X86 1 +#define PLATFORM_GCC_COMPATIBLE_X86 1 +#elif defined(__arm__) || defined(__arm64__) || defined(__aarch64__) +#define PLATFORM_ARM 1 +#define PLATFORM_GCC_ARM 1 +#define PLATFORM_GCC_COMPATIBLE_ARM 1 +#elif defined(__mips__) +#define PLATFORM_MIPS 1 +#define PLATFORM_GCC_MIPS 1 +#define PLATFORM_GCC_COMPATIBLE_MIPS 1 +#endif +#elif defined(_MSC_VER) +#define PLATFORM_MSVC 1 +#if defined(_M_IX86) || defined(_M_X64) +#define PLATFORM_X86 1 +#define PLATFORM_MSVC_X86 1 +#elif defined(_M_ARM) || defined(_M_ARMT) +#define PLATFORM_ARM 1 +#define PLATFORM_MSVC_ARM 1 +#endif +#else +#error "Unable to determine compiler" +#endif + +// Define macros for supported CPU instruction sets +#if defined(PLATFORM_GCC_COMPATIBLE) +#if defined(__MMX__) +#define PLATFORM_MMX 1 +#endif +#if defined(__SSE__) +#define PLATFORM_SSE 1 +#endif +#if defined(__SSE2__) +#define PLATFORM_SSE2 1 +#endif +#if defined(__SSE3__) +#define PLATFORM_SSE3 1 +#endif +#if defined(__SSSE3__) +#define PLATFORM_SSSE3 1 +#endif +#if defined(__SSE4_1__) +#define PLATFORM_SSE41 1 +#endif +#if defined(__SSE4_2__) +#define PLATFORM_SSE42 1 +#endif +#if defined(__PCLMUL__) +#define PLATFORM_PCLMUL 1 +#endif +#if defined(__AVX__) +#define PLATFORM_AVX 1 +#endif +#if defined(__AVX2__) +#define PLATFORM_AVX2 1 +#endif +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define PLATFORM_NEON 1 +#endif +// First, check the PLATFORM_WINDOWS_PHONE define, because +// the X86 instructions sets are not supported on the Windows Phone emulator +#elif defined(PLATFORM_WINDOWS_PHONE) +#if defined(PLATFORM_MSVC_ARM) +// NEON introduced in VS2012 +#if (_MSC_VER >= 1700) +#define PLATFORM_NEON 1 +#endif +#endif +#elif defined(PLATFORM_MSVC_X86) +// MMX, SSE and SSE2 introduced in VS2003 +#if (_MSC_VER >= 1310) +#define PLATFORM_MMX 1 +#define PLATFORM_SSE 1 +#define PLATFORM_SSE2 1 +#endif +// SSE3 introduced in VS2005 +#if (_MSC_VER >= 1400) +#define PLATFORM_SSE3 1 +#endif +// SSSE3, SSE4.1, SSE4.2, PCLMUL introduced in VS2008 +#if (_MSC_VER >= 1500) +#define PLATFORM_SSSE3 1 +#define PLATFORM_SSE41 1 +#define PLATFORM_SSE42 1 +#define PLATFORM_PCLMUL 1 +#endif +// AVX and AVX2 introduced in VS2012 +#if (_MSC_VER >= 1700) +#define PLATFORM_AVX 1 +#define PLATFORM_AVX2 1 +#endif +#endif \ No newline at end of file