Skip to content

Commit

Permalink
Merge branch 'openvinotoolkit:master' into patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
ytxmobile98 authored Oct 7, 2024
2 parents f5bcd15 + cf870cd commit dea9f13
Show file tree
Hide file tree
Showing 17 changed files with 416 additions and 192 deletions.
2 changes: 1 addition & 1 deletion src/core/include/openvino/op/sink.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace op {
class OPENVINO_API Sink : public Op {
public:
~Sink() override = 0;
OPENVINO_OP("Sink");
OPENVINO_OP("Sink", "util", Op);

protected:
Sink() : Op() {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace util {
/// \brief Base class for operations like back propagation convolution
class OPENVINO_API ConvolutionBackPropBase : public ConvolutionBase {
public:
OPENVINO_OP("ConvolutionBackPropBase", "util");
OPENVINO_OP("ConvolutionBackPropBase", "util", ConvolutionBase);

/// \brief Constructs a convolution operation.
ConvolutionBackPropBase() = default;
Expand Down
2 changes: 1 addition & 1 deletion src/core/include/openvino/op/util/convolution_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class OPENVINO_API ConvolutionBase : public Op {
/// \brief Base class for operations like forward propagation convolution
class OPENVINO_API ConvolutionFwdPropBase : public ConvolutionBase {
public:
OPENVINO_OP("ConvolutionFwdPropBase", "util");
OPENVINO_OP("ConvolutionFwdPropBase", "util", ConvolutionBase);

/// \brief Constructs a convolution operation.
ConvolutionFwdPropBase() = default;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ namespace util {
/// v8.
class OPENVINO_API DeformableConvolutionBase : public util::ConvolutionBase {
public:
OPENVINO_OP("DeformableConvolutionBase", "util");
OPENVINO_OP("DeformableConvolutionBase", "util", util::ConvolutionBase);

/// \brief Constructs a convolution operation.
DeformableConvolutionBase() = default;
Expand Down
2 changes: 1 addition & 1 deletion src/core/include/openvino/op/util/framework_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class OPENVINO_API FrameworkNodeAttrs {

class OPENVINO_API FrameworkNode : public MultiSubGraphOp {
public:
OPENVINO_OP("FrameworkNode", "util");
OPENVINO_OP("FrameworkNode", "util", MultiSubGraphOp);

FrameworkNode() = default;

Expand Down
2 changes: 1 addition & 1 deletion src/core/include/openvino/op/util/multi_subgraph_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace util {
///
class OPENVINO_API MultiSubGraphOp : public ov::op::Sink {
public:
OPENVINO_OP("MultiSubGraphOp", "util");
OPENVINO_OP("MultiSubGraphOp", "util", ov::op::Sink);
/// \brief Abstract class describes a connection between a MultiSubGraphOp input and
/// the body.
class InputDescription {
Expand Down
12 changes: 12 additions & 0 deletions src/core/reference/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@ add_library(${TARGET_NAME} STATIC ${LIBRARY_SRC} ${PUBLIC_HEADERS})
add_library(openvino::reference ALIAS ${TARGET_NAME})
set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME reference)

# Optional AVX2 handling for the x86 intrinsics conversion source.
# Everything below is applied only when ENABLE_AVX2 is set.
if(ENABLE_AVX2)
# Project helper; presumably stores the AVX2 compiler flags in avx2_flags - confirm in its definition.
ov_avx2_optimization_flags(avx2_flags)

# Sources that receive the flags held in avx2_flags.
set(OV_REFERENCE_X86_AVX2_SRC
${CMAKE_CURRENT_SOURCE_DIR}/src/op/convert_x86_intrinsics.cpp
)
# The flags are attached per source file (not per target), and the listed file is
# excluded from unity builds and from precompiled headers.
set_source_files_properties(${OV_REFERENCE_X86_AVX2_SRC} PROPERTIES COMPILE_OPTIONS "${avx2_flags}"
SKIP_UNITY_BUILD_INCLUSION ON
SKIP_PRECOMPILE_HEADERS ON)
# HAVE_AVX2 is defined privately for every source of the target, not only for the file above.
target_compile_definitions(${TARGET_NAME} PRIVATE HAVE_AVX2)
endif()

ov_build_target_faster(${TARGET_NAME}
UNITY
PCH PRIVATE "src/precomp.hpp")
Expand Down
16 changes: 7 additions & 9 deletions src/core/reference/include/openvino/reference/convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
#include "openvino/core/type/nf4.hpp"

#if !defined(OS_CHROMEOS) && (defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64))
# define OV_CORE_USE_XBYAK_JIT 1
#else
# define OV_CORE_USE_XBYAK_JIT 0
# define OV_CORE_USE_XBYAK_JIT
#endif

#if defined(OS_CHROMEOS) && defined(OPENVINO_ARCH_X86_64) && defined(HAVE_AVX2)
# define OV_CORE_USE_INTRINSICS
#endif

namespace ov {
Expand All @@ -33,12 +35,12 @@ namespace reference {
namespace detail {

template <typename TI, typename TO>
typename std::enable_if<!std::is_same<TO, char>::value, TO>::type convert(const TI v) {
constexpr typename std::enable_if<!std::is_same<TO, char>::value, TO>::type convert(const TI v) {
return static_cast<TO>(v);
}

template <typename TI, typename TO>
typename std::enable_if<std::is_same<TO, char>::value, TO>::type convert(const TI v) {
constexpr typename std::enable_if<std::is_same<TO, char>::value, TO>::type convert(const TI v) {
return static_cast<char>(static_cast<bool>(v));
}
} // namespace detail
Expand All @@ -62,8 +64,6 @@ void convert(const TI* arg, TO* out, const size_t count) {
std::transform(arg, arg + count, out, detail::convert<TI, TO>);
}

#if OV_CORE_USE_XBYAK_JIT

template <>
void convert<uint8_t, float16>(const uint8_t* arg, float16* out, size_t count);
template <>
Expand All @@ -79,8 +79,6 @@ void convert<bfloat16, float16>(const bfloat16* arg, float16* out, size_t count)
template <>
void convert<bfloat16, float>(const bfloat16* arg, float* out, size_t count);

#endif // OV_CORE_USE_XBYAK_JIT

template <>
void convert<int32_t, float16>(const int32_t* arg, float16* out, size_t count);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <functional>
#include <type_traits>

#include "openvino/reference/convert.hpp"

namespace ov {

// forward declare from inference dev API (cannot be included)
extern bool with_cpu_x86_avx2();

namespace reference {

/// \brief Clamp policy that performs no range limiting: values pass through untouched.
struct NoClamp {
    /// Reports that this policy does not clamp.
    static constexpr bool enabled = false;

    /// \brief Scalar fallback which hands the input back unchanged.
    /// \param v Value to forward.
    /// \return A copy of \p v.
    template <class Value>
    static constexpr Value apply(const Value v) {
        return v;
    }

    /// \brief Hook for optimized variants.
    ///
    /// Only declared here; a body has to be supplied by an explicit specialization.
    /// The result type cannot be deduced, so it must be named explicitly by the caller.
    template <class Source, class Result>
    static Result apply(const Source v);
};

/// \brief Clamp policy which limits an input of type TI to the [lowest(), max()] range of TO.
template <class TI, class TO>
struct Clamp {
    /// Reports that this policy clamps.
    static constexpr bool enabled = true;

    /// \brief Scalar fallback: saturate to the output range, otherwise convert.
    /// \param v Input value.
    /// \return std::numeric_limits<TO>::lowest() when \p v compares below it,
    ///         std::numeric_limits<TO>::max() when \p v compares above it,
    ///         detail::convert<TI, TO>(v) in every other case.
    static constexpr TO apply(const TI v) {
        // Alias only; the body stays a single return statement so it remains a valid constexpr function.
        using out_limits = std::numeric_limits<TO>;
        return (v < out_limits::lowest()) ? out_limits::lowest()
               : (v > out_limits::max())  ? out_limits::max()
                                          : detail::convert<TI, TO>(v);
    }

    /// \brief Hook for optimized variants.
    ///
    /// Only declared here; a body has to be supplied by an explicit specialization.
    template <class T, class R>
    static R apply(const T v);
};

/// \brief Converts arrays of TI into arrays of TO, with an optional block-wise fast path.
template <class TI, class TO>
struct Converter {
    /// Count of float values that fit into 32 bytes; the block size used by the optimized loop.
    static constexpr size_t vec_f32_size = 32 / sizeof(float);

    /// \brief Scalar path, also used for the elements left over after the optimized loop.
    ///
    /// Each element goes through ClampMode::apply and is then converted to TO.
    /// \param in  First input element.
    /// \param out First output element.
    /// \param n   Number of elements to convert.
    template <class ClampMode>
    static void tail(const TI* in, TO* out, size_t n) {
        const auto convert_one = [](const TI value) {
            return detail::convert<decltype(ClampMode::apply(value)), TO>(ClampMode::apply(value));
        };
        std::transform(in, in + n, out, convert_one);
    }

    /// \brief Customization point describing an optimized conversion kernel.
    ///
    /// The primary template is disabled and its run() does nothing; a specialization
    /// sets `enabled = true` and provides run() to opt in.
    template <class ClampMode>
    struct Optimized {
        static constexpr bool enabled = false;
        static void run(const TI*, TO*) {}
    };

    /// \brief Conversion entry point chosen when no optimized kernel is enabled for ClampMode.
    ///
    /// Everything is handled by the scalar path.
    template <class ClampMode, typename std::enable_if<!Optimized<ClampMode>::enabled>::type* = nullptr>
    static void apply(const TI* in, TO* out, size_t n) {
        tail<ClampMode>(in, out, n);
    }

    /// \brief Conversion entry point chosen when Optimized<ClampMode> is enabled.
    ///
    /// When with_cpu_x86_avx2() returns true, whole blocks of vec_f32_size elements are
    /// passed to Optimized<ClampMode>::run; the remainder (or the whole input, when the
    /// check returns false) is processed by tail().
    template <class ClampMode, typename std::enable_if<Optimized<ClampMode>::enabled>::type* = nullptr>
    static void apply(const TI* in, TO* out, size_t n) {
        if (with_cpu_x86_avx2()) {
            while (n >= vec_f32_size) {
                Optimized<ClampMode>::run(in, out);
                in += vec_f32_size;
                out += vec_f32_size;
                n -= vec_f32_size;
            }
        }
        tail<ClampMode>(in, out, n);
    }
};

} // namespace reference
} // namespace ov
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#ifdef OV_CORE_USE_INTRINSICS
# include <immintrin.h>

# include "openvino/reference/utils/convert_util.hpp"

namespace ov {
namespace reference {
# ifdef HAVE_AVX2

// Clamp optimized specializations
// Declares an explicit specialization of the two-parameter NoClamp::apply with
// T = __m256i and R = __m128i. No definition is visible in this header.
// NOTE(review): the parameter name suggests packed 32-bit integers - confirm against the definition.
template <>
__m128i NoClamp::apply<__m256i, __m128i>(const __m256i vec_i32);

// Declares an explicit specialization of the member template Clamp<float, float16>::apply
// with T = R = __m256 (vector in, vector out). No definition is visible in this header.
// NOTE(review): presumably clamps packed floats to the float16 range - confirm against the definition.
template <>
template <>
__m256 Clamp<float, float16>::apply<__m256, __m256>(const __m256 vec_f32);

// Conversion optimized specializations
// --- f32 -> other
// Opts float -> float16 conversion without clamping into the optimized path:
// `enabled = true` selects the Converter::apply overload that calls run() once per
// block of Converter::vec_f32_size elements when with_cpu_x86_avx2() returns true.
// run() is only declared here.
template <>
template <>
struct Converter<float, float16>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const float* in, float16* out);
};

// Opts float -> float16 conversion with the Clamp<float, float16> policy into the
// optimized path of Converter::apply, which calls run() once per block of
// Converter::vec_f32_size elements. run() is only declared here.
template <>
template <>
struct Converter<float, float16>::Optimized<Clamp<float, float16>> {
static constexpr bool enabled = true;
static void run(const float* in, float16* out);
};

// Opts float -> int8_t conversion without clamping into the optimized path of
// Converter::apply, which calls run() once per block of Converter::vec_f32_size
// elements. run() is only declared here.
template <>
template <>
struct Converter<float, int8_t>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const float* in, int8_t* out);
};

// --- f16 -> other
// Opts float16 -> float conversion without clamping into the optimized path of
// Converter::apply, which calls run() once per block of Converter::vec_f32_size
// elements. run() is only declared here.
template <>
template <>
struct Converter<float16, float>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const float16* in, float* out);
};

// Opts float16 -> int8_t conversion without clamping into the optimized path of
// Converter::apply, which calls run() once per block of Converter::vec_f32_size
// elements. run() is only declared here.
template <>
template <>
struct Converter<float16, int8_t>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const float16* in, int8_t* out);
};

// --- bf16 -> other
// Opts bfloat16 -> float16 conversion into the optimized path of Converter::apply,
// which calls run() once per block of Converter::vec_f32_size elements.
// Note that the clamp policy is Clamp<float, float16>, i.e. it is parameterized on
// float rather than on the bfloat16 input type. run() is only declared here.
template <>
template <>
struct Converter<bfloat16, float16>::Optimized<Clamp<float, float16>> {
static constexpr bool enabled = true;
static void run(const bfloat16* in, float16* out);
};

// Opts bfloat16 -> float conversion without clamping into the optimized path of
// Converter::apply, which calls run() once per block of Converter::vec_f32_size
// elements. run() is only declared here.
template <>
template <>
struct Converter<bfloat16, float>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const bfloat16* in, float* out);
};

// --- u8 -> other
// Opts uint8_t -> float16 conversion without clamping into the optimized path of
// Converter::apply, which calls run() once per block of Converter::vec_f32_size
// elements. run() is only declared here.
template <>
template <>
struct Converter<uint8_t, float16>::Optimized<NoClamp> {
static constexpr bool enabled = true;
static void run(const uint8_t* in, float16* out);
};
# endif // HAVE_AVX2
} // namespace reference
} // namespace ov
#endif
Loading

0 comments on commit dea9f13

Please sign in to comment.