-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'openvinotoolkit:master' into patch-1
- Loading branch information
Showing
17 changed files
with
416 additions
and
192 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88 changes: 88 additions & 0 deletions
88
src/core/reference/include/openvino/reference/utils/convert_util.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
// Copyright (C) 2018-2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <functional> | ||
#include <type_traits> | ||
|
||
#include "openvino/reference/convert.hpp" | ||
|
||
namespace ov { | ||
|
||
// Forward declaration of a function from the inference dev API; that API's header
// cannot be included here, so the function is declared by hand instead.
extern bool with_cpu_x86_avx2();
|
||
namespace reference { | ||
|
||
/// Clamp policy that leaves values untouched.
struct NoClamp {
    /// Distinguishes this policy from Clamp, where the flag is true.
    static constexpr bool enabled = false;

    /// Generic implementation: returns the input value unchanged.
    template <class T>
    static constexpr T apply(const T v) {
        return v;
    }

    /// Declared only; explicit specializations (input type T, result type R)
    /// supply optimized variants.
    template <class T, class R>
    static R apply(const T v);
};
|
||
template <class TI, class TO> | ||
struct Clamp { | ||
static constexpr bool enabled = true; | ||
|
||
// Generic implementation | ||
static constexpr TO apply(const TI v) { | ||
return (v < std::numeric_limits<TO>::lowest()) | ||
? std::numeric_limits<TO>::lowest() | ||
: ((v > std::numeric_limits<TO>::max()) ? std::numeric_limits<TO>::max() | ||
: detail::convert<TI, TO>(v)); | ||
} | ||
|
||
// Specialize for optimization | ||
template <class T, class R> | ||
static R apply(const T v); | ||
}; | ||
|
||
template <class TI, class TO> | ||
struct Converter { | ||
static constexpr size_t vec_f32_size = 32 / sizeof(float); | ||
|
||
// Generic implementation to convert tail elements | ||
template <class ClampMode> | ||
static void tail(const TI* in, TO* out, size_t n) { | ||
std::transform(in, in + n, out, [](const TI v) { | ||
return detail::convert<decltype(ClampMode::apply(v)), TO>(ClampMode::apply(v)); | ||
}); | ||
} | ||
|
||
// Helper struct to defined optimized version of conversion | ||
template <class ClampMode> | ||
struct Optimized { | ||
static constexpr bool enabled = false; | ||
static void run(const TI* in, TO* out) {} | ||
}; | ||
|
||
// Generic implementation of conversion | ||
template <class ClampMode, typename std::enable_if<!Optimized<ClampMode>::enabled>::type* = nullptr> | ||
static void apply(const TI* in, TO* out, size_t n) { | ||
return tail<ClampMode>(in, out, n); | ||
} | ||
|
||
// Enabled when Optimized struct specialized defined for optimization | ||
template <class ClampMode, typename std::enable_if<Optimized<ClampMode>::enabled>::type* = nullptr> | ||
static void apply(const TI* in, TO* out, size_t n) { | ||
if (with_cpu_x86_avx2()) { | ||
for (; n >= vec_f32_size; n -= vec_f32_size, in += vec_f32_size, out += vec_f32_size) { | ||
Optimized<ClampMode>::run(in, out); | ||
} | ||
} | ||
tail<ClampMode>(in, out, n); | ||
} | ||
}; | ||
|
||
} // namespace reference | ||
} // namespace ov |
87 changes: 87 additions & 0 deletions
87
src/core/reference/include/openvino/reference/utils/convert_x86_intrinsics.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
// Copyright (C) 2018-2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#ifdef OV_CORE_USE_INTRINSICS | ||
# include <immintrin.h> | ||
|
||
# include "openvino/reference/utils/convert_util.hpp" | ||
|
||
namespace ov { | ||
namespace reference { | ||
# ifdef HAVE_AVX2 | ||
|
||
// Clamp optimized specializations | ||
template <> | ||
__m128i NoClamp::apply<__m256i, __m128i>(const __m256i vec_i32); | ||
|
||
template <> | ||
template <> | ||
__m256 Clamp<float, float16>::apply<__m256, __m256>(const __m256 vec_f32); | ||
|
||
// Conversion optimized specializations | ||
// --- f32 -> other | ||
template <> | ||
template <> | ||
struct Converter<float, float16>::Optimized<NoClamp> { | ||
static constexpr bool enabled = true; | ||
static void run(const float* in, float16* out); | ||
}; | ||
|
||
template <> | ||
template <> | ||
struct Converter<float, float16>::Optimized<Clamp<float, float16>> { | ||
static constexpr bool enabled = true; | ||
static void run(const float* in, float16* out); | ||
}; | ||
|
||
template <> | ||
template <> | ||
struct Converter<float, int8_t>::Optimized<NoClamp> { | ||
static constexpr bool enabled = true; | ||
static void run(const float* in, int8_t* out); | ||
}; | ||
|
||
// --- f16 -> other | ||
template <> | ||
template <> | ||
struct Converter<float16, float>::Optimized<NoClamp> { | ||
static constexpr bool enabled = true; | ||
static void run(const float16* in, float* out); | ||
}; | ||
|
||
template <> | ||
template <> | ||
struct Converter<float16, int8_t>::Optimized<NoClamp> { | ||
static constexpr bool enabled = true; | ||
static void run(const float16* in, int8_t* out); | ||
}; | ||
|
||
// --- bf16 -> other | ||
template <> | ||
template <> | ||
struct Converter<bfloat16, float16>::Optimized<Clamp<float, float16>> { | ||
static constexpr bool enabled = true; | ||
static void run(const bfloat16* in, float16* out); | ||
}; | ||
|
||
template <> | ||
template <> | ||
struct Converter<bfloat16, float>::Optimized<NoClamp> { | ||
static constexpr bool enabled = true; | ||
static void run(const bfloat16* in, float* out); | ||
}; | ||
|
||
// --- u8 -> other | ||
template <> | ||
template <> | ||
struct Converter<uint8_t, float16>::Optimized<NoClamp> { | ||
static constexpr bool enabled = true; | ||
static void run(const uint8_t* in, float16* out); | ||
}; | ||
# endif // HAVE_AVX2 | ||
} // namespace reference | ||
} // namespace ov | ||
#endif |
Oops, something went wrong.