added unit tests

openvinotoolkit · Oct 29, 2024 · bc284f5 · bc284f5
1 parent 2f33854
commit bc284f5
Show file tree

Hide file tree

Showing 9 changed files with 146 additions and 9 deletions.
diff --git a/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py b/src/bindings/python/src/openvino/runtime/properties/hint/__init__.py
@@ -23,3 +23,4 @@
 from openvino._pyopenvino.properties.hint import allow_auto_batching
 from openvino._pyopenvino.properties.hint import dynamic_quantization_group_size
 from openvino._pyopenvino.properties.hint import kv_cache_precision
+from openvino._pyopenvino.properties.hint import activations_scale_factor
diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
@@ -101,6 +101,7 @@ void regmodule_properties(py::module m) {
     wrap_property_RW(m_hint, ov::hint::allow_auto_batching, "allow_auto_batching");
     wrap_property_RW(m_hint, ov::hint::dynamic_quantization_group_size, "dynamic_quantization_group_size");
     wrap_property_RW(m_hint, ov::hint::kv_cache_precision, "kv_cache_precision");
+    wrap_property_RW(m_hint, ov::hint::activations_scale_factor, "activations_scale_factor");
 
     // Submodule intel_cpu
     py::module m_intel_cpu =

diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -335,6 +335,11 @@ def test_properties_ro(ov_property_ro, expected_value):
             ((64, 64),),
         ),
         (hints.kv_cache_precision, "KV_CACHE_PRECISION", ((Type.f32, Type.f32),)),
+        (
+            hints.activations_scale_factor,
+            "ACTIVATIONS_SCALE_FACTOR",
+            ((0.0, 0.0),),
+        ),
         (
             intel_cpu.denormals_optimization,
             "CPU_DENORMALS_OPTIMIZATION",

diff --git a/...mmon/transformations/include/transformations/common_optimizations/activations_scaling.hpp b/...mmon/transformations/include/transformations/common_optimizations/activations_scaling.hpp
@@ -20,6 +20,7 @@ class TRANSFORMATIONS_API MulMulAddFusion;
 }  // namespace pass
 }  // namespace ov
 
+// ActivationsScaling scales down activations to prevent overflow due to the limited range of FP16
 class ov::pass::ActivationsScaling : public ov::pass::ModelPass {
 public:
     OPENVINO_RTTI("ActivationsScaling", "0");

diff --git a/src/common/transformations/src/transformations/common_optimizations/activations_scaling.cpp b/src/common/transformations/src/transformations/common_optimizations/activations_scaling.cpp
@@ -79,26 +79,40 @@ ov::pass::ScaleDownSingleLayer::ScaleDownSingleLayer(float scale_factor) {
         auto scale_down = std::make_shared<ov::op::v1::Multiply>(
             scaled_op->input(0).get_source_output(),
             (scaled_op->input(0).get_element_type() == ov::element::f32) ? scale_down_const_f32 : scale_down_const_f16);
+        scale_down->set_friendly_name(scaled_op->get_friendly_name() + "_scale_down");
+        ov::copy_runtime_info(scaled_op, scale_down);
         scaled_op->input(0).replace_source_output(scale_down->output(0));
 
         auto child = scaled_op->get_output_target_inputs(0).begin()->get_node();
         if (scaled_op->get_output_target_inputs(0).size() == 1 && ov::is_type<ov::op::v1::Add>(child)) {
             auto add = child->shared_from_this();
+            auto target_inputs = add->get_output_target_inputs(0);
             auto scale_down_bias = std::make_shared<ov::op::v1::Multiply>(
                 add->input(1).get_source_output(),
                 (add->input(1).get_element_type() == ov::element::f32) ? scale_down_const_f32 : scale_down_const_f16);
+            scale_down_bias->set_friendly_name(add->get_friendly_name() + "_scale_down");
+            ov::copy_runtime_info(add, scale_down_bias);
             add->input(1).replace_source_output(scale_down_bias->output(0));
 
-            auto scale_up = std::make_shared<ov::op::v1::Multiply>(
+            auto scale_up = register_new_node<ov::op::v1::Multiply>(
                 add->output(0),
                 (add->output(0).get_element_type() == ov::element::f32) ? scale_up_const_f32 : scale_up_const_f16);
-            ov::replace_node(add, scale_up);
+            scale_up->set_friendly_name(scaled_op->get_friendly_name() + "_scale_up");
+            ov::copy_runtime_info(scaled_op, scale_up);
+            for (auto& in : target_inputs) {
+                in.replace_source_output(scale_up);
+            }
         } else {
-            auto scale_up = std::make_shared<ov::op::v1::Multiply>(
+            auto target_inputs = scaled_op->get_output_target_inputs(0);
+            auto scale_up = register_new_node<ov::op::v1::Multiply>(
                 scaled_op->output(0),
                 (scaled_op->output(0).get_element_type() == ov::element::f32) ? scale_up_const_f32
                                                                               : scale_up_const_f16);
-            ov::replace_node(scaled_op, scale_up);
+            scale_up->set_friendly_name(scaled_op->get_friendly_name() + "_scale_up");
+            ov::copy_runtime_info(scaled_op, scale_up);
+            for (auto& in : target_inputs) {
+                in.replace_source_output(scale_up);
+            }
         }
 
         return true;
@@ -144,6 +158,7 @@ ov::pass::MulMulAddFusion::MulMulAddFusion() {
         if (transformation_callback(add)) {
             return false;
         }
+        auto target_inputs = add->get_output_target_inputs(0);
 
         auto scale_const0 =
             std::dynamic_pointer_cast<ov::op::v0::Constant>(pattern_map.at(scale_const0_m).get_node_shared_ptr());
@@ -156,9 +171,10 @@ ov::pass::MulMulAddFusion::MulMulAddFusion() {
         mul0->input(1).replace_source_output(
             ov::op::util::eltwise_fold<ov::op::v1::Divide>(scale_const0, scale_const1));
         add->input(1).replace_source_output(mul1->get_input_source_output(0));
-
-        auto new_mul = register_new_node<ov::op::v1::Multiply>(add, scale_const1);
-        replace_node(add, new_mul);
+        mul1->input(0).replace_source_output(add);
+        for (auto& in : target_inputs) {
+            in.replace_source_output(mul1);
+        }
 
         return true;
     };

diff --git a/src/common/transformations/tests/common_optimizations/activations_scaling_test.cpp b/src/common/transformations/tests/common_optimizations/activations_scaling_test.cpp
@@ -0,0 +1,110 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/common_optimizations/activations_scaling.hpp"
+
+#include <gtest/gtest.h>
+#include "common_test_utils/graph_comparator.hpp"
+#include "common_test_utils/ov_test_utils.hpp"
+
+#include <string>
+#include <memory>
+
+#include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/convolution.hpp"
+#include "openvino/op/group_normalization.hpp"
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/pass/manager.hpp"
+
+#include "transformations/utils/utils.hpp"
+
+using namespace ov;
+using namespace testing;
+
+TEST_F(TransformationTestsF, ScaleDownSingleLayerTest) {
+    float scale_factor = 128.f;
+    {
+        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
+        auto weights_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3, 3, 3, 3 }, { 1 });
+        auto conv = std::make_shared<ov::op::v1::Convolution>(input, weights_const,
+                                                              Strides{}, CoordinateDiff{}, CoordinateDiff{}, Strides{});
+        auto convert = std::make_shared<ov::op::v0::Convert>(conv, ov::element::f32);
+        auto result = std::make_shared<ov::op::v0::Result>(convert);
+
+        model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input});
+        manager.register_pass<ov::pass::ScaleDownSingleLayer>(scale_factor);
+    }
+    {
+        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
+        auto weights_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3, 3, 3, 3 }, { 1 });
+        auto scale_down_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 1.f / scale_factor });
+        auto scale_down = std::make_shared<ov::op::v1::Multiply>(input, scale_down_const);
+        auto conv = std::make_shared<ov::op::v1::Convolution>(scale_down, weights_const,
+                                                              Strides{}, CoordinateDiff{}, CoordinateDiff{}, Strides{});
+        auto scale_up_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { scale_factor });
+        auto scale_up = std::make_shared<ov::op::v1::Multiply>(conv, scale_up_const);
+        auto convert = std::make_shared<ov::op::v0::Convert>(scale_up, ov::element::f32);
+        auto result = std::make_shared<ov::op::v0::Result>(convert);
+
+        model_ref = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input});
+    }
+}
+
+TEST_F(TransformationTestsF, MulMulAddFusionTest) {
+    {
+        auto input0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
+        auto scale_const_0 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 10 });
+        auto mul0 = std::make_shared<ov::op::v1::Multiply>(input0, scale_const_0);
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
+        auto scale_const_1 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 10 });
+        auto mul1 = std::make_shared<ov::op::v1::Multiply>(input1, scale_const_1);
+        auto add = std::make_shared<ov::op::v1::Add>(mul0, mul1);
+        auto convert = std::make_shared<ov::op::v0::Convert>(add, ov::element::f32);
+        auto result = std::make_shared<ov::op::v0::Result>(convert);
+
+        model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input0, input1});
+        manager.register_pass<ov::pass::MulMulAddFusion>();
+    }
+    {
+        auto input0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
+        auto scale_const_0 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 10 });
+        auto mul0 = std::make_shared<ov::op::v1::Multiply>(input0, scale_const_0);
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
+        auto add = std::make_shared<ov::op::v1::Add>(mul0, input1);
+        auto scale_const_1 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 10 });
+        auto mul1 = std::make_shared<ov::op::v1::Multiply>(add, scale_const_1);
+        auto convert = std::make_shared<ov::op::v0::Convert>(mul1, ov::element::f32);
+        auto result = std::make_shared<ov::op::v0::Result>(convert);
+
+        model_ref = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input0, input1});
+    }
+}
+
+TEST_F(TransformationTestsF, MulGroupNormFusionTest) {
+    {
+        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
+        auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 10 });
+        auto mul = std::make_shared<ov::op::v1::Multiply>(input, scale_const);
+        auto norm_scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3 }, { 10 });
+        auto norm_bias_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3 }, { 10 });
+        auto group_norm = std::make_shared<ov::op::v12::GroupNormalization>(mul, norm_scale_const, norm_bias_const, 1, 0.01f);
+        auto convert = std::make_shared<ov::op::v0::Convert>(group_norm, ov::element::f32);
+        auto result = std::make_shared<ov::op::v0::Result>(convert);
+
+        model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input});
+        manager.register_pass<ov::pass::MulGroupNormFusion>();
+    }
+    {
+        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
+        auto norm_scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3 }, { 10 });
+        auto norm_bias_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3 }, { 10 });
+        auto group_norm = std::make_shared<ov::op::v12::GroupNormalization>(input, norm_scale_const, norm_bias_const, 1, 0.01f);
+        auto convert = std::make_shared<ov::op::v0::Convert>(group_norm, ov::element::f32);
+        auto result = std::make_shared<ov::op::v0::Result>(convert);
+
+        model_ref = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input});
+    }
+}
diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -257,6 +257,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
             ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RO},
             ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RO},
             ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RO},
+            ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RO},
             ov::PropertyName{ov::device::id.name(), PropertyMutability::RO},
             ov::PropertyName{ov::execution_devices.name(), PropertyMutability::RO},
         };

diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -540,6 +540,7 @@ std::vector<ov::PropertyName> Plugin::get_caching_properties() const {
         ov::PropertyName{ov::hint::execution_mode.name(), PropertyMutability::RW},
         ov::PropertyName{ov::hint::performance_mode.name(), PropertyMutability::RW},
         ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
+        ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW},
     };
 
     return caching_properties;
@@ -585,7 +586,8 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
         ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW},
         ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RW},
         ov::PropertyName{ov::device::id.name(), PropertyMutability::RW},
-        ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW}
+        ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
+        ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW}
     };
 
     return supported_properties;

diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
@@ -61,7 +61,7 @@ void ExecutionConfig::set_default() {
         std::make_tuple(ov::hint::kv_cache_precision, ov::element::undefined),
         std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false),
         std::make_tuple(ov::weights_path, ""),
-        std::make_tuple(ov::hint::activations_scale_factor, 0.f),
+        std::make_tuple(ov::hint::activations_scale_factor, -1.f),
 
         // Legacy API properties
         std::make_tuple(ov::intel_gpu::nv12_two_inputs, false),