Skip to content

Commit

Permalink
added unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
e-ddykim committed Oct 29, 2024
1 parent 2f33854 commit bc284f5
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@
from openvino._pyopenvino.properties.hint import allow_auto_batching
from openvino._pyopenvino.properties.hint import dynamic_quantization_group_size
from openvino._pyopenvino.properties.hint import kv_cache_precision
from openvino._pyopenvino.properties.hint import activations_scale_factor
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ void regmodule_properties(py::module m) {
wrap_property_RW(m_hint, ov::hint::allow_auto_batching, "allow_auto_batching");
wrap_property_RW(m_hint, ov::hint::dynamic_quantization_group_size, "dynamic_quantization_group_size");
wrap_property_RW(m_hint, ov::hint::kv_cache_precision, "kv_cache_precision");
wrap_property_RW(m_hint, ov::hint::activations_scale_factor, "activations_scale_factor");

// Submodule intel_cpu
py::module m_intel_cpu =
Expand Down
5 changes: 5 additions & 0 deletions src/bindings/python/tests/test_runtime/test_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,11 @@ def test_properties_ro(ov_property_ro, expected_value):
((64, 64),),
),
(hints.kv_cache_precision, "KV_CACHE_PRECISION", ((Type.f32, Type.f32),)),
(
hints.activations_scale_factor,
"ACTIVATIONS_SCALE_FACTOR",
((0.0, 0.0),),
),
(
intel_cpu.denormals_optimization,
"CPU_DENORMALS_OPTIMIZATION",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class TRANSFORMATIONS_API MulMulAddFusion;
} // namespace pass
} // namespace ov

// ActivationsScaling scales down activations to prevent overflow due to the limited range of FP16
class ov::pass::ActivationsScaling : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("ActivationsScaling", "0");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,26 +79,40 @@ ov::pass::ScaleDownSingleLayer::ScaleDownSingleLayer(float scale_factor) {
auto scale_down = std::make_shared<ov::op::v1::Multiply>(
scaled_op->input(0).get_source_output(),
(scaled_op->input(0).get_element_type() == ov::element::f32) ? scale_down_const_f32 : scale_down_const_f16);
scale_down->set_friendly_name(scaled_op->get_friendly_name() + "_scale_down");
ov::copy_runtime_info(scaled_op, scale_down);
scaled_op->input(0).replace_source_output(scale_down->output(0));

auto child = scaled_op->get_output_target_inputs(0).begin()->get_node();
if (scaled_op->get_output_target_inputs(0).size() == 1 && ov::is_type<ov::op::v1::Add>(child)) {
auto add = child->shared_from_this();
auto target_inputs = add->get_output_target_inputs(0);
auto scale_down_bias = std::make_shared<ov::op::v1::Multiply>(
add->input(1).get_source_output(),
(add->input(1).get_element_type() == ov::element::f32) ? scale_down_const_f32 : scale_down_const_f16);
scale_down_bias->set_friendly_name(add->get_friendly_name() + "_scale_down");
ov::copy_runtime_info(add, scale_down_bias);
add->input(1).replace_source_output(scale_down_bias->output(0));

auto scale_up = std::make_shared<ov::op::v1::Multiply>(
auto scale_up = register_new_node<ov::op::v1::Multiply>(
add->output(0),
(add->output(0).get_element_type() == ov::element::f32) ? scale_up_const_f32 : scale_up_const_f16);
ov::replace_node(add, scale_up);
scale_up->set_friendly_name(scaled_op->get_friendly_name() + "_scale_up");
ov::copy_runtime_info(scaled_op, scale_up);
for (auto& in : target_inputs) {
in.replace_source_output(scale_up);
}
} else {
auto scale_up = std::make_shared<ov::op::v1::Multiply>(
auto target_inputs = scaled_op->get_output_target_inputs(0);
auto scale_up = register_new_node<ov::op::v1::Multiply>(
scaled_op->output(0),
(scaled_op->output(0).get_element_type() == ov::element::f32) ? scale_up_const_f32
: scale_up_const_f16);
ov::replace_node(scaled_op, scale_up);
scale_up->set_friendly_name(scaled_op->get_friendly_name() + "_scale_up");
ov::copy_runtime_info(scaled_op, scale_up);
for (auto& in : target_inputs) {
in.replace_source_output(scale_up);
}
}

return true;
Expand Down Expand Up @@ -144,6 +158,7 @@ ov::pass::MulMulAddFusion::MulMulAddFusion() {
if (transformation_callback(add)) {
return false;
}
auto target_inputs = add->get_output_target_inputs(0);

auto scale_const0 =
std::dynamic_pointer_cast<ov::op::v0::Constant>(pattern_map.at(scale_const0_m).get_node_shared_ptr());
Expand All @@ -156,9 +171,10 @@ ov::pass::MulMulAddFusion::MulMulAddFusion() {
mul0->input(1).replace_source_output(
ov::op::util::eltwise_fold<ov::op::v1::Divide>(scale_const0, scale_const1));
add->input(1).replace_source_output(mul1->get_input_source_output(0));

auto new_mul = register_new_node<ov::op::v1::Multiply>(add, scale_const1);
replace_node(add, new_mul);
mul1->input(0).replace_source_output(add);
for (auto& in : target_inputs) {
in.replace_source_output(mul1);
}

return true;
};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "transformations/common_optimizations/activations_scaling.hpp"

#include <gtest/gtest.h>
#include "common_test_utils/graph_comparator.hpp"
#include "common_test_utils/ov_test_utils.hpp"

#include <string>
#include <memory>

#include "openvino/op/add.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convolution.hpp"
#include "openvino/op/group_normalization.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/pass/manager.hpp"

#include "transformations/utils/utils.hpp"

using namespace ov;
using namespace testing;

TEST_F(TransformationTestsF, ScaleDownSingleLayerTest) {
float scale_factor = 128.f;
{
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
auto weights_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3, 3, 3, 3 }, { 1 });
auto conv = std::make_shared<ov::op::v1::Convolution>(input, weights_const,
Strides{}, CoordinateDiff{}, CoordinateDiff{}, Strides{});
auto convert = std::make_shared<ov::op::v0::Convert>(conv, ov::element::f32);
auto result = std::make_shared<ov::op::v0::Result>(convert);

model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input});
manager.register_pass<ov::pass::ScaleDownSingleLayer>(scale_factor);
}
{
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
auto weights_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3, 3, 3, 3 }, { 1 });
auto scale_down_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 1.f / scale_factor });
auto scale_down = std::make_shared<ov::op::v1::Multiply>(input, scale_down_const);
auto conv = std::make_shared<ov::op::v1::Convolution>(scale_down, weights_const,
Strides{}, CoordinateDiff{}, CoordinateDiff{}, Strides{});
auto scale_up_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { scale_factor });
auto scale_up = std::make_shared<ov::op::v1::Multiply>(conv, scale_up_const);
auto convert = std::make_shared<ov::op::v0::Convert>(scale_up, ov::element::f32);
auto result = std::make_shared<ov::op::v0::Result>(convert);

model_ref = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input});
}
}

TEST_F(TransformationTestsF, MulMulAddFusionTest) {
{
auto input0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
auto scale_const_0 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 10 });
auto mul0 = std::make_shared<ov::op::v1::Multiply>(input0, scale_const_0);
auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
auto scale_const_1 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 10 });
auto mul1 = std::make_shared<ov::op::v1::Multiply>(input1, scale_const_1);
auto add = std::make_shared<ov::op::v1::Add>(mul0, mul1);
auto convert = std::make_shared<ov::op::v0::Convert>(add, ov::element::f32);
auto result = std::make_shared<ov::op::v0::Result>(convert);

model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input0, input1});
manager.register_pass<ov::pass::MulMulAddFusion>();
}
{
auto input0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
auto scale_const_0 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 10 });
auto mul0 = std::make_shared<ov::op::v1::Multiply>(input0, scale_const_0);
auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
auto add = std::make_shared<ov::op::v1::Add>(mul0, input1);
auto scale_const_1 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 10 });
auto mul1 = std::make_shared<ov::op::v1::Multiply>(add, scale_const_1);
auto convert = std::make_shared<ov::op::v0::Convert>(mul1, ov::element::f32);
auto result = std::make_shared<ov::op::v0::Result>(convert);

model_ref = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input0, input1});
}
}

TEST_F(TransformationTestsF, MulGroupNormFusionTest) {
{
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 1 }, { 10 });
auto mul = std::make_shared<ov::op::v1::Multiply>(input, scale_const);
auto norm_scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3 }, { 10 });
auto norm_bias_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3 }, { 10 });
auto group_norm = std::make_shared<ov::op::v12::GroupNormalization>(mul, norm_scale_const, norm_bias_const, 1, 0.01f);
auto convert = std::make_shared<ov::op::v0::Convert>(group_norm, ov::element::f32);
auto result = std::make_shared<ov::op::v0::Result>(convert);

model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input});
manager.register_pass<ov::pass::MulGroupNormFusion>();
}
{
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{ 1, 3, 16, 16 });
auto norm_scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3 }, { 10 });
auto norm_bias_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 3 }, { 10 });
auto group_norm = std::make_shared<ov::op::v12::GroupNormalization>(input, norm_scale_const, norm_bias_const, 1, 0.01f);
auto convert = std::make_shared<ov::op::v0::Convert>(group_norm, ov::element::f32);
auto result = std::make_shared<ov::op::v0::Result>(convert);

model_ref = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input});
}
}
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/plugin/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RO},
ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RO},
ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RO},
ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RO},
ov::PropertyName{ov::device::id.name(), PropertyMutability::RO},
ov::PropertyName{ov::execution_devices.name(), PropertyMutability::RO},
};
Expand Down
4 changes: 3 additions & 1 deletion src/plugins/intel_gpu/src/plugin/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,7 @@ std::vector<ov::PropertyName> Plugin::get_caching_properties() const {
ov::PropertyName{ov::hint::execution_mode.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::performance_mode.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW},
};

return caching_properties;
Expand Down Expand Up @@ -585,7 +586,8 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RW},
ov::PropertyName{ov::device::id.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW}
ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW}
};

return supported_properties;
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/runtime/execution_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ void ExecutionConfig::set_default() {
std::make_tuple(ov::hint::kv_cache_precision, ov::element::undefined),
std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false),
std::make_tuple(ov::weights_path, ""),
std::make_tuple(ov::hint::activations_scale_factor, 0.f),
std::make_tuple(ov::hint::activations_scale_factor, -1.f),

// Legacy API properties
std::make_tuple(ov::intel_gpu::nv12_two_inputs, false),
Expand Down

0 comments on commit bc284f5

Please sign in to comment.