Skip to content

Commit

Permalink
[GPU] Add more mixed type of bfyx to eltwise_blocked_opt
Browse files Browse the repository at this point in the history
Improve yolo_v5m performance by using eltwise_blocked_opt for
batchNormalization, which helps on platforms that have XMX

Signed-off-by: Chon Ming Lee <[email protected]>
  • Loading branch information
clee30 committed Nov 28, 2024
1 parent 7b3fd79 commit ce7bdf8
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ ParamsKey EltwiseKernel_blocked_opt::GetSupportedKey() const {
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
Expand Down Expand Up @@ -112,7 +113,7 @@ bool EltwiseKernel_blocked_opt::Validate(const Params& params) const {
}

const auto vec_size = SelectVecSizeFromFormat(ewParams.outputs[0]);
const auto input0 = ewParams.inputs[0];
const auto& input0 = ewParams.inputs[0];
const auto& output = ewParams.outputs[0];
// Check that padding before features doesn't mis-align the blocks
if (input0.Feature().pad.before % vec_size != 0 || output.Feature().pad.before % vec_size != 0)
Expand All @@ -137,11 +138,21 @@ bool EltwiseKernel_blocked_opt::Validate(const Params& params) const {
};

for (size_t i = 1; i < ewParams.inputs.size(); i++) {
if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(ewParams.inputs[i], input0)))
const auto& input = ewParams.inputs[i];
if (input.LogicalSize() == input0.LogicalSize() && !(compareTensors(input, input0)))
return false;
if (ewParams.inputs[i].Feature().pad.before % vec_size != 0) {
if (input.Feature().pad.before % vec_size != 0) {
return false;
}
if (input.GetLayout() == DataLayout::bfyx) {
bool is_valid = input.LogicalSize() % vec_size == 0 &&
input.LogicalSize() == input.Feature().v &&
input.LogicalSize() == output.Feature().v &&
GetInnerBatchBlockSize(input) == 1;
if (!is_valid) {
return false;
}
}
}

return true;
Expand Down Expand Up @@ -422,6 +433,7 @@ static inline int SelectVecSizeFromFormat(const DataTensor& tensor) {
static inline int GetInnerBatchBlockSize(const DataTensor& tensor) {
auto layout = tensor.GetLayout();
switch (layout) {
case DataLayout::bfyx:
case DataLayout::b_fs_yx_fsv4:
case DataLayout::b_fs_yx_fsv16:
case DataLayout::b_fs_zyx_fsv16:
Expand Down
12 changes: 11 additions & 1 deletion src/plugins/intel_gpu/tests/unit/test_cases/eltwise_gpu_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4705,12 +4705,17 @@ struct eltwise_layout_test_params {
#define CASE_ELTWISE_TEST1 eltwise_mode::sum, {1, 2, 1, 1}, {4, 2, 4, 4}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST2 eltwise_mode::sum, {4, 1, 4, 4}, {1, 5, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST3 eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST4 eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_blocked_opt"
#define CASE_ELTWISE_TEST4 eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST5 eltwise_mode::sum, {1, 2, 1, 1}, {4, 2, 4, 4}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST6 eltwise_mode::sum, {4, 1, 4, 4}, {1, 5, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST7 eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST8 eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST9 eltwise_mode::eq, {4, 2, 4, 4}, {1, 1, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST10 eltwise_mode::sum, {4, 8, 1, 1}, {1, 8, 1, 1}, format::b_fs_yx_fsv32, format::bfyx, "eltwise_blocked_opt"
#define CASE_ELTWISE_TEST11 eltwise_mode::sum, {4, 8, 1, 1}, {1, 8, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_blocked_opt"
#define CASE_ELTWISE_TEST12 eltwise_mode::sum, {4, 16, 4, 4}, {1, 16, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_blocked_opt"
#define CASE_ELTWISE_TEST13 eltwise_mode::sum, {4, 7, 4, 4}, {1, 7, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST14 eltwise_mode::sum, {1, 8, 1, 1}, {4, 8, 1, 1}, format::bfyx, format::b_fs_yx_fsv32, "generic_eltwise_ref"

class eltwise_layout_test : public BaseEltwiseTest<eltwise_layout_test_params> {
public:
Expand Down Expand Up @@ -4800,6 +4805,11 @@ INSTANTIATE_TEST_SUITE_P(eltwise, eltwise_test_mixed_layout,
eltwise_layout_test_params{CASE_ELTWISE_TEST7},
eltwise_layout_test_params{CASE_ELTWISE_TEST8},
eltwise_layout_test_params{CASE_ELTWISE_TEST9},
eltwise_layout_test_params{CASE_ELTWISE_TEST10},
eltwise_layout_test_params{CASE_ELTWISE_TEST11},
eltwise_layout_test_params{CASE_ELTWISE_TEST12},
eltwise_layout_test_params{CASE_ELTWISE_TEST13},
eltwise_layout_test_params{CASE_ELTWISE_TEST14},
}));

//
Expand Down

0 comments on commit ce7bdf8

Please sign in to comment.