Skip to content

Commit

Permalink
[CPU] Finalize fixing empty input of Reduce node on CPU part
Browse files Browse the repository at this point in the history
  • Loading branch information
xuchen-intel committed Nov 25, 2024
1 parent 1da165b commit f9d53fc
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 51 deletions.
42 changes: 34 additions & 8 deletions src/plugins/intel_cpu/src/nodes/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2033,6 +2033,14 @@ void Reduce::initSupportedPrimitiveDescriptors() {
std::make_shared<ExecutorContext>(context, getImplPriority()));
if (!factory->isEmpty()) {
supportedPrimitiveDescriptors.push_back({config, impl_type, factory});
} else {
bool apply_ref = customImplPriorities.size() > 0 && customImplPriorities[0] == ref;
// For an empty input, the ConvertReduceProd/Min/Max/Sum transformations are disabled to avoid an empty output,
// so these four reduce modes are not supported in that case and factory->isEmpty() returns true.
// Although these ACL kernels are not actually needed at execution time, supportedPrimitiveDescriptors
// must not be left empty, otherwise an error is raised.
if (!apply_ref) {
supportedPrimitiveDescriptors.push_back({config, impl_type});
}
}
} else {
supportedPrimitiveDescriptors.push_back({config, impl_type});
Expand Down Expand Up @@ -2093,6 +2101,10 @@ bool Reduce::isExecutable() const {
}

void Reduce::prepareParams() {
auto srcMemPtr = getSrcMemoryAtPort(REDUCE_DATA);
const auto& src_shape = srcMemPtr->getStaticDims();
empty_input = shape_size(src_shape) == 0 || srcMemPtr->getSize() == 0;
#if defined (OV_CPU_WITH_ACL)
if (canUseAclExecutor) {
std::vector<MemoryDescPtr> srcMemoryDescs;
for (size_t i = 0; i < getParentEdges().size(); i++) {
Expand All @@ -2102,11 +2114,25 @@ void Reduce::prepareParams() {
dstMemoryDescs.push_back(getDstMemoryAtPort(0)->getDescPtr());

auto selectedPD = getSelectedPrimitiveDescriptor();
aclExecPtr = selectedPD->getExecutorFactoryAs<ReduceExecutorFactory>()->makeExecutor(reduceAttrs, srcMemoryDescs, dstMemoryDescs, {});
selectedPD->setImplementationType(aclExecPtr->getImplType());

if (!empty_input) {
aclExecPtr = selectedPD->getExecutorFactoryAs<ReduceExecutorFactory>()->makeExecutor(reduceAttrs, srcMemoryDescs, dstMemoryDescs, {});
selectedPD->setImplementationType(aclExecPtr->getImplType());
} else {
selectedPD->setImplementationType(acl);
}
return;
} else {
auto selectedPD = getSelectedPrimitiveDescriptor();
if (!empty_input) {
// ref
selectedPD->setImplementationType(ref);
} else {
// unsupported reduce mode (prod, min, max, sum) for empty input
selectedPD->setImplementationType(acl);
return;
}
}
#endif

src_dims = getParentEdgeAt(REDUCE_DATA)->getMemory().getDesc().getShape().getDims();
std::vector<int> reduce_axes;
Expand Down Expand Up @@ -2274,14 +2300,14 @@ void Reduce::execute(dnnl::stream strm) {
const uint8_t *src_data = srcMemPtr->getDataAs<const uint8_t>();
uint8_t *dst_data = dstMemPtr->getDataAs<uint8_t>();

const auto& src_shape = srcMemPtr->getStaticDims();
if ((shape_size(src_shape) == 0 || srcMemPtr->getSize() == 0)) {
if (empty_input) {
if (dstMemPtr->getSize() > 0) {
init_dst_data(dst_data, dstMemPtr->getSize());
const bool skip_post_process = getAlgorithm() == Algorithm::ReduceMean || attr.get()->post_ops_.len() == 0;
if (!skip_post_process) {
#if defined(OPENVINO_ARCH_X86_64)
if (attr.get()->post_ops_.len() != 0) {
reduce_kernel_post_process(dst_data);
}
#endif
}
return;
}
Expand Down Expand Up @@ -2737,7 +2763,7 @@ inline void Reduce::reduce_kernel_process(const uint8_t *in_p, uint8_t *out_p, s

inline void Reduce::reduce_kernel_post_process(uint8_t *out_ptr) {
const uint8_t *in_ptr = fuse_low_precision ? static_cast<uint8_t *>(&intermediate_buf[0]) : nullptr;
const size_t integerDivisor = IB * IC * ID * IH * IW / (OB * OC * OD * OH * OW);
const size_t integerDivisor = empty_input ? 1 : IB * IC * ID * IH * IW / (OB * OC * OD * OH * OW);
const float divisor = static_cast<float>(integerDivisor);
if (layout == ReduceLayoutType::reduce_ncsp) {
parallel_for2d(OB, OC, [&](size_t ob, size_t oc) {
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/nodes/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ class Reduce : public Node {
bool ReduceCDW_opt = false;
bool use_aux_kernel = false;
bool set_use_aux_kernel = false;
bool empty_input = false;
bool ReduceN, ReduceC, ReduceD, ReduceH, ReduceW;
size_t IB, IC, ID, IH, IW;
size_t OB, OC, OD, OH, OW;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ ov::matcher_pass_callback ov::intel_cpu::ConvertReduceMultiAxisBase::convert_red
if (!reduction_axes) {
return false;
}
if (!reduce->is_dynamic() && ov::shape_size(input0.get_shape()) == 0) {
return false;
}
if (ov::shape_size(input1.get_shape()) <= 1) {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,25 @@ const std::vector<ov::test::utils::ReductionType>& reductionTypes() {
return reductionTypes;
}

const std::vector<ov::test::utils::ReductionType>& reductionTypesArithmetic() {
static const std::vector<ov::test::utils::ReductionType> reductionTypesArithmetic = {
ov::test::utils::ReductionType::Mean,
ov::test::utils::ReductionType::Sum,
ov::test::utils::ReductionType::Prod,
ov::test::utils::ReductionType::L1,
ov::test::utils::ReductionType::L2,
};
return reductionTypesArithmetic;
}

const std::vector<ov::test::utils::ReductionType>& reductionTypesCompare() {
static const std::vector<ov::test::utils::ReductionType> reductionTypesCompare = {
ov::test::utils::ReductionType::Max,
ov::test::utils::ReductionType::Min,
};
return reductionTypesCompare;
}

const std::vector<ElementType>& inpOutPrc() {
static const std::vector<ElementType> inpOutPrc = {ElementType::f32};
return inpOutPrc;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ const std::vector<std::vector<int>>& axes();
const std::vector<std::vector<int>>& axesND();
const std::vector<ov::test::utils::OpType>& opTypes();
const std::vector<utils::ReductionType>& reductionTypes();
const std::vector<utils::ReductionType>& reductionTypesArithmetic();
const std::vector<utils::ReductionType>& reductionTypesCompare();
const std::vector<ElementType>& inpOutPrc();
const std::vector<std::map<std::string, ov::element::Type>> additionalConfig();
const std::vector<std::map<std::string, ov::element::Type>> additionalConfigFP32();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@ namespace {

std::vector<std::vector<ov::test::InputShape>> inputShapes_5D = {
{{{}, {{2, 19, 2, 2, 9}}}},
{{{}, {{0, 19, 2, 2, 9}}}},
{{{}, {{1, 0, 0, 2, 9}}}},
};

// 5D input shapes that each contain at least one zero-sized dimension
// (the third one, or the third and the fifth), i.e. inputs with no elements.
// NOTE(review): the leading `{}` of each entry is presumably an empty dynamic
// (partial) shape, which would make these static shapes — confirm against
// ov::test::InputShape.
std::vector<std::vector<ov::test::InputShape>> inputShapes_5D_ZeroDim = {
{{{}, {{2, 19, 0, 2, 9}}}},
{{{}, {{2, 19, 0, 2, 0}}}},
};

const std::vector<std::vector<int>> axes5D = {
Expand All @@ -37,7 +40,7 @@ const auto params_MultiAxis_5D = testing::Combine(
testing::ValuesIn(axes5D),
testing::Values(ov::test::utils::OpType::VECTOR),
testing::Values(true),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(reductionTypesArithmetic()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
Expand All @@ -46,6 +49,34 @@ const auto params_MultiAxis_5D = testing::Combine(
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));

// Parameter generator for multi-axis Reduce tests on the zero-dim 5D shapes
// (inputShapes_5D_ZeroDim), restricted to the arithmetic reduction types.
// The inner Combine yields the basic Reduce parameters; the outer Combine
// pairs each of them with a CPU-specific param set (cpuParams_5D after
// filterCPUSpecificParams), no fusing (emptyFusingSpec) and every entry of
// additionalConfig().
// NOTE(review): the positional slots of the inner Combine are defined by the
// test's parameter tuple, which is not visible here; the third slot, fixed to
// `true`, is presumably the keep-dims flag — confirm.
const auto params_MultiAxis_5D_ZeroDim = testing::Combine(
testing::Combine(
testing::ValuesIn(axes5D),
testing::Values(ov::test::utils::OpType::VECTOR),
testing::Values(true),
testing::ValuesIn(reductionTypesArithmetic()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_5D_ZeroDim)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));

// Parameter generator for multi-axis Reduce tests on the zero-dim 5D shapes
// (inputShapes_5D_ZeroDim), restricted to the comparison reduction types
// (reductionTypesCompare()).
// Unlike the arithmetic zero-dim set, the last generator iterates
// additionalConfigFP32() rather than additionalConfig().
// NOTE(review): the third inner slot, fixed to `true`, is presumably the
// keep-dims flag — confirm against the test's parameter tuple.
const auto params_MultiAxis_5D_ZeroDim_Compare = testing::Combine(
testing::Combine(
testing::ValuesIn(axes5D),
testing::Values(ov::test::utils::OpType::VECTOR),
testing::Values(true),
testing::ValuesIn(reductionTypesCompare()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_5D_ZeroDim)),
testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfigFP32()));

const std::vector<std::vector<int>> axes5D_ref = {
{0}
};
Expand Down Expand Up @@ -81,6 +112,20 @@ INSTANTIATE_TEST_SUITE_P(
ReduceCPULayerTest::getTestCaseName
);

// Instantiates ReduceCPULayerTest with every combination produced by
// params_MultiAxis_5D_ZeroDim, under the prefix
// smoke_Reduce_MultiAxis_5D_ZeroDim_CPU; per-case names are generated by
// ReduceCPULayerTest::getTestCaseName.
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_MultiAxis_5D_ZeroDim_CPU,
ReduceCPULayerTest,
params_MultiAxis_5D_ZeroDim,
ReduceCPULayerTest::getTestCaseName
);

// Instantiates ReduceCPULayerTest with every combination produced by
// params_MultiAxis_5D_ZeroDim_Compare, under the prefix
// smoke_Reduce_MultiAxis_5D_ZeroDim_Compare_CPU; per-case names are generated
// by ReduceCPULayerTest::getTestCaseName.
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_MultiAxis_5D_ZeroDim_Compare_CPU,
ReduceCPULayerTest,
params_MultiAxis_5D_ZeroDim_Compare,
ReduceCPULayerTest::getTestCaseName
);

// Reference implementation testing of ACL unsupported case
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_MultiAxis_5D_CPU_ref,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,6 @@ INSTANTIATE_TEST_SUITE_P(
ReduceCPULayerTest::getTestCaseName
);


INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_Int32_CPU,
ReduceCPULayerTest,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,17 @@ const std::vector<std::vector<int>> axesGather = {
{3}
};

// Reduction-axis sets used by the zero-dim Reduce test parameters.
// Each inner vector is one test case listing the axes to reduce over.
// Written with `= {` like the other axes tables in these tests; the resulting
// object is identical to the brace-only form.
const std::vector<std::vector<int>> axesZeroDim = {
    {1, 3},
    {0, 1, 3},
    {1, 2, 3},
    {0, 1, 2, 3}
};

// Reduction-axis sets for the zero-dim fusing test parameters:
// a single case that reduces over axes 1 and 3.
const std::vector<std::vector<int>> axesZeroDimFusing = {{1, 3}};

std::vector<CPUSpecificParams> cpuParams_5D = {
CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {}),
CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}),
Expand Down Expand Up @@ -331,6 +342,20 @@ const auto params_SingleBatch = testing::Combine(
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));

// No-fusion parameter generator for the zero-dim input shapes
// (inputShapes_Dynmic_ZeroDim, defined outside this view).
// Iterates the axis sets in axesZeroDim, every keepDims() variant and every
// reduction type from reductionTypes(); paired with emptyCPUSpec,
// emptyFusingSpec and each entry of additionalConfig().
const auto params_MultiAxis_4D_dynamic_with_zero = testing::Combine(
testing::Combine(
testing::ValuesIn(axesZeroDim),
testing::Values(ov::test::utils::OpType::VECTOR),
testing::ValuesIn(keepDims()),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_Dynmic_ZeroDim)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));

INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_OneAxis_CPU,
ReduceCPULayerTest,
Expand Down Expand Up @@ -408,6 +433,13 @@ INSTANTIATE_TEST_SUITE_P(
ReduceCPULayerTest::getTestCaseName
);

// Instantiates ReduceCPULayerTest with every combination produced by
// params_MultiAxis_4D_dynamic_with_zero, under the prefix
// smoke_Reduce_MultiAxis_4D_dynamic_with_zero_CPU; per-case names are
// generated by ReduceCPULayerTest::getTestCaseName.
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_MultiAxis_4D_dynamic_with_zero_CPU,
ReduceCPULayerTest,
params_MultiAxis_4D_dynamic_with_zero,
ReduceCPULayerTest::getTestCaseName
);

/* ================================ 1.2 No fusion - Logical ================================ */
const auto params_OneAxis_Logical = testing::Combine(
testing::Combine(
Expand Down Expand Up @@ -606,6 +638,20 @@ const auto params_LowPrecision_fusing = testing::Combine(
testing::ValuesIn(fusingParamsSet_LowPrecision),
testing::ValuesIn(additionalConfig()));

// Fusing parameter generator for the zero-dim input shapes
// (inputShapes_Dynmic_ZeroDim, defined outside this view).
// Uses the single axis set in axesZeroDimFusing and all reductionTypes(),
// and iterates every fusing spec in fusingParamsSet with emptyCPUSpec and
// each entry of additionalConfig().
// NOTE(review): the third inner slot is fixed to `true`; by position it is
// presumably the keep-dims flag — confirm against the test's parameter tuple.
const auto params_MultiAxis_4D_dynamic_with_zero_fusing = testing::Combine(
testing::Combine(
testing::ValuesIn(axesZeroDimFusing),
testing::Values(ov::test::utils::OpType::VECTOR),
testing::Values(true),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_Dynmic_ZeroDim)),
testing::Values(emptyCPUSpec),
testing::ValuesIn(fusingParamsSet),
testing::ValuesIn(additionalConfig()));

INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_OneAxis_fusing_CPU,
ReduceCPULayerTest,
Expand Down Expand Up @@ -641,6 +687,13 @@ INSTANTIATE_TEST_SUITE_P(
ReduceCPULayerTest::getTestCaseName
);

// Instantiates ReduceCPULayerTest with every combination produced by
// params_MultiAxis_4D_dynamic_with_zero_fusing, under the prefix
// smoke_Reduce_MultiAxis_4D_dynamic_with_zero_fusing_CPU; per-case names are
// generated by ReduceCPULayerTest::getTestCaseName.
INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_MultiAxis_4D_dynamic_with_zero_fusing_CPU,
ReduceCPULayerTest,
params_MultiAxis_4D_dynamic_with_zero_fusing,
ReduceCPULayerTest::getTestCaseName
);

/* ================================ 2.2 Fusion - KeepNoDims ================================ */
const auto params_OneAxis_fusing_KeepNoDims = testing::Combine(
testing::Combine(
Expand Down Expand Up @@ -684,6 +737,20 @@ const auto params_MultiAxis_5D_Hybrid_fusing_KeepNoDims = testing::Combine(
testing::ValuesIn(fusingParamsSet_KeepNoDims),
testing::ValuesIn(additionalConfigFP32()));

// Fusing parameter generator for the zero-dim input shapes
// (inputShapes_Dynmic_ZeroDim, defined outside this view), KeepNoDims variant.
// Same layout as the keep-dims fusing set, but the third inner slot is fixed
// to `false` and the fusing specs come from fusingParamsSet_KeepNoDims.
// NOTE(review): the third inner slot is presumably the keep-dims flag, as the
// "KeepNoDims" naming suggests — confirm against the test's parameter tuple.
const auto params_MultiAxis_4D_dynamic_with_zero_fusing_KeepNoDims = testing::Combine(
testing::Combine(
testing::ValuesIn(axesZeroDimFusing),
testing::Values(ov::test::utils::OpType::VECTOR),
testing::Values(false),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_Dynmic_ZeroDim)),
testing::Values(emptyCPUSpec),
testing::ValuesIn(fusingParamsSet_KeepNoDims),
testing::ValuesIn(additionalConfig()));

INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_OneAxis_fusing_KeepNoDims_CPU,
ReduceCPULayerTest,
Expand All @@ -705,47 +772,10 @@ INSTANTIATE_TEST_SUITE_P(
ReduceCPULayerTest::getTestCaseName
);


/* ================================ 2.3 Empty dims ================================ */
const auto params_MultiAxis_4D_dynamic_with_zero = testing::Combine(
testing::Combine(
testing::ValuesIn(axesND()),
testing::Values(ov::test::utils::OpType::VECTOR),
testing::ValuesIn(keepDims()),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_Dynmic_ZeroDim)),
testing::Values(emptyCPUSpec),
testing::Values(emptyFusingSpec),
testing::ValuesIn(additionalConfig()));

const auto params_MultiAxis_4D_dynamic_with_zero_fusing = testing::Combine(
testing::Combine(
testing::ValuesIn(axesNDFusing),
testing::Values(ov::test::utils::OpType::VECTOR),
testing::ValuesIn(keepDims()),
testing::ValuesIn(reductionTypes()),
testing::ValuesIn(inpOutPrc()),
testing::Values(ElementType::undefined),
testing::Values(ElementType::undefined),
testing::ValuesIn(inputShapes_Dynmic_ZeroDim)),
testing::Values(emptyCPUSpec),
testing::Values(fusingSwish),
testing::ValuesIn(additionalConfig()));

INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_MultiAxis_4D_dynamic_with_zero_CPU,
smoke_Reduce_MultiAxis_4D_dynamic_with_zero_fusing_KeepNoDims_CPU,
ReduceCPULayerTest,
params_MultiAxis_4D_dynamic_with_zero,
ReduceCPULayerTest::getTestCaseName
);

INSTANTIATE_TEST_SUITE_P(
smoke_Reduce_MultiAxis_4D_dynamic_with_zero_fusing_CPU,
ReduceCPULayerTest,
params_MultiAxis_4D_dynamic_with_zero_fusing,
params_MultiAxis_4D_dynamic_with_zero_fusing_KeepNoDims,
ReduceCPULayerTest::getTestCaseName
);

Expand Down

0 comments on commit f9d53fc

Please sign in to comment.