From 412cbe6cf9cfa4a9f3957b2780aeed0563b26617 Mon Sep 17 00:00:00 2001 From: Yeonbok Taylor Lee Date: Sun, 10 Nov 2024 15:12:03 +0900 Subject: [PATCH] Relax heuristic to apply vertical weight optimization --- .../kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 91f86f4a45d390..2caefdd78ad3ef 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -145,7 +145,7 @@ static bool is_weight_vertical(const fully_connected_params& params, size_t outp << "(computeUnitsCount : " << params.engineInfo.computeUnitsCount << " min_num_threads : " << min_num_threads << ")" << std::endl; GPU_DEBUG_TRACE_DETAIL << "Use ofm_tile size 1 if the batch size is 1." << std::endl; - return (params.weights.IFM().v >= params.weights.OFM().v * 3 + return (params.weights.IFM().v >= params.weights.OFM().v * 2 && output_f / 2 /*most frequently used tile_ofm*/ <= min_num_threads); }