From 8c61a95a8784fa1d4f6628bf973816ccd45abf9b Mon Sep 17 00:00:00 2001
From: Bo Zhang <105368690+zhangbopd@users.noreply.github.com>
Date: Wed, 22 Mar 2023 11:16:49 +0800
Subject: [PATCH] 【AMP OP&Test】unit test for accuracy_op (#51009)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* test_accuracy_op

* add create_test_fp/bf16_class

* cast after calculation

* change convert_uint16_to_float_ifneed

* delete TestAccuracyOpFp32 according to PR comment

* fix the rtol setting rules in bfloat16 forward
---
 paddle/phi/kernels/gpu/accuracy_kernel.cu     |  6 ++-
 .../fluid/tests/unittests/test_accuracy_op.py | 45 ++++++++++++++++++-
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/paddle/phi/kernels/gpu/accuracy_kernel.cu b/paddle/phi/kernels/gpu/accuracy_kernel.cu
index f67605714aba8..a42d131a48d69 100644
--- a/paddle/phi/kernels/gpu/accuracy_kernel.cu
+++ b/paddle/phi/kernels/gpu/accuracy_kernel.cu
@@ -20,6 +20,8 @@
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
+#include "paddle/phi/common/amp_type_traits.h"
+#include "paddle/phi/common/bfloat16.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/kernel_registry.h"
 
@@ -34,6 +36,7 @@ __global__ void AccuracyCudaKernel(const int N,
                                    int* correct_data,
                                    T* accuracy,
                                    int* total_data) {
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   int count = 0;
   __shared__ int total[BlockSize];
 
@@ -64,7 +67,7 @@ __global__ void AccuracyCudaKernel(const int N,
 #endif
   if (threadIdx.x == 0) {
     *correct_data = result;
-    *accuracy = static_cast<T>(result) / static_cast<T>(N);
+    *accuracy = static_cast<T>(static_cast<MT>(result) / static_cast<MT>(N));
     *total_data = N;
   }
 }
@@ -136,6 +139,7 @@ PD_REGISTER_KERNEL(accuracy,
                    ALL_LAYOUT,
                    phi::AccuracyRawKernel,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16,
                    float,
                    double) {
   kernel->InputAt(1).SetDataType(phi::DataType::INT64);
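A note on the "cast after calculation" change above: bfloat16 keeps only 8 significand bits, so integer counts above 256 are not exactly representable, and the old `static_cast<T>(result) / static_cast<T>(N)` rounds both operands before the division even starts, while dividing in the wider MPTypeTrait type (float for float16/bfloat16) and casting only the final ratio rounds just once. The sketch below, plain NumPy with a simplified round-to-nearest-even bfloat16 emulation and made-up counts (none of this is Paddle code), shows how the two orderings can land on different bfloat16 values:

    import numpy as np

    def to_bf16(x):
        # Round a float32 value to the nearest bfloat16 (ties to even) and
        # return it as float32 so it stays easy to compare and print.
        u = np.asarray(x, dtype=np.float32).view(np.uint32)
        bias = np.uint32(0x7FFF) + ((u >> np.uint32(16)) & np.uint32(1))
        return ((u + bias) & np.uint32(0xFFFF0000)).view(np.float32)

    correct, total = 1388, 2952  # made-up counts, chosen to expose the rounding
    exact = correct / total  # 0.470189...

    # Old ordering: cast both integer counts to bfloat16, then divide.
    cast_first = to_bf16(to_bf16(correct) / to_bf16(total))  # -> 0.472656...
    # New ordering: divide in float (the MPTypeTrait type), cast only the result.
    cast_last = to_bf16(np.float32(correct) / np.float32(total))  # -> 0.470703...

    print(exact, float(cast_first), float(cast_last))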
diff --git a/python/paddle/fluid/tests/unittests/test_accuracy_op.py b/python/paddle/fluid/tests/unittests/test_accuracy_op.py
index b6f99020ea87f..5b579ccae6107 100755
--- a/python/paddle/fluid/tests/unittests/test_accuracy_op.py
+++ b/python/paddle/fluid/tests/unittests/test_accuracy_op.py
@@ -19,7 +19,8 @@
 
 import paddle
 import paddle.fluid as fluid
-from paddle.fluid import Program, program_guard
+from paddle.fluid import Program, core, program_guard
+from paddle.fluid.tests.unittests.op_test import convert_float_to_uint16
 
 
 def accuracy_wrapper(infer, indices, label):
@@ -64,6 +65,48 @@ def test_check_output(self):
         self.check_output(atol=1e-3)
 
 
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or does not support bfloat16",
+)
+class TestAccuracyOpBf16(OpTest):
+    def setUp(self):
+        self.op_type = "accuracy"
+        self.python_api = accuracy_wrapper
+        self.init_dtype()
+        n = 8192
+        infer = np.random.random((n, 1)).astype(np.float32)
+        indices = np.random.randint(0, 2, (n, 1)).astype('int64')
+        label = np.random.randint(0, 2, (n, 1)).astype('int64')
+        self.inputs = {
+            'Out': convert_float_to_uint16(infer),
+            'Indices': indices,
+            "Label": label,
+        }
+        num_correct = 0
+        for rowid in range(n):
+            for ele in indices[rowid]:
+                if ele == label[rowid]:
+                    num_correct += 1
+                    break
+        self.outputs = {
+            'Accuracy': convert_float_to_uint16(
+                np.array([num_correct / float(n)]).astype(np.float32)
+            ),
+            'Correct': np.array([num_correct]).astype("int32"),
+            'Total': np.array([n]).astype("int32"),
+        }
+
+    def init_dtype(self):
+        self.dtype = np.uint16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-2)
+
+
 class TestAccuracyOpError(unittest.TestCase):
     def test_type_errors(self):
         with program_guard(Program(), Program()):
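To exercise just the new bfloat16 case locally, a loader along these lines should do; it is a sketch that assumes an installed CUDA build of Paddle whose device reports bfloat16 support, run from python/paddle/fluid/tests/unittests so that test_accuracy_op is importable:

    import unittest

    # Load and run only the bfloat16 test case added by this patch.
    from test_accuracy_op import TestAccuracyOpBf16

    suite = unittest.TestLoader().loadTestsFromTestCase(TestAccuracyOpBf16)
    unittest.TextTestRunner(verbosity=2).run(suite)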