diff --git a/diopi_test/python/conformance/diopi_configs.py b/diopi_test/python/conformance/diopi_configs.py index d64e36067..d2e7eed7c 100644 --- a/diopi_test/python/conformance/diopi_configs.py +++ b/diopi_test/python/conformance/diopi_configs.py @@ -4514,23 +4514,25 @@ atol_half=1e-4, rtol_half=1e-3, para=dict( - nesterov=[False, True], - lr=[0.1, 0.1], - momentum=[0.01, 0.01], - weight_decay=[0, 0.1], - dampening=[0.1, 0], + lr=[0.05, 0.001, 0.1, 0.1, 0, 3, 0.2, 0.07], + momentum=[0.5, 0, 0.01, 0.01, 1, 0.5, 2, 1.2], + weight_decay=[0, 0.5, 0, 0.1, 3, 2.3, 4.0, 5], + dampening=[0, -0.5, 0.1, 0, 2, 3.0, 0, 6.5], + nesterov=[True, False, False, True, False, False, True, False], ), tensor_para=dict( dtype=[Dtype.float32, Dtype.float16, Dtype.float64], args=[ { "ins": ['param', 'param_grad'], - "shape": [(2, 3, 16), (4, 32, 7, 7)], + "shape": [(), (16, 8), (2, 3, 16), (4, 32, 7, 7), (4, 16, 3, 8, 2), + (0,), (3, 0), (4, 0, 9)], "gen_fn": Genfunc.randn, }, { "ins": ['buf'], - "shape": [(2, 3, 16), (4, 32, 7, 7)], + "shape": [(), (16, 8), (2, 3, 16), (4, 32, 7, 7), (4, 16, 3, 8, 2), + (0,), (3, 0), (4, 0, 9)], "gen_fn": Genfunc.rand, }, ] @@ -4739,39 +4741,50 @@ ), ), - # 'adam': dict( - # name=['adam', 'adamw'], - # interface=["CustomizedTest"], - # atol=1e-4, - # rtol=1e-3, - # atol_half=1e-4, - # rtol_half=1e-3, - - # para=dict( - # lr=[0.1, 0.1], - # beta1=[0.9, 0.8], - # beta2=[0.99, 0.88], - # eps=[1e-08, 1e-09], - # step=[1, 4], - # weight_decay=[0, 0.1], - # amsgrad=[False, True], - # ), - # tensor_para=dict( - # dtype=[Dtype.float16, Dtype.float32, Dtype.float64], - # args=[ - # { - # "ins": ['param', 'param_grad'], - # "shape": [(2, 3, 16), (4, 32, 7, 7)], - # "gen_fn": Genfunc.rand, - # }, - # { - # "ins": ['exp_avg', 'exp_avg_sq', 'max_exp_avg_sq'], - # "shape": [(2, 3, 16), (4, 32, 7, 7)], - # "gen_fn": Genfunc.zeros, - # }, - # ] - # ), - # ), + # FIXME adamw、adam输出精度不一致 + 'adam': dict( + name=['adam', 'adamw'], + interface=["CustomizedTest"], + 
atol=1e-4, + rtol=1e-3, + atol_half=1e-4, + rtol_half=1e-3, + para=dict( + # lr=[0, -0.2, 2, 0.001, 0.1, 3.2, -2, 0], + # beta1=[0, -1, 0.004, 0.9, 0.8, -2, 4.3, 0], + # beta2=[0.3, 0, -2, 0.99, 0.88, 1, -4, 0], + # eps=[-1e-02, 0, 1e-2, 1e-08, 1e-09, 0, 2, 1e-4], + # step=[3, 2, 0, 1, 4, 2, 4, 5], + # weight_decay=[-0.2, 0, 2, 0, 0.1, 2.5, 0, -3], + # amsgrad=[False, True, True, False, True, False, True, True], + lr=[0, 3.2, -2, 0], + beta1=[0, -2, 4.3, 0], + beta2=[0.3, 1, -4, 0], + eps=[-1e-02, 0, 2, 1e-4], + step=[3, 2, 4, 5], + weight_decay=[-0.2, 2.5, 0, -3], + amsgrad=[False, False, True, True], + ), + tensor_para=dict( + dtype=[Dtype.float16, Dtype.float32, Dtype.float64], + args=[ + { + "ins": ['param', 'param_grad'], + # "shape": [(), (16,), (16, 8), (2, 3, 16), (4, 32, 7, 7), + # (0,), (4, 0), (12, 0, 9)], + "shape": [(), (0,), (4, 0), (12, 0, 9)], + "gen_fn": Genfunc.randn, + }, + { + "ins": ['exp_avg', 'exp_avg_sq', 'max_exp_avg_sq'], + # "shape": [(), (16,), (16, 8), (2, 3, 16), (4, 32, 7, 7), + # (0,), (4, 0), (12, 0, 9)], + "shape": [(), (0,), (4, 0), (12, 0, 9)], + "gen_fn": Genfunc.randn, + }, + ] + ), + ), # FIXME conv_transpose2d特定参数组合,反向传播失败 'conv_transpose2d': dict( @@ -5043,23 +5056,25 @@ atol_half=1e-4, rtol_half=1e-3, para=dict( - lr=[0.1, 0.1], - rho=[0.9, 0.88], - eps=[1e-6, 1e-6], - weight_decay=[0, 0.1], + lr=[1.0, 0, -0.5, 0.1, 0.1, 2.3, -2, 0], + rho=[-1, 1.2, 0, 0.9, 0.88, -3, 0.5, 0], + eps=[1e-2, 0, -1e-4, 1e-6, 1e-6, 0, 1e-4, -1e-6], + weight_decay=[1.2, 0.5, -1.3, 0, 0.1, 0.5, 0, -1.2], ), tensor_para=dict( dtype=[Dtype.float32, Dtype.float16, Dtype.float64], args=[ { "ins": ['param', 'param_grad'], - "shape": [(2, 3, 16), (4, 32, 7, 7)], - "gen_fn": Genfunc.rand, + "shape": [(), (16,), (16, 8), (2, 3, 16), (4, 32, 7, 7), + (0,), (4, 0), (12, 0, 9)], + "gen_fn": Genfunc.randn, }, { "ins": ['square_avg', 'acc_delta'], - "shape": [(2, 3, 16), (4, 32, 7, 7)], - "gen_fn": Genfunc.zeros, + "shape": [(), (16,), (16, 8), (2, 3, 16), (4, 
32, 7, 7), + (0,), (4, 0), (12, 0, 9)], + "gen_fn": Genfunc.randn, }, ] ), @@ -5073,25 +5088,27 @@ atol=1e-5, rtol=1e-3, para=dict( - lr=[0.1, 0.01], - alpha=[0.9, 0.99], - eps=[1e-6, 1e-8], - weight_decay=[0, 0.1], - momentum=[0, 0.1], - centered=[False, True], + lr=[0, 1.2, -0.05, 0.1, 0.01, 0, 2, 2.3], + alpha=[-0.3, 0, 1.2, 0.9, 0.99, 3, 0, 0.4], + eps=[1e-2, 0, -1e-4, 1e-6, 1e-8, 0, 1e-4, -1e-6], + weight_decay=[1.2, 0.5, -1.3, 0, 0.1, 0.5, 0, -1.2], + momentum=[-2, 0.3, 1, 0, 0.1, 0.05, -3, 0], + centered=[True, False, True, False, True, True, False, True], ), tensor_para=dict( dtype=[Dtype.float32, Dtype.float16, Dtype.float64], args=[ { "ins": ['param', 'param_grad'], - "shape": [(2, 3, 16), (4, 32, 7, 7)], + "shape": [(), (16,), (16, 8), (2, 3, 16), (4, 32, 7, 7), + (0,), (4, 0), (12, 0, 9)], "gen_fn": Genfunc.randn, }, { "ins": ['square_avg', 'grad_avg', 'momentum_buffer'], - "shape": [(2, 3, 16), (4, 32, 7, 7)], - "gen_fn": Genfunc.zeros, + "shape": [(), (16,), (16, 8), (2, 3, 16), (4, 32, 7, 7), + (0,), (4, 0), (12, 0, 9)], + "gen_fn": Genfunc.randn, }, ] ), diff --git a/diopi_test/python/conformance/gen_data.py b/diopi_test/python/conformance/gen_data.py index e641091d8..e5283a861 100644 --- a/diopi_test/python/conformance/gen_data.py +++ b/diopi_test/python/conformance/gen_data.py @@ -442,7 +442,7 @@ def adamw(param, param_grad, exp_avg, exp_avg_sq, max_exp_avg_sq, lr, beta1, bet exp_avgs = [exp_avg] exp_avg_sqs = [exp_avg_sq] max_exp_avg_sqs = [max_exp_avg_sq] - state_steps = [step] + state_steps = [torch.tensor(float(step))] torch.optim._functional.adamw(params_with_grad, grads, @@ -455,7 +455,8 @@ def adamw(param, param_grad, exp_avg, exp_avg_sq, max_exp_avg_sq, lr, beta1, bet beta2=beta2, lr=lr, weight_decay=weight_decay, - eps=eps) + eps=eps, + maximize=False) return param, param_grad, exp_avg, exp_avg_sq, max_exp_avg_sq def adadelta(param, param_grad, square_avg, acc_delta, lr, rho, eps, weight_decay): diff --git a/impl/camb/device_configs.py 
b/impl/camb/device_configs.py index b8b75d16f..0095e69ee 100644 --- a/impl/camb/device_configs.py +++ b/impl/camb/device_configs.py @@ -999,6 +999,8 @@ args=[ { "ins": ['param', 'param_grad'], + # FIXME Run diopi_functions.adam failed, because of inputs: param_grad changed + "shape": [Skip(())], "dtype": [Skip(Dtype.float16)], }, ] @@ -1045,6 +1047,15 @@ name=["adadelta"], atol_half=1e-3, rtol_half=1e-3, + tensor_para=dict( + args=[ + { + # can't get correct result + "ins": ['param', 'param_grad'], + "dtype": [Skip(Dtype.float16)], + }, + ] + ), ), 'rmsprop': dict(