diff --git a/pytorch_optimizer/__init__.py b/pytorch_optimizer/__init__.py
index f8147013..88a259ed 100644
--- a/pytorch_optimizer/__init__.py
+++ b/pytorch_optimizer/__init__.py
@@ -109,6 +109,7 @@
     Nero,
     NovoGrad,
     PAdam,
+    PCGrad,
     Prodigy,
     QHAdam,
     RAdam,
diff --git a/pytorch_optimizer/optimizer/adopt.py b/pytorch_optimizer/optimizer/adopt.py
index e4706b57..8b7026db 100644
--- a/pytorch_optimizer/optimizer/adopt.py
+++ b/pytorch_optimizer/optimizer/adopt.py
@@ -14,8 +14,6 @@ class ADOPT(BaseOptimizer):
     :param weight_decay: float. weight decay (L2 penalty).
     :param weight_decouple: bool. the optimizer uses decoupled weight decay as in AdamW.
     :param fixed_decay: bool. fix weight decay.
-    :param adanorm: bool. whether to use the AdaNorm variant.
-    :param adam_debias: bool. Only correct the denominator to avoid inflating step sizes early in training.
     :param eps: float. term added to the denominator to improve numerical stability.
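A minimal sketch of what the `__init__.py` change enables: `PCGrad` becomes importable from the package's top level. The wrapped optimizer (`AdamP`), the `pc_backward` call, and the two-task losses are assumptions based on the library's documented PCGrad usage, not part of this diff.

```python
# Sketch only: assumes PCGrad wraps a base optimizer and exposes pc_backward(losses),
# as described in the pytorch-optimizer docs; not taken from this PR.
import torch
from pytorch_optimizer import AdamP, PCGrad

model = torch.nn.Linear(8, 2)
optimizer = PCGrad(AdamP(model.parameters(), lr=1e-3))

x = torch.randn(4, 8)
out = model(x)
loss_1 = out[:, 0].pow(2).mean()  # hypothetical first-task loss
loss_2 = out[:, 1].pow(2).mean()  # hypothetical second-task loss

optimizer.zero_grad()
optimizer.pc_backward([loss_1, loss_2])  # project conflicting per-task gradients
optimizer.step()
```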