-
Notifications
You must be signed in to change notification settings - Fork 36
/
optimizers.py
86 lines (71 loc) · 2.74 KB
/
optimizers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#coding:utf-8
import os, sys
import os.path as osp
import numpy as np
import torch
from torch import nn
from torch.optim import Optimizer
from functools import reduce
from torch.optim import AdamW
class MultiOptimizer:
def __init__(self, optimizers={}, schedulers={}):
self.optimizers = optimizers
self.schedulers = schedulers
self.keys = list(optimizers.keys())
self.param_groups = reduce(lambda x,y: x+y, [v.param_groups for v in self.optimizers.values()])
def state_dict(self):
state_dicts = [(key, self.optimizers[key].state_dict())\
for key in self.keys]
return state_dicts
def load_state_dict(self, state_dict):
for key, val in state_dict:
try:
self.optimizers[key].load_state_dict(val)
except:
print("Unloaded %s" % key)
def step(self, key=None):
if key is not None:
self.optimizers[key].step()
else:
_ = [self.optimizers[key].step() for key in self.keys]
def zero_grad(self, key=None):
if key is not None:
self.optimizers[key].zero_grad()
else:
_ = [self.optimizers[key].zero_grad() for key in self.keys]
def scheduler(self, *args, key=None):
if key is not None:
self.schedulers[key].step(*args)
else:
_ = [self.schedulers[key].step(*args) for key in self.keys]
def build_optimizer(parameters):
optimizer, scheduler = _define_optimizer(parameters)
return optimizer, scheduler
def _define_optimizer(params):
optimizer_params = params['optimizer_params']
sch_params = params['scheduler_params']
optimizer = AdamW(
params['params'],
lr=optimizer_params.get('lr', 1e-4),
weight_decay=optimizer_params.get('weight_decay', 5e-4),
betas=(0.9, 0.98),
eps=1e-9)
scheduler = _define_scheduler(optimizer, sch_params)
return optimizer, scheduler
def _define_scheduler(optimizer, params):
print(params)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
optimizer,
max_lr=params.get('max_lr', 5e-4),
epochs=params.get('epochs', 200),
steps_per_epoch=params.get('steps_per_epoch', 1000),
pct_start=params.get('pct_start', 0.0),
final_div_factor=5)
return scheduler
def build_multi_optimizer(parameters_dict, scheduler_params):
optim = dict([(key, AdamW(params, lr=1e-4, weight_decay=1e-6, betas=(0.9, 0.98), eps=1e-9))
for key, params in parameters_dict.items()])
schedulers = dict([(key, _define_scheduler(opt, scheduler_params)) \
for key, opt in optim.items()])
multi_optim = MultiOptimizer(optim, schedulers)
return multi_optim