# train.py (forked from andrewjong/SwapNet)
"""
General-purpose training script for image-to-image translation, adapted for SwapNet.
This script works for various models (with option '--model': e.g., pix2pix, cyclegan,
colorization) and different datasets (with option '--dataset_mode': e.g., image, video).
You need to specify the dataset ('--dataroot'), experiment name ('--name'), and model
('--model').
It first creates the model, dataset, and visualizer given the options.
It then does standard network training. During training, it also visualizes/saves
the images, prints/saves the loss plot, and saves models.
The script supports continue/resume training. Use '--continue_train' to resume your
previous training.
Example:
    Train the warp model:
        python train.py --name warp_stage --model warp --dataroot data/deep_fashion
    Train the texture model:
        python train.py --name texture_stage --model texture --dataroot data/deep_fashion
"""
from tqdm import tqdm
import time
from options.train_options import TrainOptions
from datasets import create_dataset
from models import create_model
from util.visualizer import Visualizer
# Rebind the module-level name `print` to tqdm.write, so every print(...) call
# below goes through tqdm (intended to keep messages from clobbering the
# active progress bars). This shadows the builtin for this module only.
# NOTE(review): tqdm.write presumably expects a single string argument, unlike
# the builtin print -- confirm before calling print() with non-str values.
print = tqdm.write
def _is_due(counter, freq):
    """Return True when ``counter`` has reached a multiple of ``freq``.

    A falsy ``freq`` (0 or None) means the periodic action is disabled. This
    is the same guard the checkpoint frequencies already relied on; applying
    it to every frequency avoids a ZeroDivisionError from the modulo.
    """
    return bool(freq) and counter % freq == 0


def main():
    """Run the training loop configured by the command-line options.

    Builds the dataset, model and visualizer from the parsed options, then
    iterates over epochs and batches. For every batch it optimizes the model
    and refreshes the progress-bar description with the current losses; at
    the configured frequencies it also displays visuals, logs/plots losses
    and saves checkpoints.
    """
    opt = TrainOptions().parse(store_options=True)  # get training options

    # create a dataset given opt.dataset_mode and other options
    dataset = create_dataset(opt)
    dataset_size = len(dataset)  # get the number of images in the dataset
    print(f"The number of training images = {dataset_size:d}")

    model = create_model(opt)  # create a model given opt.model and other options
    model.setup(opt)  # regular setup: load and print networks; create schedulers
    # create a visualizer that displays/saves images and plots
    visualizer = Visualizer(opt)

    # Running sample counter across all epochs. It advances by
    # opt.batch_size per batch, so it over-counts when the last batch of an
    # epoch is smaller; checkpoint names and frequencies are based on it.
    total_iters = 0

    # outer loop over epochs, resuming after opt.start_epoch
    for epoch in tqdm(
        range(opt.start_epoch + 1, opt.n_epochs + 1), desc="Completed Epochs"
    ):
        iter_data_time = time.time()  # timer for data loading per iteration
        # samples processed in the current epoch, reset to 0 every epoch
        epoch_iter = 0

        with tqdm(total=dataset_size, unit="image") as pbar:
            for data in dataset:  # inner loop within one epoch
                iter_start_time = time.time()  # timer for computation per iteration
                # Measure the data-loading time on every iteration so that the
                # value logged below belongs to the iteration being reported,
                # not to an earlier one.
                t_data = iter_start_time - iter_data_time
                visualizer.reset()

                total_iters += opt.batch_size
                epoch_iter += opt.batch_size

                model.set_input(data)  # unpack data from dataset and preprocess
                # calculate loss functions, get gradients, update network weights
                model.optimize_parameters()

                if _is_due(total_iters, opt.display_freq):
                    # display images on visdom and save images to a HTML file
                    save_result = _is_due(total_iters, opt.update_html_freq)
                    model.compute_visuals()
                    visualizer.display_current_results(
                        model.get_current_visuals(), epoch, save_result
                    )

                # show the current losses as the progress-bar description
                losses = model.get_current_losses()
                Visualizer.just_print_losses(
                    epoch, losses, print_func=pbar.set_description
                )

                if _is_due(total_iters, opt.print_freq):
                    # log training losses; the no-op print_func discards
                    # whatever the visualizer would otherwise echo through it
                    t_comp = (time.time() - iter_start_time) / opt.batch_size
                    visualizer.print_current_losses(
                        epoch,
                        epoch_iter,
                        losses,
                        t_comp,
                        t_data,
                        print_func=lambda *args: None,
                    )
                    if opt.display_id > 0:
                        visualizer.plot_current_losses(
                            epoch - 1, float(epoch_iter) / dataset_size, losses
                        )

                if _is_due(total_iters, opt.latest_checkpoint_freq):
                    # cache our latest model every <latest_checkpoint_freq> samples
                    print(
                        f"saving the latest model (epoch {epoch:d}, total_iters {total_iters:d}) "
                    )
                    save_prefix = (
                        f"iter_{total_iters:d}" if opt.save_by_iter else "latest"
                    )
                    model.save_checkpoint(save_prefix)

                iter_data_time = time.time()
                # Advance the bar by the real size of this batch: read the
                # length of the first value in the batch dict, because
                # opt.batch_size is wrong when the dataset length is not a
                # multiple of the batch size.
                pbar.update(len(next(iter(data.values()))))

        if _is_due(epoch, opt.checkpoint_freq):
            # cache our model every <checkpoint_freq> epochs
            print(
                f"saving the model at the end of epoch {epoch:d}, iters {total_iters:d}"
            )
            model.save_checkpoint("latest")
            model.save_checkpoint(epoch)
        # NOTE: the per-epoch model.update_learning_rate() call (and the
        # end-of-epoch timing message) were disabled in the original script
        # and are left disabled here.


if __name__ == "__main__":
    main()