diff --git a/.gitignore b/.gitignore index b6e4761..30b5af2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +# Custom +.vscode +datasets/ +out-of-date/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index 11ffafe..1ada819 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ This provides a convenient way to initialize backbone in detectron2. 3. Run: `python train_net_builtin.py --num-gpus --config-file configs/`. For example: `sh scripts/train_net_builtin.sh` -- Trained with pytorch formal imagenet trainer +- Trained with pytorch formal imagenet trainer [**Recommended**] 1. Read carefully with some arguments in `train_net.py` 2. Run: `sh /scripts/train_net.sh` \ No newline at end of file diff --git a/imgcls/modeling/meta_arch/clsnet.py b/imgcls/modeling/meta_arch/clsnet.py index f7dd2e2..b44ac33 100644 --- a/imgcls/modeling/meta_arch/clsnet.py +++ b/imgcls/modeling/meta_arch/clsnet.py @@ -40,7 +40,7 @@ def __init__(self, cfg): self.num_classes = cfg.MODEL.CLSNET.NUM_CLASSES self.in_features = cfg.MODEL.CLSNET.IN_FEATURES - self.backbone = build_backbone(cfg) + self.bottom_up = build_backbone(cfg) self.criterion = nn.CrossEntropyLoss() self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) @@ -63,7 +63,7 @@ def forward_d2(self, batched_inputs): images = self.preprocess_image(batched_inputs) gt_labels = [x['label'] for x in batched_inputs] gt_labels = torch.as_tensor(gt_labels, dtype=torch.long).to(self.device) - features = self.backbone(images.tensor) + features = self.bottom_up(images.tensor) features = [features[f] for f in self.in_features] if self.training: @@ -79,7 +79,7 @@ def forward_d2(self, batched_inputs): return processed_results def forward(self, images): - features = self.backbone(images) + features = self.bottom_up(images) return features["linear"] diff --git a/scripts/train_net.sh b/scripts/train_net.sh index cc29e7b..1ae9e6f 100644 --- 
a/scripts/train_net.sh +++ b/scripts/train_net.sh @@ -8,4 +8,4 @@ ### -CUDA_VISIBLE_DEVICES=4,5,6,7 python train_net.py --config-file configs/Base_image_cls.yaml --batch-size 2048 --dist-url 'tcp://127.0.0.1:51151' --dist-backend 'nccl' --multiprocessing-distributed --world-size 1 --rank 0 datasets/ImageNet2012 +CUDA_VISIBLE_DEVICES=4,5,6,7 python train_net.py --config-file configs/Base_image_cls.yaml --batch-size 1024 --dist-url 'tcp://127.0.0.1:51151' --dist-backend 'nccl' --multiprocessing-distributed --world-size 1 --rank 0 datasets/ImageNet2012 diff --git a/train_net.py b/train_net.py index aa4ecf3..9d2c95d 100644 --- a/train_net.py +++ b/train_net.py @@ -212,7 +212,7 @@ def main_worker(gpu, ngpus_per_node, args, cfg): if args.gpu is not None: # best_acc1 may be from a checkpoint from a different GPU best_acc1 = best_acc1.to(args.gpu) - model.load_state_dict(checkpoint['state_dict']) + model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch'])) @@ -278,9 +278,11 @@ def main_worker(gpu, ngpus_per_node, args, cfg): if not args.multiprocessing_distributed or (args.multiprocessing_distributed and args.rank % ngpus_per_node == 0): + save_checkpoint({ 'epoch': epoch + 1, - 'state_dict': model.state_dict(), + 'model': model.state_dict(), + 'matching_heuristics': True, 'best_acc1': best_acc1, 'optimizer': optimizer.state_dict(), }, is_best) @@ -381,7 +383,16 @@ def validate(val_loader, model, criterion, args): def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): torch.save(state, filename) if is_best: - shutil.copyfile(filename, 'model_best.pth.tar') + # strip 'module.' 
+ from collections import OrderedDict + p = OrderedDict() + for key, value in state['model'].items(): + if key.startswith('module.'): + key = key[7:] + p[key] = value + state['model'] = p + torch.save(state, "model_best.pth.tar") + # shutil.copyfile(filename, 'model_best.pth.tar') class AverageMeter(object):