From 47c92550e36a93be020ebbd9bd88d0795014e623 Mon Sep 17 00:00:00 2001 From: facebook-github-bot Date: Thu, 5 Dec 2019 12:56:49 -0800 Subject: [PATCH] Initial commit fbshipit-source-id: 5a7576eed3413a21a0ba0f620b59d45f695a253f --- .circleci/config.yml | 157 ++++ .github/ISSUE_TEMPLATE/bug-report.md | 53 ++ .github/ISSUE_TEMPLATE/documentation.md | 9 + .github/ISSUE_TEMPLATE/feature-request.md | 24 + .../ISSUE_TEMPLATE/questions-help-support.md | 8 + .gitignore | 52 ++ .isort.cfg | 2 + CHANGELOG.md | 3 + CODE_OF_CONDUCT.md | 76 ++ CONTRIBUTING.md | 28 + LICENSE | 21 + MANIFEST.in | 4 + README.md | 86 ++ bin/classy-project | 55 ++ classy_train.py | 167 ++++ classy_vision/__init__.py | 7 + classy_vision/configs/hmdb51/r3d34.json | 121 +++ .../densenet121_imagenet_classy_config.json | 65 ++ .../imagenet/postactivated_r2d101.json | 99 +++ .../configs/imagenet/postactivated_r2d50.json | 99 +++ .../configs/imagenet/preactivated_r2d101.json | 99 +++ .../configs/imagenet/preactivated_r2d50.json | 99 +++ .../resnet101_imagenet_classy_config.json | 74 ++ .../resnet50_imagenet_classy_config.json | 74 ++ .../resnet50_label_smoothing_criterion.json | 77 ++ .../resnext101_32x4d_fine_tuning_config.json | 78 ++ .../resnext101_imagenet_classy_config.json | 75 ++ .../resnext50_imagenet_classy_config.json | 75 ++ .../kinetics400/postactivated_i3d50.json | 111 +++ .../postactivated_i3d50_fine_tuning.json | 113 +++ .../kinetics400/preactivated_i3d50.json | 111 +++ .../preactivated_i3d50_fine_tuning.json | 113 +++ .../configs/r3d34_synthetic_video.json | 80 ++ ...esnet50_synthetic_image_classy_config.json | 74 ++ classy_vision/configs/ucf101/r3d34.json | 121 +++ classy_vision/dataset/README.md | 3 + classy_vision/dataset/__init__.py | 91 +++ classy_vision/dataset/classy_cifar.py | 80 ++ classy_vision/dataset/classy_dataset.py | 189 +++++ classy_vision/dataset/classy_hmdb51.py | 185 +++++ classy_vision/dataset/classy_imagenet.py | 60 ++ classy_vision/dataset/classy_kinetics400.py | 190 
+++++ .../dataset/classy_synthetic_image.py | 103 +++ .../dataset/classy_synthetic_video.py | 133 ++++ classy_vision/dataset/classy_ucf101.py | 184 +++++ classy_vision/dataset/classy_video_dataset.py | 261 ++++++ classy_vision/dataset/core/__init__.py | 17 + classy_vision/dataset/core/list_dataset.py | 44 + .../dataset/core/random_image_datasets.py | 88 ++ .../dataset/core/random_video_datasets.py | 70 ++ classy_vision/dataset/generic/hive.py | 123 +++ classy_vision/dataset/image_path_dataset.py | 130 +++ classy_vision/dataset/transforms/__init__.py | 116 +++ .../dataset/transforms/classy_transform.py | 32 + .../dataset/transforms/lighting_transform.py | 67 ++ classy_vision/dataset/transforms/util.py | 379 +++++++++ .../dataset/transforms/util_video.py | 378 +++++++++ classy_vision/distributed/__init__.py | 5 + classy_vision/distributed/launch_ray.py | 221 ++++++ classy_vision/generic/__init__.py | 5 + classy_vision/generic/args.py | 47 ++ classy_vision/generic/debug.py | 20 + classy_vision/generic/distributed_util.py | 147 ++++ classy_vision/generic/opts.py | 205 +++++ classy_vision/generic/pdb.py | 48 ++ classy_vision/generic/perf_stats.py | 237 ++++++ classy_vision/generic/profiler.py | 337 ++++++++ classy_vision/generic/registry_utils.py | 50 ++ classy_vision/generic/util.py | 749 ++++++++++++++++++ classy_vision/generic/visualize.py | 223 ++++++ classy_vision/heads/__init__.py | 91 +++ classy_vision/heads/classy_head.py | 59 ++ classy_vision/heads/fully_connected_head.py | 63 ++ .../heads/fully_convolutional_linear_head.py | 132 +++ classy_vision/heads/identity_head.py | 33 + classy_vision/hooks/__init__.py | 42 + classy_vision/hooks/checkpoint_hook.py | 115 +++ classy_vision/hooks/classy_hook.py | 174 ++++ .../exponential_moving_average_model_hook.py | 127 +++ .../hooks/loss_lr_meter_logging_hook.py | 112 +++ classy_vision/hooks/model_complexity_hook.py | 56 ++ classy_vision/hooks/model_tensorboard_hook.py | 79 ++ classy_vision/hooks/profiler_hook.py | 46 ++ 
classy_vision/hooks/progress_bar_hook.py | 70 ++ classy_vision/hooks/tensorboard_plot_hook.py | 138 ++++ classy_vision/hooks/time_metrics_hook.py | 98 +++ classy_vision/hooks/visdom_hook.py | 121 +++ classy_vision/hub/__init__.py | 10 + classy_vision/hub/classy_hub_interface.py | 177 +++++ classy_vision/hydra/args.yaml | 21 + .../hydra/config/resnet50_synthetic.yaml | 63 ++ classy_vision/hydra/dataset/imagenet.yaml | 12 + .../hydra/dataset/synthetic_image.yaml | 36 + classy_vision/hydra/loss/cross_entropy.yaml | 3 + .../loss/label_smoothing_cross_entropy.yaml | 4 + classy_vision/hydra/meters/accuracy.yaml | 4 + classy_vision/hydra/model/resnet_50.yaml | 12 + classy_vision/hydra/optimizer/sgd.yaml | 5 + classy_vision/hydra/param_scheduler/step.yaml | 5 + .../hydra/task/classification_task.yaml | 3 + classy_vision/losses/__init__.py | 115 +++ classy_vision/losses/barron_loss.py | 78 ++ classy_vision/losses/classy_loss.py | 44 + classy_vision/losses/label_smoothing_loss.py | 106 +++ classy_vision/losses/multi_output_sum_loss.py | 50 ++ .../losses/soft_target_cross_entropy_loss.py | 83 ++ classy_vision/losses/sum_arbitrary_loss.py | 67 ++ classy_vision/meters/__init__.py | 85 ++ classy_vision/meters/accuracy_meter.py | 179 +++++ classy_vision/meters/classy_meter.py | 116 +++ classy_vision/meters/precision_meter.py | 210 +++++ classy_vision/meters/recall_meter.py | 210 +++++ classy_vision/meters/video_accuracy_meter.py | 138 ++++ classy_vision/models/__init__.py | 108 +++ classy_vision/models/classy_block.py | 34 + classy_vision/models/classy_model.py | 304 +++++++ classy_vision/models/classy_model_wrapper.py | 59 ++ classy_vision/models/densenet.py | 263 ++++++ classy_vision/models/mlp.py | 99 +++ classy_vision/models/resnet.py | 33 + classy_vision/models/resnext.py | 409 ++++++++++ classy_vision/models/resnext3d.py | 421 ++++++++++ classy_vision/models/resnext3d_block.py | 465 +++++++++++ classy_vision/models/resnext3d_stage.py | 193 +++++ 
classy_vision/models/resnext3d_stem.py | 206 +++++ classy_vision/optim/__init__.py | 76 ++ classy_vision/optim/classy_optimizer.py | 257 ++++++ .../optim/param_scheduler/__init__.py | 94 +++ .../classy_vision_param_scheduler.py | 70 ++ .../param_scheduler/composite_scheduler.py | 143 ++++ .../param_scheduler/constant_scheduler.py | 40 + .../optim/param_scheduler/cosine_scheduler.py | 55 ++ .../optim/param_scheduler/linear_scheduler.py | 50 ++ .../param_scheduler/multi_step_scheduler.py | 108 +++ .../polynomial_decay_scheduler.py | 52 ++ .../optim/param_scheduler/step_scheduler.py | 57 ++ .../step_with_fixed_gamma_scheduler.py | 80 ++ classy_vision/optim/rmsprop.py | 106 +++ classy_vision/optim/sgd.py | 95 +++ classy_vision/tasks/__init__.py | 80 ++ classy_vision/tasks/classification_task.py | 714 +++++++++++++++++ classy_vision/tasks/classy_task.py | 156 ++++ classy_vision/tasks/fine_tuning_task.py | 99 +++ .../synthetic/configs/template_config.json | 66 ++ .../templates/synthetic/datasets/__init__.py | 15 + .../synthetic/datasets/my_dataset.py | 62 ++ .../templates/synthetic/losses/__init__.py | 15 + .../templates/synthetic/losses/my_loss.py | 20 + .../templates/synthetic/models/__init__.py | 15 + .../templates/synthetic/models/my_model.py | 29 + classy_vision/trainer/__init__.py | 12 + classy_vision/trainer/classy_trainer.py | 96 +++ classy_vision/trainer/distributed_trainer.py | 94 +++ classy_vision/trainer/elastic_trainer.py | 266 +++++++ classy_vision/trainer/local_trainer.py | 47 ++ examples/ray/cluster_config.yml | 165 ++++ examples/ray/requirements.txt | 2 + hubconf.py | 32 + requirements.txt | 2 + scripts/build_docs.sh | 96 +++ scripts/parse_sphinx.py | 77 ++ scripts/parse_tutorials.py | 114 +++ setup.py | 66 ++ sphinx/Makefile | 20 + sphinx/conf.py | 102 +++ sphinx/dataset.rst | 5 + sphinx/heads.rst | 5 + sphinx/hooks.rst | 5 + sphinx/index.rst | 34 + sphinx/losses.rst | 5 + sphinx/make.bat | 35 + sphinx/meters.rst | 5 + sphinx/models.rst | 5 + 
sphinx/optim.rst | 5 + sphinx/param_scheduler.rst | 5 + sphinx/tasks.rst | 5 + sphinx/trainer.rst | 5 + sphinx/transforms.rst | 16 + test/__init__.py | 5 + test/api_test.py | 177 +++++ test/classy_block_test.py | 74 ++ test/classy_vision_head_test.py | 67 ++ test/dataset_classy_dataset_test.py | 258 ++++++ test/dataset_classy_imagenet_test.py | 74 ++ test/dataset_classy_video_dataset_test.py | 154 ++++ test/dataset_image_path_dataset_test.py | 120 +++ ...aset_transforms_lighting_transform_test.py | 35 + test/dataset_transforms_test.py | 82 ++ test/dataset_transforms_util_test.py | 161 ++++ test/dataset_transforms_util_video_test.py | 137 ++++ test/generic/__init__.py | 5 + test/generic/config_utils.py | 369 +++++++++ test/generic/merge_dataset.py | 55 ++ test/generic/meter_test_utils.py | 288 +++++++ test/generic/optim_test_util.py | 223 ++++++ test/generic/utils.py | 265 +++++++ test/generic_util_json_blob_test.json | 34 + test/generic_util_test.py | 416 ++++++++++ test/hooks_checkpoint_hook_test.py | 127 +++ test/hooks_classy_hook_test.py | 40 + ...onential_moving_average_model_hook_test.py | 112 +++ test/hooks_loss_lr_meter_logging_hook_test.py | 112 +++ test/hooks_profiler_hook_test.py | 51 ++ test/hooks_time_metrics_hook_test.py | 127 +++ test/hub_classy_hub_interface_test.py | 91 +++ test/losses_barron_loss_test.py | 66 ++ test/losses_generic_utils_test.py | 24 + ...label_smoothing_cross_entropy_loss_test.py | 212 +++++ test/losses_multi_output_sum_loss_test.py | 42 + ...ses_soft_target_cross_entropy_loss_test.py | 84 ++ test/losses_sum_arbitrary_loss_test.py | 101 +++ test/losses_test.py | 61 ++ .../hooks_model_complexity_hook_test.py | 63 ++ .../hooks_model_tensorboard_hook_test.py | 53 ++ test/manual/hooks_progress_bar_hook_test.py | 89 +++ .../hooks_tensorboard_plot_hook_test.py | 158 ++++ test/manual/hooks_visdom_hook_test.py | 121 +++ .../manual/models_classy_vision_model_test.py | 113 +++ test/meters_accuracy_meter_test.py | 137 ++++ 
test/meters_precision_meter_test.py | 191 +++++ test/meters_recall_meter_test.py | 192 +++++ test/meters_video_accuracy_meter_test.py | 207 +++++ test/models_classy_model_test.py | 77 ++ test/models_classy_model_wrapper_test.py | 83 ++ test/models_densenet_test.py | 53 ++ test/models_mlp_test.py | 26 + test/models_resnext3d_test.py | 236 ++++++ test/models_resnext_test.py | 85 ++ test/optim_param_scheduler_composite_test.py | 254 ++++++ test/optim_param_scheduler_constant_test.py | 49 ++ test/optim_param_scheduler_cosine_test.py | 95 +++ test/optim_param_scheduler_linear_test.py | 67 ++ test/optim_param_scheduler_multi_step_test.py | 126 +++ test/optim_param_scheduler_polynomial_test.py | 58 ++ test/optim_param_scheduler_step_test.py | 75 ++ ...am_scheduler_step_with_fixed_gamma_test.py | 83 ++ test/optim_param_scheduler_test.py | 179 +++++ test/optim_rmsprop_test.py | 27 + test/optim_sgd_test.py | 25 + test/suites.py | 48 ++ test/tasks_classification_task_test.py | 112 +++ test/tasks_fine_tuning_task_test.py | 112 +++ test/trainer_distributed_trainer_test.py | 64 ++ test/trainer_local_trainer_test.py | 42 + tutorials/classy_dataset.ipynb | 615 ++++++++++++++ tutorials/classy_loss.ipynb | 174 ++++ tutorials/classy_model.ipynb | 195 +++++ tutorials/fine_tuning.ipynb | 693 ++++++++++++++++ tutorials/getting_started.ipynb | 559 +++++++++++++ tutorials/pet_aws.ipynb | 295 +++++++ tutorials/ray_aws.ipynb | 255 ++++++ tutorials/torchscript.ipynb | 142 ++++ tutorials/video_classification.ipynb | 313 ++++++++ tutorials/wsl_model_predict.ipynb | 167 ++++ website/README.md | 193 +++++ website/core/Footer.js | 111 +++ website/core/Tutorial.js | 83 ++ website/core/TutorialSidebar.js | 92 +++ website/package.json | 14 + website/pages/en/help.js | 54 ++ website/pages/en/index.js | 215 +++++ website/pages/en/users.js | 48 ++ website/pages/tutorials/index.js | 58 ++ website/sidebars.json | 10 + website/siteConfig.js | 114 +++ website/static/css/alabaster.css | 716 
+++++++++++++++++ website/static/css/basic.css | 736 +++++++++++++++++ website/static/css/code_block_buttons.css | 39 + website/static/css/custom.css | 310 ++++++++ website/static/img/cv-icon.png | Bin 0 -> 51451 bytes website/static/img/cv-logo.png | Bin 0 -> 69847 bytes website/static/img/cv-logo.svg | 1 + website/static/img/expanding_arrows.svg | 1 + website/static/img/favicon.ico | Bin 0 -> 766 bytes website/static/img/favicon.png | Bin 0 -> 1742 bytes website/static/img/landing-background.jpg | Bin 0 -> 217910 bytes website/static/img/modular.svg | 1 + website/static/img/multi-modal.png | Bin 0 -> 21240 bytes website/static/img/oss_logo.png | Bin 0 -> 4370 bytes website/static/img/pytorch_logo.svg | 13 + website/static/js/code_block_buttons.js | 47 ++ website/tutorials.json | 32 + 282 files changed, 31438 insertions(+) create mode 100644 .circleci/config.yml create mode 100644 .github/ISSUE_TEMPLATE/bug-report.md create mode 100644 .github/ISSUE_TEMPLATE/documentation.md create mode 100644 .github/ISSUE_TEMPLATE/feature-request.md create mode 100644 .github/ISSUE_TEMPLATE/questions-help-support.md create mode 100644 .gitignore create mode 100644 .isort.cfg create mode 100644 CHANGELOG.md create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.md create mode 100755 bin/classy-project create mode 100755 classy_train.py create mode 100644 classy_vision/__init__.py create mode 100644 classy_vision/configs/hmdb51/r3d34.json create mode 100644 classy_vision/configs/imagenet/densenet121_imagenet_classy_config.json create mode 100644 classy_vision/configs/imagenet/postactivated_r2d101.json create mode 100644 classy_vision/configs/imagenet/postactivated_r2d50.json create mode 100644 classy_vision/configs/imagenet/preactivated_r2d101.json create mode 100644 classy_vision/configs/imagenet/preactivated_r2d50.json create mode 100644 
classy_vision/configs/imagenet/resnet101_imagenet_classy_config.json create mode 100644 classy_vision/configs/imagenet/resnet50_imagenet_classy_config.json create mode 100644 classy_vision/configs/imagenet/resnet50_label_smoothing_criterion.json create mode 100644 classy_vision/configs/imagenet/resnext101_32x4d_fine_tuning_config.json create mode 100644 classy_vision/configs/imagenet/resnext101_imagenet_classy_config.json create mode 100644 classy_vision/configs/imagenet/resnext50_imagenet_classy_config.json create mode 100644 classy_vision/configs/kinetics400/postactivated_i3d50.json create mode 100644 classy_vision/configs/kinetics400/postactivated_i3d50_fine_tuning.json create mode 100644 classy_vision/configs/kinetics400/preactivated_i3d50.json create mode 100644 classy_vision/configs/kinetics400/preactivated_i3d50_fine_tuning.json create mode 100644 classy_vision/configs/r3d34_synthetic_video.json create mode 100644 classy_vision/configs/resnet50_synthetic_image_classy_config.json create mode 100644 classy_vision/configs/ucf101/r3d34.json create mode 100644 classy_vision/dataset/README.md create mode 100644 classy_vision/dataset/__init__.py create mode 100644 classy_vision/dataset/classy_cifar.py create mode 100644 classy_vision/dataset/classy_dataset.py create mode 100644 classy_vision/dataset/classy_hmdb51.py create mode 100644 classy_vision/dataset/classy_imagenet.py create mode 100644 classy_vision/dataset/classy_kinetics400.py create mode 100644 classy_vision/dataset/classy_synthetic_image.py create mode 100644 classy_vision/dataset/classy_synthetic_video.py create mode 100644 classy_vision/dataset/classy_ucf101.py create mode 100644 classy_vision/dataset/classy_video_dataset.py create mode 100644 classy_vision/dataset/core/__init__.py create mode 100644 classy_vision/dataset/core/list_dataset.py create mode 100644 classy_vision/dataset/core/random_image_datasets.py create mode 100644 classy_vision/dataset/core/random_video_datasets.py create mode 100644 
classy_vision/dataset/generic/hive.py create mode 100644 classy_vision/dataset/image_path_dataset.py create mode 100644 classy_vision/dataset/transforms/__init__.py create mode 100644 classy_vision/dataset/transforms/classy_transform.py create mode 100644 classy_vision/dataset/transforms/lighting_transform.py create mode 100644 classy_vision/dataset/transforms/util.py create mode 100644 classy_vision/dataset/transforms/util_video.py create mode 100644 classy_vision/distributed/__init__.py create mode 100755 classy_vision/distributed/launch_ray.py create mode 100644 classy_vision/generic/__init__.py create mode 100644 classy_vision/generic/args.py create mode 100644 classy_vision/generic/debug.py create mode 100644 classy_vision/generic/distributed_util.py create mode 100644 classy_vision/generic/opts.py create mode 100644 classy_vision/generic/pdb.py create mode 100644 classy_vision/generic/perf_stats.py create mode 100644 classy_vision/generic/profiler.py create mode 100644 classy_vision/generic/registry_utils.py create mode 100644 classy_vision/generic/util.py create mode 100644 classy_vision/generic/visualize.py create mode 100644 classy_vision/heads/__init__.py create mode 100644 classy_vision/heads/classy_head.py create mode 100644 classy_vision/heads/fully_connected_head.py create mode 100644 classy_vision/heads/fully_convolutional_linear_head.py create mode 100644 classy_vision/heads/identity_head.py create mode 100644 classy_vision/hooks/__init__.py create mode 100644 classy_vision/hooks/checkpoint_hook.py create mode 100644 classy_vision/hooks/classy_hook.py create mode 100644 classy_vision/hooks/exponential_moving_average_model_hook.py create mode 100644 classy_vision/hooks/loss_lr_meter_logging_hook.py create mode 100644 classy_vision/hooks/model_complexity_hook.py create mode 100644 classy_vision/hooks/model_tensorboard_hook.py create mode 100644 classy_vision/hooks/profiler_hook.py create mode 100644 classy_vision/hooks/progress_bar_hook.py create mode 
100644 classy_vision/hooks/tensorboard_plot_hook.py create mode 100644 classy_vision/hooks/time_metrics_hook.py create mode 100644 classy_vision/hooks/visdom_hook.py create mode 100644 classy_vision/hub/__init__.py create mode 100644 classy_vision/hub/classy_hub_interface.py create mode 100644 classy_vision/hydra/args.yaml create mode 100644 classy_vision/hydra/config/resnet50_synthetic.yaml create mode 100644 classy_vision/hydra/dataset/imagenet.yaml create mode 100644 classy_vision/hydra/dataset/synthetic_image.yaml create mode 100644 classy_vision/hydra/loss/cross_entropy.yaml create mode 100644 classy_vision/hydra/loss/label_smoothing_cross_entropy.yaml create mode 100644 classy_vision/hydra/meters/accuracy.yaml create mode 100644 classy_vision/hydra/model/resnet_50.yaml create mode 100644 classy_vision/hydra/optimizer/sgd.yaml create mode 100644 classy_vision/hydra/param_scheduler/step.yaml create mode 100644 classy_vision/hydra/task/classification_task.yaml create mode 100644 classy_vision/losses/__init__.py create mode 100644 classy_vision/losses/barron_loss.py create mode 100644 classy_vision/losses/classy_loss.py create mode 100644 classy_vision/losses/label_smoothing_loss.py create mode 100644 classy_vision/losses/multi_output_sum_loss.py create mode 100644 classy_vision/losses/soft_target_cross_entropy_loss.py create mode 100644 classy_vision/losses/sum_arbitrary_loss.py create mode 100644 classy_vision/meters/__init__.py create mode 100644 classy_vision/meters/accuracy_meter.py create mode 100644 classy_vision/meters/classy_meter.py create mode 100644 classy_vision/meters/precision_meter.py create mode 100644 classy_vision/meters/recall_meter.py create mode 100644 classy_vision/meters/video_accuracy_meter.py create mode 100644 classy_vision/models/__init__.py create mode 100644 classy_vision/models/classy_block.py create mode 100644 classy_vision/models/classy_model.py create mode 100644 classy_vision/models/classy_model_wrapper.py create mode 100644 
classy_vision/models/densenet.py create mode 100644 classy_vision/models/mlp.py create mode 100644 classy_vision/models/resnet.py create mode 100644 classy_vision/models/resnext.py create mode 100644 classy_vision/models/resnext3d.py create mode 100644 classy_vision/models/resnext3d_block.py create mode 100644 classy_vision/models/resnext3d_stage.py create mode 100644 classy_vision/models/resnext3d_stem.py create mode 100644 classy_vision/optim/__init__.py create mode 100644 classy_vision/optim/classy_optimizer.py create mode 100644 classy_vision/optim/param_scheduler/__init__.py create mode 100644 classy_vision/optim/param_scheduler/classy_vision_param_scheduler.py create mode 100644 classy_vision/optim/param_scheduler/composite_scheduler.py create mode 100644 classy_vision/optim/param_scheduler/constant_scheduler.py create mode 100644 classy_vision/optim/param_scheduler/cosine_scheduler.py create mode 100644 classy_vision/optim/param_scheduler/linear_scheduler.py create mode 100644 classy_vision/optim/param_scheduler/multi_step_scheduler.py create mode 100644 classy_vision/optim/param_scheduler/polynomial_decay_scheduler.py create mode 100644 classy_vision/optim/param_scheduler/step_scheduler.py create mode 100644 classy_vision/optim/param_scheduler/step_with_fixed_gamma_scheduler.py create mode 100644 classy_vision/optim/rmsprop.py create mode 100644 classy_vision/optim/sgd.py create mode 100644 classy_vision/tasks/__init__.py create mode 100644 classy_vision/tasks/classification_task.py create mode 100644 classy_vision/tasks/classy_task.py create mode 100644 classy_vision/tasks/fine_tuning_task.py create mode 100644 classy_vision/templates/synthetic/configs/template_config.json create mode 100644 classy_vision/templates/synthetic/datasets/__init__.py create mode 100644 classy_vision/templates/synthetic/datasets/my_dataset.py create mode 100644 classy_vision/templates/synthetic/losses/__init__.py create mode 100644 
classy_vision/templates/synthetic/losses/my_loss.py create mode 100644 classy_vision/templates/synthetic/models/__init__.py create mode 100644 classy_vision/templates/synthetic/models/my_model.py create mode 100644 classy_vision/trainer/__init__.py create mode 100644 classy_vision/trainer/classy_trainer.py create mode 100644 classy_vision/trainer/distributed_trainer.py create mode 100644 classy_vision/trainer/elastic_trainer.py create mode 100644 classy_vision/trainer/local_trainer.py create mode 100644 examples/ray/cluster_config.yml create mode 100644 examples/ray/requirements.txt create mode 100644 hubconf.py create mode 100644 requirements.txt create mode 100755 scripts/build_docs.sh create mode 100644 scripts/parse_sphinx.py create mode 100644 scripts/parse_tutorials.py create mode 100644 setup.py create mode 100644 sphinx/Makefile create mode 100644 sphinx/conf.py create mode 100644 sphinx/dataset.rst create mode 100644 sphinx/heads.rst create mode 100644 sphinx/hooks.rst create mode 100644 sphinx/index.rst create mode 100644 sphinx/losses.rst create mode 100644 sphinx/make.bat create mode 100644 sphinx/meters.rst create mode 100644 sphinx/models.rst create mode 100644 sphinx/optim.rst create mode 100644 sphinx/param_scheduler.rst create mode 100644 sphinx/tasks.rst create mode 100644 sphinx/trainer.rst create mode 100644 sphinx/transforms.rst create mode 100644 test/__init__.py create mode 100644 test/api_test.py create mode 100644 test/classy_block_test.py create mode 100644 test/classy_vision_head_test.py create mode 100644 test/dataset_classy_dataset_test.py create mode 100644 test/dataset_classy_imagenet_test.py create mode 100644 test/dataset_classy_video_dataset_test.py create mode 100644 test/dataset_image_path_dataset_test.py create mode 100644 test/dataset_transforms_lighting_transform_test.py create mode 100644 test/dataset_transforms_test.py create mode 100644 test/dataset_transforms_util_test.py create mode 100644 
test/dataset_transforms_util_video_test.py create mode 100644 test/generic/__init__.py create mode 100644 test/generic/config_utils.py create mode 100644 test/generic/merge_dataset.py create mode 100644 test/generic/meter_test_utils.py create mode 100644 test/generic/optim_test_util.py create mode 100644 test/generic/utils.py create mode 100644 test/generic_util_json_blob_test.json create mode 100644 test/generic_util_test.py create mode 100644 test/hooks_checkpoint_hook_test.py create mode 100644 test/hooks_classy_hook_test.py create mode 100644 test/hooks_exponential_moving_average_model_hook_test.py create mode 100644 test/hooks_loss_lr_meter_logging_hook_test.py create mode 100644 test/hooks_profiler_hook_test.py create mode 100644 test/hooks_time_metrics_hook_test.py create mode 100644 test/hub_classy_hub_interface_test.py create mode 100644 test/losses_barron_loss_test.py create mode 100644 test/losses_generic_utils_test.py create mode 100644 test/losses_label_smoothing_cross_entropy_loss_test.py create mode 100644 test/losses_multi_output_sum_loss_test.py create mode 100644 test/losses_soft_target_cross_entropy_loss_test.py create mode 100644 test/losses_sum_arbitrary_loss_test.py create mode 100644 test/losses_test.py create mode 100644 test/manual/hooks_model_complexity_hook_test.py create mode 100644 test/manual/hooks_model_tensorboard_hook_test.py create mode 100644 test/manual/hooks_progress_bar_hook_test.py create mode 100644 test/manual/hooks_tensorboard_plot_hook_test.py create mode 100644 test/manual/hooks_visdom_hook_test.py create mode 100644 test/manual/models_classy_vision_model_test.py create mode 100644 test/meters_accuracy_meter_test.py create mode 100644 test/meters_precision_meter_test.py create mode 100644 test/meters_recall_meter_test.py create mode 100644 test/meters_video_accuracy_meter_test.py create mode 100644 test/models_classy_model_test.py create mode 100644 test/models_classy_model_wrapper_test.py create mode 100644 
test/models_densenet_test.py create mode 100644 test/models_mlp_test.py create mode 100644 test/models_resnext3d_test.py create mode 100644 test/models_resnext_test.py create mode 100644 test/optim_param_scheduler_composite_test.py create mode 100644 test/optim_param_scheduler_constant_test.py create mode 100644 test/optim_param_scheduler_cosine_test.py create mode 100644 test/optim_param_scheduler_linear_test.py create mode 100644 test/optim_param_scheduler_multi_step_test.py create mode 100644 test/optim_param_scheduler_polynomial_test.py create mode 100644 test/optim_param_scheduler_step_test.py create mode 100644 test/optim_param_scheduler_step_with_fixed_gamma_test.py create mode 100644 test/optim_param_scheduler_test.py create mode 100644 test/optim_rmsprop_test.py create mode 100644 test/optim_sgd_test.py create mode 100644 test/suites.py create mode 100644 test/tasks_classification_task_test.py create mode 100644 test/tasks_fine_tuning_task_test.py create mode 100644 test/trainer_distributed_trainer_test.py create mode 100644 test/trainer_local_trainer_test.py create mode 100644 tutorials/classy_dataset.ipynb create mode 100644 tutorials/classy_loss.ipynb create mode 100644 tutorials/classy_model.ipynb create mode 100644 tutorials/fine_tuning.ipynb create mode 100644 tutorials/getting_started.ipynb create mode 100644 tutorials/pet_aws.ipynb create mode 100644 tutorials/ray_aws.ipynb create mode 100644 tutorials/torchscript.ipynb create mode 100644 tutorials/video_classification.ipynb create mode 100644 tutorials/wsl_model_predict.ipynb create mode 100644 website/README.md create mode 100644 website/core/Footer.js create mode 100644 website/core/Tutorial.js create mode 100644 website/core/TutorialSidebar.js create mode 100644 website/package.json create mode 100644 website/pages/en/help.js create mode 100755 website/pages/en/index.js create mode 100644 website/pages/en/users.js create mode 100644 website/pages/tutorials/index.js create mode 100644 
website/sidebars.json create mode 100644 website/siteConfig.js create mode 100644 website/static/css/alabaster.css create mode 100644 website/static/css/basic.css create mode 100644 website/static/css/code_block_buttons.css create mode 100644 website/static/css/custom.css create mode 100644 website/static/img/cv-icon.png create mode 100644 website/static/img/cv-logo.png create mode 100644 website/static/img/cv-logo.svg create mode 100644 website/static/img/expanding_arrows.svg create mode 100644 website/static/img/favicon.ico create mode 100644 website/static/img/favicon.png create mode 100644 website/static/img/landing-background.jpg create mode 100644 website/static/img/modular.svg create mode 100644 website/static/img/multi-modal.png create mode 100644 website/static/img/oss_logo.png create mode 100644 website/static/img/pytorch_logo.svg create mode 100644 website/static/js/code_block_buttons.js create mode 100644 website/tutorials.json diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000000..5aed9769cc --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,157 @@ +# Python CircleCI 2.0 configuration file +# +# Check https://circleci.com/docs/2.0/language-python/ for more details +# +version: 2 + +# ------------------------------------------------------------------------------------- +# Environments to run the jobs in +# ------------------------------------------------------------------------------------- +cpu: &cpu + docker: + machine: + image: default + resource_class: medium + +gpu: &gpu + environment: + CUDA_VERSION: "10.1" + machine: + image: default + resource_class: gpu.medium # Tesla M60 + +# ------------------------------------------------------------------------------------- +# Re-usable commands +# ------------------------------------------------------------------------------------- +setup_venv: &setup_venv + - run: + name: Activate Venv + command: | + python -m venv ~/venv + echo ". 
~/venv/bin/activate" >> $BASH_ENV + . ~/venv/bin/activate + python --version + which python + which pip + +install_python: &install_python + - run: + name: Install Python + working_directory: ~/ + command: | + pyenv install 3.6.1 + pyenv global 3.6.1 + +install_dep: &install_dep + - run: + name: Install Dependencies + command: | + which python + which pip + pip install --upgrade pip + pip install --progress-bar off -r requirements.txt + pip list + +run_tests: &run_tests + - run: + name: Run Tests + command: | + python setup.py test -s test.suites.unittests -v + +# ------------------------------------------------------------------------------------- +# Jobs to run +# ------------------------------------------------------------------------------------- +jobs: + cpu_tests: + <<: *cpu + + working_directory: ~/ClassyVision + + steps: + - checkout + + - <<: *install_python + + - <<: *setup_venv + + # Download and cache dependencies + - restore_cache: + keys: + - v2-cpu-dependencies-{{ checksum "requirements.txt" }} + # fallback to using the latest cache if no exact match is found + - v2-cpu-dependencies- + + - <<: *install_dep + + - save_cache: + paths: + - ~/venv + key: v2-cpu-dependencies-{{ checksum "requirements.txt" }} + + - <<: *run_tests + + - run: + name: Run classy-project tests + command: | + pip install . 
+ classy-project my-project + pushd my-project + ./classy_train.py --device cpu --config configs/template_config.json + popd + rm -rf my-project + + + gpu_tests: + <<: *gpu + + working_directory: ~/ClassyVision + + steps: + - checkout + + - run: + name: Setup CUDA + working_directory: ~/ + command: | + # download and install nvidia drivers, cuda, etc + wget -q 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-430.40.run' + sudo /bin/bash ./NVIDIA-Linux-x86_64-430.40.run -s --no-drm + wget -q https://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda-repo-ubuntu1404-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb + sudo dpkg -i cuda-repo-ubuntu1404-10-1-local-10.1.243-418.87.00_1.0-1_amd64.deb + sudo apt-key add /var/cuda-repo-10-1-local-10.1.243-418.87.00/7fa2af80.pub + nvidia-smi + + - <<: *install_python + + - <<: *setup_venv + + # Download and cache dependencies + - restore_cache: + keys: + - v1-gpu-dependencies-{{ checksum "requirements.txt" }} + # fallback to using the latest cache if no exact match is found + - v1-gpu-dependencies- + + - <<: *install_dep + + - run: + name: Check CUDA Available + command: python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'" + + - save_cache: + paths: + - ~/venv + key: v1-gpu-dependencies-{{ checksum "requirements.txt" }} + + - <<: *run_tests + + +# ------------------------------------------------------------------------------------- +# Workflows +# ------------------------------------------------------------------------------------- +workflows: + version: 2 + build_and_test: + jobs: + - cpu_tests + - gpu_tests diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 0000000000..6556275399 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,53 @@ +--- +name: "\U0001F41B Bug Report" +about: Submit a bug report to help us improve Classy Vision + +--- + +## 🐛 Bug + + + +## To Reproduce 
+ +Steps to reproduce the behavior: + +1. +1. +1. + + + +## Expected behavior + + + +## Environment + + - What commands did you use to install Classy Vision (conda/pip/build from source)? + - If you are building from source, which commit is it? + - What does `classy_vision.__version__` print? (If applicable) + +Please copy and paste the output from the Pytorch +[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py) +(or fill out the checklist below manually). + +You can get the script and run it with: +``` +wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py +# For security purposes, please check the contents of collect_env.py before running it. +python collect_env.py +``` + + - PyTorch Version (e.g., 1.0): + - OS (e.g., Linux): + - How you installed PyTorch (`conda`, `pip`, source): + - Build command you used (if compiling from source): + - Python version: + - CUDA/cuDNN version: + - GPU models and configuration: + - Any other relevant information: + +## Additional context + + diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md new file mode 100644 index 0000000000..bbb0c23c7b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -0,0 +1,9 @@ +--- +name: "\U0001F4DA Documentation" +about: Report an issue related to Classy Vision documentation + +--- + +## 📚 Documentation + + diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 0000000000..66677b41ab --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,24 @@ +--- +name: "\U0001F680Feature Request" +about: Submit a proposal/request for a new Classy Vision feature + +--- + +## 🚀 Feature + + +## Motivation + + + +## Pitch + + + +## Alternatives + + + +## Additional context + + diff --git a/.github/ISSUE_TEMPLATE/questions-help-support.md 
b/.github/ISSUE_TEMPLATE/questions-help-support.md new file mode 100644 index 0000000000..78cc87af49 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/questions-help-support.md @@ -0,0 +1,8 @@ +--- +name: "❓Questions/Help/Support" +about: Do you need support? +--- + +## ❓ Questions and Help + +Before creating an issue, please go over our [Tutorials](https://classyvision.ai/tutorials) and [API Reference](https://classyvision.ai/api). If you cannot find the information you are looking for, please enquire in Classy Vision's `#help` [slack](https://classyvision.slack.com) channel before creating an issue. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..4a71e12c88 --- /dev/null +++ b/.gitignore @@ -0,0 +1,52 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +.DS_Store + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ + +# Jupyter Notebook +.ipynb_checkpoints + +node_modules + +lib/core/metadata.js +lib/core/MetadataBlog.js + +# Sphinx documentation +sphinx/build/ + +# Docusaurus +website/translated_docs +website/build/ +website/yarn.lock +website/node_modules +website/i18n/* + +## Generated for tutorials +website/_tutorials/ +website/static/files/ +website/pages/tutorials/* +!website/pages/tutorials/index.js + +## Generated for Sphinx +website/pages/api/ +website/static/js/* +!website/static/js/code_block_buttons.js +website/static/_sphinx-sources/ diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000000..efc0bdb07a --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,2 @@ +[settings] +multi_line_output=3 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000000..7199f3ef2e --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,3 @@ +0.1 (December 9, 2019) + +* Initial release diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..f049d4c531 --- /dev/null +++ 
b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. 
+ +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at <opensource-conduct@fb.com>. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..d18f22348d --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,28 @@ +# Contributing to Facebook AI Research Classy Vision framework (ClassyVision) +We want to make contributing to this project as easy and transparent as +possible. + +## Pull Requests +We actively welcome your pull requests. + +1. Fork the repo and create your branch from `master`. +2. If you've added code that should be tested, add tests. +3.
If you've changed APIs, update the documentation. +4. Ensure the test suite passes. +5. Make sure your code lints. +6. If you haven't already, complete the Contributor License Agreement ("CLA"). + +## Contributor License Agreement ("CLA") +In order to accept your pull request, we need you to submit a CLA. You only need +to do this once to work on any of Facebook's open source projects. + +Complete your CLA here: <https://code.facebook.com/cla> + +## Issues +We use GitHub issues to track public bugs. Please ensure your description is +clear and has sufficient instructions to be able to reproduce the issue. + +## License +By contributing to Facebook AI Research Classy Vision framework (ClassyVision), +you agree that your contributions will be licensed under the LICENSE file in +the root directory of this source tree. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000..b96dcb0480 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Facebook, Inc. and its affiliates. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000..1d762a275e --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +graft classy_vision/templates +graft classy_vision/configs +include classy_train.py +recursive-include classy_vision/hydra *.yaml diff --git a/README.md b/README.md new file mode 100644 index 0000000000..d7ec5048ca --- /dev/null +++ b/README.md @@ -0,0 +1,86 @@ +

+ +[![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/facebookresearch/ClassyVision/blob/master/LICENSE) [![CircleCI](https://circleci.com/gh/facebookresearch/ClassyVision.svg?style=svg&circle-token=feeafa057f8d3f6c0c15dfd74db8dd596d9684c8)](https://circleci.com/gh/facebookresearch/ClassyVision) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/facebookresearch/ClassyVision/blob/master/CONTRIBUTING.md) + +-------------------------------------------------------------------------------- + +[Classy Vision](http://classyvision.ai) is a new end-to-end, PyTorch-based framework for large-scale training of state-of-the-art image and video classification models. Previous computer vision (CV) libraries have been focused on providing components for users to build their own frameworks for their research. While this approach offers flexibility for researchers, in production settings it leads to duplicative efforts, and requires users to migrate research between frameworks and to relearn the minutia of efficient distributed training and data loading. Our PyTorch-based CV framework offers a better solution for training at scale and for deploying to production. It offers several notable advantages: + +* Ease of use. The library features a modular, flexible design that allows anyone to train machine learning models on top of PyTorch using very simple abstractions. The system also has out-of-the-box integration with Amazon Web Services (AWS), facilitating research at scale and making it simple to move between research and production. +* High performance. Researchers can use the framework to train Resnet50 on ImageNet in as little as 15 minutes, for example. +* Demonstrated success in production at scale. 
The framework is currently in use at Facebook, where we’ve used it to replicate the state-of-the-art results from the paper [Exploring the Limits of Weakly Supervised Pretraining](https://arxiv.org/abs/1805.00932). +* Integration with PyTorch Hub. AI researchers and engineers can download and fine-tune the best publicly available ImageNet models with just a few lines of code. +* Elastic training. We have also added experimental integration with PyTorch Elastic, which allows distributed training jobs to adjust as available resources in the cluster change. It also makes distributed training robust to transient hardware failures. + +Classy Vision is beta software. The project is under active development and our APIs are subject to change in future releases. + +## Installation + +#### Installation Requirements +Make sure you have an up-to-date installation of PyTorch (1.3.1), Python (3.6) and torchvision (1.4). If you want to use GPUs, then a CUDA installation (10.1) is also required. + +#### Installing the latest stable release +To install Classy Vision: +```bash +pip install classy_vision +``` + +#### Manual install of latest commit on master + +Alternatively you can do a manual install. + +```bash +git clone https://github.com/facebookresearch/ClassyVision.git +cd ClassyVision +pip install . +``` + +## Getting started + +Classy Vision aims to support a variety of projects to be built and open sourced on top of the core library. We provide utilities for setting up a project in a standard format with some simple generated examples to get started with. To start a new project: + +```bash +classy-project my-project +cd my-project +``` + +We even include a simple, synthetic, training example to show how to use Classy Vision: + +```bash + ./classy_train.py --config configs/template_config.json + ``` + +Voila! A few seconds later your first training run using our classification task should be done.
Check out the results in the output folder: +```bash +ls output_<timestamp>/checkpoints/ +checkpoint.torch model_phase-0_end.torch model_phase-1_end.torch model_phase-2_end.torch model_phase-3_end.torch +``` + +checkpoint.torch is the latest model (in this case, the same as model_phase-3); a checkpoint is saved at the end of each phase. + +For more details / tutorials see the documentation section below. + +## Documentation + +Please see our [tutorials](https://classyvision.ai/tutorials/getting_started) to learn how to get started on Classy Vision and customize your training runs. Full documentation is available [here](https://classyvision.ai/api). + +## Join the Classy Vision community +* Website: http://classyvision.ai +* Slack: [invite link](https://join.slack.com/t/classyvision/shared_invite/enQtODMwODA5Mjg3MTI3LWM5NzNlOTZjNWY3ZTE5YTViYmU2NWM1MDBjMWIwZTIwNmIyY2JjOTkyMTVmMTYzMmIwZWRmZjZmYjhhZTBkZGE) + +See the [CONTRIBUTING](CONTRIBUTING.md) file for how to help out. + +## License +Classy Vision is MIT licensed, as found in the LICENSE file. + +## Citing Classy Vision +If you use Classy Vision in your work, please use the following BibTeX entry: + +``` +@article{adcock2019classy, + title={Classy Vision}, + author={{Adcock}, A. and {Reis}, V. and {Singh}, M. and {Yan}, Z. and {van der Maaten}, L. and {Zhang}, K. and {Motwani}, S. and {Guerin}, J. and {Goyal}, N. and {Misra}, I. and {Gustafson}, L. and {Changhan}, C. and {Goyal}, P.}, + howpublished = {\url{https://github.com/facebookresearch/ClassyVision}}, + year={2019} +} +``` diff --git a/bin/classy-project b/bin/classy-project new file mode 100755 index 0000000000..b54510177c --- /dev/null +++ b/bin/classy-project @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree.
+ +import argparse +import logging +import os +import shutil +import sys +from pathlib import Path +import site + +logging.basicConfig(level=logging.INFO) +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("project_name") + parser.add_argument("--template-name", default="synthetic") + args = parser.parse_args() + + root = Path(site.getsitepackages()[0]) + egg_link = root / "classy-vision.egg-link" + dev_install = False + # Support development mode (pip install -e) + if egg_link.exists(): + dev_install = True + with egg_link.open("r") as f: + lines = f.read().split("\n") + if lines[1] != ".": + raise RuntimeError("Unexpected egg-link format") + root = Path(lines[0]) + + base_path = root / "classy_vision" + template_path = base_path / "templates" / args.template_name + destination_path = Path(os.getcwd()) / args.project_name + + if destination_path.exists(): + logging.error(f"Project directory '{destination_path}' already exists!") + sys.exit(1) + + if dev_install: + classy_train_path = root / "classy_train.py" + else: + classy_train_path = Path(sys.prefix) / "classy_vision" / "classy_train.py" + + shutil.copytree(template_path, destination_path) + shutil.copy(classy_train_path, destination_path) + + logging.info( + f""" + Successfully generated template project at '{destination_path}'. + To get started, run: + $ cd {args.project_name} + $ ./classy_train.py --config configs/template_config.json""" + ) diff --git a/classy_train.py b/classy_train.py new file mode 100755 index 0000000000..8ef5ebdd37 --- /dev/null +++ b/classy_train.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +""" +This is the main script used for training Classy Vision jobs. + +This can be used for training on your local machine, using CPU or GPU, and +for distributed training. 
This script also supports Tensorboard, Visdom and +checkpointing. + +Example: + For training locally, simply specify a configuration file and whether + to use CPU or GPU: + + $ ./classy_train.py --device gpu --config configs/my_config.json + + For distributed training, this can be invoked via + :func:`torch.distributed.launch`. For instance + + $ python -m torch.distributed.launch \ + --nnodes=1 \ + --nproc_per_node=1 \ + --master_addr=127.0.0.1 \ + --master_port=29500 \ + --use_env \ + classy_train.py \ + --device=gpu \ + --config=configs/resnet50_synthetic_image_classy_config.json \ + --num_workers=1 \ + --log_freq=100 + + For other use cases, try + + $ ./classy_train.py --help +""" + +import logging +import os +from datetime import datetime +from pathlib import Path + +import torch +from classy_vision.generic.args import parse_args +from classy_vision.generic.registry_utils import import_all_packages_from_directory +from classy_vision.generic.util import load_checkpoint +from classy_vision.hooks import ( + CheckpointHook, + LossLrMeterLoggingHook, + ProfilerHook, + ProgressBarHook, + TensorboardPlotHook, + TimeMetricsHook, + VisdomHook, +) +from classy_vision.tasks import FineTuningTask, build_task +from classy_vision.trainer import DistributedTrainer, LocalTrainer +from torchvision import set_image_backend, set_video_backend + + +def main(args, config): + # Global flags + torch.manual_seed(0) + set_image_backend(args.image_backend) + set_video_backend(args.video_backend) + + task = build_task(config) + + # Load checkpoint, if available. This automatically resumes from an + # existing checkpoint, in case training is being restarted. + checkpoint = load_checkpoint(args.checkpoint_folder, args.device) + task.set_checkpoint(checkpoint) + + # Load a checkpoint containing a pre-trained model. This is how we + # implement fine-tuning of existing models.
+ pretrained_checkpoint = load_checkpoint( + args.pretrained_checkpoint_folder, args.device + ) + if pretrained_checkpoint is not None: + assert isinstance( + task, FineTuningTask + ), "Can only use a pretrained checkpoint for fine tuning tasks" + task.set_pretrained_checkpoint(pretrained_checkpoint) + + # Configure hooks to do tensorboard logging, checkpoints and so on + task.set_hooks(configure_hooks(args, config)) + + use_gpu = None + if args.device is not None: + use_gpu = args.device == "gpu" + + # LocalTrainer is used for a single node. DistributedTrainer will setup + # training to use PyTorch's DistributedDataParallel. + trainer_class = {"none": LocalTrainer, "ddp": DistributedTrainer}[ + args.distributed_backend + ] + + trainer = trainer_class(use_gpu=use_gpu, num_dataloader_workers=args.num_workers) + + # That's it! When this call returns, training is done. + trainer.train(task) + + output_folder = Path(args.checkpoint_folder).resolve() + logging.info("Training successful!") + logging.info(f'Results of this training run are available at: "{output_folder}"') + + +def configure_hooks(args, config): + hooks = [LossLrMeterLoggingHook(args.log_freq), TimeMetricsHook()] + + # Make a folder to store checkpoints and tensorboard logging outputs + suffix = datetime.now().isoformat() + base_folder = Path(__file__).parent / f"output_{suffix}" + if args.checkpoint_folder == "": + args.checkpoint_folder = base_folder / "checkpoints" + os.makedirs(args.checkpoint_folder, exist_ok=True) + + logging.info(f"Logging outputs to {base_folder.resolve()}") + logging.info(f"Logging checkpoints to {args.checkpoint_folder}") + + if not args.skip_tensorboard: + try: + from tensorboardX import SummaryWriter + + tb_writer = SummaryWriter(log_dir=base_folder / "tensorboard") + hooks.append(TensorboardPlotHook(tb_writer)) + except ImportError: + logging.warning("tensorboardX not installed, skipping tensorboard hooks") + + args_dict = vars(args) + args_dict["config"] = config + 
hooks.append( + CheckpointHook( + args.checkpoint_folder, args_dict, checkpoint_period=args.checkpoint_period + ) + ) + + if args.profiler: + hooks.append(ProfilerHook()) + if args.show_progress: + hooks.append(ProgressBarHook()) + if args.visdom_server != "": + hooks.append(VisdomHook(args.visdom_server, args.visdom_port)) + + return hooks + + +# run all the things: +if __name__ == "__main__": + logger = logging.getLogger() + logger.setLevel(logging.INFO) + + logging.info("Classy Vision's default training script.") + + # This imports all modules in the same directory as classy_train.py + # Because of the way Classy Vision's registration decorators work, + # importing a module has a side effect of registering it with Classy + # Vision. This means you can give classy_train.py a config referencing your + # custom module (e.g. my_dataset) and it'll actually know how to + # instantiate it. + file_root = Path(__file__).parent + import_all_packages_from_directory(file_root) + + args, config = parse_args() + main(args, config) diff --git a/classy_vision/__init__.py b/classy_vision/__init__.py new file mode 100644 index 0000000000..090c7eaf6d --- /dev/null +++ b/classy_vision/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +__version__ = "0.1.0" diff --git a/classy_vision/configs/hmdb51/r3d34.json b/classy_vision/configs/hmdb51/r3d34.json new file mode 100644 index 0000000000..742632f7fb --- /dev/null +++ b/classy_vision/configs/hmdb51/r3d34.json @@ -0,0 +1,121 @@ +{ + "name": "classification_task", + "num_epochs": 300, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "hmdb51", + "split": "train", + "batchsize_per_replica": 16, + "use_shuffle": true, + "num_samples": null, + "frames_per_clip": 32, + "step_between_clips": 1, + "clips_per_video": 1, + "video_dir": "", + "splits_dir": "", + "metadata_file": "", + "fold": 1, + "transforms": { + "video": [ + { + "name": "video_default_augment", + "crop_size": 112, + "size_range": [128, 160] + } + ] + } + }, + "test": { + "name": "hmdb51", + "split": "test", + "batchsize_per_replica": 10, + "use_shuffle": false, + "num_samples": null, + "frames_per_clip": 32, + "step_between_clips": 1, + "clips_per_video": 10, + "video_dir": "", + "splits_dir": "", + "metadata_file": "", + "fold": 1, + "transforms": { + "video": [ + { + "name": "video_default_no_augment", + "size": 128 + } + ] + } + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + }, + "video_accuracy": { + "topk": [1, 5], + "clips_per_video_train": 1, + "clips_per_video_test": 10 + } + }, + "model": { + "name": "resnext3d", + "frames_per_clip": 32, + "input_planes": 3, + "clip_crop_size": 112, + "skip_transformation_type": "postactivated_shortcut", + "residual_transformation_type": "basic_transformation", + "num_blocks": [3, 4, 6, 3], + "input_key": "video", + "stem_name": "resnext3d_stem", + "stem_planes": 64, + "stem_temporal_kernel": 3, + "stem_maxpool": false, + "stage_planes": 64, + "stage_temporal_kernel_basis": [[3], [3], [3], [3]], + "temporal_conv_1x1": [false, false, false, false], + "stage_temporal_stride": [1, 2, 2, 2], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 51, + "heads": 
[ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [4, 7, 7], + "activation_func": "softmax", + "num_classes": 51, + "fork_block": "pathway0-stage4-block2", + "in_plane": 512 + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "composite", + "schedulers": [ + { + "name": "linear", + "start_lr": 0.005, + "end_lr": 0.04 + }, + { + "name": "cosine", + "start_lr": 0.04, + "end_lr": 0.00004 + } + ], + "update_interval": "epoch", + "interval_scaling": ["rescaled", "rescaled"], + "lengths": [0.13, 0.87] + }, + "weight_decay": 0.005, + "momentum": 0.9, + "nesterov": true + } +} diff --git a/classy_vision/configs/imagenet/densenet121_imagenet_classy_config.json b/classy_vision/configs/imagenet/densenet121_imagenet_classy_config.json new file mode 100644 index 0000000000..b482c3b794 --- /dev/null +++ b/classy_vision/configs/imagenet/densenet121_imagenet_classy_config.json @@ -0,0 +1,65 @@ +{ + "name": "classification_task", + "num_epochs": 90, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "classy_imagenet", + "split": "train", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + }, + "test": { + "name": "classy_imagenet", + "split": "val", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] 
+ } + }, + "model": { + "name": "densenet", + "num_blocks": [6, 12, 24, 16], + "num_classes": 1000, + "small_input": false + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "step", + "values": [0.1, 0.01, 0.001] + }, + "weight_decay": 1e-4, + "momentum": 0.9 + } +} diff --git a/classy_vision/configs/imagenet/postactivated_r2d101.json b/classy_vision/configs/imagenet/postactivated_r2d101.json new file mode 100644 index 0000000000..3b12106b0a --- /dev/null +++ b/classy_vision/configs/imagenet/postactivated_r2d101.json @@ -0,0 +1,99 @@ +{ + "name": "classification_task", + "num_epochs": 95, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "classy_imagenet", + "split": "train", + "batchsize_per_replica": 32, + "num_samples": null, + "use_augmentation": true, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + }, + "test": { + "name": "classy_imagenet", + "split": "val", + "batchsize_per_replica": 32, + "num_samples": null, + "use_augmentation": false, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnext3d", + "frames_per_clip": 1, + "input_planes": 3, + "clip_crop_size": 224, + "skip_transformation_type": "postactivated_shortcut", + "residual_transformation_type": "postactivated_bottleneck_transformation", + "num_blocks": [3, 4, 23, 3], + "stem_name": "resnext3d_stem", + "stem_planes": 64, + 
"stem_temporal_kernel": 1, + "stem_spatial_kernel": 7, + "stem_maxpool": true, + "stage_planes": 256, + "stage_temporal_kernel_basis": [[1], [1], [1], [1]], + "temporal_conv_1x1": [true, true, true, true], + "stage_temporal_stride": [1, 1, 1, 1], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 1000, + "zero_init_residual_transform": true, + "heads": [ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [1, 7, 7], + "activation_func": "softmax", + "num_classes": 1000, + "fork_block": "pathway0-stage4-block2", + "in_plane": 2048, + "use_dropout": false + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "multistep", + "num_epochs": 95, + "values": [0.1, 0.01, 0.001, 0.0001], + "milestones": [30, 60, 90] + }, + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": true + } +} diff --git a/classy_vision/configs/imagenet/postactivated_r2d50.json b/classy_vision/configs/imagenet/postactivated_r2d50.json new file mode 100644 index 0000000000..340264f00d --- /dev/null +++ b/classy_vision/configs/imagenet/postactivated_r2d50.json @@ -0,0 +1,99 @@ +{ + "name": "classification_task", + "num_epochs": 95, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "classy_imagenet", + "split": "train", + "batchsize_per_replica": 32, + "num_samples": null, + "use_augmentation": true, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + }, + "test": { + "name": "classy_imagenet", + "split": "val", + "batchsize_per_replica": 32, + "num_samples": null, + "use_augmentation": false, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + 
{"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnext3d", + "frames_per_clip": 1, + "input_planes": 3, + "clip_crop_size": 224, + "skip_transformation_type": "postactivated_shortcut", + "residual_transformation_type": "postactivated_bottleneck_transformation", + "num_blocks": [3, 4, 6, 3], + "stem_name": "resnext3d_stem", + "stem_planes": 64, + "stem_temporal_kernel": 1, + "stem_spatial_kernel": 7, + "stem_maxpool": true, + "stage_planes": 256, + "stage_temporal_kernel_basis": [[1], [1], [1], [1]], + "temporal_conv_1x1": [true, true, true, true], + "stage_temporal_stride": [1, 1, 1, 1], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 1000, + "zero_init_residual_transform": true, + "heads": [ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [1, 7, 7], + "activation_func": "softmax", + "num_classes": 1000, + "fork_block": "pathway0-stage4-block2", + "in_plane": 2048, + "use_dropout": false + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "multistep", + "num_epochs": 95, + "values": [0.1, 0.01, 0.001, 0.0001], + "milestones": [30, 60, 90] + }, + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": true + } +} diff --git a/classy_vision/configs/imagenet/preactivated_r2d101.json b/classy_vision/configs/imagenet/preactivated_r2d101.json new file mode 100644 index 0000000000..f126df0b38 --- /dev/null +++ b/classy_vision/configs/imagenet/preactivated_r2d101.json @@ -0,0 +1,99 @@ +{ + "name": "classification_task", + "num_epochs": 95, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "classy_imagenet", + "split": "train", + "batchsize_per_replica": 32, + 
"num_samples": null, + "use_augmentation": true, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + }, + "test": { + "name": "classy_imagenet", + "split": "val", + "batchsize_per_replica": 32, + "num_samples": null, + "use_augmentation": false, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnext3d", + "frames_per_clip": 1, + "input_planes": 3, + "clip_crop_size": 224, + "skip_transformation_type": "preactivated_shortcut", + "residual_transformation_type": "preactivated_bottleneck_transformation", + "num_blocks": [3, 4, 23, 3], + "stem_name": "resnext3d_stem", + "stem_planes": 32, + "stem_temporal_kernel": 1, + "stem_spatial_kernel": 5, + "stem_maxpool": true, + "stage_planes": 256, + "stage_temporal_kernel_basis": [[1], [1], [1], [1]], + "temporal_conv_1x1": [true, true, true, true], + "stage_temporal_stride": [1, 1, 1, 1], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 1000, + "zero_init_residual_transform": true, + "heads": [ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [1, 7, 7], + "activation_func": "softmax", + "num_classes": 1000, + "fork_block": "pathway0-stage4-block2-relu", + "in_plane": 2048, + "use_dropout": false + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "multistep", + "num_epochs": 95, + "values": 
[0.1, 0.01, 0.001, 0.0001], + "milestones": [30, 60, 90] + }, + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": true + } +} diff --git a/classy_vision/configs/imagenet/preactivated_r2d50.json b/classy_vision/configs/imagenet/preactivated_r2d50.json new file mode 100644 index 0000000000..b1628e876c --- /dev/null +++ b/classy_vision/configs/imagenet/preactivated_r2d50.json @@ -0,0 +1,99 @@ +{ + "name": "classification_task", + "num_epochs": 95, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "classy_imagenet", + "split": "train", + "batchsize_per_replica": 32, + "num_samples": null, + "use_augmentation": true, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + }, + "test": { + "name": "classy_imagenet", + "split": "val", + "batchsize_per_replica": 32, + "num_samples": null, + "use_augmentation": false, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnext3d", + "frames_per_clip": 1, + "input_planes": 3, + "clip_crop_size": 224, + "skip_transformation_type": "preactivated_shortcut", + "residual_transformation_type": "preactivated_bottleneck_transformation", + "num_blocks": [3, 4, 6, 3], + "stem_name": "resnext3d_stem", + "stem_planes": 32, + "stem_temporal_kernel": 1, + "stem_spatial_kernel": 5, + "stem_maxpool": true, + "stage_planes": 256, + "stage_temporal_kernel_basis": [[1], [1], [1], [1]], + 
"temporal_conv_1x1": [true, true, true, true], + "stage_temporal_stride": [1, 1, 1, 1], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 1000, + "zero_init_residual_transform": true, + "heads": [ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [1, 7, 7], + "activation_func": "softmax", + "num_classes": 1000, + "fork_block": "pathway0-stage4-block2-relu", + "in_plane": 2048, + "use_dropout": false + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "multistep", + "num_epochs": 95, + "values": [0.1, 0.01, 0.001, 0.0001], + "milestones": [30, 60, 90] + }, + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": true + } +} diff --git a/classy_vision/configs/imagenet/resnet101_imagenet_classy_config.json b/classy_vision/configs/imagenet/resnet101_imagenet_classy_config.json new file mode 100644 index 0000000000..7846eac9e5 --- /dev/null +++ b/classy_vision/configs/imagenet/resnet101_imagenet_classy_config.json @@ -0,0 +1,74 @@ +{ + "name": "classification_task", + "num_epochs": 90, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "classy_imagenet", + "split": "train", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + }, + "test": { + "name": "classy_imagenet", + "split": "val", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 
0.225] + } + ]}], + "root": "/tmp/imagenet" + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnet", + "num_blocks": [3, 4, 23, 3], + "num_classes": 1000, + "small_input": false, + "heads": [ + { + "name": "fully_connected", + "unique_id": "default_head", + "num_classes": 1000, + "fork_block": "block3-2", + "in_plane": 2048 + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "step", + "values": [0.1, 0.01, 0.001] + }, + "weight_decay": 1e-4, + "momentum": 0.9 + } +} diff --git a/classy_vision/configs/imagenet/resnet50_imagenet_classy_config.json b/classy_vision/configs/imagenet/resnet50_imagenet_classy_config.json new file mode 100644 index 0000000000..6557efb38c --- /dev/null +++ b/classy_vision/configs/imagenet/resnet50_imagenet_classy_config.json @@ -0,0 +1,74 @@ +{ + "name": "classification_task", + "num_epochs": 90, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "classy_imagenet", + "split": "train", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + }, + "test": { + "name": "classy_imagenet", + "split": "val", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnet", + "num_blocks": [3, 4, 6, 3], + "small_input": false, + 
"zero_init_bn_residuals": true, + "heads": [ + { + "name": "fully_connected", + "unique_id": "default_head", + "num_classes": 1000, + "fork_block": "block3-2", + "in_plane": 2048 + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "step", + "values": [0.1, 0.01, 0.001] + }, + "weight_decay": 1e-4, + "momentum": 0.9 + } +} diff --git a/classy_vision/configs/imagenet/resnet50_label_smoothing_criterion.json b/classy_vision/configs/imagenet/resnet50_label_smoothing_criterion.json new file mode 100644 index 0000000000..0c9156f8d0 --- /dev/null +++ b/classy_vision/configs/imagenet/resnet50_label_smoothing_criterion.json @@ -0,0 +1,77 @@ +{ + "name": "classification_task", + "num_epochs": 90, + "loss": { + "name": "label_smoothing_cross_entropy", + "smoothing_param": 0.1 + }, + "dataset": { + "train": { + "name": "classy_imagenet", + "split": "train", + "batchsize_per_replica": 32, + "num_samples": null, + "use_augmentation": true, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + }, + "test": { + "name": "classy_imagenet", + "split": "val", + "batchsize_per_replica": 32, + "num_samples": null, + "use_augmentation": false, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnet", + "num_blocks": [3, 4, 6, 3], + "small_input": false, + "zero_init_bn_residuals": true, + "heads": [ + { + "name": "fully_connected", + "unique_id": 
"default_head", + "num_classes": 1000, + "fork_block": "block3-2", + "in_plane": 2048 + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "step", + "values": [0.1, 0.01, 0.001] + }, + "weight_decay": 1e-4, + "momentum": 0.9 + } +} diff --git a/classy_vision/configs/imagenet/resnext101_32x4d_fine_tuning_config.json b/classy_vision/configs/imagenet/resnext101_32x4d_fine_tuning_config.json new file mode 100644 index 0000000000..bd4e51ea3c --- /dev/null +++ b/classy_vision/configs/imagenet/resnext101_32x4d_fine_tuning_config.json @@ -0,0 +1,78 @@ +{ + "name": "fine_tuning", + "num_epochs": 10, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "classy_imagenet", + "split": "train", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + }, + "test": { + "name": "classy_imagenet", + "split": "val", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnext", + "num_blocks": [3, 4, 23, 3], + "base_width_and_cardinality": [4, 32], + "small_input": false, + "zero_init_bn_residuals": true, + "heads": [ + { + "name": "fully_connected", + "unique_id": "default_head", + "num_classes": 1000, + "fork_block": "block3-2", + "in_plane": 2048 + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": 
"step", + "values": [0.1, 0.01, 0.001] + }, + "weight_decay": 1e-4, + "momentum": 0.9, + "nesterov": true + }, + "reset_heads": true, + "freeze_trunk": true +} diff --git a/classy_vision/configs/imagenet/resnext101_imagenet_classy_config.json b/classy_vision/configs/imagenet/resnext101_imagenet_classy_config.json new file mode 100644 index 0000000000..e252cdd191 --- /dev/null +++ b/classy_vision/configs/imagenet/resnext101_imagenet_classy_config.json @@ -0,0 +1,75 @@ +{ + "name": "classification_task", + "num_epochs": 90, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "classy_imagenet", + "split": "train", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + }, + "test": { + "name": "classy_imagenet", + "split": "val", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnext", + "num_blocks": [3, 4, 23, 3], + "num_classes": 1000, + "base_width_and_cardinality": [4, 32], + "small_input": false, + "heads": [ + { + "name": "fully_connected", + "unique_id": "default_head", + "num_classes": 1000, + "fork_block": "block3-2", + "in_plane": 2048 + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "step", + "values": [0.1, 0.01, 0.001] + }, + "weight_decay": 1e-4, + "momentum": 0.9 + } +} diff 
--git a/classy_vision/configs/imagenet/resnext50_imagenet_classy_config.json b/classy_vision/configs/imagenet/resnext50_imagenet_classy_config.json new file mode 100644 index 0000000000..6ab0f82bbd --- /dev/null +++ b/classy_vision/configs/imagenet/resnext50_imagenet_classy_config.json @@ -0,0 +1,75 @@ +{ + "name": "classification_task", + "num_epochs": 90, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "classy_imagenet", + "split": "train", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + }, + "test": { + "name": "classy_imagenet", + "split": "val", + "batchsize_per_replica": 32, + "num_samples": null, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}], + "root": "/tmp/imagenet" + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnext", + "num_blocks": [3, 4, 6, 3], + "num_classes": 1000, + "base_width_and_cardinality": [4, 32], + "small_input": false, + "heads": [ + { + "name": "fully_connected", + "unique_id": "default_head", + "num_classes": 1000, + "fork_block": "block3-2", + "in_plane": 2048 + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "step", + "values": [0.1, 0.01, 0.001] + }, + "weight_decay": 1e-4, + "momentum": 0.9 + } +} diff --git a/classy_vision/configs/kinetics400/postactivated_i3d50.json b/classy_vision/configs/kinetics400/postactivated_i3d50.json new file mode 100644 index 
0000000000..bb2bebb3d8 --- /dev/null +++ b/classy_vision/configs/kinetics400/postactivated_i3d50.json @@ -0,0 +1,111 @@ +{ + "name": "classification_task", + "num_epochs": 80, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "kinetics400", + "split": "train", + "batchsize_per_replica": 8, + "use_shuffle": true, + "num_samples": null, + "frames_per_clip": 8, + "frame_rate": 4, + "step_between_clips": 1, + "clips_per_video": 1, + "video_dir": "", + "metadata_file": "", + "extensions": "mp4", + "transforms": { + "video": [ + { + "name": "video_default_augment", + "crop_size": 224, + "size_range": [256, 320] + } + ] + } + }, + "test": { + "name": "kinetics400", + "split": "val", + "batchsize_per_replica": 10, + "use_shuffle": false, + "num_samples": null, + "frames_per_clip": 8, + "frame_rate": 4, + "step_between_clips": 1, + "clips_per_video": 10, + "video_dir": "", + "metadata_file": "", + "extensions": "mp4", + "transforms": { + "video": [ + { + "name": "video_default_no_augment", + "size": 256 + } + ] + } + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + }, + "video_accuracy": { + "topk": [1, 5], + "clips_per_video_train": 1, + "clips_per_video_test": 10 + } + }, + "model": { + "name": "resnext3d", + "frames_per_clip": 8, + "input_planes": 3, + "clip_crop_size": 224, + "skip_transformation_type": "postactivated_shortcut", + "residual_transformation_type": "postactivated_bottleneck_transformation", + "num_blocks": [3, 4, 6, 3], + "input_key": "video", + "stem_name": "resnext3d_stem", + "stem_planes": 64, + "stem_temporal_kernel": 5, + "stem_spatial_kernel": 7, + "stem_maxpool": true, + "stage_planes": 256, + "stage_temporal_kernel_basis": [[3], [3, 1], [3, 1], [1, 3]], + "temporal_conv_1x1": [true, true, true, true], + "stage_temporal_stride": [1, 1, 1, 1], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 400, + "zero_init_residual_transform": true, + "heads": [ + { + 
"name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [8, 7, 7], + "activation_func": "softmax", + "num_classes": 400, + "fork_block": "pathway0-stage4-block2", + "in_plane": 2048, + "use_dropout": true + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "cosine", + "start_lr": 0.1, + "end_lr": 0.0001 + }, + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": true + } +} diff --git a/classy_vision/configs/kinetics400/postactivated_i3d50_fine_tuning.json b/classy_vision/configs/kinetics400/postactivated_i3d50_fine_tuning.json new file mode 100644 index 0000000000..2a52a795d2 --- /dev/null +++ b/classy_vision/configs/kinetics400/postactivated_i3d50_fine_tuning.json @@ -0,0 +1,113 @@ +{ + "name": "fine_tuning", + "reset_heads": true, + "num_epochs": 80, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "kinetics400", + "split": "train", + "batchsize_per_replica": 8, + "use_shuffle": true, + "num_samples": null, + "frames_per_clip": 8, + "frame_rate": 4, + "step_between_clips": 1, + "clips_per_video": 1, + "video_dir": "", + "metadata_file": "", + "extensions": "mp4", + "transforms": { + "video": [ + { + "name": "video_default_augment", + "crop_size": 224, + "size_range": [256, 320] + } + ] + } + }, + "test": { + "name": "kinetics400", + "split": "val", + "batchsize_per_replica": 10, + "use_shuffle": false, + "num_samples": null, + "frames_per_clip": 8, + "frame_rate": 4, + "step_between_clips": 1, + "clips_per_video": 10, + "video_dir": "", + "metadata_file": "", + "extensions": "mp4", + "transforms": { + "video": [ + { + "name": "video_default_no_augment", + "size": 256 + } + ] + } + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + }, + "video_accuracy": { + "topk": [1, 5], + "clips_per_video_train": 1, + "clips_per_video_test": 10 + } + }, + "model": { + "name": "resnext3d", + "frames_per_clip": 8, + "input_planes": 3, + "clip_crop_size": 224, + "skip_transformation_type": 
"postactivated_shortcut", + "residual_transformation_type": "postactivated_bottleneck_transformation", + "num_blocks": [3, 4, 6, 3], + "input_key": "video", + "stem_name": "resnext3d_stem", + "stem_planes": 64, + "stem_temporal_kernel": 5, + "stem_spatial_kernel": 7, + "stem_maxpool": true, + "stage_planes": 256, + "stage_temporal_kernel_basis": [[3], [3, 1], [3, 1], [1, 3]], + "temporal_conv_1x1": [true, true, true, true], + "stage_temporal_stride": [1, 1, 1, 1], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 400, + "zero_init_residual_transform": true, + "heads": [ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [8, 7, 7], + "activation_func": "softmax", + "num_classes": 400, + "fork_block": "pathway0-stage4-block2", + "in_plane": 2048, + "use_dropout": true + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "multistep", + "num_epochs": 80, + "values": [0.04, 0.004, 0.0004], + "milestones": [40, 70] + }, + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": true + } +} diff --git a/classy_vision/configs/kinetics400/preactivated_i3d50.json b/classy_vision/configs/kinetics400/preactivated_i3d50.json new file mode 100644 index 0000000000..b363cf6335 --- /dev/null +++ b/classy_vision/configs/kinetics400/preactivated_i3d50.json @@ -0,0 +1,111 @@ +{ + "name": "classification_task", + "num_epochs": 160, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "kinetics400", + "split": "train", + "batchsize_per_replica": 8, + "use_shuffle": true, + "num_samples": null, + "frames_per_clip": 8, + "frame_rate": 4, + "step_between_clips": 1, + "clips_per_video": 1, + "video_dir": "", + "metadata_file": "", + "extensions": "mp4", + "transforms": { + "video": [ + { + "name": "video_default_augment", + "crop_size": 224, + "size_range": [256, 320] + } + ] + } + }, + "test": { + "name": "kinetics400", + "split": "val", + 
"batchsize_per_replica": 10, + "use_shuffle": false, + "num_samples": null, + "frames_per_clip": 8, + "frame_rate": 4, + "step_between_clips": 1, + "clips_per_video": 10, + "video_dir": "", + "metadata_file": "", + "extensions": "mp4", + "transforms": { + "video": [ + { + "name": "video_default_no_augment", + "size": 256 + } + ] + } + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + }, + "video_accuracy": { + "topk": [1, 5], + "clips_per_video_train": 1, + "clips_per_video_test": 10 + } + }, + "model": { + "name": "resnext3d", + "frames_per_clip": 8, + "input_planes": 3, + "clip_crop_size": 224, + "skip_transformation_type": "preactivated_shortcut", + "residual_transformation_type": "preactivated_bottleneck_transformation", + "num_blocks": [3, 4, 6, 3], + "input_key": "video", + "stem_name": "resnext3d_stem", + "stem_planes": 32, + "stem_temporal_kernel": 3, + "stem_spatial_kernel": 5, + "stem_maxpool": true, + "stage_planes": 256, + "stage_temporal_kernel_basis": [[3], [3, 1], [3, 1], [1, 3]], + "temporal_conv_1x1": [true, true, true, true], + "stage_temporal_stride": [1, 2, 1, 1], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 400, + "zero_init_residual_transform": true, + "heads": [ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [4, 7, 7], + "activation_func": "softmax", + "num_classes": 400, + "fork_block": "pathway0-stage4-block2-relu", + "in_plane": 2048, + "use_dropout": true + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "cosine", + "start_lr": 0.1, + "end_lr": 0.0001 + }, + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": true + } +} diff --git a/classy_vision/configs/kinetics400/preactivated_i3d50_fine_tuning.json b/classy_vision/configs/kinetics400/preactivated_i3d50_fine_tuning.json new file mode 100644 index 0000000000..829610dcfc --- /dev/null +++ b/classy_vision/configs/kinetics400/preactivated_i3d50_fine_tuning.json @@ 
-0,0 +1,113 @@ +{ + "name": "fine_tuning", + "reset_heads": true, + "num_epochs": 120, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "kinetics400", + "split": "train", + "batchsize_per_replica": 8, + "use_shuffle": true, + "num_samples": null, + "frames_per_clip": 8, + "frame_rate": 4, + "step_between_clips": 1, + "clips_per_video": 1, + "video_dir": "", + "metadata_file": "", + "extensions": "mp4", + "transforms": { + "video": [ + { + "name": "video_default_augment", + "crop_size": 224, + "size_range": [256, 320] + } + ] + } + }, + "test": { + "name": "kinetics400", + "split": "val", + "batchsize_per_replica": 10, + "use_shuffle": false, + "num_samples": null, + "frames_per_clip": 8, + "frame_rate": 4, + "step_between_clips": 1, + "clips_per_video": 10, + "video_dir": "", + "metadata_file": "", + "extensions": "mp4", + "transforms": { + "video": [ + { + "name": "video_default_no_augment", + "size": 256 + } + ] + } + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + }, + "video_accuracy": { + "topk": [1, 5], + "clips_per_video_train": 1, + "clips_per_video_test": 10 + } + }, + "model": { + "name": "resnext3d", + "frames_per_clip": 8, + "input_planes": 3, + "clip_crop_size": 224, + "skip_transformation_type": "preactivated_shortcut", + "residual_transformation_type": "preactivated_bottleneck_transformation", + "num_blocks": [3, 4, 6, 3], + "input_key": "video", + "stem_name": "resnext3d_stem", + "stem_planes": 32, + "stem_temporal_kernel": 3, + "stem_spatial_kernel": 5, + "stem_maxpool": true, + "stage_planes": 256, + "stage_temporal_kernel_basis": [[3], [3, 1], [3, 1], [1, 3]], + "temporal_conv_1x1": [true, true, true, true], + "stage_temporal_stride": [1, 2, 1, 1], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 400, + "zero_init_residual_transform": true, + "heads": [ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [4, 7, 7], 
+ "activation_func": "softmax", + "num_classes": 400, + "fork_block": "pathway0-stage4-block2-relu", + "in_plane": 2048, + "use_dropout": true + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "multistep", + "num_epochs": 120, + "values": [0.04, 0.004, 0.0004], + "milestones": [60, 105] + }, + "weight_decay": 0.0001, + "momentum": 0.9, + "nesterov": true + } +} diff --git a/classy_vision/configs/r3d34_synthetic_video.json b/classy_vision/configs/r3d34_synthetic_video.json new file mode 100644 index 0000000000..4072524c46 --- /dev/null +++ b/classy_vision/configs/r3d34_synthetic_video.json @@ -0,0 +1,80 @@ +{ + "name": "classification_task", + "num_epochs": 2, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "synthetic_video", + "split": "train", + "batchsize_per_replica": 16, + "use_shuffle": true, + "num_samples": 128, + "frames_per_clip": 32, + "video_height": 128, + "video_width": 160, + "num_classes": 50, + "clips_per_video": 1 + }, + "test": { + "name": "synthetic_video", + "split": "test", + "batchsize_per_replica": 10, + "use_shuffle": false, + "num_samples": 40, + "frames_per_clip": 32, + "video_height": 128, + "video_width": 160, + "num_classes": 50, + "clips_per_video": 10 + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnext3d", + "input_planes": 3, + "clip_crop_size": 112, + "frames_per_clip": 32, + "skip_transformation_type": "postactivated_shortcut", + "residual_transformation_type": "basic_transformation", + "num_blocks": [2, 2, 2, 2], + "input_key": "video", + "stem_name": "resnext3d_stem", + "stem_planes": 64, + "stem_temporal_kernel": 3, + "stem_maxpool": false, + "stage_planes": 64, + "stage_temporal_kernel_basis": [[3], [3], [3], [3]], + "temporal_conv_1x1": [false, false, false, false], + "stage_temporal_stride": [1, 2, 2, 2], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 50, + "heads": [ + { + 
"name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [4, 7, 7], + "activation_func": "softmax", + "num_classes": 50, + "fork_block": "pathway0-stage4-block1", + "in_plane": 512 + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "step", + "values": [0.04, 0.004] + }, + "weight_decay": 5e-3, + "momentum": 0.9 + } +} diff --git a/classy_vision/configs/resnet50_synthetic_image_classy_config.json b/classy_vision/configs/resnet50_synthetic_image_classy_config.json new file mode 100644 index 0000000000..3abc18a438 --- /dev/null +++ b/classy_vision/configs/resnet50_synthetic_image_classy_config.json @@ -0,0 +1,74 @@ +{ + "name": "classification_task", + "num_epochs": 2, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "synthetic_image", + "split": "train", + "crop_size": 224, + "class_ratio": 0.5, + "num_samples": 2000, + "seed": 0, + "batchsize_per_replica": 32, + "use_shuffle": true, + "transforms": [{ + "name": "apply_transform_to_key", + "transforms": [ + {"name": "ToTensor"}, + {"name": "Normalize", "mean": [0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]} + ], + "key": "input" + }] + }, + "test": { + "name": "synthetic_image", + "split": "val", + "crop_size": 224, + "class_ratio": 0.5, + "num_samples": 2000, + "seed": 1, + "batchsize_per_replica": 32, + "use_shuffle": false, + "transforms": [{ + "name": "apply_transform_to_key", + "transforms": [ + {"name": "ToTensor"}, + {"name": "Normalize", "mean": [0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]} + ], + "key": "input" + }] + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + } + }, + "model": { + "name": "resnet", + "num_blocks": [3, 4, 6, 3], + "small_input": false, + "zero_init_bn_residuals": true, + "heads": [ + { + "name": "fully_connected", + "unique_id": "default_head", + "num_classes": 1000, + "fork_block": "block3-2", + "in_plane": 2048 + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "step", + 
"values": [0.1, 0.01] + }, + "weight_decay": 1e-4, + "momentum": 0.9 + } +} diff --git a/classy_vision/configs/ucf101/r3d34.json b/classy_vision/configs/ucf101/r3d34.json new file mode 100644 index 0000000000..745bcba191 --- /dev/null +++ b/classy_vision/configs/ucf101/r3d34.json @@ -0,0 +1,121 @@ +{ + "name": "classification_task", + "num_epochs": 300, + "loss": { + "name": "CrossEntropyLoss" + }, + "dataset": { + "train": { + "name": "ucf101", + "split": "train", + "batchsize_per_replica": 16, + "use_shuffle": true, + "num_samples": null, + "frames_per_clip": 32, + "step_between_clips": 1, + "clips_per_video": 1, + "video_dir": "", + "splits_dir": "", + "metadata_file": "", + "fold": 1, + "transforms": { + "video": [ + { + "name": "video_default_augment", + "crop_size": 112, + "size_range": [128, 160] + } + ] + } + }, + "test": { + "name": "ucf101", + "split": "test", + "batchsize_per_replica": 10, + "use_shuffle": false, + "num_samples": null, + "frames_per_clip": 32, + "step_between_clips": 1, + "clips_per_video": 10, + "video_dir": "", + "splits_dir": "", + "metadata_file": "", + "fold": 1, + "transforms": { + "video": [ + { + "name": "video_default_no_augment", + "size": 128 + } + ] + } + } + }, + "meters": { + "accuracy": { + "topk": [1, 5] + }, + "video_accuracy": { + "topk": [1, 5], + "clips_per_video_train": 1, + "clips_per_video_test": 10 + } + }, + "model": { + "name": "resnext3d", + "frames_per_clip": 32, + "input_planes": 3, + "clip_crop_size": 112, + "skip_transformation_type": "postactivated_shortcut", + "residual_transformation_type": "basic_transformation", + "num_blocks": [3, 4, 6, 3], + "input_key": "video", + "stem_name": "resnext3d_stem", + "stem_planes": 64, + "stem_temporal_kernel": 3, + "stem_maxpool": false, + "stage_planes": 64, + "stage_temporal_kernel_basis": [[3], [3], [3], [3]], + "temporal_conv_1x1": [false, false, false, false], + "stage_temporal_stride": [1, 2, 2, 2], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + 
"width_per_group": 64, + "num_classes": 101, + "heads": [ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [4, 7, 7], + "activation_func": "softmax", + "num_classes": 101, + "fork_block": "pathway0-stage4-block2", + "in_plane": 512 + } + ] + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "composite", + "schedulers": [ + { + "name": "linear", + "start_lr": 0.005, + "end_lr": 0.04 + }, + { + "name": "cosine", + "start_lr": 0.04, + "end_lr": 0.00004 + } + ], + "lengths": [0.13, 0.87], + "update_interval": "epoch", + "interval_scaling": ["rescaled", "rescaled"] + }, + "weight_decay": 0.005, + "momentum": 0.9, + "nesterov": true + } +} diff --git a/classy_vision/dataset/README.md b/classy_vision/dataset/README.md new file mode 100644 index 0000000000..682a041a49 --- /dev/null +++ b/classy_vision/dataset/README.md @@ -0,0 +1,3 @@ +Each dataset implements a single function `get_dataset(split)`, where `split` can +have the values `train` and `test`. It returns a tuple containing the torch +Dataset objects, and the number of classes in the dataset. diff --git a/classy_vision/dataset/__init__.py b/classy_vision/dataset/__init__.py new file mode 100644 index 0000000000..6f4d239705 --- /dev/null +++ b/classy_vision/dataset/__init__.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from pathlib import Path + +from classy_vision.generic.registry_utils import import_all_modules + +from .classy_dataset import ClassyDataset + + +FILE_ROOT = Path(__file__).parent + +DATASET_REGISTRY = {} +DATASET_CLASS_NAMES = set() + + +def build_dataset(config, *args, **kwargs): + """Builds a ClassyDataset from a config. + + This assumes a 'name' key in the config which is used to determine what + dataset class to instantiate. 
def register_dataset(name):
    """Return a class decorator that registers a ClassyDataset subclass.

    Registration is what lets Classy Vision instantiate a dataset from a
    configuration file, even when the class is defined outside of the Classy
    Vision framework. Apply the decorator to a ClassyDataset subclass:

    | @register_dataset("my_dataset")
    | class MyDataset(ClassyDataset):
    |     ...

    The decorator raises ``ValueError`` when ``name`` is already registered,
    when the class does not extend ClassyDataset, or when another registered
    dataset already uses the same class name.

    To instantiate a dataset from a configuration file, see
    :func:`build_dataset`."""

    def register_dataset_cls(cls):
        # The checks run in this order on purpose: name clash first, then the
        # base-class requirement, then the class-name clash.
        if name in DATASET_REGISTRY:
            raise ValueError("Cannot register duplicate dataset ({})".format(name))

        if not issubclass(cls, ClassyDataset):
            raise ValueError(
                "Dataset ({}: {}) must extend ClassyDataset".format(name, cls.__name__)
            )

        class_name = cls.__name__
        if class_name in DATASET_CLASS_NAMES:
            raise ValueError(
                "Cannot register dataset with duplicate class name({})".format(
                    class_name
                )
            )

        # Record both the config name and the class name so that later
        # registrations can be checked against either.
        DATASET_REGISTRY[name] = cls
        DATASET_CLASS_NAMES.add(class_name)
        return cls

    return register_dataset_cls
isort:skip + +__all__ = [ + "CIFARDataset", + "ClassyDataset", + "ClassyVideoDataset", + "HMDB51Dataset", + "ImageNetDataset", + "ImagePathDataset", + "Kinetics400Dataset", + "SyntheticImageDataset", + "SyntheticVideoDataset", + "UCF101Dataset", + "build_dataset", + "register_dataset", +] diff --git a/classy_vision/dataset/classy_cifar.py b/classy_vision/dataset/classy_cifar.py new file mode 100644 index 0000000000..7bcdf27980 --- /dev/null +++ b/classy_vision/dataset/classy_cifar.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Any, Callable, Dict, Optional, Union + +from classy_vision.dataset import ClassyDataset, register_dataset +from classy_vision.dataset.transforms import ClassyTransform, build_transforms +from torchvision.datasets.cifar import CIFAR10, CIFAR100 + + +class CIFARDataset(ClassyDataset): + _CIFAR_TYPE = None + + def __init__( + self, + split: Optional[str], + batchsize_per_replica: int, + shuffle: bool, + transform: Optional[Union[ClassyTransform, Callable]], + num_samples: Optional[int], + root: str, + download: bool = None, + ): + assert self._CIFAR_TYPE in [ + "cifar10", + "cifar100", + ], "CIFARDataset must be subclassed and a valid _CIFAR_TYPE provided" + if self._CIFAR_TYPE == "cifar10": + dataset = CIFAR10(root=root, train=(split == "train"), download=download) + if self._CIFAR_TYPE == "cifar100": + dataset = CIFAR100(root=root, train=(split == "train"), download=download) + + super().__init__( + dataset, split, batchsize_per_replica, shuffle, transform, num_samples + ) + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "CIFARDataset": + """Instantiates a CIFARDataset from a configuration. + + Args: + config: A configuration for a CIFARDataset. + See :func:`__init__` for parameters expected in the config. 
class ClassyDataset:
    """
    Class representing a dataset abstraction.

    This class wraps a :class:`torch.utils.data.Dataset` via the `dataset` attribute
    and configures the dataloaders needed to access the datasets.
    Transforms which need to be applied to the data should be specified in this class.
    ClassyDataset can be used to instantiate datasets from a configuration file as well.
    """

    def __init__(
        self,
        dataset: Sequence,
        split: Optional[str],
        batchsize_per_replica: int,
        shuffle: bool,
        transform: Optional[Union[ClassyTransform, Callable]],
        num_samples: Optional[int],
    ) -> None:
        """
        Constructor for a ClassyDataset.

        Args:
            dataset: The underlying dataset being wrapped. It is indexed with
                integer positions and measured with ``len()``
            split: Split of dataset to use ("train", "test")
            batchsize_per_replica: Positive integer indicating batch size for each
                replica
            shuffle: Whether we should shuffle between epochs
            transform: Transform to be applied to each sample
            num_samples: When set, this restricts the number of samples provided by
                the dataset
        """
        # Asserts:
        assert is_pos_int(
            batchsize_per_replica
        ), "batchsize_per_replica must be a positive int"
        assert isinstance(shuffle, bool), "shuffle must be a boolean"
        assert num_samples is None or is_pos_int(
            num_samples
        ), "num_samples must be a positive int or None"

        # Assignments:
        self.split = split
        self.batchsize_per_replica = batchsize_per_replica
        self.shuffle = shuffle
        self.transform = transform
        self.num_samples = num_samples
        self.dataset = dataset

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "ClassyDataset":
        """Instantiates a ClassyDataset from a configuration.

        Subclasses are expected to override this; the base implementation
        always raises.

        Args:
            config: A configuration for the ClassyDataset.

        Returns:
            A ClassyDataset instance.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    @classmethod
    def parse_config(cls, config: Dict[str, Any]):
        """
        This function parses out common config options.

        Keys are read with ``dict.get``, so a missing key yields ``None`` for
        the corresponding entry of the returned tuple.

        Args:
            config: A dict with the following string keys -

                | batchsize_per_replica (int): Must be a positive int, batch size
                |    for each replica
                | use_shuffle (bool): Whether to enable shuffling for the dataset
                | num_samples (int, optional): When set, restricts the number of samples
                |    in a dataset
                | transforms: list of transform configurations to be applied in order

        Returns:
            A tuple containing the following variables -
                | transform_config: Config for the dataset transform. Can be passed to
                |    :func:`build_transforms`
                | batchsize_per_replica: Batch size per replica
                | shuffle: Whether we should shuffle between epochs
                | num_samples: When set, restricts the number of samples in a dataset
        """
        batchsize_per_replica = config.get("batchsize_per_replica")
        shuffle = config.get("use_shuffle")
        num_samples = config.get("num_samples")
        transform_config = config.get("transforms")
        return transform_config, batchsize_per_replica, shuffle, num_samples

    def __getitem__(self, idx: int):
        """Return the sample at ``idx``, passed through ``transform`` if one is set."""
        # NOTE: the upper bound is the length of the wrapped dataset, not
        # ``len(self)``, so it is not tightened by ``num_samples``.
        assert 0 <= idx < len(
            self.dataset
        ), "Provided idx is outside of dataset range"
        sample = self.dataset[idx]
        if self.transform is None:
            return sample
        return self.transform(sample)

    def __len__(self):
        """Return ``num_samples`` when it is set, else the wrapped dataset's length."""
        base_length = len(self.dataset)
        # The check allows equality, so the message says "must not exceed".
        assert (
            self.num_samples is None or self.num_samples <= base_length
        ), "num_samples must not exceed the length of the base dataset"
        return base_length if self.num_samples is None else self.num_samples

    def _get_sampler(self, epoch: int):
        """
        Return a :class:`torch.utils.data.sampler.Sampler` to sample the data.

        This is used to distribute the data across the replicas. If shuffling
        is enabled, every epoch will have a different shuffle.

        Args:
            epoch: The epoch being fetched.

        Returns:
            A sampler which tells the data loader which sample to load next.
        """
        world_size = get_world_size()
        rank = get_rank()
        sampler = DistributedSampler(
            self, num_replicas=world_size, rank=rank, shuffle=self.shuffle
        )
        # The epoch is handed to the sampler so each epoch can be ordered differently.
        sampler.set_epoch(epoch)
        return sampler

    def iterator(self, *args, **kwargs):
        """
        Returns an iterable which can be used to iterate over the data.

        Positional arguments are accepted but not used. The recognised keyword
        arguments are:

        Args:
            shuffle_seed (int, optional): Seed for the shuffle. Defaults to 0
            current_phase_id (int, optional): The epoch being fetched. Needed so that
                each epoch has a different shuffle order. Defaults to 0
            num_workers (int, optional): Forwarded to the DataLoader. Defaults to 0
            pin_memory (bool, optional): Forwarded to the DataLoader. Defaults to
                False
            multiprocessing_context (optional): Forwarded to the DataLoader.
                Defaults to None
        Returns:
            An iterable over the data
        """
        # TODO: Fix naming to be consistent (i.e. everyone uses epoch)
        shuffle_seed = kwargs.get("shuffle_seed", 0)
        assert isinstance(shuffle_seed, int), "Shuffle seed must be an int"
        epoch = kwargs.get("current_phase_id", 0)
        assert isinstance(epoch, int), "Epoch must be an int"

        # The sampler is seeded with seed + epoch so that different seeds and
        # different epochs both lead to different orderings.
        offset_epoch = shuffle_seed + epoch

        return DataLoader(
            self,
            batch_size=self.batchsize_per_replica,
            num_workers=kwargs.get("num_workers", 0),
            pin_memory=kwargs.get("pin_memory", False),
            multiprocessing_context=kwargs.get("multiprocessing_context", None),
            sampler=self._get_sampler(epoch=offset_epoch),
        )

    def get_batchsize_per_replica(self):
        """
        Get the batch size per replica.

        Returns:
            The batch size for each replica.
        """
        return self.batchsize_per_replica

    def get_global_batchsize(self):
        """
        Get the global batch size, combined over all the replicas.

        Returns:
            The overall batch size of the dataset.
        """
        return self.get_batchsize_per_replica() * get_world_size()
+ + Page: + + This dataset consider every video as a collection of video clips of fixed size, + specified by ``frames_per_clip``, where the step in frames between each clip + is given by ``step_between_clips``. It uses clip sampler to sample clips + from each video. For training set, a random clip sampler is used to + sample a small number of clips (e.g. 1) from each video + For testing set, a uniform clip sampler is used to evenly sample a large + number of clips (e.g. 10) from the video. + + To give an example, for 2 videos with 10 and 15 frames respectively, + if ``frames_per_clip=5`` and ``step_between_clips=5``, the dataset size + will be (2 + 3) = 5, where the first two elements will come from video 1, + and the next three elements from video 2. Note that we drop clips which do + not have exactly ``frames_per_clip`` elements, so not all frames in a video + might be present. + + It is built on top of HMDB51 dataset class in TorchVision. + + """ + + def __init__( + self, + split: str, + batchsize_per_replica: int, + shuffle: bool, + transform: Callable, + num_samples: Optional[int], + frames_per_clip: int, + video_width: int, + video_height: int, + video_min_dimension: int, + audio_samples: int, + step_between_clips: int, + frame_rate: Optional[int], + clips_per_video: int, + video_dir: str, + splits_dir: str, + fold: int, + metadata_filepath: str, + ): + """The constructor of HMDB51Dataset. + + Args: + split: dataset split which can be either "train" or "test" + batchsize_per_replica: batch size per model replica + shuffle: If true, shuffle the dataset + transform: a dict where transforms video and audio data + num_samples: if not None, it will subsample dataset + frames_per_clip: the No. of frames in a video clip + video_width: rescaled video width. If 0, keep original width + video_height: rescaled video height. If 0, keep original height + video_min_dimension: rescale video so that min(height, width) = + video_min_dimension. 
If 0, keep original video resolution. Note + only one of (video_width, video_height) and (video_min_dimension) + can be set + audio_samples: desired audio sample rate. If 0, keep original + audio sample rate. + step_between_clips: No. of frames between each clip. + frame_rate: desired video frame rate. If None, keep + orignal video frame rate. + clips_per_video: No. of clips to sample from each video + video_dir: path to video folder + splits_dir: path to dataset splitting file folder + fold: HMDB51 dataset has 3 folds. Valid values are 1, 2 and 3. + metadata_filepath: path to the dataset meta data + + """ + # dataset metadata includes the path of video file, the pts of frames in + # the video and other meta info such as video fps, duration, audio sample rate. + # Users do not need to know the details of metadata. The computing, loading + # and saving logic of metata are all handled inside of the dataset. + # Given the "metadata_file" path, if such file exists, we load it as meta data. + # Otherwise, we compute the meta data, and save it at "metadata_file" path. 
+ metadata = None + if os.path.exists(metadata_filepath): + metadata = HMDB51Dataset.load_metadata( + metadata_filepath, video_dir=video_dir, update_file_path=True + ) + + dataset = HMDB51( + video_dir, + splits_dir, + frames_per_clip, + step_between_clips=step_between_clips, + frame_rate=frame_rate, + _precomputed_metadata=metadata, + fold=fold, + train=(split == "train"), + num_workers=torch.get_num_threads(), + _video_width=video_width, + _video_height=video_height, + _video_min_dimension=video_min_dimension, + _audio_samples=audio_samples, + ) + metadata = dataset.metadata + if metadata and not os.path.exists(metadata_filepath): + HMDB51Dataset.save_metadata(metadata, metadata_filepath) + + super().__init__( + dataset, + split, + batchsize_per_replica, + shuffle, + transform, + num_samples, + clips_per_video, + ) + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "HMDB51Dataset": + """Instantiates a HMDB51Dataset from a configuration. + + Args: + config: A configuration for a HMDB51Dataset. + See :func:`__init__` for parameters expected in the config. + + Returns: + A HMDB51Dataset instance. + """ + required_args = ["split", "metadata_file", "video_dir", "splits_dir"] + assert all( + arg in config for arg in required_args + ), f"The arguments {required_args} are all required." 
+ + split = config["split"] + ( + transform_config, + batchsize_per_replica, + shuffle, + num_samples, + frames_per_clip, + video_width, + video_height, + video_min_dimension, + audio_samples, + step_between_clips, + frame_rate, + clips_per_video, + ) = cls.parse_config(config) + + transform = build_video_field_transform_default(transform_config, split) + + return cls( + split, + batchsize_per_replica, + shuffle, + transform, + num_samples, + frames_per_clip, + video_width, + video_height, + video_min_dimension, + audio_samples, + step_between_clips, + frame_rate, + clips_per_video, + config["video_dir"], + config["splits_dir"], + config["fold"] + if "fold" in config + else 1, # HMDB51 has 3 folds. Use fold 1 by default + config["metadata_file"], + ) diff --git a/classy_vision/dataset/classy_imagenet.py b/classy_vision/dataset/classy_imagenet.py new file mode 100644 index 0000000000..440aa17973 --- /dev/null +++ b/classy_vision/dataset/classy_imagenet.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
@register_dataset("classy_imagenet")
class ImageNetDataset(ClassyDataset):
    """ClassyDataset over a torchvision ``ImageFolder`` rooted at ``<root>/<split>``."""

    def __init__(
        self,
        split: str,
        batchsize_per_replica: int,
        shuffle: bool,
        transform: Optional[Union[ClassyTransform, Callable]],
        num_samples: Optional[int],
        root: str,
    ):
        """Create the dataset from the image folder of one split.

        Args:
            split: Split of dataset to use; also the name of the sub-folder of
                ``root`` that is read
            batchsize_per_replica: Batch size for each replica
            shuffle: Whether we should shuffle between epochs
            transform: Transform to be applied to each sample
            num_samples: When set, this restricts the number of samples
                provided by the dataset
            root: Folder which contains one sub-folder per split
        """
        image_folder = ImageFolder(os.path.join(root, split))
        super().__init__(
            image_folder,
            split,
            batchsize_per_replica,
            shuffle,
            transform,
            num_samples,
        )

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "ImageNetDataset":
        """Create an ImageNetDataset from a configuration dict.

        Besides the common options handled by ``parse_config``, the keys
        "split" and "root" are read from the config.

        Args:
            config: A configuration for a ImageNetDataset.
                See :func:`__init__` for parameters expected in the config.

        Returns:
            A ImageNetDataset instance.
        """
        common_options = cls.parse_config(config)
        transform_config, batchsize_per_replica, shuffle, num_samples = common_options

        split = config.get("split")
        root = config.get("root")
        transform = build_transforms(transform_config)

        return cls(
            split=split,
            batchsize_per_replica=batchsize_per_replica,
            shuffle=shuffle,
            transform=transform,
            num_samples=num_samples,
            root=root,
        )
@register_dataset("kinetics400")
class Kinetics400Dataset(ClassyVideoDataset):
    """Kinetics-400 is an action recognition video dataset, and it has 400 classes.

    It is originally published in (https://arxiv.org/pdf/1705.06950.pdf).

    This dataset considers every video as a collection of video clips of fixed
    size, specified by ``frames_per_clip``, where the step in frames between each
    clip is given by ``step_between_clips``. It uses a clip sampler to sample
    clips from each video. For the training set, a random clip sampler is used to
    sample a small number of clips (e.g. 1) from each video.
    For the testing set, a uniform clip sampler is used to evenly sample a large
    number of clips (e.g. 10) from the video.

    To give an example, for 2 videos with 10 and 15 frames respectively, if
    ``frames_per_clip=5`` and ``step_between_clips=5``, the dataset size
    will be (2 + 3) = 5, where the first two elements will come from video 1,
    and the next three elements from video 2. Note that we drop clips which do
    not have exactly ``frames_per_clip`` elements, so not all frames in a video
    might be present.

    We assume videos are already trimmed to 10-second clips, and are stored in a
    folder.

    It is built on top of Kinetics400 dataset class in TorchVision.

    """

    def __init__(
        self,
        split: str,
        batchsize_per_replica: int,
        shuffle: bool,
        transform: Callable,
        num_samples: Optional[int],
        frames_per_clip: int,
        video_width: int,
        video_height: int,
        video_min_dimension: int,
        audio_samples: int,
        audio_channels: int,
        step_between_clips: int,
        frame_rate: Optional[int],
        clips_per_video: int,
        video_dir: str,
        extensions: List[str],
        metadata_filepath: str,
    ):
        """The constructor of Kinetics400Dataset.

        Args:
            split: dataset split which can be either "train" or "test". It is
                passed to the parent class only, not to the torchvision
                dataset (presumably ``video_dir`` already points at the
                videos of that split; confirm with callers)
            batchsize_per_replica: batch size per model replica
            shuffle: If true, shuffle the dataset
            transform: callable applied to the video and audio data of a sample
            num_samples: if provided, it will subsample dataset
            frames_per_clip: the No. of frames in a video clip
            video_width: rescaled video width. If 0, keep original width
            video_height: rescaled video height. If 0, keep original height
            video_min_dimension: rescale video so that min(height, width) =
                video_min_dimension. If 0, keep original video resolution. Note
                only one of (video_width, video_height) and (video_min_dimension)
                can be set
            audio_samples: desired audio sample rate. If 0, keep original
                audio sample rate
            audio_channels: desired No. of audio channels. If 0, keep original
                audio channels
            step_between_clips: No. of frames between each clip.
            frame_rate: desired video frame rate. If None, keep
                original video frame rate.
            clips_per_video: No. of clips to sample from each video
            video_dir: path to video folder
            extensions: A sequence of file extensions, such as "avi" and "mp4".
                Only videos matching those file extensions are added to the
                dataset
            metadata_filepath: path to the dataset meta data

        """
        # dataset metadata includes the path of video file, the pts of frames in
        # the video and other meta info such as video fps, duration, audio sample rate.
        # Users do not need to know the details of metadata. The computing, loading
        # and saving logic of metadata are all handled inside of the dataset.
        # Given the "metadata_file" path, if such file exists, we load it as meta data.
        # Otherwise, we compute the meta data, and save it at "metadata_file" path.
        metadata = None
        if os.path.exists(metadata_filepath):
            metadata = Kinetics400Dataset.load_metadata(
                metadata_filepath, video_dir=video_dir, update_file_path=True
            )

        dataset = Kinetics400(
            video_dir,
            frames_per_clip,
            step_between_clips=step_between_clips,
            frame_rate=frame_rate,
            _precomputed_metadata=metadata,
            extensions=extensions,
            num_workers=torch.get_num_threads(),
            _video_width=video_width,
            _video_height=video_height,
            _video_min_dimension=video_min_dimension,
            _audio_samples=audio_samples,
            _audio_channels=audio_channels,
        )
        # Save the metadata only when no file exists at the target path yet.
        metadata = dataset.metadata
        if metadata and not os.path.exists(metadata_filepath):
            Kinetics400Dataset.save_metadata(metadata, metadata_filepath)

        super().__init__(
            dataset,
            split,
            batchsize_per_replica,
            shuffle,
            transform,
            num_samples,
            clips_per_video,
        )

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "Kinetics400Dataset":
        """Instantiates a Kinetics400Dataset from a configuration.

        The keys "split", "metadata_file" and "video_dir" are required.
        "audio_channels" defaults to 0 and "extensions" defaults to
        ``("mp4",)``.

        Args:
            config: A configuration for a Kinetics400Dataset.
                See :func:`__init__` for parameters expected in the config.

        Returns:
            A Kinetics400Dataset instance.
        """
        required_args = ["split", "metadata_file", "video_dir"]
        assert all(
            arg in config for arg in required_args
        ), f"The arguments {required_args} are all required."

        split = config["split"]
        audio_channels = config.get("audio_channels", 0)
        (
            transform_config,
            batchsize_per_replica,
            shuffle,
            num_samples,
            frames_per_clip,
            video_width,
            video_height,
            video_min_dimension,
            audio_samples,
            step_between_clips,
            frame_rate,
            clips_per_video,
        ) = cls.parse_config(config)
        # ("mp4") is just the string "mp4"; the trailing comma is what makes the
        # default a one-element tuple of extensions.
        extensions = config.get("extensions", ("mp4",))

        transform = build_video_field_transform_default(transform_config, split)

        return cls(
            split,
            batchsize_per_replica,
            shuffle,
            transform,
            num_samples,
            frames_per_clip,
            video_width,
            video_height,
            video_min_dimension,
            audio_samples,
            audio_channels,
            step_between_clips,
            frame_rate,
            clips_per_video,
            config["video_dir"],
            extensions,
            config["metadata_file"],
        )
+ Useful for testing since the dataset is much faster to initialize and fetch samples + from, compared to real world datasets. + """ + + def __init__( + self, + batchsize_per_replica: int, + shuffle: bool, + transform: Optional[Callable], + num_samples: int, + crop_size: int, + class_ratio: float, + seed: int, + split: Optional[str] = None, + ) -> None: + """ + Args: + batchsize_per_replica: Positive integer indicating batch size for each + replica + shuffle: Whether we should shuffle between epochs + transform: Transform to be applied to each sample + num_samples: Number of samples to return + crop_size: Image size, used for both height and width + class_ratio: Ratio of the distribution of target classes + seed: Seed used for image generation. Use the same seed to generate the same + set of samples. + split: Split of dataset to use + """ + dataset = RandomImageBinaryClassDataset( + crop_size, class_ratio, num_samples, seed + ) + super().__init__( + dataset, split, batchsize_per_replica, shuffle, transform, num_samples + ) + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "SyntheticImageDataset": + """Instantiates a SyntheticImageDataset from a configuration. + + Args: + config: A configuration for a SyntheticImageDataset. + See :func:`__init__` for parameters expected in the config. + + Returns: + A SyntheticImageDataset instance. + """ + assert all(key in config for key in ["crop_size", "class_ratio", "seed"]) + split = config.get("split") + crop_size = config["crop_size"] + class_ratio = config["class_ratio"] + seed = config["seed"] + ( + transform_config, + batchsize_per_replica, + shuffle, + num_samples, + ) = cls.parse_config(config) + + try: + transform = build_transforms(transform_config) + except Exception: + logging.error( + "We recently changed transform behavior" + " do you need to update your config?" + " See resnet50_synthetic_image_classy_config.json" + " as an example." 
+ ) + raise + + return cls( + batchsize_per_replica, + shuffle, + transform, + num_samples, + crop_size, + class_ratio, + seed, + split=split, + ) diff --git a/classy_vision/dataset/classy_synthetic_video.py b/classy_vision/dataset/classy_synthetic_video.py new file mode 100644 index 0000000000..fe90c1bcdb --- /dev/null +++ b/classy_vision/dataset/classy_synthetic_video.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +from typing import Any, Callable, Dict + +from classy_vision.generic.distributed_util import get_rank, get_world_size +from torch.utils.data.distributed import DistributedSampler + +from . import register_dataset +from .classy_video_dataset import ClassyVideoDataset +from .core import RandomVideoDataset +from .transforms.util_video import build_video_field_transform_default + + +@register_dataset("synthetic_video") +class SyntheticVideoDataset(ClassyVideoDataset): + """Classy Dataset which produces random synthetic video clips. + + Useful for testing since the dataset is much faster to initialize and fetch samples + from, compared to real world datasets. + + Note: Unlike SyntheticImageDataset, this dataset generates targets randomly, + independent of the video clips. + """ + + def __init__( + self, + num_classes: int, + split: str, + batchsize_per_replica: int, + shuffle: bool, + transform: Callable, + num_samples: int, + frames_per_clip: int, + video_width: int, + video_height: int, + audio_samples: int, + clips_per_video: int, + ): + """The constructor of SyntheticVideoDataset. + + Args: + num_classes: Number of classes in the generated targets. 
+ split: Split of dataset to use + batchsize_per_replica: batch size per model replica + shuffle: Whether we should shuffle between epochs + transform: Transform to be applied to each sample + num_samples: Number of samples to return + frames_per_clip: Number of frames in a video clip + video_width: Width of the video clip + video_height: Height of the video clip + audio_samples: Audio sample rate + clips_per_video: Number of clips per video + """ + dataset = RandomVideoDataset( + num_classes, + split, + num_samples, + frames_per_clip, + video_width, + video_height, + audio_samples, + clips_per_video, + ) + super().__init__( + dataset, + split, + batchsize_per_replica, + shuffle, + transform, + num_samples, + clips_per_video, + ) + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "SyntheticVideoDataset": + """Instantiates a SyntheticVideoDataset from a configuration. + + Args: + config: A configuration for a SyntheticVideoDataset. + See :func:`__init__` for parameters expected in the config. + + Returns: + A SyntheticVideoDataset instance. 
+ """ + split = config["split"] + num_classes = config["num_classes"] + ( + transform_config, + batchsize_per_replica, + shuffle, + num_samples, + frames_per_clip, + video_width, + video_height, + video_min_dimension, + audio_samples, + step_between_clips, + frame_rate, + clips_per_video, + ) = cls.parse_config(config) + + transform = build_video_field_transform_default(transform_config, split) + return cls( + num_classes, + split, + batchsize_per_replica, + shuffle, + transform, + num_samples, + frames_per_clip, + video_width, + video_height, + audio_samples, + clips_per_video, + ) + + @property + def video_clips(self): + raise NotImplementedError() + + def _get_sampler(self, epoch): + world_size = get_world_size() + rank = get_rank() + sampler = DistributedSampler( + self, num_replicas=world_size, rank=rank, shuffle=self.shuffle + ) + sampler.set_epoch(epoch) + return sampler diff --git a/classy_vision/dataset/classy_ucf101.py b/classy_vision/dataset/classy_ucf101.py new file mode 100644 index 0000000000..d94f521728 --- /dev/null +++ b/classy_vision/dataset/classy_ucf101.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +from typing import Any, Callable, Dict, Optional + +import torch +from torchvision.datasets.ucf101 import UCF101 + +from . import register_dataset +from .classy_video_dataset import ClassyVideoDataset +from .transforms.util_video import build_video_field_transform_default + + +@register_dataset("ucf101") +class UCF101Dataset(ClassyVideoDataset): + """UCF101 is an action recognition video dataset, and it has 101 classes. + + Page: + + This dataset consider every video as a collection of video clips of fixed size, + specified by ``frames_per_clip``, where the step in frames between each clip + is given by ``step_between_clips``. 
def __init__(
    self,
    split: str,
    batchsize_per_replica: int,
    shuffle: bool,
    transform: Callable,
    num_samples: Optional[int],
    frames_per_clip: int,
    video_width: int,
    video_height: int,
    video_min_dimension: int,
    audio_samples: int,
    step_between_clips: int,
    frame_rate: Optional[int],
    clips_per_video: int,
    video_dir: str,
    splits_dir: str,
    fold: int,
    metadata_filepath: str,
):
    """The constructor of UCF101Dataset.

    Args:
        split: dataset split which can be either "train" or "test"
        batchsize_per_replica: batch size per model replica
        shuffle: If true, shuffle the dataset
        transform: callable applied to each sample (video and audio data)
        num_samples: if not None, it will subsample dataset
        frames_per_clip: the No. of frames in a video clip
        video_width: rescaled video width. If 0, keep original width
        video_height: rescaled video height. If 0, keep original height
        video_min_dimension: rescale video so that min(height, width) =
            video_min_dimension. If 0, keep original video resolution. Note
            only one of (video_width, video_height) and (video_min_dimension)
            can be set
        audio_samples: desired audio sample rate. If 0, keep original
            audio sample rate.
        step_between_clips: No. of frames between each clip.
        frame_rate: desired video frame rate. If None, keep original video
            frame rate.
        clips_per_video: No. of clips to sample from each video
        video_dir: path to video folder
        splits_dir: path to dataset splitting file folder
        fold: UCF101 dataset has 3 folds. Valid values are 1, 2 and 3.
        metadata_filepath: path to the dataset meta data

    """
    # Dataset metadata includes the path of each video file, the pts of its
    # frames and other info such as video fps, duration, audio sample rate.
    # Users do not need to know the details of the metadata: computing,
    # loading and saving it are all handled here.
    # If a file exists at "metadata_filepath" we load it as metadata.
    # Otherwise the metadata is computed by the underlying dataset and then
    # saved at "metadata_filepath".
    metadata = None
    if os.path.exists(metadata_filepath):
        metadata = UCF101Dataset.load_metadata(
            metadata_filepath, video_dir=video_dir, update_file_path=True
        )

    dataset = UCF101(
        video_dir,
        splits_dir,
        frames_per_clip,
        step_between_clips=step_between_clips,
        frame_rate=frame_rate,
        _precomputed_metadata=metadata,
        fold=fold,
        # The comparison already yields a bool; no conditional needed.
        train=split == "train",
        num_workers=torch.get_num_threads(),
        _video_width=video_width,
        _video_height=video_height,
        _video_min_dimension=video_min_dimension,
        _audio_samples=audio_samples,
    )
    metadata = dataset.metadata
    # Only write the metadata when no file is present yet, so an existing
    # file is never overwritten.
    if metadata and not os.path.exists(metadata_filepath):
        UCF101Dataset.save_metadata(metadata, metadata_filepath)

    super().__init__(
        dataset,
        split,
        batchsize_per_replica,
        shuffle,
        transform,
        num_samples,
        clips_per_video,
    )
class MaxLengthClipSampler(Sampler):
    """MaxLengthClipSampler is a thin wrapper on top of clip samplers in TorchVision.

    It takes as input a TorchVision clip sampler, and an optional argument
    `num_samples` to limit the number of samples.
    """

    def __init__(self, clip_sampler, num_samples=None):
        """The constructor method of MaxLengthClipSampler.

        Args:
            clip_sampler: clip sampler without a limit on the total number of clips
                it can sample, such as RandomClipSampler and UniformClipSampler.
            num_samples: if provided, it denotes the maximal number of clips the sampler
                will return

        """
        self.clip_sampler = clip_sampler
        self.num_samples = num_samples

    def __iter__(self):
        """Yield clips from the wrapped sampler, at most ``len(self)`` of them."""
        # Imported locally so this class does not depend on a module-level
        # import that the surrounding file does not have.
        from itertools import islice

        # islice stops after the limit without pulling an extra clip from the
        # wrapped sampler, unlike a manual counter checked inside the loop.
        yield from islice(self.clip_sampler, len(self))

    def __len__(self):
        """Return the wrapped sampler's length, capped by ``num_samples`` if set."""
        full_size = len(self.clip_sampler)
        if self.num_samples is None:
            return full_size

        return min(full_size, self.num_samples)
@classmethod
def parse_config(cls, config: Dict[str, Any]):
    """Parse config to prepare arguments needed by the class constructor.

    Args:
        config: dataset configuration. ``frames_per_clip`` and ``split`` must
            be present; the video/audio settings fall back to defaults when
            missing.

    Returns:
        A 12-tuple ``(transform_config, batchsize_per_replica, shuffle,
        num_samples, frames_per_clip, video_width, video_height,
        video_min_dimension, audio_samples, step_between_clips, frame_rate,
        clips_per_video)``. The first four entries come from the parent
        class's ``parse_config``.
    """
    assert "frames_per_clip" in config, "frames_per_clip must be set"

    video_width = config.get("video_width", 0)
    video_height = config.get("video_height", 0)
    video_min_dimension = config.get("video_min_dimension", 0)
    audio_samples = config.get("audio_samples", 0)
    step_between_clips = config.get("step_between_clips", 1)
    frame_rate = config.get("frame_rate", None)
    clips_per_video = config.get("clips_per_video", 1)

    (
        transform_config,
        batchsize_per_replica,
        shuffle,
        num_samples,
    ) = super().parse_config(config)

    # At testing time, we do not crop frames but conduct a FCN-style evaluation.
    # Video spatial resolution can vary from video to video. So we test one
    # video at a time, and No. of clips in a minibatch should be equal to
    # No. of clips sampled from a video.
    if config["split"] != "train" and batchsize_per_replica != clips_per_video:
        # The trailing space keeps the two message halves from running
        # together ("(8)should be ...").
        logging.warning(
            f"For testing, batchsize per replica ({batchsize_per_replica}) "
            f"should be equal to clips_per_video ({clips_per_video})"
        )

    return (
        transform_config,
        batchsize_per_replica,
        shuffle,
        num_samples,
        config["frames_per_clip"],
        video_width,
        video_height,
        video_min_dimension,
        audio_samples,
        step_between_clips,
        frame_rate,
        clips_per_video,
    )
@classmethod
def save_metadata(cls, metadata: Dict[str, Any], filepath: str):
    """Save dataset meta data into a file.

    The parent folder of ``filepath`` is created when it does not exist yet.

    Args:
        metadata: dataset meta data, which contains video meta information, such
            as video file path, video fps, video frame timestamp in each video.
            For the format of dataset meta data, check the TorchVision
            documentation below.
            https://github.com/pytorch/vision/blob/master/torchvision/datasets
            /video_utils.py#L132-L137
        filepath: file path where the meta data will be saved

    Raises:
        Exception: whatever ``os.makedirs`` raises when the folder cannot be
            created; it is logged and re-raised.
    """
    filedir = os.path.dirname(filepath)
    # An empty dirname means "current directory": nothing to create there.
    if filedir and not os.path.exists(filedir):
        try:
            # os.makedirs is the real API (os.mkdirs does not exist);
            # exist_ok tolerates another process creating the folder first.
            os.makedirs(filedir, exist_ok=True)
        except Exception as err:
            logging.warning(f"Fail to create folder: {filedir}")
            raise err
    logging.info(f"Save metadata to file: {filepath}")
    try:
        torch.save(metadata, filepath)
    except ValueError:
        # Saving is best-effort: a ValueError from torch.save is logged and
        # swallowed, as before.
        logging.warning(f"Fail to save metadata to file: {filepath}")
def iterator(self, *args, **kwargs):
    """Overrides the implementation in parent class `ClassyDataset`.

    All positional and keyword arguments are forwarded to
    `ClassyDataset.iterator(...)`; see that method for what is accepted.

    A video dataset may use the VideoClips class from TorchVision, which may
    rely on a cpp python extension for video decoding when the video backend
    is set to `video_reader`. In that case "spawn" is difficult to use as the
    multiprocessing start method, so "fork" is selected whenever worker
    processes are requested.
    """
    if "num_workers" in kwargs:
        if kwargs["num_workers"] > 0:
            fork_context = multiprocessing.get_context("fork")
            kwargs["multiprocessing_context"] = fork_context
    parent = super(ClassyVideoDataset, self)
    return parent.iterator(*args, **kwargs)
class ListDataset:
    """Dataset backed by a list of items, a loader for them, and optional metadata.

    The default loader is an image file loader, so the dataset can be used
    directly with a list of image files. Pass ``metadata=None`` to use it
    without metadata.
    """

    def __init__(self, files, metadata, loader=default_loader):
        """
        metadata (List[Dict[Type]] or List[Type], Optional):
            metadata to be added to each sample.
            The Type can be anything that pytorch default_collate can handle.
            If Type is tensor, make sure that the tensors are of same dimension.
        """
        if metadata is not None:
            assert isinstance(metadata, list), "metadata should be a list"
            assert len(files) == len(metadata)
            assert len(files) > 0, "Empty ListDataset is not allowed"
            first_entry = metadata[0]
            if not isinstance(first_entry, dict):
                # Bare values are treated as targets and wrapped in a dict so
                # every sample's metadata has the same shape.
                metadata = [{"target": value} for value in metadata]
        self.files = files
        self.metadata = metadata
        self.loader = loader

    def __getitem__(self, idx):
        """Load item ``idx`` and return it as a dict merged with its metadata."""
        assert 0 <= idx < len(self)
        sample = {"input": self.loader(self.files[idx])}
        if self.metadata is None:
            return sample
        sample.update(self.metadata[idx])
        return sample

    def __len__(self):
        """Number of items in the dataset."""
        return len(self.files)
def _get_typed_sample(input, target, sample_type):
    """Pack ``input`` and ``target`` into the container named by ``sample_type``.

    Returns a dict with keys "input"/"target", a 2-tuple, or a 2-element list
    for ``SampleType.DICT``, ``SampleType.TUPLE`` and ``SampleType.LIST``
    respectively.

    Raises:
        TypeError: if ``sample_type`` is none of the three supported values.
    """
    if sample_type == SampleType.DICT:
        return {"input": input, "target": target}
    if sample_type == SampleType.TUPLE:
        return (input, target)
    if sample_type == SampleType.LIST:
        return [input, target]
    raise TypeError("Provided sample_type is not dict, list, tuple")
class RandomVideoDataset:
    """Synthetic video dataset that generates random clips, audio and labels.

    Each sample is generated inside a ``torch_seed`` context derived from
    ``seed`` and the sample index.
    """

    def __init__(
        self,
        num_classes,
        split,
        num_samples,
        frames_per_clip,
        video_width,
        video_height,
        audio_samples,
        clips_per_video,
        seed=10,
    ):
        self.num_classes = num_classes
        self.split = split
        # video config
        self.video_channels = 3
        self.num_samples = num_samples
        self.frames_per_clip = frames_per_clip
        self.video_width = video_width
        self.video_height = video_height
        # audio config
        self.audio_samples = audio_samples
        self.clips_per_video = clips_per_video
        # misc config
        self.seed = seed

    def __getitem__(self, idx):
        """Return ``{"input": {"video", "audio"}, "target"}`` for sample ``idx``."""
        # For training we assume a single clip is sampled per video, so every
        # index gets its own label seed. For testing, clips from the same
        # video share the same target label.
        label_offset = idx if self.split == "train" else idx // self.clips_per_video
        with torch_seed(self.seed + label_offset):
            target = torch.randint(0, self.num_classes, (1,)).item()

        clip_shape = (
            self.frames_per_clip,
            self.video_height,
            self.video_width,
            self.video_channels,
        )
        with torch_seed(self.seed + idx):
            # Video is drawn before audio inside the same seeded context.
            video = torch.randint(0, 256, clip_shape, dtype=torch.uint8)
            audio = torch.rand((self.audio_samples, 1), dtype=torch.float)
            return {"input": {"video": video, "audio": audio}, "target": target}

    def __len__(self):
        """Number of samples the dataset reports."""
        return self.num_samples
a/classy_vision/dataset/generic/hive.py b/classy_vision/dataset/generic/hive.py new file mode 100644 index 0000000000..8d01893a6e --- /dev/null +++ b/classy_vision/dataset/generic/hive.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import logging +import os +import sys + +# NOTE: Edit /usr/local/jdk-8u60-64/jre/lib/logging.properties to hide all logs +import metastore +from hiveio import par_init +from hiveio_cpp import hiveio + + +# set classpath: +par_init.install_class_path() + + +def download_from_hive( + namespace, + tablename, + everstore_column, + label_column, + partition_column, + partition_column_values, +): + """ + Function to load IDs from Hive. + """ + # disable HiveIO info logs + os.environ["GLOG_minloglevel"] = "3" + # set partition name and columns: + partitions = [ + "{0}={1}".format(partition_column, partition_column_value) + for partition_column_value in partition_column_values + ] + columns = [everstore_column, label_column] + + # check metastore for number of rows: + sys.argv = ["."] + ms = metastore.metastore(namespace=namespace) + num_rows = 0 + for partition in partitions: + assert ms.exists_partition( + tablename, partition + ), "partition not found: {0}/{1}".format(tablename, partition) + + p = ms.get_partition(tablename, partition) + num_rows += int(p.parameters["numRows"]) + + # start HiveIO reader: + batch_size = 10000 + hiveio.start_reading( + namespace=namespace, + table=tablename, + partitions=partitions, + column_names=columns, + batch_size=batch_size, + max_queued_batches=10, + ) + + # read all data: + handles, labels, cnt = [None] * num_rows, [None] * num_rows, 0 + while True: + # read and process batch: + batch = hiveio.get_batch() + if batch == [] or cnt + len(batch) > num_rows: + break + cur_handles = [val[0] for val in batch] + cur_labels = 
def get_partition_to_num_rows(
    namespace, tablename, partition_column, partition_column_values
):
    """
    Helper function to get total num_rows in hive for given
    partition_column_values.

    Returns a dict mapping each requested partition column value that exists
    in the table to the ``numRows`` parameter of that partition, as an int.
    Every partition of the table must carry a ``numRows`` parameter, whether
    requested or not; otherwise the assertion below fails.
    """
    partitions = {
        "{0}={1}".format(partition_column, partition_column_value)
        for partition_column_value in partition_column_values
    }
    # Setting higher number of retries, as during testing, sometimes default
    # "retries" values didn't seem enough in some cases.
    ms = metastore.metastore(
        namespace=namespace,
        meta_only=True,
        retries=10,
        # timeout in milliseconds.
        timeout=1800000,
    )
    partition_to_num_rows = {}

    for hive_partition in ms.get_partitions(tablename):
        assert "numRows" in hive_partition.parameters, (
            "numRows not in hive_partition.parameters, "
            "Do not use Presto tables, only Hive tables!"
        )
        if hive_partition.partitionName in partitions:
            # Split only on the first "=" so a value that itself contains
            # "=" is kept whole instead of being truncated.
            partition_column_value = hive_partition.partitionName.split("=", 1)[1]
            partition_to_num_rows[partition_column_value] = int(
                hive_partition.parameters["numRows"]
            )

    return partition_to_num_rows
def _load_dataset(image_paths, targets):
    """Create the underlying dataset for ``image_paths``.

    Args:
        image_paths: either a directory path (string) or a list of image
            file paths.
        targets: optional list of targets used with a list of paths. When
            None, an empty tensor is used as the target of every path.

    Returns:
        A ``(dataset, preproc_transform)`` pair. For a directory the dataset
        is an ``ImageFolder`` and the transform maps its tuple samples to the
        ``{"input", "target"}`` keys; for a list of paths the dataset is a
        ``ListDataset`` and the transform is None.
    """
    if isinstance(image_paths, str):
        assert os.path.isdir(
            image_paths
        ), "Expect image_paths to be a dir when it is a string"
        dataset = datasets.ImageFolder(image_paths)
        preproc_transform = TupleToMapTransform(list_of_map_keys=["input", "target"])
        return dataset, preproc_transform

    # Default targets are only built for the list case. Building them before
    # the directory check would allocate one tensor per character of the
    # directory path and then discard them.
    if targets is None:
        targets = [torch.tensor([]) for _ in image_paths]
    return ListDataset(image_paths, targets), None
+ + Args: + batchsize_per_replica: Positive integer indicating batch size for each + replica + shuffle: Whether we should shuffle between epochs + transform: Transform to be applied to each sample + num_samples: When set, this restricts the number of samples provided by + the dataset + image_paths: A directory or a list of file paths where images can be found. + targets: If a list of file paths is specified, this argument can + be used to specify a target for each path (must be same length + as list of file paths). If no targets are needed or image_paths is + a directory, then targets should be None. + split: Split of dataset ("train", "test") + + """ + # TODO(@mannatsingh): we should be able to call build_dataset() to create + # datasets from this class. + assert image_paths is not None, "image_paths needs to be provided" + assert targets is None or isinstance(image_paths, list), ( + "targets cannot be specified when image_paths is a directory containing " + "the targets in the directory structure" + ) + dataset, preproc_transform = _load_dataset(image_paths, targets) + super().__init__( + dataset, split, batchsize_per_replica, shuffle, transform, num_samples + ) + # Some of the base datasets from _load_dataset have different + # sample formats, the preproc_transform should map them all to + # the dict {"input": img, "target": label} format + if preproc_transform is not None: + self.transform = transforms.Compose([preproc_transform, self.transform]) + + @classmethod + def from_config( + cls, + config: Dict[str, Any], + image_paths: Union[str, List[str]], + targets: Optional[List[Any]] = None, + ): + """Instantiates ImagePathDataset from a config. + + Because image_paths / targets can be arbitrarily long, we + allow passing in the image paths and targets from python in + addition to the configuration parameter. + + Args: + config: A configuration for ImagePathDataset. + See :func:`__init__` for parameters expected in the config. 
def build_transform(transform_config: Dict[str, Any]) -> Callable:
    """Builds a ClassyTransform from a config.

    The config must contain a 'name' key, which selects the transform class.
    A config such as `{"name": "my_transform", "foo": "bar"}` looks up the
    class registered as "my_transform" (see :func:`register_transform`) and
    calls .from_config on it with the remaining keys.

    Names that are not registered are looked up in `torchvision.transforms`
    and then in the torchvision video transforms module; the remaining config
    keys are expanded into constructor keyword arguments. For instance, the
    following call will instantiate a :class:`torchvision.transforms.CenterCrop`:

        build_transform({"name": "CenterCrop", "size": 224})
    """
    assert (
        "name" in transform_config
    ), f"name not provided for transform: {transform_config}"
    # Work on a deep copy so the caller's config is left untouched.
    transform_args = copy.deepcopy(transform_config)
    name = transform_args.pop("name")

    if name in TRANSFORM_REGISTRY:
        return TRANSFORM_REGISTRY[name].from_config(transform_args)

    # the name should be available in torchvision.transforms
    in_image_transforms = hasattr(transforms, name)
    assert in_image_transforms or hasattr(transforms_video, name), (
        f"{name} isn't a registered tranform"
        ", nor is it available in torchvision.transforms"
    )
    source_module = transforms if in_image_transforms else transforms_video
    return getattr(source_module, name)(**transform_args)
class ClassyTransform(ABC):
    """
    Abstract base class for data transforms.

    A transform is a callable which is most often used to pre-process input
    data (e.g. image, video) before it is sent to a model, but it can be used
    for other purposes as well. Subclasses have to implement `__call__`.
    """

    @abstractmethod
    def __call__(self, image):
        """
        Transforms the input data. Subclasses put the actual implementation of
        the data transform here.

        Args:
            image: input image data
        """

    @classmethod
    def from_config(cls, config: Dict[str, Any]):
        """
        Instantiates the transform from a config.

        Args:
            config: dict whose entries are passed to the constructor as
                keyword arguments

        Returns:
            An instance of the class this method is called on.
        """
        instance = cls(**config)
        return instance
+ """ + + def __init__( + self, + alphastd=_DEFAULT_COLOR_LIGHTING_STD, + eigval=_IMAGENET_EIGEN_VAL, + eigvec=_IMAGENET_EIGEN_VEC, + ): + self.alphastd = alphastd + # Divide by 255 as the Lighting operation is expected to be applied + # on `img` pixels ranging between [0.0, 1.0] + self.eigval = torch.tensor(eigval) / 255.0 + self.eigvec = torch.tensor(eigvec) / 255.0 + + def __call__(self, img): + """ + img: (C x H x W) Tensor with values in range [0.0, 1.0] + """ + assert ( + img.min() >= 0.0 and img.max() <= 1.0 + ), "Image should be normalized by 255 and be in range [0.0, 1.0]" + if self.alphastd == 0: + return img + + alpha = img.new().resize_(3).normal_(0, self.alphastd) + rgb = ( + self.eigvec.type_as(img) + .clone() + .mul(alpha.view(1, 3).expand(3, 3)) + .mul(self.eigval.view(1, 3).expand(3, 3)) + .sum(1) + .squeeze() + ) + + return img.add(rgb.view(3, 1, 1).expand_as(img)) diff --git a/classy_vision/dataset/transforms/util.py b/classy_vision/dataset/transforms/util.py new file mode 100644 index 0000000000..06cc8e7cbd --- /dev/null +++ b/classy_vision/dataset/transforms/util.py @@ -0,0 +1,379 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import torchvision.transforms as transforms + +from . import ClassyTransform, build_transforms, register_transform + + +class ImagenetConstants: + """Constant variables related to the image classification. + + MEAN: often used to be subtracted from image RGB value. Computed on ImageNet. + STD: often used to divide the image RGB value after mean centering. Computed + on ImageNet. + CROP_SIZE: the size of image cropping which is often the input to deep network. + RESIZE: the size of rescaled image. 
@register_transform("apply_transform_to_key")
class ApplyTransformToKey:
    """Serializable class that applies a transform to a key specified field in samples.
    """

    def __init__(self, transform: Callable, key: Union[int, str] = "input") -> None:
        """The constructor method of ApplyTransformToKey class.

        Args:
            transform: a callable which is applied to the value stored under
                key in the sample
            key: the key (for dict samples) or index (for tuple / list
                samples) whose corresponding value will undergo the transform

        """
        self.key: Union[int, str] = key
        self.transform: Callable = transform

    @classmethod
    def from_config(cls, config: Dict[str, Any]):
        """Instantiates the class from a config.

        Args:
            config: dict with a "transforms" entry (a list of transform
                configs, see build_transforms) and a "key" entry

        """
        transform = build_transforms(config["transforms"])

        return cls(transform=transform, key=config["key"])

    def __call__(
        self, sample: Union[Tuple[Any], Dict[str, Any]]
    ) -> Union[Tuple[Any], Dict[str, Any]]:
        """Updates sample by applying a transform to the value at the specified key.

        Dict samples are updated in place. Tuple / list samples are copied
        first, and the result has the same type as the input. A None sample
        is passed through.

        Args:
            sample: input sample which will be transformed

        """
        if sample is None:
            return sample

        # Asserts + deal with tuple immutability
        convert_to_tuple = False
        if isinstance(sample, dict):
            assert (
                self.key in sample
            ), "This transform only supports dicts with key '{}'".format(self.key)
        elif isinstance(sample, (tuple, list)):
            # Both literals have to live inside the parentheses: written as
            # two statements, the second half of the message (and its
            # .format call) is a discarded expression and never reported.
            assert self.key < len(sample), (
                "This transform only supports tuples / lists with key less "
                "than {length}, key provided {key}".format(
                    length=len(sample), key=self.key
                )
            )
            # Convert to list for transformation
            if isinstance(sample, tuple):
                convert_to_tuple = True
            sample = list(sample)

        sample[self.key] = self.transform(sample[self.key])
        if convert_to_tuple:
            sample = tuple(sample)

        return sample
@register_transform("imagenet_no_augment")
class ImagenetNoAugmentTransform(ClassyTransform):
    """The default image transform without data augmentation.

    It is often useful for testing models on Imagenet. The transform resizes
    the image, takes a center crop, converts the PIL image into a
    torch.Tensor and finally normalizes the pixel values by subtracting the
    mean and dividing by the standard deviation.

    """

    def __init__(
        self,
        resize: int = ImagenetConstants.RESIZE,
        crop_size: int = ImagenetConstants.CROP_SIZE,
        mean: List[float] = ImagenetConstants.MEAN,
        std: List[float] = ImagenetConstants.STD,
    ):
        """The constructor method of ImagenetNoAugmentTransform class.

        Args:
            resize: expected image size after resizing
            crop_size: expected size of the center crop
            mean: a 3-tuple denoting the pixel RGB mean
            std: a 3-tuple denoting the pixel RGB standard deviation

        """
        # The steps are applied in the order in which they are listed
        steps = [
            transforms.Resize(resize),
            transforms.CenterCrop(crop_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ]
        self.transform = transforms.Compose(steps)

    def __call__(self, img):
        """Applies the transform to the input image.

        Args:
            img: input image that will undergo the transform

        """
        transformed = self.transform(img)
        return transformed
@register_transform("tuple_to_map")
class TupleToMapTransform(ClassyTransform):
    """A transform which maps image data from tuple to dict.

    A sample of the form (data1, data2, ...) is turned into a sample of the
    form {key1: data1, key2: data2, ...}, where the keys are given to the
    constructor.

    This is useful for mapping the output of datasets like the PyTorch
    ImageFolder dataset (tuple) to a dict with named data fields.

    A sample which already is a dict with the required keys is passed
    through unchanged.

    """

    def __init__(self, list_of_map_keys: List[str]):
        """The constructor method of TupleToMapTransform class.

        Args:
            list_of_map_keys: the dict keys; the i-th key is assigned to the
                i-th entry of the input sample

        """
        self._map_keys = list_of_map_keys

    def __call__(self, sample):
        """Transform sample from type tuple to type dict.

        Args:
            sample: input sample which will be transformed

        """
        if isinstance(sample, dict):
            # Nothing to convert, only verify that no key is missing
            assert all(
                key in sample for key in self._map_keys
            ), "Sample {sample} must be a tuple or a dict with keys {keys}".format(
                sample=str(sample), keys=str(self._map_keys)
            )
            return sample

        assert len(sample) == len(self._map_keys), (
            "Provided sample tuple must have same number of keys "
            "as provided to transform"
        )
        # Pair every entry of the sample with the key at the same position
        return {self._map_keys[idx]: entry for idx, entry in enumerate(sample)}
def default_unnormalize(img):
    """Default unnormalization transform which undoes "transforms.Normalize".

    Every channel is multiplied by the Imagenet standard deviation and the
    Imagenet mean is added afterwards, which cancels out the mean subtraction
    and the standard deviation division. The input is not modified, a new
    tensor is returned.

    Args:
        img (torch.Tensor): image data to which the transform will be applied

    """
    # TODO T39752655: Allow this to be configurable
    restored = img.clone()
    channel_stats = zip(ImagenetConstants.STD, ImagenetConstants.MEAN)
    # The planes are modified in place, i.e. the clone gets updated
    for plane, (std, mean) in zip(restored, channel_stats):
        plane.mul_(std)
        plane.add_(mean)
    return restored
@register_transform("video_clip_random_resize_crop")
class VideoClipRandomResizeCrop(ClassyTransform):
    """A video clip transform that is often useful for training data.

    Given a size range, randomly choose a size. Rescale the clip so that
    its short edge equals to the chosen size. Then randomly crop the video
    clip with the specified size.
    Such training data augmentation is used in VGG net
    (https://arxiv.org/abs/1409.1556).
    Also see reference implementation `Kinetics.spatial_sampling` in SlowFast
    codebase.
    """

    def __init__(
        self,
        crop_size: Union[int, List[int]],
        size_range: List[int],
        interpolation_mode: str = "bilinear",
    ):
        """The constructor method of VideoClipRandomResizeCrop class.

        Args:
            crop_size: int, or a 2-element tuple / list, as the expected
                output crop_size (height, width). An int is used for both
                height and width
            size_range: the min- and max size
            interpolation_mode: Default: "bilinear"

        """
        # Lists have to be accepted as well: a crop size which is read from
        # a JSON config is a list, and treating it like a scalar would give
        # a pair of lists.
        if isinstance(crop_size, (tuple, list)):
            assert len(crop_size) == 2, "crop_size should be tuple (height, width)"
            self.crop_size = tuple(crop_size)
        else:
            self.crop_size = (crop_size, crop_size)

        self.interpolation_mode = interpolation_mode
        self.size_range = size_range

    def __call__(self, clip):
        """Callable function which applies the transform to the input clip.

        Args:
            clip (torch.Tensor): input clip tensor

        """
        # clip size: C x T x H x W
        rand_size = random.randint(self.size_range[0], self.size_range[1])
        new_h, new_w = _get_rescaled_size(rand_size, clip.size()[2], clip.size()[3])
        clip = torch.nn.functional.interpolate(
            clip, size=(new_h, new_w), mode=self.interpolation_mode
        )
        assert (
            self.crop_size[0] <= new_h and self.crop_size[1] <= new_w
        ), "crop size can not be larger than video frame size"

        # Top left corner of the crop, both bounds of randint are inclusive
        i = random.randint(0, new_h - self.crop_size[0])
        j = random.randint(0, new_w - self.crop_size[1])
        clip = clip[:, :, i : i + self.crop_size[0], j : j + self.crop_size[1]]
        return clip
@register_transform("video_tuple_to_map_transform")
class VideoTupleToMapTransform(ClassyTransform):
    """A video transform which maps video data from tuple to dict.

    A sample of the form (video, audio, target) is turned into a sample of
    the form {"input": {"video": video, "audio": audio}, "target": target}.
    A sample which already is a dict with these keys is passed through.

    It's particularly useful for remapping torchvision samples which are
    tuples of the form (video, audio, target).
    """

    def __call__(self, sample):
        """Callable function which applies the transform to the input sample data.

        Args:
            sample: input sample data that will undergo the transform

        """
        if not isinstance(sample, dict):
            # Should be a tuple (or other sequential) of length 3, transform to map
            assert len(sample) == 3, "Sequential must be length 3 for conversion"
            video, audio, target = sample
            return {"input": {"video": video, "audio": audio}, "target": target}

        # Sample is a map: only verify the keys and pass it through
        assert all(key in sample for key in ("input", "target")), (
            "Input to tuple to map transform must be a tuple of length 3 "
            "or a dict with keys 'input' and 'target'"
        )
        assert all(
            modality in sample["input"] for modality in ("video", "audio")
        ), "Input data must include video / audio fields"
        return sample
+ + It sequentially prepares a torch.Tensor of video data, randomly + resizes the video clip, takes a random spatial cropping, randomly flips the + video clip horizontally, and normalizes the pixel values by mean subtraction + and standard deviation division. + + """ + + def __init__( + self, + crop_size: Union[int, List[int]] = VideoConstants.CROP_SIZE, + size_range: List[int] = VideoConstants.SIZE_RANGE, + mean: List[float] = VideoConstants.MEAN, + std: List[float] = VideoConstants.STD, + ): + """The constructor method of VideoDefaultAugmentTransform class. + + Args: + crop_size: expected output crop_size (height, width) + size_range : a 2-tuple denoting the min- and max size + mean: a 3-tuple denoting the pixel RGB mean + std: a 3-tuple denoting the pixel RGB standard deviation + + """ + + self._transform = transforms.Compose( + [ + transforms_video.ToTensorVideo(), + # TODO(zyan3): migrate VideoClipRandomResizeCrop to TorchVision + VideoClipRandomResizeCrop(crop_size, size_range), + transforms_video.RandomHorizontalFlipVideo(), + transforms_video.NormalizeVideo(mean=mean, std=std), + ] + ) + + def __call__(self, video): + """Apply the default transform with data augmentation to video. + + Args: + video: input video that will undergo the transform + + """ + return self._transform(video) + + +@register_transform("video_default_no_augment") +class VideoDefaultNoAugmentTransform(ClassyTransform): + """This is the default video transform without data augmentation which is useful + for testing. + + It sequentially prepares a torch.Tensor of video data, resize the + video clip to have the specified short edge, and normalize the pixel values + by mean subtraction and standard deviation division. + + """ + + def __init__( + self, + size: int = VideoConstants.SIZE_RANGE[0], + mean: List[float] = VideoConstants.MEAN, + std: List[float] = VideoConstants.STD, + ): + """The constructor method of VideoDefaultNoAugmentTransform class. 
class ClassyVideoGenericTransform(object):
    """This is a generic video transform which bundles one transform per data
    modality, i.e. a video transform and an audio transform.
    """

    def __init__(
        self,
        config: Optional[Dict[str, List[Dict[str, Any]]]] = None,
        split: str = "train",
    ):
        """The constructor method of ClassyVideoGenericTransform class.

        Args:
            config: If provided, it is a dict where key is the data modality
                ("video" or "audio"), and value is the transform config which
                replaces the default transform of that modality
            split: the split of the data to which the transform will be
                applied. The default video transform uses data augmentation
                for "train" only
        """
        if split == "train":
            video_transform = VideoDefaultAugmentTransform()
        else:
            video_transform = VideoDefaultNoAugmentTransform()
        self.transforms = {"video": video_transform, "audio": DummyAudioTransform()}

        if config is None:
            return
        # Configured modalities override the defaults
        for mode, modal_config in config.items():
            assert mode in ("video", "audio"), "unknown video data modality %s" % mode
            self.transforms[mode] = build_transforms(modal_config)

    def __call__(self, video: Dict):
        """Callable function which applies the transform to the input video data.

        The dict is updated in place and returned. Entries without a
        transform for their modality are left as they are.

        Args:
            video: input video data that will undergo the transform

        """
        assert isinstance(video, dict), "video data is expected be a dict"
        for mode in video:
            if mode not in self.transforms:
                continue
            # Only values of existing keys are replaced, which is safe while
            # iterating over the dict
            modal_transform = self.transforms[mode]
            video[mode] = modal_transform(video[mode])
        return video
class NodeLaunchActor:
    """Ray actor. The code here will run in each node allocated by Ray."""

    def run(self, master_addr, master_port, node_rank, dist_world_size, args):
        """Spawns the training processes of this node and waits for them.

        One process is started per local rank. Each process gets a copy of
        the current environment, extended by MASTER_ADDR, MASTER_PORT,
        WORLD_SIZE, RANK and LOCAL_RANK.

        Args:
            master_addr: value of the MASTER_ADDR environment variable
            master_port: value of the MASTER_PORT environment variable
            node_rank: rank of this node, used to compute the process ranks
            dist_world_size: value of the WORLD_SIZE environment variable
            args: parsed launcher arguments; nproc_per_node, no_python,
                module, use_env, training_script and training_script_args
                are read

        Raises:
            ValueError: if --no_python is used without --use_env or together
                with --module
            subprocess.CalledProcessError: for the first process (in launch
                order) which exits with a non-zero return code
        """
        # Every process is stored together with its command line, so that a
        # failure is reported with the command which actually failed. The
        # commands differ by --local_rank unless --use_env is set.
        launched = []

        # set PyTorch distributed related environmental variables
        current_env = os.environ.copy()
        current_env["MASTER_ADDR"] = master_addr
        current_env["MASTER_PORT"] = str(master_port)
        current_env["WORLD_SIZE"] = str(dist_world_size)

        if "OMP_NUM_THREADS" not in os.environ and args.nproc_per_node > 1:
            current_env["OMP_NUM_THREADS"] = str(1)
            print(
                "*****************************************\n"
                "Setting OMP_NUM_THREADS environment variable for each process "
                "to be {} in default, to avoid your system being overloaded, "
                "please further tune the variable for optimal performance in "
                "your application as needed. \n"
                "*****************************************".format(
                    current_env["OMP_NUM_THREADS"]
                )
            )

        # Set the init_method and rank of the process for distributed training.
        for local_rank in range(0, args.nproc_per_node):
            # each process's rank
            dist_rank = args.nproc_per_node * node_rank + local_rank
            current_env["RANK"] = str(dist_rank)
            current_env["LOCAL_RANK"] = str(local_rank)

            # spawn the processes
            with_python = not args.no_python
            cmd = []
            if with_python:
                cmd = [sys.executable, "-u"]
                if args.module:
                    cmd.append("-m")
            else:
                if not args.use_env:
                    raise ValueError(
                        "When using the '--no_python' flag, "
                        "you must also set the '--use_env' flag."
                    )
                if args.module:
                    raise ValueError(
                        "Don't use both the '--no_python' flag "
                        "and the '--module' flag at the same time."
                    )

            cmd.append(args.training_script)

            if not args.use_env:
                cmd.append("--local_rank={}".format(local_rank))

            cmd.extend(args.training_script_args)
            process = subprocess.Popen(cmd, env=current_env)
            launched.append((process, cmd))

        for process, cmd in launched:
            process.wait()
            if process.returncode != 0:
                raise subprocess.CalledProcessError(
                    returncode=process.returncode, cmd=cmd
                )

    def get_node_ip(self):
        """Returns the node IP address as reported by Ray."""
        return ray.services.get_node_ip_address()

    def find_free_port(self):
        """Returns the number of a port which was free when it was probed.

        The socket is bound to port 0 to obtain a port number and is closed
        again before the method returns, i.e. the port is not reserved.
        """
        with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
            s.bind(("", 0))
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            return s.getsockname()[1]
def main():
    """Launch distributed training on a Ray cluster.

    Connects to Ray, waits until enough GPUs are registered, creates one
    ``NodeLaunchActor`` per node, uses the first actor's address and a free
    port on it as the rendezvous point, and then waits for every actor's
    ``run`` call to finish.

    Ray is always shut down before returning. If anything fails after the
    connection is established, the error is logged and the process exits
    with status 1 so that callers can detect the failure.
    """
    args = parse_args()

    ray.init(address=args.ray_address)

    # world size in terms of number of processes
    dist_world_size = args.nproc_per_node * args.nnodes

    failed = False
    try:
        wait_for_gpus(dist_world_size)

        # Set up Ray distributed actors.
        actor = ray.remote(num_cpus=1, num_gpus=args.nproc_per_node)(NodeLaunchActor)
        workers = [actor.remote() for _ in range(args.nnodes)]

        # Set worker 0 as the master
        master_addr = ray.get(workers[0].get_node_ip.remote())
        master_port = ray.get(workers[0].find_free_port.remote())

        unfinished = [
            worker.run.remote(
                master_addr=master_addr,
                master_port=master_port,
                node_rank=i,
                dist_world_size=dist_world_size,
                args=args,
            )
            for i, worker in enumerate(workers)
        ]

        while unfinished:
            finished, unfinished = ray.wait(unfinished)
            # ray.get re-raises any exception raised inside the remote call.
            ray.get(finished)
    except Exception:
        logging.exception("An error occurred:")
        failed = True
    finally:
        ray.shutdown()

    if failed:
        # Surface the failure instead of exiting with status 0.
        sys.exit(1)
def debug_info(type, value, tb):
    """Exception hook that opens a post-mortem debugger in interactive terminals.

    Intended to be installed as ``sys.excepthook``. When running inside an
    interactive interpreter (``sys.ps1`` exists) or when stderr is not a TTY,
    the default hook is used. Otherwise the traceback is printed, followed by
    a blank line, and ``pdb.post_mortem`` is started on the traceback.

    Args:
        type: Exception class.
        value: Exception instance.
        tb: Traceback object.
    """
    if hasattr(sys, "ps1") or not sys.stderr.isatty():
        sys.__excepthook__(type, value, tb)
    else:
        import pdb
        import traceback

        traceback.print_exception(type, value, tb)
        # A bare `print` is a no-op expression in Python 3; call it so the
        # blank separator line is actually emitted.
        print()
        pdb.post_mortem(tb)
def convert_to_distributed_tensor(tensor):
    """
    For some backends, such as NCCL, communication only works if the
    tensor is on the GPU. This helper function converts to the correct
    device and returns the tensor + original device.

    The tensor is moved to CUDA only when torch.distributed is available,
    a process group has been initialized, the backend is NCCL and the tensor
    is not already on the GPU. Without an initialized process group the
    tensor is returned unchanged (querying the backend would raise).

    Returns:
        Tuple ``(tensor, orig_device)`` where ``orig_device`` is ``"cpu"`` or
        ``"gpu"`` depending on where the input tensor lived.
    """
    orig_device = "cpu" if not tensor.is_cuda else "gpu"
    if (
        torch.distributed.is_available()
        and torch.distributed.is_initialized()
        and torch.distributed.get_backend() == torch.distributed.Backend.NCCL
        and not tensor.is_cuda
    ):
        tensor = tensor.cuda()
    return (tensor, orig_device)
def get_world_size():
    """
    Number of processes in the current job: the torch.distributed world size
    when a process group is available and initialized, otherwise 1.
    """
    dist = torch.distributed
    if dist.is_available() and dist.is_initialized():
        return dist.get_world_size()
    return 1
_CPU_DEVICE_INDEX: + # CPU-only model, don't specify device + return torch.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + else: + # GPU model + return torch.nn.parallel.DistributedDataParallel( + model, + device_ids=[_cuda_device_index], + output_device=_cuda_device_index, + broadcast_buffers=False, + ) diff --git a/classy_vision/generic/opts.py b/classy_vision/generic/opts.py new file mode 100644 index 0000000000..0029088f51 --- /dev/null +++ b/classy_vision/generic/opts.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import os + +import torch +import torchvision +from classy_vision.generic.util import is_pos_int + + +def add_generic_args(parser): + """ + Adds generic command-line arguments for convnet training / testing to parser. + """ + parser.add_argument( + "--config_file", type=str, help="path to config file for model", required=True + ) + parser.add_argument( + "--device", + default=None, + type=str, + help="device to use: either 'cpu' or 'gpu'. 
If unspecified, will use GPU when available and CPU otherwise.", + ) + parser.add_argument( + "--num_workers", + default=4, + type=int, + help="number of dataloading workers (default = 4)", + ) + parser.add_argument( + "--checkpoint_folder", + default="", + type=str, + help="""folder to use for checkpoints: + epochal checkpoints are stored as model_.torch, + latest epoch checkpoint is at checkpoint.torch""", + ) + parser.add_argument( + "--pretrained_checkpoint_folder", + default="", + type=str, + help="""folder to use for pre-trained checkpoints: + epochal checkpoints are stored as model_.torch, + latest epoch checkpoint is at checkpoint.torch, + checkpoint is used for fine-tuning task, and it will + not resume training from the checkpoint""", + ) + parser.add_argument( + "--checkpoint_period", + default=1, + type=int, + help="""Checkpoint every x phases (default 1)""", + ) + parser.add_argument( + "--show_progress", + default=False, + action="store_true", + help="shows progress bar during training / testing", + ) + parser.add_argument( + "--skip_tensorboard", + default=False, + action="store_true", + help="do not perform tensorboard visualization", + ) + parser.add_argument( + "--visdom_server", + default="", + type=str, + help="visdom server to use (default None)", + ) + parser.add_argument( + "--visdom_port", + default=8097, + type=int, + help="port of visdom server (default = 8097)", + ) + parser.add_argument( + "--profiler", + default=False, + action="store_true", + help="specify this argument to profile training code", + ) + parser.add_argument( + "--debug", + default=False, + action="store_true", + help="specify this argument for debugging mode", + ) + parser.add_argument( + "--ignore_checkpoint_config", + default=False, + action="store_true", + help="""specify this argument to ignore + the compatibility of the config (or lack of config) attached + to the checkpoint; this will allow mismatches between + the training specified in the config and the + actual 
def check_generic_args(args):
    """
    Validate and normalize the generic command-line arguments.

    Asserts on num_workers, visdom_port and device, creates the checkpoint
    folder when one is given and missing, installs the debugging exception
    hook when args.debug is set, and rewrites args.visdom_server so that it
    starts with "http://". Returns the same args object.
    """
    device = args.device
    folder = args.checkpoint_folder

    # value checks:
    assert is_pos_int(args.num_workers), "incorrect number of workers"
    assert is_pos_int(args.visdom_port), "incorrect visdom port"
    assert device is None or device == "cpu" or device == "gpu", "unknown device"

    # GPU runs need CUDA:
    if device == "gpu":
        assert torch.cuda.is_available(), "CUDA required to train on GPUs"

    # make sure the checkpoint folder exists:
    needs_folder = folder != "" and not os.path.exists(folder)
    if needs_folder:
        os.makedirs(folder, exist_ok=True)
        assert os.path.exists(folder), "could not create folder %s" % folder

    # debugging mode: drop into the debugger on uncaught errors
    if args.debug:
        import sys
        from classy_vision.generic.debug import debug_info

        sys.excepthook = debug_info

    # normalize the visdom server URL:
    server = args.visdom_server
    if server != "":
        https_prefix = "https://"
        if server.startswith(https_prefix):
            print("WARNING: Visdom does not work over HTTPS.")
            server = server[len(https_prefix):]
        if not server.startswith("http://"):
            server = "http://%s" % server
        args.visdom_server = server

    return args
def set_trace():
    """Start a MultiprocessingPdb session in the frame of the caller."""
    debugger = MultiprocessingPdb()
    caller_frame = sys._getframe().f_back
    debugger.set_trace(caller_frame)
def start(self):
    """Begin a timing interval.

    Does nothing when the timer is disabled (``skip``) or already running.
    Otherwise resets the last interval, stores the host start time and, when
    the attached PerfStats uses CUDA events, records a start event.
    """
    inactive_or_busy = self.skip or self._is_running
    if inactive_or_busy:
        return

    self._last_interval = 0.0
    self._is_running = True
    self._start_time = perf_counter()

    if not self._perf_stats.use_cuda_events():
        return
    self._start_event = torch.cuda.Event(enable_timing=True)
    self._start_event.record()
class PerfMetric:
    """
    Running statistics for one metric, fed through `.update(value)`.

    Keeps the most recent value, an exponential moving average, the running
    sum and the number of updates; `get_avg()` returns the arithmetic mean.
    """

    # Weight given to the newest value in the exponential moving average.
    # With 0.1, about half of the total weight comes from the most recent
    # ~7 updates.
    EMA_FACTOR = 0.1

    def __init__(self):
        self.last_value: Optional[float] = None
        self.smoothed_value: Optional[float] = None

        self.sum_values: float = 0.0
        self.num_updates: int = 0

    def update(self, value: float):
        """Fold a new observation into all tracked statistics."""
        previous = self.smoothed_value
        if previous is None:
            # The first observation seeds the moving average.
            smoothed = value
        else:
            # TODO (T47970762): correct for initialization bias
            smoothed = (
                PerfMetric.EMA_FACTOR * value
                + (1.0 - PerfMetric.EMA_FACTOR) * previous
            )

        self.last_value = value
        self.smoothed_value = smoothed
        self.sum_values += value
        self.num_updates += 1

    def get_avg(self):
        """Return the mean of all observed values, or 0.0 if there are none."""
        if self.num_updates != 0:
            return self.sum_values / self.num_updates
        return 0.0
def report_str(self):
    """
    Fancy column-aligned human-readable report.
    If using Cuda events, calling this invokes cuda.synchronize(), which is needed
    to capture pending Cuda work in the report.

    Returns a header line followed by one line per recorded timer with the
    average host time and average Cuda-event time in milliseconds. When no
    timer has been recorded yet, only the header line is returned.
    """
    if self.use_cuda_events():
        torch.cuda.synchronize()
        self._process_cuda_events()

    # default=0 keeps the report usable before any timer has been recorded;
    # max() over an empty sequence would otherwise raise ValueError.
    name_width = max((len(k) for k in self._host_stats.keys()), default=0)

    header = ("{:>" + str(name_width + 4) + "s} {:>7s} {:>7s}").format(
        "Timer", "Host", "CudaEvent"
    )
    row_fmt = "{:>" + str(name_width + 4) + "s}: {:>7.2f} ms {:>7.2f} ms"

    rows = [header]
    for name, metric in self._host_stats.items():
        rows.append(
            row_fmt.format(
                name,
                metric.get_avg() * 1000.0,  # seconds -> milliseconds
                self._cuda_stats[name].get_avg() * 1000.0,
            )
        )
    return "\n".join(rows)
def _to_ntuple(value, n):
    """Expand an int hyper-parameter to an n-tuple; pass sequences through."""
    if isinstance(value, int):
        return (value,) * n
    return tuple(value)


def _spatial_output_size(in_size, padding, kernel, stride, dilation=1):
    """Output length of a convolution/pooling window along one dimension."""
    return (in_size + 2 * padding - dilation * (kernel - 1) - 1) // stride + 1


def _layer_flops(layer, x):
    """
    Computes the number of FLOPs required for a single layer.

    The layer type is taken from the text before the first "(" in the
    layer's repr.

    Args:
        layer: The module to analyse. Its hyper-parameters are only read,
            never modified.
        x: Input tensor that the layer is applied to.

    Returns:
        The FLOP count for one forward pass on ``x``.

    Raises:
        NotImplementedError: For unsupported layer types, and for
            AdaptiveAvgPool2d when the output is larger than the input.
    """

    # get layer type:
    typestr = layer.__repr__()
    layer_type = typestr[: typestr.find("(")].strip()
    batchsize_per_replica = x.size()[0]

    # 2D convolution:
    if layer_type in ["Conv2d"]:
        out_h = _spatial_output_size(
            x.size()[2],
            layer.padding[0],
            layer.kernel_size[0],
            layer.stride[0],
            layer.dilation[0],
        )
        out_w = _spatial_output_size(
            x.size()[3],
            layer.padding[1],
            layer.kernel_size[1],
            layer.stride[1],
            layer.dilation[1],
        )
        return (
            batchsize_per_replica
            * layer.in_channels
            * layer.out_channels
            * layer.kernel_size[0]
            * layer.kernel_size[1]
            * out_h
            * out_w
            / layer.groups
        )

    # learned group convolution:
    elif layer_type in ["LearnedGroupConv"]:
        conv = layer.conv
        dilation = _to_ntuple(getattr(conv, "dilation", 1), 2)
        out_h = _spatial_output_size(
            x.size()[2],
            conv.padding[0],
            conv.kernel_size[0],
            conv.stride[0],
            dilation[0],
        )
        out_w = _spatial_output_size(
            x.size()[3],
            conv.padding[1],
            conv.kernel_size[1],
            conv.stride[1],
            dilation[1],
        )
        count1 = _layer_flops(layer.relu, x) + _layer_flops(layer.norm, x)
        count2 = (
            batchsize_per_replica
            * conv.in_channels
            * conv.out_channels
            * conv.kernel_size[0]
            * conv.kernel_size[1]
            * out_h
            * out_w
            / layer.condense_factor
        )
        return count1 + count2

    # non-linearities:
    elif layer_type in ["ReLU", "Tanh", "Sigmoid", "Softmax"]:
        return x.numel()

    # 2D pooling layers:
    elif layer_type in ["AvgPool2d", "MaxPool2d"]:
        in_h = x.size()[2]
        in_w = x.size()[3]
        # Hyper-parameters may be ints or tuples; normalize local copies
        # instead of rewriting the attributes on the layer.
        kernel = _to_ntuple(layer.kernel_size, 2)
        padding = _to_ntuple(layer.padding, 2)
        stride = _to_ntuple(layer.stride, 2)
        kernel_ops = kernel[0] * kernel[1]
        out_h = _spatial_output_size(in_h, padding[0], kernel[0], stride[0])
        out_w = _spatial_output_size(in_w, padding[1], kernel[1], stride[1])
        return x.size()[0] * x.size()[1] * out_w * out_h * kernel_ops

    # adaptive avg pool2d
    # This is approximate and works only for downsampling without padding
    # based on aten/src/ATen/native/AdaptiveAveragePooling.cpp
    elif layer_type in ["AdaptiveAvgPool2d"]:
        in_h = x.size()[2]
        in_w = x.size()[3]
        output_size = layer.output_size
        if output_size is None or isinstance(output_size, int):
            output_size = (output_size, output_size)
        # A None entry means "keep the input size" for that dimension.
        out_h = in_h if output_size[0] is None else output_size[0]
        out_w = in_w if output_size[1] is None else output_size[1]
        if out_h > in_h or out_w > in_w:
            raise NotImplementedError()
        num_channels = x.size()[1]
        kh = in_h - out_h + 1
        kw = in_w - out_w + 1
        kernel_ops = kh * kw
        return batchsize_per_replica * num_channels * out_h * out_w * kernel_ops

    # linear layer:
    elif layer_type in ["Linear"]:
        weight_ops = layer.weight.numel()
        # Linear(bias=False) has bias set to None.
        bias_ops = layer.bias.numel() if layer.bias is not None else 0
        return x.size()[0] * (weight_ops + bias_ops)

    # 2D/3D batch normalization:
    elif layer_type in ["BatchNorm2d", "BatchNorm3d"]:
        return 2 * x.numel()

    # 3D convolution
    elif layer_type in ["Conv3d"]:
        out_t = _spatial_output_size(
            x.size()[2],
            layer.padding[0],
            layer.kernel_size[0],
            layer.stride[0],
            layer.dilation[0],
        )
        out_h = _spatial_output_size(
            x.size()[3],
            layer.padding[1],
            layer.kernel_size[1],
            layer.stride[1],
            layer.dilation[1],
        )
        out_w = _spatial_output_size(
            x.size()[4],
            layer.padding[2],
            layer.kernel_size[2],
            layer.stride[2],
            layer.dilation[2],
        )
        return (
            batchsize_per_replica
            * layer.in_channels
            * layer.out_channels
            * layer.kernel_size[0]
            * layer.kernel_size[1]
            * layer.kernel_size[2]
            * out_t
            * out_h
            * out_w
            / layer.groups
        )

    # 3D pooling layers
    elif layer_type in ["AvgPool3d", "MaxPool3d"]:
        in_t = x.size()[2]
        in_h = x.size()[3]
        in_w = x.size()[4]
        kernel = _to_ntuple(layer.kernel_size, 3)
        padding = _to_ntuple(layer.padding, 3)
        stride = _to_ntuple(layer.stride, 3)
        kernel_ops = kernel[0] * kernel[1] * kernel[2]
        out_t = _spatial_output_size(in_t, padding[0], kernel[0], stride[0])
        out_h = _spatial_output_size(in_h, padding[1], kernel[1], stride[1])
        out_w = _spatial_output_size(in_w, padding[2], kernel[2], stride[2])
        return batchsize_per_replica * x.size()[1] * out_t * out_h * out_w * kernel_ops

    # dropout layer
    elif layer_type in ["Dropout"]:
        # At test time, we do not drop values but scale the feature map by the
        # dropout ratio
        return x.numel()

    # not implemented:
    raise NotImplementedError("FLOPs not implemented for %s layer" % layer_type)
def compute_flops(model, input_shape=(3, 244, 244), input_key=None):
    """
    Compute the number of FLOPs needed for a forward pass.

    Returns None when input_shape is not a sequence. Otherwise the model's
    leaf modules are temporarily switched to FLOP-counting classes, one
    forward pass is run on a dummy input, the original classes are restored
    (also when the forward pass raises) and the summed count is returned.

    NOTE(review): the default input_shape is (3, 244, 244) while profile()
    defaults to (3, 224, 224) -- confirm whether 244 is intentional.
    """
    assert isinstance(model, nn.Module)
    if not isinstance(input_shape, abc.Sequence):
        return None

    dummy_input = get_model_dummy_input(model, input_shape, input_key)
    per_layer_flops = []

    # measure FLOPs; always undo the class swap afterwards:
    modify_forward(model, per_layer_flops)
    try:
        model.forward(dummy_input)
    finally:
        restore_forward(model)
    return sum(per_layer_flops)
def import_all_modules(root, base_module):
    """Import every public Python module found directly inside ``root``.

    Files ending in ``.py`` or ``.pyc`` whose names do not start with an
    underscore are imported as ``<base_module>.<name>``.

    Args:
        root: Directory to scan.
        base_module: Dotted name of the package that ``root`` corresponds to.
    """
    # sorted() makes the import order independent of the directory listing.
    for file in sorted(os.listdir(root)):
        if file.endswith((".py", ".pyc")) and not file.startswith("_"):
            module = file.partition(".py")[0]
            module_name = ".".join([base_module, module])
            # Check the fully qualified name: testing the bare file stem
            # would skip any submodule that shares its name with an already
            # imported top-level module (e.g. "os" or "pdb").
            if module_name not in sys.modules:
                importlib.import_module(module_name)
def is_pos_int(number):
    """
    Returns True if `number` is exactly of type int (subclasses such as bool
    are rejected) and is not negative. Note that zero is accepted.
    """
    if type(number) != int:
        return False
    return number >= 0
def copy_upvalue(value, upvalue):
    """
    Copies the contents of `value` into the existing container `upvalue`,
    in place, so that every holder of a reference to `upvalue` sees the new
    contents. The copy is shallow.

    Args:
        value: a dict or a list holding the new contents.
        upvalue: container of exactly the same type as `value`; its previous
            contents are discarded.

    Raises:
        AssertionError: if the types of `value` and `upvalue` differ.
        TypeError: if the containers are neither dicts nor lists.
    """
    assert type(value) == type(upvalue), "type of value and upvalue must match"
    if value is upvalue:
        # copying a container onto itself must not wipe it
        return
    if type(value) == dict:
        upvalue.clear()
        upvalue.update(value)
    elif type(value) == list:
        # slice assignment replaces the contents while keeping the identity
        # of the list object
        upvalue[:] = value
    else:
        raise TypeError("unsupported upvalue type")
def accuracy(output, target, topk=(1,)):
    """
    Computes the top-k accuracy in a multi-class problem from an NxK output
    matrix and a corresponding target tensor holding N class indices with
    values in 0, ..., K-1.

    Args:
        output: NxK tensor of scores.
        target: tensor with N elements (e.g. of size N or Nx1).
        topk: a single k or a tuple / list of k values; every k has to pass
            `is_pos_int` and must not exceed K.

    Returns:
        A list with one accuracy (a float, in percent) per entry of `topk`.
    """

    # assertions:
    assert torch.is_tensor(output)
    assert torch.is_tensor(target)
    assert output.size(0) == target.size(0)
    if type(topk) == int:
        topk = (topk,)
    assert type(topk) == tuple or type(topk) == list
    assert all(is_pos_int(k) for k in topk)
    maxk = max(topk)
    # k == K is a valid request (it simply yields 100%)
    assert maxk <= output.size(1)

    # determine whether predictions are correct:
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.reshape(1, -1).expand_as(pred))

    # compute accuracies:
    res = []
    for k in topk:
        # `correct` is the transpose of a contiguous tensor, so its row slice
        # is generally not contiguous: reshape() copes with that, view() does
        # not.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / target.size(0)).item())
    return res
def calc_ap(prec, recall):
    """
    Computes the average precision as the trapezoidal area under a
    precision / recall curve. Both curves have to be 1D and of equal size;
    the curve is implicitly started at recall 0 with the first precision
    value. Returns 0.0 for empty curves.
    """
    assert prec.size() == recall.size(), "Precision and recall curves must be same size"
    num_points = len(prec)
    if num_points == 0:
        return 0.0

    area = 0.0
    last_recall, last_prec = 0.0, prec[0]
    for i in range(num_points):
        cur_prec, cur_recall = prec[i], recall[i]
        # trapezoid between the previous and the current point of the curve
        area += (cur_recall - last_recall) * (cur_prec + last_prec) / 2.0
        last_recall, last_prec = cur_recall, cur_prec

    return area
The histogram tensors are num_bins x num_classes + and each column represents a histogram over + prediction_probabilities. + + The two tensors should have the same dimensions. + The two tensors should have nonnegative integer values. + + Returns map of precision / recall values from highest precision to lowest + and the calculated AUPRC (i.e. the average precision). + """ + assert torch.is_tensor(class_hist) and torch.is_tensor( + total_hist + ), "Both arguments must be tensors" + assert ( + class_hist.dtype == torch.int64 and total_hist.dtype == torch.int64 + ), "Both arguments must contain int64 values" + assert ( + len(class_hist.size()) == 2 and len(total_hist.size()) == 2 + ), "Both arguments must have 2 dimensions, (score_bin, class)" + assert ( + class_hist.size() == total_hist.size() + ), """ + For compute_pr_curve, arguments must be of same size. + class_hist.size(): %s + total_hist.size(): %s + """ % ( + str(class_hist.size()), + str(total_hist.size()), + ) + assert ( + class_hist > total_hist + ).sum() == 0, ( + "Invalid. 
Class histogram must be less than or equal to total histogram" + ) + + num_bins = class_hist.size()[0] + # Cumsum from highest bucket to lowest + cum_class_hist = torch.cumsum(torch.flip(class_hist, dims=(0,)), dim=0).double() + cum_total_hist = torch.cumsum(torch.flip(total_hist, dims=(0,)), dim=0).double() + class_totals = cum_class_hist[-1, :] + + prec_t = cum_class_hist / cum_total_hist + recall_t = cum_class_hist / class_totals + + prec = torch.unbind(prec_t, dim=1) + recall = torch.unbind(recall_t, dim=1) + assert len(prec) == len( + recall + ), "The number of precision curves does not match the number of recall curves" + + final_prec = [] + final_recall = [] + final_ap = [] + for c, prec_curve in enumerate(prec): + recall_curve = recall[c] + assert ( + recall_curve.size()[0] == num_bins and prec_curve.size()[0] == num_bins + ), "Precision and recall curves do not have the correct number of entries" + + # Check if any samples from class were seen + if class_totals[c] == 0: + continue + + # Remove duplicate entries + prev_r = torch.tensor(-1.0).double() + prev_p = torch.tensor(1.1).double() + new_recall_curve = torch.tensor([], dtype=torch.double) + new_prec_curve = torch.tensor([], dtype=torch.double) + for idx, r in enumerate(recall_curve): + p = prec_curve[idx] + # Remove points on PR curve that are invalid + if r.item() <= 0: + continue + + # Remove duplicates (due to empty buckets): + if r.item() == prev_r.item() and p.item() == prev_p.item(): + continue + + # Add points to curve + new_recall_curve = torch.cat((new_recall_curve, r.unsqueeze(0)), dim=0) + new_prec_curve = torch.cat((new_prec_curve, p.unsqueeze(0)), dim=0) + prev_r = r + prev_p = p + + ap = calc_ap(new_prec_curve, new_recall_curve) + final_prec.append(new_prec_curve) + final_recall.append(new_recall_curve) + final_ap.append(ap) + + return {"prec": final_prec, "recall": final_recall, "ap": final_ap} + + +def get_checkpoint_dict(task, input_args): + assert isinstance( + input_args, dict + ), 
f"Unexpected input_args of type: {type(input_args)}" + return {"input_args": input_args, "classy_state_dict": task.get_classy_state()} + + +# function that tries to load a checkpoint: +def load_checkpoint(checkpoint_folder, device=None, checkpoint_file=CHECKPOINT_FILE): + """ + Loads a state variable from the specified checkpoint folder. + """ + if not checkpoint_folder: + return None + + if device is None: + device = "gpu" if torch.cuda.is_available() else "cpu" + + if not os.path.exists(checkpoint_folder): + logging.warning("Checkpoint folder '%s' not found" % checkpoint_folder) + return None + logging.info("Attempting to load checkpoint from '%s'" % checkpoint_folder) + + # read what the latest model file is: + filename = os.path.join(checkpoint_folder, checkpoint_file) + if not os.path.exists(filename): + logging.warning("Checkpoint file %s not found." % filename) + return None + + # load and return the checkpoint: + if device == "cpu": + checkpoint = torch.load(filename, map_location=torch.device("cpu")) + else: + assert device == "gpu" + # Load model on current device and not on saved device for model. + checkpoint = torch.load( + filename, + map_location=torch.device("cuda:{0}".format(torch.cuda.current_device())), + ) + logging.info(f"Loaded checkpoint from {filename}") + return checkpoint + + +def update_classy_model(model, model_state_dict, reset_heads): + """ + Updates the model with the provided model state dictionary. + + Args: + model: ClassyVisionModel instance to update + model_state_dict: State dict, should be the output of a call to + ClassyVisionModel.get_classy_state(). + reset_heads: if False, uses the heads' state from model_state_dict. 
def update_classy_state(task, state_dict):
    """
    Restores a task from a previously saved state.

    Args:
        task: object whose `set_classy_state` method receives `state_dict`.
        state_dict: the state to hand to the task.

    Returns:
        True when `set_classy_state` completed, False when it raised an
        Exception (which is logged together with its traceback).
    """
    logging.info("Loading classy state from checkpoint")

    loaded = False
    try:
        task.set_classy_state(state_dict)
        logging.info("Checkpoint load successful")
        loaded = True
    except Exception:
        logging.exception("Could not load the checkpoint")
    return loaded
def flatten_dict(value_dict, prefix="", sep="_"):
    """
    Flattens nested dict into (key, val) dict. Used for flattening meters
    structure, so that they can be visualized.

    Args:
        value_dict: mapping whose values may themselves be mappings.
        prefix: string put (followed by `sep`) in front of every key; an
            empty prefix leaves top-level keys unchanged.
        sep: separator inserted between the key parts of nested levels.

    Returns:
        A new flat dict in which the key of every nested value is the
        `sep`-joined path of keys leading to it.
    """
    # The ABCs live in collections.abc; the old aliases in `collections`
    # (collections.MutableMapping, ...) were removed in Python 3.10. Imported
    # locally so this function does not depend on the module's import block.
    from collections.abc import Mapping

    flat = {}
    for k, v in value_dict.items():
        key = prefix + sep + k if prefix else k
        if isinstance(v, Mapping):
            flat.update(flatten_dict(value_dict=v, prefix=key, sep=sep))
        else:
            flat[key] = v
    return flat
class ExceptionWrapper(object):
    r"""Wraps an exception plus traceback to communicate across threads

    Only the exception type and the formatted traceback text are stored; the
    traceback object itself is deliberately not kept, see
    NOTE [ Python Traceback Reference Cycle Problem ].
    """

    def __init__(self, exc_info=None, where="in background"):
        r"""
        Args:
            exc_info: a (type, value, traceback) triple as returned by
                sys.exc_info(); defaults to the exception currently being
                handled.
            where: short description of where the exception was caught; it
                is embedded in the message produced by :meth:`reraise`.
        """
        # It is important that we don't store exc_info, see
        # NOTE [ Python Traceback Reference Cycle Problem ]
        if exc_info is None:
            exc_info = sys.exc_info()
        self.exc_type = exc_info[0]
        self.exc_msg = "".join(traceback.format_exception(*exc_info))
        self.where = where

    def reraise(self):
        r"""Reraises the wrapped exception in the current thread

        Raises:
            An exception of the wrapped type whose message contains the
            original traceback, or a RuntimeError carrying the same message
            when the wrapped type cannot be constructed from a single message
            argument.
        """
        # Format a message such as: "Caught ValueError in DataLoader worker
        # process 2. Original Traceback:", followed by the traceback.
        msg = "Caught {} {}.\nOriginal {}".format(
            self.exc_type.__name__, self.where, self.exc_msg
        )
        if self.exc_type == KeyError:
            # KeyError calls repr() on its argument (usually a dict key). This
            # makes stack traces unreadable. It will not be changed in Python
            # (https://bugs.python.org/issue2651), so we work around it.
            msg = KeyErrorMessage(msg)
        try:
            exception = self.exc_type(msg)
        except TypeError:
            # The constructor of the wrapped type does not accept a single
            # message argument; we cannot know how to build it, so fall back
            # to a generic error instead of masking the original failure with
            # an unrelated TypeError.
            raise RuntimeError(msg) from None
        raise exception
def get_model_dummy_input(
    model, input_shape, input_key, batchsize=1, non_blocking=False
):
    """
    Builds an all-zero input batch for `model`.

    The tensors are moved to the GPU when the first parameter of the model
    lives on the GPU; a model without any parameters gets CPU tensors.

    Args:
        model: module the input is meant for (only its parameters are
            inspected).
        input_shape: shape of one sample without the batch axis or, when
            `input_key` is a list, one such shape per key.
        input_key: None / empty to get a bare tensor, a key to get
            `{input_key: tensor}`, or a list of keys to get one tensor per
            key (multiple inputs).
        batchsize: size of the leading batch axis.
        non_blocking: forwarded to `Tensor.cuda` when tensors are moved.

    Returns:
        A tensor or a dict of tensors, as described for `input_key`.
    """
    # next() with a default: a model without parameters must not raise
    # StopIteration here.
    first_param = next(model.parameters(), None)
    on_gpu = first_param is not None and first_param.is_cuda

    def _zeros(sample_shape):
        # add a dimension to represent minibatch axis
        tensor = torch.zeros((batchsize,) + tuple(sample_shape))
        return tensor.cuda(non_blocking=non_blocking) if on_gpu else tensor

    if isinstance(input_key, list):
        # av mode, with multiple input keys
        return {key: _zeros(input_shape[i]) for i, key in enumerate(input_key)}

    dummy = _zeros(input_shape)
    return {input_key: dummy} if input_key else dummy
# function that plots learning curve:
def plot_learning_curves(curves, visdom_server=None, env=None, win=None, title=""):
    """Draws a dict of learning curves as one visdom line plot.

    `curves` maps a curve name to a list with one value per epoch; curves
    whose name contains "batch time" are left out of the plot. The visdom
    environment, window handle, and plot title are optional. Returns whatever
    the `line` call of the visdom server returns, or None when there is no
    live visdom connection.
    """

    # fall back on the module-level visdom connection when none was passed in:
    if visdom_server is None and visdom_connected():
        visdom_server = vis[-1]

    # nothing to do without a live connection:
    connected = bool(visdom_server) and visdom_server.check_connection()
    if not connected:
        print("WARNING: Not connected to visdom. Skipping plotting.")
        return

    # input validation:
    assert type(curves) == dict
    assert all(type(points) == list for points in curves.values())

    # drop the batch time curves:
    kept = {name: points for name, points in curves.items() if "batch time" not in name}

    # one column per curve, one row per epoch:
    values = torch.stack([torch.FloatTensor(points) for points in kept.values()], dim=1)
    num_epochs, num_curves = values.size(0), values.size(1)
    epochs = torch.arange(0, num_epochs).view(num_epochs, 1)
    epochs = epochs.expand(num_epochs, num_curves)
    plot_opts = {"title": title, "legend": list(kept), "xlabel": "Epochs"}
    return visdom_server.line(values, epochs, env=env, win=win, opts=plot_opts)
def plot_model(
    model, size=(3, 224, 224), input_key=None, writer=None, folder="", train=True
):
    """Visualizes a model in TensorBoard.

    The TensorBoard writer can be either specified directly via `writer` or can
    be specified via a `folder`, in which case a SummaryWriter logging to that
    folder is created.

    The model can be run in training or evaluation mode via the `train`
    argument; the mode the model was in before the call is restored
    afterwards, also when writing the graph fails.

    Note that the writer is used as a context manager, so it is exited when
    this function returns, also when it was supplied by the caller.

    Example usage:
      - Start TensorBoard using: `tensorboard --port=8098 --logdir <folder>`
    """

    assert (
        writer is not None or folder != ""
    ), "must specify SummaryWriter or folder to create SummaryWriter in"
    dummy_input = get_model_dummy_input(model, size, input_key)
    if writer is None:
        writer = SummaryWriter(log_dir=folder, comment="Model graph")
    with writer:
        orig_train = model.training
        model.train(train)  # visualize model in desired mode
        try:
            writer.add_graph(model, input_to_model=(dummy_input,))
        finally:
            # never leave the caller's model in the wrong mode
            model.train(orig_train)
+ """ + + # assertions: + if type(mapcoord) == np.ndarray: + mapcoord = torch.from_numpy(mapcoord) + assert torch.is_tensor(mapcoord) + if isinstance(dataset, torch.utils.data.dataloader.DataLoader): + dataset = dataset.dataset + assert isinstance(dataset, torch.utils.data.dataset.Dataset) + assert is_pos_int(mapsize) + assert is_pos_int(imsize) + if unnormalize is not None: + assert callable(unnormalize) + + # initialize some variables: + import torchvision.transforms.functional as F + + background = 255 + mapim = torch.ByteTensor(3, mapsize, mapsize).fill_(background) + + # normalize map coordinates: + mapc = mapcoord.add(-mapcoord.min()) + mapc.div_(mapc.max()) + + # loop over images: + for idx in range(len(dataset)): + + # compute grid location: + if snap_to_grid: + y = 1 + int(math.floor(mapc[idx][0] * (mapsize - imsize - 2))) + x = 1 + int(math.floor(mapc[idx][1] * (mapsize - imsize - 2))) + else: + y = 1 + int(math.floor(mapc[idx][0] * (math.floor(mapsize - imsize) - 2))) + x = 1 + int(math.floor(mapc[idx][1] * (math.floor(mapsize - imsize) - 2))) + + # check whether we can overwrite this location: + overwrite = not snap_to_grid + if not overwrite: + segment = mapim.narrow(1, y, imsize).narrow(2, x, imsize) + overwrite = segment.eq(background).all() + + # draw image: + if overwrite: + + # load, unnormalize, and resize image: + image = dataset[idx][0] + if unnormalize is not None: + image = unnormalize(image) + resized_im = F.to_tensor( + F.resize(F.to_pil_image(image), imsize, Image.BILINEAR) + ) + + # place image: + segment = mapim.narrow(1, y, imsize).narrow(2, x, imsize) + segment.copy_(resized_im.mul_(255.0).byte()) + + # return map: + return mapim diff --git a/classy_vision/heads/__init__.py b/classy_vision/heads/__init__.py new file mode 100644 index 0000000000..6047032a14 --- /dev/null +++ b/classy_vision/heads/__init__.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import copy +from pathlib import Path + +from classy_vision.generic.registry_utils import import_all_modules + +from .classy_head import ClassyHead + + +FILE_ROOT = Path(__file__).parent + + +HEAD_REGISTRY = {} +HEAD_CLASS_NAMES = set() + + +def register_head(name): + """Registers a ClassyHead subclass. + + This decorator allows Classy Vision to instantiate a subclass of + ClassyHead from a configuration file, even if the class itself is not + part of the Classy Vision framework. To use it, apply this decorator to a + ClassyHead subclass, like this: + + .. code-block:: python + + @register_head("my_head") + class MyHead(ClassyHead): + ... + + To instantiate a head from a configuration file, see + :func:`build_head`.""" + + def register_head_cls(cls): + if name in HEAD_REGISTRY: + raise ValueError("Cannot register duplicate head ({})".format(name)) + if not issubclass(cls, ClassyHead): + raise ValueError( + "Head ({}: {}) must extend ClassyHead".format(name, cls.__name__) + ) + if cls.__name__ in HEAD_CLASS_NAMES: + raise ValueError( + "Cannot register head with duplicate class name ({})".format( + cls.__name__ + ) + ) + HEAD_REGISTRY[name] = cls + HEAD_CLASS_NAMES.add(cls.__name__) + return cls + + return register_head_cls + + +def build_head(config): + """Builds a ClassyHead from a config. + + This assumes a 'name' key in the config which is used to determine what + head class to instantiate. 
class ClassyHead(nn.Module):
    """
    Common base class of all heads that can be attached to a
    :class:`ClassyModel`.

    A head is an ordinary :class:`torch.nn.Module` that is attached to a
    pretrained model, which enables a form of transfer learning: the features
    extracted by a model trained on one dataset are reused for another
    problem. Heads are attached to a :class:`models.ClassyBlock` within a
    :class:`models.ClassyModel`.
    """

    def __init__(
        self, unique_id: Optional[str] = None, num_classes: Optional[int] = None
    ):
        """
        Constructs a ClassyHead.

        Args:
            unique_id: Identifier used to refer to this head, since several
                instances of the same head may be attached to one model.
                Falls back to the name of the class when empty or None.

            num_classes: Number of classes for the head.
        """
        super().__init__()
        if not unique_id:
            unique_id = type(self).__name__
        self.unique_id = unique_id
        self.num_classes = num_classes

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "ClassyHead":
        """Instantiates a ClassyHead from a configuration.

        Subclasses have to override this method; the base implementation
        always raises NotImplementedError.

        Args:
            config: A configuration for the ClassyHead.

        Returns:
            A ClassyHead instance.
        """
        raise NotImplementedError

    def forward(self, x):
        """
        Performs inference on the head.

        This is the regular PyTorch method (see :class:`torch.nn.Module`);
        subclasses have to override it, the base implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError
Multiple instances of + the same head might be attached to a model, and unique_id is used + to refer to them. + + num_classes: Number of classes for the head. If None, then the fully + connected layer is not applied. + + in_plane: Input size for the fully connected layer. + """ + super().__init__(unique_id, num_classes) + assert num_classes is None or is_pos_int(num_classes) + assert is_pos_int(in_plane) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = None if num_classes is None else nn.Linear(in_plane, num_classes) + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "FullyConnectedHead": + """Instantiates a FullyConnectedHead from a configuration. + + Args: + config: A configuration for a FullyConnectedHead. + See :func:`__init__` for parameters expected in the config. + + Returns: + A FullyConnectedHead instance. + """ + num_classes = config.get("num_classes", None) + in_plane = config["in_plane"] + return cls(config["unique_id"], num_classes, in_plane) + + def forward(self, x): + # perform average pooling: + out = self.avgpool(x) + + # final classifier: + out = out.reshape(out.size(0), -1) + if self.fc is not None: + out = self.fc(out) + return out diff --git a/classy_vision/heads/fully_convolutional_linear_head.py b/classy_vision/heads/fully_convolutional_linear_head.py new file mode 100644 index 0000000000..622d884815 --- /dev/null +++ b/classy_vision/heads/fully_convolutional_linear_head.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from collections.abc import Sequence +from typing import Any, Dict, List, Optional + +import torch.nn as nn +from classy_vision.generic.util import is_pos_int +from classy_vision.heads import ClassyHead, register_head + + +class FullyConvolutionalLinear(nn.Module): + def __init__(self, dim_in, num_classes, act_func="softmax"): + super(FullyConvolutionalLinear, self).__init__() + # Perform FC in a fully convolutional manner. The FC layer will be + # initialized with a different std comparing to convolutional layers. + self.projection = nn.Linear(dim_in, num_classes, bias=True) + + # Softmax for evaluation and testing. + if act_func == "softmax": + self.act = nn.Softmax(dim=4) + elif act_func == "sigmoid": + self.act = nn.Sigmoid() + else: + raise NotImplementedError( + "{} is not supported as an activation" "function.".format(act_func) + ) + + def forward(self, x): + # (N, C, T, H, W) -> (N, T, H, W, C). + x = x.permute((0, 2, 3, 4, 1)) + x = self.projection(x) + # Performs fully convlutional inference. + if not self.training: + x = self.act(x) + x = x.mean([1, 2, 3]) + x = x.view(x.shape[0], -1) + return x + + +@register_head("fully_convolutional_linear") +class FullyConvolutionalLinearHead(ClassyHead): + """ + This head defines a 3d average pooling layer (:class:`torch.nn.AvgPool3d`) + followed by a fully convolutional linear layer. This layer performs a + fully-connected projection during training, when the input size is 1x1x1. + It performs a convolutional projection during testing when the input size + is larger than 1x1x1. + """ + + def __init__( + self, + unique_id: str, + num_classes: int, + in_plane: int, + pool_size: List[int], + activation_func: str, + use_dropout: Optional[bool] = None, + ): + """ + Constructor for FullyConvolutionalLinearHead. + + Args: + unique_id: A unique identifier for the head. Multiple instances of + the same head might be attached to a model, and unique_id is used + to refer to them. 
+ num_classes: Number of classes for the head. + in_plane: Input size for the fully connected layer. + pool_size: Kernel size for the 3d pooling layer. + activation_func: activation function to use. 'softmax': applies + softmax on the output. 'sigmoid': applies sigmoid on the output. + use_dropout: Whether to apply dropout after the pooling layer. + """ + super().__init__(unique_id, num_classes) + self.final_avgpool = nn.AvgPool3d(pool_size, stride=1) + if use_dropout: + self.dropout = nn.Dropout(p=0.5) + # we separate average pooling from the fully-convolutional linear projection + # because for multi-path models such as SlowFast model, the input can be + # more than 1 tesnor. In such case, we can define a new head to combine multiple + # tensors via concat or addition, do average pooling, but still reuse + # FullyConvolutionalLinear inside of it. + self.head_fcl = FullyConvolutionalLinear( + in_plane, num_classes, act_func=activation_func + ) + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "FullyConvolutionalLinearHead": + """Instantiates a FullyConvolutionalLinearHead from a configuration. + + Args: + config: A configuration for a FullyConvolutionalLinearHead. + See :func:`__init__` for parameters expected in the config. + + Returns: + A FullyConvolutionalLinearHead instance. 
+ """ + required_args = ["pool_size", "in_plane", "num_classes"] + for arg in required_args: + assert arg in config, "argument %s is required" % arg + + config.update({"activation_func": config.get("activation_func", "softmax")}) + config.update({"use_dropout": config.get("use_dropout", False)}) + + assert ( + isinstance(config["pool_size"], Sequence) and len(config["pool_size"]) == 3 + ) + for pool_size_dim in config["pool_size"]: + assert is_pos_int(pool_size_dim) + assert is_pos_int(config["in_plane"]) + assert is_pos_int(config["num_classes"]) + + num_classes = config.get("num_classes", None) + in_plane = config["in_plane"] + return cls( + config["unique_id"], + num_classes, + in_plane, + config["pool_size"], + config["activation_func"], + config["use_dropout"], + ) + + def forward(self, x): + out = self.final_avgpool(x) + if hasattr(self, "dropout"): + out = self.dropout(out) + out = self.head_fcl(out) + return out diff --git a/classy_vision/heads/identity_head.py b/classy_vision/heads/identity_head.py new file mode 100644 index 0000000000..c8dc6e40c0 --- /dev/null +++ b/classy_vision/heads/identity_head.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Any, Dict + +from classy_vision.heads import ClassyHead, register_head + + +@register_head("identity") +class IdentityHead(ClassyHead): + """This head returns the input without changing it. This can + be attached to a model, if the output of the model is the + desired result. + """ + + def forward(self, x): + return x + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "IdentityHead": + """Instantiates a IdentityHead from a configuration. + + Args: + config: A configuration for a IdentityHead. + See :func:`__init__` for parameters expected in the config. + + Returns: + A IdentityHead instance. 
+ """ + return cls(config["unique_id"]) diff --git a/classy_vision/hooks/__init__.py b/classy_vision/hooks/__init__.py new file mode 100644 index 0000000000..28eda768ea --- /dev/null +++ b/classy_vision/hooks/__init__.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from pathlib import Path + +from classy_vision.generic.registry_utils import import_all_modules + +from .checkpoint_hook import CheckpointHook +from .classy_hook import ClassyHook, ClassyHookFunctions +from .exponential_moving_average_model_hook import ExponentialMovingAverageModelHook +from .loss_lr_meter_logging_hook import LossLrMeterLoggingHook +from .model_complexity_hook import ModelComplexityHook +from .model_tensorboard_hook import ModelTensorboardHook +from .profiler_hook import ProfilerHook +from .progress_bar_hook import ProgressBarHook +from .tensorboard_plot_hook import TensorboardPlotHook +from .time_metrics_hook import TimeMetricsHook +from .visdom_hook import VisdomHook + + +__all__ = [ + "CheckpointHook", + "ClassyHook", + "ClassyHookFunctions", + "ExponentialMovingAverageModelHook", + "LossLrMeterLoggingHook", + "TensorboardPlotHook", + "ModelComplexityHook", + "ModelTensorboardHook", + "ProfilerHook", + "ProgressBarHook", + "TimeMetricsHook", + "VisdomHook", +] + +FILE_ROOT = Path(__file__).parent + +# automatically import any Python files in the hooks/ directory +import_all_modules(FILE_ROOT, "classy_vision.hooks") diff --git a/classy_vision/hooks/checkpoint_hook.py b/classy_vision/hooks/checkpoint_hook.py new file mode 100644 index 0000000000..e8f3a300d8 --- /dev/null +++ b/classy_vision/hooks/checkpoint_hook.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +import os +import tempfile +from shutil import copy2, move +from typing import Any, Collection, Dict, Optional + +from classy_vision import tasks +from classy_vision.generic.distributed_util import is_master +from classy_vision.generic.util import get_checkpoint_dict, save_checkpoint +from classy_vision.hooks.classy_hook import ClassyHook + + +class CheckpointHook(ClassyHook): + """ + Hook to checkpoint a model's task. + + Saves the checkpoints in checkpoint_folder. + """ + + on_rendezvous = ClassyHook._noop + on_phase_start = ClassyHook._noop + on_sample = ClassyHook._noop + on_forward = ClassyHook._noop + on_loss_and_meter = ClassyHook._noop + on_backward = ClassyHook._noop + on_update = ClassyHook._noop + on_end = ClassyHook._noop + + def __init__( + self, + checkpoint_folder: str, + input_args: Any, + phase_types: Optional[Collection[str]] = None, + checkpoint_period: int = 1, + ) -> None: + """The constructor method of CheckpointHook. + + Args: + checkpoint_folder: Folder to store checkpoints in + input_args: Any arguments to save about the runtime setup. For example, + it is useful to store the config that was used to instantiate the model. + phase_types: If ``phase_types`` is specified, only checkpoint on those phase + types. Each item in ``phase_types`` must be either "train" or "test". 
+ checkpoint_period: Checkpoint at the end of every x phases (default 1) + + """ + super().__init__() + self.checkpoint_folder: str = checkpoint_folder + self.input_args: Any = input_args + if phase_types is None: + phase_types = ["train", "test"] + assert len(phase_types) > 0 and all( + phase_type in ["train", "test"] for phase_type in phase_types + ), "phase_types should contain one or more of ['train', 'test']" + assert ( + isinstance(checkpoint_period, int) and checkpoint_period > 0 + ), "checkpoint period must be positive" + + self.phase_types: Collection[str] = phase_types + self.checkpoint_period: int = checkpoint_period + self.phase_counter: int = 0 + + def _save_checkpoint(self, task, filename): + if getattr(task, "test_only", False): + return + assert os.path.exists( + self.checkpoint_folder + ), "Checkpoint folder '{}' deleted unexpectedly".format(self.checkpoint_folder) + + # save checkpoint: + logging.info("Saving checkpoint to '{}'...".format(self.checkpoint_folder)) + checkpoint_file = save_checkpoint( + self.checkpoint_folder, get_checkpoint_dict(task, self.input_args) + ) + + # make copy of checkpoint that won't be overwritten: + if checkpoint_file: + tmp_dir = tempfile.mkdtemp() + tmp_file = os.path.join(tmp_dir, filename) + copy2(checkpoint_file, tmp_file) + move(tmp_file, os.path.join(self.checkpoint_folder, filename)) + + def on_start( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + if getattr(task, "test_only", False): + return + if not os.path.exists(self.checkpoint_folder): + err_msg = "Checkpoint folder '{}' does not exist.".format( + self.checkpoint_folder + ) + raise FileNotFoundError(err_msg) + + def on_phase_end( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """Checkpoint the task every checkpoint_period phases. + + We do not necessarily checkpoint the task at the end of every phase. 
+ """ + if not is_master() or task.phase_type not in self.phase_types: + return + + self.phase_counter += 1 + if self.phase_counter % self.checkpoint_period != 0: + return + + checkpoint_name = "model_phase-{phase}_end.torch".format(phase=task.phase_idx) + self._save_checkpoint(task, checkpoint_name) diff --git a/classy_vision/hooks/classy_hook.py b/classy_vision/hooks/classy_hook.py new file mode 100644 index 0000000000..33b0ed2461 --- /dev/null +++ b/classy_vision/hooks/classy_hook.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from abc import ABC, abstractmethod +from enum import Enum, auto +from typing import Any, Dict + +from classy_vision import tasks + + +class ClassyHookFunctions(Enum): + """ + Enumeration of all the hook functions in the ClassyHook class. + """ + + on_rendezvous = auto() + on_start = auto() + on_phase_start = auto() + on_sample = auto() + on_forward = auto() + on_loss_and_meter = auto() + on_backward = auto() + on_update = auto() + on_phase_end = auto() + on_end = auto() + + +class ClassyHookState: + """Class to store state within instances of ClassyHook. + + Any serializable data can be stored in the instance's attributes. + """ + + def get_classy_state(self) -> Dict[str, Any]: + return self.__dict__ + + def set_classy_state(self, state_dict: Dict[str, Any]): + self.__dict__ = state_dict + + +class ClassyHook(ABC): + """Base class for hooks. + + Hooks allow to inject behavior at different places of the training loop, which + are listed below in the chronological order. + + on_start -> on_phase_start -> on_sample -> on_forward -> on_loss_and_meter -> + on_backward -> on_update -> on_phase_end -> on_end + + Deriving classes should call ``super().__init__()`` and store any state in + ``self.state``. Any state added to this property should be serializable. + E.g. 
- + + .. code-block:: python + + class MyHook(ClassyHook): + def __init__(self, a, b): + super().__init__() + self.state.a = [1,2,3] + self.state.b = "my_hook" + # the following line is not allowed + # self.state.my_lambda = lambda x: x^2 + + """ + + def __init__(self): + self.state = ClassyHookState() + + def _noop(self, task: "tasks.ClassyTask", local_variables: Dict[str, Any]) -> None: + """Derived classes can set their hook functions to this. + + This is useful if they want those hook functions to not do anything. + + """ + pass + + @classmethod + def name(cls) -> str: + """Returns the name of the class.""" + return cls.__name__ + + @abstractmethod + def on_rendezvous( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """Called when the trainers rendezvous.""" + pass + + @abstractmethod + def on_start( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """Called at the start of training.""" + pass + + @abstractmethod + def on_phase_start( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """Called at the start of each phase.""" + pass + + @abstractmethod + def on_sample( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """Called each time trainer obtained a sample from the dataset.""" + pass + + @abstractmethod + def on_forward( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """Called each time forward pass is done in the model.""" + pass + + @abstractmethod + def on_loss_and_meter( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """Called each time after a loss has been computed and meters are updated.""" + pass + + @abstractmethod + def on_backward( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """Called each time a backward step is performed on the loss.""" + pass + + @abstractmethod + def on_update( + self, task: "tasks.ClassyTask", 
local_variables: Dict[str, Any] + ) -> None: + """Called each time after parameters have been updated by the optimizer.""" + pass + + @abstractmethod + def on_phase_end( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """Called at the end of each phase (epoch).""" + pass + + @abstractmethod + def on_end(self, task: "tasks.ClassyTask", local_variables: Dict[str, Any]) -> None: + """Called at the end of training.""" + pass + + def get_classy_state(self) -> Dict[str, Any]: + """Get the state of the ClassyHook. + + The returned state is used for checkpointing. + + Returns: + A state dictionary containing the state of the hook.\ + + """ + return self.state.get_classy_state() + + def set_classy_state(self, state_dict: Dict[str, Any]) -> None: + """Set the state of the ClassyHook. + + Args: + state_dict: The state dictionary. Must be the output of a call to + :func:`get_classy_state`. + + This is used to load the state of the hook from a checkpoint. + + """ + self.state.set_classy_state(state_dict) diff --git a/classy_vision/hooks/exponential_moving_average_model_hook.py b/classy_vision/hooks/exponential_moving_average_model_hook.py new file mode 100644 index 0000000000..762fb5c2e9 --- /dev/null +++ b/classy_vision/hooks/exponential_moving_average_model_hook.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import itertools +import logging +from typing import Any, Dict, Iterable, Tuple + +import torch +import torch.nn as nn +from classy_vision.hooks import ClassyHook +from classy_vision.tasks import ClassyTask + + +class ExponentialMovingAverageModelHook(ClassyHook): + """ + Hook which keeps a track of the exponential moving average (EMA) of the model's + parameters and applies the EMA params to the model during the test phases. 
+ + Saving the state in cpu will save gpu memory, but will make training slower since + the model parameters will need to be moved to cpu before the averaging. + + Note: + This hooks stores two additional copies of the model's parameters, which will + increase memory usage significantly. + """ + + on_rendezvous = ClassyHook._noop + on_sample = ClassyHook._noop + on_forward = ClassyHook._noop + on_loss_and_meter = ClassyHook._noop + on_backward = ClassyHook._noop + on_end = ClassyHook._noop + + def __init__( + self, decay: float, consider_bn_buffers: bool = True, device: str = "cpu" + ) -> None: + """The constructor method of ExponentialMovingAverageModelHook. + + Args: + decay: EMA decay factor, should be in [0, 1]. A decay of 0 corresponds to + always using the latest value (no EMA) and a decay of 1 corresponds to + not updating weights after initialization. + consider_bn_buffers: Whether to apply EMA to batch norm buffers + device: Device to store the model state. + """ + super().__init__() + assert 0 <= decay <= 1, "Decay should be between 0 and 1" + assert device in ["cpu", "gpu"], "Device should be one of cpu or gpu" + self.decay: int = decay + self.consider_bn_buffers = consider_bn_buffers + self.device = "cuda" if device == "gpu" else "cpu" + self.state.model_state = {} + self.state.ema_model_state = {} + logging.info( + f"{self.__class__.__name__} initialized with a decay of " + f"{decay} on device {device}" + ) + + def get_model_state_iterator(self, model: nn.Module) -> Iterable[Tuple[str, Any]]: + """Get an iterator over the model state to apply EMA to.""" + iterable = model.named_parameters() + if self.consider_bn_buffers: + # also add batch norm buffers to the list of state params to iterate over + buffers_iterable = ( + (f"{module_name}_buffer_{name}", buffer) + for module_name, module in model.named_modules() + for name, buffer in module.named_buffers() + if isinstance( + module, + (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm), + 
) + ) + iterable = itertools.chain(iterable, buffers_iterable) + return iterable + + def _save_current_model_state(self, model: nn.Module, model_state: Dict[str, Any]): + """Copy the model's state to the provided dict.""" + for name, param in self.get_model_state_iterator(model): + model_state[name] = param.detach().clone().to(device=self.device) + + def on_start(self, task: ClassyTask, local_variables: Dict[str, Any]) -> None: + if self.state.model_state: + # loaded state from checkpoint, do not re-initialize, only move the state + # to the right device + for name in self.state.model_state: + self.state.model_state[name] = self.state.model_state[name].to( + device=self.device + ) + self.state.ema_model_state[name] = self.state.ema_model_state[name].to( + device=self.device + ) + return + self._save_current_model_state(task.base_model, self.state.model_state) + self._save_current_model_state(task.base_model, self.state.ema_model_state) + + def on_phase_start(self, task: ClassyTask, local_variables: Dict[str, Any]) -> None: + # restore the right state depending on the phase type + self.set_model_state(task, use_ema=not task.train) + + def on_phase_end(self, task: ClassyTask, local_variables: Dict[str, Any]) -> None: + if task.train: + # save the current model state since this will be overwritten by the ema + # state in the test phase + self._save_current_model_state(task.base_model, self.state.model_state) + + def on_update(self, task: ClassyTask, local_variables: Dict[str, Any]) -> None: + with torch.no_grad(): + for name, param in self.get_model_state_iterator(task.base_model): + self.state.ema_model_state[ + name + ] = self.decay * self.state.ema_model_state[name] + ( + 1 - self.decay + ) * param.to( + device=self.device + ) + + def set_model_state(self, task: ClassyTask, use_ema: bool) -> None: + """ + Depending on use_ema, set the appropriate state for the model. 
+ """ + model_state = self.state.ema_model_state if use_ema else self.state.model_state + with torch.no_grad(): + for name, param in self.get_model_state_iterator(task.base_model): + param.copy_(model_state[name]) diff --git a/classy_vision/hooks/loss_lr_meter_logging_hook.py b/classy_vision/hooks/loss_lr_meter_logging_hook.py new file mode 100644 index 0000000000..3455d03375 --- /dev/null +++ b/classy_vision/hooks/loss_lr_meter_logging_hook.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +from typing import Any, Dict, Optional + +from classy_vision import tasks +from classy_vision.generic.distributed_util import get_rank +from classy_vision.hooks.classy_hook import ClassyHook + + +class LossLrMeterLoggingHook(ClassyHook): + """ + Logs the loss, optimizer LR, and meters. Logs at the end of a phase. + """ + + on_rendezvous = ClassyHook._noop + on_start = ClassyHook._noop + on_phase_start = ClassyHook._noop + on_sample = ClassyHook._noop + on_forward = ClassyHook._noop + on_backward = ClassyHook._noop + on_end = ClassyHook._noop + + def __init__(self, log_freq: Optional[int] = None) -> None: + """The constructor method of LossLrMeterLoggingHook. + + Args: + log_freq: if specified, also logs every ``log_freq`` batches. + + """ + super().__init__() + self.log_freq: Optional[int] = log_freq + + def on_loss_and_meter( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """ + Log metrics every log_freq batches, if log_freq is not None. 
+ """ + if self.log_freq is None: + return + batches = len(task.losses) + if batches and batches % self.log_freq == 0: + logging.info("Local unsynced metric values:") + self._log_loss_meters(task, local_variables) + + def on_phase_end( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """ + Log the loss, optimizer LR, and meters for the phase. + """ + batches = len(task.losses) + if batches: + # Most trainers will sync meters on phase end, however we + # do not explicitly state this since it is possible for a + # trainer to implement an unsynced end of phase meter or + # for meters to not provide a sync function. + logging.info("End of phase metric values:") + self._log_loss_meters(task, local_variables) + if task.train: + self._log_lr(task, local_variables) + + def on_update( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """ + Log the LR every log_freq batches, if log_freq is not None. + """ + if self.log_freq is None: + return + batches = len(task.losses) + if batches and batches % self.log_freq == 0: + self._log_lr(task, local_variables) + + def _log_lr( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """ + Compute and log the optimizer LR. + """ + optimizer_lr = task.optimizer.lr + logging.info("Learning Rate: {}\n".format(optimizer_lr)) + + def _log_loss_meters( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """ + Compute and log the loss and meters. 
+ """ + + phase_type = task.phase_type + phase_type_idx = task.train_phase_idx if task.train else task.eval_phase_idx + batches = len(task.losses) + + # Loss for the phase + loss = sum(task.losses) / (batches * task.get_batchsize_per_replica()) + + log_strs = [ + "Rank: {}, {} phase: {}, processed batches: {}".format( + get_rank(), phase_type, phase_type_idx, batches + ), + "{} loss: {}".format(phase_type, loss), + "Meters:", + ] + for meter in task.meters: + log_strs.append("{}".format(meter)) + logging.info("\n".join(log_strs)) diff --git a/classy_vision/hooks/model_complexity_hook.py b/classy_vision/hooks/model_complexity_hook.py new file mode 100644 index 0000000000..e965fded7c --- /dev/null +++ b/classy_vision/hooks/model_complexity_hook.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import logging +from typing import Any, Dict + +from classy_vision import tasks +from classy_vision.generic.profiler import compute_flops, count_params +from classy_vision.hooks.classy_hook import ClassyHook + + +class ModelComplexityHook(ClassyHook): + """ + Logs the number of paramaters and forward pass FLOPs of the model. 
+ """ + + on_rendezvous = ClassyHook._noop + on_phase_start = ClassyHook._noop + on_sample = ClassyHook._noop + on_forward = ClassyHook._noop + on_loss_and_meter = ClassyHook._noop + on_backward = ClassyHook._noop + on_update = ClassyHook._noop + on_phase_end = ClassyHook._noop + on_end = ClassyHook._noop + + def on_start( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """Measure number of parameters and number of FLOPs.""" + try: + num_flops = compute_flops( + task.base_model, + input_shape=task.base_model.input_shape, + input_key=task.base_model.input_key + if hasattr(task.base_model, "input_key") + else None, + ) + if num_flops is None: + logging.info("FLOPs for forward pass: skipped.") + else: + logging.info( + "FLOPs for forward pass: %d MFLOPs" % (float(num_flops) / 1e6) + ) + except NotImplementedError: + logging.warning( + """Model contains unsupported modules: + Could not compute FLOPs for model forward pass. Exception:""", + exc_info=True, + ) + logging.info( + "Number of parameters in model: %d" % count_params(task.base_model) + ) diff --git a/classy_vision/hooks/model_tensorboard_hook.py b/classy_vision/hooks/model_tensorboard_hook.py new file mode 100644 index 0000000000..2d0cbc6ccf --- /dev/null +++ b/classy_vision/hooks/model_tensorboard_hook.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +from typing import Any, Dict + +from classy_vision import tasks +from classy_vision.generic.distributed_util import is_master +from classy_vision.generic.visualize import plot_model +from classy_vision.hooks.classy_hook import ClassyHook + + +try: + from tensorboardX import SummaryWriter # noqa F401 + + tbx_available = True +except ImportError: + tbx_available = False + + +class ModelTensorboardHook(ClassyHook): + """ + Shows the model graph in `TensorBoard _. + """ + + on_rendezvous = ClassyHook._noop + on_phase_start = ClassyHook._noop + on_sample = ClassyHook._noop + on_forward = ClassyHook._noop + on_loss_and_meter = ClassyHook._noop + on_backward = ClassyHook._noop + on_update = ClassyHook._noop + on_phase_end = ClassyHook._noop + on_end = ClassyHook._noop + + def __init__(self, tb_writer) -> None: + """The constructor method of ModelTensorboardHook. + + Args: + tb_writer: `Tensorboard SummaryWriter `_ instance + + """ + super().__init__() + if not tbx_available: + raise RuntimeError( + "tensorboardX not installed, cannot use ModelTensorboardHook" + ) + + self.tb_writer = tb_writer + + def on_start( + self, task: "tasks.ClassyTask", local_variables: Dict[str, Any] + ) -> None: + """ + Plot the model on Tensorboard. + """ + # Show model in tensorboard: + logging.info("Showing model graph in TensorBoard...") + + if is_master(): + try: + plot_model( + task.base_model, + size=task.base_model.input_shape, + input_key=task.base_model.input_key + if hasattr(task.base_model, "input_key") + else None, + writer=self.tb_writer, + ) + except Exception: + logging.warn( + "Unable to plot model to tensorboard. Exception: ", exc_info=True + ) diff --git a/classy_vision/hooks/profiler_hook.py b/classy_vision/hooks/profiler_hook.py new file mode 100644 index 0000000000..2fbe507708 --- /dev/null +++ b/classy_vision/hooks/profiler_hook.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. 
class ProfilerHook(ClassyHook):
    """
    Profiles the model once at the start of training and logs a summary of
    the collected runtime information.
    """

    on_rendezvous = ClassyHook._noop
    on_phase_start = ClassyHook._noop
    on_sample = ClassyHook._noop
    on_forward = ClassyHook._noop
    on_loss_and_meter = ClassyHook._noop
    on_backward = ClassyHook._noop
    on_update = ClassyHook._noop
    on_phase_end = ClassyHook._noop
    on_end = ClassyHook._noop

    def on_start(
        self, task: "tasks.ClassyTask", local_variables: Dict[str, Any]
    ) -> None:
        """Run the profiler on ``task.model`` and log the summarized result."""
        logging.info("Profiling forward pass...")

        # Datasets that do not expose a batch size are profiled with 1.
        current_dataset = task.dataloaders[task.phase_type].dataset
        per_replica = getattr(current_dataset, "batchsize_per_replica", 1)

        base_model = task.base_model
        shape = base_model.input_shape
        profiler_output = profile(
            task.model,
            batchsize_per_replica=per_replica,
            input_shape=shape,
            input_key=getattr(base_model, "input_key", None),
        )
        summary = summarize_profiler_info(profiler_output)
        logging.info(summary)
class ProgressBarHook(ClassyHook):
    """
    Renders a progress bar that advances once per parameter update.

    The bar is only created and updated on the master process.
    """

    on_rendezvous = ClassyHook._noop
    on_start = ClassyHook._noop
    on_sample = ClassyHook._noop
    on_forward = ClassyHook._noop
    on_loss_and_meter = ClassyHook._noop
    on_backward = ClassyHook._noop
    on_end = ClassyHook._noop

    def __init__(self) -> None:
        """Set up an idle hook; the bar itself is created per phase."""
        super().__init__()
        self.progress_bar: Optional[progressbar.ProgressBar] = None
        self.bar_size: int = 0
        self.batches: int = 0

    def on_phase_start(
        self, task: "tasks.ClassyTask", local_variables: Dict[str, Any]
    ) -> None:
        """Start a fresh bar sized to the number of batches in the phase.

        Raises:
            RuntimeError: If the progressbar module could not be imported.
        """
        if not progressbar_available:
            raise RuntimeError(
                "progressbar module not installed, cannot use ProgressBarHook"
            )

        if not is_master():
            return

        self.bar_size = task.num_batches_per_phase
        self.batches = 0
        self.progress_bar = progressbar.ProgressBar(self.bar_size)
        self.progress_bar.start()

    def on_update(
        self, task: "tasks.ClassyTask", local_variables: Dict[str, Any]
    ) -> None:
        """Advance the bar by one batch, never past its configured size."""
        if not is_master() or self.progress_bar is None:
            return

        self.batches += 1
        position = min(self.batches, self.bar_size)
        self.progress_bar.update(position)

    def on_phase_end(
        self, task: "tasks.ClassyTask", local_variables: Dict[str, Any]
    ) -> None:
        """Finish the bar once the phase is over."""
        if not is_master() or self.progress_bar is None:
            return

        self.progress_bar.finish()
class TensorboardPlotHook(ClassyHook):
    """
    Hook for writing the losses, learning rates and meters to TensorBoard.

    During train phases the learning rate, wall time and ``task.num_updates``
    are recorded at every update; at the end of each phase the per-update
    values (train only), the phase-average loss and all dict-valued meters
    are written by the master process.
    """

    on_rendezvous = ClassyHook._noop
    on_start = ClassyHook._noop
    on_sample = ClassyHook._noop
    on_forward = ClassyHook._noop
    on_backward = ClassyHook._noop
    on_loss_and_meter = ClassyHook._noop
    on_end = ClassyHook._noop

    def __init__(self, tb_writer) -> None:
        """The constructor method of TensorboardPlotHook.

        Args:
            tb_writer: tensorboardX ``SummaryWriter`` instance the scalars
                are written to.

        Raises:
            RuntimeError: If tensorboardX could not be imported.
        """
        super().__init__()
        if not tbx_available:
            raise RuntimeError(
                "tensorboardX not installed, cannot use TensorboardPlotHook"
            )

        self.tb_writer = tb_writer
        # Per-phase buffers; None until the first phase has started.
        self.learning_rates: Optional[List[float]] = None
        self.wall_times: Optional[List[float]] = None
        self.num_steps_global: Optional[List[int]] = None

    def on_phase_start(
        self, task: "tasks.ClassyTask", local_variables: Dict[str, Any]
    ) -> None:
        """Reset the per-phase buffers."""
        self.learning_rates = []
        self.wall_times = []
        self.num_steps_global = []

    def on_update(
        self, task: "tasks.ClassyTask", local_variables: Dict[str, Any]
    ) -> None:
        """Store the learning rate, wall time and update count (train only)."""
        if self.learning_rates is None:
            logging.warning("learning_rates is not initialized")
            return

        if not task.train:
            # Only need to log the average loss during the test phase
            return

        self.learning_rates.append(task.optimizer.lr)
        self.wall_times.append(time.time())
        self.num_steps_global.append(task.num_updates)

    def on_phase_end(
        self, task: "tasks.ClassyTask", local_variables: Dict[str, Any]
    ) -> None:
        """Add the losses, learning rates and meters to tensorboard."""
        if self.learning_rates is None:
            logging.warning("learning_rates is not initialized")
            return

        batches = len(task.losses)
        if batches == 0 or not is_master():
            return

        phase_type = task.phase_type
        phase_type_idx = task.train_phase_idx if task.train else task.eval_phase_idx

        logging.info(f"Plotting to Tensorboard for {phase_type} phase {phase_type_idx}")

        # Queried once; the value is reused for every per-update loss below.
        batchsize_per_replica = task.get_batchsize_per_replica()

        if task.train:
            loss_key = f"{phase_type}_loss"
            learning_rate_key = f"{phase_type}_learning_rate_updates"
            for loss, learning_rate, global_step, wall_time in zip(
                task.losses, self.learning_rates, self.num_steps_global, self.wall_times
            ):
                # Plain division instead of `/=`, so that entries of
                # task.losses are never modified in place should they be
                # mutable objects -- they are summed again for the average.
                self.tb_writer.add_scalar(
                    loss_key,
                    loss / batchsize_per_replica,
                    global_step=global_step,
                    walltime=wall_time,
                )
                self.tb_writer.add_scalar(
                    learning_rate_key,
                    learning_rate,
                    global_step=global_step,
                    walltime=wall_time,
                )

        loss_avg = sum(task.losses) / (batches * batchsize_per_replica)
        self.tb_writer.add_scalar(
            f"avg_{phase_type}_loss", loss_avg, global_step=phase_type_idx
        )

        # plot meters which return a dict
        for meter in task.meters:
            if not isinstance(meter.value, dict):
                # Logger.warn is a deprecated alias of Logger.warning.
                log.warning(f"Skipping meter {meter.name} with value: {meter.value}")
                continue
            for name, value in meter.value.items():
                meter_key = f"{phase_type}_{meter.name}_{name}"
                self.tb_writer.add_scalar(meter_key, value, global_step=phase_type_idx)

        logging.info("Done plotting to Tensorboard")
def on_loss_and_meter(
    self, task: "tasks.ClassyTask", local_variables: Dict[str, Any]
) -> None:
    """
    Log metrics every log_freq batches, if periodic logging is enabled.

    Args:
        task: The running task; ``len(task.losses)`` is used as the number
            of batches processed so far in the phase.
        local_variables: Forwarded unchanged to the metric logger.
    """
    if not self.log_freq:
        # None disables periodic logging. 0 is treated the same way, because
        # `batches % 0` would raise ZeroDivisionError in the middle of a run.
        return
    batches = len(task.losses)
    if batches and batches % self.log_freq == 0:
        self._log_performance_metrics(task, local_variables)
class VisdomHook(ClassyHook):
    """Plots metrics on to Visdom.

    Visdom is a flexible tool for creating, organizing, and sharing visualizations
    of live, rich data. It supports Python.

    Metrics are accumulated at the end of every phase; the learning curves
    are redrawn by the master process at the end of each non-train phase.
    """

    on_rendezvous = ClassyHook._noop
    on_start = ClassyHook._noop
    on_phase_start = ClassyHook._noop
    on_sample = ClassyHook._noop
    on_forward = ClassyHook._noop
    on_loss_and_meter = ClassyHook._noop
    on_backward = ClassyHook._noop
    on_update = ClassyHook._noop
    on_end = ClassyHook._noop

    def __init__(
        self, server: str, port: str, env: str = "main", title_suffix: str = ""
    ) -> None:
        """
        Args:
            server: host name of the visdom server
            port: port of visdom server, such as 8097
            env: environment of visdom
            title_suffix: suffix that will be appended to the title

        Raises:
            RuntimeError: If visdom could not be imported.
        """
        super().__init__()
        if not visdom_available:
            raise RuntimeError("Visdom is not installed, cannot use VisdomHook")

        self.server: str = server
        self.port: str = port
        self.env: str = env
        self.title_suffix: str = title_suffix

        # Maps a metric name to its list of per-phase values.
        self.metrics: Dict = {}
        self.visdom: Visdom = Visdom(self.server, self.port)

    def on_phase_end(
        self, task: "tasks.ClassyTask", local_variables: Dict[str, Any]
    ) -> None:
        """
        Record the phase's loss, learning rate and meters, then plot them.

        Nothing is recorded for a phase without any losses. Plotting only
        happens at the end of non-train phases on the master process.
        """
        # The ABC aliases in the top-level `collections` namespace were
        # removed in Python 3.10; import from collections.abc explicitly.
        from collections.abc import MutableMapping

        phase_type = task.phase_type
        metrics = self.metrics
        batches = len(task.losses)

        if batches == 0:
            return

        # Loss for the phase
        loss = sum(task.losses) / (batches * task.get_batchsize_per_replica())
        metrics.setdefault(phase_type + "_loss", []).append(loss)

        # Optimizer LR for the phase
        metrics.setdefault(phase_type + "_learning_rate", []).append(
            task.optimizer.lr
        )

        # Calculate meters
        for meter in task.meters:
            if isinstance(meter.value, MutableMapping):
                # Mapping-valued meters contribute one series per flattened key.
                flattened_meters_dict = flatten_dict(meter.value, prefix=meter.name)
                for k, v in flattened_meters_dict.items():
                    metrics.setdefault(phase_type + "_" + k, []).append(v)
            else:
                metrics.setdefault(phase_type + "_" + meter.name, []).append(
                    meter.value
                )

        # update learning curve visualizations:
        if not task.train and is_master():
            # The title is only needed when plotting, so it is built here.
            title = "%s-%s-%d" % (
                "train" if task.train else "test",
                task.base_model.__class__.__name__,
                task.base_model.model_depth,
            )
            title += self.title_suffix

            logging.info("Plotting learning curves to visdom")
            plot_learning_curves(
                metrics, visdom_server=self.visdom, env=self.env, win=title, title=title
            )
class ClassyHubInterface:
    """PyTorch Hub facade around a Classy Vision model and, optionally, its task.

    A model is always available; a task is only present when the interface
    was built from one. Prefer the :meth:`from_task` and :meth:`from_model`
    factories over calling the constructor directly.

    Attributes:
        task: The task the interface was built from, or None.
        model: The torchhub model.
    """

    def __init__(
        self, task: Optional[ClassyTask] = None, model: Optional[ClassyModel] = None
    ) -> None:
        """Build the interface from exactly one of ``task`` or ``model``.

        Args:
            task: Task whose ``model`` attribute becomes the interface's model.
            model: Model to expose when no task is given.
        """
        self.task = task
        if task is not None:
            assert model is None, "Cannot pass a model if task is not None"
            self.model = task.model
        else:
            assert model is not None, "Need to specify a model if task is None"
            self.model = model

    @classmethod
    def from_task(cls, task: ClassyTask) -> "ClassyHubInterface":
        """Create a hub interface backed by ``task`` and its model.

        Args:
            task: ClassyTask that contains the hub model.
        """
        return cls(task=task)

    @classmethod
    def from_model(cls, model: Union[nn.Module, ClassyModel]) -> "ClassyHubInterface":
        """Create a hub interface backed by ``model`` alone.

        Args:
            model: The torchhub model. Anything that is not already a
                ClassyModel is wrapped in a ClassyModelWrapper first.
        """
        classy_model = (
            model if isinstance(model, ClassyModel) else ClassyModelWrapper(model)
        )
        return cls(model=classy_model)

    def create_image_dataset(
        self,
        image_paths: Union[List[str], str],
        targets: Optional[List[Any]] = None,
        batchsize_per_replica: int = 32,
        shuffle: bool = True,
        transform: Optional[Callable] = None,
        num_samples: Optional[int] = None,
        phase_type: str = "train",
    ) -> ClassyDataset:
        """Create a ClassyDataset which reads images from image_paths.

        Args:
            image_paths: Either a single directory (laid out like
                `:class:torchvision.datasets.ImageFolder`, with targets
                inferred from the directory structure) or a list with the
                paths of all images, in which case targets may be supplied
                through ``targets``.
            targets: The target classes, one per image.
            batchsize_per_replica: Minibatch size per replica (i.e. samples
                per GPU).
            shuffle: If true, data is shuffled between epochs.
            transform: Transform applied to each sample. When None, the
                transform of the task's dataset for ``phase_type`` is used if
                the task has one, otherwise the default imagenet transform
                for that phase.
            num_samples: If specified, limits the number of samples returned
                by the dataset.
            phase_type: The phase the dataset is for, e.g. "train" or "test".
        """
        if transform is None:
            task_has_phase = (
                self.task is not None and phase_type in self.task.datasets
            )
            if task_has_phase:
                # Reuse the transform of the task's dataset for this phase.
                transform = self.task.datasets[phase_type].transform
                assert transform is not None, "Cannot infer transform from the task"
            else:
                transform = build_field_transform_default_imagenet(
                    config=None, split=phase_type
                )

        return ImagePathDataset(
            batchsize_per_replica,
            shuffle,
            transform,
            num_samples,
            image_paths,
            targets=targets,
            split=phase_type,
        )

    @staticmethod
    def get_data_iterator(dataset: ClassyDataset) -> Iterator[Any]:
        """Return an iterator over the samples produced by ``dataset``.

        Args:
            dataset: Dataset to iterate over.
        """
        samples = dataset.iterator()
        return iter(samples)

    def train(self) -> None:
        """Enable torch gradient calculation and put the model in train mode."""
        torch.autograd.set_grad_enabled(True)
        self.model.train()

    def eval(self) -> None:
        """Disable torch gradient calculation and put the model in eval mode."""
        torch.autograd.set_grad_enabled(False)
        self.model.eval()

    def predict(self, sample):
        """Return the model's prediction for a sample.

        Args:
            sample: Must contain an "input" key; the model is applied to it.
        """
        prediction = self.model(sample["input"])
        # squeeze the output in case the batch size is 1
        return prediction.squeeze()

    def extract_features(self, sample):
        """Return the feature embeddings the model computes for a sample.

        Args:
            sample: Must contain an "input" key; features are extracted
                from it.
        """
        features = self.model.extract_features(sample["input"])
        # squeeze the output in case the batch size is 1
        return features.squeeze()
Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + key: input + + meters: + accuracy: + topk: [1, 5] + model: + name: resnet + num_blocks: [3, 4, 6, 3] + small_input: false + zero_init_bn_residuals: true + heads: + - name: fully_connected + unique_id: default_head + num_classes: 1000 + fork_block: block3-2 + in_plane: 2048 + optimizer: + name: sgd + lr: + name: step + values: [0.1, 0.01] + weight_decay: 1e-4 + momentum: 0.9 diff --git a/classy_vision/hydra/dataset/imagenet.yaml b/classy_vision/hydra/dataset/imagenet.yaml new file mode 100644 index 0000000000..ee0fb1771f --- /dev/null +++ b/classy_vision/hydra/dataset/imagenet.yaml @@ -0,0 +1,12 @@ +config: + dataset: + train: + name: imagenet + split: train + batchsize_per_replica: 32 + use_shuffle: true + test: + name: imagenet + split: val + batchsize_per_replica: 32 + use_shuffle: false diff --git a/classy_vision/hydra/dataset/synthetic_image.yaml b/classy_vision/hydra/dataset/synthetic_image.yaml new file mode 100644 index 0000000000..c730e05427 --- /dev/null +++ b/classy_vision/hydra/dataset/synthetic_image.yaml @@ -0,0 +1,36 @@ +config: + dataset: + train: + name: synthetic_image + split: train + crop_size: 224 + class_ratio: 0.5 + num_samples: 2000 + seed: 0 + batchsize_per_replica: 32 + use_shuffle: true + transforms: + - name: apply_transform_to_key + transforms: + - name: ToTensor + - name: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + key: input + test: + name: synthetic_image + split: val + crop_size: 224 + class_ratio: 0.5 + num_samples: 2000 + seed: 1 + batchsize_per_replica: 32 + use_shuffle: false + transforms: + - name: apply_transform_to_key + transforms: + - name: ToTensor + - name: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + key: input diff --git a/classy_vision/hydra/loss/cross_entropy.yaml b/classy_vision/hydra/loss/cross_entropy.yaml new file mode 100644 index 0000000000..5327045a78 --- /dev/null +++ 
b/classy_vision/hydra/loss/cross_entropy.yaml @@ -0,0 +1,3 @@ +config: + loss: + name: CrossEntropyLoss diff --git a/classy_vision/hydra/loss/label_smoothing_cross_entropy.yaml b/classy_vision/hydra/loss/label_smoothing_cross_entropy.yaml new file mode 100644 index 0000000000..85729d505f --- /dev/null +++ b/classy_vision/hydra/loss/label_smoothing_cross_entropy.yaml @@ -0,0 +1,4 @@ +config: + loss: + name: label_smoothing_cross_entropy + smoothing_param: 0.1 diff --git a/classy_vision/hydra/meters/accuracy.yaml b/classy_vision/hydra/meters/accuracy.yaml new file mode 100644 index 0000000000..34315653cd --- /dev/null +++ b/classy_vision/hydra/meters/accuracy.yaml @@ -0,0 +1,4 @@ +config: + meters: + accuracy: + topk: [1, 5] diff --git a/classy_vision/hydra/model/resnet_50.yaml b/classy_vision/hydra/model/resnet_50.yaml new file mode 100644 index 0000000000..4fc4255f3b --- /dev/null +++ b/classy_vision/hydra/model/resnet_50.yaml @@ -0,0 +1,12 @@ +config: + model: + name: resnet + num_blocks: [3, 4, 6, 3] + small_input: false + zero_init_bn_residuals: true + heads: + - name: fully_connected + unique_id: default_head + num_classes: 1000 + fork_block: block3-2 + in_plane: 2048 diff --git a/classy_vision/hydra/optimizer/sgd.yaml b/classy_vision/hydra/optimizer/sgd.yaml new file mode 100644 index 0000000000..cfa1dc51da --- /dev/null +++ b/classy_vision/hydra/optimizer/sgd.yaml @@ -0,0 +1,5 @@ +config: + optimizer: + name: sgd + weight_decay: 1e-4 + momentum: 0.9 diff --git a/classy_vision/hydra/param_scheduler/step.yaml b/classy_vision/hydra/param_scheduler/step.yaml new file mode 100644 index 0000000000..55a977a025 --- /dev/null +++ b/classy_vision/hydra/param_scheduler/step.yaml @@ -0,0 +1,5 @@ +config: + optimizer: + lr: + name: step + values: [0.1, 0.01] diff --git a/classy_vision/hydra/task/classification_task.yaml b/classy_vision/hydra/task/classification_task.yaml new file mode 100644 index 0000000000..e5da69c789 --- /dev/null +++ 
def build_loss(config):
    """Builds a loss from a config.

    The config must contain a 'name' key. If that name was registered with
    :func:`register_loss`, the registered class's ``from_config`` is called
    with the full config. For instance, `{"name": "my_loss", "foo": "bar"}`
    resolves the class registered as "my_loss".

    Otherwise the name is looked up in ``torch.nn.modules.loss`` and every
    remaining config key is passed to that class's constructor as a keyword
    argument; a "weight" entry is converted to a float tensor first. For
    instance, the following call instantiates a ``CrossEntropyLoss``:

    .. code-block:: python

      build_loss({"name": "CrossEntropyLoss", "reduction": "sum"})
    """
    assert "name" in config, f"name not provided for loss: {config}"
    loss_name = config["name"]

    registered_cls = LOSS_REGISTRY[loss_name] if loss_name in LOSS_REGISTRY else None
    if registered_cls is not None:
        return registered_cls.from_config(config)

    # the name should be available in torch.nn.modules.loss
    assert hasattr(torch_losses, loss_name), (
        f"{loss_name} isn't a registered loss"
        ", nor is it available in torch.nn.modules.loss"
    )
    torch_loss_cls = getattr(torch_losses, loss_name)

    # Work on a copy so that the caller's config is left untouched.
    kwargs = copy.deepcopy(config)
    del kwargs["name"]
    if "weight" in kwargs:
        # weights arrive as a list but the torch losses expect a tensor
        kwargs["weight"] = torch.tensor(kwargs["weight"], dtype=torch.float)
    return torch_loss_cls(**kwargs)
@register_loss("barron")
class BarronLoss(ClassyLoss):
    """
    This implements the Barron loss.

    The element-wise loss is a function of ``(prediction - target) / c``
    whose form is selected once, at construction time, from ``alpha``:
    a log form for ``alpha == 0``, an exponential form for infinite
    ``alpha``, and a power form otherwise.
    """

    def __init__(self, alpha, size_average, c):
        """
        Args:
            alpha: Shape parameter selecting the form of the loss.
            size_average: If True, the summed loss is divided by the size of
                the first dimension of the prediction.
            c: Scale the residual is divided by.
        """
        super().__init__()

        self.size_average = size_average
        self.alpha = alpha
        self.c = c
        self.z = max(1.0, 2.0 - self.alpha)

        # define all three losses:
        def _forward_zero(diff):
            out = diff.div(self.c).pow(2.0).mul(0.5).add(1.0).log()
            return out

        def _forward_inf(diff):
            out = 1.0 - diff.div(self.c).pow(2.0).mul(-0.5).exp()
            return out

        def _forward(diff):
            out = diff.div(self.c).pow(2.0).div(self.z).add(1.0).pow(self.alpha / 2.0)
            # `out` is a fresh tensor, so the in-place ops do not touch `diff`.
            out.add_(-1.0).mul_(self.z / self.alpha)
            return out

        # set the correct loss:
        if self.alpha == 0.0:
            self._forward = _forward_zero
        elif self.alpha == -float("inf") or self.alpha == float("inf"):
            self._forward = _forward_inf
        else:
            self._forward = _forward

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "BarronLoss":
        """Instantiates a BarronLoss from a configuration.

        Args:
            config: A configuration for a BarronLoss.
                See :func:`__init__` for parameters expected in the config.
                The config is only read, never modified.

        Returns:
            A BarronLoss instance.
        """
        # Infinity is a valid alpha value but is frequently a string.
        # Converted into a local instead of being written back into the
        # caller's config.
        alpha = float(config["alpha"])
        size_average = config["size_average"]
        c = config["c"]

        # assertions:
        assert isinstance(size_average, bool)
        assert isinstance(c, float) and c > 0.0

        return cls(alpha=alpha, size_average=size_average, c=c)

    def forward(self, prediction, target):
        """Return the loss summed over the first dimension (dimension kept).

        When ``size_average`` is set, the sum is divided by
        ``prediction.size(0)``.
        """
        diff = torch.add(prediction, -target)
        loss = self._forward(diff)
        loss = loss.sum(0, keepdim=True)
        if self.size_average:
            loss.div_(prediction.size(0))
        return loss
class ClassyLoss(nn.Module):
    """
    Base class to calculate the loss during training.

    It is a :class:`torch.nn.Module` that subclasses make constructible from
    a configuration by overriding :func:`from_config`.
    """

    def __init__(self):
        """
        Constructor for ClassyLoss.
        """
        super().__init__()

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "ClassyLoss":
        """Instantiates a ClassyLoss from a configuration.

        Subclasses must override this; the base implementation always raises.

        Args:
            config: A configuration for a ClassyLoss.

        Returns:
            A ClassyLoss instance.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError()

    def forward(self, output, target):
        """
        Compute the loss for the provided sample.

        Subclasses must override this; the base implementation always raises
        ``NotImplementedError``. Refer to :class:`torch.nn.Module` for more
        details.
        """
        raise NotImplementedError
+ Targets provided to forward can be one-hot vectors (NxC) or class indices(Nx1) + + Config params: + 'weight': weight of sample (not yet implemented), + 'ignore_index': sample should be ignored for loss (optional), + 'smoothing_param': value to be added to each target entry + """ + super().__init__() + self._ignore_index = ignore_index + self._reduction = reduction + self._smoothing_param = smoothing_param + self.loss_function = SoftTargetCrossEntropyLoss( + self._ignore_index, self._reduction, None + ) + self._eps = np.finfo(np.float32).eps + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "LabelSmoothingCrossEntropyLoss": + """Instantiates a LabelSmoothingCrossEntropyLoss from a configuration. + + Args: + config: A configuration for a LabelSmoothingCrossEntropyLoss. + See :func:`__init__` for parameters expected in the config. + + Returns: + A LabelSmoothingCrossEntropyLoss instance. + """ + + assert "weight" not in config, '"weight" not implemented' + assert ( + "smoothing_param" in config + ), "Label Smoothing needs a smoothing parameter" + return cls( + ignore_index=config.get("ignore_index", -100), + reduction=config.get("reduction", "mean"), + smoothing_param=config.get("smoothing_param"), + ) + + def compute_valid_targets(self, target, classes): + + """ + This function takes one-hot or index target vectors and computes valid one-hot + target vectors, based on ignore index value + """ + target_shape_list = list(target.size()) + + valid_mask = target != self._ignore_index + valid_targets = target.float() * valid_mask.float() + + # check if targets are inputted as class integers + if len(target_shape_list) == 1 or ( + len(target_shape_list) == 2 and target_shape_list[1] == 1 + ): + + valid_targets = convert_to_one_hot(valid_targets.view(-1, 1), classes) + valid_targets = valid_targets.float() * valid_mask.view(-1, 1).float() + + return valid_targets + + def smooth_targets(self, valid_targets, classes): + + """ + This function takes valid (No 
@register_loss("multi_output_sum_loss")
class MultiOutputSumLoss(ClassyLoss):
    """
    Evaluates one wrapped loss against every output in a list (a single
    tensor output is treated as a one-element list) and adds the results.
    """

    def __init__(self, loss) -> None:
        """
        Args:
            loss: The loss applied to each individual output.
        """
        super().__init__()

        self._loss = loss

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "MultiOutputSumLoss":
        """Instantiates a MultiOutputSumLoss from a configuration.

        Args:
            config: A configuration for a MultiOutputSumLoss. Its ``"loss"``
                entry is itself a loss configuration passed to
                :func:`build_loss`.

        Returns:
            A MultiOutputSumLoss instance.
        """
        wrapped_config = config["loss"]
        assert (
            type(wrapped_config) == dict
        ), "loss must be a dict containing a configuration for a registered loss"
        return cls(loss=build_loss(wrapped_config))

    def forward(self, output, target):
        """Return the sum of the wrapped loss over all outputs."""
        predictions = [output] if torch.is_tensor(output) else output
        return sum(self._loss(pred, target) for pred in predictions)
+ This allows the targets for the cross entropy loss to be multilabel + + Config params: + 'weight': weight of sample (not yet implemented), + 'ignore_index': sample should be ignored for loss (optional), + 'reduction': specifies reduction to apply to the output (optional), + """ + super(SoftTargetCrossEntropyLoss, self).__init__() + self._ignore_index = ignore_index + self._reduction = reduction + assert normalize_targets in [None, "count_based"] + self._normalize_targets = normalize_targets + if self._reduction != "mean": + raise NotImplementedError( + 'reduction type "{}" not implemented'.format(self._reduction) + ) + self._eps = np.finfo(np.float32).eps + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "SoftTargetCrossEntropyLoss": + """Instantiates a SoftTargetCrossEntropyLoss from a configuration. + + Args: + config: A configuration for a SoftTargetCrossEntropyLoss. + See :func:`__init__` for parameters expected in the config. + + Returns: + A SoftTargetCrossEntropyLoss instance. + """ + + if "weight" in config: + raise NotImplementedError('"weight" not implemented') + return cls( + ignore_index=config.get("ignore_index", -100), + reduction=config.get("reduction", "mean"), + normalize_targets=config.get("normalize_targets", "count_based"), + ) + + def forward(self, output, target): + """for N examples and C classes + - output: N x C these are raw outputs (without softmax/sigmoid) + - target: N x C corresponding targets + + Target elements set to ignore_index contribute 0 loss. + + Samples where all entries are ignore_index do not contribute to the loss + reduction. 
@register_loss("sum_arbitrary")
class SumArbitraryLoss(ClassyLoss):
    """
    Sums a collection of (weighted) torch.nn losses.

    NOTE: this applies all the losses to the same output and does not support
    taking a list of outputs as input.
    """

    def __init__(
        self, losses: List[ClassyLoss], weights: Optional[Tensor] = None
    ) -> None:
        """
        Args:
            losses: The losses to evaluate and add up.
            weights: One weight per loss, indexable by position. Defaults to
                a weight of one for every loss.
        """
        super().__init__()
        if weights is None:
            weights = torch.ones((len(losses)))

        # NOTE(review): the losses are kept in a plain list, as before, so
        # they are not registered as submodules of this module -- confirm
        # whether that is intended before switching to nn.ModuleList.
        self.losses = losses
        self.weights = weights

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "SumArbitraryLoss":
        """Instantiates a SumArbitraryLoss from a configuration.

        Args:
            config: A configuration for a SumArbitraryLoss. ``"losses"`` is a
                non-empty list of loss configurations; ``"weights"`` is
                optional and, when given, is a list of the same length.

        Returns:
            A SumArbitraryLoss instance.
        """
        loss_configs = config["losses"]
        assert (
            isinstance(loss_configs, list) and len(loss_configs) > 0
        ), "losses must be a list of registered losses with length > 0"

        # "weights" is optional: look it up with .get() so that a config
        # without the key falls back to uniform weights instead of raising
        # KeyError inside the assertion.
        weights = config.get("weights", None)
        assert weights is None or (
            isinstance(weights, list) and len(weights) == len(loss_configs)
        ), "weights must be None or a list and have same length as losses"

        loss_modules = [build_loss(loss_config) for loss_config in loss_configs]

        assert all(
            isinstance(loss_module, ClassyLoss) for loss_module in loss_modules
        ), "All losses must be registered, valid ClassyLosses"

        return cls(losses=loss_modules, weights=weights)

    def forward(self, prediction, target):
        """Return the weighted sum of every loss applied to the same
        ``prediction`` / ``target`` pair.

        Raises:
            ValueError: If the module holds no losses.
        """
        total_loss = None
        for idx, loss in enumerate(self.losses):
            # Every term is scaled by its weight, including the first one,
            # which used to be added unweighted.
            weighted_loss = loss(prediction, target) * self.weights[idx]
            if total_loss is None:
                total_loss = weighted_loss
            else:
                total_loss = total_loss + weighted_loss
        if total_loss is None:
            raise ValueError("SumArbitraryLoss requires at least one loss")
        return total_loss
def register_meter(name):
    """Registers a :class:`ClassyMeter` subclass.

    This decorator allows Classy Vision to instantiate a subclass of
    ClassyMeter from a configuration file, even if the class itself is not
    part of the Classy Vision framework. To use it, apply this decorator to a
    ClassyMeter subclass, like this:

    .. code-block:: python

        @register_meter('accuracy')
        class AccuracyMeter(ClassyMeter):
            ...

    To instantiate a meter from a configuration file, see
    :func:`build_meter`.

    Raises:
        ValueError: When the decorated class is registered under a name that
            is already taken, or does not extend ClassyMeter.
    """

    def register_meter_cls(cls):
        if name in METER_REGISTRY:
            raise ValueError("Cannot register duplicate meter ({})".format(name))
        if not issubclass(cls, ClassyMeter):
            # Single-line literal: a backslash continuation inside the string
            # would embed the next line's indentation in the message.
            raise ValueError(
                "Meter ({}: {}) must extend ClassyMeter".format(name, cls.__name__)
            )
        METER_REGISTRY[name] = cls
        return cls

    return register_meter_cls
@register_meter("accuracy")
class AccuracyMeter(ClassyMeter):
    """Meter to calculate top-k accuracy for single label
       image classification task.
    """

    def __init__(self, topk):
        """
        args:
            topk: list of int `k` values.
        """
        assert isinstance(topk, list), "topk must be a list"
        assert len(topk) > 0, "topk list should have at least one element"
        # all(...) is required here: asserting on the list itself is always
        # true for a non-empty list and validates nothing.
        assert all(is_pos_int(x) for x in topk), "each value in topk must be >= 1"

        self._topk = topk

        # _total_* variables store running, in-sync totals for the
        # metrics. These should not be communicated / summed.
        self._total_correct_predictions_k = None
        self._total_sample_count = None

        # _curr_* variables store counts since the last sync. Only
        # these should be summed across workers and they are reset
        # after each communication
        self._curr_correct_predictions_k = None
        self._curr_sample_count = None

        # Initialize all values properly
        self.reset()

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "AccuracyMeter":
        """Instantiates a AccuracyMeter from a configuration.

        Args:
            config: A configuration for a AccuracyMeter.
                See :func:`__init__` for parameters expected in the config.

        Returns:
            A AccuracyMeter instance.
        """
        return cls(topk=config["topk"])

    @property
    def name(self):
        """The name this meter is registered and checkpointed under."""
        return "accuracy"

    def sync_state(self):
        """Sum the unsynced counts across workers, fold them into the
        running totals and clear them."""
        # Communications
        self._curr_correct_predictions_k = all_reduce_sum(
            self._curr_correct_predictions_k
        )
        self._curr_sample_count = all_reduce_sum(self._curr_sample_count)

        # Store results
        self._total_correct_predictions_k += self._curr_correct_predictions_k
        self._total_sample_count += self._curr_sample_count

        # Reset values until next sync
        self._curr_correct_predictions_k.zero_()
        self._curr_sample_count.zero_()

    @property
    def value(self):
        """Dict mapping ``"top_<k>"`` to the accuracy for each configured k
        (0.0 while no samples have been counted)."""
        # Return value based on the local state of meter which
        # includes the local sample count since last sync and the total global sample
        # count obtained at the last sync
        correct_predictions = {
            k: self._curr_correct_predictions_k[i]
            + self._total_correct_predictions_k[i]
            for i, k in enumerate(self._topk)
        }
        sample_count = self._total_sample_count + self._curr_sample_count
        return {
            "top_{}".format(k): (correct_predictions[k] / sample_count).item()
            if sample_count
            else 0.0
            for k in self._topk
        }

    def get_classy_state(self):
        """Contains the states of the meter.
        """
        return {
            "name": self.name,
            "top_k": self._topk,
            "total_correct_predictions": self._total_correct_predictions_k.clone(),
            "total_sample_count": self._total_sample_count.clone(),
            "curr_sample_count": self._curr_sample_count.clone(),
            "curr_correct_predictions_k": self._curr_correct_predictions_k.clone(),
        }

    def set_classy_state(self, state):
        """Restore the counters from a dict produced by
        :func:`get_classy_state`; the name and top-k list must match."""
        assert (
            self.name == state["name"]
        ), "State name {state_name} does not match meter name {obj_name}".format(
            state_name=state["name"], obj_name=self.name
        )
        assert (
            self._topk == state["top_k"]
        ), "top-k of state {state_k} does not match object's top-k {obj_k}".format(
            state_k=state["top_k"], obj_k=self._topk
        )

        # Restore the state -- correct_predictions and sample_count.
        self.reset()
        self._total_correct_predictions_k = state["total_correct_predictions"].clone()
        self._total_sample_count = state["total_sample_count"].clone()
        self._curr_correct_predictions_k = state["curr_correct_predictions_k"].clone()
        self._curr_sample_count = state["curr_sample_count"].clone()

    def __repr__(self):
        return repr({"name": self.name, "value": self.value})

    def update(self, model_output, target, **kwargs):
        """
        args:
            model_output: tensor of shape (B, C) where each value is
                          either logit or class probability.
            target:       tensor of shape (B).
            Note: For binary classification, C=2.
        """
        # Due to dummy samples, in some corner cases, the whole batch could
        # be dummy samples, in that case we want to not update meters on that
        # process
        if model_output.shape[0] == 0:
            return
        _, pred = model_output.topk(max(self._topk), dim=1, largest=True, sorted=True)

        correct_predictions = pred.eq(target.unsqueeze(1).expand_as(pred))
        for i, k in enumerate(self._topk):
            self._curr_correct_predictions_k[i] += (
                correct_predictions[:, :k].float().sum().item()
            )
        self._curr_sample_count += model_output.shape[0]

    def reset(self):
        """Zero both the synced totals and the unsynced counts."""
        self._total_correct_predictions_k = torch.zeros(len(self._topk))
        self._total_sample_count = torch.zeros(1)
        self._curr_correct_predictions_k = torch.zeros(len(self._topk))
        self._curr_sample_count = torch.zeros(1)

    def validate(self, model_output_shape, target_shape):
        """Assert that the given shapes can be consumed by :func:`update`."""
        assert (
            len(model_output_shape) == 2
        ), "model_output_shape must be (B, C) Found shape {}".format(
            model_output_shape
        )
        assert len(target_shape) == 1, "target_shape must be (B) Found shape {}".format(
            target_shape
        )
        # k == num_classes is a valid top-k request, so the bound is
        # inclusive, matching the "cannot be larger than" message.
        assert (
            max(self._topk) <= model_output_shape[1]
        ), "k in top_k, for accuracy_meter cannot be larger than num_classes: {}".format(
            model_output_shape[1]
        )
class ClassyMeter:
    """
    Base class to measure various metrics during training and testing phases.

    This can include meters like Accuracy, Precision and Recall, etc. Every
    member except :func:`sync_state` raises ``NotImplementedError`` until a
    subclass overrides it.
    """

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "ClassyMeter":
        """Instantiates a ClassyMeter using a configuration.

        Args:
            config: A configuration for a ClassyMeter.

        Returns:
            A ClassyMeter instance.
        """
        raise NotImplementedError

    @property
    def name(self) -> str:
        """The name of the meter."""
        raise NotImplementedError

    @property
    def value(self) -> Any:
        """
        Value of meter based on local state, can be any python object.

        Note:
            With multiple training processes this is the local state of the
            meter. If :func:`sync_state` is implemented, the value is the
            global state as of the last sync PLUS any local updates made in
            this process since then.
        """
        raise NotImplementedError

    def sync_state(self) -> None:
        """
        Syncs state with all other meters in distributed training.

        The base implementation is a no-op, so a meter that does not
        override it only reports the local process stats. When overridden,
        the meter reports the global stats at the last sync plus any local
        updates since.

        Warning:
            Calls to sync_state could involve communications via
            :mod:`torch.distributed` which can result in a loss of performance or
            deadlocks if not coordinated among threads.
        """
        return None

    def reset(self):
        """
        Resets any internal meter state.

        Should normally be called at the end of a phase.
        """
        raise NotImplementedError

    def update(
        self, model_output: torch.Tensor, target: torch.Tensor, **kwargs
    ) -> None:
        """
        Updates any internal state of meter.

        Should be called after each batch processing of each phase.

        Args:
            model_output: Output of a :class:`ClassyModel`.
            target: Target provided by a dataloader from :class:`ClassyDataset`.
        """
        raise NotImplementedError

    def validate(self, model_output_shape: Tuple, target_shape: Tuple) -> None:
        """
        Validate the meter.

        Checks if the meter can be calculated on the given ``model_output_shape``
        and ``target_shape``.
        """
        raise NotImplementedError

    def get_classy_state(self) -> Dict[str, Any]:
        """Get the state of the ClassyMeter.

        The returned state is used for checkpointing.

        Returns:
            A state dictionary containing the state of the meter.
        """
        raise NotImplementedError

    def set_classy_state(self, state: Dict[str, Any]) -> None:
        """Set the state of the ClassyMeter.

        This is used to load the state of the meter from a checkpoint.

        Args:
            state: The state dictionary. Must be the output of a call to
                :func:`get_classy_state`.
        """
        raise NotImplementedError
@register_meter("precision_at_k")
class PrecisionAtKMeter(ClassyMeter):
    """
    Meter to calculate top-k precision for single-label or multi-label
    image classification task. Note, ties are resolved randomly.
    """

    def __init__(self, topk, target_is_one_hot=True, num_classes=-1):
        """
        args:
            topk: list of int `k` values.
            target_is_one_hot: boolean, if class labels are one-hot encoded.
            num_classes: int, number of classes.
        """
        assert isinstance(topk, list), "topk must be a list"
        assert len(topk) > 0, "topk list should have at least one element"
        # all(...) is required here: asserting on the list itself is always
        # true for a non-empty list and validates nothing.
        assert all(is_pos_int(x) for x in topk), "each value in topk must be >= 1"
        if not target_is_one_hot:
            assert (
                isinstance(num_classes, int) and num_classes > 0
            ), "num_classes must be positive integer"

        self._topk = topk
        self._target_is_one_hot = target_is_one_hot
        self._num_classes = num_classes

        # _total_* variables store running, in-sync totals for the
        # metrics. These should not be communicated / summed.
        self._total_correct_predictions_k = None
        self._total_sample_count = None

        # _curr_* variables store counts since the last sync. Only
        # these should be summed across workers and they are reset
        # after each communication
        self._curr_correct_predictions_k = None
        self._curr_sample_count = None

        # Initialize all values properly
        self.reset()

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "PrecisionAtKMeter":
        """Instantiates a PrecisionAtKMeter from a configuration.

        Args:
            config: A configuration for a PrecisionAtKMeter.
                See :func:`__init__` for parameters expected in the config.

        Returns:
            A PrecisionAtKMeter instance.
        """
        return cls(
            topk=config["topk"],
            target_is_one_hot=config.get("target_is_one_hot", True),
            num_classes=config.get("num_classes", -1),
        )

    @property
    def name(self):
        """The name this meter is registered and checkpointed under."""
        return "precision_at_k"

    def sync_state(self):
        """Sum the unsynced counts across workers, fold them into the
        running totals and clear them."""
        # Communications
        self._curr_correct_predictions_k = all_reduce_sum(
            self._curr_correct_predictions_k
        )
        self._curr_sample_count = all_reduce_sum(self._curr_sample_count)

        # Store results
        self._total_correct_predictions_k += self._curr_correct_predictions_k
        self._total_sample_count += self._curr_sample_count

        # Reset values until next sync
        self._curr_correct_predictions_k.zero_()
        self._curr_sample_count.zero_()

    @property
    def value(self):
        """Dict mapping ``"top_<k>"`` to correct predictions divided by
        ``k * sample_count`` (0.0 while no samples have been counted)."""
        # Return value based on the local state of meter which
        # includes the local sample count since last sync and the total global sample
        # count obtained at the last sync
        correct_predictions = {
            k: self._curr_correct_predictions_k[i]
            + self._total_correct_predictions_k[i]
            for i, k in enumerate(self._topk)
        }
        sample_count = self._total_sample_count + self._curr_sample_count
        return {
            "top_{}".format(k): (correct_predictions[k] / (k * sample_count)).item()
            if sample_count
            else 0.0
            for k in self._topk
        }

    def get_classy_state(self):
        """Contains the states of the meter.
        """
        return {
            "name": self.name,
            "top_k": self._topk,
            "total_correct_predictions": self._total_correct_predictions_k.clone(),
            "total_sample_count": self._total_sample_count.clone(),
            "curr_sample_count": self._curr_sample_count.clone(),
            "curr_correct_predictions_k": self._curr_correct_predictions_k.clone(),
        }

    def set_classy_state(self, state):
        """Restore the counters from a dict produced by
        :func:`get_classy_state`; the name and top-k list must match."""
        assert (
            self.name == state["name"]
        ), "State name {state_name} does not match meter name {obj_name}".format(
            state_name=state["name"], obj_name=self.name
        )
        assert (
            self._topk == state["top_k"]
        ), "top-k of state {state_k} does not match object's top-k {obj_k}".format(
            state_k=state["top_k"], obj_k=self._topk
        )

        # Restore the state -- correct_predictions and sample_count.
        self.reset()
        self._total_correct_predictions_k = state["total_correct_predictions"].clone()
        self._total_sample_count = state["total_sample_count"].clone()
        self._curr_correct_predictions_k = state["curr_correct_predictions_k"].clone()
        self._curr_sample_count = state["curr_sample_count"].clone()

    def __repr__(self):
        return repr({"name": self.name, "value": self.value})

    def update(self, model_output, target, **kwargs):
        """
        args:
            model_output: tensor of shape (B, C) where each value is
                          either logit or class probability.
            target:       tensor of shape (B, C), one-hot encoded
                          or integer encoded.
            Note: For binary classification, C=2.
                  For integer encoded target, C=1.
        """
        # Due to dummy samples, in some corner cases, the whole batch could
        # be dummy samples, in that case we want to not update meters on that
        # process. This check comes first because the validation below
        # reduces over the target, which is not defined for an empty tensor.
        if model_output.shape[0] == 0:
            return

        if not self._target_is_one_hot:
            assert target.shape[1] == 1, "Integer encoded target must be single labeled"
            target = convert_to_one_hot(target, self._num_classes)

        assert bool(
            (target.eq(0) | target.eq(1)).all()
        ), "Target must be one-hot encoded vector"

        _, pred_classes = model_output.topk(
            max(self._topk), dim=1, largest=True, sorted=True
        )
        # Allocate the mask next to the target so scatter_/min below do not
        # mix devices.
        pred_mask_tensor = torch.zeros(target.size(), device=target.device)
        for i, k in enumerate(self._topk):
            pred_mask_tensor.zero_()
            self._curr_correct_predictions_k[i] += torch.sum(
                # torch.min is used to simulate AND between binary
                # tensors. If tensors are not binary, this will fail.
                torch.min(
                    pred_mask_tensor.scatter_(1, pred_classes[:, :k], 1.0),
                    target.float(),
                )
            ).item()
        self._curr_sample_count += model_output.shape[0]

    def reset(self):
        """Zero both the synced totals and the unsynced counts."""
        self._total_correct_predictions_k = torch.zeros(len(self._topk))
        self._total_sample_count = torch.zeros(1)
        self._curr_correct_predictions_k = torch.zeros(len(self._topk))
        self._curr_sample_count = torch.zeros(1)

    def validate(self, model_output_shape, target_shape):
        """Assert that the given shapes can be consumed by :func:`update`."""
        assert (
            len(model_output_shape) == 2
        ), "model_output_shape must be (B, C) Found shape {}".format(
            model_output_shape
        )
        assert (
            len(target_shape) == 2
        ), "target_shape must be (B, C) Found shape {}".format(target_shape)
        # k == num_classes is a valid top-k request, so the bound is
        # inclusive, matching the "cannot be larger than" message.
        assert (
            max(self._topk) <= model_output_shape[1]
        ), "k in top_k, for precision_meter cannot be larger than num_classes: {}".format(
            model_output_shape[1]
        )
@register_meter("recall_at_k")
class RecallAtKMeter(ClassyMeter):
    """Meter to calculate top-k recall for single-label or multi-label
    image classification task.

    Two sets of counters are kept: ``_total_*`` holds the totals as of the
    last :meth:`sync_state`, and ``_curr_*`` holds what was accumulated
    locally since then. Only the ``_curr_*`` counters are summed across
    workers.
    """

    def __init__(self, topk, target_is_one_hot=True, num_classes=None):
        """
        args:
            topk: list of int `k` values.
            target_is_one_hot: boolean, if class labels are one-hot encoded.
            num_classes: int, number of classes. Required (and must be a
                positive int) when `target_is_one_hot` is False.
        """
        assert isinstance(topk, list), "topk must be a list"
        assert len(topk) > 0, "topk list should have at least one element"
        # all() is required: asserting on the list itself only tests that it
        # is non-empty, so an invalid k would never be rejected.
        assert all(is_pos_int(x) for x in topk), "each value in topk must be >= 1"
        if not target_is_one_hot:
            assert (
                type(num_classes) == int and num_classes > 0
            ), "num_classes must be positive integer"

        self._topk = topk
        self._target_is_one_hot = target_is_one_hot
        self._num_classes = num_classes

        # _total_* variables store running, in-sync totals for the
        # metrics. These should not be communicated / summed.
        self._total_correct_predictions_k = None
        self._total_correct_targets = None

        # _curr_* variables store counts since the last sync. Only
        # these should be summed across workers and they are reset
        # after each communication
        self._curr_correct_predictions_k = None
        self._curr_correct_targets = None

        # Initialize all values properly
        self.reset()

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "RecallAtKMeter":
        """Instantiates a RecallAtKMeter from a configuration.

        Args:
            config: A configuration for a RecallAtKMeter.
                See :func:`__init__` for parameters expected in the config.
                "topk" is required; "target_is_one_hot" defaults to True and
                "num_classes" to None.

        Returns:
            A RecallAtKMeter instance.
        """
        return cls(
            topk=config["topk"],
            target_is_one_hot=config.get("target_is_one_hot", True),
            num_classes=config.get("num_classes", None),
        )

    @property
    def name(self):
        """Registry name of this meter."""
        return "recall_at_k"

    def sync_state(self):
        """Sum the counters accumulated since the last sync across workers,
        fold them into the running totals and zero the local counters.
        """
        # Communications
        self._curr_correct_predictions_k = all_reduce_sum(
            self._curr_correct_predictions_k
        )
        self._curr_correct_targets = all_reduce_sum(self._curr_correct_targets)

        # Store results
        self._total_correct_predictions_k += self._curr_correct_predictions_k
        self._total_correct_targets += self._curr_correct_targets

        # Reset values until next sync
        self._curr_correct_predictions_k.zero_()
        self._curr_correct_targets.zero_()

    @property
    def value(self):
        """Dict mapping "top_<k>" to the recall for that k.

        The value is based on the local state of the meter, which includes
        the local counts since the last sync plus the totals obtained at the
        last sync. It is 0.0 while no positive target has been seen.
        """
        correct_predictions = {
            k: self._curr_correct_predictions_k[i]
            + self._total_correct_predictions_k[i]
            for i, k in enumerate(self._topk)
        }
        correct_targets = self._total_correct_targets + self._curr_correct_targets
        return {
            "top_{}".format(k): ((correct_predictions[k] / correct_targets).item())
            if correct_targets
            else 0.0
            for k in self._topk
        }

    def get_classy_state(self):
        """Contains the states of the meter.

        The counter tensors are cloned, so the returned dict does not alias
        the meter's internal state.
        """
        return {
            "name": self.name,
            "top_k": self._topk,
            "total_correct_predictions": self._total_correct_predictions_k.clone(),
            "total_correct_targets": self._total_correct_targets.clone(),
            "curr_correct_targets": self._curr_correct_targets.clone(),
            "curr_correct_predictions_k": self._curr_correct_predictions_k.clone(),
        }

    def set_classy_state(self, state):
        """Restore the meter from a dict produced by :meth:`get_classy_state`.

        The state must come from a meter with the same name and the same
        top-k list.
        """
        assert (
            self.name == state["name"]
        ), "State name {state_name} does not match meter name {obj_name}".format(
            state_name=state["name"], obj_name=self.name
        )
        assert (
            self._topk == state["top_k"]
        ), "top-k of state {state_k} does not match object's top-k {obj_k}".format(
            state_k=state["top_k"], obj_k=self._topk
        )

        # Restore the state -- correct_predictions and correct_targets.
        self.reset()
        self._total_correct_predictions_k = state["total_correct_predictions"].clone()
        self._total_correct_targets = state["total_correct_targets"].clone()
        self._curr_correct_predictions_k = state["curr_correct_predictions_k"].clone()
        self._curr_correct_targets = state["curr_correct_targets"].clone()

    def __repr__(self):
        return repr({"name": self.name, "value": self.value})

    def update(self, model_output, target, **kwargs):
        """
        args:
            model_output: tensor of shape (B, C) where each value is
                          either logit or class probability.
            target:       tensor of shape (B, C), one-hot encoded
                          or integer encoded.

        Note:

            For binary classification, C=2. For integer encoded target, C=1.
        """
        # Due to dummy samples, in some corner cases, the whole batch could
        # be dummy samples, in that case we want to not update meters on that
        # process. This check has to run before the target validation below,
        # because torch.min raises on an empty tensor.
        if model_output.shape[0] == 0:
            return

        if self._target_is_one_hot is False:
            assert target.shape[1] == 1, "Integer encoded target must be single labeled"
            target = convert_to_one_hot(target, self._num_classes)

        assert (
            torch.min(target.eq(0) + target.eq(1)) == 1
        ), "Target must be one-hot encoded vector"

        _, pred_classes = model_output.topk(
            max(self._topk), dim=1, largest=True, sorted=True
        )
        pred_mask_tensor = torch.zeros(target.size())
        for i, k in enumerate(self._topk):
            pred_mask_tensor.zero_()
            self._curr_correct_predictions_k[i] += torch.sum(
                # torch.min is used to simulate AND between binary
                # tensors. If tensors are not binary, this will fail.
                torch.min(
                    pred_mask_tensor.scatter_(1, pred_classes[:, :k], 1.0),
                    target.float(),
                )
            ).item()
        self._curr_correct_targets += target.sum().item()

    def reset(self):
        """Zero both the synced totals and the since-last-sync counters."""
        self._total_correct_predictions_k = torch.zeros(len(self._topk))
        self._total_correct_targets = torch.zeros(1)
        self._curr_correct_predictions_k = torch.zeros(len(self._topk))
        self._curr_correct_targets = torch.zeros(1)

    def validate(self, model_output_shape, target_shape):
        """Assert that the given shapes are compatible with this meter.

        Both shapes must be two-dimensional. The largest k must be strictly
        smaller than the class dimension, so k == num_classes is rejected too.
        """
        assert (
            len(model_output_shape) == 2
        ), "model_output_shape must be (B, C) Found shape {}".format(
            model_output_shape
        )
        assert (
            len(target_shape) == 2
        ), "target_shape must be (B, C) Found shape {}".format(target_shape)
        assert max(self._topk) < model_output_shape[1], (
            "k in top_k, for recall_meter cannot be larger than "
            "num_classes: {}".format(model_output_shape[1])
        )
@register_meter("video_accuracy")
class VideoAccuracyMeter(ClassyMeter):
    """Meter to calculate top-k video-level accuracy for single label
    video classification task. Video-level accuracy is computed by averaging
    clip-level predictions and comparing the result with the video-level
    groundtruth label.

    All bookkeeping is delegated to an internal ``AccuracyMeter``; this class
    only collapses per-clip outputs into per-video outputs before updating it.
    """

    def __init__(self, topk, clips_per_video_train, clips_per_video_test):
        """
        args:
            topk: list of int `k` values.
            clips_per_video_train: No. of clips sampled per video at train time
            clips_per_video_test: No. of clips sampled per video at test time
        """
        assert isinstance(topk, list), "topk must be a list"
        assert len(topk) > 0, "topk list should have at least one element"
        # all() is required: asserting on the list itself only tests that it
        # is non-empty, so an invalid k would never be rejected.
        assert all(is_pos_int(x) for x in topk), "each value in topk must be >= 1"

        self._clips_per_video_train = clips_per_video_train
        self._clips_per_video_test = clips_per_video_test
        self._accuracy_meter = AccuracyMeter(topk)

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "VideoAccuracyMeter":
        """Instantiates a VideoAccuracyMeter from a configuration.

        Args:
            config: A configuration for a VideoAccuracyMeter.
                See :func:`__init__` for parameters expected in the config.
                "topk" and "clips_per_video_test" are required;
                "clips_per_video_train" defaults to 1.

        Returns:
            A VideoAccuracyMeter instance.
        """
        return cls(
            topk=config["topk"],
            clips_per_video_train=config.get("clips_per_video_train", 1),
            clips_per_video_test=config["clips_per_video_test"],
        )

    @property
    def name(self):
        """Registry name of this meter."""
        return "video_accuracy"

    @property
    def value(self):
        """Current value of the wrapped accuracy meter."""
        return self._accuracy_meter.value

    def sync_state(self):
        """Synchronize the wrapped accuracy meter."""
        self._accuracy_meter.sync_state()

    def get_classy_state(self):
        """Contains the states of the meter.

        The wrapped accuracy meter's state is nested under "accuracy_state".
        """
        state = {}
        state["accuracy_state"] = self._accuracy_meter.get_classy_state()
        state["name"] = "video_accuracy"
        state["clips_per_video_train"] = self._clips_per_video_train
        state["clips_per_video_test"] = self._clips_per_video_test
        return state

    def set_classy_state(self, state):
        """Restore the meter from a dict produced by :meth:`get_classy_state`.

        The state must carry the same meter name and the same clips-per-video
        settings as this instance.
        """
        assert (
            "video_accuracy" == state["name"]
        ), "State name {state_name} does not match meter name {obj_name}".format(
            state_name=state["name"], obj_name=self.name
        )
        assert (
            self._clips_per_video_train == state["clips_per_video_train"]
        ), "incompatible clips_per_video_train for video accuracy"
        assert (
            self._clips_per_video_test == state["clips_per_video_test"]
        ), "incompatible clips_per_video_test for video accuracy"
        # Restore the state -- correct_predictions and sample_count.
        self.reset()
        self._accuracy_meter.set_classy_state(state["accuracy_state"])

    def __repr__(self):
        return repr({"name": self.name, "value": self._accuracy_meter.value})

    def update(self, model_output, target, is_train, **kwargs):
        """
        args:
            model_output: tensor of shape (B * clips_per_video, C) where each
                value is either logit or class probability.
            target:       tensor of shape (B * clips_per_video).
            is_train:     if True, it is training stage when meter is updated

        Note: For binary classification, C=2.
        """
        num_clips = len(model_output)
        if num_clips == 0:
            # It is possible that a minibatch entirely contains dummy samples
            # when dataset is sharded. In such case, the effective target and
            # output can be empty, and we immediately return
            return

        clips_per_video = (
            self._clips_per_video_train if is_train else self._clips_per_video_test
        )
        assert num_clips % clips_per_video == 0, (
            "For video model testing, batch size must be a multiple of No. of "
            "clips per video"
        )
        num_videos = num_clips // clips_per_video
        # Clips of one video are expected to be contiguous in the batch and
        # to share a single label.
        for i in range(num_videos):
            clip_labels = target[i * clips_per_video : (i + 1) * clips_per_video]
            assert (
                len(torch.unique(clip_labels)) == 1
            ), "all clips from the same video should have same label"

        # One label per video (its first clip) and the mean of the clip-level
        # outputs as the video-level output.
        video_target = target[::clips_per_video]
        video_model_output = torch.mean(
            torch.reshape(model_output, (num_videos, clips_per_video, -1)), 1
        )
        self._accuracy_meter.update(video_model_output, video_target)

    def reset(self):
        """Reset the wrapped accuracy meter."""
        self._accuracy_meter.reset()

    def validate(self, model_output_shape, target_shape):
        """Delegate shape validation to the wrapped accuracy meter."""
        self._accuracy_meter.validate(model_output_shape, target_shape)
def build_model(config):
    """Builds a ClassyModel from a config.

    The 'name' key of the config selects the registered model class (see
    :func:`register_model`); that class's ``from_config`` is then called with
    the whole config. For instance, `{"name": "my_model", "foo": "bar"}`
    instantiates the class registered as "my_model".

    When the config has a "heads" list, each entry must carry a "fork_block"
    naming the block the head is attached to. The remaining keys of the entry
    are handed to ``build_head`` and the resulting heads are set on the model.
    """
    model_name = config["name"]
    assert model_name in MODEL_REGISTRY, "unknown model"
    model = MODEL_REGISTRY[model_name].from_config(config)

    if "heads" not in config:
        return model

    # block name -> {head unique id -> head}
    heads_by_block = defaultdict(dict)
    for head_config in config["heads"]:
        assert "fork_block" in head_config, "Expect fork_block in config"
        # Work on a copy so the caller's config is left untouched.
        head_only_config = copy.deepcopy(head_config)
        block_name = head_only_config.pop("fork_block")

        head = build_head(head_only_config)
        heads_by_block[block_name][head.unique_id] = head

    model.set_heads(heads_by_block)
    return model
class ClassyBlock(nn.Module):
    """
    Thin wrapper around a module that can remember the module's latest
    output, so that heads forked from this block can be run on it afterwards.
    """

    def __init__(self, name, module):
        super().__init__()
        self._should_cache_output = False
        self._module = module
        self.name = name
        # Empty placeholder until an output has been cached.
        self.output = torch.zeros(0)

    def set_cache_output(self, should_cache_output: bool = True):
        """
        Turn caching of the wrapped module's output on or off.
        """
        self._should_cache_output = should_cache_output

    def forward(self, input):
        """Run the wrapped module and, if caching is on, keep its result."""
        result = self._module(input)
        if not self._should_cache_output:
            return result
        self.output = result
        return result
class ClassyModelEvaluationMode(Enum):
    # Value returned by ClassyModel.evaluation_mode unless a subclass overrides it.
    DEFAULT = 0
    # NOTE(review): presumably selected by video models that average over
    # clips; not used in this file -- confirm against the video models.
    VIDEO_CLIP_AVERAGING = 1


class ClassyModel(nn.Module):
    """Base class for models in classy vision.

    A model refers either to a specific architecture (e.g. ResNet50) or a
    family of architectures (e.g. ResNet). Models can take arguments in the
    constructor in order to configure different behavior (e.g.
    hyperparameters). Classy Models must implement :meth:`from_config` in
    order to allow instantiation from a configuration file. Like regular
    PyTorch models, Classy Models must also implement :meth:`forward`, where
    the bulk of the inference logic lives.

    Classy Models also have some advanced functionality for production
    fine-tuning systems. For example, we allow users to train a trunk
    model and then attach heads to the model via the attachable
    blocks. Making your model support the trunk-heads paradigm is
    completely optional.

    """

    def __init__(self):
        """Constructor for ClassyModel."""
        super().__init__()

        # block name -> ClassyBlock created by build_attachable_block
        self._attachable_blocks = {}
        # block name -> nn.ModuleDict of the heads attached to that block
        self._heads = nn.ModuleDict()
        # head unique_id -> output of the latest execute_heads call
        self._head_outputs = {}

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "ClassyModel":
        """Instantiates a ClassyModel from a configuration.

        Args:
            config: A configuration for the ClassyModel.

        Returns:
            A ClassyModel instance.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    @classmethod
    def from_checkpoint(cls, checkpoint):
        """Build a model from a checkpoint dict and load its weights.

        The model config is read from
        ``checkpoint["input_args"]["config"]["model"]`` and the state from
        ``checkpoint["classy_state_dict"]["base_model"]``.
        """
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular import with the
        # package __init__ -- confirm.
        from . import build_model

        model = build_model(checkpoint["input_args"]["config"]["model"])
        model.set_classy_state(checkpoint["classy_state_dict"]["base_model"])
        return model

    def get_classy_state(self, deep_copy=False):
        """Get the state of the ClassyModel.

        The returned state is used for checkpointing.

        Heads are detached while the trunk state is collected and re-attached
        afterwards; as a side effect the cached head outputs are cleared.

        Args:
            deep_copy: If True, creates a deep copy of the state Dict. Otherwise, the
                returned Dict's state will be tied to the object's.

        Returns:
            A state dictionary containing the state of the model, of the form
            ``{"model": {"trunk": ..., "heads": {block: {head_name: ...}}}}``.
        """
        # If the model doesn't have head for fine-tuning, all of model's state
        # live in the trunk
        attached_heads = self.get_heads()
        # clear heads to get trunk only states. There shouldn't be any component
        # states depend on heads
        self._clear_heads()
        trunk_state_dict = super().state_dict()
        self.set_heads(attached_heads)

        head_state_dict = {}
        for block, heads in attached_heads.items():
            head_state_dict[block] = {
                head_name: head.state_dict() for head_name, head in heads.items()
            }
        model_state_dict = {
            "model": {"trunk": trunk_state_dict, "heads": head_state_dict}
        }
        if deep_copy:
            model_state_dict = copy.deepcopy(model_state_dict)
        return model_state_dict

    def load_head_states(self, state):
        """Load only the state (weights) of the heads.

        For a trunk-heads model, this function allows the user to
        only update the head state of the model. Useful for attaching
        fine-tuned heads to a pre-trained trunk.

        Every head named in the state must already be attached to the model.

        Args:
            state (Dict): Contains the classy model state under key "model"

        """
        for block_name, head_states in state["model"]["heads"].items():
            for head_name, head_state in head_states.items():
                self._heads[block_name][head_name].load_state_dict(head_state)

    def set_classy_state(self, state):
        """Set the state of the ClassyModel.

        Args:
            state: The state dictionary. Must be the output of a call to
                :meth:`get_classy_state`.

        This is used to load the state of the model from a checkpoint.
        """
        self.load_head_states(state)

        # Overlay the trunk entries on the model's current full state dict,
        # so entries that the trunk state does not contain keep their values.
        current_state = self.state_dict()
        current_state.update(state["model"]["trunk"])
        super().load_state_dict(current_state)

    def forward(self, x):
        """
        Run the model on ``x``. Must be implemented by subclasses.
        """
        raise NotImplementedError

    def extract_features(self, x):
        """
        Extract features from the model.

        Derived classes can implement this method to extract the features before
        applying the final fc layer. The base implementation returns
        ``self.forward(x)``.
        """
        return self.forward(x)

    def build_attachable_block(self, name, module):
        """
        Add a wrapper to the module to allow to attach heads to the module.

        Returns the wrapping ClassyBlock. Raises ValueError if ``name`` has
        already been used for another block.
        """
        if name in self._attachable_blocks:
            raise ValueError("Found duplicated block name {}".format(name))
        block = ClassyBlock(name, module)
        self._attachable_blocks[name] = block
        return block

    @property
    def attachable_block_names(self):
        """
        Return names of all attachable blocks (a dict keys view).
        """
        return self._attachable_blocks.keys()

    def _clear_heads(self):
        # clear all existing heads and their cached outputs
        self._heads.clear()
        self._head_outputs.clear()

    def set_heads(self, heads: Dict[str, Dict[str, ClassyHead]]):
        """Attach all the heads to corresponding blocks.

        A head is expected to be a ClassyHead object. For more
        details, see :class:`ClassyHead`.

        Any previously attached heads are removed first.

        Args:
            heads (Dict): a mapping between attachable block name
                and a dictionary of heads attached to that block. For
                example, if you have two different teams that want to
                attach two different heads for downstream classifiers to
                the 15th block, then they would use:
                heads = {"block15":
                    {"team1": classifier_head1, "team2": classifier_head2}
                }

        Raises:
            ValueError: if a block name is not an attachable block, or if two
                heads share the same ``unique_id``.
        """
        self._clear_heads()

        head_ids = set()
        for block_name, block_heads in heads.items():
            if block_name not in self._attachable_blocks:
                raise ValueError(
                    "block {} does not exist or can not be attached".format(block_name)
                )
            # The block has to cache its output for the heads to consume.
            self._attachable_blocks[block_name].set_cache_output()
            for head in block_heads.values():
                if head.unique_id in head_ids:
                    raise ValueError("head id {} already exists".format(head.unique_id))
                head_ids.add(head.unique_id)
            self._heads[block_name] = nn.ModuleDict(block_heads)

    def get_heads(self):
        """Returns the heads on the model

        Function returns the heads as a dictionary of block names to
        a plain dict of the nn.modules attached to that block.

        """
        return {block_name: dict(heads) for block_name, heads in self._heads.items()}

    @property
    def head_outputs(self):
        """Return outputs of all heads in the format of Dict[head_id, output]

        Head outputs are cached by :meth:`execute_heads`. A shallow copy of
        the cache is returned.
        """
        return self._head_outputs.copy()

    def get_block_outputs(self) -> Dict[str, torch.Tensor]:
        """Return the ``output`` attribute of every attachable block, by name."""
        outputs = {}
        for name, block in self._attachable_blocks.items():
            outputs[name] = block.output
        return outputs

    def execute_heads(self) -> Dict[str, torch.Tensor]:
        """Run every attached head on the output of its block.

        The results are cached for :attr:`head_outputs` and returned as a
        dict keyed by head ``unique_id``.
        """
        block_outs = self.get_block_outputs()
        outputs = {}
        for block_name, heads in self._heads.items():
            for head in heads.values():
                outputs[head.unique_id] = head(block_outs[block_name])
        self._head_outputs = outputs
        return outputs

    def get_optimizer_params(self, bn_weight_decay=False):
        """Returns param groups for optimizer.

        Function to return dict of params with "keys" from
        {"regularized_params", "unregularized_params"}
        to "values" a list of torch Params.

        "weight_decay" provided as part of optimizer is only used
        for "regularized_params". For "unregularized_params", weight_decay is set
        to 0.0

        This implementation sets BatchNorm's all trainable params to be
        unregularized_params if bn_weight_decay is False.

        Only parameters with ``requires_grad`` set are returned.

        Override this function for any custom behavior.

        Args:
            bn_weight_decay (bool): Apply weight decay to bn params if true
        """
        unregularized_params = []
        regularized_params = []
        for module in self.modules():
            # If module has children (i.e. internal node of constructed DAG) then
            # only add direct parameters() to the list of params, else go over
            # children node to find if they are BatchNorm or have "bias".
            if list(module.children()) != []:
                for params in module.parameters(recurse=False):
                    if params.requires_grad:
                        regularized_params.append(params)
            elif not bn_weight_decay and isinstance(
                module,
                (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm),
            ):
                for params in module.parameters():
                    if params.requires_grad:
                        unregularized_params.append(params)
            else:
                for params in module.parameters():
                    if params.requires_grad:
                        regularized_params.append(params)
        return {
            "regularized_params": regularized_params,
            "unregularized_params": unregularized_params,
        }

    @property
    def input_shape(self):
        """If implemented, returns expected input tensor shape
        """
        raise NotImplementedError

    @property
    def output_shape(self):
        """If implemented, returns expected output tensor shape
        """
        raise NotImplementedError

    @property
    def model_depth(self):
        """If implemented, returns number of layers in model
        """
        raise NotImplementedError

    @property
    def evaluation_mode(self):
        """Used by video models for averaging over contiguous clips.

        TODO: Remove this once we have a video task, this logic should
        live in a video specific task

        """
        return ClassyModelEvaluationMode.DEFAULT
class ClassyModelWrapper(ClassyModel):
    """
    Adapter that exposes an arbitrary nn.Module as a ClassyModel.

    Only the model is mandatory. The shape and depth arguments are optional
    and, when given, are what the corresponding properties report; otherwise
    those properties defer to the ClassyModel base class.
    """

    def __init__(
        self,
        model: nn.Module,
        input_shape: Optional[Tuple] = None,
        output_shape: Optional[Tuple] = None,
        model_depth: Optional[int] = None,
    ):
        super().__init__()
        self.model = model
        self._input_shape = input_shape
        self._output_shape = output_shape
        self._model_depth = model_depth

    def forward(self, x):
        """Forward ``x`` through the wrapped model."""
        wrapped = self.model
        return wrapped(x)

    def extract_features(self, x):
        """Use the wrapped model's ``extract_features`` when it has one,
        otherwise fall back to the base class implementation.
        """
        if not hasattr(self.model, "extract_features"):
            return super().extract_features(x)
        return self.model.extract_features(x)

    @property
    def input_shape(self):
        """Input shape given at construction, else the base class value."""
        if self._input_shape is None:
            return super().input_shape
        return self._input_shape

    @property
    def output_shape(self):
        """Output shape given at construction, else the base class value."""
        if self._output_shape is None:
            return super().output_shape
        return self._output_shape

    @property
    def model_depth(self):
        """Model depth given at construction, else the base class value."""
        if self._model_depth is None:
            return super().model_depth
        return self._model_depth
# global setting for in-place ReLU:
INPLACE = True


class _DenseLayer(nn.Sequential):
    """
    Single layer of a DenseNet.

    Applies BN-ReLU-Conv(1x1) followed by BN-ReLU-Conv(3x3) and concatenates
    the result to its input along dimension 1, so the output has
    `in_planes + growth_rate` channels.
    """

    def __init__(self, in_planes, growth_rate=32, expansion=4):

        # assertions:
        assert is_pos_int(in_planes)
        assert is_pos_int(growth_rate)
        assert is_pos_int(expansion)

        # add all layers to layer
        super().__init__()
        intermediate = expansion * growth_rate
        self.add_module("norm-1", nn.BatchNorm2d(in_planes))
        self.add_module("relu-1", nn.ReLU(inplace=INPLACE))
        self.add_module(
            "conv-1",
            nn.Conv2d(in_planes, intermediate, kernel_size=1, stride=1, bias=False),
        )
        self.add_module("norm-2", nn.BatchNorm2d(intermediate))
        self.add_module("relu-2", nn.ReLU(inplace=INPLACE))
        self.add_module(
            "conv-2",
            nn.Conv2d(
                intermediate,
                growth_rate,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
            ),
        )

    def forward(self, x):
        """Return `x` concatenated with the newly computed features."""
        new_features = super().forward(x)
        return torch.cat([x, new_features], 1)


class _DenseBlock(nn.Sequential):
    """
    Block of densely connected layers at same resolution.

    Layer `idx` receives `in_planes + idx * growth_rate` input channels,
    because every preceding layer appends `growth_rate` channels.
    """

    def __init__(self, num_layers, in_planes, growth_rate=32, expansion=4):

        # assertions:
        assert is_pos_int(in_planes)
        assert is_pos_int(growth_rate)
        assert is_pos_int(expansion)

        # create block of dense layers at same resolution:
        super().__init__()
        for idx in range(num_layers):
            layer = _DenseLayer(
                in_planes + idx * growth_rate,
                growth_rate=growth_rate,
                expansion=expansion,
            )
            self.add_module("denselayer-%d" % (idx + 1), layer)


class _Transition(nn.Sequential):
    """
    Transition layer to reduce spatial resolution.

    BN-ReLU-Conv(1x1) maps `in_planes` to `out_planes` channels, then average
    pooling with kernel and stride `reduction` shrinks the spatial size.
    """

    def __init__(self, in_planes, out_planes, reduction=2):

        # assertions:
        assert is_pos_int(in_planes)
        assert is_pos_int(out_planes)
        assert is_pos_int(reduction)

        # create layers for pooling:
        super().__init__()
        self.add_module("pool-norm", nn.BatchNorm2d(in_planes))
        self.add_module("pool-relu", nn.ReLU(inplace=INPLACE))
        self.add_module(
            "pool-conv",
            nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, bias=False),
        )
        self.add_module(
            "pool-pool", nn.AvgPool2d(kernel_size=reduction, stride=reduction)
        )


@register_model("densenet")
class DenseNet(ClassyModel):
    """Densely connected network (DenseNet), registered as "densenet"."""

    def __init__(
        self,
        num_blocks,
        num_classes,
        init_planes,
        growth_rate,
        expansion,
        small_input,
        final_bn_relu,
    ):
        """
        Implementation of a standard densely connected network (DenseNet).

        Args:
            num_blocks: list with the number of dense layers in each block.
            num_classes: number of outputs of the final linear layer, or
                None to build the network without that layer.
            init_planes: number of channels produced by the initial block.
            growth_rate: number of channels added by each dense layer.
            expansion: multiplier of `growth_rate` for the width of the 1x1
                convolution inside each dense layer.
            small_input: set to `True` for 32x32 sized image inputs.
            final_bn_relu: set to `False` to exclude the final batchnorm and
                ReLU layers. This is useful when training Siamese networks.
        """
        super().__init__()

        # assertions:
        assert isinstance(num_blocks, list)
        assert all(is_pos_int(b) for b in num_blocks)
        assert num_classes is None or is_pos_int(num_classes)
        assert is_pos_int(init_planes)
        assert is_pos_int(growth_rate)
        assert is_pos_int(expansion)
        assert isinstance(small_input, bool)

        # initial convolutional block:
        self._num_classes = num_classes
        self.num_blocks = num_blocks
        self.small_input = small_input
        if self.small_input:
            self.initial_block = nn.Sequential(
                nn.Conv2d(
                    3, init_planes, kernel_size=3, stride=1, padding=1, bias=False
                ),
                nn.BatchNorm2d(init_planes),
                nn.ReLU(inplace=INPLACE),
            )
        else:
            self.initial_block = nn.Sequential(
                nn.Conv2d(
                    3, init_planes, kernel_size=7, stride=2, padding=3, bias=False
                ),
                nn.BatchNorm2d(init_planes),
                nn.ReLU(inplace=INPLACE),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            )
        # loop over spatial resolutions:
        num_planes = init_planes
        self.features = nn.Sequential()
        for idx, num_layers in enumerate(num_blocks):

            # add dense block:
            block = _DenseBlock(
                num_layers, num_planes, growth_rate=growth_rate, expansion=expansion
            )
            self.features.add_module("denseblock-%d" % (idx + 1), block)
            num_planes = num_planes + num_layers * growth_rate

            # add transition layer (halves the channels) after every block
            # except the last one:
            if idx != len(num_blocks) - 1:
                trans = _Transition(num_planes, num_planes // 2)
                self.features.add_module("transition-%d" % (idx + 1), trans)
                num_planes = num_planes // 2

        # final batch normalization:
        if final_bn_relu:
            self.features.add_module("norm-final", nn.BatchNorm2d(num_planes))
            self.features.add_module("relu-final", nn.ReLU(inplace=INPLACE))

        # final classifier:
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = None if num_classes is None else nn.Linear(num_planes, num_classes)
        self.num_planes = num_planes

        # initialize weights of convolutional and batchnorm layers:
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2.0 / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "DenseNet":
        """Instantiates a DenseNet from a configuration.

        Args:
            config: A configuration for a DenseNet.
                See :func:`__init__` for parameters expected in the config.
                Only "num_blocks" is required; the other parameters fall
                back to the defaults listed below.

        Returns:
            A DenseNet instance.
        """
        assert "num_blocks" in config
        config = {
            "num_blocks": config["num_blocks"],
            "num_classes": config.get("num_classes"),
            "init_planes": config.get("init_planes", 64),
            "growth_rate": config.get("growth_rate", 32),
            "expansion": config.get("expansion", 4),
            "small_input": config.get("small_input", False),
            "final_bn_relu": config.get("final_bn_relu", True),
        }
        return cls(**config)

    # forward pass in DenseNet:
    def forward(self, x):
        """Run the network; the classifier is skipped when it was not built."""

        # initial convolutional block:
        out = self.initial_block(x)

        # evaluate all dense blocks:
        out = self.features(out)

        # perform average pooling:
        out = self.avgpool(out)

        # final classifier:
        out = out.view(out.size(0), -1)
        if self.fc is not None:
            out = self.fc(out)
        return out

    def get_optimizer_params(self):
        """Return the optimizer param groups with BatchNorm regularized."""
        # use weight decay on BatchNorm for DenseNets
        return super().get_optimizer_params(bn_weight_decay=True)

    @property
    def input_shape(self):
        """(3, 32, 32) for small inputs, (3, 224, 224) otherwise."""
        if self.small_input:
            return (3, 32, 32)
        else:
            return (3, 224, 224)

    @property
    def output_shape(self):
        """(1, num_classes) as given at construction."""
        return (1, self._num_classes)

    @property
    def model_depth(self):
        """Total number of dense layers over all blocks."""
        return sum(self.num_blocks)
@register_model("mlp")
class MLP(ClassyModel):
    """MLP model using ReLU. Useful for testing on CPUs."""

    def __init__(
        self,
        input_dim,
        output_dim,
        hidden_dims,
        dropout,
        first_dropout,
        use_batchnorm,
        first_batchnorm,
    ):
        """Builds the stack of linear layers.

        Each hidden layer is ``Linear -> [BatchNorm1d] -> [Dropout] -> ReLU``
        and the stack ends with a plain ``Linear`` to ``output_dim``.

        Args:
            input_dim: number of features fed to the first linear layer.
            output_dim: number of features produced by the last linear layer.
            hidden_dims: output sizes of the hidden linear layers, in order.
            dropout: dropout probability; dropout layers are only added when
                this is greater than 0.
            first_dropout: if True (and ``dropout > 0``), apply dropout to the
                input before the first linear layer.
            use_batchnorm: if True, add a BatchNorm1d after every hidden
                linear layer.
            first_batchnorm: if True, also normalize the input with
                BatchNorm1d. Requires ``use_batchnorm``.
        """
        super().__init__()

        layers = []
        # If first_batchnorm is set, must be using batchnorm
        assert not first_batchnorm or use_batchnorm

        self._num_inputs = input_dim
        self._num_classes = output_dim
        self._model_depth = len(hidden_dims) + 1

        if dropout > 0 and first_dropout:
            layers.append(nn.Dropout(p=dropout))

        if use_batchnorm and first_batchnorm:
            layers.append(nn.BatchNorm1d(input_dim))

        for dim in hidden_dims:
            layers.append(nn.Linear(input_dim, dim))
            if use_batchnorm:
                layers.append(nn.BatchNorm1d(dim))
            if dropout > 0:
                layers.append(nn.Dropout(p=dropout))
            layers.append(nn.ReLU(inplace=True))
            # the next layer consumes what this one produced
            input_dim = dim

        layers.append(nn.Linear(input_dim, output_dim))
        self.mlp = nn.Sequential(*layers)

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "MLP":
        """Instantiates a MLP from a configuration.

        Args:
            config: A configuration for a MLP.
                See :func:`__init__` for parameters expected in the config.
                ``input_dim``, ``output_dim`` and ``hidden_dims`` are
                required; the remaining parameters default to "disabled".

        Returns:
            A MLP instance.
        """
        required_keys = ["input_dim", "output_dim", "hidden_dims"]
        # all() is essential: a bare generator expression is always truthy,
        # so asserting on it directly would never catch a missing key.
        assert all(key in config for key in required_keys), (
            "MLP config requires the keys: %s" % ", ".join(required_keys)
        )

        return cls(
            input_dim=config["input_dim"],
            output_dim=config["output_dim"],
            hidden_dims=config["hidden_dims"],
            dropout=config.get("dropout", 0),
            first_dropout=config.get("first_dropout", False),
            use_batchnorm=config.get("use_batchnorm", False),
            first_batchnorm=config.get("first_batchnorm", False),
        )

    def forward(self, x):
        """Flattens every sample of ``x`` to a vector and applies the MLP."""
        batchsize_per_replica = x.shape[0]
        out = x.view(batchsize_per_replica, -1)
        out = self.mlp(out)
        return out

    @property
    def input_shape(self):
        return (self._num_inputs,)

    @property
    def output_shape(self):
        return (1, self._num_classes)

    @property
    def model_depth(self):
        # number of linear layers: the hidden ones plus the output layer
        return self._model_depth
def conv3x3(in_planes, out_planes, stride=1, groups=1):
    """Return a bias-free 3x3 (optionally grouped) convolution with padding 1."""
    conv_options = dict(
        kernel_size=3, stride=stride, padding=1, groups=groups, bias=False
    )
    return nn.Conv2d(in_planes, out_planes, **conv_options)
class BasicLayer(GenericLayer):
    """
    Basic (two 3x3 convolutions) residual layer with `in_planes` input planes
    and `out_planes` output planes.

    `mid_planes_and_cardinality` is accepted so the signature matches
    `BottleneckLayer`, but this layer does not use it.
    """

    def __init__(
        self,
        in_planes,
        out_planes,
        stride=1,
        mid_planes_and_cardinality=None,
        reduction=4,
        final_bn_relu=True,
    ):

        # validate inputs (same order as the checks are documented):
        assert is_pos_int(in_planes) and is_pos_int(out_planes)
        assert is_pos_int(stride) and is_pos_int(reduction)

        # conv -> BN -> ReLU -> conv; only the first conv carries the stride:
        ops = [
            conv3x3(in_planes, out_planes, stride=stride),
            nn.BatchNorm2d(out_planes),
            nn.ReLU(inplace=INPLACE),
            conv3x3(out_planes, out_planes),
        ]
        transform = nn.Sequential(*ops)

        # the generic layer adds the shortcut and the optional final BN/ReLU:
        super().__init__(
            transform,
            in_planes,
            out_planes,
            stride=stride,
            reduction=reduction,
            final_bn_relu=final_bn_relu,
        )
@register_model("resnext")
class ResNeXt(ClassyModel):
    def __init__(
        self,
        num_blocks,
        init_planes,
        reduction,
        small_input,
        zero_init_bn_residuals,
        base_width_and_cardinality,
        basic_layer,
        final_bn_relu,
    ):
        """
        Implementation of `ResNeXt <https://arxiv.org/pdf/1611.05431.pdf>`_.

        Set ``small_input`` to `True` for 32x32 sized image inputs.

        Set ``final_bn_relu`` to `False` to exclude the final batchnorm and
        ReLU layers. These settings are useful when training Siamese networks.

        Args:
            num_blocks (list): number of residual layers at each spatial
                resolution.
            init_planes (int): number of output planes of the initial block.
            reduction (int): bottleneck reduction factor; also scales the
                number of output planes of every resolution.
            small_input (bool): use the small-input initial block. This also
                forces basic (non-bottleneck) layers.
            zero_init_bn_residuals (bool): initialize the weight of the final
                BN of every residual layer to 0 instead of 1.
            base_width_and_cardinality (tuple or list or None): pair of
                positive ints ``(width, cardinality)``; the width is doubled
                at every resolution.
            basic_layer (bool): use basic layers instead of bottleneck layers.
            final_bn_relu (bool): keep the final batchnorm and ReLU of the
                last residual layer.
        """
        super().__init__()

        # assertions on inputs:
        assert isinstance(num_blocks, list)
        assert all(is_pos_int(n) for n in num_blocks)
        assert is_pos_int(init_planes) and is_pos_int(reduction)
        assert isinstance(small_input, bool)
        assert isinstance(zero_init_bn_residuals, bool), (
            "zero_init_bn_residuals must be a boolean, set to true if gamma of last "
            "BN of residual block should be initialized to 0.0, false for 1.0"
        )
        assert base_width_and_cardinality is None or (
            isinstance(base_width_and_cardinality, (tuple, list))
            and len(base_width_and_cardinality) == 2
            and is_pos_int(base_width_and_cardinality[0])
            and is_pos_int(base_width_and_cardinality[1])
        )

        # initial convolutional block:
        self.num_blocks = num_blocks
        self.small_input = small_input
        self._make_initial_block(small_input, init_planes, basic_layer)

        # compute number of planes at each spatial resolution:
        out_planes = [init_planes * 2 ** i * reduction for i in range(len(num_blocks))]
        in_planes = [init_planes] + out_planes[:-1]

        # create subnetworks for each spatial resolution:
        blocks = []
        last_idx = len(out_planes) - 1
        for idx in range(len(out_planes)):
            mid_planes_and_cardinality = None
            if base_width_and_cardinality is not None:
                w, c = base_width_and_cardinality
                mid_planes_and_cardinality = (w * 2 ** idx, c)
            new_block = self._make_resolution_block(
                in_planes[idx],
                out_planes[idx],
                idx,
                num_blocks[idx],  # num layers
                stride=1 if idx == 0 else 2,
                mid_planes_and_cardinality=mid_planes_and_cardinality,
                reduction=reduction,
                # only the very last resolution may drop its final BN/ReLU
                final_bn_relu=final_bn_relu or (idx != last_idx),
            )
            blocks.append(nn.Sequential(*new_block))
        self.blocks = nn.Sequential(*blocks)

        self.out_planes = out_planes[-1]
        # `out_planes` is the per-resolution list; `output_shape` needs the
        # single width of the last resolution, not the whole list.
        self._num_classes = out_planes[-1]

        # initialize weights:
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Init BatchNorm gamma to 0.0 for last BN layer, it gets 0.2-0.3% higher
        # final val top1 for larger batch sizes.
        if zero_init_bn_residuals:
            for m in self.modules():
                # layers built with final_bn_relu=False have no `bn` attribute
                if isinstance(m, GenericLayer) and hasattr(m, "bn"):
                    nn.init.constant_(m.bn.weight, 0)

    def _make_initial_block(self, small_input, init_planes, basic_layer):
        """Creates the initial block and selects the residual layer type."""
        if small_input:
            self.initial_block = SmallInputInitialBlock(init_planes)
            self.layer_type = BasicLayer
        else:
            self.initial_block = InitialBlock(init_planes)
            self.layer_type = BasicLayer if basic_layer else BottleneckLayer

    # helper function that creates ResNet blocks at single spatial resolution:
    def _make_resolution_block(
        self,
        in_planes,
        out_planes,
        resolution_idx,
        num_blocks,
        stride=1,
        mid_planes_and_cardinality=None,
        reduction=4,
        final_bn_relu=True,
    ):
        """Returns the list of attachable residual layers of one resolution.

        Only the first layer changes the number of planes and applies
        ``stride``; only the last layer may omit its final BN/ReLU.
        """
        # add the desired number of residual blocks:
        blocks = []
        for idx in range(num_blocks):
            blocks.append(
                self.build_attachable_block(
                    "block{}-{}".format(resolution_idx, idx),
                    self.layer_type(
                        in_planes if idx == 0 else out_planes,
                        out_planes,
                        stride=stride if idx == 0 else 1,  # only first block has stride
                        mid_planes_and_cardinality=mid_planes_and_cardinality,
                        reduction=reduction,
                        final_bn_relu=final_bn_relu or (idx != (num_blocks - 1)),
                    ),
                )
            )
        return blocks

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "ResNeXt":
        """Instantiates a ResNeXt from a configuration.

        Args:
            config: A configuration for a ResNeXt.
                See :func:`__init__` for parameters expected in the config.
                Only ``num_blocks`` is required.

        Returns:
            A ResNeXt instance.
        """
        assert "num_blocks" in config
        config = {
            "num_blocks": config["num_blocks"],
            "init_planes": config.get("init_planes", 64),
            "reduction": config.get("reduction", 4),
            "base_width_and_cardinality": config.get("base_width_and_cardinality"),
            "small_input": config.get("small_input", False),
            "basic_layer": config.get("basic_layer", False),
            "final_bn_relu": config.get("final_bn_relu", True),
            "zero_init_bn_residuals": config.get("zero_init_bn_residuals", False),
        }
        return cls(**config)

    # forward pass in residual network:
    def forward(self, x):
        """Runs the trunk and returns the output(s) of the attached heads.

        Returns the single head output when exactly one head produced
        output, otherwise the full collection returned by
        ``execute_heads``. Raises an Exception when no head produced output.
        """
        # initial convolutional block:
        out = self.initial_block(x)

        # evaluate all residual blocks:
        # TODO: (kaizh) T43794289 exit early if there is no block that has heads
        # The return value is intentionally unused: the result is read back
        # through the heads below.
        self.blocks(out)

        # By default the classification layer is implemented as one head on top
        # of the last block. The head is automatically computed right after the
        # last block.
        head_outputs = self.execute_heads()
        if len(head_outputs) == 0:
            raise Exception("Expecting at least one head that generates output")
        elif len(head_outputs) == 1:
            return list(head_outputs.values())[0]
        else:
            return head_outputs

    @property
    def input_shape(self):
        if self.small_input:
            return (3, 32, 32)
        else:
            return (3, 224, 224)

    @property
    def output_shape(self):
        return (1, self._num_classes)

    @property
    def model_depth(self):
        return sum(self.num_blocks)
@classmethod
def _parse_config(cls, config):
    """Validates a raw config and fills in defaults for optional entries.

    Args:
        config: dict holding at least ``input_planes``, ``clip_crop_size``,
            ``skip_transformation_type``, ``residual_transformation_type``,
            ``frames_per_clip`` and ``num_blocks``.

    Returns:
        A new dict with the required entries copied over and every optional
        entry set to the configured value or its default.
    """
    parsed = {}
    for required in (
        "input_planes",
        "clip_crop_size",
        "skip_transformation_type",
        "residual_transformation_type",
        "frames_per_clip",
        "num_blocks",
    ):
        assert required in config, "resnext3d model requires argument %s" % required
        parsed[required] = config[required]

    defaults = {
        # Model stem:
        # stem_planes: No. of output channels of conv op in stem
        # stem_temporal_kernel: temporal size of conv op in stem
        # stem_spatial_kernel: spatial size of conv op in stem
        # stem_maxpool: by default, spatial maxpool op is disabled in stem
        "input_key": None,
        "stem_name": "resnext3d_stem",
        "stem_planes": 64,
        "stem_temporal_kernel": 3,
        "stem_spatial_kernel": 7,
        "stem_maxpool": False,
        # Model stages 2, 3, 4 and 5:
        # stage_planes: No. of output channel of 1st conv op in stage 2
        # stage_temporal_kernel_basis: Basis of temporal kernel sizes for each
        #   of the stages.
        # temporal_conv_1x1: if True, do temporal convolution in the first
        #   1x1 Conv3d. Otherwise, do it in the second 3x3 Conv3d (default)
        # stage_temporal_stride: temporal stride for each stage
        # stage_spatial_stride: spatial stride for each stage
        # num_groups: No. of groups in 2nd (group) conv in the residual
        #   transformation
        # width_per_group: No. of channels per group in 2nd (group) conv in
        #   the residual transformation
        "stage_planes": 256,
        "stage_temporal_kernel_basis": [[3], [3], [3], [3]],
        "temporal_conv_1x1": [False, False, False, False],
        "stage_temporal_stride": [1, 2, 2, 2],
        "stage_spatial_stride": [1, 2, 2, 2],
        "num_groups": 1,
        "width_per_group": 64,
        # Model parameter initialization:
        "zero_init_residual_transform": False,
    }
    for key, fallback in defaults.items():
        parsed[key] = config.get(key, fallback)

    assert is_pos_int_list(parsed["num_blocks"])
    assert is_pos_int(parsed["stem_planes"])
    assert is_pos_int(parsed["stem_temporal_kernel"])
    assert is_pos_int(parsed["stem_spatial_kernel"])
    assert type(parsed["stem_maxpool"]) is bool
    assert is_pos_int(parsed["stage_planes"])
    assert type(parsed["stage_temporal_kernel_basis"]) is list
    assert all(
        is_pos_int_list(basis) for basis in parsed["stage_temporal_kernel_basis"]
    )
    assert type(parsed["temporal_conv_1x1"]) is list
    assert is_pos_int_list(parsed["stage_temporal_stride"])
    assert is_pos_int_list(parsed["stage_spatial_stride"])
    assert is_pos_int(parsed["num_groups"])
    assert is_pos_int(parsed["width_per_group"])
    return parsed
def set_classy_state(self, state):
    """Loads a checkpoint, inflating 2D-style conv weights where needed.

    Handles both regular checkpoint loading and inflation of conv weights
    whose temporal extent is 1 into 3D conv weights.

    Args:
        state: checkpoint; ``state["model"]["trunk"]`` maps parameter names
            to tensors. It is also handed to ``load_head_states``.
    """
    self.load_head_states(state)
    merged_state = self.state_dict()
    for key, src in state["model"]["trunk"].items():
        assert key in merged_state, (
            "weight %s is not found in ResNeXt3D model" % key
        )
        tgt = merged_state[key]
        assert (
            src.dim() == tgt.dim()
        ), "weight of source- and target 3D convolution should have same dimension"

        # A 5D source weight whose temporal dimension is 1 is inflated when
        # the weight with the same name in this model has a larger temporal
        # dimension. Such parameter inflation was first introduced in
        # https://arxiv.org/abs/1705.07750 and gives a better initialization
        # than random initialization.
        inflate = src.dim() == 5 and src.shape[2] == 1 and tgt.shape[2] > 1
        if not inflate:
            assert src.size() == tgt.size(), (
                "the shapes of source and target weight mismatch: %s Vs %s"
                % (str(src.size()), str(tgt.size()))
            )
        else:
            assert (
                src.shape[:2] == tgt.shape[:2] and src.shape[-2:] == tgt.shape[-2:]
            ), "weight shapes of source- and target 3D convolution mismatch"
            # repeat along the temporal axis and rescale by the repeat count
            depth = tgt.shape[2]
            src = src.repeat(1, 1, depth, 1, 1) / depth

        merged_state[key] = src.clone()
    super().load_state_dict(merged_state)
@register_model("resnext3d")
class ResNeXt3D(ResNeXt3DBase):
    """
    Implementation of:

    1. Conventional post-activated 3D ResNe(X)t.

    2. Pre-activated 3D ResNe(X)t.

    The model consists of one stem, a number of stages, and one or multiple
    heads that are attached to different blocks in the stage.
    """

    def __init__(
        self,
        input_key,
        input_planes,
        clip_crop_size,
        skip_transformation_type,
        residual_transformation_type,
        frames_per_clip,
        num_blocks,
        stem_name,
        stem_planes,
        stem_temporal_kernel,
        stem_spatial_kernel,
        stem_maxpool,
        stage_planes,
        stage_temporal_kernel_basis,
        temporal_conv_1x1,
        stage_temporal_stride,
        stage_spatial_stride,
        num_groups,
        width_per_group,
        zero_init_residual_transform,
    ):
        """
        Args:
            input_key (str): a key that can index into model input that is
                of dict type.
            input_planes (int): the channel dimension of the input. Normally 3 is used
                for rgb input.
            clip_crop_size (int): spatial cropping size of video clip at train time.
            skip_transformation_type (str): the type of skip transformation.
            residual_transformation_type (str): the type of residual transformation.
            frames_per_clip (int): Number of frames in a video clip.
            num_blocks (list): list of the number of blocks in stages.
            stem_name (str): name of model stem.
            stem_planes (int): the output dimension of the convolution in the model
                stem.
            stem_temporal_kernel (int): the temporal kernel size of the convolution
                in the model stem.
            stem_spatial_kernel (int): the spatial kernel size of the convolution
                in the model stem.
            stem_maxpool (bool): If true, perform max pooling.
            stage_planes (int): the output channel dimension of the 1st residual
                stage; it is doubled at every following stage.
            stage_temporal_kernel_basis (list): Basis of temporal kernel sizes for
                each of the stages (one entry per stage).
            temporal_conv_1x1 (list): one bool per stage. Only useful for
                BottleneckTransformation. In a pathway, if True, do temporal
                convolution in the first 1x1 Conv3d. Otherwise, do it in the
                second 3x3 Conv3d.
            stage_temporal_stride (list): the temporal stride of the residual
                transformation, one int per stage.
            stage_spatial_stride (list): the spatial stride of the residual
                transformation, one int per stage.
            num_groups (int): number of groups for the convolution.
                num_groups = 1 is for standard ResNet like networks, and
                num_groups > 1 is for ResNeXt like networks.
            width_per_group (int): Number of channels per group in 2nd (group)
                conv in the residual transformation in the first stage
            zero_init_residual_transform (bool): if true, the weight of last
                operation, which could be either BatchNorm3D in post-activated
                transformation or Conv3D in pre-activated transformation, in the
                residual transformation is initialized to zero
        """
        super().__init__(
            input_key,
            input_planes,
            clip_crop_size,
            frames_per_clip,
            num_blocks,
            stem_name,
            stem_planes,
            stem_temporal_kernel,
            stem_spatial_kernel,
            stem_maxpool,
        )

        # channel widths double from one stage to the next:
        num_stages = len(num_blocks)
        out_planes = [stage_planes * 2 ** i for i in range(num_stages)]
        in_planes = [stem_planes] + out_planes[:-1]
        inner_planes = [
            num_groups * width_per_group * 2 ** i for i in range(num_stages)
        ]

        stages = []
        for s in range(num_stages):
            # per-stage settings are wrapped in single-element lists
            stage = ResStage(
                s + 1,  # stem is viewed as stage 0, and following stages start from 1
                [in_planes[s]],
                [out_planes[s]],
                [inner_planes[s]],
                [stage_temporal_kernel_basis[s]],
                [temporal_conv_1x1[s]],
                [stage_temporal_stride[s]],
                [stage_spatial_stride[s]],
                [num_blocks[s]],
                [num_groups],
                skip_transformation_type,
                residual_transformation_type,
                block_callback=self.build_attachable_block,
                disable_pre_activation=(s == 0),
                final_stage=(s == (num_stages - 1)),
            )
            stages.append(stage)

        self.stages = nn.Sequential(*stages)
        self._init_parameter(zero_init_residual_transform)

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "ResNeXt3D":
        """Instantiates a ResNeXt3D from a configuration.

        Args:
            config: A configuration for a ResNeXt3D.
                See :func:`__init__` for parameters expected in the config.

        Returns:
            A ResNeXt3D instance.
        """
        # Go through `cls` so that a subclass overriding `_parse_config`
        # has its override honored.
        ret_config = cls._parse_config(config)
        return cls(**ret_config)
class PostactivatedBottleneckTransformation(nn.Module):
    """
    Post-activated bottleneck: Tx1x1 conv, 1x3x3 (group) conv, 1x1x1 conv,
    where T is the size of the temporal kernel. The first two convolutions
    are followed by BatchNorm and ReLU, the last one by BatchNorm only.
    """

    def __init__(
        self,
        dim_in,
        dim_out,
        temporal_stride,
        spatial_stride,
        num_groups,
        dim_inner,
        temporal_kernel_size=3,
        temporal_conv_1x1=True,
        spatial_stride_1x1=False,
        inplace_relu=True,
        bn_eps=1e-5,
        bn_mmt=0.1,
        **kwargs
    ):
        """
        Args:
            dim_in (int): the channel dimensions of the input.
            dim_out (int): the channel dimension of the output.
            temporal_stride (int): the temporal stride of the bottleneck.
            spatial_stride (int): the spatial_stride of the bottleneck.
            num_groups (int): number of groups for the convolution.
                num_groups=1 is for standard ResNet like networks, and
                num_groups>1 is for ResNeXt like networks.
            dim_inner (int): the inner dimension of the block.
            temporal_kernel_size (int): the temporal kernel size used by the
                bottleneck.
            temporal_conv_1x1 (bool): if True, do temporal convolution in the
                first 1x1 Conv3d. Otherwise, do it in the second 3x3 Conv3d.
            spatial_stride_1x1 (bool): if True, apply spatial_stride to 1x1 conv.
            inplace_relu (bool): calculate the relu on the original input
                without allocating new memory.
            bn_eps (float): epsilon for batch norm.
            bn_mmt (float): momentum for batch norm. Noted that BN momentum in
                PyTorch = 1 - BN momentum in Caffe2.
            **kwargs: accepted and ignored.
        """
        super().__init__()

        # Exactly one of the first two convolutions is temporal.
        if temporal_conv_1x1:
            t_kernel_a, t_kernel_b = temporal_kernel_size, 1
        else:
            t_kernel_a, t_kernel_b = 1, temporal_kernel_size

        # MSRA -> stride=2 is on 1x1; TH/C2 -> stride=2 is on 3x3.
        if spatial_stride_1x1:
            s_stride_a, s_stride_b = spatial_stride, 1
        else:
            s_stride_a, s_stride_b = 1, spatial_stride

        # Tx1x1 conv, BN, ReLU.
        self.branch2a = nn.Conv3d(
            dim_in,
            dim_inner,
            kernel_size=(t_kernel_a, 1, 1),
            stride=(1, s_stride_a, s_stride_a),
            padding=(t_kernel_a // 2, 0, 0),
            bias=False,
        )
        self.branch2a_bn = nn.BatchNorm3d(dim_inner, eps=bn_eps, momentum=bn_mmt)
        self.branch2a_relu = nn.ReLU(inplace=inplace_relu)

        # Tx3x3 group conv, BN, ReLU.
        self.branch2b = nn.Conv3d(
            dim_inner,
            dim_inner,
            (t_kernel_b, 3, 3),
            stride=(temporal_stride, s_stride_b, s_stride_b),
            padding=(t_kernel_b // 2, 1, 1),
            groups=num_groups,
            bias=False,
        )
        self.branch2b_bn = nn.BatchNorm3d(dim_inner, eps=bn_eps, momentum=bn_mmt)
        self.branch2b_relu = nn.ReLU(inplace=inplace_relu)

        # 1x1x1 conv, BN.
        self.branch2c = nn.Conv3d(
            dim_inner,
            dim_out,
            kernel_size=(1, 1, 1),
            stride=(1, 1, 1),
            padding=(0, 0, 0),
            bias=False,
        )
        self.branch2c_bn = nn.BatchNorm3d(dim_out, eps=bn_eps, momentum=bn_mmt)
        # tag the last op of the transformation
        self.branch2c_bn.final_transform_op = True

    def forward(self, x):
        # Apply every layer explicitly, in registration order.
        pipeline = (
            self.branch2a,
            self.branch2a_bn,
            self.branch2a_relu,
            self.branch2b,
            self.branch2b_bn,
            self.branch2b_relu,
            self.branch2c,
            self.branch2c_bn,
        )
        for op in pipeline:
            x = op(x)
        return x
class PreactivatedBottleneckTransformation(nn.Module):
    """
    Bottleneck transformation with pre-activation (https://arxiv.org/abs/1603.05027).

    Every convolution is preceded by BatchNorm3d and ReLU. The Conv3d kernels
    are Tx1x1, 1x3x3 and 1x1x1, where T is the temporal kernel size.
    """

    def __init__(
        self,
        dim_in,
        dim_out,
        temporal_stride,
        spatial_stride,
        num_groups,
        dim_inner,
        temporal_kernel_size=3,
        temporal_conv_1x1=True,
        spatial_stride_1x1=False,
        inplace_relu=True,
        bn_eps=1e-5,
        bn_mmt=0.1,
        disable_pre_activation=False,
        **kwargs
    ):
        """
        Args:
            dim_in (int): the channel dimension of the input.
            dim_out (int): the channel dimension of the output.
            temporal_stride (int): the temporal stride of the bottleneck
                (applied by the middle convolution).
            spatial_stride (int): the spatial stride of the bottleneck.
            num_groups (int): number of groups for the middle convolution.
                num_groups=1 is for standard ResNet like networks, and
                num_groups>1 is for ResNeXt like networks.
            dim_inner (int): the inner dimension of the block.
            temporal_kernel_size (int): the temporal kernel size T.
            temporal_conv_1x1 (bool): if True, do the temporal convolution in
                the first 1x1 Conv3d. Otherwise, do it in the middle 3x3 Conv3d.
            spatial_stride_1x1 (bool): if True, apply spatial_stride in the
                first 1x1 conv; otherwise in the 3x3 conv.
            inplace_relu (bool): calculate the relu on the original input
                without allocating new memory.
            bn_eps (float): epsilon for batch norm.
            bn_mmt (float): momentum for batch norm. Note that BN momentum in
                PyTorch = 1 - BN momentum in Caffe2.
            disable_pre_activation (bool): if True, the BatchNorm3d + ReLU in
                front of the first convolution are not created.
            **kwargs: accepted and ignored.
        """
        super().__init__()

        # Decide which convolution carries the temporal kernel and which one
        # carries the spatial stride.
        if temporal_conv_1x1:
            t_first, t_middle = temporal_kernel_size, 1
        else:
            t_first, t_middle = 1, temporal_kernel_size
        if spatial_stride_1x1:
            s_first, s_middle = spatial_stride, 1
        else:
            s_first, s_middle = 1, spatial_stride

        self.disable_pre_activation = disable_pre_activation

        # (BN, ReLU), Tx1x1 conv. The pre-activation only exists when enabled.
        if not disable_pre_activation:
            self.branch2a_bn = nn.BatchNorm3d(dim_in, eps=bn_eps, momentum=bn_mmt)
            self.branch2a_relu = nn.ReLU(inplace=inplace_relu)
        self.branch2a = nn.Conv3d(
            dim_in,
            dim_inner,
            kernel_size=(t_first, 1, 1),
            stride=(1, s_first, s_first),
            padding=(t_first // 2, 0, 0),
            bias=False,
        )

        # BN, ReLU, Tx3x3 group conv.
        self.branch2b_bn = nn.BatchNorm3d(dim_inner, eps=bn_eps, momentum=bn_mmt)
        self.branch2b_relu = nn.ReLU(inplace=inplace_relu)
        self.branch2b = nn.Conv3d(
            dim_inner,
            dim_inner,
            kernel_size=(t_middle, 3, 3),
            stride=(temporal_stride, s_middle, s_middle),
            padding=(t_middle // 2, 1, 1),
            groups=num_groups,
            bias=False,
        )

        # BN, ReLU, 1x1x1 conv. The last conv is tagged as the final op of the
        # transformation.
        self.branch2c_bn = nn.BatchNorm3d(dim_inner, eps=bn_eps, momentum=bn_mmt)
        self.branch2c_relu = nn.ReLU(inplace=inplace_relu)
        self.branch2c = nn.Conv3d(
            dim_inner,
            dim_out,
            kernel_size=(1, 1, 1),
            stride=(1, 1, 1),
            padding=(0, 0, 0),
            bias=False,
        )
        self.branch2c.final_transform_op = True

    def forward(self, x):
        """Apply the three (BN-ReLU-)conv stages to `x`."""
        out = x
        if not self.disable_pre_activation:
            out = self.branch2a_relu(self.branch2a_bn(out))
        out = self.branch2a(out)
        out = self.branch2b(self.branch2b_relu(self.branch2b_bn(out)))
        return self.branch2c(self.branch2c_relu(self.branch2c_bn(out)))
class PostactivatedShortcutTransformation(nn.Module):
    """
    Projection skip connection used in the ResNet3D model: a strided 1x1x1
    convolution followed by BatchNorm3d.
    """

    def __init__(
        self,
        dim_in,
        dim_out,
        temporal_stride,
        spatial_stride,
        bn_eps=1e-5,
        bn_mmt=0.1,
        **kwargs
    ):
        """
        Args:
            dim_in (int): the channel dimension of the input.
            dim_out (int): the channel dimension of the output.
            temporal_stride (int): temporal stride of the projection.
            spatial_stride (int): spatial stride of the projection.
            bn_eps (float): epsilon for batch norm.
            bn_mmt (float): momentum for batch norm.
            **kwargs: accepted and ignored.
        """
        super().__init__()
        # A projection only makes sense when the channel count or the
        # spatial/temporal resolution changes.
        assert not (dim_in == dim_out and spatial_stride == 1 and temporal_stride == 1)
        projection_stride = (temporal_stride, spatial_stride, spatial_stride)
        self.branch1 = nn.Conv3d(
            dim_in,
            dim_out,
            kernel_size=1,
            stride=projection_stride,
            padding=0,
            bias=False,
        )
        self.branch1_bn = nn.BatchNorm3d(dim_out, eps=bn_eps, momentum=bn_mmt)

    def forward(self, x):
        """Project `x` and normalize the result."""
        projected = self.branch1(x)
        return self.branch1_bn(projected)
class ResBlock(nn.Module):
    """
    Residual block with skip connection.

    Computes ``ReLU(shortcut(x) + residual(x))``. The shortcut is a projection
    taken from `skip_transformations` when the channel count or the
    spatial/temporal resolution changes, and the identity otherwise.
    """

    def __init__(
        self,
        dim_in,
        dim_out,
        dim_inner,
        temporal_kernel_size,
        temporal_conv_1x1,
        temporal_stride,
        spatial_stride,
        skip_transformation_type,
        residual_transformation_type,
        num_groups=1,
        inplace_relu=True,
        bn_eps=1e-5,
        bn_mmt=0.1,
        disable_pre_activation=False,
    ):
        """
        ResBlock class constructs residual blocks. More details can be found in:
            "Deep residual learning for image recognition."
            https://arxiv.org/abs/1512.03385
        Args:
            dim_in (int): the channel dimension of the input.
            dim_out (int): the channel dimension of the output.
            dim_inner (int): the inner dimension of the block.
            temporal_kernel_size (int): the temporal kernel size of the
                residual transformation.
            temporal_conv_1x1 (bool): Only useful for bottleneck
                transformations. If True, do the temporal convolution in the
                first 1x1 Conv3d. Otherwise, do it in the second 3x3 Conv3d.
            temporal_stride (int): the temporal stride of the block.
            spatial_stride (int): the spatial stride of the block.
            skip_transformation_type (str): key into `skip_transformations`.
            residual_transformation_type (str): key into
                `residual_transformations`.
            num_groups (int): number of groups for the convolution. num_groups=1
                is for standard ResNet like networks, and num_groups>1 is for
                ResNeXt like networks.
            inplace_relu (bool): calculate the relu on the original input
                without allocating new memory.
            bn_eps (float): epsilon for batch norm, used by both branches.
            bn_mmt (float): momentum for batch norm, used by both branches.
            disable_pre_activation (bool): If true, disable the preactivation,
                which includes BatchNorm3D and ReLU.
        """
        super(ResBlock, self).__init__()

        assert skip_transformation_type in skip_transformations, (
            "unknown skip transformation: %s" % skip_transformation_type
        )

        # Use a projection shortcut only if dim or spatial/temporal res change;
        # otherwise no `skip` attribute is created and forward() uses identity.
        if (dim_in != dim_out) or (spatial_stride != 1) or (temporal_stride != 1):
            self.skip = skip_transformations[skip_transformation_type](
                dim_in,
                dim_out,
                temporal_stride,
                spatial_stride,
                inplace_relu=inplace_relu,
                bn_eps=bn_eps,
                bn_mmt=bn_mmt,
                disable_pre_activation=disable_pre_activation,
            )

        assert residual_transformation_type in residual_transformations, (
            "unknown residual transformation: %s" % residual_transformation_type
        )
        # The batch-norm settings are forwarded so the residual branch uses the
        # same eps/momentum as the shortcut instead of silently falling back to
        # the transformation's defaults.
        # NOTE(review): inplace_relu is not forwarded to the residual branch
        # because BasicTransformation's signature is not visible here; confirm
        # it accepts the keyword (and that it does not collide with the
        # positional arguments below) before adding it.
        self.residual = residual_transformations[residual_transformation_type](
            dim_in,
            dim_out,
            temporal_stride,
            spatial_stride,
            num_groups,
            dim_inner,
            temporal_kernel_size=temporal_kernel_size,
            temporal_conv_1x1=temporal_conv_1x1,
            bn_eps=bn_eps,
            bn_mmt=bn_mmt,
            disable_pre_activation=disable_pre_activation,
        )
        self.relu = nn.ReLU(inplace_relu)

    def forward(self, x):
        """Return ReLU(shortcut(x) + residual(x))."""
        # The shortcut is evaluated before the residual branch, as before.
        shortcut = self.skip(x) if hasattr(self, "skip") else x
        return self.relu(shortcut + self.residual(x))
class ResStageBase(nn.Module):
    """
    Shared bookkeeping for a multi-pathway residual stage.

    Subclasses are expected to register one submodule per pathway under the
    name returned by `_pathway_name`; `forward` feeds the p-th input to the
    p-th pathway module.
    """

    def __init__(
        self,
        stage_idx,
        dim_in,
        dim_out,
        dim_inner,
        temporal_kernel_basis,
        temporal_conv_1x1,
        temporal_stride,
        spatial_stride,
        num_blocks,
        num_groups,
    ):
        """
        Every argument except `stage_idx` is a per-pathway list, and all of
        them must have the same length.
        """
        super().__init__()

        per_pathway_args = (
            dim_in,
            dim_out,
            temporal_kernel_basis,
            temporal_conv_1x1,
            temporal_stride,
            spatial_stride,
            num_blocks,
            dim_inner,
            num_groups,
        )
        distinct_lengths = {len(arg) for arg in per_pathway_args}
        assert len(distinct_lengths) == 1

        self.stage_idx = stage_idx
        self.num_blocks = num_blocks
        self.num_pathways = len(self.num_blocks)

        # Tile each pathway's kernel basis and cut it to one entry per block.
        kernel_sizes = []
        for pathway_idx, basis in enumerate(temporal_kernel_basis):
            block_count = num_blocks[pathway_idx]
            kernel_sizes.append((basis * block_count)[:block_count])
        self.temporal_kernel_sizes = kernel_sizes

    def _block_name(self, pathway_idx, stage_idx, block_idx):
        """Name under which a residual block is stored in its pathway."""
        return "pathway{}-stage{}-block{}".format(pathway_idx, stage_idx, block_idx)

    def _pathway_name(self, pathway_idx):
        """Attribute name of the submodule holding one pathway."""
        return "pathway{}".format(pathway_idx)

    def forward(self, inputs):
        """Run each pathway module on its own input and return the outputs as a list."""
        return [
            getattr(self, self._pathway_name(p))(inputs[p])
            for p in range(self.num_pathways)
        ]
class ResStage(ResStageBase):
    """
    Stage of 3D ResNet. It expects to have one or more tensors as input for
    single pathway (C2D, I3D, SlowOnly), and multi-pathway (SlowFast) cases.
    More details can be found here:
        "Slowfast networks for video recognition."
        https://arxiv.org/pdf/1812.03982.pdf
    """

    def __init__(
        self,
        stage_idx,
        dim_in,
        dim_out,
        dim_inner,
        temporal_kernel_basis,
        temporal_conv_1x1,
        temporal_stride,
        spatial_stride,
        num_blocks,
        num_groups,
        skip_transformation_type,
        residual_transformation_type,
        block_callback=None,
        inplace_relu=True,
        bn_eps=1e-5,
        bn_mmt=0.1,
        disable_pre_activation=False,
        final_stage=False,
    ):
        """
        The `__init__` method of any subclass should also contain these arguments.
        ResStage builds p streams, where p can be greater or equal to one.
        Args:
            stage_idx (int): integer index of stage.
            dim_in (list): list of p input channel dimensions, one per pathway.
            dim_out (list): list of p output channel dimensions, one per
                pathway.
            dim_inner (list): list of p inner channel dimensions, one per
                pathway.
            temporal_kernel_basis (list): per pathway, the basis of temporal
                kernel sizes that is tiled over the blocks of the stage.
            temporal_conv_1x1 (list): Only useful for bottleneck
                transformations. In a pathway, if True, do the temporal
                convolution in the first 1x1 Conv3d. Otherwise, do it in the
                second 3x3 Conv3d.
            temporal_stride (list): per pathway, the temporal stride applied by
                the first block.
            spatial_stride (list): per pathway, the spatial stride applied by
                the first block.
            num_blocks (list): list of p numbers of blocks, one per pathway.
            num_groups (list): list of p numbers of groups for the convolution.
                num_groups=1 is for standard ResNet like networks, and
                num_groups>1 is for ResNeXt like networks.
            skip_transformation_type (str): the type of skip transformation
            residual_transformation_type (str): the type of residual transformation
            block_callback (function object): a callback function to be called with
                the module name and the module as input arguments; its return
                value replaces the module.
            inplace_relu (bool): calculate the relu on the original input
                without allocating new memory.
            bn_eps (float): epsilon for every batch norm of the stage.
            bn_mmt (float): momentum for every batch norm of the stage.
            disable_pre_activation (bool): If true, disable the preactivation,
                which includes BatchNorm3D and ReLU, in the first block of each
                pathway.
            final_stage (bool): If true, this is the last stage in the model.
        """
        super(ResStage, self).__init__(
            stage_idx,
            dim_in,
            dim_out,
            dim_inner,
            temporal_kernel_basis,
            temporal_conv_1x1,
            temporal_stride,
            spatial_stride,
            num_blocks,
            num_groups,
        )

        for p in range(self.num_pathways):
            blocks = []
            for i in range(self.num_blocks[p]):
                # Only the first block of a pathway changes the channel count,
                # applies the strides and may skip its pre-activation.
                is_first_block = i == 0
                res_block = ResBlock(
                    dim_in[p] if is_first_block else dim_out[p],
                    dim_out[p],
                    dim_inner[p],
                    self.temporal_kernel_sizes[p][i],
                    temporal_conv_1x1[p],
                    temporal_stride[p] if is_first_block else 1,
                    spatial_stride[p] if is_first_block else 1,
                    skip_transformation_type,
                    residual_transformation_type,
                    num_groups=num_groups[p],
                    inplace_relu=inplace_relu,
                    bn_eps=bn_eps,
                    bn_mmt=bn_mmt,
                    disable_pre_activation=(
                        bool(disable_pre_activation) and is_first_block
                    ),
                )
                block_name = self._block_name(p, stage_idx, i)
                if block_callback:
                    res_block = block_callback(block_name, res_block)
                blocks.append((block_name, res_block))

            if final_stage and (
                residual_transformation_type == "preactivated_bottleneck_transformation"
            ):
                # For pre-activation residual transformation, we conduct
                # activation in the final stage before continuing forward pass
                # through the head. The layers follow the stage's batch-norm and
                # ReLU settings rather than the PyTorch defaults.
                activate_bn = nn.BatchNorm3d(dim_out[p], eps=bn_eps, momentum=bn_mmt)
                activate_relu = nn.ReLU(inplace=inplace_relu)
                # Named after the last residual block of the pathway.
                activate_bn_name = "-".join([block_name, "bn"])
                activate_relu_name = "-".join([block_name, "relu"])
                # Only the ReLU is handed to the callback (unchanged behavior).
                if block_callback:
                    activate_relu = block_callback(activate_relu_name, activate_relu)
                blocks.append((activate_bn_name, activate_bn))
                blocks.append((activate_relu_name, activate_relu))

            self.add_module(self._pathway_name(p), nn.Sequential(OrderedDict(blocks)))
class ResNeXt3DStemSinglePathway(nn.Module):
    """
    ResNe(X)t 3D basic stem module for a single pathway.

    Performs a spatiotemporal convolution, BN and ReLU, optionally followed by
    a spatial max pooling.
    """

    def __init__(
        self,
        dim_in,
        dim_out,
        kernel,
        stride,
        padding,
        maxpool=True,
        inplace_relu=True,
        bn_eps=1e-5,
        bn_mmt=0.1,
    ):
        """
        The `__init__` method of any subclass should also contain these arguments.

        Args:
            dim_in (int): the channel dimension of the input. Normally 3 is used
                for rgb input
            dim_out (int): the output dimension of the convolution in the stem
                layer.
            kernel (list): the kernel size of the convolution in the stem layer:
                temporal, height and width kernel size in order.
            stride (list): the stride of the convolution in the stem layer:
                temporal, height and width stride in order.
            padding (list): the padding of the convolution in the stem layer:
                temporal, height and width padding in order.
            maxpool (bool): If true, perform max pooling.
            inplace_relu (bool): calculate the relu on the original input
                without allocating new memory.
            bn_eps (float): epsilon for batch norm.
            bn_mmt (float): momentum for batch norm. Note that BN momentum in
                PyTorch = 1 - BN momentum in Caffe2.
        """
        super().__init__()
        # Keep the configuration around; _construct_stem() reads it.
        self.kernel, self.stride, self.padding = kernel, stride, padding
        self.inplace_relu = inplace_relu
        self.bn_eps, self.bn_mmt = bn_eps, bn_mmt
        self.maxpool = maxpool

        # Construct the stem layer.
        self._construct_stem(dim_in, dim_out)

    def _construct_stem(self, dim_in, dim_out):
        """Create the conv / BN / ReLU layers and, if enabled, the max pool."""
        self.conv = nn.Conv3d(
            dim_in,
            dim_out,
            self.kernel,
            stride=self.stride,
            padding=self.padding,
            bias=False,
        )
        self.bn = nn.BatchNorm3d(dim_out, eps=self.bn_eps, momentum=self.bn_mmt)
        self.relu = nn.ReLU(inplace=self.inplace_relu)
        if not self.maxpool:
            return
        # The pooling is spatial only: the temporal extent is left untouched.
        self.pool_layer = nn.MaxPool3d(
            kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1)
        )

    def forward(self, x):
        """Apply conv, BN, ReLU and the optional max pooling to `x`."""
        out = self.relu(self.bn(self.conv(x)))
        return self.pool_layer(out) if self.maxpool else out
class ResNeXt3DStemMultiPathway(nn.Module):
    """
    Video 3D stem module. Provides stem operations of Conv, BN, ReLU, MaxPool
    on input data tensor for one or multiple pathways.
    """

    def __init__(
        self,
        dim_in,
        dim_out,
        kernel,
        stride,
        padding,
        inplace_relu=True,
        bn_eps=1e-5,
        bn_mmt=0.1,
        maxpool=(True,),
    ):
        """
        The `__init__` method of any subclass should also contain these
        arguments. List size of 1 for single pathway models (C2D, I3D, SlowOnly
        and etc), list size of 2 for two pathway models (SlowFast).

        Args:
            dim_in (list): the list of channel dimensions of the inputs.
            dim_out (list): the output dimension of the convolution in the stem
                layer, one entry per pathway.
            kernel (list): per pathway, the kernel size of the stem convolution:
                temporal, height and width kernel size in order.
            stride (list): per pathway, the stride of the stem convolution:
                temporal, height and width stride in order.
            padding (list): per pathway, the padding of the stem convolution:
                temporal, height and width padding in order.
            inplace_relu (bool): calculate the relu on the original input
                without allocating new memory.
            bn_eps (float): epsilon for batch norm.
            bn_mmt (float): momentum for batch norm. Note that BN momentum in
                PyTorch = 1 - BN momentum in Caffe2.
            maxpool (iterable): per pathway, whether to max pool after the
                stem. At training time, when crop size is 224 x 224, do max
                pooling. When crop size is 112 x 112, skip max pooling.
                Default value is (True,).
        """
        super(ResNeXt3DStemMultiPathway, self).__init__()

        assert (
            len({len(dim_in), len(dim_out), len(kernel), len(stride), len(padding)})
            == 1
        ), "Input pathway dimensions are not consistent."
        self.num_pathways = len(dim_in)
        self.kernel = kernel
        self.stride = stride
        self.padding = padding
        self.inplace_relu = inplace_relu
        self.bn_eps = bn_eps
        self.bn_mmt = bn_mmt
        self.maxpool = maxpool

        # Construct the stem layer.
        self._construct_stem(dim_in, dim_out)

    def _construct_stem(self, dim_in, dim_out):
        """Create and register one single-pathway stem per entry of `dim_in`."""
        assert isinstance(dim_in, list)
        assert all(dim > 0 for dim in dim_in)
        assert isinstance(dim_out, list)
        assert all(dim > 0 for dim in dim_out)

        # `blocks` is kept as a name -> stem lookup for existing users of the
        # attribute; add_module() is what actually registers each stem.
        self.blocks = {}
        for p in range(len(dim_in)):
            stem = ResNeXt3DStemSinglePathway(
                dim_in[p],
                dim_out[p],
                self.kernel[p],
                self.stride[p],
                self.padding[p],
                inplace_relu=self.inplace_relu,
                bn_eps=self.bn_eps,
                bn_mmt=self.bn_mmt,
                maxpool=self.maxpool[p],
            )
            stem_name = self._stem_name(p)
            self.add_module(stem_name, stem)
            self.blocks[stem_name] = stem

    def _stem_name(self, path_idx):
        """Name under which the stem of one pathway is registered."""
        return "stem-path{}".format(path_idx)

    def forward(self, x):
        """
        Apply the p-th stem to the p-th entry of `x`.

        Args:
            x (list): one input tensor per pathway. The list is updated in
                place and returned.
        """
        assert (
            len(x) == self.num_pathways
        ), "Input tensor does not contain {} pathway".format(self.num_pathways)
        for p in range(len(x)):
            # Resolve the stem through the registered submodule, not through the
            # plain `blocks` dict: nn.Module does not track a dict attribute, so
            # a replicated copy of this module (e.g. under nn.DataParallel)
            # would otherwise keep calling the original stems.
            stem = getattr(self, self._stem_name(p))
            x[p] = stem(x[p])
        return x
def build_optimizer(config):
    """Builds a ClassyOptimizer from a config.

    The config must contain a 'name' key, which selects the optimizer class.
    For instance, a config `{"name": "my_optimizer", "foo": "bar"}` looks up
    the class registered as "my_optimizer" (see :func:`register_optimizer`)
    and returns the result of calling .from_config on it with the whole
    config."""
    optimizer_cls = OPTIMIZER_REGISTRY[config["name"]]
    return optimizer_cls.from_config(config)
def register_optimizer(name):
    """Registers a ClassyOptimizer subclass.

    This decorator allows Classy Vision to instantiate a subclass of
    ClassyOptimizer from a configuration file, even if the class itself is not
    part of the Classy Vision framework. To use it, apply this decorator to a
    ClassyOptimizer subclass, like this:

    .. code-block:: python

        @register_optimizer('my_optimizer')
        class MyOptimizer(ClassyOptimizer):
            ...

    To instantiate an optimizer from a configuration file, see
    :func:`build_optimizer`.

    Raises:
        ValueError: if `name` or the class name is already registered, or if
            the decorated class does not extend ClassyOptimizer.
    """

    def register_optimizer_cls(cls):
        if name in OPTIMIZER_REGISTRY:
            raise ValueError("Cannot register duplicate optimizer ({})".format(name))
        if not issubclass(cls, ClassyOptimizer):
            # The message names the actual base class, ClassyOptimizer.
            raise ValueError(
                "Optimizer ({}: {}) must extend ClassyOptimizer".format(
                    name, cls.__name__
                )
            )
        if cls.__name__ in OPTIMIZER_CLASS_NAMES:
            raise ValueError(
                "Cannot register optimizer with duplicate class name({})".format(
                    cls.__name__
                )
            )
        OPTIMIZER_REGISTRY[name] = cls
        OPTIMIZER_CLASS_NAMES.add(cls.__name__)
        return cls

    return register_optimizer_cls
class ClassyOptimizer:
    """
    Base class for classy optimizers.

    This wraps a :class:`torch.optim.Optimizer` instance, handles learning
    rate scheduling by using a :class:`param_scheduler.ClassyParamScheduler`
    and supports specifying regularized and unregularized param groups.
    Specifying unregularized params is especially useful to avoid applying
    weight decay on batch norm. See
    :func:`classy_vision.models.ClassyModel.get_optimizer_params` for more
    information.

    Deriving classes can extend functionality by overriding the appropriate
    functions.
    """

    def __init__(self, lr_scheduler: ClassyParamScheduler):
        """
        Constructor for ClassyOptimizer.

        Args:
            lr_scheduler: The learning rate scheduler to use.
        """
        self.lr_scheduler = lr_scheduler
        self.lr = self.lr_scheduler(0)
        self.optimizer = None
        self.optimizer_params = None
        # Filled in by init_pytorch_optimizer(); defined here so that the
        # attributes exist on every instance.
        self.param_groups_override = []
        self.contains_unregularized_params = False

    def _validate_and_get_optimizer_params(self, model: ClassyModel) -> Dict[str, Any]:
        """
        Validate and return the optimizer params.

        The optimizer params are fetched from
        :func:`models.ClassyModel.get_optimizer_params`.

        Args:
            model: The model to get the params from.

        Returns:
            A dict containing "regularized_params" and "unregularized_params".
            Weight decay will only be applied to "regularized_params".
        """
        # A DistributedDataParallel wrapper exposes the real model as `.module`.
        if isinstance(model, torch.nn.parallel.DistributedDataParallel):
            optimizer_params = model.module.get_optimizer_params()
        else:
            optimizer_params = model.get_optimizer_params()

        assert isinstance(optimizer_params, dict) and set(optimizer_params.keys()) == {
            "regularized_params",
            "unregularized_params",
        }, (
            "get_optimizer_params() of {0} should return dict with exact two keys "
            "'regularized_params', 'unregularized_params'".format(
                type(model).__name__
            )
        )

        # The two groups together must account for every trainable parameter.
        trainable_params = [
            params for params in model.parameters() if params.requires_grad
        ]
        assert len(trainable_params) == len(
            optimizer_params["regularized_params"]
        ) + len(optimizer_params["unregularized_params"]), (
            "get_optimizer_params() of {0} should return params that cover all "
            "trainable params of model".format(type(model).__name__)
        )

        return optimizer_params

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "ClassyOptimizer":
        """Instantiates a ClassyOptimizer from a configuration.

        Args:
            config: A configuration for the ClassyOptimizer.

        Returns:
            A ClassyOptimizer instance.
        """
        raise NotImplementedError

    @property
    def parameters(self) -> Dict[str, Any]:
        """
        Get the parameters of the optimizer which need to be overridden. All optimizer
        param groups will use these parameters.

        Returns:
            A kwarg dictionary that will be used to override optimizer args.
        """
        return {"lr": self.lr}

    def init_pytorch_optimizer(self, model: ClassyModel) -> None:
        """
        Initialize the underlying :class:`torch.optim.Optimizer` instance.

        Using the provided model, create param groups for the optimizer with a
        weight decay override for params which should be left unregularized.
        The groups are stored in ``self.param_groups_override``; the
        unregularized group, when present, always comes first.

        Note:
            Deriving classes should initialize the underlying Pytorch optimizer
            in this call. The simplest way to do this is after a call to

            ``super().init_pytorch_optimizer()``

        Warning:
            This should be called only after the model has been moved to the
            correct device.
        """
        self.optimizer_params = self._validate_and_get_optimizer_params(model)

        param_groups_override = []
        self.contains_unregularized_params = False
        if len(self.optimizer_params["unregularized_params"]) != 0:
            param_groups_override.append(
                {
                    "params": self.optimizer_params["unregularized_params"],
                    "weight_decay": 0.0,
                }
            )
            self.contains_unregularized_params = True

        if len(self.optimizer_params["regularized_params"]) != 0:
            param_groups_override.append(
                {"params": self.optimizer_params["regularized_params"]}
            )
        self.param_groups_override = param_groups_override

    def get_classy_state(self) -> Dict[str, Any]:
        """Get the state of the ClassyOptimizer.

        The returned state is used for checkpointing.

        Returns:
            A state dictionary containing the state of the optimizer.
        """
        return {"optim": self.optimizer.state_dict(), "parameters": self.parameters}

    def set_classy_state(self, state: Dict[str, Any]) -> None:
        """Set the state of the ClassyOptimizer.

        Args:
            state: The state dictionary. Must be the output of a call to
                :func:`get_classy_state`.

        This is used to load the state of the optimizer from a checkpoint.
        """
        self.optimizer.load_state_dict(state["optim"])
        # Every saved parameter is restored as an attribute of the same name.
        for param_name, param_value in state["parameters"].items():
            setattr(self, param_name, param_value)

    def backward(self, loss: torch.Tensor) -> None:
        """
        Compute gradients with respect to the loss.

        Calls :func:`zero_grad` and then computes the gradient using
        `torch.Tensor.backward`. See :mod:`torch.autograd` for more
        information.
        """
        # TODO (aadcock): Add gradient accumulation logic
        self.zero_grad()
        loss.backward()

    def update_schedule_on_epoch(self, where: float) -> None:
        """
        Update the param schedule at the end of an epoch.

        This should be called by the task at the end of every epoch to update the
        schedule of epoch based param schedulers (See
        :class:`param_scheduler.ClassyParamScheduler` for more information).
        It is a no-op for step based schedulers.

        Args:
            where: where we are in terms of training progress (output of
                :func:`tasks.ClassyTask.where`)
        """
        assert self.lr_scheduler.update_interval in [
            UpdateInterval.EPOCH,
            UpdateInterval.STEP,
        ]

        if self.lr_scheduler.update_interval == UpdateInterval.EPOCH:
            self._update_schedule(where)

    def update_schedule_on_step(self, where: float) -> None:
        """
        Update the param schedule at the end of a train step.

        This should be called by the task at the end of every train step (
        :func:`tasks.ClassyTask.train_step`) to update the schedule of step
        based param schedulers (See :class:`param_scheduler.ClassyParamScheduler`
        for more information). It is a no-op for epoch based schedulers.

        Args:
            where: where we are in terms of training progress (output of
                :func:`tasks.ClassyTask.where`)
        """
        assert self.lr_scheduler.update_interval in [
            UpdateInterval.EPOCH,
            UpdateInterval.STEP,
        ]

        if self.lr_scheduler.update_interval == UpdateInterval.STEP:
            self._update_schedule(where)

    def _update_schedule(self, where: float) -> None:
        """
        Recompute the learning rate and push the overridden parameters into
        every param group of the underlying optimizer.

        Args:
            where: where we are in terms of training progress (output of
                :func:`tasks.ClassyTask.where`)
        """
        self.lr = self.lr_scheduler(where)
        for group in self.optimizer.param_groups:
            group.update(self.parameters)

        # Here there's an assumption that pytorch optimizer maintain the order of
        # param_groups and batch_norm param_group is 0th param_group as initially
        # set in init_pytorch_optimizer().
        # It seems like pytorch optim doesn't have way to get params by 'id':
        #   See thread https://github.com/pytorch/pytorch/issues/1489
        if self.contains_unregularized_params:
            self.optimizer.param_groups[0].update(weight_decay=0.0)

    def step(self, closure: Optional[Callable] = None):
        """
        Performs a single optimization step.

        See `torch.optim.Optimizer.step` for more information.

        Args:
            closure: A closure that re-evaluates the model and returns the loss
        """
        self.optimizer.step(closure)

    def zero_grad(self):
        """
        Clears the gradients of all optimized parameters.

        See `torch.optim.Optimizer.zero_grad` for more information.
        """
        self.optimizer.zero_grad()
def build_param_scheduler(config):
    """Builds a :class:`ClassyParamScheduler` from a config.

    The config must contain a 'name' key, which selects the param scheduler
    class. For instance, a config `{"name": "my_scheduler", "foo": "bar"}`
    looks up the class registered as "my_scheduler" (see
    :func:`register_param_scheduler`) and returns the result of calling
    .from_config on it with the whole config."""
    scheduler_cls = PARAM_SCHEDULER_REGISTRY[config["name"]]
    return scheduler_cls.from_config(config)
def register_param_scheduler(name):
    """Return a class decorator that registers a param scheduler as ``name``.

    Registration is what lets :func:`build_param_scheduler` find a
    :class:`ClassyParamScheduler` subclass from a configuration, including
    subclasses defined outside this package:

    .. code-block:: python

        @register_param_scheduler('my_scheduler')
        class MyParamScheduler(ClassyParamScheduler):
            ...

    Args:
        name: key under which the decorated class is stored in
            ``PARAM_SCHEDULER_REGISTRY``.

    Raises:
        ValueError: (from the returned decorator) if ``name`` is already
            registered, or if the decorated class does not extend
            :class:`ClassyParamScheduler`.
    """

    def register_param_scheduler_cls(cls):
        # The duplicate-name check runs first, so it wins when both checks fail.
        already_registered = name in PARAM_SCHEDULER_REGISTRY
        if already_registered:
            raise ValueError(
                "Cannot register duplicate param scheduler ({})".format(name)
            )
        if issubclass(cls, ClassyParamScheduler):
            PARAM_SCHEDULER_REGISTRY[name] = cls
            return cls
        raise ValueError(
            "Param Scheduler ({}: {}) must extend ClassyParamScheduler".format(
                name, cls.__name__
            )
        )

    return register_param_scheduler_cls
class UpdateInterval(Enum):
    """
    How often a scheduled parameter is refreshed.

    Attributes:
        EPOCH (str): Update param before each epoch
        STEP (str): Update param before each optimizer step
    """

    EPOCH = "epoch"
    STEP = "step"


class ClassyParamScheduler:
    """
    Common base for all Classy parameter schedulers.

    A scheduler is a callable mapping training progress (``where``) to the
    value a parameter should take. Subclasses provide ``from_config`` and
    ``__call__``; both raise ``NotImplementedError`` here.

    Attributes:
        update_interval: Specifies how often to update each parameter
            (before each epoch or each batch)
    """

    # Tolerance subclasses may add to ``where`` before comparing or truncating.
    WHERE_EPSILON = 1e-6

    def __init__(self, update_interval: UpdateInterval = UpdateInterval.EPOCH):
        """
        Store the update frequency.

        Args:
            update_interval: Specifies the frequency of the param updates
        """
        self.update_interval = update_interval

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "ClassyParamScheduler":
        """Build a scheduler from a configuration; subclasses must override.

        Args:
            config: A configuration for the ClassyParamScheduler.

        Returns:
            A ClassyParamScheduler instance.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def __call__(self, where: float):
        """
        Return the param value for a given point in training.

        Params (such as learning rate) are driven by the fraction of
        training completed rather than by an absolute epoch count, which
        keeps a scheduler agnostic to the exact length of a particular run
        (e.g. 120 epochs vs 90 epochs).

        Args:
            where: A float in [0;1) that represents how far training has
                progressed

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Param schedulers must override __call__")
@register_param_scheduler("composite")
class CompositeParamScheduler(ClassyParamScheduler):
    """
    Composite parameter scheduler composed of intermediate schedulers.
    Takes a list of schedulers and a list of lengths corresponding to
    percentage of training each scheduler should run for. Schedulers
    are run in order. All values in lengths should sum to 1.0.

    Each scheduler also has a corresponding interval scale. If interval
    scale is 'fixed', the intermediate scheduler will be run without any
    rescaling of the time. If interval scale is 'rescaled', intermediate
    scheduler is run such that each scheduler will start and end at the same
    values as it would if it were the only scheduler. When a config does not
    specify ``interval_scaling``, :meth:`from_config` uses 'rescaled' for
    every scheduler.

    Example:

    .. code-block:: python

        update_interval = "step"
        schedulers = [
            {"name": "constant", "value": 0.42},
            {"name": "cosine", "start_lr": 0.42, "end_lr": 0.0001}
        ]
        interval_scaling = ['rescaled', 'rescaled']
        lengths = [0.3, 0.7]

    The parameter value will be 0.42 for the first [0%, 30%) of steps,
    and then will cosine decay from 0.42 to 0.0001 for [30%, 100%) of
    training.
    """

    class IntervalScaling(Enum):
        # RESCALED: the sub-scheduler sees its own progress, remapped to [0, 1).
        RESCALED = auto()
        # FIXED: the sub-scheduler sees the global ``where`` unchanged.
        FIXED = auto()

    def __init__(
        self,
        schedulers: Sequence[ClassyParamScheduler],
        lengths: Sequence[float],
        update_interval: UpdateInterval,
        interval_scaling: Sequence[IntervalScaling],
    ):
        """
        Args:
            schedulers: the intermediate schedulers, in the order they run.
            lengths: fraction of training assigned to each scheduler.
            update_interval: how often the composite value is refreshed.
            interval_scaling: one :class:`IntervalScaling` per scheduler.
        """
        super().__init__(update_interval)
        self._lengths = lengths
        self._schedulers = schedulers
        self._interval_scaling = interval_scaling

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "CompositeParamScheduler":
        """Instantiates a CompositeParamScheduler from a configuration.

        The caller's ``config`` is only read: the normalised ``lengths`` and
        the per-scheduler configs extended with ``num_epochs`` are built as
        new objects.

        Args:
            config: A configuration for a CompositeParamScheduler.
                See :func:`__init__` for parameters expected in the config.

        Returns:
            A CompositeParamScheduler instance.

        Raises:
            AssertionError: if ``schedulers``/``lengths`` are missing, empty
                or of different sizes, if ``lengths`` does not sum to 1
                (within 1e-3), or if ``update_interval`` /
                ``interval_scaling`` hold unsupported values.
        """
        assert (
            "schedulers" in config and "lengths" in config
        ), "Composite scheduler needs both a list of schedulers and lengths"
        scheduler_configs = config["schedulers"]
        # Copy, so the normalisation below never edits the caller's list.
        lengths = list(config["lengths"])
        assert len(scheduler_configs) == len(
            lengths
        ), "Schedulers and lengths must be same length"
        assert (
            len(scheduler_configs) > 0
        ), "There must be at least one scheduler in the composite scheduler"
        assert (
            abs(sum(lengths) - 1.0) < 1e-3
        ), "The sum of all values in lengths must be 1"
        if sum(lengths) != 1.0:
            # Absorb the rounding error into the last interval.
            lengths[-1] = 1.0 - sum(lengths[:-1])

        update_interval = UpdateInterval.STEP
        if "update_interval" in config:
            assert config["update_interval"] in {
                "step",
                "epoch",
            }, "Choices for update interval are 'step' or 'epoch'"
            update_interval = UpdateInterval[config["update_interval"].upper()]

        interval_scaling = []
        if "interval_scaling" in config:
            assert len(scheduler_configs) == len(
                config["interval_scaling"]
            ), "Schedulers and interval scaling must be the same length"
            for interval_scale in config["interval_scaling"]:
                assert interval_scale in {
                    "fixed",
                    "rescaled",
                }, "Choices for interval scaline are 'fixed' or 'rescaled'"
                interval_scaling.append(cls.IntervalScaling[interval_scale.upper()])
        else:
            interval_scaling = [cls.IntervalScaling.RESCALED] * len(scheduler_configs)

        if "num_epochs" in config:  # Propagate value to intermediate schedulers
            scheduler_configs = [
                dict(schedule, num_epochs=config["num_epochs"])
                for schedule in scheduler_configs
            ]

        return cls(
            schedulers=[
                build_param_scheduler(scheduler) for scheduler in scheduler_configs
            ],
            lengths=lengths,
            update_interval=update_interval,
            interval_scaling=interval_scaling,
        )

    def __call__(self, where: float):
        """Return the value of the intermediate scheduler active at ``where``.

        Args:
            where: overall training progress.

        Returns:
            The value produced by the selected intermediate scheduler.
        """
        # Find scheduler corresponding to where. The epsilon keeps a ``where``
        # that sits on a boundary (up to float error) in the later interval;
        # the index bound keeps the last scheduler selected at the very end.
        i = 0
        running_total = self._lengths[i]
        while (where + self.WHERE_EPSILON) > running_total and i < len(
            self._schedulers
        ) - 1:
            i += 1
            running_total += self._lengths[i]
        scheduler = self._schedulers[i]
        scheduler_where = where
        interval_scale = self._interval_scaling[i]
        if interval_scale == self.IntervalScaling.RESCALED:
            # Calculate corresponding where % for scheduler
            scheduler_start = running_total - self._lengths[i]
            scheduler_where = (where - scheduler_start) / self._lengths[i]
        return scheduler(scheduler_where)
@register_param_scheduler("constant")
class ConstantParamScheduler(ClassyParamScheduler):
    """
    Param scheduler whose value stays the same throughout training.
    """

    def __init__(self, value: float):
        """
        Args:
            value: the value returned by every call.
        """
        super().__init__()
        self._value = value

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "ConstantParamScheduler":
        """Build a ConstantParamScheduler from a configuration.

        Args:
            config: A configuration for a ConstantParamScheduler; its
                ``"value"`` entry is passed to :func:`__init__`.

        Returns:
            A ConstantParamScheduler instance.
        """
        assert "value" in config
        constant = config["value"]
        return cls(value=constant)

    def __call__(self, where: float):
        """Return the stored value, rejecting ``where`` at or beyond 1.0.

        Raises:
            RuntimeError: if ``where >= 1.0``.
        """
        past_end = where >= 1.0
        if past_end:
            raise RuntimeError(f"Invalid where parameter for scheduler: {where}")

        return self._value
@register_param_scheduler("cosine")
class CosineParamScheduler(ClassyParamScheduler):
    """
    Moves the param value from ``start_lr`` to ``end_lr`` along half a cosine
    period. Works as a cosine decay or a cosine warmup depending on which of
    the two values is larger.

    Example:

    .. code-block:: python

        start_lr: 0.1
        end_lr: 0.0001
    """

    def __init__(self, start_lr: float, end_lr: float):
        """
        Args:
            start_lr: value returned at ``where == 0``.
            end_lr: value the schedule approaches as ``where`` goes to 1.
        """
        super().__init__()
        self._start_lr = start_lr
        self._end_lr = end_lr

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "CosineParamScheduler":
        """Build a CosineParamScheduler from a configuration.

        Args:
            config: A configuration for a CosineParamScheduler; must hold
                ``"start_lr"`` and ``"end_lr"`` (see :func:`__init__`).

        Returns:
            A CosineParamScheduler instance.
        """
        has_both_ends = "start_lr" in config and "end_lr" in config
        assert has_both_ends, "Cosine scheduler requires a start_lr and a end_lr"

        start, end = config["start_lr"], config["end_lr"]
        return cls(start_lr=start, end_lr=end)

    def __call__(self, where: float):
        """Evaluate the cosine schedule at training progress ``where``."""
        amplitude = self._start_lr - self._end_lr
        # 2 at where == 0, shrinking towards 0 as where approaches 1.
        cosine_term = 1 + math.cos(math.pi * where)
        return self._end_lr + 0.5 * amplitude * cosine_term
@register_param_scheduler("linear")
class LinearParamScheduler(ClassyParamScheduler):
    """
    Straight-line interpolation of a parameter from ``start_lr`` to
    ``end_lr``. Acts as a warmup or a decay depending on which value is
    larger.

    Example:

    .. code-block:: python

        start_lr: 0.0001
        end_lr: 0.01

    Corresponds to a linear increasing schedule with values in [0.0001, 0.01)
    """

    def __init__(self, start_lr: float, end_lr: float):
        """
        Args:
            start_lr: value returned at ``where == 0``.
            end_lr: value the schedule approaches as ``where`` goes to 1.
        """
        super().__init__()
        self._start_lr = start_lr
        self._end_lr = end_lr

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "LinearParamScheduler":
        """Build a LinearParamScheduler from a configuration.

        Args:
            config: A configuration for a LinearParamScheduler; must hold
                ``"start_lr"`` and ``"end_lr"`` (see :func:`__init__`).

        Returns:
            A LinearParamScheduler instance.
        """
        has_both_ends = "start_lr" in config and "end_lr" in config
        assert has_both_ends, "Linear scheduler requires a start and a end"
        start, end = config["start_lr"], config["end_lr"]
        return cls(start_lr=start, end_lr=end)

    def __call__(self, where: float):
        """Return the weighted mix of the end and start values at ``where``."""
        end_part = self._end_lr * where
        start_part = self._start_lr * (1 - where)
        return end_part + start_part
@register_param_scheduler("multistep")
class MultiStepParamScheduler(ClassyParamScheduler):
    """
    Takes a predefined schedule for a param value, and a list of epochs
    which stand for the upper boundary (excluded) of each range.

    Example:

    .. code-block:: python

        values: [0.1, 0.01, 0.001, 0.0001]
        milestones = [30, 60, 80]
        num_epochs = 120

    Then the param value will be 0.1 for epochs 0-29, 0.01 for
    epochs 30-59, 0.001 for epochs 60-79, 0.0001 for epochs after epoch 80.
    Note that the length of values must be equal to the length of milestones
    plus one.
    """

    def __init__(
        self,
        values,
        num_epochs: int,
        update_interval: UpdateInterval,
        milestones: Optional[List[int]] = None,
    ):
        """
        Args:
            values: the param value used in each range, in order.
            num_epochs: total number of training epochs.
            update_interval: how often the value is refreshed.
            milestones: epochs at which the next value takes over. When
                omitted, milestones are spaced ``ceil(num_epochs /
                len(values))`` epochs apart.

        Raises:
            AssertionError: if a milestone is not smaller than
                ``num_epochs``, or the milestones are not positive and
                strictly increasing.
        """
        super().__init__(update_interval)
        self._param_schedule = values
        self._num_epochs = num_epochs
        self._milestones = milestones

        if milestones is None:
            # Default equispaced drop_epochs behavior
            step_width = math.ceil(self._num_epochs / float(len(self._param_schedule)))
            self._milestones = [
                step_width * (idx + 1) for idx in range(len(self._param_schedule) - 1)
            ]

        start_epoch = 0
        for milestone in self._milestones:
            # Do not exceed the total number of epochs
            assert milestone < self._num_epochs, (
                "Epoch milestone must be smaller than total number of epochs: num_epochs=%d, milestone=%d"
                % (self._num_epochs, milestone)
            )
            # Must be in ascending order (and the first one must be > 0)
            assert start_epoch < milestone, (
                "Epoch milestones must be positive and strictly increasing: previous=%d, milestone=%d"
                % (start_epoch, milestone)
            )
            start_epoch = milestone

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "MultiStepParamScheduler":
        """Instantiates a MultiStepParamScheduler from a configuration.

        Args:
            config: A configuration for a MultiStepParamScheduler.
                See :func:`__init__` for parameters expected in the config.
                ``update_interval`` is optional and defaults to ``"epoch"``.

        Returns:
            A MultiStepParamScheduler instance.
        """
        assert (
            "values" in config
            and isinstance(config["values"], list)
            and len(config["values"]) > 0
        ), "Non-Equi Step scheduler requires a list of at least one param value"
        assert is_pos_int(config["num_epochs"]), "Num epochs must be a positive integer"
        assert config["num_epochs"] >= len(
            config["values"]
        ), "Num epochs must be greater than param schedule"

        milestones = config.get("milestones", None)
        if "milestones" in config:
            assert (
                isinstance(config["milestones"], list)
                and len(config["milestones"]) == len(config["values"]) - 1
            ), (
                "Non-Equi Step scheduler requires a list of %d epochs"
                % (len(config["values"]) - 1)
            )
        return cls(
            values=config["values"],
            num_epochs=config["num_epochs"],
            update_interval=UpdateInterval(config.get("update_interval", "epoch")),
            milestones=milestones,
        )

    def __call__(self, where: float):
        """Return the value for the epoch that ``where`` falls into."""
        # The epsilon guards against a ``where`` that lands a hair below an
        # epoch boundary because of float error.
        epoch_num = int((where + self.WHERE_EPSILON) * self._num_epochs)
        # The number of milestones <= epoch_num is the index of the active value.
        return self._param_schedule[bisect.bisect_right(self._milestones, epoch_num)]
@register_param_scheduler("polynomial")
class PolynomialDecayParamScheduler(ClassyParamScheduler):
    """
    Polynomial decay of a param value with a fixed power: the value at
    training progress ``where`` is ``base_lr * (1 - where) ** power``.

    Example:

    .. code-block:: python

        base_lr: 0.1
        power: 0.9

    The value then starts at 0.1 and shrinks towards 0 as training
    progresses.
    """

    def __init__(self, base_lr, power):
        """
        Args:
            base_lr: value returned at ``where == 0``.
            power: exponent applied to the remaining fraction of training.
        """
        super().__init__()

        self._power = power
        self._base_lr = base_lr

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "PolynomialDecayParamScheduler":
        """Build a PolynomialDecayParamScheduler from a configuration.

        Args:
            config: A configuration for a PolynomialDecayParamScheduler;
                must hold ``"base_lr"`` and ``"power"`` (see :func:`__init__`).

        Returns:
            A PolynomialDecayParamScheduler instance.
        """
        has_required_keys = "base_lr" in config and "power" in config
        assert (
            has_required_keys
        ), "Polynomial decay scheduler requires a base lr and a power of decay"
        base_lr, power = config["base_lr"], config["power"]
        return cls(base_lr=base_lr, power=power)

    def __call__(self, where: float):
        """Evaluate the polynomial decay at training progress ``where``."""
        remaining = 1 - where
        return self._base_lr * remaining ** self._power
@register_param_scheduler("step")
class StepParamScheduler(ClassyParamScheduler):
    """
    Takes a fixed schedule for a param value and divides training evenly
    among the values of that schedule.

    Example:

    .. code-block:: python

        values: [0.1, 0.01, 0.001, 0.0001]
        num_epochs = 120

    Then the param value will be 0.1 for epochs 0-29, 0.01 for
    epochs 30-59, 0.001 for epoch 60-89, 0.0001 for epochs 90-119.
    """

    def __init__(self, num_epochs: Union[int, float], values: List[float]):
        """
        Args:
            num_epochs: accepted for interface compatibility; not used,
                because the schedule is driven purely by ``where``.
            values: the param value for each equally sized slice of training.
        """
        super().__init__()

        self._param_schedule = values

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "StepParamScheduler":
        """Instantiates a StepParamScheduler from a configuration.

        Args:
            config: A configuration for a StepParamScheduler.
                See :func:`__init__` for parameters expected in the config.

        Returns:
            A StepParamScheduler instance.
        """
        assert (
            "values" in config
            and isinstance(config["values"], list)
            and len(config["values"]) > 0
        ), "Step scheduler requires a list of at least one param value"
        assert config["num_epochs"] > 0, "Num epochs must be greater than 0"

        return cls(num_epochs=config["num_epochs"], values=config["values"])

    def __call__(self, where: float):
        """Return the value of the slice that ``where`` falls into.

        Args:
            where: training progress, expected in [0, 1).
        """
        num_values = len(self._param_schedule)
        ind = int((where + self.WHERE_EPSILON) * num_values)
        # ``where`` is below 1, but ``where + WHERE_EPSILON`` can reach 1 in
        # the last steps of a long run, which would index one past the end.
        # Stay on the final value instead.
        return self._param_schedule[min(ind, num_values - 1)]
@register_param_scheduler("step_with_fixed_gamma")
class StepWithFixedGammaParamScheduler(ClassyParamScheduler):
    """
    Multiplies the param value by ``gamma`` at evenly spaced points of
    training, ``num_decays`` times in total.

    Example:

    .. code-block:: python

        base_lr: 0.1
        gamma: 0.1
        num_decays: 3
        num_epochs: 120

    Then the param value will be 0.1 for epochs 0-29, 0.01 for
    epochs 30-59, 0.001 for epoch 60-89, 0.0001 for epochs 90-119.
    """

    def __init__(self, base_lr, num_decays, gamma, num_epochs):
        """
        Args:
            base_lr: the initial param value.
            num_decays: how many times the value is multiplied by ``gamma``.
            gamma: the multiplicative decay factor.
            num_epochs: forwarded to the underlying :class:`StepParamScheduler`.
        """
        super().__init__()

        self.base_lr = base_lr
        self.num_decays = num_decays
        self.gamma = gamma
        self.num_epochs = num_epochs

        # base_lr, base_lr * gamma, ... built by repeated multiplication.
        current = base_lr
        values = [current]
        for _ in range(num_decays):
            current = current * gamma
            values.append(current)

        self._step_param_scheduler = StepParamScheduler(
            num_epochs=num_epochs, values=values
        )

        # make this a STEP scheduler
        self.update_interval = UpdateInterval.STEP

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "StepWithFixedGammaParamScheduler":
        """Build a StepWithFixedGammaParamScheduler from a configuration.

        Args:
            config: A configuration for a StepWithFixedGammaParamScheduler.
                ``base_lr`` and ``gamma`` must be positive numbers;
                ``num_decays`` and ``num_epochs`` must be positive integers.

        Returns:
            A StepWithFixedGammaParamScheduler instance.
        """
        number_keys = ("base_lr", "gamma")
        integer_keys = ("num_decays", "num_epochs")

        for key in ("base_lr", "gamma", "num_decays", "num_epochs"):
            assert key in config, f"Step with fixed decay scheduler requires: {key}"
        for key in number_keys:
            entry = config[key]
            assert (
                isinstance(entry, (int, float)) and entry > 0
            ), f"{key} must be a positive number"
        for key in integer_keys:
            entry = config[key]
            assert (
                isinstance(entry, int) and entry > 0
            ), f"{key} must be a positive integer"

        return cls(
            base_lr=config["base_lr"],
            num_decays=config["num_decays"],
            gamma=config["gamma"],
            num_epochs=config["num_epochs"],
        )

    def __call__(self, where: float) -> float:
        """Delegate to the underlying step scheduler."""
        return self._step_param_scheduler(where)
@register_optimizer("rmsprop")
class RMSProp(ClassyOptimizer):
    """Classy optimizer backed by :class:`torch.optim.RMSprop`."""

    def __init__(
        self,
        lr_scheduler: ClassyParamScheduler,
        momentum: float,
        weight_decay: float,
        alpha: float,
        eps: float = 1e-8,
        centered: bool = False,
    ) -> None:
        """
        Args:
            lr_scheduler: scheduler providing the learning rate.
            momentum, weight_decay, alpha, eps, centered: stored and passed
                unchanged to ``torch.optim.RMSprop``.
        """
        super().__init__(lr_scheduler=lr_scheduler)

        self.momentum = momentum
        self.weight_decay = weight_decay
        self.alpha = alpha
        self.eps = eps
        self.centered = centered

    def init_pytorch_optimizer(self, model):
        """Create the wrapped ``torch.optim.RMSprop`` for ``model``."""
        super().init_pytorch_optimizer(model)
        # NOTE(review): param_groups_override and lr are not defined in this
        # class; presumably the base class sets them up -- confirm.
        self.optimizer = torch.optim.RMSprop(
            self.param_groups_override,
            lr=self.lr,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
            alpha=self.alpha,
            eps=self.eps,
            centered=self.centered,
        )

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "RMSProp":
        """Instantiates a RMSProp from a configuration.

        The caller's ``config`` (including a nested ``lr`` dict) is left
        untouched; defaults and ``num_epochs`` are filled into copies.

        Args:
            config: A configuration for a RMSProp.
                See :func:`__init__` for parameters expected in the config.
                ``lr`` is either a scheduler config dict or a plain value,
                which is turned into a constant schedule.

        Returns:
            A RMSProp instance.
        """
        # Shallow copy: the defaults below must not leak into the caller's dict.
        config = dict(config)

        # Default params
        config.setdefault("eps", 1e-8)
        config.setdefault("centered", False)

        assert (
            "lr" in config
        ), "Config must contain a learning rate 'lr' section for RMSProp optimizer"
        for key in ["momentum", "alpha"]:
            # Type check first, so a non-numeric value fails this assertion
            # instead of raising TypeError from the comparison.
            assert (
                key in config
                and isinstance(config[key], float)
                and 0.0 <= config[key] < 1.0
            ), f"Config must contain a '{key}' in [0, 1) for RMSProp optimizer"
        for key in ["weight_decay", "eps"]:
            assert key in config and is_pos_float(
                config[key]
            ), f"Config must contain a positive '{key}' for RMSProp optimizer"
        assert "centered" in config and isinstance(
            config["centered"], bool
        ), "Config must contain a boolean 'centered' param for RMSProp optimizer"

        lr_config = config["lr"]
        if isinstance(lr_config, dict):
            # Copy before adding num_epochs, to keep the caller's dict intact.
            lr_config = dict(lr_config)
        else:
            lr_config = {"name": "constant", "value": lr_config}

        lr_config["num_epochs"] = config["num_epochs"]
        lr_scheduler = build_param_scheduler(lr_config)

        return cls(
            lr_scheduler=lr_scheduler,
            momentum=config["momentum"],
            weight_decay=config["weight_decay"],
            alpha=config["alpha"],
            eps=config["eps"],
            centered=config["centered"],
        )

    @property
    def parameters(self) -> Dict[str, Any]:
        """Current hyperparameters, keyed by their ``torch.optim.RMSprop`` names."""
        return {
            "lr": self.lr,
            "momentum": self.momentum,
            "weight_decay": self.weight_decay,
            "alpha": self.alpha,
            "eps": self.eps,
            "centered": self.centered,
        }
@register_optimizer("sgd")
class SGD(ClassyOptimizer):
    """SGD optimizer backed by :class:`torch.optim.SGD`.

    The hyper-parameters are stored on the instance and forwarded to the
    PyTorch optimizer when :func:`init_pytorch_optimizer` is called.
    """

    def __init__(
        self,
        lr_scheduler: ClassyParamScheduler,
        momentum: float = 0,
        weight_decay: float = 0,
        nesterov: bool = False,
    ):
        """Stores the SGD hyper-parameters.

        Args:
            lr_scheduler: Scheduler handed to the base class constructor.
            momentum: Forwarded as ``momentum`` to ``torch.optim.SGD``.
            weight_decay: Forwarded as ``weight_decay``.
            nesterov: Forwarded as ``nesterov``.
        """
        super().__init__(lr_scheduler=lr_scheduler)

        self.momentum = momentum
        self.weight_decay = weight_decay
        self.nesterov = nesterov

    def init_pytorch_optimizer(self, model):
        """Creates the underlying ``torch.optim.SGD`` instance.

        ``self.param_groups_override`` and ``self.lr`` are not set in this
        class; presumably the base class provides them -- TODO confirm.
        """
        super().init_pytorch_optimizer(model)
        self.optimizer = torch.optim.SGD(
            self.param_groups_override,
            lr=self.lr,
            nesterov=self.nesterov,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
        )

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "SGD":
        """Instantiates a SGD from a configuration.

        Args:
            config: A configuration for a SGD. Must contain ``lr``,
                ``momentum``, ``weight_decay`` and ``num_epochs``;
                ``nesterov`` is optional. Note that the default for
                ``nesterov`` is written back into ``config``.

        Returns:
            A SGD instance.

        Raises:
            AssertionError: If a required key is missing or has an invalid
                value.
        """
        # Default params
        config.setdefault("nesterov", False)

        assert (
            "lr" in config
        ), "Config must contain a learning rate 'lr' section for SGD optimizer"
        # Check the type before comparing so that a non-numeric value trips
        # this assertion instead of raising a TypeError in the comparison.
        assert (
            "momentum" in config
            and isinstance(config["momentum"], float)
            and 0.0 <= config["momentum"] < 1.0
        ), "Config must contain a 'momentum' in [0, 1) for SGD optimizer"
        assert "nesterov" in config and isinstance(
            config["nesterov"], bool
        ), "Config must contain a boolean 'nesterov' param for SGD optimizer"
        assert "weight_decay" in config and is_pos_float(
            config["weight_decay"]
        ), "Config must contain a positive 'weight_decay' for SGD optimizer"

        # A bare number is shorthand for a constant learning rate schedule.
        lr_config = config["lr"]
        if not isinstance(lr_config, dict):
            lr_config = {"name": "constant", "value": lr_config}

        lr_config["num_epochs"] = config["num_epochs"]
        lr_scheduler = build_param_scheduler(lr_config)

        return cls(
            lr_scheduler=lr_scheduler,
            momentum=config["momentum"],
            weight_decay=config["weight_decay"],
            nesterov=config["nesterov"],
        )

    @property
    def parameters(self):
        """Returns the current hyper-parameters as a dict."""
        return {
            "lr": self.lr,
            "momentum": self.momentum,
            "weight_decay": self.weight_decay,
            "nesterov": self.nesterov,
        }
# Maps a registered task name to its class, and tracks the class names that
# are already in use so that two tasks cannot share one.
TASK_REGISTRY = {}
TASK_CLASS_NAMES = set()


def build_task(config):
    """Builds a ClassyTask from a config.

    This assumes a 'name' key in the config which is used to determine what
    task class to instantiate. For instance, a config `{"name": "my_task",
    "foo": "bar"}` will find a class that was registered as "my_task"
    (see :func:`register_task`) and call .from_config on it.

    Raises:
        KeyError: If the config has no 'name' key or the name was never
            registered.
    """
    task_name = config["name"]
    if task_name not in TASK_REGISTRY:
        # Same exception type as the plain dict lookup, but the message says
        # what was asked for and what is available.
        raise KeyError(
            f"Unknown task '{task_name}'; registered tasks: {sorted(TASK_REGISTRY)}"
        )
    return TASK_REGISTRY[task_name].from_config(config)


def register_task(name):
    """Registers a ClassyTask subclass.

    This decorator allows Classy Vision to instantiate a subclass of ClassyTask
    from a configuration file, even if the class itself is not part of the
    Classy Vision framework. To use it, apply this decorator to a ClassyTask
    subclass, like this:

    .. code-block:: python

        @register_task('my_task')
        class MyTask(ClassyTask):
            ...

    To instantiate a task from a configuration file, see :func:`build_task`.

    Raises:
        ValueError: (from the returned decorator) if the name or the class
            name is already registered, or the class does not extend
            ClassyTask.
    """

    def register_task_cls(cls):
        if name in TASK_REGISTRY:
            raise ValueError(f"Cannot register duplicate task ({name})")
        if not issubclass(cls, ClassyTask):
            raise ValueError(f"Task ({name}: {cls.__name__}) must extend ClassyTask")
        if cls.__name__ in TASK_CLASS_NAMES:
            raise ValueError(
                f"Cannot register task with duplicate class name ({cls.__name__})"
            )
        TASK_REGISTRY[name] = cls
        TASK_CLASS_NAMES.add(cls.__name__)
        return cls

    return register_task_cls
@register_task("classification_task")
class ClassificationTask(ClassyTask):
    """Basic classification training task.

    This task encapsulates all of the components and steps needed to
    train a classifier using a :class:`classy_vision.trainer.ClassyTrainer`.

    Assumes a train / test phase per each epoch and that the datasets
    have the same API as the map-style Dataset class in
    ``torch.utils.data`` (in particular, this task makes use of
    the len). If you are using an ``IterableDataset`` then a custom task
    may be appropriate.


    :var loss: Loss (see :class:`classy_vision.losses.ClassyLoss`) function used
        for computing the loss in each forward pass
    :var datasets: Mapping from a ``phase_type`` in ["train", "test"]
        to dataset used for training (or testing)
    :var dataloaders: Mapping from a ``phase_type`` to the dataloader built
        for it; empty until the task has been prepared
    :var meters: List of meters (see :class:`classy_vision.meters.ClassyMeter`)
        to calculate during training
    :var num_epochs: Number of epochs (passes over dataset) to train
    :var test_only: Used to only run the test phase
    :var base_model: Model to be trained, unwrapped in DDP or DP wrappers
    :var optimizer: Optimizer used in train step
    :var checkpoint: Serializable dict which represents state in training
    :var phases: List of phase specific information, e.g. if phase is
        train / test.
    :var hooks: List of hooks to apply during training
    :var train: Phase type, if true it means we are training,
        false means testing
    :var distributed_model: Base model, but wrapped in DDP (DistributedDataParallel)
    :var phase_idx: Current phase id, first phase is 0, if task has not started
        training then returns -1
    :var train_phase_idx: Only counts train phases
    :var num_updates: Number of total parameter updates applied to model
        by the optimizer
    :var data_iterator: Iterator which can be used to obtain batches
    :var num_samples_this_phase: Number of samples ran this phase
    :var losses: Loss curve

    """

    def __init__(self):
        """Constructs a ClassificationTask
        """
        super().__init__()

        self.loss = None
        self.datasets = {}
        # Filled in once the task is prepared; defined here so the attribute
        # always exists.
        self.dataloaders = {}
        self.meters = []
        self.num_epochs = 1
        self.test_only = False
        self.base_model = None
        self.optimizer = None
        self.checkpoint = None
        self.phases = []
        self.hooks = []
        self.train = True
        self.distributed_model = None
        self.phase_idx = -1
        self.train_phase_idx = -1
        self.num_updates = 0
        self.data_iterator = None
        self.num_samples_this_phase = 0
        self.losses = []

    def set_checkpoint(self, checkpoint):
        """Sets checkpoint on task.

        Args:
            checkpoint: A serializable dict representing current task state,
                or None to clear it

        Returns:
            The task itself, so that setter calls can be chained.
        """
        assert (
            checkpoint is None or "classy_state_dict" in checkpoint
        ), "Checkpoint does not contain classy_state_dict"
        self.checkpoint = checkpoint
        return self
def set_num_epochs(self, num_epochs: Union[int, float]):
    """Set number of epochs to be run.

    Args:
        num_epochs: Number of epochs to run task

    Returns:
        The task itself, so that setter calls can be chained.
    """
    self.num_epochs = num_epochs
    return self


def set_dataset(self, dataset: "ClassyDataset", phase_type: str):
    """Set dataset for phase type on task

    Args:
        dataset: ClassyDataset for returning samples.
        phase_type: str must be one of "train" or "test"

    Returns:
        The task itself, so that setter calls can be chained.
    """
    assert phase_type in [
        "train",
        "test",
    ], "phase_type must be in ['train', 'test']"
    self.datasets[phase_type] = dataset
    return self


def set_optimizer(self, optimizer: "ClassyOptimizer"):
    """Set optimizer for task

    Args:
        optimizer: optimizer for task

    Returns:
        The task itself, so that setter calls can be chained.
    """
    self.optimizer = optimizer
    return self


def set_loss(self, loss: "ClassyLoss"):
    """Set loss function for task

    Args:
        loss: loss for task

    Returns:
        The task itself, so that setter calls can be chained.
    """
    self.loss = loss
    return self


def set_meters(self, meters: List["ClassyMeter"]):
    """Set meters for task

    Args:
        meters: list of meters to compute during training

    Returns:
        The task itself, so that setter calls can be chained.
    """
    self.meters = meters
    return self


def set_hooks(self, hooks: List["ClassyHook"]):
    """Set hooks for task

    Args:
        hooks: List of hooks to apply during training. Every element must be
            a ClassyHook and no two hooks may report the same ``name()``.

    Returns:
        The task itself, so that setter calls can be chained.
    """
    # Imported here rather than at module level; presumably to avoid a
    # circular import -- TODO confirm.
    from classy_vision.hooks import ClassyHook

    assert isinstance(hooks, list), "hooks must be a list"
    assert all(
        isinstance(hook, ClassyHook) for hook in hooks
    ), "Every hook must be a ClassyHook instance"
    assert len({hook.name() for hook in hooks}) == len(
        hooks
    ), "Cannot have repeated hooks of the same class"

    self.hooks = hooks
    return self


def set_model(self, model: "ClassyModel"):
    """Set model for task

    Args:
        model: Model to be trained

    Returns:
        The task itself, so that setter calls can be chained.
    """
    self.base_model = model
    return self


def set_test_only(self, test_only: bool):
    """Set test only flag

    Args:
        test_only: If true, only test phases will be run

    Returns:
        The task itself, so that setter calls can be chained.
    """
    self.test_only = test_only
    return self
@classmethod
def from_config(cls, config: Dict[str, Any]) -> "ClassificationTask":
    """Instantiates a ClassificationTask from a configuration.

    Args:
        config: A configuration for a ClassificationTask. The keys read are
            ``num_epochs``, ``optimizer``, ``dataset`` (with ``train`` and
            ``test`` entries), ``loss`` and ``model``, plus the optional
            ``test_only`` (default False) and ``meters`` (default ``{}``).

    Returns:
        A ClassificationTask instance.
    """
    # Work on a private copy so that adding "num_epochs" (and whatever the
    # optimizer builder writes into its config) does not leak into the
    # caller's config.
    optimizer_config = copy.deepcopy(config["optimizer"])
    optimizer_config["num_epochs"] = config["num_epochs"]

    phase_types = ["train", "test"]
    datasets = {
        phase_type: build_dataset(config["dataset"][phase_type])
        for phase_type in phase_types
    }
    loss = build_loss(config["loss"])
    test_only = config.get("test_only", False)
    meters = build_meters(config.get("meters", {}))
    model = build_model(config["model"])
    # put model in eval mode in case any hooks modify model states, it'll
    # be reset to train mode before training
    model.eval()
    optimizer = build_optimizer(optimizer_config)

    task = (
        cls()
        .set_num_epochs(config["num_epochs"])
        .set_loss(loss)
        .set_test_only(test_only)
        .set_model(model)
        .set_optimizer(optimizer)
        .set_meters(meters)
    )
    for phase_type in phase_types:
        task.set_dataset(datasets[phase_type], phase_type)

    return task


@property
def num_batches_per_phase(self):
    """Returns number of batches in current phase iterator
    """
    return len(self.data_iterator)


@property
def model(self):
    """Returns model used in training (can be wrapped with DDP)
    """
    return (
        self.distributed_model if is_distributed_training_run() else self.base_model
    )


@property
def phase_type(self):
    """Returns current phase type. String with value "train" or "test"
    """
    return "train" if self.train else "test"
String with value "train" or "test" + """ + return "train" if self.train else "test" + + @property + def eval_phase_idx(self): + """Returns current evaluation phase + """ + return self.phase_idx - self.train_phase_idx - 1 + + def get_data_iterator(self): + """Returns data iterator for current phase + """ + return self.data_iterator + + def get_total_training_phases(self): + """ + Returns the total number of "train" phases in the task + """ + num_training_phases = 0 + for phase in self.phases: + if phase["train"] is True: + num_training_phases += 1 + return num_training_phases + + def _build_phases(self): + """Returns list of phases from config. + + These phases will look like: + { + train: is this a train or test phase? + optimizer: optimizer settings + } + + If this is a test only run, then only test phases will be + generated, if this is a training run, then x phases = x train + phases + x test phases, interleaved. + """ + if not self.test_only: + phases = [{"train": True} for _ in range(self.num_epochs)] + + final_phases = [] + for phase in phases: + final_phases.append(phase) + final_phases.append({"train": False}) + return final_phases + + return [{"train": False} for _ in range(self.num_epochs)] + + def build_dataloader( + self, + phase_type, + num_workers, + pin_memory, + multiprocessing_context=None, + **kwargs, + ): + """Buildss a dataloader iterable for a particular phase type. + + Args: + phase_type: "train" or "test" iterable + num_workers: Number of dataloading processes. If 0, + dataloading is done on main process. See `PyTorch dataloader + documentation `_ for more details on + ``num_workers`` and the usage + of python multiprocessing in dataloaders + pin_memory: if true pin memory on GPU. See PyTorch dataloader + documentation for details on ``pin_memory``. + multiprocessing_context: Determines how processes are spawned. + Value must be one of None, "spawn", "fork", "forkserver". 
def build_dataloaders(
    self, num_workers, pin_memory, multiprocessing_context=None, **kwargs
):
    """Build a dataloader for each phase type

    Args:
        num_workers: Number of dataloading processes. If 0,
            dataloading is done on main process. See the PyTorch dataloader
            documentation for more details on num_workers and the usage
            of python multiprocessing in dataloaders
        pin_memory: if true pin memory on GPU. See PyTorch dataloader
            documentation for details on pin_memory.
        multiprocessing_context: Determines how processes are spawned.
            Value must be one of None, "spawn", "fork", "forkserver".
            If None, then context is inherited from parent process
        **kwargs: Passed through unchanged to :func:`build_dataloader`.

    Returns:
        Returns an iterable over the dataset associated with each phase_type
    """
    return {
        phase_type: self.build_dataloader(
            phase_type,
            num_workers=num_workers,
            pin_memory=pin_memory,
            multiprocessing_context=multiprocessing_context,
            **kwargs,
        )
        for phase_type in self.datasets
    }


def prepare(
    self,
    num_dataloader_workers=0,
    pin_memory=False,
    use_gpu=False,
    dataloader_mp_context=None,
):
    """Prepares task for training, populates all derived attributes

    Args:
        num_dataloader_workers: Number of dataloading processes. If 0,
            dataloading is done on main process
        pin_memory: if true pin memory on GPU
        use_gpu: if true, load model, optimizer, loss, etc on GPU
        dataloader_mp_context: Determines how processes are spawned.
            Value must be one of None, "spawn", "fork", "forkserver".
            If None, then context is inherited from parent process

    Raises:
        AssertionError: If a checkpoint is set and its state could not be
            applied to the task.
    """
    self.phases = self._build_phases()
    self.dataloaders = self.build_dataloaders(
        num_workers=num_dataloader_workers,
        pin_memory=pin_memory,
        multiprocessing_context=dataloader_mp_context,
    )

    # move the model and loss to the right device
    if use_gpu:
        self.loss.cuda()
        self.base_model = copy_model_to_gpu(self.base_model)
    else:
        self.loss.cpu()
        self.base_model.cpu()

    # initialize the pytorch optimizer now since the model has been moved to
    # the appropriate device
    self.optimizer.init_pytorch_optimizer(self.base_model)

    classy_state_dict = (
        None
        if self.checkpoint is None
        else self.checkpoint.get("classy_state_dict")
    )

    if classy_state_dict is not None:
        state_load_success = update_classy_state(self, classy_state_dict)
        assert (
            state_load_success
        ), "Update classy state from checkpoint was unsuccessful."
def init_distributed_data_parallel_model(self):
    """Sets up distributed dataparallel and wraps model in DDP

    Raises:
        AssertionError: If the distributed model was already created.
    """
    assert (
        self.distributed_model is None
    ), "init_distributed_data_parallel_model must only be called once"

    # Inside the class this name resolves to the module-level helper of the
    # same name, not to this method.
    self.distributed_model = init_distributed_data_parallel_model(self.base_model)


@property
def where(self):
    """Returns the proportion of training that has completed.

    Returned value is a float in the range [0, 1)

    Raises:
        AssertionError: If the computed value falls outside [0, 1).
    """
    # num_updates is incremented by the global batch size on each update, so
    # dividing by the batch size gives the number of steps taken.
    current_step = self.num_updates / self.get_global_batchsize()
    num_steps = self.get_total_training_phases() * self.num_batches_per_phase
    where = current_step / num_steps

    assert where >= 0 and where < 1, f"Invalid where: {where}"

    return where


def get_classy_state(self, deep_copy: bool = False):
    """Returns serializable state of task

    Args:
        deep_copy: If true, does a deep copy of state before returning.
    """
    classy_state_dict = {
        "train": self.train,
        "base_model": self.base_model.get_classy_state(),
        "meters": [meter.get_classy_state() for meter in self.meters],
        "optimizer": self.optimizer.get_classy_state(),
        "phase_idx": self.phase_idx,
        "train_phase_idx": self.train_phase_idx,
        "num_updates": self.num_updates,
        "num_samples_this_phase": self.num_samples_this_phase,
        "losses": self.losses,
        "hooks": {hook.name(): hook.get_classy_state() for hook in self.hooks},
    }
    if deep_copy:
        classy_state_dict = copy.deepcopy(classy_state_dict)
    return classy_state_dict


def set_classy_state(self, state):
    """Set task state

    Args:
        state: Dict containing state of a task
    """
    self.train = state["train"]
    self.base_model.set_classy_state(state["base_model"])
    # Meters are matched to their saved state by position.
    for meter, meter_state in zip(self.meters, state["meters"]):
        meter.set_classy_state(meter_state)
    self.optimizer.set_classy_state(state["optimizer"])
    self.phase_idx = state["phase_idx"]
    self.train_phase_idx = state["train_phase_idx"]
    self.num_updates = state["num_updates"]
    self.num_samples_this_phase = state["num_samples_this_phase"]
    self.losses = state["losses"]
    for hook in self.hooks:
        # we still want to be able to run when new hooks are added or old
        # hooks are removed
        if hook.name() in state["hooks"]:
            hook.set_classy_state(state["hooks"][hook.name()])
        else:
            logging.warning(f"No state found for hook: {hook.name()}")
    # TODO (mannatsingh): Figure out how to set the state of the dataloaders
    # Re-build dataloader & re-create iterator.
    self._recreate_data_loader_from_dataset()
    self._reshuffle_data()
    self.create_data_iterator()
    # Set up pytorch module in train vs eval mode, update optimizer.
    self._set_model_train_mode()
def train_step(self, use_gpu, local_variables=None):
    """Train step to be executed in train loop

    Reads one sample, runs the forward pass, computes the loss and updates
    the meters. In train phases it also runs the backward pass and the
    optimizer step. Hooks are run after each stage.

    Args:
        use_gpu: if true, execute training on GPU
        local_variables: Dict containing intermediate values
            in train_step for access by hooks

    Raises:
        AssertionError: If the sample is not a dict with 'input' and
            'target' keys.
    """
    from classy_vision.hooks import ClassyHookFunctions

    if local_variables is None:
        local_variables = {}

    # We'll time train_step and some of its sections, and accumulate values
    # into perf_stats if it were defined in local_variables:
    perf_stats = local_variables.get("perf_stats", None)
    timer_train_step = PerfTimer("train_step_total", perf_stats)
    timer_train_step.start()

    # Process next sample
    with PerfTimer("read_sample", perf_stats):
        sample = next(self.get_data_iterator())
        local_variables["sample"] = sample

        assert (
            isinstance(local_variables["sample"], dict)
            and "input" in local_variables["sample"]
            and "target" in local_variables["sample"]
        ), f"Returned sample [{sample}] is not a map with 'input' and 'target' keys"

    self.run_hooks(local_variables, ClassyHookFunctions.on_sample.name)

    # Copy sample to GPU
    # "target" is captured before the copy, so it refers to the original
    # (not GPU-copied) target.
    local_variables["target"] = local_variables["sample"]["target"]
    if use_gpu:
        for key, value in local_variables["sample"].items():
            local_variables["sample"][key] = recursive_copy_to_gpu(
                value, non_blocking=True
            )

    # Only need gradients during training
    context = torch.enable_grad() if self.train else torch.no_grad()
    with context:
        # Forward pass
        with PerfTimer("forward", perf_stats):
            local_variables["output"] = self.model(
                local_variables["sample"]["input"]
            )

        self.run_hooks(local_variables, ClassyHookFunctions.on_forward.name)

        model_output = local_variables["output"]
        target = local_variables["sample"]["target"]
        local_variables["local_loss"] = self.loss(model_output, target)

        # NOTE: This performs an all_reduce_mean() on the losses across the
        # replicas. The reduce should ideally be weighted by the length of
        # the targets on each replica. This will only be an issue when
        # there are dummy samples present (once an epoch) and will only
        # impact the loss reporting (slightly).
        with PerfTimer("loss_allreduce", perf_stats):
            local_variables["loss"] = local_variables["local_loss"].detach().clone()
            local_variables["loss"] = all_reduce_mean(local_variables["loss"])

        # The stored value is the reduced loss scaled by the local batch size.
        self.losses.append(
            local_variables["loss"].data.cpu().item()
            * local_variables["target"].size(0)
        )

        model_output_cpu = model_output.cpu() if use_gpu else model_output

        # Update meters
        with PerfTimer("meters_update", perf_stats):
            for meter in self.meters:
                meter.update(
                    model_output_cpu, target.detach().cpu(), is_train=self.train
                )
        # After both loss and meters are updated, we run hooks. Among hooks,
        # `LossLrMeterLoggingHook` will log both loss and meter status
        self.run_hooks(local_variables, ClassyHookFunctions.on_loss_and_meter.name)

    num_samples_in_step = self.get_global_batchsize()
    self.num_samples_this_phase += num_samples_in_step

    # For training phases, run backwards pass / update optimizer
    if self.train:
        with PerfTimer("backward", perf_stats):
            self.optimizer.backward(local_variables["local_loss"])

        self.run_hooks(local_variables, ClassyHookFunctions.on_backward.name)

        self.optimizer.update_schedule_on_step(self.where)
        with PerfTimer("optimizer_step", perf_stats):
            self.optimizer.step()

        self.run_hooks(local_variables, ClassyHookFunctions.on_update.name)

        # Counted in samples, not in optimizer steps.
        self.num_updates += num_samples_in_step

    timer_train_step.stop()
    timer_train_step.record()
The reduce should ideally be weighted by the length of + # the targets on each replica. This will only be an issue when + # there are dummy samples present (once an epoch) and will only + # impact the loss reporting (slightly). + with PerfTimer("loss_allreduce", perf_stats): + local_variables["loss"] = local_variables["local_loss"].detach().clone() + local_variables["loss"] = all_reduce_mean(local_variables["loss"]) + + self.losses.append( + local_variables["loss"].data.cpu().item() + * local_variables["target"].size(0) + ) + + model_output_cpu = model_output.cpu() if use_gpu else model_output + + # Update meters + with PerfTimer("meters_update", perf_stats): + for meter in self.meters: + meter.update( + model_output_cpu, target.detach().cpu(), is_train=self.train + ) + # After both loss and meters are updated, we run hooks. Among hooks, + # `LossLrMeterLoggingHook` will log both loss and meter status + self.run_hooks(local_variables, ClassyHookFunctions.on_loss_and_meter.name) + + num_samples_in_step = self.get_global_batchsize() + self.num_samples_this_phase += num_samples_in_step + + # For training phases, run backwards pass / update optimizer + if self.train: + with PerfTimer("backward", perf_stats): + self.optimizer.backward(local_variables["local_loss"]) + + self.run_hooks(local_variables, ClassyHookFunctions.on_backward.name) + + self.optimizer.update_schedule_on_step(self.where) + with PerfTimer("optimizer_step", perf_stats): + self.optimizer.step() + + self.run_hooks(local_variables, ClassyHookFunctions.on_update.name) + + self.num_updates += num_samples_in_step + + timer_train_step.stop() + timer_train_step.record() + + def advance_phase(self): + """Performs bookkeeping / task updates between phases + + Increments phase idx, resets meters, resets loss history, + resets counters, shuffles dataset, rebuilds iterators, and + sets the train / test state for phase. 
+ """ + logging.info("Advancing phase") + # Reset meters for next phase / epoch + for meter in self.meters: + meter.reset() + + # Reset loss history for next epoch + self.losses = [] + + # Setup new phase + self.num_samples_this_phase = 0 + self.phase_idx += 1 + phase = self.phases[self.phase_idx] + self.train = True if phase["train"] else False + if self.train: + self.train_phase_idx += 1 + + # Re-build dataloader & re-create iterator anytime membership changes. + self._recreate_data_loader_from_dataset() + self._reshuffle_data() + self.create_data_iterator() + # Set up pytorch module in train vs eval mode, update optimizer. + self._set_model_train_mode() + + def done_training(self): + """Stop condition for training + """ + return self.phase_idx + 1 >= len(self.phases) + + def _recreate_data_loader_from_dataset(self, phase_type=None): + """ + This utility is invoked to re-create the data loader object + for the current phase of execution, using the existing dataset. + This is sufficient when advancing phases. 
+ """ + if phase_type is None: + phase_type = self.phase_type + + logging.info("Recreating data loader for new phase") + num_workers = 0 + if hasattr(self.dataloaders[phase_type], "num_workers"): + num_workers = self.dataloaders[phase_type].num_workers + pin_memory = False + if hasattr(self.dataloaders[phase_type], "pin_memory"): + pin_memory = self.dataloaders[phase_type].pin_memory + multiprocessing_context = None + if hasattr(self.dataloaders[phase_type], "multiprocessing_context"): + multiprocessing_context = self.dataloaders[ + phase_type + ].multiprocessing_context + if phase_type == "test": + current_phase_id = 0 + else: + current_phase_id = max(self.train_phase_idx, 0) + + self.dataloaders[phase_type] = self.build_dataloader( + phase_type=phase_type, + num_workers=num_workers, + pin_memory=pin_memory, + multiprocessing_context=multiprocessing_context, + current_phase_id=current_phase_id, + ) + + def _reshuffle_data(self): + """Shuffles the dataset if needed. + """ + if hasattr(self.dataloaders[self.phase_type].dataset, "do_shuffle"): + self.dataloaders[self.phase_type].dataset.do_shuffle( + epoch_num=self.phase_idx + ) + logging.info("Data shuffled.") + + def create_data_iterator(self): + """Creates data iterator for phase. + """ + # Delete iterator explicitly so that all dataloader processes + # are cleaned up. + del self.data_iterator + self.data_iterator = iter(self.dataloaders[self.phase_type]) + + def _set_model_train_mode(self): + """Set train mode for model + """ + phase = self.phases[self.phase_idx] + self.base_model.train(phase["train"]) + + if self.train and self.train_phase_idx >= 0: + self.optimizer.update_schedule_on_epoch(self.where) + + # TODO: Functions below should be better abstracted into the dataloader + # abstraction + def get_batchsize_per_replica(self): + """Return local replica's batchsize for dataset (e.g. 
def get_global_batchsize(self):
    """Return global batchsize across all trainers
    """
    return self.dataloaders[self.phase_type].dataset.get_global_batchsize()


def get_total_samples_trained_this_phase(self):
    """Returns the total number of samples processed in current phase
    """
    # TODO(T47573564) - cleaner abstraction
    return self.num_samples_this_phase


class ClassyTask(ABC):
    """
    An abstract base class for a training task.

    A ClassyTask encapsulates all the components and steps needed
    to train using a :class:`classy_vision.trainer.ClassyTrainer`.
    """

    def __init__(self) -> None:
        """
        Constructs a ClassyTask.
        """
        self.hooks = []

    @classmethod
    @abstractmethod
    def from_config(cls, config: Dict[str, Any]) -> "ClassyTask":
        """Instantiates a ClassyTask from a configuration.

        Args:
            config: A configuration for a ClassyTask.

        Returns:
            A ClassyTask instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def init_distributed_data_parallel_model(self) -> None:
        """
        Initialize ``torch.nn.parallel.distributed.DistributedDataParallel``.

        Needed for distributed training. This is where a model should be wrapped by DDP.
        """
        pass

    @property
    @abstractmethod
    def where(self) -> float:
        """
        Tells how far along (where) we are during training.

        Returns:
            A float in [0, 1) which tells the training progress.
        """
        pass

    @abstractmethod
    def advance_phase(self) -> None:
        """
        Advance the task a phase.

        Called when one phase of reading from
        :class:`classy_vision.dataset.ClassyDataset` is over.
        """
        pass

    @abstractmethod
    def done_training(self) -> bool:
        """
        Tells if we are done training.

        Returns:
            A boolean telling if training is over.
        """
        pass

    @abstractmethod
    def get_classy_state(self, deep_copy: bool = False) -> Dict[str, Any]:
        """Get the state of the ClassyTask.

        The returned state is used for checkpointing.

        Args:
            deep_copy: If True, creates a deep copy of the state dict. Otherwise, the
                returned dict's state will be tied to the object's.

        Returns:
            A state dictionary containing the state of the task.
        """
        pass

    @abstractmethod
    def set_classy_state(self, state):
        """Set the state of the ClassyTask.

        Args:
            state: The state dictionary. Must be the output of a call to
                :func:`get_classy_state`.

        This is used to load the state of the task from a checkpoint.
        """
        pass

    @abstractmethod
    def prepare(
        self,
        num_dataloader_workers=0,
        pin_memory=False,
        use_gpu=False,
        dataloader_mp_context=None,
    ) -> None:
        """
        Prepares the task for training.

        Will be called by the :class:`classy_vision.trainer.ClassyTrainer` to
        prepare the task.

        Args:
            num_dataloader_workers: Number of workers to create for the dataloaders
            pin_memory: Whether the dataloaders should copy the Tensors into CUDA
                pinned memory (default False)
            use_gpu: True if training on GPUs, False otherwise
            dataloader_mp_context: Multiprocessing context for the dataloader
                workers (default None)
        """
        pass

    @abstractmethod
    def train_step(self, use_gpu, local_variables: Optional[Dict] = None) -> None:
        """
        Run a train step.

        This corresponds to training over one batch of data from the dataloaders.

        Args:
            use_gpu: True if training on GPUs, False otherwise
            local_variables: Local variables created in the function. Can be passed to
                custom :class:`classy_vision.hooks.ClassyHook`.
        """
        pass

    def run_hooks(self, local_variables: Dict[str, Any], hook_function: str) -> None:
        """
        Helper function that runs a hook function for all the
        :class:`classy_vision.hooks.ClassyHook`.

        Args:
            local_variables: Local variables created in :func:`train_step`
            hook_function: One of the hook functions in the
                :class:`classy_vision.hooks.ClassyHookFunctions`
                enum.
        """
        # Hooks run in list order; each receives the task and the variables.
        for hook in self.hooks:
            getattr(hook, hook_function)(self, local_variables)
@register_task("fine_tuning")
class FineTuningTask(ClassificationTask):
    """Classification task that starts from a pretrained checkpoint.

    On top of the ClassificationTask state it keeps the pretrained
    checkpoint and two flags: ``reset_heads`` (passed on when the pretrained
    model state is loaded) and ``freeze_trunk`` (train only the heads).
    """

    def __init__(self, *args, **kwargs):
        """Constructs a FineTuningTask with no pretrained checkpoint set."""
        super().__init__(*args, **kwargs)
        self.pretrained_checkpoint = None
        self.reset_heads = False
        self.freeze_trunk = False

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "FineTuningTask":
        """Instantiates a FineTuningTask from a configuration.

        Args:
            config: A configuration for a FineTuningTask. In addition to what
                the parent ``from_config`` reads, the optional keys
                ``reset_heads`` and ``freeze_trunk`` (both default False) are
                used.

        Returns:
            A FineTuningTask instance.
        """
        fine_tuning_task = super().from_config(config)
        reset_heads = config.get("reset_heads", False)
        freeze_trunk = config.get("freeze_trunk", False)
        fine_tuning_task.set_reset_heads(reset_heads)
        fine_tuning_task.set_freeze_trunk(freeze_trunk)
        return fine_tuning_task

    def set_pretrained_checkpoint(self, checkpoint: Dict[str, Any]) -> "FineTuningTask":
        """Sets the checkpoint the model state is initialized from.

        Args:
            checkpoint: Dict which must contain a ``classy_state_dict`` key.

        Returns:
            The task itself, so that setter calls can be chained.
        """
        assert (
            "classy_state_dict" in checkpoint
        ), "Checkpoint does not contain classy_state_dict"
        self.pretrained_checkpoint = checkpoint
        return self

    def set_reset_heads(self, reset_heads: bool) -> "FineTuningTask":
        """Sets the flag forwarded when loading the pretrained model state."""
        self.reset_heads = reset_heads
        return self

    def set_freeze_trunk(self, freeze_trunk: bool) -> "FineTuningTask":
        """Sets whether everything except the heads is kept frozen."""
        self.freeze_trunk = freeze_trunk
        return self

    def _set_model_train_mode(self):
        """Sets train / eval mode for the model, then updates the optimizer
        schedule when in a train phase.
        """
        is_train_phase = self.phases[self.phase_idx]["train"]
        if not self.freeze_trunk:
            self.base_model.train(is_train_phase)
        else:
            # convert all the sub-modules to the eval mode, except the heads
            self.base_model.eval()
            for head_group in self.base_model.get_heads().values():
                for head in head_group.values():
                    head.train(is_train_phase)

        if self.train and self.train_phase_idx >= 0:
            self.optimizer.update_schedule_on_epoch(self.where)

    def prepare(
        self,
        num_dataloader_workers: int = 0,
        pin_memory: bool = False,
        use_gpu: bool = False,
        dataloader_mp_context=None,
    ) -> None:
        """Prepares the task, loading the pretrained model state if needed.

        The pretrained state is only loaded when no regular checkpoint is
        set. With ``freeze_trunk`` enabled, gradients are switched off for
        every parameter outside the heads.

        Raises:
            AssertionError: If no pretrained checkpoint was set, or the
                pretrained model state could not be loaded.
        """
        assert (
            self.pretrained_checkpoint is not None
        ), "Need a pretrained checkpoint for fine tuning"
        super().prepare(
            num_dataloader_workers, pin_memory, use_gpu, dataloader_mp_context
        )
        if self.checkpoint is None:
            # no checkpoint exists, load the model's state from the pretrained
            # checkpoint
            pretrained_model_state = self.pretrained_checkpoint["classy_state_dict"][
                "base_model"
            ]
            state_load_success = update_classy_model(
                self.base_model, pretrained_model_state, self.reset_heads
            )
            assert (
                state_load_success
            ), "Update classy state from pretrained checkpoint was unsuccessful."

        if not self.freeze_trunk:
            return

        # do not track gradients for all the parameters in the model except
        # for the parameters in the heads
        for param in self.base_model.parameters():
            param.requires_grad = False
        for head_group in self.base_model.get_heads().values():
            for head in head_group.values():
                for param in head.parameters():
                    param.requires_grad = True
+ + if self.freeze_trunk: + # do not track gradients for all the parameters in the model except + # for the parameters in the heads + for param in self.base_model.parameters(): + param.requires_grad = False + for heads in self.base_model.get_heads().values(): + for h in heads.values(): + for param in h.parameters(): + param.requires_grad = True diff --git a/classy_vision/templates/synthetic/configs/template_config.json b/classy_vision/templates/synthetic/configs/template_config.json new file mode 100644 index 0000000000..daa3f0a48c --- /dev/null +++ b/classy_vision/templates/synthetic/configs/template_config.json @@ -0,0 +1,66 @@ +{ + "name": "classification_task", + "num_epochs": 2, + "loss": { + "name": "my_loss" + }, + "dataset": { + "train": { + "name": "my_dataset", + "split": "train", + "crop_size": 224, + "class_ratio": 0.5, + "num_samples": 320, + "seed": 0, + "batchsize_per_replica": 32, + "use_shuffle": true, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "RandomResizedCrop", "size": 224}, + {"name": "RandomHorizontalFlip"}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}] + }, + "test": { + "name": "my_dataset", + "split": "val", + "crop_size": 224, + "class_ratio": 0.5, + "num_samples": 100, + "seed": 1, + "batchsize_per_replica": 32, + "use_shuffle": false, + "transforms": [{"name": "generic_image_transform", "transforms": [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225] + } + ]}] + } + }, + "meters": { + "accuracy": { + "topk": [1] + } + }, + "model": { + "name": "my_model" + }, + "optimizer": { + "name": "sgd", + "lr": { + "name": "step", + "values": [0.1, 0.01] + }, + "weight_decay": 1e-4, + "momentum": 0.9 + } +} diff --git a/classy_vision/templates/synthetic/datasets/__init__.py 
b/classy_vision/templates/synthetic/datasets/__init__.py new file mode 100644 index 0000000000..a152ab0446 --- /dev/null +++ b/classy_vision/templates/synthetic/datasets/__init__.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from pathlib import Path + +from classy_vision.generic.registry_utils import import_all_modules + + +FILE_ROOT = Path(__file__).parent + +# Automatically import any Python files in the datasets/ directory +import_all_modules(FILE_ROOT, "datasets") diff --git a/classy_vision/templates/synthetic/datasets/my_dataset.py b/classy_vision/templates/synthetic/datasets/my_dataset.py new file mode 100644 index 0000000000..58ac50f938 --- /dev/null +++ b/classy_vision/templates/synthetic/datasets/my_dataset.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
@register_dataset("my_dataset")
class MyDataset(ClassyDataset):
    """Template dataset backed by ``RandomImageBinaryClassDataset``."""

    def __init__(
        self,
        batchsize_per_replica: int,
        shuffle: bool,
        transform: Optional[Union[ClassyTransform, Callable]],
        num_samples: int,
        crop_size: int,
        class_ratio: float,
        seed: int,
        split: Optional[str] = None,
    ) -> None:
        """Builds the random dataset and hands it to ``ClassyDataset``.

        ``crop_size``, ``class_ratio``, ``num_samples`` and ``seed`` configure
        the underlying ``RandomImageBinaryClassDataset`` (created with
        ``SampleType.TUPLE``); the remaining arguments are forwarded to the
        parent constructor.
        """
        super().__init__(
            RandomImageBinaryClassDataset(
                crop_size, class_ratio, num_samples, seed, SampleType.TUPLE
            ),
            split,
            batchsize_per_replica,
            shuffle,
            transform,
            num_samples,
        )

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "MyDataset":
        """Instantiates the dataset from a configuration.

        ``crop_size``, ``class_ratio`` and ``seed`` are required keys;
        ``split`` is optional. The remaining settings are extracted with
        ``parse_config`` and the transform is built with ``build_transforms``.
        """
        required_keys = ["crop_size", "class_ratio", "seed"]
        assert all(key in config for key in required_keys)

        split = config.get("split")
        crop_size, class_ratio, seed = (config[key] for key in required_keys)

        parsed = cls.parse_config(config)
        transform_config, batchsize_per_replica, shuffle, num_samples = parsed

        return cls(
            batchsize_per_replica,
            shuffle,
            build_transforms(transform_config),
            num_samples,
            crop_size,
            class_ratio,
            seed,
            split=split,
        )
@register_loss("my_loss")
class MyLoss(ClassyLoss):
    """Template loss: binary cross entropy against a two-class one-hot target."""

    def forward(self, input, target):
        """Returns the binary cross entropy between ``input`` and ``target``.

        ``target`` is one-hot encoded with two classes and cast to float
        before being compared with ``input``.
        """
        one_hot_target = F.one_hot(target, num_classes=2)
        return F.binary_cross_entropy(input, one_hot_target.float())

    @classmethod
    def from_config(cls, config):
        """Creates the loss; nothing is read from ``config``."""
        return cls()
@register_model("my_model")
class MyModel(ClassyModel):
    """Template model: pooling, flatten, one linear layer and a sigmoid."""

    def __init__(self):
        super().__init__()
        # The linear layer's input size (3 * 20 * 20) matches a 3-channel
        # input pooled to 20 x 20 and flattened from dimension 1 onwards.
        layers = [
            nn.AdaptiveAvgPool2d((20, 20)),
            nn.Flatten(1),
            nn.Linear(3 * 20 * 20, 2),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Runs ``x`` through the sequential stack and returns the result."""
        return self.model(x)

    @classmethod
    def from_config(cls, config):
        """Creates the model; nothing is read from ``config``."""
        return cls()
class ClassyTrainer:
    """Base class for shared training code.

    A trainer is responsible for setting up the environment for
    training, for instance: configuring rendezvous for distributed
    training, deciding what GPU to use and so on. Trainers also
    control the outer portion of the training loop, but delegate to
    the task to decide how exactly to perform inference, compute loss
    etc. That allows combining tasks with different trainers depending
    on whether you want to train on your current machine, AWS cluster
    etc.

    """

    def __init__(
        self,
        use_gpu: Optional[bool] = None,
        num_dataloader_workers: int = 0,
        dataloader_mp_context: Optional[str] = None,
    ):
        """Constructor for ClassyTrainer.

        Args:
            use_gpu: If true, then use GPUs for training.
                If None, then check if we have GPUs available, if we do
                then use GPU for training.
            num_dataloader_workers: Number of CPU processes doing dataloading
                per GPU. If 0, then dataloading is done on main thread.
            dataloader_mp_context: Determines how to launch
                new processes for dataloading. Must be one of "fork", "forkserver",
                "spawn". If None, process launching is inherited from parent.
        """
        if use_gpu is None:
            use_gpu = torch.cuda.is_available()
        self.use_gpu = use_gpu
        self.num_dataloader_workers = num_dataloader_workers
        self.dataloader_mp_context = dataloader_mp_context

    def train(self, task: ClassyTask):
        """Runs training phases, phases are generated from the config.

        The task is prepared, the ``on_start`` hooks run, and then each phase
        is advanced and stepped until ``train_step`` raises ``StopIteration``.
        At the end of every phase the meters are synced, a barrier is
        executed and the ``on_phase_end`` hooks run. ``on_end`` hooks run once
        the task reports that training is done.

        Args:
            task: Task to be used in training. It should contain
                everything that is needed for training

        Raises:
            AssertionError: If ``task`` is not a :class:`ClassyTask`.
        """
        # Validate the task before touching it, so that an invalid object is
        # rejected before prepare() is called on it.
        assert isinstance(task, ClassyTask)

        pin_memory = self.use_gpu and torch.cuda.device_count() > 1
        task.prepare(
            num_dataloader_workers=self.num_dataloader_workers,
            pin_memory=pin_memory,
            use_gpu=self.use_gpu,
            dataloader_mp_context=self.dataloader_mp_context,
        )

        if is_distributed_training_run():
            task.init_distributed_data_parallel_model()

        # Shared scratch space handed to every hook and train step.
        local_variables = {}
        task.run_hooks(local_variables, ClassyHookFunctions.on_start.name)

        while not task.done_training():
            task.advance_phase()

            # Start phase hooks
            task.run_hooks(local_variables, ClassyHookFunctions.on_phase_start.name)
            while True:
                # Process next sample; StopIteration marks the end of the phase
                try:
                    task.train_step(self.use_gpu, local_variables)
                except StopIteration:
                    break

            logging.info("Syncing meters on phase end...")
            for meter in task.meters:
                meter.sync_state()
            logging.info("...meters synced")
            barrier()
            task.run_hooks(local_variables, ClassyHookFunctions.on_phase_end.name)

        task.run_hooks(local_variables, ClassyHookFunctions.on_end.name)
def _init_env_vars():
    """Fills in default environment variables for distributed training.

    If ``WORLD_SIZE`` and ``RANK`` are not both present, ``WORLD_SIZE``,
    ``RANK`` and ``LOCAL_RANK`` are set to single-process values. If
    ``MASTER_ADDR`` and ``MASTER_PORT`` are not both present, both are set to
    the local defaults.
    """
    env = os.environ

    if not ("WORLD_SIZE" in env and "RANK" in env):
        env.update({"WORLD_SIZE": "1", "RANK": "0", "LOCAL_RANK": "0"})

    if not ("MASTER_ADDR" in env and "MASTER_PORT" in env):
        env.update({"MASTER_ADDR": "127.0.0.1", "MASTER_PORT": "29500"})


def _init_distributed(use_gpu: bool):
    """Initializes the default process group used for DDP.

    Requires the script to be started with torch.distributed.launch
    script and uses environment variables for node finding.

    Args:
        use_gpu: If true, use distributed GPU training, else use CPU
    """
    if use_gpu:
        backend = "nccl"
    else:
        backend = "gloo"

    torch.distributed.init_process_group(
        backend=backend,
        init_method="env://",
        world_size=int(os.environ["WORLD_SIZE"]),
        rank=int(os.environ["RANK"]),
    )


class DistributedTrainer(ClassyTrainer):
    """Distributed trainer for using multiple training processes
    """

    def __init__(
        self,
        use_gpu: Optional[bool] = None,
        num_dataloader_workers: int = 0,
        dataloader_mp_context: Optional[str] = None,
    ):
        """Constructor for DistributedTrainer.

        Besides storing the arguments through the base class, this sets the
        default environment variables, initializes the process group and
        selects the device: the CUDA device whose index is ``LOCAL_RANK``
        when GPUs are used, the CPU otherwise.

        Args:
            use_gpu: If true, then use GPUs for training.
                If None, then check if we have GPUs available, if we do
                then use GPU for training.
            num_dataloader_workers: Number of CPU processes doing dataloading
                per GPU. If 0, then dataloading is done on main thread.
            dataloader_mp_context: Determines how to launch
                new processes for dataloading. Must be one of "fork", "forkserver",
                "spawn". If None, process launching is inherited from parent.
        """
        super().__init__(
            use_gpu=use_gpu,
            num_dataloader_workers=num_dataloader_workers,
            dataloader_mp_context=dataloader_mp_context,
        )

        _init_env_vars()
        _init_distributed(self.use_gpu)
        logging.info(
            f"Done setting up distributed process_group with rank {get_rank()}"
            + f", world_size {get_world_size()}"
        )

        # LOCAL_RANK is read in both cases, before the device is chosen.
        local_rank = int(os.environ["LOCAL_RANK"])
        if not self.use_gpu:
            logging.info("Using CPU")
            set_cpu_device()
            return

        logging.info("Using GPU, CUDA device index: {}".format(local_rank))
        set_cuda_device_index(local_rank)
class ElasticTrainer(ClassyTrainer):
    """Trainer that drives a task through ``torchelastic.train``.

    The per-step logic lives in :func:`_run_step`; the state that
    torchelastic syncs, saves and loads is :class:`_ClassyElasticState`.
    """

    def __init__(
        self,
        use_gpu,
        num_dataloader_workers,
        elastic_coordinator,
        input_args,
        local_rank,
        dataloader_mp_context=None,
    ):
        """Constructor for ElasticTrainer.

        Args:
            use_gpu: If true, then use GPUs for training. If None, the base
                class checks whether GPUs are available.
            num_dataloader_workers: Number of dataloading worker processes,
                forwarded to the base class.
            elastic_coordinator: Coordinator object passed to
                ``torchelastic.train``.
            input_args: Arguments stored on the trainer and passed to
                ``get_checkpoint_dict`` when the state is saved.
            local_rank: Index of the CUDA device to select when using GPUs;
                also used in log messages.
            dataloader_mp_context: Forwarded to the base class.
        """
        super().__init__(
            use_gpu=use_gpu,
            num_dataloader_workers=num_dataloader_workers,
            dataloader_mp_context=dataloader_mp_context,
        )
        pid = os.getpid()
        # Branch on the resolved self.use_gpu rather than the raw argument:
        # the base class turns use_gpu=None into the CUDA availability, and
        # the device selected here has to agree with that decision.
        if self.use_gpu:
            set_cuda_device_index(local_rank)
            device_idx = torch.cuda.current_device()
            log.info(f"initialized worker {local_rank} (pid={pid}, gpu={device_idx})")
            device_properties = torch.cuda.get_device_properties(device_idx)
            log.info(f"gpu device properties: {device_properties}")
        else:
            # cpu
            set_cpu_device()
            log.info(f"initialized worker {local_rank} (pid={pid}, cpu)")

        self.elastic_coordinator = elastic_coordinator
        self.input_args = input_args

    def train(self, task):
        """
        Runs training phases, phases are generated from the config.

        The task is validated and prepared, wrapped in a
        ``_ClassyElasticState`` and handed to ``torchelastic.train`` together
        with a step function that delegates to :func:`_run_step`. ``on_start``
        and ``on_end`` hooks run before and after that call.
        """

        assert isinstance(task, ClassyTask)
        pin_memory = self.use_gpu and torch.cuda.device_count() > 1

        task.prepare(
            num_dataloader_workers=self.num_dataloader_workers,
            pin_memory=pin_memory,
            use_gpu=self.use_gpu,
            dataloader_mp_context=self.dataloader_mp_context,
        )
        state = self._ClassyElasticState(task, self.input_args)

        local_variables = {}

        state.advance_to_next_phase = True

        def elastic_train_step(orig_state):
            return self._run_step(orig_state, local_variables, self.use_gpu)

        task.run_hooks(local_variables, ClassyHookFunctions.on_start.name)

        torchelastic.train(self.elastic_coordinator, elastic_train_step, state)

        task.run_hooks(local_variables, ClassyHookFunctions.on_end.name)

    def _run_step(self, state, local_variables, use_gpu):
        """Runs one train step and returns ``(state, worker_stats)``.

        Raises:
            StopIteration: When the task is done training and the last phase
                has completed.
        """
        # Check for training complete but only terminate when the last phase is done
        if state.task.done_training() and state.advance_to_next_phase:
            raise StopIteration

        if state.advance_to_next_phase:
            state.task.advance_phase()

            # Start phase hooks
            state.task.run_hooks(
                local_variables, ClassyHookFunctions.on_phase_start.name
            )

            state.advance_to_next_phase = False

        # Process one train step
        try:
            if state.skip_current_phase:
                state.advance_to_next_phase = True
                state.skip_current_phase = False  # Reset flag
            else:
                state.task.train_step(use_gpu, local_variables)
        except StopIteration:
            # the task signals the end of the phase with StopIteration
            state.advance_to_next_phase = True
        if state.advance_to_next_phase:
            logging.info("Syncing meters on phase end...")
            for meter in state.task.meters:
                meter.sync_state()
            logging.info("...meters synced")
            barrier()
            # Phase complete
            # NOTE: this is a good time to checkpoint, as it guarantees
            # that loading from checkpoint will properly advance the phase.
            state.task.run_hooks(local_variables, ClassyHookFunctions.on_phase_end.name)

        progress_rate = None  # using None to signal 'unknown'
        perf_stats = local_variables.get("perf_stats", None)
        if perf_stats is not None:
            batch_time = perf_stats._cuda_stats["train_step_total"].smoothed_value
            if batch_time is not None and batch_time > 0.0:
                # rate = number of mini-batches per second
                progress_rate = 1.0 / batch_time

        progress_stats = self._ClassyWorkerStats(progress_rate)
        return state, progress_stats

    class _ClassyWorkerStats(WorkerStats):
        """
        ClassyVision-specific implementation of WorkerStats,
        which is used by torchelastic train_loop
        to detect (and correct stragglers), or other progress-impeding issues.
        """

        def __init__(self, progress_rate):
            self.progress_rate = progress_rate

        def get_progress_rate(self) -> Optional[float]:
            """Returns the stored progress rate (``None`` means unknown)."""
            return self.progress_rate

    class _ClassyElasticState(torchelastic.State):
        """
        Rollback is disabled on this state since currently, data loaders are
        too expensive to snapshot on every train_step
        """

        def __init__(self, task: ClassyTask, input_args: Any):
            self.task = task
            self.input_args = input_args if input_args else {}
            self.advance_to_next_phase = True
            self.skip_current_phase = False

        def broadcast_state(self, rank, src_rank):
            """Serializes the state on ``src_rank`` and loads it on every rank."""
            data = None
            if rank == src_rank:
                save_stream = io.BytesIO()
                self.save(save_stream)
                # Note: save_stream.getbuffer() will return a memoryview, which
                # cannot be convert to a tensor, need convert it to np array first
                data = numpy.asarray(save_stream.getbuffer())
            data = dist.broadcast_binary(data, src_rank)
            load_stream = io.BytesIO(data)
            self.load(load_stream)

        def sync(self, world_size, rank):
            """Brings this worker's task in line with the most tenured rank.

            Recreates the DDP wrapper, broadcasts the state of the rank with
            the most updates, and rebuilds the data loaders and iterator.
            """
            self._recreate_ddp_model()

            # Figure out which trainer has the most up-to-date data, and
            # use that trainer to broadcast task to all others.
            src_rank = self._compute_most_tenured_rank(rank)
            self.broadcast_state(rank, src_rank)

            # Current on-box data loaders don't support recovery in the middle of
            # a phase and since we don't rollback the model, re-training is
            # worse than losing data so we're skipping rest of the phase.
            #
            # Also we can't just set advance_to_next_phase to True here as it
            # will cause on_phase_end() hooks to not run.
            # We also only skip the current phase if this isn't the first time
            # calling sync from the PET train_loop. We'll need to reconsider this
            # if the PET train_loop changes. advance_to_next_phase is already
            # synced from rest of the trainers at this point.
            if not self.advance_to_next_phase:
                self.skip_current_phase = True

            logging.warning(
                "RANK {}: now we all have {} updates and latest task".format(
                    rank, self.task.num_updates
                )
            )

            # Re-build dataloader, dataset, and iterator anytime membership
            # changes. When world_size potentially changes (e.g. re-rendezvous), we
            # need to re-create both the dataset and dataloader objects because we
            # create a ShardDataset based on the world size at the time of
            # construction.
            # TODO (T55691442): Figure out how to solve re-sharding without
            # rebuilding the datasets. sync() only works correctly without elasticity
            # currently.
            for phase_type in self.task.datasets.keys():
                self.task._recreate_data_loader_from_dataset(phase_type)
            self.task._reshuffle_data()
            self.task.create_data_iterator()
            # Set up pytorch module in train vs eval mode, update optimizer.
            self.task._set_model_train_mode()

        def should_save_checkpoint(self, rank):
            """Returns whether a checkpoint should be taken now."""
            # should_save_checkpoint need to return same value for all trainers
            # we take checkpoint when a phase completed
            # TODO add test coverage for this

            # currently for typical imagenet resnet model checkpointing take 15 seconds
            # consider the cost it is not very necessary to do checkpoint for test phase
            return self.task.train and self.advance_to_next_phase

        def save(self, stream):
            """Writes the checkpoint dict plus the phase flag to ``stream``."""
            checkpoint_state = get_checkpoint_dict(self.task, self.input_args)
            checkpoint_state["advance_to_next_phase"] = self.advance_to_next_phase
            torch.save(checkpoint_state, stream)

        def load(self, stream):
            """Restores the task state (and phase flag, if present) from ``stream``."""
            # NOTE(review): torch.load unpickles the stream; the bytes seen
            # here come from save() via broadcast_state - confirm that any
            # other source feeding load() is trusted.
            checkpoint_state = torch.load(stream)
            state = checkpoint_state["classy_state_dict"]
            self.task.set_classy_state(state)
            if "advance_to_next_phase" in checkpoint_state:
                self.advance_to_next_phase = checkpoint_state["advance_to_next_phase"]

        def _recreate_ddp_model(self):
            # Delete & re-create the DDP module wrapper. This is required because
            # each instance of DDP is tied to a specific process group, and
            # any time the set of workers in PET changes, we create a new
            # process group, so the old DDP wrapper is invalid.
            # TODO: does calling del here invoke C++ destructor if it's the last
            # reference? Or is assigning None sufficient?
            del self.task.distributed_model
            self.task.distributed_model = None
            self.task.init_distributed_data_parallel_model()

        def _compute_most_tenured_rank(self, rank):
            """Returns the rank whose task has performed the most updates."""
            logging.warning(
                "RANK {}: syncing, I have {} updates".format(
                    rank, self.task.num_updates
                )
            )
            # Propagate state to new trainer processes.
            # First, figure out which process has a copy of the most recent
            # state by getting a copy of everybody's iteration counter.
            max_rank, max_num_updates = dist.all_gather_return_max_long(
                self.task.num_updates
            )

            logging.warning(
                "RANK {}: rank {} has the most updates {}".format(
                    rank, max_rank, max_num_updates
                )
            )

            return max_rank
+ """ + super().__init__( + use_gpu=use_gpu, + num_dataloader_workers=num_dataloader_workers, + dataloader_mp_context=dataloader_mp_context, + ) + if self.use_gpu: + logging.info("Using GPU, CUDA device index: {}".format(0)) + set_cuda_device_index(0) + else: + logging.info("Using CPU") + set_cpu_device() diff --git a/examples/ray/cluster_config.yml b/examples/ray/cluster_config.yml new file mode 100644 index 0000000000..fec38fde31 --- /dev/null +++ b/examples/ray/cluster_config.yml @@ -0,0 +1,165 @@ +# A unique identifier for the head node and workers of this cluster. +cluster_name: default +# The minimum number of worker nodes to launch in addition to the head +# node. This number should be >= 0. +min_workers: 2 + +# The maximum number of worker nodes to launch in addition to the head +# node. This takes precedence over min_workers. +max_workers: 2 + +# The initial number of worker nodes to launch in addition to the head +# node. When the cluster is first brought up (or when it is refreshed with a +# subsequent `ray up`) this number of nodes will be started. +initial_workers: 2 + +# Whether or not to autoscale aggressively. If this is enabled, if at any point +# we would start more workers, we start at least enough to bring us to +# initial_workers. +autoscaling_mode: default + +# This executes all commands on all nodes in the docker container, +# and opens all the necessary ports to support the Ray cluster. +# Empty string means disabled. +docker: + image: "" # e.g., tensorflow/tensorflow:1.5.0-py3 + container_name: "" # e.g. ray_docker + run_options: [] # Extra options to pass into "docker run" + +# The autoscaler will scale up the cluster to this target fraction of resource +# usage. For example, if a cluster of 10 nodes is 100% busy and +# target_utilization is 0.8, it would resize the cluster to 13. This fraction +# can be decreased to increase the aggressiveness of upscaling. +# This value must be less than 1.0 for scaling to happen.
+# +# For this Classy Vision example, we want to disable autoscaling. +target_utilization_fraction: 1.0 + +# If a node is idle for this many minutes, it will be removed. +idle_timeout_minutes: 5 + +# Cloud-provider specific configuration. +provider: + type: aws + region: us-west-2 + # Availability zone(s), comma-separated, that nodes may be launched in. + # Nodes are currently spread between zones by a round-robin approach, + # however this implementation detail should not be relied upon. + availability_zone: us-west-2a,us-west-2b + + # This prevents Ray from re-using nodes from a previous cluster that were stopped. + # Reusing nodes is desirable cause it speeds up the time to setup a new cluster, + # but in our experiments it made each run hard to reproduce. You have to make sure + # your setup commands are idempotent. For now disable caching to keep things simple, + # but it shouldn't be too hard to get this working with caching. + cache_stopped_nodes: False + +# How Ray will authenticate with newly launched nodes. +auth: + ssh_user: ubuntu +# By default Ray creates a new private keypair, but you can also use your own. +# If you do so, make sure to also set "KeyName" in the head and worker node +# configurations below. +# ssh_private_key: /path/to/your/key.pem + +# Provider-specific config for the head node, e.g. instance type. By default +# Ray will auto-configure unspecified fields such as SubnetId and KeyName. +# For more documentation on available fields, see: +# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances +# +# In this Classy Vision example, we will have a CPU-only machine as the head node, +# and GPU workers to actually run training jobs. 
+head_node: + InstanceType: m5.large + ImageId: ami-05931d11d2bf831c3 # Deep Learning AMI (Ubuntu) Version 24.3 + + # You can provision additional disk space with a conf as follows + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: 100 + + # Additional options in the boto docs. + +# Provider-specific config for worker nodes, e.g. instance type. By default +# Ray will auto-configure unspecified fields such as SubnetId and KeyName. +# For more documentation on available fields, see: +# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances +worker_nodes: + # We will use GPU workers + InstanceType: p2.xlarge + ImageId: ami-05931d11d2bf831c3 # Deep Learning AMI (Ubuntu) Version 24.3 + + # Additional options in the boto docs. + +# Files or directories to copy to the head and worker nodes. The format is a +# dictionary from REMOTE_PATH: LOCAL_PATH, e.g. +file_mounts: { +# "/path1/on/remote/machine": "/path1/on/local/machine", +} + +# List of commands that will be run before `setup_commands`. If docker is +# enabled, these commands will run outside the container and before docker +# is setup. +initialization_commands: [] + +# List of shell commands to run to set up nodes. +setup_commands: + # These commands are carefully crafted to get EFS mounts working correctly + # with this AMI. 
Be careful editing this: in my experience the EFS setup + # process is very fragile + # + # ********************************************* + # + # ATTENTION: edit the {{FileSystemId}} entry below to match the EFS volume you have created + # + # ********************************************* + - sudo kill -9 `sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}' | tail -n 1`; + sudo pkill -9 apt-get; + sudo pkill -9 dpkg; + sudo dpkg --configure -a; + sudo apt-get -y update; + sudo apt-get -y install binutils; + cd $HOME; + git clone https://github.com/aws/efs-utils; + cd $HOME/efs-utils; + ./build-deb.sh; + sudo kill -9 `sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}' | tail -n 1`; + sudo pkill -9 apt-get; + sudo pkill -9 dpkg; + sudo dpkg --configure -a; + sudo apt-get -y install ./build/amazon-efs-utils*deb; + sudo apt-get -y install ./build/amazon-efs-utils*deb; + cd $HOME; + mkdir efs; + sudo mount -t efs {{FileSystemId}}:/ efs; + sudo chmod 777 efs; + +# Custom commands that will be run on the head node after common setup. +head_setup_commands: + - conda install pytorch torchvision -c pytorch -y + - pip install --upgrade pip + - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.8.0.dev6-cp36-cp36m-manylinux1_x86_64.whl + - pip install boto3==1.4.8 # 1.4.8 adds InstanceMarketOptions + - pip install classy-vision + +# Custom commands that will be run on worker nodes after common setup. +worker_setup_commands: + # The AMI we use does not have a version of PyTorch compatible with Classy Vision, + # so we have to install it manually here. This AMI also comes with the hard drive + # almost full, so we have to uninstall a conda environment to make up space. 
+ - conda env remove -n caffe2_p27 -y || true + - conda install pytorch torchvision -c pytorch -y + - pip install --upgrade pip + - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.8.0.dev6-cp36-cp36m-manylinux1_x86_64.whl + - pip install classy-vision + +# Command to start ray on the head node. You don't need to change this. +head_start_ray_commands: + - ray stop + - ulimit -n 65536; ray start --head --redis-port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml + +# Command to start ray on worker nodes. You don't need to change this. +worker_start_ray_commands: + - ray stop + - ulimit -n 65536; ray start --redis-address=$RAY_HEAD_IP:6379 --object-manager-port=8076 diff --git a/examples/ray/requirements.txt b/examples/ray/requirements.txt new file mode 100644 index 0000000000..bae4aa9b29 --- /dev/null +++ b/examples/ray/requirements.txt @@ -0,0 +1,2 @@ +ray==0.7.6 +boto3==1.10.28 diff --git a/hubconf.py b/hubconf.py new file mode 100644 index 0000000000..a5e21c5c4a --- /dev/null +++ b/hubconf.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
#!/bin/bash
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Build the Classy Vision documentation site:
#   1. generate the API reference with Sphinx,
#   2. install the Docusaurus site dependencies,
#   3. post-process the Sphinx HTML and copy its JS/search assets into the site,
#   4. convert the tutorials,
#   5. either build the static site (-b) or start a local server.
#
# run this script from the project root using `./scripts/build_docs.sh`

usage() {
  echo "Usage: $0 [-b]"
  echo ""
  echo "Build Classy Vision documentation."
  echo ""
  echo " -b Build static version of documentation (otherwise start server)"
  echo ""
  exit 1
}

BUILD_STATIC=false

while getopts 'hb' flag; do
  case "${flag}" in
    h)
      usage
      ;;
    b)
      BUILD_STATIC=true
      ;;
    *)
      usage
      ;;
  esac
done

echo "-----------------------------------"
echo "Generating API reference via Sphinx"
echo "-----------------------------------"
cd sphinx || exit
make html
cd .. || exit

echo "-----------------------------------"
echo "Building Classy Vision Docusaurus site"
echo "-----------------------------------"
cd website || exit
yarn

# run script to parse html generated by sphinx
echo "--------------------------------------------"
echo "Parsing Sphinx docs and moving to Docusaurus"
echo "--------------------------------------------"
# Every later path is relative to the project root, so stop if we cannot
# get back there instead of writing files into the wrong directory.
cd .. || exit
mkdir -p "website/pages/api/"

cwd=$(pwd)
python scripts/parse_sphinx.py -i "${cwd}/sphinx/build/html/" -o "${cwd}/website/pages/api/"

SPHINX_JS_DIR='sphinx/build/html/_static/'
DOCUSAURUS_JS_DIR='website/static/js/'

mkdir -p "${DOCUSAURUS_JS_DIR}"

# move JS files from /sphinx/build/html/_static/*:
for js_file in \
  documentation_options.js \
  jquery.js \
  underscore.js \
  doctools.js \
  language_data.js \
  searchtools.js; do
  cp "${SPHINX_JS_DIR}${js_file}" "${DOCUSAURUS_JS_DIR}${js_file}"
done

# searchindex.js is not static util
cp "sphinx/build/html/searchindex.js" "${DOCUSAURUS_JS_DIR}searchindex.js"

# copy module sources
cp -r "sphinx/build/html/_sources/" "website/static/_sphinx-sources/"

echo "-----------------------------------"
echo "Generating tutorials"
echo "-----------------------------------"
mkdir -p "website/_tutorials"
mkdir -p "website/static/files"
python scripts/parse_tutorials.py -w "${cwd}"

cd website || exit

if [[ $BUILD_STATIC == true ]]; then
  echo "-----------------------------------"
  echo "Building static site"
  echo "-----------------------------------"
  yarn build
else
  echo "-----------------------------------"
  echo "Starting local server"
  echo "-----------------------------------"
  yarn start
fi
def parse_sphinx(input_dir, output_dir):
    """Strip Sphinx HTML pages down to their document body for Docusaurus.

    Walks ``input_dir`` and, for every ``*.html`` file, extracts the
    ``<div class="document">`` element, wraps it in ``<div class="sphinx">``,
    prepends the module-level ``js_scripts`` (plus ``search_js_scripts`` for
    ``search.html``) and writes the result to the same relative path under
    ``output_dir``. Finally, ``_static/searchtools.js`` inside ``input_dir``
    is rewritten in place so that it references ``_sphinx-sources/``.

    Args:
        input_dir: Directory containing the HTML generated by Sphinx.
        output_dir: Directory in the Docusaurus site that receives the pages.
    """
    for cur, _, files in os.walk(input_dir):
        for fname in files:
            if not fname.endswith(".html"):
                continue

            # Read explicitly as UTF-8 rather than relying on the locale
            # default, which differs between platforms.
            with open(os.path.join(cur, fname), "r", encoding="utf-8") as f:
                soup = BeautifulSoup(f.read(), "html.parser")

            doc = soup.find("div", {"class": "document"})
            if doc is None:
                # Nothing to extract from this page; previously this crashed
                # with an AttributeError on ``None.wrap``.
                print(
                    "Skipping {}: no document body found".format(
                        os.path.join(cur, fname)
                    )
                )
                continue
            wrapped_doc = doc.wrap(soup.new_tag("div", **{"class": "sphinx"}))

            # add js
            out = js_scripts
            if fname == "search.html":
                out += search_js_scripts
            out += str(wrapped_doc)

            output_path = os.path.join(output_dir, os.path.relpath(cur, input_dir))
            os.makedirs(output_path, exist_ok=True)
            with open(
                os.path.join(output_path, fname), "w", encoding="utf-8"
            ) as fout:
                fout.write(out)

    # update reference in JS file
    searchtools_path = os.path.join(input_dir, "_static/searchtools.js")
    with open(searchtools_path, "r", encoding="utf-8") as js_file:
        js = js_file.read()
    js = js.replace(
        "DOCUMENTATION_OPTIONS.URL_ROOT + '_sources/'", "'_sphinx-sources/'"
    )
    with open(searchtools_path, "w", encoding="utf-8") as js_file:
        js_file.write(js)
def gen_tutorials(repo_dir: str) -> None:
    """Generate HTML tutorials for the Docusaurus site from Jupyter notebooks.

    Reads ``website/tutorials.json`` under ``repo_dir`` to collect tutorial
    ids, then for each id converts ``tutorials/<id>.ipynb`` into:

    * ``website/_tutorials/<id>.html`` - the notebook body as HTML,
    * ``website/pages/tutorials/<id>.js`` - a page rendered from ``TEMPLATE``,
    * ``website/static/files/<id>.ipynb`` and ``<id>.py`` - downloadable
      copies of the notebook and of its script export.

    Args:
        repo_dir: Root directory of the repository.

    Raises:
        ValueError: If the HTML exported for a notebook has no
            ``notebook-container`` element to extract.
    """
    website_dir = os.path.join(repo_dir, "website")
    with open(
        os.path.join(website_dir, "tutorials.json"), "r", encoding="utf-8"
    ) as infile:
        tutorial_config = json.load(infile)

    tutorial_ids = {x["id"] for v in tutorial_config.values() for x in v}

    html_dir = os.path.join(website_dir, "_tutorials")
    js_dir = os.path.join(website_dir, "pages", "tutorials")
    files_dir = os.path.join(website_dir, "static", "files")
    # Make sure every output directory exists before writing into it.
    for out_dir in (html_dir, js_dir, files_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Sort so the tutorials are processed in a stable order across runs.
    for tid in sorted(tutorial_ids, key=str):
        print("Generating {} tutorial".format(tid))

        # convert notebook to HTML
        ipynb_in_path = os.path.join(repo_dir, "tutorials", "{}.ipynb".format(tid))
        with open(ipynb_in_path, "r", encoding="utf-8") as infile:
            nb_str = infile.read()
            nb = nbformat.reads(nb_str, nbformat.NO_CONVERT)

        # displayname is absent from notebook metadata
        nb["metadata"]["kernelspec"]["display_name"] = "python3"

        html, _ = HTMLExporter().from_notebook_node(nb)

        # pull out html div for notebook
        soup = BeautifulSoup(html, "html.parser")
        nb_meat = soup.find("div", {"id": "notebook-container"})
        if nb_meat is None:
            raise ValueError(
                "No 'notebook-container' div found in the HTML exported for "
                "tutorial {}".format(tid)
            )
        del nb_meat.attrs["id"]
        nb_meat.attrs["class"] = ["notebook"]
        html_out = JS_SCRIPTS + str(nb_meat)

        # generate html file
        html_out_path = os.path.join(html_dir, "{}.html".format(tid))
        with open(html_out_path, "w", encoding="utf-8") as html_outfile:
            html_outfile.write(html_out)

        # generate JS file
        js_page = TEMPLATE.format(tid)
        js_out_path = os.path.join(js_dir, "{}.js".format(tid))
        with open(js_out_path, "w", encoding="utf-8") as js_outfile:
            js_outfile.write(js_page)

        # output tutorial in both ipynb & py form
        ipynb_out_path = os.path.join(files_dir, "{}.ipynb".format(tid))
        with open(ipynb_out_path, "w", encoding="utf-8") as ipynb_outfile:
            ipynb_outfile.write(nb_str)

        py_source, _ = ScriptExporter().from_notebook_node(nb)
        py_out_path = os.path.join(files_dir, "{}.py".format(tid))
        with open(py_out_path, "w", encoding="utf-8") as py_outfile:
            py_outfile.write(py_source)
if __name__ == "__main__":
    if sys.version_info < (3, 6):
        sys.exit("Sorry, Python >=3.6 is required for Classy Vision.")

    # Resolve every input file relative to this script so the build does not
    # depend on the current working directory.
    here = os.path.dirname(os.path.abspath(__file__))

    # get version string from module
    with open(
        os.path.join(here, "classy_vision", "__init__.py"), "r", encoding="utf8"
    ) as f:
        version_match = re.search(
            r"__version__ = ['\"]([^'\"]*)['\"]", f.read(), re.M
        )
    if version_match is None:
        # Fail with a readable message instead of AttributeError on None.
        sys.exit("Unable to find __version__ in classy_vision/__init__.py")
    version = version_match.group(1)
    print("-- Building version " + version)

    with open(os.path.join(here, "README.md"), encoding="utf8") as f:
        readme = f.read()

    with open(os.path.join(here, "requirements.txt"), encoding="utf8") as f:
        # One requirement per line; drop blank lines and comments so they
        # never reach ``install_requires``.
        reqs = [
            line.strip()
            for line in f
            if line.strip() and not line.lstrip().startswith("#")
        ]

    setup(
        name="classy_vision",
        version=version,
        description="An end-to-end PyTorch framework for image and video classification.",
        long_description_content_type="text/markdown",
        long_description=readme,
        url="https://classyvision.ai",
        project_urls={
            "Documentation": "https://classyvision.ai",
            "Source": "https://github.com/facebookresearch/ClassyVision",
        },
        license="MIT License",
        python_requires=">=3.6",
        packages=find_packages(exclude=("tests",)),
        install_requires=reqs,
        extras_require={"dev": ["black", "sphinx", "isort", "bs4", "nbconvert"]},
        package_data={"classy_vision": ["configs/*.json", "templates"]},
        data_files=[("classy_vision", ["classy_train.py"])],
        include_package_data=True,
        test_suite="test.suites.unittests",
        scripts=["bin/classy-project"],
        keywords=["deep learning", "pytorch", "AI"],
        classifiers=[
            "Programming Language :: Python :: 3",
            "Programming Language :: Python :: 3.6",
            "License :: OSI Approved :: MIT License",
            "Operating System :: OS Independent",
            "Development Status :: 4 - Beta",
            "Intended Audience :: Developers",
            "Topic :: Scientific/Engineering :: Artificial Intelligence",
            "Topic :: Scientific/Engineering :: Image Recognition",
        ],
    )
# If the directory is relative to the documentation root, use
# os.path.abspath to make it absolute, like shown here.
#

import os
import sys


sys.path.insert(0, os.path.abspath(".."))


# -- Project information -----------------------------------------------------

project = "Classy Vision"
copyright = "2019, Facebook AI Research"
author = "Facebook AI Research"

# The full version, including alpha/beta/rc tags
release = "0.1"


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named "sphinx.ext.*") or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.githubpages",
    "sphinx.ext.napoleon",
    "sphinx.ext.intersphinx",
    "sphinx.ext.mathjax",
]

# Defaults applied to every autodoc directive. "undoc-members" used to be set
# through the deprecated ``autodoc_default_flags`` list; it now lives here.
autodoc_default_options = {"undoc-members": True, "special-members": "__init__"}

# ``autodoc_typehints`` is a top-level setting, not a directive option, so it
# has no effect as a key of ``autodoc_default_options``.
autodoc_typehints = "none"

primary_domain = "py"

intersphinx_mapping = {
    "python": ("https://docs.python.org/3", None),
    "pytorch": ("https://pytorch.org/docs/stable", None),
}

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]


# -- Options for HTML output -------------------------------------------------


# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = "alabaster"

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ["_static"]
html_static_path = []  # for now we have no static files to track

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
html_show_sphinx = False

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
html_show_copyright = False
+ +Classy Vision's API Reference +========================================= + +This is an exhaustive reference for APIs in Classy Vision. Please refer to our +tutorials for high-level information about +how these abstractions can be used together. + +.. toctree:: + :maxdepth: 3 + + dataset + heads + hooks + losses + meters + models + optim + param_scheduler + tasks + trainer + transforms + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/sphinx/losses.rst b/sphinx/losses.rst new file mode 100644 index 0000000000..92b48d32bb --- /dev/null +++ b/sphinx/losses.rst @@ -0,0 +1,5 @@ +Losses +========== + +.. automodule:: classy_vision.losses + :members: diff --git a/sphinx/make.bat b/sphinx/make.bat new file mode 100644 index 0000000000..2119f51099 --- /dev/null +++ b/sphinx/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/sphinx/meters.rst b/sphinx/meters.rst new file mode 100644 index 0000000000..9e32d98d59 --- /dev/null +++ b/sphinx/meters.rst @@ -0,0 +1,5 @@ +Meters +========== + +..
automodule:: classy_vision.meters + :members: diff --git a/sphinx/models.rst b/sphinx/models.rst new file mode 100644 index 0000000000..78add08080 --- /dev/null +++ b/sphinx/models.rst @@ -0,0 +1,5 @@ +Models +========== + +.. automodule:: classy_vision.models + :members: diff --git a/sphinx/optim.rst b/sphinx/optim.rst new file mode 100644 index 0000000000..7d326bd5cb --- /dev/null +++ b/sphinx/optim.rst @@ -0,0 +1,5 @@ +Optimizers +========== + +.. automodule:: classy_vision.optim + :members: diff --git a/sphinx/param_scheduler.rst b/sphinx/param_scheduler.rst new file mode 100644 index 0000000000..6d4e8a0245 --- /dev/null +++ b/sphinx/param_scheduler.rst @@ -0,0 +1,5 @@ +Param Schedulers +================ + +.. automodule:: classy_vision.optim.param_scheduler + :members: diff --git a/sphinx/tasks.rst b/sphinx/tasks.rst new file mode 100644 index 0000000000..6caca1b657 --- /dev/null +++ b/sphinx/tasks.rst @@ -0,0 +1,5 @@ +Tasks +========== + +.. automodule:: classy_vision.tasks + :members: diff --git a/sphinx/trainer.rst b/sphinx/trainer.rst new file mode 100644 index 0000000000..2041db57b0 --- /dev/null +++ b/sphinx/trainer.rst @@ -0,0 +1,5 @@ +Trainer +========== + +.. automodule:: classy_vision.trainer + :members: diff --git a/sphinx/transforms.rst b/sphinx/transforms.rst new file mode 100644 index 0000000000..b343bfa64b --- /dev/null +++ b/sphinx/transforms.rst @@ -0,0 +1,16 @@ +Transforms +========== + +Classy Vision is able to work directly with `torchvision` transforms +, so it ships with +very few built-in transforms. However, during research it's common to +experiment with new transforms. The `ClassyTransform` class allows users to +express their transforms in a common format and define them in a configuration +file. + +Like other Classy Vision abstractions, `ClassyTransform` is accompanied by a +`register_transform` decorator and `build_transform` function for integration +with the config system. + +..
automodule:: classy_vision.dataset.transforms + :members: diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000000..734a1eb4e2 --- /dev/null +++ b/test/__init__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. diff --git a/test/api_test.py b/test/api_test.py new file mode 100644 index 0000000000..1db5ed45ee --- /dev/null +++ b/test/api_test.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import absolute_import, division, print_function, unicode_literals + +import unittest +from typing import Any, Callable, Dict, Optional, Union + +import torch.nn as nn +import torch.nn.functional as F +from classy_vision.dataset import ClassyDataset, register_dataset +from classy_vision.dataset.core.random_image_datasets import ( + RandomImageBinaryClassDataset, + SampleType, +) +from classy_vision.dataset.transforms import ( + ClassyTransform, + GenericImageTransform, + build_transforms, +) +from classy_vision.losses import ClassyLoss, register_loss +from classy_vision.models import ClassyModel, register_model +from classy_vision.optim import SGD +from classy_vision.optim.param_scheduler import ConstantParamScheduler +from classy_vision.tasks import ClassificationTask +from classy_vision.trainer import LocalTrainer +from torchvision import transforms + + +# WARNING: The goal of this test is to use our public API as advertised in our +# tutorials and make sure everything trains successfully. If you break this +# test, make sure you also update our tutorials. 
class APITest(unittest.TestCase):
    """Smoke test that trains through the public API end to end."""

    def testOne(self):
        """Run one training epoch with the custom dataset, loss and model."""

        def make_dataset(image_ops):
            # Both splits share the tensor conversion, the normalization
            # statistics and every dataset setting; only the leading
            # resize/crop operations differ.
            pipeline = transforms.Compose(
                image_ops
                + [
                    transforms.ToTensor(),
                    transforms.Normalize(
                        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                    ),
                ]
            )
            return MyDataset(
                batchsize_per_replica=32,
                shuffle=False,
                transform=GenericImageTransform(transform=pipeline),
                num_samples=100,
                crop_size=224,
                class_ratio=0.5,
                seed=0,
            )

        train_dataset = make_dataset(
            [transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()]
        )
        test_dataset = make_dataset(
            [transforms.Resize(256), transforms.CenterCrop(224)]
        )

        model = MyModel()
        loss = MyLoss()
        optimizer = SGD(lr_scheduler=ConstantParamScheduler(0.01))

        # Keep rebinding the result of each setter, exactly as the fluent
        # chain would, rather than assuming the setters mutate in place.
        task = ClassificationTask()
        task = task.set_model(model)
        task = task.set_dataset(train_dataset, "train")
        task = task.set_dataset(test_dataset, "test")
        task = task.set_loss(loss)
        task = task.set_optimizer(optimizer)
        task = task.set_num_epochs(1)

        LocalTrainer().train(task)
class TestClassyBlock(unittest.TestCase):
    """Tests for attaching heads to the attachable blocks of a ClassyModel."""

    class DummyTestHead(ClassyHead):
        """Head with the fixed id "head_id" wrapping a single 2x2 linear layer."""

        def __init__(self):
            super().__init__("head_id")
            self.layer = torch.nn.Linear(2, 2)

        def forward(self, x):
            return self.layer(x)

    class DummyTestModel(ClassyModel):
        """Model made of two attachable linear blocks applied in sequence."""

        def __init__(self):
            super().__init__()
            self.layer1 = self.build_attachable_block(
                "dummy_block", torch.nn.Linear(2, 2)
            )
            self.layer2 = self.build_attachable_block(
                "dummy_block2", torch.nn.Linear(2, 2)
            )

        def forward(self, x):
            return self.layer2(self.layer1(x))

    def test_head_execution(self):
        # A head attached to the last block must see the model's own output.
        net = self.DummyTestModel()
        attached = self.DummyTestHead()
        net.set_heads({"dummy_block2": {attached.unique_id: attached}})
        model_out = net(torch.randn(1, 2))
        per_head = net.execute_heads()
        expected = attached(model_out)
        self.assertTrue(torch.allclose(expected, per_head["head_id"]))

    def test_duplicated_head_ids(self):
        # Two heads sharing one id are rejected until one of them is renamed.
        net = self.DummyTestModel()
        first = self.DummyTestHead()
        second = self.DummyTestHead()
        heads = {}
        heads["dummy_block"] = {first.unique_id: first}
        heads["dummy_block2"] = {second.unique_id: second}
        with self.assertRaises(ValueError):
            net.set_heads(heads)

        second.unique_id = "head_id2"
        net.set_heads(heads)

    def test_set_heads(self):
        net = self.DummyTestModel()
        attached = self.DummyTestHead()
        self.assertEqual(
            len(net.get_heads()), 0, "heads should be empty before set_heads"
        )
        net.set_heads({"dummy_block2": {attached.unique_id: attached}})
        net(torch.randn(1, 2))
        per_head = net.execute_heads()
        self.assertEqual(len(per_head), 1, "should have output for one head")

        # remove all heads
        net.set_heads({})
        self.assertEqual(len(net.get_heads()), 0, "heads should be empty")
class TestClassyHead(unittest.TestCase):
    """Tests for building heads from config and running them forward."""

    @register_head("dummy_head")
    class DummyHead(ClassyHead):
        """Head registered as "dummy_head": one linear layer, in_plane -> num_classes."""

        def __init__(self, unique_id, num_classes, in_plane):
            super().__init__(unique_id, num_classes)
            self.fc = torch.nn.Linear(in_plane, num_classes)

        def forward(self, x):
            return self.fc(x)

        @classmethod
        def from_config(cls, config):
            return cls(config["unique_id"], config["num_classes"], config["in_plane"])

    def _get_config(self):
        """Return a config that builds the "dummy_head" defined above."""
        return {
            "name": "dummy_head",
            "num_classes": 3,
            "unique_id": "cortex_dummy_head",
            "fork_block": "block3",
            "in_plane": 2048,
        }

    def test_build_head(self):
        config = self._get_config()
        head = build_head(config)
        self.assertEqual(head.unique_id, config["unique_id"])

        # Building without a unique_id is expected to fail an assertion.
        del config["unique_id"]
        with self.assertRaises(AssertionError):
            head = build_head(config)

    def test_forward(self):
        config = self._get_config()
        head = build_head(config)
        features = torch.randn(1, config["in_plane"])
        output = head(features)
        self.assertEqual(output.size(), torch.Size([1, 3]))

    def _get_pass_through_config(self):
        """Return a config for the head registered under the name "identity"."""
        return {
            "name": "identity",
            "num_classes": 3,
            "unique_id": "cortex_pass_through_head",
            "fork_block": "block3",
            "in_plane": 4,
        }

    def test_identity_forward(self):
        config = self._get_pass_through_config()
        head = build_head(config)
        features = torch.randn(1, config["in_plane"])
        output = head(features)
        self.assertEqual(features.size(), output.size())
        # assertTrue replaces the deprecated ``assert_`` alias, which was
        # removed from unittest in Python 3.12.
        self.assertTrue(torch.all(torch.eq(features, output)))
@register_dataset("test_dataset")
class TestDataset(classy_dataset.ClassyDataset):
    """Test dataset for validating registry functions"""

    def __init__(
        self,
        samples,
        batchsize_per_replica=1,
        num_samples=None,
        shuffle=False,
        transform=None,
    ):
        # Split the sample dicts into parallel input / target lists.
        inputs = []
        targets = []
        for sample in samples:
            inputs.append(sample["input"])
        for sample in samples:
            targets.append(sample["target"])

        # The tensors are already in memory, so the loader is the identity.
        wrapped = ListDataset(inputs, targets, loader=lambda x: x)

        # Default to using every sample when no explicit count is given.
        if num_samples is None:
            num_samples = len(samples)

        super().__init__(
            dataset=wrapped,
            split=None,
            batchsize_per_replica=batchsize_per_replica,
            shuffle=shuffle,
            transform=transform,
            num_samples=num_samples,
        )

    @classmethod
    def from_config(cls, config, *args, **kwargs):
        # The config is ignored; construction is driven by the extra
        # positional and keyword arguments.
        return cls(*args, **kwargs)
+ """ + + def setUp(self): + self.dataset1 = build_dataset(DUMMY_CONFIG, DUMMY_SAMPLES_1) + self.dataset2 = build_dataset(DUMMY_CONFIG, DUMMY_SAMPLES_2) + + def _compare_samples(self, sample1, sample2): + compare_samples(self, sample1, sample2) + + def _compare_batches(self, batch1, batch2): + compare_batches(self, batch1, batch2) + + def test_init(self): + self.assertTrue(self.dataset1 is not None) + self.assertTrue(self.dataset2 is not None) + + def test_len(self): + self.assertEqual(len(self.dataset1), 1) + self.assertEqual(len(self.dataset2), 2) + + def test_getitem(self): + sample = self.dataset1[0] + self._compare_samples(sample, DUMMY_SAMPLES_1[0]) + + for idx in range(len(self.dataset2)): + sample = self.dataset2[idx] + self._compare_samples(sample, DUMMY_SAMPLES_2[idx]) + + def test_get_iterator(self): + # Verifies that we can retrieve samples with iterators + dl = self.dataset1.iterator(num_workers=0) + assert isinstance( + dl, DataLoader + ), "Classy Iterator should return instance of PyTorch Dataloader" + next(iter(dl)) + + dl = self.dataset1.iterator(num_workers=2) + assert isinstance( + dl, DataLoader + ), "Classy Iterator should return instance of PyTorch Dataloader" + it = iter(dl) + next(it) + # Because we use multiprocessing we delete the iterable to + # shutdown workers + del it + + def test_batch_logic(self): + dataset = TestDataset(DUMMY_SAMPLES_2, batchsize_per_replica=2) + dl = dataset.iterator(num_workers=0) + batch = next(iter(dl)) + self.assertEqual(batch["input"].size()[0], 2) + self._compare_batches(batch, BATCHED_DUMMY_SAMPLES_2[0]) + + @mock.patch( + "classy_vision.dataset.classy_dataset.get_world_size", mock_get_world_size + ) + @mock.patch("classy_vision.dataset.classy_dataset.get_rank", mock_get_rank) + def test_shard_logic(self): + # This test uses a world size of 2, rank 1 to verify that the + # second sample is returned by the dataloader + dataset = TestDataset(DUMMY_SAMPLES_2, batchsize_per_replica=1) + dl = 
dataset.iterator(num_workers=0) + sample = next(iter(dl)) + self._compare_batches(sample, DUMMY_SAMPLES_2[1]) + + def test_num_samples_logic(self): + dataset = TestDataset(DUMMY_SAMPLES_2) + self.assertEqual(len(dataset), 2) + + dataset = TestDataset(DUMMY_SAMPLES_2, num_samples=1) + # Verify len returns right value for dataset + self.assertEqual(len(dataset), 1) + # Verify len returns right value for iterator + self.assertEqual(len(dataset.iterator(num_workers=0)), 1) + # Verify iterator returns correct number of samples + it = iter(dataset.iterator(num_workers=0)) + num_samples = 0 + while True: + try: + next(it) + num_samples += 1 + except StopIteration: + break + self.assertEqual(num_samples, 1) + + # Check assert for num_samples > length of base dataset + dataset = TestDataset(DUMMY_SAMPLES_2, num_samples=3) + with self.assertRaises(AssertionError): + len(dataset) + + def test_shuffle_logic(self): + # Simple samples to test shuffling, just a single value tensor + # so we know how things were shuffled + dummy_samples_10 = [ + {"input": torch.tensor([[0]]), "target": torch.tensor([0])}, + {"input": torch.tensor([[1]]), "target": torch.tensor([0])}, + {"input": torch.tensor([[2]]), "target": torch.tensor([0])}, + {"input": torch.tensor([[3]]), "target": torch.tensor([0])}, + {"input": torch.tensor([[4]]), "target": torch.tensor([0])}, + {"input": torch.tensor([[5]]), "target": torch.tensor([0])}, + {"input": torch.tensor([[6]]), "target": torch.tensor([0])}, + {"input": torch.tensor([[7]]), "target": torch.tensor([0])}, + {"input": torch.tensor([[8]]), "target": torch.tensor([0])}, + {"input": torch.tensor([[9]]), "target": torch.tensor([0])}, + ] + dataset = TestDataset(dummy_samples_10, shuffle=True) + + def unpack_tensors(tensor_list): + return [t["input"].item() for t in tensor_list] + + # Epoch 0 + iterator = dataset.iterator(num_workers=0, current_phase_id=0) + it = iter(iterator) + epoch_0_list = [sample for sample in it] + epoch_0_list = 
unpack_tensors(epoch_0_list) + + # Epoch 1 + iterator = dataset.iterator(num_workers=0, current_phase_id=1) + it = iter(iterator) + epoch_1_list = [sample for sample in it] + epoch_1_list = unpack_tensors(epoch_1_list) + + # Should be same length, should be shuffled, should be + # different shuffles for each epoch + self.assertEqual(len(epoch_0_list), len(epoch_1_list)) + self.assertTrue(epoch_0_list != [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + self.assertTrue(epoch_0_list != epoch_1_list) + + # Test different shuffle seeds + iterator = dataset.iterator(num_workers=0, current_phase_id=0, shuffle_seed=10) + it = iter(iterator) + epoch_0_seed_10_list = [sample for sample in it] + epoch_0_seed_10_list = unpack_tensors(epoch_0_seed_10_list) + self.assertTrue(epoch_0_seed_10_list != epoch_0_list) + + def test_transform_logic(self): + def _return_1_transform(sample): + return 1 + + dataset = TestDataset(DUMMY_SAMPLES_2, transform=_return_1_transform) + sample = dataset[0] + self.assertEqual(sample, 1) diff --git a/test/dataset_classy_imagenet_test.py b/test/dataset_classy_imagenet_test.py new file mode 100644 index 0000000000..0304a27be7 --- /dev/null +++ b/test/dataset_classy_imagenet_test.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import os +import shutil +import tempfile +import unittest + +import PIL +from classy_vision.dataset import build_dataset +from classy_vision.dataset.classy_imagenet import ImageNetDataset +from torchvision import transforms + + +class TestImageNet(unittest.TestCase): + def get_test_image_dataset(self, num_samples): + config = { + "name": "synthetic_image", + "crop_size": 224, + "num_channels": 3, + "seed": 0, + "class_ratio": 0.5, + "num_samples": num_samples, + "batchsize_per_replica": 1, + "use_shuffle": False, + "transforms": [ + { + "name": "apply_transform_to_key", + "transforms": [{"name": "ToTensor"}], + "key": "input", + } + ], + } + dataset = build_dataset(config) + return dataset + + def setUp(self): + # create a base directory to write image files to + self.base_dir = tempfile.mkdtemp() + for split in ["train", "val"]: + os.makedirs(f"{self.base_dir}/{split}/0") + os.makedirs(f"{self.base_dir}/{split}/1") + + def tearDown(self): + # delete all the temporary data created + shutil.rmtree(self.base_dir) + + def test_imagenet_retrieve_sample(self): + num_samples = 10 + for split in ["train", "val"]: + dataloader = self.get_test_image_dataset(num_samples).iterator() + for i, sample in enumerate(dataloader): + input = sample["input"] + target = sample["target"] + image = transforms.ToPILImage()(input.squeeze()) + path = f"{self.base_dir}/{split}/{target.item()}/{i}.png" + # save the image in a lossless format (png) + image.save(path) + + dataset = ImageNetDataset( + split="train", + batchsize_per_replica=1, + shuffle=True, + transform=None, + num_samples=None, + root=self.base_dir, + ) + self.assertEqual(len(dataset), num_samples) + img, target = dataset[0] + self.assertTrue(isinstance(img, PIL.Image.Image)) + self.assertTrue(isinstance(target, int)) diff --git a/test/dataset_classy_video_dataset_test.py b/test/dataset_classy_video_dataset_test.py new file mode 100644 index 0000000000..4d045b8c66 --- /dev/null +++ 
b/test/dataset_classy_video_dataset_test.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import torch +from classy_vision.dataset import build_dataset, register_dataset +from classy_vision.dataset.classy_video_dataset import ( + ClassyVideoDataset, + MaxLengthClipSampler, +) +from classy_vision.dataset.core import ListDataset +from classy_vision.dataset.transforms.util_video import ( + build_video_field_transform_default, +) +from torch.utils.data import Sampler + + +DUMMY_SAMPLES_1 = [ + { + "input": { + "video": torch.randint(0, 256, (8, 3, 128, 128), dtype=torch.uint8), + "audio": torch.rand(1000, 1, dtype=torch.float32), + }, + "target": torch.tensor([[0]]), + } +] + + +DUMMY_CONFIG = { + "name": "test_video_dataset", + "split": "train", + "batchsize_per_replica": 1, + "use_shuffle": True, + "num_samples": 1, + "frames_per_clip": 8, + "video_dir": "dummy_video_dir", +} + + +class MockClipSampler(Sampler): + def __init__(self, full_size=1000): + self.full_size = full_size + + def __iter__(self): + indices = list(range(self.full_size)) + return iter(indices) + + def __len__(self): + return self.full_size + + +@register_dataset("test_video_dataset") +class TestVideoDataset(ClassyVideoDataset): + """Test dataset for validating registry functions""" + + def __init__( + self, + split, + batchsize_per_replica, + shuffle, + transform, + num_samples, + clips_per_video, + samples, + ): + self.samples = samples + input_tensors = [sample["input"] for sample in samples] + target_tensors = [sample["target"] for sample in samples] + dataset = ListDataset(input_tensors, target_tensors, loader=lambda x: x) + super(TestVideoDataset, self).__init__( + dataset, + split, + batchsize_per_replica, + shuffle, + transform, + num_samples, + clips_per_video, + ) + + @classmethod + def 
from_config(cls, config, samples): + split = config.get("split") + ( + transform_config, + batchsize_per_replica, + shuffle, + num_samples, + frames_per_clip, + video_width, + video_height, + video_min_dimension, + audio_samples, + step_between_clips, + frame_rate, + clips_per_video, + ) = cls.parse_config(config) + transform = build_video_field_transform_default(transform_config, split) + return cls( + split, + batchsize_per_replica, + shuffle, + transform, + num_samples, + clips_per_video, + samples, + ) + + +class TestRegistryFunctions(unittest.TestCase): + """ + Tests functions that use registry + """ + + def test_build_dataset(self): + dataset = build_dataset(DUMMY_CONFIG, DUMMY_SAMPLES_1) + self.assertTrue(isinstance(dataset, TestVideoDataset)) + + +class TestClassyVideoDataset(unittest.TestCase): + """ + Tests member functions of ClassyVideoDataset. + """ + + def setUp(self): + self.dataset = build_dataset(DUMMY_CONFIG, DUMMY_SAMPLES_1) + + def test_parse_config(self): + ( + transform_config, + batchsize_per_replica, + shuffle, + num_samples, + frames_per_clip, + video_width, + video_height, + video_min_dimension, + audio_samples, + step_between_clips, + frame_rate, + clips_per_video, + ) = self.dataset.parse_config(DUMMY_CONFIG) + + def test_max_length_clip_sampler(self): + clip_sampler = MockClipSampler(full_size=1000) + clip_sampler = MaxLengthClipSampler(clip_sampler, num_samples=64) + count = 0 + for _clip_index in iter(clip_sampler): + count += 1 + self.assertEqual(count, 64) + self.assertEqual(len(clip_sampler), 64) diff --git a/test/dataset_image_path_dataset_test.py b/test/dataset_image_path_dataset_test.py new file mode 100644 index 0000000000..2fac897f42 --- /dev/null +++ b/test/dataset_image_path_dataset_test.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import os +import shutil +import tempfile +import unittest + +import torch +from classy_vision.dataset import ClassyDataset, build_dataset +from classy_vision.dataset.image_path_dataset import ImagePathDataset +from torchvision import transforms + + +class TestImageDataset(unittest.TestCase): + def get_test_image_dataset(self): + config = { + "name": "synthetic_image", + "crop_size": 224, + "num_channels": 3, + "seed": 0, + "class_ratio": 0.5, + "num_samples": 100, + "batchsize_per_replica": 1, + "use_shuffle": False, + "transforms": [ + { + "name": "apply_transform_to_key", + "transforms": [{"name": "ToTensor"}], + "key": "input", + } + ], + } + dataset = build_dataset(config) + return dataset + + def setUp(self): + # create a base directory to write image files to + self.base_dir = tempfile.mkdtemp() + os.mkdir(f"{self.base_dir}/0") + os.mkdir(f"{self.base_dir}/1") + + def tearDown(self): + # delete all the temporary data created + shutil.rmtree(self.base_dir) + + def get_dataset_config(self): + return { + "batchsize_per_replica": 1, + "use_shuffle": False, + "num_samples": None, + "transforms": [ + { + "name": "apply_transform_to_key", + "transforms": [{"name": "ToTensor"}], + "key": "input", + } + ], + } + + @unittest.skip( + "Skipping test since build_dataset doesn't " + "work right now for ImagePathDataset" + ) + def test_build_dataset(self): + config = self.get_dataset_config() + dataset = build_dataset(config) + self.assertIsInstance(dataset, ClassyDataset) + + def test_image_dataset(self): + image_paths = [] + inputs = [] + targets = [] + dataloader = self.get_test_image_dataset().iterator() + for i, sample in enumerate(dataloader): + input = sample["input"] + target = sample["target"] + image = transforms.ToPILImage()(input.squeeze()) + path = f"{self.base_dir}/{target.item()}/{i}.png" + # save the image in a lossless format (png) + image.save(path) + image_paths.append(path) + inputs.append(input) + targets.append(target) + + # config for the image 
dataset + config = self.get_dataset_config() + + # create an image dataset from the list of images + dataset = ImagePathDataset.from_config( + config, image_paths=image_paths, targets=targets + ) + dataloader = dataset.iterator() + # the samples should be in the same order + for sample, expected_input, expected_target in zip(dataloader, inputs, targets): + self.assertTrue(torch.allclose(sample["input"], expected_input)) + self.assertEqual(sample["target"], expected_target) + + # test the dataset works without targets as well + dataset = ImagePathDataset.from_config(config, image_paths=image_paths) + dataloader = dataset.iterator() + # the samples should be in the same order + for sample, expected_input in zip(dataloader, inputs): + self.assertTrue(torch.allclose(sample["input"], expected_input)) + + # create an image dataset from the root dir + dataset = ImagePathDataset.from_config(config, image_paths=self.base_dir) + dataloader = dataset.iterator() + # test that we get the same class distribution + # we don't test the actual samples since the ordering isn't defined + counts = [0, 0] + for sample in dataloader: + counts[sample["target"].item()] += 1 + expected_counts = [0, 0] + for target in targets: + expected_counts[target.item()] += 1 + self.assertEqual(counts, expected_counts) diff --git a/test/dataset_transforms_lighting_transform_test.py b/test/dataset_transforms_lighting_transform_test.py new file mode 100644 index 0000000000..c280bc5ccc --- /dev/null +++ b/test/dataset_transforms_lighting_transform_test.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest + +from classy_vision.dataset.core.random_image_datasets import ( + RandomImageBinaryClassDataset, +) +from classy_vision.dataset.transforms.util import build_field_transform_default_imagenet + + +class LightingTransformTest(unittest.TestCase): + def get_test_image_dataset(self): + return RandomImageBinaryClassDataset( + crop_size=224, class_ratio=0.5, num_samples=100, seed=0 + ) + + def test_lighting_transform_no_errors(self): + """ + Tests that the lighting transform runs without any errors. + """ + dataset = self.get_test_image_dataset() + + config = [{"name": "ToTensor"}, {"name": "lighting"}] + transform = build_field_transform_default_imagenet(config) + sample = dataset[0] + try: + # test that lighting has been registered and runs without errors + transform(sample) + except Exception: + self.fail("LightingTransform raised an exception") + return diff --git a/test/dataset_transforms_test.py b/test/dataset_transforms_test.py new file mode 100644 index 0000000000..b6317fb813 --- /dev/null +++ b/test/dataset_transforms_test.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest + +import torch +import torchvision.transforms as transforms +from classy_vision.dataset.transforms import ( + ClassyTransform, + build_transforms, + register_transform, +) +from classy_vision.dataset.transforms.util import ImagenetNoAugmentTransform + + +@register_transform("resize") +class resize(ClassyTransform): + def __init__(self, size: int): + self.transform = transforms.Resize(size=size) + + def __call__(self, img): + return self.transform(img) + + +@register_transform("center_crop") +class center_crop(ClassyTransform): + def __init__(self, size: int): + self.transform = transforms.CenterCrop(size=size) + + def __call__(self, img): + return self.transform(img) + + +class DatasetTransformsTest(unittest.TestCase): + def get_test_image(self): + return transforms.ToPILImage()(torch.randn((3, 224, 224))) + + def test_transforms(self): + input = self.get_test_image() + + # reference transform which we will use to validate the built transforms + reference_transform = ImagenetNoAugmentTransform() + reference_output = reference_transform(input) + + # test a registered transform + config = [{"name": "imagenet_no_augment"}] + transform = build_transforms(config) + output = transform(input) + self.assertTrue(torch.allclose(output, reference_output)) + + # test a transform built using torchvision transforms + config = [ + {"name": "Resize", "size": 256}, + {"name": "CenterCrop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + }, + ] + transform = build_transforms(config) + output = transform(input) + self.assertTrue(torch.allclose(output, reference_output)) + + # test a combination of registered and torchvision transforms + config = [ + {"name": "resize", "size": 256}, + {"name": "center_crop", "size": 224}, + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + }, + ] + transform = build_transforms(config) 
+ output = transform(input) + self.assertTrue(torch.allclose(output, reference_output)) diff --git a/test/dataset_transforms_util_test.py b/test/dataset_transforms_util_test.py new file mode 100644 index 0000000000..1f2d21061b --- /dev/null +++ b/test/dataset_transforms_util_test.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import copy +import random +import unittest + +import numpy +import torch +import torchvision.transforms as transforms +from classy_vision.dataset.core.random_image_datasets import ( + RandomImageBinaryClassDataset, + SampleType, +) +from classy_vision.dataset.transforms import build_transforms +from classy_vision.dataset.transforms.util import ( + GenericImageTransform, + ImagenetAugmentTransform, + ImagenetNoAugmentTransform, + build_field_transform_default_imagenet, +) + + +class DatasetTransformsUtilTest(unittest.TestCase): + def get_test_image_dataset(self, sample_type): + return RandomImageBinaryClassDataset( + crop_size=224, + class_ratio=0.5, + num_samples=100, + seed=0, + sample_type=sample_type, + ) + + def transform_checks( + self, sample, transform, expected_transform, key, transformed_key=None + ): + # If transformed key is None, then use key + transformed_key = transformed_key if transformed_key is not None else key + input_image = copy.deepcopy(sample[key]) + + torch.manual_seed(0) + numpy.random.seed(0) + random.seed(0) + output_image = transform(sample)[transformed_key] + + torch.manual_seed(0) + numpy.random.seed(0) + random.seed(0) + self.assertTrue(torch.allclose(output_image, expected_transform(input_image))) + + def test_build_dict_field_transform_default_imagenet(self): + dataset = self.get_test_image_dataset(SampleType.DICT) + + # should apply the transform in the config + config = [{"name": "ToTensor"}] + default_transform = 
transforms.Compose( + [transforms.CenterCrop(100), transforms.ToTensor()] + ) + transform = build_field_transform_default_imagenet( + config, default_transform=default_transform + ) + sample = dataset[0] + self.transform_checks(sample, transform, transforms.ToTensor(), "input") + + # should apply default_transform + config = None + transform = build_field_transform_default_imagenet( + config, default_transform=default_transform + ) + sample = dataset[0] + self.transform_checks(sample, transform, default_transform, "input") + + # should apply the transform for a test split + transform = build_field_transform_default_imagenet(config, split="test") + sample = dataset[0] + self.transform_checks(sample, transform, ImagenetNoAugmentTransform(), "input") + + def test_build_tuple_field_transform_default_imagenet(self): + dataset = self.get_test_image_dataset(SampleType.TUPLE) + + # should apply the transform in the config + config = [{"name": "ToTensor"}] + default_transform = transforms.Compose( + [transforms.CenterCrop(100), transforms.ToTensor()] + ) + transform = build_field_transform_default_imagenet( + config, default_transform=default_transform, key=0, key_map_transform=None + ) + sample = dataset[0] + self.transform_checks(sample, transform, transforms.ToTensor(), 0) + + # should apply default_transform + config = None + transform = build_field_transform_default_imagenet( + config, default_transform=default_transform, key=0, key_map_transform=None + ) + sample = dataset[0] + self.transform_checks(sample, transform, default_transform, 0) + + # should apply the transform for a test split + transform = build_field_transform_default_imagenet( + config, split="test", key=0, key_map_transform=None + ) + sample = dataset[0] + self.transform_checks(sample, transform, ImagenetNoAugmentTransform(), 0) + + def test_apply_transform_to_key_from_config(self): + dataset = self.get_test_image_dataset(SampleType.DICT) + + config = [ + { + "name": "apply_transform_to_key", + 
"transforms": [{"name": "ToTensor"}], + "key": "input", + } + ] + transform = build_transforms(config) + sample = dataset[0] + self.transform_checks(sample, transform, transforms.ToTensor(), "input") + + def test_generic_image_transform(self): + dataset = self.get_test_image_dataset(SampleType.TUPLE) + + # Check class constructor + transform = GenericImageTransform(transform=transforms.ToTensor()) + sample = dataset[0] + self.transform_checks(sample, transform, transforms.ToTensor(), 0, "input") + + transform = GenericImageTransform(split="train") + sample = dataset[0] + self.transform_checks(sample, transform, ImagenetAugmentTransform(), 0, "input") + + transform = GenericImageTransform(split="test") + sample = dataset[0] + self.transform_checks( + sample, transform, ImagenetNoAugmentTransform(), 0, "input" + ) + + # Check from_config constructor / registry + config = [ + {"name": "generic_image_transform", "transforms": [{"name": "ToTensor"}]} + ] + transform = build_transforms(config) + sample = dataset[0] + self.transform_checks(sample, transform, transforms.ToTensor(), 0, "input") + + # Check with Imagenet defaults + config = [{"name": "generic_image_transform", "split": "train"}] + transform = build_transforms(config) + sample = dataset[0] + self.transform_checks(sample, transform, ImagenetAugmentTransform(), 0, "input") + + config = [{"name": "generic_image_transform", "split": "test"}] + transform = build_transforms(config) + sample = dataset[0] + self.transform_checks( + sample, transform, ImagenetNoAugmentTransform(), 0, "input" + ) diff --git a/test/dataset_transforms_util_video_test.py b/test/dataset_transforms_util_video_test.py new file mode 100644 index 0000000000..4c5f7d18c9 --- /dev/null +++ b/test/dataset_transforms_util_video_test.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import copy +import unittest + +import torch +import torchvision.transforms._transforms_video as transforms_video +from classy_vision.dataset.core.random_video_datasets import RandomVideoDataset +from classy_vision.dataset.transforms.util_video import ( + VideoConstants, + build_video_field_transform_default, +) + + +class DatasetTransformUtilVideoTest(unittest.TestCase): + def get_test_video_dataset(self): + self.num_classes = 10 + self.split = "train" + self.num_samples = 100 + self.frames_per_clip = 32 + self.video_width = 320 + self.video_height = 256 + self.audio_samples = 44000 + self.clips_per_video = 1 + self.seed = 1 + + dataset = RandomVideoDataset( + self.num_classes, + self.split, + self.num_samples, + self.frames_per_clip, + self.video_width, + self.video_height, + self.audio_samples, + self.clips_per_video, + self.seed, + ) + return dataset + + def test_build_field_transform_default_video(self): + dataset = self.get_test_video_dataset() + + # transform config is not provided. 
Use default transforms + config = None + # default training data transform + sample = dataset[0] + + transform = build_video_field_transform_default(config, "train") + output_clip = transform(sample)["input"]["video"] + self.assertEqual( + output_clip.size(), + torch.Size( + ( + 3, + self.frames_per_clip, + VideoConstants.CROP_SIZE, + VideoConstants.CROP_SIZE, + ) + ), + ) + # default testing data transform + sample = dataset[1] + sample_copy = copy.deepcopy(sample) + + expected_output_clip = transforms_video.ToTensorVideo()( + sample["input"]["video"] + ) + expected_output_clip = transforms_video.CenterCropVideo( + VideoConstants.CROP_SIZE + )(expected_output_clip) + expected_output_clip = transforms_video.NormalizeVideo( + mean=VideoConstants.MEAN, std=VideoConstants.STD + )(expected_output_clip) + + transform = build_video_field_transform_default(config, "test") + output_clip = transform(sample_copy)["input"]["video"] + + rescaled_width = int( + VideoConstants.SIZE_RANGE[0] * self.video_width / self.video_height + ) + self.assertEqual( + output_clip.size(), + torch.Size( + (3, self.frames_per_clip, VideoConstants.SIZE_RANGE[0], rescaled_width) + ), + ) + # transform config is provided. Simulate training config + sample = dataset[2] + config = { + "video": [ + {"name": "ToTensorVideo"}, + { + "name": "video_clip_random_resize_crop", + "crop_size": 64, + "size_range": [256, 320], + }, + {"name": "RandomHorizontalFlipVideo"}, + { + "name": "NormalizeVideo", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + }, + ] + } + transform = build_video_field_transform_default(config, "train") + output_clip = transform(sample)["input"]["video"] + self.assertEqual( + output_clip.size(), torch.Size((3, self.frames_per_clip, 64, 64)) + ) + self.assertTrue(output_clip.dtype == torch.float) + + # transform config is provided. 
Simulate testing config + sample = dataset[3] + config = { + "video": [ + {"name": "ToTensorVideo"}, + {"name": "video_clip_resize", "size": 64}, + { + "name": "NormalizeVideo", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + }, + ] + } + transform = build_video_field_transform_default(config, "train") + output_clip = transform(sample)["input"]["video"] + + rescaled_width = int(64 * self.video_width / self.video_height) + self.assertEqual( + output_clip.size(), + torch.Size((3, self.frames_per_clip, 64, rescaled_width)), + ) + self.assertTrue(output_clip.dtype == torch.float) diff --git a/test/generic/__init__.py b/test/generic/__init__.py new file mode 100644 index 0000000000..734a1eb4e2 --- /dev/null +++ b/test/generic/__init__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. diff --git a/test/generic/config_utils.py b/test/generic/config_utils.py new file mode 100644 index 0000000000..42e8aac669 --- /dev/null +++ b/test/generic/config_utils.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from classy_vision.tasks import build_task + + +def get_test_task_config(head_num_classes=1000): + return { + "name": "classification_task", + "num_epochs": 12, + "loss": {"name": "CrossEntropyLoss"}, + "dataset": { + "train": { + "name": "synthetic_image", + "split": "train", + "crop_size": 224, + "class_ratio": 0.5, + "num_samples": 2000, + "seed": 0, + "batchsize_per_replica": 32, + "use_shuffle": True, + "transforms": [ + { + "name": "apply_transform_to_key", + "transforms": [ + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + }, + ], + "key": "input", + } + ], + }, + "test": { + "name": "synthetic_image", + "split": "test", + "crop_size": 224, + "class_ratio": 0.5, + "num_samples": 2000, + "seed": 0, + "batchsize_per_replica": 32, + "use_shuffle": False, + "transforms": [ + { + "name": "apply_transform_to_key", + "transforms": [ + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + }, + ], + "key": "input", + } + ], + }, + }, + "meters": {"accuracy": {"topk": [1, 5]}}, + "model": { + "name": "resnet", + "num_blocks": [3, 4, 6, 3], + "small_input": False, + "zero_init_bn_residuals": True, + "heads": [ + { + "name": "fully_connected", + "unique_id": "default_head", + "num_classes": head_num_classes, + "fork_block": "block3-2", + "in_plane": 2048, + } + ], + }, + "optimizer": { + "name": "sgd", + "num_epochs": 12, + "lr": {"name": "step", "values": [0.1, 0.01]}, + "weight_decay": 1e-4, + "momentum": 0.9, + }, + } + + +def get_fast_test_task_config(head_num_classes=1000): + return { + "name": "classification_task", + "num_epochs": 1, + "loss": {"name": "CrossEntropyLoss"}, + "dataset": { + "train": { + "name": "synthetic_image", + "split": "train", + "crop_size": 20, + "class_ratio": 0.5, + "num_samples": 10, + "seed": 0, + "batchsize_per_replica": 2, + "use_shuffle": False, + "transforms": [ + { + "name": "apply_transform_to_key", 
+ "transforms": [ + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + }, + ], + "key": "input", + } + ], + }, + "test": { + "name": "synthetic_image", + "split": "test", + "crop_size": 20, + "class_ratio": 0.5, + "num_samples": 10, + "seed": 0, + "batchsize_per_replica": 2, + "use_shuffle": False, + "transforms": [ + { + "name": "apply_transform_to_key", + "transforms": [ + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + }, + ], + "key": "input", + } + ], + }, + }, + "model": { + "name": "resnet", + "num_blocks": [1], + "small_input": False, + "zero_init_bn_residuals": True, + "heads": [ + { + "name": "fully_connected", + "unique_id": "default_head", + "num_classes": head_num_classes, + "fork_block": "block0-0", + "in_plane": 256, + } + ], + }, + "meters": {"accuracy": {"topk": [1]}}, + "optimizer": {"name": "sgd", "lr": 0.01, "weight_decay": 1e-4, "momentum": 0.9}, + } + + +def get_test_classy_task(): + config = get_test_task_config() + task = build_task(config) + return task + + +def get_test_mlp_task_config(): + return { + "name": "classification_task", + "num_epochs": 10, + "loss": {"name": "CrossEntropyLoss"}, + "dataset": { + "train": { + "name": "synthetic_image", + "split": "train", + "num_classes": 2, + "crop_size": 20, + "class_ratio": 0.5, + "num_samples": 10, + "seed": 0, + "batchsize_per_replica": 3, + "use_augmentation": False, + "use_shuffle": True, + "transforms": [ + { + "name": "apply_transform_to_key", + "transforms": [ + {"name": "ToTensor"}, + { + "name": "Normalize", + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + }, + ], + "key": "input", + } + ], + }, + "test": { + "name": "synthetic_image", + "split": "test", + "num_classes": 2, + "crop_size": 20, + "class_ratio": 0.5, + "num_samples": 10, + "seed": 0, + "batchsize_per_replica": 1, + "use_augmentation": False, + "use_shuffle": False, + 
def get_test_model_configs():
    """Return the list of model configs used by the model tests.

    Returns:
        A new list with one ``resnet`` and one ``resnext`` config. Both use
        the ``[3, 4, 6, 3]`` block layout and a single 1000-class
        ``fully_connected`` head forked from ``block3-2``.
    """

    def default_head():
        # Built per model so the two configs never share a mutable head dict.
        return {
            "name": "fully_connected",
            "unique_id": "default_head",
            "num_classes": 1000,
            "fork_block": "block3-2",
            "in_plane": 2048,
        }

    return [
        # ResNet with the [3, 4, 6, 3] layout, i.e. the 50-layer layout
        # (a 101-layer ResNet would use [3, 4, 23, 3]).
        {
            "name": "resnet",
            "num_blocks": [3, 4, 6, 3],
            "small_input": False,
            "heads": [default_head()],
        },
        # ResNeXt 32x4d with the same 50-layer block layout.
        {
            "name": "resnext",
            "num_blocks": [3, 4, 6, 3],
            "base_width_and_cardinality": [4, 32],
            "small_input": False,
            "heads": [default_head()],
        },
    ]
class MergeDataset:
    """
    Dataset that merges samples from multiple datasets into single sample.

    If datasets have distinct keys, then we merge dicts, e.g.

        dataset1[idx] = {'input': input_tensor}
        dataset2[idx] = {'target': target_tensor}
        merged_dataset[idx] = {'input': input_tensor, 'target': target_tensor}

    If datasets have matching keys then we create a list and append, e.g.

        dataset1[idx] = {'input': input_tensor1}
        dataset2[idx] = {'input': input_tensor2}
        merged_dataset[idx] = {'input': [input_tensor1, input_tensor2]}

    A value that is itself a list is treated like any other value: it becomes
    one element of the merged list and is never modified in place.

    Note, if your datasets' samples do not have consistent keys for each sample,
    this could lead to inconsistent merged samples.
    """

    def __init__(self, datasets):
        """Store ``datasets``, a list of equally long indexable datasets."""
        # assertions:
        assert isinstance(datasets, list)
        assert all(len(dataset) == len(datasets[0]) for dataset in datasets)

        # create object:
        super().__init__()
        self.datasets = datasets

    def __getitem__(self, idx):
        """Return the merged dict built from ``dataset[idx]`` of every dataset."""
        final_sample = {}
        # Keys whose value in final_sample is a merge list created here.
        # Tracking them explicitly (instead of testing isinstance(..., list))
        # keeps list-valued samples from being mistaken for that list and
        # mutated in place.
        merged_keys = set()
        for dataset in self.datasets:
            curr_sample = dataset[idx]
            assert isinstance(curr_sample, dict), "Merge dataset only supports dicts"
            for key, value in curr_sample.items():
                if key not in final_sample:
                    # First occurrence of the key: keep the value as is.
                    final_sample[key] = value
                elif key in merged_keys:
                    final_sample[key].append(value)
                else:
                    # Second occurrence: start a fresh merge list.
                    final_sample[key] = [final_sample[key], value]
                    merged_keys.add(key)

        return final_sample

    def __len__(self):
        """Return the common dataset length, or 0 when there are no datasets."""
        return len(self.datasets[0]) if self.datasets else 0
def _meter_worker(qin, qout, meter, is_train, world_size, rank, filename):
    """Serve meter commands from ``qin`` until a shutdown signal arrives.

    Joins a ``gloo`` process group through the ``file://`` rendezvous at
    ``filename`` and then handles ``(signal, val)`` tuples read from ``qin``:

    * ``UPDATE_SIGNAL``: call ``meter.update(val[0], val[1], is_train=is_train)``.
    * ``VALUE_SIGNAL``: call ``meter.sync_state()`` and put ``meter.value`` on
      ``qout``.
    * ``SHUTDOWN_SIGNAL``: leave the loop and return.

    Raises:
        NotImplementedError: if any other signal value is received.
    """
    backend = "gloo"
    torch.distributed.init_process_group(
        backend=backend,
        init_method="file://{filename}".format(filename=filename),
        world_size=world_size,
        rank=rank,
    )
    # Listen for commands on queues
    while True:
        # get() blocks without a timeout, so it can never raise queue.Empty
        # and needs no retry handling.
        signal, val = qin.get()

        if signal == UPDATE_SIGNAL:
            meter.update(val[0], val[1], is_train=is_train)

        elif signal == VALUE_SIGNAL:
            meter.sync_state()
            qout.put(meter.value)

        elif signal == SHUTDOWN_SIGNAL:
            break

        else:
            raise NotImplementedError("Bad signal value")
Does not reset meter before / after running + """ + if not isinstance(model_output, list): + model_output = [model_output] + + if not isinstance(target, list): + target = [target] + + for i in range(len(model_output)): + meter.update(model_output[i], target[i], **kwargs) + + meter.sync_state() + meter_value = meter.value + for key, val in expected_value.items(): + self.assertTrue( + key in meter_value, msg="{0} not in meter value!".format(key) + ) + self.assertAlmostEqual( + meter_value[key], + val, + places=4, + msg="{0} meter value mismatch!".format(key), + ) + + def _values_match_expected_value(self, value0, value1, expected_value): + for key, val in expected_value.items(): + self.assertTrue(key in value0, msg="{0} not in meter value!".format(key)) + self.assertAlmostEqual( + value0[key], val, places=4, msg="{0} meter value mismatch!".format(key) + ) + self.assertTrue(key in value1, msg="{0} not in meter value!".format(key)) + self.assertAlmostEqual( + value1[key], val, places=4, msg="{0} meter value mismatch!".format(key) + ) + + def meter_update_and_reset_test( + self, meter, model_outputs, targets, expected_value, **kwargs + ): + """ + This test verifies that a single update on the meter is successful, + resets the meter, then applies the update again. 
    def meter_invalid_update_test(self, meter, model_output, target, **kwargs):
        """
        Verifies that updating the meter with the given inputs raises an
        AssertionError.

        model_output and target may be single values or lists; single values
        are wrapped in a list. kwargs are forwarded to meter.update. The meter
        is not reset before / after running.
        """
        if not isinstance(model_output, list):
            model_output = [model_output]

        if not isinstance(target, list):
            target = [target]

        # The whole loop sits inside assertRaises: the first update that
        # raises AssertionError ends the loop and satisfies the check, and the
        # test fails only if no update raises.
        with self.assertRaises(AssertionError):
            for i in range(len(model_output)):
                meter.update(model_output[i], target[i], **kwargs)
    def meter_distributed_test(
        self, meters, model_outputs, targets, expected_values, is_train=False
    ):
        """
        Sets up two processes each with a given meter on that process.
        Verifies that sync code path works.

        meters must hold exactly 2 meters, model_outputs and targets exactly
        4 entries each (entries 0 and 2 go to the first worker, 1 and 3 to the
        second) and expected_values exactly 2 entries: the value expected
        after the first update round and the value expected after both
        rounds. is_train is forwarded to the workers.
        """
        world_size = len(meters)
        assert world_size == 2, "This test only works for world_size of 2"
        assert len(model_outputs) == 4, (
            "Test assumes 4 model outputs, "
            "0, 2 passed to meter0 and 1, 3 passed to meter1"
        )
        assert (
            len(targets) == 4
        ), "Test assumes 4 targets, 0, 2 passed to meter0 and 1, 3 passed to meter1"
        assert len(expected_values) == 2, (
            "Test assumes 2 expected values, "
            "first is result of applying updates 0,1 to the meter, "
            "second is result of applying all 4 updates to meter"
        )

        # One (input, output, error) queue triple per worker process.
        qins, qouts, qerrs = self._spawn_all_meter_workers(
            world_size, meters, is_train=is_train
        )

        # First update each meter, then get value from each meter
        qins[0].put_nowait((UPDATE_SIGNAL, (model_outputs[0], targets[0])))
        qins[1].put_nowait((UPDATE_SIGNAL, (model_outputs[1], targets[1])))

        # Both value requests are queued before either result is read.
        qins[0].put_nowait((VALUE_SIGNAL, None))
        qins[1].put_nowait((VALUE_SIGNAL, None))

        value0 = _get_value_or_raise_error(qouts[0], qerrs[0])
        value1 = _get_value_or_raise_error(qouts[1], qerrs[1])
        self._values_match_expected_value(value0, value1, expected_values[0])

        # Verify that calling value again does not break things
        qins[0].put_nowait((VALUE_SIGNAL, None))
        qins[1].put_nowait((VALUE_SIGNAL, None))

        value0 = _get_value_or_raise_error(qouts[0], qerrs[0])
        value1 = _get_value_or_raise_error(qouts[1], qerrs[1])
        self._values_match_expected_value(value0, value1, expected_values[0])

        # Second, update each meter, then get value from each meter
        qins[0].put_nowait((UPDATE_SIGNAL, (model_outputs[2], targets[2])))
        qins[1].put_nowait((UPDATE_SIGNAL, (model_outputs[3], targets[3])))

        qins[0].put_nowait((VALUE_SIGNAL, None))
        qins[1].put_nowait((VALUE_SIGNAL, None))

        value0 = _get_value_or_raise_error(qouts[0], qerrs[0])
        value1 = _get_value_or_raise_error(qouts[1], qerrs[1])
        self._values_match_expected_value(value0, value1, expected_values[1])

        # Ask both workers to leave their command loops.
        qins[0].put_nowait((SHUTDOWN_SIGNAL, None))
        qins[1].put_nowait((SHUTDOWN_SIGNAL, None))
params[i].grad = torch.tensor([grad_values]) + + def _set_model_gradient(self, model, grad_values=None): + for param_type in ["regularized_params", "unregularized_params"]: + self._set_gradient(model.get_optimizer_params()[param_type], grad_values) + + def _compare_momentum_values(self, optim1, optim2): + self.assertEqual(len(optim1["param_groups"]), len(optim2["param_groups"])) + + for i in range(len(optim1["param_groups"])): + self.assertEqual( + len(optim1["param_groups"][i]["params"]), + len(optim2["param_groups"][i]["params"]), + ) + + for j in range(len(optim1["param_groups"][i]["params"])): + id1 = optim1["param_groups"][i]["params"][j] + id2 = optim2["param_groups"][i]["params"][j] + self.assertTrue( + torch.allclose( + optim1["state"][id1]["momentum_buffer"], + optim2["state"][id2]["momentum_buffer"], + ) + ) + + def _get_set_state(self, grad_values): + config = self._get_config() + + mock_classy_vision_model = self._get_mock_classy_vision_model() + opt1 = build_optimizer(config) + opt1.init_pytorch_optimizer(mock_classy_vision_model) + + self._set_model_gradient(mock_classy_vision_model, grad_values) + opt1.step() + state = opt1.get_classy_state() + + config["lr"] += 0.1 + opt2 = build_optimizer(config) + opt2.init_pytorch_optimizer(mock_classy_vision_model) + self.assertTrue(isinstance(opt1, self._instance_to_test())) + opt2.set_classy_state(state) + self.assertEqual(opt1.parameters, opt2.parameters) + for i in range(len(opt1.optimizer.param_groups[0]["params"])): + self.assertTrue( + torch.allclose( + opt1.optimizer.param_groups[0]["params"][i], + opt2.optimizer.param_groups[0]["params"][i], + ) + ) + self._compare_momentum_values( + opt1.get_classy_state()["optim"], opt2.get_classy_state()["optim"] + ) + + # check if the optimizers behave the same on params update + mock_classy_vision_model1 = self._get_mock_classy_vision_model() + mock_classy_vision_model2 = self._get_mock_classy_vision_model() + self._set_model_gradient(mock_classy_vision_model1, 
grad_values) + self._set_model_gradient(mock_classy_vision_model2, grad_values) + opt1 = build_optimizer(config) + opt1.init_pytorch_optimizer(mock_classy_vision_model1) + opt2 = build_optimizer(config) + opt2.init_pytorch_optimizer(mock_classy_vision_model2) + opt1.step() + opt2.step() + for i in range(len(opt1.optimizer.param_groups[0]["params"])): + print(opt1.optimizer.param_groups[0]["params"][i]) + self.assertTrue( + torch.allclose( + opt1.optimizer.param_groups[0]["params"][i], + opt2.optimizer.param_groups[0]["params"][i], + ) + ) + self._compare_momentum_values( + opt1.get_classy_state()["optim"], opt2.get_classy_state()["optim"] + ) + + def test_build_sgd(self): + config = self._get_config() + mock_classy_vision_model = self._get_mock_classy_vision_model( + trainable_params=True + ) + opt = build_optimizer(config) + opt.init_pytorch_optimizer(mock_classy_vision_model) + self.assertTrue(isinstance(opt, self._instance_to_test())) + + def test_raise_error_on_non_trainable_params(self): + # Test Raise ValueError if there are no trainable params in the model. 
+ config = self._get_config() + with self.assertRaises(ValueError): + opt = build_optimizer(config) + opt.init_pytorch_optimizer( + self._get_mock_classy_vision_model(trainable_params=False) + ) + + def test_get_set_state(self): + for grad_values in [[0.1, 0.1], [-0.1, -0.1], [0.0, 0.0], [0.1, -0.1]]: + self._get_set_state(grad_values) + + def test_set_invalid_state(self): + config = self._get_config() + mock_classy_vision_model = self._get_mock_classy_vision_model() + opt = build_optimizer(config) + opt.init_pytorch_optimizer(mock_classy_vision_model) + self.assertTrue(isinstance(opt, self._instance_to_test())) + + with self.assertRaises(KeyError): + opt.set_classy_state({}) + + def test_lr_schedule(self): + config = self._get_config() + + mock_classy_vision_model = self._get_mock_classy_vision_model() + opt = build_optimizer(config) + opt.init_pytorch_optimizer(mock_classy_vision_model) + + # Test initial learning rate + for group in opt.optimizer.param_groups: + self.assertEqual(group["lr"], 0.1) + + def _test_lr_schedule(optimizer, num_epochs, epochs, targets): + for i in range(len(epochs)): + epoch = epochs[i] + target = targets[i] + param_groups = optimizer.optimizer.param_groups.copy() + optimizer.update_schedule_on_epoch(epoch / num_epochs) + for idx, group in enumerate(optimizer.optimizer.param_groups): + self.assertEqual(group["lr"], target) + # Make sure all but LR is same + param_groups[idx]["lr"] = target + self.assertEqual(param_groups[idx], group) + + # Test constant learning schedule + num_epochs = 90 + epochs = [0, 0.025, 0.05, 0.1, 0.5, 1, 15, 29, 30, 31, 59, 60, 61, 88, 89] + targets = [0.1] * 15 + _test_lr_schedule(opt, num_epochs, epochs, targets) + + # Test step learning schedule + config["lr"] = {"name": "step", "values": [0.1, 0.01, 0.001]} + opt = build_optimizer(config) + opt.init_pytorch_optimizer(mock_classy_vision_model) + targets = [0.1] * 8 + [0.01] * 3 + [0.001] * 4 + _test_lr_schedule(opt, num_epochs, epochs, targets) + + # Test 
step learning schedule with warmup + init_lr = 0.01 + warmup_epochs = 0.1 + config["lr"] = { + "name": "composite", + "schedulers": [ + {"name": "linear", "start_lr": init_lr, "end_lr": 0.1}, + {"name": "step", "values": [0.1, 0.01, 0.001]}, + ], + "update_interval": "epoch", + "interval_scaling": ["rescaled", "fixed"], + "lengths": [warmup_epochs / num_epochs, 1 - warmup_epochs / num_epochs], + } + + opt = build_optimizer(config) + opt.init_pytorch_optimizer(mock_classy_vision_model) + targets = [0.01, 0.0325, 0.055] + [0.1] * 5 + [0.01] * 3 + [0.001] * 4 + _test_lr_schedule(opt, num_epochs, epochs, targets) diff --git a/test/generic/utils.py b/test/generic/utils.py new file mode 100644 index 0000000000..0d04aa3edd --- /dev/null +++ b/test/generic/utils.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +from functools import wraps + +import torch + + +class Arguments(object): + """Object that looks like input arguments. 
def compare_batches(test_fixture, batch1, batch2):
    """Compare two batches. Does not do recursive comparison.

    Lists / tuples are compared element by element and dicts value by value.
    Any other batch (for example a bare tensor) is compared directly.
    Tensors are compared with ``torch.allclose``; everything else with
    ``assertEqual``.

    Args:
        test_fixture: object providing ``assertEqual`` and ``assertTrue``
            (typically a ``unittest.TestCase``).
        batch1: first batch.
        batch2: second batch; must have the same type as ``batch1``.
    """

    def assert_same_value(value1, value2):
        test_fixture.assertEqual(type(value1), type(value2))
        if torch.is_tensor(value1):
            test_fixture.assertTrue(torch.allclose(value1, value2))
        else:
            test_fixture.assertEqual(value1, value2)

    test_fixture.assertEqual(type(batch1), type(batch2))
    if isinstance(batch1, (tuple, list)):
        test_fixture.assertEqual(len(batch1), len(batch2))
        for value1, value2 in zip(batch1, batch2):
            assert_same_value(value1, value2)

    elif isinstance(batch1, dict):
        test_fixture.assertEqual(batch1.keys(), batch2.keys())
        for key, value1 in batch1.items():
            assert_same_value(value1, batch2[key])

    else:
        # Without this branch two different non-container batches would pass
        # after nothing more than the type check above.
        assert_same_value(batch1, batch2)
def recursive_unpack(batch):
    """
    Takes a batch of samples, e.g.

    batch = {'input': tensor([256, 3, 224, 224]), 'target': tensor([256])}

    and unpacks them into a list of single samples, e.g.

    [{'input': tensor([3, 224, 224]), 'target': tensor([])} ... ]

    (shapes shown; indexing a tensor drops its leading batch dimension).

    Dicts, lists and tuples are unpacked recursively and each sample has the
    same container type as the batch. An empty dict, list or tuple unpacks to
    an empty list.

    Raises:
        TypeError: if the batch is not a dict, list, tuple or tensor.
    """
    if torch.is_tensor(batch):
        return [batch[i] for i in range(batch.size()[0])]

    if isinstance(batch, dict):
        unpacked_dict = {key: recursive_unpack(val) for key, val in batch.items()}
        if not unpacked_dict:
            return []
        # The number of samples is taken from the last key.
        batchsize_per_replica = len(list(unpacked_dict.values())[-1])
        return [
            {key: val[idx] for key, val in unpacked_dict.items()}
            for idx in range(batchsize_per_replica)
        ]

    if isinstance(batch, (list, tuple)):
        unpacked_list = [recursive_unpack(val) for val in batch]
        if not unpacked_list:
            return []
        # The number of samples is taken from the first element.
        batchsize_per_replica = len(unpacked_list[0])
        # Decide the sample type from the batch itself, so tuple batches
        # really produce tuple samples.
        make_sample = tuple if isinstance(batch, tuple) else list
        return [
            make_sample(val[idx] for val in unpacked_list)
            for idx in range(batchsize_per_replica)
        ]

    raise TypeError("Unexpected type %s passed to unpack" % type(batch))
+ """ + # check base_model + compare_model_state( + test_fixture, state_1["base_model"], state_2["base_model"], check_heads + ) + # check losses + test_fixture.assertEqual(len(state_1["losses"]), len(state_2["losses"])) + for loss_1, loss_2 in zip(state_1["losses"], state_2["losses"]): + test_fixture.assertAlmostEqual(loss_1, loss_2) + + for key in ["base_model", "meters", "optimizer", "losses"]: + # we trust that these have been tested using their unit tests or + # by the code above + test_fixture.assertIn(key, state_1) + test_fixture.assertIn(key, state_2) + del state_1[key] + del state_2[key] + test_fixture.assertDictEqual(state_1, state_2) diff --git a/test/generic_util_json_blob_test.json b/test/generic_util_json_blob_test.json new file mode 100644 index 0000000000..24d1af7276 --- /dev/null +++ b/test/generic_util_json_blob_test.json @@ -0,0 +1,34 @@ +{ + "name": "test_task", + "num_epochs": 12, + "loss": { + "name": "test_loss" + }, + "dataset": { + "name": "test_data", + "batchsize_per_replica": 8, + "use_pairs": false, + "num_samples": null, + "use_shuffle": { + "train": true, + "test": false + } + }, + "meters": [ + { + "name": "test_meter", + "test_param": 0.1 + } + ], + "model": { + "name": "test_model", + "architecture": [1, 2, 3, 4] + }, + "optimizer": { + "name": "test_optimizer", + "test_param": { + "name": "test_scheduler", + "values": [0.1, 0.01, 0.001, 0.0001] + } + } +} diff --git a/test/generic_util_test.py b/test/generic_util_test.py new file mode 100644 index 0000000000..f4f9120637 --- /dev/null +++ b/test/generic_util_test.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
@mock.patch("torch.tensor")
def get_mock_tensor(mock_class):
    """
    Returns a stand-in "tensor" produced by a mocked torch.tensor.

    mock.patch replaces torch.tensor while this function runs and passes the
    replacement in as mock_class, so the torch.tensor(...) calls below return
    the mock's return value rather than a real tensor.
    """
    def get_cuda_tensor():
        # torch.tensor is patched here, so t is the mock's return value.
        t = torch.tensor([1, 2, 3])
        t.is_cuda = True
        return t

    # Calling .cuda() on the returned mock yields the object flagged above.
    mock_class.return_value.cuda.return_value = get_cuda_tensor()
    # NOTE(review): this sets is_cuda on the patched callable, not on the
    # object returned below. That object appears to be the same mock that
    # get_cuda_tensor() flagged with is_cuda = True -- confirm whether
    # mock_class.return_value.is_cuda = False was intended.
    mock_class.is_cuda = False
    return torch.tensor([1, 2, 3])
0, 2, 0, 2, 0, 2, 0, 4, 1] + ), + ], + dim=1, + ).long() + + def test_create_class_histograms_success(self): + pred_probs = self._get_base_pred_probs() + targets = self._get_base_targets() + + class_hist, total_hist = util.create_class_histograms(pred_probs, targets, 20) + torch.testing.assert_allclose(class_hist, self._get_base_class_hist()) + torch.testing.assert_allclose(total_hist, self._get_base_total_hist()) + + def test_create_class_histograms_fail(self): + pred_probs = self._get_base_pred_probs() + targets = self._get_base_targets() + + # Torch tensors only + with self.assertRaises(AssertionError): + class_hist, total_hist = util.create_class_histograms( + pred_probs.numpy(), targets, 20 + ) + + # Torch tensors only + with self.assertRaises(AssertionError): + class_hist, total_hist = util.create_class_histograms( + pred_probs, targets.numpy(), 20 + ) + + # Prediction and target are same size + with self.assertRaises(AssertionError): + class_hist, total_hist = util.create_class_histograms( + pred_probs[0:5, :], targets, 20 + ) + + # Prediction is between 0 and 1 + with self.assertRaises(AssertionError): + pred_probs[0, :] = torch.tensor([-0.1, 1.1]) + class_hist, total_hist = util.create_class_histograms( + pred_probs, targets, 20 + ) + + def test_compute_pr_curves(self): + class_hist = self._get_base_class_hist() + total_hist = self._get_base_total_hist() + + pr_curves = util.compute_pr_curves(class_hist, total_hist) + # For curves without duplicates removed / precisions cleaned + # up, see: P60302268 + exp_pos_prec = torch.tensor( + [ + 3.0 / 5.0, + 4.0 / 7.0, + 6.0 / 9.0, + 6.0 / 11.0, + 8.0 / 13.0, + 9.0 / 15.0, + 10.0 / 17.0, + 10.0 / 19.0, + 10.0 / 20.0, + 11.0 / 22.0, + ], + dtype=torch.double, + ) + exp_pos_recall = torch.tensor( + [ + 3.0 / 11.0, + 4.0 / 11.0, + 6.0 / 11.0, + 6.0 / 11.0, + 8.0 / 11.0, + 9.0 / 11.0, + 10.0 / 11.0, + 10.0 / 11.0, + 10.0 / 11.0, + 11.0 / 11.0, + ], + dtype=torch.double, + ) + + exp_neg_prec = torch.tensor( + [ + 1.0 / 
2.0, + 2.0 / 3.0, + 4.0 / 5.0, + 5.0 / 7.0, + 6.0 / 9.0, + 6.0 / 11.0, + 8.0 / 13.0, + 8.0 / 15.0, + 9.0 / 17.0, + 10.0 / 21.0, + 11.0 / 22.0, + ], + dtype=torch.double, + ) + exp_neg_recall = torch.tensor( + [ + 1.0 / 11.0, + 2.0 / 11.0, + 4.0 / 11.0, + 5.0 / 11.0, + 6.0 / 11.0, + 6.0 / 11.0, + 8.0 / 11.0, + 8.0 / 11.0, + 9.0 / 11.0, + 10.0 / 11.0, + 11.0 / 11.0, + ], + dtype=torch.double, + ) + + torch.testing.assert_allclose(pr_curves["prec"][1], exp_pos_prec) + torch.testing.assert_allclose(pr_curves["prec"][0], exp_neg_prec) + + torch.testing.assert_allclose(pr_curves["recall"][1], exp_pos_recall) + torch.testing.assert_allclose(pr_curves["recall"][0], exp_neg_recall) + + torch.testing.assert_allclose( + pr_curves["ap"][1], torch.tensor(0.589678058127256).double() + ) + torch.testing.assert_allclose( + pr_curves["ap"][0], torch.tensor(0.6073388287292031).double() + ) + + def test_compute_pr_curves_fail(self): + class_hist = self._get_base_class_hist() + total_hist = self._get_base_total_hist() + + # invalid histograms + with self.assertRaises(AssertionError): + class_hist += torch.ones(class_hist.size(), dtype=torch.int64) * 100 + util.compute_pr_curves(class_hist, total_hist) + + # Doesn't accept numpy + with self.assertRaises(AssertionError): + util.compute_pr_curves(class_hist.numpy(), total_hist) + + with self.assertRaises(AssertionError): + util.compute_pr_curves(class_hist, total_hist.numpy()) + + # Longs only + with self.assertRaises(AssertionError): + util.compute_pr_curves(class_hist.float(), total_hist.float()) + + # Bad tensor size + with self.assertRaises(AssertionError): + util.compute_pr_curves(class_hist.view(40, 1), total_hist) + + with self.assertRaises(AssertionError): + util.compute_pr_curves(class_hist, total_hist.view(40, 1)) + + def test_recursive_copy_to_gpu(self): + tensor_a = get_mock_tensor() + tensor_b = get_mock_tensor() + + valid_gpu_copy_value = tensor_a + gpu_value = util.recursive_copy_to_gpu(valid_gpu_copy_value) + 
self.assertTrue(gpu_value.is_cuda) + + valid_recursive_copy_value = [[tensor_a]] + gpu_value = util.recursive_copy_to_gpu(valid_recursive_copy_value) + self.assertTrue(gpu_value[0][0].is_cuda) + + valid_gpu_copy_collections = [ + (tensor_a, tensor_b), + [tensor_a, tensor_b], + {"tensor_a": tensor_a, "tensor_b": tensor_b}, + ] + for value in valid_gpu_copy_collections: + gpu_value = util.recursive_copy_to_gpu(value) + if isinstance(value, dict): + self.assertTrue(gpu_value["tensor_a"].is_cuda) + self.assertTrue(gpu_value["tensor_b"].is_cuda) + else: + self.assertEqual(len(gpu_value), 2) + self.assertTrue(gpu_value[0].is_cuda) + self.assertTrue(gpu_value[1].is_cuda) + + invalid_gpu_copy_values = [1234, True, 1.0] + for value in invalid_gpu_copy_values: + with self.assertRaises(AttributeError): + gpu_value = util.recursive_copy_to_gpu(value) + + invalid_gpu_copy_depth = [ + ((((tensor_a, tensor_b), tensor_b), tensor_b), tensor_b), + {"tensor_map_a": {"tensor_map_b": {"tensor_map_c": {"tensor": tensor_a}}}}, + [[[[tensor_a, tensor_b], tensor_b], tensor_b], tensor_b], + "abcd", # Strings are sequences, includeing single char strings + ] + for value in invalid_gpu_copy_depth: + with self.assertRaises(ValueError): + gpu_value = util.recursive_copy_to_gpu(value, max_depth=3) + + _json_config_file = ROOT / "generic_util_json_blob_test.json" + + def _get_config(self): + return { + "name": "test_task", + "num_epochs": 12, + "loss": {"name": "test_loss"}, + "dataset": { + "name": "test_data", + "batchsize_per_replica": 8, + "use_pairs": False, + "num_samples": None, + "use_shuffle": {"train": True, "test": False}, + }, + "meters": [{"name": "test_meter", "test_param": 0.1}], + "model": {"name": "test_model", "architecture": [1, 2, 3, 4]}, + "optimizer": { + "name": "test_optimizer", + "test_param": { + "name": "test_scheduler", + "values": [0.1, 0.01, 0.001, 0.0001], + }, + }, + } + + def test_load_config(self): + expected_config = self._get_config() + config = 
util.load_json(self._json_config_file) + + self.assertEqual(config, expected_config) + + def test_torch_seed(self): + # test that using util.torch_seed doesn't impact the generation of + # random numbers outside its context and that random numbers generated + # within its context are the same as setting a manual seed + torch.manual_seed(0) + torch.randn(10) + random_tensor_1 = torch.randn(10) + torch.manual_seed(0) + torch.randn(10) + with util.torch_seed(1): + random_tensor_2 = torch.randn(10) + self.assertTrue(torch.equal(torch.randn(10), random_tensor_1)) + torch.manual_seed(1) + self.assertTrue(torch.equal(torch.randn(10), random_tensor_2)) + + def test_get_model_dummy_input(self): + for config in get_test_model_configs(): + model = build_model(config) # pass in a dummy model for the cuda check + batchsize = 8 + # input_key is list + input_key = ["audio", "video"] + input_shape = [[3, 40, 100], [4, 16, 223, 223]] # dummy input shapes + result = util.get_model_dummy_input( + model, input_shape, input_key, batchsize + ) + self.assertEqual(result.keys(), {"audio", "video"}) + for i in range(len(input_key)): + self.assertEqual( + result[input_key[i]].size(), tuple([batchsize] + input_shape[i]) + ) + # input_key is string + input_key = "video" + input_shape = [4, 16, 223, 223] + result = util.get_model_dummy_input( + model, input_shape, input_key, batchsize + ) + self.assertEqual(result.keys(), {"video"}) + self.assertEqual(result[input_key].size(), tuple([batchsize] + input_shape)) + # input_key is None + input_key = None + input_shape = [4, 16, 223, 223] + result = util.get_model_dummy_input( + model, input_shape, input_key, batchsize + ) + self.assertEqual(result.size(), tuple([batchsize] + input_shape)) + + +class TestUpdateStateFunctions(unittest.TestCase): + def _compare_states(self, state_1, state_2, check_heads=True): + compare_states(self, state_1, state_2) + + def _compare_model_state(self, state_1, state_2, check_heads=True): + return 
compare_model_state(self, state_1, state_2, check_heads=check_heads) + + def test_update_classy_state(self): + """ + Tests that the update_classy_state successfully updates from a + checkpoint + """ + config = get_fast_test_task_config() + task = build_task(config) + task_2 = build_task(config) + task_2.prepare() + trainer = LocalTrainer(use_gpu=False) + trainer.train(task) + update_classy_state(task_2, task.get_classy_state(deep_copy=True)) + self._compare_states(task.get_classy_state(), task_2.get_classy_state()) + + def test_update_classy_model(self): + """ + Tests that the update_classy_model successfully updates from a + checkpoint + """ + config = get_fast_test_task_config() + task = build_task(config) + use_gpu = torch.cuda.is_available() + trainer = LocalTrainer(use_gpu=use_gpu) + trainer.train(task) + for reset_heads in [False, True]: + task_2 = build_task(config) + # prepare task_2 for the right device + task_2.prepare(use_gpu=use_gpu) + update_classy_model( + task_2.model, task.model.get_classy_state(deep_copy=True), reset_heads + ) + self._compare_model_state( + task.model.get_classy_state(), + task_2.model.get_classy_state(), + check_heads=not reset_heads, + ) + if reset_heads: + # the model head states should be different + with self.assertRaises(Exception): + self._compare_model_state( + task.model.get_classy_state(), + task_2.model.get_classy_state(), + check_heads=True, + ) diff --git a/test/hooks_checkpoint_hook_test.py b/test/hooks_checkpoint_hook_test.py new file mode 100644 index 0000000000..147588cc8d --- /dev/null +++ b/test/hooks_checkpoint_hook_test.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+
+import os
+import shutil
+import tempfile
+import unittest
+from test.generic.config_utils import get_test_task_config
+
+from classy_vision.generic.util import load_checkpoint
+from classy_vision.hooks import CheckpointHook
+from classy_vision.tasks import build_task
+
+
+class TestCheckpointHook(unittest.TestCase):
+    def setUp(self) -> None:
+        self.base_dir = tempfile.mkdtemp()
+
+    def tearDown(self) -> None:
+        shutil.rmtree(self.base_dir)
+
+    def test_state_checkpointing(self) -> None:
+        """
+        Test that the state gets checkpointed without any errors, but only on the
+        right phase_type and only if the checkpoint directory exists.
+        """
+        config = get_test_task_config()
+        task = build_task(config)
+        task.prepare()
+
+        local_variables = {}
+        checkpoint_folder = self.base_dir + "/checkpoint_end_test/"
+        device = "cpu"
+        input_args = {"foo": "bar"}
+
+        # create a checkpoint hook
+        checkpoint_hook = CheckpointHook(
+            checkpoint_folder, input_args, phase_types=["train"]
+        )
+
+        # checkpoint directory doesn't exist
+        # call the on start function
+        with self.assertRaises(FileNotFoundError):
+            checkpoint_hook.on_start(task, local_variables)
+        # call the on end phase function
+        with self.assertRaises(AssertionError):
+            checkpoint_hook.on_phase_end(task, local_variables)
+        # try loading a non-existent checkpoint
+        checkpoint = load_checkpoint(checkpoint_folder, device)
+        self.assertIsNone(checkpoint)
+
+        # create checkpoint dir, verify on_start hook runs
+        os.mkdir(checkpoint_folder)
+        checkpoint_hook.on_start(task, local_variables)
+
+        # Phase_type is test, expect no checkpoint
+        task.train = False
+        # call the on end phase function
+        checkpoint_hook.on_phase_end(task, local_variables)
+        checkpoint = load_checkpoint(checkpoint_folder, device)
+        self.assertIsNone(checkpoint)
+
+        task.train = True
+        # call the on end phase function
+        checkpoint_hook.on_phase_end(task, local_variables)
+        # model should be checkpointed. load and compare
+        checkpoint = load_checkpoint(checkpoint_folder, device)
+        self.assertIsNotNone(checkpoint)
+        for key in ["input_args", "classy_state_dict"]:
+            self.assertIn(key, checkpoint)
+        # not testing for equality of classy_state_dict, that is tested in
+        # a separate test
+        self.assertDictEqual(checkpoint["input_args"], input_args)
+
+    def test_checkpoint_period(self) -> None:
+        """
+        Test that no checkpoint is saved before checkpoint_period valid phases.
+        """
+        config = get_test_task_config()
+        task = build_task(config)
+        task.prepare()
+
+        local_variables = {}
+        checkpoint_folder = self.base_dir + "/checkpoint_end_test/"
+        device = "cpu"
+        checkpoint_period = 10
+
+        for phase_types in [["train"], ["train", "test"]]:
+            # create a checkpoint hook
+            checkpoint_hook = CheckpointHook(
+                checkpoint_folder,
+                {},
+                phase_types=phase_types,
+                checkpoint_period=checkpoint_period,
+            )
+
+            # create checkpoint dir
+            os.mkdir(checkpoint_folder)
+
+            # call the on start function
+            checkpoint_hook.on_start(task, local_variables)
+
+            # shouldn't create any checkpoints until there are checkpoint_period
+            # phases which are in phase_types
+            count = 0
+            valid_phase_count = 0
+            while valid_phase_count < checkpoint_period - 1:
+                task.train = count % 2 == 0
+                # call the on end phase function
+                checkpoint_hook.on_phase_end(task, local_variables)
+                checkpoint = load_checkpoint(checkpoint_folder, device)
+                self.assertIsNone(checkpoint)
+                valid_phase_count += 1 if task.phase_type in phase_types else 0
+                count += 1
+
+            # create a phase which is in phase_types
+            task.train = True
+            # call the on end phase function
+            checkpoint_hook.on_phase_end(task, local_variables)
+            # model should be checkpointed. load and compare
+            checkpoint = load_checkpoint(checkpoint_folder, device)
+            self.assertIsNotNone(checkpoint)
+            # delete the checkpoint dir so the next iteration can recreate it
+            shutil.rmtree(checkpoint_folder)
diff --git a/test/hooks_classy_hook_test.py b/test/hooks_classy_hook_test.py
new file mode 100644
index 0000000000..d7a22e37de
--- /dev/null
+++ b/test/hooks_classy_hook_test.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+from classy_vision.hooks import ClassyHook
+
+
+class TestHook(ClassyHook):
+    on_rendezvous = ClassyHook._noop
+    on_start = ClassyHook._noop
+    on_phase_start = ClassyHook._noop
+    on_sample = ClassyHook._noop
+    on_forward = ClassyHook._noop
+    on_loss_and_meter = ClassyHook._noop
+    on_backward = ClassyHook._noop
+    on_update = ClassyHook._noop
+    on_phase_end = ClassyHook._noop
+    on_end = ClassyHook._noop
+
+    def __init__(self, a, b):
+        super().__init__()
+        self.state.a = a
+        self.state.b = b
+
+
+class TestClassyHook(unittest.TestCase):
+    def test_state_dict(self):
+        a = 0
+        b = {1: 2, 3: [4]}
+        test_hook = TestHook(a, b)
+        state_dict = test_hook.get_classy_state()
+        # create a new test_hook and set its state to the old hook's.
+        test_hook = TestHook("", 0)
+        test_hook.set_classy_state(state_dict)
+        self.assertEqual(test_hook.state.a, a)
+        self.assertEqual(test_hook.state.b, b)
diff --git a/test/hooks_exponential_moving_average_model_hook_test.py b/test/hooks_exponential_moving_average_model_hook_test.py
new file mode 100644
index 0000000000..6248bfba39
--- /dev/null
+++ b/test/hooks_exponential_moving_average_model_hook_test.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+import unittest
+import unittest.mock as mock
+
+import torch
+import torch.nn as nn
+from classy_vision.hooks import ExponentialMovingAverageModelHook
+from classy_vision.models import ClassyModel
+
+
+class TestModel(ClassyModel):
+    def __init__(self):
+        super().__init__()
+        self.fc = nn.Linear(10, 10)
+        self.bn = nn.BatchNorm1d(10)
+
+    def init_fc_weight(self):
+        nn.init.zeros_(self.fc.weight)
+
+    def update_fc_weight(self):
+        nn.init.ones_(self.fc.weight)
+
+    def forward(self, x):
+        return self.bn(self.fc(x))
+
+
+class TestExponentialMovingAverageModelHook(unittest.TestCase):
+    def _map_device_string(self, device):
+        return "cuda" if device == "gpu" else "cpu"
+
+    def _test_exponential_moving_average_hook(self, model_device, hook_device):
+        task = mock.MagicMock()
+        model = TestModel().to(device=self._map_device_string(model_device))
+        local_variables = {}
+        task.base_model = model
+        task.train = True
+        decay = 0.5
+        num_updates = 10
+        model.init_fc_weight()
+        exponential_moving_average_hook = ExponentialMovingAverageModelHook(
+            decay=decay, device=hook_device
+        )
+
+        exponential_moving_average_hook.on_start(task, local_variables)
+        exponential_moving_average_hook.on_phase_start(task, local_variables)
+        # set the weights to all ones and simulate num_updates updates
+        task.base_model.update_fc_weight()
+        fc_weight = model.fc.weight.clone()
+        for _ in range(num_updates):
+            exponential_moving_average_hook.on_update(task, local_variables)
+        exponential_moving_average_hook.on_phase_end(task, local_variables)
+        # the model weights shouldn't have changed
+        self.assertTrue(torch.allclose(model.fc.weight, fc_weight))
+
+        # simulate a test phase now
+        task.train = False
+        exponential_moving_average_hook.on_phase_start(task, local_variables)
+        exponential_moving_average_hook.on_phase_end(task, local_variables)
+
+        # the weights should now be the ema: fc_weight * (1 - (1 - decay) ** n)
+        self.assertTrue(
+            torch.allclose(
+                model.fc.weight, fc_weight * (1 - math.pow(1 - decay, num_updates))
+            )
+        )
+
+        # simulate a train phase again
+        task.train = True
+        exponential_moving_average_hook.on_phase_start(task, local_variables)
+
+        # the model weights should be back to the old value
+        self.assertTrue(torch.allclose(model.fc.weight, fc_weight))
+
+    def test_get_model_state_iterator(self):
+        device = "gpu" if torch.cuda.is_available() else "cpu"
+        model = TestModel().to(device=self._map_device_string(device))
+        decay = 0.5
+        # test that we pick up the right parameters in the iterator
+        for consider_bn_buffers in [True, False]:
+            exponential_moving_average_hook = ExponentialMovingAverageModelHook(
+                decay=decay, consider_bn_buffers=consider_bn_buffers, device=device
+            )
+            iterable = exponential_moving_average_hook.get_model_state_iterator(model)
+            fc_found = False
+            bn_found = False
+            bn_buffer_found = False
+            for _, param in iterable:
+                if any(param is item for item in model.fc.parameters()):
+                    fc_found = True
+                if any(param is item for item in model.bn.parameters()):
+                    bn_found = True
+                if any(param is item for item in model.bn.buffers()):
+                    bn_buffer_found = True
+            self.assertTrue(fc_found)
+            self.assertTrue(bn_found)
+            self.assertEqual(bn_buffer_found, consider_bn_buffers)
+
+    def test_exponential_moving_average_hook(self):
+        device = "gpu" if torch.cuda.is_available() else "cpu"
+        self._test_exponential_moving_average_hook(device, device)
+
+    @unittest.skipUnless(torch.cuda.is_available(), "This test needs a gpu to run")
+    def test_mixed_devices(self):
+        """Tests that the hook works when the model and hook's device are different"""
+        self._test_exponential_moving_average_hook("cpu", "gpu")
+        self._test_exponential_moving_average_hook("gpu", "cpu")
diff --git a/test/hooks_loss_lr_meter_logging_hook_test.py b/test/hooks_loss_lr_meter_logging_hook_test.py
new file mode 100644
index 0000000000..79fabc98f8
--- /dev/null
+++ b/test/hooks_loss_lr_meter_logging_hook_test.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc.
and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+import unittest.mock as mock
+from itertools import product
+from test.generic.config_utils import get_test_mlp_task_config, get_test_task_config
+
+from classy_vision.hooks import LossLrMeterLoggingHook
+from classy_vision.optim.param_scheduler import UpdateInterval
+from classy_vision.tasks import ClassyTask, build_task
+from classy_vision.trainer import LocalTrainer
+
+
+class TestLossLrMeterLoggingHook(unittest.TestCase):
+    @mock.patch("classy_vision.hooks.loss_lr_meter_logging_hook.get_rank")
+    def test_logging(self, mock_get_rank: mock.MagicMock) -> None:
+        """
+        Test that the logging happens as expected and the loss and lr values are
+        correct.
+        """
+        rank = 5
+        mock_get_rank.return_value = rank
+
+        # set up the task and state
+        config = get_test_task_config()
+        config["dataset"]["train"]["batchsize_per_replica"] = 2
+        config["dataset"]["test"]["batchsize_per_replica"] = 5
+        task = build_task(config)
+        task.prepare()
+
+        losses = [1.2, 2.3, 3.4, 4.5]
+
+        local_variables = {}
+        task.phase_idx = 0
+
+        loss_vals = {"train": 1.425, "test": 0.57}
+
+        for log_freq, phase_type in product([5, None], loss_vals):
+            task.train = phase_type == "train"
+
+            # create a loss lr meter hook
+            loss_lr_meter_hook = LossLrMeterLoggingHook(log_freq=log_freq)
+
+            # check that _log_loss_meters() is called after on_loss_and_meter() every
+            # log_freq batches and after on_phase_end()
+            # and _log_lr() is called after on_update() every log_freq batches
+            # and after on_phase_end()
+            with mock.patch.object(loss_lr_meter_hook, "_log_loss_meters") as mock_fn:
+                with mock.patch.object(loss_lr_meter_hook, "_log_lr") as mock_lr_fn:
+                    num_batches = 20
+
+                    for i in range(num_batches):
+                        task.losses = list(range(i))
+                        loss_lr_meter_hook.on_loss_and_meter(task, local_variables)
+                        loss_lr_meter_hook.on_update(task, local_variables)
+                        if log_freq is not None and i and i % log_freq == 0:
+                            mock_fn.assert_called_with(task, local_variables)
+                            mock_fn.reset_mock()
+                            mock_lr_fn.assert_called_with(task, local_variables)
+                            mock_lr_fn.reset_mock()
+                            continue
+                        mock_fn.assert_not_called()
+                        mock_lr_fn.assert_not_called()
+
+                    loss_lr_meter_hook.on_phase_end(task, local_variables)
+                    mock_fn.assert_called_with(task, local_variables)
+                    if task.train:
+                        mock_lr_fn.assert_called_with(task, local_variables)
+
+            # test the unmocked _log_loss_meters() and _log_lr()
+            task.losses = losses
+
+            with self.assertLogs():
+                loss_lr_meter_hook._log_loss_meters(task, local_variables)
+                loss_lr_meter_hook._log_lr(task, local_variables)
+
+            task.phase_idx += 1
+
+    def test_logged_lr(self):
+        # Mock LR scheduler
+        def scheduler_mock(where):
+            return where
+
+        mock_lr_scheduler = mock.Mock(side_effect=scheduler_mock)
+        mock_lr_scheduler.update_interval = UpdateInterval.STEP
+        config = get_test_mlp_task_config()
+        config["num_epochs"] = 3
+        config["dataset"]["train"]["batchsize_per_replica"] = 5
+        config["dataset"]["test"]["batchsize_per_replica"] = 5
+        task = build_task(config)
+        task.optimizer.lr_scheduler = mock_lr_scheduler
+        trainer = LocalTrainer()
+
+        # 2 LR updates per epoch
+        # At end of each epoch for train, LR is logged an additional time
+        lr_order = [0.0, 1 / 6, 1 / 6, 2 / 6, 3 / 6, 3 / 6, 4 / 6, 5 / 6, 5 / 6]
+        lr_list = []
+
+        def mock_log_lr(task: ClassyTask, local_variables) -> None:
+            lr_list.append(task.optimizer.lr)
+
+        with mock.patch.object(
+            LossLrMeterLoggingHook, "_log_lr", side_effect=mock_log_lr
+        ):
+            hook = LossLrMeterLoggingHook(1)
+            task.set_hooks([hook])
+            trainer.train(task)
+            self.assertEqual(lr_list, lr_order)
diff --git a/test/hooks_profiler_hook_test.py b/test/hooks_profiler_hook_test.py
new file mode 100644
index 0000000000..b8168ebede
--- /dev/null
+++ b/test/hooks_profiler_hook_test.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+import unittest.mock as mock
+from test.generic.config_utils import get_test_classy_task, get_test_classy_video_task
+
+from classy_vision.hooks import ProfilerHook
+
+
+class TestProfilerHook(unittest.TestCase):
+    @mock.patch("torch.autograd.profiler.profile", autospec=True)
+    @mock.patch("classy_vision.hooks.profiler_hook.summarize_profiler_info")
+    def test_profiler(
+        self,
+        mock_summarize_profiler_info: mock.MagicMock,
+        mock_profile_cls: mock.MagicMock,
+    ) -> None:
+        """
+        Tests that a profile instance is returned by the profiler
+        and that the profiler actually ran.
+        """
+        mock_summarize_profiler_info.return_value = ""
+
+        mock_profile = mock.MagicMock()
+        mock_profile_returned = mock.MagicMock()
+        mock_profile.__enter__.return_value = mock_profile_returned
+        mock_profile_cls.return_value = mock_profile
+
+        for task in [get_test_classy_task(), get_test_classy_video_task()]:
+            task.prepare()
+            local_variables = {}
+
+            # create a profiler hook
+            profiler_hook = ProfilerHook()
+
+            with self.assertLogs():
+                profiler_hook.on_start(task, local_variables)
+
+            # a new profile should be created with use_cuda=True
+            mock_profile_cls.assert_called_once_with(use_cuda=True)
+            mock_profile_cls.reset_mock()
+
+            # summarize_profiler_info should have been called once with the profile
+            mock_summarize_profiler_info.assert_called_once()
+            profile = mock_summarize_profiler_info.call_args[0][0]
+            mock_summarize_profiler_info.reset_mock()
+            self.assertEqual(profile, mock_profile_returned)
diff --git a/test/hooks_time_metrics_hook_test.py b/test/hooks_time_metrics_hook_test.py
new file mode 100644
index 0000000000..9b3ac0706d
--- /dev/null
+++ b/test/hooks_time_metrics_hook_test.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import re
+import unittest
+import unittest.mock as mock
+from itertools import product
+from test.generic.config_utils import get_test_classy_task
+
+from classy_vision.generic.perf_stats import PerfStats
+from classy_vision.hooks import TimeMetricsHook
+
+
+class TestTimeMetricsHook(unittest.TestCase):
+    @mock.patch("time.time")
+    @mock.patch("classy_vision.hooks.time_metrics_hook.PerfStats.report_str")
+    @mock.patch("classy_vision.hooks.time_metrics_hook.get_rank")
+    def test_time_metrics(
+        self,
+        mock_get_rank: mock.MagicMock,
+        mock_report_str: mock.MagicMock,
+        mock_time: mock.MagicMock,
+    ) -> None:
+        """
+        Tests that performance metrics are logged at the expected times.
+        """
+        rank = 5
+        mock_get_rank.return_value = rank
+
+        mock_report_str.return_value = ""
+        local_variables = {}
+
+        for log_freq, train in product([5, None], [True, False]):
+            # create a time metrics hook
+            time_metrics_hook = TimeMetricsHook(log_freq=log_freq)
+
+            phase_type = "train" if train else "test"
+
+            task = get_test_classy_task()
+            task.prepare()
+            task.train = train
+
+            # on_phase_start() should set the start time and perf_stats
+            start_time = 1.2
+            mock_time.return_value = start_time
+            time_metrics_hook.on_phase_start(task, local_variables)
+            self.assertEqual(time_metrics_hook.start_time, start_time)
+            self.assertTrue(isinstance(local_variables.get("perf_stats"), PerfStats))
+
+            # test that the code doesn't raise an exception if losses is empty
+            try:
+                time_metrics_hook.on_phase_end(task, local_variables)
+            except Exception as e:
+                self.fail("Received Exception when losses is []: {}".format(e))
+
+            # check that _log_performance_metrics() is called after on_loss_and_meter()
+            # every log_freq batches and after on_phase_end()
+            with mock.patch.object(
+                time_metrics_hook, "_log_performance_metrics"
+            ) as mock_fn:
+                num_batches = 20
+
+                for i in range(num_batches):
+                    task.losses = list(range(i))
+                    time_metrics_hook.on_loss_and_meter(task, local_variables)
+                    if log_freq is not None and i and i % log_freq == 0:
+                        mock_fn.assert_called_with(task, local_variables)
+                        mock_fn.reset_mock()
+                        continue
+                    mock_fn.assert_not_called()
+
+                time_metrics_hook.on_phase_end(task, local_variables)
+                mock_fn.assert_called_with(task, local_variables)
+
+            task.losses = [0.23, 0.45, 0.34, 0.67]
+
+            end_time = 10.4
+            avg_batch_time_ms = 2.3 * 1000
+            mock_time.return_value = end_time
+
+            # test _log_performance_metrics()
+            with self.assertLogs() as log_watcher:
+                time_metrics_hook._log_performance_metrics(task, local_variables)
+
+            # there should be 2 info logs for train and 1 for test
+            self.assertEqual(len(log_watcher.output), 2 if train else 1)
+            self.assertTrue(
+                all(
+                    log_record.levelno == logging.INFO
+                    for log_record in log_watcher.records
+                )
+            )
+            match = re.search(
+                (
+                    r"Average {} batch time \(ms\) for {} batches: "
+                    r"(?P<avg_batch_time>[-+]?\d*\.\d+|\d+)"
+                ).format(phase_type, len(task.losses)),
+                log_watcher.output[0],
+            )
+            self.assertIsNotNone(match)
+            self.assertAlmostEqual(
+                avg_batch_time_ms, float(match.group("avg_batch_time")), places=4
+            )
+            if train:
+                self.assertIn(
+                    f"Train step time breakdown (rank {rank})", log_watcher.output[1]
+                )
+
+            # if on_phase_start() is not called, 2 warnings should be logged
+            # create a new time metrics hook
+            local_variables = {}
+            time_metrics_hook_new = TimeMetricsHook()
+
+            with self.assertLogs() as log_watcher:
+                time_metrics_hook_new.on_phase_end(task, local_variables)
+
+            self.assertEqual(len(log_watcher.output), 2)
+            self.assertTrue(
+                all(
+                    log_record.levelno == logging.WARNING
+                    for log_record in log_watcher.records
+                )
+            )
diff --git a/test/hub_classy_hub_interface_test.py b/test/hub_classy_hub_interface_test.py
new file mode 100644
index 0000000000..a74869465a
--- /dev/null
+++ b/test/hub_classy_hub_interface_test.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import shutil
+import tempfile
+import unittest
+from test.generic.config_utils import get_test_task_config
+
+import torch
+from classy_vision.dataset.transforms import ClassyTransform
+from classy_vision.hub import ClassyHubInterface
+from classy_vision.models import ClassyModel, build_model
+from classy_vision.tasks import ClassyTask, build_task
+from torchvision import models, transforms
+
+
+class TestTransform(ClassyTransform):
+    def __call__(self, x):
+        return x
+
+
+class TestClassyHubInterface(unittest.TestCase):
+    def setUp(self):
+        # create a base directory to write image files to
+        self.base_dir = tempfile.mkdtemp()
+        self.image_path = self.base_dir + "/img.jpg"
+        # create an image with a non standard size
+        image_tensor = torch.zeros((3, 1000, 2500), dtype=torch.float)
+        transforms.ToPILImage()(image_tensor).save(self.image_path)
+
+    def tearDown(self):
+        # delete all the temporary data created
+        shutil.rmtree(self.base_dir)
+
+    def _test_predict_and_extract_features(self, hub_interface: ClassyHubInterface):
+        dataset = hub_interface.create_image_dataset(
+            [self.image_path], phase_type="test"
+        )
+        data_iterator = hub_interface.get_data_iterator(dataset)
+        sample = next(data_iterator)  # named sample to avoid shadowing builtin input
+        # set the model to eval mode
+        hub_interface.eval()
+        output = hub_interface.predict(sample)
+        self.assertIsNotNone(output)
+        # check that the predicted class index can be read from the prediction
+        hub_interface.predict(sample).argmax().item()
+        # check extract features
+        output = hub_interface.extract_features(sample)
+        self.assertIsNotNone(output)
+
+    def _get_classy_model(self):
+        config = get_test_task_config()
+        model_config = config["model"]
+        return build_model(model_config)
+
+    def _get_non_classy_model(self):
+        return models.resnet18(pretrained=False)
+
+    def test_from_task(self):
+        config = get_test_task_config()
+        task = build_task(config)
+        hub_interface = ClassyHubInterface.from_task(task)
+
+        self.assertIsInstance(hub_interface.task, ClassyTask)
+        self.assertIsInstance(hub_interface.model, ClassyModel)
+
+        # this will pick up the transform from the task's config
+        self._test_predict_and_extract_features(hub_interface)
+
+        # test that the correct transform is picked up
+        phase_type = "test"
+        test_transform = TestTransform()
+        task.datasets[phase_type].transform = test_transform
+        hub_interface = ClassyHubInterface.from_task(task)
+        dataset = hub_interface.create_image_dataset(
+            [self.image_path], phase_type=phase_type
+        )
+        self.assertIsInstance(dataset.transform, TestTransform)
+
+    def test_from_model(self):
+        for model in [self._get_classy_model(), self._get_non_classy_model()]:
+            hub_interface = ClassyHubInterface.from_model(model)
+
+            self.assertIsNone(hub_interface.task)
+            self.assertIsInstance(hub_interface.model, ClassyModel)
+
+            # this will pick up the transform from imagenet
+            self._test_predict_and_extract_features(hub_interface)
diff --git a/test/losses_barron_loss_test.py b/test/losses_barron_loss_test.py
new file mode 100644
index 0000000000..1667932704
--- /dev/null
+++ b/test/losses_barron_loss_test.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+ +import copy +import unittest + +import torch +from classy_vision.losses import BarronLoss, build_loss + + +class TestBarronLoss(unittest.TestCase): + def _get_config(self): + return {"name": "barron", "size_average": True, "alpha": 1.0, "c": 1.0} + + def _get_outputs(self): + return torch.tensor([[2.0]]) + + def _get_targets(self): + return torch.tensor([3.0]) + + def test_build_barron(self): + config = self._get_config() + crit = build_loss(config) + self.assertTrue(isinstance(crit, BarronLoss)) + self.assertEqual(crit.size_average, config["size_average"]) + self.assertAlmostEqual(crit.alpha, config["alpha"]) + self.assertAlmostEqual(crit.c, config["c"]) + + def test_barron(self): + config = self._get_config() + crit = BarronLoss.from_config(config) + outputs = self._get_outputs() + targets = self._get_targets() + self.assertAlmostEqual(crit(outputs, targets).item(), 0.41421353816986084) + + # Alpha = 0 + config = self._get_config() + config["alpha"] = 0.0 + crit = BarronLoss.from_config(config) + outputs = self._get_outputs() + targets = self._get_targets() + self.assertAlmostEqual(crit(outputs, targets).item(), 0.40546512603759766) + + # Alpha = inf + config = self._get_config() + config["alpha"] = float("inf") + crit = BarronLoss.from_config(config) + outputs = self._get_outputs() + targets = self._get_targets() + self.assertAlmostEqual(crit(outputs, targets).item(), 0.39346933364868164) + + def test_deep_copy(self): + config = self._get_config() + crit1 = build_loss(config) + self.assertTrue(isinstance(crit1, BarronLoss)) + outputs = self._get_outputs() + targets = self._get_targets() + crit1(outputs, targets) + + crit2 = copy.deepcopy(crit1) + self.assertAlmostEqual( + crit1(outputs, targets).item(), crit2(outputs, targets).item() + ) diff --git a/test/losses_generic_utils_test.py b/test/losses_generic_utils_test.py new file mode 100644 index 0000000000..2da346da70 --- /dev/null +++ b/test/losses_generic_utils_test.py @@ -0,0 +1,24 @@ +#!/usr/bin/env 
python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import torch +from classy_vision.generic.util import convert_to_one_hot + + +class TestUtils(unittest.TestCase): + def test_single(self): + targets = torch.tensor([[4]]) + one_hot_target = convert_to_one_hot(targets, 5) + self.assertTrue(torch.allclose(one_hot_target, torch.tensor([[0, 0, 0, 0, 1]]))) + + def test_two(self): + targets = torch.tensor([[0], [1]]) + one_hot_target = convert_to_one_hot(targets, 3) + self.assertTrue( + torch.allclose(one_hot_target, torch.tensor([[1, 0, 0], [0, 1, 0]])) + ) diff --git a/test/losses_label_smoothing_cross_entropy_loss_test.py b/test/losses_label_smoothing_cross_entropy_loss_test.py new file mode 100644 index 0000000000..4666b8fd8f --- /dev/null +++ b/test/losses_label_smoothing_cross_entropy_loss_test.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import copy +import unittest + +import torch +from classy_vision.losses import LabelSmoothingCrossEntropyLoss, build_loss + + +class TestLabelSmoothingCrossEntropyLoss(unittest.TestCase): + def test_build_label_smoothing_cross_entropy(self): + config = { + "name": "label_smoothing_cross_entropy", + "ignore_index": -1, + "smoothing_param": 0.1, + } + crit = build_loss(config) + self.assertTrue(isinstance(crit, LabelSmoothingCrossEntropyLoss)) + self.assertEqual(crit._ignore_index, -1) + + def test_smoothing_one_hot_targets(self): + config = { + "name": "label_smoothing_cross_entropy", + "ignore_index": -1, + "smoothing_param": 0.1, + } + crit = build_loss(config) + targets = torch.tensor([[0, 0, 0, 0, 1]]) + self.assertTrue(isinstance(crit, LabelSmoothingCrossEntropyLoss)) + valid_targets = crit.compute_valid_targets(targets, 5) + self.assertTrue( + torch.allclose(valid_targets, torch.tensor([[0.0, 0.0, 0.0, 0.0, 1.0]])) + ) + smoothed_targets = crit.smooth_targets(valid_targets, 5) + self.assertTrue( + torch.allclose( + smoothed_targets, + torch.tensor([[0.2 / 11, 0.2 / 11, 0.2 / 11, 0.2 / 11, 10.2 / 11]]), + ) + ) + + def test_smoothing_ignore_index_one_hot_targets(self): + config = { + "name": "label_smoothing_cross_entropy", + "ignore_index": -1, + "smoothing_param": 0.5, + } + crit = build_loss(config) + targets = torch.tensor([[-1, 0, 0, 0, 1]]) + self.assertTrue(isinstance(crit, LabelSmoothingCrossEntropyLoss)) + valid_targets = crit.compute_valid_targets(targets, 5) + self.assertTrue( + torch.allclose(valid_targets, torch.tensor([[0.0, 0.0, 0.0, 0.0, 1.0]])) + ) + smoothed_targets = crit.smooth_targets(valid_targets, 5) + self.assertTrue( + torch.allclose( + smoothed_targets, + torch.tensor([[1 / 15, 1 / 15, 1 / 15, 1 / 15, 11 / 15]]), + ) + ) + + def test_smoothing_multilabel_one_hot_targets(self): + config = { + "name": "label_smoothing_cross_entropy", + "ignore_index": -1, + "smoothing_param": 0.5, + } + crit = build_loss(config) + targets = 
torch.tensor([[1, 0, 0, 0, 1]]) + self.assertTrue(isinstance(crit, LabelSmoothingCrossEntropyLoss)) + valid_targets = crit.compute_valid_targets(targets, 5) + self.assertTrue( + torch.allclose(valid_targets, torch.tensor([[1.0, 0.0, 0.0, 0.0, 1.0]])) + ) + + smoothed_targets = crit.smooth_targets(valid_targets, 5) + self.assertTrue( + torch.allclose( + smoothed_targets, + torch.tensor([[6 / 15, 1 / 15, 1 / 15, 1 / 15, 6 / 15]]), + ) + ) + + def test_smoothing_all_ones_one_hot_targets(self): + config = { + "name": "label_smoothing_cross_entropy", + "ignore_index": -1, + "smoothing_param": 0.1, + } + crit = build_loss(config) + targets = torch.tensor([[1, 1, 1, 1]]) + self.assertTrue(isinstance(crit, LabelSmoothingCrossEntropyLoss)) + valid_targets = crit.compute_valid_targets(targets, 4) + self.assertTrue( + torch.allclose(valid_targets, torch.tensor([[1.0, 1.0, 1.0, 1.0]])) + ) + + smoothed_targets = crit.smooth_targets(valid_targets, 4) + self.assertTrue( + torch.allclose(smoothed_targets, torch.tensor([[0.25, 0.25, 0.25, 0.25]])) + ) + + def test_smoothing_mixed_one_hot_targets(self): + config = { + "name": "label_smoothing_cross_entropy", + "ignore_index": -1, + "smoothing_param": 0.5, + } + crit = build_loss(config) + targets = torch.tensor([[1, 1, 1, 1, 1], [1, 0, 0, 0, 1]]) + self.assertTrue(isinstance(crit, LabelSmoothingCrossEntropyLoss)) + valid_targets = crit.compute_valid_targets(targets, 5) + self.assertTrue( + torch.allclose( + valid_targets, + torch.tensor([[1.0, 1.0, 1.0, 1.0, 1.0], [1.0, 0.0, 0.0, 0.0, 1.0]]), + ) + ) + smoothed_targets = crit.smooth_targets(valid_targets, 5) + self.assertTrue( + torch.allclose( + smoothed_targets, + torch.tensor( + [ + [0.2, 0.2, 0.2, 0.2, 0.2], + [6 / 15, 1 / 15, 1 / 15, 1 / 15, 6 / 15], + ] + ), + ) + ) + + def test_smoothing_class_targets(self): + config = { + "name": "label_smoothing_cross_entropy", + "ignore_index": -1, + "smoothing_param": 0.5, + } + crit = build_loss(config) + targets = torch.tensor([4, -1]) 
+ self.assertTrue(isinstance(crit, LabelSmoothingCrossEntropyLoss)) + valid_targets = crit.compute_valid_targets(targets, 5) + self.assertTrue( + torch.allclose( + valid_targets, + torch.tensor([[0.0, 0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0]]), + ) + ) + smoothed_targets = crit.smooth_targets(valid_targets, 5) + self.assertTrue( + torch.allclose( + smoothed_targets, + torch.tensor( + [ + [1 / 15, 1 / 15, 1 / 15, 1 / 15, 11 / 15], + [0.2, 0.2, 0.2, 0.2, 0.2], + ] + ), + ) + ) + + def test_unnormalized_label_smoothing_cross_entropy(self): + config = { + "name": "label_smoothing_cross_entropy", + "ignore_index": -1, + "smoothing_param": 0.5, + } + crit = LabelSmoothingCrossEntropyLoss.from_config(config) + outputs = torch.tensor([[0.0, 7.0, 0.0, 0.0, 2.0]]) + targets = torch.tensor([[0, 0, 0, 0, 1]]) + self.assertAlmostEqual(crit(outputs, targets).item(), 5.07609558) + + def test_ignore_index_label_smoothing_cross_entropy(self): + config = { + "name": "label_smoothing_cross_entropy", + "ignore_index": -1, + "smoothing_param": 0.2, + } + crit = LabelSmoothingCrossEntropyLoss.from_config(config) + outputs = torch.tensor([[0.0, 7.0]]) + targets = torch.tensor([[-1]]) + self.assertAlmostEqual(crit(outputs, targets).item(), 3.50090909) + + def test_class_integer_label_smoothing_cross_entropy(self): + config = { + "name": "label_smoothing_cross_entropy", + "ignore_index": -1, + "smoothing_param": 0.2, + } + crit = LabelSmoothingCrossEntropyLoss.from_config(config) + outputs = torch.tensor([[1.0, 2.0], [0.0, 2.0]]) + targets = torch.tensor([[0], [1]]) + self.assertAlmostEqual(crit(outputs, targets).item(), 0.76176142) + + def test_deep_copy(self): + config = { + "name": "label_smoothing_cross_entropy", + "ignore_index": -1, + "smoothing_param": 0.5, + } + crit = build_loss(config) + self.assertTrue(isinstance(crit, LabelSmoothingCrossEntropyLoss)) + outputs = torch.tensor([[0.0, 7.0, 0.0, 0.0, 2.0]]) + targets = torch.tensor([[0, 0, 0, 0, 1]]) + crit(outputs, targets) 
+ + crit2 = copy.deepcopy(crit) + self.assertAlmostEqual(crit2(outputs, targets).item(), 5.07609558) diff --git a/test/losses_multi_output_sum_loss_test.py b/test/losses_multi_output_sum_loss_test.py new file mode 100644 index 0000000000..1bd83488fc --- /dev/null +++ b/test/losses_multi_output_sum_loss_test.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import torch +from classy_vision.losses import ( + ClassyLoss, + MultiOutputSumLoss, + build_loss, + register_loss, +) + + +@register_loss("mock_1") +class MockLoss1(ClassyLoss): + def forward(self, pred, target): + return torch.tensor(1.0) + + @classmethod + def from_config(cls, config): + return cls() + + +class TestMultiOutputSumLoss(unittest.TestCase): + def test_multi_output_sum_loss(self): + config = {"name": "multi_output_sum_loss", "loss": {"name": "mock_1"}} + crit = build_loss(config) + self.assertTrue(isinstance(crit, MultiOutputSumLoss)) + + # test with a single output + output = torch.tensor([1.0, 2.3]) + target = torch.tensor(1.0) + self.assertAlmostEqual(crit(output, target).item(), 1.0) + + # test with a list of outputs + output = [torch.tensor([1.2, 3.2])] * 5 + target = torch.tensor(2.3) + self.assertAlmostEqual(crit(output, target).item(), 5.0) diff --git a/test/losses_soft_target_cross_entropy_loss_test.py b/test/losses_soft_target_cross_entropy_loss_test.py new file mode 100644 index 0000000000..47374ecdb3 --- /dev/null +++ b/test/losses_soft_target_cross_entropy_loss_test.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import copy +import unittest + +import torch +from classy_vision.losses import SoftTargetCrossEntropyLoss, build_loss + + +class TestSoftTargetCrossEntropyLoss(unittest.TestCase): + def _get_config(self): + return { + "name": "soft_target_cross_entropy", + "ignore_index": -1, + "reduction": "mean", + } + + def _get_outputs(self): + return torch.tensor([[1.0, 7.0, 0.0, 0.0, 2.0]]) + + def _get_targets(self): + return torch.tensor([[1, 0, 0, 0, 1]]) + + def _get_loss(self): + return 5.51097965 + + def test_build_soft_target_cross_entropy(self): + config = self._get_config() + crit = build_loss(config) + self.assertTrue(isinstance(crit, SoftTargetCrossEntropyLoss)) + self.assertEqual(crit._ignore_index, -1) + self.assertEqual(crit._reduction, "mean") + + def test_soft_target_cross_entropy(self): + config = self._get_config() + crit = SoftTargetCrossEntropyLoss.from_config(config) + outputs = self._get_outputs() + targets = self._get_targets() + self.assertAlmostEqual(crit(outputs, targets).item(), self._get_loss()) + + # Verify ignore index works + outputs = self._get_outputs() + targets = torch.tensor([[-1, 0, 0, 0, 1]]) + self.assertAlmostEqual(crit(outputs, targets).item(), 5.01097918) + + def test_unnormalized_soft_target_cross_entropy(self): + config = { + "name": "soft_target_cross_entropy", + "ignore_index": -1, + "reduction": "mean", + "normalize_targets": None, + } + crit = SoftTargetCrossEntropyLoss.from_config(config) + outputs = self._get_outputs() + targets = self._get_targets() + self.assertAlmostEqual(crit(outputs, targets).item(), 11.0219593) + + # Verify ignore index works + outputs = self._get_outputs() + targets = torch.tensor([[-1, 0, 0, 0, 1]]) + self.assertAlmostEqual(crit(outputs, targets).item(), 5.01097965) + + def test_ignore_row(self): + # If a sample has no valid targets, it should be ignored in the reduction. 
+ config = self._get_config() + crit = SoftTargetCrossEntropyLoss.from_config(config) + outputs = torch.tensor([[1.0, 7.0, 0.0, 0.0, 2.0], [4.0, 2.0, 1.0, 6.0, 0.5]]) + targets = torch.tensor([[1, 0, 0, 0, 1], [-1, -1, -1, -1, -1]]) + self.assertAlmostEqual(crit(outputs, targets).item(), self._get_loss()) + + def test_deep_copy(self): + config = self._get_config() + crit = build_loss(config) + self.assertTrue(isinstance(crit, SoftTargetCrossEntropyLoss)) + outputs = self._get_outputs() + targets = self._get_targets() + crit(outputs, targets) + + crit2 = copy.deepcopy(crit) + self.assertAlmostEqual(crit2(outputs, targets).item(), self._get_loss()) diff --git a/test/losses_sum_arbitrary_loss_test.py b/test/losses_sum_arbitrary_loss_test.py new file mode 100644 index 0000000000..3aa1bd0442 --- /dev/null +++ b/test/losses_sum_arbitrary_loss_test.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import copy +import unittest + +import torch +from classy_vision.losses import ClassyLoss, SumArbitraryLoss, build_loss, register_loss + + +@register_loss("mock_a") +class MockLoss1(ClassyLoss): + def forward(self, pred, target): + return torch.tensor(1.0) + + @classmethod + def from_config(cls, config): + return cls() + + +@register_loss("mock_b") +class MockLoss2(ClassyLoss): + def forward(self, pred, target): + return torch.tensor(2.0) + + @classmethod + def from_config(cls, config): + return cls() + + +@register_loss("mock_c") +class MockLoss3(ClassyLoss): + def forward(self, pred, target): + return torch.tensor(3.0) + + @classmethod + def from_config(cls, config): + return cls() + + +class TestSumArbitraryLoss(unittest.TestCase): + def _get_config(self): + return { + "name": "sum_arbitrary", + "weights": [1.0, 1.0, 1.0], + "losses": [{"name": "mock_a"}, {"name": "mock_b"}, {"name": "mock_c"}], + } + + def _get_outputs(self): + return torch.tensor([[2.0, 8.0]]) + + def _get_targets(self): + return torch.tensor([1]) + + def test_build_sum_arbitrary(self): + config = self._get_config() + crit = build_loss(config) + self.assertTrue(isinstance(crit, SumArbitraryLoss)) + self.assertAlmostEqual(crit.weights, [1.0, 1.0, 1.0]) + mod_list = [MockLoss1, MockLoss2, MockLoss3] + for idx, crit_type in enumerate(mod_list): + self.assertTrue(isinstance(crit.losses[idx], crit_type)) + + def test_sum_arbitrary(self): + config = self._get_config() + crit = SumArbitraryLoss.from_config(config) + outputs = self._get_outputs() + targets = self._get_targets() + self.assertAlmostEqual(crit(outputs, targets).item(), 1.0 + 2.0 + 3.0) + + # Verify changing losses works + new_config = copy.deepcopy(config) + new_config.update( + {"losses": [{"name": "mock_a"}, {"name": "mock_b"}], "weights": [1.0, 1.0]} + ) + crit = SumArbitraryLoss.from_config(new_config) + self.assertAlmostEqual(crit(outputs, targets).item(), 1.0 + 2.0) + + # Verify changing weights works + new_config = 
copy.deepcopy(config) + new_config.update({"weights": [1.0, 2.0, 3.0]}) + crit = SumArbitraryLoss.from_config(new_config) + self.assertAlmostEqual( + crit(outputs, targets).item(), 1.0 + 2.0 * 2.0 + 3.0 * 3.0 + ) + + def test_deep_copy(self): + config = self._get_config() + crit1 = build_loss(config) + self.assertTrue(isinstance(crit1, SumArbitraryLoss)) + outputs = self._get_outputs() + targets = self._get_targets() + crit1(outputs, targets) + + crit2 = copy.deepcopy(crit1) + self.assertAlmostEqual( + crit1(outputs, targets).item(), crit2(outputs, targets).item() + ) diff --git a/test/losses_test.py b/test/losses_test.py new file mode 100644 index 0000000000..5f8796aa74 --- /dev/null +++ b/test/losses_test.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import torch +from classy_vision.losses import build_loss + + +class CriterionsTest(unittest.TestCase): + """ + Test that build_loss is able to build torch losses correctly.
+ """ + + def _test_loss(self, config, output, target, expected_loss): + # test that we are able to build losses from torch.nn.modules.loss + # and that they work correctly + + crit = build_loss(config) + + # test that the weights are set correctly + self.assertAlmostEqual(crit.weight.numpy().tolist(), [1.0, 1.0]) + + # test that the loss is computed correctly + self.assertAlmostEqual(crit(output, target).item(), expected_loss) + + # verify ignore index works + if "ignore_index" in config: + self.assertAlmostEqual(crit(output, torch.tensor([-1])).item(), 0.0) + + def test_cross_entropy_loss(self): + """ + Test CrossEntropyLoss + """ + config = { + "name": "CrossEntropyLoss", + "weight": [1.0, 1.0], + "ignore_index": -1, + "reduction": "mean", + } + output = torch.tensor([[9.0, 1.0]]) + target = torch.tensor([1]) + expected_loss = 8.000335693359375 + self._test_loss(config, output, target, expected_loss) + + def test_bce_with_logits_loss(self): + """ + Test BCEWithLogitsLoss + """ + config = { + "name": "BCEWithLogitsLoss", + "weight": [1.0, 1.0], + "reduction": "mean", + } + output = torch.tensor([0.999, 0.999]) + target = torch.tensor([1.0, 1.0]) + expected_loss = 0.313530727260701 + self._test_loss(config, output, target, expected_loss) diff --git a/test/manual/hooks_model_complexity_hook_test.py b/test/manual/hooks_model_complexity_hook_test.py new file mode 100644 index 0000000000..248a4a3eab --- /dev/null +++ b/test/manual/hooks_model_complexity_hook_test.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import re +import unittest +from test.generic.config_utils import get_test_classy_task, get_test_model_configs + +from classy_vision.hooks import ModelComplexityHook +from classy_vision.models import build_model + + +class TestModelComplexityHook(unittest.TestCase): + def test_model_complexity(self) -> None: + """ + Test that the number of parameters and the FLOPs are calculated correctly. + """ + model_configs = get_test_model_configs() + expected_mega_flops = [4122, 4274, 106152] + expected_params = [25557032, 25028904, 43009448] + local_variables = {} + + task = get_test_classy_task() + task.prepare() + + # create a model complexity hook + model_complexity_hook = ModelComplexityHook() + + for model_config, mega_flops, params in zip( + model_configs, expected_mega_flops, expected_params + ): + model = build_model(model_config) + + task.base_model = model + + with self.assertLogs() as log_watcher: + model_complexity_hook.on_start(task, local_variables) + + # there should be 2 log statements generated + self.assertEqual(len(log_watcher.output), 2) + + # first statement - either the MFLOPs or a warning + if mega_flops is not None: + match = re.search( + r"FLOPs for forward pass: (?P<mega_flops>[-+]?\d*\.\d+|\d+) MFLOPs", + log_watcher.output[0], + ) + self.assertIsNotNone(match) + self.assertEqual(mega_flops, float(match.group("mega_flops"))) + else: + self.assertIn( + "Model contains unsupported modules", log_watcher.output[0] + ) + + # second statement + match = re.search( + r"Number of parameters in model: (?P<params>[-+]?\d*\.\d+|\d+)", + log_watcher.output[1], + ) + self.assertIsNotNone(match) + self.assertEqual(params, float(match.group("params"))) diff --git a/test/manual/hooks_model_tensorboard_hook_test.py b/test/manual/hooks_model_tensorboard_hook_test.py new file mode 100644 index 0000000000..dc01b40753 --- /dev/null +++ b/test/manual/hooks_model_tensorboard_hook_test.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates.
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest +import unittest.mock as mock +from test.generic.config_utils import get_test_classy_task, get_test_model_configs + +from classy_vision.hooks import ModelTensorboardHook +from classy_vision.models import build_model +from tensorboardX import SummaryWriter + + +class TestModelTensorboardHook(unittest.TestCase): + @mock.patch("classy_vision.hooks.model_tensorboard_hook.is_master") + def test_writer(self, mock_is_master_func: mock.MagicMock) -> None: + """ + Tests that the tensorboard writer calls SummaryWriter with the model + iff is_master() is True. + """ + mock_summary_writer = mock.create_autospec(SummaryWriter, instance=True) + + task = get_test_classy_task() + task.prepare() + + for master in [False, True]: + mock_is_master_func.return_value = master + model_configs = get_test_model_configs() + local_variables = {} + + for model_config in model_configs: + model = build_model(model_config) + task.base_model = model + + # create a model tensorboard hook + model_tensorboard_hook = ModelTensorboardHook(mock_summary_writer) + + with self.assertLogs(): + model_tensorboard_hook.on_start(task, local_variables) + + if master: + # SummaryWriter should have been init-ed with the correct + # add_graph should be called once with model as the first arg + mock_summary_writer.add_graph.assert_called_once() + self.assertEqual( + mock_summary_writer.add_graph.call_args[0][0], model + ) + else: + # add_graph shouldn't be called since is_master() is False + mock_summary_writer.add_graph.assert_not_called() + mock_summary_writer.reset_mock() diff --git a/test/manual/hooks_progress_bar_hook_test.py b/test/manual/hooks_progress_bar_hook_test.py new file mode 100644 index 0000000000..ad3b8df6a7 --- /dev/null +++ b/test/manual/hooks_progress_bar_hook_test.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. 
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest +import unittest.mock as mock +from test.generic.config_utils import get_test_classy_task + +import progressbar +from classy_vision.hooks import ProgressBarHook + + +class TestProgressBarHook(unittest.TestCase): + @mock.patch("classy_vision.hooks.progress_bar_hook.progressbar") + @mock.patch("classy_vision.hooks.progress_bar_hook.is_master") + def test_progress_bar( + self, mock_is_master: mock.MagicMock, mock_progressbar_pkg: mock.MagicMock + ) -> None: + """ + Tests that the progress bar is created, updated and destroyed correctly. + """ + mock_progress_bar = mock.create_autospec(progressbar.ProgressBar, instance=True) + mock_progressbar_pkg.ProgressBar.return_value = mock_progress_bar + + mock_is_master.return_value = True + + local_variables = {} + + task = get_test_classy_task() + task.prepare() + task.advance_phase() + + num_batches = task.num_batches_per_phase + # make sure we are checking at least one batch + self.assertGreater(num_batches, 0) + + # create a progress bar hook + progress_bar_hook = ProgressBarHook() + + # progressbar.ProgressBar should be init-ed with num_batches + progress_bar_hook.on_phase_start(task, local_variables) + mock_progressbar_pkg.ProgressBar.assert_called_once_with(num_batches) + mock_progress_bar.start.assert_called_once_with() + mock_progress_bar.start.reset_mock() + mock_progressbar_pkg.ProgressBar.reset_mock() + + # on_update should update the progress bar correctly + for i in range(num_batches): + progress_bar_hook.on_update(task, local_variables) + mock_progress_bar.update.assert_called_once_with(i + 1) + mock_progress_bar.update.reset_mock() + + # check that even if on_update is called again, the progress bar is + # only updated with num_batches + for _ in range(num_batches): + progress_bar_hook.on_update(task, local_variables) + 
mock_progress_bar.update.assert_called_once_with(num_batches) + mock_progress_bar.update.reset_mock() + + # finish should be called on the progress bar + progress_bar_hook.on_phase_end(task, local_variables) + mock_progress_bar.finish.assert_called_once_with() + mock_progress_bar.finish.reset_mock() + + # check that even if the progress bar isn't created, the code doesn't + # crash + progress_bar_hook = ProgressBarHook() + try: + progress_bar_hook.on_update(task, local_variables) + progress_bar_hook.on_phase_end(task, local_variables) + except Exception as e: + self.fail( + "Received Exception when on_phase_start() isn't called: {}".format(e) + ) + mock_progressbar_pkg.ProgressBar.assert_not_called() + + # check that a progress bar is not created if is_master() returns False + mock_is_master.return_value = False + progress_bar_hook = ProgressBarHook() + try: + progress_bar_hook.on_phase_start(task, local_variables) + progress_bar_hook.on_update(task, local_variables) + progress_bar_hook.on_phase_end(task, local_variables) + except Exception as e: + self.fail("Received Exception when is_master() is False: {}".format(e)) + self.assertIsNone(progress_bar_hook.progress_bar) + mock_progressbar_pkg.ProgressBar.assert_not_called() diff --git a/test/manual/hooks_tensorboard_plot_hook_test.py b/test/manual/hooks_tensorboard_plot_hook_test.py new file mode 100644 index 0000000000..d98e28ae35 --- /dev/null +++ b/test/manual/hooks_tensorboard_plot_hook_test.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import shutil +import tempfile +import unittest +import unittest.mock as mock +from itertools import product +from test.generic.config_utils import get_test_mlp_task_config, get_test_task_config + +from classy_vision.hooks import TensorboardPlotHook +from classy_vision.optim.param_scheduler import UpdateInterval +from classy_vision.tasks import build_task +from classy_vision.trainer import LocalTrainer +from tensorboardX import SummaryWriter + + +class TestTensorboardPlotHook(unittest.TestCase): + def setUp(self) -> None: + self.base_dir = tempfile.mkdtemp() + + def tearDown(self) -> None: + shutil.rmtree(self.base_dir) + + @mock.patch("classy_vision.hooks.tensorboard_plot_hook.is_master") + def test_writer(self, mock_is_master_func: mock.MagicMock) -> None: + """ + Tests that the tensorboard writer writes the correct scalars to SummaryWriter + iff is_master() is True. + """ + for phase_idx, master in product([0, 1, 2], [True, False]): + train, phase_type = ( + (True, "train") if phase_idx % 2 == 0 else (False, "test") + ) + mock_is_master_func.return_value = master + + # set up the task and state + config = get_test_task_config() + config["dataset"]["train"]["batchsize_per_replica"] = 2 + config["dataset"]["test"]["batchsize_per_replica"] = 5 + task = build_task(config) + task.prepare() + task.phase_idx = phase_idx + task.train = train + + losses = [1.23, 4.45, 12.3, 3.4] + + local_variables = {} + + summary_writer = SummaryWriter(self.base_dir) + # create a spy on top of summary_writer + summary_writer = mock.MagicMock(wraps=summary_writer) + + # create a loss lr tensorboard hook + tensorboard_plot_hook = TensorboardPlotHook(summary_writer) + + # test that the hook logs a warning and doesn't write anything to + # the writer if on_phase_start() is not called for initialization + # before on_update() is called. 
+ with self.assertLogs() as log_watcher: + tensorboard_plot_hook.on_update(task, local_variables) + + self.assertTrue( + len(log_watcher.records) == 1 + and log_watcher.records[0].levelno == logging.WARN + and "learning_rates is not initialized" in log_watcher.output[0] + ) + + # test that the hook logs a warning and doesn't write anything to + # the writer if on_phase_start() is not called for initialization + # before on_phase_end() is called. + with self.assertLogs() as log_watcher: + tensorboard_plot_hook.on_phase_end(task, local_variables) + + self.assertTrue( + len(log_watcher.records) == 1 + and log_watcher.records[0].levelno == logging.WARN + and "learning_rates is not initialized" in log_watcher.output[0] + ) + summary_writer.add_scalar.reset_mock() + + # run the hook in the correct order + tensorboard_plot_hook.on_phase_start(task, local_variables) + + for loss in losses: + task.losses.append(loss) + tensorboard_plot_hook.on_update(task, local_variables) + + tensorboard_plot_hook.on_phase_end(task, local_variables) + + if master: + # add_scalar() should have been called with the right scalars + if train: + loss_key = f"{phase_type}_loss" + learning_rate_key = f"{phase_type}_learning_rate_updates" + summary_writer.add_scalar.assert_any_call( + loss_key, mock.ANY, global_step=mock.ANY, walltime=mock.ANY + ) + summary_writer.add_scalar.assert_any_call( + learning_rate_key, + mock.ANY, + global_step=mock.ANY, + walltime=mock.ANY, + ) + avg_loss_key = f"avg_{phase_type}_loss" + summary_writer.add_scalar.assert_any_call( + avg_loss_key, mock.ANY, global_step=mock.ANY + ) + for meter in task.meters: + for name in meter.value: + meter_key = f"{phase_type}_{meter.name}_{name}" + summary_writer.add_scalar.assert_any_call( + meter_key, mock.ANY, global_step=mock.ANY + ) + else: + # add_scalar() shouldn't be called since is_master() is False + summary_writer.add_scalar.assert_not_called() + summary_writer.add_scalar.reset_mock() + + def test_logged_lr(self): + # Mock LR
scheduler + def scheduler_mock(where): + return where + + mock_lr_scheduler = mock.Mock(side_effect=scheduler_mock) + mock_lr_scheduler.update_interval = UpdateInterval.STEP + + # Mock Logging + class DummySummaryWriter(object): + def __init__(self): + self.scalar_logs = {} + + def add_scalar(self, key, value, global_step=None, walltime=None) -> None: + self.scalar_logs[key] = self.scalar_logs.get(key, []) + [value] + + config = get_test_mlp_task_config() + config["num_epochs"] = 3 + config["dataset"]["train"]["batchsize_per_replica"] = 5 + config["dataset"]["test"]["batchsize_per_replica"] = 5 + task = build_task(config) + + writer = DummySummaryWriter() + hook = TensorboardPlotHook(writer) + task.set_hooks([hook]) + task.optimizer.lr_scheduler = mock_lr_scheduler + + trainer = LocalTrainer() + trainer.train(task) + + # We have 10 samples, batch size is 5. Each epoch is done in two steps. + self.assertEqual( + writer.scalar_logs["train_learning_rate_updates"], + [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6], + ) diff --git a/test/manual/hooks_visdom_hook_test.py b/test/manual/hooks_visdom_hook_test.py new file mode 100644 index 0000000000..f44096da2a --- /dev/null +++ b/test/manual/hooks_visdom_hook_test.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
import copy
import unittest
import unittest.mock as mock
from itertools import product
from test.generic.config_utils import get_test_task_config

from classy_vision.hooks import VisdomHook
from classy_vision.tasks import build_task
from visdom import Visdom


class TestVisdomHook(unittest.TestCase):
    """Checks the metric bookkeeping and plotting calls made by ``VisdomHook``."""

    def _assert_metric_series(self, metrics, key, expected_len):
        """Assert that ``metrics[key]`` exists, is a list and has ``expected_len`` items.

        Three separate assertions are used instead of one compound
        ``assertTrue(a and b and c)`` so that a failure reports which of the
        conditions was violated.
        """
        self.assertIn(key, metrics)
        self.assertIsInstance(metrics[key], list)
        self.assertEqual(len(metrics[key]), expected_len)

    @mock.patch("classy_vision.hooks.visdom_hook.is_master")
    @mock.patch("classy_vision.hooks.visdom_hook.Visdom", autospec=True)
    def test_visdom(
        self, mock_visdom_cls: mock.MagicMock, mock_is_master: mock.MagicMock
    ) -> None:
        """
        Tests that visdom is populated with plots.

        The hook is exercised for every combination of "is master process" and
        "visdom connection available"; for each combination ten alternating
        train / test phases are simulated.
        """
        mock_visdom = mock.create_autospec(Visdom, instance=True)
        mock_visdom_cls.return_value = mock_visdom

        local_variables = {}

        # set up the task and state
        config = get_test_task_config()
        config["dataset"]["train"]["batchsize_per_replica"] = 2
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)
        task.prepare()

        losses = [1.2, 2.3, 1.23, 2.33]
        # Expected loss metric per phase type for the unscaled ``losses``.
        # Numerically this is mean(losses) = 1.765 divided by the phase's
        # batchsize_per_replica configured above (2 for train, 5 for test).
        loss_vals = {"train": 0.8825, "test": 0.353}

        visdom_server = "localhost"
        visdom_port = 8097

        for master, visdom_conn in product([False, True], [False, True]):
            mock_is_master.return_value = master
            mock_visdom.check_connection.return_value = visdom_conn

            # create a visdom hook; constructing it must instantiate Visdom once
            visdom_hook = VisdomHook(visdom_server, visdom_port)

            mock_visdom_cls.assert_called_once()
            mock_visdom_cls.reset_mock()

            # number of phases seen so far, per type and in total
            counts = {"train": 0, "test": 0}
            count = 0

            for phase_idx in range(10):
                train = phase_idx % 2 == 0
                task.train = train
                phase_type = "train" if train else "test"

                counts[phase_type] += 1
                count += 1

                # test that the metrics don't change if losses is empty and that
                # visdom.line() is not called
                task.losses = []
                original_metrics = copy.deepcopy(visdom_hook.metrics)
                visdom_hook.on_phase_end(task, local_variables)
                self.assertDictEqual(original_metrics, visdom_hook.metrics)
                mock_visdom.line.assert_not_called()

                # test that the metrics are updated correctly when losses
                # is non empty; scaling by ``count`` gives every phase a
                # distinct expected loss
                task.losses = [loss * count for loss in losses]
                visdom_hook.on_phase_end(task, local_variables)
                metrics = visdom_hook.metrics

                # every meter should be present and should have the correct length
                for meter in task.meters:
                    for value_name in meter.value:
                        metric_key = phase_type + "_" + meter.name + "_" + value_name
                        self._assert_metric_series(
                            metrics, metric_key, counts[phase_type]
                        )

                # the loss metric should be calculated correctly
                loss_key = phase_type + "_loss"
                self._assert_metric_series(metrics, loss_key, counts[phase_type])
                self.assertAlmostEqual(
                    metrics[loss_key][-1], loss_vals[phase_type] * count, places=4
                )

                # the lr metric should be correct
                lr_key = phase_type + "_learning_rate"
                self._assert_metric_series(metrics, lr_key, counts[phase_type])
                self.assertAlmostEqual(
                    metrics[lr_key][-1], task.optimizer.lr, places=4
                )

                if master and not train and visdom_conn:
                    # visdom.line() should be called once
                    mock_visdom.line.assert_called_once()
                    mock_visdom.line.reset_mock()
                else:
                    # visdom.line() should not be called
                    mock_visdom.line.assert_not_called()


# Patch metadata found on these lines (header of the next file in the patch):
#   diff --git a/test/manual/models_classy_vision_model_test.py b/test/manual/models_classy_vision_model_test.py
#   new file mode 100644
#   index 0000000000..58857d40c7
#   --- /dev/null
#   +++ b/test/manual/models_classy_vision_model_test.py
#   @@ -0,0 +1,113 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import copy
import unittest
from collections import defaultdict
from test.generic.config_utils import get_test_model_configs
from test.generic.utils import compare_model_state

import torch
from classy_vision.heads import build_head
from classy_vision.models import ClassyModel, build_model


class TestClassyModel(unittest.TestCase):
    """Builds every test model config and round-trips model / head state."""

    model_configs = get_test_model_configs()

    def _get_config(self, model_config):
        """Wrap ``model_config`` in a task-style config dict (a new dict per call).

        Note that ``model_config`` itself is referenced, not copied.
        """
        return {
            "name": "classification_task",
            "num_epochs": 12,
            "loss": {"name": "test_loss"},
            "dataset": {
                "name": "imagenet",
                "batchsize_per_replica": 8,
                "use_pairs": False,
                "num_samples_per_phase": None,
                "use_shuffle": {"train": True, "test": False},
            },
            "meters": [],
            "model": model_config,
            "optimizer": {"name": "test_opt"},
        }

    def _compare_model_state(self, state, state2):
        """Delegate to the shared ``compare_model_state`` test helper."""
        compare_model_state(self, state, state2)

    def test_build_model(self):
        """Every config must build a ClassyModel exposing shape / depth metadata."""
        for cfg in self.model_configs:
            config = self._get_config(cfg)
            model = build_model(config["model"])
            self.assertIsInstance(model, ClassyModel)
            self.assertIsInstance(model.input_shape, tuple)
            self.assertEqual(len(model.input_shape), 3)
            self.assertIsInstance(model.output_shape, tuple)
            self.assertEqual(len(model.output_shape), 2)
            self.assertIsInstance(model.model_depth, int)

    def test_get_set_state(self):
        """State copied between two models must give equal states and outputs."""
        config = self._get_config(self.model_configs[0])
        model = build_model(config["model"])
        # torch.Tensor(1, 3, 224, 224) would be *uninitialized* memory and may
        # contain NaN/Inf, which makes torch.allclose fail spuriously. Use
        # finite random values instead.
        fake_input = torch.randn(1, 3, 224, 224)
        model.eval()
        state = model.get_classy_state()
        with torch.no_grad():
            output = model(fake_input)

        model2 = build_model(config["model"])
        model2.set_classy_state(state)

        # compare the states
        state2 = model2.get_classy_state()
        self._compare_model_state(state, state2)

        model2.eval()
        with torch.no_grad():
            output2 = model2(fake_input)
        self.assertTrue(torch.allclose(output, output2))

        # test deep_copy by assigning a deep copied state to model2
        # and then changing the original model's state
        state = model.get_classy_state(deep_copy=True)

        model3 = build_model(config["model"])
        state3 = model3.get_classy_state()

        # give model2 the deep-copied state of model, then overwrite model's
        # own state with that of a freshly built model
        model2.set_classy_state(state)
        model.set_classy_state(state3)

        # the deep-copied state must be unaffected by the change to model
        state2 = model2.get_classy_state()
        self._compare_model_state(state, state2)

    def test_get_set_head_states(self):
        """Attaching, clearing and re-attaching heads must restore the states."""
        # deep copy: the "heads" entry of the shared class-level model config
        # is replaced below
        config = copy.deepcopy(self._get_config(self.model_configs[0]))
        head_configs = config["model"]["heads"]
        config["model"]["heads"] = []
        model = build_model(config["model"])
        trunk_state = model.get_classy_state()

        # fork block name -> {head unique id -> head}
        heads = defaultdict(dict)
        for head_config in head_configs:
            head = build_head(head_config)
            heads[head_config["fork_block"]][head.unique_id] = head
        model.set_heads(heads)
        model_state = model.get_classy_state()

        # the heads should be the same as we set
        self.assertEqual(len(heads), len(model.get_heads()))
        for block_name, hs in model.get_heads().items():
            self.assertEqual(hs, heads[block_name])

        # without heads the state must equal the trunk-only state
        model._clear_heads()
        self._compare_model_state(model.get_classy_state(), trunk_state)

        # with the heads attached again the full state must be back
        model.set_heads(heads)
        self._compare_model_state(model.get_classy_state(), model_state)


# Patch metadata found on these lines (header of the next file in the patch):
#   diff --git a/test/meters_accuracy_meter_test.py b/test/meters_accuracy_meter_test.py
#   new file mode 100644
#   index 0000000000..055b95a1b0
#   --- /dev/null
#   +++ b/test/meters_accuracy_meter_test.py
#   @@ -0,0 +1,137 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from test.generic.meter_test_utils import ClassificationMeterTest

import torch
from classy_vision import meters
from classy_vision.meters import AccuracyMeter


class TestAccuracyMeter(ClassificationMeterTest):
    """Tests for ``AccuracyMeter`` with top-1 / top-2 accuracy.

    All fixtures use batchsize = 3 and 3 classes; scores are values in
    {1, 2, 3} where 3 is the highest score.
    """

    @staticmethod
    def _scores_a():
        # With labels [0, 1, 2]: only sample 1 is right at top-1 (1/3);
        # samples 1 and 3 have the right class within the top 2 (2/3).
        return torch.tensor([[3, 2, 1], [3, 1, 2], [1, 3, 2]])

    @staticmethod
    def _scores_b():
        # With labels [0, 1, 2]: samples 1 and 2 are right at top-1 (2/3);
        # all three have the right class within the top 2 (3/3).
        return torch.tensor([[3, 2, 1], [1, 3, 2], [1, 3, 2]])

    @staticmethod
    def _labels():
        # Class 0 is correct for sample 1, class 1 for sample 2, class 2 for
        # sample 3.
        return torch.tensor([0, 1, 2])

    def test_accuracy_meter_registry(self):
        built = meters.build_meter({"name": "accuracy", "topk": [1, 2]})
        self.assertIsInstance(built, AccuracyMeter)

    def test_single_meter_update_and_reset(self):
        """
        This test verifies that the meter works as expected on a single
        update + reset + same single update.
        """
        self.meter_update_and_reset_test(
            AccuracyMeter(topk=[1, 2]),
            self._scores_a(),
            self._labels(),
            {"top_1": 1 / 3.0, "top_2": 2 / 3.0},
        )

    def test_double_meter_update_and_reset(self):
        # First batch has top-1 accuracy of 1/3.0, top-2 accuracy of 2/3.0
        # Second batch has top-1 accuracy of 2/3.0, top-2 accuracy of 3/3.0
        batches = [self._scores_a(), self._scores_b()]
        labels = [self._labels(), self._labels()]
        self.meter_update_and_reset_test(
            AccuracyMeter(topk=[1, 2]),
            batches,
            labels,
            {"top_1": 3 / 6.0, "top_2": 5 / 6.0},
        )

    def test_meter_invalid_model_output(self):
        # This model output has 3 dimensions instead of expected 2
        bad_output = torch.tensor(
            [[[3, 2, 1], [1, 2, 3]], [[-1, -3, -4], [-10, -90, -100]]]
        )
        self.meter_invalid_meter_input_test(
            AccuracyMeter(topk=[1, 2]), bad_output, self._labels()
        )

    def test_meter_invalid_target(self):
        # Target has 2 dimensions instead of expected 1
        bad_target = torch.tensor([[0, 1, 2], [0, 1, 2]])
        self.meter_invalid_meter_input_test(
            AccuracyMeter(topk=[1, 2]), self._scores_a(), bad_target
        )

    def test_meter_invalid_topk(self):
        # k = 5 is requested although the output only has 3 classes
        self.meter_invalid_meter_input_test(
            AccuracyMeter(topk=[1, 5]), self._scores_a(), self._labels()
        )

    def test_meter_get_set_classy_state_test(self):
        # In this test we update meter0 with model_output0 & target0
        # and we update meter1 with model_output1 & target1 then
        # transfer the state from meter1 to meter0 and validate they
        # give same expected value.
        # Expected value is the expected value of meter1
        meter_pair = [AccuracyMeter(topk=[1, 2]), AccuracyMeter(topk=[1, 2])]
        batches = [
            torch.tensor([[1, 2, 3], [1, 2, 3], [2, 3, 1]]),
            self._scores_a(),
        ]
        labels = [self._labels(), self._labels()]

        # Value for second update
        self.meter_get_set_classy_state_test(
            meter_pair, batches, labels, {"top_1": 1 / 3.0, "top_2": 2 / 3.0}
        )

    def test_meter_distributed(self):
        # Meter0 will execute on one process, Meter1 on the other
        meter_pair = [AccuracyMeter(topk=[1, 2]), AccuracyMeter(topk=[1, 2])]

        batches = [
            self._scores_a(),  # Meter 0
            self._scores_b(),  # Meter 1
            self._scores_a(),  # Meter 0
            self._scores_b(),  # Meter 1
        ]
        labels = [self._labels() for _ in range(4)]  # Meter 0, 1, 0, 1

        # In first two updates there are 3 correct in top 1, 5 correct in top 2
        # The same occurs in the second two updates and is added to first
        expected_per_round = [
            {"top_1": 3 / 6.0, "top_2": 5 / 6.0},  # After one update to each meter
            {"top_1": 6 / 12.0, "top_2": 10 / 12.0},  # After two updates to each meter
        ]

        self.meter_distributed_test(meter_pair, batches, labels, expected_per_round)


# Patch metadata found on these lines (header of the next file in the patch):
#   diff --git a/test/meters_precision_meter_test.py b/test/meters_precision_meter_test.py
#   new file mode 100644
#   index 0000000000..d87533b57a
#   --- /dev/null
#   +++
# Patch metadata found on these lines:
#   b/test/meters_precision_meter_test.py
#   @@ -0,0 +1,191 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from test.generic.meter_test_utils import ClassificationMeterTest

import torch
from classy_vision import meters
from classy_vision.meters import PrecisionAtKMeter


class TestPrecisionAtKMeter(ClassificationMeterTest):
    """Tests for ``PrecisionAtKMeter`` with top-1 / top-2 precision.

    All fixtures use batchsize = 3 and 3 classes; scores are class
    probabilities. The expected values below count, per sample, how many of
    the k highest scoring classes are positives, and divide by k * #samples.

    The order in which tied scores are returned must not influence a test.
    Fixtures whose expected value is asserted therefore only contain ties
    between classes that are all positive or all negative for that sample.
    """

    @staticmethod
    def _scores_a():
        # sample 1: top-1 is class 1, second place tied between classes 0 / 2
        # sample 2: top-1 is class 1, top-2 adds class 0
        # sample 3: top-1 is class 2, second place tied between classes 0 / 1
        return torch.tensor([[0.3, 0.4, 0.3], [0.2, 0.65, 0.15], [0.33, 0.33, 0.34]])

    @staticmethod
    def _scores_b():
        # sample 1: top-1 is class 1, second place tied between classes 0 / 2
        # sample 2: top-1 is class 1, top-2 adds class 2
        # sample 3: classes 0 and 2 tied for first place, class 1 is last
        return torch.tensor([[0.05, 0.4, 0.05], [0.15, 0.65, 0.2], [0.4, 0.2, 0.4]])

    @staticmethod
    def _scores_with_top1_tie():
        # Only used by the invalid-input tests, where no value is computed.
        return torch.tensor(
            [
                [0.2, 0.4, 0.4],  # top-1: 1/2, top-2: 1/2
                [0.2, 0.65, 0.15],  # top-1: 1, top-2: 1/0
                [0.33, 0.33, 0.34],  # top-1: 2, top-2: 2/0/1
            ]
        )

    @staticmethod
    def _targets_a():
        # One-hot encoding, 1 = positive for class
        # sample-1: 1, sample-2: 0, sample-3: 0,1,2
        return torch.tensor([[0, 1, 0], [1, 0, 0], [1, 1, 1]])

    @staticmethod
    def _targets_b():
        # One-hot encoding: class 1 is the only positive for every sample
        return torch.tensor([[0, 1, 0], [0, 1, 0], [0, 1, 0]])

    def test_precision_meter_registry(self):
        meter = meters.build_meter({"name": "precision_at_k", "topk": [1, 3]})
        self.assertIsInstance(meter, PrecisionAtKMeter)

    def test_single_meter_update_and_reset(self):
        """
        This test verifies that the meter works as expected on a single
        update + reset + same single update.
        """
        meter = PrecisionAtKMeter(topk=[1, 2])

        # The first row used to be [0.2, 0.4, 0.4]: a first-place tie between
        # positive class 1 and negative class 2, so the asserted top-1 value
        # depended on tie order. _scores_a() keeps the expected values and
        # removes that dependency.
        model_output = self._scores_a()
        target = self._targets_a()

        # top-1 hits: samples 1 and 3 -> 2/3; top-2 hits: 1 + 1 + 2 -> 4/6
        expected_value = {"top_1": 2 / 3.0, "top_2": 4 / 6.0}

        self.meter_update_and_reset_test(meter, model_output, target, expected_value)

    def test_double_meter_update_and_reset(self):
        meter = PrecisionAtKMeter(topk=[1, 2])

        model_outputs = [self._scores_a(), self._scores_b()]
        targets = [self._targets_a(), self._targets_b()]

        # First batch has top-1 precision of 2/3.0, top-2 precision of 4/6.0
        # Second batch has top-1 precision of 2/3.0, top-2 precision of 2/6.0
        expected_value = {"top_1": 4 / 6.0, "top_2": 6 / 12.0}

        self.meter_update_and_reset_test(meter, model_outputs, targets, expected_value)

    def test_meter_invalid_model_output(self):
        meter = PrecisionAtKMeter(topk=[1, 2])
        # This model output has 3 dimensions instead of expected 2
        model_output = torch.tensor(
            [[[0.33, 0.33, 0.34], [1, 2, 3]], [[-1, -3, -4], [-10, -90, -100]]]
        )
        target = self._targets_a()

        self.meter_invalid_meter_input_test(meter, model_output, target)

    def test_meter_invalid_target(self):
        meter = PrecisionAtKMeter(topk=[1, 2])
        model_output = self._scores_with_top1_tie()
        # Target shape does not match model shape
        target = torch.tensor([0, 1, 2])

        self.meter_invalid_meter_input_test(meter, model_output, target)

    def test_meter_invalid_topk(self):
        # k = 5 is requested although the output only has 3 classes
        meter = PrecisionAtKMeter(topk=[1, 5])
        model_output = self._scores_with_top1_tie()
        target = self._targets_a()

        self.meter_invalid_meter_input_test(meter, model_output, target)

    def test_meter_get_set_classy_state_test(self):
        # In this test we update meter0 with model_output0 & target0
        # and we update meter1 with model_output1 & target1 then
        # transfer the state from meter1 to meter0 and validate they
        # give same expected value.
        #
        # Expected value is the expected value of meter1. For this test
        # to work, top-1 / top-2 values of meter0 / meter1 should be
        # different
        meter_pair = [PrecisionAtKMeter(topk=[1, 2]), PrecisionAtKMeter(topk=[1, 2])]
        model_outputs = [
            torch.tensor([[0.05, 0.4, 0.05], [0.2, 0.65, 0.15], [0.33, 0.33, 0.34]]),
            self._scores_b(),
        ]
        targets = [
            torch.tensor([[0, 1, 0], [1, 0, 0], [1, 1, 0]]),
            self._targets_b(),
        ]

        # Second update's expected value
        expected_value = {"top_1": 2 / 3.0, "top_2": 2 / 6.0}

        self.meter_get_set_classy_state_test(
            meter_pair, model_outputs, targets, expected_value
        )

    def test_meter_distributed(self):
        # Meter0 will execute on one process, Meter1 on the other
        meter_pair = [PrecisionAtKMeter(topk=[1, 2]), PrecisionAtKMeter(topk=[1, 2])]

        model_outputs = [
            self._scores_a(),  # Meter 0
            self._scores_b(),  # Meter 1
            self._scores_a(),  # Meter 0
            self._scores_b(),  # Meter 1
        ]

        # One-hot targets, see _targets_a() / _targets_b()
        targets = [
            self._targets_a(),  # Meter 0
            self._targets_b(),  # Meter 1
            self._targets_a(),  # Meter 0
            self._targets_b(),  # Meter 1
        ]

        # In first two updates there are 4 correct top-1, 6 correct in top 2
        # The same occurs in the second two updates and is added to first
        expected_values = [
            {"top_1": 4 / 6.0, "top_2": 6 / 12.0},  # After one update to each meter
            {"top_1": 8 / 12.0, "top_2": 12 / 24.0},  # After two updates to each meter
        ]

        self.meter_distributed_test(meter_pair, model_outputs, targets, expected_values)

    def test_non_onehot_target(self):
        """
        This test verifies that the meter works as expected when targets are
        given as class indices (target_is_one_hot=False) instead of one-hot
        vectors.
        """
        meter = PrecisionAtKMeter(topk=[1, 2], target_is_one_hot=False, num_classes=3)

        # Batchsize = 3, num classes = 3, score is probability of class.
        # The last row of the second batch used to be [0.1, 0.8, 0.1]: a
        # second-place tie between negative class 0 and positive class 2, so
        # the asserted top-2 value depended on tie order. With
        # [0.15, 0.8, 0.05] class 0 is second unambiguously, which is the
        # outcome the expected value below was written for.
        model_outputs = [
            self._scores_b(),
            torch.tensor([[0.2, 0.4, 0.4], [0.2, 0.65, 0.15], [0.15, 0.8, 0.05]]),
        ]

        # Class-index targets; the equivalent one-hot rows are in the comments
        targets = [
            torch.tensor([[1], [1], [1]]),  # [[0, 1, 0], [0, 1, 0], [0, 1, 0]]
            torch.tensor([[0], [1], [2]]),  # [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
        ]

        # First batch has top-1 precision of 2/3.0, top-2 precision of 2/6.0
        # Second batch has top-1 precision of 1/3.0, top-2 precision of 1/6.0
        expected_value = {"top_1": 3 / 6.0, "top_2": 3 / 12.0}

        self.meter_update_and_reset_test(meter, model_outputs, targets, expected_value)


# Patch metadata found on these lines (header of the next file in the patch):
#   diff --git a/test/meters_recall_meter_test.py b/test/meters_recall_meter_test.py
#   new file mode 100644
#   index 0000000000..93789b9d65
#   --- /dev/null
#   +++ b/test/meters_recall_meter_test.py
#   @@ -0,0 +1,192 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from test.generic.meter_test_utils import ClassificationMeterTest

import torch
from classy_vision import meters
from classy_vision.meters import RecallAtKMeter


class TestRecallAtKMeter(ClassificationMeterTest):
    """Tests for ``RecallAtKMeter`` with top-1 / top-2 recall.

    All fixtures use batchsize = 3 and 3 classes; scores are class
    probabilities. The expected values below count how many positives are
    found among the k highest scoring classes, and divide by the total number
    of positives in the targets.

    The order in which tied scores are returned must not influence a test.
    Fixtures whose expected value is asserted therefore only contain ties
    between classes that are all positive or all negative for that sample.
    """

    @staticmethod
    def _scores_a():
        # sample 1: top-1 is class 1, second place tied between classes 0 / 2
        # sample 2: top-1 is class 1, top-2 adds class 0
        # sample 3: top-1 is class 2, second place tied between classes 0 / 1
        return torch.tensor([[0.3, 0.4, 0.3], [0.2, 0.65, 0.15], [0.33, 0.33, 0.34]])

    @staticmethod
    def _scores_b():
        # sample 1: top-1 is class 1, second place tied between classes 0 / 2
        # sample 2: top-1 is class 1, top-2 adds class 2
        # sample 3: classes 0 and 2 tied for first place, class 1 is last
        return torch.tensor([[0.05, 0.4, 0.05], [0.15, 0.65, 0.2], [0.4, 0.2, 0.4]])

    @staticmethod
    def _scores_with_top1_tie():
        # Only used by the invalid-input tests, where no value is computed.
        return torch.tensor(
            [
                [0.2, 0.4, 0.4],  # top-1: 1/2, top-2: 1/2
                [0.2, 0.65, 0.15],  # top-1: 1, top-2: 1/0
                [0.33, 0.33, 0.34],  # top-1: 2, top-2: 2/0/1
            ]
        )

    @staticmethod
    def _targets_a():
        # One-hot encoding, 1 = positive for class (5 positives in total)
        # sample-1: 1, sample-2: 0, sample-3: 0,1,2
        return torch.tensor([[0, 1, 0], [1, 0, 0], [1, 1, 1]])

    @staticmethod
    def _targets_b():
        # One-hot encoding: class 1 is the only positive for every sample
        # (3 positives in total)
        return torch.tensor([[0, 1, 0], [0, 1, 0], [0, 1, 0]])

    def test_recall_meter_registry(self):
        meter = meters.build_meter({"name": "recall_at_k", "topk": [1, 3]})
        self.assertIsInstance(meter, RecallAtKMeter)

    def test_single_meter_update_and_reset(self):
        """
        This test verifies that the meter works as expected on a single
        update + reset + same single update.
        """
        meter = RecallAtKMeter(topk=[1, 2])

        # The first row used to be [0.2, 0.4, 0.4]: a first-place tie between
        # positive class 1 and negative class 2, so the asserted top-1 value
        # depended on tie order. _scores_a() keeps the expected values and
        # removes that dependency.
        model_output = self._scores_a()
        target = self._targets_a()

        # 5 positives; top-1 finds 2 of them, top-2 finds 1 + 1 + 2 = 4
        expected_value = {"top_1": 2 / 5.0, "top_2": 4 / 5.0}

        self.meter_update_and_reset_test(meter, model_output, target, expected_value)

    def test_double_meter_update_and_reset(self):
        meter = RecallAtKMeter(topk=[1, 2])

        model_outputs = [self._scores_a(), self._scores_b()]
        targets = [self._targets_a(), self._targets_b()]

        # First batch has top-1 recall of 2/5.0, top-2 recall of 4/5.0
        # Second batch has top-1 recall of 2/3.0, top-2 recall of 2/3.0
        expected_value = {"top_1": 4 / 8.0, "top_2": 6 / 8.0}

        self.meter_update_and_reset_test(meter, model_outputs, targets, expected_value)

    def test_meter_invalid_model_output(self):
        meter = RecallAtKMeter(topk=[1, 2])
        # This model output has 3 dimensions instead of expected 2
        model_output = torch.tensor(
            [[[0.33, 0.33, 0.34], [1, 2, 3]], [[-1, -3, -4], [-10, -90, -100]]]
        )
        target = self._targets_a()

        self.meter_invalid_meter_input_test(meter, model_output, target)

    def test_meter_invalid_target(self):
        meter = RecallAtKMeter(topk=[1, 2])
        model_output = self._scores_with_top1_tie()
        # Target shape does not match model shape
        target = torch.tensor([0, 1, 2])

        self.meter_invalid_meter_input_test(meter, model_output, target)

    def test_meter_invalid_topk(self):
        # k = 5 is requested although the output only has 3 classes
        meter = RecallAtKMeter(topk=[1, 5])
        model_output = self._scores_with_top1_tie()
        target = self._targets_a()

        self.meter_invalid_meter_input_test(meter, model_output, target)

    def test_meter_get_set_classy_state_test(self):
        # In this test we update meter0 with model_output0 & target0
        # and we update meter1 with model_output1 & target1 then
        # transfer the state from meter1 to meter0 and validate they
        # give same expected value.
        #
        # Expected value is the expected value of meter1. For this test
        # to work, top-1 / top-2 values of meter0 / meter1 should be
        # different
        meter_pair = [RecallAtKMeter(topk=[1, 2]), RecallAtKMeter(topk=[1, 2])]
        model_outputs = [
            torch.tensor([[0.05, 0.4, 0.05], [0.2, 0.65, 0.15], [0.33, 0.33, 0.34]]),
            self._scores_b(),
        ]
        targets = [
            torch.tensor([[0, 1, 0], [1, 0, 0], [1, 1, 0]]),
            self._targets_b(),
        ]

        # Second update's expected value
        expected_value = {"top_1": 2 / 3.0, "top_2": 2 / 3.0}

        self.meter_get_set_classy_state_test(
            meter_pair, model_outputs, targets, expected_value
        )

    def test_meter_distributed(self):
        # Meter0 will execute on one process, Meter1 on the other
        meter_pair = [RecallAtKMeter(topk=[1, 2]), RecallAtKMeter(topk=[1, 2])]

        model_outputs = [
            self._scores_a(),  # Meter 0
            self._scores_b(),  # Meter 1
            self._scores_a(),  # Meter 0
            self._scores_b(),  # Meter 1
        ]

        # One-hot targets, see _targets_a() / _targets_b()
        targets = [
            self._targets_a(),  # Meter 0
            self._targets_b(),  # Meter 1
            self._targets_a(),  # Meter 0
            self._targets_b(),  # Meter 1
        ]

        # In first two updates there are 4 correct top-1 out of 8
        # total, 6 correct in top 2 out of 8. The same occurs in the
        # second two updates and is added to first
        expected_values = [
            {"top_1": 4 / 8.0, "top_2": 6 / 8.0},  # After one update to each meter
            {"top_1": 8 / 16.0, "top_2": 12 / 16.0},  # After two updates to each meter
        ]

        self.meter_distributed_test(meter_pair, model_outputs, targets, expected_values)

    def test_non_onehot_target(self):
        """
        This test verifies that the meter works as expected when targets are
        given as class indices (target_is_one_hot=False) instead of one-hot
        vectors.
        """
        meter = RecallAtKMeter(topk=[1, 2], target_is_one_hot=False, num_classes=3)

        # Batchsize = 3, num classes = 3, score is probability of class.
        # The last row of the second batch used to be [0.1, 0.8, 0.1]: a
        # second-place tie between negative class 0 and positive class 2, so
        # the asserted top-2 value depended on tie order. With
        # [0.15, 0.8, 0.05] class 0 is second unambiguously, which is the
        # outcome the expected value below was written for.
        model_outputs = [
            self._scores_b(),
            torch.tensor([[0.2, 0.4, 0.4], [0.2, 0.65, 0.15], [0.15, 0.8, 0.05]]),
        ]

        # Class-index targets; the equivalent one-hot rows are in the comments
        targets = [
            torch.tensor([[1], [1], [1]]),  # [[0, 1, 0], [0, 1, 0], [0, 1, 0]]
            torch.tensor([[0], [1], [2]]),  # [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
        ]

        # Each batch has 3 positives (one per sample).
        # First batch has top-1 recall of 2/3.0, top-2 recall of 2/3.0
        # Second batch has top-1 recall of 1/3.0, top-2 recall of 1/3.0
        # Overall: top-1 3/6, top-2 3/6 (written as 6/12 below)
        expected_value = {"top_1": 3 / 6.0, "top_2": 6 / 12.0}

        self.meter_update_and_reset_test(meter, model_outputs, targets, expected_value)


# Patch metadata found on these lines (header of the next file in the patch):
#   diff --git a/test/meters_video_accuracy_meter_test.py b/test/meters_video_accuracy_meter_test.py
#   new file mode 100644
#   index 0000000000..c40b962c2e
#   --- /dev/null
#   +++ b/test/meters_video_accuracy_meter_test.py
#   @@ -0,0 +1,207 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from test.generic.meter_test_utils import ClassificationMeterTest

import torch
from classy_vision import meters
from classy_vision.meters import VideoAccuracyMeter


class TestVideoAccuracyMeter(ClassificationMeterTest):
    """Tests for ``VideoAccuracyMeter`` with top-1 / top-2 accuracy.

    Every fixture holds 6 clips = 3 videos x 2 clips (clips_per_video_test
    is 2), with 3 classes and scores in {1, 2, 3}. The per-video figures in
    the comments below are worked out on the mean of each video's two clip
    scores, which is what the asserted values correspond to.
    """

    @staticmethod
    def _new_meter(topk):
        return VideoAccuracyMeter(
            topk=topk, clips_per_video_train=1, clips_per_video_test=2
        )

    @staticmethod
    def _clips_a():
        # With _clip_labels(): video 1 is right at top-1 (1/3); all three
        # videos have the right class within the top 2 (3/3).
        return torch.tensor(
            [[3, 2, 1], [3, 1, 2], [1, 2, 2], [1, 2, 3], [2, 2, 2], [1, 3, 2]],
            dtype=torch.float,
        )

    @staticmethod
    def _clips_b():
        # With _clip_labels(): no video is right at top-1 (0/3); only video 2
        # has the right class within the top 2 (1/3).
        return torch.tensor(
            [[1, 2, 3], [1, 1, 3], [2, 2, 1], [3, 2, 1], [2, 2, 2], [2, 3, 1]],
            dtype=torch.float,
        )

    @staticmethod
    def _clip_labels():
        # Class 0 is the correct class for video 1, class 1 for video 2, and
        # class 2 for video 3; both clips of a video carry the same label.
        return torch.tensor([0, 0, 1, 1, 2, 2])

    def test_accuracy_meter_registry(self):
        built = meters.build_meter(
            {
                "name": "video_accuracy",
                "topk": [1, 2],
                "clips_per_video_train": 1,
                "clips_per_video_test": 2,
            }
        )
        self.assertIsInstance(built, VideoAccuracyMeter)

    def test_single_meter_update_and_reset(self):
        """
        This test verifies that the meter works as expected on a single
        update + reset + same single update.
        """
        # Only the first video has top class correct; all three videos have
        # the correct class in top 2
        self.meter_update_and_reset_test(
            self._new_meter([1, 2]),
            self._clips_a(),
            self._clip_labels(),
            {"top_1": 1 / 3.0, "top_2": 3 / 3.0},
            is_train=False,
        )

    def test_double_meter_update_and_reset(self):
        # Data of two (identical) batches is provided
        batches = [self._clips_a(), self._clips_a()]
        labels = [self._clip_labels(), self._clip_labels()]

        # Each batch has top-1 accuracy of 1/3.0, top-2 accuracy of 3/3.0
        self.meter_update_and_reset_test(
            self._new_meter([1, 2]),
            batches,
            labels,
            {"top_1": 2 / 6.0, "top_2": 6 / 6.0},
            is_train=False,
        )

    def test_meter_invalid_model_output(self):
        # This model output has 3 dimensions instead of expected 2
        bad_output = torch.tensor(
            [[[3, 2, 1], [1, 2, 3]], [[-1, -3, -4], [-10, -90, -100]]],
            dtype=torch.float,
        )
        self.meter_invalid_meter_input_test(
            self._new_meter([1, 2]), bad_output, torch.tensor([0, 1, 2])
        )

    def test_meter_invalid_target(self):
        meter = self._new_meter([1, 2])
        clip_scores = self._clips_a()

        # Target has 2 dimensions instead of expected 1
        two_dim_target = torch.tensor([[0, 1, 2], [0, 1, 2]])
        self.meter_invalid_meter_input_test(meter, clip_scores, two_dim_target)

        # Target of clips from the same video is not consistent
        inconsistent_target = torch.tensor([0, 2, 1, 1, 2, 2])
        self.meter_invalid_update_test(
            meter, clip_scores, inconsistent_target, is_train=False
        )

    def test_meter_invalid_topk(self):
        # k = 5 is requested although the output only has 3 classes
        self.meter_invalid_meter_input_test(
            self._new_meter([1, 5]), self._clips_a(), torch.tensor([0, 1, 2])
        )

    def test_meter_get_set_classy_state_test(self):
        # In this test we update meter0 with model_output0 & target0
        # and we update meter1 with model_output1 & target1 then
        # transfer the state from meter1 to meter0 and validate they
        # give same expected value.
        # Expected value is the expected value of meter1
        meter_pair = [self._new_meter([1, 2]), self._new_meter([1, 2])]
        batches = [self._clips_b(), self._clips_a()]
        labels = [self._clip_labels(), self._clip_labels()]

        # Value for second update
        self.meter_get_set_classy_state_test(
            meter_pair,
            batches,
            labels,
            {"top_1": 1 / 3.0, "top_2": 3 / 3.0},
            is_train=False,
        )

    def test_meter_distributed(self):
        # Meter0 will execute on one process, Meter1 on the other
        meter_pair = [self._new_meter([1, 2]), self._new_meter([1, 2])]

        batches = [
            self._clips_b(),  # Meter 0
            self._clips_a(),  # Meter 1
            self._clips_b(),  # Meter 0
            self._clips_a(),  # Meter 1
        ]
        labels = [self._clip_labels() for _ in range(4)]  # Meter 0, 1, 0, 1

        # In first two updates there is 1 correct in top 1 and 4 correct in
        # top 2, out of 6 videos.
        # The same occurs in the second two updates and is added to first
        expected_per_round = [
            {"top_1": 1 / 6.0, "top_2": 4 / 6.0},  # After one update to each meter
            {"top_1": 2 / 12.0, "top_2": 8 / 12.0},  # After two updates to each meter
        ]

        self.meter_distributed_test(
            meter_pair, batches, labels, expected_per_round, is_train=False
        )


# Patch metadata found on these lines (header of the next file in the patch):
#   diff --git a/test/models_classy_model_test.py b/test/models_classy_model_test.py
#   new file mode 100644
#   index 0000000000..44d24a320a
#   --- /dev/null
#   +++ b/test/models_classy_model_test.py
#   @@ -0,0 +1,77 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
+ +import os +import shutil +import tempfile +import unittest +from test.generic.config_utils import get_test_task_config + +import torch +import torch.nn as nn + +from classy_vision.generic.util import load_checkpoint +from classy_vision.hooks import CheckpointHook +from classy_vision.models import ClassyModel, register_model +from classy_vision.tasks import build_task + + +@register_model("my_test_model") +class MyTestModel(ClassyModel): + def __init__(self): + super().__init__() + self.linear = nn.Linear(10, 5) + + def forward(self, x): + return self.linear(x) + + @classmethod + def from_config(cls, config): + return cls() + + +class TestClassyModel(unittest.TestCase): + def setUp(self) -> None: + self.base_dir = tempfile.mkdtemp() + + def tearDown(self) -> None: + shutil.rmtree(self.base_dir) + + def test_from_checkpoint(self): + config = get_test_task_config() + config["model"] = {"name": "my_test_model"} + task = build_task(config) + task.prepare() + + local_variables = {} + checkpoint_folder = self.base_dir + "/checkpoint_end_test/" + device = "cpu" + input_args = {"config": config} + + # Simulate training by setting the model parameters to zero + for param in task.model.parameters(): + param.data.zero_() + + checkpoint_hook = CheckpointHook( + checkpoint_folder, input_args, phase_types=["train"] + ) + + # Create checkpoint dir, save checkpoint + os.mkdir(checkpoint_folder) + checkpoint_hook.on_start(task, local_variables) + + task.train = True + checkpoint_hook.on_phase_end(task, local_variables) + + # Model should be checkpointed. 
load and compare + checkpoint = load_checkpoint(checkpoint_folder, device) + + model = ClassyModel.from_checkpoint(checkpoint) + self.assertTrue(isinstance(model, MyTestModel)) + + # All parameters must be zero + for param in model.parameters(): + self.assertTrue(torch.all(param.data == 0)) diff --git a/test/models_classy_model_wrapper_test.py b/test/models_classy_model_wrapper_test.py new file mode 100644 index 0000000000..48053d0410 --- /dev/null +++ b/test/models_classy_model_wrapper_test.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import copy +import unittest +from test.generic.config_utils import get_fast_test_task_config + +import torch +import torch.nn as nn +from classy_vision.models import ClassyModel +from classy_vision.models.classy_model_wrapper import ClassyModelWrapper +from classy_vision.tasks import build_task +from classy_vision.trainer import LocalTrainer +from torchvision import models + + +class TestModel(nn.Module): + def __init__(self): + super().__init__() + self.linear = nn.Linear(10, 5) + + def forward(self, x): + return self.linear(x) + + def extract_features(self, x): + return torch.cat([x, x], dim=1) + + +class TestClassyModelWrapper(unittest.TestCase): + def test_classy_model_wrapper(self): + model = TestModel() + classy_model = ClassyModelWrapper(model) + # test that the returned object is an instance of ClassyModel + self.assertIsInstance(classy_model, ClassyModel) + + # test that forward works correctly + input = torch.zeros((100, 10)) + output = classy_model(input) + self.assertEqual(output.shape, (100, 5)) + + # test that extract_features works correctly + input = torch.zeros((100, 10)) + output = classy_model.extract_features(input) + self.assertEqual(output.shape, (100, 20)) + + # test that get_classy_state and set_classy_state work + 
nn.init.constant_(classy_model.model.linear.weight, 1) + weights = copy.deepcopy(classy_model.model.linear.weight.data) + state_dict = classy_model.get_classy_state(deep_copy=True) + nn.init.constant_(classy_model.model.linear.weight, 0) + classy_model.set_classy_state(state_dict) + self.assertTrue(torch.allclose(weights, classy_model.model.linear.weight.data)) + + def test_classy_model_wrapper_properties(self): + # test that the properties work correctly when passed to the wrapper + model = TestModel() + num_classes = 5 + input_shape = (10,) + output_shape = (num_classes,) + model_depth = 1 + classy_model = ClassyModelWrapper( + model, + input_shape=input_shape, + output_shape=output_shape, + model_depth=model_depth, + ) + self.assertEqual(classy_model.input_shape, input_shape) + self.assertEqual(classy_model.output_shape, output_shape) + self.assertEqual(classy_model.model_depth, model_depth) + + def test_train_step(self): + # test that the model can be run in a train step + model = models.resnet34(pretrained=False) + classy_model = ClassyModelWrapper(model) + + config = get_fast_test_task_config() + task = build_task(config) + task.set_model(classy_model) + trainer = LocalTrainer() + trainer.train(task) diff --git a/test/models_densenet_test.py b/test/models_densenet_test.py new file mode 100644 index 0000000000..eba18a1bed --- /dev/null +++ b/test/models_densenet_test.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest +from test.generic.utils import compare_model_state + +import torch +from classy_vision.models import build_model + + +MODELS = { + "small_densenet": { + "name": "densenet", + "num_blocks": [1, 1, 1, 1], + "num_classes": 1000, + "init_planes": 4, + "growth_rate": 32, + "expansion": 4, + "final_bn_relu": True, + "small_input": True, + } +} + + +class TestDensenet(unittest.TestCase): + def _test_model(self, model_config): + """This test will build Densenet models, run a forward pass and + verify output shape, and then verify that get / set state + works. + + I do this in one test so that we construct the model a minimum + number of times. + """ + model = build_model(model_config) + + # Verify forward pass works + input = torch.ones([1, 3, 32, 32]) + output = model.forward(input) + self.assertEqual(output.size(), (1, 1000)) + + # Verify get_set_state + new_model = build_model(model_config) + state = model.get_classy_state() + new_model.set_classy_state(state) + new_state = new_model.get_classy_state() + + compare_model_state(self, state, new_state, check_heads=True) + + def test_small_resnet(self): + self._test_model(MODELS["small_densenet"]) diff --git a/test/models_mlp_test.py b/test/models_mlp_test.py new file mode 100644 index 0000000000..262f84270b --- /dev/null +++ b/test/models_mlp_test.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest + +import torch +from classy_vision.models import ClassyModel, build_model + + +class TestMLPModel(unittest.TestCase): + def test_build_model(self): + config = {"name": "mlp", "input_dim": 3, "output_dim": 1, "hidden_dims": [2]} + model = build_model(config) + self.assertTrue(isinstance(model, ClassyModel)) + self.assertEqual(model.model_depth, 2) + + tensor = torch.tensor([[1, 2, 3]], dtype=torch.float) + output = model.forward(tensor) + self.assertEqual(output.shape, torch.Size([1, 1])) + + tensor = torch.tensor([[1, 2, 3], [1, 2, 3]], dtype=torch.float) + output = model.forward(tensor) + self.assertEqual(output.shape, torch.Size([2, 1])) diff --git a/test/models_resnext3d_test.py b/test/models_resnext3d_test.py new file mode 100644 index 0000000000..f575ab1533 --- /dev/null +++ b/test/models_resnext3d_test.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import copy +import unittest +from test.generic.utils import compare_model_state + +import torch +from classy_vision.models import ClassyModel, build_model + + +class TestResNeXt3D(unittest.TestCase): + def setUp(self): + model_config_template = { + "name": "resnext3d", + "input_key": "video", + "clip_crop_size": 112, + "skip_transformation_type": "postactivated_shortcut", + "frames_per_clip": 32, + "input_planes": 3, + "stem_name": "resnext3d_stem", + "stem_planes": 64, + "stem_temporal_kernel": 3, + "stage_planes": 64, + "num_groups": 1, + "width_per_group": 16, + "heads": [ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "in_plane": 512, + "pool_size": (2, 7, 7), + "activation_func": "softmax", + "num_classes": 2, + } + ], + } + pbt = "postactivated_bottleneck_transformation" + model_config_variants = [ + # ResNeXt3D-34 + { + "residual_transformation_type": "basic_transformation", + "num_blocks": [3, 4, 6, 3], + }, + # ResNeXt3D-50 + {"residual_transformation_type": pbt, "num_blocks": [3, 4, 6, 3]}, + # ResNeXt3D-101 + {"residual_transformation_type": pbt, "num_blocks": [3, 4, 23, 3]}, + ] + + self.model_configs = [] + for variant in model_config_variants: + model_config = copy.deepcopy(model_config_template) + model_config.update(variant) + + block_idx = model_config["num_blocks"][-1] + # attach the head at the last block + model_config["heads"][0]["fork_block"] = "pathway0-stage4-block%d" % ( + block_idx - 1 + ) + + self.model_configs.append(model_config) + + self.batchsize = 1 + + self.forward_pass_configs = { + "train": { + # input shape: N x C x T x H x W + "input": {"video": torch.rand(self.batchsize, 3, 16, 112, 112)}, + "model": { + "stem_maxpool": False, + "stage_temporal_stride": [1, 2, 2, 2], + "stage_spatial_stride": [1, 2, 2, 2], + }, + }, + "test": { + "input": {"video": torch.rand(self.batchsize, 3, 16, 256, 320)}, + "model": { + "stem_maxpool": True, + "stage_temporal_stride": [1, 2, 2, 2], + 
"stage_spatial_stride": [1, 2, 2, 2], + }, + }, + } + + def test_build_model(self): + for model_config in self.model_configs: + model = build_model(model_config) + self.assertTrue(isinstance(model, ClassyModel)) + self.assertTrue( + type(model.output_shape) == tuple and len(model.output_shape) == 2 + ) + self.assertTrue(type(model.model_depth) == int) + + def test_forward_pass(self): + for split, split_config in self.forward_pass_configs.items(): + for model_config in self.model_configs: + forward_pass_model_config = copy.deepcopy(model_config) + forward_pass_model_config.update(split_config["model"]) + + num_classes = forward_pass_model_config["heads"][0]["num_classes"] + + model = build_model(forward_pass_model_config) + model.train(split == "train") + + out = model(split_config["input"]) + + self.assertEqual(out.size(), (self.batchsize, num_classes)) + + def test_set_classy_state_plain(self): + # We use the same model architecture to save and load a model state. + # This is a plain use case of `set_classy_state` method + for model_config in self.model_configs: + model = build_model(model_config) + model_state = model.get_classy_state() + + model2 = build_model(model_config) + model2.set_classy_state(model_state) + model2_state = model2.get_classy_state() + compare_model_state(self, model_state, model2_state) + + def _get_model_config_weight_inflation(self): + model_2d_config = { + "name": "resnext3d", + "frames_per_clip": 1, + "input_planes": 3, + "clip_crop_size": 224, + "skip_transformation_type": "postactivated_shortcut", + "residual_transformation_type": "postactivated_bottleneck_transformation", + "num_blocks": [3, 4, 6, 3], + "stem_name": "resnext3d_stem", + "stem_planes": 64, + "stem_temporal_kernel": 1, + "stem_spatial_kernel": 7, + "stem_maxpool": True, + "stage_planes": 256, + "stage_temporal_kernel_basis": [[1], [1], [1], [1]], + "temporal_conv_1x1": [True, True, True, True], + "stage_temporal_stride": [1, 1, 1, 1], + "stage_spatial_stride": [1, 2, 2, 
2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 1000, + "zero_init_residual_transform": True, + "heads": [ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [1, 7, 7], + "activation_func": "softmax", + "num_classes": 1000, + "fork_block": "pathway0-stage4-block2", + "in_plane": 2048, + "use_dropout": False, + } + ], + } + + model_3d_config = { + "name": "resnext3d", + "frames_per_clip": 8, + "input_planes": 3, + "clip_crop_size": 224, + "skip_transformation_type": "postactivated_shortcut", + "residual_transformation_type": "postactivated_bottleneck_transformation", + "num_blocks": [3, 4, 6, 3], + "input_key": "video", + "stem_name": "resnext3d_stem", + "stem_planes": 64, + "stem_temporal_kernel": 5, + "stem_spatial_kernel": 7, + "stem_maxpool": True, + "stage_planes": 256, + "stage_temporal_kernel_basis": [[3], [3, 1], [3, 1], [1, 3]], + "temporal_conv_1x1": [True, True, True, True], + "stage_temporal_stride": [1, 1, 1, 1], + "stage_spatial_stride": [1, 2, 2, 2], + "num_groups": 1, + "width_per_group": 64, + "num_classes": 1000, + "freeze_trunk": False, + "zero_init_residual_transform": True, + "heads": [ + { + "name": "fully_convolutional_linear", + "unique_id": "default_head", + "pool_size": [8, 7, 7], + "activation_func": "softmax", + "num_classes": 1000, + "fork_block": "pathway0-stage4-block2", + "in_plane": 2048, + "use_dropout": True, + } + ], + } + return model_2d_config, model_3d_config + + def test_set_classy_state_weight_inflation(self): + # Get model state from a 2D ResNet model, inflate the 2D conv weights, + # and use them to initialize 3D conv weights. This is an advanced use of + # `set_classy_state` method. 
+ model_2d_config, model_3d_config = self._get_model_config_weight_inflation() + model_2d = build_model(model_2d_config) + model_2d_state = model_2d.get_classy_state() + + model_3d = build_model(model_3d_config) + model_3d.set_classy_state(model_2d_state) + model_3d_state = model_3d.get_classy_state() + + for name, weight_2d in model_2d_state["model"]["trunk"].items(): + weight_3d = model_3d_state["model"]["trunk"][name] + if weight_2d.dim() == 5: + # inflation only applies to conv weights + self.assertEqual(weight_3d.dim(), 5) + if weight_2d.shape[2] == 1 and weight_3d.shape[2] > 1: + weight_2d_inflated = ( + weight_2d.repeat(1, 1, weight_3d.shape[2], 1, 1) + / weight_3d.shape[2] + ) + self.assertTrue(torch.equal(weight_3d, weight_2d_inflated)) + + def test_set_classy_state_weight_inflation_inconsistent_kernel_size(self): + # Get model state from a 2D ResNet model, inflate the 2D conv weights, + # and use them to initialize 3D conv weights. + model_2d_config, model_3d_config = self._get_model_config_weight_inflation() + # Modify conv kernel size in the stem layer of 2D model to 5, which is + # inconsistent with the kernel size 7 used in 3D model. + model_2d_config["stem_spatial_kernel"] = 5 + model_2d = build_model(model_2d_config) + model_2d_state = model_2d.get_classy_state() + model_3d = build_model(model_3d_config) + with self.assertRaises(AssertionError): + model_3d.set_classy_state(model_2d_state) diff --git a/test/models_resnext_test.py b/test/models_resnext_test.py new file mode 100644 index 0000000000..c246e977d0 --- /dev/null +++ b/test/models_resnext_test.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest +from test.generic.utils import compare_model_state + +import torch +from classy_vision.models import build_model + + +MODELS = { + "small_resnext": { + "name": "resnext", + "num_blocks": [1, 1, 1, 1], + "init_planes": 4, + "reduction": 4, + "base_width_and_cardinality": [2, 32], + "small_input": True, + "zero_init_bn_residuals": True, + "basic_layer": True, + "final_bn_relu": True, + "heads": [ + { + "name": "fully_connected", + "unique_id": "default_head", + "num_classes": 1000, + "fork_block": "block3-0", + "in_plane": 128, + } + ], + }, + "small_resnet": { + "name": "resnet", + "num_blocks": [1, 1, 1, 1], + "init_planes": 4, + "reduction": 4, + "small_input": True, + "zero_init_bn_residuals": True, + "basic_layer": True, + "final_bn_relu": True, + "heads": [ + { + "name": "fully_connected", + "unique_id": "default_head", + "num_classes": 1000, + "fork_block": "block3-0", + "in_plane": 128, + } + ], + }, +} + + +class TestResnext(unittest.TestCase): + def _test_model(self, model_config): + """This test will build ResNeXt-* models, run a forward pass and + verify output shape, and then verify that get / set state + works. + + I do this in one test so that we construct the model a minimum + number of times. 
+ """ + model = build_model(model_config) + + # Verify forward pass works + input = torch.ones([1, 3, 32, 32]) + output = model.forward(input) + self.assertEqual(output.size(), (1, 1000)) + + # Verify get_set_state + new_model = build_model(model_config) + state = model.get_classy_state() + new_model.set_classy_state(state) + new_state = new_model.get_classy_state() + + compare_model_state(self, state, new_state, check_heads=True) + + def test_small_resnext(self): + self._test_model(MODELS["small_resnext"]) + + def test_small_resnet(self): + self._test_model(MODELS["small_resnet"]) diff --git a/test/optim_param_scheduler_composite_test.py b/test/optim_param_scheduler_composite_test.py new file mode 100644 index 0000000000..a5e6357b71 --- /dev/null +++ b/test/optim_param_scheduler_composite_test.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import copy +import unittest + +from classy_vision.optim.param_scheduler import build_param_scheduler +from classy_vision.optim.param_scheduler.composite_scheduler import ( + CompositeParamScheduler, + UpdateInterval, +) + + +class TestCompositeScheduler(unittest.TestCase): + _num_epochs = 10 + + def _get_valid_long_config(self): + return { + "name": "composite", + "schedulers": [ + {"name": "constant", "value": 0.1}, + {"name": "constant", "value": 0.2}, + {"name": "constant", "value": 0.3}, + {"name": "constant", "value": 0.4}, + ], + "lengths": [0.2, 0.4, 0.1, 0.3], + } + + def _get_lengths_sum_less_one_config(self): + return { + "name": "composite", + "schedulers": [ + {"name": "constant", "value": 0.1}, + {"name": "constant", "value": 0.2}, + ], + "lengths": [0.7, 0.2999], + } + + def _get_valid_mixed_config(self): + return { + "name": "composite", + "schedulers": [ + {"name": "step", "values": [0.1, 0.2, 0.3, 0.4, 0.5], "num_epochs": 10}, + {"name": "cosine", "start_lr": 0.42, "end_lr": 0.0001}, + ], + "lengths": [0.5, 0.5], + } + + def _get_valid_linear_config(self): + return { + "name": "composite", + "schedulers": [ + {"name": "linear", "start_lr": 0.0, "end_lr": 0.5}, + {"name": "linear", "start_lr": 0.5, "end_lr": 1.0}, + ], + "lengths": [0.5, 0.5], + "interval_scaling": ["rescaled", "rescaled"], + } + + def test_invalid_config(self): + config = self._get_valid_mixed_config() + bad_config = copy.deepcopy(config) + + # No schedulers + bad_config["schedulers"] = [] + bad_config["lengths"] = [] + with self.assertRaises(AssertionError): + CompositeParamScheduler.from_config(bad_config) + + # Size of schedulers and lengths doesn't match + bad_config["schedulers"] = copy.deepcopy(config["schedulers"]) + bad_config["lengths"] = copy.deepcopy(config["lengths"]) + bad_config["schedulers"].append(bad_config["schedulers"][-1]) + with self.assertRaises(AssertionError): + CompositeParamScheduler.from_config(bad_config) + + # Sum of lengths < 1 + 
bad_config["schedulers"] = copy.deepcopy(config["schedulers"]) + bad_config["lengths"][-1] -= 0.1 + with self.assertRaises(AssertionError): + CompositeParamScheduler.from_config(bad_config) + + # Sum of lengths > 1 + bad_config["lengths"] = copy.deepcopy(config["lengths"]) + bad_config["lengths"][-1] += 0.1 + with self.assertRaises(AssertionError): + CompositeParamScheduler.from_config(bad_config) + + # Bad value for update_interval + bad_config["lengths"] = copy.deepcopy(config["lengths"]) + bad_config["update_interval"] = "epochs" + with self.assertRaises(AssertionError): + CompositeParamScheduler.from_config(bad_config) + + # Bad value for composition_mode + del bad_config["update_interval"] + bad_config["interval_scaling"] = ["rescaled", "rescaleds"] + with self.assertRaises(AssertionError): + CompositeParamScheduler.from_config(bad_config) + + # Wrong number composition modes + del bad_config["interval_scaling"] + bad_config["interval_scaling"] = ["rescaled"] + with self.assertRaises(AssertionError): + CompositeParamScheduler.from_config(bad_config) + + # Missing required parameters + del bad_config["interval_scaling"] + bad_config["lengths"] = config["lengths"] + del bad_config["lengths"] + with self.assertRaises(AssertionError): + CompositeParamScheduler.from_config(bad_config) + + bad_config["lengths"] = config["lengths"] + del bad_config["schedulers"] + with self.assertRaises(AssertionError): + CompositeParamScheduler.from_config(bad_config) + + def test_long_scheduler(self): + config = self._get_valid_long_config() + + scheduler = CompositeParamScheduler.from_config(config) + schedule = [ + scheduler(epoch_num / self._num_epochs) + for epoch_num in range(self._num_epochs) + ] + expected_schedule = [0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.3, 0.4, 0.4, 0.4] + + self.assertEqual(schedule, expected_schedule) + + def test_scheduler_lengths_within_epsilon_of_one(self): + config = self._get_lengths_sum_less_one_config() + scheduler = 
CompositeParamScheduler.from_config(config) + schedule = [ + scheduler(epoch_num / self._num_epochs) + for epoch_num in range(self._num_epochs) + ] + expected_schedule = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2] + self.assertEqual(schedule, expected_schedule) + + def test_scheduler_update_interval(self): + config = self._get_valid_mixed_config() + + # Check default + scheduler = CompositeParamScheduler.from_config(config) + self.assertEqual(scheduler.update_interval, UpdateInterval.STEP) + + # Check step + step_config = copy.deepcopy(config) + step_config["update_interval"] = "step" + scheduler = build_param_scheduler(step_config) + self.assertEqual(scheduler.update_interval, UpdateInterval.STEP) + + # Check epoch + epoch_config = copy.deepcopy(config) + epoch_config["update_interval"] = "epoch" + scheduler = build_param_scheduler(epoch_config) + self.assertEqual(scheduler.update_interval, UpdateInterval.EPOCH) + + def test_build_composite_scheduler(self): + config = self._get_valid_mixed_config() + scheduler = build_param_scheduler(config) + self.assertTrue(isinstance(scheduler, CompositeParamScheduler)) + + def test_scheduler_with_mixed_types(self): + config = self._get_valid_mixed_config() + scheduler_0 = build_param_scheduler(config["schedulers"][0]) + scheduler_1 = build_param_scheduler(config["schedulers"][1]) + + # Check scaled + config["interval_scaling"] = ["rescaled", "rescaled"] + scheduler = CompositeParamScheduler.from_config(config) + scaled_schedule = [ + round(scheduler(epoch_num / self._num_epochs), 4) + for epoch_num in range(self._num_epochs) + ] + expected_schedule = [ + round(scheduler_0(epoch_num / self._num_epochs), 4) + for epoch_num in range(0, self._num_epochs, 2) + ] + [ + round(scheduler_1(epoch_num / self._num_epochs), 4) + for epoch_num in range(0, self._num_epochs, 2) + ] + self.assertEqual(scaled_schedule, expected_schedule) + + # Check fixed + config["interval_scaling"] = ["fixed", "fixed"] + scheduler = 
CompositeParamScheduler.from_config(config) + fixed_schedule = [ + round(scheduler(epoch_num / self._num_epochs), 4) + for epoch_num in range(self._num_epochs) + ] + expected_schedule = [ + round(scheduler_0(epoch_num / self._num_epochs), 4) + for epoch_num in range(0, int(self._num_epochs / 2)) + ] + [ + round(scheduler_1(epoch_num / self._num_epochs), 4) + for epoch_num in range(int(self._num_epochs / 2), self._num_epochs) + ] + self.assertEqual(fixed_schedule, expected_schedule) + + # Check that default is rescaled + del config["interval_scaling"] + scheduler = CompositeParamScheduler.from_config(config) + schedule = [ + round(scheduler(epoch_num / self._num_epochs), 4) + for epoch_num in range(self._num_epochs) + ] + self.assertEqual(scaled_schedule, schedule) + # Check warmup of rescaled then fixed + config["interval_scaling"] = ["rescaled", "fixed"] + scheduler = CompositeParamScheduler.from_config(config) + fixed_schedule = [ + round(scheduler(epoch_num / self._num_epochs), 4) + for epoch_num in range(self._num_epochs) + ] + expected_schedule = [ + round(scheduler_0(epoch_num / self._num_epochs), 4) + for epoch_num in range(0, int(self._num_epochs), 2) + ] + [ + round(scheduler_1(epoch_num / self._num_epochs), 4) + for epoch_num in range(int(self._num_epochs / 2), self._num_epochs) + ] + self.assertEqual(fixed_schedule, expected_schedule) + + def test_linear_scheduler_no_gaps(self): + config = self._get_valid_linear_config() + + # Check rescaled + scheduler = CompositeParamScheduler.from_config(config) + schedule = [ + scheduler(epoch_num / self._num_epochs) + for epoch_num in range(self._num_epochs) + ] + expected_schedule = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + self.assertEqual(expected_schedule, schedule) + + # Check fixed composition gives same result as only 1 scheduler + config["schedulers"][1] = config["schedulers"][0] + config["interval_scaling"] = ["fixed", "fixed"] + scheduler = CompositeParamScheduler.from_config(config) + 
linear_scheduler = build_param_scheduler(config["schedulers"][0]) + schedule = [ + scheduler(epoch_num / self._num_epochs) + for epoch_num in range(self._num_epochs) + ] + expected_schedule = [ + linear_scheduler(epoch_num / self._num_epochs) + for epoch_num in range(self._num_epochs) + ] + self.assertEqual(expected_schedule, schedule) diff --git a/test/optim_param_scheduler_constant_test.py b/test/optim_param_scheduler_constant_test.py new file mode 100644 index 0000000000..eb1ff8aa36 --- /dev/null +++ b/test/optim_param_scheduler_constant_test.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import copy +import unittest + +from classy_vision.optim.param_scheduler import build_param_scheduler +from classy_vision.optim.param_scheduler.constant_scheduler import ( + ConstantParamScheduler, +) + + +class TestFixedScheduler(unittest.TestCase): + _num_epochs = 12 + + def _get_valid_config(self): + return {"name": "constant", "num_epochs": self._num_epochs, "value": 0.1} + + def test_invalid_config(self): + # Invalid num epochs + config = self._get_valid_config() + + bad_config = copy.deepcopy(config) + del bad_config["value"] + with self.assertRaises(AssertionError): + ConstantParamScheduler.from_config(bad_config) + + def test_scheduler(self): + config = self._get_valid_config() + + scheduler = ConstantParamScheduler.from_config(config) + schedule = [ + scheduler(epoch_num / self._num_epochs) + for epoch_num in range(self._num_epochs) + ] + expected_schedule = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + + self.assertEqual(schedule, expected_schedule) + # The input for the scheduler should be in the interval [0;1), open + with self.assertRaises(RuntimeError): + scheduler(1) + + def test_build_constant_scheduler(self): + config = self._get_valid_config() + scheduler = 
build_param_scheduler(config) + self.assertTrue(isinstance(scheduler, ConstantParamScheduler)) diff --git a/test/optim_param_scheduler_cosine_test.py b/test/optim_param_scheduler_cosine_test.py new file mode 100644 index 0000000000..a93028174a --- /dev/null +++ b/test/optim_param_scheduler_cosine_test.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import copy +import unittest + +from classy_vision.optim.param_scheduler import build_param_scheduler +from classy_vision.optim.param_scheduler.cosine_scheduler import CosineParamScheduler + + +class TestCosineScheduler(unittest.TestCase): + _num_epochs = 10 + + def _get_valid_decay_config(self): + return {"name": "cosine", "start_lr": 0.1, "end_lr": 0} + + def _get_valid_decay_config_intermediate_values(self): + return [0.0976, 0.0905, 0.0794, 0.0655, 0.05, 0.0345, 0.0206, 0.0095, 0.0024] + + def test_invalid_config(self): + # Invalid num epochs + config = self._get_valid_decay_config() + + bad_config = copy.deepcopy(config) + # Invalid Base lr + del bad_config["start_lr"] + with self.assertRaises(AssertionError): + CosineParamScheduler.from_config(bad_config) + + # Invalid end_lr + bad_config["start_lr"] = config["start_lr"] + del bad_config["end_lr"] + with self.assertRaises(AssertionError): + CosineParamScheduler.from_config(bad_config) + + def test_scheduler_as_decay(self): + config = self._get_valid_decay_config() + + scheduler = CosineParamScheduler.from_config(config) + schedule = [ + round(scheduler(epoch_num / self._num_epochs), 4) + for epoch_num in range(self._num_epochs) + ] + expected_schedule = [ + config["start_lr"] + ] + self._get_valid_decay_config_intermediate_values() + + self.assertEqual(schedule, expected_schedule) + + def test_scheduler_as_warmup(self): + config = self._get_valid_decay_config() + # Swap start and end lr 
to change to warmup + tmp = config["start_lr"] + config["start_lr"] = config["end_lr"] + config["end_lr"] = tmp + + scheduler = CosineParamScheduler.from_config(config) + schedule = [ + round(scheduler(epoch_num / self._num_epochs), 4) + for epoch_num in range(self._num_epochs) + ] + # Schedule should be decay reversed + expected_schedule = [config["start_lr"]] + list( + reversed(self._get_valid_decay_config_intermediate_values()) + ) + + self.assertEqual(schedule, expected_schedule) + + def test_scheduler_warmup_decay_match(self): + decay_config = self._get_valid_decay_config() + decay_scheduler = CosineParamScheduler.from_config(decay_config) + + warmup_config = copy.deepcopy(decay_config) + # Swap start and end lr to change to warmup + tmp = warmup_config["start_lr"] + warmup_config["start_lr"] = warmup_config["end_lr"] + warmup_config["end_lr"] = tmp + warmup_scheduler = CosineParamScheduler.from_config(warmup_config) + + decay_schedule = [ + round(decay_scheduler(epoch_num / 1000), 8) for epoch_num in range(1, 1000) + ] + warmup_schedule = [ + round(warmup_scheduler(epoch_num / 1000), 8) for epoch_num in range(1, 1000) + ] + + self.assertEqual(decay_schedule, list(reversed(warmup_schedule))) + + def test_build_cosine_scheduler(self): + config = self._get_valid_decay_config() + scheduler = build_param_scheduler(config) + self.assertTrue(isinstance(scheduler, CosineParamScheduler)) diff --git a/test/optim_param_scheduler_linear_test.py b/test/optim_param_scheduler_linear_test.py new file mode 100644 index 0000000000..9c38a9a667 --- /dev/null +++ b/test/optim_param_scheduler_linear_test.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import copy +import unittest + +from classy_vision.optim.param_scheduler import build_param_scheduler +from classy_vision.optim.param_scheduler.linear_scheduler import LinearParamScheduler + + +class TestLienarScheduler(unittest.TestCase): + _num_epochs = 10 + + def _get_valid_intermediate(self): + return [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09] + + def _get_valid_config(self): + return {"name": "linear", "start_lr": 0.0, "end_lr": 0.1} + + def test_invalid_config(self): + config = self._get_valid_config() + + bad_config = copy.deepcopy(config) + # No start lr + del bad_config["start_lr"] + with self.assertRaises(AssertionError): + LinearParamScheduler.from_config(bad_config) + + # No end lr + bad_config["start_lr"] = config["start_lr"] + del bad_config["end_lr"] + with self.assertRaises(AssertionError): + LinearParamScheduler.from_config(bad_config) + + def test_scheduler(self): + config = self._get_valid_config() + + # Check as warmup + scheduler = LinearParamScheduler.from_config(config) + schedule = [ + round(scheduler(epoch_num / self._num_epochs), 4) + for epoch_num in range(self._num_epochs) + ] + expected_schedule = [config["start_lr"]] + self._get_valid_intermediate() + self.assertEqual(schedule, expected_schedule) + + # Check as decay + tmp = config["start_lr"] + config["start_lr"] = config["end_lr"] + config["end_lr"] = tmp + scheduler = LinearParamScheduler.from_config(config) + schedule = [ + round(scheduler(epoch_num / self._num_epochs), 4) + for epoch_num in range(self._num_epochs) + ] + expected_schedule = [config["start_lr"]] + list( + reversed(self._get_valid_intermediate()) + ) + self.assertEqual(schedule, expected_schedule) + + def test_build_linear_scheduler(self): + config = self._get_valid_config() + scheduler = build_param_scheduler(config) + self.assertTrue(isinstance(scheduler, LinearParamScheduler)) diff --git a/test/optim_param_scheduler_multi_step_test.py b/test/optim_param_scheduler_multi_step_test.py new file mode 
class TestMultiStepParamScheduler(unittest.TestCase):
    """Tests for ``MultiStepParamScheduler``."""

    _num_epochs = 12

    def _get_valid_config(self):
        """Return the baseline config used by every test in this class."""
        return {
            "name": "multistep",
            "num_epochs": self._num_epochs,
            "values": [0.1, 0.01, 0.001, 0.0001],
            "milestones": [4, 6, 8],
        }

    def test_invalid_config(self):
        """``from_config`` must raise ``AssertionError`` for each bad config."""
        config = self._get_valid_config()

        # Sentinel meaning "delete the key" rather than "set it to a value".
        missing = object()
        invalid_overrides = [
            ("num_epochs is negative", "num_epochs", -1),
            ("values is missing", "values", missing),
            ("values is a dict", "values", {"a": "b"}),
            ("values is empty", "values", []),
            ("milestones is a dict", "milestones", {"a": "b"}),
            ("too many milestones", "milestones", [3, 6, 8, 12]),
            ("too few milestones", "milestones", [3, 6]),
            ("milestones exceed num_epochs", "milestones", [3, 6, 12]),
            ("milestones out of order", "milestones", [3, 8, 6]),
        ]

        # Every case starts from a fresh copy and runs as its own sub-test,
        # so one failure neither hides nor contaminates the others.
        for description, key, value in invalid_overrides:
            bad_config = copy.deepcopy(config)
            if value is missing:
                del bad_config[key]
            else:
                bad_config[key] = value
            with self.subTest(description):
                with self.assertRaises(AssertionError):
                    MultiStepParamScheduler.from_config(bad_config)

    def _test_config_scheduler(self, config, expected_schedule):
        """Build a scheduler from ``config`` and compare its per-epoch values."""
        scheduler = MultiStepParamScheduler.from_config(config)
        schedule = [
            scheduler(epoch_num / self._num_epochs)
            for epoch_num in range(self._num_epochs)
        ]
        self.assertEqual(schedule, expected_schedule)

    def test_scheduler(self):
        """With milestones [4, 6, 8] the value drops at epochs 4, 6 and 8."""
        config = self._get_valid_config()
        expected_schedule = [0.1] * 4 + [0.01] * 2 + [0.001] * 2 + [0.0001] * 4
        self._test_config_scheduler(config, expected_schedule)

    def test_default_config(self):
        """Without ``milestones`` the expected drops are equally spaced."""
        default_config = copy.deepcopy(self._get_valid_config())
        del default_config["milestones"]
        expected_schedule = [0.1] * 3 + [0.01] * 3 + [0.001] * 3 + [0.0001] * 3
        self._test_config_scheduler(default_config, expected_schedule)

    def test_build_non_equi_step_scheduler(self):
        """``build_param_scheduler`` must return a ``MultiStepParamScheduler``."""
        config = self._get_valid_config()
        scheduler = build_param_scheduler(config)
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(scheduler, MultiStepParamScheduler)
class TestPolynomialScheduler(unittest.TestCase):
    """Tests for ``PolynomialDecayParamScheduler``."""

    _num_epochs = 10

    def _get_valid_config(self):
        """Return the baseline config used by every test in this class."""
        return {
            "name": "polynomial",
            "num_epochs": self._num_epochs,
            "base_lr": 0.1,
            "power": 1,
        }

    def test_invalid_config(self):
        """``from_config`` must reject a config missing ``base_lr`` or ``power``."""
        config = self._get_valid_config()

        for missing_key in ("base_lr", "power"):
            bad_config = copy.deepcopy(config)
            del bad_config[missing_key]
            with self.subTest(missing_key=missing_key):
                with self.assertRaises(AssertionError):
                    PolynomialDecayParamScheduler.from_config(bad_config)

    def test_scheduler(self):
        """With ``power`` 1 the expected values fall by 0.01 per epoch."""
        config = self._get_valid_config()

        scheduler = PolynomialDecayParamScheduler.from_config(config)
        schedule = [
            round(scheduler(epoch_num / self._num_epochs), 2)
            for epoch_num in range(self._num_epochs)
        ]
        expected_schedule = [0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0.03, 0.02, 0.01]

        self.assertEqual(schedule, expected_schedule)

    def test_build_polynomial_scheduler(self):
        """``build_param_scheduler`` must return the polynomial scheduler."""
        config = self._get_valid_config()
        scheduler = build_param_scheduler(config)
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(scheduler, PolynomialDecayParamScheduler)
class TestStepScheduler(unittest.TestCase):
    """Tests for ``StepParamScheduler``."""

    _num_epochs = 12

    def _get_valid_config(self):
        """Return the baseline config used by every test in this class."""
        return {
            "name": "step",
            "num_epochs": self._num_epochs,
            "values": [0.1, 0.01, 0.001, 0.0001],
        }

    def test_invalid_config(self):
        """``from_config`` must raise ``AssertionError`` for each bad config."""
        config = self._get_valid_config()

        # Sentinel meaning "delete the key" rather than "set it to a value".
        missing = object()
        invalid_overrides = [
            ("num_epochs is negative", "num_epochs", -1),
            ("values is missing", "values", missing),
            ("values is a dict", "values", {"a": "b"}),
            ("values is empty", "values", []),
        ]

        # Fresh copy per case so that no case depends on the previous one.
        for description, key, value in invalid_overrides:
            bad_config = copy.deepcopy(config)
            if value is missing:
                del bad_config[key]
            else:
                bad_config[key] = value
            with self.subTest(description):
                with self.assertRaises(AssertionError):
                    StepParamScheduler.from_config(bad_config)

    def test_scheduler(self):
        """Each of the four values is expected for three consecutive epochs."""
        config = self._get_valid_config()

        scheduler = StepParamScheduler.from_config(config)
        schedule = [
            scheduler(epoch_num / self._num_epochs)
            for epoch_num in range(self._num_epochs)
        ]
        expected_schedule = [0.1] * 3 + [0.01] * 3 + [0.001] * 3 + [0.0001] * 3

        self.assertEqual(schedule, expected_schedule)

    def test_build_step_scheduler(self):
        """``build_param_scheduler`` must return a ``StepParamScheduler``."""
        config = self._get_valid_config()
        scheduler = build_param_scheduler(config)
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(scheduler, StepParamScheduler)
class TestStepWithFixedGammaScheduler(unittest.TestCase):
    """Tests for ``StepWithFixedGammaParamScheduler``."""

    _num_epochs = 12

    def _get_valid_config(self):
        """Return the baseline config used by every test in this class."""
        return {
            "name": "step_with_fixed_gamma",
            "base_lr": 1,
            "gamma": 0.1,
            "num_decays": 3,
            "num_epochs": self._num_epochs,
        }

    def test_invalid_config(self):
        """``from_config`` must raise ``AssertionError`` for each bad value.

        Every case overrides exactly one key on a fresh copy of the valid
        config. Previously the ``num_decays`` case reused a dict that still
        had ``num_epochs = -1``, so it could pass without ``num_decays`` ever
        being validated.
        """
        config = self._get_valid_config()

        invalid_overrides = [
            ("num_epochs", -1),
            ("num_decays", 0),
            ("base_lr", -0.01),
            ("gamma", [2]),
        ]
        for key, value in invalid_overrides:
            bad_config = copy.deepcopy(config)
            bad_config[key] = value
            with self.subTest(key=key, value=value):
                with self.assertRaises(AssertionError):
                    StepWithFixedGammaParamScheduler.from_config(bad_config)

    def test_scheduler(self):
        """The expected value is multiplied by ``gamma`` every three epochs."""
        config = self._get_valid_config()

        scheduler = StepWithFixedGammaParamScheduler.from_config(config)
        schedule = [
            scheduler(epoch_num / self._num_epochs)
            for epoch_num in range(self._num_epochs)
        ]
        expected_schedule = [1] * 3 + [0.1] * 3 + [0.01] * 3 + [0.001] * 3

        # zip() stops at the shorter input, so check the lengths explicitly.
        self.assertEqual(len(schedule), len(expected_schedule))
        for param, expected_param in zip(schedule, expected_schedule):
            self.assertAlmostEqual(param, expected_param)

    def test_build_step_with_fixed_gamma_scheduler(self):
        """``build_param_scheduler`` must return the fixed-gamma scheduler."""
        config = self._get_valid_config()
        scheduler = build_param_scheduler(config)
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(scheduler, StepWithFixedGammaParamScheduler)
+ "key": "input", + } + ], + }, + }, + "model": { + "name": "mlp", + # 3x20x20 = 1200 + "input_dim": 1200, + "output_dim": 1000, + "hidden_dims": [10], + }, + "meters": {"accuracy": {"topk": [1]}}, + "optimizer": { + "name": "sgd", + "num_epochs": 10, + "lr": 0.1, + "weight_decay": 1e-4, + "momentum": 0.9, + }, + } + + def _build_task(self, num_epochs): + config = self._get_config() + config["optimizer"]["num_epochs"] = num_epochs + task = ( + ClassificationTask() + .set_num_epochs(num_epochs) + .set_loss(build_loss(config["loss"])) + .set_model(build_model(config["model"])) + .set_optimizer(build_optimizer(config["optimizer"])) + ) + for phase_type in ["train", "test"]: + dataset = build_dataset(config["dataset"][phase_type]) + task.set_dataset(dataset, phase_type) + + self.assertTrue(task is not None) + return task + + def test_param_scheduler_epoch(self): + task = self._build_task(num_epochs=3) + + where_list = [] + + def scheduler_mock(where): + where_list.append(where) + return 0.1 + + mock = Mock(side_effect=scheduler_mock) + mock.update_interval = UpdateInterval.EPOCH + task.optimizer.lr_scheduler = mock + + trainer = LocalTrainer() + trainer.train(task) + + self.assertEqual(where_list, [0, 1 / 3, 2 / 3]) + + def test_param_scheduler_step(self): + task = self._build_task(num_epochs=3) + + where_list = [] + + def scheduler_mock(where): + where_list.append(where) + return 0.1 + + mock = Mock(side_effect=scheduler_mock) + mock.update_interval = UpdateInterval.STEP + task.optimizer.lr_scheduler = mock + + trainer = LocalTrainer() + trainer.train(task) + + # We have 10 samples, batch size is 5. Each epoch is done in two steps. 
+ self.assertEqual(where_list, [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6]) + + def test_hook(self): + task = self._build_task(num_epochs=3) + + lr_list = [] + + class TestHook(ClassyHook): + on_rendezvous = ClassyHook._noop + on_start = ClassyHook._noop + on_phase_start = ClassyHook._noop + on_sample = ClassyHook._noop + on_forward = ClassyHook._noop + on_loss_and_meter = ClassyHook._noop + on_backward = ClassyHook._noop + on_phase_end = ClassyHook._noop + on_end = ClassyHook._noop + + def on_update(self, task: ClassyTask, local_variables) -> None: + lr_list.append(task.optimizer.lr) + + task.set_hooks([TestHook()]) + + def scheduler_mock(where): + return where + + mock = Mock(side_effect=scheduler_mock) + mock.update_interval = UpdateInterval.STEP + task.optimizer.lr_scheduler = mock + + trainer = LocalTrainer() + trainer.train(task) + + # We have 10 samples, batch size is 5. Each epoch is done in two steps. + self.assertEqual(lr_list, [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6]) diff --git a/test/optim_rmsprop_test.py b/test/optim_rmsprop_test.py new file mode 100644 index 0000000000..f2735617d8 --- /dev/null +++ b/test/optim_rmsprop_test.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
def _circleci_parallelism(suite):
    """Return the share of ``suite`` that this CircleCI node should run.

    When ``CIRCLE_NODE_TOTAL`` is unset or at most 1, ``suite`` is returned
    unchanged. Otherwise the suite is flattened by one level, shuffled with a
    fixed seed, and every ``CIRCLE_NODE_TOTAL``-th test starting at position
    ``CIRCLE_NODE_INDEX`` is kept.

    Args:
        suite: a ``unittest.TestSuite``, typically the result of discovery.

    Returns:
        ``suite`` itself, or a new ``unittest.TestSuite`` holding this node's
        share of the flattened tests.

    Raises:
        KeyError: ``CIRCLE_NODE_TOTAL`` is greater than 1 but
            ``CIRCLE_NODE_INDEX`` is not set.
        ValueError: either variable is not an integer.
    """
    total = int(os.environ.get("CIRCLE_NODE_TOTAL", 0))
    if total <= 1:
        # either not running on circleci, or we're not using parallelism.
        return suite
    index = int(os.environ["CIRCLE_NODE_INDEX"])

    # tests are automatically sorted by discover, so we will get the same
    # ordering on all hosts.
    # Right now each top-level entry corresponds to a file. Certain files are
    # slower than others, so flatten one level before distributing. Iterating
    # the suite is the public way to reach its members; a bare TestCase at the
    # top level is kept as-is instead of failing on a missing ``_tests``.
    tests = []
    for entry in suite:
        if isinstance(entry, unittest.TestSuite):
            tests.extend(entry)
        else:
            tests.append(entry)

    # Fixed seed: every node must derive the same permutation.
    random.Random(42).shuffle(tests)
    tests = [t for i, t in enumerate(tests) if i % total == index]
    return unittest.TestSuite(tests)


def unittests():
    """
    Short tests.

    Runs on CircleCI on every commit. Returns everything in the tests root
    directory matching ``*_test.py``, reduced to this node's share when
    CircleCI parallelism is enabled.
    """
    test_loader = unittest.TestLoader()
    test_suite = test_loader.discover("test", pattern="*_test.py")
    return _circleci_parallelism(test_suite)
class TestClassificationTask(unittest.TestCase):
    """Tests for building ``ClassificationTask`` and round-tripping its state."""

    def _compare_model_state(self, model_state_1, model_state_2, check_heads=True):
        # Thin wrapper that passes this test case to the shared comparison helper.
        compare_model_state(self, model_state_1, model_state_2, check_heads)

    def _compare_samples(self, sample_1, sample_2):
        # Thin wrapper that passes this test case to the shared comparison helper.
        compare_samples(self, sample_1, sample_2)

    def _compare_states(self, state_1, state_2, check_heads=True):
        # NOTE(review): `check_heads` is accepted but not forwarded to
        # compare_states -- confirm whether that is intended.
        compare_states(self, state_1, state_2)

    def test_build_task(self):
        """``build_task`` must return a ``ClassificationTask`` for the test config."""
        config = get_test_task_config()
        task = build_task(config)
        self.assertTrue(isinstance(task, ClassificationTask))

    def test_get_state(self):
        """Prepare a hand-assembled task and a config-built task.

        Despite its name, this test makes no assertions about state; it only
        checks that ``prepare`` runs for both ways of building the task.
        """
        config = get_test_task_config()
        loss = build_loss(config["loss"])
        task = (
            ClassificationTask()
            .set_num_epochs(1)
            .set_loss(loss)
            .set_model(build_model(config["model"]))
            .set_optimizer(build_optimizer(config["optimizer"]))
        )
        for phase_type in ["train", "test"]:
            dataset = build_dataset(config["dataset"][phase_type])
            task.set_dataset(dataset, phase_type)

        task.prepare(num_dataloader_workers=1, pin_memory=False)

        # Same preparation, but with the task built from the config.
        task = build_task(config)
        task.prepare(num_dataloader_workers=1, pin_memory=False)

    def test_get_set_state(self):
        """
        Tests the {set, get}_classy_state methods by running train_steps
        to make sure the train_steps run the same way.
        """
        config = get_fast_test_task_config()
        task = build_task(config).set_hooks([LossLrMeterLoggingHook()])
        task_2 = build_task(config).set_hooks([LossLrMeterLoggingHook()])

        use_gpu = torch.cuda.is_available()
        # The same dict object is passed to both tasks' train_step calls.
        local_variables = {}

        # prepare the tasks for the right device
        task.prepare(use_gpu=use_gpu)
        task_2.prepare(use_gpu=use_gpu)

        # test in both train and test mode
        for _ in range(2):
            task.advance_phase()

            # task 2 should have the same state
            task_2.set_classy_state(task.get_classy_state(deep_copy=True))
            self._compare_states(task.get_classy_state(), task_2.get_classy_state())

            # this tests that both states' iterators return the same samples
            sample = next(task.get_data_iterator())
            sample_2 = next(task_2.get_data_iterator())
            self._compare_samples(sample, sample_2)

            # test that the train step runs the same way on both states
            # and the loss remains the same
            task.train_step(use_gpu, local_variables)
            task_2.train_step(use_gpu, local_variables)
            self._compare_states(task.get_classy_state(), task_2.get_classy_state())

    @unittest.skipUnless(torch.cuda.is_available(), "This test needs a gpu to run")
    def test_get_set_state_different_devices(self):
        """Copy state between tasks prepared with opposite ``use_gpu`` values."""
        config = get_fast_test_task_config()
        task = build_task(config)
        task_2 = build_task(config)

        for use_gpu in [True, False]:
            # Prepare the two tasks with opposite device settings.
            task.prepare(use_gpu=use_gpu)
            task_2.prepare(use_gpu=not use_gpu)

            task_2.set_classy_state(task.get_classy_state(deep_copy=True))

            # the parameters are in different devices
            with self.assertRaises(Exception):
                self._compare_states(task.get_classy_state(), task_2.get_classy_state())

            # prepare the task for the right device
            task_2.prepare(use_gpu=use_gpu)
            self._compare_states(task.get_classy_state(), task_2.get_classy_state())

            # we should be able to run the trainer using state from a different device
            trainer = LocalTrainer(use_gpu=use_gpu)
            trainer.train(task_2)
class TestFineTuningTask(unittest.TestCase):
    """Tests for ``FineTuningTask``: building, preparing and training."""

    def _compare_model_state(self, state_1, state_2, check_heads=True):
        # Thin wrapper that passes this test case to the shared comparison helper.
        return compare_model_state(self, state_1, state_2, check_heads=check_heads)

    def _get_fine_tuning_config(self, head_num_classes=1000):
        """Return the fast test config renamed to "fine_tuning", with 10 epochs."""
        config = get_fast_test_task_config(head_num_classes=head_num_classes)
        config["name"] = "fine_tuning"
        config["num_epochs"] = 10
        return config

    def _get_pre_train_config(self, head_num_classes=1000):
        """Return the fast test config with 10 epochs, used for pre-training."""
        config = get_fast_test_task_config(head_num_classes=head_num_classes)
        config["num_epochs"] = 10
        return config

    def test_build_task(self):
        """``build_task`` must return a ``FineTuningTask`` for the fine-tuning config."""
        config = self._get_fine_tuning_config()
        task = build_task(config)
        self.assertIsInstance(task, FineTuningTask)

    def test_prepare(self):
        """``prepare`` needs a pretrained checkpoint whose heads are compatible."""
        pre_train_config = self._get_pre_train_config()
        pre_train_task = build_task(pre_train_config)
        pre_train_task.prepare()
        checkpoint = get_checkpoint_dict(pre_train_task, {})

        fine_tuning_config = self._get_fine_tuning_config()
        fine_tuning_task = build_task(fine_tuning_config)
        # cannot prepare a fine tuning task without a pre training checkpoint
        with self.assertRaises(Exception):
            fine_tuning_task.prepare()

        fine_tuning_task.set_pretrained_checkpoint(checkpoint)
        fine_tuning_task.prepare()

        # test a fine tuning task with incompatible heads
        fine_tuning_config = self._get_fine_tuning_config(head_num_classes=10)
        fine_tuning_task = build_task(fine_tuning_config)
        fine_tuning_task.set_pretrained_checkpoint(checkpoint)
        # cannot prepare a fine tuning task with a pre training checkpoint which
        # has incompatible heads
        with self.assertRaises(Exception):
            fine_tuning_task.prepare()

        # With reset_heads enabled the same checkpoint is expected to prepare.
        fine_tuning_task.set_pretrained_checkpoint(checkpoint).set_reset_heads(True)
        fine_tuning_task.prepare()

    def test_train(self):
        """Fine-tune from a trained checkpoint for each reset_heads/freeze_trunk pair."""
        pre_train_config = self._get_pre_train_config(head_num_classes=1000)
        pre_train_task = build_task(pre_train_config)
        trainer = LocalTrainer()
        trainer.train(pre_train_task)
        checkpoint = get_checkpoint_dict(pre_train_task, {})

        for reset_heads, heads_num_classes in [(False, 1000), (True, 200)]:
            for freeze_trunk in [True, False]:
                fine_tuning_config = self._get_fine_tuning_config(
                    head_num_classes=heads_num_classes
                )
                fine_tuning_task = build_task(fine_tuning_config)
                # Each task receives its own deep copy of the checkpoint.
                fine_tuning_task = (
                    fine_tuning_task.set_pretrained_checkpoint(
                        copy.deepcopy(checkpoint)
                    )
                    .set_reset_heads(reset_heads)
                    .set_freeze_trunk(freeze_trunk)
                )
                # run in test mode to compare the model state
                fine_tuning_task.set_test_only(True)
                trainer.train(fine_tuning_task)
                self._compare_model_state(
                    pre_train_task.model.get_classy_state(),
                    fine_tuning_task.model.get_classy_state(),
                    check_heads=not reset_heads,
                )
                # run in train mode to check accuracy
                fine_tuning_task.set_test_only(False)
                trainer.train(fine_tuning_task)
                if freeze_trunk:
                    # if trunk is frozen the states should be the same
                    self._compare_model_state(
                        pre_train_task.model.get_classy_state(),
                        fine_tuning_task.model.get_classy_state(),
                        check_heads=False,
                    )
                else:
                    # trunk isn't frozen, the states should be different
                    with self.assertRaises(Exception):
                        self._compare_model_state(
                            pre_train_task.model.get_classy_state(),
                            fine_tuning_task.model.get_classy_state(),
                            check_heads=False,
                        )

                accuracy = fine_tuning_task.meters[0].value["top_1"]
                self.assertAlmostEqual(accuracy, 1.0)
class TestLocalTrainer(unittest.TestCase):
    """End-to-end test for ``LocalTrainer`` on the MLP test config."""

    def test_training(self):
        """Checks we can train a small MLP model."""
        config = get_test_mlp_task_config()
        task = (
            ClassificationTask()
            .set_num_epochs(10)
            .set_loss(build_loss(config["loss"]))
            .set_model(build_model(config["model"]))
            .set_optimizer(build_optimizer(config["optimizer"]))
            .set_meters([AccuracyMeter(topk=[1])])
            .set_hooks([LossLrMeterLoggingHook()])
        )
        for split in ["train", "test"]:
            dataset = build_dataset(config["dataset"][split])
            task.set_dataset(dataset, split)

        # Guards the fluent chain above: every setter must return the task.
        self.assertIsNotNone(task)

        trainer = LocalTrainer()
        trainer.train(task)

        # The only meter is the top-1 accuracy meter registered above.
        accuracy = task.meters[0].value["top_1"]
        self.assertAlmostEqual(accuracy, 1.0)
Creating a custom dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this tutorial we will learn how to do the following: \n", + "\n", + "1. Create a custom dataset within Classy Vision\n", + "2. Integrate a dataset with Classy Vision's configuration system\n", + "3. Iterate over the samples contained in a dataset\n", + "4. Use transforms with Classy Vision\n", + "5. Create an ImageNet dataset, using standard transforms, using torchvision\n", + "\n", + "If you haven't already read our [Getting started](https://classyvision.ai/tutorials/getting_started) tutorial, we recommend starting there before reading this tutorial." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Create a custom dataset within Classy Vision\n", + "\n", + "Creating a dataset for use / using an existing dataset in Classy Vision is as easy as it is in PyTorch; it only requires wrapping the dataset in our dataloading class, ClassyDataset.\n", + "\n", + "First, specify a dataset with a `__getitem__` and `__len__` function, the same as required by torch.utils.data.Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.utils.data\n", + "import torch\n", + "\n", + "class MyDataset(torch.utils.data.Dataset):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.length = 100\n", + " \n", + " def __getitem__(self, idx):\n", + " assert idx >= 0 and idx < self.length, \\\n", + " \"Provided index {} must be in range [0, {}).\".format(idx, self.length)\n", + " return torch.rand(3, 100, 100)\n", + " \n", + " def __len__(self):\n", + " return self.length" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now for most training tasks we want to be able to configure the batchsize on the fly, transform samples, shuffle the dataset, maybe limit the number of samples to shorten a training run, and then construct an iterator for
the training loop. ClassyDataset is a simple wrapper that provides this functionality. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.dataset import ClassyDataset\n", + "\n", + "class MyClassyDataset(ClassyDataset):\n", + " def __init__(self, split, batchsize_per_replica, shuffle, transform, num_samples):\n", + " dataset = MyDataset()\n", + " super().__init__(dataset, split, batchsize_per_replica, shuffle, transform, num_samples)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's that easy! Later in the tutorial we will see how to use the iterator, but before moving on, let's talk about what each of these arguments does.\n", + "- __split__: is a string specifying the split of the data, typically either \"train\" or \"test\". This is optional, not needed for many datasets.\n", + "- __batchsize_per_replica__: the batchsize per trainer (so if you have 8 GPUs with 1 trainer process each and a batchsize_per_replica of 32, then your batchsize for a single update is 8 * 32 = 256).\n", + "- __shuffle__: If true, then shuffle the dataset before each epoch.\n", + "- __transform__: A callable applied to each sample before returning. Note that this can get tricky since many datasets (e.g. torchvision datasets) return complex samples containing both the image / video content and a label and possibly additional metadata. We pass the _whole_ sample to the transform, so it needs to know how to parse the sample...more on this later.\n", + "- __num_samples__: Not needed in the standard use cases, but this allows a user to adjust the length of samples retrieved in an epoch, can be convenient for debugging via config (e.g. setting num_samples = 10 will speed up training). 
By default this is set to None and iteration proceeds over the whole dataset.\n", + "\n", + "To get started with a basic task just do:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.tasks import ClassificationTask\n", + "\n", + "my_dataset = MyClassyDataset(\n", + " split=\"train\", \n", + " batchsize_per_replica=10, \n", + " shuffle=True, \n", + " transform=None, \n", + " num_samples=None,\n", + ")\n", + "\n", + "# Note, the \"train\" here is the phase type, which is unrelated to the split name.\n", + "# It tells the task to set the model in train mode / do a backwards pass, etc using\n", + "# this dataset...the split argument helps the dataset decide which training data to load.\n", + "my_task = ClassificationTask().set_dataset(my_dataset, \"train\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more details on training a model, please see our [Getting started](https://classyvision.ai/tutorials/getting_started) tutorial." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Integrating a dataset with Classy Vision's configuration system\n", + "\n", + "Classy Vision is also able to read a configuration file and instantiate the dataset. This is useful to keep your experiments organized and reproducible. 
For that, you have to:\n", + "\n", + "- Implement a from_config method\n", + "- Add the register_dataset decorator to MyClassyDataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.dataset import ClassyDataset, register_dataset\n", + "from classy_vision.dataset.transforms import build_transforms\n", + "\n", + "@register_dataset(\"my_dataset\")\n", + "class MyClassyDataset(ClassyDataset):\n", + " def __init__(self, split, batchsize_per_replica, shuffle, transform, num_samples):\n", + " dataset = MyDataset()\n", + " super().__init__(dataset, split, batchsize_per_replica, shuffle, transform, num_samples)\n", + " \n", + " @classmethod\n", + " def from_config(cls, config):\n", + " transform = build_transforms(config[\"transforms\"])\n", + " return cls(\n", + " split=config[\"split\"],\n", + " batchsize_per_replica=config[\"batchsize_per_replica\"],\n", + " shuffle=config[\"shuffle\"],\n", + " transform=transform,\n", + " num_samples=config[\"num_samples\"],\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can start using this dataset in our configurations. 
The string argument passed to register_dataset is a unique identifier for this dataset (if you try to register two datasets with the same name, it will throw an error):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.dataset import build_dataset\n", + "import torch\n", + "\n", + "dataset_config = {\n", + " \"name\": \"my_dataset\",\n", + " \"split\": \"train\",\n", + " \"batchsize_per_replica\": 10,\n", + " \"shuffle\": True,\n", + " \"transforms\": [{\"name\": \"Normalize\", \"mean\": [0.485, 0.456, 0.406], \"std\": [0.229, 0.224, 0.225]}],\n", + " \"num_samples\": None,\n", + "}\n", + "my_dataset = build_dataset(dataset_config)\n", + "assert isinstance(my_dataset, MyClassyDataset)\n", + "\n", + "sample = my_dataset[0]\n", + "print(sample.size())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Iterate over the samples contained in a dataset\n", + "\n", + "As mentioned above, the ClassyDataset class adds several pieces of basic logic for constructing a torch.utils.data.DataLoader for your dataset. ClassyDataset supports local and distributed training out of the box by internally using a PyTorch DistributedSampler for sampling the dataset along with the PyTorch DataLoader for batching and parallelizing sample retrieval. 
To get an iterable for epoch 0, do the following:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.dataset import build_dataset\n", + "import torch\n", + "\n", + "dataset_config = {\n", + " \"name\": \"my_dataset\",\n", + " \"split\": \"train\",\n", + " \"batchsize_per_replica\": 10,\n", + " \"shuffle\": True,\n", + " \"transforms\": [],\n", + " \"num_samples\": None,\n", + "}\n", + "my_dataset = build_dataset(dataset_config)\n", + "assert isinstance(my_dataset, MyClassyDataset)\n", + "\n", + "# multiprocessing_context can be set to \"spawn\", \"forkserver\", \"fork\" or None.\n", + "# If None is used, then the dataloader inherits the context of the parent thread.\n", + "# If num_workers is 0, then multiprocessing is not used by the dataloader\n", + "#\n", + "# A warning, while fork is fast and simple to get started with, it \n", + "# is unsafe to use with threading and can lead to difficult to debug errors.\n", + "# Spawn / forkserver are threadsafe, but they come with additional startup costs.\n", + "iterator = my_dataset.iterator(\n", + " shuffle_seed=0,\n", + " epoch=0,\n", + " num_workers=0, # 0 indicates to do dataloading on the master process\n", + " pin_memory=False,\n", + " multiprocessing_context=None,\n", + ")\n", + "assert isinstance(iterator, torch.utils.data.DataLoader)\n", + "\n", + "# Iterate over all 100 samples.\n", + "for sample in iter(iterator):\n", + " # Do stuff with sample...\n", + " # Note that size now has an extra dimension representing the batchsize\n", + " assert sample.size() == torch.Size([10, 3, 100, 100])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also provide a custom iterator function if you would like to return a custom iterator or a custom sampler. Please see the ClassyDataset code for more details." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Using transforms with Classy Vision\n", + "\n", + "You may have noticed in the configuration section that we did something slightly more complicated with the transform configuration. In particular, just like our datasets / models etc, we have a registration / build mechanism for transforms so that transforms can be specified via config. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Transform example using Classy Vision's synthetic image dataset\n", + "We also automatically register torchvision transforms, so let's start with an example of how to specify torchvision transforms and the synthetic image dataset we provide for testing / proto-typing.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torchvision.transforms as transforms\n", + "from classy_vision.dataset import build_dataset\n", + "from classy_vision.dataset.classy_synthetic_image import SyntheticImageDataset\n", + "from classy_vision.dataset.transforms import build_transforms\n", + "\n", + "# Declarative approach\n", + "\n", + "# Transform to be applied to image\n", + "image_transform = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", + "])\n", + "\n", + "decl_dataset = SyntheticImageDataset(\n", + " batchsize_per_replica=10,\n", + " shuffle=True,\n", + " transform=image_transform,\n", + " num_samples=100,\n", + " crop_size=320,\n", + " class_ratio=4,\n", + " seed=0,\n", + ")\n", + "\n", + "# FAILS!!!!\n", + "# decl_dataset[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This fails! Why?\n", + "\n", + "It fails because most datasets don't return just an image, they return image or video content data, label data, and (potentially) sample metadata. 
In Classy Vision, the sample format is specified by the task, and our classification_task expects a dict with input / target keys.\n", + "\n", + "For example, the sample format for the SyntheticImageDataset looks like:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`{\"input\": <PIL Image>, \"target\": <Target>}`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For our transforms to work, we need to specify which key to apply the transform to." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torchvision.transforms as transforms\n", + "from classy_vision.dataset import build_dataset\n", + "from classy_vision.dataset.classy_synthetic_image import SyntheticImageDataset\n", + "from classy_vision.dataset.transforms import build_transforms, ApplyTransformToKey\n", + "\n", + "# Declarative approach\n", + "\n", + "# Transform to be applied to image\n", + "image_transform = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", + "])\n", + "\n", + "# Transform wrapper that says which key to apply the transform to\n", + "transform = ApplyTransformToKey(\n", + " transform=image_transform,\n", + " key=\"input\",\n", + ")\n", + "\n", + "decl_dataset = SyntheticImageDataset(\n", + " batchsize_per_replica=10,\n", + " shuffle=True,\n", + " transform=transform,\n", + " num_samples=100,\n", + " crop_size=320,\n", + " class_ratio=4,\n", + " seed=0,\n", + ")\n", + "\n", + "# Success!!!!\n", + "decl_dataset[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's see how to do the same thing via a config." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Note that this cell won't work until we fix the synthetic dataset from_config function\n", + "\n", + "from classy_vision.dataset import build_dataset\n", + "\n", + "# Configuration approach\n", + "config = {\n", + " \"name\": \"synthetic_image\",\n", + " \"batchsize_per_replica\": 10,\n", + " \"use_shuffle\": True,\n", + " \"transforms\": [\n", + " {\n", + " \"name\": \"apply_transform_to_key\",\n", + " \"transforms\": [\n", + " {\"name\": \"Resize\", \"size\": 256},\n", + " {\"name\": \"CenterCrop\", \"size\": 224},\n", + " {\"name\": \"ToTensor\"},\n", + " {\"name\": \"Normalize\", \"mean\": [0.485, 0.456, 0.406], \"std\": [0.229, 0.224, 0.225]},\n", + " ],\n", + " \"key\": \"input\",\n", + " },\n", + " ],\n", + " \"num_samples\": 100,\n", + " \"crop_size\": 320,\n", + " \"class_ratio\": 4,\n", + " \"seed\": 0\n", + "}\n", + "\n", + "config_dataset = build_dataset(config)\n", + "\n", + "# Sample should be the same as that provided by the decl_dataset\n", + "assert torch.allclose(config_dataset[0][\"input\"], decl_dataset[0][\"input\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Transform example for a torchvision dataset\n", + "Torchvision has a different sample format using tuples for images: \n", + "\n", + "`(<PIL Image>, <Target>)`\n", + "\n", + "The ApplyTransformToKey will still work (the key in this case is '0'), but for our classification tasks, we also want a sample that is a dict with \"input\"/\"target\" keys. \n", + "\n", + "Because this is a common dataset format, we provide a convenience transform called \"GenericImageTransform\" which applies a specified transform to the torchvision tuple image key and then maps the whole sample to a dict. 
This is just a convenience transform, we can also do this using raw composable blocks, but it makes things more verbose.\n", + "\n", + "All of the transforms in the next cell have the same effect on an image:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torchvision.transforms import Compose\n", + "from classy_vision.dataset.transforms import build_transforms\n", + "from classy_vision.dataset.transforms.util import GenericImageTransform\n", + "\n", + "# Declarative\n", + "image_transform = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", + "])\n", + "decl_transform = GenericImageTransform(transform=image_transform)\n", + "\n", + "# Configuration with helper function\n", + "transform_config = [{\n", + " \"name\": \"generic_image_transform\",\n", + " \"transforms\": [\n", + " {\"name\": \"Resize\", \"size\": 256},\n", + " {\"name\": \"CenterCrop\", \"size\": 224},\n", + " {\"name\": \"ToTensor\"},\n", + " {\"name\": \"Normalize\", \"mean\": [0.485, 0.456, 0.406], \"std\": [0.229, 0.224, 0.225]},\n", + " ], \n", + "}]\n", + "config_helper_transform = build_transforms(transform_config)\n", + "\n", + "# Configuration using raw, composable functions:\n", + "transform_config = [\n", + " {\"name\": \"tuple_to_map\", \"list_of_map_keys\": [\"input\", \"target\"]},\n", + " {\n", + " \"name\": \"apply_transform_to_key\",\n", + " \"transforms\": [\n", + " {\"name\": \"Resize\", \"size\": 256},\n", + " {\"name\": \"CenterCrop\", \"size\": 224},\n", + " {\"name\": \"ToTensor\"},\n", + " {\"name\": \"Normalize\", \"mean\": [0.485, 0.456, 0.406], \"std\": [0.229, 0.224, 0.225]},\n", + " ], \n", + " \"key\": \"input\",\n", + " },\n", + "]\n", + "config_raw_transform = build_transforms(transform_config)\n", + "\n", + "# These transforms are all 
functionally the same" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Create a Classy Imagenet\n", + "\n", + "Now, to complete this tutorial, we show our code for creating an ImageNet dataset in classy vision using the pre-existing torchvision dataset. Code very similar to this (+ some typing and helper functions) is in the datasets folder of the base Classy Vision repository.\n", + "\n", + "Note, we do not distribute any of the underlying dataset data with Classy Vision. Before this will work, you will need to download a torchvision compatible copy of the Imagenet dataset yourself." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.dataset import ClassyDataset, register_dataset\n", + "from classy_vision.dataset.transforms import ClassyTransform, build_transforms\n", + "from torchvision.datasets.imagenet import ImageNet\n", + " \n", + " \n", + "@register_dataset(\"example_imagenet\")\n", + "class ExampleImageNetDataset(ClassyDataset):\n", + " def __init__(\n", + " self,\n", + " split,\n", + " batchsize_per_replica,\n", + " shuffle,\n", + " transform,\n", + " num_samples,\n", + " root, # Root directory for your Imagenet dataset\n", + " ): \n", + " # Create torchvision dataset\n", + " dataset = ImageNet(root=root, split=split)\n", + " super().__init__(\n", + " dataset, split, batchsize_per_replica, shuffle, transform, num_samples\n", + " ) \n", + "\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " batchsize_per_replica = config.get(\"batchsize_per_replica\")\n", + " shuffle = config.get(\"use_shuffle\")\n", + " num_samples = config.get(\"num_samples\")\n", + " transform_config = config.get(\"transforms\")\n", + " split = config.get(\"split\")\n", + " root = config.get(\"root\")\n", + " download = config.get(\"download\")\n", + " \n", + " transform = build_transforms(transform_config)\n", + " return cls(\n", + " split=split,\n", + " 
batchsize_per_replica=batchsize_per_replica,\n", + " shuffle=shuffle,\n", + " transform=transform,\n", + " num_samples=num_samples,\n", + " root=root,\n", + " download=download,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "In this tutorial we have seen how to create a custom dataset using ClassyDataset, how to integrate this dataset with the configuration system, how to iterate over samples / use multiple workers, how to use transforms in the configuration system and finally we showed an example of how to use a torchvision dataset in Classy Vision. \n", + "\n", + "For more details on how to use the dataset for training, please see [Getting started](https://classyvision.ai/tutorials/getting_started)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "bento_stylesheets": { + "bento/extensions/flow/main.css": true, + "bento/extensions/kernel_selector/main.css": true, + "bento/extensions/kernel_ui/main.css": true, + "bento/extensions/new_kernel/main.css": true, + "bento/extensions/system_usage/main.css": true, + "bento/extensions/theme/main.css": true + }, + "disseminate_notebook_id": { + "notebook_id": "1152520058270736" + }, + "disseminate_notebook_info": { + "bento_version": "20191118-000256", + "description": "Test of Classy Dataset tutorial\n\nv3", + "hide_code": false, + "hipster_group": "", + "kernel_build_info": { + "deps": [ + "//fblearner/flow/projects/vision/classy_vision:classy_vision_workflow_lib" + ], + "external_deps": [] + }, + "no_uii": true, + "notebook_number": "179685", + "others_can_edit": true, + "reviewers": "", + "revision_id": "833561470396445", + "tags": "classy_vision", + "tasks": "", + "title": "classy_dataset" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + 
}, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/classy_loss.ipynb b/tutorials/classy_loss.ipynb new file mode 100644 index 0000000000..db5695967a --- /dev/null +++ b/tutorials/classy_loss.ipynb @@ -0,0 +1,174 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating a custom loss" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Loss functions are crucial because they define the objective to optimize for during training. Classy Vision can work directly with loss functions defined in [PyTorch](https://pytorch.org/docs/stable/_modules/torch/nn/modules/loss.html) without the need for any wrapper classes, but during research it's common to create custom losses with hyperparameters. Using `ClassyLoss` you can expose these hyperparameters via a configuration file.\n", + "\n", + "This tutorial will demonstrate: \n", + "1. How to create a custom loss within Classy Vision; \n", + "2. How to integrate your loss with Classy Vision's configuration system;\n", + "3. How to use a ClassyLoss independently, without other Classy Vision abstractions.\n", + "\n", + "## 1. Defining a loss\n", + "\n", + "Creating a new loss in Classy Vision is as simple as adding a new loss within PyTorch. The loss has to derive from `ClassyLoss` (which inherits from [`torch.nn.Module`](https://pytorch.org/docs/stable/nn.html#module)), and implement a `forward` method.\n", + "\n", + "> **Note**: The forward method should take the right arguments depending on the task the loss will be used for. For instance, a `ClassificationTask` passes the `output` and `target` to `forward`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.losses import ClassyLoss\n", + "\n", + "class MyLoss(ClassyLoss):\n", + " def __init__(self, alpha):\n", + " super().__init__()\n", + " self.alpha = alpha\n", + " \n", + " def forward(self, output, target):\n", + " return (output - target).pow(2) * self.alpha" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can start using this loss for training. Take a look at our [Getting started](https://classyvision.ai/tutorials/getting_started) tutorial for more details on how to train a model from a Jupyter notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.tasks import ClassificationTask\n", + "\n", + "my_loss = MyLoss(alpha=5)\n", + "my_task = ClassificationTask().set_loss(my_loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Integrate it with the configuration system\n", + "\n", + "To be able to use the registration mechanism to be able to pick up the loss from a configuration, we need to do two additional things -\n", + "- Implement a `from_config` method\n", + "- Add the `register_loss` decorator to `MyLoss`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.losses import ClassyLoss, register_loss\n", + "\n", + "@register_loss(\"my_loss\")\n", + "class MyLoss(ClassyLoss):\n", + " def __init__(self, alpha):\n", + " super().__init__()\n", + " self.alpha = alpha\n", + "\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " if \"alpha\" not in config:\n", + " raise ValueError('Need \"alpha\" in config for MyLoss')\n", + " return cls(alpha=config[\"alpha\"])\n", + " \n", + " def forward(self, output, target):\n", + " return (output - target).pow(2).sum() * self.alpha" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can start using this loss in our configurations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.losses import build_loss\n", + "import torch\n", + "\n", + "loss_config = {\n", + " \"name\": \"my_loss\",\n", + " \"alpha\": 5\n", + "}\n", + "my_loss = build_loss(loss_config)\n", + "assert isinstance(my_loss, MyLoss)\n", + "\n", + "# ClassyLoss inherits from torch.nn.Module, so it works as expected\n", + "with torch.no_grad():\n", + " y_hat, target = torch.rand((1, 10)), torch.rand((1, 10))\n", + " print(my_loss(y_hat, target))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that your loss is integrated with the configuration system, you can train it using `classy_train.py` as described in the [Getting started](https://classyvision.ai/tutorials/getting_started) tutorial, no further changes are needed! 
Just make sure the code defining your loss is in the `losses` folder of your classy project." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Conclusion\n", + "\n", + "In this tutorial, we learned how to make your loss compatible with Classy Vision and how to integrate it with the configuration system. Refer to our documentation to learn more about [ClassyLoss](https://classyvision.ai/api/losses.html)." + ] + } + ], + "metadata": { + "bento_stylesheets": { + "bento/extensions/flow/main.css": true, + "bento/extensions/kernel_selector/main.css": true, + "bento/extensions/kernel_ui/main.css": true, + "bento/extensions/new_kernel/main.css": true, + "bento/extensions/system_usage/main.css": true, + "bento/extensions/theme/main.css": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/classy_model.ipynb b/tutorials/classy_model.ipynb new file mode 100644 index 0000000000..bdef9bef82 --- /dev/null +++ b/tutorials/classy_model.ipynb @@ -0,0 +1,195 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating a custom model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This tutorial will demonstrate: (1) how to create a custom model within Classy Vision; (2) how to integrate your model with Classy Vision's configuration system; (3) how to use the model for training and inference.\n", + "\n", + "## 1. Defining a model\n", + "\n", + "Creating a new model in Classy Vision is as simple as creating one within PyTorch. 
The model needs to derive from `ClassyModel` and implement a `forward` method to perform inference. `ClassyModel` inherits from [`torch.nn.Module`](https://pytorch.org/docs/stable/nn.html#module), so it works exactly as you would expect." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn as nn\n", + "\n", + "from classy_vision.models import ClassyModel\n", + "\n", + "\n", + "class MyModel(ClassyModel):\n", + " def __init__(self, num_classes):\n", + " super().__init__()\n", + " \n", + " # Average all the pixels, generate one output per class\n", + " self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n", + " num_channels = 3\n", + " self.fc = nn.Linear(num_channels, num_classes)\n", + " \n", + " def forward(self, x):\n", + " # perform average pooling\n", + " out = self.avgpool(x)\n", + "\n", + " # reshape the output and apply the fc layer\n", + " out = out.reshape(out.size(0), -1)\n", + " out = self.fc(out)\n", + " return out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can start using this model for training. Take a look at our [Getting started](https://classyvision.ai/tutorials/getting_started) tutorial for more details on how to train a model from a Jupyter notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.tasks import ClassificationTask\n", + "\n", + "my_model = MyModel(num_classes=1000)\n", + "my_task = ClassificationTask().set_model(my_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Integrate it with the configuration system\n", + "\n", + "Classy Vision is also able to read a configuration file and instantiate the model. This is useful to keep your experiments organized and reproducible. 
For that, you have to:\n", + "\n", + "- Implement a `from_config` method\n", + "- Add the `register_model` decorator to `MyModel`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn as nn\n", + "\n", + "from classy_vision.models import ClassyModel, register_model\n", + "\n", + "\n", + "@register_model(\"my_model\")\n", + "class MyModel(ClassyModel):\n", + " def __init__(self, num_classes):\n", + " super().__init__()\n", + " \n", + " # Average all the pixels, generate one output per class\n", + " self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n", + " num_channels = 3\n", + " self.fc = nn.Linear(num_channels, num_classes)\n", + "\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " # This method takes a configuration dictionary \n", + " # and returns an instance of the class. In this case, \n", + " # we'll let the number of classes be configurable.\n", + " return cls(num_classes=config[\"num_classes\"])\n", + " \n", + " def forward(self, x):\n", + " # perform average pooling\n", + " out = self.avgpool(x)\n", + "\n", + " # reshape the output and apply the fc layer\n", + " out = out.reshape(out.size(0), -1)\n", + " out = self.fc(out)\n", + " return out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can start using this model in our configurations. 
The argument passed to `register_model` is used to identify the model class in the configuration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.models import build_model\n", + "import torch\n", + "\n", + "model_config = {\n", + " \"name\": \"my_model\",\n", + " \"num_classes\": 3\n", + "}\n", + "my_model = build_model(model_config)\n", + "assert isinstance(my_model, MyModel)\n", + "\n", + "# my_model inherits from torch.nn.Module, so inference works as usual:\n", + "x = torch.rand((1, 3, 200, 200))\n", + "with torch.no_grad():\n", + " print(my_model(x))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that your model is integrated with the configuration system, you can train it using `classy_train.py` as described in the [Getting started](https://classyvision.ai/tutorials/getting_started) tutorial, no further changes are needed! Just make sure the code defining your model is in the `models` folder of your classy project." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Conclusion\n", + "\n", + "In this tutorial, we learned how to make your model compatible with Classy Vision and how to integrate it with the configuration system. Refer to our documentation to learn more about [ClassyModel](https://classyvision.ai/api/models.html)." 
+ ] + } + ], + "metadata": { + "bento_stylesheets": { + "bento/extensions/flow/main.css": true, + "bento/extensions/kernel_selector/main.css": true, + "bento/extensions/kernel_ui/main.css": true, + "bento/extensions/new_kernel/main.css": true, + "bento/extensions/system_usage/main.css": true, + "bento/extensions/theme/main.css": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/fine_tuning.ipynb b/tutorials/fine_tuning.ipynb new file mode 100644 index 0000000000..b55eec15f9 --- /dev/null +++ b/tutorials/fine_tuning.ipynb @@ -0,0 +1,693 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fine tuning a model\n", + "\n", + "Fine tuning is a form of transfer learning: when you only have a small labeled dataset for a specific task, you can pick up a model trained for a different task and fine-tune it for your specific dataset. These pre-trained models are usually trained on much larger datasets, which helps improve performance. \n", + "\n", + "In this tutorial we'll look into how to pick up a pre-trained model and fine tune it for a different task. In part (1) we'll train a model and save it to a checkpoint file. In part (2), we'll load the checkpoint file and run the fine-tuning. Feel free to skip part (1) if you already have a checkpoint file to begin with." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Training a model\n", + "Let us begin by pre-training a model using a head with 1000 classes." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We want to train for 4 epochs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "num_epochs = 4" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will be using synthetic train and test datasets for this example. The transforms used are from torchvision and are applied to the input value in the sample (rather than the target)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.dataset import SyntheticImageDataset\n", + "\n", + "train_dataset = SyntheticImageDataset.from_config({\n", + " \"batchsize_per_replica\": 32,\n", + " \"num_samples\": 2000,\n", + " \"crop_size\": 224,\n", + " \"class_ratio\": 0.5,\n", + " \"seed\": 0,\n", + " \"use_shuffle\": True,\n", + " \"split\": \"train\",\n", + " \"transforms\": [{\n", + " \"name\": \"apply_transform_to_key\",\n", + " \"transforms\": [\n", + " {\"name\": \"ToTensor\"},\n", + " {\"name\": \"Normalize\", \"mean\": [0.485, 0.456, 0.406], \"std\": [0.229, 0.224, 0.225]}\n", + " ],\n", + " \"key\": \"input\"\n", + " }]\n", + "})\n", + "test_dataset = SyntheticImageDataset.from_config({\n", + " \"batchsize_per_replica\": 32,\n", + " \"num_samples\": 200,\n", + " \"crop_size\": 224,\n", + " \"class_ratio\": 0.5,\n", + " \"seed\": 0,\n", + " \"use_shuffle\": False,\n", + " \"split\": \"test\",\n", + " \"transforms\": [{\n", + " \"name\": \"apply_transform_to_key\",\n", + " \"transforms\": [\n", + " {\"name\": \"ToTensor\"},\n", + " {\"name\": \"Normalize\", \"mean\": [0.485, 0.456, 0.406], \"std\": [0.229, 0.224, 0.225]}\n", + " ],\n", + " \"key\": \"input\"\n", + " }]\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let us create a ResNet 50 model now." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.models import ResNet\n", + "\n", + "model = ResNet.from_config({\n", + " \"num_blocks\": [3, 4, 6, 3],\n", + " \"small_input\": False,\n", + " \"zero_init_bn_residuals\": True\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we will create a head with 1000 classes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.heads import FullyConnectedHead\n", + "\n", + "head = FullyConnectedHead(unique_id=\"default_head\", num_classes=1000, in_plane=2048)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let us attach the head to the final block of the model.\n", + "\n", + "For ResNet 50, we want to attach to the `3`rd block in the `4`th layer (based on `[3, 4, 6, 3]`). The blocks use 0 indexing, so this maps to `\"block3-2\"`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "model.set_heads({\"block3-2\": {head.unique_id: head}})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use a cross entropy loss from Pytorch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from torch.nn.modules.loss import CrossEntropyLoss\n", + "\n", + "loss = CrossEntropyLoss()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For the optimizer, we will be using SGD." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.optim import build_optimizer\n", + "\n", + "\n", + "optimizer = build_optimizer({\n", + " \"name\": \"sgd\",\n", + " \"lr\": {\"name\": \"step\", \"values\": [0.1, 0.01]},\n", + " \"weight_decay\": 1e-4,\n", + " \"momentum\": 0.9,\n", + " \"num_epochs\": num_epochs\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We want to track the top-1 and top-5 accuracies of the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.meters import AccuracyMeter\n", + "\n", + "meters = [AccuracyMeter(topk=[1, 5])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's create a directory to save the checkpoints." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "\n", + "pretrain_checkpoint_dir = f\"/tmp/checkpoint_{time.time()}\"\n", + "os.mkdir(pretrain_checkpoint_dir)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add `LossLrMeterLoggingHook` to monitor the loss and `CheckpointHook` to save the checkpoints." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.hooks import CheckpointHook, LossLrMeterLoggingHook, ProgressBarHook\n", + "\n", + "hooks = [\n", + " LossLrMeterLoggingHook(),\n", + " CheckpointHook(pretrain_checkpoint_dir, input_args={})\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have all the components ready to setup our pre-training task which trains for 4 epochs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.tasks import ClassificationTask\n", + "\n", + "pretrain_task = (\n", + " ClassificationTask()\n", + " .set_num_epochs(num_epochs)\n", + " .set_loss(loss)\n", + " .set_model(model)\n", + " .set_optimizer(optimizer)\n", + " .set_meters(meters)\n", + " .set_hooks(hooks)\n", + " .set_dataset(train_dataset, \"train\")\n", + " .set_dataset(test_dataset, \"test\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let us train using a local trainer instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.trainer import LocalTrainer\n", + "\n", + "trainer = LocalTrainer()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we can start training!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "trainer.train(pretrain_task)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Training is done! Let us now load the saved checkpoint, we will use this later for fine tuning." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.generic.util import load_checkpoint\n", + "\n", + "pretrained_checkpoint = load_checkpoint(pretrain_checkpoint_dir)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Fine-tuning the model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The original model was trained for 1000 classes. Let's fine-tune it for a problem with only 2 classes. 
To keep things fast we'll run a single epoch:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "num_epochs = 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can re-use the same synthetic datasets as before." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let us again create a ResNet 50 model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.models import ResNet\n", + "\n", + "model = ResNet.from_config({\n", + " \"num_blocks\": [3, 4, 6, 3],\n", + " \"small_input\": False,\n", + " \"zero_init_bn_residuals\": True\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For fine tuning, we will create a head with just 2 classes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.heads import FullyConnectedHead\n", + "\n", + "head = FullyConnectedHead(unique_id=\"default_head\", num_classes=2, in_plane=2048)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let us attach the head to the final block of the model, like before." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "model.set_heads({\"block3-2\": {head.unique_id: head}})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For the optimizer, we will be using RMSProp this time." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.optim import build_optimizer\n", + "\n", + "\n", + "optimizer = build_optimizer({\n", + " \"name\": \"rmsprop\",\n", + " \"lr\": {\"name\": \"step\", \"values\": [0.1, 0.01]},\n", + " \"weight_decay\": 1e-4,\n", + " \"momentum\": 0.9,\n", + " \"alpha\": 0.9,\n", + " \"eps\": 1e-3,\n", + " \"num_epochs\": num_epochs\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We want to track the top-1 accuracy of the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.meters import AccuracyMeter\n", + "\n", + "meters = [AccuracyMeter(topk=[1])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will create a new directory to save the checkpoints for our fine tuning run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "\n", + "fine_tuning_checkpoint_dir = f\"/tmp/checkpoint_{time.time()}\"\n", + "os.mkdir(fine_tuning_checkpoint_dir)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hooks are also the same as before." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.hooks import CheckpointHook, LossLrMeterLoggingHook\n", + "\n", + "hooks = [\n", + " LossLrMeterLoggingHook(),\n", + " CheckpointHook(fine_tuning_checkpoint_dir, input_args={})\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can setup our fine tuning task." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.tasks import FineTuningTask\n", + "\n", + "fine_tuning_task = (\n", + " FineTuningTask()\n", + " .set_num_epochs(num_epochs)\n", + " .set_loss(loss)\n", + " .set_model(model)\n", + " .set_optimizer(optimizer)\n", + " .set_meters(meters)\n", + " .set_hooks(hooks)\n", + " .set_dataset(train_dataset, \"train\")\n", + " .set_dataset(test_dataset, \"test\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since this is a fine tuning task, there are some other configurations which need to be done." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We don't want to re-train the trunk, so we will be freezing it. This is optional." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "fine_tuning_task.set_freeze_trunk(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We want to start training the heads from scratch, so we will be resetting them. This is required in this example since the pre-trained heads are not compatible with the heads in fine tuning (they have a different number of classes). Otherwise, this is also optional." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "fine_tuning_task.set_reset_heads(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We need to give our task the pre-trained checkpoint, which it will use as the starting point for fine-tuning."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "fine_tuning_task.set_pretrained_checkpoint(pretrained_checkpoint)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let us fine tune!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "trainer.train(fine_tuning_task)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Conclusion\n", + "\n", + "In this tutorial, we learned how to load a pre-trained model in Classy Vision and how to fine-tune it for a different task. We did that by using the `FineTuningTask` abstraction, which lets you load the pretrained model weights, attach a new head to the model and optionally freeze the weights of the original model. \n", + "\n", + "To learn more about fine-tuning, check out our documentation for [FineTuningTask](https://classyvision.ai/api/tasks.html#classy_vision.tasks.FineTuningTask) and [ClassyHead](https://classyvision.ai/api/heads.html)" + ] + } + ], + "metadata": { + "bento_stylesheets": { + "bento/extensions/flow/main.css": true, + "bento/extensions/kernel_selector/main.css": true, + "bento/extensions/kernel_ui/main.css": true, + "bento/extensions/new_kernel/main.css": true, + "bento/extensions/system_usage/main.css": true, + "bento/extensions/theme/main.css": true + }, + "kernelspec": { + "display_name": "Classy Vision", + "language": "python", + "name": "bento_kernel_classy_vision" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5+" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/getting_started.ipynb b/tutorials/getting_started.ipynb new file mode 100644
index 0000000000..21f247ba20 --- /dev/null +++ b/tutorials/getting_started.ipynb @@ -0,0 +1,559 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting started with Classy Vision" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Classy Vision is an end-to-end framework for image and video classification. Classy Vision makes it easy to write and launch distributed training jobs.\n", + "\n", + "In this tutorial, we will cover:\n", + "1. How to start a new project;\n", + "2. How to launch a single node training run; \n", + "3. How to launch a distributed training run; \n", + "4. How to visualize results with Tensorboard; \n", + "5. How to load checkpoints and interact with the trained model; \n", + "6. How to start training from a Jupyter notebook;\n", + "7. How to train a ResNet 50 model on ImageNet;\n", + "\n", + "## 0. Setup\n", + "\n", + "Make sure you have Classy Vision installed. To install it, run this in your terminal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "! pip install classy_vision" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you would like to use GPUs for training, make sure your environment has a working version of PyTorch with CUDA:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import torch\n", + "torch.cuda.is_available()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The cell above should output `True`. Check out [this link](https://pytorch.org/get-started/locally/) for more details on how to install PyTorch. For this tutorial, we will be using [Tensorboard](https://www.tensorflow.org/tensorboard). 
Install it with the following (on your terminal):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "! pip install tensorboard tensorboardX" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Start a new project\n", + "\n", + "To start, let's create a new project. Run this in your terminal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "! classy-project my-project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "% cd my-project" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To launch a training run on the current machine, run the following:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "! ./classy_train.py --config configs/template_config.json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's it! You've launched your first training run. This trained a small MLP model on a dataset made of random noise, which is not that useful. The `classy-project` utility creates the scaffolding for your project, and you should modify it according to your needs. We'll learn how to customize your runs in the next few tutorials.\n", + "\n", + "Let's take a look at what `classy-project` has created for us:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "! find . | grep -v \\.pyc | sort" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's what each folder means:\n", + "\n", + " * `configs`: stores your experiment configurations. 
Keeping all your experiments as separate configuration files helps make your research reproducible;\n", + " * `models`: code for your custom model architectures;\n", + " * `losses`: code for your custom loss functions;\n", + " * `datasets`: code for your custom datasets;\n", + " * `classy_train.py`: script to execute a training job. This uses the Classy Vision library to configure the job and execute it, and you might change it according to your needs;\n", + " * `template_config.json`: experiment configuration file. This file is read by `classy_train.py` to configure your training job and launch it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a peek at the configuration file:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "! cat configs/template_config.json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That file can be shared with other researchers whenever you want them to reproduce your experiments. We generate `json` files by default, but `YAML` will be officially supported soon.\n", + "\n", + "## 2. Distributed training\n", + "\n", + "`classy_train.py` can also be called from `torch.distributed.launch`, similar to regular PyTorch distributed scripts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "! python -m torch.distributed.launch --use_env --nproc_per_node=2 ./classy_train.py --config configs/template_config.json --distributed_backend ddp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you have two GPUs on your current machine, that command will launch one process per GPU and start a [DistributedDataParallel](https://pytorch.org/tutorials/intermediate/ddp_tutorial.html) training run. \n", + "\n", + "## 3. 
Tensorboard integration\n", + "\n", + "[Tensorboard](https://www.tensorflow.org/tensorboard) is a very useful tool for visualizing training progress. Classy Vision works with tensorboard out-of-the-box, just make sure you have it installed as described in the Setup section. By default `classy_train.py` will output tensorboard data in a subdirectory of your project directory (typically named `output_/tensorboard`), so in our case we can just launch tensorboard in the current working directory:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "%load_ext tensorboard\n", + "%tensorboard --logdir ." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also customize the tensorboard output directory by editing `classy_train.py`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Loading checkpoints\n", + "\n", + "Now that we've run `classy_train.py`, let's see how to load the resulting model. At the end of execution, `classy_train.py` will print the checkpoint directory used for that run. Each run will output to a different directory, typically named `output_/checkpoints`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.generic.util import load_checkpoint\n", + "from classy_vision.models import ClassyModel\n", + "\n", + "# This is important: importing models here will register your custom models with Classy Vision\n", + "# so that it can instantiate them appropriately from the checkpoint file\n", + "# See more information at https://classyvision.ai/api/models.html#classy_vision.models.register_model\n", + "import models\n", + "\n", + "# Update this with your actual directory:\n", + "checkpoint_dir = './output_/checkpoints'\n", + "checkpoint_data = load_checkpoint(checkpoint_dir)\n", + "model = ClassyModel.from_checkpoint(checkpoint_data)\n", + "model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's it! You can now use that model for inference as usual.\n", + "\n", + "## 5. Resuming from checkpoints\n", + "\n", + "Resuming from a checkpoint is as simple as training: `classy_train.py` takes a `--checkpoint_folder` argument, which specifies the checkpoint to resume from:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "! ./classy_train.py --config configs/template_config.json --checkpoint_folder ./output_/checkpoints" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Interactive development\n", + "\n", + "Training scripts and configuration files are useful for running large training jobs on a GPU cluster (see our [AWS tutorial](https://classyvision.ai/tutorials/ray_aws)), but a lot of day-to-day work happens interactively within Jupyter notebooks. Classy Vision is designed as a library that can be used without our built-in training scripts. 
Let's take a look at how to do the same training run as before, but within Jupyter instead of using `classy_train.py`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import classy_vision" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from datasets.my_dataset import MyDataset\n", + "from models.my_model import MyModel\n", + "from losses.my_loss import MyLoss\n", + "from classy_vision.dataset.transforms import GenericImageTransform\n", + "from torchvision import transforms\n", + "\n", + "train_dataset = MyDataset(\n", + " batchsize_per_replica=32,\n", + " shuffle=False,\n", + " transform=GenericImageTransform(\n", + " transform=transforms.Compose(\n", + " [\n", + " transforms.RandomResizedCrop(224),\n", + " transforms.RandomHorizontalFlip(),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(\n", + " mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]\n", + " ),\n", + " ]\n", + " )\n", + " ),\n", + " num_samples=100,\n", + " crop_size=224,\n", + " class_ratio=0.5,\n", + " seed=0,\n", + ")\n", + "\n", + "test_dataset = MyDataset(\n", + " batchsize_per_replica=32,\n", + " shuffle=False,\n", + " transform=GenericImageTransform(\n", + " transform=transforms.Compose(\n", + " [\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(\n", + " mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]\n", + " ),\n", + " ]\n", + " )\n", + " ),\n", + " num_samples=100,\n", + " crop_size=224,\n", + " class_ratio=0.5,\n", + " seed=0,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from classy_vision.tasks import ClassificationTask\n", + "from classy_vision.optim import SGD\n", + "from classy_vision.optim.param_scheduler 
import ConstantParamScheduler\n", + "\n", + "model = MyModel()\n", + "loss = MyLoss()\n", + "\n", + "optimizer = SGD(\n", + " lr_scheduler=ConstantParamScheduler(0.01)\n", + ")\n", + "\n", + "from classy_vision.trainer import LocalTrainer\n", + "\n", + "task = ClassificationTask() \\\n", + " .set_model(model) \\\n", + " .set_dataset(train_dataset, \"train\") \\\n", + " .set_dataset(test_dataset, \"test\") \\\n", + " .set_loss(loss) \\\n", + " .set_optimizer(optimizer) \\\n", + " .set_num_epochs(1)\n", + "\n", + "trainer = LocalTrainer()\n", + "trainer.train(task)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's it! Your model is trained now and ready for inference:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import torch\n", + "x = torch.randn((1, 3, 224, 224))\n", + "with torch.no_grad():\n", + " y_hat = model(x)\n", + "\n", + "y_hat" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Training a ResNet 50 on ImageNet\n", + "\n", + "We have looked at training models using synthetic data so far. A more typical workflow involves training a model on a real world dataset like [ImageNet](http://image-net.org/), which we will cover in this section.\n", + "\n", + "To be able to train using ImageNet, first download the dataset archives from http://image-net.org/. Then, extract the data to a format expected by [`torchvision.datasets.ImageFolder`](https://pytorch.org/docs/stable/torchvision/datasets.html#imagefolder) inside subdirectories for the individual splits (`train` and `val`). We can then pass the root path containing these archives to the [`ImageNetDataset`](https://classyvision.ai/api/dataset.html#classy_vision.dataset.ImageNetDataset).\n", + "\n", + "The following configuration can be used to train a ResNet 50 on ImageNet to `76.2%` top-1 accuracy in 90 epochs. 
The optimizer configuration uses SGD with momentum, gradual learning rate warm up for the first 5 epochs and 1/10 learning rate drops at epochs 30, 60 and 80. The learning rate is calculated for a setup with 32 GPUs and can be scaled based on the overall batch size [1]." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "config = {\n", + " \"name\": \"classification_task\",\n", + " \"num_epochs\": 90,\n", + " \"loss\": {\n", + " \"name\": \"CrossEntropyLoss\"\n", + " },\n", + " \"dataset\": {\n", + " \"train\": {\n", + " \"name\": \"classy_imagenet\",\n", + " \"split\": \"train\",\n", + " \"batchsize_per_replica\": 32,\n", + " \"num_samples\": None,\n", + " \"use_shuffle\": True,\n", + " \"root\": \"/path/to/imagenet/\" # replace with path to the extracted dataset\n", + " },\n", + " \"test\": {\n", + " \"name\": \"classy_imagenet\",\n", + " \"split\": \"val\",\n", + " \"batchsize_per_replica\": 32,\n", + " \"num_samples\": None,\n", + " \"use_shuffle\": False,\n", + " \"root\": \"/path/to/imagenet/\" # replace with path to the extracted dataset\n", + " }\n", + " },\n", + " \"meters\": {\n", + " \"accuracy\": {\n", + " \"topk\": [1, 5]\n", + " }\n", + " },\n", + " \"model\": {\n", + " \"name\": \"resnet\",\n", + " \"num_blocks\": [3, 4, 6, 3],\n", + " \"small_input\": False,\n", + " \"zero_init_bn_residuals\": True,\n", + " \"heads\": [\n", + " {\n", + " \"name\": \"fully_connected\",\n", + " \"unique_id\": \"default_head\",\n", + " \"num_classes\": 1000,\n", + " \"fork_block\": \"block3-2\",\n", + " \"in_plane\": 2048\n", + " }\n", + " ]\n", + " },\n", + " \"optimizer\": {\n", + " \"name\": \"sgd\",\n", + " \"lr\": {\n", + " \"name\": \"composite\",\n", + " \"schedulers\": [\n", + " {\"name\": \"linear\", \"start_lr\": 0.1, \"end_lr\": 0.4},\n", + " {\"name\": \"multistep\", \"values\": [0.4, 0.04, 0.004, 0.0004], \"milestones\": [30, 60, 80]}\n", + " ],\n", + " \"update_interval\": 
\"epoch\",\n", + " \"interval_scaling\": [\"rescaled\", \"fixed\"],\n", + " \"lengths\": [0.0555, 0.9445]\n", + " },\n", + " \"weight_decay\": 1e-4,\n", + " \"momentum\": 0.9\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. Conclusion\n", + "\n", + "In this tutorial, we learned how to start a new project using Classy Vision, how to perform training locally and how to do multi-gpu training on a single machine. We also saw how to use Tensorboard to visualize training progress, how to load models from checkpoints and how to resume training from a checkpoint file. We also went over how to use the ImageNet dataset to train a ResNet 50. In the next tutorials, we'll look into how to add custom datasets, models and loss functions to Classy Vision so you can adapt it to your needs, and how to launch distributed training on multiple nodes." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9. References\n", + "\n", + "[1] Goyal, Priya, et al. \"Accurate, large minibatch sgd: Training imagenet in 1 hour.\" arXiv preprint arXiv:1706.02677 (2017)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "bento_stylesheets": { + "bento/extensions/flow/main.css": true, + "bento/extensions/kernel_selector/main.css": true, + "bento/extensions/kernel_ui/main.css": true, + "bento/extensions/new_kernel/main.css": true, + "bento/extensions/system_usage/main.css": true, + "bento/extensions/theme/main.css": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5+" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/pet_aws.ipynb b/tutorials/pet_aws.ipynb new file mode 100644 index 0000000000..e243228b16 --- /dev/null +++ b/tutorials/pet_aws.ipynb @@ -0,0 +1,295 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Elastic training with Classy Vision\n", + "\n", + "This tutorial will demonstrate how to launch an elastic training job on Amazon Web Services ([AWS](https://aws.amazon.com/)) using [PyTorch Elastic](https://github.com/pytorch/elastic) and Classy Vision.\n", + "\n", + "## Prerequisites\n", + "\n", + "1. Familiarity with basic AWS (EC2, Auto Scaling Groups, S3, EFS).\n", + "2. (suggested) install and set up [`awscli`](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html).\n", + "3. Basic knowledge of containers (we use Docker in our examples).\n", + "\n", + "## 1. Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download the PyTorch Elastic repository and install it. Run in your terminal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
git clone https://github.com/pytorch/elastic.git\n", + "! pip install torchelastic" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install the required dependencies for AWS:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "% cd elastic/aws\n", + "! pip install -r requirements.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make sure you are familiar with the following AWS resources:\n", + "\n", + " 1. EC2 [instance profile](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2_instance-profiles.html)\n", + " 2. EC2 [key pair](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html)\n", + " 3. [Subnet(s)](https://docs.aws.amazon.com/vpc/latest/userguide/default-vpc.html#create-default-subnet)\n", + " 4. [Security group](https://docs.aws.amazon.com/vpc/latest/userguide/VPC_SecurityGroups.html#DefaultSecurityGroup)\n", + " 5. EFS volume\n", + " 6. S3 bucket\n", + " \n", + "[Install](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager-working-with-install-plugin.html)\n", + " the AWS Session Manager plugin." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Create the cluster\n", + "\n", + "`petctl` is a commandline tool that helps run distributed jobs written with torchelastic on EC2 instances. It's available in the `aws` directory of the `torchelastic` repo. To get started, run this on your terminal:\n", + "\n", + "```bash\n", + "python3 petctl.py setup\n", + "```\n", + "\n", + "This will bootstrap all the AWS resources required to run a torchelastic\n", + "job. For details take a look at the CloudFormation [template](cfn/setup.yml) .\n", + "\n", + "Use `--s3_bucket` and `--efs_id` to use an existing S3 bucket and EFS \n", + "file system. 
Otherwise an S3 bucket and EFS volume will be created.\n", + "\n", + "> **IMPORTANT** when specifying `--efs_id` you MUST ensure that NO mount targets\n", + "exist on the EFS file system. torchelastic's cfn stack will attempt to create\n", + "mount targets for the subnets it creates and WILL FAIL if the file system already\n", + "has mount targets on a different VPC. For more information refer to \n", + "the [EFS docs](https://docs.aws.amazon.com/efs/latest/ug/accessing-fs.html). \n", + "\n", + "**TIP:** If the stack creation fails, log into the CloudFormation console, inspect\n", + "the failure reason, address the failure, then manually delete the stack and re-run\n", + "`petctl configure`.\n", + "\n", + "If you are familiar with AWS or already have the resources specified in the \n", + "**Requirements** section above, then you can follow the [Manual Setup](https://github.com/pytorch/elastic/blob/master/aws/README.md) instructions\n", + "in the `torchelastic` repository. Simply copy the sample specs file and fill\n", + "in the template, then run `python petctl.py configure`. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Create your Classy Vision project\n", + "\n", + "If you already have a Classy Vision project to use with `torchelastic`, great! You only need to modify `classy_train.py` to use an `ElasticTrainer` instead of a `DistributedTrainer`. See our [getting started](https://classyvision.ai/tutorials/getting_started) tutorial for more details about `classy_train.py`.\n", + "\n", + "To make things easier, we provided an example of how to use `ElasticTrainer`: it's under `./examples/classy_vision/main.py` in the `torchelastic` repo. You can start by copying that file and use it to replace `classy_train.py`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## 4. Start training\n", + "\n", + "Normally you would run the training script directly to start training. 
For elastic training, we'll use `petctl` to launch it. Here's how you launch our example script in your terminal:\n", + "\n", + "``` bash\n", + "python3 aws/petctl.py run_job --size 2 --min_size 2 --max_size 2 --name ${USER}-job examples/classy_vision/main.py -- --config_file classy-vision://configs/resnet50_synthetic_image_classy_config.json --num_workers 0\n```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the example above, the named arguments, such as `--size`, `--min_size`, and\n", + "`--max_size` are parameters to the `run_job` sub-command of `petctl`. In the example\n", + "above, we created an **elastic** job where the initial worker count is `--size=2` and we are\n", + "allowed to scale down to `--min_size` and up to `--max_size`. This is used by\n", + "torchelastic's rendezvous algorithm to determine how many nodes to admit on each\n", + "re-rendezvous before considering the group *final* and starting the `train_step`.\n", + "\n", + "Because all the size parameters are the same in this case, that means we are disabling elasticity. You might want to do that for reproducibility reasons, for instance. Training this way still provides benefits, since `torchelastic` increases robustness: when a node fails, we can start a new node and resume training from the last minibatch, without reverting back to the previous checkpoint. \n", + "\n", + "The other positional arguments have the form:\n", + "\n", + "```\n", + "[local script] -- [script args ...]\n", + " -- or -- \n", + "[local directory] -- [script] [script args...]\n", + "```\n", + "\n", + "If the first positional argument is a path to a script file, then the script\n", + "is uploaded to S3 and the script arguments specified after the `--` delimiter\n", + "are passed through to the script.\n", + "\n", + "If the first positional argument is a directory, then a tarball of the directory\n", + "is created and uploaded to S3 and is extracted on the worker-side. 
In this case\n", + "the first argument after the `--` delimiter is the path to the script **relative** to the\n", + "specified directory and the rest of the arguments after the delimiter are passed \n", + "to the script.\n", + "\n", + "\n", + "In our example we specified\n", + "```\n", + "petctl.py run_job [...] classy_vision/main.py -- --config_file [...]\n", + "```\n", + "\n", + "We could have decided to specify the directory instead\n", + "```\n", + "petctl.py run_job [...] classy_vision -- main.py --config_file [...]\n", + "```\n", + "\n", + "**TIP 1:** Besides a local script or directory you can run with scripts or `tar` files\n", + "that have already been uploaded to S3 or directly point it to a file or directory\n", + "on the container.\n", + "``` bash\n", + "python3 petctl.py run_job [...] s3://my-bucket/my_script.py\n", + "python3 petctl.py run_job [...] s3://my-bucket/my_dir.tar.gz -- my_script.py\n", + "\n", + "# or\n", + "python3 petctl.py run_job [...] docker:///abs/path/in/container/dir -- my_script.py\n", + "python3 petctl.py run_job [...] docker://rel/path/in/container/dir/my_script.py\n", + "```\n", + "\n", + "**TIP 2:** To iterate quickly, simply make changes to your local script and\n", + "upload the script to S3 using\n", + "```bash \n", + "python3 petctl.py upload examples/imagenet/main.py s3://<bucket>/<path>/<script_name> \n", + "```\n", + "\n", + "**TIP 3:** Use the EFS volume attached on `/mnt/efs/fs1` on all the workers to \n", + "save input data, checkpoints and job output.\n", + "\n", + "Once the `run_job` command returns, log into the EC2 console; you will see two\n", + "Auto Scaling Groups\n", + "1. etcd server \n", + "2. workers\n", + "\n", + "## 5. Inspect the logs\n", + "Log into the AWS CloudWatch Logs console. You should see a log group called\n", + "`torchelastic/$USER`. Under it there will be a log stream per instance with the \n", + "name `$job_name/$instance_id` (e.g. 
`my_job/i0b938EXAMPLE`).\n", + "\n", + "#### Troubleshooting\n", + "To SSH onto the worker nodes to debug/inspect the worker process use AWS \n", + "Session Manager instead of the ec2 key pair. [Install](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager-working-with-install-plugin.html)\n", + " the Session Manager plugin and run\n", + "\n", + "``` bash\n", + "# get the instance ids of the workers\n", + "python3 petctl.py list_hosts <job_name>\n", + "\n", + "# ssh onto one of the workers\n", + "awscli ssm start-session --target <instance_id>\n", + " -- example --\n", + "awscli ssm start-session --target i-00b00EXAMPLE\n", + "```\n", + "\n", + "Once SSH'ed, the workers run in a docker container managed by `systemd`.\n", + "You can take a look at their console outputs by running\n", + "\n", + "``` bash\n", + "# see the status of the worker\n", + "sudo systemctl status torchelastic_worker\n", + "# get the container id\n", + "sudo docker ps\n", + "# tail the container logs\n", + "sudo docker logs -f <container_id>\n", + "```\n", + "\n", + "You can also manually stop and start the workers by running\n", + "``` bash\n", + "sudo systemctl stop torchelastic_worker\n", + "sudo systemctl start torchelastic_worker\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **EXERCISE:** Open up two terminals and SSH onto each worker. Tail the docker logs\n", + "on each worker. Now stop worker 1 and observe worker 2 re-rendezvous; if the job was\n", + "launched with `--min_size=1`, it continues training by itself. Now restart worker 1 and\n", + "observe that worker 2 notices that worker 1 is waiting to join and re-rendezvous,\n", + "the `state` object in worker 2 is `sync()`'ed to worker 1 and both resume training\n", + "without loss of progress." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note**: by design, `petctl` tries to use the least number of AWS services. 
This\n", + "was done intentionally to allow non-AWS users to easily transfer the functionality\n", + "to their environment. Hence it currently does not have the functionality to query\n", + "status of the job or to terminate the ASG when the job is done (there is nothing\n", + "that is monitoring the job!). In practice consider using EKS, Batch, or SageMaker." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Stop training\n", + "To stop the job and tear down the resources, use the `kill_job` command:\n", + "\n", + "``` bash\n", + "python3 petctl.py kill_job ${USER}-job\n", + "```\n", + "\n", + "You'll notice that the two ASGs created with the `run_job` command are deleted." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/ray_aws.ipynb b/tutorials/ray_aws.ipynb new file mode 100644 index 0000000000..d9043dce14 --- /dev/null +++ b/tutorials/ray_aws.ipynb @@ -0,0 +1,255 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Distributed training on AWS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this tutorial we will learn: \n", + "1. How to start a cluster on AWS for use with Classy Vision; \n", + "2. How to start a new project on the cluster; \n", + "3. How to launch training jobs on the cluster;\n", + "\n", + "## 1. Setup\n", + "\n", + "Make sure you have Classy Vision installed, as described in our [Getting started](https://classyvision.ai/tutorials/getting_started) tutorial. 
\n", + "\n", + "For this tutorial we will also need the Classy Vision sources, you can clone it with this command (on your terminal):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! git clone https://github.com/facebookresearch/ClassyVision.git" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this tutorial we'll use [Ray](https://github.com/ray-project/ray) to manage the AWS resources. Install Ray and all its required dependencies with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "% cd ./ClassyVision/examples/ray\n", + "! pip install -r requirements.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should also set up your AWS CLI and credentials as described [here](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html#cli-quick-configuration). To make sure everything is working, run on your terminal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! aws ec2 describe-instances" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That should print a JSON file with all your current AWS instances (or empty if you don't have any). " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Cluster setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have a sample cluster configuration file stored in the Classy Vision repository, under `./examples/ray/cluster_config.yml`. Let's verify that Ray can start the cluster appropriately:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
ray up cluster_config.yml -y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That will take about 10 minutes, and at the end you should see a message explaining how to connect to the cluster. Assuming everything worked successfully, now tear down the cluster:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! ray down cluster_config.yml -y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will now set up an EFS volume to store our code and datasets. Follow [this tutorial](https://aws.amazon.com/getting-started/tutorials/create-network-file-system/) to setup the EFS volume in your AWS account. \n", + "\n", + "When you're done with that tutorial, go back to the EFS section in the AWS console, find your filesystem there and click `Manage file system access`. Add the `ray-autoscaler-default` security group to the list of security groups allowed to use your EFS volume. That security group should have been created by the `ray up` command we ran earlier.\n", + "\n", + "You should now have an identifier for your EFS volume. Open `cluster_config.yml` in your favorite text editor and replace `{{FileSystemId}}` with your own EFS id. We are now ready to launch our cluster again:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! ray up cluster_config.yml" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Create a project\n", + "\n", + "When it's done, let's attach to the head node of the cluster:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! ray attach cluster_config.yml" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That will give you an SSH session into the head node, which coordinates all the worker nodes in Ray. 
In our example configuration file, the head node is a CPU-only machine, and the workers all have GPUs.\n", + "\n", + "Both the head node and the worker nodes will have the same EFS volume mounted, so we'll use that to send code from the head to the workers. The following commands are meant to run on the head node (e.g. in the terminal prompt you got from `ray attach`). Let's start a project in the EFS folder:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "$ cd efs\n", + "$ classy-project my_project" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Start training\n", + "\n", + "Classy Vision comes with a launcher analogous to `torch.distributed.launch`, but that launches jobs on multiple machines using Ray. To use it, simply run:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "$ python -m classy_vision.distributed.launch_ray --nnodes=2 --use_env ~/efs/my_project/classy_train.py --config ~/efs/my_project/configs/template_config.json --distributed_backend ddp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Your first time running this you might see logs like `Not enough GPUs available`. That's normal, and it's because the worker nodes are still being set up. The `ray up` command should have printed a command line you can use to follow their progress. But there's no need to do anything, the launcher will wait until the workers are available and execute the command automatically.\n", + "\n", + "That's it! When that command is done it should print the folder where the checkpoints are.\n", + "\n", + "> Note that we specified the full absolute path for the config in the argument list. That's because the `classy_train.py` command is running on a remote machine and we are relying on the fact that the EFS folder is mounted at exactly the same location on the head and worker nodes. 
Keep that in mind if you modify this setup.\n", + "\n", + "> Remember to tear down the cluster with `ray down cluster_config.yml` when you're done. You will be billed as long as the machines are up, even when not using them." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Troubleshooting\n", + "\n", + "If you hit an error during this tutorial, here are a few things that might help to debug what is going on:\n", + "\n", + "### Make sure all workers have initialized properly\n", + "\n", + "When the `ray up` command finishes, it prints a command line to tail the logs. It should look like:\n", + "```bash\n", + "ray exec cluster_config.yml 'tail -n 100 -f /tmp/ray/session_*/logs/monitor*'\n", + "```\n", + "\n", + "Run that command and look for any errors. When the workers are done initializing, you should see `-- StandardAutoscaler: 2/2 target nodes (0 pending)` printed repeatedly on the logs.\n", + "\n", + "### Make sure EFS volumes are mounted on all machines\n", + "\n", + "Sometimes the EFS package fails to install on workers. To verify EFS is working, get the worker node IPs with `ray get-worker-ips cluster_config.yml`, then ssh into them with:\n", + "```bash\n", + "ssh -i ~/.ssh/ray-autoscaler_us-west-2.pem ubuntu@<worker-ip>\n", + "```\n", + "\n", + "Once in a worker machine, run `df -h` to list all the current mounts. Verify `/home/ubuntu/efs` is on that list. If it's not, look for the EFS setup commands on the `cluster_config.yml` file and run them yourself. That should clarify what the issue is. If you didn't set up the EFS security groups correctly (as described in step 2), the `mount` command will hang for a few minutes then fail.\n", + "\n", + "## 6. Conclusion\n", + "\n", + "In this tutorial we covered how to start using Classy Vision on AWS using Ray. For more information about Ray, check out their [repository](https://github.com/ray-project/ray). 
The next tutorials ([[1]](https://classyvision.ai/tutorials/classy_model), [[2]](https://classyvision.ai/tutorials/classy_loss), [[3]](https://classyvision.ai/tutorials/classy_dataset)) will demonstrate how to customize the project created by the `classy-project` utility for your own needs. To learn more about how to train models in Classy Vision and how to use Tensorboard to visualize training progress, check out our [Getting started](https://classyvision.ai/tutorials/getting_started) tutorial." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/torchscript.ipynb b/tutorials/torchscript.ipynb new file mode 100644 index 0000000000..139b52666a --- /dev/null +++ b/tutorials/torchscript.ipynb @@ -0,0 +1,142 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using torchscript with Classy Vision" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[torchscript](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html) is commonly used to export PyTorch models from Python to C++. This is useful for productionizing models, when you typically perform inference on a CPU. This tutorial will demonstrate how to export a Classy Vision model using `torchscript`'s tracing mode and how to load a torchscript model.\n", + "\n", + "## 1. 
Build and train the model\n", + "\n", + "Our [Getting started](https://classyvision.ai/tutorials/getting_started) tutorial covered many ways of training a model, here we'll simply instantiate a ResNeXT model from a config:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.models import build_model\n", + "import torch\n", + "\n", + "config = {\n", + " \"name\": \"resnext\",\n", + " \"num_blocks\": [3, 4, 23, 3],\n", + " \"num_classes\": 1000,\n", + " \"base_width_and_cardinality\": [4, 32],\n", + " \"small_input\": False,\n", + " \"heads\": [\n", + " {\n", + " \"name\": \"fully_connected\",\n", + " \"unique_id\": \"default_head\",\n", + " \"num_classes\": 1000,\n", + " \"fork_block\": \"block3-2\",\n", + " \"in_plane\": 2048\n", + " }\n", + " ]\n", + "}\n", + "\n", + "model = build_model(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Export the model\n", + "\n", + "Now that the model is built/trained, you can export it using `torch.jit.trace`. 
To check the results, we'll perform inference on the actual model and on the torchscripted model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with torch.no_grad():\n", + " script = torch.jit.trace(model, torch.randn(1, 3, 224, 224, dtype=torch.float))\n", + " input = torch.randn(1, 3, 224, 224, dtype=torch.float)\n", + " origin_outs = model(input)\n", + " script_outs = script(input)\n", + "\n", + "assert torch.allclose(origin_outs, script_outs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After verifying the torchscripted model works as expected, you can save it using `torch.jit.save`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "torch.jit.save(script, \"/tmp/resnext_101.pt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Loading a model\n", + "\n", + "Loading a torchscripted model is as simple as calling `torch.jit.load`. If you need to fine-tune or continue training the model, the loaded model can be attached directly to a `ClassificationTask` or `FineTuningTask` in Classy Vision:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "loaded_model = torch.jit.load(\"/tmp/resnext_101.pt\")\n", + "loaded_outs = loaded_model(input)\n", + "\n", + "assert torch.allclose(loaded_outs, origin_outs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Conclusion\n", + "\n", + "`torchscript` makes it really easy to transfer models between research and production with PyTorch, and it works seamlessly with Classy Vision. Check out the [torchscript tutorial](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html) for more information about how to export a model correctly. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/video_classification.ipynb b/tutorials/video_classification.ipynb new file mode 100644 index 0000000000..d01e66e199 --- /dev/null +++ b/tutorials/video_classification.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to do video classification " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this tutorial, we will show how to train a video classification model in Classy Vision. Given an input video, the video classification task is to predict the most probable class label. This is very similar to image classification, which was covered in other tutorials, but there are a few differences that make video special. As the video duration can be long, we sample short video clips of a small number of frames, use the classifier to make predictions, and finally average the clip-level predictions to get the final video-level predictions. \n", + "\n", + "In this tutorial we will: (1) load a video dataset; (2) configure a video model; (3) configure video meters; (4) build a task; (5) start training; Please note that these steps are being done separately in the tutorial for easy of exposition in the notebook format. As described in our [Getting started](https://classyvision.ai/tutorials/getting_started) tutorial, you can combine all configs used in this tutorial into a single config for ClassificationTask and train it using `classy_train.py`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. 
Prepare the dataset\n", + "\n", + "All right! Let's start with the dataset. [UCF-101](https://www.crcv.ucf.edu/data/UCF101.php) is a canonical action recognition dataset. It has 101 action classes and 3 folds with different training/testing splits. We use fold 1 in this tutorial. Classy Vision has implemented the dataset `ucf101`, which can be used to load the training and testing splits. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.dataset import build_dataset\n", + "\n", + "# set it to the folder where video files are saved\n", + "video_dir = \"[PUT YOUR VIDEO FOLDER HERE]\"\n", + "# set it to the folder where dataset splitting files are saved\n", + "splits_dir = \"[PUT THE FOLDER WHICH CONTAINS SPLITTING FILES HERE]\"\n", + "# set it to the file path for saving the metadata\n", + "metadata_file = \"[PUT THE FILE PATH OF DATASET META DATA HERE]\"\n", + "\n", + "datasets = {}\n", + "datasets[\"train\"] = build_dataset({\n", + " \"name\": \"ucf101\",\n", + " \"split\": \"train\",\n", + " \"batchsize_per_replica\": 8, # For training, we use 8 clips in a minibatch in each model replica\n", + " \"use_shuffle\": True, # We shuffle the clips in the training split\n", + " \"num_samples\": 64, # We train on 64 clips in one training epoch\n", + " \"clips_per_video\": 1, # For training, we randomly sample 1 clip from each video\n", + " \"frames_per_clip\": 8, # The video clip contains 8 frames\n", + " \"video_dir\": video_dir,\n", + " \"splits_dir\": splits_dir,\n", + " \"metadata_file\": metadata_file,\n", + " \"fold\": 1,\n", + " \"transforms\": {\n", + " \"video\": [\n", + " {\n", + " \"name\": \"video_default_augment\",\n", + " \"crop_size\": 112,\n", + " \"size_range\": [128, 160]\n", + " }\n", + " ]\n", + " }\n", + "})\n", + "datasets[\"test\"] = build_dataset({\n", + " \"name\": \"ucf101\",\n", + " \"split\": \"test\",\n", + " \"batchsize_per_replica\": 10, # For 
testing, we take one video at a time, and sample 10 clips per video\n", + " \"use_shuffle\": False, # We do not shuffle clips in the testing split\n", + " \"num_samples\": 80, # We test on 80 clips in one testing epoch\n", + " \"clips_per_video\": 10, # We sample 10 clips per video\n", + " \"frames_per_clip\": 8,\n", + " \"video_dir\": video_dir,\n", + " \"splits_dir\": splits_dir,\n", + " \"metadata_file\": metadata_file,\n", + " \"fold\": 1,\n", + " \"transforms\": {\n", + " \"video\": [\n", + " {\n", + " \"name\": \"video_default_no_augment\",\n", + " \"size\": 128\n", + " }\n", + " ]\n", + " } \n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note we specify different transforms for the training and testing splits. For the training split, we first randomly select a size from `size_range` [128, 160], and resize the video clip so that its short edge is equal to the random size. After that, we take a random crop of spatial size 112 x 112. We find such data augmentation helps the model generalize better, and use it as the default transform with data augmentation. For the testing split, we resize the video clip to have a short edge of size 128, and skip the random cropping to use the entire video clip. This is the default transform without data augmentation." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. Define a model trunk and a head\n", + "\n", + "Next, let's create the video model, which consists of a trunk and a head. The trunk can be viewed as a feature extractor for computing discriminative features from raw video pixels while the head is viewed as a classifier for producing the final predictions. Let's first create the trunk of architecture ResNet3D-18 by using the built-in `resnext3d` model in Classy Vision." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.models import build_model\n", + "\n", + "model = build_model({\n", + " \"name\": \"resnext3d\",\n", + " \"frames_per_clip\": 8, # The number of frames we have in each video clip\n", + " \"input_planes\": 3, # We use RGB video frames. So the input planes is 3\n", + " \"clip_crop_size\": 112, # We take croppings of size 112 x 112 from the video frames \n", + " \"skip_transformation_type\": \"postactivated_shortcut\", # The type of skip connection in residual unit\n", + " \"residual_transformation_type\": \"basic_transformation\", # The type of residual connection in residual unit\n", + " \"num_blocks\": [2, 2, 2, 2], # The number of residual blocks in each of the 4 stages \n", + " \"input_key\": \"video\", # The key used to index into the model input of dict type \n", + " \"stage_planes\": 64, \n", + " \"num_classes\": 101 # the number of classes\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also need to create a model head, which consists of an average pooling layer and a linear layer, by using the `fully_convolutional_linear` head. At test time, the shape (channels, frames, height, width) of input tensor is typically `(3 x 8 x 128 x 173)`. The shape of input tensor to the average pooling layer is `(2048, 1, 8, 10)`. Since we do not use a global average pooling but an average pooling layer of kernel size `(1, 7, 7)`, the pooled feature map has shape `(2048, 1, 2, 5)`. The shape of prediction tensor from the linear layer is `(1, 2, 5, 101)`, which indicates the model computes a 101-D prediction vector densely over a `2 x 5` grid. That's why we name the head as `FullyConvolutionalLinearHead` because we use the linear layer as a `1x1` convolution layer to produce spatially dense predictions. Finally, predictions over the `2 x 5` grid are averaged." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.heads import build_head\n", + "from collections import defaultdict\n", + "\n", + "unique_id = \"default_head\"\n", + "head = build_head({\n", + " \"name\": \"fully_convolutional_linear\",\n", + " \"unique_id\": unique_id,\n", + " \"pool_size\": [1, 7, 7],\n", + " \"num_classes\": 101,\n", + " \"in_plane\": 512 \n", + "})\n", + "# In Classy Vision, the head can be attached to any residual block in the trunk. \n", + "# Here we attach the head to the last block as in the standard ResNet model\n", + "fork_block = \"pathway0-stage4-block1\"\n", + "heads = defaultdict(dict)\n", + "heads[fork_block][unique_id] = head\n", + "model.set_heads(heads)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. Choose the meters\n", + "\n", + "This is the biggest difference between video and image classification. For images we used `AccuracyMeter` to measure top-1 and top-5 accuracy. For videos you can use both `AccuracyMeter` and `VideoAccuracyMeter`, but they behave differently:\n", + " * `AccuracyMeter` takes one clip-level prediction and compares it with the ground-truth video label. It reports the clip-level accuracy.\n", + " * `VideoAccuracyMeter` takes multiple clip-level predictions from the same video, averages them and compares that with the ground-truth video label. It reports the video-level accuracy which is usually higher than clip-level accuracy. \n", + " \n", + " Both meters report top-1 and top-5 accuracy." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.meters import build_meters, AccuracyMeter, VideoAccuracyMeter\n", + "\n", + "meters = build_meters({\n", + " \"accuracy\": {\n", + " \"topk\": [1, 5]\n", + " },\n", + " \"video_accuracy\": {\n", + " \"topk\": [1, 5],\n", + " \"clips_per_video_train\": 1,\n", + " \"clips_per_video_test\": 10\n", + " }\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 4. Build a task\n", + "Great! We have defined the dataset, model and meters; the loss function and optimizer are created in the next cell. With these we have the minimal set of components necessary for video classification. We then define a video classification task, and populate it with all the components." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from classy_vision.tasks import ClassificationTask\n", + "from classy_vision.optim import build_optimizer\n", + "from classy_vision.losses import build_loss\n", + "\n", + "loss = build_loss({\"name\": \"CrossEntropyLoss\"})\n", + "\n", + "optimizer = build_optimizer({\n", + " \"name\": \"sgd\",\n", + " \"lr\": {\n", + " \"name\": \"multistep\",\n", + " \"values\": [0.005, 0.0005],\n", + " \"milestones\": [1]\n", + " },\n", + " \"num_epochs\": 2,\n", + " \"weight_decay\": 0.0001,\n", + " \"momentum\": 0.9\n", + "})\n", + "\n", + "num_epochs = 2\n", + "task = (\n", + " ClassificationTask()\n", + " .set_num_epochs(num_epochs)\n", + " .set_loss(loss)\n", + " .set_model(model)\n", + " .set_optimizer(optimizer)\n", + " .set_meters(meters)\n", + ") \n", + "for phase in [\"train\", \"test\"]:\n", + " task.set_dataset(datasets[phase], phase)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 5. Start training\n", + "\n", + "After creating a task, you can simply pass that to a Trainer to start training. 
Here we will train on a single node and \n", + "configure logging and checkpoints for training:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import os\n", + "\n", + "from classy_vision.trainer import LocalTrainer\n", + "from classy_vision.hooks import CheckpointHook\n", + "from classy_vision.hooks import LossLrMeterLoggingHook\n", + "\n", + "hooks = [LossLrMeterLoggingHook(log_freq=4)]\n", + "\n", + "checkpoint_dir = f\"/tmp/classy_checkpoint_{time.time()}\"\n", + "os.mkdir(checkpoint_dir)\n", + "hooks.append(CheckpointHook(checkpoint_dir, input_args={}))\n", + "\n", + "task = task.set_hooks(hooks)\n", + "\n", + "trainer = LocalTrainer()\n", + "trainer.train(task)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As the training progresses, you should see `LossLrMeterLoggingHook` printing the loss, learning rate and meter metrics. Checkpoints will be available in the folder created above.\n", + "\n", + "# 6. Conclusion\n", + "\n", + "Video classification is very similar to image classification in Classy Vision; you just need to use an appropriate dataset, model and meters. This tutorial glossed over many details about training; please take a look at our [Getting started](https://classyvision.ai/tutorials/getting_started) tutorial to learn more. 
Refer to our API reference for more details about the [ResNeXt3D](https://classyvision.ai/api/models.html#classy_vision.models.ResNeXt3D) models, the [UCF101](https://classyvision.ai/api/dataset.html#classy_vision.dataset.UCF101Dataset) dataset and the [VideoAccuracy](https://classyvision.ai/api/meters.html#classy_vision.meters.VideoAccuracyMeter) meters.\n" + ] + } + ], + "metadata": { + "bento_stylesheets": { + "bento/extensions/flow/main.css": true, + "bento/extensions/kernel_selector/main.css": true, + "bento/extensions/kernel_ui/main.css": true, + "bento/extensions/new_kernel/main.css": true, + "bento/extensions/system_usage/main.css": true, + "bento/extensions/theme/main.css": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/wsl_model_predict.ipynb b/tutorials/wsl_model_predict.ipynb new file mode 100644 index 0000000000..45fd2d3792 --- /dev/null +++ b/tutorials/wsl_model_predict.ipynb @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Copyright (c) Facebook, Inc. and its affiliates.\n", + "# This source code is licensed under the MIT license found in the\n", + "# LICENSE file in the root directory of this source tree." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# TODO(@mannatsingh): use torchhub when the repo is public. 
The replacement code\n", + "# must be run from the top level directory in the meantime.\n", + "# classy_interface = torch.hub.load(\"facebookresearch/ClassyVision\", \"resnext101_32x8d_wsl\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "path = os.path.abspath(os.path.join(os.path.abspath(\"\"), \"../..\"))\n", + "sys.path.append(path)\n", + "import hubconf\n", + "classy_interface = hubconf.resnext101_32x8d_wsl()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2019-10-17 14:39:31-- https://github.com/pytorch/hub/raw/master/dog.jpg\n", + "Resolving fwdproxy (fwdproxy)... 2401:db00:20ff:ff70:face:b00c:0:1e10\n", + "Connecting to fwdproxy (fwdproxy)|2401:db00:20ff:ff70:face:b00c:0:1e10|:8080... connected.\n", + "Proxy request sent, awaiting response... 302 Found\n", + "Location: https://raw.githubusercontent.com/pytorch/hub/master/dog.jpg [following]\n", + "--2019-10-17 14:39:31-- https://raw.githubusercontent.com/pytorch/hub/master/dog.jpg\n", + "Connecting to fwdproxy (fwdproxy)|2401:db00:20ff:ff70:face:b00c:0:1e10|:8080... connected.\n", + "Proxy request sent, awaiting response... 
200 OK\n", + "Length: 661378 (646K) [image/jpeg]\n", + "Saving to: ‘dog.jpg’\n", + "\n", + "100%[======================================>] 661,378 --.-K/s in 0.01s \n", + "\n", + "2019-10-17 14:39:31 (47.0 MB/s) - ‘dog.jpg’ saved [661378/661378]\n", + "\n" + ] + }, + { + "data": { + "image/jpeg": "\n", + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": { + "bento_obj_id": "140494349111984" + }, + "output_type": "execute_result" + } + ], + "source": [ + "# Download an example image from the pytorch website\n", + "!wget https://github.com/pytorch/hub/raw/master/dog.jpg -O dog.jpg\n", + "from IPython.display import Image\n", + "Image(filename='dog.jpg')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "258" + ] + }, + "execution_count": 7, + "metadata": { + "bento_obj_id": "140493963418320" + }, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = classy_interface.create_image_dataset([\"./dog.jpg\"], split=\"test\")\n", + "data_iterator = classy_interface.get_data_iterator(dataset)\n", + "input = next(data_iterator)\n", + "# set the model to eval mode\n", + "classy_interface.eval()\n", + "output = classy_interface.predict(input)\n", + "# see the prediction for the input\n", + "classy_interface.predict(input).argmax().item()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "bento_stylesheets": { + "bento/extensions/flow/main.css": true, + "bento/extensions/kernel_selector/main.css": true, + "bento/extensions/kernel_ui/main.css": true, + "bento/extensions/new_kernel/main.css": true, + "bento/extensions/system_usage/main.css": true, + "bento/extensions/theme/main.css": true + }, + "kernelspec": { + "display_name": "pytorch", + "language": "python", + "name": "bento_kernel_pytorch" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3rc1+" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/website/README.md b/website/README.md new file mode 100644 index 0000000000..7391779aba --- /dev/null +++ b/website/README.md @@ -0,0 +1,193 @@ +This website was created with [Docusaurus](https://docusaurus.io/). + +# What's In This Document + +* [Get Started in 5 Minutes](#get-started-in-5-minutes) +* [Directory Structure](#directory-structure) +* [Editing Content](#editing-content) +* [Adding Content](#adding-content) +* [Full Documentation](#full-documentation) + +# Get Started in 5 Minutes + +1. Make sure all the dependencies for the website are installed: + +```sh +# Install dependencies +$ yarn +``` +2. Run your dev server: + +```sh +# Start the site +$ yarn start +``` + +## Directory Structure + +Your project file structure should look something like this + +``` +my-docusaurus/ + docs/ + doc-1.md + doc-2.md + doc-3.md + website/ + blog/ + 2016-3-11-oldest-post.md + 2017-10-24-newest-post.md + core/ + node_modules/ + pages/ + static/ + css/ + img/ + package.json + sidebar.json + siteConfig.js +``` + +# Editing Content + +## Editing an existing docs page + +Edit docs by navigating to `docs/` and editing the corresponding document: + +`docs/doc-to-be-edited.md` + +```markdown +--- +id: page-needs-edit +title: This Doc Needs To Be Edited +--- + +Edit me... +``` + +For more information about docs, click [here](https://docusaurus.io/docs/en/navigation) + +## Editing an existing blog post + +Edit blog posts by navigating to `website/blog` and editing the corresponding post: + +`website/blog/post-to-be-edited.md` +```markdown +--- +id: post-needs-edit +title: This Blog Post Needs To Be Edited +--- + +Edit me... 
+``` + +For more information about blog posts, click [here](https://docusaurus.io/docs/en/adding-blog). + +# Adding Content + +## Adding a new docs page to an existing sidebar + +1. Create the doc as a new markdown file in `/docs`, for example `docs/newly-created-doc.md`: + +```md +--- +id: newly-created-doc +title: This Doc Needs To Be Edited +--- + +My new content here... +``` + +2. Refer to that doc's ID in an existing sidebar in `website/sidebar.json`: + +```javascript +// Add newly-created-doc to the Getting Started category of docs +{ + "docs": { + "Getting Started": [ + "quick-start", + "newly-created-doc" // new doc here + ], + ... + }, + ... +} +``` + +For more information about adding new docs, click [here](https://docusaurus.io/docs/en/navigation). + +## Adding a new blog post + +1. Make sure there is a header link to your blog in `website/siteConfig.js`: + +`website/siteConfig.js` +```javascript +headerLinks: [ + ... + { blog: true, label: 'Blog' }, + ... +] +``` + +2. Create the blog post with the format `YYYY-MM-DD-My-Blog-Post-Title.md` in `website/blog`: + +`website/blog/2018-05-21-New-Blog-Post.md` + +```markdown +--- +author: Frank Li +authorURL: https://twitter.com/foobarbaz +authorFBID: 503283835 +title: New Blog Post +--- + +Lorem Ipsum... +``` + +For more information about blog posts, click [here](https://docusaurus.io/docs/en/adding-blog). + +## Adding items to your site's top navigation bar + +1. Add links to docs, custom pages or external links by editing the headerLinks field of `website/siteConfig.js`: + +`website/siteConfig.js` +```javascript +{ + headerLinks: [ + ... + /* you can add docs */ + { doc: 'my-examples', label: 'Examples' }, + /* you can add custom pages */ + { page: 'help', label: 'Help' }, + /* you can add external links */ + { href: 'https://github.com/facebook/docusaurus', label: 'GitHub' }, + ... + ], + ... 
+} +``` + +For more information about the navigation bar, click [here](https://docusaurus.io/docs/en/navigation) + +## Adding custom pages + +1. Docusaurus uses React components to build pages. The components are saved as .js files in `website/pages/en`: +1. If you want your page to show up in your navigation header, you will need to update `website/siteConfig.js` to add to the `headerLinks` element: + +`website/siteConfig.js` +```javascript +{ + headerLinks: [ + ... + { page: 'my-new-custom-page', label: 'My New Custom Page' }, + ... + ], + ... +} +``` + +For more information about custom pages, click [here](https://docusaurus.io/docs/en/custom-pages). + +# Full Documentation + +Full documentation can be found on the [website](https://docusaurus.io/). diff --git a/website/core/Footer.js b/website/core/Footer.js new file mode 100644 index 0000000000..fec406770b --- /dev/null +++ b/website/core/Footer.js @@ -0,0 +1,111 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + * + * @format + */ + +const PropTypes = require("prop-types"); +const React = require("react"); + +function SocialFooter(props) { + const repoUrl = `https://github.com/${props.config.organizationName}/${props.config.projectName}`; + return ( + + ); +} + +SocialFooter.propTypes = { + config: PropTypes.object +}; + +class Footer extends React.Component { + docUrl(doc, language) { + const baseUrl = this.props.config.baseUrl; + const docsUrl = this.props.config.docsUrl; + const docsPart = `${docsUrl ? `${docsUrl}/` : ""}`; + const langPart = `${language ? `${language}/` : ""}`; + return `${baseUrl}${docsPart}${langPart}${doc}`; + } + + pageUrl(doc, language) { + const baseUrl = this.props.config.baseUrl; + return baseUrl + (language ? `${language}/` : "") + doc; + } + + render() { + const currentYear = new Date().getFullYear(); + return ( +