diff --git a/mmsegmentation/configs/_trash_/_base_/datasets/trash.py b/mmsegmentation/configs/_trash_/_base_/datasets/trash.py new file mode 100644 index 0000000..18717b0 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/datasets/trash.py @@ -0,0 +1,82 @@ +# dataset settings +dataset_type = "COCOTrashDataset" +data_root = "/opt/ml/input/mmseg/trash" + +classes = ( + "Background", + "General trash", + "Paper", + "Paper pack", + "Metal", + "Glass", + "Plastic", + "Styrofoam", + "Plastic bag", + "Battery", + "Clothing", +) + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True +) +img_scale = (512, 512) +# crop_size = (256, 256) +train_pipeline = [ + dict(type="LoadImageFromFile"), + dict(type="LoadAnnotations"), + dict(type="Resize", img_scale=img_scale, ratio_range=(0.5, 2.0)), + # dict(type="RandomCrop", crop_size=crop_size, cat_max_ratio=0.75), + dict(type="RandomFlip", prob=0.5), + dict(type="PhotoMetricDistortion"), + dict(type="Normalize", **img_norm_cfg), + # dict(type="Pad", size=crop_size, pad_val=0, seg_pad_val=255), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img", "gt_semantic_seg"]), +] +test_pipeline = [ + dict(type="LoadImageFromFile"), + dict( + type="MultiScaleFlipAug", + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type="Resize", keep_ratio=True), + dict(type="RandomFlip"), + dict(type="Normalize", **img_norm_cfg), + dict(type="ImageToTensor", keys=["img"]), + dict(type="Collect", keys=["img"]), + ], + ), +] + +data = dict( + samples_per_gpu=8, + workers_per_gpu=8, + train=dict( + type="RepeatDataset", + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir="img_dir/train", + ann_dir="ann_dir/train", + pipeline=train_pipeline, + ), + ), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir="img_dir/val", + ann_dir="ann_dir/val", + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir="img_dir/test", + ann_dir="ann_dir/test", + pipeline=test_pipeline, + test_mode=True, + ), +) diff --git a/mmsegmentation/configs/_trash_/_base_/default_runtime.py b/mmsegmentation/configs/_trash_/_base_/default_runtime.py new file mode 100644 index 0000000..c59f59c --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/default_runtime.py @@ -0,0 +1,32 @@ +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type="TextLoggerHook", by_epoch=False), + # dict(type='MlflowLoggerHook'), + # dict(type='TensorboardLoggerHook'), + dict( + type="MMSegWandbHook", # "WandbLoggerHook", + init_kwargs={ + "project": "Semantic Segmentation", + "entity": "boostcamp-ai-tech-4-cv-17", + # "name": "mmseg", + }, + interval=10, + log_checkpoint=True, + log_checkpoint_metadata=True, + num_eval_images=100, + ), + ], +) +# yapf:enable + +dist_params = dict(backend="nccl") +log_level = "INFO" +load_from = None +resume_from = None + +workflow = [("train", 1)] +# workflow = [("train", 1), ("val", 1)] + +cudnn_benchmark = True diff --git a/mmsegmentation/configs/_trash_/_base_/models/ann_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/ann_r50-d8.py new file mode 100644 index 0000000..3b6e465 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/ann_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + 
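+ # The "-d8" in the file name: stages 3-4 below keep stride 1 and use dilations
+ # 2/4, giving a backbone output stride of 8 instead of 32; ResNetV1c also swaps
+ # the 7x7 stem conv for three 3x3 convs.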
depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="ANNHead", + in_channels=[1024, 2048], + in_index=[2, 3], + channels=512, + project_channels=256, + query_scales=(1,), + key_pool_scales=(1, 3, 6, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/apcnet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/apcnet_r50-d8.py new file mode 100644 index 0000000..5662334 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/apcnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="APCHead", + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type="SyncBN", requires_grad=True), + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/bisenetv1_r18-d32.py b/mmsegmentation/configs/_trash_/_base_/models/bisenetv1_r18-d32.py new file mode 100644 index 0000000..4815cb7 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/bisenetv1_r18-d32.py @@ -0,0 +1,75 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + backbone=dict( + type="BiSeNetV1", + in_channels=3, + context_channels=(128, 256, 512), + spatial_channels=(64, 64, 64, 128), + out_indices=(0, 1, 2), + out_channels=256, + backbone_cfg=dict( + type="ResNet", + in_channels=3, + depth=18, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None, + ), + decode_head=dict( + type="FCNHead", + in_channels=256, + in_index=0, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=[ + dict( + type="FCNHead", + 
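+ # These FCN auxiliary heads add deep supervision on the two context-path
+ # outputs; they contribute to the training loss only and are dropped at inference.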
in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + dict( + type="FCNHead", + in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/bisenetv2.py b/mmsegmentation/configs/_trash_/_base_/models/bisenetv2.py new file mode 100644 index 0000000..ddf72e1 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/bisenetv2.py @@ -0,0 +1,90 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained=None, + backbone=dict( + type="BiSeNetV2", + detail_channels=(64, 64, 128), + semantic_channels=(16, 32, 64, 128), + semantic_expansion_ratio=6, + bga_channels=128, + out_indices=(0, 1, 2, 3, 4), + init_cfg=None, + align_corners=False, + ), + decode_head=dict( + type="FCNHead", + in_channels=128, + in_index=0, + channels=1024, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=[ + dict( + type="FCNHead", + in_channels=16, + channels=16, + num_convs=2, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + dict( + type="FCNHead", + in_channels=32, + channels=64, + num_convs=2, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + dict( + type="FCNHead", + in_channels=64, + channels=256, + num_convs=2, + num_classes=19, + in_index=3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + dict( + type="FCNHead", + in_channels=128, + channels=1024, + num_convs=2, + num_classes=19, + in_index=4, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/ccnet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/ccnet_r50-d8.py new file mode 100644 index 0000000..9d618ca --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/ccnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="CCHead", + in_channels=2048, + in_index=3, + channels=512, + recurrence=2, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + 
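+ # recurrence=2 above runs the criss-cross attention module twice, which is what
+ # lets every pixel aggregate context from the full feature map (CCNet's R=2 setting).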
align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/cgnet.py b/mmsegmentation/configs/_trash_/_base_/models/cgnet.py new file mode 100644 index 0000000..1db8655 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/cgnet.py @@ -0,0 +1,54 @@ +# model settings +norm_cfg = dict(type="SyncBN", eps=1e-03, requires_grad=True) +model = dict( + type="EncoderDecoder", + backbone=dict( + type="CGNet", + norm_cfg=norm_cfg, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16), + ), + decode_head=dict( + type="FCNHead", + in_channels=256, + in_index=2, + channels=256, + num_convs=0, + concat_input=False, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + loss_decode=dict( + type="CrossEntropyLoss", + use_sigmoid=False, + loss_weight=1.0, + class_weight=[ + 2.5959933, + 6.7415504, + 3.5354059, + 9.8663225, + 9.690899, + 9.369352, + 10.289121, + 9.953208, + 4.3097677, + 9.490387, + 7.674431, + 9.396905, + 10.347791, + 6.3927646, + 10.226669, + 10.241062, + 10.280587, + 10.396974, + 10.055647, + ], + ), + ), + # model training and testing settings + train_cfg=dict(sampler=None), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/danet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/danet_r50-d8.py new file mode 100644 index 0000000..9609c1a --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/danet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="DAHead", + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/deeplabv3_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/deeplabv3_r50-d8.py new file mode 100644 index 0000000..6671634 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/deeplabv3_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 
2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="ASPPHead", + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/deeplabv3_unet_s5-d16.py b/mmsegmentation/configs/_trash_/_base_/models/deeplabv3_unet_s5-d16.py new file mode 100644 index 0000000..860aeb3 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/deeplabv3_unet_s5-d16.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained=None, + backbone=dict( + type="UNet", + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type="ReLU"), + upsample_cfg=dict(type="InterpConv"), + norm_eval=False, + ), + decode_head=dict( + type="ASPPHead", + in_channels=64, + in_index=4, + channels=16, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="slide", crop_size=(256, 256), stride=(170, 170)), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/deeplabv3plus_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/deeplabv3plus_r50-d8.py new file mode 100644 index 0000000..f46aac2 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/deeplabv3plus_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="DepthwiseSeparableASPPHead", + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + c1_in_channels=256, + c1_channels=48, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + 
concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/dmnet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/dmnet_r50-d8.py new file mode 100644 index 0000000..8522540 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/dmnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="DMHead", + in_channels=2048, + in_index=3, + channels=512, + filter_sizes=(1, 3, 5, 7), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type="SyncBN", requires_grad=True), + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/dnl_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/dnl_r50-d8.py new file mode 100644 index 0000000..3da793d --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/dnl_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="DNLHead", + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode="embedded_gaussian", + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/dpt_vit-b16.py b/mmsegmentation/configs/_trash_/_base_/models/dpt_vit-b16.py new file mode 100644 index 0000000..cd0b724 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/dpt_vit-b16.py @@ -0,0 +1,33 @@ +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="pretrain/vit-b16_p16_224-80ecf9dd.pth", # noqa + backbone=dict( + type="VisionTransformer", + img_size=224, + embed_dims=768, + num_layers=12, + 
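+ # Standard ViT-B/16 geometry: 12 layers, 12 heads, 768-dim tokens; out_indices
+ # below tap four evenly spaced blocks as the multi-scale features DPT reassembles.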
num_heads=12, + out_indices=(2, 5, 8, 11), + final_norm=False, + with_cls_token=True, + output_cls_token=True, + ), + decode_head=dict( + type="DPTHead", + in_channels=(768, 768, 768, 768), + channels=256, + embed_dims=768, + post_process_channels=[96, 192, 384, 768], + num_classes=150, + readout_type="project", + input_transform="multiple_select", + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=None, + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) # yapf: disable diff --git a/mmsegmentation/configs/_trash_/_base_/models/emanet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/emanet_r50-d8.py new file mode 100644 index 0000000..883ea05 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/emanet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="EMAHead", + in_channels=2048, + in_index=3, + channels=256, + ema_channels=512, + num_bases=64, + num_stages=3, + momentum=0.1, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/encnet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/encnet_r50-d8.py new file mode 100644 index 0000000..94094ee --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/encnet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="EncHead", + in_channels=[512, 1024, 2048], + in_index=(1, 2, 3), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=0.2), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/erfnet_fcn.py 
b/mmsegmentation/configs/_trash_/_base_/models/erfnet_fcn.py new file mode 100644 index 0000000..e23d9bd --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/erfnet_fcn.py @@ -0,0 +1,34 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained=None, + backbone=dict( + type="ERFNet", + in_channels=3, + enc_downsample_channels=(16, 64, 128), + enc_stage_non_bottlenecks=(5, 8), + enc_non_bottleneck_dilations=(2, 4, 8, 16), + enc_non_bottleneck_channels=(64, 128), + dec_upsample_channels=(64, 16), + dec_stages_non_bottleneck=(2, 2), + dec_non_bottleneck_channels=(64, 16), + dropout_ratio=0.1, + init_cfg=None, + ), + decode_head=dict( + type="FCNHead", + in_channels=16, + channels=128, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/fast_scnn.py b/mmsegmentation/configs/_trash_/_base_/models/fast_scnn.py new file mode 100644 index 0000000..f8c9724 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/fast_scnn.py @@ -0,0 +1,63 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True, momentum=0.01) +model = dict( + type="EncoderDecoder", + backbone=dict( + type="FastSCNN", + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + norm_cfg=norm_cfg, + align_corners=False, + ), + decode_head=dict( + type="DepthwiseSeparableFCNHead", + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1), + ), + auxiliary_head=[ + dict( + type="FCNHead", + in_channels=128, + channels=32, + num_convs=1, + num_classes=19, + in_index=-2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=True, loss_weight=0.4 + ), + ), + dict( + type="FCNHead", + in_channels=64, + channels=32, + num_convs=1, + num_classes=19, + in_index=-3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=True, loss_weight=0.4 + ), + ), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/fastfcn_r50-d32_jpu_psp.py b/mmsegmentation/configs/_trash_/_base_/models/fastfcn_r50-d32_jpu_psp.py new file mode 100644 index 0000000..d02e9f3 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/fastfcn_r50-d32_jpu_psp.py @@ -0,0 +1,56 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + dilations=(1, 1, 2, 4), + strides=(1, 2, 2, 2), + out_indices=(1, 2, 3), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + neck=dict( + type="JPU", + in_channels=(512, 1024, 2048), + mid_channels=512, + start_level=0, + end_level=-1, + dilations=(1, 
2, 4, 8), + align_corners=False, + norm_cfg=norm_cfg, + ), + decode_head=dict( + type="PSPHead", + in_channels=2048, + in_index=2, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=1, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/fcn_hr18.py b/mmsegmentation/configs/_trash_/_base_/models/fcn_hr18.py new file mode 100644 index 0000000..538936b --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/fcn_hr18.py @@ -0,0 +1,64 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +# norm_cfg = dict(type="BN", requires_grad=True) +# https://github.com/open-mmlab/mmsegmentation/issues/235#issuecomment-723787008 +# https://github.com/open-mmlab/mmsegmentation/issues/8 +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://msra/hrnetv2_w18", + backbone=dict( + type="HRNet", + # https://github.com/open-mmlab/mmsegmentation/issues/81#issuecomment-678144921 + # norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block="BOTTLENECK", + num_blocks=(4,), + num_channels=(64,), + ), + stage2=dict( + num_modules=1, + num_branches=2, + block="BASIC", + num_blocks=(4, 4), + num_channels=(18, 36), + ), + stage3=dict( + num_modules=4, + num_branches=3, + block="BASIC", + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72), + ), + stage4=dict( + num_modules=3, + num_branches=4, + block="BASIC", + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + ), + ), + ), + decode_head=dict( + type="FCNHead", + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + channels=sum([18, 36, 72, 144]), + input_transform="resize_concat", + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + # num_classes=19, + num_classes=11, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/fcn_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/fcn_r50-d8.py new file mode 100644 index 0000000..8ca03e8 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/fcn_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="FCNHead", + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + 
channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/fcn_unet_s5-d16.py b/mmsegmentation/configs/_trash_/_base_/models/fcn_unet_s5-d16.py new file mode 100644 index 0000000..a181d52 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/fcn_unet_s5-d16.py @@ -0,0 +1,53 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained=None, + backbone=dict( + type="UNet", + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type="ReLU"), + upsample_cfg=dict(type="InterpConv"), + norm_eval=False, + ), + decode_head=dict( + type="FCNHead", + in_channels=64, + in_index=4, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="slide", crop_size=(256, 256), stride=(170, 170)), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/fpn_poolformer_s12.py b/mmsegmentation/configs/_trash_/_base_/models/fpn_poolformer_s12.py new file mode 100644 index 0000000..94b3c83 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/fpn_poolformer_s12.py @@ -0,0 +1,42 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +checkpoint_file = "https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth" # noqa +custom_imports = dict(imports="mmcls.models", allow_failed_imports=False) +model = dict( + type="EncoderDecoder", + backbone=dict( + type="mmcls.PoolFormer", + arch="s12", + init_cfg=dict( + type="Pretrained", checkpoint=checkpoint_file, prefix="backbone." 
+ ), + in_patch_size=7, + in_stride=4, + in_pad=2, + down_patch_size=3, + down_stride=2, + down_pad=1, + drop_rate=0.0, + drop_path_rate=0.0, + out_indices=(0, 2, 4, 6), + frozen_stages=0, + ), + neck=dict( + type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=4 + ), + decode_head=dict( + type="FPNHead", + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/fpn_r50.py b/mmsegmentation/configs/_trash_/_base_/models/fpn_r50.py new file mode 100644 index 0000000..77058d5 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/fpn_r50.py @@ -0,0 +1,36 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + neck=dict( + type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=4 + ), + decode_head=dict( + type="FPNHead", + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/gcnet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/gcnet_r50-d8.py new file mode 100644 index 0000000..16f6261 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/gcnet_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="GCHead", + in_channels=2048, + in_index=3, + channels=512, + ratio=1 / 4.0, + pooling_type="att", + fusion_types=("channel_add",), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/icnet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/icnet_r50-d8.py new file mode 100644 index 0000000..5484955 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/icnet_r50-d8.py @@ -0,0 +1,81 @@ +# model settings +norm_cfg = 
dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + backbone=dict( + type="ICNet", + backbone_cfg=dict( + type="ResNetV1c", + in_channels=3, + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + in_channels=3, + layer_channels=(512, 2048), + light_branch_middle_channels=32, + psp_out_channels=512, + out_channels=(64, 256, 256), + norm_cfg=norm_cfg, + align_corners=False, + ), + neck=dict( + type="ICNeck", + in_channels=(64, 256, 256), + out_channels=128, + norm_cfg=norm_cfg, + align_corners=False, + ), + decode_head=dict( + type="FCNHead", + in_channels=128, + channels=128, + num_convs=1, + in_index=2, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=[ + dict( + type="FCNHead", + in_channels=128, + channels=128, + num_convs=1, + num_classes=19, + in_index=0, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + dict( + type="FCNHead", + in_channels=128, + channels=128, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/isanet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/isanet_r50-d8.py new file mode 100644 index 0000000..65521a2 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/isanet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="ISAHead", + in_channels=2048, + in_index=3, + channels=512, + isa_channels=256, + down_factor=(8, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/lraspp_m-v3-d8.py b/mmsegmentation/configs/_trash_/_base_/models/lraspp_m-v3-d8.py new file mode 100644 index 0000000..9c0e91a --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/lraspp_m-v3-d8.py @@ -0,0 +1,24 @@ +# model settings +norm_cfg = dict(type="SyncBN", eps=0.001, requires_grad=True) +model = dict( + type="EncoderDecoder", + backbone=dict( + type="MobileNetV3", arch="large", out_indices=(1, 3, 16), norm_cfg=norm_cfg + ), + decode_head=dict( + type="LRASPPHead", + in_channels=(16, 24, 
960), + in_index=(0, 1, 2), + channels=128, + input_transform="multiple_select", + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type="ReLU"), + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/nonlocal_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/nonlocal_r50-d8.py new file mode 100644 index 0000000..31fdce5 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/nonlocal_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="NLHead", + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode="embedded_gaussian", + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/ocrnet_hr18.py b/mmsegmentation/configs/_trash_/_base_/models/ocrnet_hr18.py new file mode 100644 index 0000000..7fccc99 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/ocrnet_hr18.py @@ -0,0 +1,79 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="CascadeEncoderDecoder", + num_stages=2, + pretrained="open-mmlab://msra/hrnetv2_w18", + backbone=dict( + type="HRNet", + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block="BOTTLENECK", + num_blocks=(4,), + num_channels=(64,), + ), + stage2=dict( + num_modules=1, + num_branches=2, + block="BASIC", + num_blocks=(4, 4), + num_channels=(18, 36), + ), + stage3=dict( + num_modules=4, + num_branches=3, + block="BASIC", + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72), + ), + stage4=dict( + num_modules=3, + num_branches=4, + block="BASIC", + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + ), + ), + ), + decode_head=[ + dict( + type="FCNHead", + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform="resize_concat", + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + dict( + type="OCRHead", + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform="resize_concat", + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + ], + # model training and 
testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/ocrnet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/ocrnet_r50-d8.py new file mode 100644 index 0000000..e920b79 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/ocrnet_r50-d8.py @@ -0,0 +1,53 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="CascadeEncoderDecoder", + num_stages=2, + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=[ + dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + dict( + type="OCRHead", + in_channels=2048, + in_index=3, + channels=512, + ocr_channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/pointrend_r50.py b/mmsegmentation/configs/_trash_/_base_/models/pointrend_r50.py new file mode 100644 index 0000000..25bb6bf --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/pointrend_r50.py @@ -0,0 +1,57 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="CascadeEncoderDecoder", + num_stages=2, + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + neck=dict( + type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=4 + ), + decode_head=[ + dict( + type="FPNHead", + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + dict( + type="PointHead", + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + ], + # model training and testing settings + train_cfg=dict(num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), + test_cfg=dict( + mode="whole", subdivision_steps=2, subdivision_num_points=8196, scale_factor=2 + ), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/psanet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/psanet_r50-d8.py new file mode 100644 index 0000000..e9209db --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/psanet_r50-d8.py @@ -0,0 +1,51 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + 
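+ # NOTE: PSAHead's mask_size=(97, 97) further down matches the 1/8-resolution
+ # feature map of the 769x769 Cityscapes crops this base config was written for;
+ # it is inherited here rather than re-tuned for 512x512 trash images.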
num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="PSAHead", + in_channels=2048, + in_index=3, + channels=512, + mask_size=(97, 97), + psa_type="bi-direction", + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/pspnet_r50-d8.py b/mmsegmentation/configs/_trash_/_base_/models/pspnet_r50-d8.py new file mode 100644 index 0000000..136d6ff --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/pspnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="PSPHead", + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/pspnet_unet_s5-d16.py b/mmsegmentation/configs/_trash_/_base_/models/pspnet_unet_s5-d16.py new file mode 100644 index 0000000..4e8f915 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/pspnet_unet_s5-d16.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained=None, + backbone=dict( + type="UNet", + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type="ReLU"), + upsample_cfg=dict(type="InterpConv"), + norm_eval=False, + ), + decode_head=dict( + type="PSPHead", + in_channels=64, + in_index=4, + channels=16, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + 
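+ # NOTE: this UNet config tests with mode="slide" (see test_cfg at the end of
+ # the file): overlapping 256x256 windows at stride 170 keep inference at the
+ # crop size the decoder geometry expects.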
concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="slide", crop_size=(256, 256), stride=(170, 170)), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/segformer_mit-b0.py b/mmsegmentation/configs/_trash_/_base_/models/segformer_mit-b0.py new file mode 100644 index 0000000..cd90c1e --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/segformer_mit-b0.py @@ -0,0 +1,36 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained=None, + backbone=dict( + type="MixVisionTransformer", + in_channels=3, + embed_dims=32, + num_stages=4, + num_layers=[2, 2, 2, 2], + num_heads=[1, 2, 5, 8], + patch_sizes=[7, 3, 3, 3], + sr_ratios=[8, 4, 2, 1], + out_indices=(0, 1, 2, 3), + mlp_ratio=4, + qkv_bias=True, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + ), + decode_head=dict( + type="SegformerHead", + in_channels=[32, 64, 160, 256], + in_index=[0, 1, 2, 3], + channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/segmenter_vit-b16_mask.py b/mmsegmentation/configs/_trash_/_base_/models/segmenter_vit-b16_mask.py new file mode 100644 index 0000000..4f5f235 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/segmenter_vit-b16_mask.py @@ -0,0 +1,35 @@ +checkpoint = "https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth" # noqa +# model settings +backbone_norm_cfg = dict(type="LN", eps=1e-6, requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained=checkpoint, + backbone=dict( + type="VisionTransformer", + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + drop_path_rate=0.1, + attn_drop_rate=0.0, + drop_rate=0.0, + final_norm=True, + norm_cfg=backbone_norm_cfg, + with_cls_token=True, + interpolate_mode="bicubic", + ), + decode_head=dict( + type="SegmenterMaskTransformerHead", + in_channels=768, + channels=768, + num_classes=150, + num_layers=2, + num_heads=12, + embed_dims=768, + dropout_ratio=0.0, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + test_cfg=dict(mode="slide", crop_size=(512, 512), stride=(480, 480)), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/setr_mla.py b/mmsegmentation/configs/_trash_/_base_/models/setr_mla.py new file mode 100644 index 0000000..0779002 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/setr_mla.py @@ -0,0 +1,104 @@ +# model settings +backbone_norm_cfg = dict(type="LN", eps=1e-6, requires_grad=True) +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="pretrain/jx_vit_large_p16_384-b3be5167.pth", + backbone=dict( + type="VisionTransformer", + img_size=(768, 768), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + out_indices=(5, 11, 17, 23), + drop_rate=0.1, + norm_cfg=backbone_norm_cfg, + with_cls_token=False, + interpolate_mode="bilinear", + ), + neck=dict( + type="MLANeck", + 
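+ # The MLA neck projects each of the four 1024-d ViT-L feature maps (blocks
+ # 6/12/18/24 via out_indices above) down to 256 channels for the SETR-MLA head.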
in_channels=[1024, 1024, 1024, 1024], + out_channels=256, + norm_cfg=norm_cfg, + act_cfg=dict(type="ReLU"), + ), + decode_head=dict( + type="SETRMLAHead", + in_channels=(256, 256, 256, 256), + channels=512, + in_index=(0, 1, 2, 3), + dropout_ratio=0, + mla_channels=128, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=[ + dict( + type="FCNHead", + in_channels=256, + channels=256, + in_index=0, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + dict( + type="FCNHead", + in_channels=256, + channels=256, + in_index=1, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + dict( + type="FCNHead", + in_channels=256, + channels=256, + in_index=2, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + dict( + type="FCNHead", + in_channels=256, + channels=256, + in_index=3, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + ], + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/setr_naive.py b/mmsegmentation/configs/_trash_/_base_/models/setr_naive.py new file mode 100644 index 0000000..cd817ef --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/setr_naive.py @@ -0,0 +1,87 @@ +# model settings +backbone_norm_cfg = dict(type="LN", eps=1e-6, requires_grad=True) +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="pretrain/jx_vit_large_p16_384-b3be5167.pth", + backbone=dict( + type="VisionTransformer", + img_size=(768, 768), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + out_indices=(9, 14, 19, 23), + drop_rate=0.1, + norm_cfg=backbone_norm_cfg, + with_cls_token=True, + interpolate_mode="bilinear", + ), + decode_head=dict( + type="SETRUPHead", + in_channels=1024, + channels=256, + in_index=3, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=[ + dict( + type="SETRUPHead", + in_channels=1024, + channels=256, + in_index=0, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + dict( + type="SETRUPHead", + in_channels=1024, + channels=256, + in_index=1, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + dict( + type="SETRUPHead", + in_channels=1024, + channels=256, + in_index=2, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + 
loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + ], + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/setr_pup.py b/mmsegmentation/configs/_trash_/_base_/models/setr_pup.py new file mode 100644 index 0000000..8a9ffbe --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/setr_pup.py @@ -0,0 +1,87 @@ +# model settings +backbone_norm_cfg = dict(type="LN", eps=1e-6, requires_grad=True) +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="pretrain/jx_vit_large_p16_384-b3be5167.pth", + backbone=dict( + type="VisionTransformer", + img_size=(768, 768), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + out_indices=(9, 14, 19, 23), + drop_rate=0.1, + norm_cfg=backbone_norm_cfg, + with_cls_token=True, + interpolate_mode="bilinear", + ), + decode_head=dict( + type="SETRUPHead", + in_channels=1024, + channels=256, + in_index=3, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=4, + up_scale=2, + kernel_size=3, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=[ + dict( + type="SETRUPHead", + in_channels=1024, + channels=256, + in_index=0, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + dict( + type="SETRUPHead", + in_channels=1024, + channels=256, + in_index=1, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + dict( + type="SETRUPHead", + in_channels=1024, + channels=256, + in_index=2, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ), + ], + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/stdc.py b/mmsegmentation/configs/_trash_/_base_/models/stdc.py new file mode 100644 index 0000000..66ba763 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/stdc.py @@ -0,0 +1,92 @@ +norm_cfg = dict(type="BN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained=None, + backbone=dict( + type="STDCContextPathNet", + backbone_cfg=dict( + type="STDCNet", + stdc_type="STDCNet1", + in_channels=3, + channels=(32, 64, 256, 512, 1024), + bottleneck_type="cat", + num_convs=4, + norm_cfg=norm_cfg, + act_cfg=dict(type="ReLU"), + with_final_conv=False, + ), + last_in_channels=(1024, 512), + out_channels=128, + ffm_cfg=dict(in_channels=384, out_channels=256, scale_factor=4), + ), + decode_head=dict( + type="FCNHead", + in_channels=256, + channels=256, + num_convs=1, + num_classes=19, + in_index=3, + concat_input=False, + dropout_ratio=0.1, + norm_cfg=norm_cfg, + align_corners=True, + sampler=dict(type="OHEMPixelSampler", thresh=0.7, min_kept=10000), + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=[ + dict( + type="FCNHead", + in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + 
sampler=dict(type="OHEMPixelSampler", thresh=0.7, min_kept=10000), + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + dict( + type="FCNHead", + in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + sampler=dict(type="OHEMPixelSampler", thresh=0.7, min_kept=10000), + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0 + ), + ), + dict( + type="STDCHead", + in_channels=256, + channels=64, + num_convs=1, + num_classes=2, + boundary_threshold=0.1, + in_index=0, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=True, + loss_decode=[ + dict( + type="CrossEntropyLoss", + loss_name="loss_ce", + use_sigmoid=True, + loss_weight=1.0, + ), + dict(type="DiceLoss", loss_name="loss_dice", loss_weight=1.0), + ], + ), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/twins_pcpvt-s_fpn.py b/mmsegmentation/configs/_trash_/_base_/models/twins_pcpvt-s_fpn.py new file mode 100644 index 0000000..ebea511 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/twins_pcpvt-s_fpn.py @@ -0,0 +1,45 @@ +checkpoint = "https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth" # noqa + +# model settings +backbone_norm_cfg = dict(type="LN") +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + backbone=dict( + type="PCPVT", + init_cfg=dict(type="Pretrained", checkpoint=checkpoint), + in_channels=3, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + patch_sizes=[4, 2, 2, 2], + strides=[4, 2, 2, 2], + mlp_ratios=[8, 8, 4, 4], + out_indices=(0, 1, 2, 3), + qkv_bias=True, + norm_cfg=backbone_norm_cfg, + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + norm_after_stage=False, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.2, + ), + neck=dict( + type="FPN", in_channels=[64, 128, 320, 512], out_channels=256, num_outs=4 + ), + decode_head=dict( + type="FPNHead", + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/twins_pcpvt-s_upernet.py b/mmsegmentation/configs/_trash_/_base_/models/twins_pcpvt-s_upernet.py new file mode 100644 index 0000000..8b067a4 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/twins_pcpvt-s_upernet.py @@ -0,0 +1,55 @@ +checkpoint = "https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth" # noqa + +# model settings +backbone_norm_cfg = dict(type="LN") +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + backbone=dict( + type="PCPVT", + init_cfg=dict(type="Pretrained", checkpoint=checkpoint), + in_channels=3, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + patch_sizes=[4, 2, 2, 2], + strides=[4, 2, 2, 2], + mlp_ratios=[8, 8, 4, 4], + out_indices=(0, 1, 2, 3), + qkv_bias=True, + norm_cfg=backbone_norm_cfg, + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + norm_after_stage=False, + drop_rate=0.0, + attn_drop_rate=0.0, + 
drop_path_rate=0.2, + ), + decode_head=dict( + type="UPerHead", + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=320, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/upernet_beit.py b/mmsegmentation/configs/_trash_/_base_/models/upernet_beit.py new file mode 100644 index 0000000..73795ba --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/upernet_beit.py @@ -0,0 +1,52 @@ +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained=None, + backbone=dict( + type="BEiT", + img_size=(640, 640), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=(3, 5, 7, 11), + qv_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type="LN", eps=1e-6), + act_cfg=dict(type="GELU"), + norm_eval=False, + init_values=0.1, + ), + neck=dict(type="Feature2Pyramid", embed_dim=768, rescales=[4, 2, 1, 0.5]), + decode_head=dict( + type="UPerHead", + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=768, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=768, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/upernet_convnext.py b/mmsegmentation/configs/_trash_/_base_/models/upernet_convnext.py new file mode 100644 index 0000000..655579b --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/upernet_convnext.py @@ -0,0 +1,46 @@ +norm_cfg = dict(type="SyncBN", requires_grad=True) +custom_imports = dict(imports="mmcls.models", allow_failed_imports=False) +checkpoint_file = "https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth" # noqa +model = dict( + type="EncoderDecoder", + pretrained=None, + backbone=dict( + type="mmcls.ConvNeXt", + arch="base", + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type="Pretrained", checkpoint=checkpoint_file, prefix="backbone." 
+ ), + ), + decode_head=dict( + type="UPerHead", + in_channels=[128, 256, 512, 1024], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=384, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/upernet_mae.py b/mmsegmentation/configs/_trash_/_base_/models/upernet_mae.py new file mode 100644 index 0000000..8a3cfbc --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/upernet_mae.py @@ -0,0 +1,51 @@ +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained=None, + backbone=dict( + type="MAE", + img_size=(640, 640), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=(3, 5, 7, 11), + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type="LN", eps=1e-6), + act_cfg=dict(type="GELU"), + norm_eval=False, + init_values=0.1, + ), + neck=dict(type="Feature2Pyramid", embed_dim=768, rescales=[4, 2, 1, 0.5]), + decode_head=dict( + type="UPerHead", + in_channels=[384, 384, 384, 384], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=384, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/upernet_r50.py b/mmsegmentation/configs/_trash_/_base_/models/upernet_r50.py new file mode 100644 index 0000000..9ca030e --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/upernet_r50.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="open-mmlab://resnet50_v1c", + backbone=dict( + type="ResNetV1c", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style="pytorch", + contract_dilation=True, + ), + decode_head=dict( + type="UPerHead", + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + 
test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/upernet_swin.py b/mmsegmentation/configs/_trash_/_base_/models/upernet_swin.py new file mode 100644 index 0000000..dbc9795 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/upernet_swin.py @@ -0,0 +1,56 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +backbone_norm_cfg = dict(type="LN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained=None, + backbone=dict( + type="SwinTransformer", + pretrain_img_size=224, + embed_dims=96, + patch_size=4, + window_size=7, + mlp_ratio=4, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + strides=(4, 2, 2, 2), + out_indices=(0, 1, 2, 3), + qkv_bias=True, + qk_scale=None, + patch_norm=True, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.3, + use_abs_pos_embed=False, + act_cfg=dict(type="GELU"), + norm_cfg=backbone_norm_cfg, + ), + decode_head=dict( + type="UPerHead", + in_channels=[96, 192, 384, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=384, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/mmsegmentation/configs/_trash_/_base_/models/upernet_vit-b16_ln_mln.py b/mmsegmentation/configs/_trash_/_base_/models/upernet_vit-b16_ln_mln.py new file mode 100644 index 0000000..c9ed2cd --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/models/upernet_vit-b16_ln_mln.py @@ -0,0 +1,60 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +model = dict( + type="EncoderDecoder", + pretrained="pretrain/jx_vit_base_p16_224-80ecf9dd.pth", + backbone=dict( + type="VisionTransformer", + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=(2, 5, 8, 11), + qkv_bias=True, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.0, + with_cls_token=True, + norm_cfg=dict(type="LN", eps=1e-6), + act_cfg=dict(type="GELU"), + norm_eval=False, + interpolate_mode="bicubic", + ), + neck=dict( + type="MultiLevelNeck", + in_channels=[768, 768, 768, 768], + out_channels=768, + scales=[4, 2, 1, 0.5], + ), + decode_head=dict( + type="UPerHead", + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=768, + in_index=3, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) # yapf: disable diff --git a/mmsegmentation/configs/_trash_/_base_/schedules/schedule_160k.py b/mmsegmentation/configs/_trash_/_base_/schedules/schedule_160k.py new file mode 100644 index 
0000000..b8107b1 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/schedules/schedule_160k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy="poly", power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type="IterBasedRunner", max_iters=160000) +checkpoint_config = dict(by_epoch=False, interval=16000) +evaluation = dict(interval=16000, metric="mIoU", pre_eval=True) diff --git a/mmsegmentation/configs/_trash_/_base_/schedules/schedule_20k.py b/mmsegmentation/configs/_trash_/_base_/schedules/schedule_20k.py new file mode 100644 index 0000000..c4db396 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/schedules/schedule_20k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy="poly", power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type="IterBasedRunner", max_iters=20000) +checkpoint_config = dict(by_epoch=False, interval=2000) +evaluation = dict(interval=2000, metric="mIoU", pre_eval=True) diff --git a/mmsegmentation/configs/_trash_/_base_/schedules/schedule_320k.py b/mmsegmentation/configs/_trash_/_base_/schedules/schedule_320k.py new file mode 100644 index 0000000..c81d6c5 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/schedules/schedule_320k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy="poly", power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type="IterBasedRunner", max_iters=320000) +checkpoint_config = dict(by_epoch=False, interval=32000) +evaluation = dict(interval=32000, metric="mIoU") diff --git a/mmsegmentation/configs/_trash_/_base_/schedules/schedule_40k.py b/mmsegmentation/configs/_trash_/_base_/schedules/schedule_40k.py new file mode 100644 index 0000000..71ac480 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/schedules/schedule_40k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy="poly", power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type="IterBasedRunner", max_iters=40000) +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric="mIoU", pre_eval=True) diff --git a/mmsegmentation/configs/_trash_/_base_/schedules/schedule_80k.py b/mmsegmentation/configs/_trash_/_base_/schedules/schedule_80k.py new file mode 100644 index 0000000..6dcf875 --- /dev/null +++ b/mmsegmentation/configs/_trash_/_base_/schedules/schedule_80k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy="poly", power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type="IterBasedRunner", max_iters=80000) +checkpoint_config = dict(by_epoch=False, interval=8000) +evaluation = dict(interval=8000, metric="mIoU", pre_eval=True) diff --git a/mmsegmentation/configs/_trash_/hrnet/README.md b/mmsegmentation/configs/_trash_/hrnet/README.md new file mode 100644 index 0000000..9ebbf4d --- /dev/null +++ b/mmsegmentation/configs/_trash_/hrnet/README.md @@ -0,0 +1,122 @@ +# HRNet + +[Deep High-Resolution 
Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1908.07919) + +## Introduction + +Official Repo + +Code Snippet + +## Abstract + +High-resolution representations are essential for position-sensitive vision problems such as human pose estimation, semantic segmentation, and object detection. Existing state-of-the-art frameworks first encode the input image as a low-resolution representation through a subnetwork formed by connecting high-to-low resolution convolutions *in series* (e.g., ResNet, VGGNet), and then recover the high-resolution representation from that encoded low-resolution representation. Instead, our proposed network, named the High-Resolution Network (HRNet), maintains high-resolution representations throughout the whole process. There are two key characteristics: (i) the high-to-low resolution convolution streams are connected *in parallel*; (ii) information is repeatedly exchanged across resolutions. The benefit is that the resulting representation is semantically richer and spatially more precise. We show the superiority of the proposed HRNet in a wide range of applications, including human pose estimation, semantic segmentation, and object detection, suggesting that HRNet is a stronger backbone for computer vision problems. All the code is available at [this https URL](https://github.com/HRNet). + + + +
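The two characteristics named in the abstract — parallel multi-resolution streams and repeated cross-resolution exchange — are easy to miss in prose. The following is a minimal, illustrative sketch of one exchange step between two parallel streams; it is an assumption-laden toy, not the HRNet implementation used by the configs in this diff.

```python
# Minimal sketch (illustrative only) of HRNet's cross-resolution exchange:
# each stream keeps its own resolution, and features are resampled and
# summed into the other stream at every exchange unit.
import torch.nn as nn
import torch.nn.functional as F

class ExchangeUnit(nn.Module):
    """Fuse a high-resolution and a low-resolution stream by resampling."""
    def __init__(self, ch_high, ch_low):
        super().__init__()
        # strided 3x3 conv downsamples the high-res branch by 2
        self.high_to_low = nn.Conv2d(ch_high, ch_low, 3, stride=2, padding=1)
        # 1x1 conv matches channels before upsampling the low-res branch
        self.low_to_high = nn.Conv2d(ch_low, ch_high, 1)

    def forward(self, x_high, x_low):
        # high-res branch receives upsampled low-res features, and vice versa
        fused_high = x_high + F.interpolate(
            self.low_to_high(x_low), size=x_high.shape[2:],
            mode="bilinear", align_corners=False)
        fused_low = x_low + self.high_to_low(x_high)
        return fused_high, fused_low
```

Separately, it is worth making explicit how the `_base_` fragments added in this diff are meant to be consumed. The model bases above keep their upstream `num_classes` defaults (19 or 150), while `trash.py` defines 11 classes, so a top-level config has to both compose the fragments and override the head. The sketch below is a hypothetical file — its path, name, and override values are assumptions, not part of this diff:

```python
# Hypothetical example: mmsegmentation/configs/_trash_/segformer/
# segformer_mit-b0_512x512_40k_trash.py (filename and overrides are assumed).
_base_ = [
    "../_base_/models/segformer_mit-b0.py",  # architecture
    "../_base_/datasets/trash.py",           # COCOTrashDataset, 11 classes
    "../_base_/default_runtime.py",          # logging / wandb hooks
    "../_base_/schedules/schedule_40k.py",   # SGD + poly LR, 40k iterations
]

# The base model ships with the upstream default (num_classes=19), so the
# decode head must be re-pointed at the 11 trash classes from trash.py.
model = dict(decode_head=dict(num_classes=11))
```

With such a file in place, training would go through the usual MMSegmentation entry point, e.g. `python tools/train.py configs/_trash_/segformer/segformer_mit-b0_512x512_40k_trash.py`.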