**LICENSE**

MIT License

Copyright (c) 2020 Gongfan Fang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
**README.md**

# DeepLabv3Plus-Pytorch

DeepLabv3 and DeepLabv3+ with pretrained models for Pascal VOC & Cityscapes.

## Quick Start

### 1. Available Architectures
Specify the model architecture with `--model ARCH_NAME` and set the output stride with `--output_stride OUTPUT_STRIDE`.

| DeepLabV3 | DeepLabV3+ |
| :---: | :---: |
| deeplabv3_resnet50 | deeplabv3plus_resnet50 |
| deeplabv3_resnet101 | deeplabv3plus_resnet101 |
| deeplabv3_mobilenet | deeplabv3plus_mobilenet |
| deeplabv3_hrnetv2_48 | deeplabv3plus_hrnetv2_48 |
| deeplabv3_hrnetv2_32 | deeplabv3plus_hrnetv2_32 |

All pretrained models: [Dropbox](https://www.dropbox.com/sh/w3z9z8lqpi8b2w7/AAB0vkl4F5vy6HdIhmRCTKHSa?dl=0), [Tencent Weiyun](https://share.weiyun.com/qqx78Pv5)

Note: The HRNet backbone was contributed by @timothylimyl. A pretrained backbone is available at [Google Drive](https://drive.google.com/file/d/1NxCK7Zgn5PmeS7W1jYLt5J9E0RRZ2oyF/view?usp=sharing).

### 2. Load the pretrained model:
```python
model.load_state_dict(torch.load(CKPT_PATH)['model_state'])
```
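
For a complete, runnable example, the model must be constructed first. A minimal sketch, assuming the factory functions in `network.modeling` (this is how `main.py` builds models; names and signatures may differ in your checkout):

```python
import torch

# Assumption: network/modeling.py exposes one factory per architecture name above.
from network.modeling import deeplabv3plus_mobilenet

# num_classes and output_stride must match the checkpoint being loaded.
model = deeplabv3plus_mobilenet(num_classes=21, output_stride=16)
checkpoint = torch.load(CKPT_PATH, map_location='cpu')  # CKPT_PATH: path to a downloaded .pth file
model.load_state_dict(checkpoint['model_state'])
model.eval()
```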
### 3. Visualize segmentation outputs:
```python
outputs = model(images)
preds = outputs.max(1)[1].detach().cpu().numpy()
colorized_preds = val_dst.decode_target(preds).astype('uint8')  # to RGB images, (N, H, W, 3), values in 0-255, numpy array
# Do whatever you like here with the colorized segmentation maps
colorized_preds = Image.fromarray(colorized_preds[0])  # to PIL Image (requires `from PIL import Image`)
```
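
The resulting PIL image can then be saved or displayed like any other, e.g. `colorized_preds.save('sample_pred.png')` (hypothetical output path).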

### 4. Atrous Separable Convolution

**Note**: the pretrained models in this repo **do not** use separable convolutions.

Atrous Separable Convolution is supported in this repo. We provide a simple tool, ``network.convert_to_separable_conv``, to convert ``nn.Conv2d`` to ``AtrousSeparableConvolution``. **Please run main.py with `--separable_conv` if it is required.** See `main.py` and `network/_deeplab.py` for more details; a usage sketch follows.
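
A hedged sketch of the conversion (mirroring what `main.py` does when `--separable_conv` is passed; `model.classifier` is assumed to be the decoder head of a `deeplabv3plus` model):

```python
import network
from network.modeling import deeplabv3plus_resnet101  # assumed factory, see network/modeling.py

model = deeplabv3plus_resnet101(num_classes=21, output_stride=16)
# Replace nn.Conv2d modules in the decoder with AtrousSeparableConvolution, in place.
network.convert_to_separable_conv(model.classifier)
```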

### 5. Prediction

Single image:
```bash
python predict.py --input datasets/data/cityscapes/leftImg8bit/train/bremen/bremen_000000_000019_leftImg8bit.png --dataset cityscapes --model deeplabv3plus_mobilenet --ckpt checkpoints/best_deeplabv3plus_mobilenet_cityscapes_os16.pth --save_val_results_to test_results
```

Image folder:
```bash
python predict.py --input datasets/data/cityscapes/leftImg8bit/train/bremen --dataset cityscapes --model deeplabv3plus_mobilenet --ckpt checkpoints/best_deeplabv3plus_mobilenet_cityscapes_os16.pth --save_val_results_to test_results
```
## Results

### 1. Performance on Pascal VOC2012 Aug (21 classes, 513 x 513)

Training: 513x513 random crop
Validation: 513x513 center crop

| Model | Batch Size | FLOPs | train/val OS | mIoU | Dropbox | Tencent Weiyun |
| :-------- | :-------------: | :----: | :-----------: | :--------: | :--------: | :----: |
| DeepLabV3-MobileNet | 16 | 6.0G | 16/16 | 0.701 | [Download](https://www.dropbox.com/s/uhksxwfcim3nkpo/best_deeplabv3_mobilenet_voc_os16.pth?dl=0) | [Download](https://share.weiyun.com/A4ubD1DD) |
| DeepLabV3-ResNet50 | 16 | 51.4G | 16/16 | 0.769 | [Download](https://www.dropbox.com/s/3eag5ojccwiexkq/best_deeplabv3_resnet50_voc_os16.pth?dl=0) | [Download](https://share.weiyun.com/33eLjnVL) |
| DeepLabV3-ResNet101 | 16 | 72.1G | 16/16 | 0.773 | [Download](https://www.dropbox.com/s/vtenndnsrnh4068/best_deeplabv3_resnet101_voc_os16.pth?dl=0) | [Download](https://share.weiyun.com/iCkzATAw) |
| DeepLabV3Plus-MobileNet | 16 | 17.0G | 16/16 | 0.711 | [Download](https://www.dropbox.com/s/0idrhwz6opaj7q4/best_deeplabv3plus_mobilenet_voc_os16.pth?dl=0) | [Download](https://share.weiyun.com/djX6MDwM) |
| DeepLabV3Plus-ResNet50 | 16 | 62.7G | 16/16 | 0.772 | [Download](https://www.dropbox.com/s/dgxyd3jkyz24voa/best_deeplabv3plus_resnet50_voc_os16.pth?dl=0) | [Download](https://share.weiyun.com/uTM4i2jG) |
| DeepLabV3Plus-ResNet101 | 16 | 83.4G | 16/16 | 0.783 | [Download](https://www.dropbox.com/s/bm3hxe7wmakaqc5/best_deeplabv3plus_resnet101_voc_os16.pth?dl=0) | [Download](https://share.weiyun.com/UNPZr3dk) |
### 2. Performance on Cityscapes (19 classes, 1024 x 2048)

Training: 768x768 random crop
Validation: 1024x2048

| Model | Batch Size | FLOPs | train/val OS | mIoU | Dropbox | Tencent Weiyun |
| :-------- | :-------------: | :----: | :-----------: | :--------: | :--------: | :----: |
| DeepLabV3Plus-MobileNet | 16 | 135G | 16/16 | 0.721 | [Download](https://www.dropbox.com/s/753ojyvsh3vdjol/best_deeplabv3plus_mobilenet_cityscapes_os16.pth?dl=0) | [Download](https://share.weiyun.com/aSKjdpbL) |
| DeepLabV3Plus-ResNet101 | 16 | N/A | 16/16 | 0.762 | [Download](https://drive.google.com/file/d/1t7TC8mxQaFECt4jutdq_NMnWxdm6B-Nb/view?usp=sharing) | Coming Soon |

#### Segmentation Results on Pascal VOC2012 (DeepLabv3Plus-MobileNet)

<div>
<img src="samples/1_image.png" width="20%">
<img src="samples/1_target.png" width="20%">
<img src="samples/1_pred.png" width="20%">
<img src="samples/1_overlay.png" width="20%">
</div>

<div>
<img src="samples/23_image.png" width="20%">
<img src="samples/23_target.png" width="20%">
<img src="samples/23_pred.png" width="20%">
<img src="samples/23_overlay.png" width="20%">
</div>

<div>
<img src="samples/114_image.png" width="20%">
<img src="samples/114_target.png" width="20%">
<img src="samples/114_pred.png" width="20%">
<img src="samples/114_overlay.png" width="20%">
</div>

#### Segmentation Results on Cityscapes (DeepLabv3Plus-MobileNet)

<div>
<img src="samples/city_1_target.png" width="45%">
<img src="samples/city_1_overlay.png" width="45%">
</div>

<div>
<img src="samples/city_6_target.png" width="45%">
<img src="samples/city_6_overlay.png" width="45%">
</div>

#### Visualization of training

![trainvis](samples/visdom-screenshoot.png)

## Pascal VOC

### 1. Requirements

```bash
pip install -r requirements.txt
```

### 2. Prepare Datasets

#### 2.1 Standard Pascal VOC
You can run main.py with the `--download` option to download and extract the Pascal VOC dataset. The default path is `./datasets/data`:

```
/datasets
    /data
        /VOCdevkit
            /VOC2012
                /SegmentationClass
                /JPEGImages
                ...
            ...
        /VOCtrainval_11-May-2012.tar
        ...
```

#### 2.2 Pascal VOC trainaug (Recommended!!)

See chapter 4 of [2].

The original dataset contains 1464 (train), 1449 (val), and 1456 (test) pixel-level annotated images. We augment the dataset with the extra annotations provided by the Semantic Boundaries Dataset (SBD), resulting in 10582 (trainaug) training images. Performance is measured as pixel intersection-over-union averaged across the 21 classes (mIoU).

*./datasets/data/train_aug.txt* includes the file names of the 10582 trainaug images (val images are excluded). Please download their labels from [Dropbox](https://www.dropbox.com/s/oeu149j8qtbs1x0/SegmentationClassAug.zip?dl=0) or [Tencent Weiyun](https://share.weiyun.com/5NmJ6Rk). These labels come from [DrSleep's repo](https://github.com/DrSleep/tensorflow-deeplab-resnet).

Extract the trainaug labels (SegmentationClassAug) to the VOC2012 directory.

```
/datasets
    /data
        /VOCdevkit
            /VOC2012
                /SegmentationClass
                /SegmentationClassAug  # <= the trainaug labels
                /JPEGImages
                ...
            ...
        /VOCtrainval_11-May-2012.tar
        ...
```
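
Once the folders are in place, the split can be loaded directly. A minimal sketch, assuming the `VOCSegmentation` arguments used by `main.py` (`root`, `year`, `image_set`, `download`):

```python
from datasets import VOCSegmentation

# year='2012_aug' selects the trainaug split; the data must already be extracted as above.
train_dst = VOCSegmentation(root='./datasets/data', year='2012_aug',
                            image_set='train', download=False)
print(len(train_dst))  # expected: 10582 trainaug images
```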

### 3. Training on Pascal VOC2012 Aug

#### 3.1 Visualize training (Optional)

Start the visdom server for visualization. Please remove `--enable_vis` if visualization is not needed.

```bash
# Run visdom server on port 28333
visdom -port 28333
```

#### 3.2 Training with OS=16

Run main.py with *"--year 2012_aug"* to train your model on Pascal VOC2012 Aug. You can also parallelize training across 4 GPUs with `--gpu_id 0,1,2,3`.

**Note: There is no SyncBN in this repo, so training with *multiple GPUs and a small batch size* may degrade performance. See [PyTorch-Encoding](https://hangzhang.org/PyTorch-Encoding/tutorials/syncbn.html) for more details about SyncBN.**

```bash
python main.py --model deeplabv3plus_mobilenet --enable_vis --vis_port 28333 --gpu_id 0 --year 2012_aug --crop_val --lr 0.01 --crop_size 513 --batch_size 16 --output_stride 16
```

#### 3.3 Continue training

Run main.py with `--continue_training` to restore the state_dict of the optimizer and scheduler from YOUR_CKPT.

```bash
python main.py ... --ckpt YOUR_CKPT --continue_training
```

#### 3.4 Testing

Results will be saved at `./results`.

```bash
python main.py --model deeplabv3plus_mobilenet --enable_vis --vis_port 28333 --gpu_id 0 --year 2012_aug --crop_val --lr 0.01 --crop_size 513 --batch_size 16 --output_stride 16 --ckpt checkpoints/best_deeplabv3plus_mobilenet_voc_os16.pth --test_only --save_val_results
```

## Cityscapes

### 1. Download Cityscapes and extract it to 'datasets/data/cityscapes'

```
/datasets
    /data
        /cityscapes
            /gtFine
            /leftImg8bit
```
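
Once extracted, the dataset can be instantiated for a quick sanity check. A sketch, assuming the `root`/`split` constructor arguments used by `datasets/cityscapes.py`:

```python
from datasets import Cityscapes

# Expects gtFine/ and leftImg8bit/ under the given root, laid out as above.
train_dst = Cityscapes(root='./datasets/data/cityscapes', split='train')
img, target = train_dst[0]
```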

### 2. Train your model on Cityscapes

```bash
python main.py --model deeplabv3plus_mobilenet --dataset cityscapes --enable_vis --vis_port 28333 --gpu_id 0 --lr 0.1 --crop_size 768 --batch_size 16 --output_stride 16 --data_root ./datasets/data/cityscapes
```

## Reference

[1] [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587)

[2] [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611)
**datasets/__init__.py**

```python
from .voc import VOCSegmentation
from .cityscapes import Cityscapes
from .ade20k import ADE20KSegmentation
```
**datasets/ade20k.py**

```python
import os
import json

import numpy as np
import torch.utils.data as data
from PIL import Image


def voc_cmap(N=256, normalized=False):
    """Build the Pascal VOC color map: label index -> RGB color."""
    def bitget(byteval, idx):
        return (byteval & (1 << idx)) != 0

    dtype = 'float32' if normalized else 'uint8'
    cmap = np.zeros((N, 3), dtype=dtype)
    for i in range(N):
        r = g = b = 0
        c = i
        for j in range(8):
            # Spread the label's bits over the R, G and B channels,
            # from the most significant bit downwards.
            r = r | (bitget(c, 0) << 7 - j)
            g = g | (bitget(c, 1) << 7 - j)
            b = b | (bitget(c, 2) << 7 - j)
            c = c >> 3

        cmap[i] = np.array([r, g, b])

    cmap = cmap / 255 if normalized else cmap
    return cmap
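

# For reference, voc_cmap() reproduces the standard Pascal VOC palette:
#   voc_cmap()[0] -> [0, 0, 0] (background), voc_cmap()[1] -> [128, 0, 0].
# ADE20KSegmentation.decode_target below indexes into this map to colorize masks.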


class ADE20KSegmentation(data.Dataset):
    cmap = voc_cmap()

    def __init__(self, root, image_set='train', transform=None, dram_class=False):
        self.root = os.path.expanduser(root)
        self.ade20k_path = "ade20k"
        self.transform = transform
        self.image_set = image_set
        self.dram_class = dram_class

        # Each split is indexed by an .odgt file with one JSON record per line.
        if image_set == 'train':
            self.odgt_name = "training.odgt"
        else:
            self.odgt_name = "validation.odgt"
        self.root_ade20k = os.path.join(self.root, self.ade20k_path)
        self.odgt = os.path.join(self.root_ade20k, self.odgt_name)

        self.list_sample = []
        self.num_sample = 0
        self.images = []
        self.masks = []

        self.parse_input_list(self.odgt)
        self._get_img_list()

    def parse_input_list(self, odgt, max_sample=-1, start_idx=-1, end_idx=-1):
        # odgt may be a pre-parsed list, or a path to an .odgt index file where
        # each line is a JSON record such as
        # {"fpath_img": "images/...", "fpath_segm": "annotations/..."}.
        if isinstance(odgt, list):
            self.list_sample = odgt
        elif isinstance(odgt, str):
            with open(odgt, 'r') as f:
                self.list_sample = [json.loads(x.rstrip()) for x in f]

        if max_sample > 0:
            self.list_sample = self.list_sample[0:max_sample]
        if start_idx >= 0 and end_idx >= 0:  # divide file list
            self.list_sample = self.list_sample[start_idx:end_idx]

        self.num_sample = len(self.list_sample)
        assert self.num_sample > 0
        print('# samples: {}'.format(self.num_sample))

    def _get_img_list(self):
        # Resolve image and mask paths relative to the ade20k root.
        for idx in range(self.num_sample):
            self.images.append(os.path.join(self.root_ade20k, self.list_sample[idx]['fpath_img']))
            self.masks.append(os.path.join(self.root_ade20k, self.list_sample[idx]['fpath_segm']))

    def class_changer(self, mask):
        """Remap the 150 ADE20K classes onto a reduced indoor class set."""
        num_mask = np.array(mask)
        # wall 1 <- 9, 15, 33, 43, 44, 145
        np.place(num_mask, ((num_mask == 9) | (num_mask == 15) | (num_mask == 33) | (num_mask == 43) | (num_mask == 44) | (num_mask == 145)), 1)
        # floor 4 <- 7, 14, 30, 53, 55
        np.place(num_mask, ((num_mask == 7) | (num_mask == 14) | (num_mask == 30) | (num_mask == 53) | (num_mask == 55)), 4)
        # tree 5 <- 18
        np.place(num_mask, (num_mask == 18), 5)
        # furniture 8 <- 11, 14, 16, 19, 20, 25, 34
        # (14 was already remapped to floor above, so it is a no-op here)
        np.place(num_mask, ((num_mask == 11) | (num_mask == 14) | (num_mask == 16) | (num_mask == 19) | (num_mask == 20) | (num_mask == 25) | (num_mask == 34)), 8)
        # stairs 7 <- 54
        np.place(num_mask, (num_mask == 54), 7)
        # everything else -> 26 ("other"), keeping 0 (unlabeled) and the classes above
        np.place(num_mask, ((num_mask != 0) & (num_mask != 1) & (num_mask != 4) & (num_mask != 5) & (num_mask != 7) & (num_mask != 8)), 26)

        pil_mask = Image.fromarray(num_mask)
        return pil_mask

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target), where target is the segmentation mask.
        """
        img = Image.open(self.images[index]).convert('RGB')
        target = Image.open(self.masks[index])

        if self.dram_class:
            target = self.class_changer(target)

        if self.transform is not None:
            img, target = self.transform(img, target)

        return img, target

    def __len__(self):
        return len(self.images)

    @classmethod
    def decode_target(cls, mask):
        """Decode a semantic mask into an RGB image."""
        return cls.cmap[mask]
```
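
For reference, a minimal usage sketch for the new dataset class (assuming `training.odgt` and the image/mask paths it references live under `./datasets/data/ade20k`):

```python
from datasets import ADE20KSegmentation

# dram_class=True remaps the 150 ADE20K classes onto the reduced indoor set above.
train_dst = ADE20KSegmentation(root='./datasets/data', image_set='train', dram_class=True)
img, target = train_dst[0]  # PIL images until a paired transform is supplied
print(len(train_dst))
```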