Preprocess_dataset_for_dncnn.py
forked from ScaramuzzinoGiovanna/Watermark-DnCNN
import glob, os
import random

import numpy as np
import PIL
from PIL import Image

# The pixel values of the training data are uint8 in the range 0-255.
DATA_AUG_TIMES = 1  # number of randomly augmented samples generated per patch
def data_augmentation(image, mode):
    """Apply one of eight flip/rotation transforms (mode in 0-7) to an image array."""
    if mode == 0:
        # original
        return image
    elif mode == 1:
        # flip up and down
        return np.flipud(image)
    elif mode == 2:
        # rotate 90 degrees counterclockwise
        return np.rot90(image)
    elif mode == 3:
        # rotate 90 degrees, then flip up and down
        image = np.rot90(image)
        return np.flipud(image)
    elif mode == 4:
        # rotate 180 degrees
        return np.rot90(image, k=2)
    elif mode == 5:
        # rotate 180 degrees, then flip up and down
        image = np.rot90(image, k=2)
        return np.flipud(image)
    elif mode == 6:
        # rotate 270 degrees counterclockwise
        return np.rot90(image, k=3)
    elif mode == 7:
        # rotate 270 degrees, then flip up and down
        image = np.rot90(image, k=3)
        return np.flipud(image)
    raise ValueError("mode must be an integer in [0, 7]")
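
# A minimal usage sketch (the demo array is hypothetical, not part of the pipeline);
# np.flipud/np.rot90 return views of the same buffer, so copy() before in-place edits:
#   demo = np.arange(4, dtype="uint8").reshape(2, 2)
#   flipped = data_augmentation(demo, 1)  # rows reversed
#   rotated = data_augmentation(demo, 2)  # rotated 90 degrees counterclockwise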
def generate_patches(bat_size=100, step=40, stride=80, pat_size=40, out_name="img_clean_pats", save_dir='./data', src_dir='./dataset/Train400'):
    """Cut the grayscale images in src_dir into pat_size x pat_size patches at
    several scales and save them as one 4-D uint8 tensor in save_dir."""
    global DATA_AUG_TIMES
    count = 0
    filepaths = glob.glob(src_dir + '/*.png')
    # filepaths = filepaths[:10]  # uncomment to debug on a small subset of the images
    print("number of training images: %d" % len(filepaths))
    scales = [1, 0.9, 0.8, 0.7]
    # first pass: count how many patches will be extracted
    for i in range(len(filepaths)):
        img = Image.open(filepaths[i]).convert('L')  # convert RGB to grayscale
        for s in range(len(scales)):
            newsize = (int(img.size[0] * scales[s]), int(img.size[1] * scales[s]))
            img_s = img.resize(newsize, resample=PIL.Image.BICUBIC)  # leaves the original img unchanged
            im_w, im_h = img_s.size  # PIL's size is (width, height)
            for x in range(step, im_h - pat_size, stride):
                for y in range(step, im_w - pat_size, stride):
                    count += 1
    origin_patch_num = count * DATA_AUG_TIMES
    print("raw patch count = %d" % origin_patch_num)
    # round the total up to the next multiple of the batch size
    if origin_patch_num % bat_size != 0:
        numPatches = (origin_patch_num // bat_size + 1) * bat_size
    else:
        numPatches = origin_patch_num
    print("total patches = %d , batch size = %d, total batches = %d" %
          (numPatches, bat_size, numPatches // bat_size))
    # 4-D data tensor: (patch index, height, width, channels)
    inputs = np.zeros((numPatches, pat_size, pat_size, 1), dtype="uint8")
    count = 0
    # second pass: extract the patches
    for i in range(len(filepaths)):
        img = Image.open(filepaths[i]).convert('L')
        for s in range(len(scales)):
            newsize = (int(img.size[0] * scales[s]), int(img.size[1] * scales[s]))
            img_s = img.resize(newsize, resample=PIL.Image.BICUBIC)
            # append a trailing channel dimension; np.array of an 'L'-mode image is already
            # (height, width), so indexing with np.newaxis keeps rows intact on non-square images
            img_s = np.array(img_s, dtype="uint8")[:, :, np.newaxis]
            for j in range(DATA_AUG_TIMES):
                im_h, im_w, _ = img_s.shape
                for x in range(step, im_h - pat_size, stride):
                    for y in range(step, im_w - pat_size, stride):
                        inputs[count, :, :, :] = data_augmentation(
                            img_s[x:x + pat_size, y:y + pat_size, :], random.randint(0, 7))
                        count += 1
    # pad the tail by recycling the first patches so the total is an exact multiple of bat_size
    if count < numPatches:
        to_pad = numPatches - count
        inputs[-to_pad:, :, :, :] = inputs[:to_pad, :, :, :]
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    np.save(os.path.join(save_dir, out_name), inputs)
    print("size of inputs tensor = " + str(inputs.shape))
if __name__ == '__main__':
    # Generate .npy patch files: one for training (Train400) and one extra for fine-tuning (KTH-TIPS)
    generate_patches(bat_size=100,
                     step=40,
                     stride=80,
                     pat_size=40,
                     out_name='img_clean_pats',
                     save_dir='./data',
                     src_dir='./dataset/Train400')
    generate_patches(bat_size=100,
                     step=40,
                     stride=80,
                     pat_size=40,
                     out_name='img_clean_KTH_TIPS',
                     save_dir='./data',
                     src_dir='./dataset/Train_KTH_TIPS')
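
# Running this script directly (a sketch; both dataset folders are assumed to exist
# and to contain .png images):
#   python Preprocess_dataset_for_dncnn.py
# This writes ./data/img_clean_pats.npy and ./data/img_clean_KTH_TIPS.npy.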