-
Notifications
You must be signed in to change notification settings - Fork 2
/
depth_image_encoding.py
229 lines (179 loc) · 8.4 KB
/
depth_image_encoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
"""Creates an image from a numpy array of floating point depth data.
For details about the encoding see:
https://sites.google.com/site/brainrobotdata/home/depth-image-encoding
Examples:
depth_array is a 2D numpy array of floating point depth data in meters.
depth_rgb = FloatArrayToRgbImage(depth_array)
depth_rgb is a PIL Image object containing the same data as 24-bit
integers encoded in the RGB bytes.
depth_rgb.save('image_file.png') - to save to a file.
depth_array2 = ImageToFloatArray(depth_rgb)
depth_array2 is a 2D numpy array containing the same data as
depth_array up to the precision of the RGB image (1/256 mm).
depth_gray = FloatArrayToGrayImage(depth_array)
depth_gray is a PIL Image object containing the same data rounded to
8-bit integers.
depth_gray.save('image_file.jpg', quality=95) - to save to a file.
depth_array3 = ImageToFloatArray(depth_gray)
depth_array3 is a 2D numpy array containing the same data as
depth_array up to the precision of the grayscale image (1 cm).
The image conversions first scale and round the values and then pack
them into the desired type in a numpy array before converting the
array to a PIL Image object. The Image can be saved in any format.
We are using PNG for RGB and high quality JPEG for grayscale images.
You can use different numeric types (e.g. np.uint16, np.int32), but
not all combinations of numeric type and image format are supported by
PIL or standard image viewers.
"""
import numpy as np
from PIL import Image
from skimage import img_as_ubyte
from skimage.color import grey2rgb
def ClipFloatValues(float_array, min_value, max_value):
    """Clips values to the range [min_value, max_value].

    First checks if any values are out of range and prints a message.
    Then clips all values to the given range.

    Args:
      float_array: Array of floating point values to be clipped.
      min_value: Minimum value of clip range.
      max_value: Maximum value of clip range.

    Returns:
      The clipped array. When every value is already in range (or the array
      is empty) the input array is returned as-is, without copying.
    """
    float_array = np.asarray(float_array)
    # min()/max() raise on empty arrays; there is nothing to clip anyway.
    if float_array.size == 0:
        return float_array

    # Compute the extrema once; they are needed for the test and the message.
    lowest = float_array.min()
    highest = float_array.max()
    if lowest < min_value or highest > max_value:
        # The format string must be applied with `%`; passing the values as
        # extra print() arguments would emit the raw placeholders instead.
        print('Image has out-of-range values [%f,%f] not in [%d,%d]' %
              (lowest, highest, min_value, max_value))
        float_array = np.clip(float_array, min_value, max_value)
    return float_array
# Default fixed-point scale for the 24-bit RGB encoding. Depth values in
# meters are multiplied by this factor before rounding, i.e. 1000 mm/m * 256,
# which yields the documented precision of 1/256 mm per integer step.
DEFAULT_RGB_SCALE_FACTOR = 256000.0
def FloatArrayToRgbImage(float_array,
                         scale_factor=DEFAULT_RGB_SCALE_FACTOR,
                         drop_blue=False):
    """Convert a floating point array of values to an RGB image.

    Convert floating point values to a fixed point representation where
    the RGB bytes represent a 24-bit integer.
    R is the high order byte.
    B is the low order byte.
    The precision of the depth image is 1/256 mm with the default scale.

    Floating point values are scaled, rounded to the nearest integer and
    clipped to [0, 2**24 - 1] so that they fit in three bytes.

    This image representation should only use lossless compression.

    Args:
      float_array: Input array of floating point depth values in meters.
        Must be 2D, or reducible to 2D by removing singleton dimensions
        (e.g. shape (H, W, 1)).
      scale_factor: Scale value applied to all float values.
      drop_blue: Zero out the blue channel to improve compression, results
        in 1mm precision depth values.

    Returns:
      24-bit RGB PIL Image object representing depth values.

    Raises:
      ValueError: If the input cannot be interpreted as a 2D array.
    """
    depth = np.asarray(float_array)
    # Squeeze only when necessary: an unconditional squeeze would collapse a
    # genuine single-row or single-column 2D image down to one dimension.
    if depth.ndim != 2:
        depth = np.squeeze(depth)
    if depth.ndim != 2:
        raise ValueError(
            'Expected a 2D depth array (optionally with singleton '
            'dimensions), got shape %s' % (np.shape(float_array),))

    # Scale and round to the nearest integer.
    scaled_array = np.floor(depth * scale_factor + 0.5)

    # Clip to the range representable in 24 bits, then convert to integers.
    max_inttype = 2**24 - 1
    scaled_array = ClipFloatValues(scaled_array, 0, max_inttype)
    int_array = scaled_array.astype(np.uint32)

    # Split the 24-bit value into bytes with integer shifts and masks:
    #   r = bits 16..23 (high byte)
    #   g = bits  8..15 (middle byte)
    #   b = bits  0..7  (low byte)
    # Integer operations avoid depending on float-to-uint8 truncation.
    rgb_array = np.zeros(int_array.shape + (3,), dtype=np.uint8)
    rgb_array[..., 0] = int_array >> 16
    rgb_array[..., 1] = (int_array >> 8) & 0xFF
    if not drop_blue:
        # The blue channel stays zero when drop_blue is set.
        rgb_array[..., 2] = int_array & 0xFF

    return Image.fromarray(rgb_array, mode='RGB')
# Default scale factors for the grayscale encodings, keyed by the numpy
# scalar type of the image. Depth in meters times the factor gives the stored
# integer: uint8 -> 1 cm steps, uint16 -> 1 mm steps, int32 -> same 1/256 mm
# steps as the RGB encoding.
DEFAULT_GRAY_SCALE_FACTOR = {np.uint8: 100.0,
np.uint16: 1000.0,
np.int32: DEFAULT_RGB_SCALE_FACTOR}
def FloatArrayToGrayImage(float_array, scale_factor=None, image_dtype=np.uint8):
    """Convert a floating point array of values to a grayscale image.

    Convert floating point values to a fixed point representation with
    the given bit depth.

    The precision of the depth image with default scale_factor is:
      uint8: 1cm, with a range of [0, 2.55m]
      uint16: 1mm, with a range of [0, 65.5m]
      int32: 1/256mm, with a range of [0, 8388m]

    Right now, PIL turns uint16 images into a very strange format and
    does not decode int32 images properly. Only uint8 works correctly.

    Args:
      float_array: Input array of floating point depth values in meters.
        Arrays with more than two dimensions have singleton dimensions
        removed first (e.g. shape (H, W, 1) becomes (H, W)).
      scale_factor: Scale value applied to all float values. When None, the
        default for the selected image dtype is used.
      image_dtype: Image datatype, which controls the bit depth of the
        grayscale image. Accepts anything numpy can interpret as a dtype
        (np.uint16, np.dtype('uint16'), 'uint16', ...). Anything other than
        uint16 or int32 falls back to uint8.

    Returns:
      Grayscale PIL Image object representing depth values.
    """
    # Normalize the dtype specifier to a numpy scalar type. Without this, a
    # np.dtype instance compares equal to np.uint16 but hashes differently
    # (breaking the default scale lookup), and a string like 'uint16' would
    # silently be treated as uint8.
    try:
        image_dtype = np.dtype(image_dtype).type
    except TypeError:
        image_dtype = np.uint8

    # Map supported dtypes to PIL modes; everything else becomes 8-bit 'L'.
    pil_modes = {np.uint16: 'I;16', np.int32: 'I'}
    image_mode = pil_modes.get(image_dtype)
    if image_mode is None:
        image_dtype = np.uint8
        image_mode = 'L'

    if scale_factor is None:
        scale_factor = DEFAULT_GRAY_SCALE_FACTOR[image_dtype]

    depth = np.asarray(float_array)
    # Single-channel PIL modes need a 2D array; drop singleton dimensions such
    # as a trailing channel axis, matching what the RGB encoder accepts.
    if depth.ndim > 2:
        depth = np.squeeze(depth)

    # Scale and round to the nearest integer.
    scaled_array = np.floor(depth * scale_factor + 0.5)

    # Clip to the range representable by the target dtype before casting.
    dtype_limits = np.iinfo(image_dtype)
    scaled_array = ClipFloatValues(scaled_array, dtype_limits.min,
                                   dtype_limits.max)
    image_array = scaled_array.astype(image_dtype)

    return Image.fromarray(image_array, mode=image_mode)
def ImageToFloatArray(image, scale_factor=None):
    """Recovers the depth values from an image.

    Reverses the depth to image conversion performed by FloatArrayToRgbImage or
    FloatArrayToGrayImage.

    The image is treated as an array of fixed point depth values. Each
    value is converted to float and scaled by the inverse of the factor
    that was used to generate the Image object from depth values. If
    scale_factor is specified, it should be the same value that was
    specified in the original conversion.

    The result of this function should be equal to the original input
    within the precision of the conversion.

    For details see
    https://sites.google.com/site/brainrobotdata/home/depth-image-encoding.

    Args:
      image: Depth image output of FloatArrayTo[Format]Image (anything that
        np.array() can convert to a 2D or 3D array).
      scale_factor: Fixed point scale factor. When None, the default RGB
        factor is used for 3-channel images and the default for the array's
        dtype is used otherwise.

    Returns:
      A floating point numpy array representing a depth image: float64 for
      3-channel input, float32 otherwise.

    Raises:
      ValueError: If the image array is not 2D or 3D.
      KeyError: If scale_factor is None and there is no default scale factor
        for the dtype of a non-RGB image.
    """
    image_array = np.array(image)
    image_shape = image_array.shape

    # Validate explicitly rather than with assert, which is stripped under -O.
    if not 2 <= len(image_shape) <= 3:
        raise ValueError(
            'Expected a 2D or 3D image array, got shape %s' % (image_shape,))
    channels = image_shape[2] if len(image_shape) == 3 else 1

    if channels == 3:
        if scale_factor is None:
            scale_factor = DEFAULT_RGB_SCALE_FACTOR
        # Reassemble the 24-bit integer from the R (high), G and B (low)
        # bytes. Widen first so the multiplication cannot overflow uint8.
        wide = image_array.astype(np.int64)
        float_array = wide[..., 0] * 65536 + wide[..., 1] * 256 + wide[..., 2]
    else:
        if scale_factor is None:
            dtype_key = image_array.dtype.type
            try:
                scale_factor = DEFAULT_GRAY_SCALE_FACTOR[dtype_key]
            except KeyError:
                raise KeyError(
                    'No default scale factor for image dtype %s; pass '
                    'scale_factor explicitly.' % image_array.dtype)
        float_array = image_array.astype(np.float32)

    # Undo the fixed-point scaling to get back to depth units.
    return float_array / scale_factor
def FloatArrayToRawRGB(im, min_value=0.0, max_value=1.0):
    """Convert a grayscale image to rgb, no encoding.

    The input is passed through img_as_ubyte; if the last axis of the result
    has length 1 it is additionally passed through grey2rgb. Otherwise the
    unsigned-byte image is returned unchanged.

    For proper display try matplotlib's rendering/conversion instead of this
    version. Please be aware that this does not incorporate a proper color
    transform.
    http://pillow.readthedocs.io/en/3.4.x/reference/Image.html#PIL.Image.Image.convert
    https://en.wikipedia.org/wiki/Rec._601

    Args:
      im: Image array to convert.
      min_value: Not used by this implementation.
      max_value: Not used by this implementation.

    Returns:
      The converted image array.
    """
    # NOTE(review): grey2rgb appears to add a new trailing axis, so an
    # (H, W, 1) input may come back as (H, W, 1, 3) -- confirm with callers.
    ubyte_image = img_as_ubyte(im)
    has_single_channel = ubyte_image.shape[-1] == 1
    return grey2rgb(ubyte_image) if has_single_channel else ubyte_image