Added support for random image access, switched to Pillow for image loading, removed imformat option.
utiasSTARS · Mar 18, 2018 · 19d29b6 · 19d29b6
1 parent 08b3199
commit 19d29b6
Show file tree

Hide file tree

Showing 8 changed files with 269 additions and 214 deletions.
diff --git a/README.md b/README.md
@@ -31,7 +31,9 @@ Homogeneous coordinate transformations are provided as 4x4 `numpy.array` objects
 Pinhole camera intrinsics for camera `N` are provided as 3x3 `numpy.array` objects and are denoted as `K_camN`. Stereo pair baselines are given in meters as `b_gray` for the monochrome stereo pair (`cam0` and `cam1`), and `b_rgb` for the color stereo pair (`cam2` and `cam3`).
 
 ## Example
-More detailed examples can be found in the `demos` directory, but the general idea is to specify what dataset you want to load, then access the parts you need and do something with them.
+More detailed examples can be found in the `demos` directory, but the general idea is to specify what dataset you want to load, then access the parts you need and do something with them. 
+
+Camera and velodyne data are available via generators for easy sequential access (e.g., for visual odometry), and by indexed getter methods for random access (e.g., for deep learning). Images are loaded as `PIL.Image` objects using Pillow.
 
 ```python
 import pykitti
@@ -41,18 +43,22 @@ date = '2011_09_26'
 drive = '0019'
 
 # The 'frames' argument is optional - default: None, which loads the whole dataset.
-# Calibration and timestamp data are read automatically. 
-# Other sensor data (cameras, IMU, Velodyne) are available via properties 
-# that create generators when accessed.
+# Calibration, timestamps, and IMU data are read automatically. 
+# Camera and velodyne data are available via properties that create generators
+# when accessed, or through getter methods that provide random access.
 data = pykitti.raw(basedir, date, drive, frames=range(0, 50, 5))
 
-# dataset.calib:      Calibration data are accessible as a named tuple
-# dataset.timestamps: Timestamps are parsed into a list of datetime objects
-# dataset.oxts:       Returns a generator that loads OXTS packets as named tuples
-# dataset.camN:       Returns a generator that loads individual images from camera N
-# dataset.gray:       Returns a generator that loads monochrome stereo pairs (cam0, cam1)
-# dataset.rgb:        Returns a generator that loads RGB stereo pairs (cam2, cam3)
-# dataset.velo:       Returns a generator that loads velodyne scans as [x,y,z,reflectance]
+# dataset.calib:         Calibration data are accessible as a named tuple
+# dataset.timestamps:    Timestamps are parsed into a list of datetime objects
+# dataset.oxts:          List of OXTS packets and 6-dof poses as named tuples
+# dataset.camN:          Returns a generator that loads individual images from camera N
+# dataset.get_camN(idx): Returns the image from camera N at idx  
+# dataset.gray:          Returns a generator that loads monochrome stereo pairs (cam0, cam1)
+# dataset.get_gray(idx): Returns the monochrome stereo pair at idx  
+# dataset.rgb:           Returns a generator that loads RGB stereo pairs (cam2, cam3)
+# dataset.get_rgb(idx):  Returns the RGB stereo pair at idx  
+# dataset.velo:          Returns a generator that loads velodyne scans as [x,y,z,reflectance]
+# dataset.get_velo(idx): Returns the velodyne scan at idx  
 
 point_velo = np.array([0,0,0,1])
 point_cam0 = data.calib.T_cam0_velo.dot(point_velo)
@@ -61,16 +67,17 @@ point_imu = np.array([0,0,0,1])
 point_w = [o.T_w_imu.dot(point_imu) for o in data.oxts]
 
 for cam0_image in data.cam0:
+    # do something
     pass
 
-rgb_iterator = data.rgb # Assign the generator so it doesn't 
-cam2_image, cam3_image = next(rgb_iterator)
+cam2_image, cam3_image = data.get_rgb(3)
 ```
 ### OpenCV
-Image data can be automatically converted to an OpenCV-friendly format (i.e., `uint8` with `BGR` color channel ordering) simply by specifying an additional parameter in the constructor:
+PIL Image data can be converted to an OpenCV-friendly format using numpy and `cv2.cvtColor`. For example, given an RGB image `img` (grayscale images only need the `np.array` step):
 
 ```python
-data = pykitti.raw(basedir, date, drive, frames=range(0, 50, 5), imformat='cv2')
+img_np = np.array(img)
+img_cv2 = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
 ```
 
 Note: This package does not actually require that OpenCV be installed on your system, except to run `demo_raw_cv2.py`.

diff --git a/demos/demo_odometry.py b/demos/demo_odometry.py
@@ -13,29 +13,27 @@
 basedir = '/Users/leeclement/Desktop/KITTI/odometry/dataset'
 
 # Specify the dataset to load
-sequence = '01'
+sequence = '04'
 
 # Load the data. Optionally, specify the frame range to load.
-# Passing imformat='cv2' will convert images to uint8 and BGR for
-# easy use with OpenCV.
 # dataset = pykitti.odometry(basedir, sequence)
 dataset = pykitti.odometry(basedir, sequence, frames=range(0, 20, 5))
 
 # dataset.calib:      Calibration data are accessible as a named tuple
 # dataset.timestamps: Timestamps are parsed into a list of timedelta objects
-# dataset.poses:      Generator to load ground truth poses T_w_cam0
+# dataset.poses:      List of ground truth poses T_w_cam0
 # dataset.camN:       Generator to load individual images from camera N
 # dataset.gray:       Generator to load monochrome stereo pairs (cam0, cam1)
 # dataset.rgb:        Generator to load RGB stereo pairs (cam2, cam3)
 # dataset.velo:       Generator to load velodyne scans as [x,y,z,reflectance]
 
 # Grab some data
-second_pose = next(iter(itertools.islice(dataset.poses, 1, None)))
+second_pose = dataset.poses[1]
 first_gray = next(iter(dataset.gray))
 first_cam1 = next(iter(dataset.cam1))
-first_rgb = next(iter(dataset.rgb))
-first_cam2 = next(iter(dataset.cam2))
-third_velo = next(iter(itertools.islice(dataset.velo, 2, None)))
+first_rgb = dataset.get_rgb(0)
+first_cam2 = dataset.get_cam2(0)
+third_velo = dataset.get_velo(2)
 
 # Display some of the data
 np.set_printoptions(precision=4, suppress=True)

diff --git a/demos/demo_raw.py b/demos/demo_raw.py
@@ -14,29 +14,31 @@
 
 # Specify the dataset to load
 date = '2011_09_30'
-drive = '0016'
+drive = '0034'
 
 # Load the data. Optionally, specify the frame range to load.
-# Passing imformat='cv2' will convert images to uint8 and BGR for
-# easy use with OpenCV.
 # dataset = pykitti.raw(basedir, date, drive)
 dataset = pykitti.raw(basedir, date, drive, frames=range(0, 20, 5))
 
-# dataset.calib:      Calibration data are accessible as a named tuple
-# dataset.timestamps: Timestamps are parsed into a list of datetime objects
-# dataset.oxts:       Generator to load OXTS packets as named tuples
-# dataset.camN:       Generator to load individual images from camera N
-# dataset.gray:       Generator to load monochrome stereo pairs (cam0, cam1)
-# dataset.rgb:        Generator to load RGB stereo pairs (cam2, cam3)
-# dataset.velo:       Generator to load velodyne scans as [x,y,z,reflectance]
+# dataset.calib:         Calibration data are accessible as a named tuple
+# dataset.timestamps:    Timestamps are parsed into a list of datetime objects
+# dataset.oxts:          List of OXTS packets and 6-dof poses as named tuples
+# dataset.camN:          Returns a generator that loads individual images from camera N
+# dataset.get_camN(idx): Returns the image from camera N at idx
+# dataset.gray:          Returns a generator that loads monochrome stereo pairs (cam0, cam1)
+# dataset.get_gray(idx): Returns the monochrome stereo pair at idx
+# dataset.rgb:           Returns a generator that loads RGB stereo pairs (cam2, cam3)
+# dataset.get_rgb(idx):  Returns the RGB stereo pair at idx
+# dataset.velo:          Returns a generator that loads velodyne scans as [x,y,z,reflectance]
+# dataset.get_velo(idx): Returns the velodyne scan at idx
 
 # Grab some data
-second_pose = next(iter(itertools.islice(dataset.oxts, 1, None))).T_w_imu
+second_pose = dataset.oxts[1].T_w_imu
 first_gray = next(iter(dataset.gray))
 first_cam1 = next(iter(dataset.cam1))
-first_rgb = next(iter(dataset.rgb))
-first_cam2 = next(iter(dataset.cam2))
-third_velo = next(iter(itertools.islice(dataset.velo, 2, None)))
+first_rgb = dataset.get_rgb(0)
+first_cam2 = dataset.get_cam2(0)
+third_velo = dataset.get_velo(2)
 
 # Display some of the data
 np.set_printoptions(precision=4, suppress=True)

diff --git a/demos/demo_raw_cv2.py b/demos/demo_raw_cv2.py
@@ -1,5 +1,6 @@
 """Example of pykitti.raw usage with OpenCV."""
 import cv2
+import numpy as np
 import matplotlib.pyplot as plt
 
 import pykitti
@@ -12,32 +13,34 @@
 
 # Specify the dataset to load
 date = '2011_09_30'
-drive = '0016'
+drive = '0034'
 
 # Load the data. Optionally, specify the frame range to load.
-# Passing imformat='cv2' will convert images to uint8 and BGR for
-# easy use with OpenCV.
 dataset = pykitti.raw(basedir, date, drive,
-                      frames=range(0, 20, 5), imformat='cv2')
-
-# dataset.calib:      Calibration data are accessible as a named tuple
-# dataset.timestamps: Timestamps are parsed into a list of datetime objects
-# dataset.oxts:       Generator to load OXTS packets as named tuples
-# dataset.camN:       Generator to load individual images from camera N
-# dataset.gray:       Generator to load monochrome stereo pairs (cam0, cam1)
-# dataset.rgb:        Generator to load RGB stereo pairs (cam2, cam3)
-# dataset.velo:       Generator to load velodyne scans as [x,y,z,reflectance]
+                      frames=range(0, 20, 5))
+
+# dataset.calib:         Calibration data are accessible as a named tuple
+# dataset.timestamps:    Timestamps are parsed into a list of datetime objects
+# dataset.oxts:          List of OXTS packets and 6-dof poses as named tuples
+# dataset.camN:          Returns a generator that loads individual images from camera N
+# dataset.get_camN(idx): Returns the image from camera N at idx
+# dataset.gray:          Returns a generator that loads monochrome stereo pairs (cam0, cam1)
+# dataset.get_gray(idx): Returns the monochrome stereo pair at idx
+# dataset.rgb:           Returns a generator that loads RGB stereo pairs (cam2, cam3)
+# dataset.get_rgb(idx):  Returns the RGB stereo pair at idx
+# dataset.velo:          Returns a generator that loads velodyne scans as [x,y,z,reflectance]
+# dataset.get_velo(idx): Returns the velodyne scan at idx
 
 # Grab some data
-first_gray = next(iter(dataset.gray))
-first_rgb = next(iter(dataset.rgb))
+first_gray = dataset.get_gray(0)
+first_rgb = dataset.get_rgb(0)
 
 # Do some stereo processing
 stereo = cv2.StereoBM_create()
-disp_gray = stereo.compute(first_gray[0], first_gray[1])
+disp_gray = stereo.compute(np.array(first_gray[0]), np.array(first_gray[1]))
 disp_rgb = stereo.compute(
-    cv2.cvtColor(first_rgb[0], cv2.COLOR_BGR2GRAY),
-    cv2.cvtColor(first_rgb[1], cv2.COLOR_BGR2GRAY))
+    cv2.cvtColor(np.array(first_rgb[0]), cv2.COLOR_RGB2GRAY),
+    cv2.cvtColor(np.array(first_rgb[1]), cv2.COLOR_RGB2GRAY))
 
 # Display some data
 f, ax = plt.subplots(2, 2, figsize=(15, 5))
@@ -47,7 +50,7 @@
 ax[0, 1].imshow(disp_gray, cmap='viridis')
 ax[0, 1].set_title('Gray Stereo Disparity')
 
-ax[1, 0].imshow(cv2.cvtColor(first_rgb[0], cv2.COLOR_BGR2RGB))
+ax[1, 0].imshow(first_rgb[0])
 ax[1, 0].set_title('Left RGB Image (cam2)')
 
 ax[1, 1].imshow(disp_rgb, cmap='viridis')