forked from IntelRealSense/librealsense
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rs-face-dlib.cpp
142 lines (122 loc) · 5.87 KB
/
rs-face-dlib.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// License: Apache 2.0. See LICENSE file in root directory.
// Copyright(c) 2019 Intel Corporation. All Rights Reserved.
#include <librealsense2/rs.hpp> // Include RealSense Cross Platform API
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_processing/render_face_detections.h>
#include <dlib/image_processing.h>
#include <dlib/gui_widgets.h> // image_window, etc.
#include "../rs_frame_image.h"
#include "validate_face.h"
#include "render_face.h"
/*
The data pointed to by 'frame.get_data()' is a uint16_t 'depth unit' that needs to be scaled.
We return the scaling factor to convert these pixels to meters. This can differ depending
on different cameras.
*/
float get_depth_scale( rs2::device dev )
{
// Go over the device's sensors
for( rs2::sensor& sensor : dev.query_sensors() )
{
// Check if the sensor if a depth sensor
if( rs2::depth_sensor dpt = sensor.as<rs2::depth_sensor>() )
{
return dpt.get_depth_scale();
}
}
throw std::runtime_error( "Device does not have a depth sensor" );
}
int main(int argc, char * argv[]) try
{
// Declare RealSense pipeline, encapsulating the actual device and sensors
rs2::pipeline pipe;
// Start streaming with default recommended configuration
rs2::pipeline_profile profile = pipe.start();
// Each depth camera might have different units for depth pixels, so we get it here
// Using the pipeline's profile, we can retrieve the device that the pipeline uses
float const DEVICE_DEPTH_SCALE = get_depth_scale( profile.get_device() );
/*
The face detector we use is made using the classic Histogram of Oriented
Gradients (HOG) feature combined with a linear classifier, an image pyramid,
and sliding window detection scheme, using dlib's implementation of:
One Millisecond Face Alignment with an Ensemble of Regression Trees by
Vahid Kazemi and Josephine Sullivan, CVPR 2014
and was trained on the iBUG 300-W face landmark dataset (see
https://ibug.doc.ic.ac.uk/resources/facial-point-annotations/):
C. Sagonas, E. Antonakos, G, Tzimiropoulos, S. Zafeiriou, M. Pantic.
300 faces In-the-wild challenge: Database and results.
Image and Vision Computing (IMAVIS), Special Issue on Facial Landmark Localisation "In-The-Wild". 2016.
You can get the trained model file from:
http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Note that the license for the iBUG 300-W dataset excludes commercial use.
So you should contact Imperial College London to find out if it's OK for
you to use this model file in a commercial product.
*/
dlib::frontal_face_detector face_bbox_detector = dlib::get_frontal_face_detector();
dlib::shape_predictor face_landmark_annotator;
dlib::deserialize( "shape_predictor_68_face_landmarks.dat" ) >> face_landmark_annotator;
/*
The 5-point landmarks model file can be used, instead. It's 10 times smaller and runs
faster, from:
http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2
But the validate_face() and render_face() functions will then need to be changed
to handle 5 points.
*/
/*
We need to map pixels on the color frame to a depth frame, so we can
determine their depth. The problem is that the frames may (and probably
will!) be of different resolutions, and so alignment is necessary.
See the rs-align example for a better understanding.
We align the depth frame so it fits in resolution with the color frame
since we go from color to depth.
*/
rs2::align align_to_color( RS2_STREAM_COLOR );
// Display annotations in one of two colors: if the face is determined to be that of a real
// person, it is "good". Otherwise, "bad".
dlib::rgb_pixel bad_color( 255, 0, 0 );
dlib::rgb_pixel good_color( 0, 255, 0 );
dlib::image_window win;
while( ! win.is_closed() )
{
rs2::frameset data = pipe.wait_for_frames(); // Wait for next set of frames from the camera
data = align_to_color.process( data ); // Replace with aligned frames
auto depth_frame = data.get_depth_frame();
auto color_frame = data.get_color_frame();
// Create a dlib image for face detection
rs_frame_image< dlib::rgb_pixel, RS2_FORMAT_RGB8 > image( color_frame );
// Detect faces: find bounding boxes around all faces, then annotate each to find its landmarks
std::vector< dlib::rectangle > face_bboxes = face_bbox_detector( image );
std::vector< dlib::full_object_detection > faces;
for( auto const & bbox : face_bboxes )
faces.push_back( face_landmark_annotator( image, bbox ));
// Display it all on the screen
win.clear_overlay();
win.set_image( image );
for( auto const & face : faces )
{
dlib::rgb_pixel & color =
validate_face( depth_frame, DEVICE_DEPTH_SCALE, face )
? good_color : bad_color;
win.add_overlay( render_face( face, color ));
}
}
return EXIT_SUCCESS;
}
catch( dlib::serialization_error const & e )
{
std::cerr << "You need dlib's default face landmarking model file to run this example." << std::endl;
std::cerr << "You can get it from the following URL: " << std::endl;
std::cerr << " http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" << std::endl;
std::cerr << std::endl << e.what() << std::endl;
return EXIT_FAILURE;
}
catch (const rs2::error & e)
{
std::cerr << "RealSense error calling " << e.get_failed_function() << "(" << e.get_failed_args() << "):\n " << e.what() << std::endl;
return EXIT_FAILURE;
}
catch (const std::exception& e)
{
std::cerr << e.what() << std::endl;
return EXIT_FAILURE;
}