-
Notifications
You must be signed in to change notification settings - Fork 2
/
VideoTracker.py
290 lines (243 loc) · 10.8 KB
/
VideoTracker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
import os
from random import randint

import cv2

import scan
class VideoTracker:
    """Track multiple objects across a video with OpenCV multi-tracking.

    On construction the tracker calibrates one bounding box per object on the
    first frame (drawn manually or taken from predefined coordinates), tracks
    the boxes through every frame, writes an annotated output video, and dumps
    per-frame box-center coordinates to 'BrailleOutput.txt'.
    """

    # Tracker names accepted by create_tracker_by_name.
    trackerTypes = ['BOOSTING', 'MIL', 'KCF', 'TLD', 'MEDIANFLOW', 'GOTURN', 'MOSSE', 'CSRT']

    # Name -> cv2 factory attribute. Resolved lazily via getattr so OpenCV
    # builds that lack a given tracker only fail when that tracker is requested.
    _TRACKER_FACTORIES = {
        'BOOSTING': 'TrackerBoosting_create',
        'MIL': 'TrackerMIL_create',
        'KCF': 'TrackerKCF_create',
        'TLD': 'TrackerTLD_create',
        'MEDIANFLOW': 'TrackerMedianFlow_create',
        'GOTURN': 'TrackerGOTURN_create',
        'MOSSE': 'TrackerMOSSE_create',
        'CSRT': 'TrackerCSRT_create',
    }

    def __init__(self, video_path, tracker_type="CSRT", auto_calibrate=False, output_path='./test_output/output.mp4',
                 show_frame=False):
        """
        Init video tracker and immediately run the full tracking pipeline.

        :param video_path: Path of input video
        :param tracker_type: type of OpenCV tracker to use; CSRT seems to work the best so far and is default
        :param auto_calibrate: If True, will use pre-defined bounding boxes instead of manual selection
        :param output_path: Path of output video; the parent directory is created if it does not exist
        :param show_frame: If true, tracker displays the frame at each iteration
        """
        # Create a video capture object to read videos
        self.cap = cv2.VideoCapture(video_path)
        # video info
        self.vid_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.vid_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
        self.n_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Output info. Create the output directory up front: VideoWriter.open
        # fails silently (writes nothing) when the directory is missing.
        self.output_path = output_path
        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        # Calibration of boxes: predefined coordinates or interactive selection.
        # Both paths read the first frame, which fixes vid_width/vid_height
        # to the 1920x1080 working resolution used below.
        if auto_calibrate:
            bboxes, frame, colors = self.automatic_calibration()
        else:
            bboxes, frame, colors = self.manual_calibration()
        # initialize multitracker object based on bounding boxes and selected tracker type
        multi_tracker = self.init_multitracker(bboxes, tracker_type, frame)
        # video saving format
        output_format = cv2.VideoWriter_fourcc(*'mp4v')
        # open and set properties
        video_out = cv2.VideoWriter()
        video_out.open(self.output_path, output_format, self.fps, (self.vid_width, self.vid_height), True)
        # pixel -> physical coordinate transform (11.5 x 11.0 presumably the
        # physical target size used by scan -- TODO confirm units with scan.py)
        self.transformation_metadata = scan.get_transform_video(video_path, (11.5, 11.0))
        # run tracker and save video
        print(self.process_tracker(self.cap, multi_tracker, colors, video_out, show_frame))

    def init_multitracker(self, bboxes, tracker_type, frame):
        """
        Init an OpenCV MultiTracker, one sub-tracker per bounding box.

        :param bboxes: Bounding boxes (x, y, w, h), e.g. one per finger
        :param tracker_type: E.g. 'CSRT'
        :param frame: First frame, used for tracker initialization
        :return: MultiTracker instance
        """
        # Create MultiTracker object
        multi_tracker = cv2.MultiTracker_create()
        # Initialize one tracker per box on the first frame
        for bbox in bboxes:
            multi_tracker.add(self.create_tracker_by_name(tracker_type), frame, bbox)
        return multi_tracker

    def create_tracker_by_name(self, tracker_type):
        """
        Instantiate the OpenCV tracker matching the given name.

        :param tracker_type: one of trackerTypes, e.g. 'CSRT'
        :return: tracker instance, or None (after printing the valid names)
            when the name is not recognized
        """
        factory_name = self._TRACKER_FACTORIES.get(tracker_type)
        if factory_name is None:
            print('Incorrect tracker name')
            print('Available trackers are:')
            for name in self.trackerTypes:
                print(name)
            return None
        # Lazy lookup: only touch the cv2 attribute for the requested tracker.
        return getattr(cv2, factory_name)()

    def read_first_frame(self):
        """
        Read the first frame for calibration & tracker init.

        Resizes the frame to the fixed 1920x1080 working resolution and
        updates the stored video dimensions accordingly.

        :return: the resized first frame
        :raises Exception: if the video file cannot be read
        """
        # Read first frame
        success, frame = self.cap.read()
        # quit if unable to read the video file -- must be checked BEFORE
        # resizing, since a failed read returns frame=None
        if not success:
            print('Failed to read video')
            raise Exception("Failed to read video")
        frame = cv2.resize(frame, (1920, 1080))
        # all downstream processing (and the output writer) uses this size
        self.vid_width = 1920
        self.vid_height = 1080
        return frame

    def manual_calibration(self):
        """
        Manually draw bounding boxes on the first frame.

        :return: bounding boxes, first frame, random color of each box
        """
        frame = self.read_first_frame()
        # Select boxes
        bboxes = []
        colors = []
        # OpenCV's selectROI function doesn't work for selecting multiple
        # objects in Python, so call it in a loop until the user quits.
        while True:
            # selectROI's default behaviour here draws the box from the
            # top-left corner (fromCenter=False)
            cv2.namedWindow('MultiTracker', 2)
            bbox = cv2.selectROI('MultiTracker', frame)
            bboxes.append(bbox)
            colors.append((randint(0, 255), randint(0, 255), randint(0, 255)))
            print("Press q to quit selecting boxes and start tracking")
            print("Press any other key to select next object")
            k = cv2.waitKey(0) & 0xFF
            if k == ord('q'):  # q is pressed
                break
        print('Selected bounding boxes {}'.format(bboxes))
        return bboxes, frame, colors

    def automatic_calibration(self):
        """
        Build bounding boxes from predefined coordinates.

        The coordinates are (x, y, w, h) in the 1920x1080 resized frame.

        :return: bounding boxes, first frame, random color of each box
        """
        defined_calibration_pts = [(371, 887, 77, 68),
                                   (571, 999, 99, 46),
                                   (692, 991, 111, 56),
                                   (801, 983, 95, 64),
                                   (998, 991, 93, 56),
                                   (1100, 981, 98, 66),
                                   (1248, 983, 101, 63),
                                   (1359, 881, 94, 60)]
        frame = self.read_first_frame()
        # Select boxes
        bboxes = []
        colors = []
        for this_box in defined_calibration_pts:
            bboxes.append(this_box)
            this_color = randint(0, 255), randint(0, 255), randint(0, 255)
            colors.append(this_color)
        return bboxes, frame, colors

    def generate_output_file(self, x_centers, y_centers):
        """
        Generate the tab-delimited output file 'BrailleOutput.txt'.

        One row per frame: frame number followed by interleaved X/Y center
        coordinates for each tracked box.

        :param x_centers: per frame, list of x coords of each box center
        :param y_centers: per frame, list of y coords of each box center
        :return: (x_centers, y_centers), unchanged, for convenience
        """
        # Header width follows the actual number of tracked boxes
        # (falls back to the historical 8 columns when there is no data).
        n_boxes = len(x_centers[0]) if x_centers else 8
        with open('BrailleOutput.txt', 'w+') as outfile:
            header = 'Frame\t' + '\t'.join(
                'X{0}\tY{0}'.format(i) for i in range(1, n_boxes + 1))
            outfile.write(header + '\n')
            for frame_idx, (xs, ys) in enumerate(zip(x_centers, y_centers)):
                row = '\t'.join('{0}\t{1}'.format(x, y) for x, y in zip(xs, ys))
                outfile.write('{0}\t{1}\n'.format(frame_idx, row))
        return x_centers, y_centers

    def process_tracker(self, cap, multi_tracker, colors, video_out, show_frame=False):
        """
        Given captured video & tracker object, track objects and output video + coordinates.

        :param cap: OpenCV VideoCapture object representing video stream
        :param multi_tracker: initialized OpenCV MultiTracker object
        :param colors: Colors of each bounding box
        :param video_out: opened cv2.VideoWriter for the annotated output
        :param show_frame: If True, program updates the frame during processing
        :return: (x_centers, y_centers) per-frame transformed box centers
        """
        # Initialize Coordinate List
        x_centers = []
        y_centers = []
        frame_num = 0
        # Process video and track objects
        while cap.isOpened():
            print("Processing frame: {0}".format(frame_num))
            success, frame = cap.read()
            if not success:
                # end of stream
                break
            try:
                frame = cv2.resize(frame, (1920, 1080))
            except cv2.error:
                # an unreadable/corrupt frame ends processing
                # (the old 'except Exception("...")' matched an exception
                # instance, not a class, and could never catch anything)
                break
            # get updated location of objects in subsequent frames
            success, boxes = multi_tracker.update(frame)
            n_boxes = len(boxes)
            x_centers_per_frame = [0] * n_boxes
            y_centers_per_frame = [0] * n_boxes
            # draw tracked objects and record their centers
            for i, newbox in enumerate(boxes):
                p1 = (int(newbox[0]), int(newbox[1]))
                p2 = (int(newbox[0] + newbox[2]), int(newbox[1] + newbox[3]))
                cv2.rectangle(frame, p1, p2, colors[i], 2, 1)
                # pixel-space center of the box
                x_center_pixel = newbox[0] + newbox[2] / 2
                y_center_pixel = newbox[1] + newbox[3] / 2
                # map pixel coordinates to physical coordinates
                x_centers_per_frame[i], y_centers_per_frame[i] = scan.transform_point(
                    (x_center_pixel, y_center_pixel), self.transformation_metadata)
            # add coordinates from this frame to overall coordinate list
            x_centers.append(x_centers_per_frame)
            y_centers.append(y_centers_per_frame)
            frame_num += 1
            # show frame
            if show_frame:
                cv2.imshow('MultiTracker', frame)
            video_out.write(frame)
            # quit on ESC button
            if cv2.waitKey(1) & 0xFF == 27:  # Esc pressed
                break
        x_centers, y_centers = self.generate_output_file(x_centers, y_centers)
        return x_centers, y_centers
if __name__ == '__main__':
    # Run the full tracking pipeline on the sample clip with interactive
    # (manual) box calibration and live frame display.
    tracker = VideoTracker(
        "./test_images/full_page2.MOV",
        tracker_type="CSRT",
        auto_calibrate=False,
        show_frame=True,
    )