Added code for webstreaming of resulting frame
sebastianfis committed Mar 25, 2024
1 parent f8007eb commit 15840d9
Showing 3 changed files with 192 additions and 6 deletions.
40 changes: 34 additions & 6 deletions edgetpumodel.py
@@ -117,9 +117,8 @@ def get_image_size(self):
            logger.debug("Expecting input shape: {}".format(self.input_size))
            return self.input_size
        else:
-            logger.warn("Interpreter is not yet loaded")
-
-
+            logger.warning("Interpreter is not yet loaded")
+
    def predict(self, image_path, save_img=True, save_txt=True):
        logger.info("Attempting to load {}".format(image_path))

@@ -132,9 +131,7 @@ def predict(self, image_path, save_img=True, save_txt=True):
        det = self.process_predictions(pred[0], full_image, pad, output_path, save_img=save_img, save_txt=save_txt)

        return det
-
-
-
+
    def forward(self, x: np.ndarray, with_nms=True) -> np.ndarray:
        """
        Predict function using the EdgeTPU
@@ -280,3 +277,34 @@ def process_predictions(self, det, output_image, pad, output_path="detection.jpg
            cv2.imwrite(output_path, output_image)

        return det
+
+    def process_images(self, det, output_image, pad, hide_labels=False, hide_conf=False):
+        """
+        Process predictions and return the frame annotated with boxes and
+        (optionally) class labels and confidences
+        """
+        if len(det):
+            # Rescale boxes from img_size to im0 size (x1, y1, x2, y2)
+            det[:, :4] = self.get_scaled_coords(det[:, :4], output_image, pad)
+
+            s = ""
+
+            # Print results
+            for c in np.unique(det[:, -1]):
+                n = (det[:, -1] == c).sum()  # detections per class
+                s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
+
+            if s != "":
+                s = s.strip()
+                s = s[:-1]  # drop the trailing comma
+
+            logger.info("Detected: {}".format(s))
+
+            # Write results
+            for *xyxy, conf, cls in reversed(det):
+                # Add bbox to image
+                c = int(cls)  # integer class
+                label = None if hide_labels else (self.names[c] if hide_conf else f'{self.names[c]} {conf:.2f}')
+                output_image = plot_one_box(xyxy, output_image, label=label, color=self.colors(c, True))
+
+        return output_image
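
Unlike process_predictions, the new process_images method returns the annotated frame instead of writing it to disk, which is what lets the streaming code below hand frames straight to the browser. A minimal usage sketch (assuming model is a loaded EdgeTPUModel and image is a BGR frame from OpenCV, mirroring the calls made in webstreaming.py):

from utils import get_image_tensor

input_size = model.get_image_size()
full_image, net_image, pad = get_image_tensor(image, input_size[0])
pred = model.forward(net_image)
annotated = model.process_images(pred[0], full_image, pad)  # ndarray, ready to encode
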
9 changes: 9 additions & 0 deletions templates/index.html
@@ -0,0 +1,9 @@
<html>
<head>
    <title>Robi object detection</title>
</head>
<body>
    <h1>Robi vision object detection</h1>
    <img src="{{ url_for('video_feed') }}">
</body>
</html>
149 changes: 149 additions & 0 deletions webstreaming.py
@@ -0,0 +1,149 @@
# import the necessary packages
from flask import Response
from flask import Flask
from flask import render_template
import threading
import argparse
import datetime
import time
import cv2
import logging

from edgetpumodel import EdgeTPUModel
from utils import get_image_tensor

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# initialize the output frame and a lock used to ensure thread-safe
# exchanges of the output frames (useful when multiple browsers/tabs
# are viewing the stream)
outputFrame = None
lock = threading.Lock()

# initialize a flask object
app = Flask(__name__)

# allow the camera sensor to warm up before the detection thread
# opens the video stream
time.sleep(2.0)


@app.route("/")
def index():
# return the rendered template
return render_template("index.html")


def detect_stream(model: EdgeTPUModel, video_device):
    # grab global references to the output frame and lock variables
    global outputFrame, lock

    # open the capture device at the model's expected input size
    input_size = model.get_image_size()
    cam = cv2.VideoCapture(video_device)

    while True:
        try:
            res, image = cam.read()

            if not res:
                logger.error("Empty image received")
                break
            else:
                full_image, net_image, pad = get_image_tensor(image, input_size[0])
                pred = model.forward(net_image)
                frame = model.process_images(pred[0], full_image, pad)

                timestamp = datetime.datetime.now()
                tinference, tnms = model.get_last_inference_time()
                logger.info("Frame done in {}".format(tinference + tnms))
                # draw the timestamp on the annotated frame (not the raw
                # camera image), so it actually shows up in the stream
                cv2.putText(frame, timestamp.strftime(
                    "%A %d %B %Y %I:%M:%S%p"), (10, frame.shape[0] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 255), 1)

                with lock:
                    outputFrame = frame.copy()

        except KeyboardInterrupt:
            break

    cam.release()



# helper to draw raw detection objects onto a frame
# (currently unused in this module; kept for reference)
def append_objs_to_img(image, inference_size, objs, labels):
    height, width, channels = image.shape
    scale_x, scale_y = width / inference_size[0], height / inference_size[1]
    for obj in objs:
        bbox = obj.bbox.scale(scale_x, scale_y)
        x0, y0 = int(bbox.xmin), int(bbox.ymin)
        x1, y1 = int(bbox.xmax), int(bbox.ymax)

        percent = int(100 * obj.score)
        label = '{}% {}'.format(percent, labels.get(obj.id, obj.id))

        image = cv2.rectangle(image, (x0, y0), (x1, y1), (0, 255, 0), 2)
        image = cv2.putText(image, label, (x0, y0 + 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 2)
    return image


def generate():
    # grab global references to the output frame and lock variables
    global outputFrame, lock

    # loop over frames from the output stream
    while True:
        # wait until the lock is acquired
        with lock:
            # check if the output frame is available, otherwise skip
            # the iteration of the loop
            if outputFrame is None:
                continue

            # encode the frame in JPEG format
            (flag, encodedImage) = cv2.imencode(".jpg", outputFrame)

            # ensure the frame was successfully encoded
            if not flag:
                continue

        # yield the output frame in the byte format
        yield (b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' +
               bytearray(encodedImage) + b'\r\n')


@app.route("/video_feed")
def video_feed():
# return the response generated along with the specific media
# type (mime type)
return Response(generate(), mimetype="multipart/x-mixed-replace; boundary=frame")
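# NB: "boundary=frame" must match the b'--frame' delimiter emitted by
# generate() above; the browser uses it to split the multipart/x-mixed-replace
# stream back into individual JPEG frames for the <img> tag in index.html.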

# check to see if this is the main thread of execution
if __name__ == '__main__':
    # construct the argument parser and parse command line arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--ip", type=str, default='192.168.178.47', help="ip address of the device")
    ap.add_argument("-o", "--port", type=int, default=4664, help="port number of the server (1024 to 65535)")
    ap.add_argument("--model", "-m", help="weights file", required=True)
    ap.add_argument("--conf_thresh", type=float, default=0.25, help="model confidence threshold")
    ap.add_argument("--iou_thresh", type=float, default=0.45, help="NMS IOU threshold")
    ap.add_argument("--names", type=str, default='data/coco.yaml', help="Names file")
    ap.add_argument("--device", type=int, default=1, help="Image capture device to run live detection")
    ap.add_argument("--quiet", "-q", action='store_true', help="Disable logging (except errors)")
    ap.add_argument("--v8", action='store_true', help="yolov8 model?")

    args = ap.parse_args()

    if args.quiet:
        logging.disable(logging.CRITICAL)
        logger.disabled = True

    logger.info("Opening stream on device: {}".format(args.device))
    model = EdgeTPUModel(args.model, args.names, conf_thresh=args.conf_thresh, iou_thresh=args.iou_thresh, v8=args.v8)

    # start a thread that runs the detection loop on the camera stream
    t = threading.Thread(target=detect_stream, args=(model, args.device))
    t.daemon = True
    t.start()

    # start the flask app
    app.run(host=args.ip, port=args.port, debug=True, threaded=True, use_reloader=False)
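
Once the server is up, a quick end-to-end check of the stream might look like the sketch below. The model filename is a placeholder and the IP/port are simply the argparse defaults above; reading an MJPEG URL with cv2.VideoCapture assumes an OpenCV build with FFMPEG support.

# launch the server first (shell), e.g.:
#   python webstreaming.py -m model_edgetpu.tflite --names data/coco.yaml --device 0
import cv2

cap = cv2.VideoCapture("http://192.168.178.47:4664/video_feed")
ok, frame = cap.read()  # pulls one JPEG part from the multipart stream
if ok:
    cv2.imwrite("stream_check.jpg", frame)
cap.release()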
