Merge pull request #108 from lf-lang/working
Resurrection of the YOLO example
lhstrh authored May 1, 2024
2 parents 3155c13 + d925f34 commit 2fd32be
Showing 5 changed files with 204 additions and 132 deletions.
19 changes: 14 additions & 5 deletions examples/Python/src/YOLOv5/README.md
@@ -1,13 +1,22 @@
# YOLO

This collection of examples shows how to process video data in Python and how to invoke a DNN-based object-recognition algorithm on the video frames.

# Setup
First, go to the PyTorch website and follow the instructions to install PyTorch: https://pytorch.org/get-started/locally/

IMPORTANT: If running with an NVIDIA GPU, select the correct CUDA version on the installation page.
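
To verify the installation (a quick sanity check, not part of the examples), run the following in Python:

    import torch

    print(torch.__version__)          # installed PyTorch version
    print(torch.cuda.is_available())  # True if the CUDA build can see your GPU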

Then, install other libraries:

    python3 -m pip install -r requirements.txt

Compile the programs with `lfc`:

    lfc YOLOv5_Webcam.lf   # (or lfc YOLOv5_Webcam_Timer.lf)

Follow the instructions printed by `lfc` to run the program.

# Examples

* [Video.lf](Video.lf): Simple video capture and display. Here, the timing of frame capture is controlled by a Lingua Franca timer whose period is a parameter of the `WebCam` reactor.
* [VideoAsync.lf](VideoAsync.lf): This is similar, except that the frame rate is set on the camera and the `WebCamAsync` reactor blocks on input video frames. This puts the camera in charge of the timing of program execution.
* [YOLOv5_Webcam.lf](YOLOv5_Webcam.lf): This example analyzes each video frame using a pre-trained object-recognition DNN and displays an annotated image. This version uses the `WebCamAsync` reactor from `VideoAsync.lf`.
* [YOLOv5_Webcam_Timer.lf](YOLOv5_Webcam_Timer.lf): This example is similar but uses `WebCam` from `Video.lf`, so its timing is driven by a timer rather than by the camera.
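
The following standalone sketch shows roughly what the `DNN` reactor in the YOLOv5 examples does. It assumes network access so that `torch.hub` can download the `ultralytics/yolov5` model on first use:

    import cv2
    import torch

    # Download (on first use) and load the pre-trained YOLOv5s model.
    model = torch.hub.load("ultralytics/yolov5", "yolov5s", pretrained=True)

    stream = cv2.VideoCapture(0)
    ret, frame = stream.read()
    stream.release()

    if ret:
        results = model([frame])           # run inference on a batch of one frame
        labels = results.xyxyn[0][:, -1]   # class index for each detected object
        coords = results.xyxyn[0][:, :-1]  # normalized box corners plus confidence
        print([model.names[int(label)] for label in labels])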

77 changes: 77 additions & 0 deletions examples/Python/src/YOLOv5/Video.lf
@@ -0,0 +1,77 @@
/** Video capture and playback example using OpenCV. Please see README.md for instructions. */
target Python {
  single-threaded: true  # OpenCV crashes if we use the multithreaded version.
}

preamble {=
  import sys  # Assumed import; used for error reporting below.
  import cv2
=}

/**
 * Produce a sequence of frames with the specified offset and period.
 * @param webcam_id The ID of the camera (default 0).
 * @param offset Time until frames start to be captured.
 * @param period The period with which frames will be read.
 */
reactor WebCam(webcam_id=0, offset = 0 s, period = 100 ms) {
  output camera_frame

  state stream
  timer camera_tick(offset, period)

  reaction(startup) {=
    self.stream = cv2.VideoCapture(self.webcam_id, cv2.CAP_ANY)  # or CAP_DSHOW
    if not self.stream.isOpened():
      sys.stderr.write("Error: Failed to capture from the webcam.\n")
      exit(1)

    # Here, LF is in charge of the timing, so do not set the frame rate.
    # self.stream.set(cv2.CAP_PROP_FPS, 30)  # Set the camera's FPS to 30
  =}

  reaction(camera_tick) -> camera_frame {=
    # read() is a combination of grab() and retrieve().
    ret, frame = self.stream.read()
    if ret:
      camera_frame.set(frame)
    else:
      print("WARNING: Camera frame missing.")
  =}

  reaction(shutdown) {=
    self.stream.release()
  =}
}

/** Display video frames. */
reactor Display {
  input frame
  state frame_count = 0

  reaction(startup) {=
    print("\n******* Press 'q' in the video window to exit *******\n")
  =}

  reaction(frame) {=
    self.frame_count += 1
    # Every hundred frames, report the average frame rate.
    if self.frame_count % 100 == 0:
      print(f"** Average frame rate: {self.frame_count * SEC(1) / lf.time.physical_elapsed()} f/s")

    cv2.imshow("frame", frame.value)
    # Press 'q' to exit.
    if cv2.waitKey(1) & 0xFF == ord('q'):
      request_stop()
  =}

  reaction(shutdown) {=
    # Destroy all windows now.
    cv2.destroyAllWindows()
  =}
}

main reactor {
  webcam = new WebCam()
  display = new Display()
  webcam.camera_frame -> display.frame
}
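
For comparison, here is a minimal plain-Python sketch of the same timer-driven pipeline, without LF's timing machinery; the explicit sleep plays the role of the `camera_tick` timer:

    import time
    import cv2

    stream = cv2.VideoCapture(0, cv2.CAP_ANY)
    period = 0.1  # seconds; analogous to the WebCam reactor's 100 ms period

    while True:
        start = time.monotonic()
        ret, frame = stream.read()
        if ret:
            cv2.imshow("frame", frame)
        else:
            print("WARNING: Camera frame missing.")
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        # Sleep off the remainder of the period; the LF timer does this for us.
        time.sleep(max(0.0, period - (time.monotonic() - start)))

    stream.release()
    cv2.destroyAllWindows()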
68 changes: 68 additions & 0 deletions examples/Python/src/YOLOv5/VideoAsync.lf
@@ -0,0 +1,68 @@
/**
 * Video capture and playback example using OpenCV with the camera driving the timing. Please see
 * README.md for instructions.
 */
target Python {
  keepalive: true,
  single-threaded: true  # OpenCV crashes if we use the multithreaded version.
}

import Display from "Video.lf"

preamble {=
  import sys  # Assumed import; used for error reporting below.
  import cv2
=}

/**
 * Produce a sequence of frames as they are delivered by the camera. This version uses blocking
 * reads to obtain video frames, starting each read shortly after the previous one completes. It
 * should only be used in programs where the camera frames drive everything, because WebCamAsync
 * blocks until it gets a camera frame.
 *
 * @param webcam_id The ID of the camera (default 0).
 * @param offset Time until frames start to be captured.
 * @param frames_per_second The number of frames per second to set the camera to.
 */
reactor WebCamAsync(webcam_id=0, offset = 0 s, frames_per_second=30) {
  input trigger
  output camera_frame

  timer start(offset)
  state stream

  reaction(start) -> camera_frame {=
    self.stream = cv2.VideoCapture(self.webcam_id, cv2.CAP_ANY)
    if not self.stream.isOpened():
      sys.stderr.write("Error: Failed to open the camera.\n")
      exit(1)

    self.stream.set(cv2.CAP_PROP_FPS, self.frames_per_second)

    # Read the first frame. This is a blocking read.
    ret, frame = self.stream.read()
    if ret:
      camera_frame.set(frame)
    else:
      print("Warning: failed to get the first frame.")
  =}

  reaction(trigger) -> camera_frame {=
    # Read a frame. This is a blocking read.
    ret, frame = self.stream.read()
    if ret:
      camera_frame.set(frame)
    else:
      print("Warning: failed to get a frame.")
  =}

  reaction(shutdown) {=
    self.stream.release()
  =}
}

main reactor {
  webcam = new WebCamAsync()
  display = new Display()
  webcam.camera_frame -> display.frame
  webcam.camera_frame ~> webcam.trigger
}
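
In plain Python, the camera-driven pattern that `WebCamAsync` implements is simply a blocking read loop, with the camera's configured frame rate pacing the program; in the LF version, the physical connection `~>` feeds each output frame back in as the trigger for the next blocking read. A sketch:

    import cv2

    stream = cv2.VideoCapture(0, cv2.CAP_ANY)
    stream.set(cv2.CAP_PROP_FPS, 30)  # request 30 fps; some drivers ignore this

    while True:
        ret, frame = stream.read()  # blocks until the camera delivers a frame
        if not ret:
            print("WARNING: Camera frame missing.")
            continue
        cv2.imshow("frame", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    stream.release()
    cv2.destroyAllWindows()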
115 changes: 32 additions & 83 deletions examples/Python/src/YOLOv5/YOLOv5_Webcam.lf
@@ -1,68 +1,22 @@
/**
 * Example of a Deep Neural Network (YOLOv5) in LF. Please see README.md for instructions. Adapted
 * from
 * @brief Example of a Deep Neural Network (YOLOv5) in LF.
 *
 * Please see README.md for instructions. This uses ultralytics/yolov5. Adapted from:
 * https://towardsdatascience.com/implementing-real-time-object-detection-system-using-pytorch-and-opencv-70bac41148f7
 */
target Python
target Python {
  keepalive: true,
  single-threaded: true  # OpenCV crashes if we use the multithreaded version.
}

import WebCamAsync from "VideoAsync.lf"
import Display from "Video.lf"

preamble {=
  BILLION = 1_000_000_000
  import cv2
=}

/**
 * Use OpenCV2 to read from the user webcam.
 *
 * Camera frames are captured into the LF program via a physical action.
 *
 * 'webcam_id' (default 0) can be adjusted according to your local setup.
 */
reactor WebCam(webcam_id=0) {
  output camera_frame
  state stream
  state video_capture_thread
  state thread_should_be_running
  physical action frame_action

  preamble {=
    from cv2 import cv2
    import threading

    def video_capture(self, frame_action, running):
      # Read a frame
      ret, frame = self.stream.read()
      while running.is_set():
        if ret is True:
          # If we got a frame, schedule the physical action
          frame_action.schedule(0, (lf.time.physical_elapsed(), frame))
        ret, frame = self.stream.read()
      return None
  =}

  reaction(startup) -> frame_action {=
    self.stream = self.cv2.VideoCapture(self.webcam_id, self.cv2.CAP_ANY)
    if not self.stream.isOpened():
      sys.stderr.write("Error: Failed to capture from the webcam.\n")
      exit(1)

    self.stream.set(self.cv2.CAP_PROP_FPS, 30)  # Set the camera's FPS to 30

    self.thread_should_be_running = self.threading.Event()
    self.thread_should_be_running.set()

    self.video_capture_thread = self.threading.Thread(target=self.video_capture, args=(frame_action, self.thread_should_be_running))
    self.video_capture_thread.start()
  =}

  reaction(frame_action) -> camera_frame {=
    camera_frame.set(frame_action.value)
  =}

  reaction(shutdown) {=
    self.thread_should_be_running.clear()
    self.video_capture_thread.join()
    self.stream.release()
  =}
}

/**
 * A YOLOv5 DNN that takes a frame as input and produces object 'labels' and object label
 * coordinates (where each label/object is on the frame).
@@ -93,9 +47,8 @@ reactor DNN {
  =}

  reaction(frame) -> labels, label_coordinates {=
    _, frame_data = frame.value
    # Convert the frame into a tuple
    fr = [frame_data]
    fr = [frame.value]
    # Run the model on the frame
    results = self._model(fr)
    # Extract the labels
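
For reference, the object returned by the hub-loaded YOLOv5 model exposes one tensor per input image; each row of `results.xyxyn[0]` holds normalized `x1, y1, x2, y2`, a confidence score, and a class index. A sketch of unpacking one detection (assuming `model`, `frame`, and `results` as in the README example):

    results = model([frame])
    row = results.xyxyn[0][0]  # first detection in the first image
    x1, y1, x2, y2, confidence, class_index = row.tolist()
    print(model.names[int(class_index)], confidence)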
@@ -111,17 +64,15 @@ reactor Plotter(label_deadline = 100 msec) {
  input labels
  input label_coordinates
  input model

  output result

  state _model  # Keep the model
  state _prev_time = 0

  preamble {=
    from cv2 import cv2
  =}

  /** Receive the DNN model */
  reaction(model) {=
    self._model = model.value
    print("\n******* Press 'q' to exit *******\n")
  =}

  /** Impose a deadline on object labels */
@@ -132,20 +83,20 @@ reactor Plotter(label_deadline = 100 msec) {
  =}

  /**
   * Given a frame, object labels, and the corresponding object label coordinates, draw an
   * interactive OpenCV window.
   * Given a frame, object labels, and the corresponding object label coordinates, draw on the frame
   * and produce an output.
   */
  reaction(frame, labels, label_coordinates) {=
  reaction(frame, labels, label_coordinates) -> result {=
    if (not frame.is_present or
        not labels.is_present or
        not label_coordinates.is_present):
      sys.stderr.write("Error: Expected all inputs to be present at the same time.\n")
      request_stop()
      return

    elapsed_time, frame_data = frame.value
    # Get how many labels we have
    n = len(labels.value)
    x_shape, y_shape = frame_data.shape[1], frame_data.shape[0]
    x_shape, y_shape = frame.value.shape[1], frame.value.shape[0]
    for i in range(n):
      row = label_coordinates.value[i]
      # If the score is less than 0.2, we avoid making a prediction.
@@ -157,36 +108,30 @@ reactor Plotter(label_deadline = 100 msec) {
      y2 = int(row[3] * y_shape)
      bgr = (0, 255, 0)  # color of the box
      classes = self._model.names  # Get the name of label index
      label_font = self.cv2.FONT_HERSHEY_SIMPLEX  # Font for the label.
      self.cv2.rectangle(frame_data, \
      label_font = cv2.FONT_HERSHEY_SIMPLEX  # Font for the label.
      cv2.rectangle(frame.value, \
        (x1, y1), (x2, y2), \
        bgr, 2)  # Plot the boxes
      self.cv2.putText(frame_data, \
      cv2.putText(frame.value, \
        classes[int(labels.value[i])], \
        (x1, y1), \
        label_font, 0.9, bgr, 2)  # Put a label over the box.

    fps = int(1 / (elapsed_time / BILLION - self._prev_time / BILLION))
    self._prev_time = elapsed_time
    self.cv2.putText(frame_data, str(fps), (7, 70),
      self.cv2.FONT_HERSHEY_SIMPLEX, 3,
      (100, 255, 0), 3, self.cv2.LINE_AA)
    self.cv2.imshow("frame", frame_data)
    # Press 'q' to exit.
    if self.cv2.waitKey(1) & 0xFF == ord('q'):
      request_stop()
    result.set(frame.value)
  =}

  reaction(shutdown) {=
    # Destroy all windows now.
    self.cv2.destroyAllWindows()
    cv2.destroyAllWindows()
  =}
}

main reactor {
webcam = new WebCam()
# Offset allows time for the model to load.
webcam = new WebCamAsync(offset = 2 s)
dnn = new DNN()
plotter = new Plotter()
display = new Display()

# Send the camera frame to the DNN to be process and to the plotter to be depicted
(webcam.camera_frame)+ -> dnn.frame, plotter.frame
Expand All @@ -196,4 +141,8 @@ main reactor {
# Send the DNN model to the plotter. It will be used to extract the human-readable names
# of each label.
dnn.model -> plotter.model

webcam.camera_frame ~> webcam.trigger

plotter.result -> display.frame
}
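
Isolated as plain Python, the Plotter's drawing logic scales the normalized coordinates back to pixel space and annotates the frame. A sketch (`coords`, `labels`, and `model.names` as in the earlier snippets, with the 0.2 confidence threshold from the reaction above):

    import cv2

    def annotate(frame, coords, labels, names, threshold=0.2):
        y_shape, x_shape = frame.shape[0], frame.shape[1]
        for row, label in zip(coords, labels):
            x1, y1, x2, y2, confidence = row.tolist()
            if confidence < threshold:
                continue  # skip low-confidence detections
            p1 = (int(x1 * x_shape), int(y1 * y_shape))
            p2 = (int(x2 * x_shape), int(y2 * y_shape))
            cv2.rectangle(frame, p1, p2, (0, 255, 0), 2)
            cv2.putText(frame, names[int(label)], p1,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        return frame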
