Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

yolo nas demo #274

Merged
merged 3 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions demos/yolo_nas/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Base image: NVIDIA's PyTorch container. Available tags are listed at
# https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags
FROM nvcr.io/nvidia/pytorch:22.08-py3

# System packages. libgl1 is presumably needed by OpenCV at import time
# (TODO confirm). The apt cache is removed in the same layer.
RUN apt-get update \
    && apt-get install -y libgl1 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --upgrade pip

# Install super-gradients' own requirements, then pin OpenCV.
# For some reason it doesn't work with the latest version of OpenCV:
#   AttributeError: partially initialized module 'cv2' has no attribute
#   '_registerMatType' (most likely due to a circular import)
RUN pip install -r https://raw.githubusercontent.com/Deci-AI/super-gradients/master/requirements.txt \
    && pip install opencv-python==4.5.5.64

RUN pip install git+https://github.com/tryolabs/norfair.git@master#egg=norfair

# Demo-specific Python requirements.
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt

WORKDIR /demo/src/
22 changes: 22 additions & 0 deletions demos/yolo_nas/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# YOLO-NAS example

Simplest possible example of tracking. Based on [YOLO-NAS-L](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS.md).

## Instructions

1. Build and run the Docker container with `./run_gpu.sh`.
2. Copy a video to the `src` folder.
3. Within the container, run with the default parameters:

```bash
python demo.py <video>.mp4
```

For additional settings, you may display the instructions using `python demo.py --help`.

## Explanation

This example tracks objects using a single point per detection: the centroid of the bounding boxes around cars returned by YOLO-NAS-L.

https://github.com/agosl/norfair/assets/35232517/3faffb87-6d18-4bcd-9321-3742080ef2e4

1 change: 1 addition & 0 deletions demos/yolo_nas/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# YOLO-NAS detector; the distribution name is plural ("super-gradients").
super-gradients==3.1.1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see comment above

8 changes: 8 additions & 0 deletions demos/yolo_nas/run_gpu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env -S bash -e
# Build the demo image, then start an interactive shell in a container with
# all GPUs exposed and the current directory mounted at /demo.
docker build . -t norfair-yolonas
# The mount source is quoted so that a checkout path containing spaces or
# glob characters is passed to docker as a single argument.
docker run -it --rm \
    --gpus all \
    --shm-size=1gb \
    -v "$(realpath .)":/demo \
    norfair-yolonas \
    bash
161 changes: 161 additions & 0 deletions demos/yolo_nas/src/demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import argparse
from typing import List, Optional, Union

import norfair
import numpy as np
import super_gradients
import torch
from norfair import Detection, Tracker, Video

# Tracker distance threshold used when tracking bounding boxes (the script
# pairs it with the "iou" distance function).
DISTANCE_THRESHOLD_BBOX: float = 0.7
# Tracker distance threshold used when tracking centroids (the script pairs
# it with the "euclidean" distance function).
DISTANCE_THRESHOLD_CENTROID: int = 30
# NOTE(review): not referenced anywhere in the visible part of this file.
MAX_DISTANCE: int = 10000


class YOLO_NAS:
    """Callable wrapper around a COCO-pretrained YOLO-NAS-L detector."""

    def __init__(self, model_name: str, device: Optional[str] = None):
        """Load the detector and move it to the requested device.

        Args:
            model_name: Accepted for interface compatibility. It is currently
                unused: the wrapper always loads ``"yolo_nas_l"``.
            device: Torch device string such as ``"cpu"``, ``"cuda"`` or
                ``"cuda:0"``. When ``None``, ``"cuda:0"`` is used if CUDA is
                available and ``"cpu"`` otherwise.

        Raises:
            RuntimeError: If a CUDA device is requested but CUDA is not
                available to PyTorch.
        """
        if device is None:
            # automatically set device if it is None
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
        elif "cuda" in device and not torch.cuda.is_available():
            raise RuntimeError(
                "Selected device='cuda', but cuda is not available to Pytorch."
            )

        self.device = device

        # Load the model unconditionally (previously it was only loaded when
        # an explicit, valid device was given) and honour the chosen device
        # instead of always calling .cuda().
        self.model = super_gradients.training.models.get(
            "yolo_nas_l", pretrained_weights="coco"
        ).to(device)

    def __call__(
        self,
        img: Union[str, np.ndarray],
        conf_threshold: float = 0.35,
        iou_threshold: float = 0.45,
        image_size: int = 720,
        classes: Optional[List[int]] = None,
    ):
        """Run the detector on ``img`` and return the raw prediction object.

        Args:
            img: Image (or image path) forwarded to the model's ``predict``.
            conf_threshold: Confidence threshold forwarded to ``predict``.
            iou_threshold: IoU threshold forwarded to ``predict``.
            image_size: Accepted for interface compatibility; not used here.
            classes: When not ``None``, stored on the model as
                ``model.classes``.
                NOTE(review): whether the underlying model honours this
                attribute is not visible here; confirm before relying on it.

        Returns:
            Whatever ``self.model.predict`` returns for the given image.
        """
        if classes is not None:
            self.model.classes = classes

        # NOTE(review): thresholds are passed positionally as (iou, conf);
        # confirm this matches the installed predict() signature.
        return self.model.predict(img, iou_threshold, conf_threshold)


def yolo_detections_to_norfair_detections(
    yolo_detections, track_points: str = "centroid"  # bbox or centroid
) -> List[Detection]:
    """Convert the YOLO-NAS predictions of one frame to Norfair detections.

    Both modes read the same fields of the first image prediction
    (``class_names``, ``prediction.labels``, ``prediction.confidence`` and
    ``prediction.bboxes_xyxy``), so centroid mode no longer depends on the
    YOLOv5-style ``.xywh`` attribute.

    Args:
        yolo_detections: Prediction object returned by the detector; element
            ``0`` is used as the prediction for the current frame.
        track_points: ``"centroid"`` to emit one point per detection (the
            centre of its box) or ``"bbox"`` to emit the two box corners.

    Returns:
        One ``Detection`` per predicted box, labelled with the class name.

    Raises:
        ValueError: If ``track_points`` is neither ``"centroid"`` nor
            ``"bbox"`` (previously this silently produced no detections).

    NOTE(review): this converter is reused across demos; consider moving it
    into Norfair's utils in a follow-up (e.g. as
    ``xywh_detections_to_norfair``).
    """
    if track_points not in ("centroid", "bbox"):
        raise ValueError(
            f"track_points must be 'centroid' or 'bbox', got {track_points!r}"
        )

    # yolo_nas detections for the first image of the prediction
    image_prediction = yolo_detections[0]
    class_names = image_prediction.class_names
    prediction = image_prediction.prediction

    norfair_detections: List[Detection] = []
    for label, conf, bbox_yolo in zip(
        prediction.labels, prediction.confidence, prediction.bboxes_xyxy
    ):
        x_min, y_min, x_max, y_max = bbox_yolo[:4]

        if track_points == "centroid":
            # A single point per detection: the centre of the box.
            points = np.array([(x_min + x_max) / 2, (y_min + y_max) / 2])
            scores = np.array([conf])
        else:
            # Two points per detection: the top-left and bottom-right
            # corners, each carrying the detection's confidence.
            points = np.array([[x_min, y_min], [x_max, y_max]])
            scores = np.array([conf, conf])

        norfair_detections.append(
            Detection(points=points, scores=scores, label=class_names[int(label)])
        )

    return norfair_detections


# Command-line interface. Defaults are given with their real types, and the
# help strings name the detector this demo actually uses.
parser = argparse.ArgumentParser(description="Track objects in a video.")
parser.add_argument("files", type=str, nargs="+", help="Video files to process")
parser.add_argument(
    "--model-name", type=str, default="yolo_nas_l", help="YOLO-NAS model name"
)
parser.add_argument(
    "--img-size", type=int, default=720, help="YOLO-NAS inference size (pixels)"
)
parser.add_argument(
    "--conf-threshold",
    type=float,
    default=0.25,
    help="YOLO-NAS object confidence threshold",
)
parser.add_argument(
    "--iou-threshold", type=float, default=0.45, help="YOLO-NAS IOU threshold for NMS"
)
# NOTE(review): --classes is parsed but never forwarded to the model below.
parser.add_argument(
    "--classes",
    nargs="+",
    type=int,
    help="Filter by class: --classes 0, or --classes 0 2 3",
)
parser.add_argument(
    "--device", type=str, default="cuda", help="Inference device: 'cpu' or 'cuda'"
)
# Restricting the choices makes a typo fail at parse time instead of running
# the whole video without tracking or drawing anything.
parser.add_argument(
    "--track-points",
    type=str,
    default="bbox",
    choices=("centroid", "bbox"),
    help="Track points: 'centroid' or 'bbox'",
)
args = parser.parse_args()

model = YOLO_NAS(args.model_name, device=args.device)

for input_path in args.files:
    video = Video(input_path=input_path)

    # Boxes are matched by IoU, single points by euclidean distance; each
    # mode has its own threshold.
    distance_function = "iou" if args.track_points == "bbox" else "euclidean"
    distance_threshold = (
        DISTANCE_THRESHOLD_BBOX
        if args.track_points == "bbox"
        else DISTANCE_THRESHOLD_CENTROID
    )

    # A fresh tracker per input video.
    tracker = Tracker(
        distance_function=distance_function,
        distance_threshold=distance_threshold,
    )

    for frame in video:
        yolo_detections = model(
            frame,
            conf_threshold=args.conf_threshold,
            iou_threshold=args.iou_threshold,
            image_size=args.img_size,
        )

        detections = yolo_detections_to_norfair_detections(
            yolo_detections, track_points=args.track_points
        )
        tracked_objects = tracker.update(detections=detections)

        # Draw the raw detections first, then the tracked objects on top.
        if args.track_points == "centroid":
            norfair.draw_points(frame, detections)
            norfair.draw_tracked_objects(frame, tracked_objects)
        elif args.track_points == "bbox":
            norfair.draw_boxes(frame, detections)
            norfair.draw_boxes(frame, tracked_objects, draw_ids=True)
        video.write(frame)
Loading