This repository has been archived by the owner on Mar 19, 2023. It is now read-only.

Merge pull request #122 from robmarkcole/add-roi
Add roi
robmarkcole authored May 14, 2020
2 parents 791b1e4 + 3c43bbb commit 1931ca7
Showing 4 changed files with 86 additions and 5 deletions.
12 changes: 11 additions & 1 deletion README.md
@@ -40,7 +40,7 @@ Which should return something like:
```

## Usage of this component
The `deepstack_object` component adds an `image_processing` entity where the state of the entity is the total count of target objects that are above a `confidence` threshold which has a default value of 80%. You can have a single target object class, or multiple. The time of the last detection of any target object is in the `last target detection` attribute. The type and number of objects (of any confidence) is listed in the `summary` attributes. Optionally the processed image can be saved to disk, with bounding boxes showing the location of detected objects. If `save_file_folder` is configured, an image with filename of format `deepstack_object_{source name}_latest.jpg` is over-written on each new detection of a target. Optionally this image can also be saved with a timestamp in the filename, if `save_timestamped_file` is configured as `True`. An event `deepstack.object_detected` is fired for each object detected. If you are a power user with advanced needs such as zoning detections or you want to track multiple object types, you will need to use the `deepstack.object_detected` events.
The `deepstack_object` component adds an `image_processing` entity where the state of the entity is the total count of target objects that are above a `confidence` threshold, which has a default value of 80%. You can have a single target object class, or multiple. The time of the last detection of any target object is in the `last target detection` attribute. The type and number of objects (of any confidence) is listed in the `summary` attributes. Optionally a region of interest (ROI) can be configured, and only objects with their center (represented by an `x`) within the ROI will be included in the state count. The ROI will be displayed as a green box, and objects with their center in the ROI have a red box, whilst objects with their center outside the ROI have a yellow box. Also optionally, the processed image can be saved to disk, with bounding boxes showing the location of detected objects. If `save_file_folder` is configured, an image with a filename of the format `deepstack_object_{source name}_latest.jpg` is overwritten on each new detection of a target. Optionally this image can also be saved with a timestamp in the filename, if `save_timestamped_file` is configured as `True`. An event `deepstack.object_detected` is fired for each object detected. If you are a power user with advanced needs such as zoning detections or you want to track multiple object types, you will need to use the `deepstack.object_detected` events.
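The counting behaviour described above can be sketched as follows (an illustrative sketch only — the function name and detection-dict shape are assumptions, not the component's actual internals):

```python
# Sketch: the entity state is the number of detections whose class is a
# configured target and whose confidence clears the threshold (default 80%).
def count_targets(detections, targets, confidence=80):
    """detections: list of {'name': str, 'confidence': float} dicts."""
    return len(
        [d for d in detections
         if d["name"] in targets and d["confidence"] > confidence]
    )

detections = [
    {"name": "person", "confidence": 91.2},
    {"name": "person", "confidence": 55.0},  # below threshold, not counted
    {"name": "car", "confidence": 88.4},
]
print(count_targets(detections, ["person"]))         # 1
print(count_targets(detections, ["person", "car"]))  # 2
```

With an ROI configured, the real component additionally requires each detection's centroid to fall inside the ROI before it is counted.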

**Note** that by default the component will **not** automatically scan images, but requires you to call the `image_processing.scan` service e.g. using an automation triggered by motion.

@@ -59,6 +59,10 @@ image_processing:
api_key: mysecretkey
save_file_folder: /config/snapshots/
save_timestamped_file: True
# roi_x_min: 0.35
roi_x_max: 0.8
# roi_y_min: 0.4
roi_y_max: 0.8
targets:
- person
- car
@@ -73,11 +77,17 @@ Configuration variables:
- **timeout**: (Optional, default 10 seconds) The timeout for requests to deepstack.
- **save_file_folder**: (Optional) The folder to save processed images to. Note that the folder path should be added to [whitelist_external_dirs](https://www.home-assistant.io/docs/configuration/basic/).
- **save_timestamped_file**: (Optional, default `False`, requires `save_file_folder` to be configured) Save the processed image with the time of detection in the filename.
- **roi_x_min**: (Optional, default 0) Range 0-1; must be less than `roi_x_max`.
- **roi_x_max**: (Optional, default 1) Range 0-1; must be greater than `roi_x_min`.
- **roi_y_min**: (Optional, default 0) Range 0-1; must be less than `roi_y_max`.
- **roi_y_max**: (Optional, default 1) Range 0-1; must be greater than `roi_y_min`.
- **source**: Must be a camera.
- **targets**: The list of target objects, default `person`.
- **confidence**: (Optional) The confidence (in %) above which detected targets are counted in the sensor state. Default value: 80
- **name**: (Optional) A custom name for the entity.

For the ROI, the (x=0,y=0) position is the top left pixel of the image, and the (x=1,y=1) position is the bottom right pixel of the image. It might seem a bit odd to have y running from top to bottom of the image, but that is the coordinate system used by Pillow.
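A minimal sketch of that coordinate convention and the centre-in-ROI test (mirroring the `point_in_box` helper in this commit; the ROI values here are illustrative):

```python
# (x=0, y=0) is the top-left of the image and (x=1, y=1) the bottom-right,
# so y increases downwards, matching Pillow's pixel coordinate system.
def point_in_box(box, point):
    """box: (y_min, x_min, y_max, x_max); point: (y, x); all in 0-1."""
    y_min, x_min, y_max, x_max = box
    y, x = point
    return (x_min <= x <= x_max) and (y_min <= y <= y_max)

roi = (0.4, 0.35, 0.8, 0.8)  # roi_y_min, roi_x_min, roi_y_max, roi_x_max
print(point_in_box(roi, (0.5, 0.5)))  # True: centroid inside the ROI
print(point_in_box(roi, (0.1, 0.5)))  # False: above the ROI (small y = near top)
```

Only detections whose centroid passes this test are counted in the entity state.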

<p align="center">
<img src="https://github.com/robmarkcole/HASS-Deepstack-object/blob/master/docs/object_usage.png" width="500">
</p>
79 changes: 75 additions & 4 deletions custom_components/deepstack_object/image_processing.py
@@ -4,6 +4,7 @@
For more details about this platform, please refer to the documentation at
https://home-assistant.io/components/image_processing.deepstack_object
"""
from collections import namedtuple
import datetime
import io
import logging
@@ -47,18 +48,35 @@
CONF_TIMEOUT = "timeout"
CONF_SAVE_FILE_FOLDER = "save_file_folder"
CONF_SAVE_TIMESTAMPTED_FILE = "save_timestamped_file"
CONF_ROI_Y_MIN = "roi_y_min"
CONF_ROI_X_MIN = "roi_x_min"
CONF_ROI_Y_MAX = "roi_y_max"
CONF_ROI_X_MAX = "roi_x_max"

DATETIME_FORMAT = "%Y-%m-%d_%H:%M:%S"
DEFAULT_API_KEY = ""
DEFAULT_TARGETS = ["person"]
DEFAULT_TIMEOUT = 10
DEFAULT_ROI_Y_MIN = 0.0
DEFAULT_ROI_Y_MAX = 1.0
DEFAULT_ROI_X_MIN = 0.0
DEFAULT_ROI_X_MAX = 1.0
DEFAULT_ROI = (
DEFAULT_ROI_Y_MIN,
DEFAULT_ROI_X_MIN,
DEFAULT_ROI_Y_MAX,
DEFAULT_ROI_X_MAX,
)

EVENT_OBJECT_DETECTED = "deepstack.object_detected"
BOX = "box"
FILE = "file"
OBJECT = "object"

RED = (255, 0, 0)
# rgb(red, green, blue)
RED = (255, 0, 0) # For objects within the ROI
GREEN = (0, 255, 0) # For ROI box
YELLOW = (255, 255, 0) # For objects outside the ROI


PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
@@ -70,11 +88,32 @@
vol.Optional(CONF_TARGETS, default=DEFAULT_TARGETS): vol.All(
cv.ensure_list, [cv.string]
),
vol.Optional(CONF_ROI_Y_MIN, default=DEFAULT_ROI_Y_MIN): cv.small_float,
vol.Optional(CONF_ROI_X_MIN, default=DEFAULT_ROI_X_MIN): cv.small_float,
vol.Optional(CONF_ROI_Y_MAX, default=DEFAULT_ROI_Y_MAX): cv.small_float,
vol.Optional(CONF_ROI_X_MAX, default=DEFAULT_ROI_X_MAX): cv.small_float,
vol.Optional(CONF_SAVE_FILE_FOLDER): cv.isdir,
vol.Optional(CONF_SAVE_TIMESTAMPTED_FILE, default=False): cv.boolean,
}
)

Box = namedtuple("Box", "y_min x_min y_max x_max")
Point = namedtuple("Point", "y x")


def point_in_box(box: Box, point: Point) -> bool:
"""Return true if point lies in box"""
if (box.x_min <= point.x <= box.x_max) and (box.y_min <= point.y <= box.y_max):
return True
return False


def object_in_roi(roi: dict, centroid: dict) -> bool:
"""Convenience to convert dicts to the Point and Box."""
target_center_point = Point(centroid["y"], centroid["x"])
roi_box = Box(roi["y_min"], roi["x_min"], roi["y_max"], roi["x_max"])
return point_in_box(roi_box, target_center_point)


def get_valid_filename(name: str) -> str:
return re.sub(r"(?u)[^-\w.]", "", str(name).strip().replace(" ", "_"))
@@ -131,6 +170,10 @@ def setup_platform(hass, config, add_devices, discovery_info=None):
config.get(CONF_TIMEOUT),
targets,
config.get(ATTR_CONFIDENCE),
config[CONF_ROI_Y_MIN],
config[CONF_ROI_X_MIN],
config[CONF_ROI_Y_MAX],
config[CONF_ROI_X_MAX],
save_file_folder,
config.get(CONF_SAVE_TIMESTAMPTED_FILE),
camera.get(CONF_ENTITY_ID),
@@ -151,6 +194,10 @@ def __init__(
timeout,
targets,
confidence,
roi_y_min,
roi_x_min,
roi_y_max,
roi_x_max,
save_file_folder,
save_timestamped_file,
camera_entity,
@@ -173,6 +220,13 @@ def __init__(
self._targets_found = []
self._summary = {}

self._roi_dict = {
"y_min": roi_y_min,
"x_min": roi_x_min,
"y_max": roi_y_max,
"x_max": roi_x_max,
}

self._last_detection = None
self._image_width = None
self._image_height = None
@@ -201,7 +255,9 @@ def process_image(self, image):
self._targets_found = [
obj
for obj in self._objects
if (obj["name"] in self._targets) and (obj["confidence"] > self._confidence)
if (obj["name"] in self._targets)
and (obj["confidence"] > self._confidence)
and (object_in_roi(self._roi_dict, obj["centroid"]))
]

self._state = len(self._targets_found)
@@ -249,6 +305,9 @@ def device_state_attributes(self):
"""Return device specific state attributes."""
attr = {}
for target in self._targets:
attr[f"ROI {target} count"] = len(
[t for t in self._targets_found if t["name"] == target]
)
attr[f"ALL {target} count"] = len(
[t for t in self._objects if t["name"] == target]
)
@@ -267,6 +326,12 @@ def save_image(self, image, targets, confidence, directory):
return
draw = ImageDraw.Draw(img)

roi_tuple = tuple(self._roi_dict.values())
if roi_tuple != DEFAULT_ROI:
draw_box(
draw, roi_tuple, img.width, img.height, text="ROI", color=GREEN,
)

for obj in self._objects:
if not obj["name"] in self._targets:
continue
@@ -275,20 +340,26 @@
box = obj["bounding_box"]
centroid = obj["centroid"]
box_label = f"{name}: {confidence:.1f}%"

if object_in_roi(self._roi_dict, centroid):
box_colour = RED
else:
box_colour = YELLOW

draw_box(
draw,
(box["y_min"], box["x_min"], box["y_max"], box["x_max"]),
img.width,
img.height,
text=box_label,
color=RED,
color=box_colour,
)

# draw bullseye
draw.text(
(centroid["x"] * img.width, centroid["y"] * img.height),
text="X",
fill=RED,
fill=box_colour,
)

latest_save_path = (
Binary file modified docs/object_detail.png
Binary file modified docs/object_usage.png
