Clarify the face detection logic

modelscope · Dec 6, 2023 · 88e039d · 88e039d
1 parent 37c4e3a
commit 88e039d
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 33 deletions.
diff --git a/configs/config_all.yaml b/configs/config_all.yaml
@@ -110,6 +110,10 @@ process:
       rep_len: 10                                             # repetition length for char-level n-gram
       min_ratio: 0.0                                          # the min ratio of filter range
       max_ratio: 0.5                                          # the max ratio of filter range
+  - face_area_filter:                                       # filter samples according to the face area ratios in images (r=face_area/image_area). If multiple faces are available, we use the largest one.
+      min_ratio: 0.0                                          # the min face area ratio of filter range
+      max_ratio: 0.4                                          # the max face area ratio of filter range
+      upsample_num_times: 0                                   # optional argument passed to the underlying dlib face detector
   - flagged_words_filter:                                   # filter text with the flagged-word ratio larger than a specific max value
       lang: en                                                # consider flagged words in what language
       tokenization: false                                     # whether to use model to tokenize documents

diff --git a/data_juicer/ops/filter/face_area_filter.py b/data_juicer/ops/filter/face_area_filter.py
@@ -63,10 +63,10 @@ def compute_stats(self, sample, context=False):
         if StatsKeys.face_ratios in sample[Fields.stats]:
             return sample
 
-        # there is no image in this sample, still default ratio 0.0
+        # there is no image in this sample
         if self.image_key not in sample or not sample[self.image_key]:
-            sample[Fields.stats][StatsKeys.face_ratios] = np.empty(0,
-                                                                   dtype=float)
+            sample[Fields.stats][StatsKeys.face_ratios] = np.array(
+                [], dtype=np.float64)
             return sample
 
         # load images
@@ -86,38 +86,29 @@ def compute_stats(self, sample, context=False):
                     # store the image data into context
                     sample[Fields.context][loaded_image_key] = image
 
-        # check if faces detected already
-        if StatsKeys.face_detections not in sample[Fields.stats]:
-            face_detections = {}
-            for key, image in images.items():
-                img = pil_to_opencv(image)
-                dets = self.detector(img, **self.detector_kwargs)
-                dets_formatted = [[
-                    det.left(),
-                    det.top(),
-                    det.width(),
-                    det.height()
-                ] for det in dets] if dets else [[0, 0, 0, 0]]
-                face_detections[key] = dets_formatted
-            sample[Fields.stats][StatsKeys.face_detections] = [
-                face_detections[key] for key in loaded_image_keys
-            ]
-
-        max_face_ratios = []
-        for key, dets in zip(loaded_image_keys,
-                             sample[Fields.stats][StatsKeys.face_detections]):
-            img_area = images[key].width * images[key].height
-            # Calculate the max face ratio for the current image
-            max_face_ratios.append(
-                max([w * h / img_area for _, _, w, h in dets], default=0.0))
-        sample[Fields.stats][StatsKeys.face_ratios] = max_face_ratios
-
+        # detect faces
+        face_detections = {}
+        for key, image in images.items():
+            img = pil_to_opencv(image)
+            dets = self.detector(img, **self.detector_kwargs)
+            face_detections[key] = [[
+                det.left(), det.top(),
+                det.width(), det.height()
+            ] for det in dets]
+
+        # compute face area ratios for each image considering the largest face
+        face_area_ratios = {}
+        for key, dets in face_detections.items():
+            image_area = images[key].width * images[key].height
+            face_area_ratios[key] = max(
+                [w * h / image_area for _, _, w, h in dets], default=0.0)
+
+        sample[Fields.stats][StatsKeys.face_ratios] = [
+            face_area_ratios[key] for key in loaded_image_keys
+        ]
         return sample
 
     def process(self, sample):
-        if self.image_key not in sample or not sample[self.image_key]:
-            return True
-
         face_ratios = sample[Fields.stats][StatsKeys.face_ratios]
         if len(face_ratios) <= 0:
             return True

diff --git a/tests/ops/filter/test_face_area_filter.py b/tests/ops/filter/test_face_area_filter.py
@@ -2,7 +2,6 @@
 import unittest
 
 from datasets import Dataset
-# from data_juicer.core.data import NestedDataset as Dataset
 
 from data_juicer.ops.filter.face_area_filter import FaceAreaFilter
 from data_juicer.utils.constant import Fields