Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ops/image face blur mapper #249

Merged
merged 6 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions configs/config_all.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ process:
keep_original_sample: true # whether to keep the original sample. If it's set to False, there will be only generated images in the final datasets and the original images will be removed. It's True by default.
caption_key: null # the key name of fields in samples to store captions for each image; the captions guide the diffusion model in producing the images
hf_img2seq: 'Salesforce/blip2-opt-2.7b' # model name on huggingface to generate caption if caption_key is null
- image_face_blur_mapper: # mapper to blur faces detected in images.
blur_type: 'gaussian' # type of blur kernel, including ['mean', 'box', 'gaussian']
radius: 2 # radius of blur kernel
- nlpaug_en_mapper: # simply augment texts in English based on the nlpaug library
sequential: false # whether to combine all augmentation methods into a sequence. If it's True, a sample will be augmented by all opened augmentation methods sequentially. If it's False, each opened augmentation method would generate its augmented samples independently.
aug_num: 1 # number of augmented samples to be generated. If `sequential` is True, there will be aug_num augmented samples generated in total. If it's False, there will be (aug_num * #opened_aug_method) augmented samples generated.
Expand Down Expand Up @@ -210,10 +213,6 @@ process:
rep_len: 10 # repetition length for char-level n-gram
min_ratio: 0.0 # the min ratio of filter range
max_ratio: 0.5 # the max ratio of filter range
- face_area_filter: # filter samples according to the face area ratios in images (r=face_area/image_area). If multiple faces are available, we use the largest one.
min_ratio: 0.0 # the min face area ratio of filter range
max_ratio: 0.4 # the max face area ratio of filter range
upsample_num_times: 0 # optional argument passing to the underlying dlib face detector
- flagged_words_filter: # filter text with the flagged-word ratio larger than a specific max value
lang: en # consider flagged words in what language
tokenization: false # whether to use model to tokenize documents
Expand All @@ -222,15 +221,18 @@ process:
use_words_aug: false # whether to augment words, especially for Chinese and Vietnamese
words_aug_group_sizes: [2] # the group size of words to augment
words_aug_join_char: "" # the join char between words to augment
- image_aspect_ratio_filter: # filter samples according to the aspect ratios of images (a fraction of width by height, r=w/h) in them
min_ratio: 0.333 # the min aspect ratio of filter range
max_ratio: 3.0 # the max aspect ratio of filter range
any_or_all: any # keep this sample when any/all images meet the filter condition
- image_aesthetics_filter: # filter samples according to the aesthetics score of images.
hf_scorer_model: shunk031/aesthetics-predictor-v2-sac-logos-ava1-l14-linearMSE # Huggingface model name for the aesthetics predictor
min_score: 0.3 # the min aesthetics score of filter range
max_score: 1.0 # the max aesthetics score of filter range
any_or_all: any # keep this sample when any/all images meet the filter condition
- image_aspect_ratio_filter: # filter samples according to the aspect ratios of images (a fraction of width by height, r=w/h) in them
min_ratio: 0.333 # the min aspect ratio of filter range
max_ratio: 3.0 # the max aspect ratio of filter range
any_or_all: any # keep this sample when any/all images meet the filter condition
- image_face_ratio_filter: # filter samples according to the face area ratios in images (r=face_area/image_area). If multiple faces are available, we use the largest one.
min_ratio: 0.0 # the min face area ratio of filter range
max_ratio: 0.4 # the max face area ratio of filter range
- image_shape_filter: # filter samples according to the widths and heights of images in them
min_width: 200 # the min width of width filter range
max_width: 5000 # the max width of width filter range
Expand Down
20 changes: 10 additions & 10 deletions data_juicer/ops/filter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
from . import (alphanumeric_filter, audio_duration_filter,
audio_nmf_snr_filter, audio_size_filter,
average_line_length_filter, character_repetition_filter,
face_area_filter, flagged_words_filter, image_aesthetics_filter,
image_aspect_ratio_filter, image_shape_filter,
image_size_filter, image_text_matching_filter,
image_text_similarity_filter, language_id_score_filter,
maximum_line_length_filter, perplexity_filter,
phrase_grounding_recall_filter, special_characters_filter,
specified_field_filter, specified_numeric_field_filter,
stopwords_filter, suffix_filter, text_action_filter,
text_entity_dependency_filter, text_length_filter,
token_num_filter, video_aesthetics_filter,
flagged_words_filter, image_aesthetics_filter,
image_aspect_ratio_filter, image_face_ratio_filter,
image_shape_filter, image_size_filter,
image_text_matching_filter, image_text_similarity_filter,
language_id_score_filter, maximum_line_length_filter,
perplexity_filter, phrase_grounding_recall_filter,
special_characters_filter, specified_field_filter,
specified_numeric_field_filter, stopwords_filter, suffix_filter,
text_action_filter, text_entity_dependency_filter,
text_length_filter, token_num_filter, video_aesthetics_filter,
video_aspect_ratio_filter, video_duration_filter,
video_frames_text_similarity_filter, video_motion_score_filter,
video_ocr_area_ratio_filter, video_resolution_filter,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,20 @@
from ..base_op import OPERATORS, Filter
from ..op_fusion import LOADED_IMAGES

OP_NAME = 'face_area_filter'
OP_NAME = 'image_face_ratio_filter'

with AvailabilityChecking(['dlib'], OP_NAME):
import dlib


@OPERATORS.register_module(OP_NAME)
@LOADED_IMAGES.register_module(OP_NAME)
class FaceAreaFilter(Filter):
"""Filter to keep samples with face area ratio within a specific range.
class ImageFaceRatioFilter(Filter):
"""Filter to keep samples with face area ratios within a specific range.
"""

_default_kwargs = {'upsample_num_times': 0}

def __init__(self,
min_ratio: ClosedUnitInterval = 0.0,
max_ratio: ClosedUnitInterval = 0.4,
Expand All @@ -40,18 +42,15 @@ def __init__(self,
:param args: Extra positional arguments.
:param kwargs: Extra keyword arguments.
"""

# Extract face detector arguments from kwargs
detector_keys = ['upsample_num_times']
self.detector_kwargs = {
key: kwargs.pop(key)
for key in detector_keys if key in kwargs
}

super().__init__(*args, **kwargs)
self.min_ratio = min_ratio
self.max_ratio = max_ratio

self.extra_kwargs = {
k: kwargs.get(k, v)
for k, v in self._default_kwargs.items()
}

if any_or_all not in ['any', 'all']:
raise ValueError(f'Keep strategy [{any_or_all}] is not supported. '
f'Can only be one of ["any", "all"].')
Expand Down Expand Up @@ -80,7 +79,7 @@ def compute_stats(self, sample, context=False):
face_detections = {}
for key, image in images.items():
img = pil_to_opencv(image)
dets = self.detector(img, **self.detector_kwargs)
dets = self.detector(img, **self.extra_kwargs)
face_detections[key] = [[
max(det.left(), 0),
max(det.top(), 0),
Expand Down
3 changes: 2 additions & 1 deletion data_juicer/ops/mapper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
clean_ip_mapper, clean_links_mapper, expand_macro_mapper,
fix_unicode_mapper, image_blur_mapper,
image_captioning_from_gpt4v_mapper, image_captioning_mapper,
image_diffusion_mapper, nlpaug_en_mapper, nlpcda_zh_mapper,
image_diffusion_mapper, image_face_blur_mapper,
nlpaug_en_mapper, nlpcda_zh_mapper,
punctuation_normalization_mapper, remove_bibliography_mapper,
remove_comments_mapper, remove_header_mapper,
remove_long_words_mapper, remove_non_chinese_character_mapper,
Expand Down
Loading
Loading