forked from kunalagarwal101/Face-Clustering
-
Notifications
You must be signed in to change notification settings - Fork 1
/
cluster_faces.py
109 lines (86 loc) · 4.13 KB
/
cluster_faces.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# USAGE
# python cluster_faces.py --encodings encodings.pickle
# import the necessary packages
# DBSCAN model for clustering similar encodings
from sklearn.cluster import DBSCAN
from imutils import build_montages
import numpy as np
import argparse
import pickle
import cv2
import os
from constants import ENCODINGS_PATH, CLUSTERING_RESULT_PATH
# add constants file in the code (clustering_result)
def copy_image(image, id, labelID, montage=False):
    """Write ``image`` as a JPEG somewhere under the output directory.

    The output directory is read from the module-level ``args`` dict, which
    is only populated by the ``__main__`` block, so this function must be
    called after the command-line arguments have been parsed.

    Args:
        image: Image array handed to ``cv2.imwrite``.
        id: Identifier used as the file name of a per-face image; ignored
            when ``montage`` is true.
        labelID: Cluster label the image belongs to. ``-1`` is the outlier
            label (faces that were not assigned to any cluster).
        montage: When true, save to ``<output>/montage/`` under a
            descriptive name; otherwise save to
            ``<output>/label<labelID>/<id>.jpg``.

    Returns:
        None.
    """
    if montage:
        # The outlier class is labelled -1 (not 1): only that montage is
        # the collection of unknown faces.
        if labelID == -1:
            filename = "Unknown Faces"
        else:
            filename = "Face ID #{}".format(labelID)
        path = os.path.join(args["output"], "montage")
    else:
        filename = str(id)
        path = os.path.join(args["output"], "label" + str(labelID))
    filename += ".jpg"

    os.makedirs(path, exist_ok=True)

    # cv2.imwrite reports failure through its return value instead of
    # raising, so surface it rather than losing the image silently.
    target = os.path.join(path, filename)
    if not cv2.imwrite(target, image):
        print("[WARN] could not write image: {}".format(target))
    return
if __name__ == "__main__":
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-e", "--encodings", type=str, default=ENCODINGS_PATH,
                    help="path to serialized db of facial encodings")
    ap.add_argument("-j", "--jobs", type=int, default=-1,
                    help="# of parallel jobs to run (-1 will use all CPUs)")
    ap.add_argument("-o", "--output", type=str, default=CLUSTERING_RESULT_PATH,
                    help="path to output directory")
    # NOTE: `args` has to stay a module-level name because copy_image()
    # reads it as a global.
    args = vars(ap.parse_args())
    os.makedirs(args["output"], exist_ok=True)

    # load the serialized face encodings + bounding box locations from the
    # encodings pickle file, then extract the set of encodings so we can
    # cluster on them
    print("[INFO] loading encodings...")
    # SECURITY: unpickling can execute arbitrary code; only point
    # --encodings at a file produced by a trusted source.
    with open(args["encodings"], "rb") as encodings_file:
        data = pickle.load(encodings_file)
    data = np.array(data)
    encodings = [d["encoding"] for d in data]

    # nothing to cluster: stop with a clear message instead of letting the
    # clustering call fail on an empty input
    if len(encodings) == 0:
        raise SystemExit("[ERROR] no encodings found in {}".format(
            args["encodings"]))

    # cluster the embeddings
    print("[INFO] clustering...")
    # creating DBSCAN object for clustering the encodings with the metric
    # "euclidean"
    clt = DBSCAN(metric="euclidean", n_jobs=args["jobs"])
    clt.fit(encodings)

    # clt.labels_ holds the label ID of every face in the dataset (i.e.
    # which cluster each face belongs to); np.unique reduces it to the
    # distinct label IDs
    labelIDs = np.unique(clt.labels_)

    # count the unique faces. labelIDs may contain -1, the "outlier" class
    # for embeddings too far from every cluster to be added to one, so it
    # is excluded from the count
    numUniqueFaces = len(np.where(labelIDs > -1)[0])
    print("[INFO] # unique faces: {}".format(numUniqueFaces))

    # loop over the unique face integers
    for labelID in labelIDs:
        # find all indexes into the `data` array that belong to the current
        # label ID, then randomly sample a maximum of 25 indexes from the set
        # NOTE(review): only the sampled images are copied into the label
        # folder below, so clusters with more than 25 faces are copied
        # partially -- confirm this is intended.
        print("[INFO] faces for face ID: {}".format(labelID))
        idxs = np.where(clt.labels_ == labelID)[0]
        idxs = np.random.choice(idxs, size=min(25, len(idxs)),
                                replace=False)

        # initialize the list of faces to include in the montage
        faces = []

        # loop over the sampled indexes
        for i in idxs:
            # load the input image; cv2.imread returns None (it does not
            # raise) when the file is missing or unreadable
            image_path = data[i]["imagePath"]
            image = cv2.imread(image_path)
            if image is None:
                print("[WARN] could not read image: {}".format(image_path))
                continue

            # extract the face ROI
            (top, right, bottom, left) = data[i]["loc"]
            face = image[top:bottom, left:right]

            # putting the image in the clustered folder
            copy_image(image, i, labelID)

            # an empty ROI cannot be resized; leave it out of the montage
            if face.size == 0:
                print("[WARN] empty face region in: {}".format(image_path))
                continue

            # force resize the face ROI to 96x96 and then add it to the
            # faces montage list
            face = cv2.resize(face, (96, 96))
            faces.append(face)

        # every sampled image may have been skipped above
        if not faces:
            print("[WARN] no usable faces for face ID: {}".format(labelID))
            continue

        # create a montage using 96x96 "tiles" with 5 rows and 5 columns
        montage = build_montages(faces, (96, 96), (5, 5))[0]

        # write on disk
        copy_image(montage, None, labelID, montage=True)