add: changed to work with custom framework
Just completed a framework as part of a thesis for advanced near-duplicate image detection. I have now plugged it into this front end. Obviously more work is needed, but it's a start for the front end :)
1 parent 17f223e · commit 9e414c3 · 519 changed files with 137,311 additions and 4,370 deletions
@@ -0,0 +1,57 @@
import imagehash
from PIL import Image


class Ahash:
    def __init__(self, threshold=5):
        self.name = "Ahash"
        self.threshold = threshold
        self.duplicates = []  # (path1, path2) pairs judged duplicates
        self.possible_duplicates = []  # flat list of paths needing manual review

    def process(self, image_paths):
        """
        Takes provided image paths and classifies each pair as duplicates or
        possible duplicates; pairs recorded in neither list are not duplicates.
        """
        image_paths = set(image_paths)

        hashes = {image_path: self._ahash(image_path) for image_path in image_paths}
        checked_pairs = set()

        for path1, hash1 in hashes.items():
            for path2, hash2 in hashes.items():
                if (
                    path1 == path2
                    or (path1, path2) in checked_pairs
                    or (path2, path1) in checked_pairs
                ):
                    continue

                result = self._filter(hash1, hash2)

                if result == 0:
                    self.duplicates.append((path1, path2))
                elif result == 1:
                    self.possible_duplicates.extend((path1, path2))

                checked_pairs.add((path1, path2))
                checked_pairs.add((path2, path1))

    def _ahash(self, image_path):
        try:
            with Image.open(image_path) as image:
                # average_hash resizes internally as well; this pre-resize keeps
                # the preprocessing consistent across the hash classes.
                image = image.convert("L").resize((9, 8), Image.LANCZOS)
                return imagehash.average_hash(image)
        except IOError as e:
            print(f"Error accessing image: {image_path}: {e}")
            return None

    def _filter(self, h1, h2):
        """
        Uses hamming distance to classify images.
        0 = duplicates, 1 = possible duplicates
        """
        if h1 is None or h2 is None:
            return None  # an image failed to load; skip this pair
        hamming_distance = h1 - h2
        if hamming_distance <= self.threshold:
            return 0
        return 1
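A minimal usage sketch for the Ahash detector above; the image paths are hypothetical placeholders:

detector = Ahash(threshold=5)
detector.process(["photo_a.jpg", "photo_a_copy.jpg", "photo_b.jpg"])
print(detector.duplicates)           # pairs judged duplicates, e.g. [("photo_a.jpg", "photo_a_copy.jpg")]
print(detector.possible_duplicates)  # flat list of paths that need manual review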
@@ -0,0 +1,70 @@
import imagehash
from PIL import Image


class Dhash:
    def __init__(self, threshold=0.95, sim=True):
        self.name = "Dhash"
        self.threshold = threshold  # similarity ratio when sim=True, bit distance otherwise
        self.duplicates = []
        self.possible_duplicates = []
        self.sim = sim

    def process(self, image_paths):
        """
        Takes provided image paths and classifies each pair as duplicates or
        possible duplicates; pairs recorded in neither list are not duplicates.
        """
        image_paths = set(image_paths)

        hashes = {image_path: self._dhash(image_path) for image_path in image_paths}
        checked_pairs = set()

        for path1, hash1 in hashes.items():
            for path2, hash2 in hashes.items():
                if (
                    path1 == path2
                    or (path1, path2) in checked_pairs
                    or (path2, path1) in checked_pairs
                ):
                    continue

                result = self._filter(hash1, hash2)

                if result == 0:
                    self.duplicates.append((path1, path2))
                elif result == 1:
                    self.possible_duplicates.append(path1)
                    self.possible_duplicates.append(path2)

                checked_pairs.add((path1, path2))
                checked_pairs.add((path2, path1))

    def _dhash(self, image_path):
        try:
            with Image.open(image_path) as image:
                image = image.convert("L").resize((9, 8), Image.LANCZOS)
                return imagehash.dhash(image)
        except IOError as e:
            print(f"Error accessing image: {image_path}: {e}")
            return None

    def _filter(self, h1, h2):
        """
        Uses hamming distance or similarity to classify images.
        0 = duplicates, 1 = possible duplicates
        """
        if h1 is None or h2 is None:
            return None  # an image failed to load; skip this pair
        hamming_distance = h1 - h2

        if self.sim:
            # Normalise the distance to a similarity in [0, 1]: identical
            # hashes give 1.0, fully different hashes give 0.0.
            hash_length = len(h1)
            similarity = (hash_length - hamming_distance) / hash_length
            if similarity > self.threshold:
                return 0
            return 1

        # Distance mode: a small hamming distance means duplicates.
        if hamming_distance <= self.threshold:
            return 0
        return 1
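To make the default similarity threshold of 0.95 concrete, a worked example assuming the standard 64-bit dhash (an 8x8 difference grid from the 9x8 input):

hash_length = 64
(hash_length - 2) / hash_length  # distance 2 -> 0.96875 > 0.95: duplicates (0)
(hash_length - 4) / hash_length  # distance 4 -> 0.9375 <= 0.95: possible duplicates (1)

So at most three differing bits still count as duplicates under the default threshold.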
@@ -0,0 +1,61 @@
import imagehash
from PIL import Image


class Phash:
    def __init__(self, threshold=5):
        self.name = "Phash"
        self.threshold = threshold
        self.duplicates = []
        self.possible_duplicates = []

    def process(self, image_paths):
        """
        Takes provided image paths and classifies each pair as duplicates or
        possible duplicates; pairs recorded in neither list are not duplicates.
        """
        image_paths = set(image_paths)

        hashes = {image_path: self._phash(image_path) for image_path in image_paths}
        checked_pairs = set()

        for path1, hash1 in hashes.items():
            for path2, hash2 in hashes.items():
                if (
                    path1 == path2
                    or (path1, path2) in checked_pairs
                    or (path2, path1) in checked_pairs
                ):
                    continue

                result = self._filter(hash1, hash2)

                if result == 0:
                    self.duplicates.append((path1, path2))
                elif result == 1:
                    self.possible_duplicates.extend((path1, path2))

                checked_pairs.add((path1, path2))
                checked_pairs.add((path2, path1))

    def _phash(self, image_path):
        try:
            with Image.open(image_path) as image:
                # phash resizes internally before its DCT; this pre-resize keeps
                # the preprocessing consistent across the hash classes.
                image = image.convert("L").resize((8, 8), Image.LANCZOS)
                return imagehash.phash(image)
        except IOError as e:
            print(f"Error accessing image: {image_path}: {e}")
            return None

    def _filter(self, h1, h2):
        """
        Uses hamming distance to classify images.
        0 = duplicates, 1 = possible duplicates
        """
        if h1 is None or h2 is None:
            return None  # an image failed to load; skip this pair
        hamming_distance = h1 - h2
        if hamming_distance <= self.threshold:
            return 0
        return 1
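Since Ahash, Dhash, and Phash expose the same process/duplicates interface, a caller can swap them freely. A sketch, with a hypothetical paths list:

for detector in (Ahash(), Dhash(), Phash()):
    detector.process(paths)
    print(detector.name, len(detector.duplicates), "duplicate pairs")

Note that Ahash and Phash default to a bit-distance threshold, while Dhash defaults to a similarity ratio.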
@@ -0,0 +1,115 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from itertools import combinations

import cv2


class SIFT:
    """
    Uses OpenCV to implement SIFT, optimized with parallel processing for
    image comparisons.
    """

    def __init__(
        self,
        threshold=30,
        sigma=1.6,
        edge_threshold=10,
        n_octave_layers=3,
        contrast_threshold=0.04,
        image_ratio=0.3,
        showprogress=False,  # hard to set this when running parameter permutations
    ):
        self.name = "SIFT"
        self.threshold = threshold
        self.image_ratio = image_ratio
        self.sigma = sigma
        self.edge_threshold = edge_threshold
        self.n_octave_layers = n_octave_layers
        self.contrast_threshold = contrast_threshold
        self.max_workers = 4
        self.duplicates = []
        self.possible_duplicates = []
        self.showprogress = showprogress

    def process(self, image_paths):
        """
        Process images in parallel to classify duplicates.
        """
        preprocessed_images = self._preprocess_images(image_paths)
        total_pairs = len(image_paths) * (len(image_paths) - 1) / 2
        processed_pairs = 0

        pairs = list(combinations(preprocessed_images.keys(), 2))
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            future_to_pair = {
                executor.submit(self._process_pair, pair, preprocessed_images): pair
                for pair in pairs
            }
            for future in as_completed(future_to_pair):
                result = future.result()
                if result is not None:
                    result_type, path1, path2 = result
                    if result_type == 0:
                        self.duplicates.append((path1, path2))
                    elif result_type == 1:
                        self.possible_duplicates.extend((path1, path2))
                processed_pairs += 1
                p = (processed_pairs / total_pairs) * 100

                if self.showprogress:
                    print(f"Processed {p:.1f}%...")

    def _process_pair(self, pair, preprocessed_images):
        path1, path2 = pair
        _, _, descriptors1 = preprocessed_images[path1]
        _, _, descriptors2 = preprocessed_images[path2]

        if descriptors1 is None or descriptors2 is None:
            return None

        # For each descriptor in image 1, find its two nearest neighbours in
        # image 2 so Lowe's ratio test can be applied below.
        matches = cv2.BFMatcher().knnMatch(descriptors1, descriptors2, k=2)
        close_enough_matches = self._calc_lowe(matches)

        result = self._filter(close_enough_matches)
        return result, path1, path2

    def _preprocess_images(self, image_paths):
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            results = list(executor.map(self._preprocess_single_image, image_paths))
        return dict(zip(image_paths, results))

    def _preprocess_single_image(self, image_path):
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"Failed to open image: {image_path}.")
            return None, None, None
        # Equalize contrast and downscale before feature extraction.
        img = cv2.equalizeHist(img)
        img = cv2.resize(
            img,
            None,
            fx=self.image_ratio,
            fy=self.image_ratio,
            interpolation=cv2.INTER_AREA,
        )
        sift = cv2.SIFT_create(
            # nOctaveLayers=self.n_octave_layers,
            contrastThreshold=self.contrast_threshold,
            edgeThreshold=self.edge_threshold,
            sigma=self.sigma,
        )
        keypoints, descriptors = sift.detectAndCompute(img, None)
        return img, keypoints, descriptors

    def _calc_lowe(self, matches):
        # Lowe's ratio test: keep a match only if its best neighbour is
        # clearly closer than the second best (ratio 0.7).
        close_enough_matches = []
        for match_pair in matches:
            if len(match_pair) >= 2:
                m, n = match_pair
                if m.distance < 0.7 * n.distance:
                    close_enough_matches.append(m)
        return close_enough_matches

    def _filter(self, matches):
        if len(matches) > self.threshold:
            return 0
        return 1
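A usage sketch for the SIFT detector, assuming a hypothetical images/ folder:

import glob

paths = glob.glob("images/*.jpg")
detector = SIFT(threshold=30, showprogress=True)
detector.process(paths)
print(detector.duplicates)

SIFT matching is far more expensive than the hash-based detectors, so the class downscales images (image_ratio) and splits pair comparisons across a thread pool.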