Skip to content

Commit

Permalink
add: changed to work with custom framework
Browse files Browse the repository at this point in the history
Just completed a framework as part of a thesis for advanced near-duplicate image detection. I have now plugged it into this front end. Obviously more work is needed, but it's a start for the front end :)
  • Loading branch information
Logan-Fouts committed May 17, 2024
1 parent 17f223e commit 9e414c3
Show file tree
Hide file tree
Showing 519 changed files with 137,311 additions and 4,370 deletions.
809 changes: 440 additions & 369 deletions Client/package-lock.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Client/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"@types/react-dom": "^18.2.6",
"@typescript-eslint/eslint-plugin": "^5.61.0",
"@typescript-eslint/parser": "^5.61.0",
"@vitejs/plugin-react": "^4.0.1",
"@vitejs/plugin-react": "^4.2.1",
"eslint": "^8.44.0",
"eslint-plugin-react-hooks": "^4.6.0",
"eslint-plugin-react-refresh": "^0.4.1",
Expand Down
24 changes: 11 additions & 13 deletions Client/src/components/Options.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ function Options() {
const [progress, setProgress] = useState(0);
const [duplicates, setDuplicates] = useState(0);
const [selectedDetection, setSelectedDetection] = useState<number | null>(
null
null,
);

const imageMappings = {
1: "/src/components/images/Examples/1/image1.png",
2: "/src/components/images/Examples/1/image2.png",
3: "/src/components/images/Examples/2/image1.png",
4: "/src/components/images/Examples/2/image2.png",
1: "/src/components/images/Examples/Personal/1.jpg",
2: "/src/components/images/Examples/Personal/2.jpg",
3: "/src/components/images/Examples/fingers/fingerprint2.png",
4: "/src/components/images/Examples/fingers/fingerprint2.png",
5: "/src/components/images/Examples/3/image1.png",
6: "/src/components/images/Examples/3/image2.png",
7: "/src/components/images/Examples/4/image1.png",
Expand All @@ -43,7 +43,7 @@ function Options() {
"progressUpdate",
(newProgress: number) => {
setProgress(newProgress);
}
},
);

let updateDuplicate = true;
Expand Down Expand Up @@ -71,7 +71,7 @@ function Options() {
await (window as any).electronAPI.process(
folderPath,
selectedDetection,
removeNonMedia
removeNonMedia,
);
};

Expand All @@ -94,9 +94,7 @@ function Options() {

return (
<div className="Options">
<div className="DarkModeButton">
<Darkmode></Darkmode>
</div>
<div className="DarkModeButton">{/* <Darkmode></Darkmode> */}</div>
<ul className="SelectFolderText">
<li>Select Folder</li>
</ul>
Expand All @@ -107,16 +105,16 @@ function Options() {
<div className="rightside"></div>
<ul className="DetectionLevel">
<ul className="DetectionLevelText">
<li className="DetectionLevelMainText">Detection Level</li>
<li className="DetectionLevelMainText">Preset</li>
<li className="DetectionLevelSubText">
Adjust the level of sensitivity for duplicate detection
Select a preset that matches your image set domain
</li>
</ul>
<ul className="DetectionWheelSection">
<DetectionWheel onDetectionSelect={handleDetectionSelect} />
<div className="RectangleContainer">
<h5 className="ExampleText">
Possible Duplicates <br></br> At This Level
Example Images <br></br> For This Preset
</h5>
<div className="GreyRectangle">
{selectedDetection !== null && (
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
57 changes: 57 additions & 0 deletions Electron/Framework/Algorithms/Ahash/ahash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import imagehash
from PIL import Image


class Ahash:
    """
    Near-duplicate classifier based on the average hash from ``imagehash``.

    Results accumulate on the instance across calls to ``process``:

    * ``duplicates`` -- list of ``(path1, path2)`` tuples.
    * ``possible_duplicates`` -- flat list of paths; each pair that is not a
      confirmed duplicate contributes both of its paths.
    """

    def __init__(self, threshold=5):
        self.name = "Ahash"
        # Largest hash distance at which two images still count as duplicates.
        self.threshold = threshold
        self.duplicates = []
        self.possible_duplicates = []

    def process(self, image_paths):
        """
        Takes provided image paths and classifies each unordered pair as
        duplicates or possible duplicates.

        Repeated paths are collapsed first. Pairs in which either image could
        not be hashed are skipped instead of aborting the whole run.
        """
        hashes = {path: self._ahash(path) for path in set(image_paths)}
        entries = list(hashes.items())

        # Pairing each entry only with the entries after it visits every
        # unordered pair exactly once, so no "already checked" set is needed.
        for index, (path1, hash1) in enumerate(entries):
            for path2, hash2 in entries[index + 1:]:
                result = self._filter(hash1, hash2)

                if result == 0:
                    self.duplicates.append((path1, path2))
                elif result == 1:
                    self.possible_duplicates.extend((path1, path2))

    def _ahash(self, image_path):
        """
        Returns the average hash of the image at ``image_path``, or ``None``
        (after printing the error) when the file cannot be read.
        """
        try:
            with Image.open(image_path) as image:
                # NOTE(review): (9, 8) mirrors the size used by Dhash in this
                # framework; average_hash presumably resizes again internally
                # -- confirm this pre-resize is intended.
                image = image.convert("L").resize((9, 8), Image.LANCZOS)
                return imagehash.average_hash(image)
        except IOError as e:
            print(f"Error accessing image: {image_path}: {e}")
            return None

    def _filter(self, h1, h2):
        """
        Uses hamming distance to classify images.
        0 = duplicates, 1 = possible duplicates,
        None = at least one hash is missing (image could not be read).
        """
        if h1 is None or h2 is None:
            return None

        hamming_distance = h1 - h2
        if hamming_distance <= self.threshold:
            return 0
        return 1
70 changes: 70 additions & 0 deletions Electron/Framework/Algorithms/Dhash/dhash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import imagehash
from PIL import Image


class Dhash:
    """
    Near-duplicate classifier based on the difference hash from ``imagehash``.

    ``threshold`` is interpreted according to ``sim``:

    * ``sim=True`` -- minimum similarity (fraction of matching hash bits,
      0..1) that must be exceeded for a pair to count as duplicates.
    * ``sim=False`` -- maximum hash distance at which a pair still counts as
      duplicates.

    Results accumulate on the instance across calls to ``process``:
    ``duplicates`` holds ``(path1, path2)`` tuples, ``possible_duplicates`` is
    a flat list of paths (two entries per pair).
    """

    def __init__(self, threshold=0.95, sim=True):
        self.name = "Dhash"
        self.threshold = threshold
        self.duplicates = []
        self.possible_duplicates = []
        self.sim = sim

    def process(self, image_paths):
        """
        Takes provided image paths and classifies each unordered pair as
        duplicates or possible duplicates.

        Repeated paths are collapsed first. Pairs in which either image could
        not be hashed are skipped.
        """
        hashes = {path: self._dhash(path) for path in set(image_paths)}
        entries = list(hashes.items())

        # Pairing each entry only with the entries after it visits every
        # unordered pair exactly once, so no "already checked" set is needed.
        for index, (path1, hash1) in enumerate(entries):
            for path2, hash2 in entries[index + 1:]:
                result = self._filter(hash1, hash2)

                if result == 0:
                    self.duplicates.append((path1, path2))
                elif result == 1:
                    self.possible_duplicates.extend((path1, path2))

    def _dhash(self, image_path):
        """
        Returns the difference hash of the image at ``image_path``, or
        ``None`` (after printing the error) when the file cannot be read.
        """
        try:
            with Image.open(image_path) as image:
                image = image.convert("L").resize((9, 8), Image.LANCZOS)
                return imagehash.dhash(image)
        except IOError as e:
            print(f"Error accessing image: {image_path}: {e}")
            return None

    def _filter(self, h1, h2):
        """
        Uses hamming distance or similarity to classify images.
        0 = duplicates, 1 = possible duplicates,
        None = at least one hash is missing (image could not be read).
        """
        if h1 is None or h2 is None:
            return None

        hamming_distance = h1 - h2

        if self.sim:
            # len(h1) is taken as the total number of bits in the hash, so
            # this is the fraction of bits on which the two hashes agree.
            hash_bits = len(h1)
            similarity = (hash_bits - hamming_distance) / hash_bits
            if similarity > self.threshold:
                return 0
            return 1

        # Distance mode: a *small* distance means the images are alike. The
        # comparison used to be ">" which flagged the most different images
        # as duplicates.
        if hamming_distance <= self.threshold:
            return 0
        return 1
61 changes: 61 additions & 0 deletions Electron/Framework/Algorithms/Phash/phash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import imagehash
from PIL import Image


class Phash:
    """
    Near-duplicate classifier based on the perceptual hash from ``imagehash``.

    ``duplicates`` collects ``(path1, path2)`` tuples; ``possible_duplicates``
    is a flat list that receives both paths of every other compared pair.
    Both lists keep growing across calls to ``process``.
    """

    def __init__(self, threshold=5):
        self.name = "Phash"
        self.threshold = threshold
        self.duplicates = []
        self.possible_duplicates = []

    def process(self, image_paths):
        """
        Hashes every distinct path and classifies each unordered pair once.
        """
        unique_paths = set(image_paths)
        hashes = {path: self._phash(path) for path in unique_paths}
        entries = list(hashes.items())

        # Each entry is compared only with the entries that follow it, which
        # covers every unordered pair exactly once.
        for position, (first_path, first_hash) in enumerate(entries):
            for second_path, second_hash in entries[position + 1:]:
                verdict = self._filter(first_hash, second_hash)

                if verdict == 0:
                    self.duplicates.append((first_path, second_path))
                elif verdict == 1:
                    self.possible_duplicates.extend((first_path, second_path))

    def _phash(self, image_path):
        """
        Returns the perceptual hash of the image, or ``None`` (after printing
        the error) if the file cannot be opened.
        """
        try:
            with Image.open(image_path) as handle:
                grey = handle.convert("L")
                shrunk = grey.resize((8, 8), Image.LANCZOS)
                return imagehash.phash(shrunk)
        except IOError as e:
            print(f"Error accessing image: {image_path}: {e}")
            return None

    def _filter(self, h1, h2):
        """
        Classifies a pair of hashes by their hamming distance.
        0 = duplicates, 1 = possible duplicates, None = a hash is missing.
        """
        if not h1 or not h2:
            return None

        distance = h1 - h2
        return 0 if distance <= self.threshold else 1
115 changes: 115 additions & 0 deletions Electron/Framework/Algorithms/SIFT/sift.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from concurrent.futures import ThreadPoolExecutor
from itertools import combinations

import cv2


class SIFT:
    """
    Uses OpenCV to implement SIFT, optimized with parallel processing for
    image comparisons.

    Descriptors are extracted for every image, then each unordered pair of
    images is matched with a brute-force k-NN matcher and filtered with a
    ratio test. Results accumulate on the instance across calls to
    ``process``: ``duplicates`` holds ``(path1, path2)`` tuples and
    ``possible_duplicates`` is a flat list of paths (two entries per pair).
    """

    # A match is kept only if it is this much closer than the runner-up.
    LOWE_RATIO = 0.7

    def __init__(
        self,
        threshold=30,
        sigma=1.6,
        edge_threshold=10,
        n_octave_layers=3,
        contrast_threshold=0.04,
        image_ratio=0.3,
        showprogress=False,  # in the permutation it's hard to set this
    ):
        self.name = "SIFT"
        # Number of good matches that must be exceeded to call a duplicate.
        self.threshold = threshold
        # Scale factor applied to both image axes before feature extraction.
        self.image_ratio = image_ratio
        self.sigma = sigma
        self.edge_threshold = edge_threshold
        self.n_octave_layers = n_octave_layers
        self.contrast_threshold = contrast_threshold
        self.max_workers = 4
        self.duplicates = []
        self.possible_duplicates = []
        self.showprogress = showprogress

    def process(self, image_paths):
        """
        Process images in parallel to classify duplicates.

        ``image_paths`` may be any iterable of paths. Pairs where either image
        has no descriptors (unreadable file or no keypoints) are skipped.
        """
        # Materialise once so generators and other one-shot iterables work.
        image_paths = list(image_paths)
        preprocessed_images = self._preprocess_images(image_paths)

        pairs = list(combinations(preprocessed_images, 2))
        # Count the pairs actually compared; repeated paths collapse into one
        # dictionary key, so len(image_paths) would overstate the total.
        total_pairs = len(pairs)

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [
                executor.submit(self._process_pair, pair, preprocessed_images)
                for pair in pairs
            ]
            # Results are consumed in submission order, keeping output stable.
            for processed_pairs, future in enumerate(futures, start=1):
                result = future.result()
                if result is not None:
                    result_type, path1, path2 = result
                    if result_type == 0:
                        self.duplicates.append((path1, path2))
                    elif result_type == 1:
                        self.possible_duplicates.extend((path1, path2))

                if self.showprogress:
                    p = (processed_pairs / total_pairs) * 100
                    print(f"Processed {p}%...")

    def _process_pair(self, pair, preprocessed_images):
        """
        Matches the descriptors of one pair of images.

        Returns ``(result, path1, path2)`` where ``result`` comes from
        ``_filter``, or ``None`` when either image has no descriptors.
        """
        path1, path2 = pair
        _, _, descriptors1 = preprocessed_images[path1]
        _, _, descriptors2 = preprocessed_images[path2]

        if descriptors1 is None or descriptors2 is None:
            return None

        matches = cv2.BFMatcher().knnMatch(descriptors1, descriptors2, k=2)
        close_enough_matches = self._calc_lowe(matches)

        result = self._filter(close_enough_matches)
        return result, path1, path2

    def _preprocess_images(self, image_paths):
        """
        Extracts features for every path in a thread pool and returns a dict
        mapping path -> ``(image, keypoints, descriptors)``.
        """
        # The paths are walked twice (map and zip), so pin them in a list.
        image_paths = list(image_paths)
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            results = list(executor.map(self._preprocess_single_image, image_paths))
        return dict(zip(image_paths, results))

    def _preprocess_single_image(self, image_path):
        """
        Loads one image as grayscale, equalises its histogram, rescales it by
        ``image_ratio`` and computes SIFT keypoints and descriptors.

        Returns ``(image, keypoints, descriptors)``, or ``(None, None, None)``
        when the image cannot be read.
        """
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"Failed to open image: {image_path}.")
            return None, None, None
        img = cv2.equalizeHist(img)
        img = cv2.resize(
            img,
            None,
            fx=self.image_ratio,
            fy=self.image_ratio,
            interpolation=cv2.INTER_AREA,
        )
        # NOTE(review): self.n_octave_layers is stored by __init__ but is not
        # forwarded here (the nOctaveLayers argument was disabled), so OpenCV
        # uses its own default -- confirm whether it should be passed.
        sift = cv2.SIFT_create(
            contrastThreshold=self.contrast_threshold,
            edgeThreshold=self.edge_threshold,
            sigma=self.sigma,
        )
        keypoints, descriptors = sift.detectAndCompute(img, None)
        return img, keypoints, descriptors

    def _calc_lowe(self, matches):
        """
        Applies the ratio test: keeps the best match of each k-NN result only
        when it is clearly closer than the second best. Results with fewer
        than two neighbours are ignored.
        """
        close_enough_matches = []
        for match_pair in matches:
            if len(match_pair) >= 2:
                m, n = match_pair
                if m.distance < self.LOWE_RATIO * n.distance:
                    close_enough_matches.append(m)
        return close_enough_matches

    def _filter(self, matches):
        """
        Returns 0 (duplicates) when the number of good matches exceeds
        ``threshold``, otherwise 1 (possible duplicates).
        """
        if len(matches) > self.threshold:
            return 0
        return 1
Loading

0 comments on commit 9e414c3

Please sign in to comment.