add: changed to work with custom framework
Just completed a framework as part of a thesis for advanced near-duplicate image detection. I have now plugged it into this front end. Obviously more work is needed, but it's a start for the front end :)
1 parent 17f223e · commit 9e414c3 · 519 changed files with 137,311 additions and 4,370 deletions
@@ -0,0 +1,57 @@
import imagehash
from PIL import Image


class Ahash:
    def __init__(self, threshold=5):
        self.name = "Ahash"
        self.threshold = threshold
        self.duplicates = []  # (path1, path2) pairs judged duplicates
        self.possible_duplicates = []  # flat list of paths needing manual review

    def process(self, image_paths):
        """
        Takes provided image paths and classifies each pair as duplicates or
        possible duplicates; pairs recorded in neither list are not duplicates.
        """
        image_paths = set(image_paths)

        hashes = {image_path: self._ahash(image_path) for image_path in image_paths}
        checked_pairs = set()

        for path1, hash1 in hashes.items():
            for path2, hash2 in hashes.items():
                if (
                    path1 == path2
                    or (path1, path2) in checked_pairs
                    or (path2, path1) in checked_pairs
                ):
                    continue

                result = self._filter(hash1, hash2)

                if result == 0:
                    self.duplicates.append((path1, path2))
                elif result == 1:
                    self.possible_duplicates.extend((path1, path2))

                checked_pairs.add((path1, path2))
                checked_pairs.add((path2, path1))

    def _ahash(self, image_path):
        try:
            with Image.open(image_path) as image:
                # average_hash resizes internally as well; this pre-resize keeps
                # the preprocessing consistent across the hash classes.
                image = image.convert("L").resize((9, 8), Image.LANCZOS)
                return imagehash.average_hash(image)
        except IOError as e:
            print(f"Error accessing image: {image_path}: {e}")
            return None

    def _filter(self, h1, h2):
        """
        Uses hamming distance to classify images.
        0 = duplicates, 1 = possible duplicates
        """
        if h1 is None or h2 is None:
            return None  # an image failed to load; skip this pair
        hamming_distance = h1 - h2
        if hamming_distance <= self.threshold:
            return 0
        return 1
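A minimal usage sketch for the Ahash detector above; the image paths are hypothetical placeholders:

detector = Ahash(threshold=5)
detector.process(["photo_a.jpg", "photo_a_copy.jpg", "photo_b.jpg"])
print(detector.duplicates)           # pairs judged duplicates, e.g. [("photo_a.jpg", "photo_a_copy.jpg")]
print(detector.possible_duplicates)  # flat list of paths that need manual review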
@@ -0,0 +1,70 @@
import imagehash
from PIL import Image


class Dhash:
    def __init__(self, threshold=0.95, sim=True):
        self.name = "Dhash"
        self.threshold = threshold  # similarity ratio when sim=True, bit distance otherwise
        self.duplicates = []
        self.possible_duplicates = []
        self.sim = sim

    def process(self, image_paths):
        """
        Takes provided image paths and classifies each pair as duplicates or
        possible duplicates; pairs recorded in neither list are not duplicates.
        """
        image_paths = set(image_paths)

        hashes = {image_path: self._dhash(image_path) for image_path in image_paths}
        checked_pairs = set()

        for path1, hash1 in hashes.items():
            for path2, hash2 in hashes.items():
                if (
                    path1 == path2
                    or (path1, path2) in checked_pairs
                    or (path2, path1) in checked_pairs
                ):
                    continue

                result = self._filter(hash1, hash2)

                if result == 0:
                    self.duplicates.append((path1, path2))
                elif result == 1:
                    self.possible_duplicates.append(path1)
                    self.possible_duplicates.append(path2)

                checked_pairs.add((path1, path2))
                checked_pairs.add((path2, path1))

    def _dhash(self, image_path):
        try:
            with Image.open(image_path) as image:
                image = image.convert("L").resize((9, 8), Image.LANCZOS)
                return imagehash.dhash(image)
        except IOError as e:
            print(f"Error accessing image: {image_path}: {e}")
            return None

    def _filter(self, h1, h2):
        """
        Uses hamming distance or similarity to classify images.
        0 = duplicates, 1 = possible duplicates
        """
        if h1 is None or h2 is None:
            return None  # an image failed to load; skip this pair
        hamming_distance = h1 - h2

        if self.sim:
            # Normalise the distance to a similarity in [0, 1]: identical
            # hashes give 1.0, fully different hashes give 0.0.
            hash_length = len(h1)
            similarity = (hash_length - hamming_distance) / hash_length
            if similarity > self.threshold:
                return 0
            return 1

        # Distance mode: a small hamming distance means duplicates.
        if hamming_distance <= self.threshold:
            return 0
        return 1
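To make the default similarity threshold of 0.95 concrete, a worked example assuming the standard 64-bit dhash (an 8x8 difference grid from the 9x8 input):

hash_length = 64
(hash_length - 2) / hash_length  # distance 2 -> 0.96875 > 0.95: duplicates (0)
(hash_length - 4) / hash_length  # distance 4 -> 0.9375 <= 0.95: possible duplicates (1)

So at most three differing bits still count as duplicates under the default threshold.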
@@ -0,0 +1,61 @@
import imagehash
from PIL import Image


class Phash:
    def __init__(self, threshold=5):
        self.name = "Phash"
        self.threshold = threshold
        self.duplicates = []
        self.possible_duplicates = []

    def process(self, image_paths):
        """
        Takes provided image paths and classifies each pair as duplicates or
        possible duplicates; pairs recorded in neither list are not duplicates.
        """
        image_paths = set(image_paths)

        hashes = {image_path: self._phash(image_path) for image_path in image_paths}
        checked_pairs = set()

        for path1, hash1 in hashes.items():
            for path2, hash2 in hashes.items():
                if (
                    path1 == path2
                    or (path1, path2) in checked_pairs
                    or (path2, path1) in checked_pairs
                ):
                    continue

                result = self._filter(hash1, hash2)

                if result == 0:
                    self.duplicates.append((path1, path2))
                elif result == 1:
                    self.possible_duplicates.extend((path1, path2))

                checked_pairs.add((path1, path2))
                checked_pairs.add((path2, path1))

    def _phash(self, image_path):
        try:
            with Image.open(image_path) as image:
                # phash resizes internally before its DCT; this pre-resize keeps
                # the preprocessing consistent across the hash classes.
                image = image.convert("L").resize((8, 8), Image.LANCZOS)
                return imagehash.phash(image)
        except IOError as e:
            print(f"Error accessing image: {image_path}: {e}")
            return None

    def _filter(self, h1, h2):
        """
        Uses hamming distance to classify images.
        0 = duplicates, 1 = possible duplicates
        """
        if h1 is None or h2 is None:
            return None  # an image failed to load; skip this pair
        hamming_distance = h1 - h2
        if hamming_distance <= self.threshold:
            return 0
        return 1
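Since Ahash, Dhash, and Phash expose the same process/duplicates interface, a caller can swap them freely. A sketch, with a hypothetical paths list:

for detector in (Ahash(), Dhash(), Phash()):
    detector.process(paths)
    print(detector.name, len(detector.duplicates), "duplicate pairs")

Note that Ahash and Phash default to a bit-distance threshold, while Dhash defaults to a similarity ratio.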
@@ -0,0 +1,115 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from itertools import combinations

import cv2


class SIFT:
    """
    Uses OpenCV to implement SIFT, optimized with parallel processing for
    image comparisons.
    """

    def __init__(
        self,
        threshold=30,
        sigma=1.6,
        edge_threshold=10,
        n_octave_layers=3,
        contrast_threshold=0.04,
        image_ratio=0.3,
        showprogress=False,  # hard to set this when running parameter permutations
    ):
        self.name = "SIFT"
        self.threshold = threshold
        self.image_ratio = image_ratio
        self.sigma = sigma
        self.edge_threshold = edge_threshold
        self.n_octave_layers = n_octave_layers
        self.contrast_threshold = contrast_threshold
        self.max_workers = 4
        self.duplicates = []
        self.possible_duplicates = []
        self.showprogress = showprogress

    def process(self, image_paths):
        """
        Process images in parallel to classify duplicates.
        """
        preprocessed_images = self._preprocess_images(image_paths)
        total_pairs = len(image_paths) * (len(image_paths) - 1) / 2
        processed_pairs = 0

        pairs = list(combinations(preprocessed_images.keys(), 2))
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            future_to_pair = {
                executor.submit(self._process_pair, pair, preprocessed_images): pair
                for pair in pairs
            }
            for future in as_completed(future_to_pair):
                result = future.result()
                if result is not None:
                    result_type, path1, path2 = result
                    if result_type == 0:
                        self.duplicates.append((path1, path2))
                    elif result_type == 1:
                        self.possible_duplicates.extend((path1, path2))
                processed_pairs += 1
                p = (processed_pairs / total_pairs) * 100

                if self.showprogress:
                    print(f"Processed {p:.1f}%...")

    def _process_pair(self, pair, preprocessed_images):
        path1, path2 = pair
        _, _, descriptors1 = preprocessed_images[path1]
        _, _, descriptors2 = preprocessed_images[path2]

        if descriptors1 is None or descriptors2 is None:
            return None

        # For each descriptor in image 1, find its two nearest neighbours in
        # image 2 so Lowe's ratio test can be applied below.
        matches = cv2.BFMatcher().knnMatch(descriptors1, descriptors2, k=2)
        close_enough_matches = self._calc_lowe(matches)

        result = self._filter(close_enough_matches)
        return result, path1, path2

    def _preprocess_images(self, image_paths):
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            results = list(executor.map(self._preprocess_single_image, image_paths))
        return dict(zip(image_paths, results))

    def _preprocess_single_image(self, image_path):
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"Failed to open image: {image_path}.")
            return None, None, None
        # Equalize contrast and downscale before feature extraction.
        img = cv2.equalizeHist(img)
        img = cv2.resize(
            img,
            None,
            fx=self.image_ratio,
            fy=self.image_ratio,
            interpolation=cv2.INTER_AREA,
        )
        sift = cv2.SIFT_create(
            # nOctaveLayers=self.n_octave_layers,
            contrastThreshold=self.contrast_threshold,
            edgeThreshold=self.edge_threshold,
            sigma=self.sigma,
        )
        keypoints, descriptors = sift.detectAndCompute(img, None)
        return img, keypoints, descriptors

    def _calc_lowe(self, matches):
        # Lowe's ratio test: keep a match only if its best neighbour is
        # clearly closer than the second best (ratio 0.7).
        close_enough_matches = []
        for match_pair in matches:
            if len(match_pair) >= 2:
                m, n = match_pair
                if m.distance < 0.7 * n.distance:
                    close_enough_matches.append(m)
        return close_enough_matches

    def _filter(self, matches):
        if len(matches) > self.threshold:
            return 0
        return 1
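A usage sketch for the SIFT detector, assuming a hypothetical images/ folder:

import glob

paths = glob.glob("images/*.jpg")
detector = SIFT(threshold=30, showprogress=True)
detector.process(paths)
print(detector.duplicates)

SIFT matching is far more expensive than the hash-based detectors, so the class downscales images (image_ratio) and splits pair comparisons across a thread pool.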