Commit 62a1f3f: Refactor
lixiang007666 committed Jul 25, 2024
1 parent e2d3858
Showing 46 changed files with 191 additions and 83 deletions.
12 changes: 12 additions & 0 deletions README.md
@@ -196,3 +196,15 @@ Evaluate using the last frame of the video.
- SUN Zhengwentai. clip-score: CLIP Score for PyTorch. https://github.com/Taited/clip-score, 2023.
- Christoph Schuhmann. CLIP+MLP Aesthetic Score Predictor. https://github.com/christophschuhmann/improved-aesthetic-predictor, 2022.
- Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, and Xi Chen. Improved Techniques for Training GANs. NeurIPS, 29, 2016.
Evaluation of the generative models is divided into two steps:
1. Generate a large set of benchmark images.
2. Score the generated images against multiple metrics using the provided scripts.
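The two steps above boil down to one generation run followed by one scoring command per metric. The sketch below only assembles the scoring commands as this commit's `scripts/run_kolors_tests.sh` invokes them; the image directory is a hypothetical placeholder for step 1's output, and nothing is executed here.

```python
# Step 2 of the benchmark flow: one command per metric script in this repo.
# image_dir is a hypothetical stand-in for the directory produced in step 1.
image_dir = "/path/to/benchmark_images"
csv_path = "resources/MS-COCO_val2014_30k_captions.csv"

metric_cmds = [
    ["python3", "metrics/fid_score.py", image_dir],
    ["python3", "metrics/inception_score.py", "--path", image_dir],
    ["python3", "metrics/clip_score.py", image_dir, csv_path],
    ["python3", "metrics/aesthetic_score.py", "--image_path", image_dir],
]

for cmd in metric_cmds:
    print(" ".join(cmd))
```

Each command can be handed to `subprocess.run` once the image directory actually exists.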
9 changes: 9 additions & 0 deletions benchmark_log.txt
@@ -0,0 +1,9 @@
Starting the benchmarking process at 2024-07-26T00:30:13 CST
Calculating FID Score...
/home/lixiang/anaconda3/envs/onediff_nexfort/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
warnings.warn(
/home/lixiang/anaconda3/envs/onediff_nexfort/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`.
warnings.warn(msg)
/home/lixiang/data/fid_kolors_torch
Processing: ImageDataset(30000 items)
  0%|          | 0/235 [00:00<?, ?it/s]  ...  95%|█████████▌| 224/235 [01:08<00:01, 7.56it/s]  (intermediate tqdm progress ticks omitted)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
52 changes: 26 additions & 26 deletions metrics/aesthetic_score.py
@@ -9,22 +9,6 @@
 import argparse
 
 
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--image_path",
-        type=str,
-        default="./output_images",
-        help="The path to save generated images",
-    )
-
-    args = parser.parse_args()
-    return args
-
-
-args = parse_args()
-
-
 class MLP(pl.LightningModule):
     def __init__(self, input_size):
         super().__init__()
@@ -49,16 +33,13 @@ def normalized(a, axis=-1, order=2):
     return a / np.expand_dims(l2, axis)
 
 
-# Set up model and CLIP
-model_path = "pretrained/sac+logos+ava1-l14-linearMSE.pth"
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = MLP(768).to(device)
-model.load_state_dict(torch.load(model_path))
-model.eval()
-model2, preprocess = clip.load("ViT-L/14", device=device)
-
-
-def evaluate_images(folder_path):
+def evaluate_images(folder_path, model_path="resources/sac+logos+ava1-l14-linearMSE.pth"):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model = MLP(768).to(device)
+    model.load_state_dict(torch.load(model_path))
+    model.eval()
+    model2, preprocess = clip.load("ViT-L/14", device=device)
+
     path = Path(folder_path)
     images = list(path.rglob("*.png"))
     scores = []
@@ -86,4 +67,23 @@ def evaluate_images(folder_path):
     return scores, None
 
 
-scores, average_score = evaluate_images(args.image_path)
+if __name__ == "__main__":
+    def parse_args():
+        parser = argparse.ArgumentParser()
+        parser.add_argument(
+            "--image_path",
+            type=str,
+            default="./output_images",
+            help="The path to the directory containing the images",
+        )
+        parser.add_argument(
+            "--model_path",
+            type=str,
+            default="resources/sac+logos+ava1-l14-linearMSE.pth",
+            help="The path to the pretrained model",
+        )
+        return parser.parse_args()
+
+    args = parse_args()
+
+    scores, average_score = evaluate_images(args.image_path, args.model_path)
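The refactored `evaluate_images` embeds each image with CLIP ViT-L/14 and L2-normalizes the 768-d embedding (via the file's `normalized` helper) before scoring it with the MLP head. A minimal pure-Python sketch of that normalization step, using a toy vector in place of a real CLIP embedding; this `normalized` is a hypothetical stdlib-only stand-in for the NumPy version:

```python
def normalized(vec, order=2):
    """L2-normalize a vector: a pure-Python stand-in for the numpy
    `normalized` helper in aesthetic_score.py (no numpy required)."""
    norm = sum(abs(x) ** order for x in vec) ** (1.0 / order)
    norm = norm if norm != 0 else 1.0  # guard against an all-zero vector
    return [x / norm for x in vec]

v = normalized([3.0, 4.0])
print(v)  # [0.6, 0.8]
```

With a real embedding, this normalized 768-d vector is exactly what the `MLP(768)` head consumes.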
57 changes: 40 additions & 17 deletions metrics/clip_score.py
@@ -2,27 +2,50 @@
 from glob import glob
 from T2IBenchmark import calculate_clip_score
 
-cat_paths = sorted(glob('/home/lixiang/data/fid_kolors_nexfort/*.png'), key=lambda x: int(x.split('_')[-1].split('.')[0]))
-print(f"Number of image files: {len(cat_paths)}")
-
-csv_path = '/home/lixiang/odeval/MS-COCO_val2014_30k_captions.csv'
-
-try:
-    captions_df = pd.read_csv(csv_path)
-    print(f"Number of captions read: {len(captions_df)}")
-
-    if len(cat_paths) != len(captions_df):
-        print("Error: The number of images does not match the number of captions.")
-    else:
-        captions_mapping = {cat_paths[i]: captions_df.iloc[i, 1] for i in range(len(cat_paths))}
-        print("Captions mapping created successfully.")
-
-        clip_score = calculate_clip_score(cat_paths, captions_mapping=captions_mapping)
-        print(f"CLIP Score: {clip_score}")
-
-except FileNotFoundError:
-    print(f"Error: The file {csv_path} was not found.")
-except pd.errors.EmptyDataError:
-    print("Error: No data found in the CSV file.")
-except Exception as e:
-    print(f"An error occurred: {e}")
+def odeval_clip_score(image_dir, csv_path):
+    """
+    Calculate the CLIP score for given images and captions.
+
+    Parameters:
+        image_dir (str): The directory containing the images.
+        csv_path (str): The path to the CSV file containing the captions.
+
+    Returns:
+        float: The calculated CLIP score.
+    """
+    try:
+        cat_paths = sorted(glob(f'{image_dir}/*.png'), key=lambda x: int(x.split('_')[-1].split('.')[0]))
+        print(f"Number of image files: {len(cat_paths)}")
+
+        captions_df = pd.read_csv(csv_path)
+        print(f"Number of captions read: {len(captions_df)}")
+
+        if len(cat_paths) != len(captions_df):
+            raise ValueError("The number of images does not match the number of captions.")
+
+        captions_mapping = {cat_paths[i]: captions_df.iloc[i, 1] for i in range(len(cat_paths))}
+        print("Captions mapping created successfully.")
+
+        clip_score = calculate_clip_score(cat_paths, captions_mapping=captions_mapping)
+        return clip_score
+
+    except FileNotFoundError:
+        print(f"Error: The file {csv_path} was not found.")
+        return None
+    except pd.errors.EmptyDataError:
+        print("Error: No data found in the CSV file.")
+        return None
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return None
+
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) != 3:
+        print("Usage: python calculate_clip_score.py <image_dir> <csv_path>")
+    else:
+        image_dir = sys.argv[1]
+        csv_path = sys.argv[2]
+        score = odeval_clip_score(image_dir, csv_path)
+        if score is not None:
+            print(f"CLIP Score: {score}")
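`odeval_clip_score` pairs image *i* with caption row *i*, so it relies on the numeric sort key ordering files as `_1.png, _2.png, ..., _10.png` rather than lexicographically. A small sketch of that key, assuming filenames of the form `prefix_<index>.png` (the sample names below are hypothetical):

```python
# Numeric sort of generated-image filenames, mirroring the sort key
# used in odeval_clip_score. A plain lexicographic sort would put
# "out_10.png" before "out_2.png" and misalign images with captions.
paths = ["out_10.png", "out_2.png", "out_1.png"]

def numeric_key(path):
    # "out_10.png" -> 10: take the chunk after the last underscore, drop the extension
    return int(path.split('_')[-1].split('.')[0])

sorted_paths = sorted(paths, key=numeric_key)
print(sorted_paths)  # ['out_1.png', 'out_2.png', 'out_10.png']
```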
7 changes: 0 additions & 7 deletions metrics/fid.py

This file was deleted.

28 changes: 28 additions & 0 deletions metrics/fid_score.py
@@ -0,0 +1,28 @@
from T2IBenchmark import calculate_fid
from T2IBenchmark.datasets import get_coco_fid_stats

def calculate_fid_score(image_path):
    """
    Calculate the FID score for a given path of images.

    Parameters:
        image_path (str): The path to the directory containing the generated images.

    Returns:
        float: The calculated FID score.
    """
    fid, _ = calculate_fid(
        image_path,
        get_coco_fid_stats()
    )
    return fid

if __name__ == "__main__":
    import sys
    if len(sys.argv) != 2:
        print("Usage: python calculate_fid_score.py <image_path>")
    else:
        image_path = sys.argv[1]
        fid = calculate_fid_score(image_path)
        print(f"FID score: {fid}")
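`calculate_fid` is delegated to T2IBenchmark, but the quantity it returns is the Fréchet distance between two Gaussians fitted to Inception features: ||mu1 - mu2||^2 + Tr(C1 + C2 - 2(C1 C2)^(1/2)). A stdlib-only toy for the 1-D case, where the trace term collapses to (sigma1 - sigma2)^2; the sample data is made up:

```python
import math

def fid_1d(xs, ys):
    """Fréchet distance between two 1-D samples, each modeled as a Gaussian.
    In 1-D the general FID formula reduces to
        (mu1 - mu2)^2 + (sigma1 - sigma2)^2."""
    def stats(v):
        mu = sum(v) / len(v)
        var = sum((x - mu) ** 2 for x in v) / len(v)
        return mu, math.sqrt(var)

    mu1, s1 = stats(xs)
    mu2, s2 = stats(ys)
    return (mu1 - mu2) ** 2 + (s1 - s2) ** 2

# Identical spread, mean shifted by 2 -> distance 4.0
print(fid_1d([0.0, 1.0], [2.0, 3.0]))  # 4.0
```

Identical distributions score 0; the real metric does the same computation on 2048-d Inception-v3 feature statistics against the pre-computed COCO stats.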
62 changes: 29 additions & 33 deletions metrics/inception_score.py
@@ -4,21 +4,12 @@
 from torch.nn import functional as F
 import torch.utils.data
 import os
-import imageio
-from torchvision.models.inception import inception_v3
-from utils.load_img_data import Dataset
 import numpy as np
 from scipy.stats import entropy
 from tqdm import tqdm
-from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
-
-
-parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
-parser.add_argument(
-    "-c", "--gpu", default="0", type=str, help="GPU to use (leave blank for CPU only)"
-)
-parser.add_argument("--path", type=str, default="/path/to/your/output")
-
+from torchvision.models.inception import inception_v3
+import torchvision.transforms as transforms
+from utils.load_img_data import Dataset
 
 def inception_score(imgs, cuda=True, batch_size=32, resize=True, splits=10):
     N = len(imgs)
@@ -45,7 +36,7 @@ def get_pred(x):
         if resize:
             x = up(x)
         x = inception_model(x)
-        return F.softmax(x).data.cpu().numpy()
+        return F.softmax(x, dim=1).data.cpu().numpy()
 
     preds = np.zeros((N, 1000))
 
@@ -69,26 +60,9 @@ def get_pred(x):
 
     return np.mean(split_scores), np.std(split_scores)
 
-
-if __name__ == "__main__":
-
-    class IgnoreLabelDataset(torch.utils.data.Dataset):
-        def __init__(self, orig):
-            self.orig = orig
-
-        def __getitem__(self, index):
-            return self.orig[index][0]
-
-        def __len__(self):
-            return len(self.orig)
-
-    import torchvision.transforms as transforms
-
-    args = parser.parse_args()
-    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
-
+def calculate_inception_score(image_dir, cuda=True, batch_size=32, resize=True, splits=10):
     imgs = Dataset(
-        args.path,
+        image_dir,
         transforms.Compose(
             [
                 transforms.ToTensor(),
@@ -97,4 +71,26 @@ def __len__(self):
         ),
     )
 
-    print(inception_score(imgs, cuda=True, batch_size=32, resize=True, splits=10))
+    return inception_score(imgs, cuda=cuda, batch_size=batch_size, resize=resize, splits=splits)
+
+if __name__ == "__main__":
+    from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
+
+    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
+    parser.add_argument("--path", type=str, required=True, help="Path to the directory containing the images")
+    parser.add_argument("--batch_size", type=int, default=32, help="Batch size for data loader")
+    parser.add_argument("--splits", type=int, default=10, help="Number of splits for Inception Score calculation")
+    parser.add_argument("--resize", type=bool, default=True, help="Whether to resize images to 299x299")
+    parser.add_argument("--cuda", type=bool, default=True, help="Whether to use GPU for calculations")
+
+    args = parser.parse_args()
+
+    score, std = calculate_inception_score(
+        args.path,
+        cuda=args.cuda,
+        batch_size=args.batch_size,
+        resize=args.resize,
+        splits=args.splits,
+    )
+
+    print(f"Inception Score: {score} ± {std}")
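`inception_score` averages, over images, the KL divergence between each per-image class posterior p(y|x) and the marginal p(y), then exponentiates: IS = exp(E_x KL(p(y|x) || p(y))). A stdlib-only toy over a couple of two-class probability rows; the data is fabricated, and the real script uses Inception-v3 softmax outputs over 1000 classes, averaged over `splits` chunks:

```python
import math

def toy_inception_score(preds):
    """IS = exp( mean_x KL( p(y|x) || p(y) ) ) over rows of class probabilities."""
    n_classes = len(preds[0])
    # Marginal distribution p(y), averaged over all images
    py = [sum(row[c] for row in preds) / len(preds) for c in range(n_classes)]
    kls = [
        sum(p * (math.log(p) - math.log(py[c])) for c, p in enumerate(row) if p > 0)
        for row in preds
    ]
    return math.exp(sum(kls) / len(kls))

# Confident and diverse predictions score high; the maximum equals the class count.
print(toy_inception_score([[1.0, 0.0], [0.0, 1.0]]))  # 2.0 for two one-hot classes
# Uniform predictions carry no information about the image and score the minimum.
print(toy_inception_score([[0.5, 0.5], [0.5, 0.5]]))  # 1.0
```

This is why the score rewards generators whose samples are both individually recognizable (peaked p(y|x)) and collectively diverse (flat p(y)).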
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
47 changes: 47 additions & 0 deletions scripts/run_kolors_tests.sh
@@ -0,0 +1,47 @@
#!/bin/bash

# Setup script for image generation model benchmarking

# Directories for images
coco_30k_image_dir="/home/lixiang/data/fid_kolors_torch"

# CSV file path
csv_path="resources/MS-COCO_val2014_30k_captions.csv"

# Log file path
log_file="benchmark_log.txt"

# Redirect all output and errors to log file
exec > >(tee -a "$log_file") 2>&1

echo "Starting the benchmarking process at $(date)"

# Check if image directory exists
if [ ! -d "$coco_30k_image_dir" ]; then
    echo "Error: Image directory does not exist: $coco_30k_image_dir"
    exit 1
fi

# Check if CSV file exists
if [ ! -f "$csv_path" ]; then
    echo "Error: CSV file does not exist: $csv_path"
    exit 1
fi

# Execute Python scripts for different metrics and log results
echo "Calculating FID Score..."
python3 metrics/fid_score.py "$coco_30k_image_dir"

echo "Calculating Inception Score..."
python3 metrics/inception_score.py --path "$coco_30k_image_dir"

echo "Calculating CLIP Score..."
python3 metrics/clip_score.py "$coco_30k_image_dir" "$csv_path"

echo "Calculating Aesthetic Score..."
python3 metrics/aesthetic_score.py --image_path "$coco_30k_image_dir"

echo "Calculating Structural Similarity Index..."
python3 metrics/structural_similarity.py --folder1 "$coco_30k_image_dir" --folder2 "$coco_30k_image_dir"

echo "Benchmarking process completed at $(date)"
File renamed without changes.
File renamed without changes.
File renamed without changes.
