From ec6cf4a54e975ab78662ef15b44cbbe45fcbc8c7 Mon Sep 17 00:00:00 2001 From: choisieun Date: Wed, 18 Dec 2024 15:58:01 +0900 Subject: [PATCH] =?UTF-8?q?Feat:=20EfficientNet-b1=20Multi=20Modal=20Model?= =?UTF-8?q?=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...tNet_fine_tuning_cosine-batchup-over.ipynb | 918 ++++++++++++++++++ 1 file changed, 918 insertions(+) create mode 100644 AI/EfficientNet_fine_tuning_cosine-batchup-over.ipynb diff --git a/AI/EfficientNet_fine_tuning_cosine-batchup-over.ipynb b/AI/EfficientNet_fine_tuning_cosine-batchup-over.ipynb new file mode 100644 index 0000000..5cb3811 --- /dev/null +++ b/AI/EfficientNet_fine_tuning_cosine-batchup-over.ipynb @@ -0,0 +1,918 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "de802327", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", + "Requirement already satisfied: opencv-python==4.8.0.74 in /home/work/.local/lib/python3.10/site-packages (4.8.0.74)\n", + "Requirement already satisfied: numpy>=1.21.2 in /usr/local/lib/python3.10/dist-packages (from opencv-python==4.8.0.74) (1.22.2)\n", + "\u001b[33mDEPRECATION: devscripts 2.22.1ubuntu1 has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of devscripts or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n", + "Found existing installation: scikit-learn 1.2.2\n", + "Uninstalling scikit-learn-1.2.2:\n", + " Successfully uninstalled scikit-learn-1.2.2\n", + "Found existing installation: imbalanced-learn 0.10.1\n", + "Uninstalling imbalanced-learn-0.10.1:\n", + " Successfully uninstalled imbalanced-learn-0.10.1\n", + "Defaulting to user installation because normal site-packages is not writeable\n", + "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", + "Collecting scikit-learn==1.2.2\n", + " Obtaining dependency information for scikit-learn==1.2.2 from https://files.pythonhosted.org/packages/fa/1e/36d7609e84b50d4a2e5bc43cd5013d9ea885799e5813a1e9cf5bb1afd3f4/scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", + " Downloading scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", + "Collecting imbalanced-learn==0.10.1\n", + " Obtaining dependency information for imbalanced-learn==0.10.1 from https://files.pythonhosted.org/packages/11/80/911e581a4fc973179e3a48c1272435aa09cce21c12af122c3886d3d35cb5/imbalanced_learn-0.10.1-py3-none-any.whl.metadata\n", + " Downloading imbalanced_learn-0.10.1-py3-none-any.whl.metadata (8.2 kB)\n", + "Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2) (1.22.2)\n", + 
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2) (1.11.1)\n", + "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.2.2) (3.2.0)\n", + "Downloading scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.6/9.6 MB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading imbalanced_learn-0.10.1-py3-none-any.whl (226 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.0/226.0 kB\u001b[0m \u001b[31m338.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[33mDEPRECATION: devscripts 2.22.1ubuntu1 has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of devscripts or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", + "\u001b[0mInstalling collected packages: scikit-learn, imbalanced-learn\n", + "Successfully installed imbalanced-learn-0.10.1 scikit-learn-1.2.2\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n", + "Defaulting to user installation because normal site-packages is not writeable\n", + "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", + "Requirement already satisfied: efficientnet_pytorch in /home/work/.local/lib/python3.10/site-packages (0.7.1)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from efficientnet_pytorch) (2.1.0a0+32f93b1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->efficientnet_pytorch) (3.11.0)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->efficientnet_pytorch) (4.7.1)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->efficientnet_pytorch) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->efficientnet_pytorch) (2.6.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->efficientnet_pytorch) (2.11.3)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->efficientnet_pytorch) (2023.6.0)\n", + "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->efficientnet_pytorch) (2.0.1)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->efficientnet_pytorch) (1.3.0)\n", + "\u001b[33mDEPRECATION: devscripts 2.22.1ubuntu1 has a non-standard version number. pip 23.3 will enforce this behaviour change. 
A possible replacement is to upgrade to a newer version of devscripts or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n", + "Defaulting to user installation because normal site-packages is not writeable\n", + "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", + "Requirement already satisfied: albumentations in /usr/local/lib/python3.10/dist-packages (1.1.0)\n", + "Requirement already satisfied: numpy>=1.11.1 in /usr/local/lib/python3.10/dist-packages (from albumentations) (1.22.2)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from albumentations) (1.11.1)\n", + "Requirement already satisfied: scikit-image>=0.16.1 in /usr/local/lib/python3.10/dist-packages (from albumentations) (0.19.0)\n", + "Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from albumentations) (6.0.1)\n", + "Requirement already satisfied: qudida>=0.0.4 in /usr/local/lib/python3.10/dist-packages (from albumentations) (0.0.4)\n", + "Requirement already satisfied: opencv-python-headless>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from albumentations) (4.6.0.66)\n", + "Requirement already satisfied: scikit-learn>=0.19.1 in /home/work/.local/lib/python3.10/site-packages (from qudida>=0.0.4->albumentations) (1.2.2)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from qudida>=0.0.4->albumentations) (4.7.1)\n", + "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.16.1->albumentations) (2.6.3)\n", + "Requirement already satisfied: pillow!=7.1.0,!=7.1.1,!=8.3.0,>=6.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.16.1->albumentations) (9.0.1)\n", + "Requirement already satisfied: imageio>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.16.1->albumentations) (2.8.0)\n", + "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.16.1->albumentations) (2022.5.4)\n", + "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.16.1->albumentations) (1.1.1)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.16.1->albumentations) (23.1)\n", + "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.19.1->qudida>=0.0.4->albumentations) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.19.1->qudida>=0.0.4->albumentations) (3.2.0)\n", + "\u001b[33mDEPRECATION: devscripts 2.22.1ubuntu1 has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of devscripts or contact the author to suggest that they release a version with a conforming version number. 
Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n", + "Defaulting to user installation because normal site-packages is not writeable\n", + "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", + "Requirement already satisfied: opencv-python in /home/work/.local/lib/python3.10/site-packages (4.8.0.74)\n", + "Requirement already satisfied: numpy>=1.21.2 in /usr/local/lib/python3.10/dist-packages (from opencv-python) (1.22.2)\n", + "\u001b[33mDEPRECATION: devscripts 2.22.1ubuntu1 has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of devscripts or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "#!pip install -qqq timm torchmetrics\n", + "!pip install opencv-python==4.8.0.74\n", + "!pip uninstall -y scikit-learn imbalanced-learn\n", + "!pip install scikit-learn==1.2.2 imbalanced-learn==0.10.1\n", + "!pip install efficientnet_pytorch\n", + "!pip install albumentations\n", + "!pip install opencv-python" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "df90c1de", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "import glob\n", + "import cv2\n", + "import random\n", + "import time\n", + "import gc\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "from torch.utils.data import Dataset, DataLoader\n", + "\n", + "# torchvision\n", + "import torchvision\n", + "import torchvision.transforms as transforms\n", + "import torchvision.datasets as datasets\n", + "#import torchvision.models as models\n", + "\n", + "# image data augmentation을 위한 albumentations\n", + "import albumentations as A\n", + "from albumentations.pytorch.transforms import ToTensorV2\n", + "\n", + "# timm에서 굉장히 많은 pretrained model을 가져와서 사용할 수 있습니다.\n", + "#import timm\n", + "#from timm import create_model\n", + "from efficientnet_pytorch import EfficientNet\n", + "\n", + "import torchmetrics\n", + "\n", + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "# Utils\n", + "from PIL import Image\n", + "\n", + "from tqdm.auto import tqdm, trange\n", + "\n", + "from imblearn.under_sampling import RandomUnderSampler\n", + "\n", + "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", + "\n", + "import warnings\n", + 
"warnings.filterwarnings(action='ignore') " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "27a63ae6", + "metadata": {}, + "outputs": [], + "source": [ + "torch.manual_seed(42)\n", + "torch.cuda.manual_seed_all(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "82bfdc8d", + "metadata": {}, + "outputs": [], + "source": [ + "config ={\n", + " 'model': 'efficientnet_b0', \n", + " 'model_save' : './',\n", + " 'sub_path' : './',\n", + " 'data_path' : './data/',\n", + " 'learning_rate': 3e-4, \n", + " 'seed': 42,\n", + " 'img_size': 224,\n", + " 'n_epochs': 5,\n", + " 'ratio': 0.7,\n", + " \"batch_size\": 64,\n", + " \"min_lr\": 1e-6,\n", + " \"T_max\": 10,\n", + " \"weight_decay\": 1e-6,\n", + " \"device\": torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\"),\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f82b1fb8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 427993 entries, 0 to 433872\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 imgID 427993 non-null object\n", + " 1 breed 427993 non-null object\n", + " 2 age 427993 non-null int64 \n", + " 3 gender 427993 non-null object\n", + " 4 species 427993 non-null object\n", + " 5 lesions 427993 non-null object\n", + " 6 image_path 427993 non-null object\n", + "dtypes: int64(1), object(6)\n", + "memory usage: 26.1+ MB\n", + "None\n", + "\n", + "Int64Index: 52939 entries, 0 to 54232\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 imgID 52939 non-null object\n", + " 1 breed 52939 non-null object\n", + " 2 age 52939 non-null int64 \n", + " 3 gender 52939 non-null object\n", + " 4 species 52939 non-null object\n", + " 5 lesions 52939 non-null object\n", + " 6 image_path 52939 non-null object\n", + "dtypes: int64(1), object(6)\n", + "memory usage: 3.2+ MB\n", + "None\n" + ] + } + ], + "source": [ + "train_csv = pd.read_csv(config['data_path'] + 'train_data.csv')\n", + "val_csv = pd.read_csv(config['data_path'] + 'val_data.csv')\n", + "\n", + "train_csv.drop(columns=['identifier', 'symptoms'], inplace=True)\n", + "val_csv.drop(columns=['identifier', 'symptoms'], inplace=True)\n", + "\n", + "train_csv = train_csv.dropna()\n", + "val_csv = val_csv.dropna()\n", + "\n", + "print(train_csv.info())\n", + "print(val_csv.info())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8244d6e0", + "metadata": {}, + "outputs": [], + "source": [ + "# 'breed'가 'D'인 데이터만 필터링\n", + "train_csv = train_csv[train_csv['species'] == 'D']\n", + "val_csv = val_csv[val_csv['species'] == 'D']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08015fea", + "metadata": {}, + "outputs": [], + "source": [ + "import albumentations as A\n", + "from albumentations.pytorch import ToTensorV2\n", + "import cv2\n", + "\n", + "# Albumentations 변환 설정\n", + "transform = A.Compose([\n", + " A.HorizontalFlip(p=0.5), # 좌우 반전\n", + " A.RandomBrightnessContrast(p=0.2),# 밝기/대비 조정\n", + " A.Rotate(limit=30, p=0.5), # 회전\n", + " A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),\n", + " A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),\n", + " ToTensorV2()\n", + "])\n", + "\n", + "# 소수 클래스만 증강\n", + "augmented_images = []\n", + "augmented_labels = []\n", + "\n", + "for _, row in 
train_csv[train_csv['lesions'].isin(['A5', 'A6', 'A4'])].iterrows():\n", + " image_path = row['image_path']\n", + " label = row['lesions']\n", + " \n", + " # 원본 이미지를 3배 증강\n", + " for _ in range(3):\n", + " image = cv2.imread(image_path)\n", + " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", + " augmented = transform(image=image)\n", + " augmented_images.append(augmented['image'])\n", + " augmented_labels.append(label)\n", + "\n", + "# 증강된 데이터 결합\n", + "augmented_df = pd.DataFrame({'image_path': [None] * len(augmented_images), 'lesions': augmented_labels}) # 경로는 None으로 유지\n", + "train_csv = pd.concat([train_csv, augmented_df], ignore_index=True)\n", + "\n", + "print(\"데이터 증강 후 클래스 분포:\")\n", + "print(train_csv['lesions'].value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b7ebb22", + "metadata": {}, + "outputs": [], + "source": [ + "rus = RandomUnderSampler(random_state=42)\n", + "\n", + "X_resampled, y_resampled = rus.fit_resample(train_csv.drop(columns=['lesions']), train_csv['lesions'])\n", + "\n", + "train_resampled = pd.concat(\n", + " [pd.DataFrame(X_resampled, columns=['imgID', 'breed', 'age', 'gender', 'image_path']),\n", + " pd.DataFrame(y_resampled, columns=['lesions'])],\n", + " axis=1\n", + ")\n", + "\n", + "X_resampled, y_resampled = rus.fit_resample(val_csv.drop(columns=['lesions']), val_csv['lesions'])\n", + "\n", + "val_resampled = pd.concat(\n", + " [pd.DataFrame(X_resampled, columns=['imgID', 'breed', 'age', 'gender', 'image_path']),\n", + " pd.DataFrame(y_resampled, columns=['lesions'])],\n", + " axis=1\n", + ")\n", + "\n", + "print(train_resampled.head())\n", + "print(val_resampled.head())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55d78ae4", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import MinMaxScaler\n", + "\n", + "def preprocess_dataframe(df):\n", + " \"\"\"\n", + " 데이터프레임에 대해 전처리 수행:\n", + " - 범주형 변수 원핫 인코딩\n", + " - 기존 범주형 컬럼 제거\n", + " - 연속형 변수 스케일링\n", + " \"\"\"\n", + " # 원핫 인코딩 적용할 컬럼\n", + " categorical_cols = ['breed', 'gender', 'lesions']\n", + " df = pd.get_dummies(df, columns=categorical_cols, drop_first=False)\n", + " \n", + " # 스케일링 적용할 컬럼\n", + " scaler = MinMaxScaler()\n", + " if 'age' in df.columns:\n", + " df['age'] = scaler.fit_transform(df[['age']])\n", + " \n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f4d520e", + "metadata": {}, + "outputs": [], + "source": [ + "train_df = preprocess_dataframe(train_resampled)\n", + "val_df = preprocess_dataframe(val_resampled)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "967dde0a", + "metadata": {}, + "outputs": [], + "source": [ + "def find_image_path(row, phase):\n", + " base_path = f'./data/{phase}/image'\n", + " species = str(row['species']) # species를 문자열로 변환\n", + " imgID = row['imgID'] # 이미지 파일 이름\n", + " \n", + " # species 폴더 정의\n", + " if species == 'D': # 반려견\n", + " species_folders = ['반려견_01', '반려견_02'] if phase == 'train' else ['반려견']\n", + " elif species == 'C': # 반려묘\n", + " species_folders = ['반려묘']\n", + " else:\n", + " return None # 잘못된 species 값\n", + " \n", + " # 폴더들에서 이미지 경로 탐색\n", + " for folder in species_folders:\n", + " target_folder = os.path.join(base_path, folder, symptoms)\n", + " \n", + " # symptoms 폴더 내의 하위 폴더를 탐색\n", + " if os.path.exists(target_folder):\n", + " # symptoms 폴더 내의 하위 폴더들을 리스트업\n", + " for sub_folder in os.listdir(target_folder):\n", + " sub_folder_path = os.path.join(target_folder, 
sub_folder)\n", + " if os.path.isdir(sub_folder_path):\n", + " img_path = os.path.join(sub_folder_path, imgID)\n", + " if os.path.exists(img_path):\n", + " return img_path\n", + " return None # 파일이 없으면 None 반환" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b4599ec", + "metadata": {}, + "outputs": [], + "source": [ + "size = 224 # 정사각형 이미지 기준 한 변의 길이\n", + "mean = (0.485, 0.456, 0.406)\n", + "std = (0.229, 0.224, 0.225)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d2e9e55", + "metadata": {}, + "outputs": [], + "source": [ + "class ImageTransform(): \n", + " def __init__(self, resize, mean, std):\n", + " \"\"\"\n", + " 이미지 변환 클래스를 초기화합니다.\n", + " \n", + " Args:\n", + " resize (int): 변환된 이미지의 크기 (정사각형 크기).\n", + " mean (tuple): Normalize 과정에서 사용할 평균값.\n", + " std (tuple): Normalize 과정에서 사용할 표준편차값.\n", + " \"\"\"\n", + " self.data_transform = {\n", + " # 학습 데이터 변환 파이프라인\n", + " 'train': transforms.Compose([\n", + " # 이미지를 랜덤 크기로 잘라내어 학습 데이터 다양화\n", + " transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),\n", + " # 이미지를 좌우로 랜덤 뒤집기\n", + " transforms.RandomHorizontalFlip(),\n", + " # 밝기, 대비, 채도, 색상을 랜덤하게 변경\n", + " transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),\n", + " # 이미지를 -15도에서 +15도 사이로 랜덤 회전\n", + " transforms.RandomRotation(degrees=15),\n", + " # 이미지를 랜덤하게 왜곡하여 시점 다양화\n", + " transforms.RandomPerspective(distortion_scale=0.5, p=0.5),\n", + " # 이미지를 Tensor로 변환 (HWC -> CHW)\n", + " transforms.ToTensor(),\n", + " # 이미지 정규화 (Normalize)\n", + " transforms.Normalize(mean, std),\n", + " # 이미지의 일부분을 랜덤 삭제하여 특정 패턴 의존 방지\n", + " transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3))\n", + " ]),\n", + " # 검증 데이터 변환 파이프라인\n", + " 'val': transforms.Compose([\n", + " # 이미지를 224x224으로 조정\n", + " transforms.Resize(224),\n", + " # 이미지를 중앙에서 정사각형으로 잘라냄\n", + " transforms.CenterCrop(resize),\n", + " # 이미지를 Tensor로 변환 (HWC -> CHW)\n", + " transforms.ToTensor(),\n", + " # 이미지 정규화 (Normalize)\n", + " transforms.Normalize(mean, std)\n", + " ])\n", + " }\n", + " \n", + " def __call__(self, img, phase):\n", + " \"\"\"\n", + " 이미지를 변환합니다.\n", + " \n", + " Args:\n", + " img (PIL.Image): 입력 이미지.\n", + " phase (str): 변환 단계 ('train' 또는 'val').\n", + " \n", + " Returns:\n", + " Tensor: 변환된 이미지.\n", + " \"\"\"\n", + " return self.data_transform[phase](img)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ec8eb74", + "metadata": {}, + "outputs": [], + "source": [ + "class PetSkinDataset(Dataset):\n", + " def __init__(self, dataframe, transform=None, phase='train'):\n", + " self.dataframe = dataframe\n", + " self.transform = transform\n", + " self.phase = phase\n", + " \n", + " # 레이블로 사용할 컬럼 지정\n", + " self.label_columns = [col for col in dataframe.columns if col.startswith('lesions_')]\n", + " \n", + " def __len__(self):\n", + " return len(self.dataframe)\n", + " \n", + " def __getitem__(self, idx):\n", + " row = self.dataframe.iloc[idx]\n", + " \n", + " # 이미지 경로 가져오기\n", + " img_path = row['image_path']\n", + " if pd.isna(img_path):\n", + " raise FileNotFoundError(f\"Image not found for imgID: {row['imgID']}\")\n", + " \n", + " # 이미지 로드\n", + " image = Image.open(img_path).convert('RGB')\n", + " if self.transform:\n", + " image = self.transform(image, phase=self.phase)\n", + " \n", + " # 원핫 인코딩된 레이블 추출\n", + " label_values = row[self.label_columns].values.astype(float)\n", + " label = torch.tensor(label_values, dtype=torch.float32)\n", + " \n", + " # 추가 feature 데이터 (age 등)\n", + " features = 
row.drop(['imgID', 'image_path'] + self.label_columns).values.astype(float)\n", + " features = torch.tensor(features, dtype=torch.float32)\n", + " \n", + " return image, features, label\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2f71f2a", + "metadata": {}, + "outputs": [], + "source": [ + "train_dataset = PetSkinDataset(train_df, transform=ImageTransform(size, mean, std), phase='train')\n", + "val_dataset = PetSkinDataset(val_df, transform=ImageTransform(size, mean, std), phase='val')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8da2ca5b", + "metadata": {}, + "outputs": [], + "source": [ + "train_iterator = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)\n", + "valid_iterator = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False)\n", + "dataloader_dict = {'train': train_iterator, 'val': valid_iterator}\n", + "\n", + "batch_iterator = iter(train_iterator)\n", + "image, features, label = next(batch_iterator)\n", + "print(features.size())\n", + "#print(label)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "251b756a", + "metadata": {}, + "outputs": [], + "source": [ + "class MultimodalModel(nn.Module):\n", + " def __init__(self, image_model, output_dim):\n", + " super(MultimodalModel, self).__init__()\n", + " \n", + " self.image_model = image_model\n", + " \n", + " # EfficientNet 출력 후 Dropout 추가\n", + " self.image_dropout = nn.Dropout(p=0.5) # Dropout 적용\n", + " \n", + " # 추가 특징을 처리하는 MLP\n", + " self.fc_additional = nn.Sequential(\n", + " nn.Linear(0, 64), # 나중에 forward에서 동적으로 설정\n", + " nn.ReLU(),\n", + " nn.Dropout(p=0.5),\n", + " nn.Linear(64, 64),\n", + " nn.ReLU(),\n", + " nn.Dropout(p=0.5)\n", + " )\n", + " \n", + " # 이미지 특징과 추가 feature 결합 후 예측하는 fully connected layer\n", + " self.fc_combined = nn.Sequential(\n", + " nn.Linear(1000 + 64, 128),\n", + " nn.ReLU(),\n", + " nn.Dropout(p=0.5),\n", + " nn.Linear(128, output_dim)\n", + " )\n", + " \n", + " def forward(self, image, additional_features):\n", + " # EfficientNet에서 이미지 특징 추출\n", + " image_features = self.image_model(image)\n", + " \n", + " # EfficientNet 출력 후 Dropout 적용\n", + " image_features = self.image_dropout(image_features)\n", + " \n", + " # 추가 특징의 입력 크기 동적으로 설정\n", + " self.fc_additional[0] = nn.Linear(additional_features.shape[1], 64).to(additional_features.device)\n", + " \n", + " # 추가 특징 처리\n", + " additional_features = self.fc_additional(additional_features) \n", + " \n", + " # 이미지 특징과 추가 특징 결합\n", + " combined_features = torch.cat((image_features, additional_features), dim=1)\n", + " \n", + " # 결합된 특징을 통해 최종 예측\n", + " output = self.fc_combined(combined_features)\n", + " return output\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ce582af", + "metadata": {}, + "outputs": [], + "source": [ + "#model = models.efficientnet_b1(pretrained=False)\n", + "model = EfficientNet.from_pretrained('efficientnet-b1')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c27f6012", + "metadata": {}, + "outputs": [], + "source": [ + "# 임의의 입력 텐서 생성 (배치 크기: 1, 채널: 3, 크기: 224x224)\n", + "input_tensor = torch.randn(1, 3, 224, 224)\n", + "\n", + "# 모델에 입력 통과시키기\n", + "output = model(input_tensor)\n", + "\n", + "# 출력 텐서 크기 확인\n", + "print(output.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39d2699c", + "metadata": {}, + "outputs": [], + "source": [ + "OUTPUT_DIM = 8 # 클래스의 개수\n", + "cnn_model = model\n", + "multi_modal_model = 
MultimodalModel(cnn_model, OUTPUT_DIM)\n", + "#multi_modal_model = torch.load('EfficientNet_Multi_Modal_model_v2_fine_tuning_CosineLR.pt')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6017c6dd", + "metadata": {}, + "outputs": [], + "source": [ + "optimizer = optim.Adam(cnn_model.parameters(), lr=1e-3)\n", + "scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)\n", + "criterion = nn.CrossEntropyLoss()\n", + "\n", + "cnn_model = cnn_model.to(device)\n", + "multi_modal_model = multi_modal_model.to(device)\n", + "criterion = criterion.to(device)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "560b9e57", + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_accuracy(y_pred, y):\n", + " with torch.no_grad():\n", + " # y_pred에서 가장 큰 값을 가진 인덱스를 예측값으로 사용\n", + " _, predicted = torch.max(y_pred, 1)\n", + " \n", + " # 실제 값과 예측 값 비교하여 정확도 계산\n", + " correct = (predicted == y).sum().item() # 맞는 예측의 개수\n", + " accuracy = correct / y.size(0) # 정확도 계산\n", + " \n", + " return accuracy\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31b8c9d8", + "metadata": {}, + "outputs": [], + "source": [ + "def train(model, iterator, optimizer, scheduler, criterion, device): \n", + " epoch_loss = 0\n", + " epoch_acc = 0\n", + " \n", + " model.train() \n", + " \n", + " accumulation_steps = 2\n", + " optimizer.zero_grad()\n", + "\n", + " # tqdm을 사용하여 iterator를 감싸 진행 바 추가\n", + " for batch_idx, (image, x, y) in enumerate(tqdm(iterator, desc=\"Training\", unit=\"batch\")):\n", + " image = image.to(device)\n", + " x = x.to(device)\n", + " y = y.to(device)\n", + " \n", + " y = torch.argmax(y, dim=1)\n", + " \n", + " y_pred = model(image, x) \n", + "\n", + " loss = criterion(y_pred, y)\n", + " \n", + " acc = calculate_accuracy(y_pred, y)\n", + " \n", + " # Backward\n", + " (loss / accumulation_steps).backward() # 손실을 누적 단계로 나눔 \n", + " \n", + " # Gradient 업데이트는 accumulation_steps마다 수행\n", + " if (batch_idx + 1) % accumulation_steps == 0 or (batch_idx + 1) == len(iterator):\n", + " optimizer.step()\n", + " optimizer.zero_grad() \n", + " \n", + " epoch_loss += loss.item()\n", + " epoch_acc += acc\n", + " if batch_idx % 100 == 0:\n", + " print(f\"Batch {batch_idx} - Loss: {loss.item():.4f}, Accuracy: {acc:.2f}\")\n", + " \n", + " epoch_loss /= len(iterator)\n", + " epoch_acc /= len(iterator) \n", + " return epoch_loss, epoch_acc, y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f28f2b8", + "metadata": {}, + "outputs": [], + "source": [ + "def evaluate(model, iterator, criterion, device): \n", + " epoch_loss = 0\n", + " epoch_acc = 0\n", + " \n", + " model.eval() \n", + " with torch.no_grad(): \n", + " # tqdm을 사용하여 iterator를 감싸 진행 바 추가\n", + " for (image, x, y) in tqdm(iterator, desc=\"Evaluating\", unit=\"batch\"):\n", + " image = image.to(device)\n", + " x = x.to(device)\n", + " y = y.to(device)\n", + " \n", + " y = torch.argmax(y, dim=1)\n", + " \n", + " y_pred = model(image, x)\n", + " \n", + " loss = criterion(y_pred, y)\n", + " \n", + " acc = calculate_accuracy(y_pred, y)\n", + "\n", + " epoch_loss += loss.item()\n", + " epoch_acc += acc\n", + " \n", + " epoch_loss /= len(iterator)\n", + " epoch_acc /= len(iterator) \n", + " \n", + " return epoch_loss, epoch_acc, y_pred\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2627823f", + "metadata": {}, + "outputs": [], + "source": [ + "def epoch_time(start_time, end_time):\n", + " elapsed_time = end_time - 
start_time\n", + " elapsed_mins = int(elapsed_time / 60)\n", + " elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n", + " return elapsed_mins, elapsed_secs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c73083d3", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "best_valid_loss = float('inf')\n", + "EPOCHS = 100\n", + "softmax = nn.Softmax(dim=1)\n", + "cnt = 0\n", + "\n", + "label_names = ['lesions_A1', 'lesions_A2', 'lesions_A3', 'lesions_A4', 'lesions_A5', 'lesions_A6', 'lesions_A7']\n", + "\n", + "# Loss와 Accuracy를 저장할 리스트 초기화\n", + "train_losses = []\n", + "valid_losses = []\n", + "train_accuracies = []\n", + "valid_accuracies = []\n", + "\n", + "# tqdm을 사용해 전체 에폭에 대한 진행 상태 표시\n", + "for epoch in tqdm(range(EPOCHS), desc=\"Training Progress\", unit=\"epoch\", leave=True):\n", + " start_time = time.monotonic()\n", + "\n", + " train_loss, train_acc, logits = train(multi_modal_model, train_iterator, optimizer, scheduler, criterion, device)\n", + " valid_loss, valid_acc, logits = evaluate(multi_modal_model, valid_iterator, criterion, device)\n", + " \n", + " # 기록 저장\n", + " train_losses.append(train_loss)\n", + " valid_losses.append(valid_loss)\n", + " train_accuracies.append(train_acc)\n", + " valid_accuracies.append(valid_acc)\n", + " \n", + " # 학습률 스케줄러 업데이트\n", + " scheduler.step() # 매 에폭이 끝난 후 호출\n", + " \n", + " if valid_loss < best_valid_loss:\n", + " best_valid_loss = valid_loss\n", + " torch.save(multi_modal_model, 'EfficientNet_Multi_Modal_model_v2_fine_tuning_CosineLR_batch256.pt')\n", + " cnt = 0\n", + " else:\n", + " cnt += 1\n", + " if cnt == 10:\n", + " print('Early Stopping')\n", + " break\n", + " \n", + " end_time = time.monotonic()\n", + " epoch_mins, epoch_secs = epoch_time(start_time, end_time)\n", + " \n", + " probabilities = softmax(logits)\n", + " \n", + " print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')\n", + " print(f'\\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:6.2f}%')\n", + " print(f'\\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:6.2f}%')\n", + " #for i, label_name in enumerate(label_names):\n", + " # 각 클래스에 대해 평균 확률을 출력\n", + " #print(f\"{label_name}: {probabilities[:, i].mean().item():.4f}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e7684c0", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Loss 그래프\n", + "plt.figure(figsize=(10, 5))\n", + "plt.plot(train_losses, label='Train Loss', marker='o') # 점 추가\n", + "plt.plot(valid_losses, label='Validation Loss', marker='o') # 점 추가\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Loss')\n", + "plt.title('Train and Validation Loss')\n", + "plt.legend()\n", + "plt.show()\n", + "\n", + "# Accuracy 그래프\n", + "plt.figure(figsize=(10, 5))\n", + "plt.plot(train_accuracies, label='Train Accuracy', marker='o') # 점 추가\n", + "plt.plot(valid_accuracies, label='Validation Accuracy', marker='o') # 점 추가\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Accuracy')\n", + "plt.title('Train and Validation Accuracy')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d287b6a5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "PyTorch 2.1 (NGC 23.09/Python 3.10) on Backend.AI", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": 
".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}