From 416376b36ebc0bb9449d98a38ddac47aedf2c502 Mon Sep 17 00:00:00 2001 From: Haesun Park Date: Wed, 17 Feb 2021 23:45:10 +0900 Subject: [PATCH] =?UTF-8?q?=EC=BD=94=EB=9E=A9,=20=ED=85=90=EC=84=9C?= =?UTF-8?q?=ED=94=8C=EB=A1=9C=202.4=EC=97=90=EC=84=9C=20=EC=8B=A4=ED=96=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 11_training_deep_neural_networks.ipynb | 10667 +++++++++++++---------- 1 file changed, 5840 insertions(+), 4827 deletions(-) diff --git a/11_training_deep_neural_networks.ipynb b/11_training_deep_neural_networks.ipynb index 26ab2d5fc..470e5dc4f 100644 --- a/11_training_deep_neural_networks.ipynb +++ b/11_training_deep_neural_networks.ipynb @@ -1,4830 +1,5843 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**11장 – 심층 신경망 훈련하기**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "_이 노트북은 11장에 있는 모든 샘플 코드와 연습문제 해답을 가지고 있습니다._" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \n", - "
\n", - " 구글 코랩에서 실행하기\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 설정" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "먼저 몇 개의 모듈을 임포트합니다. 맷플롯립 그래프를 인라인으로 출력하도록 만들고 그림을 저장하는 함수를 준비합니다. 또한 파이썬 버전이 3.5 이상인지 확인합니다(파이썬 2.x에서도 동작하지만 곧 지원이 중단되므로 파이썬 3을 사용하는 것이 좋습니다). 사이킷런 버전이 0.20 이상인지와 텐서플로 버전이 2.0 이상인지 확인합니다." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# 파이썬 ≥3.5 필수\n", - "import sys\n", - "assert sys.version_info >= (3, 5)\n", - "\n", - "# 사이킷런 ≥0.20 필수\n", - "import sklearn\n", - "assert sklearn.__version__ >= \"0.20\"\n", - "\n", - "# 텐서플로 ≥2.0 필수\n", - "import tensorflow as tf\n", - "from tensorflow import keras\n", - "assert tf.__version__ >= \"2.0\"\n", - "\n", - "%load_ext tensorboard\n", - "\n", - "# 공통 모듈 임포트\n", - "import numpy as np\n", - "import os\n", - "\n", - "# 노트북 실행 결과를 동일하게 유지하기 위해\n", - "np.random.seed(42)\n", - "\n", - "# 깔끔한 그래프 출력을 위해\n", - "%matplotlib inline\n", - "import matplotlib as mpl\n", - "import matplotlib.pyplot as plt\n", - "mpl.rc('axes', labelsize=14)\n", - "mpl.rc('xtick', labelsize=12)\n", - "mpl.rc('ytick', labelsize=12)\n", - "\n", - "# 그림을 저장할 위치\n", - "PROJECT_ROOT_DIR = \".\"\n", - "CHAPTER_ID = \"deep\"\n", - "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", - "os.makedirs(IMAGES_PATH, exist_ok=True)\n", - "\n", - "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", - " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", - " print(\"그림 저장:\", fig_id)\n", - " if tight_layout:\n", - " plt.tight_layout()\n", - " plt.savefig(path, format=fig_extension, dpi=resolution)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 그레이디언트 소실과 폭주 문제" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "def logit(z):\n", - " return 1 / (1 + np.exp(-z))" - ] - }, - { - "cell_type": 
"code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "그림 저장: sigmoid_saturation_plot\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdd3wUxfvA8c8kl54AoYVelBqqFKUIiRSRonQVKUFQpNgoIoIgiIqgtK9Y8CcYiiBdKYKgEEBAIWAChBJFqoQSIEBC+s3vjz1iygUCXHIpz/v12ldyu3M7z20u99zszs4orTVCCCFEbuNg7wCEEEIIayRBCSGEyJUkQQkhhMiVJEEJIYTIlSRBCSGEyJUkQQkhhMiVJEGJB6KUClJKzbF3HJC1WJRSh5VSE3MopNT1Biql1udAPf5KKa2UKp4DdQ1SSp1RSpntcUzTxdJfKRVtzxiE7Sm5D0pkRilVApgEdABKA1HAYeBjrfUWS5miQKLW+qbdArXISixKqcPASq31xGyKwR/YBpTQWkemWl8Y4/8tyoZ1nQLmaK0/TbXOGSgKXNTZ+M+tlPIGLgEjgJXATa11jiQIpZQGemqtV6Za5wZ4aa0v5UQMImeY7B2AyNVWAe7AQOBvoCTgBxS7XUBrfdU+oWWUm2JJT2t9PYfqSQAu5EBVFTE+P9ZrrSNyoL470lrHArH2jkPYmNZaFlkyLEARQANt7lIuCONb/O3HPsBajA+L08CLGK2uianKaGAI8CNwCwgHngDKAT8DMUAI0CBdXd2AQ0A8cBYYh+UsQCaxlLTUcTuWAeljsfJ6HrY854IljgNAp3RlnIGPLPuMB/4BXgcqWV5b6iXQ8pxAjA9zgEHARcAx3X6XAGuzEofltaapy7Le3/K4+D0ct1PAu8Bc4AZwDnjrDseov5XXWQmYCBy2UjY61eOJlr/B88AJ4CbwQ+p4LeUCUsV8EViQKtbU9Z6yVo9l3SsYX6wSLD9fTrddW/4WKyzH+B+gj73/92T5b5FrUCIz0ZblGaWU6z08bwHGt+tWQGegj+Vxeu8C3wP1gGDL7/OAL4BHgPMYH+oAKKUaYnyQrAbqAGOAd4BX7xBLIFAFaAN0AfphfJDeiSewEWhriW0VsFopVSPda+yHcXqrJkYLMwrjw7+7pUwtjNOib1ipYwVQ2FLH7dfniXG8Fmcxjm4YieR9Sz2lrb2YezhuwzESQgNgKjBNKdXU2j6BZcBTlt8ftdR9NpOy1lQCngO6Ak9i/L0/TBXzKxjJ8lugLsYp5sOWzY0tP1+21Hv7cRpKqa7AHGAWUBuYDXyhlHo6XdEJGF8E6lle13ylVIV7eC0iO9k7Q8qSexeMD9urQBywB/gUeCxdmSAsrRagOsa30iaptpcHksnYgpqS6nFty7oRqdb5k6olAHwHbE1X90TgXCaxVLM8v3mq7RXTx5LF4/A78K7l96qW/T6VSdk0cadaH4ilBWV5vBpYlOpxH+A64JqVOCyPTwGj7lR/Fo/bKWBpujJ/pa7LSiyNLPVUSrffrLSg4oDCqdaNA/5O9fgcxnXOzOrWQI+71LMLmG/lb/DbHd6HJowWvbSicskiLSiRKa31KqAM8DTGt/lmwO9KqbGZPKUGYMZoEd3ex1mM1lB6B1P9ftHy85CVdSUtP2tifOik9htQVilVyMr+a1pi2ZsqltOZxJJCKeWhlJqmlDqilLpm6RnWCLj9rfoRy3633Wk/WbAY6KKUcrc87g2s0lrHZTGOrMrqcTuYrsx5/jv2tn
Zap70ml1KXUqokUBb49QHryOx1+6Zbl/K6tdZJwGWy73WLeyQJStyR1jpOa71Fa/2+1roZxmm4iZbeYg8iMXU1d1iXlffonXqr3WtPtk+BnsB4jA4h9TGS3IO+3vQ2AElAZ8uHchv+O72XU3GkPjaJVrbd6+eDGVDp1jlZKWeLuu5X+veDPWMRdyF/CHGvjmCcCrF2XeoYxnuq4e0VSqlyGK2wB3UUaJ5u3eMYp6qsdSu/HcujqWKpkIVYHgcWaq1Xaa0PYpxuejjV9hDLfp/I5PkJlp+Od6pEax2PcW2oN8b1mAsYpyizGsftuu5YD/d+3B7EZcBHKZU6SdW/lx1oo5v4v0DrOxRL5P5f95F7iUfYlyQoYZVSqphSaqtSqo9Sqq5SqrJSqicwGvhVa30j/XO01scxeuF9pZRqopSqj3Gh+xb33pJJbzrgp5SaqJSqppTqDYwEplkrbIllEzBXKdXUEksgd++KHA50VUo1UErVwWjVpCRjrXU4sBz4RinV3XJcWiil+lqKnMZ4rR2VUiUsnR8ysxhoBwzGuAZkzmocFqeAFkqpsne4MfeejtsDCsK4B2usUuphpdRAoMd97OdD4E2l1HBLzPWVUiNTbT8FtFZKlbLcj2XNJ0BfpdQwpVRVpdRrGF8GsuN1i2wiCUpkJhrjovwbwHYgDKNr9RKMb/yZ6Y/xbT8Io7v5dxg3dMY9SDBa6wMYp7y6Y7lZ2LLcaeSI/sBJYCuwzhL7qbtUNcIS706M626/W35PrZ9lX//DaKkFYvTKQ2v9L/AexofsxbvEtxOjteBL2tN7WY1jAkYnlBMYrZcM7vO43Ret9VGM2wcGYVzbaYvxnrnX/XwJDMPoqXcY44tGrVRFRmK0YM8Cf2ayjx+A1zB6Jx7BeB8P1Vqvu9d4hP3ISBIiW1m+2Z8Helk6XQghRJbISBLCppRSrQAvjB55JTFaEpEY34KFECLLbHaKTyn1qlIqWCkVr5QKvEO5AKXUfqXUDaXUOUtXWkmU+YcT8AFGglqHcf2ppdY6xq5RCSHyHJud4lNKdcPoZtoOcNNa98+k3BCM88p/ACUwrlOs0Fp/bJNAhBBC5As2a7lorVcDKKUaYYypllm5L1M9/Fcp9R2Zd9kVQghRQOWGU2stMXqIWaWUGoTRKwg3N7eG5cuXz6m4ssxsNuPgIB0i70aOU9acPXsWrTUVKsiQcFlhz/dVkk7ClIeuUOTW/8Hw8PBIrXWJ9OvtemSVUgMwhm95KbMyWuuvga8BGjVqpIODgzMrajdBQUH4+/vbO4xcT45T1vj7+xMVFUVISIi9Q8kTcvJ9dSP+Bi+tfYmpbaZS2btyjtRpS7n1f1ApddraerulUqVUF2AK0F6nmthNCCFyo7ikOLp834U1x9YQfiXc3uEUCHZpQSmlngL+D+iotT50t/JCCGFPyeZkeq/uzbZT21jcdTHtqrSzd0gFgs0SlKWruAljjCxHyxxCSZYRglOXa4UxukBXrfXejHsSQojcQ2vN0A1DWX10NbPazaJ33d72DqnAsOUpvncxxjkbgzG3TSzwrlKqglIqOtUkYOMxhoX5ybI+Wim10YZxCCGEzUQnRHPgwgHGPj6WN5pYm39SZBdbdjOfiDEZmTWeqcpJl3IhRJ6gtcbLxYvt/bfjZnKzdzgFTu7rbyiEELnAkkNL6LikIzEJMbg7uZN2FhGREyRBCSFEOpv+3kTADwHcSryFo8Pdpp4S2UUSlBBCpPLHuT/ovrw7tUvW5sfnf8TVZG1uTpETJEEJIYTF0ctH6bCkA6U9S7Op9yYKuxa2d0gFmiQoIYSwSEhOoELhCmzuuxkfTx97h1Pg5Z1BpIQQIpvEJcXhanKlXql6HBh0QDpE5BLSghJCFGjRCdH4B/ozfut4AElOuYgkKCFEgZWQnECP5T3Yd34fDcs0tHc4Ih05xSeEKJDM2kz/H/rz84mf+ebpb+hSo4u9QxLpSAtKCFEgjfh5BEsPL2VK6ykMbD
DQ3uEIK6QFJYQokJqVb4aryZW3m79t71BEJiRBCSEKlIibEZT2Ks2ztZ7l2VrP2jsccQdyik8IUWCsOrKKh/73ENtObrN3KCILJEEJIQqEbSe38cLqF2hQugGPlXvM3uGILJAEJYTI9w5EHKDz952pWrQq63qtw93J3d4hiSyQBCWEyNcibkbw1OKn8Hbz5uc+P1PUrai9QxJZJJ0khBD5WinPUrz+2Ov09O1J2UJl7R2OuAeSoIQQ+VJUXBRXbl3h4aIP827Ld+0djrgPcopPCJHvxCbG8vTSp3liwRPEJcXZOxxxn6QFJYTIV5LMSTy38jl2ndnF9z2+lwkH8zBJUEKIfENrzcvrXmZd+Dq+6PCF3Iibx8kpPiFEvvHFvi8IDAlkot9EhjQeYu9wxAOSFpQQIt/oX78/jg6OvNLwFXuHImzApi0opdSrSqlgpVS8UirwLmWHK6UuKKVuKKXmK6VcbBmLEKLg2PT3Jm7E38DD2YPBjQbLpIP5hK1P8Z0HPgDm36mQUqodMAZoDVQEHgIm2TgWIUQBsOfKHjot6cSEbRPsHYqwMaW1tv1OlfoAKKe17p/J9iXAKa31WMvj1sB3WutSd9qvl5eXbtgw7ayXzz77LEOHDuXWrVt06NAhw3P69+9P//79iYyMpEePHhm2DxkyhOeee46zZ8/St2/fDNtHjhzJ008/zfHjx3nllYynDd59911MJhNFihThzTffzLD9o48+olmzZuzevZuxY8dm2D5r1izq16/PL7/8wgcffJBh+9y5c6levTrr1q1j+vTpGbYvWrSI8uXLs2zZMr788ssM21euXEnx4sUJDAwkMDAww/affvoJd3d3vvjiC5YvX55he1BQEACffvop69evT7PNzc2NjRs3AjB58mR+/fXXNNuLFSvGqlWrAHjnnXfYuHEjRYoUSdlerlw5Fi9eDMCbb75JSEhImudXq1aNr7/+GoBBgwYRHh6eZnv9+vWZNWsWAH369OHcuXNptjdt2pQpU6YA0L17d65cuZJme+vWrRk/3pjmu3379sTGxqbZ3qlTJ0aNGgWAv79/hmOTXe+9kJAQkpKSWLp06V3fe23atCEkJKTAvvd+O/Mb/vP8cY92p25IXUzJxlWL9O+9PXv2pHl+QX3vRUVFUaRIEZt87tnyvbd9+/b9WutG6cvZ6xpULeDHVI9DAR+lVDGtdZq/pFJqEDAIwMnJiaioqDQ7Cg8PJygoiLi4uAzbAI4dO0ZQUBDXr1+3uj0sLIygoCAuXbpkdfuhQ4fw8vLizJkzVreHhoZSvXp1/v77b6vbDxw4QEJCAocPH7a6PTg4mKioKEJDQ61u/+OPP4iIiODQoUNWt+/Zs4cTJ04QFhZmdfuuXbsoXLgwx44ds7p9x44duLq6Eh4ebnX77Q+JEydOZNgeGxubsv3kyZMZtpvN5pTtZ86cITk5OU0ZJyenlO3nzp3L8Pzz58+nbD9//nyG7efOnUvZfvHixQzbz5w5k7L98uXL3LhxI832kydPpmy/evUq8fHxabafOHEiZbu1Y5Nd772kpCS01ll675lMpgL73pu/fj5vhL6Be5I7FXZWIDohOmV7+vde+ucX1Pfe7f/BB/3cCwkJJTHRhbCwM1y8WIjkZHfMZle0dsNsduH//u8mP/54jJMnEwgPfxqtXTCbjW1auzB0qDvOzpe4dKkyZ89+DDTNUAfYrwV1Ahimtd5keewEJACVtdanMttvo0aNdHBwsM3jfVBBQUFWv+WItOQ4ZY2/vz9RUVEZvtWL/2iteeybx/j35r9M953O8089b++Q8oSgoCD8/PyJjYWrV9MuV64YP69dg5s3jeXGjf9+T70uOhpsmzpUrmpBRQOFUj2+/ftNO8QihMhjlFIs77mcmIQYLh+5bO9w7C4uDi5ehAsX/vuZ+vfISCMBXbjQlOhoSNdguy9ubuDl9d/i7m6sS79ktj710q6d9TrslaDCgHrA7RPP9YCL6U/vCSFEaj
fib/B/+/+P4U2HU6lIJQCCjgTZNabsFhcHZ88ay5kzaZezZyEiAq5fz+rejM7Szs5QrBgULZpx8faGQoXSJp/0j728wPQA2SMhIYHvvvuOHj36YrrDjmyaoJRSJss+HQFHpZQrkKS1TkpXdCEQqJT6DqPn37tAoC1jEULkL3FJcXT5vgs7Tu/Av5I/Dcs0vPuT8gCtjRbOX3+lXf75x0hCly7dfR8mE/j4QKlSxnL799s/ixc3ElJ4+B46dmyKmxvYqyf+P//8w9NPP82RI0do06YN5cuXz7SsrVtQ7wLvpXrcB5iklJoPHAF8tdZntNablFLTgG2AG7Aq3fOEECJFsjmZPqv7sO3UNhZ2WZgnk5PWcPo0HD4Mhw4ZP8PDjWR0pxaQyQTlykGFChmX8uWhdGmj1eOQhZuGrl2Lx92OczUuW7aMgQMHEhsbi7u7+13vV7NpgtJaTwQmZrLZM13ZGcAMW9YvhMh/tNYM+2kYq46uYsaTM+hbL2O36NwmJgb+/NNYbiejw4eNTgbWeHlB1arGUq2a8fPhh6FiRaMF5OiYs/HbWlxcHEOHDmXZsmXcunUrZb3DXbKqDHUkhMjVjkYeJTAkkDHNxzC86XB7h5NBYqKRfPbuhX37jJ9hYWA2ZyxbsiTUqQO1axtLjRpGMipZ0n6n3LLbsWPH6NSpE+fPn09zv5fWOmdbUEIIYWu+JXz585U/qVG8hr1DAYzTcbt2wY4dsHMnHDhgdGRIzdER6teHhg2NhHQ7KZUsaZ+Y7WXBggUMHTqU2NhYrN3SJC0oIUSe9P3h70lITqBfvX7ULFHTbnHcvAnbtkFQEGzfDiEhGVtHVarAo49C48bGz/r1seu1HnuLjo5m4MCBrF+/Ps0pvdSkBSWEyJN+/vtn+q7pS/PyzelTtw8OKudmBjKb4eBB2LQJfv7ZaC0lJv633WSCxx6Dli2NpUkTo3u2MBw8eJBOnTpx+fJl4tI3LdORBCWEyFP2/ruX7su7U6tELX58/sccSU5xcfDrr7BmDaxfb9zgepuDg5GE2rYFPz/jdw+PbA8pT1qxYgV9+/bNMHSTNVprOcUnhMg7jkUeo8N3HfDx9GFTn00Udi2cbXXduAE//WQkpZ9+Mobvua1sWWN0g6eegtatpYWUVd7e3hQtWpQbN24QExNz1/KSoIQQecamvzdhcjCxuc9mSnnecXKD+xIfDxs3wuLFRksp9Rf9+vWha1fo0sXo1JBfe9VlpzZt2nDmzBkWLlzIO++8Q3R0tFyDEkLkD282eZO+dftSzL2YzfapNezeDYsWwfLlxmCoYCSgFi3+S0qVK9usygLNZDIxYMAAjh49ymeffXbHstKCEkLkajEJMTy38jkm+E3g0bKP2iw5Xb4MgYEwdy6cOPHf+nr1oE8f6NXLOJUnbC8yMpLPP/88zbUoZ2dnnJycUk79ZaUFlXNdY4QQIp2E5AS6L+/Oxr83cv7m+Qfen9bG/UkvvGAMDzR6tJGcypY1fj940OgmPmqUJKfs9MEHH2BO1xffwcGBsWPHUqRIEdzd3UlOTr5rC0oSlBDCLszazIs/vsjPJ37m605f06VGl/veV2wsfPWVcTOsnx8sXWp0De/UybjWdPo0TJ1qXFsS2evixYt8/fXXGVpPAQEBjB07lvPnzzN58mTq1KmDs7PzHfclp/iEEDlOa83wTcNZcmgJU1pPYWCDgfe1n8hIWLCgIs8+a5zSA2Pw1JdeMpYKFWwYtMiSyZMnk5ycnGado6MjEydOBMDNzY0RI0YwYsSIu+5LEpQQIsclmZM4df0Uw5sM5+3mb9/z80+cgBkz4NtvITbW6N3QqJFx6q5bN3BysnXEIisiIiKYN28eCQkJKeucnZ0ZMGAApUrde69MSVBCiByVbE7GydGJVc+uwkE53PVCeWr//APvv2/0yLt9ieOxx67w8cfF8POTruH2NnHixAzXnhwdHRk/fvx97U+uQQkhcszqo6tp/H+NuRh9EZODKc
ujRJw+DS+/DNWrw4IFxugO/fsbo4h//PEh/P0lOdnbuXPnWLhwYZrWk4uLC4MGDcLHx+e+9ikJSgiRI7ad3EavVb1wNbni6ex59ycA//4LQ4caU1J8843RaurfH44fN07v1aqVvTGLrHvvvfcyXHtycHDg3Xffve99yik+IUS2+zPiTzp/35kqRauw/oX1eDjfeTC7mBj45BOYNs3ooacU9O4NEyYYE/qJ3OXMmTMsWbKExFSj6rq4uDBs2DCKFy9+3/uVBCWEyFZ/X/2bp757Cm83b37u8zNF3TIf2M5shiVLYMwYo/UERqeHyZPB1zeHAhb3bPz48VZ77r3zzjsPtF85xSeEyFauJldqFq/J5j6bKVeoXKbldu82Rgrv29dITg0aGPMvrVolySk3O3XqFMuXL0/TenJ1deX111+n6AOOsistKCFEtohOiMbN5Ea5QuXYFrAt0956kZEwciQsXGg8Ll0aPvoI+vUzOkOI3G3cuHEkJSWlWefo6Mjo0aMfeN/y5xdC2FxsYiwdvutAwA8BgPWJ6bQ2euTVqGEkJxcXePddCA83OkJIcsr9Tpw4werVq9MkKDc3N4YPH463t/cD719aUEIIm0oyJ/H8quf57cxvfN/je6tl/voLBg+GrVuNx61aGUMVVa2ag4GKB/bOO++kObUHRutp1KhRNtm/fEcRQtiM1ppB6wax9vha5nSYw7O1nk2zPTERPvzQGBNv61YoVsxoRf3yiySnvCY8PJx169al6Rzh5ubGW2+9ReHCtplo0qYJSilVVCm1RikVo5Q6rZR6IZNyLkqpr5RSF5VSV5VS65RSMrawEHnchG0T+DbkW97ze4+hjYem2Xb8ODRvbpzGi4+HgAA4dsy41iQ32eY91lpPJpOJ4cOH26wOW5/i+xxIAHyA+sAGpVSo1josXbk3gKZAXeA68DXwGdDNxvEIIXLQU1WeIiE5gff83ktZZzbDF18Y013ExkL58jB/PrRpY8dAxQO5efMmP/zwQ5phjdzd3RkzZgxeXl42q8dmLSillAfQHRivtY7WWv8GrAX6WileGfhZa31Rax0HLAPknnAh8qgTV40ZAZtXaM7UtlNTOkX8+y889RS89pqRnPr1g0OHJDnldV5eXmzfvp3GjRvj4WHcdG0ymXj99ddtWo8tW1DVgCStdXiqdaGAn5Wy84DZSqkyQBTQG9hobadKqUHAIAAfHx+CgoJsGLJtREdH58q4chs5TlkTFRVFcnJynjlWe67sYXzYeN6p8Q6tS7ZOWb9tWwlmzKhGdLQThQolMmLEcfz8IvnzT9vWL++rrLP1sZo2bRohISF89dVXtGvXjuDgYJvtGzAuatpiAVoAF9KtexkIslK2MPA9oIEk4E+g6N3qaNiwoc6Ntm3bZu8Q8gQ5Tlnj5+en69WrZ+8wsuS3079ptw/cdMO5DfWNuBtaa61jY7UeMkRroyO51h06aB0RkX0xyPsq63LrsQKCtZXPfFt2kogGCqVbVwi4aaXs54ALUAzwAFaTSQtKCJE7Hb50mE5LO1G+cHk29t6Il4sXJ05As2bw5Zfg7Axz5hgz2t7HVEBC2DRBhQMmpVTqzqL1gPQdJMDoQBGotb6qtY7H6CDxqFLq/kcVFELkmJvxN2m3uB3uTu5s7rOZEh4lWLXKGJ7ozz/hoYeMoYuGDZMeeuL+2SxBaa1jMFpC7yulPJRSzYHOwCIrxfcB/ZRShZVSTsBQ4LzWOtJW8Qghso+XixcfPPEBP/f5mdLuFXnjDejRA27cMAZ3PXAAGja0d5Qir7P1jbpDATfgErAUGKK1DlNKtVBKRacqNwqIA/4CLgMdgK42jkUIYWM3428SfN64EP7iIy9SQtemVSv43/+MadZnz4aVK8FG92mKAs6m90Fpra8CXays3wl4pnp8BaPnnhAij4hPiqfLsi4Enw/m5BsnOXW0KF26wNmzULYsrF4Njz5q7yhFev7+/tSuXZs5c+bYO5R7JkMdCSHuKt
mcTJ81fdh6ciuftf+MLWuL8vjjRnJq2hSCg/NXcrp8+TJDhw6lUqVKuLi44OPjQ+vWrdmyZUuWnh8UFIRSisjInLtqERgYiKdnxpmKV69ezZQpU3IsDluSwWKFEHektWbYT8NYeWQln7SZTviKfnz4obHtxReNHnsuLvaN0da6d+/OrVu3mDdvHlWqVOHSpUts376dK1eu5HgsCQkJODs73/fzH3ROJnuSFpQQ4o6Why1n7v65DG/wLjunjeDDD42pMGbNgnnz8l9yioqKYufOnXz88ce0bt2aihUr0rhxY0aNGsXzzz8PwOLFi2ncuDFeXl6ULFmSnj178q9lCuBTp07xxBNPAFCiRAmUUvTv3x8wTre9+uqraerr378/nTp1Snns7+/PkCFDGDVqFCVKlKB58+YAzJgxg7p16+Lh4UHZsmV56aWXiIqKAowW24svvkhMTAxKKZRSTJw40WqdlSpV4oMPPuCVV16hUKFClCtXjk8++SRNTOHh4fj5+eHq6kr16tX56aef8PT0JDAw0DYHOYskQQkh7qiHbw8+a7GCHZPeZ+1a8PaGTZvgjTfyZxdyT09PPD09Wbt2LXFxcVbLJCQkMGnSJEJDQ1m/fj2RkZH06tULgPLly7Nq1SoAwsLCiIiIYPbs2fcUw+LFi9Fas3PnThZaZnJ0cHBg1qxZhIWFsWTJEvbu3ctrr70GQLNmzZg1axbu7u5EREQQERFxxykvZs6cSZ06dThw4ABvv/02o0ePZs+ePQCYzWa6du2KyWTi999/JzAwkEmTJhEfH39Pr8EW5BSfEMKq9eHrqV+qPtHnyzF9YA9OnTLub9q4EapVs3d02cdkMhEYGMjLL7/M119/zSOPPELz5s3p2bMnjz32GAADBgxIKf/QQw/x5ZdfUrNmTc6dO0e5cuVSTquVLFmS4sXv/fbOypUrM3369DTr3nzzzZTfK1WqxLRp0+jcuTMLFizA2dmZwoULo5SiVBbuin7yySdTWlWvvfYa//vf//j1119p2rQpW7Zs4fjx42zevJmyZY1JJmbOnJnSkstJ0oISQmSw+cRmui3rxoufzaNZMzh1Cho3hj178ndyuq179+6cP3+edevW0b59e3bv3k2TJk346KOPADhw4ACdO3emYsWKeHl50ahRIwDOnDljk/obWrmJbOvWrbRt25Zy5crh5eVFt27dSEhI4MKFC/e8/7p166Z5XKZMGS5dugTAsWPHKFOmTEpyAmjcuDEOdpjiWBKUECKNvf/upduybpQ+8wY7J0/g2jV4+mnYtg1KlrR3dDnH1dWVtm3bMmHCBHbv3s3AgV9CocMAACAASURBVAOZOHEi169fp127dri7u7No0SL27dvHpk2bAOPU3504ODjcHo80Rfo5lYCUEcJvO336NB07dqRmzZqsWLGC/fv3M3/+/CzVaY2Tk1Oax0qpNFNn5BaSoIQQKY5FHqPDdx1w2TeGM998Qny8YuhQWLMG0n1mFji+vr4kJSUREhJCZGQkH330ES1btqRGjRoprY/bbve6Sz3bLBidJiIiItKsCw0NvWvdwcHBJCQkMHPmTJo2bUq1atU4f/58hjrT13c/atSowfnz59PsPzg42C4JTBKUECLFqM1vEbt5LFd/eBeAqVONAV8dHe0cWA66cuUKrVq1YvHixRw8eJCTJ0+yYsUKpk2bRuvWrfH19cXFxYU5c+bwzz//sGHDBsaPH59mHxUrVkQpxYYNG7h8+TLR0cZAOq1atWLjxo2sXbuW48ePM2LECM6ePXvXmKpWrYrZbGbWrFmcPHmSpUuXMmvWrDRlKlWqRFxcHFu2bCEyMpJbt27d1+tv27Yt1atXJyAggNDQUH7//XdGjBiByWRKmecrp0iCEkIAxsy3ZXau5NavI3B0hAULjFlw82NPvTvx9PSkSZMmzJ49Gz8/P2rVqsXYsWN54YUXWLZsGSVKlGDBggX88MMP+Pr6MmnSJGbMmJFmH2XLlmXSpEmMGz
cOHx+flA4JAwYMSFmaN2+Ol5cXXbvefZS3unXrMnv2bGbMmIGvry/ffPMNn376aZoyzZo1Y/DgwfTq1YsSJUowbdq0+3r9Dg4OrFmzhvj4eB599FECAgIYN24cSilcXV3va5/3zdocHLl1kfmg8jY5TlmT0/NBRcdH67c3vat7vZCoQWtnZ63XrMmx6h+YvK+y7n6PVUhIiAZ0cHCwbQOyIJP5oKSbuRAFWGJyIl2/68WWqS/BcRMeHvDjj9C69d2fK/KvNWvW4OHhQdWqVTl16hQjRoygXr16NGjQIEfjkAQlRAFl1mb6fD+YLe+/Cada4e1t3ONkudVHFGA3b97k7bff5uzZs3h7e+Pv78/MmTNz/BqUJCghCiCtNcNWv8PydwbA2eaULg2bN0Pt2vaOTOQG/fr1o1+/fvYOQxKUEAXR8X8v8M3wnnC2EeXLa7ZtUzz8sL2jEiItSVBCFDDXrkG/bqVJOluaihWN5FS5sr2jEiIj6WYuRAGycM96aj12nn37oHJl2L5dkpPIvaQFJUQB8eOBXfTvVg59oQwPPWwmaJsD5cvbOyohMicJSogCYFvYQbp1LIS+UIeHqySzPciRVGOBCpErySk+IfK5Ayf/4cl2GvOFOjxcNZGdOyQ5ibxBWlBC5GM3b8LzXQuT9O9DVKiUwI4gZ0qXtndUQmSNtKCEyKeiozUdO8JfocWoUNHMzu3OlClj76iEyDppQQmRD129EUvVpke5eqQBZcvC1l8dqFDB3lEJcW9s2oJSShVVSq1RSsUopU4rpV64Q9kGSqkdSqlopdRFpdQbtoxFiILqVlwSNf3CuHqkAYWLxfLrr8hNuCJPsnUL6nMgAfAB6gMblFKhWuuw1IWUUsWBTcBwYCXgDJSzcSxCFDgJCRrfJ0K5FNIIzyKx7NruRvXq9o5KiPtjsxaUUsoD6A6M11pHa61/A9YCfa0UHwH8rLX+Tmsdr7W+qbU+aqtYhCiIkpOhfruDnP69Ia6esezc5katWvaOSoj7Z8sWVDUgSWsdnmpdKOBnpWwT4JBSajdQBfgDGKa1PpO+oFJqEDAIwMfHh6CgIBuGbBvR0dG5Mq7cRo5T1kRFRZGcnHxPx0pr+OSTahwNqofJ9RYzph0hKiqagnC45X2VdXntWNkyQXkCN9Ktuw54WSlbDmgAtAUOAdOApUDz9AW11l8DXwM0atRI+/v72y5iGwkKCiI3xpXbyHHKmiJFihAVFXVPx2rkqCQ2bjTh5qb5ebMrLR5vlH0B5jLyvsq6vHasbJmgooFC6dYVAm5aKRsLrNFa7wNQSk0CIpVShbXW120YkxD53otvHSVwek1MJs3q1YoWjxewOdpFvmXLXnzhgEkpVTXVunpAmJWyBwGd6rG2UkYIcRdjpv1N4Kc1QZmZOz+Wp56yd0RC2I7NEpTWOgZYDbyvlPJQSjUHOgOLrBT/FuiqlKqvlHICxgO/SetJiKybOf8MU8cYQ5FPnRHDgL7udo5ICNuy9UgSQwE34BLGNaUhWuswpVQLpVT07UJa663AWGCDpWwVINN7poQQaS1Ze5ERg3xAOzJ87DVGv2ntUq8QeZtN74PSWl8FulhZvxOjE0XqdV8CX9qyfiEKgn374JXeJSFZ8cJLkUz/oLi9QxIiW8hYfELkIcGhMbRvr4mOVvTuDYvmFkdJnwiRT8lYfELkEX/9E8/jrWKIv+pBh46ab79VOMhXTJGPydtbiDzgwsVkGraIJP5qSao9cpEVyxVOTvaOSojsJQlKiFzuxg1N3cfPcfN8WUpXucQfW31wlw57ogCQBCVELhYXBw2eOMPlvytSpEwkB3aWpEgRe0clRM6QBCVELpWUBL16wYkDFfEoepPgHcUoVcreUQmRcyRBCZELaQ09+13hhx+gSBHYvc2Lhx+W7nqiYJFefELkQqevD+Hg0mI4uSSyfr0TdevaOyIhcp60oITIZf662JXrp18Bh0S+WxZP8wxj/AtRMEiCEiIXmTwzgv
PH3gDMfP5/0fTs7HnX5wiRX0mCEiKXWL4yiQkjSwLgU/lDhg7wtnNEQtiXJCghcoGtW6FvbxNoR0pX/YpShVbZOyQh7E46SQhhZzv3xNHpGRMJCSZefRUOHvye61Ymnvnyyy+JiYnB19eXmjVrUrFiRRxkrCORj0mCEsKODoUl0vrJeBJjXOncI5rZsz1p1cp62a1bt7JmzRo8PDxISkoiMTGRcuXKUatWLRo1akStWrXw9fWlSpUqODs75+wLESIbSIISwk5OnTbTxO8GidHFqN3sDCuWVLjj4K9Tp05l/fr13LhxI2XdyZMnOXnyJBs3bsTDwwOz2UxsbCw+Pj7UqFGDRo0aMXz4cErJHb4iD5LzA0LYwcWLmgbNI7l1pRgV65zljy0V7jr460MPPUSvXr1wslIwOTmZGzduEB0dTXJyMufPn2fr1q1Mnz6dqKiobHoVQmQvSVBC5LDr1+Hx1tFc+7ckxR/6lz+3l8vy4K8ffvghjo6OWSrr7u7OlClTqFGjxgNEK4T9SIISIgfFxsLTT8PfYV6UrhjDwV2l8fbO+hBGpUuXZvDgwbi6ut6xnMlkokGDBowcOfJBQxbCbiRBCZFDEhOhZYeL7NwJZcvC7iAPSpe693/B8ePH37X3npOTE97e3sTExNxvuELYnSQoIXKA2Qwdel4kOMgHJ8/rbN4MlSrd376KFi3KW2+9hZubW6ZlYmNj2bx5M9WrV2fv3r33V5EQdiYJSohspjW88NJlfvnRBweXGDb8pPH1fbB9jho16q5dyePj44mIiMDf35/JkyeTnJz8YJUKkcMkQQmRzV4ffZVl35YAUxxLVsTQtsWDzzjo6enJe++9h4eHR5r17lZ6W8TGxvLxxx/TrFkz/v333weuW4icYtMEpZQqqpRao5SKUUqdVkq9cJfyzkqpo0qpc7aMQ4jcYuZMmPNpUXBIYs78SJ57uqTN9j106NA0p/nc3d0ZM2YM7u7uKJW248WtW7c4cOAANWvWZPXq1TaLQYjsZOsW1OdAAuAD9Aa+VErVukP5t4DLNo5BiFwhMBBGjDB+nzL7EsP6lrPp/l1cXPj444/x8PDA3d2dqVOnMn78eEJCQqhRo0aGa1RJSUncvHmTvn37EhAQwK1bt2wajxC2ZrMEpZTyALoD47XW0Vrr34C1QN9MylcG+gBTbBWDELnFwu8SGDDQDMCsWTDm1TLZUk9AQADFihWjWbNmDBs2DICqVasSEhLCK6+8YrUjxa1bt1i+fDk1atQgJCQkW+ISwhaU1to2O1LqEWCX1to91bpRgJ/W+mkr5dcD84BrwGKttdWvl0qpQcAgAB8fn4bff/+9TeK1pejoaDw9Zd6euykox2n7jqJMnOQLZhOtngti/OB7e/6bb75JcnIyn332WZbKX758GU9PT6vJaP/+/UyaNInY2FiSkpIybHdxceHFF1+kZ8+eeXbg2YLyvrKF3Hqsnnjiif1a60YZNmitbbIALYAL6da9DARZKdsV2Gj53R84l5U6GjZsqHOjbdu22TuEPKEgHKf1683awZSoQesnA/be1z78/Px0vXr1bBbT5cuXdZs2bbSHh4cGMizu7u66RYsW+sKFCzarMycVhPeVreTWYwUEayuf+bb8yhQNFEq3rhBwM/UKy6nAacDrNqxbCLv75Rfo3DUJc5KJJj13senbxvYOCYDixYuzefNmpk6dmmkHij179lC9enV++uknO0UpREa2TFDhgEkpVTXVunpAWLpyVYFKwE6l1AVgNVBaKXVBKVXJhvEIkWN27IBnntEkJzrh2yGIXd83Q2V9BKNsp5Ri2LBh7Nu3j4ceeshqB4rr16/To0cPBg8eTFxcnJ0iFeI/NktQWusYjGTzvlLKQynVHOgMLEpX9DBQHqhvWV4CLlp+P2ureITIKXv2QMeOEBur6B0QR8iPLXBwyEXZKRVfX18OHz5Mv379rF6zio2NZeHChdSpU4cjR47YIUIh/mPrq6JDATfgEr
AUGKK1DlNKtVBKRQNorZO01hduL8BVwGx5LLe6izwlOBjaPJlIdDS80NvMgnmuOJmyNtq4vbi6uvLVV1+xYsUKChcujMmUdlq42NhYTpw4QePGjfn8889vXzcWIsfZNEFpra9qrbtorT201hW01kss63dqra12HdFaB+lMevAJkZuFhECrNoncinaiSMMtzJkbQxZnwsgVOnbsyLFjx2jSpEmGESm01ty6dYvRo0fz5JNPEhkZaacoRUGWN/uVCqv8/f159dVX7R1GgbB/P/g9kcTN60541P6Fw7/Uw9vDy95h3bNSpUqxfft2Jk6cmOk9U9u3b6d69eps3brVDhGKgqzAJ6jLly8zdOhQKlWqhIuLCz4+PrRu3ZotW7Zk6flBQUE88cQTOfoNMzAw0Oq9DKtXr2bKFLnvObvt3QtPtDJzI8qES61NBG95mLJFbDeEUU5zcHBg1KhR7Nq1i/Lly2eYayoxMZGrV6/SqVMnhg8fTkJCgp0iFQVNgU9Q3bt3Z+/evcybN4/w8HDWr19P+/btuXLlSo7H8qD/+EWLFsXLK+99i89Lfv8d2raFmzcccKu7kd0by1GjVGV7h2UTjzzyCEePHqVnz56ZDjr79ddfU79+ff766y87RCgKHGs3R+XWxdY36l67dk0DesuWLZmWWbRokW7UqJH29PTUJUqU0D169NDnzp3TWmt98uTJDDc9BgQEaK2Nmy2HDRuWZl8BAQG6Y8eOKY/9/Pz04MGD9ciRI3Xx4sV1o0aNtNZaT58+XdepU0e7u7vrMmXK6IEDB+pr165prY0b7dLX+d5771mts2LFinry5Ml60KBB2svLS5ctW1ZPmzYtTUzHjx/XLVu21C4uLrpatWp6w4YN2sPDQ3/77bf3dUzvJLfeJJhVv/2mtZeXWYPWPXtqHR0bny312PpG3fuxcuVK7eXlpR0dHTO835RS2t3dXc+fP1+bzWa7xql13n9f5aTceqzIgRt18xxPT088PT1Zu3Ztpvd9JCQkMGnSJEJDQ1m/fj2RkZH06tULgPLly7Nq1SoAwsLCiIiIYPbs2fcUw+LFi9Fas3PnThYuXAgYp1xmzZpFWFgYS5YsYe/evbz22msANGvWjFmzZuHu7k5ERAQRERGMGjUq0/3PnDmTOnXqcODAAd5++21Gjx7Nnj17ADCbzXTt2hWTycTvv/9OYGAgkyZNIj4+/p5eQ0Gwcye0a6e5eVPxSNvjLFkCHq53no8pL+vevTtHjhyhQYMGGVpT2tKB4tVXX6Vz585ERUXZKUqR71nLWrl1yY6hjlauXKm9vb21i4uLbtKkiR45cqT+/fffMy1/9OhRDeizZ89qrf9r0Vy+fDlNuay2oOrUqXPXGDdu3KidnZ11cnKy1lrrb7/9Vnt4eGQoZ60F9fzzz6cpU6VKFT158mSttdabNm3Sjo6OKS1CrbXetWuXBqQFlcqmTVq7uRktJ+os0vODF2ZrfbmhBXVbUlKSfv/997Wbm5vVYZJcXFx0iRIl9M6dO+0WY159X9lDbj1WSAvKuu7du3P+/HnWrVtH+/bt2b17N02aNOGjjz4C4MCBA3Tu3JmKFSvi5eVFo0bGeIZnzpyxSf0NGzbMsG7r1q20bduWcuXK4eXlRbdu3UhISODChQv3vP+6deumeVymTBkuXboEwLFjxyhTpgxly5ZN2d64ceM8O2hodli5Ep5+WhMbq6D+t0z9/CIvNrQ6QH++5OjoyPjx4wkKCqJ06dK4uLik2R4fH8/ly5d58sknGTt2rNUBaYW4X/JJhHHjYtu2bZkwYQK7d+9m4MCBTJw4kevXr9OuXTvc3d1ZtGgR+/btY9OmTcDdOzQ4ODhkuMExMTExQ7n095+cPn2ajh07UrNmTVasWMH+/fuZP39+luq0xsnJKc1jpRRms/me91MQzZsHzz0HiYkKmsxk1MfHGN1ipL3DsotHH32U48eP88wzz2TagWL27Nk0atSIU6dO5XyAIl+SBGWFr6
8vSUlJhISEEBkZyUcffUTLli2pUaNGSuvjNmdn4zpEcnLaQTBKlChBREREmnWhoaF3rTs4OJiEhARmzpxJ06ZNqVatGufPn89QZ/r67keNGjU4f/58mv0HBwdLAgOmT4eXXgKzGdq//Dsvvn2YaU9+bO+w7MrLy4vly5czd+5cPDw8MrS0b926xeHDh3nrrbfsFKHIbwp0grpy5QqtWrVi8eLFHDx4kJMnT7JixQqmTZtG69at8fX1xcXFhTlz5vDPP/+wYcMGxo8fn2YfFStWRCnFhg0buHz5MtHR0QC0atWKjRs3snbtWo4fP86IESM4e/buQw1WrVoVs9nMrFmzOHnyJEuXLmXWrFlpylSqVIm4uDi2bNlCZGTkfc+M2rZtW6pXr05AQAChoaH8/vvvjBgxApPJlGHE64JCa3j3Xbjd72T2bPjp6ybM6/xNgT0m6fXp04dDhw5Ru3btDK0pV1dXpk2bZqfIRH5ToBOUp6cnTZo0Yfbs2fj5+VGrVi3Gjh3LCy+8wLJlyyhRogQLFizghx9+wNfXl0mTJjFjxow0+yhbtiz9+/dn3Lhx+Pj4pIzkMGDAgJSlefPmeHl50bVr17vGVLduXWbPns2MGTPw9fXlm2++4dNPP01TplmzZgwePJhevXpRokSJ+/5AcHBwYM2aNcTHx/Poo48SEBDAuHHjUEpluFmzIEhKgiFD4MMPwcHRjMezQ2nc1ejxKMkprcqVK7N//35ef/31lBEo3N3dmTt3LpUr54/7wkQuYK3nRG5dZMLC7BcSEqIBHRwcbPN95+bjdPOm1h07ag1aO7ska5fez+k6X9TRV29dzfFYclMvvqzYsWOHLl68uO7Zs6dd6s/N76vcJrceKzLpxWe6WwIT+duaNWvw8PCgatWqnDp1ihEjRlCvXj0aNGhg79ByzMWLxnQZ+/dDYe9k6PUM3tWOsKnPLrzdvO0dXq7XokULzp49m2FUdCEelLyjCribN2/y9ttvc/bsWby9vfH392fmzJkF5pTWsWPQvj2cOgUVKiWR2KstSd5hbO6zizJeZewdXp5REE8Ji+wnCaqA69evH/369bN3GHbx22/wzDNw7Ro0bgxrfoQpf9ZiwCPTqVqs6t13IITIVpKgRIG0aBG8/DLEx0OHjkl8Nu8KZX18mFN6jr1DE0JYFOhefKLgSU6Gt96Cfv2M5DR4SDI8342nlrcgLsn6eIwi51SqVClDr1VRcEkLShQYUVHQqxds2gQmE8yabeaP0i/y08F1fNXxK1xNch0lJ/Tv35/IyEjWr1+fYdu+ffsyjK4iCq4C0YIaM2YMr732GidOnLB3KMJOjh+Hxx4zklOxYrBli+afh99i0cFFTH5iMq80esXeIQqMEVisDaWU02RSxtwh3yeoS5cuMXv2bObOnUvt2rVp2bIlv/zyi73DEjlo40YjOYWHQ506sG8fhHv9HzN+n8Hrj77OuBbj7B2isEh/ik8pxddff03Pnj3x8PDgoYceYvHixWmec/nyZZ5//nm8vb3x9vamY8eOaSZUPHHiBJ07d6ZUqVJ4eHjQoEGDDK23SpUqMXHiRAYMGECRIkXo3bt39r5QkSX5PkF99dVXgDFQa1xcHDt37uTZZ5+1c1QiJyQlGcMWdegA169Dt26wezdUrgzda3bnff/3mflUwelSn1e9//77dO7cmdDQUJ577jkGDBiQMpvArVu3GDFiBK6urmzfvp09e/ZQunRp2rRpkzIEWHR0NO3bt2fLli2EhobSvXt3unXrxrFjx9LUM2PGDGrUqEFwcHDKbAbCvvJ1gkpKSuJ///tfmskInZ2dGTBggB2jEjkhIgLatLEMW+QAkyfDihUQFvUHCckJFHMvxni/8TiofP0vkC/07duXPn36UKVKFSZPnozJZGLHjh0AfP/992it+fbbb6lbty41atRg7ty5REdHp7SS6tWrx+DBg6lTpw5VqlRh3LhxNGjQgJUrV6apx8/Pj9GjR1OlShWqVp
XbDHKDfP3fuW7dugyzwzo4OPD666/bKSKRE379FerXh+3bwccHfvnFaEn9dnYHfoF+vPPLO/YOUdyD1HOamUwmSpQokTKrwP79+4mIiMDLyytlhuzChQtz7dq1lGvOMTExjB49Gl9fX7y9vfH09CQ4ODjDnG6353oTuYdNe/EppYoC84AngUjgHa31Eivl3gICgIqWcl9orT+xZSwAU6ZMSRld/LbmzZtToUIFW1clcoHkZPjgA5g0yRiV/IknYMkSKFUKQi+E8vTSp6nsXZl3WkiCykvuNKeZ2WymSpUqbNiwIcPzihYtCsCoUaPYtGkTn376KVWrVsXd3Z1+/fpl6AghvQdzH1t3M/8cSAB8gPrABqVUqNY6LF05BfQDDgIPA5uVUme11t/bKpCjR49y+PDhNOs8PT0ZM2aMraoQucjJk9C/P+zYAUrBhAnG4ugI/1z7h3aL21HIpRA/9/mZ4u7F7R2usJEGDRqwaNEiihcvTpEiRayW+e233+jXrx/du3cHIC4ujhMnTlCtWrWcDFXcB5ud4lNKeQDdgfFa62it9W/AWiDD/Nha62la6wNa6ySt9XHgR6C5rWIB44Jn+m9IhQsXpnXr1rasRtiZ1jB/PtStayQnHx/4+WejFeXoaIzW/+yKZ0k0J7K5z2YqFJbWc25w48YNQkJC0iz3MxNv7969KVq0KJ07d2b79u2cPHmSHTt2MHLkyJSefNWqVWPNmjUcOHCAQ4cO0adPnzTXpUXuZcsWVDUgSWsdnmpdKOB3pycpowtVC2BuJtsHAYMAfHx8CAoKumsgt27dYtGiRWlmnXVxcaFLly5s3779rs+/V9HR0VmKq6Cz9XG6ds2J6dOrs2uX0SJq2fIyI0aE4+SUSOpqhpQZQkKpBC6GXeQiF21Wf3aJiooiOTk5376nLly4wM6dO3nkkUfSrG/ZsmVK6yb1aw8LC6N48f9avenLfPjhhyxZsoQuXboQExNDsWLFqF+/PkeOHOHff/+lZ8+efPLJJzRv3hxPT0969OiBr68vFy5cSNmHtXrzozz3WWVtDo77WTCSzIV0614Ggu7yvEkYiczlbnVkdT6ozz77THt4eGggZXF1ddVRUVFZnp/kXuTWOVZyG1sepx9+0LpECWP+pkKFtF64UGuz+b/tsYmxenHoYm1OvTKPyGvzQdmb/P9lXW49VmQyH5Qte/FFA4XSrSsE3MzsCUqpVzGuRXXUWsdnVu5eaK2ZNm0aMTExKescHR15/vnnKVy4sC2qEHYUEQHPPgtdusDly9CqFRw6BH37GteeAJLMSfRa1Ys+a/rw54U/7RuwEOK+2TJBhQMmpVTqGwjqAek7SACglBoAjAFaa63P2SqIoKAgrl27lmads7MzI0eOtFUVwg7MZvjqK6hZ07ifyd0dZs2CLVsgdadMrTVD1g/hh2M/MPup2TQoXXAmXhQiv7HZNSitdYxSajXwvlLqJYxefJ2BZunLKqV6Ax8BT2it/7FVDABTp07N0LW8Zs2a1K5d25bViBx0+DC88ooxCgQYs99+/jlUrJix7Pht4/nmz28Y12Icrz8m97sJkZfZ+kbdoYAbcAlYCgzRWocppVoopVJnjQ+AYsA+pVS0ZfnqQSs/d+5chguAXl5e0rU8j7pxA8aMgUceMZJTqVKwfDmsW2c9OR2+dJiPdn7Eyw1eZvITk3M+YCGETdn0Piit9VWgi5X1OwHPVI8r27Le2+bMmXO740UKR0dHunTJEJLIxZKT4dtvjdEfLlo63Q0eDFOmQCa3ugBQu2Rtdry4g6blmsr4ekLkA/lmPqj4+Hi+/PLLNPc+ubq68tprr2W4E13kXtu2wfDhEBpqPG7aFGbONEYjz8ymvzehtaZ91fY8XuHxnAlUCJHt8s1YfCtXrkwZ/uQ2rTVDhgyxU0TiXhw9Cl27Gr3yQkONjg9Ll8KuXXdOTnvO7qHbsm5M2j4JszZnXlAIkefkmxZU+s4RSinatm1L6dKl7RiVuJ
u//oL33zfGzDObwcMD3nkHRowAN7c7PzfsUhgdl3SkbKGy/Pj8jzIyuRD5TL5IUH/++WeG2XLd3d15++237RSRuJuTJ40pMBYuNK45OTnBoEHG+HlZ+U5x5voZ2i1uh4vJhc19NuPj6ZP9QQshclS+SFCffvpphrG1SpYsSfPmNh3eT9jAX3/Bp58a4+clJRnj5Q0caHSIqFQp6/sJDAkkOiGaHS/uoLJ3tvS5EULYWZ5PUFevXmX16tVprj95eHgwevRo9QbjigAADwFJREFU6cmVi+zZAxMm1OK334wBXh0cjNEfJkyAKlXufX/jW46nb92+kpyEyMfy/En7efPmZUhEWmv69s0wiLrIYWYz/PADNG8OzZrBzp0lcHIyWkxhYcbpvXtJTgnJCQz8cSDhV8JRSklyEiKfy1MJKiEhgRUrVqTMkms2m5k+fTqxsbEpZUwmEwEBATL5mB1dugRTp0LVqkbPvN27jfuXevc+zalT8M03UKPGve3TrM30W9OP+SHz2fvv3myJWwiRu+SpU3zR0dH06tULDw8PXnnlFapVq5ZmUFgwEtTw4cPtFGHBpbUxxfpXX8Hq1ZCYaKyvVAnefNNoNQUHn6R0aStDQNx135o3Nr7BsrBlTGszjT51+9g2eCFErpSnEpSjoyMeHh7cuHGD2bNno7Um8fYnoUWDBg2oWrVqJnsQtnbmjNFFfMECOHbMWOfgAM88Y4z+8OSTRkeIB/HBjg+Ys28Oo5qO4q3mbz140EKIPCHPJajb15vSz5YLxrh7b775Zk6HVeBcuwYrV8LixcYstreVLg0vvwwvvQTly9umroTkBDb/s5mAegFMbTvVNjsVQuQJeSpBmUymDKNFpJaUlERAQACbNm1i5MiR+Pr65mB0+du1a7B+vXH67qef4Pb3A1dX6NwZeveGp54y7meyFa01zo7ObO6zGZODSW7EFaKAyVP/8Y6OjhlO6f1/e/cfXFV95nH8/dwQIEBiIGAoiKW1AiNRsIKVOkDsEBAtoO4f7iiCv5DS1rLqstVx6RQZt6uWbnW0i7SssgRL21ncLWWVuMgPWUcLLD8UW5EpgsgwRSAESAj58ewfJ4FwE5JLcsk5N/fzmvnOzT33e+99OJycJ+ec73m+DVVUVFBRUcGSJUu45pprmDdvXjtG1/EcOAC/+AUUFcGll8K0acGovKoqGDcOXn01KOa6fDlMmpTc5LTmL2u4ednNHDt1jKzMLDIzVE9RJN2k3BFUU6f24sViMfLz85k6VRfTL0RVVXC/0urVQduy5exrGRlBnbzbbw9a//4XL44tB7Zw229uY2DuQNXXE0ljKZWgzKzFJJWVlcVVV11FSUkJvXr1asfoUo87fPwxrFsXJKQ1a+D48bOvd+0KEyYECenb34a8vIsf067Du5i4bCJ5WXmsnrqanlk9L/6XikgkpVSCgqBKxPkSVLdu3bjlllsoLi6mS5cu7RxZ9NXWBrPTrl8fDG7YsCG4Z6mhIUOCa0kTJsCYMcHU6u3lwPEDjF86HoC37nmLftn92u/LRSRyUi5BZWdnc/To0UbLs7KyeOSRR5g/f75KHBEcHX3+Ofzxj7BpU/C4eXMwS21D+flBIioqCpLS5ZeHEy9AWWUZ3Tt3Z8WdK7gyT7cKiKS7lEtQubm57Nu375xlWVlZLFy4kGnTpoUUVbhqa2Hv3uDoaPv2swnp4MHGfQcMgLFjg6Q0dmxQ7SHsfH665jSZsUyG9B7Cju/sICPWxhunRKRDSLkE1fC6kpmRnZ3NypUrGTNmTIhRtQ/3YNTcRx8FyeiDD4K2cyc0mArrjNxcGDkyaNdfHzz2i9hZs6qaKu74zR1c0fMKnp/4vJKTiJyRcgmqd+/eAGRmZtKnTx/Wrl3LoEGDQo4qedzh8OFgWoqmWsNBDA3l58PVV0NBwdmk9LWvhX901Jxar+WB3z/Aqk9WsfDWhWGHIyIRk3IJKj8/n1gsRkFBASUlJWcSVqqorIT9+4MSQZ
99FjzGt7jygufIzQ0GMlx99dmEVFAAffq0378hGdydOSVzWLpjKfNvms/METPDDklEIiblEtTIkSM5cuQIr7zySmRG6tXUGIcOBaffDh48+9jUz/Gj5pqSnR1cG2qq5eVF+6goUT9996f87L2f8fD1D/Pk6CfDDkdEIijlEtT06dOZPn16Uj/THcrLg9NnDVtZGRw5ErTDh8/+HN+OHRub8HdlZAQ3uV5+edNtwAC45JKOkYSaM7j3YO4bfh8/v/nnGnUpIk1KaoIys17AYmA88AXwhLu/1kQ/A/4ZeLBu0a+Ax93dm/v86mr49FOoqGjcysubXl7/Wnl5kHDik1B9a6bEXwL/bqdnTyM/P7gW1Ldv0Jr6uU8f6JRyfxYkzxflX9C7W28mD57M5MGTww5HRCIs2bvKl4DTQD4wHFhlZtvdfWdcv4eA24BhgANvAXuAZq+Ub98OX7lIk6h27RqcWsvOhpycs4+9ep3b8vIaL9u6dT3f+lbhxQmsA9lRuoNJz0+i+PZipgyZEnY4IhJx1sJBS+IfZNYdOAoUuPuuumVLgc/d/fG4vu8Cr7r7orrnDwAz3P2G5r4jFrvWO3d+g1jsNBkZlcRi9e00sVhl3LL65/WvnSIjo/xM69Spou7nk2RkVBCL1bT6315aWkpubm6r358OTnQ/wdbhW+lS1YVrt15LZpWKv57Ptm3bqK6uZsSIEWGHkhL0+5e4qK6r9evXb3H3Rht8Mo+gBgHV9cmpznagqQs0Q+tea9hvaFMfamYPERxxkZmZyZAhN7c5UPegMGozhdEvSE1NDaWlpcn5sA6oslslu0ftJlYdY+A7AzlZ0cwwRaG6uhp31zaVIP3+JS7V1lUyE1QPIK6QDseA7PP0PRbXr4eZWfx1qLqjrEUAI0aM8M2bNycv4iRZt24dhYWFYYcRSWWVZVy36DpyKnJYMHQB9z5zb9ghRV5hYSGlpaVs27Yt7FBSgn7/EhfVdXW+gVLJTFAngJy4ZTlAU7eWxvfNAU60NEhCUk9252zuH34/N33lJk7tPhV2OCKSQpI5YeEuoJOZNazyOQyIHyBB3bJhCfSTFHWq+hS7j+zGzHhi9BPccFmzlxdFRBpJWoJy95PACuApM+tuZjcCU4ClTXT/d+BRM+tvZv2Ax4BXkxWLhKumtoa7/uMubvjVDRytaFx5XkQkEcme8v27QBbwV+DXwCx332lmo82sYTnTl4GVwAfAh8CqumWS4tydWatm8fqfX+dHY3+kCQdFpNWSeh+Uux8huL8pfvk7BAMj6p878A91TTqQuWvn8sv/+yVPjn6SH3zjB2GHIyIpLNlHUJLGfrfzdzz9ztPM+PoM5t80P+xwRCTFpXHRHUm2SYMnsWD8AmZ/Y7bq64lIm+kIStps476NHK04StdOXXl01KOadFBEkkIJStrkvf3vMaF4Ag+/8XDYoYhIB6MEJa320aGPuPW1W/lSjy+xYPyCsMMRkQ5GCUpaZd+xfUwonkDnjM6U3FNCfo/8sEMSkQ5GgySkVWb+YSZllWVsuHcDX+351bDDEZEOSAlKWmXx5MXsLd3LsL7DWu4sItIKOsUnCTtdc5oX3n+B6tpq+mX3Y9SAUWGHJCIdmBKUJKTWa5n+n9OZ/eZs1u5ZG3Y4IpIGlKCkRe7O7Ddms/zD5Twz7hmKrigKOyQRSQNKUNKip995mhc3vchjox5jzjfnhB2OiKQJJShp1v6y/fxk40+YNmwazxY9qxJGItJuNIpPmnVZzmW8/+D7DM4bTMz094yItB/tcaRJb+95m5c3B1N0FVxaQGZGZsgRiUi6UYKSRrYc2MKU5VN4cdOLVFZXhh2OiKQpJSg5xyeHP2HisonkZeXx5t1v0qVTl7BDEpE0pQQlZxw4foDxxeNxnJJ7Suif0z/skEQkjWmQhJyxevdqDpcf5u3pbzMob1DY4YhImlOCkjPuu/Y+Jl45kb49+oYdioiITvGlu6qaKqaumMqGvRsAlJxEJD
KUoNKYuzNj5QyWfbCMPx36U9jhiIicQwkqjf3wf37Iku1LmFc4j5kjZoYdjojIOZKSoMysl5m9bmYnzWyvmd3VTN85ZvahmR03sz1mpuJuIXjuf5/juXef43sjv8fcMXPDDkdEpJFkDZJ4CTgN5APDgVVmtt3ddzbR14BpwA7gCqDEzD5z9+VJikVa4O5sPbiVO4feyQsTX1B9PRGJpDYnKDPrDvwNUODuJ4CNZvZ74B7g8fj+7v5sg6cfm9l/ATcCSlDtoNZriVmM4juKqa6tVn09EYmsZBxBDQKq3X1Xg2XbgbEtvdGCP91HAy830+ch4KG6pyfM7OM2xHqx9Aa+CDuIFKD1lLjeZqZ1lRhtV4mL6rr6clMLk5GgegBlccuOAdkJvPfHBNfBXjlfB3dfBCxqbXDtwcw2u/uIsOOIOq2nxGldJU7rKnGptq5aPL9jZuvMzM/TNgIngJy4t+UAx1v43O8TXIu61d1VkVRERM7R4hGUuxc293rdNahOZnalu39St3gY0NQAifr33E9wfWqMu+9PPFwREUkXbb5C7u4ngRXAU2bW3cxuBKYAS5vqb2Z3A/8EFLn7X9r6/RER6VOQEaL1lDitq8RpXSUupdaVuXvbP8SsF/BvQBFwGHjc3V+re2008Ia796h7vge4DGh4Wq/Y3b/T5kBERKTDSEqCEhERSTbdBCMiIpGkBCUiIpGkBJVkZnalmZ0ys+KwY4kiM+tiZovrajYeN7NtZjYx7Lii4kLqWqYzbUetk2r7JyWo5HsJ2BR2EBHWCfiMoNLIJcA/Ar81s4EhxhQlDeta3g38q5kNDTekSNJ21DoptX9SgkoiM/tboBRYE3YsUeXuJ939x+7+qbvXuvsfgD3AdWHHFrYGdS3nuvsJd98I1Ne1lAa0HV24VNw/KUEliZnlAE8Bj4YdSyoxs3yCeo7nvbE7jZyvrqWOoFqg7ah5qbp/UoJKnvnAYlXGSJyZZQLLgCXu/uew44mAttS1TFvajhKSkvsnJagEtFSP0MyGA+OAfwk71rAlULuxvl+MoNrIaeD7oQUcLa2qa5nOtB21LJX3T8masLBDS6Ae4d8BA4F9dZP/9QAyzOwqd//6RQ8wQlpaV3BmmpXFBAMBbnH3qosdV4rYxQXWtUxn2o4SVkiK7p9USSIJzKwb5/7l+/cEG8Qsdz8USlARZmYLCWZeHlc3yaXUMbPlgAMPEqyj/wa+eZ7ZqdOatqPEpPL+SUdQSeDu5UB5/XMzOwGcivp/fhjM7MvATIJajAcbTDc/092XhRZYdHyXoK7lXwnqWs5ScmpM21HiUnn/pCMoERGJJA2SEBGRSFKCEhGRSFKCEhGRSFKCEhGRSFKCEhGRSFKCEhGRSFKCEhGRSFKCEhGRSPp/Ift7wiHFVloAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "z = np.linspace(-5, 5, 200)\n", - "\n", - "plt.plot([-5, 5], [0, 0], 'k-')\n", - "plt.plot([-5, 5], [1, 1], 'k--')\n", - "plt.plot([0, 0], [-0.2, 1.2], 'k-')\n", - "plt.plot([-5, 5], [-3/4, 7/4], 'g--')\n", - "plt.plot(z, logit(z), \"b-\", linewidth=2)\n", - "props = dict(facecolor='black', shrink=0.1)\n", - "plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha=\"center\")\n", - "plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha=\"center\")\n", - "plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha=\"center\")\n", - "plt.grid(True)\n", - "plt.title(\"Sigmoid activation function\", fontsize=14)\n", - "plt.axis([-5, 5, -0.2, 1.2])\n", - "\n", - "save_fig(\"sigmoid_saturation_plot\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Xavier 초기화와 He 초기화" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Constant',\n", - " 'GlorotNormal',\n", - " 'GlorotUniform',\n", - " 'HeNormal',\n", - " 'HeUniform',\n", - " 'Identity',\n", - " 'Initializer',\n", - " 'LecunNormal',\n", - " 'LecunUniform',\n", - " 'Ones',\n", - " 'Orthogonal',\n", - " 'RandomNormal',\n", - " 'RandomUniform',\n", - " 'TruncatedNormal',\n", - " 'VarianceScaling',\n", - " 'Zeros',\n", - " 'constant',\n", - " 'deserialize',\n", - " 'get',\n", - " 'glorot_normal',\n", - " 'glorot_uniform',\n", - " 'he_normal',\n", - " 'he_uniform',\n", - " 'identity',\n", - " 'lecun_normal',\n", - " 'lecun_uniform',\n", - " 'ones',\n", - " 'orthogonal',\n", - " 'random_normal',\n", - " 'random_uniform',\n", - " 'serialize',\n", - " 'truncated_normal',\n", - " 'variance_scaling',\n", - " 'zeros']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": 
"execute_result" - } - ], - "source": [ - "[name for name in dir(keras.initializers) if not name.startswith(\"_\")]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=\"he_normal\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "init = keras.initializers.VarianceScaling(scale=2., mode='fan_avg',\n", - " distribution='uniform')\n", - "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=init)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 수렴하지 않는 활성화 함수" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### LeakyReLU" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def leaky_relu(z, alpha=0.01):\n", - " return np.maximum(alpha*z, z)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "그림 저장: leaky_relu_plot\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(z, leaky_relu(z, 0.05), \"b-\", linewidth=2)\n", - "plt.plot([-5, 5], [0, 0], 'k-')\n", - "plt.plot([0, 0], [-0.5, 4.2], 'k-')\n", - "plt.grid(True)\n", - "props = dict(facecolor='black', shrink=0.1)\n", - "plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha=\"center\")\n", - "plt.title(\"Leaky ReLU activation function\", fontsize=14)\n", - "plt.axis([-5, 5, -0.5, 4.2])\n", - "\n", - "save_fig(\"leaky_relu_plot\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['deserialize',\n", - " 'elu',\n", - " 'exponential',\n", - " 'get',\n", - " 'hard_sigmoid',\n", - " 'linear',\n", - " 'relu',\n", - " 'selu',\n", - " 'serialize',\n", - " 'sigmoid',\n", - " 'softmax',\n", - " 'softplus',\n", - " 'softsign',\n", - " 'swish',\n", - " 'tanh']" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[m for m in dir(keras.activations) if not m.startswith(\"_\")]" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['LeakyReLU', 'PReLU', 'ReLU', 'ThresholdedReLU']" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[m for m in dir(keras.layers) if \"relu\" in m.lower()]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "LeakyReLU를 사용해 패션 MNIST에서 신경망을 훈련해 보죠:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n", - "X_train_full = X_train_full / 255.0\n", - "X_test = X_test / 255.0\n", - "X_valid, X_train = X_train_full[:5000], X_train_full[5000:]\n", 
- "y_valid, y_train = y_train_full[:5000], y_train_full[5000:]" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n", - " keras.layers.LeakyReLU(),\n", - " keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n", - " keras.layers.LeakyReLU(),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 1.2819 - accuracy: 0.6229 - val_loss: 0.8886 - val_accuracy: 0.7160\n", - "Epoch 2/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.7955 - accuracy: 0.7361 - val_loss: 0.7130 - val_accuracy: 0.7656\n", - "Epoch 3/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.6816 - accuracy: 0.7721 - val_loss: 0.6427 - val_accuracy: 0.7898\n", - "Epoch 4/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.6217 - accuracy: 0.7943 - val_loss: 0.5900 - val_accuracy: 0.8066\n", - "Epoch 5/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5832 - accuracy: 0.8075 - val_loss: 0.5582 - val_accuracy: 0.8202\n", - "Epoch 6/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5553 - accuracy: 0.8157 - val_loss: 0.5350 - val_accuracy: 0.8238\n", - "Epoch 7/10\n", - 
"1719/1719 [==============================] - 5s 3ms/step - loss: 0.5338 - accuracy: 0.8224 - val_loss: 0.5157 - val_accuracy: 0.8304\n", - "Epoch 8/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5172 - accuracy: 0.8273 - val_loss: 0.5079 - val_accuracy: 0.8282\n", - "Epoch 9/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5040 - accuracy: 0.8289 - val_loss: 0.4895 - val_accuracy: 0.8386\n", - "Epoch 10/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4924 - accuracy: 0.8321 - val_loss: 0.4817 - val_accuracy: 0.8396\n" - ] - } - ], - "source": [ - "history = model.fit(X_train, y_train, epochs=10,\n", - " validation_data=(X_valid, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "PReLU를 테스트해 보죠:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n", - " keras.layers.PReLU(),\n", - " keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n", - " keras.layers.PReLU(),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/10\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 1.3461 - accuracy: 0.6209 - val_loss: 0.9255 - val_accuracy: 0.7184\n", - "Epoch 2/10\n", - "1719/1719 [==============================] - 5s 3ms/step - 
loss: 0.8197 - accuracy: 0.7355 - val_loss: 0.7305 - val_accuracy: 0.7628\n", - "Epoch 3/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.6966 - accuracy: 0.7694 - val_loss: 0.6565 - val_accuracy: 0.7880\n", - "Epoch 4/10\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.6331 - accuracy: 0.7909 - val_loss: 0.6003 - val_accuracy: 0.8048\n", - "Epoch 5/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5917 - accuracy: 0.8057 - val_loss: 0.5656 - val_accuracy: 0.8184\n", - "Epoch 6/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5618 - accuracy: 0.8134 - val_loss: 0.5406 - val_accuracy: 0.8238\n", - "Epoch 7/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5390 - accuracy: 0.8206 - val_loss: 0.5196 - val_accuracy: 0.8312\n", - "Epoch 8/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5213 - accuracy: 0.8257 - val_loss: 0.5113 - val_accuracy: 0.8320\n", - "Epoch 9/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5070 - accuracy: 0.8288 - val_loss: 0.4916 - val_accuracy: 0.8380\n", - "Epoch 10/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4945 - accuracy: 0.8315 - val_loss: 0.4826 - val_accuracy: 0.8396\n" - ] - } - ], - "source": [ - "history = model.fit(X_train, y_train, epochs=10,\n", - " validation_data=(X_valid, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### ELU" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "def elu(z, alpha=1):\n", - " return np.where(z < 0, alpha * (np.exp(z) - 1), z)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "그림 저장: elu_plot\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(z, elu(z), \"b-\", linewidth=2)\n", - "plt.plot([-5, 5], [0, 0], 'k-')\n", - "plt.plot([-5, 5], [-1, -1], 'k--')\n", - "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n", - "plt.grid(True)\n", - "plt.title(r\"ELU activation function ($\\alpha=1$)\", fontsize=14)\n", - "plt.axis([-5, 5, -2.2, 3.2])\n", - "\n", - "save_fig(\"elu_plot\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "텐서플로에서 쉽게 ELU를 적용할 수 있습니다. 층을 만들 때 활성화 함수로 지정하면 됩니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.layers.Dense(10, activation=\"elu\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### SELU" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Günter Klambauer, Thomas Unterthiner, Andreas Mayr는 2017년 한 [훌륭한 논문](https://arxiv.org/pdf/1706.02515.pdf)에서 SELU 활성화 함수를 소개했습니다. 훈련하는 동안 완전 연결 층만 쌓아서 신경망을 만들고 SELU 활성화 함수와 LeCun 초기화를 사용한다면 자기 정규화됩니다. 각 층의 출력이 평균과\n", - "표준편차를 보존하는 경향이 있습니다. 이는 그레이디언트 소실과 폭주 문제를 막아줍니다. 그 결과로 SELU 활성화 함수는 이런 종류의 네트워크(특히 아주 깊은 네트워크)에서 다른 활성화 함수보다 뛰어난 성능을 종종 냅니다. 따라서 꼭 시도해 봐야 합니다. 하지만 SELU 활성화 함수의 자기 정규화 특징은 쉽게 깨집니다. ℓ1나 ℓ2 정규화, 드롭아웃, 맥스 노름, 스킵 연결이나 시퀀셜하지 않은 다른 토폴로지를 사용할 수 없습니다(즉 순환 신경망은 자기 정규화되지 않습니다). 하지만 실전에서 시퀀셜 CNN과 잘 동작합니다. 자기 정규화가 깨지면 SELU가 다른 활성화 함수보다 더 나은 성능을 내지 않을 것입니다." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "from scipy.special import erfc\n", - "\n", - "# alpha와 scale은 평균 0과 표준 편차 1로 자기 정규화합니다\n", - "# (논문에 있는 식 14 참조):\n", - "alpha_0_1 = -np.sqrt(2 / np.pi) / (erfc(1/np.sqrt(2)) * np.exp(1/2) - 1)\n", - "scale_0_1 = (1 - erfc(1 / np.sqrt(2)) * np.sqrt(np.e)) * np.sqrt(2 * np.pi) * (2 * erfc(np.sqrt(2))*np.e**2 + np.pi*erfc(1/np.sqrt(2))**2*np.e - 2*(2+np.pi)*erfc(1/np.sqrt(2))*np.sqrt(np.e)+np.pi+2)**(-1/2)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "def selu(z, scale=scale_0_1, alpha=alpha_0_1):\n", - " return scale * elu(z, alpha)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "그림 저장: selu_plot\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(z, selu(z), \"b-\", linewidth=2)\n", - "plt.plot([-5, 5], [0, 0], 'k-')\n", - "plt.plot([-5, 5], [-1.758, -1.758], 'k--')\n", - "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n", - "plt.grid(True)\n", - "plt.title(\"SELU activation function\", fontsize=14)\n", - "plt.axis([-5, 5, -2.2, 3.2])\n", - "\n", - "save_fig(\"selu_plot\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "기본적으로 SELU 하이퍼파라미터(`scale`과 `alpha`)는 각 뉴런의 평균 출력이 0에 가깝고 표준 편차는 1에 가깝도록 조정됩니다(입력은 평균이 0이고 표준 편차 1로 표준화되었다고 가정합니다). 이 활성화 함수를 사용하면 1,000개의 층이 있는 심층 신경망도 모든 층에 걸쳐 거의 평균이 0이고 표준 편차를 1로 유지합니다. 이를 통해 그레이디언트 폭주와 소실 문제를 피할 수 있습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Layer 0: mean -0.00, std deviation 1.00\n", - "Layer 100: mean 0.02, std deviation 0.96\n", - "Layer 200: mean 0.01, std deviation 0.90\n", - "Layer 300: mean -0.02, std deviation 0.92\n", - "Layer 400: mean 0.05, std deviation 0.89\n", - "Layer 500: mean 0.01, std deviation 0.93\n", - "Layer 600: mean 0.02, std deviation 0.92\n", - "Layer 700: mean -0.02, std deviation 0.90\n", - "Layer 800: mean 0.05, std deviation 0.83\n", - "Layer 900: mean 0.02, std deviation 1.00\n" - ] - } - ], - "source": [ - "np.random.seed(42)\n", - "Z = np.random.normal(size=(500, 100)) # 표준화된 입력\n", - "for layer in range(1000):\n", - " W = np.random.normal(size=(100, 100), scale=np.sqrt(1 / 100)) # LeCun 초기화\n", - " Z = selu(np.dot(Z, W))\n", - " means = np.mean(Z, axis=0).mean()\n", - " stds = np.std(Z, axis=0).mean()\n", - " if layer % 100 == 0:\n", - " print(\"Layer {}: mean {:.2f}, std deviation {:.2f}\".format(layer, means, stds))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "쉽게 SELU를 사용할 수 있습니다:" - ] - }, - { - "cell_type": "code", - 
"execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.layers.Dense(10, activation=\"selu\",\n", - " kernel_initializer=\"lecun_normal\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "100개의 은닉층과 SELU 활성화 함수를 사용한 패션 MNIST를 위한 신경망을 만들어 보죠:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "np.random.seed(42)\n", - "tf.random.set_seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[28, 28]))\n", - "model.add(keras.layers.Dense(300, activation=\"selu\",\n", - " kernel_initializer=\"lecun_normal\"))\n", - "for layer in range(99):\n", - " model.add(keras.layers.Dense(100, activation=\"selu\",\n", - " kernel_initializer=\"lecun_normal\"))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이제 훈련해 보죠. 
입력을 평균 0과 표준 편차 1로 바꾸어야 한다는 것을 잊지 마세요:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "pixel_means = X_train.mean(axis=0, keepdims=True)\n", - "pixel_stds = X_train.std(axis=0, keepdims=True)\n", - "X_train_scaled = (X_train - pixel_means) / pixel_stds\n", - "X_valid_scaled = (X_valid - pixel_means) / pixel_stds\n", - "X_test_scaled = (X_test - pixel_means) / pixel_stds" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 1.4254 - accuracy: 0.4457 - val_loss: 0.9036 - val_accuracy: 0.6758\n", - "Epoch 2/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 0.8673 - accuracy: 0.6903 - val_loss: 0.7675 - val_accuracy: 0.7316\n", - "Epoch 3/5\n", - "1719/1719 [==============================] - 32s 18ms/step - loss: 0.6920 - accuracy: 0.7525 - val_loss: 0.6481 - val_accuracy: 0.7694\n", - "Epoch 4/5\n", - "1719/1719 [==============================] - 32s 18ms/step - loss: 0.6801 - accuracy: 0.7533 - val_loss: 0.6137 - val_accuracy: 0.7852\n", - "Epoch 5/5\n", - "1719/1719 [==============================] - 32s 18ms/step - loss: 0.5883 - accuracy: 0.7845 - val_loss: 0.5503 - val_accuracy: 0.8036\n" - ] - } - ], - "source": [ - "history = model.fit(X_train_scaled, y_train, epochs=5,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "대신 ReLU 활성화 함수를 사용하면 어떤 일이 일어나는지 확인해 보죠:" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "np.random.seed(42)\n", - "tf.random.set_seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential()\n", - 
"model.add(keras.layers.Flatten(input_shape=[28, 28]))\n", - "model.add(keras.layers.Dense(300, activation=\"relu\", kernel_initializer=\"he_normal\"))\n", - "for layer in range(99):\n", - " model.add(keras.layers.Dense(100, activation=\"relu\", kernel_initializer=\"he_normal\"))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "1719/1719 [==============================] - 33s 19ms/step - loss: 1.8139 - accuracy: 0.2607 - val_loss: 1.4307 - val_accuracy: 0.3734\n", - "Epoch 2/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 1.1872 - accuracy: 0.4937 - val_loss: 1.0023 - val_accuracy: 0.5844\n", - "Epoch 3/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 0.9595 - accuracy: 0.6029 - val_loss: 0.8268 - val_accuracy: 0.6698\n", - "Epoch 4/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 0.9046 - accuracy: 0.6324 - val_loss: 0.8080 - val_accuracy: 0.6908\n", - "Epoch 5/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 0.8454 - accuracy: 0.6642 - val_loss: 0.7522 - val_accuracy: 0.7180\n" - ] - } - ], - "source": [ - "history = model.fit(X_train_scaled, y_train, epochs=5,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "좋지 않군요. 그레이디언트 폭주나 소실 문제가 발생한 것입니다." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 배치 정규화" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Dense(300, activation=\"relu\"),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Dense(100, activation=\"relu\"),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: \"sequential_4\"\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "flatten_4 (Flatten) (None, 784) 0 \n", - "_________________________________________________________________\n", - "batch_normalization (BatchNo (None, 784) 3136 \n", - "_________________________________________________________________\n", - "dense_212 (Dense) (None, 300) 235500 \n", - "_________________________________________________________________\n", - "batch_normalization_1 (Batch (None, 300) 1200 \n", - "_________________________________________________________________\n", - "dense_213 (Dense) (None, 100) 30100 \n", - "_________________________________________________________________\n", - "batch_normalization_2 (Batch (None, 100) 400 \n", - "_________________________________________________________________\n", - "dense_214 (Dense) (None, 10) 1010 \n", - "=================================================================\n", - "Total params: 271,346\n", - "Trainable params: 268,978\n", - "Non-trainable params: 2,368\n", - "_________________________________________________________________\n" - ] - } - ], - 
"source": [ - "model.summary()" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('batch_normalization/gamma:0', True),\n", - " ('batch_normalization/beta:0', True),\n", - " ('batch_normalization/moving_mean:0', False),\n", - " ('batch_normalization/moving_variance:0', False)]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bn1 = model.layers[1]\n", - "[(var.name, var.trainable) for var in bn1.variables]" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From :1: Layer.updates (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "This property should not be used in TensorFlow 2.0, as updates are applied automatically.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bn1.updates" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.8750 - accuracy: 0.7123 - val_loss: 0.5525 - val_accuracy: 0.8228\n", - "Epoch 2/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5753 - accuracy: 0.8031 - val_loss: 0.4724 - val_accuracy: 0.8476\n", - "Epoch 3/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5189 - 
accuracy: 0.8205 - val_loss: 0.4375 - val_accuracy: 0.8546\n", - "Epoch 4/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4827 - accuracy: 0.8322 - val_loss: 0.4152 - val_accuracy: 0.8594\n", - "Epoch 5/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4565 - accuracy: 0.8408 - val_loss: 0.3997 - val_accuracy: 0.8636\n", - "Epoch 6/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4398 - accuracy: 0.8472 - val_loss: 0.3867 - val_accuracy: 0.8700\n", - "Epoch 7/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4242 - accuracy: 0.8511 - val_loss: 0.3762 - val_accuracy: 0.8706\n", - "Epoch 8/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4144 - accuracy: 0.8541 - val_loss: 0.3710 - val_accuracy: 0.8736\n", - "Epoch 9/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4024 - accuracy: 0.8581 - val_loss: 0.3630 - val_accuracy: 0.8756\n", - "Epoch 10/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.3915 - accuracy: 0.8623 - val_loss: 0.3572 - val_accuracy: 0.8754\n" - ] - } - ], - "source": [ - "history = model.fit(X_train, y_train, epochs=10,\n", - " validation_data=(X_valid, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이따금 활성화 함수전에 BN을 적용해도 잘 동작합니다(여기에는 논란의 여지가 있습니다). 또한 `BatchNormalization` 층 이전의 층은 편향을 위한 항이 필요 없습니다. `BatchNormalization` 층이 이를 무효화하기 때문입니다. 
따라서 필요 없는 파라미터이므로 `use_bias=False`를 지정하여 층을 만들 수 있습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Dense(300, use_bias=False),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Activation(\"relu\"),\n", - " keras.layers.Dense(100, use_bias=False),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Activation(\"relu\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 1.0317 - accuracy: 0.6757 - val_loss: 0.6767 - val_accuracy: 0.7816\n", - "Epoch 2/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.6790 - accuracy: 0.7792 - val_loss: 0.5566 - val_accuracy: 0.8180\n", - "Epoch 3/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5960 - accuracy: 0.8037 - val_loss: 0.5007 - val_accuracy: 0.8360\n", - "Epoch 4/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5447 - accuracy: 0.8192 - val_loss: 0.4666 - val_accuracy: 0.8448\n", - "Epoch 5/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5109 - accuracy: 0.8279 - val_loss: 0.4434 - val_accuracy: 0.8534\n", - "Epoch 6/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4898 - accuracy: 0.8336 - val_loss: 0.4263 - val_accuracy: 0.8550\n", - "Epoch 
7/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4712 - accuracy: 0.8397 - val_loss: 0.4130 - val_accuracy: 0.8572\n", - "Epoch 8/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4560 - accuracy: 0.8441 - val_loss: 0.4035 - val_accuracy: 0.8606\n", - "Epoch 9/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4441 - accuracy: 0.8473 - val_loss: 0.3943 - val_accuracy: 0.8642\n", - "Epoch 10/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4332 - accuracy: 0.8505 - val_loss: 0.3874 - val_accuracy: 0.8662\n" - ] - } - ], - "source": [ - "history = model.fit(X_train, y_train, epochs=10,\n", - " validation_data=(X_valid, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 그레이디언트 클리핑" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "모든 케라스 옵티마이저는 `clipnorm`이나 `clipvalue` 매개변수를 지원합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(clipvalue=1.0)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(clipnorm=1.0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 사전 훈련된 층 재사용하기" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 케라스 모델 재사용하기" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "패션 MNIST 훈련 세트를 두 개로 나누어 보죠:\n", - "* `X_train_A`: 샌달과 셔츠(클래스 5와 6)을 제외한 모든 이미지\n", - "* `X_train_B`: 샌달과 셔츠 이미지 중 처음 200개만 가진 작은 훈련 세트\n", - "\n", - "검증 세트와 테스트 세트도 이렇게 나눕니다. 하지만 이미지 개수는 제한하지 않습니다.\n", - "\n", - "A 세트(8개의 클래스를 가진 분류 문제)에서 모델을 훈련하고 이를 재사용하여 B 세트(이진 분류)를 해결해 보겠습니다. A 작업에서 B 작업으로 약간의 지식이 전달되기를 기대합니다. 왜냐하면 A 세트의 클래스(스니커즈, 앵클 부츠, 코트, 티셔츠 등)가 B 세트에 있는 클래스(샌달과 셔츠)와 조금 비슷하기 때문입니다. 
하지만 `Dense` 층을 사용하기 때문에 동일한 위치에 나타난 패턴만 재사용할 수 있습니다(반대로 합성곱 층은 훨씬 많은 정보를 전송합니다. 학습한 패턴을 이미지의 어느 위치에서나 감지할 수 있기 때문입니다. CNN 장에서 자세히 알아 보겠습니다)." - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [], - "source": [ - "def split_dataset(X, y):\n", - " y_5_or_6 = (y == 5) | (y == 6) # sandals or shirts\n", - " y_A = y[~y_5_or_6]\n", - " y_A[y_A > 6] -= 2 # class indices 7, 8, 9 should be moved to 5, 6, 7\n", - " y_B = (y[y_5_or_6] == 6).astype(np.float32) # binary classification task: is it a shirt (class 6)?\n", - " return ((X[~y_5_or_6], y_A),\n", - " (X[y_5_or_6], y_B))\n", - "\n", - "(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)\n", - "(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)\n", - "(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)\n", - "X_train_B = X_train_B[:200]\n", - "y_train_B = y_train_B[:200]" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(43986, 28, 28)" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X_train_A.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(200, 28, 28)" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X_train_B.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([4, 0, 5, 7, 7, 7, 4, 4, 3, 4, 0, 1, 6, 3, 4, 3, 2, 6, 5, 3, 4, 5,\n", - " 1, 3, 4, 2, 0, 6, 7, 1], dtype=uint8)" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "y_train_A[:30]" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "data": { - 
"text/plain": [ - "array([1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0.,\n", - " 0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.], dtype=float32)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "y_train_B[:30]" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "model_A = keras.models.Sequential()\n", - "model_A.add(keras.layers.Flatten(input_shape=[28, 28]))\n", - "for n_hidden in (300, 100, 50, 50, 50):\n", - " model_A.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n", - "model_A.add(keras.layers.Dense(8, activation=\"softmax\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "model_A.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/20\n", - "1375/1375 [==============================] - 5s 3ms/step - loss: 0.5927 - accuracy: 0.8103 - val_loss: 0.3894 - val_accuracy: 0.8662\n", - "Epoch 2/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.3523 - accuracy: 0.8785 - val_loss: 0.3286 - val_accuracy: 0.8834\n", - "Epoch 3/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.3170 - accuracy: 0.8896 - val_loss: 0.3011 - val_accuracy: 0.8984\n", - "Epoch 4/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2974 - accuracy: 0.8975 - val_loss: 0.2895 - val_accuracy: 0.9018\n", - "Epoch 5/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2835 - 
accuracy: 0.9020 - val_loss: 0.2774 - val_accuracy: 0.9071\n", - "Epoch 6/20\n", - "1375/1375 [==============================] - 5s 3ms/step - loss: 0.2730 - accuracy: 0.9063 - val_loss: 0.2736 - val_accuracy: 0.9061\n", - "Epoch 7/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2642 - accuracy: 0.9092 - val_loss: 0.2717 - val_accuracy: 0.9083\n", - "Epoch 8/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2573 - accuracy: 0.9126 - val_loss: 0.2590 - val_accuracy: 0.9141\n", - "Epoch 9/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2519 - accuracy: 0.9137 - val_loss: 0.2562 - val_accuracy: 0.9145\n", - "Epoch 10/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2469 - accuracy: 0.9155 - val_loss: 0.2542 - val_accuracy: 0.9155\n", - "Epoch 11/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2423 - accuracy: 0.9178 - val_loss: 0.2494 - val_accuracy: 0.9163\n", - "Epoch 12/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2383 - accuracy: 0.9188 - val_loss: 0.2512 - val_accuracy: 0.9128\n", - "Epoch 13/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2351 - accuracy: 0.9198 - val_loss: 0.2448 - val_accuracy: 0.9158\n", - "Epoch 14/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2316 - accuracy: 0.9210 - val_loss: 0.2416 - val_accuracy: 0.9175\n", - "Epoch 15/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2288 - accuracy: 0.9213 - val_loss: 0.2451 - val_accuracy: 0.9200\n", - "Epoch 16/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2255 - accuracy: 0.9223 - val_loss: 0.2386 - val_accuracy: 0.9200\n", - "Epoch 17/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2231 - accuracy: 0.9232 - val_loss: 0.2411 - val_accuracy: 0.9178\n", - "Epoch 18/20\n", - "1375/1375 
[==============================] - 4s 3ms/step - loss: 0.2201 - accuracy: 0.9245 - val_loss: 0.2428 - val_accuracy: 0.9150\n", - "Epoch 19/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2178 - accuracy: 0.9249 - val_loss: 0.2329 - val_accuracy: 0.9205\n", - "Epoch 20/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2156 - accuracy: 0.9261 - val_loss: 0.2331 - val_accuracy: 0.9208\n" - ] - } - ], - "source": [ - "history = model_A.fit(X_train_A, y_train_A, epochs=20,\n", - " validation_data=(X_valid_A, y_valid_A))" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [], - "source": [ - "model_A.save(\"my_model_A.h5\")" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [], - "source": [ - "model_B = keras.models.Sequential()\n", - "model_B.add(keras.layers.Flatten(input_shape=[28, 28]))\n", - "for n_hidden in (300, 100, 50, 50, 50):\n", - " model_B.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n", - "model_B.add(keras.layers.Dense(1, activation=\"sigmoid\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [], - "source": [ - "model_B.compile(loss=\"binary_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/20\n", - "7/7 [==============================] - 0s 42ms/step - loss: 0.9573 - accuracy: 0.4650 - val_loss: 0.6314 - val_accuracy: 0.6004\n", - "Epoch 2/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.5692 - accuracy: 0.7450 - val_loss: 0.4784 - val_accuracy: 0.8529\n", - "Epoch 3/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.4503 - accuracy: 0.8650 - val_loss: 0.4102 - val_accuracy: 0.8945\n", - "Epoch 4/20\n", - 
"7/7 [==============================] - 0s 16ms/step - loss: 0.3879 - accuracy: 0.8950 - val_loss: 0.3647 - val_accuracy: 0.9178\n", - "Epoch 5/20\n", - "7/7 [==============================] - 0s 17ms/step - loss: 0.3435 - accuracy: 0.9250 - val_loss: 0.3300 - val_accuracy: 0.9320\n", - "Epoch 6/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.3081 - accuracy: 0.9300 - val_loss: 0.3019 - val_accuracy: 0.9402\n", - "Epoch 7/20\n", - "7/7 [==============================] - 0s 15ms/step - loss: 0.2800 - accuracy: 0.9350 - val_loss: 0.2804 - val_accuracy: 0.9422\n", - "Epoch 8/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.2564 - accuracy: 0.9450 - val_loss: 0.2606 - val_accuracy: 0.9473\n", - "Epoch 9/20\n", - "7/7 [==============================] - 0s 17ms/step - loss: 0.2362 - accuracy: 0.9550 - val_loss: 0.2428 - val_accuracy: 0.9523\n", - "Epoch 10/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.2188 - accuracy: 0.9600 - val_loss: 0.2281 - val_accuracy: 0.9544\n", - "Epoch 11/20\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.2036 - accuracy: 0.9700 - val_loss: 0.2150 - val_accuracy: 0.9584\n", - "Epoch 12/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1898 - accuracy: 0.9700 - val_loss: 0.2036 - val_accuracy: 0.9584\n", - "Epoch 13/20\n", - "7/7 [==============================] - 0s 17ms/step - loss: 0.1773 - accuracy: 0.9750 - val_loss: 0.1931 - val_accuracy: 0.9615\n", - "Epoch 14/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1668 - accuracy: 0.9800 - val_loss: 0.1838 - val_accuracy: 0.9635\n", - "Epoch 15/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1570 - accuracy: 0.9900 - val_loss: 0.1746 - val_accuracy: 0.9686\n", - "Epoch 16/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1481 - accuracy: 0.9900 - val_loss: 0.1674 - val_accuracy: 0.9686\n", - "Epoch 17/20\n", - 
"7/7 [==============================] - 0s 16ms/step - loss: 0.1406 - accuracy: 0.9900 - val_loss: 0.1604 - val_accuracy: 0.9706\n", - "Epoch 18/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1334 - accuracy: 0.9900 - val_loss: 0.1539 - val_accuracy: 0.9706\n", - "Epoch 19/20\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.1268 - accuracy: 0.9900 - val_loss: 0.1482 - val_accuracy: 0.9716\n", - "Epoch 20/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1208 - accuracy: 0.9900 - val_loss: 0.1431 - val_accuracy: 0.9716\n" - ] - } - ], - "source": [ - "history = model_B.fit(X_train_B, y_train_B, epochs=20,\n", - " validation_data=(X_valid_B, y_valid_B))" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: \"sequential_5\"\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "flatten_5 (Flatten) (None, 784) 0 \n", - "_________________________________________________________________\n", - "batch_normalization_3 (Batch (None, 784) 3136 \n", - "_________________________________________________________________\n", - "dense_215 (Dense) (None, 300) 235200 \n", - "_________________________________________________________________\n", - "batch_normalization_4 (Batch (None, 300) 1200 \n", - "_________________________________________________________________\n", - "activation (Activation) (None, 300) 0 \n", - "_________________________________________________________________\n", - "dense_216 (Dense) (None, 100) 30000 \n", - "_________________________________________________________________\n", - "batch_normalization_5 (Batch (None, 100) 400 \n", - "_________________________________________________________________\n", - "activation_1 (Activation) (None, 100) 0 
\n", - "_________________________________________________________________\n", - "dense_217 (Dense) (None, 10) 1010 \n", - "=================================================================\n", - "Total params: 270,946\n", - "Trainable params: 268,578\n", - "Non-trainable params: 2,368\n", - "_________________________________________________________________\n" - ] - } - ], - "source": [ - "model.summary()" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [], - "source": [ - "model_A = keras.models.load_model(\"my_model_A.h5\")\n", - "model_B_on_A = keras.models.Sequential(model_A.layers[:-1])\n", - "model_B_on_A.add(keras.layers.Dense(1, activation=\"sigmoid\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "model_A_clone = keras.models.clone_model(model_A)\n", - "model_A_clone.set_weights(model_A.get_weights())" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [], - "source": [ - "for layer in model_B_on_A.layers[:-1]:\n", - " layer.trainable = False\n", - "\n", - "model_B_on_A.compile(loss=\"binary_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/4\n", - "7/7 [==============================] - 0s 39ms/step - loss: 0.5803 - accuracy: 0.6500 - val_loss: 0.5842 - val_accuracy: 0.6329\n", - "Epoch 2/4\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.5436 - accuracy: 0.6800 - val_loss: 0.5466 - val_accuracy: 0.6724\n", - "Epoch 3/4\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.5066 - accuracy: 0.7300 - val_loss: 0.5144 - val_accuracy: 0.7099\n", - "Epoch 4/4\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.4749 - accuracy: 0.7500 - val_loss: 
0.4855 - val_accuracy: 0.7312\n", - "Epoch 1/16\n", - "7/7 [==============================] - 0s 41ms/step - loss: 0.3964 - accuracy: 0.8100 - val_loss: 0.3461 - val_accuracy: 0.8631\n", - "Epoch 2/16\n", - "7/7 [==============================] - 0s 15ms/step - loss: 0.2799 - accuracy: 0.9350 - val_loss: 0.2603 - val_accuracy: 0.9260\n", - "Epoch 3/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.2083 - accuracy: 0.9650 - val_loss: 0.2110 - val_accuracy: 0.9544\n", - "Epoch 4/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1670 - accuracy: 0.9800 - val_loss: 0.1790 - val_accuracy: 0.9696\n", - "Epoch 5/16\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.1397 - accuracy: 0.9800 - val_loss: 0.1562 - val_accuracy: 0.9757\n", - "Epoch 6/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1198 - accuracy: 0.9950 - val_loss: 0.1394 - val_accuracy: 0.9807\n", - "Epoch 7/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1051 - accuracy: 0.9950 - val_loss: 0.1267 - val_accuracy: 0.9838\n", - "Epoch 8/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.0938 - accuracy: 0.9950 - val_loss: 0.1164 - val_accuracy: 0.9858\n", - "Epoch 9/16\n", - "7/7 [==============================] - 0s 15ms/step - loss: 0.0848 - accuracy: 1.0000 - val_loss: 0.1067 - val_accuracy: 0.9888\n", - "Epoch 10/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.0763 - accuracy: 1.0000 - val_loss: 0.1001 - val_accuracy: 0.9899\n", - "Epoch 11/16\n", - "7/7 [==============================] - 0s 15ms/step - loss: 0.0705 - accuracy: 1.0000 - val_loss: 0.0941 - val_accuracy: 0.9899\n", - "Epoch 12/16\n", - "7/7 [==============================] - 0s 15ms/step - loss: 0.0650 - accuracy: 1.0000 - val_loss: 0.0889 - val_accuracy: 0.9899\n", - "Epoch 13/16\n", - "7/7 [==============================] - 0s 17ms/step - loss: 0.0603 - accuracy: 1.0000 - val_loss: 0.0840 
- val_accuracy: 0.9899\n", - "Epoch 14/16\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.0560 - accuracy: 1.0000 - val_loss: 0.0804 - val_accuracy: 0.9899\n", - "Epoch 15/16\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.0526 - accuracy: 1.0000 - val_loss: 0.0770 - val_accuracy: 0.9899\n", - "Epoch 16/16\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.0497 - accuracy: 1.0000 - val_loss: 0.0740 - val_accuracy: 0.9899\n" - ] - } - ], - "source": [ - "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=4,\n", - " validation_data=(X_valid_B, y_valid_B))\n", - "\n", - "for layer in model_B_on_A.layers[:-1]:\n", - " layer.trainable = True\n", - "\n", - "model_B_on_A.compile(loss=\"binary_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])\n", - "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=16,\n", - " validation_data=(X_valid_B, y_valid_B))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "마지막 점수는 어떤가요?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "63/63 [==============================] - 0s 2ms/step - loss: 0.1408 - accuracy: 0.9705\n" - ] - }, - { - "data": { - "text/plain": [ - "[0.1408407837152481, 0.9704999923706055]" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_B.evaluate(X_test_B, y_test_B)" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "63/63 [==============================] - 0s 2ms/step - loss: 0.0683 - accuracy: 0.9930\n" - ] - }, - { - "data": { - "text/plain": [ - "[0.0683005154132843, 0.9929999709129333]" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_B_on_A.evaluate(X_test_B, y_test_B)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "훌륭하네요! 꽤 많은 정보를 전달했습니다: 오차율이 4배나 줄었네요!" 
- ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4.066666666666663" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(100 - 96.95) / (100 - 99.25)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 고속 옵티마이저" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 모멘텀 옵티마이저" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 네스테로프 가속 경사" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9, nesterov=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## AdaGrad" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.Adagrad(lr=0.001)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## RMSProp" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.RMSprop(lr=0.001, rho=0.9)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Adam 옵티마이저" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Adamax 옵티마이저" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.Adamax(lr=0.001, beta_1=0.9, beta_2=0.999)" - ] - }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "## Nadam 옵티마이저" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.Nadam(lr=0.001, beta_1=0.9, beta_2=0.999)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 학습률 스케줄링" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 거듭제곱 스케줄링" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```lr = lr0 / (1 + steps / s)**c```\n", - "* 케라스는 `c=1`과 `s = 1 / decay`를 사용합니다." - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(lr=0.01, decay=1e-4)" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4855 - accuracy: 0.8303 - val_loss: 0.4029 - val_accuracy: 0.8604\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3781 - accuracy: 0.8658 - val_loss: 0.3716 - val_accuracy: 0.8720\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3457 - accuracy: 0.8770 - val_loss: 0.3749 - val_accuracy: 0.8742\n", - "Epoch 4/25\n", - "1719/1719
[==============================] - 5s 3ms/step - loss: 0.3250 - accuracy: 0.8830 - val_loss: 0.3501 - val_accuracy: 0.8800\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3101 - accuracy: 0.8892 - val_loss: 0.3447 - val_accuracy: 0.8794\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2966 - accuracy: 0.8935 - val_loss: 0.3412 - val_accuracy: 0.8828\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2867 - accuracy: 0.8974 - val_loss: 0.3355 - val_accuracy: 0.8864\n", - "Epoch 8/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2777 - accuracy: 0.9010 - val_loss: 0.3408 - val_accuracy: 0.8834\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2698 - accuracy: 0.9020 - val_loss: 0.3289 - val_accuracy: 0.8880\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2628 - accuracy: 0.9048 - val_loss: 0.3259 - val_accuracy: 0.8880\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2571 - accuracy: 0.9080 - val_loss: 0.3265 - val_accuracy: 0.8876\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2512 - accuracy: 0.9098 - val_loss: 0.3331 - val_accuracy: 0.8830\n", - "Epoch 13/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2461 - accuracy: 0.9127 - val_loss: 0.3253 - val_accuracy: 0.8892\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2422 - accuracy: 0.9135 - val_loss: 0.3286 - val_accuracy: 0.8900\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2374 - accuracy: 0.9152 - val_loss: 0.3241 - val_accuracy: 0.8880\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2335 - accuracy: 0.9170 - val_loss: 
0.3202 - val_accuracy: 0.8904\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2299 - accuracy: 0.9181 - val_loss: 0.3233 - val_accuracy: 0.8912\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2262 - accuracy: 0.9200 - val_loss: 0.3188 - val_accuracy: 0.8932\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2232 - accuracy: 0.9210 - val_loss: 0.3227 - val_accuracy: 0.8902\n", - "Epoch 20/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2199 - accuracy: 0.9221 - val_loss: 0.3207 - val_accuracy: 0.8912\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2168 - accuracy: 0.9236 - val_loss: 0.3206 - val_accuracy: 0.8918\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2142 - accuracy: 0.9245 - val_loss: 0.3179 - val_accuracy: 0.8942\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2116 - accuracy: 0.9250 - val_loss: 0.3193 - val_accuracy: 0.8908\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2088 - accuracy: 0.9266 - val_loss: 0.3212 - val_accuracy: 0.8886\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2068 - accuracy: 0.9266 - val_loss: 0.3211 - val_accuracy: 0.8926\n" - ] - } - ], - "source": [ - "n_epochs = 25\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZgAAAEeCAYAAAC30gOQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deXxU1f3/8dcnCxACYZclsqnIKou4glbqbr8uqNV+W9e6YO3Xr/1p1arf2qKttVZtra1r1eJasRVERaUqpgoqoiKbyKIsEjYBWQJJCOHz++Pe4DDMJBPIzCSZ9/PxmEfm3nPunc9cYz6ce849x9wdERGRupaV7gBERKRxUoIREZGkUIIREZGkUIIREZGkUIIREZGkUIIREZGkUIIRaUDMbImZXZeE837fzGr1zIKZXWxmJfG2RZRgpEExszFm5uGrwsy+NLO7zSw/3bElwswuM7MZZlZiZhvNbJaZ/TbdcdWRscB+6Q5C6o+cdAcgsgfeBC4AcoGjgUeBfODKdAZVxcyauPu2GPsvAe4DrgHeIoh/AHBkaiNMDncvBUrTHYfUH2rBSENU7u6r3P0rd38WeAYYWVVoZt8xs2lmVmZmq83sT2bWJCw72cw2m1lOuH1A2Bp6KOL435rZmxHb/cxsYnjcGjP7h5l1iigfY2avmNkvzGw5sDxO3KcD49z9YXdf5O7z3P2f7n5tZCUz+14Yf6mZrTOzl82sWUSVZmb2sJltMrPlZnZ91PGtzOyRMNbNZvYfMzskqs6FZrbUzLaa2StAx6jy0WY2J2pftbfAYtwyG21mc8zsv83sizCWF82sfUSdnPC/zzfh609m9qCZFcX7HGk4lGCkMSglaA1gZoXAa8AMYAhwKfBD4I6w7hSgGVD1B3cEsDb8ScS+ovB8nYF3gDnAYcDxQAtggplF/v9zDDAQOBk4Lk6cq4DDzCzubSQzOxl4CXgDGAp8F/gPu/6/eg0wGzgYuBP4g5kdGR5vwESgEDg1vAbvAJPD74KZHQ6MAR4BBgMvA7fFi2kv9QB+AJwJnBjGc3tE+XXAxcBlwBEE3/NHSYpFUs3d9dKrwbwI/jC+ErF9GEGCGBtu3w4sBLIi6lwMlAPNw+0PgJvC908DvyZIUp2B5mHdo8Ly24C3omJoAzhwWERMXwNNa4i9M/B+eOzC8LMvBHIj6kwFnqvmHEuAf0TtWwj8Mnx/LFAC5EXV+RS4IXz/LPBGVPmjwZ+DndujgTlRdS4GSmqxPRooA1pF7Ps/YFHE9krgxohtA+YDRen+XdNr719qwUhDdHLYSV5G8Af7HeB/w7K+wAfuviOi/hSgCXBAuF3Ety2WYwhaPNPCfcOA7cCHYflQ4Dvh55WEt4C+Csv2j/iMOe5eXl3Q7r7S3Y8EDgLuJfhj+jDwoZk1D6sNIeifqc6sqO0VwD4R8TYHvo6KeUBEvH0Jrluk6O26stTdN8aK1cxaAZ349loHGS5iWxo2dfJLQ/QOMAqoAFa4e0WCx1UNwy0CrjKzvkAB8HG477vAGuB9/7aTPovgllOsocGrI95vSTR4d59DcMvtfjM7CngXOJegJZSI6O/rfHsLLSuM6+gYx21KNEZgB0ECjJRbi+OrVBerNHJKMNIQbXX3RXHK5gHnmllWRCvmKGAb8EW4PQVoCtwATHH3yrBT+W8Ef5xfjzjfJwR//JfWIpHVxmfhzxbhzxkEfTh/28PzfULQYb/D3b+MU2ceQX9HpOjtr4GOZmZhqwKC/po64+4bzWwVcCgwGXb2IR1K0F8lDZz+JSGNzQNAF+ABM+trZv8F/B74q7tvBXD3EoJWy/nA2+FxHwD7EvyhLYo43/1AK2CsmR1uZvuZ2fHhKK2WtQksHB11i5kNN7PuZnYE8CSwFfh3WO124JxwJFs/M+tvZtdE3EKryZsE/TgTzOwUM+tpZkea2a1mVtWquQ843sxuMrNeZnY5QSd8pCKgLXCzme1vZpcC36/
N903Qn4EbzOxMM+sN3EPQV6WFqhoBJRhpVNy9GDiFoC/jU+Bx4B/AzVFViwha8EXhcWUE/TDl7NonsAIYTnDL6HVgLkHSKQ9ftfEGcDjwPLAAGB/uP8HdF4Sf9yrBH/tTCFoz/yG4dbdjt7PFELY2vkfQIvgbQYf580Bvgv4P3P0DgtF1VxL055xF0CEfeZ55YfmosM4JwO9q+X0TcTfwFPB3giQPwXUpS8JnSYrZt61fEZH0M7MZBLcu/7fGylKvqQ9GRNLGzLoDJxG01HKBywmeJ7o8nXFJ3UjpLTIza2tm481sS/gUccwHqixwZ/gU87rwvUWUP2Jm881sh5ldHOP4a8xsVfik8+Nm1jSJX0tE9twOgmeBPiQYKn0EcIq7f5TWqKROpLoP5n6C0TwdgfOAB82sf4x6owim/hhE8K+Z04ArIspnAj8lGDGzCzM7CbiRYCROd4LJ926tu68gInXFg+l+jnL3Vu7e0t0Pd/d/13ykNAQp64OxYLbbb4ABVR2aZvYUUOzuN0bVfQ8Y4+6PhNuXApe7+xFR9aYAj7r7mIh9zwJL3P3mcPs44Bl374SIiKRMKvtgDgS2VyWX0EyCJ6mj9Q/LIuvFaunE0h+YEHVsRzNr5+7rIiua2SiC1hJZeQVDc1rts7OsR4EG2AHs2LGDrCxdi2i6LrvTNYmtsV+XBQsWrHX3DrHKUplgWrD7k8QbgVjPErQIyyLrtYh66Ku6z4k+lvBzdkkwYQvpEYCmnXt554vuBaCwdR5Tbzy2ho/JDEVFRYwYMSLdYdQ7ui670zWJrbFfFzNbGq8slWm1hGBajkgFwOYE6hYQTKKXyP28WMcS53N20yw3i+tP6p1IVRERqUYqE8wCIMfMekXsG0Tw4Fq0uWFZTfViiXXs6ujbY/GcMagLI4cUJvhRIiIST8oSjLtvAcYBt5lZvpkNB84geIo32pPAtWZWaGZdgJ8TMRGgmTWxYAEmA3LNrFnE2hxPApeG02y0Bn5JApMI9ijIol/nAmZ8tQE9fCoisvdS3fP0UyCPYMbafwBXuvtcMzvadl0p72GCRZBmE8w6OzHcV+XfBOt3DCPoQykFvgPg7q8DfyCYY2oZsJRgvY8aXXJUTxasLmHqooQaOyIiUo2UPsnv7uuJWNo2Yv+7fDubbNV8SjeEr1jnGVHD5/wR+GNt4zttUGd+/9o8Hp+6mKN6ta/5ABERiavxjp3bA01zsjn/iO5M/nwNX34dd+lxERFJgBJMlPMO706T7CzGvLck3aGIiDRoSjBROrRsyumDu/DPj5azcWsy1pcSEckMSjAx/Hh4D0orKhn70bJ0hyIi0mApwcTQv0srjtivLU+8t5TtlQmt8yQiIlGUYOK4ZHhPijeU8u/PVqc7FBGRBkkJJo7j+nakW9vmPD5lcbpDERFpkJRg4sjOMi4e1oOPln7DzK82pDscEZEGRwmmGuccsi8tmubw96lqxYiI1JYSTDVaNsvl3EO68sqslazeVJbucEREGhQlmBpcPKwHle489X7cJQ9ERCQGJZgadGvXnBP7deSZaUspq6hMdzgiIg2GEkwCLhnek2+2VvDijOJ0hyIi0mAowSTgsJ5t6d+lgMenLtZaMSIiCVKCSYCZcclwrRUjIlIbSjAJOnVQZ9q3aMrjGrIsIpIQJZgENc3J5oJwrZgvtFaMiEiNlGBq4bwjugVrxUxdku5QRETqPSWYWmjfoilnDO7Cvz7WWjEiIjVRgqmlHw/vSWlFJc9N11oxIiLVUYKppX5dCjhyv3Y88d4SrRUjIlINJZg9cMlRPVmxsYxJc7VWjIhIPEowe+DYPvvQLj+Xa8Z+Ss8bJzL895P1lL+ISJScdAfQEL08cwWbyrZTURk81V+8oZSbxs0GYOSQwnSGJiJSb6gFswfumjR/Z3KpUlpRyV2T5qcpIhGR+kcJZg+s2FBaq/0iIplICWYPdGmdV6v9IiKZSAlmD1x/Um/ycrN32ZebbVx/Uu80RSQ
iUv+ok38PVHXk3zVpPis2lJKTbeTlZnFCv45pjkxEpP5QgtlDI4cU7kw0Hy/9hrMffI/7317EDSf3SXNkIiL1g26R1YGh3dtw1sGFPPruYpas3ZLucERE6gUlmDpy48l9aJKTxW9e+SzdoYiI1AtKMHVkn4JmXH3cAbz1+Rre/nxNusMREUk7JZg6dPGwnuzXIZ/bXvmM8u2V6Q5HRCStUppgzKytmY03sy1mttTMfhSnnpnZnWa2LnzdaWYWUT7YzD42s63hz8ERZU3N7CEzW21m683sZTNLyfwtTXKy+NWp/Vi8dguPT1mSio8UEam3Ut2CuR/YBnQEzgMeNLP+MeqNAkYCg4CBwGnAFQBm1gSYADwNtAGeACaE+wF+BhwZHtcF+Ab4S5K+z25G9N6H4/t25C+TF7J6U1mqPlZEpN5JWYIxs3zgbOAWdy9x9ynAS8AFMapfBNzj7svdvRi4B7g4LBtBMLz6Xncvd/f7AAOODct7ApPcfbW7lwFjgVhJLGl+dWo/tu9w7nh1Xio/VkSkXknlczAHAtvdfUHEvpnAMTHq9g/LIuv1jyib5e6Rs03OCve/DjwG/NnMugAbCFpKr8UKyMxGEbSW6NChA0VFRbX8SvGd1C2bFz9dQf+m6+nVJrvmA+qpkpKSOr0ujYWuy+50TWLL5OuSygTTAtgUtW8j0DJO3Y1R9VqE/TDRZdHnWQh8BRQDlcBs4KpYAbn7I8AjAL179/YRI0Yk+FVqdtiw7Xx0z38Yv6wJL59xFNlZVvNB9VBRURF1eV0aC12X3emaxJbJ1yWVfTAlQEHUvgJgcwJ1C4CSsNVS03nuB5oC7YB8YBxxWjDJ1LxJDjd/ry+frdzEc9OXpfrjRUTSLpUJZgGQY2a9IvYNAubGqDs3LItVby4wMHJUGUGHflX5YGCMu69393KCDv7DzKx9HXyHWjl1YGcO79mWuyfNZ8PWban+eBGRtEpZgnH3LQStidvMLN/MhgNnAE/FqP4kcK2ZFYZ9KT8HxoRlRQS3vq4OhyRX3f6aHP6cDlxoZq3MLBf4KbDC3dcm43tVx8wYfXp/NpZW8Mc3FtR8gIhII5LqYco/BfKANcA/gCvdfa6ZHW1mJRH1HgZeJug/mQNMDPfh7tsIhjBfSNCJfwkwMtwPcB1QRtAX8zXwPeDMJH+vuPp2LuCCI7rz9AdL+WxFdBeUiEjjldLZlN19PUFyiN7/LkHnfdW2AzeEr1jnmQEMjVO2jmDkWL1xzQkH8tLMFYx+eS5jRx3Brnf3REQaJ00VkwKtmzfh+pP68OHi9bw8a2W6wxERSQklmBT5waFdGVBYwO8mzmPrtu3pDkdEJOmUYFIkO8sYfVp/Vm0q47Db36LnjRMZ/vvJvDijON2hiYgkhVa0TKHl35SSbUZJedCCKd5Qyk3jZgPfLsMsItJYqAWTQndNmk/lLjPcQGlFJXdNmp+miEREkkcJJoVWbCit1X4RkYZMCSaFurTOq9V+EZGGTAkmha4/qTd5ubvPrHzliP3SEI2ISHIpwaTQyCGF3HHWQRS2zsOADi2bkm0wae5qduzwGo8XEWlINIosxUYOKdxlxNiz05Zx8/jZ/PXtRVx9XK9qjhQRaVjUgkmzHx7WlTOHFPKnNxcwdVHK5+MUEUmahBOMmXU0s+vM7MGqqe/NbLiZ9UxeeI2fmfHbkQPYv0MLfvbcDFZvKkt3SCIidSKhBGNmQ4H5BJNIXsq3C36dANyenNAyR37THB4872C2lFfyv/+YwfbKHekOSURkryXagrkb+LO7DwHKI/ZPAobXeVQZqFfHlvzurAF8uHg9d/9ba8eISMOXaIIZCjwRY/9KoGPdhZPZzhyyLz88rBsP/ecL3pq3Ot3hiIjslUQTTCnQJsb+PgSLh0kd+fVp/ejfpYBrn5/JV+u3pjscEZE9lmiCmQD82syahttuZj2AO4EXkhBXxmqWm80D5x3Mjh3OVc9+Qvn2ynSHJCKyRxJNMNcBbQm
WIG4OTAEWESxZ/MvkhJa5urfL565zBjJz+UbuePXzdIcjIrJHEnrQ0t03AUeZ2bHAwQSJ6RN3fzOZwWWykwd05tKjevLYlMUc2qMt/zWwc7pDEhGplYQSjJldCIx198nA5Ij9TYD/dvcnkxRfRrvxlD7MWPYNv3hhFn07t2S/Di3SHZKISMISvUX2d6BVjP0twzJJgtzsLP76o4PJzTbO+9sHDLtDK2GKSMORaIIxINZsjN2AjXUXjkTr0jqPcw7Zl5WbylmxsQzn25UwlWREpD6r9haZmc0mSCwO/MfMtkcUZwPdgVeTF54ATJy1ard9VSthaqllEamvauqD+Vf4cwAwESiJKNsGLEHDlJNOK2GKSENUbYJx91sBzGwJQSe/ZmJMgy6t8yiOkUy0EqaI1GcJ9cG4+xNKLukTbyXM7w/V7TERqb8SnU25iZndamYLzKzMzCojX8kOMtNFr4TZqaAZ7fNzeXzqEuYUa4yFiNRPia5o+RvgB8AdwJ+A64EewH8DtyQlMtlF9EqYxRtKOfeh97ngsWmMveJIDuzYMo3RiYjsLtFhyucCP3H3h4FKYIK7Xw38mmBNGEmxwtZ5PHPZ4eRmZ3H+o9NYsnZLukMSEdlFogmmI/BZ+L4EaB2+fx04sa6DksT0aJ/PM5cdTkXlDs57dFrMgQAiIumSaIJZBnQJ3y8CTgrfH0kwlb+kSa+OLXnq0sPZVFbBeX/7gDVacllE6olEE8x44Ljw/Z+BW81sMTAGeDQJcUktDChsxZgfH8aazeWc/9g01m/Zlu6QREQSHqZ8k7vfHr7/F3AU8BfgLHf/vyTGJwka2r0Nj150CEvXbeXCx6exqawi3SGJSIZLtAWzC3ef5u5/dPdXzCw/0ePMrK2ZjTezLWa21Mx+FKeemdmdZrYufN1pZhZRPtjMPjazreHPwVHHH2xm75hZiZmtNrOf7cn3bGiG7d+eh84fyvxVm/nx36ezpXx7zQeJiCTJHiUYADNrZmbXA4trcdj9BFPMdATOAx40s/4x6o0CRgKDgIHAacAV4ec2IVhh82mCZZyfACaE+zGz9gSDDx4G2gEHAP+u7fdrqL7bZx/u++8hzFj2DZc/+RFlFXpMSUTSo9oEEz5gebuZTTez98xsZLj/QuBL4P8RPBdTo7ClczZwi7uXuPsU4CXgghjVLwLucffl7l4M3ANcHJaNIHh+5153L3f3+whmez42LL8WmOTuz4Tlm919XiIxNhanHNSZu88ZxPtfruOsB6Zqmn8RSYuaHrQcDfwP8AYwHPinmf2NoMP/JuBZd0/0Zv+BwHZ3XxCxbyZwTIy6/cOyyHr9I8pmuXvk8gGzwv2vA0cAs83sPYLWyzTgf9x9WfSHmNkogtYSHTp0oKioKMGvUv+1BY7qnM27Kzbv3Fe8oZQb/vkpn837jGFdchM6T0lJSaO6LnVF12V3uiaxZfJ1qSnBnAtc7O7jzWwQMIPgtlR/d6/tDf4WwKaofRsJFi2LVXdjVL0WYT9MdFn0efYlWNb5BGA28AfgHwQJchfu/gjwCEDv3r19xIgRiX+bBuD/PphM9CjybTtg4rJsbv7RiITOUVRURGO7LnVB12V3uiaxZfJ1qSnBdAWmA7j7TDPbBty5B8kFggc0C6L2FQCbE6hbAJS4u5tZTecpBca7+3QAM7sVWGtmrdw9oybu0jT/IpJONXXy5wLlEdsV7PkKlguAHDPrFbFvEDA3Rt25YVmsenOBgZGjyggGAlSVz2LX1TdjrcSZEeJN59+hZdMURyIimSiRUWR3mNl9ZnYf0AQYXbUdsb9G7r4FGAfcZmb5ZjYcOAN4Kkb1J4FrzazQzLoAPyd4qBOgiGA+tKvNrKmZXRXunxz+/DtwZjiUOZdgMs4pmdZ6gfjT/G8uq+CDL9elISIRySQ1JZh3gP2Bg8LXe0C3iO2DCFa7TNRPgTxgDUG/yJXuPtfMjg5vfVV5GHiZoA9lDsFqmg8DuPs2giHMFwIbgEu
AkeF+3H0ycHN4zBqCjv6Yz9s0dtHT/Be2zuOW/+pLl9Z5XPDYNF74eHm6QxSRRqymFS1H1OWHuft6guQQvf9dgs77qm0Hbghfsc4zAxhazec8CDy4t/E2BtHT/AN8/5Cu/PSZj/n5P2eyZN0Wrjn+QLKyLM4ZRET2zB4/aCkNV6u8XMb8+DB+cEhX/jJ5EVc/N0MPZIpInUt0wTFpZHKzs/j92QexX4d87njtc1ZsKOWRCw+hfQsNABCRuqEWTAYzM644Zn8eOv9gPlu5iTMfmMqiNbFGjYuI1J4SjHDygM6MHXUkZRU7OPOB95i6aG26QxKRRkC3yASAQV1b8+L/DOfSMdO56PEPOfvgQqYsWkfxhlIKP5jM9Sf13m2wgIhIdRJKMGbWLU6RA2Xu/nXdhSTpUtg6j3/+5EjOeeh9xn707RDm4g2l3DRuNoCSjIgkLNFbZEsIpuWPfi0BVpnZN2b2RzNTi6iBa9ksN+ZiZaUVldw1aX4aIhKRhirRhPBDgkkjHyKYnRjgcIKZiEcDrYFfEswH9uu6DVFSbeWGspj7NYeZiNRGognmSuAadx8XsW+ymc0Hfubux5jZGuBWlGAavC6t8yiOkUyaN82mrKKSZjGmnxERiZboLbLDCaZtiTYHODR8/z7BVPnSwMWawyw7y9hSXsnpf53CvJXRqy6IiOwu0QSzlHBhriiXA1ULeXUA1tdFUJJekXOYQdD5f885g3jiksP4ZmsFZ/x1Ko9NWcyOHRk7UbWIJCDRW2Q/B14ws+8Rrg8DHEIwEebZ4fahwPN1G56kS9UcZtGLJb3+s6P5xQuz+c0rn1E0fw13nzOIjgXN0heoiNRbCbVg3H0i0At4iWBxr4LwfW93fzWs84C7X5usQKV+aNeiKX+7cCi3nzmA6UvWc/K97zBp7qp0hyUi9VDCw4rd/SvgpiTGIg2EmXHe4d05vGc7/t/YGVzx1Mf88LCu3HJqP5o30Uh1EQkk/NfAzJoDg4F9iGr5RI0ukwxxwD4tGHflcP74xgIefucLpn25npFDujB2+nJWbCilS+s8zQAgksESfZL/eIIFwtrFKHZA41YzVJOcLG48pQ/HHNiBnzz9EX98Y+HOMs0AIJLZEh1F9meCFSL3dfesqJeSi3Dk/u1i3h7TDAAimSvRW2Q9gNPdfUUSY5EGbtVGzQAgIt9KtAUzFeidzECk4esSPjcTzQye+3CZnpsRyTCJJpiHgLvN7DIzO9zMDo58JTNAaThizQDQNCeL7u2ac+O42Zz54HvMWr4hTdGJSKoleovsX+HPR2KUqZNfgG878u+aNH+XUWRnDO7Ci58Wc/vEzznj/qn88LBuXH9ib9rkN0lzxCKSTIkmmJ5JjUIajaoZAKKdOWRfjuvbkXvfWMgT7y/h1dkrueGkPvzg0K5kZ1nqAxWRpEsowbj70mQHIo1fQbNcfnVaP849dF9+NWEuN4+fzXPTl3HbGQNYsnbLbi0fDW0WadjiJhgzOwt42d0rwvdx6UFLqY0+nQoYO+oIXpq5gt9OnMfI+6eSnWVUhoMA9PyMSONQXQvmX0AnYA3f9sHEoj4YqTUz44zBhRzbZx+OvOMtSsordymven5GCUak4YqbYNw9K9Z7kbrUslkuW6KSSxU9PyPSsClxSNrFe34mK8t4fvpXbK/ckeKIRKQu1Gayy32B7xB7sss/1nFckkGuP6k3N42bTWnFty2ZJtlZdCxoyg0vzOKBokX87PhenD6oUCPORBqQRCe7PA94HNgOfE3Q71LFASUY2WPVPT/z5rw1/PGNBVwzdib3v/0F1xx/IKcM6ESWEo1IvZdoC+Y24B7gFnePfcNcZC/Ee37mhH4dOa7PPrw+dxV/emMB//PsJ/Tp1JJrTziQE/p1ZMKnKzS8WaSeSjTBdAQeVXKRdMjKMr53UGdO6t+JV2at4N43FzLqqY/p2iaP1ZvK2Rb20Wh4s0j9kmgn/6vA4ckMRKQm2VnB0OY3rvkOd31/ICs
2lu1MLlW0PIBI/ZFoC+YN4E4z6w/MBioiC/WgpaRSTnYW5xzSlRv+NStmuYY3i9QPiSaYh8OfN8co04OWkhZdWudRHCOZmMHdk+Zz4ZHd2aegWRoiExFI8BZZjFUs92hFSzNra2bjzWyLmS01sx/FqWdmdqeZrQtfd5qZRZQPNrOPzWxr+HNwjHM0MbN5ZrY80fikYYm1PECTnCz6dyng/qJFDL9zMtc+/ylzV2xMU4Qima3GFoyZ5QJTgAvdfW9vbt8PbCMYNDAYmGhmM919blS9UcBIYBBBC+kNYDHwkJk1ASYA9wIPAFcAE8ysl7tvizjH9QRDqlvuZcxST8Ub3jxySCFL123h71OX8M+PvmLcJ8UcsV9bLjtqP47tsw9ZWcaLM4o1+kwkyWpMMOFklz3Z9dmXWjOzfOBsYIC7lwBTzOwl4ALgxqjqFwH3uPvy8Nh7gMsJFj4bEcZ9r7s7cJ+ZXQccC7we1u8JnA9cC/xtb+KW+i3e8Obu7fIZfXp/rjnhQMZOX8aYqUu47MmP6Nk+n4O7tWbi7JWUVWj0mUgyWfA3uoZKZncBuPv1e/xBZkOAqe7ePGLfdcAx7n5aVN2NwInuPi3cPgR4291bmtk1YdkpEfVfCcvvidh+DPgGeNrd940T0yiC1hIdOnQY+vzzz+/p12u0SkpKaNGiRbrD2GuVO5yPVlcyaUkFX26MPfVMu2bGPSOaxyyL1liuS13SNYmtsV+X7373ux+7+yGxyhLt5M8HzjOzE4CPgS2Rhe5+dQLnaAFsitq3kdi3sFqEZZH1WoT9MNFlu5zHzM4Est19vJmNqC4gd3+EcJXO3r17+4gR1VbPSEVFRTSW63IccIM7+930aszm+PoyT/i7NqbrUld0TWLL5OuSaILpC3wSvt8vqizRW2clQEHUvgJgcwJ1C4ASd3czi3ue8DbcH4DvJRiTZBgzizv6DIPRL83lB4d2pW/n6F8xEamtRFe0/G4dfNYCICfsjF8Y7hsERHfwE+4bBHwYo95c4OdmZv7t/b2BBAMIegE9gHfDQWdNgFZmtgo4wt2X1MH3kAYu3uSa/bq05Nlpyxjz3hIG7tuKcw/pyumDu1DQLDeN0Yo0XAnPpkb0gg8AABVkSURBVLy33H2LmY0DbjOzywhGkZ0BDItR/UngWjN7laCF9HPgL2FZEVAJXG1mDxF0/gNMBnYAXSPOMwz4K3AwwYgykWpHn32zZRsvflrM2Olf8csX5/DbiZ/xvQGdOffQrhzes+3Ouc+KN5RS+MFkjT4TqUZtpuv/LvBDoBtBy2Andz82wdP8lGBW5jXAOuBKd59rZkcDr7l7VU/YwwS34maH24+G+3D3bWY2Mtz3e2AeMDJiiPKqiJjXAzvcfec+EYg/+qxNfhN+PLwnFw/rwezijYyd/hUvfbqCcTOKaZ+fy4bS7WzX0s4iCUnoQUszuxh4jaAjfQRBa6ANQcvgs0Q/zN3Xu/tId893927u/my4/92I5IIHbnD3tuHrhojbYbj7DHcf6u557n6wu8+I83lF8UaQiVTHzBi4b2tuP/MgPvy/47nnnEFsKqvcmVyqaO4zkfgSnezyOuAqd/8hwTxkN7n7EOBpgg55kUYrr0k2Zw/dl4o4K2sWbyhl/IzlbC6riFkukqkSvUW2H/Bm+L6cYKgwBP0bRez+oKRIoxNv9Fm2wTVjZ9IkJ4tje+/DqYM6c1yfjuQ10RR9ktkSTTDr+PZ5lWJgADALaAfEXlBdpJGJNfosLzeb340cQLf2zXl55komzl7J63NX0bxJNsf17cipAztzzIEdeH3OKk1NIxkn0QTzLnAiQaf78wTTs5xA8OzaG0mKTaReiRx9VryhlMKoRDG0e1tuObUfHy5ezyuzVvDanFW8PHMFTbON7Tug0jU4QDJLognmKqBq3vM7gO3AcIJk89skxCVSL1WNPov3dHZ2lnHk/u04cv923Hp6f977Yh0/efpjyit3XQy2tKKSO1//XAlGGrV
EH7RcH/F+B3Bn0iISaSRysrP4zoEdKN0We6XxlRvLOPeh9zmu7z4c368j+3dovPNVSWaqzXMwHQlmPt4fuMXd15rZcGCFuy9OVoAiDV28wQEtm+WwZdt27njtc+547XN6ts/nuD5BsjmkextysrO0rIA0aAklGDMbCrxFsCZLf+AuYC1wAnAgEHPhMBGJPzjgN2cMYOSQQlZsKOWtz9fw5merefL9pTw6ZTGt8nI5oEM+s4o3UlGpvhtpmBJtwdwN/Nndf21mkZNTTgJ+XPdhiTQe1U1NA0EL54IjunPBEd0pKd/OlIVf88Znaxg/YzlRz3VSWlHJHyap70YahkQTzFDg0hj7VxKsTiki1Yg3NU20Fk1zOHlAZ04e0Jlxn8Re7XvFhjJ+8tTHHNWrPUf3ak/3dvl1Ha5InUg0wZQSTA0TrQ/BvGIiUsfi9d00b5LN7OKNvD43mGKvW9vmQbI5oD3D9m9Pq+a56ruReiHRBDMB+LWZnRNuu5n1IBhN9kIS4hLJeHEf7DzzIM4Y3IUv125hysK1vLtwLS99uoJnpy0jy6CwTR4rN5RpUk5Ju0QTzHXAqwSTXDYHphDcGnsP+GVyQhPJbDX13ezfoQX7d2jBRcN6UFG5g0+/2sC7C9fyYNGimJNy/nbiZ5zYvyPNm6RslQ7JcIk+B7MJOMrMjiWYQTkL+MTd36z+SBHZG4n23eRmZ3Foj7Yc2qMtf3lrYcw6a0u2MXD0vxm4bysO69mOw3u2ZWiPNrssqKZba1KXavVPGXefTLCwFwBm1h24y93PrevARGTPxOu7aZffhHMP7cqHi9fz2JQveeg/X5Bl0K9LAYf1aIfj/OPDZZRVBLNG69aa7K29bSu3Bs6ui0BEpG7E67u55dR+OxNF6bZKZiz7hmmL1/Ph4vU8M20p5dt3X45Aw6Jlb+hmrEgjU1PfDQRr3Aw7oD3DDmgPQPn2Svr88nU8xvlWbCjjgsemMaRbG4Z0a82Qrq1p3fzbRW2rbqtpGWmJpgQj0ggl2ndTpWlOdrXDoteWbOOvkxfufPBzv/b5DOnWBjN4eeaKna0f3VaTSEowIgJUPyx65JBCtpRvZ9byjcz46hs+WbqB/yxYw9qSbbudp7Sikt+9Oo/TB3UhK8tS+RWknqk2wZjZSzUcX1CHsYhIGtV0ay2/ac7OpQgA3J39bno15m21NZvLGXTrv+lfWMBBha0YUNiKgwpb0aNd/s6koxFrjV9NLZh1CZRrJmWRRqI2t9bMLO5ttdZ5uZw6qDOzizfxxPtL2RbeQmvZNIf+hQXk5WYxZdE6TeTZyFWbYNxdE1mKSFzxbquNPr3/zkRRUbmDhatLmF28gdnFG5ldvIkPvly/27lKKyoZ/dJcurVrTp9OLeM+EKqWT8OhPhgR2WM1LSMNwUOg/boU0K9LAT84NNjX88aJMW+tbSit4KwH3sMMurdtTt/OBfTpVEDfzi3p27mAj5as5+bxc3YmNLV86jclGBHZKzUtIx1LvFtrHQua8pszBvD5qs3MW7mJz1dt5vW5q/AwGxnslphKKyq5a9J8JZh6SAlGRFIu3q21m07py4n9O3Fi/047928p38781Zv5fOVmbh4/O+b5ijeUcv6j0+jVsQW99mnJgR1b0KtjS1rlaRqcdFKCEZGUS+Rh0Cr5TXM4uFsbDu7WhvvfXhSz5ZOXm82msgqe+/CrXZLWPi2bcmDHlmQZvP+lBhWkmhKMiKRFbR8GhfgtnzvOCp7V2bHDKd5QysI1m1m4uoQFq0tYuGYzs5dvjHlr7ebxs/l6czn7dchnvw4t6Nomj5zsrF3qqeWz55RgRKTBqKnlk5VldG3bnK5tm3Nsn28X2+1548SY59u6rZLbX523czs32+jeLp/92gcJZ2PpNsZ9UqyZCvaQEoyINCh70vKJN6igsHUeE68+ii++3sKXX5fs/Pnl2i28PX/NzltqkUorKrllwhzMoEe7fHq0y6dV89zd6mmONiUYEckA8W6
tXX9Sb1o3b8LQ7k0Y2n3XVeG3V+6g1/+9FnM49eay7fzsuU93brdunkv3dvn0bNec7u3yWbuljH99pJaPEoyINHq1GVRQJSc7K27Lp0urZoy55DCWrN3C0nVbWbxuC0vXbWH6km+YMHPFzmHVkUorKvnli3PYXFax8zbevm3yaJqTvUu9xtTnowQjIhmhLgcV3HByHw7s2JIDO7bc7Zjqlj4oKd/OLRPm7tw2g04FzejaJkg4W8oreOvzNXs02q0+JiYlGBGROPak5VPd0gddWjdj/E+Hs2z9Vr5av5Vl4Wv5+lKmLlrLqk1lux1TWlHJL16YxdRFaylsk0dh6zwK2+Sxb+vmdGrVjCY5Wbw4o3iXRFhfbsmlNMGYWVvgMeBEYC1wk7s/G6OeAb8HLgt3PQrc6B40PM1scHievsA84FJ3/zQsux64COgefsYD7n5XMr+XiDReddryOakPHQua0bGgGYf2aLvbcfGm0CnfvoN3Fn7Nms3lu9x+M4OOLZuxfss2tlXuuiJpaUUld77+OWcM7kLwJzW2ZLZ8Ut2CuR/YBnQEBgMTzWymu8+NqjcKGAkMIpgZ4g2CWZsfMrMmwATgXuAB4Apggpn1cvdtBLNJXAjMAvYH/m1mX7n7c0n/diIiJDZHWyzVjXabeuOxlG+vZNXGMpZ/U0rxN6Us3xD8fOGT5THPt3JjGf1/PYnOrZrRpXUenVs1o3OrPLq0Dn7OW7mRP725kLKK2g9GqEpMTTodMDRenZQlGDPLB84GBrh7CTAlXG/mAuDGqOoXAfe4+/Lw2HuAy4GHgBFh3PeGLZr7zOw64FjgdXf/Q8R55pvZBGA4oAQjIimzJ3O0VTfaDYLbb93b5dO9Xf4ux33w5bqYialVXg5nH9yVlRtLWbGxjPmrvubrkvKYgxCqlFZUcsuLc9iybTudwtZWp1bNaNu8yS5r+UTHGYt5dZ9Uh8xsCDDV3ZtH7LsOOMbdT4uquxE40d2nhduHAG+7e0szuyYsOyWi/ith+T1R5zHgE+Bhd38oRkyjCFpLdOjQYejzzz9fR9+28SgpKaFFixbpDqPe0XXZna5JbLW9Lu+tqOCFBRWsK3PaNTPOPjCXYV12f84m+pgxc7axLeIuWZMsuHhAk92O3b7D+abMWV/m3PHh7n0+8WQbtG5qtGlmLNu0Y+dnrXzi/1G+cmHMe3CpvEXWAtgUtW8jsPswjKDuxqh6LcKEEV1W3XlGA1nA32MF5O6PAI8A9O7d2xP9V0Ymqc2/vjKJrsvudE1iq+11GQHcXMvPGAH024O+lCcXTI47GOGFK4examMZqzeVsWpjGas2le98v2hDTWtRBlKZYErYfYnlAmBzAnULgBJ3dzNL6DxmdhVBX8zR7l6+N4GLiNR3dT0YoXOrPDq3yot53PDfx05M0bJqrFF3FgA5ZtYrYt8gILqDn3DfoDj15gIDbddhEQMjz2NmlxD06xxX1Y8jIiK7GjmkkDvOOojC1nkYwWCCqolDq3P9Sb3Jy82utg6ksAXj7lvMbBxwm5ldRjCK7AxgWIzqTwLXmtmrBKPIfg78JSwrAiqBq83sIYLOf4DJAGZ2HvA74Lvu/mWSvo6ISKOwJy2fyFFyK6upl8oWDMBPgTxgDfAP4Ep3n2tmR4e3vqo8DLwMzAbmABPDfYRDkUcS3P7aAFwCjAz3A/wWaAdMN7OS8LVbB7+IiOy5kUMKmXrjsWxbtejjeHVS+hyMu68nSA7R+98l6Lyv2nbghvAV6zwzgJhjr929Z50EKyIieyXVLRgREckQSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjA
iIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUSjAiIpIUKU0wZtbWzMab2RYzW2pmP4pTz8zsTjNbF77uNDOLKB9sZh+b2dbw5+BEjxURkdRIdQvmfmAb0BE4D3jQzPrHqDcKGAkMAgYCpwFXAJhZE2AC8DTQBngCmBDur/ZYERFJnZQlGDPLB84GbnH3EnefArwEXBCj+kXAPe6+3N2LgXuAi8OyEUAOcK+7l7v7fYABxyZwrIiIpEhOCj/rQGC7uy+I2DcTOCZG3f5hWWS9/hFls9zdI8pnhftfr+HYXZjZKIIWD0C5mc1J7KtklPbA2nQHUQ/puuxO1yS2xn5duscrSGWCaQFsitq3EWgZp+7GqHotwr6U6LLo88Q9Niop4e6PAI8AmNlH7n5I4l8nM+i6xKbrsjtdk9gy+bqksg+mBCiI2lcAbE6gbgFQEiaIms5T3bEiIpIiqUwwC4AcM+sVsW8QMDdG3blhWax6c4GBUSPDBkaVxztWRERSJGUJxt23AOOA28ws38yGA2cAT8Wo/iRwrZkVmlkX4OfAmLCsCKgErjazpmZ2Vbh/cgLHVueR2n+rjKDrEpuuy+50TWLL2OtiqbxzZGZtgceBE4B1wI3u/qyZHQ285u4twnoG3AlcFh76KPCLqttcZjYk3NcPmAdc6u4zEjlWRERSI6UJRkREMoemihERkaRQghERkaTI+AST6PxomcbMisyszMxKwtf8dMeUamZ2lZl9ZGblZjYmquw4M/s8nA/vbTOL+7BZYxPvuphZDzPziN+ZEjO7JY2hplQ46Oix8O/IZjP71MxOiSjPuN+ZjE8wJD4/Wia6yt1bhK/e6Q4mDVYAvyUYmLKTmbUnGBF5C9AW+AgYm/Lo0ifmdYnQOuL35jcpjCvdcoCvCGYnaQX8Eng+TLwZ+TuTyif5652I+dEGuHsJMMXMquZHuzGtwUnaufs4ADM7BNg3ougsYK67/zMsHw2sNbM+7v55ygNNsWquS0YLH8UYHbHrFTNbDAwF2pGBvzOZ3oKJNz+aWjCBO8xsrZlNNbMR6Q6mHtllvrvwD8sX6PemylIzW25mfw//5Z6RzKwjwd+YuWTo70ymJ5jazI+WaX4B7AcUEjwo9rKZ7Z/ekOqNmubDy1RrgUMJJj8cSnA9nklrRGliZrkE3/2JsIWSkb8zmZ5gajM/WkZx92nuvjlcEuEJYCrwvXTHVU/o9yaGcBmOj9x9u7uvBq4CTjSzRv1HNJqZZRHMULKN4BpAhv7OZHqCqc38aJnOCdbdkaj57sK+vP3R7020qqe4M+bvTDiTyGMEg4bOdveKsCgjf2cy5j98LLWcHy1jmFlrMzvJzJqZWY6ZnQd8h2C9nYwRfvdmQDaQXXU9gPHAADM7Oyz/FcEaRY22szZSvOtiZoebWW8zyzKzdsB9QJG7R98aasweBPoCp7l7acT+zPydcfeMfhEMGXwR2AIsA36U7pjS/QI6ANMJmu8bgA+AE9IdVxquw2iCf4VHvkaHZccDnwOlBBOw9kh3vOm+LsAPgcXh/0srCSae7ZTueFN4XbqH16KM4JZY1eu8TP2d0VxkIiKSFBl9i0xERJJHCUZERJJCCUZERJJCCUZERJJCCUZERJJCCUZERJJCCUakkQrXZvl+uuOQzKUEI5IEZjYm/AMf/fog3bGJpEpGrwcjkmRvEqwtFGlbOgIRSQe1YESSp9zdV0W91sPO21dXmdnEcAndpWZ2fuTBZnaQmb1pZqVmtj5sFbWKqnORmc0Oly9ebWZPRMXQ1sz+GS4J/mX0Z4gkkxKMSPrcCrwEDCZYc+fJcJXIqtl2JxHMZXUYcCYwjIhlis3sCuBh4O/AQILlFOZEfcavgAkEM/mOBR43s27J+0oi39JcZCJJYGZjgPMJJj6MdL+7/8LMHHjU3S+POOZNYJW7n29mlwN3A/u6++awfATwNtDL3ReZ2XLgaXePubx3+Bm/d/ebwu0cggX2Rrn703X
4dUViUh+MSPK8A4yK2rch4v37UWXvA/8Vvu9LMJ175IJU7wE7gH5mtolgtdG3aohhVtUbd99uZl8D+yQWvsjeUYIRSZ6t7r4oCeetzW2HiqhtR7fGJUX0iyaSPkfE2J4Xvp8HHBS13PAwgv9n57n7GqAYOC7pUYrsIbVgRJKnqZl1itpX6e5fh+/PMrPpBItPfZ8gWRwelj1DMAjgSTP7FdCGoEN/XESr6HbgT2a2GpgINAeOc/d7kvWFRGpDCUYkeY4nWNkxUjGwb/h+NHA2wdLCXwM/dvfpAO6+1cxOAu4FPiQYLDAB+FnVidz9QTPbBvwcuBNYD7yarC8jUlsaRSaSBuEIr3Pc/V/pjkUkWdQHIyIiSaEEIyIiSaFbZCIikhRqwYiISFIowYiISFIowYiISFIowYiISFIowYiISFL8f0AK1QjupA5nAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "learning_rate = 0.01\n", - "decay = 1e-4\n", - "batch_size = 32\n", - "n_steps_per_epoch = len(X_train) // batch_size\n", - "epochs = np.arange(n_epochs)\n", - "lrs = learning_rate / (1 + decay * epochs * n_steps_per_epoch)\n", - "\n", - "plt.plot(epochs, lrs, \"o-\")\n", - "plt.axis([0, n_epochs - 1, 0, 0.01])\n", - "plt.xlabel(\"Epoch\")\n", - "plt.ylabel(\"Learning Rate\")\n", - "plt.title(\"Power Scheduling\", fontsize=14)\n", - "plt.grid(True)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 지수 기반 스케줄링" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```lr = lr0 * 0.1**(epoch / s)```" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [], - "source": [ - "def exponential_decay_fn(epoch):\n", - " return 0.01 * 0.1**(epoch / 20)" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [], - "source": [ - "def exponential_decay(lr0, s):\n", - " def exponential_decay_fn(epoch):\n", - " return lr0 * 0.1**(epoch / s)\n", - " return exponential_decay_fn\n", - "\n", - "exponential_decay_fn = exponential_decay(lr0=0.01, s=20)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 25" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.8717 - accuracy: 0.7511 - val_loss: 0.9296 - val_accuracy: 0.7502\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.7778 - accuracy: 0.7756 - val_loss: 0.6632 - val_accuracy: 0.8124\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.6531 - accuracy: 0.8051 - val_loss: 0.7064 - val_accuracy: 0.7834\n", - "Epoch 4/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.5903 - accuracy: 0.8228 - val_loss: 0.5971 - val_accuracy: 0.8258\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.5431 - accuracy: 0.8379 - val_loss: 0.5389 - val_accuracy: 0.8488\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.5012 - accuracy: 0.8489 - val_loss: 0.5297 - val_accuracy: 0.8562\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.4695 - accuracy: 0.8578 - val_loss: 0.5339 - val_accuracy: 0.8446\n", - "Epoch 8/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.4328 - accuracy: 0.8667 - val_loss: 0.7335 - val_accuracy: 0.8280\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.4036 - accuracy: 0.8750 - val_loss: 0.5740 - val_accuracy: 0.8618\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.3857 - accuracy: 0.8807 - val_loss: 0.4759 - val_accuracy: 0.8672\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.3601 - accuracy: 0.8864 - val_loss: 0.4779 - val_accuracy: 0.8630\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.3322 - accuracy: 0.8924 - val_loss: 0.4897 - val_accuracy: 0.8636\n", - "Epoch 13/25\n", - "1719/1719 
[==============================] - 7s 4ms/step - loss: 0.3204 - accuracy: 0.8987 - val_loss: 0.4892 - val_accuracy: 0.8690\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2968 - accuracy: 0.9039 - val_loss: 0.4638 - val_accuracy: 0.8734\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2779 - accuracy: 0.9104 - val_loss: 0.5087 - val_accuracy: 0.8758\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2632 - accuracy: 0.9150 - val_loss: 0.4718 - val_accuracy: 0.8770\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2479 - accuracy: 0.9191 - val_loss: 0.5167 - val_accuracy: 0.8774\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2369 - accuracy: 0.9243 - val_loss: 0.4961 - val_accuracy: 0.8786\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2202 - accuracy: 0.9292 - val_loss: 0.5280 - val_accuracy: 0.8834\n", - "Epoch 20/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2113 - accuracy: 0.9312 - val_loss: 0.5491 - val_accuracy: 0.8758\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.1985 - accuracy: 0.9363 - val_loss: 0.5469 - val_accuracy: 0.8790\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.1878 - accuracy: 0.9393 - val_loss: 0.5512 - val_accuracy: 0.8822\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.1781 - accuracy: 0.9425 - val_loss: 0.5817 - val_accuracy: 0.8848\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.1709 - accuracy: 0.9450 - val_loss: 0.5742 - val_accuracy: 0.8818\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.1638 - accuracy: 0.9484 - 
val_loss: 0.6042 - val_accuracy: 0.8842\n" - ] - } - ], - "source": [ - "lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[lr_scheduler])" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(history.epoch, history.history[\"lr\"], \"o-\")\n", - "plt.axis([0, n_epochs - 1, 0, 0.011])\n", - "plt.xlabel(\"Epoch\")\n", - "plt.ylabel(\"Learning Rate\")\n", - "plt.title(\"Exponential Scheduling\", fontsize=14)\n", - "plt.grid(True)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이 스케줄 함수는 두 번째 매개변수로 현재 학습률을 받을 수 있습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [], - "source": [ - "def exponential_decay_fn(epoch, lr):\n", - " return lr * 0.1**(1 / 20)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "에포크가 아니라 반복마다 학습률을 업데이트하려면 사용자 정의 콜백 클래스를 작성해야 합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.7848 - accuracy: 0.7711 - val_loss: 0.8494 - val_accuracy: 0.7580\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.6253 - accuracy: 0.8057 - val_loss: 0.7549 - val_accuracy: 0.7640\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.5786 - accuracy: 0.8196 - val_loss: 0.6209 - val_accuracy: 0.8128\n", - "Epoch 4/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.5178 - accuracy: 0.8388 - val_loss: 0.5581 - val_accuracy: 0.8470\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.4988 - accuracy: 0.8480 - val_loss: 0.5315 - val_accuracy: 0.8428\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.4408 - accuracy: 0.8641 - val_loss: 0.4676 - val_accuracy: 0.8598\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 9s 5ms/step 
- loss: 0.4063 - accuracy: 0.8689 - val_loss: 0.6225 - val_accuracy: 0.8402\n", - "Epoch 8/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.3650 - accuracy: 0.8791 - val_loss: 0.4607 - val_accuracy: 0.8638\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.3442 - accuracy: 0.8854 - val_loss: 0.4588 - val_accuracy: 0.8570\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.3196 - accuracy: 0.8927 - val_loss: 0.4427 - val_accuracy: 0.8814\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2950 - accuracy: 0.8996 - val_loss: 0.4303 - val_accuracy: 0.8810\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2699 - accuracy: 0.9077 - val_loss: 0.4377 - val_accuracy: 0.8674\n", - "Epoch 13/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2528 - accuracy: 0.9119 - val_loss: 0.4323 - val_accuracy: 0.8862\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2340 - accuracy: 0.9187 - val_loss: 0.4424 - val_accuracy: 0.8794\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2159 - accuracy: 0.9236 - val_loss: 0.4204 - val_accuracy: 0.8878\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2008 - accuracy: 0.9302 - val_loss: 0.4568 - val_accuracy: 0.8928\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1866 - accuracy: 0.9343 - val_loss: 0.4430 - val_accuracy: 0.8920\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1738 - accuracy: 0.9395 - val_loss: 0.4850 - val_accuracy: 0.8928\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1589 - accuracy: 0.9454 - val_loss: 0.4853 - val_accuracy: 0.8948\n", - "Epoch 
20/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1488 - accuracy: 0.9493 - val_loss: 0.4679 - val_accuracy: 0.8890\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1392 - accuracy: 0.9526 - val_loss: 0.5147 - val_accuracy: 0.8906\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1298 - accuracy: 0.9567 - val_loss: 0.5229 - val_accuracy: 0.8906\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1223 - accuracy: 0.9599 - val_loss: 0.5352 - val_accuracy: 0.8884\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1117 - accuracy: 0.9631 - val_loss: 0.5800 - val_accuracy: 0.8884\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1068 - accuracy: 0.9648 - val_loss: 0.5695 - val_accuracy: 0.8898\n" - ] - } - ], - "source": [ - "K = keras.backend\n", - "\n", - "class ExponentialDecay(keras.callbacks.Callback):\n", - " def __init__(self, s=40000):\n", - " super().__init__()\n", - " self.s = s\n", - "\n", - " def on_batch_begin(self, batch, logs=None):\n", - " # 노트: 에포크마다 `batch` 매개변수가 재설정됩니다\n", - " lr = K.get_value(self.model.optimizer.lr)\n", - " K.set_value(self.model.optimizer.lr, lr * 0.1**(1 / s))\n", - "\n", - " def on_epoch_end(self, epoch, logs=None):\n", - " logs = logs or {}\n", - " logs['lr'] = K.get_value(self.model.optimizer.lr)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "lr0 = 0.01\n", - "optimizer = keras.optimizers.Nadam(lr=lr0)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, 
metrics=[\"accuracy\"])\n", - "n_epochs = 25\n", - "\n", - "s = 20 * len(X_train) // 32 # 20 에포크 동안 스텝 횟수 (배치 크기 = 32)\n", - "exp_decay = ExponentialDecay(s)\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[exp_decay])" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [], - "source": [ - "n_steps = n_epochs * len(X_train) // 32\n", - "steps = np.arange(n_steps)\n", - "lrs = lr0 * 0.1**(steps / s)" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(steps, lrs, \"-\", linewidth=2)\n", - "plt.axis([0, n_steps - 1, 0, lr0 * 1.1])\n", - "plt.xlabel(\"Batch\")\n", - "plt.ylabel(\"Learning Rate\")\n", - "plt.title(\"Exponential Scheduling (per batch)\", fontsize=14)\n", - "plt.grid(True)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 기간별 고정 스케줄링" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [], - "source": [ - "def piecewise_constant_fn(epoch):\n", - " if epoch < 5:\n", - " return 0.01\n", - " elif epoch < 15:\n", - " return 0.005\n", - " else:\n", - " return 0.001" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": {}, - "outputs": [], - "source": [ - "def piecewise_constant(boundaries, values):\n", - " boundaries = np.array([0] + boundaries)\n", - " values = np.array(values)\n", - " def piecewise_constant_fn(epoch):\n", - " return values[np.argmax(boundaries > epoch) - 1]\n", - " return piecewise_constant_fn\n", - "\n", - "piecewise_constant_fn = piecewise_constant([5, 15], [0.01, 0.005, 0.001])" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.8830 - accuracy: 0.7497 - val_loss: 1.0209 - val_accuracy: 0.7004\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.9927 - accuracy: 0.6931 - val_loss: 0.8880 - val_accuracy: 0.7178\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.8996 - accuracy: 0.7185 - val_loss: 1.0695 - val_accuracy: 0.6856\n", - "Epoch 4/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.9428 - accuracy: 0.7076 - val_loss: 0.9724 - val_accuracy: 
0.7206\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.9195 - accuracy: 0.7078 - val_loss: 1.2058 - val_accuracy: 0.7076\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.6494 - accuracy: 0.7722 - val_loss: 0.6743 - val_accuracy: 0.7360\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.6002 - accuracy: 0.7929 - val_loss: 0.7192 - val_accuracy: 0.7718\n", - "Epoch 8/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.5850 - accuracy: 0.7999 - val_loss: 0.6331 - val_accuracy: 0.7328\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.5570 - accuracy: 0.8213 - val_loss: 0.6222 - val_accuracy: 0.7668\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.5511 - accuracy: 0.8165 - val_loss: 0.6102 - val_accuracy: 0.8204\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.5360 - accuracy: 0.8182 - val_loss: 0.6345 - val_accuracy: 0.8292\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.5291 - accuracy: 0.8363 - val_loss: 0.6106 - val_accuracy: 0.8334\n", - "Epoch 13/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.5104 - accuracy: 0.8400 - val_loss: 0.6339 - val_accuracy: 0.8244\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.4812 - accuracy: 0.8557 - val_loss: 0.6163 - val_accuracy: 0.7798\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.4852 - accuracy: 0.8543 - val_loss: 0.8202 - val_accuracy: 0.8402\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.3420 - accuracy: 0.8901 - val_loss: 0.5218 - val_accuracy: 0.8708\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 8s 4ms/step - 
loss: 0.3108 - accuracy: 0.9007 - val_loss: 0.5677 - val_accuracy: 0.8652\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2922 - accuracy: 0.9061 - val_loss: 0.6053 - val_accuracy: 0.8768\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2756 - accuracy: 0.9134 - val_loss: 0.5898 - val_accuracy: 0.8774\n", - "Epoch 20/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2668 - accuracy: 0.9170 - val_loss: 0.5603 - val_accuracy: 0.8854\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2574 - accuracy: 0.9201 - val_loss: 0.5782 - val_accuracy: 0.8792\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2518 - accuracy: 0.9230 - val_loss: 0.5958 - val_accuracy: 0.8758\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2396 - accuracy: 0.9254 - val_loss: 0.6070 - val_accuracy: 0.8762\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2405 - accuracy: 0.9264 - val_loss: 0.6612 - val_accuracy: 0.8788\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2244 - accuracy: 0.9291 - val_loss: 0.6447 - val_accuracy: 0.8786\n" - ] - } - ], - "source": [ - "lr_scheduler = keras.callbacks.LearningRateScheduler(piecewise_constant_fn)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 25\n", - "history = model.fit(X_train_scaled, y_train, 
epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[lr_scheduler])" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(history.epoch, [piecewise_constant_fn(epoch) for epoch in history.epoch], \"o-\")\n", - "plt.axis([0, n_epochs - 1, 0, 0.011])\n", - "plt.xlabel(\"Epoch\")\n", - "plt.ylabel(\"Learning Rate\")\n", - "plt.title(\"Piecewise Constant Scheduling\", fontsize=14)\n", - "plt.grid(True)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 성능 기반 스케줄링" - ] - }, - { - "cell_type": "code", - "execution_count": 91, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5882 - accuracy: 0.8079 - val_loss: 0.4696 - val_accuracy: 0.8538\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4975 - accuracy: 0.8391 - val_loss: 0.5668 - val_accuracy: 0.8406\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5165 - accuracy: 0.8419 - val_loss: 0.5295 - val_accuracy: 0.8496\n", - "Epoch 4/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5144 - accuracy: 0.8454 - val_loss: 0.5411 - val_accuracy: 0.8480\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5096 - accuracy: 0.8496 - val_loss: 0.4733 - val_accuracy: 0.8490\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5276 - accuracy: 0.8515 - val_loss: 0.7935 - val_accuracy: 0.8416\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3111 - accuracy: 0.8925 - val_loss: 0.4018 - val_accuracy: 0.8694\n", - "Epoch 8/25\n", - "1719/1719 
[==============================] - 5s 3ms/step - loss: 0.2627 - accuracy: 0.9045 - val_loss: 0.4398 - val_accuracy: 0.8712\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2391 - accuracy: 0.9116 - val_loss: 0.3970 - val_accuracy: 0.8890\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2252 - accuracy: 0.9175 - val_loss: 0.4090 - val_accuracy: 0.8892\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2109 - accuracy: 0.9216 - val_loss: 0.4515 - val_accuracy: 0.8844\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2034 - accuracy: 0.9247 - val_loss: 0.4781 - val_accuracy: 0.8808\n", - "Epoch 13/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1892 - accuracy: 0.9294 - val_loss: 0.4578 - val_accuracy: 0.8904\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1850 - accuracy: 0.9307 - val_loss: 0.4853 - val_accuracy: 0.8808\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1252 - accuracy: 0.9504 - val_loss: 0.4423 - val_accuracy: 0.8902\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1088 - accuracy: 0.9579 - val_loss: 0.4663 - val_accuracy: 0.8946\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0992 - accuracy: 0.9620 - val_loss: 0.4872 - val_accuracy: 0.8932\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0938 - accuracy: 0.9641 - val_loss: 0.5198 - val_accuracy: 0.8862\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0883 - accuracy: 0.9660 - val_loss: 0.5091 - val_accuracy: 0.8922\n", - "Epoch 20/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0668 - accuracy: 0.9752 - val_loss: 
0.5149 - val_accuracy: 0.8946\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0608 - accuracy: 0.9779 - val_loss: 0.5289 - val_accuracy: 0.8946\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0575 - accuracy: 0.9786 - val_loss: 0.5383 - val_accuracy: 0.8928\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0538 - accuracy: 0.9801 - val_loss: 0.5474 - val_accuracy: 0.8928\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0506 - accuracy: 0.9818 - val_loss: 0.5690 - val_accuracy: 0.8908\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0425 - accuracy: 0.9858 - val_loss: 0.5667 - val_accuracy: 0.8904\n" - ] - } - ], - "source": [ - "lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "optimizer = keras.optimizers.SGD(lr=0.02, momentum=0.9)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", - "n_epochs = 25\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[lr_scheduler])" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(history.epoch, history.history[\"lr\"], \"bo-\")\n", - "plt.xlabel(\"Epoch\")\n", - "plt.ylabel(\"Learning Rate\", color='b')\n", - "plt.tick_params('y', colors='b')\n", - "plt.gca().set_xlim(0, n_epochs - 1)\n", - "plt.grid(True)\n", - "\n", - "ax2 = plt.gca().twinx()\n", - "ax2.plot(history.epoch, history.history[\"val_loss\"], \"r^-\")\n", - "ax2.set_ylabel('Validation Loss', color='r')\n", - "ax2.tick_params('y', colors='r')\n", - "\n", - "plt.title(\"Reduce LR on Plateau\", fontsize=14)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### tf.keras 스케줄러" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4894 - accuracy: 0.8277 - val_loss: 0.4096 - val_accuracy: 0.8592\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3820 - accuracy: 0.8650 - val_loss: 0.3742 - val_accuracy: 0.8700\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3487 - accuracy: 0.8767 - val_loss: 0.3736 - val_accuracy: 0.8686\n", - "Epoch 4/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3265 - accuracy: 0.8838 - val_loss: 0.3496 - val_accuracy: 0.8798\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3105 - accuracy: 0.8899 - val_loss: 0.3434 - val_accuracy: 0.8800\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2959 - accuracy: 0.8950 - val_loss: 0.3415 - val_accuracy: 0.8808\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2855 - accuracy: 0.8987 - val_loss: 0.3354 - val_accuracy: 
0.8818\n", - "Epoch 8/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2761 - accuracy: 0.9016 - val_loss: 0.3366 - val_accuracy: 0.8810\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2678 - accuracy: 0.9053 - val_loss: 0.3265 - val_accuracy: 0.8852\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2608 - accuracy: 0.9069 - val_loss: 0.3240 - val_accuracy: 0.8848\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2551 - accuracy: 0.9088 - val_loss: 0.3251 - val_accuracy: 0.8868\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2497 - accuracy: 0.9126 - val_loss: 0.3302 - val_accuracy: 0.8810\n", - "Epoch 13/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2449 - accuracy: 0.9136 - val_loss: 0.3218 - val_accuracy: 0.8872\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2415 - accuracy: 0.9147 - val_loss: 0.3222 - val_accuracy: 0.8860\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2375 - accuracy: 0.9167 - val_loss: 0.3208 - val_accuracy: 0.8876\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2343 - accuracy: 0.9179 - val_loss: 0.3185 - val_accuracy: 0.8882\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2317 - accuracy: 0.9186 - val_loss: 0.3198 - val_accuracy: 0.8890\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2291 - accuracy: 0.9199 - val_loss: 0.3169 - val_accuracy: 0.8904\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2269 - accuracy: 0.9206 - val_loss: 0.3197 - val_accuracy: 0.8888\n", - "Epoch 20/25\n", - "1719/1719 [==============================] - 5s 3ms/step - 
loss: 0.2250 - accuracy: 0.9220 - val_loss: 0.3169 - val_accuracy: 0.8902\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2229 - accuracy: 0.9224 - val_loss: 0.3180 - val_accuracy: 0.8904\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2216 - accuracy: 0.9225 - val_loss: 0.3163 - val_accuracy: 0.8912\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2201 - accuracy: 0.9233 - val_loss: 0.3171 - val_accuracy: 0.8906\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2188 - accuracy: 0.9243 - val_loss: 0.3166 - val_accuracy: 0.8908\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2179 - accuracy: 0.9243 - val_loss: 0.3165 - val_accuracy: 0.8904\n" - ] - } - ], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)\n", - "learning_rate = keras.optimizers.schedules.ExponentialDecay(0.01, s, 0.1)\n", - "optimizer = keras.optimizers.SGD(learning_rate)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", - "n_epochs = 25\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "구간별 고정 스케줄링은 다음을 사용하세요:" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [], - "source": [ - "learning_rate = keras.optimizers.schedules.PiecewiseConstantDecay(\n", - " 
boundaries=[5. * n_steps_per_epoch, 15. * n_steps_per_epoch],\n", - " values=[0.01, 0.005, 0.001])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1사이클 스케줄링" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [], - "source": [ - "K = keras.backend\n", - "\n", - "class ExponentialLearningRate(keras.callbacks.Callback):\n", - " def __init__(self, factor):\n", - " self.factor = factor\n", - " self.rates = []\n", - " self.losses = []\n", - " def on_batch_end(self, batch, logs):\n", - " self.rates.append(K.get_value(self.model.optimizer.lr))\n", - " self.losses.append(logs[\"loss\"])\n", - " K.set_value(self.model.optimizer.lr, self.model.optimizer.lr * self.factor)\n", - "\n", - "def find_learning_rate(model, X, y, epochs=1, batch_size=32, min_rate=10**-5, max_rate=10):\n", - " init_weights = model.get_weights()\n", - " iterations = len(X) // batch_size * epochs\n", - " factor = np.exp(np.log(max_rate / min_rate) / iterations)\n", - " init_lr = K.get_value(model.optimizer.lr)\n", - " K.set_value(model.optimizer.lr, min_rate)\n", - " exp_lr = ExponentialLearningRate(factor)\n", - " history = model.fit(X, y, epochs=epochs, batch_size=batch_size,\n", - " callbacks=[exp_lr])\n", - " K.set_value(model.optimizer.lr, init_lr)\n", - " model.set_weights(init_weights)\n", - " return exp_lr.rates, exp_lr.losses\n", - "\n", - "def plot_lr_vs_loss(rates, losses):\n", - " plt.plot(rates, losses)\n", - " plt.gca().set_xscale('log')\n", - " plt.hlines(min(losses), min(rates), max(rates))\n", - " plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 2])\n", - " plt.xlabel(\"Learning rate\")\n", - " plt.ylabel(\"Loss\")" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " 
keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "430/430 [==============================] - 2s 4ms/step - loss: nan - accuracy: 0.3859 \n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "batch_size = 128\n", - "rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)\n", - "plot_lr_vs_loss(rates, losses)" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "metadata": {}, - "outputs": [], - "source": [ - "class OneCycleScheduler(keras.callbacks.Callback):\n", - " def __init__(self, iterations, max_rate, start_rate=None,\n", - " last_iterations=None, last_rate=None):\n", - " self.iterations = iterations\n", - " self.max_rate = max_rate\n", - " self.start_rate = start_rate or max_rate / 10\n", - " self.last_iterations = last_iterations or iterations // 10 + 1\n", - " self.half_iteration = (iterations - self.last_iterations) // 2\n", - " self.last_rate = last_rate or self.start_rate / 1000\n", - " self.iteration = 0\n", - " def _interpolate(self, iter1, iter2, rate1, rate2):\n", - " return ((rate2 - rate1) * (self.iteration - iter1)\n", - " / (iter2 - iter1) + rate1)\n", - " def on_batch_begin(self, batch, logs):\n", - " if self.iteration < self.half_iteration:\n", - " rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)\n", - " elif self.iteration < 2 * self.half_iteration:\n", - " rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,\n", - " self.max_rate, self.start_rate)\n", - " else:\n", - " rate = self._interpolate(2 * self.half_iteration, self.iterations,\n", - " self.start_rate, self.last_rate)\n", - " rate = max(rate, self.last_rate)\n", - " self.iteration += 1\n", - " K.set_value(self.model.optimizer.lr, rate)" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "430/430 [==============================] - 2s 4ms/step - loss: 0.6572 - accuracy: 0.7740 - val_loss: 0.4872 - val_accuracy: 0.8338\n", - 
"Epoch 2/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.4581 - accuracy: 0.8397 - val_loss: 0.4274 - val_accuracy: 0.8524\n", - "Epoch 3/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.4121 - accuracy: 0.8545 - val_loss: 0.4116 - val_accuracy: 0.8588\n", - "Epoch 4/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3837 - accuracy: 0.8641 - val_loss: 0.3870 - val_accuracy: 0.8686\n", - "Epoch 5/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3639 - accuracy: 0.8717 - val_loss: 0.3765 - val_accuracy: 0.8676\n", - "Epoch 6/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3457 - accuracy: 0.8774 - val_loss: 0.3742 - val_accuracy: 0.8708\n", - "Epoch 7/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3330 - accuracy: 0.8811 - val_loss: 0.3634 - val_accuracy: 0.8704\n", - "Epoch 8/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3185 - accuracy: 0.8862 - val_loss: 0.3958 - val_accuracy: 0.8608\n", - "Epoch 9/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3065 - accuracy: 0.8893 - val_loss: 0.3483 - val_accuracy: 0.8762\n", - "Epoch 10/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2945 - accuracy: 0.8924 - val_loss: 0.3396 - val_accuracy: 0.8812\n", - "Epoch 11/25\n", - "430/430 [==============================] - 2s 4ms/step - loss: 0.2838 - accuracy: 0.8963 - val_loss: 0.3460 - val_accuracy: 0.8796\n", - "Epoch 12/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2709 - accuracy: 0.9023 - val_loss: 0.3644 - val_accuracy: 0.8696\n", - "Epoch 13/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2536 - accuracy: 0.9081 - val_loss: 0.3350 - val_accuracy: 0.8838\n", - "Epoch 14/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2405 - accuracy: 0.9134 - val_loss: 
0.3466 - val_accuracy: 0.8812\n", - "Epoch 15/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2280 - accuracy: 0.9183 - val_loss: 0.3260 - val_accuracy: 0.8840\n", - "Epoch 16/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2160 - accuracy: 0.9234 - val_loss: 0.3292 - val_accuracy: 0.8834\n", - "Epoch 17/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2062 - accuracy: 0.9264 - val_loss: 0.3354 - val_accuracy: 0.8862\n", - "Epoch 18/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1978 - accuracy: 0.9305 - val_loss: 0.3236 - val_accuracy: 0.8906\n", - "Epoch 19/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1892 - accuracy: 0.9337 - val_loss: 0.3233 - val_accuracy: 0.8904\n", - "Epoch 20/25\n", - "430/430 [==============================] - 2s 4ms/step - loss: 0.1821 - accuracy: 0.9369 - val_loss: 0.3221 - val_accuracy: 0.8926\n", - "Epoch 21/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1752 - accuracy: 0.9401 - val_loss: 0.3215 - val_accuracy: 0.8904\n", - "Epoch 22/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1701 - accuracy: 0.9418 - val_loss: 0.3180 - val_accuracy: 0.8956\n", - "Epoch 23/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1655 - accuracy: 0.9438 - val_loss: 0.3186 - val_accuracy: 0.8942\n", - "Epoch 24/25\n", - "430/430 [==============================] - 2s 4ms/step - loss: 0.1628 - accuracy: 0.9458 - val_loss: 0.3176 - val_accuracy: 0.8924\n", - "Epoch 25/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1611 - accuracy: 0.9460 - val_loss: 0.3169 - val_accuracy: 0.8930\n" - ] - } - ], - "source": [ - "n_epochs = 25\n", - "onecycle = OneCycleScheduler(len(X_train) // batch_size * n_epochs, max_rate=0.05)\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,\n", - " 
validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[onecycle])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 규제를 사용해 과대적합 피하기" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## $\\ell_1$과 $\\ell_2$ 규제" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "metadata": {}, - "outputs": [], - "source": [ - "layer = keras.layers.Dense(100, activation=\"elu\",\n", - " kernel_initializer=\"he_normal\",\n", - " kernel_regularizer=keras.regularizers.l2(0.01))\n", - "# or l1(0.1) for ℓ1 regularization with a factor or 0.1\n", - "# or l1_l2(0.1, 0.01) for both ℓ1 and ℓ2 regularization, with factors 0.1 and 0.01 respectively" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 1.5956 - accuracy: 0.8124 - val_loss: 0.7169 - val_accuracy: 0.8340\n", - "Epoch 2/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.7197 - accuracy: 0.8274 - val_loss: 0.6850 - val_accuracy: 0.8376\n" - ] - } - ], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"elu\",\n", - " kernel_initializer=\"he_normal\",\n", - " kernel_regularizer=keras.regularizers.l2(0.01)),\n", - " keras.layers.Dense(100, activation=\"elu\",\n", - " kernel_initializer=\"he_normal\",\n", - " kernel_regularizer=keras.regularizers.l2(0.01)),\n", - " keras.layers.Dense(10, activation=\"softmax\",\n", - " kernel_regularizer=keras.regularizers.l2(0.01))\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 2\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "code", - 
"execution_count": 103, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 1.6313 - accuracy: 0.8113 - val_loss: 0.7218 - val_accuracy: 0.8310\n", - "Epoch 2/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.7187 - accuracy: 0.8273 - val_loss: 0.6826 - val_accuracy: 0.8382\n" - ] - } - ], - "source": [ - "from functools import partial\n", - "\n", - "RegularizedDense = partial(keras.layers.Dense,\n", - " activation=\"elu\",\n", - " kernel_initializer=\"he_normal\",\n", - " kernel_regularizer=keras.regularizers.l2(0.01))\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " RegularizedDense(300),\n", - " RegularizedDense(100),\n", - " RegularizedDense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 2\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 드롭아웃" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5838 - accuracy: 0.7998 - val_loss: 0.3730 - val_accuracy: 0.8644\n", - "Epoch 2/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4209 - accuracy: 0.8443 - val_loss: 0.3406 - val_accuracy: 0.8724\n" - ] - } - ], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dropout(rate=0.2),\n", - " keras.layers.Dense(300, activation=\"elu\", kernel_initializer=\"he_normal\"),\n", - " keras.layers.Dropout(rate=0.2),\n", - 
" keras.layers.Dense(100, activation=\"elu\", kernel_initializer=\"he_normal\"),\n", - " keras.layers.Dropout(rate=0.2),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 2\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 알파 드롭아웃" - ] - }, - { - "cell_type": "code", - "execution_count": 105, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 106, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.6654 - accuracy: 0.7595 - val_loss: 0.5929 - val_accuracy: 0.8406\n", - "Epoch 2/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.5605 - accuracy: 0.7933 - val_loss: 0.5605 - val_accuracy: 0.8400\n", - "Epoch 3/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.5283 - accuracy: 0.8050 - val_loss: 0.4858 - val_accuracy: 0.8596\n", - "Epoch 4/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.5072 - accuracy: 0.8125 - val_loss: 0.4629 - val_accuracy: 0.8582\n", - "Epoch 5/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4915 - accuracy: 0.8186 - val_loss: 0.4698 - val_accuracy: 0.8552\n", - "Epoch 6/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4866 - accuracy: 0.8186 - val_loss: 0.4810 - val_accuracy: 0.8612\n", - "Epoch 7/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4724 - accuracy: 0.8251 - val_loss: 0.4792 - val_accuracy: 0.8650\n", - "Epoch 8/20\n", - "1719/1719 
[==============================] - 6s 3ms/step - loss: 0.4634 - accuracy: 0.8287 - val_loss: 0.4587 - val_accuracy: 0.8628\n", - "Epoch 9/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4578 - accuracy: 0.8307 - val_loss: 0.4105 - val_accuracy: 0.8718\n", - "Epoch 10/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4531 - accuracy: 0.8333 - val_loss: 0.4714 - val_accuracy: 0.8648\n", - "Epoch 11/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4484 - accuracy: 0.8326 - val_loss: 0.4181 - val_accuracy: 0.8674\n", - "Epoch 12/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4468 - accuracy: 0.8330 - val_loss: 0.5498 - val_accuracy: 0.8500\n", - "Epoch 13/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4421 - accuracy: 0.8363 - val_loss: 0.4546 - val_accuracy: 0.8630\n", - "Epoch 14/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4314 - accuracy: 0.8396 - val_loss: 0.4554 - val_accuracy: 0.8676\n", - "Epoch 15/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4341 - accuracy: 0.8367 - val_loss: 0.4458 - val_accuracy: 0.8662\n", - "Epoch 16/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4312 - accuracy: 0.8412 - val_loss: 0.4325 - val_accuracy: 0.8750\n", - "Epoch 17/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4261 - accuracy: 0.8419 - val_loss: 0.5457 - val_accuracy: 0.8554\n", - "Epoch 18/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4258 - accuracy: 0.8417 - val_loss: 0.5050 - val_accuracy: 0.8700\n", - "Epoch 19/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4208 - accuracy: 0.8437 - val_loss: 0.4899 - val_accuracy: 0.8696\n", - "Epoch 20/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4234 - accuracy: 0.8414 - val_loss: 
0.4014 - val_accuracy: 0.8782\n" - ] - } - ], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.AlphaDropout(rate=0.2),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.AlphaDropout(rate=0.2),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.AlphaDropout(rate=0.2),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", - "n_epochs = 20\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "313/313 [==============================] - 1s 2ms/step - loss: 0.4338 - accuracy: 0.8697\n" - ] - }, - { - "data": { - "text/plain": [ - "[0.4337695240974426, 0.869700014591217]" - ] - }, - "execution_count": 107, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.evaluate(X_test_scaled, y_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1719/1719 [==============================] - 4s 2ms/step - loss: 0.3243 - accuracy: 0.8887\n" - ] - }, - { - "data": { - "text/plain": [ - "[0.32432350516319275, 0.8887272477149963]" - ] - }, - "execution_count": 108, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.evaluate(X_train_scaled, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4167 - accuracy: 0.8463\n" - ] - } - ], - "source": [ - "history = model.fit(X_train_scaled, y_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## MC 드롭아웃" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "y_probas = np.stack([model(X_test_scaled, training=True)\n", - " for sample in range(100)])\n", - "y_proba = y_probas.mean(axis=0)\n", - "y_std = y_probas.std(axis=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)" - ] - }, - "execution_count": 112, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.round(model.predict(X_test_scaled[:1]), 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[[0. , 0. , 0. , 0. , 0. , 0.43, 0. , 0.18, 0. , 0.39]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.75, 0. , 0.25]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.42, 0. , 0. , 0. , 0.58]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.26, 0. , 0.72]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.12, 0. , 0.07, 0. , 0.8 ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.3 , 0. , 0.7 ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.36, 0. , 0.13, 0. , 0.51]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.34, 0. , 0.66]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.75, 0. , 0.02, 0. , 0.23]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.04, 0. , 0.94]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. 
, 0.03, 0. , 0.95]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.64, 0. , 0. , 0. , 0.36]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.68, 0. , 0.05, 0. , 0.28]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.31, 0. , 0.04, 0. , 0.65]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.02, 0. , 0.93]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.42, 0. , 0.02, 0. , 0.57]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0. , 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.03, 0. , 0.96]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.52, 0. , 0.48]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.28, 0. , 0.12, 0. , 0.6 ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.42, 0. , 0.01, 0. , 0.56]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.96, 0. , 0.01, 0. , 0.03]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.96]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0. , 0. , 0.99]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.75, 0. , 0.11, 0. , 0.14]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.21, 0. , 0.18, 0. , 0.61]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.44, 0. , 0.08, 0. , 0.48]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.63, 0. , 0.01, 0. , 0.36]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.23, 0. , 0.55, 0. , 0.22]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.31, 0. , 0.01, 0. , 0.68]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.06, 0. , 0.01, 0. , 0.93]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.3 , 0. , 0.02, 0. , 0.68]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.91, 0. , 0.04, 0. , 0.05]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.06, 0. , 0.02, 0. , 0.93]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.33, 0. , 0.36, 0. , 0.31]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.6 , 0. , 0.03, 0. 
, 0.37]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.14, 0. , 0.08, 0. , 0.77]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.66, 0. , 0. , 0. , 0.34]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.11, 0. , 0.01, 0. , 0.88]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0. , 0. , 0.99]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.47, 0. , 0.08, 0. , 0.44]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.01, 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.12, 0. , 0.11, 0. , 0.77]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.21, 0. , 0.78]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.08, 0. , 0.92]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.2 , 0. , 0.79]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.49, 0. , 0.11, 0. , 0.41]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.69, 0. , 0.06, 0. , 0.25]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.31, 0. , 0.01, 0. , 0.68]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.01, 0. , 0.97]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.25, 0. , 0.71]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.47, 0. , 0.51]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.07, 0. , 0.72, 0. , 0.21]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.49, 0. , 0.5 ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.06, 0. , 0.02, 0. , 0.92]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.6 , 0. , 0.02, 0. , 0.38]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.08, 0. , 0.02, 0. , 0.9 ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.14, 0. , 0.02, 0. , 0.84]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.02, 0. , 0.97]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.44, 0. , 0.02, 0. , 0.54]],\n", - "\n", - " [[0. , 0. 
, 0. , 0. , 0. , 0.18, 0. , 0.08, 0. , 0.74]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.49, 0. , 0.04, 0. , 0.47]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.11, 0. , 0.08, 0. , 0.81]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.07, 0. , 0.09, 0. , 0.84]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0. , 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.04, 0. , 0.93]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.38, 0. , 0.03, 0. , 0.59]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.01, 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.1 , 0. , 0.02, 0. , 0.88]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.01, 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.49, 0. , 0.22, 0. , 0.29]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.03, 0. , 0.96]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.03, 0. , 0.96]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.3 , 0. , 0.69]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.28, 0. , 0. , 0. , 0.72]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.03, 0. , 0.94]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.09, 0. , 0.87]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.12, 0. , 0.03, 0. , 0.85]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.98, 0. , 0. , 0. , 0.02]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.17, 0. , 0. , 0. , 0.83]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.01, 0. , 0.94]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0. , 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.13, 0. , 0.83]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.93, 0. , 0.03, 0. 
, 0.04]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.86, 0. , 0.01, 0. , 0.13]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.59, 0. , 0.02, 0. , 0.38]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.01, 0. , 0.97]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.11, 0. , 0.01, 0. , 0.88]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.01, 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.78, 0. , 0.01, 0. , 0.21]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.48, 0. , 0.01, 0. , 0.51]]],\n", - " dtype=float32)" - ] - }, - "execution_count": 113, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.round(y_probas[:, :1], 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0. , 0. , 0. , 0. , 0. , 0.23, 0. , 0.09, 0. , 0.68]],\n", - " dtype=float32)" - ] - }, - "execution_count": 114, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.round(y_proba[:1], 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 115, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0. , 0. , 0. , 0. , 0. , 0.28, 0. , 0.15, 0. 
, 0.29]],\n", - " dtype=float32)" - ] - }, - "execution_count": 115, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "y_std = y_probas.std(axis=0)\n", - "np.round(y_std[:1], 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "metadata": {}, - "outputs": [], - "source": [ - "y_pred = np.argmax(y_proba, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 117, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.8666" - ] - }, - "execution_count": 117, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy = np.sum(y_pred == y_test) / len(y_test)\n", - "accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "metadata": {}, - "outputs": [], - "source": [ - "class MCDropout(keras.layers.Dropout):\n", - " def call(self, inputs):\n", - " return super().call(inputs, training=True)\n", - "\n", - "class MCAlphaDropout(keras.layers.AlphaDropout):\n", - " def call(self, inputs):\n", - " return super().call(inputs, training=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 119, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "metadata": {}, - "outputs": [], - "source": [ - "mc_model = keras.models.Sequential([\n", - " MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n", - " for layer in model.layers\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: \"sequential_20\"\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "flatten_18 (Flatten) (None, 784) 0 \n", - 
"_________________________________________________________________\n", - "mc_alpha_dropout (MCAlphaDro (None, 784) 0 \n", - "_________________________________________________________________\n", - "dense_262 (Dense) (None, 300) 235500 \n", - "_________________________________________________________________\n", - "mc_alpha_dropout_1 (MCAlphaD (None, 300) 0 \n", - "_________________________________________________________________\n", - "dense_263 (Dense) (None, 100) 30100 \n", - "_________________________________________________________________\n", - "mc_alpha_dropout_2 (MCAlphaD (None, 100) 0 \n", - "_________________________________________________________________\n", - "dense_264 (Dense) (None, 10) 1010 \n", - "=================================================================\n", - "Total params: 266,610\n", - "Trainable params: 266,610\n", - "Non-trainable params: 0\n", - "_________________________________________________________________\n" - ] - } - ], - "source": [ - "mc_model.summary()" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n", - "mc_model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "metadata": {}, - "outputs": [], - "source": [ - "mc_model.set_weights(model.get_weights())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이제 MC 드롭아웃을 모델에 사용할 수 있습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 124, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0. , 0. , 0. , 0. , 0. , 0.22, 0. , 0.15, 0. 
, 0.63]],\n", - " dtype=float32)" - ] - }, - "execution_count": 124, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.round(np.mean([mc_model.predict(X_test_scaled[:1]) for sample in range(100)], axis=0), 2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 맥스 노름" - ] - }, - { - "cell_type": "code", - "execution_count": 125, - "metadata": {}, - "outputs": [], - "source": [ - "layer = keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n", - " kernel_constraint=keras.constraints.max_norm(1.))" - ] - }, - { - "cell_type": "code", - "execution_count": 126, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4749 - accuracy: 0.8337 - val_loss: 0.3665 - val_accuracy: 0.8676\n", - "Epoch 2/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.3539 - accuracy: 0.8703 - val_loss: 0.3700 - val_accuracy: 0.8672\n" - ] - } - ], - "source": [ - "MaxNormDense = partial(keras.layers.Dense,\n", - " activation=\"selu\", kernel_initializer=\"lecun_normal\",\n", - " kernel_constraint=keras.constraints.max_norm(1.))\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " MaxNormDense(300),\n", - " MaxNormDense(100),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 2\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 연습문제 해답" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. to 7." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "부록 A 참조." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 8. CIFAR10에서 딥러닝" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### a.\n", - "*문제: 100개의 뉴런을 가진 은닉층 20개로 심층 신경망을 만들어보세요(너무 많은 것 같지만 이 연습문제의 핵심입니다). He 초기화와 ELU 활성화 함수를 사용하세요.*" - ] - }, - { - "cell_type": "code", - "execution_count": 127, - "metadata": {}, - "outputs": [], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100,\n", - " activation=\"elu\",\n", - " kernel_initializer=\"he_normal\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### b.\n", - "*문제: Nadam 옵티마이저와 조기 종료를 사용하여 CIFAR10 데이터셋에 이 네트워크를 훈련하세요. `keras.datasets.cifar10.load_data()`를 사용하여 데이터를 적재할 수 있습니다. 이 데이터셋은 10개의 클래스와 32×32 크기의 컬러 이미지 60,000개로 구성됩니다(50,000개는 훈련, 10,000개는 테스트). 따라서 10개의 뉴런과 소프트맥스 활성화 함수를 사용하는 출력층이 필요합니다. 
모델 구조와 하이퍼파라미터를 바꿀 때마다 적절한 학습률을 찾아야 한다는 것을 기억하세요.*" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "모델에 출력층을 추가합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "metadata": {}, - "outputs": [], - "source": [ - "model.add(keras.layers.Dense(10, activation=\"softmax\"))" - ] + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "TensorFlow 2.3 on Python 3.6 (CUDA 10.1)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "nav_menu": { + "height": "360px", + "width": "416px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + }, + "colab": { + "name": "11_training_deep_neural_networks.ipynb", + "provenance": [] + }, + "accelerator": "GPU" }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "학습률 5e-5인 Nadam 옵티마이저를 사용해 보죠. 학습률 1e-5, 3e-5, 1e-4, 3e-4, 1e-3, 3e-3, 1e-2를 테스트하고 10번의 에포크 동안 (아래 텐서보드 콜백으로) 학습 곡선을 비교해 보았습니다. 학습률 3e-5와 1e-4가 꽤 좋았기 때문에 5e-5를 시도해 보았고 조금 더 나은 결과를 냈습니다." - ] - }, - { - "cell_type": "code", - "execution_count": 129, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.Nadam(lr=5e-5)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CIFAR10 데이터셋을 로드하죠. 조기 종료를 사용하기 때문에 검증 세트가 필요합니다. 
원본 훈련 세트에서 처음 5,000개를 검증 세트로 사용하겠습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 130, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\n", - "170500096/170498071 [==============================] - 18s 0us/step\n" - ] - } - ], - "source": [ - "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()\n", - "\n", - "X_train = X_train_full[5000:]\n", - "y_train = y_train_full[5000:]\n", - "X_valid = X_train_full[:5000]\n", - "y_valid = y_train_full[:5000]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이제 콜백을 만들고 모델을 훈련합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 131, - "metadata": {}, - "outputs": [], - "source": [ - "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", - "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_model.h5\", save_best_only=True)\n", - "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", - "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_{:03d}\".format(run_index))\n", - "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", - "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]" - ] - }, - { - "cell_type": "code", - "execution_count": 132, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ERROR: Failed to launch TensorBoard (exited with 255).\n", - "Contents of stderr:\n", - "E0809 03:11:42.951561 139734898673472 program.py:312] TensorBoard could not bind to port 6006, it was already in use\n", - "ERROR: TensorBoard could not bind to port 6006, it was already in use" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%tensorboard --logdir=./my_cifar10_logs --port=6006" - ] - }, - { - "cell_type": "code", - "execution_count": 133, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": 
[ - "Epoch 1/100\n", - " 1/1407 [..............................] - ETA: 0s - loss: 165.9660 - accuracy: 0.0625WARNING:tensorflow:From /home/work/.local/lib/python3.6/site-packages/tensorflow/python/ops/summary_ops_v2.py:1277: stop (from tensorflow.python.eager.profiler) is deprecated and will be removed after 2020-07-01.\n", - "Instructions for updating:\n", - "use `tf.profiler.experimental.stop` instead.\n", - " 2/1407 [..............................] - ETA: 1:52 - loss: 133.4792 - accuracy: 0.1250WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0164s vs `on_train_batch_end` time: 0.1398s). Check your callbacks.\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 4.0015 - accuracy: 0.1711 - val_loss: 2.0790 - val_accuracy: 0.2354\n", - "Epoch 2/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 2.0265 - accuracy: 0.2568 - val_loss: 2.0408 - val_accuracy: 0.2488\n", - "Epoch 3/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.9195 - accuracy: 0.2983 - val_loss: 1.9150 - val_accuracy: 0.2916\n", - "Epoch 4/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.8406 - accuracy: 0.3284 - val_loss: 1.9163 - val_accuracy: 0.3050\n", - "Epoch 5/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.7846 - accuracy: 0.3525 - val_loss: 1.7578 - val_accuracy: 0.3592\n", - "Epoch 6/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.7352 - accuracy: 0.3698 - val_loss: 1.7461 - val_accuracy: 0.3570\n", - "Epoch 7/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.6930 - accuracy: 0.3869 - val_loss: 1.7101 - val_accuracy: 0.3752\n", - "Epoch 8/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.6576 - accuracy: 0.4002 - val_loss: 1.6530 - val_accuracy: 0.3962\n", - "Epoch 9/100\n", - "1407/1407 
[==============================] - 22s 15ms/step - loss: 1.6233 - accuracy: 0.4169 - val_loss: 1.6236 - val_accuracy: 0.4070\n", - "Epoch 10/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.5965 - accuracy: 0.4248 - val_loss: 1.6644 - val_accuracy: 0.4026\n", - "Epoch 11/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.5763 - accuracy: 0.4328 - val_loss: 1.6739 - val_accuracy: 0.3946\n", - "Epoch 12/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.5534 - accuracy: 0.4420 - val_loss: 1.6191 - val_accuracy: 0.4186\n", - "Epoch 13/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.5332 - accuracy: 0.4468 - val_loss: 1.6050 - val_accuracy: 0.4168\n", - "Epoch 14/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.5181 - accuracy: 0.4531 - val_loss: 1.6009 - val_accuracy: 0.4312\n", - "Epoch 15/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4995 - accuracy: 0.4616 - val_loss: 1.5658 - val_accuracy: 0.4366\n", - "Epoch 16/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.4827 - accuracy: 0.4652 - val_loss: 1.5856 - val_accuracy: 0.4376\n", - "Epoch 17/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4717 - accuracy: 0.4713 - val_loss: 1.5381 - val_accuracy: 0.4478\n", - "Epoch 18/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.4565 - accuracy: 0.4756 - val_loss: 1.5496 - val_accuracy: 0.4478\n", - "Epoch 19/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.4422 - accuracy: 0.4797 - val_loss: 1.5506 - val_accuracy: 0.4478\n", - "Epoch 20/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.4327 - accuracy: 0.4816 - val_loss: 1.5574 - val_accuracy: 0.4528\n", - "Epoch 21/100\n", - "1407/1407 [==============================] - 22s 15ms/step - 
loss: 1.4213 - accuracy: 0.4875 - val_loss: 1.5716 - val_accuracy: 0.4410\n", - "Epoch 22/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4108 - accuracy: 0.4903 - val_loss: 1.5377 - val_accuracy: 0.4544\n", - "Epoch 23/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3972 - accuracy: 0.4968 - val_loss: 1.5215 - val_accuracy: 0.4526\n", - "Epoch 24/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3865 - accuracy: 0.4998 - val_loss: 1.5374 - val_accuracy: 0.4470\n", - "Epoch 25/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3781 - accuracy: 0.5056 - val_loss: 1.5694 - val_accuracy: 0.4466\n", - "Epoch 26/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3672 - accuracy: 0.5104 - val_loss: 1.5405 - val_accuracy: 0.4542\n", - "Epoch 27/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.3583 - accuracy: 0.5110 - val_loss: 1.5103 - val_accuracy: 0.4690\n", - "Epoch 28/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3481 - accuracy: 0.5161 - val_loss: 1.5282 - val_accuracy: 0.4572\n", - "Epoch 29/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3387 - accuracy: 0.5186 - val_loss: 1.5302 - val_accuracy: 0.4540\n", - "Epoch 30/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3307 - accuracy: 0.5206 - val_loss: 1.5451 - val_accuracy: 0.4666\n", - "Epoch 31/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3235 - accuracy: 0.5245 - val_loss: 1.5553 - val_accuracy: 0.4604\n", - "Epoch 32/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3138 - accuracy: 0.5274 - val_loss: 1.5263 - val_accuracy: 0.4604\n", - "Epoch 33/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.3025 - accuracy: 0.5323 - val_loss: 1.5133 - 
val_accuracy: 0.4736\n", - "Epoch 34/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2956 - accuracy: 0.5340 - val_loss: 1.5021 - val_accuracy: 0.4748\n", - "Epoch 35/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2876 - accuracy: 0.5345 - val_loss: 1.5427 - val_accuracy: 0.4606\n", - "Epoch 36/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2794 - accuracy: 0.5408 - val_loss: 1.5662 - val_accuracy: 0.4586\n", - "Epoch 37/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2706 - accuracy: 0.5423 - val_loss: 1.5014 - val_accuracy: 0.4778\n", - "Epoch 38/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2615 - accuracy: 0.5464 - val_loss: 1.5048 - val_accuracy: 0.4736\n", - "Epoch 39/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2541 - accuracy: 0.5483 - val_loss: 1.5195 - val_accuracy: 0.4656\n", - "Epoch 40/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2448 - accuracy: 0.5542 - val_loss: 1.5167 - val_accuracy: 0.4790\n", - "Epoch 41/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.2387 - accuracy: 0.5545 - val_loss: 1.5400 - val_accuracy: 0.4660\n", - "Epoch 42/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.2335 - accuracy: 0.5566 - val_loss: 1.5325 - val_accuracy: 0.4680\n", - "Epoch 43/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2257 - accuracy: 0.5580 - val_loss: 1.5342 - val_accuracy: 0.4722\n", - "Epoch 44/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2173 - accuracy: 0.5606 - val_loss: 1.5537 - val_accuracy: 0.4580\n", - "Epoch 45/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2082 - accuracy: 0.5643 - val_loss: 1.5200 - val_accuracy: 0.4770\n", - "Epoch 46/100\n", - 
"1407/1407 [==============================] - 22s 15ms/step - loss: 1.2037 - accuracy: 0.5666 - val_loss: 1.5349 - val_accuracy: 0.4716\n", - "Epoch 47/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.1935 - accuracy: 0.5673 - val_loss: 1.5299 - val_accuracy: 0.4774\n", - "Epoch 48/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1884 - accuracy: 0.5720 - val_loss: 1.5361 - val_accuracy: 0.4724\n", - "Epoch 49/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1769 - accuracy: 0.5766 - val_loss: 1.5410 - val_accuracy: 0.4720\n", - "Epoch 50/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.1782 - accuracy: 0.5752 - val_loss: 1.5620 - val_accuracy: 0.4714\n", - "Epoch 51/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1649 - accuracy: 0.5804 - val_loss: 1.5364 - val_accuracy: 0.4710\n", - "Epoch 52/100\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1599 - accuracy: 0.5818 - val_loss: 1.5514 - val_accuracy: 0.4658\n", - "Epoch 53/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1511 - accuracy: 0.5853 - val_loss: 1.5512 - val_accuracy: 0.4798\n", - "Epoch 54/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1485 - accuracy: 0.5887 - val_loss: 1.5754 - val_accuracy: 0.4648\n", - "Epoch 55/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.1398 - accuracy: 0.5905 - val_loss: 1.5335 - val_accuracy: 0.4806\n", - "Epoch 56/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1352 - accuracy: 0.5909 - val_loss: 1.5603 - val_accuracy: 0.4732\n", - "Epoch 57/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1285 - accuracy: 0.5933 - val_loss: 1.5535 - val_accuracy: 0.4824\n" - ] - 
}, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 133, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.fit(X_train, y_train, epochs=100,\n", - " validation_data=(X_valid, y_valid),\n", - " callbacks=callbacks)" - ] - }, - { - "cell_type": "code", - "execution_count": 134, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "157/157 [==============================] - 0s 3ms/step - loss: 1.5014 - accuracy: 0.0882\n" - ] - }, - { - "data": { - "text/plain": [ - "[1.5013599395751953, 0.08820000290870667]" - ] - }, - "execution_count": 134, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model = keras.models.load_model(\"my_cifar10_model.h5\")\n", - "model.evaluate(X_valid, y_valid)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "가장 낮은 검증 손실을 내는 모델은 검증 세트에서 약 47% 정확도를 얻었습니다. 이 검증 점수에 도달하는데 39번의 에포크가 걸렸습니다. (GPU가 없는) 제 노트북에서 에포크당 약 10초 정도 걸렸습니다. 배치 정규화를 사용해 성능을 올릴 수 있는지 확인해 보죠." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### c.\n", - "*문제: 배치 정규화를 추가하고 학습 곡선을 비교해보세요. 이전보다 빠르게 수렴하나요? 더 좋은 모델이 만들어지나요? 훈련 속도에는 어떤 영향을 미치나요?*" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "다음 코드는 위의 코드와 매우 비슷합니다. 몇 가지 다른 점은 아래와 같습니다:\n", - "\n", - "* 출력층을 제외하고 모든 `Dense` 층 다음에 (활성화 함수 전에) BN 층을 추가했습니다. 처음 은닉층 전에도 BN 층을 추가했습니다.\n", - "* 학습률을 5e-4로 바꾸었습니다. 1e-5, 3e-5, 5e-5, 1e-4, 3e-4, 5e-4, 1e-3, 3e-3를 시도해 보고 20번 에포크 후에 검증 세트 성능이 가장 좋은 것을 선택했습니다.\n", - "* run_logdir를 run_bn_* 으로 이름을 바꾸고 모델 파일 이름을 my_cifar10_bn_model.h5로 변경했습니다." - ] - }, - { - "cell_type": "code", - "execution_count": 135, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/100\n", - " 2/1407 [..............................] 
- ETA: 9:29 - loss: 2.8693 - accuracy: 0.1094WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0364s vs `on_train_batch_end` time: 0.7737s). Check your callbacks.\n", - "1407/1407 [==============================] - 51s 36ms/step - loss: 1.8431 - accuracy: 0.3390 - val_loss: 1.7148 - val_accuracy: 0.3886\n", - "Epoch 2/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.6690 - accuracy: 0.4046 - val_loss: 1.6174 - val_accuracy: 0.4144\n", - "Epoch 3/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.5972 - accuracy: 0.4320 - val_loss: 1.5171 - val_accuracy: 0.4478\n", - "Epoch 4/100\n", - "1407/1407 [==============================] - 50s 35ms/step - loss: 1.5463 - accuracy: 0.4495 - val_loss: 1.4883 - val_accuracy: 0.4688\n", - "Epoch 5/100\n", - "1407/1407 [==============================] - 50s 35ms/step - loss: 1.5051 - accuracy: 0.4641 - val_loss: 1.4369 - val_accuracy: 0.4892\n", - "Epoch 6/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.4684 - accuracy: 0.4793 - val_loss: 1.4056 - val_accuracy: 0.5018\n", - "Epoch 7/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.4350 - accuracy: 0.4895 - val_loss: 1.4292 - val_accuracy: 0.4888\n", - "Epoch 8/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.4087 - accuracy: 0.5006 - val_loss: 1.4021 - val_accuracy: 0.5088\n", - "Epoch 9/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.3834 - accuracy: 0.5095 - val_loss: 1.3738 - val_accuracy: 0.5110\n", - "Epoch 10/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.3645 - accuracy: 0.5167 - val_loss: 1.3432 - val_accuracy: 0.5252\n", - "Epoch 11/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.3428 - accuracy: 0.5258 - val_loss: 1.3583 - val_accuracy: 0.5132\n", - "Epoch 12/100\n", - 
"1407/1407 [==============================] - 49s 35ms/step - loss: 1.3227 - accuracy: 0.5316 - val_loss: 1.3820 - val_accuracy: 0.5052\n", - "Epoch 13/100\n", - "1407/1407 [==============================] - 48s 34ms/step - loss: 1.3010 - accuracy: 0.5371 - val_loss: 1.3794 - val_accuracy: 0.5094\n", - "Epoch 14/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2838 - accuracy: 0.5446 - val_loss: 1.3531 - val_accuracy: 0.5260\n", - "Epoch 15/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2621 - accuracy: 0.5548 - val_loss: 1.3641 - val_accuracy: 0.5256\n", - "Epoch 16/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2535 - accuracy: 0.5572 - val_loss: 1.3720 - val_accuracy: 0.5276\n", - "Epoch 17/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2355 - accuracy: 0.5609 - val_loss: 1.3184 - val_accuracy: 0.5348\n", - "Epoch 18/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2164 - accuracy: 0.5685 - val_loss: 1.3487 - val_accuracy: 0.5296\n", - "Epoch 19/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2037 - accuracy: 0.5770 - val_loss: 1.3278 - val_accuracy: 0.5366\n", - "Epoch 20/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1916 - accuracy: 0.5789 - val_loss: 1.3592 - val_accuracy: 0.5260\n", - "Epoch 21/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1782 - accuracy: 0.5848 - val_loss: 1.3478 - val_accuracy: 0.5302\n", - "Epoch 22/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1587 - accuracy: 0.5913 - val_loss: 1.3477 - val_accuracy: 0.5308\n", - "Epoch 23/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1481 - accuracy: 0.5933 - val_loss: 1.3285 - val_accuracy: 0.5378\n", - "Epoch 24/100\n", - "1407/1407 [==============================] - 49s 
35ms/step - loss: 1.1395 - accuracy: 0.5989 - val_loss: 1.3393 - val_accuracy: 0.5388\n", - "Epoch 25/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1285 - accuracy: 0.6044 - val_loss: 1.3436 - val_accuracy: 0.5354\n", - "Epoch 26/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1080 - accuracy: 0.6085 - val_loss: 1.3496 - val_accuracy: 0.5258\n", - "Epoch 27/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0971 - accuracy: 0.6143 - val_loss: 1.3484 - val_accuracy: 0.5350\n", - "Epoch 28/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0978 - accuracy: 0.6121 - val_loss: 1.3698 - val_accuracy: 0.5274\n", - "Epoch 29/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0825 - accuracy: 0.6198 - val_loss: 1.3416 - val_accuracy: 0.5348\n", - "Epoch 30/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0698 - accuracy: 0.6219 - val_loss: 1.3363 - val_accuracy: 0.5366\n", - "Epoch 31/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0569 - accuracy: 0.6262 - val_loss: 1.3536 - val_accuracy: 0.5356\n", - "Epoch 32/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0489 - accuracy: 0.6306 - val_loss: 1.3822 - val_accuracy: 0.5220\n", - "Epoch 33/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0387 - accuracy: 0.6338 - val_loss: 1.3633 - val_accuracy: 0.5404\n", - "Epoch 34/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0342 - accuracy: 0.6344 - val_loss: 1.3611 - val_accuracy: 0.5364\n", - "Epoch 35/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0163 - accuracy: 0.6422 - val_loss: 1.3904 - val_accuracy: 0.5356\n", - "Epoch 36/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0137 - accuracy: 0.6421 - val_loss: 
1.3795 - val_accuracy: 0.5408\n", - "Epoch 37/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 0.9991 - accuracy: 0.6491 - val_loss: 1.3334 - val_accuracy: 0.5444\n", - "157/157 [==============================] - 1s 5ms/step - loss: 1.3184 - accuracy: 0.1154\n" - ] - }, - { - "data": { - "text/plain": [ - "[1.3183687925338745, 0.11540000140666962]" - ] - }, - "execution_count": 135, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "model.add(keras.layers.BatchNormalization())\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100, kernel_initializer=\"he_normal\"))\n", - " model.add(keras.layers.BatchNormalization())\n", - " model.add(keras.layers.Activation(\"elu\"))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", - "\n", - "optimizer = keras.optimizers.Nadam(lr=5e-4)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])\n", - "\n", - "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", - "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_bn_model.h5\", save_best_only=True)\n", - "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", - "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_bn_{:03d}\".format(run_index))\n", - "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", - "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", - "\n", - "model.fit(X_train, y_train, epochs=100,\n", - " validation_data=(X_valid, y_valid),\n", - " callbacks=callbacks)\n", - "\n", - "model = keras.models.load_model(\"my_cifar10_bn_model.h5\")\n", - "model.evaluate(X_valid, y_valid)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* *이전보다 
빠르게 수렴하나요?* 훨씬 빠릅니다! 이전 모델은 가장 낮은 검증 손실에 도달하기 위해 39 에포크가 걸렸지만 BN을 사용한 새 모델은 18 에포크가 걸렸습니다. 이전 모델보다 두 배 이상 빠릅니다. BN 층은 훈련을 안정적으로 수행하고 더 큰 학습률을 사용할 수 있기 때문에 수렴이 빨라졌습니다.\n", - "* *BN이 더 좋은 모델을 만드나요?* 네! 최종 모델의 성능이 47%가 아니라 55% 정확도로 더 좋습니다. 이는 아주 좋은 모델이 아니지만 적어도 이전보다는 낫습니다(합성곱 신경망이 더 낫겠지만 이는 다른 주제입니다. 14장을 참고하세요).\n", - "* *BN이 훈련 속도에 영향을 미치나요?* 모델이 두 배나 빠르게 수렴했지만 각 에포크는 10초가 아니라 16초가 걸렸습니다. BN 층에서 추가된 계산 때문입니다. 따라서 전체적으로 에포크 횟수가 50% 정도 줄었지만 훈련 시간(탁상 시계 시간)은 30% 정도 줄었습니다. 결국 크게 향상되었습니다!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### d.\n", - "*문제: 배치 정규화를 SELU로 바꾸어보세요. 네트워크가 자기 정규화하기 위해 필요한 변경 사항을 적용해보세요(즉, 입력 특성 표준화, 르쿤 정규분포 초기화, 완전 연결 층만 순차적으로 쌓은 심층 신경망 등).*" - ] - }, - { - "cell_type": "code", - "execution_count": 136, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/100\n", - " 2/1407 [..............................] - ETA: 5:52 - loss: 3.0440 - accuracy: 0.1094WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0169s vs `on_train_batch_end` time: 0.4852s). 
Check your callbacks.\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.9306 - accuracy: 0.3076 - val_loss: 1.8329 - val_accuracy: 0.3406\n", - "Epoch 2/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.7066 - accuracy: 0.3947 - val_loss: 1.7136 - val_accuracy: 0.3778\n", - "Epoch 3/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.6115 - accuracy: 0.4309 - val_loss: 1.6743 - val_accuracy: 0.3992\n", - "Epoch 4/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.5459 - accuracy: 0.4578 - val_loss: 1.6328 - val_accuracy: 0.4376\n", - "Epoch 5/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4916 - accuracy: 0.4795 - val_loss: 1.6314 - val_accuracy: 0.4330\n", - "Epoch 6/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4437 - accuracy: 0.4977 - val_loss: 1.5327 - val_accuracy: 0.4724\n", - "Epoch 7/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4014 - accuracy: 0.5092 - val_loss: 1.5317 - val_accuracy: 0.4680\n", - "Epoch 8/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.3726 - accuracy: 0.5226 - val_loss: 1.4981 - val_accuracy: 0.4874\n", - "Epoch 9/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.3349 - accuracy: 0.5344 - val_loss: 1.5136 - val_accuracy: 0.4734\n", - "Epoch 10/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2971 - accuracy: 0.5486 - val_loss: 1.5214 - val_accuracy: 0.4786\n", - "Epoch 11/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2706 - accuracy: 0.5600 - val_loss: 1.5285 - val_accuracy: 0.4838\n", - "Epoch 12/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2470 - accuracy: 0.5695 - val_loss: 1.4795 - val_accuracy: 0.4980\n", - "Epoch 13/100\n", - "1407/1407 
[==============================] - 22s 16ms/step - loss: 1.2192 - accuracy: 0.5766 - val_loss: 1.4753 - val_accuracy: 0.4980\n", - "Epoch 14/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1908 - accuracy: 0.5918 - val_loss: 1.4862 - val_accuracy: 0.4942\n", - "Epoch 15/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1665 - accuracy: 0.5987 - val_loss: 1.5071 - val_accuracy: 0.5012\n", - "Epoch 16/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1519 - accuracy: 0.6057 - val_loss: 1.5167 - val_accuracy: 0.5024\n", - "Epoch 17/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.1274 - accuracy: 0.6107 - val_loss: 1.5477 - val_accuracy: 0.4968\n", - "Epoch 18/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1049 - accuracy: 0.6254 - val_loss: 1.5362 - val_accuracy: 0.5068\n", - "Epoch 19/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.0837 - accuracy: 0.6309 - val_loss: 1.5754 - val_accuracy: 0.5022\n", - "Epoch 20/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0680 - accuracy: 0.6372 - val_loss: 1.5238 - val_accuracy: 0.5052\n", - "Epoch 21/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0469 - accuracy: 0.6425 - val_loss: 1.5312 - val_accuracy: 0.5156\n", - "Epoch 22/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0303 - accuracy: 0.6500 - val_loss: 1.5359 - val_accuracy: 0.5084\n", - "Epoch 23/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.0084 - accuracy: 0.6583 - val_loss: 1.5369 - val_accuracy: 0.5042\n", - "Epoch 24/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9944 - accuracy: 0.6624 - val_loss: 1.6013 - val_accuracy: 0.5006\n", - "Epoch 25/100\n", - "1407/1407 [==============================] - 22s 16ms/step - 
loss: 0.9793 - accuracy: 0.6706 - val_loss: 1.5512 - val_accuracy: 0.5174\n", - "Epoch 26/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 0.9579 - accuracy: 0.6772 - val_loss: 1.6008 - val_accuracy: 0.4988\n", - "Epoch 27/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9484 - accuracy: 0.6799 - val_loss: 1.5746 - val_accuracy: 0.5110\n", - "Epoch 28/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 48.1621 - accuracy: 0.6212 - val_loss: 1.6674 - val_accuracy: 0.4208\n", - "Epoch 29/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2912 - accuracy: 0.5517 - val_loss: 1.5598 - val_accuracy: 0.4728\n", - "Epoch 30/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2315 - accuracy: 0.5735 - val_loss: 1.5738 - val_accuracy: 0.4764\n", - "Epoch 31/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1684 - accuracy: 0.5940 - val_loss: 1.5514 - val_accuracy: 0.4878\n", - "Epoch 32/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1293 - accuracy: 0.6084 - val_loss: 1.5258 - val_accuracy: 0.4978\n", - "Epoch 33/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1000 - accuracy: 0.6188 - val_loss: 1.5369 - val_accuracy: 0.4970\n", - "157/157 [==============================] - 1s 3ms/step - loss: 1.4753 - accuracy: 0.1256\n" - ] - }, - { - "data": { - "text/plain": [ - "[1.475338339805603, 0.12559999525547028]" - ] - }, - "execution_count": 136, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100,\n", - " kernel_initializer=\"lecun_normal\",\n", - " 
activation=\"selu\"))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", - "\n", - "optimizer = keras.optimizers.Nadam(lr=7e-4)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])\n", - "\n", - "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", - "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_selu_model.h5\", save_best_only=True)\n", - "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", - "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_selu_{:03d}\".format(run_index))\n", - "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", - "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", - "\n", - "X_means = X_train.mean(axis=0)\n", - "X_stds = X_train.std(axis=0)\n", - "X_train_scaled = (X_train - X_means) / X_stds\n", - "X_valid_scaled = (X_valid - X_means) / X_stds\n", - "X_test_scaled = (X_test - X_means) / X_stds\n", - "\n", - "model.fit(X_train_scaled, y_train, epochs=100,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=callbacks)\n", - "\n", - "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n", - "model.evaluate(X_valid_scaled, y_valid)" - ] - }, - { - "cell_type": "code", - "execution_count": 137, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "157/157 [==============================] - 1s 3ms/step - loss: 1.4753 - accuracy: 0.1256\n" - ] - }, - { - "data": { - "text/plain": [ - "[1.475338339805603, 0.12559999525547028]" - ] - }, - "execution_count": 137, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n", - "model.evaluate(X_valid_scaled, y_valid)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "51.4% 정확도를 얻었습니다. 원래 모델보다 더 좋습니다. 하지만 배치 정규화를 사용한 모델만큼 좋지는 않습니다. 최고의 모델에 도달하는데 13 에포크가 걸렸습니다. 
이는 원본 모델이나 BN 모델보다 더 빠른 것입니다. 각 에포크는 원본 모델처럼 10초만 걸렸습니다. 따라서 이 모델이 지금까지 가장 빠른 모델입니다(에포크와 탁상 시계 기준으로)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### e.\n", - "*문제: 알파 드롭아웃으로 모델에 규제를 적용해보세요. 그다음 모델을 다시 훈련하지 않고 MC 드롭아웃으로 더 높은 정확도를 얻을 수 있는지 확인해보세요.*" - ] - }, - { - "cell_type": "code", - "execution_count": 138, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/100\n", - " 2/1407 [..............................] - ETA: 4:07 - loss: 2.9857 - accuracy: 0.0938WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0168s vs `on_train_batch_end` time: 0.3359s). Check your callbacks.\n", - "1407/1407 [==============================] - 23s 17ms/step - loss: 1.8896 - accuracy: 0.3275 - val_loss: 1.7313 - val_accuracy: 0.3970\n", - "Epoch 2/100\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.6589 - accuracy: 0.4157 - val_loss: 1.7183 - val_accuracy: 0.3916\n", - "Epoch 3/100\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.5727 - accuracy: 0.4479 - val_loss: 1.6073 - val_accuracy: 0.4364\n", - "Epoch 4/100\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.5085 - accuracy: 0.4734 - val_loss: 1.5741 - val_accuracy: 0.4524\n", - "Epoch 5/100\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.4525 - accuracy: 0.4946 - val_loss: 1.5663 - val_accuracy: 0.4592\n", - "Epoch 6/100\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.4032 - accuracy: 0.5124 - val_loss: 1.5255 - val_accuracy: 0.4644\n", - "Epoch 7/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.3581 - accuracy: 0.5255 - val_loss: 1.6598 - val_accuracy: 0.4662\n", - "Epoch 8/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.3209 - accuracy: 0.5400 - val_loss: 1.5027 - val_accuracy: 0.5002\n", - 
"Epoch 9/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2845 - accuracy: 0.5562 - val_loss: 1.5246 - val_accuracy: 0.4896\n", - "Epoch 10/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2526 - accuracy: 0.5659 - val_loss: 1.5510 - val_accuracy: 0.4956\n", - "Epoch 11/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2160 - accuracy: 0.5808 - val_loss: 1.5559 - val_accuracy: 0.5002\n", - "Epoch 12/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1902 - accuracy: 0.5900 - val_loss: 1.5478 - val_accuracy: 0.4968\n", - "Epoch 13/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1602 - accuracy: 0.6021 - val_loss: 1.5727 - val_accuracy: 0.5124\n", - "Epoch 14/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1392 - accuracy: 0.6102 - val_loss: 1.5654 - val_accuracy: 0.4944\n", - "Epoch 15/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1086 - accuracy: 0.6210 - val_loss: 1.5868 - val_accuracy: 0.5064\n", - "Epoch 16/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0856 - accuracy: 0.6289 - val_loss: 1.6016 - val_accuracy: 0.5042\n", - "Epoch 17/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0620 - accuracy: 0.6397 - val_loss: 1.6458 - val_accuracy: 0.4968\n", - "Epoch 18/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0511 - accuracy: 0.6405 - val_loss: 1.6276 - val_accuracy: 0.5096\n", - "Epoch 19/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0203 - accuracy: 0.6514 - val_loss: 1.7246 - val_accuracy: 0.5062\n", - "Epoch 20/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0024 - accuracy: 0.6598 - val_loss: 1.6570 - val_accuracy: 0.5064\n", - "Epoch 21/100\n", - "1407/1407 
[==============================] - 22s 16ms/step - loss: 0.9845 - accuracy: 0.6662 - val_loss: 1.6697 - val_accuracy: 0.4990\n", - "Epoch 22/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9641 - accuracy: 0.6738 - val_loss: 1.7560 - val_accuracy: 0.5010\n", - "Epoch 23/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9387 - accuracy: 0.6797 - val_loss: 1.7716 - val_accuracy: 0.5008\n", - "Epoch 24/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9290 - accuracy: 0.6852 - val_loss: 1.7688 - val_accuracy: 0.5026\n", - "Epoch 25/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9176 - accuracy: 0.6899 - val_loss: 1.8131 - val_accuracy: 0.5042\n", - "Epoch 26/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.8925 - accuracy: 0.6986 - val_loss: 1.8228 - val_accuracy: 0.4904\n", - "Epoch 27/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.8680 - accuracy: 0.7060 - val_loss: 1.8546 - val_accuracy: 0.5048\n", - "Epoch 28/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.8638 - accuracy: 0.7091 - val_loss: 1.8004 - val_accuracy: 0.4954\n", - "157/157 [==============================] - 1s 3ms/step - loss: 1.5027 - accuracy: 0.0914\n" - ] - }, - { - "data": { - "text/plain": [ - "[1.5026599168777466, 0.09139999747276306]" - ] - }, - "execution_count": 138, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100,\n", - " kernel_initializer=\"lecun_normal\",\n", - " activation=\"selu\"))\n", - "\n", - "model.add(keras.layers.AlphaDropout(rate=0.1))\n", - 
"model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", - "\n", - "optimizer = keras.optimizers.Nadam(lr=5e-4)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])\n", - "\n", - "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", - "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_alpha_dropout_model.h5\", save_best_only=True)\n", - "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", - "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_alpha_dropout_{:03d}\".format(run_index))\n", - "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", - "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", - "\n", - "X_means = X_train.mean(axis=0)\n", - "X_stds = X_train.std(axis=0)\n", - "X_train_scaled = (X_train - X_means) / X_stds\n", - "X_valid_scaled = (X_valid - X_means) / X_stds\n", - "X_test_scaled = (X_test - X_means) / X_stds\n", - "\n", - "model.fit(X_train_scaled, y_train, epochs=100,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=callbacks)\n", - "\n", - "model = keras.models.load_model(\"my_cifar10_alpha_dropout_model.h5\")\n", - "model.evaluate(X_valid_scaled, y_valid)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이 모델은 검증 세트에서 50.8% 정확도에 도달합니다. 드롭아웃이 없을 때보다(51.4%) 조금 더 나쁩니다. 하이퍼파라미터 탐색을 좀 많이 수행해 보면 더 나아 질 수 있습니다(드롭아웃 비율 5%, 10%, 20%, 40%과 학습률 1e-4, 3e-4, 5e-4, 1e-3을 시도했습니다). 하지만 이 경우에는 크지 않을 것 같습니다." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이제 MC 드롭아웃을 사용해 보죠. 
앞서 사용한 `MCAlphaDropout` 클래스를 복사해 사용하겠습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 139, - "metadata": {}, - "outputs": [], - "source": [ - "class MCAlphaDropout(keras.layers.AlphaDropout):\n", - " def call(self, inputs):\n", - " return super().call(inputs, training=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "방금 훈련했던 모델과 (같은 가중치를 가진) 동일한 새로운 모델을 만들어 보죠. 하지만 `AlphaDropout` 층 대신 `MCAlphaDropout` 드롭아웃 층을 사용합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 140, - "metadata": {}, - "outputs": [], - "source": [ - "mc_model = keras.models.Sequential([\n", - " MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n", - " for layer in model.layers\n", - "])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "그다음 몇 가지 유틸리티 함수를 추가합니다. 첫 번째 함수는 모델을 여러 번 실행합니다(기본적으로 10번). 그다음 평균한 예측 클래스 확률을 반환합니다. 두 번째 함수는 이 평균 확률을 사용해 각 샘플의 클래스를 예측합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 141, - "metadata": {}, - "outputs": [], - "source": [ - "def mc_dropout_predict_probas(mc_model, X, n_samples=10):\n", - " Y_probas = [mc_model.predict(X) for sample in range(n_samples)]\n", - " return np.mean(Y_probas, axis=0)\n", - "\n", - "def mc_dropout_predict_classes(mc_model, X, n_samples=10):\n", - " Y_probas = mc_dropout_predict_probas(mc_model, X, n_samples)\n", - " return np.argmax(Y_probas, axis=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이제 검증 세트의 모든 샘플에 대해 예측을 만들고 정확도를 계산해 보죠:" - ] - }, - { - "cell_type": "code", - "execution_count": 142, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.497" - ] - }, - "execution_count": 142, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "y_pred = mc_dropout_predict_classes(mc_model, X_valid_scaled)\n", - "accuracy = np.mean(y_pred == 
y_valid[:, 0])\n", - "accuracy" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이 경우에는 실제적인 정확도 향상이 없습니다(50.8%에서 50.9%).\n", - "\n", - "따라서 이 연습문에서 얻은 최상의 모델은 배치 정규화 모델입니다." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### f.\n", - "*문제: 1사이클 스케줄링으로 모델을 다시 훈련하고 훈련 속도와 모델 정확도가 향상되는지 확인해보세요.*" - ] - }, - { - "cell_type": "code", - "execution_count": 143, - "metadata": {}, - "outputs": [], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100,\n", - " kernel_initializer=\"lecun_normal\",\n", - " activation=\"selu\"))\n", - "\n", - "model.add(keras.layers.AlphaDropout(rate=0.1))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", - "\n", - "optimizer = keras.optimizers.SGD(lr=1e-3)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "352/352 [==============================] - 2s 7ms/step - loss: nan - accuracy: 0.1399\n" - ] - }, - { - "data": { - "text/plain": [ - "[1e-05, 9.999868, 2.6130447387695312, 4.006446089063372]" - ] - }, - "execution_count": 144, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "batch_size = 128\n", - "rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)\n", - "plot_lr_vs_loss(rates, losses)\n", - "plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 1.4])" - ] - }, - { - "cell_type": "code", - "execution_count": 145, - "metadata": {}, - "outputs": [], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100,\n", - " kernel_initializer=\"lecun_normal\",\n", - " activation=\"selu\"))\n", - "\n", - "model.add(keras.layers.AlphaDropout(rate=0.1))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", - "\n", - "optimizer = keras.optimizers.SGD(lr=1e-2)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 146, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/15\n", - "352/352 [==============================] - 3s 9ms/step - loss: 2.0537 - accuracy: 0.2843 - val_loss: 1.7811 - val_accuracy: 0.3744\n", - "Epoch 2/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.7635 - accuracy: 0.3765 - val_loss: 1.6431 - val_accuracy: 0.4252\n", - "Epoch 3/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.6241 - accuracy: 0.4217 - val_loss: 1.6001 - val_accuracy: 0.4368\n", - "Epoch 4/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.5434 - accuracy: 0.4520 - val_loss: 1.6114 - val_accuracy: 0.4310\n", - "Epoch 5/15\n", - "352/352 [==============================] - 3s 7ms/step - 
loss: 1.4914 - accuracy: 0.4710 - val_loss: 1.5895 - val_accuracy: 0.4434\n", - "Epoch 6/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.4510 - accuracy: 0.4818 - val_loss: 1.5678 - val_accuracy: 0.4506\n", - "Epoch 7/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.4143 - accuracy: 0.4979 - val_loss: 1.6717 - val_accuracy: 0.4294\n", - "Epoch 8/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.3462 - accuracy: 0.5199 - val_loss: 1.4928 - val_accuracy: 0.4956\n", - "Epoch 9/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.2691 - accuracy: 0.5481 - val_loss: 1.5294 - val_accuracy: 0.4818\n", - "Epoch 10/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.1994 - accuracy: 0.5713 - val_loss: 1.5165 - val_accuracy: 0.4978\n", - "Epoch 11/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.1308 - accuracy: 0.5980 - val_loss: 1.5070 - val_accuracy: 0.5100\n", - "Epoch 12/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.0632 - accuracy: 0.6184 - val_loss: 1.4833 - val_accuracy: 0.5244\n", - "Epoch 13/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 0.9932 - accuracy: 0.6447 - val_loss: 1.5314 - val_accuracy: 0.5292\n", - "Epoch 14/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 0.9279 - accuracy: 0.6671 - val_loss: 1.5495 - val_accuracy: 0.5248\n", - "Epoch 15/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 0.8880 - accuracy: 0.6845 - val_loss: 1.5840 - val_accuracy: 0.5288\n" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "YT0En5VzLOo4" + }, + "source": [ + "**11장 – 심층 신경망 훈련하기**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fmWAAqyeLOo9" + }, + "source": [ + "_이 노트북은 11장에 있는 모든 샘플 코드와 연습문제 해답을 가지고 있습니다._" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"mYZM1mKhLOo9" + }, + "source": [ + "\n", + " \n", + "
\n", + " 구글 코랩에서 실행하기\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4mgMAdZzLOo-" + }, + "source": [ + "# 설정" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EnYYn3E9LOo-" + }, + "source": [ + "먼저 몇 개의 모듈을 임포트합니다. 맷플롯립 그래프를 인라인으로 출력하도록 만들고 그림을 저장하는 함수를 준비합니다. 또한 파이썬 버전이 3.5 이상인지 확인합니다(파이썬 2.x에서도 동작하지만 곧 지원이 중단되므로 파이썬 3을 사용하는 것이 좋습니다). 사이킷런 버전이 0.20 이상인지와 텐서플로 버전이 2.0 이상인지 확인합니다." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "M1cwJnMALOo-" + }, + "source": [ + "# 파이썬 ≥3.5 필수\n", + "import sys\n", + "assert sys.version_info >= (3, 5)\n", + "\n", + "# 사이킷런 ≥0.20 필수\n", + "import sklearn\n", + "assert sklearn.__version__ >= \"0.20\"\n", + "\n", + "# 텐서플로 ≥2.0 필수\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "assert tf.__version__ >= \"2.0\"\n", + "\n", + "%load_ext tensorboard\n", + "\n", + "# 공통 모듈 임포트\n", + "import numpy as np\n", + "import os\n", + "\n", + "# 노트북 실행 결과를 동일하게 유지하기 위해\n", + "np.random.seed(42)\n", + "\n", + "# 깔끔한 그래프 출력을 위해\n", + "%matplotlib inline\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)\n", + "\n", + "# 그림을 저장할 위치\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"deep\"\n", + "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", + "os.makedirs(IMAGES_PATH, exist_ok=True)\n", + "\n", + "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", + " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", + " print(\"그림 저장:\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format=fig_extension, dpi=resolution)" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uts4Q0qDLOpA" + }, + "source": [ + "# 그레이디언트 소실과 폭주 문제" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "C27geIKhLOpA" + }, + 
"source": [ + "def logit(z):\n", + " return 1 / (1 + np.exp(-z))" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "YnIN46vHLOpA", + "outputId": "58f5e1b1-9ff7-46fd-9779-c7eb28f29c1c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 315 + } + }, + "source": [ + "z = np.linspace(-5, 5, 200)\n", + "\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([-5, 5], [1, 1], 'k--')\n", + "plt.plot([0, 0], [-0.2, 1.2], 'k-')\n", + "plt.plot([-5, 5], [-3/4, 7/4], 'g--')\n", + "plt.plot(z, logit(z), \"b-\", linewidth=2)\n", + "props = dict(facecolor='black', shrink=0.1)\n", + "plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.grid(True)\n", + "plt.title(\"Sigmoid activation function\", fontsize=14)\n", + "plt.axis([-5, 5, -0.2, 1.2])\n", + "\n", + "save_fig(\"sigmoid_saturation_plot\")\n", + "plt.show()" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "그림 저장: sigmoid_saturation_plot\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z9my8UBcLOpB" + }, + "source": [ + "## Xavier 초기화와 He 초기화" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "P0IEVGXYLOpC", + "outputId": "d9bbf038-78f4-43bc-8b80-0e352849cea5", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "[name for name in dir(keras.initializers) if not name.startswith(\"_\")]" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['Constant',\n", + " 'GlorotNormal',\n", + " 'GlorotUniform',\n", + " 'HeNormal',\n", + " 'HeUniform',\n", + " 'Identity',\n", + " 'Initializer',\n", + " 'LecunNormal',\n", + " 'LecunUniform',\n", + " 'Ones',\n", + " 'Orthogonal',\n", + " 'RandomNormal',\n", + " 'RandomUniform',\n", + " 'TruncatedNormal',\n", + " 'VarianceScaling',\n", + " 'Zeros',\n", + " 'constant',\n", + " 'deserialize',\n", + " 'get',\n", + " 'glorot_normal',\n", + " 'glorot_uniform',\n", + " 'he_normal',\n", + " 'he_uniform',\n", + " 'identity',\n", + " 'lecun_normal',\n", + " 'lecun_uniform',\n", + " 'ones',\n", + " 'orthogonal',\n", + " 'random_normal',\n", + " 'random_uniform',\n", + " 'serialize',\n", + " 'truncated_normal',\n", + " 'variance_scaling',\n", + " 'zeros']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kBczZ42qLOpC", + "outputId": "72b19158-02f2-4689-b333-3cad5f5ec4bb", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=\"he_normal\")" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "iSo-rN5kLOpC", + "outputId": 
"7fb5008f-e89b-48b9-d6f9-5f7bb5f76f8d", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "init = keras.initializers.VarianceScaling(scale=2., mode='fan_avg',\n", + " distribution='uniform')\n", + "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=init)" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 6 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ehWhAiMGLOpC" + }, + "source": [ + "## 수렴하지 않는 활성화 함수" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xm1OvmTKLOpD" + }, + "source": [ + "### LeakyReLU" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WPfFeM26LOpD" + }, + "source": [ + "def leaky_relu(z, alpha=0.01):\n", + " return np.maximum(alpha*z, z)" + ], + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "mmZMEcRNLOpD", + "outputId": "f26e717a-01f4-436f-d839-57520655d516", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 315 + } + }, + "source": [ + "plt.plot(z, leaky_relu(z, 0.05), \"b-\", linewidth=2)\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([0, 0], [-0.5, 4.2], 'k-')\n", + "plt.grid(True)\n", + "props = dict(facecolor='black', shrink=0.1)\n", + "plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.title(\"Leaky ReLU activation function\", fontsize=14)\n", + "plt.axis([-5, 5, -0.5, 4.2])\n", + "\n", + "save_fig(\"leaky_relu_plot\")\n", + "plt.show()" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "그림 저장: leaky_relu_plot\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "c34zBGD7LOpD", + "outputId": "f4b860b4-3a2d-4a12-e458-de2efaa93051", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "[m for m in dir(keras.activations) if not m.startswith(\"_\")]" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['deserialize',\n", + " 'elu',\n", + " 'exponential',\n", + " 'gelu',\n", + " 'get',\n", + " 'hard_sigmoid',\n", + " 'linear',\n", + " 'relu',\n", + " 'selu',\n", + " 'serialize',\n", + " 'sigmoid',\n", + " 'softmax',\n", + " 'softplus',\n", + " 'softsign',\n", + " 'swish',\n", + " 'tanh']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "eD78rel6LOpD", + "outputId": "777d81bf-2701-4898-96c1-e9f18627cf82", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "[m for m in dir(keras.layers) if \"relu\" in m.lower()]" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['LeakyReLU', 'PReLU', 'ReLU', 'ThresholdedReLU']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 10 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zT5BPUCELOpE" + }, + "source": [ + "LeakyReLU를 사용해 패션 MNIST에서 신경망을 훈련해 보죠:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "aW_rOsgrLOpE", + "outputId": "43769acc-8742-4108-fe80-83acde336789", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n", + "X_train_full = X_train_full / 255.0\n", + "X_test = X_test / 255.0\n", + "X_valid, X_train = X_train_full[:5000], X_train_full[5000:]\n", + "y_valid, y_train = y_train_full[:5000], y_train_full[5000:]" + ], + 
"execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz\n", + "32768/29515 [=================================] - 0s 0us/step\n", + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz\n", + "26427392/26421880 [==============================] - 0s 0us/step\n", + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz\n", + "8192/5148 [===============================================] - 0s 0us/step\n", + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz\n", + "4423680/4422102 [==============================] - 0s 0us/step\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "zvSaZ4qGLOpE" + }, + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n", + " keras.layers.LeakyReLU(),\n", + " keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n", + " keras.layers.LeakyReLU(),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])" + ], + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "8iTzAFZ5LOpE" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "scrolled": true, + "id": "JqVTPhfHLOpE", + "outputId": "9efa9999-4ff7-44b9-94b0-9e5bbd8c1a61", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train, y_train, epochs=10,\n", + " 
validation_data=(X_valid, y_valid))" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "1719/1719 [==============================] - 5s 2ms/step - loss: 1.6314 - accuracy: 0.5054 - val_loss: 0.8886 - val_accuracy: 0.7160\n", + "Epoch 2/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.8416 - accuracy: 0.7247 - val_loss: 0.7130 - val_accuracy: 0.7656\n", + "Epoch 3/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.7053 - accuracy: 0.7637 - val_loss: 0.6427 - val_accuracy: 0.7898\n", + "Epoch 4/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.6325 - accuracy: 0.7908 - val_loss: 0.5900 - val_accuracy: 0.8066\n", + "Epoch 5/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5992 - accuracy: 0.8021 - val_loss: 0.5582 - val_accuracy: 0.8198\n", + "Epoch 6/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5624 - accuracy: 0.8142 - val_loss: 0.5350 - val_accuracy: 0.8238\n", + "Epoch 7/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5379 - accuracy: 0.8217 - val_loss: 0.5156 - val_accuracy: 0.8304\n", + "Epoch 8/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5152 - accuracy: 0.8296 - val_loss: 0.5079 - val_accuracy: 0.8284\n", + "Epoch 9/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5100 - accuracy: 0.8270 - val_loss: 0.4895 - val_accuracy: 0.8388\n", + "Epoch 10/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.4918 - accuracy: 0.8339 - val_loss: 0.4817 - val_accuracy: 0.8398\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O4AS1dmTLOpE" + }, + "source": [ + "PReLU를 테스트해 보죠:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "eLqcJVxfLOpF" + }, + "source": [ + "tf.random.set_seed(42)\n", + 
"np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n", + " keras.layers.PReLU(),\n", + " keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n", + " keras.layers.PReLU(),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])" + ], + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "BMKER0vWLOpF" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "eb_t9z8gLOpF", + "outputId": "20f6177f-04fa-4de4-905d-ab7e94014b71", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train, y_train, epochs=10,\n", + " validation_data=(X_valid, y_valid))" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 1.6969 - accuracy: 0.4974 - val_loss: 0.9255 - val_accuracy: 0.7186\n", + "Epoch 2/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.8706 - accuracy: 0.7246 - val_loss: 0.7305 - val_accuracy: 0.7630\n", + "Epoch 3/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.7211 - accuracy: 0.7621 - val_loss: 0.6565 - val_accuracy: 0.7882\n", + "Epoch 4/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.6448 - accuracy: 0.7880 - val_loss: 0.6003 - val_accuracy: 0.8046\n", + "Epoch 5/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.6078 - accuracy: 0.8003 - val_loss: 0.5656 - val_accuracy: 0.8184\n", + "Epoch 6/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5693 - accuracy: 0.8119 - 
val_loss: 0.5406 - val_accuracy: 0.8238\n", + "Epoch 7/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5428 - accuracy: 0.8193 - val_loss: 0.5196 - val_accuracy: 0.8314\n", + "Epoch 8/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5193 - accuracy: 0.8283 - val_loss: 0.5113 - val_accuracy: 0.8318\n", + "Epoch 9/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5129 - accuracy: 0.8273 - val_loss: 0.4917 - val_accuracy: 0.8380\n", + "Epoch 10/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4941 - accuracy: 0.8313 - val_loss: 0.4826 - val_accuracy: 0.8396\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Vkr7NgOmLOpF" + }, + "source": [ + "### ELU" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6znM2r0RLOpF" + }, + "source": [ + "def elu(z, alpha=1):\n", + " return np.where(z < 0, alpha * (np.exp(z) - 1), z)" + ], + "execution_count": 18, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "WpZo3sG7LOpF", + "outputId": "8f1f0b6e-6585-4c53-d651-159fe61b98e4", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 315 + } + }, + "source": [ + "plt.plot(z, elu(z), \"b-\", linewidth=2)\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([-5, 5], [-1, -1], 'k--')\n", + "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n", + "plt.grid(True)\n", + "plt.title(r\"ELU activation function ($\\alpha=1$)\", fontsize=14)\n", + "plt.axis([-5, 5, -2.2, 3.2])\n", + "\n", + "save_fig(\"elu_plot\")\n", + "plt.show()" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "text": [ + "그림 저장: elu_plot\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CFEUCydjLOpF" + }, + "source": [ + "텐서플로에서 쉽게 ELU를 적용할 수 있습니다. 층을 만들 때 활성화 함수로 지정하면 됩니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "zFuLenUVLOpG", + "outputId": "e03fe963-ee36-47a5-faf8-243a0968f03e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.layers.Dense(10, activation=\"elu\")" + ], + "execution_count": 20, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 20 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mkd4JNYELOpG" + }, + "source": [ + "### SELU" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mzoAhWWhLOpG" + }, + "source": [ + "Günter Klambauer, Thomas Unterthiner, Andreas Mayr는 2017년 한 [훌륭한 논문](https://arxiv.org/pdf/1706.02515.pdf)에서 SELU 활성화 함수를 소개했습니다. 완전 연결 층만 쌓아서 신경망을 만들고 SELU 활성화 함수와 LeCun 초기화를 사용한다면 훈련하는 동안 자기 정규화됩니다. 각 층의 출력이 평균과\n", + "표준 편차를 보존하는 경향이 있습니다. 이는 그레이디언트 소실과 폭주 문제를 막아줍니다. 그 결과로 SELU 활성화 함수는 이런 종류의 네트워크(특히 아주 깊은 네트워크)에서 다른 활성화 함수보다 뛰어난 성능을 종종 냅니다. 따라서 꼭 시도해 봐야 합니다. 하지만 SELU 활성화 함수의 자기 정규화 특징은 쉽게 깨집니다. ℓ1이나 ℓ2 정규화, 드롭아웃, 맥스 노름, 스킵 연결이나 시퀀셜하지 않은 다른 토폴로지를 사용할 수 없습니다(즉 순환 신경망은 자기 정규화되지 않습니다). 하지만 실전에서 시퀀셜 CNN과 잘 동작합니다. 자기 정규화가 깨지면 SELU가 다른 활성화 함수보다 더 나은 성능을 내지 않을 것입니다."
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "NYXUfjHCLOpG" + }, + "source": [ + "from scipy.special import erfc\n", + "\n", + "# alpha와 scale은 평균 0과 표준 편차 1로 자기 정규화합니다\n", + "# (논문에 있는 식 14 참조):\n", + "alpha_0_1 = -np.sqrt(2 / np.pi) / (erfc(1/np.sqrt(2)) * np.exp(1/2) - 1)\n", + "scale_0_1 = (1 - erfc(1 / np.sqrt(2)) * np.sqrt(np.e)) * np.sqrt(2 * np.pi) * (2 * erfc(np.sqrt(2))*np.e**2 + np.pi*erfc(1/np.sqrt(2))**2*np.e - 2*(2+np.pi)*erfc(1/np.sqrt(2))*np.sqrt(np.e)+np.pi+2)**(-1/2)" + ], + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "fyup_FSkLOpG" + }, + "source": [ + "def selu(z, scale=scale_0_1, alpha=alpha_0_1):\n", + " return scale * elu(z, alpha)" + ], + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "cNpfVGctLOpH", + "outputId": "d29c5855-b472-4dac-d118-465af3c5b7ae", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 315 + } + }, + "source": [ + "plt.plot(z, selu(z), \"b-\", linewidth=2)\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([-5, 5], [-1.758, -1.758], 'k--')\n", + "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n", + "plt.grid(True)\n", + "plt.title(\"SELU activation function\", fontsize=14)\n", + "plt.axis([-5, 5, -2.2, 3.2])\n", + "\n", + "save_fig(\"selu_plot\")\n", + "plt.show()" + ], + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "text": [ + "그림 저장: selu_plot\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZFaak_jnLOpH" + }, + "source": [ + "기본적으로 SELU 하이퍼파라미터(`scale`과 `alpha`)는 각 뉴런의 평균 출력이 0에 가깝고 표준 편차는 1에 가깝도록 조정됩니다(입력은 평균이 0이고 표준 편차 1로 표준화되었다고 가정합니다). 이 활성화 함수를 사용하면 1,000개의 층이 있는 심층 신경망도 모든 층에 걸쳐 거의 평균이 0이고 표준 편차를 1로 유지합니다. 이를 통해 그레이디언트 폭주와 소실 문제를 피할 수 있습니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qWawJStoLOpH", + "outputId": "38cfdca8-2c4e-4f69-ce42-366f34c49ee6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "np.random.seed(42)\n", + "Z = np.random.normal(size=(500, 100)) # 표준화된 입력\n", + "for layer in range(1000):\n", + " W = np.random.normal(size=(100, 100), scale=np.sqrt(1 / 100)) # LeCun 초기화\n", + " Z = selu(np.dot(Z, W))\n", + " means = np.mean(Z, axis=0).mean()\n", + " stds = np.std(Z, axis=0).mean()\n", + " if layer % 100 == 0:\n", + " print(\"Layer {}: mean {:.2f}, std deviation {:.2f}\".format(layer, means, stds))" + ], + "execution_count": 24, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Layer 0: mean -0.00, std deviation 1.00\n", + "Layer 100: mean 0.02, std deviation 0.96\n", + "Layer 200: mean 0.01, std deviation 0.90\n", + "Layer 300: mean -0.02, std deviation 0.92\n", + "Layer 400: mean 0.05, std deviation 0.89\n", + "Layer 500: mean 0.01, std deviation 0.93\n", + "Layer 600: mean 0.02, std deviation 0.92\n", + "Layer 700: mean -0.02, std deviation 0.90\n", + "Layer 800: mean 0.05, std deviation 0.83\n", + "Layer 900: mean 0.02, std deviation 1.00\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j5dAdmg4LOpI" + }, + "source": [ + "쉽게 SELU를 사용할 수 있습니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qFGwIMj2LOpI", + "outputId": "f9d69899-51cc-4c31-ecdf-429787846e20", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.layers.Dense(10, 
activation=\"selu\",\n", + " kernel_initializer=\"lecun_normal\")" + ], + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 25 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1D-HvWzgLOpI" + }, + "source": [ + "100개의 은닉층과 SELU 활성화 함수를 사용한 패션 MNIST를 위한 신경망을 만들어 보죠:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YS-krPA1LOpI" + }, + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ], + "execution_count": 26, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "fXLkoFMELOpI" + }, + "source": [ + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[28, 28]))\n", + "model.add(keras.layers.Dense(300, activation=\"selu\",\n", + " kernel_initializer=\"lecun_normal\"))\n", + "for layer in range(99):\n", + " model.add(keras.layers.Dense(100, activation=\"selu\",\n", + " kernel_initializer=\"lecun_normal\"))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))" + ], + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "hxrqewr6LOpI" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s2oGm-VSLOpJ" + }, + "source": [ + "이제 훈련해 보죠. 
입력을 평균 0과 표준 편차 1로 바꾸어야 한다는 것을 잊지 마세요:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FBS7ewjeLOpJ" + }, + "source": [ + "pixel_means = X_train.mean(axis=0, keepdims=True)\n", + "pixel_stds = X_train.std(axis=0, keepdims=True)\n", + "X_train_scaled = (X_train - pixel_means) / pixel_stds\n", + "X_valid_scaled = (X_valid - pixel_means) / pixel_stds\n", + "X_test_scaled = (X_test - pixel_means) / pixel_stds" + ], + "execution_count": 29, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "DVONLiENLOpJ", + "outputId": "fa5c2881-0f6e-41e1-dbb4-b628f3e84538", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train_scaled, y_train, epochs=5,\n", + " validation_data=(X_valid_scaled, y_valid))" + ], + "execution_count": 30, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "1719/1719 [==============================] - 17s 8ms/step - loss: 1.5863 - accuracy: 0.3956 - val_loss: 0.9179 - val_accuracy: 0.6592\n", + "Epoch 2/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.8182 - accuracy: 0.6961 - val_loss: 0.6438 - val_accuracy: 0.7696\n", + "Epoch 3/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.6735 - accuracy: 0.7521 - val_loss: 0.6299 - val_accuracy: 0.7524\n", + "Epoch 4/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.5877 - accuracy: 0.7837 - val_loss: 0.5827 - val_accuracy: 0.7988\n", + "Epoch 5/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.5487 - accuracy: 0.8016 - val_loss: 0.5130 - val_accuracy: 0.8224\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5k-Ud-OSLOpJ" + }, + "source": [ + "대신 ReLU 활성화 함수를 사용하면 어떤 일이 일어나는지 확인해 보죠:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EFFeHw5pLOpK" + }, + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ], + 
"execution_count": 31, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "EIUDPcNJLOpK" + }, + "source": [ + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[28, 28]))\n", + "model.add(keras.layers.Dense(300, activation=\"relu\", kernel_initializer=\"he_normal\"))\n", + "for layer in range(99):\n", + " model.add(keras.layers.Dense(100, activation=\"relu\", kernel_initializer=\"he_normal\"))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))" + ], + "execution_count": 32, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "xZxJ2w38LOpK" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "R94X2ls5LOpL", + "outputId": "3e1ad881-0171-4bd1-9ff5-7d969926d285", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train_scaled, y_train, epochs=5,\n", + " validation_data=(X_valid_scaled, y_valid))" + ], + "execution_count": 34, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "1719/1719 [==============================] - 17s 8ms/step - loss: 2.0576 - accuracy: 0.2020 - val_loss: 1.3588 - val_accuracy: 0.3958\n", + "Epoch 2/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 1.2621 - accuracy: 0.4541 - val_loss: 0.9904 - val_accuracy: 0.5662\n", + "Epoch 3/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.9950 - accuracy: 0.5893 - val_loss: 0.8148 - val_accuracy: 0.6720\n", + "Epoch 4/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.8142 - accuracy: 0.6783 - val_loss: 0.7579 - val_accuracy: 0.7082\n", + "Epoch 5/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.8113 - accuracy: 0.6770 - val_loss: 
0.8736 - val_accuracy: 0.6720\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pBPfwJ4WLOpL" + }, + "source": [ + "좋지 않군요. 그레이디언트 폭주나 소실 문제가 발생한 것입니다." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PVu2Q063LOpL" + }, + "source": [ + "# 배치 정규화" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RLBH8PjPLOpL" + }, + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Dense(300, activation=\"relu\"),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Dense(100, activation=\"relu\"),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])" + ], + "execution_count": 35, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "d5L_RucvLOpL", + "outputId": "b4b0c837-c48a-4ac0-da2d-a946764b8d58", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model.summary()" + ], + "execution_count": 36, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model: \"sequential_4\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "flatten_4 (Flatten) (None, 784) 0 \n", + "_________________________________________________________________\n", + "batch_normalization (BatchNo (None, 784) 3136 \n", + "_________________________________________________________________\n", + "dense_212 (Dense) (None, 300) 235500 \n", + "_________________________________________________________________\n", + "batch_normalization_1 (Batch (None, 300) 1200 \n", + "_________________________________________________________________\n", + "dense_213 (Dense) (None, 100) 30100 \n", + "_________________________________________________________________\n", + "batch_normalization_2 
(Batch (None, 100) 400 \n", + "_________________________________________________________________\n", + "dense_214 (Dense) (None, 10) 1010 \n", + "=================================================================\n", + "Total params: 271,346\n", + "Trainable params: 268,978\n", + "Non-trainable params: 2,368\n", + "_________________________________________________________________\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "S4YmgIVtLOpL", + "outputId": "1830a1e3-5a00-4dab-b6c1-df4a11b7c0b4", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "bn1 = model.layers[1]\n", + "[(var.name, var.trainable) for var in bn1.variables]" + ], + "execution_count": 37, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[('batch_normalization/gamma:0', True),\n", + " ('batch_normalization/beta:0', True),\n", + " ('batch_normalization/moving_mean:0', False),\n", + " ('batch_normalization/moving_variance:0', False)]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ya2gRrxPLOpM", + "outputId": "90f2b7c4-9fd7-4aa1-b182-9508cc95ddd1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "bn1.updates" + ], + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:1402: UserWarning: `layer.updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.\n", + " warnings.warn('`layer.updates` will be removed in a future version. 
'\n" + ], + "name": "stderr" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 38 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TqJdxp-vLOpN" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 39, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "yNutyZRELOpN", + "outputId": "ef471abc-7bc4-44be-cc40-5068a8b7b2c9", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train, y_train, epochs=10,\n", + " validation_data=(X_valid, y_valid))" + ], + "execution_count": 40, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 1.2287 - accuracy: 0.5994 - val_loss: 0.5525 - val_accuracy: 0.8230\n", + "Epoch 2/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5995 - accuracy: 0.7958 - val_loss: 0.4725 - val_accuracy: 0.8470\n", + "Epoch 3/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5312 - accuracy: 0.8172 - val_loss: 0.4375 - val_accuracy: 0.8550\n", + "Epoch 4/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4885 - accuracy: 0.8294 - val_loss: 0.4152 - val_accuracy: 0.8604\n", + "Epoch 5/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4718 - accuracy: 0.8349 - val_loss: 0.3997 - val_accuracy: 0.8644\n", + "Epoch 6/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4420 - accuracy: 0.8461 - val_loss: 0.3867 - val_accuracy: 0.8690\n", + "Epoch 7/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4286 - accuracy: 0.8495 - val_loss: 0.3763 - val_accuracy: 0.8702\n", + "Epoch 8/10\n", + "1719/1719 
[==============================] - 5s 3ms/step - loss: 0.4087 - accuracy: 0.8551 - val_loss: 0.3713 - val_accuracy: 0.8740\n", + "Epoch 9/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4080 - accuracy: 0.8564 - val_loss: 0.3631 - val_accuracy: 0.8750\n", + "Epoch 10/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3903 - accuracy: 0.8616 - val_loss: 0.3571 - val_accuracy: 0.8754\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IeYDw1BwLOpN" + }, + "source": [ + "이따금 활성화 함수 전에 BN을 적용해도 잘 동작합니다(여기에는 논란의 여지가 있습니다). 또한 `BatchNormalization` 층 이전의 층은 편향을 위한 항이 필요 없습니다. `BatchNormalization` 층이 이를 무효화하기 때문입니다. 따라서 필요 없는 파라미터이므로 `use_bias=False`를 지정하여 층을 만들 수 있습니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-QU5Mn0fLOpN" + }, + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Dense(300, use_bias=False),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Activation(\"relu\"),\n", + " keras.layers.Dense(100, use_bias=False),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Activation(\"relu\"),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])" + ], + "execution_count": 41, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "1Y-7t8DsLOpN" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 42, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "fhatQCD6LOpN", + "outputId": "804ad977-910d-4ed8-a241-8e914ed0ac0c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train, y_train, epochs=10,\n", + " validation_data=(X_valid, y_valid))" + ], + "execution_count": 43, + "outputs": [
+ { + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 1.3677 - accuracy: 0.5605 - val_loss: 0.6767 - val_accuracy: 0.7812\n", + "Epoch 2/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.7136 - accuracy: 0.7702 - val_loss: 0.5566 - val_accuracy: 0.8182\n", + "Epoch 3/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.6123 - accuracy: 0.7990 - val_loss: 0.5007 - val_accuracy: 0.8362\n", + "Epoch 4/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5547 - accuracy: 0.8148 - val_loss: 0.4666 - val_accuracy: 0.8448\n", + "Epoch 5/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5254 - accuracy: 0.8232 - val_loss: 0.4433 - val_accuracy: 0.8532\n", + "Epoch 6/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4947 - accuracy: 0.8326 - val_loss: 0.4262 - val_accuracy: 0.8550\n", + "Epoch 7/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4736 - accuracy: 0.8388 - val_loss: 0.4130 - val_accuracy: 0.8564\n", + "Epoch 8/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4550 - accuracy: 0.8443 - val_loss: 0.4034 - val_accuracy: 0.8612\n", + "Epoch 9/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4495 - accuracy: 0.8439 - val_loss: 0.3943 - val_accuracy: 0.8638\n", + "Epoch 10/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4333 - accuracy: 0.8495 - val_loss: 0.3874 - val_accuracy: 0.8660\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CWhcWzXCLOpO" + }, + "source": [ + "## 그레이디언트 클리핑" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L73OupHpLOpO" + }, + "source": [ + "모든 케라스 옵티마이저는 `clipnorm`이나 `clipvalue` 매개변수를 지원합니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PN-aVXL-LOpO" + 
}, + "source": [ + "optimizer = keras.optimizers.SGD(clipvalue=1.0)" + ], + "execution_count": 44, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "g9Wj01r7LOpO" + }, + "source": [ + "optimizer = keras.optimizers.SGD(clipnorm=1.0)" + ], + "execution_count": 45, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rxEJ6pNyLOpO" + }, + "source": [ + "## 사전 훈련된 층 재사용하기" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u-g_PGcmLOpO" + }, + "source": [ + "### 케라스 모델 재사용하기" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rdiVjfkOLOpO" + }, + "source": [ + "패션 MNIST 훈련 세트를 두 개로 나누어 보죠:\n", + "* `X_train_A`: 샌들과 셔츠(클래스 5와 6)을 제외한 모든 이미지\n", + "* `X_train_B`: 샌들과 셔츠 이미지 중 처음 200개만 가진 작은 훈련 세트\n", + "\n", + "검증 세트와 테스트 세트도 이렇게 나눕니다. 하지만 이미지 개수는 제한하지 않습니다.\n", + "\n", + "A 세트(8개의 클래스를 가진 분류 문제)에서 모델을 훈련하고 이를 재사용하여 B 세트(이진 분류)를 해결해 보겠습니다. A 작업에서 B 작업으로 약간의 지식이 전달되기를 기대합니다. 왜냐하면 A 세트의 클래스(스니커즈, 앵클 부츠, 코트, 티셔츠 등)가 B 세트에 있는 클래스(샌들과 셔츠)와 조금 비슷하기 때문입니다. 하지만 `Dense` 층을 사용하기 때문에 동일한 위치에 나타난 패턴만 재사용할 수 있습니다(반대로 합성곱 층은 훨씬 많은 정보를 전달합니다. 학습한 패턴을 이미지의 어느 위치에서나 감지할 수 있기 때문입니다. CNN 장에서 자세히 알아 보겠습니다)."
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "QKHsibRsLOpO" + }, + "source": [ + "def split_dataset(X, y):\n", + " y_5_or_6 = (y == 5) | (y == 6) # sandals or shirts\n", + " y_A = y[~y_5_or_6]\n", + " y_A[y_A > 6] -= 2 # class indices 7, 8, 9 should be moved to 5, 6, 7\n", + " y_B = (y[y_5_or_6] == 6).astype(np.float32) # binary classification task: is it a shirt (class 6)?\n", + " return ((X[~y_5_or_6], y_A),\n", + " (X[y_5_or_6], y_B))\n", + "\n", + "(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)\n", + "(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)\n", + "(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)\n", + "X_train_B = X_train_B[:200]\n", + "y_train_B = y_train_B[:200]" + ], + "execution_count": 46, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "2s04GUdELOpO", + "outputId": "e7e01e53-fa88-485a-b086-c2269d7e2c61", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "X_train_A.shape" + ], + "execution_count": 47, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(43986, 28, 28)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kcEEehIGLOpP", + "outputId": "65d1002c-3fba-49fb-cd77-bea76dddc1c6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "X_train_B.shape" + ], + "execution_count": 48, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(200, 28, 28)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "smwubTT0LOpP", + "outputId": "c801b7f2-5397-483e-e840-d3141878d4ae", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "y_train_A[:30]" + ], + "execution_count": 49, + "outputs": [ + { + "output_type": 
"execute_result", + "data": { + "text/plain": [ + "array([4, 0, 5, 7, 7, 7, 4, 4, 3, 4, 0, 1, 6, 3, 4, 3, 2, 6, 5, 3, 4, 5,\n", + " 1, 3, 4, 2, 0, 6, 7, 1], dtype=uint8)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 49 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hNJqisASLOpP", + "outputId": "9551ff38-fe97-4a93-94d2-f84b19ab4efb", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "y_train_B[:30]" + ], + "execution_count": 50, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0.,\n", + " 0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.], dtype=float32)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 50 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2qkd71BzLOpP" + }, + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)" + ], + "execution_count": 51, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "_dou1YzGLOpP" + }, + "source": [ + "model_A = keras.models.Sequential()\n", + "model_A.add(keras.layers.Flatten(input_shape=[28, 28]))\n", + "for n_hidden in (300, 100, 50, 50, 50):\n", + " model_A.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n", + "model_A.add(keras.layers.Dense(8, activation=\"softmax\"))" + ], + "execution_count": 52, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "C6TIeUe-LOpQ" + }, + "source": [ + "model_A.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 53, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "4xR6ixj8LOpQ", + "outputId": "ec8f3003-4b1d-4c68-f8fc-b154cc26a402", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model_A.fit(X_train_A, y_train_A, epochs=20,\n", + " validation_data=(X_valid_A, 
y_valid_A))" + ], + "execution_count": 54, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/20\n", + "1375/1375 [==============================] - 4s 2ms/step - loss: 0.9248 - accuracy: 0.6994 - val_loss: 0.3894 - val_accuracy: 0.8665\n", + "Epoch 2/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.3651 - accuracy: 0.8748 - val_loss: 0.3288 - val_accuracy: 0.8829\n", + "Epoch 3/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.3182 - accuracy: 0.8894 - val_loss: 0.3012 - val_accuracy: 0.8996\n", + "Epoch 4/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.3049 - accuracy: 0.8955 - val_loss: 0.2895 - val_accuracy: 0.9013\n", + "Epoch 5/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2804 - accuracy: 0.9027 - val_loss: 0.2774 - val_accuracy: 0.9063\n", + "Epoch 6/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2702 - accuracy: 0.9079 - val_loss: 0.2734 - val_accuracy: 0.9066\n", + "Epoch 7/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2626 - accuracy: 0.9088 - val_loss: 0.2720 - val_accuracy: 0.9083\n", + "Epoch 8/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2609 - accuracy: 0.9119 - val_loss: 0.2591 - val_accuracy: 0.9138\n", + "Epoch 9/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2558 - accuracy: 0.9110 - val_loss: 0.2563 - val_accuracy: 0.9143\n", + "Epoch 10/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2511 - accuracy: 0.9139 - val_loss: 0.2543 - val_accuracy: 0.9158\n", + "Epoch 11/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2431 - accuracy: 0.9173 - val_loss: 0.2496 - val_accuracy: 0.9153\n", + "Epoch 12/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2422 - accuracy: 0.9170 - val_loss: 0.2512 - val_accuracy: 0.9126\n", 
+ "Epoch 13/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2360 - accuracy: 0.9179 - val_loss: 0.2446 - val_accuracy: 0.9158\n", + "Epoch 14/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2267 - accuracy: 0.9230 - val_loss: 0.2416 - val_accuracy: 0.9175\n", + "Epoch 15/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2225 - accuracy: 0.9241 - val_loss: 0.2450 - val_accuracy: 0.9188\n", + "Epoch 16/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2262 - accuracy: 0.9214 - val_loss: 0.2386 - val_accuracy: 0.9193\n", + "Epoch 17/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2191 - accuracy: 0.9252 - val_loss: 0.2405 - val_accuracy: 0.9178\n", + "Epoch 18/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2172 - accuracy: 0.9253 - val_loss: 0.2426 - val_accuracy: 0.9158\n", + "Epoch 19/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2181 - accuracy: 0.9246 - val_loss: 0.2331 - val_accuracy: 0.9213\n", + "Epoch 20/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2113 - accuracy: 0.9271 - val_loss: 0.2332 - val_accuracy: 0.9203\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8UMBGi07LOpQ" + }, + "source": [ + "model_A.save(\"my_model_A.h5\")" + ], + "execution_count": 55, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "qb4XQRc2LOpQ" + }, + "source": [ + "model_B = keras.models.Sequential()\n", + "model_B.add(keras.layers.Flatten(input_shape=[28, 28]))\n", + "for n_hidden in (300, 100, 50, 50, 50):\n", + " model_B.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n", + "model_B.add(keras.layers.Dense(1, activation=\"sigmoid\"))" + ], + "execution_count": 56, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "bTfx9aSGLOpQ" + }, + "source": [ + 
"model_B.compile(loss=\"binary_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 57, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "dd6o7rDmLOpQ", + "outputId": "b1022791-e04c-438b-db1d-7da924db831c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model_B.fit(X_train_B, y_train_B, epochs=20,\n", + " validation_data=(X_valid_B, y_valid_B))" + ], + "execution_count": 58, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/20\n", + "7/7 [==============================] - 1s 35ms/step - loss: 1.0360 - accuracy: 0.4975 - val_loss: 0.6314 - val_accuracy: 0.6004\n", + "Epoch 2/20\n", + "7/7 [==============================] - 0s 16ms/step - loss: 0.5883 - accuracy: 0.6971 - val_loss: 0.4784 - val_accuracy: 0.8529\n", + "Epoch 3/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.4380 - accuracy: 0.8854 - val_loss: 0.4102 - val_accuracy: 0.8945\n", + "Epoch 4/20\n", + "7/7 [==============================] - 0s 13ms/step - loss: 0.4021 - accuracy: 0.8712 - val_loss: 0.3647 - val_accuracy: 0.9178\n", + "Epoch 5/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.3361 - accuracy: 0.9348 - val_loss: 0.3300 - val_accuracy: 0.9320\n", + "Epoch 6/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.3113 - accuracy: 0.9233 - val_loss: 0.3019 - val_accuracy: 0.9402\n", + "Epoch 7/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.2817 - accuracy: 0.9299 - val_loss: 0.2804 - val_accuracy: 0.9422\n", + "Epoch 8/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.2632 - accuracy: 0.9379 - val_loss: 0.2606 - val_accuracy: 0.9473\n", + "Epoch 9/20\n", + "7/7 [==============================] - 0s 16ms/step - loss: 0.2373 - accuracy: 0.9481 - val_loss: 0.2428 - val_accuracy: 0.9523\n", + "Epoch 10/20\n", + "7/7 
[==============================] - 0s 14ms/step - loss: 0.2229 - accuracy: 0.9657 - val_loss: 0.2281 - val_accuracy: 0.9544\n", + "Epoch 11/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.2155 - accuracy: 0.9590 - val_loss: 0.2150 - val_accuracy: 0.9584\n", + "Epoch 12/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1834 - accuracy: 0.9738 - val_loss: 0.2036 - val_accuracy: 0.9584\n", + "Epoch 13/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1671 - accuracy: 0.9828 - val_loss: 0.1931 - val_accuracy: 0.9615\n", + "Epoch 14/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1527 - accuracy: 0.9915 - val_loss: 0.1838 - val_accuracy: 0.9635\n", + "Epoch 15/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1595 - accuracy: 0.9904 - val_loss: 0.1746 - val_accuracy: 0.9686\n", + "Epoch 16/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1473 - accuracy: 0.9937 - val_loss: 0.1674 - val_accuracy: 0.9686\n", + "Epoch 17/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1412 - accuracy: 0.9944 - val_loss: 0.1604 - val_accuracy: 0.9706\n", + "Epoch 18/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1242 - accuracy: 0.9931 - val_loss: 0.1539 - val_accuracy: 0.9706\n", + "Epoch 19/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1224 - accuracy: 0.9931 - val_loss: 0.1482 - val_accuracy: 0.9716\n", + "Epoch 20/20\n", + "7/7 [==============================] - 0s 15ms/step - loss: 0.1096 - accuracy: 0.9912 - val_loss: 0.1431 - val_accuracy: 0.9716\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Bh48HMITLOpQ", + "outputId": "8996eafe-9ad5-4013-8f80-733cd9f94a78", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model.summary()" + ], + "execution_count": 59, + "outputs": [ + { + "output_type": "stream", + 
"text": [ + "Model: \"sequential_5\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "flatten_5 (Flatten) (None, 784) 0 \n", + "_________________________________________________________________\n", + "batch_normalization_3 (Batch (None, 784) 3136 \n", + "_________________________________________________________________\n", + "dense_215 (Dense) (None, 300) 235200 \n", + "_________________________________________________________________\n", + "batch_normalization_4 (Batch (None, 300) 1200 \n", + "_________________________________________________________________\n", + "activation (Activation) (None, 300) 0 \n", + "_________________________________________________________________\n", + "dense_216 (Dense) (None, 100) 30000 \n", + "_________________________________________________________________\n", + "batch_normalization_5 (Batch (None, 100) 400 \n", + "_________________________________________________________________\n", + "activation_1 (Activation) (None, 100) 0 \n", + "_________________________________________________________________\n", + "dense_217 (Dense) (None, 10) 1010 \n", + "=================================================================\n", + "Total params: 270,946\n", + "Trainable params: 268,578\n", + "Non-trainable params: 2,368\n", + "_________________________________________________________________\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xpurR4NDLOpQ" + }, + "source": [ + "model_A = keras.models.load_model(\"my_model_A.h5\")\n", + "model_B_on_A = keras.models.Sequential(model_A.layers[:-1])\n", + "model_B_on_A.add(keras.layers.Dense(1, activation=\"sigmoid\"))" + ], + "execution_count": 60, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "XPRZk6DqLOpR" + }, + "source": [ + "model_A_clone = keras.models.clone_model(model_A)\n", + 
"model_A_clone.set_weights(model_A.get_weights())" + ], + "execution_count": 61, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "kNxjby1PLOpR" + }, + "source": [ + "for layer in model_B_on_A.layers[:-1]:\n", + " layer.trainable = False\n", + "\n", + "model_B_on_A.compile(loss=\"binary_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 62, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "yvlj3S1jLOpR", + "outputId": "0a930150-7ddf-4eff-bd36-ac3bd55bb243", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=4,\n", + " validation_data=(X_valid_B, y_valid_B))\n", + "\n", + "for layer in model_B_on_A.layers[:-1]:\n", + " layer.trainable = True\n", + "\n", + "model_B_on_A.compile(loss=\"binary_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])\n", + "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=16,\n", + " validation_data=(X_valid_B, y_valid_B))" + ], + "execution_count": 63, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/4\n", + "7/7 [==============================] - 1s 56ms/step - loss: 0.6098 - accuracy: 0.6233 - val_loss: 0.5795 - val_accuracy: 0.6389\n", + "Epoch 2/4\n", + "7/7 [==============================] - 0s 16ms/step - loss: 0.5498 - accuracy: 0.6707 - val_loss: 0.5424 - val_accuracy: 0.6815\n", + "Epoch 3/4\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.4847 - accuracy: 0.7509 - val_loss: 0.5107 - val_accuracy: 0.7110\n", + "Epoch 4/4\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.4849 - accuracy: 0.7405 - val_loss: 0.4823 - val_accuracy: 0.7343\n", + "Epoch 1/16\n", + "7/7 [==============================] - 1s 36ms/step - loss: 0.4345 - accuracy: 0.7823 - val_loss: 0.3440 - val_accuracy: 0.8671\n", + "Epoch 2/16\n", + "7/7 
[==============================] - 0s 14ms/step - loss: 0.2951 - accuracy: 0.9143 - val_loss: 0.2593 - val_accuracy: 0.9290\n", + "Epoch 3/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.2023 - accuracy: 0.9777 - val_loss: 0.2104 - val_accuracy: 0.9554\n", + "Epoch 4/16\n", + "7/7 [==============================] - 0s 15ms/step - loss: 0.1745 - accuracy: 0.9789 - val_loss: 0.1786 - val_accuracy: 0.9696\n", + "Epoch 5/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1341 - accuracy: 0.9809 - val_loss: 0.1559 - val_accuracy: 0.9767\n", + "Epoch 6/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1169 - accuracy: 0.9973 - val_loss: 0.1391 - val_accuracy: 0.9807\n", + "Epoch 7/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1134 - accuracy: 0.9931 - val_loss: 0.1266 - val_accuracy: 0.9838\n", + "Epoch 8/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0998 - accuracy: 0.9931 - val_loss: 0.1163 - val_accuracy: 0.9858\n", + "Epoch 9/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0832 - accuracy: 1.0000 - val_loss: 0.1066 - val_accuracy: 0.9888\n", + "Epoch 10/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0774 - accuracy: 1.0000 - val_loss: 0.1000 - val_accuracy: 0.9899\n", + "Epoch 11/16\n", + "7/7 [==============================] - 0s 15ms/step - loss: 0.0689 - accuracy: 1.0000 - val_loss: 0.0940 - val_accuracy: 0.9899\n", + "Epoch 12/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0719 - accuracy: 1.0000 - val_loss: 0.0889 - val_accuracy: 0.9899\n", + "Epoch 13/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0566 - accuracy: 1.0000 - val_loss: 0.0840 - val_accuracy: 0.9899\n", + "Epoch 14/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0494 - accuracy: 1.0000 - val_loss: 0.0803 - val_accuracy: 0.9899\n", + "Epoch 15/16\n", + "7/7 
[==============================] - 0s 15ms/step - loss: 0.0545 - accuracy: 1.0000 - val_loss: 0.0770 - val_accuracy: 0.9899\n", + "Epoch 16/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0472 - accuracy: 1.0000 - val_loss: 0.0740 - val_accuracy: 0.9899\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ceskFW97LOpR" + }, + "source": [ + "마지막 점수는 어떤가요?" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "fmdxUJ9FLOpR", + "outputId": "6d812b28-28b8-46e3-eebc-84aa1f6bbb7f", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model_B.evaluate(X_test_B, y_test_B)" + ], + "execution_count": 64, + "outputs": [ + { + "output_type": "stream", + "text": [ + "63/63 [==============================] - 0s 2ms/step - loss: 0.1408 - accuracy: 0.9705\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.1408407986164093, 0.9704999923706055]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 64 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "804aagCRLOpR", + "outputId": "bfa379e0-7c67-45bd-ff9c-177f0ceb1645", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model_B_on_A.evaluate(X_test_B, y_test_B)" + ], + "execution_count": 65, + "outputs": [ + { + "output_type": "stream", + "text": [ + "63/63 [==============================] - 0s 2ms/step - loss: 0.0682 - accuracy: 0.9935\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.06819812208414078, 0.9934999942779541]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 65 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eoEbJs2sLOpR" + }, + "source": [ + "훌륭하네요! 꽤 많은 정보를 전달했습니다: 오차율이 4배나 줄었네요!" 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2Oge57HRLOpS", + "outputId": "213f9022-8f47-46c8-bb2f-e3d375e8b215", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "(100 - 96.95) / (100 - 99.25)" + ], + "execution_count": 66, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "4.066666666666663" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 66 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t1Y9NJBjLOpS" + }, + "source": [ + "# 고속 옵티마이저" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ie9qzCm6LOpS" + }, + "source": [ + "## 모멘텀 옵티마이저" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-T_eBS6iLOpS" + }, + "source": [ + "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9)" + ], + "execution_count": 67, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gvoYzuDRLOpT" + }, + "source": [ + "## 네스테로프 가속 경사" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DbUw8WL2LOpT" + }, + "source": [ + "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9, nesterov=True)" + ], + "execution_count": 68, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MzY29JPMLOpT" + }, + "source": [ + "## AdaGrad" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dR9NgMEoLOpT" + }, + "source": [ + "optimizer = keras.optimizers.Adagrad(lr=0.001)" + ], + "execution_count": 69, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RdzjwRZOLOpT" + }, + "source": [ + "## RMSProp" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TkjnInhCLOpT" + }, + "source": [ + "optimizer = keras.optimizers.RMSprop(lr=0.001, rho=0.9)" + ], + "execution_count": 70, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Aqd3XjSlLOpT" + }, + "source": [ + "## Adam 옵티마이저" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": 
"ibdJe_cILOpU" + }, + "source": [ + "optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)" + ], + "execution_count": 71, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vOuhwhjQLOpU" + }, + "source": [ + "## Adamax 옵티마이저" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_pwlFvReLOpU" + }, + "source": [ + "optimizer = keras.optimizers.Adamax(lr=0.001, beta_1=0.9, beta_2=0.999)" + ], + "execution_count": 72, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-UL-CtdMLOpU" + }, + "source": [ + "## Nadam 옵티마이저" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "L2jGWQ8vLOpU" + }, + "source": [ + "optimizer = keras.optimizers.Nadam(lr=0.001, beta_1=0.9, beta_2=0.999)" + ], + "execution_count": 73, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ow2z1pbiLOpU" + }, + "source": [ + "## 학습률 스케줄링" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DziEJzDxLOpU" + }, + "source": [ + "### 거듭제곱 스케줄링" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_QKqYIBSLOpU" + }, + "source": [ + "```lr = lr0 / (1 + steps / s)**c```\n", + "* 케라스는 `c=1`과 `s = 1 / decay`을 사용합니다" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mEhuxik_LOpU" + }, + "source": [ + "optimizer = keras.optimizers.SGD(lr=0.01, decay=1e-4)" + ], + "execution_count": 74, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "3ZRa8D0PLOpV" + }, + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])" + ], + "execution_count": 75, + "outputs": [] 
+ }, + { + "cell_type": "code", + "metadata": { + "id": "VpPb6h-cLOpV", + "outputId": "4ef44500-2026-4c95-a8d1-e5ad2bcc4757", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "n_epochs = 25\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" + ], + "execution_count": 76, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/25\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5980 - accuracy: 0.7933 - val_loss: 0.4029 - val_accuracy: 0.8604\n", + "Epoch 2/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.3830 - accuracy: 0.8637 - val_loss: 0.3716 - val_accuracy: 0.8720\n", + "Epoch 3/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.3492 - accuracy: 0.8773 - val_loss: 0.3749 - val_accuracy: 0.8732\n", + "Epoch 4/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.3276 - accuracy: 0.8812 - val_loss: 0.3499 - val_accuracy: 0.8800\n", + "Epoch 5/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.3172 - accuracy: 0.8858 - val_loss: 0.3448 - val_accuracy: 0.8788\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2923 - accuracy: 0.8938 - val_loss: 0.3410 - val_accuracy: 0.8832\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2870 - accuracy: 0.8972 - val_loss: 0.3354 - val_accuracy: 0.8870\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2721 - accuracy: 0.9032 - val_loss: 0.3405 - val_accuracy: 0.8836\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2729 - accuracy: 0.9004 - val_loss: 0.3288 - val_accuracy: 0.8888\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2584 - accuracy: 0.9071 - val_loss: 0.3258 - val_accuracy: 
0.8884\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2529 - accuracy: 0.9096 - val_loss: 0.3265 - val_accuracy: 0.8880\n", + "Epoch 12/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2484 - accuracy: 0.9100 - val_loss: 0.3331 - val_accuracy: 0.8826\n", + "Epoch 13/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2420 - accuracy: 0.9145 - val_loss: 0.3252 - val_accuracy: 0.8892\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2371 - accuracy: 0.9147 - val_loss: 0.3286 - val_accuracy: 0.8898\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2363 - accuracy: 0.9158 - val_loss: 0.3240 - val_accuracy: 0.8876\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.2311 - accuracy: 0.9176 - val_loss: 0.3201 - val_accuracy: 0.8900\n", + "Epoch 17/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2234 - accuracy: 0.9207 - val_loss: 0.3233 - val_accuracy: 0.8906\n", + "Epoch 18/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2248 - accuracy: 0.9194 - val_loss: 0.3187 - val_accuracy: 0.8938\n", + "Epoch 19/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2236 - accuracy: 0.9213 - val_loss: 0.3227 - val_accuracy: 0.8904\n", + "Epoch 20/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2229 - accuracy: 0.9221 - val_loss: 0.3208 - val_accuracy: 0.8916\n", + "Epoch 21/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2194 - accuracy: 0.9233 - val_loss: 0.3204 - val_accuracy: 0.8914\n", + "Epoch 22/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2163 - accuracy: 0.9231 - val_loss: 0.3177 - val_accuracy: 0.8942\n", + "Epoch 23/25\n", + "1719/1719 [==============================] - 3s 2ms/step - 
loss: 0.2129 - accuracy: 0.9243 - val_loss: 0.3192 - val_accuracy: 0.8902\n", + "Epoch 24/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2077 - accuracy: 0.9276 - val_loss: 0.3210 - val_accuracy: 0.8894\n", + "Epoch 25/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2102 - accuracy: 0.9257 - val_loss: 0.3210 - val_accuracy: 0.8926\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "We15bby8LOpV", + "outputId": "9c836093-b473-4b24-8eab-49e38af1f30e", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 303 + } + }, + "source": [ + "learning_rate = 0.01\n", + "decay = 1e-4\n", + "batch_size = 32\n", + "n_steps_per_epoch = len(X_train) // batch_size\n", + "epochs = np.arange(n_epochs)\n", + "lrs = learning_rate / (1 + decay * epochs * n_steps_per_epoch)\n", + "\n", + "plt.plot(epochs, lrs, \"o-\")\n", + "plt.axis([0, n_epochs - 1, 0, 0.01])\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Learning Rate\")\n", + "plt.title(\"Power Scheduling\", fontsize=14)\n", + "plt.grid(True)\n", + "plt.show()" + ], + "execution_count": 77, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "txkhYc0BLOpV" + }, + "source": [ + "### 지수 기반 스케줄링" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m1fGE70_LOpV" + }, + "source": [ + "```lr = lr0 * 0.1**(epoch / s)```" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "MR9yQ-PhLOpV" + }, + "source": [ + "def exponential_decay_fn(epoch):\n", + " return 0.01 * 0.1**(epoch / 20)" + ], + "execution_count": 78, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "rkHd1gawLOpW" + }, + "source": [ + "def exponential_decay(lr0, s):\n", + " def exponential_decay_fn(epoch):\n", + " return lr0 * 0.1**(epoch / s)\n", + " return exponential_decay_fn\n", + "\n", + "exponential_decay_fn = exponential_decay(lr0=0.01, s=20)" + ], + "execution_count": 79, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZpgT0jO3LOpW" + }, + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", + "n_epochs = 25" + ], + "execution_count": 80, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "cQu05HxbLOpW", + "outputId": "2145961f-e67b-4e92-b9b3-1a28482bccf3", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid),\n", + " callbacks=[lr_scheduler])" + ], + "execution_count": 81, + "outputs": [ + { + 
"output_type": "stream", + "text": [ + "Epoch 1/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 1.1106 - accuracy: 0.7344 - val_loss: 0.8633 - val_accuracy: 0.7328\n", + "Epoch 2/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.7067 - accuracy: 0.7824 - val_loss: 0.7695 - val_accuracy: 0.7576\n", + "Epoch 3/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.6652 - accuracy: 0.8040 - val_loss: 0.6658 - val_accuracy: 0.8108\n", + "Epoch 4/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5463 - accuracy: 0.8319 - val_loss: 0.5714 - val_accuracy: 0.8504\n", + "Epoch 5/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.5098 - accuracy: 0.8400 - val_loss: 0.5460 - val_accuracy: 0.8506\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.4484 - accuracy: 0.8567 - val_loss: 0.5718 - val_accuracy: 0.8620\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4328 - accuracy: 0.8703 - val_loss: 0.4861 - val_accuracy: 0.8628\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3765 - accuracy: 0.8795 - val_loss: 0.5380 - val_accuracy: 0.8580\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3632 - accuracy: 0.8844 - val_loss: 0.4584 - val_accuracy: 0.8704\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3122 - accuracy: 0.8944 - val_loss: 0.4571 - val_accuracy: 0.8796\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2846 - accuracy: 0.9025 - val_loss: 0.4326 - val_accuracy: 0.8786\n", + "Epoch 12/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2725 - accuracy: 0.9079 - val_loss: 0.4818 - val_accuracy: 0.8758\n", + "Epoch 13/25\n", + "1719/1719 
[==============================] - 5s 3ms/step - loss: 0.2495 - accuracy: 0.9137 - val_loss: 0.4428 - val_accuracy: 0.8822\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2275 - accuracy: 0.9207 - val_loss: 0.4235 - val_accuracy: 0.8786\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2211 - accuracy: 0.9237 - val_loss: 0.4484 - val_accuracy: 0.8858\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2007 - accuracy: 0.9297 - val_loss: 0.4862 - val_accuracy: 0.8778\n", + "Epoch 17/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1796 - accuracy: 0.9362 - val_loss: 0.4971 - val_accuracy: 0.8846\n", + "Epoch 18/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1682 - accuracy: 0.9414 - val_loss: 0.5270 - val_accuracy: 0.8832\n", + "Epoch 19/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1587 - accuracy: 0.9450 - val_loss: 0.5135 - val_accuracy: 0.8902\n", + "Epoch 20/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1439 - accuracy: 0.9486 - val_loss: 0.4861 - val_accuracy: 0.8860\n", + "Epoch 21/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1377 - accuracy: 0.9510 - val_loss: 0.5442 - val_accuracy: 0.8870\n", + "Epoch 22/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1350 - accuracy: 0.9546 - val_loss: 0.5270 - val_accuracy: 0.8890\n", + "Epoch 23/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1185 - accuracy: 0.9589 - val_loss: 0.5708 - val_accuracy: 0.8870\n", + "Epoch 24/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.1078 - accuracy: 0.9637 - val_loss: 0.6175 - val_accuracy: 0.8864\n", + "Epoch 25/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.1110 - accuracy: 0.9623 - 
val_loss: 0.6164 - val_accuracy: 0.8856\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "o06AUsZjLOpW", + "outputId": "d9bf73fb-2b6d-4c02-eabd-61d2384be458", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 303 + } + }, + "source": [ + "plt.plot(history.epoch, history.history[\"lr\"], \"o-\")\n", + "plt.axis([0, n_epochs - 1, 0, 0.011])\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Learning Rate\")\n", + "plt.title(\"Exponential Scheduling\", fontsize=14)\n", + "plt.grid(True)\n", + "plt.show()" + ], + "execution_count": 82, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y4leAQjZLOpW" + }, + "source": [ + "이 스케줄 함수는 두 번째 매개변수로 현재 학습률을 받을 수 있습니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "S5QPkNZmLOpW" + }, + "source": [ + "def exponential_decay_fn(epoch, lr):\n", + " return lr * 0.1**(1 / 20)" + ], + "execution_count": 83, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2MB6gpmtLOpX" + }, + "source": [ + "에포크가 아니라 반복마다 학습률을 업데이트하려면 사용자 정의 콜백 클래스를 작성해야 합니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qpWM_UXaLOpX", + "outputId": "332f517a-6770-4ead-e4f8-ac358bcb3adc", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "K = keras.backend\n", + "\n", + "class ExponentialDecay(keras.callbacks.Callback):\n", + " def __init__(self, s=40000):\n", + " super().__init__()\n", + " self.s = s\n", + "\n", + " def on_batch_begin(self, batch, logs=None):\n", + " # 노트: 에포크마다 `batch` 매개변수가 재설정됩니다\n", + " lr = K.get_value(self.model.optimizer.lr)\n", + " K.set_value(self.model.optimizer.lr, lr * 0.1**(1 / self.s))\n", + "\n", + " def on_epoch_end(self, epoch, logs=None):\n", + " logs = logs or {}\n", + " logs['lr'] = K.get_value(self.model.optimizer.lr)\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])\n", + "lr0 = 0.01\n", + "optimizer = keras.optimizers.Nadam(lr=lr0)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", + "n_epochs = 25\n", + "\n", + "s = 20 * len(X_train) // 32 # 20 에포크 동안 스텝 횟수 (배치 크기 = 32)\n", + "exp_decay = ExponentialDecay(s)\n", + "history = 
model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid),\n", + " callbacks=[exp_decay])" + ], + "execution_count": 84, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 1.1115 - accuracy: 0.7381 - val_loss: 0.7662 - val_accuracy: 0.7760\n", + "Epoch 2/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.6374 - accuracy: 0.7985 - val_loss: 0.5625 - val_accuracy: 0.8226\n", + "Epoch 3/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.5862 - accuracy: 0.8178 - val_loss: 0.8932 - val_accuracy: 0.7694\n", + "Epoch 4/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.5377 - accuracy: 0.8325 - val_loss: 0.4869 - val_accuracy: 0.8520\n", + "Epoch 5/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4675 - accuracy: 0.8487 - val_loss: 0.4377 - val_accuracy: 0.8692\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4113 - accuracy: 0.8651 - val_loss: 0.4454 - val_accuracy: 0.8648\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.3786 - accuracy: 0.8751 - val_loss: 0.5048 - val_accuracy: 0.8596\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.3625 - accuracy: 0.8837 - val_loss: 0.4919 - val_accuracy: 0.8526\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.3259 - accuracy: 0.8895 - val_loss: 0.4738 - val_accuracy: 0.8604\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.3111 - accuracy: 0.8963 - val_loss: 0.4203 - val_accuracy: 0.8876\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.2740 - accuracy: 0.9079 - val_loss: 0.4345 - val_accuracy: 0.8852\n", + "Epoch 12/25\n", + "1719/1719 
[==============================] - 6s 3ms/step - loss: 0.2592 - accuracy: 0.9117 - val_loss: 0.4351 - val_accuracy: 0.8820\n", + "Epoch 13/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2383 - accuracy: 0.9200 - val_loss: 0.4287 - val_accuracy: 0.8882\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.2309 - accuracy: 0.9236 - val_loss: 0.4414 - val_accuracy: 0.8838\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.2098 - accuracy: 0.9290 - val_loss: 0.4232 - val_accuracy: 0.8916\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1936 - accuracy: 0.9342 - val_loss: 0.4336 - val_accuracy: 0.8896\n", + "Epoch 17/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1782 - accuracy: 0.9392 - val_loss: 0.4625 - val_accuracy: 0.8890\n", + "Epoch 18/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1644 - accuracy: 0.9456 - val_loss: 0.4693 - val_accuracy: 0.8904\n", + "Epoch 19/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1554 - accuracy: 0.9496 - val_loss: 0.4725 - val_accuracy: 0.8982\n", + "Epoch 20/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1481 - accuracy: 0.9515 - val_loss: 0.5076 - val_accuracy: 0.8902\n", + "Epoch 21/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1368 - accuracy: 0.9555 - val_loss: 0.5094 - val_accuracy: 0.8940\n", + "Epoch 22/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1278 - accuracy: 0.9575 - val_loss: 0.5544 - val_accuracy: 0.8934\n", + "Epoch 23/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1194 - accuracy: 0.9619 - val_loss: 0.5734 - val_accuracy: 0.8902\n", + "Epoch 24/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1135 - accuracy: 0.9647 - 
# Recreate the learning rate seen at every training batch so it can be plotted.
# The fit() call above passes no batch_size, so Keras uses its default of 32 and
# runs ceil(len(X_train) / 32) batches per epoch (the final, partial batch counts
# too). Floor division would leave those batches off the plotted range.
n_steps = n_epochs * int(np.ceil(len(X_train) / 32))
steps = np.arange(n_steps)
# Exponential decay: the rate is divided by 10 every `s` batches.
# NOTE(review): lr0 and s come from earlier cells; presumably they are the same
# values the exp_decay callback used during training -- confirm.
lrs = lr0 * 0.1**(steps / s)

plt.plot(steps, lrs, "-", linewidth=2)
plt.axis([0, n_steps - 1, 0, lr0 * 1.1])
plt.xlabel("Batch")
plt.ylabel("Learning Rate")
plt.title("Exponential Scheduling (per batch)", fontsize=14)
plt.grid(True)
plt.show()
def piecewise_constant(boundaries, values):
    """Build an ``epoch -> learning rate`` function for piecewise constant scheduling.

    Args:
        boundaries: increasing epoch numbers at which the learning rate
            changes. Any sequence is accepted (list, tuple, NumPy array).
        values: the learning rates, one per interval, so it must contain
            exactly ``len(boundaries) + 1`` entries. ``values[0]`` applies
            before ``boundaries[0]``, ``values[i]`` from ``boundaries[i - 1]``
            (inclusive) up to ``boundaries[i]`` (exclusive), and ``values[-1]``
            from the last boundary onwards.

    Returns:
        A function taking the epoch index and returning the learning rate as
        a float.

    Raises:
        ValueError: if ``values`` does not have one entry per interval.
    """
    # Prepend epoch 0 so that interval i starts at boundaries[i]. concatenate
    # (unlike `[0] + boundaries`) also works for tuples and NumPy arrays, where
    # `+` would raise or broadcast instead of prepending.
    boundaries = np.concatenate(([0], np.asarray(boundaries).ravel()))
    values = np.asarray(values)
    if values.shape != boundaries.shape:
        raise ValueError(
            "values must contain exactly len(boundaries) + 1 entries "
            "(got {} values for {} boundaries)".format(values.size, len(boundaries) - 1))
    def piecewise_constant_fn(epoch):
        # Index of the last interval start that is <= epoch.
        interval = np.searchsorted(boundaries, epoch, side="right") - 1
        return float(values[interval])
    return piecewise_constant_fn
1/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 1.2318 - accuracy: 0.7142 - val_loss: 0.9157 - val_accuracy: 0.7232\n", + "Epoch 2/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.7203 - accuracy: 0.7776 - val_loss: 0.6123 - val_accuracy: 0.8198\n", + "Epoch 3/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.8270 - accuracy: 0.7619 - val_loss: 1.4205 - val_accuracy: 0.6206\n", + "Epoch 4/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.8446 - accuracy: 0.7524 - val_loss: 0.9360 - val_accuracy: 0.7016\n", + "Epoch 5/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.8689 - accuracy: 0.7226 - val_loss: 0.8600 - val_accuracy: 0.7660\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.6233 - accuracy: 0.7984 - val_loss: 0.6819 - val_accuracy: 0.8138\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5443 - accuracy: 0.8315 - val_loss: 0.5935 - val_accuracy: 0.8408\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5136 - accuracy: 0.8388 - val_loss: 0.7235 - val_accuracy: 0.7882\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.5320 - accuracy: 0.8341 - val_loss: 0.6815 - val_accuracy: 0.7836\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4771 - accuracy: 0.8513 - val_loss: 0.6984 - val_accuracy: 0.8196\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4934 - accuracy: 0.8493 - val_loss: 0.6040 - val_accuracy: 0.8466\n", + "Epoch 12/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4599 - accuracy: 0.8600 - val_loss: 0.6230 - val_accuracy: 0.8420\n", + "Epoch 13/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.4432 - accuracy: 
0.8611 - val_loss: 0.5845 - val_accuracy: 0.8510\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4706 - accuracy: 0.8597 - val_loss: 0.6285 - val_accuracy: 0.8418\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4539 - accuracy: 0.8629 - val_loss: 0.6618 - val_accuracy: 0.8432\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.3486 - accuracy: 0.8865 - val_loss: 0.4722 - val_accuracy: 0.8680\n", + "Epoch 17/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2977 - accuracy: 0.9014 - val_loss: 0.4846 - val_accuracy: 0.8630\n", + "Epoch 18/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2881 - accuracy: 0.9052 - val_loss: 0.5084 - val_accuracy: 0.8698\n", + "Epoch 19/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2758 - accuracy: 0.9102 - val_loss: 0.4696 - val_accuracy: 0.8700\n", + "Epoch 20/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2721 - accuracy: 0.9082 - val_loss: 0.4798 - val_accuracy: 0.8736\n", + "Epoch 21/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2666 - accuracy: 0.9129 - val_loss: 0.5165 - val_accuracy: 0.8674\n", + "Epoch 22/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2562 - accuracy: 0.9160 - val_loss: 0.5196 - val_accuracy: 0.8728\n", + "Epoch 23/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2504 - accuracy: 0.9179 - val_loss: 0.5501 - val_accuracy: 0.8680\n", + "Epoch 24/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2419 - accuracy: 0.9202 - val_loss: 0.6129 - val_accuracy: 0.8692\n", + "Epoch 25/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2459 - accuracy: 0.9209 - val_loss: 0.5537 - val_accuracy: 0.8682\n" + ], + "name": "stdout" + } + ] + }, + 
# Learning rate the piecewise-constant schedule assigns to each trained epoch.
scheduled_rates = list(map(piecewise_constant_fn, history.epoch))

ax = plt.gca()
ax.plot(history.epoch, scheduled_rates, "o-")
ax.set_xlim(0, n_epochs - 1)
ax.set_ylim(0, 0.011)
ax.set_xlabel("Epoch")
ax.set_ylabel("Learning Rate")
ax.set_title("Piecewise Constant Scheduling", fontsize=14)
ax.grid(True)
plt.show()
tf.random.set_seed(42)
np.random.seed(42)

# Performance scheduling: multiply the learning rate by 0.5 whenever the
# monitored metric has not improved for 5 consecutive epochs.
lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(10, activation="softmax")
])
# `learning_rate` is the documented argument name; `lr` is only a deprecated alias.
optimizer = keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
n_epochs = 25
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid),
                    callbacks=[lr_scheduler])
2ms/step - loss: 0.5018 - accuracy: 0.8470 - val_loss: 0.4818 - val_accuracy: 0.8566\n", + "Epoch 5/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5284 - accuracy: 0.8433 - val_loss: 0.5047 - val_accuracy: 0.8502\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5061 - accuracy: 0.8549 - val_loss: 0.5721 - val_accuracy: 0.8520\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5315 - accuracy: 0.8530 - val_loss: 0.5131 - val_accuracy: 0.8626\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5037 - accuracy: 0.8578 - val_loss: 0.6451 - val_accuracy: 0.8064\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.4976 - accuracy: 0.8588 - val_loss: 0.5515 - val_accuracy: 0.8620\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.3127 - accuracy: 0.8938 - val_loss: 0.4293 - val_accuracy: 0.8776\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2376 - accuracy: 0.9126 - val_loss: 0.4315 - val_accuracy: 0.8856\n", + "Epoch 12/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2253 - accuracy: 0.9182 - val_loss: 0.4620 - val_accuracy: 0.8760\n", + "Epoch 13/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2032 - accuracy: 0.9240 - val_loss: 0.4370 - val_accuracy: 0.8890\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.1862 - accuracy: 0.9286 - val_loss: 0.5031 - val_accuracy: 0.8702\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.1882 - accuracy: 0.9294 - val_loss: 0.4409 - val_accuracy: 0.8862\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.1423 - accuracy: 0.9449 - val_loss: 0.4379 - val_accuracy: 0.8930\n", + 
# Left axis (blue): the per-epoch values stored under history.history["lr"].
ax1 = plt.gca()
ax1.plot(history.epoch, history.history["lr"], "bo-")
ax1.set_xlabel("Epoch")
ax1.set_ylabel("Learning Rate", color='b')
ax1.tick_params(axis='y', colors='b')
ax1.set_xlim(0, n_epochs - 1)
ax1.grid(True)

# Right axis (red): validation loss, drawn on a twin y-axis over the same epochs.
ax2 = ax1.twinx()
ax2.plot(history.epoch, history.history["val_loss"], "r^-")
ax2.set_ylabel('Validation Loss', color='r')
ax2.tick_params(axis='y', colors='r')

plt.title("Reduce LR on Plateau", fontsize=14)
plt.show()
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"))
model.add(keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"))
model.add(keras.layers.Dense(10, activation="softmax"))

# Schedule object handed to the optimizer instead of a fixed rate: start at
# 0.01 and decay by a factor of 0.1 every `s` optimizer steps.
s = 20 * len(X_train) // 32  # number of steps in 20 epochs (batch size = 32)
learning_rate = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=s, decay_rate=0.1)
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
model.compile(optimizer=optimizer,
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

n_epochs = 25
history = model.fit(X_train_scaled, y_train,
                    validation_data=(X_valid_scaled, y_valid),
                    epochs=n_epochs)
2ms/step - loss: 0.3176 - accuracy: 0.8866 - val_loss: 0.3430 - val_accuracy: 0.8792\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2928 - accuracy: 0.8954 - val_loss: 0.3414 - val_accuracy: 0.8812\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2852 - accuracy: 0.8985 - val_loss: 0.3356 - val_accuracy: 0.8816\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2713 - accuracy: 0.9039 - val_loss: 0.3365 - val_accuracy: 0.8814\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2713 - accuracy: 0.9044 - val_loss: 0.3266 - val_accuracy: 0.8860\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2569 - accuracy: 0.9084 - val_loss: 0.3240 - val_accuracy: 0.8848\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2500 - accuracy: 0.9115 - val_loss: 0.3252 - val_accuracy: 0.8866\n", + "Epoch 12/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2452 - accuracy: 0.9147 - val_loss: 0.3302 - val_accuracy: 0.8812\n", + "Epoch 13/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2408 - accuracy: 0.9155 - val_loss: 0.3219 - val_accuracy: 0.8858\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2377 - accuracy: 0.9159 - val_loss: 0.3223 - val_accuracy: 0.8864\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2377 - accuracy: 0.9171 - val_loss: 0.3209 - val_accuracy: 0.8878\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2316 - accuracy: 0.9192 - val_loss: 0.3185 - val_accuracy: 0.8896\n", + "Epoch 17/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2264 - accuracy: 0.9213 - val_loss: 0.3198 - val_accuracy: 0.8886\n", + 
"Epoch 18/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2283 - accuracy: 0.9188 - val_loss: 0.3169 - val_accuracy: 0.8902\n", + "Epoch 19/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2283 - accuracy: 0.9203 - val_loss: 0.3198 - val_accuracy: 0.8894\n", + "Epoch 20/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2287 - accuracy: 0.9218 - val_loss: 0.3170 - val_accuracy: 0.8902\n", + "Epoch 21/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2264 - accuracy: 0.9209 - val_loss: 0.3180 - val_accuracy: 0.8904\n", + "Epoch 22/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2256 - accuracy: 0.9200 - val_loss: 0.3164 - val_accuracy: 0.8914\n", + "Epoch 23/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2222 - accuracy: 0.9233 - val_loss: 0.3171 - val_accuracy: 0.8902\n", + "Epoch 24/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2180 - accuracy: 0.9241 - val_loss: 0.3166 - val_accuracy: 0.8898\n", + "Epoch 25/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2221 - accuracy: 0.9234 - val_loss: 0.3165 - val_accuracy: 0.8914\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e7s1E4aILOpZ" + }, + "source": [ + "구간별 고정 스케줄링은 다음을 사용하세요:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mB1XJs34LOpa" + }, + "source": [ + "learning_rate = keras.optimizers.schedules.PiecewiseConstantDecay(\n", + " boundaries=[5. * n_steps_per_epoch, 15. 
K = keras.backend

class ExponentialLearningRate(keras.callbacks.Callback):
    """Multiply the optimizer's learning rate by a constant factor after every batch.

    The learning rate in effect and the loss reported for each batch are
    appended to ``self.rates`` and ``self.losses``.
    """
    def __init__(self, factor):
        super().__init__()
        self.factor = factor
        self.rates = []
        self.losses = []
    def on_batch_end(self, batch, logs):
        # Read the rate once and reuse it for the update: multiplying the
        # optimizer variable directly would create a new TF op on every batch.
        rate = K.get_value(self.model.optimizer.learning_rate)
        self.rates.append(rate)
        self.losses.append(logs["loss"])
        K.set_value(self.model.optimizer.learning_rate, rate * self.factor)

def find_learning_rate(model, X, y, epochs=1, batch_size=32, min_rate=10**-5, max_rate=10):
    """Train briefly while growing the learning rate exponentially from min_rate to max_rate.

    The model's weights and its optimizer's learning rate are saved first and
    restored afterwards, even if training raises.

    Args:
        model: a compiled Keras model.
        X, y: training inputs and targets, passed to ``model.fit``.
        epochs: number of epochs to sweep over.
        batch_size: batch size passed to ``model.fit``.
        min_rate: learning rate used for the first batch.
        max_rate: learning rate reached once the last batch has been processed.

    Returns:
        A ``(rates, losses)`` pair of lists with one entry per training batch.
    """
    init_weights = model.get_weights()
    # fit() also runs the final partial batch, so round up: with floor division
    # the factor would be too large and the sweep would overshoot max_rate.
    iterations = int(np.ceil(len(X) / batch_size)) * epochs
    factor = np.exp(np.log(max_rate / min_rate) / iterations)
    init_lr = K.get_value(model.optimizer.learning_rate)
    K.set_value(model.optimizer.learning_rate, min_rate)
    exp_lr = ExponentialLearningRate(factor)
    try:
        model.fit(X, y, epochs=epochs, batch_size=batch_size,
                  callbacks=[exp_lr])
    finally:
        K.set_value(model.optimizer.learning_rate, init_lr)
        model.set_weights(init_weights)
    return exp_lr.rates, exp_lr.losses

def plot_lr_vs_loss(rates, losses):
    """Plot loss against learning rate (log x-axis) with a line at the minimum loss.

    The y-range runs from the minimum loss to halfway between the first and
    the minimum loss.
    """
    # The sweep usually ends by diverging (loss becomes NaN), so ignore NaNs
    # when looking for the minimum.
    min_loss = np.nanmin(losses)
    plt.plot(rates, losses)
    plt.gca().set_xscale('log')
    plt.hlines(min_loss, min(rates), max(rates))
    plt.axis([min(rates), max(rates), min_loss, (losses[0] + min_loss) / 2])
    plt.xlabel("Learning rate")
    plt.ylabel("Loss")
keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 97, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "N7dpX40rLOpa", + "outputId": "fcea5e32-513b-4269-960e-939e0f398353", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 345 + } + }, + "source": [ + "batch_size = 128\n", + "rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)\n", + "plot_lr_vs_loss(rates, losses)" + ], + "execution_count": 98, + "outputs": [ + { + "output_type": "stream", + "text": [ + " 1/430 [..............................] - ETA: 1:35 - loss: 2.6624 - accuracy: 0.1406WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0016s vs `on_train_batch_end` time: 0.0017s). Check your callbacks.\n", + "430/430 [==============================] - 1s 3ms/step - loss: nan - accuracy: 0.3135\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
class OneCycleScheduler(keras.callbacks.Callback):
    """Set the optimizer's learning rate before every batch following a 1cycle schedule.

    The rate rises linearly from ``start_rate`` to ``max_rate`` over the first
    ``half_iteration`` batches, falls linearly back to ``start_rate`` over the
    next ``half_iteration`` batches, then falls linearly to ``last_rate`` by
    batch number ``iterations``. Any further batches use ``last_rate``.

    Args:
        iterations: total number of training batches the schedule spans.
        max_rate: peak learning rate.
        start_rate: initial learning rate; defaults to ``max_rate / 10``.
        last_iterations: number of batches reserved for the final phase;
            defaults to ``iterations // 10 + 1``.
        last_rate: final learning rate; defaults to ``start_rate / 1000``.
    """
    def __init__(self, iterations, max_rate, start_rate=None,
                 last_iterations=None, last_rate=None):
        super().__init__()
        self.iterations = iterations
        self.max_rate = max_rate
        # Compare against None rather than using `or`, so that an explicit 0
        # (e.g. last_rate=0 to anneal all the way down) is honoured.
        self.start_rate = max_rate / 10 if start_rate is None else start_rate
        self.last_iterations = (iterations // 10 + 1 if last_iterations is None
                                else last_iterations)
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_rate = self.start_rate / 1000 if last_rate is None else last_rate
        self.iteration = 0
    def _interpolate(self, iter1, iter2, rate1, rate2):
        # Linear interpolation between (iter1, rate1) and (iter2, rate2),
        # evaluated at the current iteration.
        return ((rate2 - rate1) * (self.iteration - iter1)
                / (iter2 - iter1) + rate1)
    def on_batch_begin(self, batch, logs=None):
        if self.iteration < self.half_iteration:
            rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)
        elif self.iteration < 2 * self.half_iteration:
            rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,
                                     self.max_rate, self.start_rate)
        elif self.iteration < self.iterations:
            rate = self._interpolate(2 * self.half_iteration, self.iterations,
                                     self.start_rate, self.last_rate)
            rate = max(rate, self.last_rate)
        else:
            # Training ran for more batches than planned: hold the final rate
            # instead of extrapolating (or dividing by a zero-length phase).
            rate = self.last_rate
        self.iteration += 1
        K.set_value(self.model.optimizer.learning_rate, rate)
"execution_count": 100, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.6572 - accuracy: 0.7739 - val_loss: 0.4871 - val_accuracy: 0.8336\n", + "Epoch 2/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.4581 - accuracy: 0.8396 - val_loss: 0.4274 - val_accuracy: 0.8526\n", + "Epoch 3/25\n", + "430/430 [==============================] - 1s 2ms/step - loss: 0.4121 - accuracy: 0.8546 - val_loss: 0.4114 - val_accuracy: 0.8584\n", + "Epoch 4/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.3837 - accuracy: 0.8641 - val_loss: 0.3871 - val_accuracy: 0.8688\n", + "Epoch 5/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.3639 - accuracy: 0.8717 - val_loss: 0.3765 - val_accuracy: 0.8680\n", + "Epoch 6/25\n", + "430/430 [==============================] - 1s 2ms/step - loss: 0.3456 - accuracy: 0.8774 - val_loss: 0.3744 - val_accuracy: 0.8706\n", + "Epoch 7/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.3329 - accuracy: 0.8809 - val_loss: 0.3634 - val_accuracy: 0.8706\n", + "Epoch 8/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.3184 - accuracy: 0.8858 - val_loss: 0.3949 - val_accuracy: 0.8612\n", + "Epoch 9/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.3065 - accuracy: 0.8891 - val_loss: 0.3487 - val_accuracy: 0.8772\n", + "Epoch 10/25\n", + "430/430 [==============================] - 1s 2ms/step - loss: 0.2944 - accuracy: 0.8922 - val_loss: 0.3398 - val_accuracy: 0.8808\n", + "Epoch 11/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.2839 - accuracy: 0.8960 - val_loss: 0.3456 - val_accuracy: 0.8820\n", + "Epoch 12/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.2707 - accuracy: 0.9026 - val_loss: 0.3652 - val_accuracy: 0.8694\n", + "Epoch 13/25\n", + "430/430 
[==============================] - 1s 2ms/step - loss: 0.2537 - accuracy: 0.9081 - val_loss: 0.3364 - val_accuracy: 0.8830\n", + "Epoch 14/25\n", + "430/430 [==============================] - 1s 2ms/step - loss: 0.2403 - accuracy: 0.9137 - val_loss: 0.3464 - val_accuracy: 0.8808\n", + "Epoch 15/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.2279 - accuracy: 0.9181 - val_loss: 0.3261 - val_accuracy: 0.8848\n", + "Epoch 16/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.2160 - accuracy: 0.9232 - val_loss: 0.3297 - val_accuracy: 0.8844\n", + "Epoch 17/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.2062 - accuracy: 0.9269 - val_loss: 0.3357 - val_accuracy: 0.8862\n", + "Epoch 18/25\n", + "430/430 [==============================] - 1s 2ms/step - loss: 0.1979 - accuracy: 0.9306 - val_loss: 0.3251 - val_accuracy: 0.8894\n", + "Epoch 19/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1893 - accuracy: 0.9340 - val_loss: 0.3234 - val_accuracy: 0.8908\n", + "Epoch 20/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1821 - accuracy: 0.9365 - val_loss: 0.3227 - val_accuracy: 0.8932\n", + "Epoch 21/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1753 - accuracy: 0.9403 - val_loss: 0.3223 - val_accuracy: 0.8918\n", + "Epoch 22/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1701 - accuracy: 0.9417 - val_loss: 0.3187 - val_accuracy: 0.8946\n", + "Epoch 23/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1656 - accuracy: 0.9438 - val_loss: 0.3191 - val_accuracy: 0.8940\n", + "Epoch 24/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1628 - accuracy: 0.9457 - val_loss: 0.3181 - val_accuracy: 0.8934\n", + "Epoch 25/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1611 - accuracy: 0.9461 - val_loss: 0.3174 - val_accuracy: 
# A Dense layer whose kernel weights carry an ℓ2 penalty with a factor of 0.01.
# Use l1(0.1) instead for ℓ1 regularization with a factor of 0.1, or
# l1_l2(0.1, 0.01) for both ℓ1 and ℓ2 regularization, with factors 0.1 and 0.01 respectively.
layer = keras.layers.Dense(units=100,
                           kernel_regularizer=keras.regularizers.l2(0.01),
                           kernel_initializer="he_normal",
                           activation="elu")

# Same idea applied to a whole classifier: every Dense layer is ℓ2-regularized.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_units in (300, 100):
    model.add(keras.layers.Dense(n_units, activation="elu",
                                 kernel_initializer="he_normal",
                                 kernel_regularizer=keras.regularizers.l2(0.01)))
model.add(keras.layers.Dense(10, activation="softmax",
                             kernel_regularizer=keras.regularizers.l2(0.01)))
model.compile(optimizer="nadam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

n_epochs = 2
history = model.fit(X_train_scaled, y_train,
                    validation_data=(X_valid_scaled, y_valid),
                    epochs=n_epochs)
from functools import partial

# Dense layer factory with the arguments shared by every regularized layer, so
# they do not have to be repeated for each layer.
RegularizedDense = partial(keras.layers.Dense,
                           activation="elu",
                           kernel_initializer="he_normal",
                           kernel_regularizer=keras.regularizers.l2(0.01))

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    RegularizedDense(300),
    RegularizedDense(100),
    # Override the factory's he_normal for the softmax output layer so that this
    # model matches the explicit version in the previous cell, whose output
    # layer keeps Dense's default initializer.
    RegularizedDense(10, activation="softmax",
                     kernel_initializer="glorot_uniform")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 2
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid))
kernel_initializer=\"he_normal\"),\n", + " keras.layers.Dropout(rate=0.2),\n", + " keras.layers.Dense(100, activation=\"elu\", kernel_initializer=\"he_normal\"),\n", + " keras.layers.Dropout(rate=0.2),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", + "n_epochs = 2\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" + ], + "execution_count": 104, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/2\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.7611 - accuracy: 0.7576 - val_loss: 0.3730 - val_accuracy: 0.8644\n", + "Epoch 2/2\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4306 - accuracy: 0.8403 - val_loss: 0.3408 - val_accuracy: 0.8726\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FT_W0H3rLOpd" + }, + "source": [ + "## 알파 드롭아웃" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "BPvHOOYcLOpd" + }, + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)" + ], + "execution_count": 105, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "CB_Oe24YLOpe", + "outputId": "15e5a530-ca81-4bca-ac0a-cc2184ea6bd3", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.AlphaDropout(rate=0.2),\n", + " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.AlphaDropout(rate=0.2),\n", + " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.AlphaDropout(rate=0.2),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])\n", + "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, 
nesterov=True)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", + "n_epochs = 20\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" + ], + "execution_count": 106, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.8023 - accuracy: 0.7146 - val_loss: 0.5778 - val_accuracy: 0.8446\n", + "Epoch 2/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5662 - accuracy: 0.7904 - val_loss: 0.5146 - val_accuracy: 0.8536\n", + "Epoch 3/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5259 - accuracy: 0.8057 - val_loss: 0.4904 - val_accuracy: 0.8598\n", + "Epoch 4/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5127 - accuracy: 0.8093 - val_loss: 0.4837 - val_accuracy: 0.8596\n", + "Epoch 5/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5074 - accuracy: 0.8123 - val_loss: 0.4244 - val_accuracy: 0.8692\n", + "Epoch 6/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4787 - accuracy: 0.8205 - val_loss: 0.4594 - val_accuracy: 0.8640\n", + "Epoch 7/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4721 - accuracy: 0.8267 - val_loss: 0.4696 - val_accuracy: 0.8608\n", + "Epoch 8/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4575 - accuracy: 0.8306 - val_loss: 0.4156 - val_accuracy: 0.8708\n", + "Epoch 9/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4626 - accuracy: 0.8275 - val_loss: 0.4342 - val_accuracy: 0.8734\n", + "Epoch 10/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4550 - accuracy: 0.8323 - val_loss: 0.4300 - val_accuracy: 0.8656\n", + "Epoch 11/20\n", + "1719/1719 [==============================] 
- 4s 2ms/step - loss: 0.4453 - accuracy: 0.8340 - val_loss: 0.4266 - val_accuracy: 0.8746\n", + "Epoch 12/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4420 - accuracy: 0.8349 - val_loss: 0.5200 - val_accuracy: 0.8574\n", + "Epoch 13/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4344 - accuracy: 0.8393 - val_loss: 0.4301 - val_accuracy: 0.8740\n", + "Epoch 14/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4307 - accuracy: 0.8389 - val_loss: 0.4462 - val_accuracy: 0.8656\n", + "Epoch 15/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4313 - accuracy: 0.8374 - val_loss: 0.4415 - val_accuracy: 0.8658\n", + "Epoch 16/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4270 - accuracy: 0.8391 - val_loss: 0.4181 - val_accuracy: 0.8776\n", + "Epoch 17/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4207 - accuracy: 0.8426 - val_loss: 0.5417 - val_accuracy: 0.8564\n", + "Epoch 18/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4362 - accuracy: 0.8378 - val_loss: 0.4669 - val_accuracy: 0.8718\n", + "Epoch 19/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4271 - accuracy: 0.8418 - val_loss: 0.4745 - val_accuracy: 0.8700\n", + "Epoch 20/20\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4188 - accuracy: 0.8415 - val_loss: 0.4486 - val_accuracy: 0.8714\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "fisM8Wc8LOpe", + "outputId": "8783b0b2-0a9c-427d-a745-517443a53217", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model.evaluate(X_test_scaled, y_test)" + ], + "execution_count": 107, + "outputs": [ + { + "output_type": "stream", + "text": [ + "313/313 [==============================] - 0s 2ms/step - loss: 0.4861 - accuracy: 0.8584\n" + ], + "name": 
"stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.48614442348480225, 0.8583999872207642]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 107 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "C3UO369ULOpe", + "outputId": "bb158abb-93c6-4eef-acee-b55d70e8419e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model.evaluate(X_train_scaled, y_train)" + ], + "execution_count": 108, + "outputs": [ + { + "output_type": "stream", + "text": [ + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.3614 - accuracy: 0.8815\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.3614204227924347, 0.8815454840660095]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 108 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TLppurUFLOpe", + "outputId": "4cf53ffb-1221-410a-9652-833b0a85e6c6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train_scaled, y_train)" + ], + "execution_count": 109, + "outputs": [ + { + "output_type": "stream", + "text": [ + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.4217 - accuracy: 0.8434\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TP8X2xCnLOpe" + }, + "source": [ + "## MC 드롭아웃" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Uy_uwuIRLOpe" + }, + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)" + ], + "execution_count": 110, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "scrolled": true, + "id": "GPDfhAMBLOpf" + }, + "source": [ + "y_probas = np.stack([model(X_test_scaled, training=True)\n", + " for sample in range(100)])\n", + "y_proba = y_probas.mean(axis=0)\n", + "y_std = y_probas.std(axis=0)" + ], + "execution_count": 111, + "outputs": [] + }, + { + "cell_type": "code", + 
"metadata": { + "id": "nzrxJDyrLOpf", + "outputId": "3b272a1c-75a1-4d81-cfbb-e53079e2bc12", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "np.round(model.predict(X_test_scaled[:1]), 2)" + ], + "execution_count": 112, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.98]],\n", + " dtype=float32)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 112 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "08HJBSaVLOpf", + "outputId": "12a1a91f-04da-42bd-8b09-8c7713f29918", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "np.round(y_probas[:, :1], 2)" + ], + "execution_count": 113, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.73, 0. , 0.26]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.92, 0. , 0.04]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0. , 0. , 0.97]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.1 , 0. , 0.89]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.53, 0. , 0.46]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.71, 0. , 0.29]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.09, 0. , 0.47, 0. , 0.44]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.18, 0. , 0.26, 0. , 0.56]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.55, 0. , 0.07, 0. , 0.37]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.14, 0. , 0.84]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.21, 0. , 0.22, 0. , 0.57]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.3 , 0. , 0.68]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.29, 0. , 0.28, 0. , 0.44]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.14, 0. , 0.82]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.21, 0. , 0.12, 0. 
, 0.68]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0. , 0. , 0.99]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.77, 0. , 0.21]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.34, 0. , 0.65]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.02, 0. , 0.96]],\n", + "\n", + " [[0.01, 0. , 0. , 0. , 0. , 0.86, 0.02, 0.01, 0. , 0.1 ]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.12, 0. , 0.07, 0. , 0.81]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.11, 0. , 0.88]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.08, 0. , 0.06, 0. , 0.86]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.72, 0. , 0.23]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.2 , 0. , 0.78]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.19, 0. , 0.31, 0. , 0.5 ]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.05, 0. , 0.92]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.2 , 0. , 0.64, 0. , 0.16]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.99]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.68, 0. , 0.02, 0. , 0.3 ]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.15, 0. , 0.74, 0. , 0.11]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.19, 0. , 0.79]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.09, 0. , 0.63, 0. , 0.29]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.95]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.09, 0. , 0.89]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.07, 0. , 0.38, 0. , 0.56]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.78, 0. , 0.02, 0. , 0.2 ]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.16, 0. , 0.82]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.33, 0. , 0.37, 0. , 0.3 ]],\n", + "\n", + " [[0. , 0. 
, 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.01, 0. , 0.98]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.17, 0. , 0.12, 0. , 0.71]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.96]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.29, 0. , 0.71]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.2 , 0. , 0.79]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.27, 0. , 0.29, 0. , 0.45]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.89, 0. , 0.1 ]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.2 , 0. , 0.04, 0. , 0.76]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.13, 0. , 0.02, 0. , 0.85]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.1 , 0. , 0.89]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.16, 0. , 0.84]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.16, 0. , 0.8 ]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.07, 0. , 0.25, 0. , 0.68]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.14, 0. , 0.2 , 0. , 0.66]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.22, 0. , 0.03, 0. , 0.75]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.13, 0. , 0.87]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.05, 0. , 0.91]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.35, 0. , 0.61]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.49, 0. , 0.46, 0. , 0.05]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.51, 0. , 0.45, 0. , 0.04]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.1 , 0. , 0.47, 0. , 0.43]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.95]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.37, 0. , 0.57]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.15, 0. , 0.61, 0.02, 0.22]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. 
, 0.99]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.08, 0. , 0.28, 0. , 0.64]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.1 , 0. , 0.88]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.37, 0. , 0.61]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.1 , 0. , 0.52, 0. , 0.37]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.12, 0. , 0.03, 0. , 0.85]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.15, 0. , 0.8 ]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.38, 0. , 0.61]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.39, 0. , 0.6 ]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.24, 0. , 0.74]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.06, 0. , 0.94]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.01, 0. , 0.98]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.02, 0. , 0.97]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.09, 0. , 0.87]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.55, 0. , 0.41]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.92, 0. , 0.04, 0. , 0.04]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.4 , 0. , 0.02, 0. , 0.58]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.98]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.97]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.04, 0. , 0.93]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.12, 0. , 0.84]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.28, 0. , 0.38, 0. , 0.35]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.39, 0. , 0.17, 0. , 0.44]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.16, 0. , 0.82]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.01, 0. , 0.98]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.35, 0. , 0.64]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.04, 0. , 0.96]],\n", + "\n", + " [[0. 
, 0. , 0. , 0. , 0. , 0.2 , 0. , 0.33, 0. , 0.47]],\n", + "\n", + " [[0. , 0. , 0. , 0. , 0. , 0.11, 0. , 0.11, 0. , 0.78]]],\n", + " dtype=float32)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 113 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XMqmf4UmLOpf", + "outputId": "69d29b09-945c-441e-c2f7-9c3b8e3c564c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "np.round(y_proba[:1], 2)" + ], + "execution_count": 114, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[0. , 0. , 0. , 0. , 0. , 0.11, 0. , 0.22, 0. , 0.67]],\n", + " dtype=float32)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 114 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lISHnMdnLOpf", + "outputId": "9d1137d4-bbd6-4f9e-a527-66cf9daef495", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "y_std = y_probas.std(axis=0)\n", + "np.round(y_std[:1], 2)" + ], + "execution_count": 115, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[0. , 0. , 0. , 0. , 0. , 0.19, 0. , 0.23, 0. 
, 0.29]],\n", + " dtype=float32)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 115 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "iW-062pHLOpf" + }, + "source": [ + "y_pred = np.argmax(y_proba, axis=1)" + ], + "execution_count": 116, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "DL6bPgKCLOpg", + "outputId": "cf362713-825c-4238-c802-cac884209a40", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "accuracy = np.sum(y_pred == y_test) / len(y_test)\n", + "accuracy" + ], + "execution_count": 117, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.8656" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 117 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yxvL3LWZLOpg" + }, + "source": [ + "class MCDropout(keras.layers.Dropout):\n", + " def call(self, inputs):\n", + " return super().call(inputs, training=True)\n", + "\n", + "class MCAlphaDropout(keras.layers.AlphaDropout):\n", + " def call(self, inputs):\n", + " return super().call(inputs, training=True)" + ], + "execution_count": 118, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Zzd5DCm7LOpg" + }, + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)" + ], + "execution_count": 119, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "WvapJbsLLOpg" + }, + "source": [ + "mc_model = keras.models.Sequential([\n", + " MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n", + " for layer in model.layers\n", + "])" + ], + "execution_count": 120, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "tkkDpmTaLOpg", + "outputId": "7378308e-f78f-446b-83fe-bb6e9a998a62", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "mc_model.summary()" + ], + "execution_count": 121, + "outputs": [ + { + "output_type": "stream", + "text": [ + 
"Model: \"sequential_20\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "flatten_18 (Flatten) (None, 784) 0 \n", + "_________________________________________________________________\n", + "mc_alpha_dropout (MCAlphaDro (None, 784) 0 \n", + "_________________________________________________________________\n", + "dense_262 (Dense) (None, 300) 235500 \n", + "_________________________________________________________________\n", + "mc_alpha_dropout_1 (MCAlphaD (None, 300) 0 \n", + "_________________________________________________________________\n", + "dense_263 (Dense) (None, 100) 30100 \n", + "_________________________________________________________________\n", + "mc_alpha_dropout_2 (MCAlphaD (None, 100) 0 \n", + "_________________________________________________________________\n", + "dense_264 (Dense) (None, 10) 1010 \n", + "=================================================================\n", + "Total params: 266,610\n", + "Trainable params: 266,610\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dXbWp13vLOpg" + }, + "source": [ + "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n", + "mc_model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])" + ], + "execution_count": 122, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "9ywhSDViLOpg" + }, + "source": [ + "mc_model.set_weights(model.get_weights())" + ], + "execution_count": 123, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JZd8UdsALOph" + }, + "source": [ + "이제 MC 드롭아웃을 모델에 사용할 수 있습니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IVSdXF7TLOph", + "outputId": 
"a580e136-de40-46e7-e200-c730000cff26", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "np.round(np.mean([mc_model.predict(X_test_scaled[:1]) for sample in range(100)], axis=0), 2)" + ], + "execution_count": 124, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[0. , 0. , 0. , 0. , 0. , 0.14, 0. , 0.25, 0. , 0.61]],\n", + " dtype=float32)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 124 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MafcuHfYLOph" + }, + "source": [ + "## 맥스 노름" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Rm22pePYLOph" + }, + "source": [ + "layer = keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n", + " kernel_constraint=keras.constraints.max_norm(1.))" + ], + "execution_count": 125, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "gWqSW1bvLOpi", + "outputId": "d2224c69-2eb1-4a62-e14d-34dfb1360660", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "MaxNormDense = partial(keras.layers.Dense,\n", + " activation=\"selu\", kernel_initializer=\"lecun_normal\",\n", + " kernel_constraint=keras.constraints.max_norm(1.))\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " MaxNormDense(300),\n", + " MaxNormDense(100),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", + "n_epochs = 2\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" + ], + "execution_count": 126, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/2\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5766 - accuracy: 0.8018 - val_loss: 0.3723 - val_accuracy: 0.8638\n", + "Epoch 2/2\n", + 
"1719/1719 [==============================] - 5s 3ms/step - loss: 0.3530 - accuracy: 0.8692 - val_loss: 0.3769 - val_accuracy: 0.8684\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A53qrK-PLOpi" + }, + "source": [ + "# 연습문제 해답" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IOsQc6lpLOpi" + }, + "source": [ + "## 1. to 7." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ebf2enC_LOpi" + }, + "source": [ + "부록 A 참조." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h1PVxctOLOpi" + }, + "source": [ + "## 8. CIFAR10에서 딥러닝" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qGwtSO0fLOpi" + }, + "source": [ + "### a.\n", + "*문제: 100개의 뉴런을 가진 은닉층 20개로 심층 신경망을 만들어보세요(너무 많은 것 같지만 이 연습문제의 핵심입니다). He 초기화와 ELU 활성화 함수를 사용하세요.*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "p1Mix75VLOpj" + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", + "for _ in range(20):\n", + " model.add(keras.layers.Dense(100,\n", + " activation=\"elu\",\n", + " kernel_initializer=\"he_normal\"))" + ], + "execution_count": 127, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2glrTwRcLOpj" + }, + "source": [ + "### b.\n", + "*문제: Nadam 옵티마이저와 조기 종료를 사용하여 CIFAR10 데이터셋에 이 네트워크를 훈련하세요. `keras.datasets.cifar10.load_data()`를 사용하여 데이터를 적재할 수 있습니다. 이 데이터셋은 10개의 클래스와 32×32 크기의 컬러 이미지 60,000개로 구성됩니다(50,000개는 훈련, 10,000개는 테스트). 따라서 10개의 뉴런과 소프트맥스 활성화 함수를 사용하는 출력층이 필요합니다. 
모델 구조와 하이퍼파라미터를 바꿀 때마다 적절한 학습률을 찾아야 한다는 것을 기억하세요.*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8fkKGlD0LOpj" + }, + "source": [ + "모델에 출력층을 추가합니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pvTdbgiCLOpj" + }, + "source": [ + "model.add(keras.layers.Dense(10, activation=\"softmax\"))" + ], + "execution_count": 128, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ux0ROP0tLOpk" + }, + "source": [ + "학습률 5e-5인 Nadam 옵티마이저를 사용해 보죠. 학습률 1e-5, 3e-5, 1e-4, 3e-4, 1e-3, 3e-3, 1e-2를 테스트하고 10번의 에포크 동안 (아래 텐서보드 콜백으로) 학습 곡선을 비교해 보았습니다. 학습률 3e-5와 1e-4가 꽤 좋았기 때문에 5e-5를 시도해 보았고 조금 더 나은 결과를 냈습니다." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1oReiGeCLOpk" + }, + "source": [ + "optimizer = keras.optimizers.Nadam(lr=5e-5)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=optimizer,\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 129, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "24PcKfxeLOpk" + }, + "source": [ + "CIFAR10 데이터셋을 로드하죠. 조기 종료를 사용하기 때문에 검증 세트가 필요합니다. 
원본 훈련 세트에서 처음 5,000개를 검증 세트로 사용하겠습니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cLWnS6RKLOpk", + "outputId": "8b69d659-ad10-4a5d-a6be-6484e343b258", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()\n", + "\n", + "X_train = X_train_full[5000:]\n", + "y_train = y_train_full[5000:]\n", + "X_valid = X_train_full[:5000]\n", + "y_valid = y_train_full[:5000]" + ], + "execution_count": 130, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\n", + "170500096/170498071 [==============================] - 3s 0us/step\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H76ZTjNxLOpk" + }, + "source": [ + "이제 콜백을 만들고 모델을 훈련합니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FuQ8FelNLOpk" + }, + "source": [ + "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", + "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_model.h5\", save_best_only=True)\n", + "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", + "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_{:03d}\".format(run_index))\n", + "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", + "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]" + ], + "execution_count": 131, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7VuUEbNacg7v" + }, + "source": [ + "코랩에서 아래 셀에 있는 `%tensorboard` 명령을 실행하면 다음과 같은 텐서보드 화면을 볼 수 있습니다." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x_NtuiwVcQiV" + }, + "source": [ + "![스크린샷 2021-02-17 오후 11.42.41.png]()" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "GAv7c6NtLOpl", + "outputId": "94e3f040-b384-4fb9-a530-d86e2ee4e664", + "colab": { + "resources": { + "https://localhost:6006/?tensorboardColab=true": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "text/html; charset=utf-8" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/index.js": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "application/javascript; charset=utf-8" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/data/environment": { + "data": "eyJkYXRhX2xvY2F0aW9uIjogIi4vbXlfY2lmYXIxMF9sb2dzIiwgIndpbmRvd190aXRsZSI6ICIifQ==", + "ok": true, + "headers": [ + [ + "content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/data/plugins_listing": { + "data": 
"eyJzY2FsYXJzIjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAic2NhbGFycyIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJDVVNUT01fRUxFTUVOVCIsICJlbGVtZW50X25hbWUiOiAidGYtc2NhbGFyLWRhc2hib2FyZCJ9fSwgImN1c3RvbV9zY2FsYXJzIjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAiQ3VzdG9tIFNjYWxhcnMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLWN1c3RvbS1zY2FsYXItZGFzaGJvYXJkIn19LCAiaW1hZ2VzIjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAiaW1hZ2VzIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1pbWFnZS1kYXNoYm9hcmQifX0sICJhdWRpbyI6IHsiZGlzYWJsZV9yZWxvYWQiOiBmYWxzZSwgImVuYWJsZWQiOiBmYWxzZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogImF1ZGlvIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1hdWRpby1kYXNoYm9hcmQifX0sICJkZWJ1Z2dlci12MiI6IHsiZGlzYWJsZV9yZWxvYWQiOiBmYWxzZSwgImVuYWJsZWQiOiBmYWxzZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogIkRlYnVnZ2VyIFYyIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIk5HX0NPTVBPTkVOVCJ9fSwgImdyYXBocyI6IHsiZGlzYWJsZV9yZWxvYWQiOiB0cnVlLCAiZW5hYmxlZCI6IHRydWUsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJncmFwaHMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLWdyYXBoLWRhc2hib2FyZCJ9fSwgImRpc3RyaWJ1dGlvbnMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJkaXN0cmlidXRpb25zIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1kaXN0cmlidXRpb24tZGFzaGJvYXJkIn19LCAiaGlzdG9ncmFtcyI6IHsiZGlzYWJsZV9yZWxvYWQiOiBmYWxzZSwgImVuYWJsZWQiOiBmYWxzZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogImhpc3RvZ3JhbXMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLWhpc3RvZ3JhbS1kYXNoYm9
hcmQifX0sICJ0ZXh0IjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAidGV4dCIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJDVVNUT01fRUxFTUVOVCIsICJlbGVtZW50X25hbWUiOiAidGYtdGV4dC1kYXNoYm9hcmQifX0sICJwcl9jdXJ2ZXMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJQUiBDdXJ2ZXMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLXByLWN1cnZlLWRhc2hib2FyZCJ9fSwgInByb2ZpbGVfcmVkaXJlY3QiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJQcm9maWxlIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1wcm9maWxlLXJlZGlyZWN0LWRhc2hib2FyZCJ9fSwgImhwYXJhbXMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJocGFyYW1zIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1ocGFyYW1zLWRhc2hib2FyZCJ9fSwgIm1lc2giOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJtZXNoIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJtZXNoLWRhc2hib2FyZCJ9fSwgInRpbWVzZXJpZXMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJUaW1lIFNlcmllcyIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJOR19DT01QT05FTlQifX0sICJwcm9qZWN0b3IiOiB7ImRpc2FibGVfcmVsb2FkIjogdHJ1ZSwgImVuYWJsZWQiOiBmYWxzZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogInByb2plY3RvciIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJJRlJBTUUiLCAibW9kdWxlX3BhdGgiOiAiL2RhdGEvcGx1Z2luL3Byb2plY3Rvci9pbmRleC5qcyJ9fSwgIndoYXRpZiI6IHsiZGlzYWJsZV9yZWxvYWQiOiBmYWxzZSwgImVuYWJsZWQiOiBmYWxzZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogIldoYXQtSWYgVG9vbCIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJJRlJBTUUiLCAibW9kdWxlX3BhdGgiOiAiL2RhdGEvcGx1Z2luL3doYXRpZi9pbmRleC5qcyJ9fX0=", + 
"ok": true, + "headers": [ + [ + "content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/icon_bundle.svg": { + "data": "PHN2Zz48ZGVmcz4KPHN2ZyBpZD0iYXJyb3dfZG93bndhcmRfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZmlsbD0iIzAxMDEwMSIgZD0iTTIwIDEybC0xLjQxLTEuNDFMMTMgMTYuMTdWNGgtMnYxMi4xN2wtNS41OC01LjU5TDQgMTJsOCA4IDgtOHoiLz48L3N2Zz48c3ZnIGlkPSJhcnJvd191cHdhcmRfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTQgMTJsMS40MSAxLjQxTDExIDcuODNWMjBoMlY3LjgzbDUuNTggNS41OUwyMCAxMmwtOC04LTggOHoiLz48L3N2Zz48c3ZnIGlkPSJidWdfcmVwb3J0XzI0cHgiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiB3aWR0aD0iMjQiPjxwYXRoIGQ9Ik0wIDBoMjR2MjRIMHoiIGZpbGw9Im5vbmUiLz48cGF0aCBkPSJNMjAgOGgtMi44MWMtLjQ1LS43OC0xLjA3LTEuNDUtMS44Mi0xLjk2TDE3IDQuNDEgMTUuNTkgM2wtMi4xNyAyLjE3QzEyLjk2IDUuMDYgMTIuNDkgNSAxMiA1Yy0uNDkgMC0uOTYuMDYtMS40MS4xN0w4LjQxIDMgNyA0LjQxbDEuNjIgMS42M0M3Ljg4IDYuNTUgNy4yNiA3LjIyIDYuODEgOEg0djJoMi4wOWMtLjA1LjMzLS4wOS42Ni0uMDkgMXYxSDR2MmgydjFjMCAuMzQuMDQuNjcuMDkgMUg0djJoMi44MWMxLjA0IDEuNzkgMi45NyAzIDUuMTkgM3M0LjE1LTEuMjEgNS4xOS0zSDIwdi0yaC0yLjA5Yy4wNS0uMzMuMDktLjY2LjA5LTF2LTFoMnYtMmgtMnYtMWMwLS4zNC0uMDQtLjY3LS4wOS0xSDIwVjh6bS02IDhoLTR2LTJoNHYyem0wLTRoLTR2LTJoNHYyeiIvPjwvc3ZnPjxzdmcgaWQ9ImNhbmNlbF8yNHB4IiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48cGF0aCBkPSJNMTIgMkM2LjQ3IDIgMiA2LjQ3IDIgMTJzNC40NyAxMCAxMCAxMCAxMC00LjQ3IDEwLTEwUzE3LjUzIDIgMTIgMnptNSAxMy41OUwxNS41OSAxNyAxMiAxMy40MSA4LjQxIDE3IDcgMTUuNTkgMTAuNTkgMTIgNyA4LjQxIDguNDEgNyAxMiAxMC41OSAxNS41OSA3IDE3IDguNDEgMTMuNDEgMTIgMTcgMTUuNTl6Ii8+PC9zdmc+PHN2ZyBpZD0iY2hldnJvbl9sZWZ0XzI0cHgiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgd2lkdGg9IjI0IiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGQ9Ik0xNS40MSA3LjQxTDE0IDZsLTYgNiA2IDYgMS
40MS0xLjQxTDEwLjgzIDEyeiIvPjwvc3ZnPjxzdmcgaWQ9ImNoZXZyb25fcmlnaHRfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTEwIDZMOC41OSA3LjQxIDEzLjE3IDEybC00LjU4IDQuNTlMMTAgMThsNi02eiIvPjwvc3ZnPjxzdmcgaWQ9ImNsZWFyXzI0cHgiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgd2lkdGg9IjI0IiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGQ9Ik0xOSA2LjQxTDE3LjU5IDUgMTIgMTAuNTkgNi40MSA1IDUgNi40MSAxMC41OSAxMiA1IDE3LjU5IDYuNDEgMTkgMTIgMTMuNDEgMTcuNTkgMTkgMTkgMTcuNTkgMTMuNDEgMTJ6Ii8+PC9zdmc+PHN2ZyBpZD0iY2xvc2VfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiIHdpZHRoPSIyNCI+PHBhdGggZD0iTTAgMGgyNHYyNEgweiIgZmlsbD0ibm9uZSIvPjxwYXRoIGQ9Ik0xOSA2LjQxTDE3LjU5IDUgMTIgMTAuNTkgNi40MSA1IDUgNi40MSAxMC41OSAxMiA1IDE3LjU5IDYuNDEgMTkgMTIgMTMuNDEgMTcuNTkgMTkgMTkgMTcuNTkgMTMuNDEgMTJ6Ii8+PC9zdmc+PHN2ZyBpZD0iY29udGVudF9jb3B5XzI0cHgiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgd2lkdGg9IjI0IiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGQ9Ik0xNiAxSDRjLTEuMSAwLTIgLjktMiAydjE0aDJWM2gxMlYxem0zIDRIOGMtMS4xIDAtMiAuOS0yIDJ2MTRjMCAxLjEuOSAyIDIgMmgxMWMxLjEgMCAyLS45IDItMlY3YzAtMS4xLS45LTItMi0yem0wIDE2SDhWN2gxMXYxNHoiLz48L3N2Zz48c3ZnIGlkPSJlcnJvcl8yNHB4IiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48cGF0aCBkPSJNMTIgMkM2LjQ4IDIgMiA2LjQ4IDIgMTJzNC40OCAxMCAxMCAxMCAxMC00LjQ4IDEwLTEwUzE3LjUyIDIgMTIgMnptMSAxNWgtMnYtMmgydjJ6bTAtNGgtMlY3aDJ2NnoiLz48L3N2Zz48c3ZnIGlkPSJleHBhbmRfbGVzc18yNHB4IiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48cGF0aCBkPSJNMTIgOGwtNiA2IDEuNDEgMS40MUwxMiAxMC44M2w0LjU5IDQuNThMMTggMTR6Ii8+PC9zdmc+PHN2ZyBpZD0iZXhwYW5kX21vcmVfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTE2LjU5IDguNTlMMTIgMTMuMTcgNy40MSA4LjU5IDYgMTBsNiA2IDYtNnoiLz48L3N2Zz48c3ZnIGlkPSJmaWx0ZXJfYWx0XzI0cHgiIHhtbG
5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgZW5hYmxlLWJhY2tncm91bmQ9Im5ldyAwIDAgMjQgMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCIgd2lkdGg9IjI0Ij48Zz48cGF0aCBkPSJNMCwwaDI0IE0yNCwyNEgwIiBmaWxsPSJub25lIi8+PHBhdGggZD0iTTQuMjUsNS42MUM2LjI3LDguMiwxMCwxMywxMCwxM3Y2YzAsMC41NSwwLjQ1LDEsMSwxaDJjMC41NSwwLDEtMC40NSwxLTF2LTZjMCwwLDMuNzItNC44LDUuNzQtNy4zOSBDMjAuMjUsNC45NSwxOS43OCw0LDE4Ljk1LDRINS4wNEM0LjIxLDQsMy43NCw0Ljk1LDQuMjUsNS42MXoiLz48cGF0aCBkPSJNMCwwaDI0djI0SDBWMHoiIGZpbGw9Im5vbmUiLz48L2c+PC9zdmc+PHN2ZyBpZD0iZmxhZ18yNHB4IiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48cGF0aCBkPSJNMTQuNCA2TDE0IDRINXYxN2gydi03aDUuNmwuNCAyaDdWNnoiLz48L3N2Zz48c3ZnIGlkPSJmdWxsc2NyZWVuXzI0cHgiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiB3aWR0aD0iMjQiPjxwYXRoIGQ9Ik0wIDBoMjR2MjRIMHoiIGZpbGw9Im5vbmUiLz48cGF0aCBkPSJNNyAxNEg1djVoNXYtMkg3di0zem0tMi00aDJWN2gzVjVINXY1em0xMiA3aC0zdjJoNXYtNWgtMnYzek0xNCA1djJoM3YzaDJWNWgtNXoiLz48L3N2Zz48c3ZnIGlkPSJmdWxsc2NyZWVuX2V4aXRfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiIHdpZHRoPSIyNCI+PHBhdGggZD0iTTAgMGgyNHYyNEgweiIgZmlsbD0ibm9uZSIvPjxwYXRoIGQ9Ik01IDE2aDN2M2gydi01SDV2MnptMy04SDV2Mmg1VjVIOHYzem02IDExaDJ2LTNoM3YtMmgtNXY1em0yLTExVjVoLTJ2NWg1VjhoLTN6Ii8+PC9zdmc+PHN2ZyBpZD0iZ2V0X2FwcF8yNHB4IiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48cGF0aCBkPSJNMTkgOWgtNFYzSDl2Nkg1bDcgNyA3LTd6TTUgMTh2MmgxNHYtMkg1eiIvPjwvc3ZnPjxzdmcgaWQ9ImhlbHBfb3V0bGluZV8yNHB4IiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48cGF0aCBkPSJNMTEgMThoMnYtMmgtMnYyem0xLTE2QzYuNDggMiAyIDYuNDggMiAxMnM0LjQ4IDEwIDEwIDEwIDEwLTQuNDggMTAtMTBTMTcuNTIgMiAxMiAyem0wIDE4Yy00LjQxIDAtOC0zLjU5LTgtOHMzLjU5LTggOC04IDggMy41OSA4IDgtMy41OSA4LTggOHptMC0xNGMtMi4yMSAwLTQgMS43OS00IDRoMmMwLTEuMS45LTIgMi0yczIgLjkgMiAyYzAgMi0zIDEuNzUtMyA1aDJjMC0yLjI1IDMtMi41IDMtNS
AwLTIuMjEtMS43OS00LTQtNHoiLz48L3N2Zz48c3ZnIGlkPSJpbWFnZV9zZWFyY2hfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiIHdpZHRoPSIyNCI+PHBhdGggZD0iTTAgMGgyNHYyNEgweiIgZmlsbD0ibm9uZSIvPjxwYXRoIGQ9Ik0wIDBoMjR2MjRIMFYweiIgZmlsbD0ibm9uZSIvPjxwYXRoIGQ9Ik0xOCAxM3Y3SDRWNmg1LjAyYy4wNS0uNzEuMjItMS4zOC40OC0ySDRjLTEuMSAwLTIgLjktMiAydjE0YzAgMS4xLjkgMiAyIDJoMTRjMS4xIDAgMi0uOSAyLTJ2LTVsLTItMnptLTEuNSA1aC0xMWwyLjc1LTMuNTMgMS45NiAyLjM2IDIuNzUtMy41NHptMi44LTkuMTFjLjQ0LS43LjctMS41MS43LTIuMzlDMjAgNC4wMSAxNy45OSAyIDE1LjUgMlMxMSA0LjAxIDExIDYuNXMyLjAxIDQuNSA0LjQ5IDQuNWMuODggMCAxLjctLjI2IDIuMzktLjdMMjEgMTMuNDIgMjIuNDIgMTIgMTkuMyA4Ljg5ek0xNS41IDlDMTQuMTIgOSAxMyA3Ljg4IDEzIDYuNVMxNC4xMiA0IDE1LjUgNCAxOCA1LjEyIDE4IDYuNSAxNi44OCA5IDE1LjUgOXoiLz48L3N2Zz48c3ZnIGlkPSJpbmZvX291dGxpbmVfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTExIDE3aDJ2LTZoLTJ2NnptMS0xNUM2LjQ4IDIgMiA2LjQ4IDIgMTJzNC40OCAxMCAxMCAxMCAxMC00LjQ4IDEwLTEwUzE3LjUyIDIgMTIgMnptMCAxOGMtNC40MSAwLTgtMy41OS04LThzMy41OS04IDgtOCA4IDMuNTkgOCA4LTMuNTkgOC04IDh6TTExIDloMlY3aC0ydjJ6Ii8+PC9zdmc+PHN2ZyBpZD0ia2VlcF8yNHB4IiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGVuYWJsZS1iYWNrZ3JvdW5kPSJuZXcgMCAwIDI0IDI0IiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiIHdpZHRoPSIyNCI+PGc+PHJlY3QgZmlsbD0ibm9uZSIgaGVpZ2h0PSIyNCIgd2lkdGg9IjI0Ii8+PC9nPjxnPjxwYXRoIGQ9Ik0xNiw5VjRsMSwwYzAuNTUsMCwxLTAuNDUsMS0xdjBjMC0wLjU1LTAuNDUtMS0xLTFIN0M2LjQ1LDIsNiwyLjQ1LDYsM3YwIGMwLDAuNTUsMC40NSwxLDEsMWwxLDB2NWMwLDEuNjYtMS4zNCwzLTMsM2gwdjJoNS45N3Y3bDEsMWwxLTF2LTdIMTl2LTJoMEMxNy4zNCwxMiwxNiwxMC42NiwxNiw5eiIgZmlsbC1ydWxlPSJldmVub2RkIi8+PC9nPjwvc3ZnPjxzdmcgaWQ9ImtlZXBfb3V0bGluZV8yNHB4IiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGVuYWJsZS1iYWNrZ3JvdW5kPSJuZXcgMCAwIDI0IDI0IiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiIHdpZHRoPSIyNCI+PGc+PHJlY3QgZmlsbD0ibm9uZSIgaGVpZ2h0PSIyNCIgd2lkdGg9IjI0Ii8+PC9nPjxnPjxwYXRoIGQ9Ik0xNCw0djVjMCwxLjEyLDAuMzcsMi4xNiwxLDNIOWMwLjY1LTAuODYsMS
0xLjksMS0zVjRIMTQgTTE3LDJIN0M2LjQ1LDIsNiwyLjQ1LDYsM2MwLDAuNTUsMC40NSwxLDEsMWMwLDAsMCwwLDAsMGwxLDB2NSBjMCwxLjY2LTEuMzQsMy0zLDN2Mmg1Ljk3djdsMSwxbDEtMXYtN0gxOXYtMmMwLDAsMCwwLDAsMGMtMS42NiwwLTMtMS4zNC0zLTNWNGwxLDBjMCwwLDAsMCwwLDBjMC41NSwwLDEtMC40NSwxLTEgQzE4LDIuNDUsMTcuNTUsMiwxNywyTDE3LDJ6Ii8+PC9nPjwvc3ZnPjxzdmcgaWQ9ImxpbmVfd2VpZ2h0XzI0cHgiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgZW5hYmxlLWJhY2tncm91bmQ9Im5ldyAwIDAgMjQgMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCIgd2lkdGg9IjI0Ij48Zz48cmVjdCBmaWxsPSJub25lIiBoZWlnaHQ9IjI0IiB3aWR0aD0iMjQiIHg9IjAiLz48L2c+PGc+PGc+PGc+PHBhdGggZD0iTTMsMTdoMTh2LTJIM1YxN3ogTTMsMjBoMTh2LTFIM1YyMHogTTMsMTNoMTh2LTNIM1YxM3ogTTMsNHY0aDE4VjRIM3oiLz48L2c+PC9nPjwvZz48L3N2Zz48c3ZnIGlkPSJtb3JlX3ZlcnRfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiIHdpZHRoPSIyNCI+PHBhdGggZD0iTTAgMGgyNHYyNEgweiIgZmlsbD0ibm9uZSIvPjxwYXRoIGQ9Ik0xMiA4YzEuMSAwIDItLjkgMi0ycy0uOS0yLTItMi0yIC45LTIgMiAuOSAyIDIgMnptMCAyYy0xLjEgMC0yIC45LTIgMnMuOSAyIDIgMiAyLS45IDItMi0uOS0yLTItMnptMCA2Yy0xLjEgMC0yIC45LTIgMnMuOSAyIDIgMiAyLS45IDItMi0uOS0yLTItMnoiLz48L3N2Zz48c3ZnIGlkPSJyZWZyZXNoXzI0cHgiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgd2lkdGg9IjI0IiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiPjxwYXRoIGQ9Ik0xNy42NSA2LjM1QzE2LjIgNC45IDE0LjIxIDQgMTIgNGMtNC40MiAwLTcuOTkgMy41OC03Ljk5IDhzMy41NyA4IDcuOTkgOGMzLjczIDAgNi44NC0yLjU1IDcuNzMtNmgtMi4wOGMtLjgyIDIuMzMtMy4wNCA0LTUuNjUgNC0zLjMxIDAtNi0yLjY5LTYtNnMyLjY5LTYgNi02YzEuNjYgMCAzLjE0LjY5IDQuMjIgMS43OEwxMyAxMWg3VjRsLTIuMzUgMi4zNXoiLz48L3N2Zz48c3ZnIGlkPSJzZWFyY2hfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTE1LjUgMTRoLS43OWwtLjI4LS4yN0MxNS40MSAxMi41OSAxNiAxMS4xMSAxNiA5LjUgMTYgNS45MSAxMy4wOSAzIDkuNSAzUzMgNS45MSAzIDkuNSA1LjkxIDE2IDkuNSAxNmMxLjYxIDAgMy4wOS0uNTkgNC4yMy0xLjU3bC4yNy4yOHYuNzlsNSA0Ljk5TDIwLjQ5IDE5bC00Ljk5LTV6bS02IDBDNy4wMSAxNCA1IDExLjk5IDUgOS41UzcuMDEgNSA5LjUgNSAxNCA3LjAxIDE0IDkuNSAxMS45OSAxNCA5LjUgMTR6Ii8+PC
9zdmc+PHN2ZyBpZD0ic2V0dGluZ3NfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTE5LjQzIDEyLjk4Yy4wNC0uMzIuMDctLjY0LjA3LS45OHMtLjAzLS42Ni0uMDctLjk4bDIuMTEtMS42NWMuMTktLjE1LjI0LS40Mi4xMi0uNjRsLTItMy40NmMtLjEyLS4yMi0uMzktLjMtLjYxLS4yMmwtMi40OSAxYy0uNTItLjQtMS4wOC0uNzMtMS42OS0uOThsLS4zOC0yLjY1QzE0LjQ2IDIuMTggMTQuMjUgMiAxNCAyaC00Yy0uMjUgMC0uNDYuMTgtLjQ5LjQybC0uMzggMi42NWMtLjYxLjI1LTEuMTcuNTktMS42OS45OGwtMi40OS0xYy0uMjMtLjA5LS40OSAwLS42MS4yMmwtMiAzLjQ2Yy0uMTMuMjItLjA3LjQ5LjEyLjY0bDIuMTEgMS42NWMtLjA0LjMyLS4wNy42NS0uMDcuOThzLjAzLjY2LjA3Ljk4bC0yLjExIDEuNjVjLS4xOS4xNS0uMjQuNDItLjEyLjY0bDIgMy40NmMuMTIuMjIuMzkuMy42MS4yMmwyLjQ5LTFjLjUyLjQgMS4wOC43MyAxLjY5Ljk4bC4zOCAyLjY1Yy4wMy4yNC4yNC40Mi40OS40Mmg0Yy4yNSAwIC40Ni0uMTguNDktLjQybC4zOC0yLjY1Yy42MS0uMjUgMS4xNy0uNTkgMS42OS0uOThsMi40OSAxYy4yMy4wOS40OSAwIC42MS0uMjJsMi0zLjQ2Yy4xMi0uMjIuMDctLjQ5LS4xMi0uNjRsLTIuMTEtMS42NXpNMTIgMTUuNWMtMS45MyAwLTMuNS0xLjU3LTMuNS0zLjVzMS41Ny0zLjUgMy41LTMuNSAzLjUgMS41NyAzLjUgMy41LTEuNTcgMy41LTMuNSAzLjV6Ii8+PC9zdmc+PHN2ZyBpZD0ic2V0dGluZ3NfYmFja3VwX3Jlc3RvcmVfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiIHdpZHRoPSIyNCI+PHBhdGggZD0iTTAgMGgyNHYyNEgweiIgZmlsbD0ibm9uZSIvPjxwYXRoIGQ9Ik0xNCAxMmMwLTEuMS0uOS0yLTItMnMtMiAuOS0yIDIgLjkgMiAyIDIgMi0uOSAyLTJ6bS0yLTljLTQuOTcgMC05IDQuMDMtOSA5SDBsNCA0IDQtNEg1YzAtMy44NyAzLjEzLTcgNy03czcgMy4xMyA3IDctMy4xMyA3LTcgN2MtMS41MSAwLTIuOTEtLjQ5LTQuMDYtMS4zbC0xLjQyIDEuNDRDOC4wNCAyMC4zIDkuOTQgMjEgMTIgMjFjNC45NyAwIDktNC4wMyA5LTlzLTQuMDMtOS05LTl6Ii8+PC9zdmc+PHN2ZyBpZD0ic2V0dGluZ3Nfb3ZlcnNjYW5fMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiIHdpZHRoPSIyNCI+PHBhdGggZD0iTTAgMGgyNHYyNEgweiIgZmlsbD0ibm9uZSIvPjxwYXRoIGQ9Ik0xMi4wMSA1LjVMMTAgOGg0bC0xLjk5LTIuNXpNMTggMTB2NGwyLjUtMS45OUwxOCAxMHpNNiAxMGwtMi41IDIuMDFMNiAxNHYtNHptOCA2aC00bDIuMDEgMi41TDE0IDE2em03LTEzSDNjLTEuMSAwLTIgLjktMiAydjE0YzAgMS4xLjkgMiAyIDJoMThjMS4xIDAgMi0uOSAyLT
JWNWMwLTEuMS0uOS0yLTItMnptMCAxNi4wMUgzVjQuOTloMTh2MTQuMDJ6Ii8+PC9zdmc+PHN2ZyBpZD0idmlzaWJpbGl0eV9vZmZfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTEyIDdjMi43NiAwIDUgMi4yNCA1IDUgMCAuNjUtLjEzIDEuMjYtLjM2IDEuODNsMi45MiAyLjkyYzEuNTEtMS4yNiAyLjctMi44OSAzLjQzLTQuNzUtMS43My00LjM5LTYtNy41LTExLTcuNS0xLjQgMC0yLjc0LjI1LTMuOTguN2wyLjE2IDIuMTZDMTAuNzQgNy4xMyAxMS4zNSA3IDEyIDd6TTIgNC4yN2wyLjI4IDIuMjguNDYuNDZDMy4wOCA4LjMgMS43OCAxMC4wMiAxIDEyYzEuNzMgNC4zOSA2IDcuNSAxMSA3LjUgMS41NSAwIDMuMDMtLjMgNC4zOC0uODRsLjQyLjQyTDE5LjczIDIyIDIxIDIwLjczIDMuMjcgMyAyIDQuMjd6TTcuNTMgOS44bDEuNTUgMS41NWMtLjA1LjIxLS4wOC40My0uMDguNjUgMCAxLjY2IDEuMzQgMyAzIDMgLjIyIDAgLjQ0LS4wMy42NS0uMDhsMS41NSAxLjU1Yy0uNjcuMzMtMS40MS41My0yLjIuNTMtMi43NiAwLTUtMi4yNC01LTUgMC0uNzkuMi0xLjUzLjUzLTIuMnptNC4zMS0uNzhsMy4xNSAzLjE1LjAyLS4xNmMwLTEuNjYtMS4zNC0zLTMtM2wtLjE3LjAxeiIvPjwvc3ZnPjxzdmcgaWQ9Indhcm5pbmdfMjRweCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiBoZWlnaHQ9IjI0IiB2aWV3Qm94PSIwIDAgMjQgMjQiIHdpZHRoPSIyNCI+PHBhdGggZD0iTTAgMGgyNHYyNEgweiIgZmlsbD0ibm9uZSIvPjxwYXRoIGQ9Ik0xIDIxaDIyTDEyIDIgMSAyMXptMTItM2gtMnYtMmgydjJ6bTAtNGgtMnYtNGgydjR6Ii8+PC9zdmc+PC9kZWZzPjwvc3ZnPgo=", + "ok": true, + "headers": [ + [ + "content-type", + "image/svg+xml; charset=utf-8" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/data/runs": { + "data": "WyJydW5fMDAxL3RyYWluIl0=", + "ok": true, + "headers": [ + [ + "content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/data/plugin/graphs/info": { + "data": "eyJydW5fMDAxL3RyYWluIjogeyJydW4iOiAicnVuXzAwMS90cmFpbiIsICJ0YWdzIjogeyJiYXRjaF8yIjogeyJ0YWciOiAiYmF0Y2hfMiIsICJjb25jZXB0dWFsX2dyYXBoIjogZmFsc2UsICJvcF9ncmFwaCI6IHRydWUsICJwcm9maWxlIjogZmFsc2V9LCAia2VyYXMiOiB7InRhZyI6ICJrZXJhcyIsICJjb25jZXB0dWFsX2dyYXBoIjogdHJ1ZSwgIm9wX2dyYXBoIjogZmFsc2UsICJwcm9maWxlIjogZmFsc2V9fSwgInJ1bl9ncmFwaCI6IHRydWV9fQ==", + "ok": true, + "headers": [ + [ + 
"content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/data/plugin/graphs/graph?run=run_001%2Ftrain&conceptual=false": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "text/x-protobuf; charset=utf-8" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/font-roboto/RxZJdnzeo3R5zSexge8UUZBw1xU1rKptJj_0jans920.woff2": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "font/woff2" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/font-roboto/oMMgfZMQthOryQo9n22dcuvvDin1pK8aKteLpeZ5c0A.woff2": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "font/woff2" + ] + ], + "status": 200, + "status_text": "" + } + }, + "base_uri": "https://localhost:8080/", + "height": 822 + } + }, + "source": [ + "%tensorboard --logdir=./my_cifar10_logs --port=6006" + ], + "execution_count": 132, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "\n", + " (async () => {\n", + " const url = new URL(await google.colab.kernel.proxyPort(6006, {'cache': true}));\n", + " url.searchParams.set('tensorboardColab', 'true');\n", + " const iframe = document.createElement('iframe');\n", + " iframe.src = url;\n", + " iframe.setAttribute('width', '100%');\n", + " iframe.setAttribute('height', '800');\n", + " iframe.setAttribute('frameborder', 0);\n", + " document.body.appendChild(iframe);\n", + " })();\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4huFUfvOLOpl", + "outputId": "a0f6fcfc-c02b-4861-b396-d64f7281184d", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model.fit(X_train, y_train, epochs=100,\n", + " validation_data=(X_valid, y_valid),\n", + " callbacks=callbacks)" + ], + "execution_count": 133, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 
1/100\n", + "1407/1407 [==============================] - 16s 9ms/step - loss: 9.5976 - accuracy: 0.1365 - val_loss: 2.1086 - val_accuracy: 0.2342\n", + "Epoch 2/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 2.0597 - accuracy: 0.2457 - val_loss: 2.0227 - val_accuracy: 0.2528\n", + "Epoch 3/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.9423 - accuracy: 0.2883 - val_loss: 1.9173 - val_accuracy: 0.2974\n", + "Epoch 4/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.8595 - accuracy: 0.3242 - val_loss: 1.9241 - val_accuracy: 0.3242\n", + "Epoch 5/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.7853 - accuracy: 0.3508 - val_loss: 1.7868 - val_accuracy: 0.3550\n", + "Epoch 6/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.7293 - accuracy: 0.3703 - val_loss: 1.7313 - val_accuracy: 0.3782\n", + "Epoch 7/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.6884 - accuracy: 0.3880 - val_loss: 1.7046 - val_accuracy: 0.3728\n", + "Epoch 8/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.6473 - accuracy: 0.4009 - val_loss: 1.6535 - val_accuracy: 0.4072\n", + "Epoch 9/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.6216 - accuracy: 0.4174 - val_loss: 1.6390 - val_accuracy: 0.4072\n", + "Epoch 10/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.5892 - accuracy: 0.4274 - val_loss: 1.6882 - val_accuracy: 0.3892\n", + "Epoch 11/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.5679 - accuracy: 0.4388 - val_loss: 1.6097 - val_accuracy: 0.4182\n", + "Epoch 12/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.5494 - accuracy: 0.4435 - val_loss: 1.6076 - val_accuracy: 0.4224\n", + "Epoch 13/100\n", + "1407/1407 [==============================] - 12s 9ms/step - 
loss: 1.5264 - accuracy: 0.4501 - val_loss: 1.6094 - val_accuracy: 0.4174\n", + "Epoch 14/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.5058 - accuracy: 0.4584 - val_loss: 1.5780 - val_accuracy: 0.4304\n", + "Epoch 15/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.5017 - accuracy: 0.4586 - val_loss: 1.5628 - val_accuracy: 0.4468\n", + "Epoch 16/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.4832 - accuracy: 0.4655 - val_loss: 1.5483 - val_accuracy: 0.4450\n", + "Epoch 17/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.4690 - accuracy: 0.4741 - val_loss: 1.5686 - val_accuracy: 0.4398\n", + "Epoch 18/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.4530 - accuracy: 0.4814 - val_loss: 1.5176 - val_accuracy: 0.4588\n", + "Epoch 19/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.4254 - accuracy: 0.4887 - val_loss: 1.5407 - val_accuracy: 0.4556\n", + "Epoch 20/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.4167 - accuracy: 0.4908 - val_loss: 1.5124 - val_accuracy: 0.4598\n", + "Epoch 21/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4132 - accuracy: 0.4940 - val_loss: 1.5697 - val_accuracy: 0.4456\n", + "Epoch 22/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4092 - accuracy: 0.4909 - val_loss: 1.5215 - val_accuracy: 0.4550\n", + "Epoch 23/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3812 - accuracy: 0.5018 - val_loss: 1.5259 - val_accuracy: 0.4558\n", + "Epoch 24/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3708 - accuracy: 0.5068 - val_loss: 1.5368 - val_accuracy: 0.4616\n", + "Epoch 25/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3730 - accuracy: 0.5061 - val_loss: 1.5012 - val_accuracy: 
0.4656\n", + "Epoch 26/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3413 - accuracy: 0.5160 - val_loss: 1.5261 - val_accuracy: 0.4538\n", + "Epoch 27/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3396 - accuracy: 0.5184 - val_loss: 1.5085 - val_accuracy: 0.4638\n", + "Epoch 28/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3297 - accuracy: 0.5219 - val_loss: 1.5276 - val_accuracy: 0.4604\n", + "Epoch 29/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3327 - accuracy: 0.5197 - val_loss: 1.5129 - val_accuracy: 0.4642\n", + "Epoch 30/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3217 - accuracy: 0.5230 - val_loss: 1.5549 - val_accuracy: 0.4634\n", + "Epoch 31/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3237 - accuracy: 0.5222 - val_loss: 1.5363 - val_accuracy: 0.4582\n", + "Epoch 32/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2889 - accuracy: 0.5341 - val_loss: 1.4979 - val_accuracy: 0.4766\n", + "Epoch 33/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2933 - accuracy: 0.5362 - val_loss: 1.5370 - val_accuracy: 0.4622\n", + "Epoch 34/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2736 - accuracy: 0.5390 - val_loss: 1.5247 - val_accuracy: 0.4698\n", + "Epoch 35/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.2719 - accuracy: 0.5433 - val_loss: 1.4904 - val_accuracy: 0.4768\n", + "Epoch 36/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2673 - accuracy: 0.5467 - val_loss: 1.5016 - val_accuracy: 0.4780\n", + "Epoch 37/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.2518 - accuracy: 0.5478 - val_loss: 1.4951 - val_accuracy: 0.4810\n", + "Epoch 38/100\n", + "1407/1407 
[==============================] - 13s 9ms/step - loss: 1.2393 - accuracy: 0.5572 - val_loss: 1.4962 - val_accuracy: 0.4770\n", + "Epoch 39/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2372 - accuracy: 0.5526 - val_loss: 1.5172 - val_accuracy: 0.4732\n", + "Epoch 40/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.2280 - accuracy: 0.5606 - val_loss: 1.5011 - val_accuracy: 0.4768\n", + "Epoch 41/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2190 - accuracy: 0.5625 - val_loss: 1.5301 - val_accuracy: 0.4740\n", + "Epoch 42/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.2113 - accuracy: 0.5695 - val_loss: 1.5344 - val_accuracy: 0.4746\n", + "Epoch 43/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2086 - accuracy: 0.5617 - val_loss: 1.5503 - val_accuracy: 0.4638\n", + "Epoch 44/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.1997 - accuracy: 0.5712 - val_loss: 1.5310 - val_accuracy: 0.4814\n", + "Epoch 45/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.1881 - accuracy: 0.5744 - val_loss: 1.4936 - val_accuracy: 0.4852\n", + "Epoch 46/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1747 - accuracy: 0.5788 - val_loss: 1.5113 - val_accuracy: 0.4778\n", + "Epoch 47/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1738 - accuracy: 0.5753 - val_loss: 1.5495 - val_accuracy: 0.4750\n", + "Epoch 48/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1679 - accuracy: 0.5796 - val_loss: 1.4970 - val_accuracy: 0.4878\n", + "Epoch 49/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1723 - accuracy: 0.5763 - val_loss: 1.5314 - val_accuracy: 0.4812\n", + "Epoch 50/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.1544 - 
accuracy: 0.5855 - val_loss: 1.5753 - val_accuracy: 0.4734\n", + "Epoch 51/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1567 - accuracy: 0.5833 - val_loss: 1.5835 - val_accuracy: 0.4692\n", + "Epoch 52/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1442 - accuracy: 0.5877 - val_loss: 1.5382 - val_accuracy: 0.4800\n", + "Epoch 53/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1309 - accuracy: 0.5963 - val_loss: 1.5327 - val_accuracy: 0.4848\n", + "Epoch 54/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.1219 - accuracy: 0.5970 - val_loss: 1.5581 - val_accuracy: 0.4826\n", + "Epoch 55/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.1241 - accuracy: 0.5986 - val_loss: 1.5292 - val_accuracy: 0.4886\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 133 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2WziHKxHLOpl", + "outputId": "2e8c64ce-7966-43e1-e0a2-6e65ac4e24d5", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model = keras.models.load_model(\"my_cifar10_model.h5\")\n", + "model.evaluate(X_valid, y_valid)" + ], + "execution_count": 134, + "outputs": [ + { + "output_type": "stream", + "text": [ + "157/157 [==============================] - 1s 2ms/step - loss: 1.4904 - accuracy: 0.4768\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1.4903972148895264, 0.47679999470710754]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 134 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eaO_qFxHLOpl" + }, + "source": [ + "가장 낮은 검증 손실을 내는 모델은 검증 세트에서 약 47% 정확도를 얻었습니다. 이 검증 점수에 도달하는데 39번의 에포크가 걸렸습니다. (GPU가 없는) 제 노트북에서 에포크당 약 10초 정도 걸렸습니다. 
배치 정규화를 사용해 성능을 올릴 수 있는지 확인해 보죠." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4zqEUxjPLOpl" + }, + "source": [ + "### c.\n", + "*문제: 배치 정규화를 추가하고 학습 곡선을 비교해보세요. 이전보다 빠르게 수렴하나요? 더 좋은 모델이 만들어지나요? 훈련 속도에는 어떤 영향을 미치나요?*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C41Agl-yLOpl" + }, + "source": [ + "다음 코드는 위의 코드와 매우 비슷합니다. 몇 가지 다른 점은 아래와 같습니다:\n", + "\n", + "* 출력층을 제외하고 모든 `Dense` 층 다음에 (활성화 함수 전에) BN 층을 추가했습니다. 처음 은닉층 전에도 BN 층을 추가했습니다.\n", + "* 학습률을 5e-4로 바꾸었습니다. 1e-5, 3e-5, 5e-5, 1e-4, 3e-4, 5e-4, 1e-3, 3e-3를 시도해 보고 20번 에포크 후에 검증 세트 성능이 가장 좋은 것을 선택했습니다.\n", + "* run_logdir를 run_bn_* 으로 이름을 바꾸고 모델 파일 이름을 my_cifar10_bn_model.h5로 변경했습니다." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "LczWnbDULOpl", + "outputId": "76d0babb-d132-4b87-ac73-e5e91002a6c1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", + "model.add(keras.layers.BatchNormalization())\n", + "for _ in range(20):\n", + " model.add(keras.layers.Dense(100, kernel_initializer=\"he_normal\"))\n", + " model.add(keras.layers.BatchNormalization())\n", + " model.add(keras.layers.Activation(\"elu\"))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", + "\n", + "optimizer = keras.optimizers.Nadam(lr=5e-4)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=optimizer,\n", + " metrics=[\"accuracy\"])\n", + "\n", + "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", + "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_bn_model.h5\", save_best_only=True)\n", + "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", + "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_bn_{:03d}\".format(run_index))\n", + "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", 
+ "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", + "\n", + "model.fit(X_train, y_train, epochs=100,\n", + " validation_data=(X_valid, y_valid),\n", + " callbacks=callbacks)\n", + "\n", + "model = keras.models.load_model(\"my_cifar10_bn_model.h5\")\n", + "model.evaluate(X_valid, y_valid)" + ], + "execution_count": 135, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/100\n", + "1407/1407 [==============================] - 44s 26ms/step - loss: 1.9805 - accuracy: 0.2905 - val_loss: 1.6707 - val_accuracy: 0.3934\n", + "Epoch 2/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.6843 - accuracy: 0.3997 - val_loss: 1.5906 - val_accuracy: 0.4302\n", + "Epoch 3/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.6114 - accuracy: 0.4293 - val_loss: 1.6091 - val_accuracy: 0.4318\n", + "Epoch 4/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.5527 - accuracy: 0.4483 - val_loss: 1.5315 - val_accuracy: 0.4504\n", + "Epoch 5/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.5020 - accuracy: 0.4681 - val_loss: 1.4365 - val_accuracy: 0.4858\n", + "Epoch 6/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.4614 - accuracy: 0.4817 - val_loss: 1.4284 - val_accuracy: 0.4914\n", + "Epoch 7/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.4271 - accuracy: 0.4942 - val_loss: 1.4083 - val_accuracy: 0.4990\n", + "Epoch 8/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.4021 - accuracy: 0.5032 - val_loss: 1.3799 - val_accuracy: 0.5064\n", + "Epoch 9/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.3795 - accuracy: 0.5143 - val_loss: 1.3858 - val_accuracy: 0.5102\n", + "Epoch 10/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.3562 - accuracy: 0.5171 - val_loss: 1.3409 - 
val_accuracy: 0.5198\n", + "Epoch 11/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.3239 - accuracy: 0.5293 - val_loss: 1.3554 - val_accuracy: 0.5210\n", + "Epoch 12/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.3125 - accuracy: 0.5374 - val_loss: 1.3739 - val_accuracy: 0.5104\n", + "Epoch 13/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2838 - accuracy: 0.5461 - val_loss: 1.3909 - val_accuracy: 0.5128\n", + "Epoch 14/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2656 - accuracy: 0.5475 - val_loss: 1.3442 - val_accuracy: 0.5294\n", + "Epoch 15/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2607 - accuracy: 0.5568 - val_loss: 1.3741 - val_accuracy: 0.5220\n", + "Epoch 16/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2468 - accuracy: 0.5587 - val_loss: 1.3371 - val_accuracy: 0.5312\n", + "Epoch 17/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2273 - accuracy: 0.5681 - val_loss: 1.3262 - val_accuracy: 0.5360\n", + "Epoch 18/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2085 - accuracy: 0.5716 - val_loss: 1.3367 - val_accuracy: 0.5334\n", + "Epoch 19/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.1996 - accuracy: 0.5772 - val_loss: 1.3809 - val_accuracy: 0.5242\n", + "Epoch 20/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.1907 - accuracy: 0.5818 - val_loss: 1.3691 - val_accuracy: 0.5260\n", + "Epoch 21/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.1703 - accuracy: 0.5848 - val_loss: 1.3534 - val_accuracy: 0.5306\n", + "Epoch 22/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.1584 - accuracy: 0.5909 - val_loss: 1.3587 - val_accuracy: 0.5240\n", + "Epoch 23/100\n", + 
"1407/1407 [==============================] - 35s 25ms/step - loss: 1.1434 - accuracy: 0.5985 - val_loss: 1.3365 - val_accuracy: 0.5412\n", + "Epoch 24/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.1331 - accuracy: 0.6005 - val_loss: 1.3189 - val_accuracy: 0.5474\n", + "Epoch 25/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.1182 - accuracy: 0.6056 - val_loss: 1.3208 - val_accuracy: 0.5406\n", + "Epoch 26/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0999 - accuracy: 0.6107 - val_loss: 1.3565 - val_accuracy: 0.5348\n", + "Epoch 27/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0898 - accuracy: 0.6168 - val_loss: 1.3557 - val_accuracy: 0.5328\n", + "Epoch 28/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0888 - accuracy: 0.6197 - val_loss: 1.3448 - val_accuracy: 0.5334\n", + "Epoch 29/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0868 - accuracy: 0.6142 - val_loss: 1.3385 - val_accuracy: 0.5428\n", + "Epoch 30/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0712 - accuracy: 0.6226 - val_loss: 1.3513 - val_accuracy: 0.5376\n", + "Epoch 31/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0632 - accuracy: 0.6221 - val_loss: 1.3549 - val_accuracy: 0.5388\n", + "Epoch 32/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.0427 - accuracy: 0.6313 - val_loss: 1.3651 - val_accuracy: 0.5420\n", + "Epoch 33/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.0328 - accuracy: 0.6341 - val_loss: 1.3301 - val_accuracy: 0.5442\n", + "Epoch 34/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.0265 - accuracy: 0.6392 - val_loss: 1.3481 - val_accuracy: 0.5412\n", + "Epoch 35/100\n", + "1407/1407 [==============================] - 34s 
24ms/step - loss: 1.0217 - accuracy: 0.6412 - val_loss: 1.3520 - val_accuracy: 0.5414\n", + "Epoch 36/100\n", + "1407/1407 [==============================] - 34s 25ms/step - loss: 1.0009 - accuracy: 0.6463 - val_loss: 1.3634 - val_accuracy: 0.5320\n", + "Epoch 37/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 0.9865 - accuracy: 0.6520 - val_loss: 1.3685 - val_accuracy: 0.5304\n", + "Epoch 38/100\n", + "1407/1407 [==============================] - 34s 25ms/step - loss: 0.9805 - accuracy: 0.6567 - val_loss: 1.3807 - val_accuracy: 0.5314\n", + "Epoch 39/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 0.9782 - accuracy: 0.6517 - val_loss: 1.3851 - val_accuracy: 0.5438\n", + "Epoch 40/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 0.9646 - accuracy: 0.6644 - val_loss: 1.4195 - val_accuracy: 0.5328\n", + "Epoch 41/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 0.9540 - accuracy: 0.6661 - val_loss: 1.3478 - val_accuracy: 0.5542\n", + "Epoch 42/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 0.9528 - accuracy: 0.6655 - val_loss: 1.3789 - val_accuracy: 0.5440\n", + "Epoch 43/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 0.9449 - accuracy: 0.6667 - val_loss: 1.3721 - val_accuracy: 0.5410\n", + "Epoch 44/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 0.9345 - accuracy: 0.6723 - val_loss: 1.4136 - val_accuracy: 0.5348\n", + "157/157 [==============================] - 1s 3ms/step - loss: 1.3189 - accuracy: 0.5474\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1.318870186805725, 0.5473999977111816]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 135 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wKjSeUIXLOpl" + }, + "source": [ + "* *이전보다 빠르게 수렴하나요?* 훨씬 빠릅니다! 
이전 모델은 가장 낮은 검증 손실에 도달하기 위해 39 에포크가 걸렸지만 BN을 사용한 새 모델은 18 에포크가 걸렸습니다. 이전 모델보다 두 배 이상 빠릅니다. BN 층은 훈련을 안정적으로 수행하고 더 큰 학습률을 사용할 수 있기 때문에 수렴이 빨라졌습니다.\n", + "* *BN이 더 좋은 모델을 만드나요?* 네! 최종 모델의 성능이 47%가 아니라 55% 정확도로 더 좋습니다. 이는 아주 좋은 모델이 아니지만 적어도 이전보다는 낫습니다(합성곱 신경망이 더 낫겠지만 이는 다른 주제입니다. 14장을 참고하세요).\n", + "* *BN이 훈련 속도에 영향을 미치나요?* 모델이 두 배나 빠르게 수렴했지만 각 에포크는 10초가 아니라 16초가 걸렸습니다. BN 층에서 추가된 계산 때문입니다. 따라서 전체적으로 에포크 횟수가 50% 정도 줄었지만 훈련 시간(탁상 시계 시간)은 30% 정도 줄었습니다. 결국 크게 향상되었습니다!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oJr9kBV5LOpm" + }, + "source": [ + "### d.\n", + "*문제: 배치 정규화를 SELU로 바꾸어보세요. 네트워크가 자기 정규화하기 위해 필요한 변경 사항을 적용해보세요(즉, 입력 특성 표준화, 르쿤 정규분포 초기화, 완전 연결 층만 순차적으로 쌓은 심층 신경망 등).*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZAfEp5gNLOpm", + "outputId": "37cf90e0-b80f-4616-f168-03a99c6c5648", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", + "for _ in range(20):\n", + " model.add(keras.layers.Dense(100,\n", + " kernel_initializer=\"lecun_normal\",\n", + " activation=\"selu\"))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", + "\n", + "optimizer = keras.optimizers.Nadam(lr=7e-4)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=optimizer,\n", + " metrics=[\"accuracy\"])\n", + "\n", + "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", + "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_selu_model.h5\", save_best_only=True)\n", + "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", + "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_selu_{:03d}\".format(run_index))\n", + "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", + "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", + "\n", + 
"X_means = X_train.mean(axis=0)\n", + "X_stds = X_train.std(axis=0)\n", + "X_train_scaled = (X_train - X_means) / X_stds\n", + "X_valid_scaled = (X_valid - X_means) / X_stds\n", + "X_test_scaled = (X_test - X_means) / X_stds\n", + "\n", + "model.fit(X_train_scaled, y_train, epochs=100,\n", + " validation_data=(X_valid_scaled, y_valid),\n", + " callbacks=callbacks)\n", + "\n", + "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n", + "model.evaluate(X_valid_scaled, y_valid)" + ], + "execution_count": 136, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/100\n", + "1407/1407 [==============================] - 17s 10ms/step - loss: 2.0543 - accuracy: 0.2663 - val_loss: 1.7895 - val_accuracy: 0.3680\n", + "Epoch 2/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.7162 - accuracy: 0.3874 - val_loss: 1.8017 - val_accuracy: 0.3720\n", + "Epoch 3/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.6257 - accuracy: 0.4268 - val_loss: 1.6563 - val_accuracy: 0.4172\n", + "Epoch 4/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.5436 - accuracy: 0.4569 - val_loss: 1.6334 - val_accuracy: 0.4386\n", + "Epoch 5/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4967 - accuracy: 0.4752 - val_loss: 1.6055 - val_accuracy: 0.4424\n", + "Epoch 6/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4495 - accuracy: 0.4914 - val_loss: 1.5408 - val_accuracy: 0.4574\n", + "Epoch 7/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4016 - accuracy: 0.5107 - val_loss: 1.5620 - val_accuracy: 0.4540\n", + "Epoch 8/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3616 - accuracy: 0.5232 - val_loss: 1.5098 - val_accuracy: 0.4726\n", + "Epoch 9/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3246 - accuracy: 0.5393 - val_loss: 1.4862 - 
val_accuracy: 0.4698\n", + "Epoch 10/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2975 - accuracy: 0.5491 - val_loss: 1.4937 - val_accuracy: 0.4912\n", + "Epoch 11/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2568 - accuracy: 0.5690 - val_loss: 1.5241 - val_accuracy: 0.4978\n", + "Epoch 12/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2373 - accuracy: 0.5745 - val_loss: 1.5144 - val_accuracy: 0.4754\n", + "Epoch 13/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2129 - accuracy: 0.5821 - val_loss: 1.4959 - val_accuracy: 0.5082\n", + "Epoch 14/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1765 - accuracy: 0.5976 - val_loss: 1.4949 - val_accuracy: 0.5020\n", + "Epoch 15/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1584 - accuracy: 0.6032 - val_loss: 1.5359 - val_accuracy: 0.4958\n", + "Epoch 16/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1476 - accuracy: 0.6054 - val_loss: 1.5124 - val_accuracy: 0.5024\n", + "Epoch 17/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1305 - accuracy: 0.6166 - val_loss: 1.5296 - val_accuracy: 0.5036\n", + "Epoch 18/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0998 - accuracy: 0.6253 - val_loss: 1.4755 - val_accuracy: 0.5022\n", + "Epoch 19/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0959 - accuracy: 0.6324 - val_loss: 1.6318 - val_accuracy: 0.4636\n", + "Epoch 20/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1792 - accuracy: 0.5927 - val_loss: 1.5011 - val_accuracy: 0.5054\n", + "Epoch 21/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0341 - accuracy: 0.6458 - val_loss: 1.5519 - val_accuracy: 0.5038\n", + "Epoch 22/100\n", + "1407/1407 
[==============================] - 13s 9ms/step - loss: 1.0028 - accuracy: 0.6588 - val_loss: 1.5129 - val_accuracy: 0.5106\n", + "Epoch 23/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0480 - accuracy: 0.6548 - val_loss: 1.5322 - val_accuracy: 0.4728\n", + "Epoch 24/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1508 - accuracy: 0.6021 - val_loss: 1.5160 - val_accuracy: 0.5052\n", + "Epoch 25/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0125 - accuracy: 0.6555 - val_loss: 1.5517 - val_accuracy: 0.5004\n", + "Epoch 26/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0041 - accuracy: 0.6594 - val_loss: 1.5464 - val_accuracy: 0.4994\n", + "Epoch 27/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0239 - accuracy: 0.6504 - val_loss: 1.5180 - val_accuracy: 0.5124\n", + "Epoch 28/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9534 - accuracy: 0.6749 - val_loss: 1.5762 - val_accuracy: 0.5006\n", + "Epoch 29/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9281 - accuracy: 0.6852 - val_loss: 1.5844 - val_accuracy: 0.5174\n", + "Epoch 30/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9285 - accuracy: 0.6879 - val_loss: 1.5761 - val_accuracy: 0.5070\n", + "Epoch 31/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9284 - accuracy: 0.6851 - val_loss: 1.5662 - val_accuracy: 0.5022\n", + "Epoch 32/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9068 - accuracy: 0.6945 - val_loss: 1.5796 - val_accuracy: 0.5050\n", + "Epoch 33/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9320 - accuracy: 0.6830 - val_loss: 1.5989 - val_accuracy: 0.5112\n", + "Epoch 34/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 128.6344 - 
accuracy: 0.6796 - val_loss: 1.6077 - val_accuracy: 0.4778\n", + "Epoch 35/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1029 - accuracy: 0.6221 - val_loss: 1.5892 - val_accuracy: 0.4914\n", + "Epoch 36/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0178 - accuracy: 0.6509 - val_loss: 1.6135 - val_accuracy: 0.4984\n", + "Epoch 37/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9726 - accuracy: 0.6713 - val_loss: 1.6034 - val_accuracy: 0.5038\n", + "Epoch 38/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9423 - accuracy: 0.6813 - val_loss: 1.6206 - val_accuracy: 0.5006\n", + "157/157 [==============================] - 1s 2ms/step - loss: 1.4755 - accuracy: 0.5022\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1.4755374193191528, 0.5022000074386597]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 136 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "c47RjcCqLOpm", + "outputId": "f3aefa6c-a958-42fb-da55-c137c672b7ee", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n", + "model.evaluate(X_valid_scaled, y_valid)" + ], + "execution_count": 137, + "outputs": [ + { + "output_type": "stream", + "text": [ + "157/157 [==============================] - 1s 3ms/step - loss: 1.4755 - accuracy: 0.5022\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1.4755374193191528, 0.5022000074386597]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 137 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tiqKhLWPLOpn" + }, + "source": [ + "51.4% 정확도를 얻었습니다. 원래 모델보다 더 좋습니다. 하지만 배치 정규화를 사용한 모델만큼 좋지는 않습니다. 최고의 모델에 도달하는데 13 에포크가 걸렸습니다. 이는 원본 모델이나 BN 모델보다 더 빠른 것입니다. 각 에포크는 원본 모델처럼 10초만 걸렸습니다. 
따라서 이 모델이 지금까지 가장 빠른 모델입니다(에포크와 탁상 시계 기준으로)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q1pbE7gjLOpn" + }, + "source": [ + "### e.\n", + "*문제: 알파 드롭아웃으로 모델에 규제를 적용해보세요. 그다음 모델을 다시 훈련하지 않고 MC 드롭아웃으로 더 높은 정확도를 얻을 수 있는지 확인해보세요.*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RnApp_3mLOpn", + "outputId": "ccdd98f7-f09c-4fb9-d3db-af4ddf65a8ab", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", + "for _ in range(20):\n", + " model.add(keras.layers.Dense(100,\n", + " kernel_initializer=\"lecun_normal\",\n", + " activation=\"selu\"))\n", + "\n", + "model.add(keras.layers.AlphaDropout(rate=0.1))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", + "\n", + "optimizer = keras.optimizers.Nadam(lr=5e-4)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=optimizer,\n", + " metrics=[\"accuracy\"])\n", + "\n", + "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", + "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_alpha_dropout_model.h5\", save_best_only=True)\n", + "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", + "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_alpha_dropout_{:03d}\".format(run_index))\n", + "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", + "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", + "\n", + "X_means = X_train.mean(axis=0)\n", + "X_stds = X_train.std(axis=0)\n", + "X_train_scaled = (X_train - X_means) / X_stds\n", + "X_valid_scaled = (X_valid - X_means) / X_stds\n", + "X_test_scaled = (X_test - X_means) / X_stds\n", + "\n", + "model.fit(X_train_scaled, y_train, epochs=100,\n", + " validation_data=(X_valid_scaled, y_valid),\n", + " callbacks=callbacks)\n", + "\n", + 
"model = keras.models.load_model(\"my_cifar10_alpha_dropout_model.h5\")\n", + "model.evaluate(X_valid_scaled, y_valid)" + ], + "execution_count": 138, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/100\n", + "1407/1407 [==============================] - 17s 10ms/step - loss: 2.0545 - accuracy: 0.2804 - val_loss: 1.7849 - val_accuracy: 0.3802\n", + "Epoch 2/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.6891 - accuracy: 0.4028 - val_loss: 1.6433 - val_accuracy: 0.4222\n", + "Epoch 3/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.5892 - accuracy: 0.4414 - val_loss: 1.6071 - val_accuracy: 0.4362\n", + "Epoch 4/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.5089 - accuracy: 0.4714 - val_loss: 1.5975 - val_accuracy: 0.4482\n", + "Epoch 5/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4522 - accuracy: 0.4934 - val_loss: 1.6036 - val_accuracy: 0.4602\n", + "Epoch 6/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4014 - accuracy: 0.5114 - val_loss: 1.5332 - val_accuracy: 0.4778\n", + "Epoch 7/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3527 - accuracy: 0.5290 - val_loss: 1.5837 - val_accuracy: 0.4700\n", + "Epoch 8/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3182 - accuracy: 0.5379 - val_loss: 1.4835 - val_accuracy: 0.5016\n", + "Epoch 9/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2835 - accuracy: 0.5536 - val_loss: 1.5400 - val_accuracy: 0.4866\n", + "Epoch 10/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2473 - accuracy: 0.5677 - val_loss: 1.5282 - val_accuracy: 0.4944\n", + "Epoch 11/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2099 - accuracy: 0.5855 - val_loss: 1.5768 - val_accuracy: 0.5080\n", + "Epoch 12/100\n", + 
"1407/1407 [==============================] - 13s 9ms/step - loss: 1.1898 - accuracy: 0.5905 - val_loss: 1.5192 - val_accuracy: 0.5046\n", + "Epoch 13/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1486 - accuracy: 0.6044 - val_loss: 1.5302 - val_accuracy: 0.5092\n", + "Epoch 14/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1219 - accuracy: 0.6141 - val_loss: 1.5072 - val_accuracy: 0.5086\n", + "Epoch 15/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1008 - accuracy: 0.6171 - val_loss: 1.6471 - val_accuracy: 0.5088\n", + "Epoch 16/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0778 - accuracy: 0.6309 - val_loss: 1.6557 - val_accuracy: 0.5152\n", + "Epoch 17/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0515 - accuracy: 0.6434 - val_loss: 1.6130 - val_accuracy: 0.5208\n", + "Epoch 18/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0246 - accuracy: 0.6530 - val_loss: 1.6559 - val_accuracy: 0.5082\n", + "Epoch 19/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0088 - accuracy: 0.6552 - val_loss: 1.7325 - val_accuracy: 0.5126\n", + "Epoch 20/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9908 - accuracy: 0.6627 - val_loss: 1.6986 - val_accuracy: 0.5134\n", + "Epoch 21/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9657 - accuracy: 0.6694 - val_loss: 1.7291 - val_accuracy: 0.5010\n", + "Epoch 22/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9548 - accuracy: 0.6728 - val_loss: 1.7533 - val_accuracy: 0.5140\n", + "Epoch 23/100\n", + "1407/1407 [==============================] - 13s 10ms/step - loss: 0.9199 - accuracy: 0.6913 - val_loss: 1.7172 - val_accuracy: 0.5042\n", + "Epoch 24/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 
0.9132 - accuracy: 0.6906 - val_loss: 1.6688 - val_accuracy: 0.5206\n", + "Epoch 25/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.8955 - accuracy: 0.6978 - val_loss: 1.7418 - val_accuracy: 0.5156\n", + "Epoch 26/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.8596 - accuracy: 0.7116 - val_loss: 1.8324 - val_accuracy: 0.5188\n", + "Epoch 27/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.8607 - accuracy: 0.7123 - val_loss: 1.7325 - val_accuracy: 0.5100\n", + "Epoch 28/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.8528 - accuracy: 0.7139 - val_loss: 1.8382 - val_accuracy: 0.5002\n", + "157/157 [==============================] - 1s 2ms/step - loss: 1.4835 - accuracy: 0.5016\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1.4834871292114258, 0.5016000270843506]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 138 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AsDa8NPNLOpn" + }, + "source": [ + "이 모델은 검증 세트에서 50.16% 정확도에 도달합니다. 드롭아웃이 없을 때보다(50.22%) 조금 더 나쁩니다. 하이퍼파라미터 탐색을 좀 많이 수행해 보면 더 나아질 수 있습니다(드롭아웃 비율 5%, 10%, 20%, 40%와 학습률 1e-4, 3e-4, 5e-4, 1e-3을 시도했습니다). 하지만 이 경우에는 크게 나아지지는 않을 것 같습니다." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bNuTrZp6LOpn" + }, + "source": [ + "이제 MC 드롭아웃을 사용해 보죠. 앞서 사용한 `MCAlphaDropout` 클래스를 복사해 사용하겠습니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xUA1Qi9lLOpn" + }, + "source": [ + "class MCAlphaDropout(keras.layers.AlphaDropout):\n", + " def call(self, inputs):\n", + " return super().call(inputs, training=True)" + ], + "execution_count": 139, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HgMCXU-lLOpn" + }, + "source": [ + "방금 훈련했던 모델과 (같은 가중치를 가진) 동일한 새로운 모델을 만들어 보죠. 
하지만 `AlphaDropout` 층 대신 `MCAlphaDropout` 드롭아웃 층을 사용합니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TrqaKF9PLOpn" + }, + "source": [ + "mc_model = keras.models.Sequential([\n", + " MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n", + " for layer in model.layers\n", + "])" + ], + "execution_count": 140, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cqVC9dDXLOpn" + }, + "source": [ + "그다음 몇 가지 유틸리티 함수를 추가합니다. 첫 번째 함수는 모델을 여러 번 실행합니다(기본적으로 10번). 그다음 평균한 예측 클래스 확률을 반환합니다. 두 번째 함수는 이 평균 확률을 사용해 각 샘플의 클래스를 예측합니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_eXR7S5ILOpo" + }, + "source": [ + "def mc_dropout_predict_probas(mc_model, X, n_samples=10):\n", + " Y_probas = [mc_model.predict(X) for sample in range(n_samples)]\n", + " return np.mean(Y_probas, axis=0)\n", + "\n", + "def mc_dropout_predict_classes(mc_model, X, n_samples=10):\n", + " Y_probas = mc_dropout_predict_probas(mc_model, X, n_samples)\n", + " return np.argmax(Y_probas, axis=1)" + ], + "execution_count": 141, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Iuu0Qr59LOpo" + }, + "source": [ + "이제 검증 세트의 모든 샘플에 대해 예측을 만들고 정확도를 계산해 보죠:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5HPOAWCLLOpo", + "outputId": "91fd39e3-2205-42aa-af3f-80a7af4b1867", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "y_pred = mc_dropout_predict_classes(mc_model, X_valid_scaled)\n", + "accuracy = np.mean(y_pred == y_valid[:, 0])\n", + "accuracy" + ], + "execution_count": 142, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.5024" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 142 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-eNayl7MLOpo" + }, + "source": [ + "이 경우에는 실제적인 정확도 
향상이 없습니다(50.16%에서 50.24%).\n", + "\n", + "따라서 이 연습문제에서 얻은 최상의 모델은 배치 정규화 모델입니다." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XvfEibM4LOpo" + }, + "source": [ + "### f.\n", + "*문제: 1사이클 스케줄링으로 모델을 다시 훈련하고 훈련 속도와 모델 정확도가 향상되는지 확인해보세요.*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "93aZAECALOpo" + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", + "for _ in range(20):\n", + " model.add(keras.layers.Dense(100,\n", + " kernel_initializer=\"lecun_normal\",\n", + " activation=\"selu\"))\n", + "\n", + "model.add(keras.layers.AlphaDropout(rate=0.1))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", + "\n", + "optimizer = keras.optimizers.SGD(lr=1e-3)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=optimizer,\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 143, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Hy3EYeBLLOpo", + "outputId": "4740e094-df47-4ef0-ac39-6c0bc5a93aa5", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 376 + } + }, + "source": [ + "batch_size = 128\n", + "rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)\n", + "plot_lr_vs_loss(rates, losses)\n", + "plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 1.4])" + ], + "execution_count": 144, + "outputs": [ + { + "output_type": "stream", + "text": [ + "352/352 [==============================] - 2s 5ms/step - loss: nan - accuracy: 0.1251\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(9.999999747378752e-06,\n", + " 9.999868392944336,\n", + " 2.6116409301757812,\n", + " 3.931788035801479)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 144 + }, + { + 
"output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "G8aLsTeKLOpo" + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", + "for _ in range(20):\n", + " model.add(keras.layers.Dense(100,\n", + " kernel_initializer=\"lecun_normal\",\n", + " activation=\"selu\"))\n", + "\n", + "model.add(keras.layers.AlphaDropout(rate=0.1))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", + "\n", + "optimizer = keras.optimizers.SGD(lr=1e-2)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=optimizer,\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 145, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "MoHWUDtBLOpp", + "outputId": "10416f81-f6d1-4e78-8382-15ca285a9a4e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "n_epochs = 15\n", + "onecycle = OneCycleScheduler(len(X_train_scaled) // batch_size * n_epochs, max_rate=0.05)\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,\n", + " validation_data=(X_valid_scaled, y_valid),\n", + " callbacks=[onecycle])" + ], + "execution_count": 146, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/15\n", + "352/352 [==============================] - 3s 5ms/step - loss: 2.2329 - accuracy: 0.2356 - val_loss: 1.7666 - val_accuracy: 0.3756\n", + "Epoch 2/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.7975 - accuracy: 0.3638 - val_loss: 1.6788 - val_accuracy: 0.4198\n", + "Epoch 3/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.6467 - accuracy: 0.4186 - val_loss: 1.6238 - val_accuracy: 0.4298\n", + "Epoch 4/15\n", + "352/352 [==============================] - 2s 5ms/step - 
loss: 1.5460 - accuracy: 0.4497 - val_loss: 1.7127 - val_accuracy: 0.4134\n", + "Epoch 5/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.4914 - accuracy: 0.4703 - val_loss: 1.6201 - val_accuracy: 0.4420\n", + "Epoch 6/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.4389 - accuracy: 0.4839 - val_loss: 1.5667 - val_accuracy: 0.4528\n", + "Epoch 7/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.4040 - accuracy: 0.5038 - val_loss: 1.5114 - val_accuracy: 0.4684\n", + "Epoch 8/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.3507 - accuracy: 0.5187 - val_loss: 1.4993 - val_accuracy: 0.4828\n", + "Epoch 9/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.2743 - accuracy: 0.5475 - val_loss: 1.5490 - val_accuracy: 0.4810\n", + "Epoch 10/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.1955 - accuracy: 0.5722 - val_loss: 1.5555 - val_accuracy: 0.4952\n", + "Epoch 11/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.1234 - accuracy: 0.6027 - val_loss: 1.5497 - val_accuracy: 0.5010\n", + "Epoch 12/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.0704 - accuracy: 0.6184 - val_loss: 1.4914 - val_accuracy: 0.5132\n", + "Epoch 13/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 0.9946 - accuracy: 0.6453 - val_loss: 1.5379 - val_accuracy: 0.5220\n", + "Epoch 14/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 0.9180 - accuracy: 0.6725 - val_loss: 1.5380 - val_accuracy: 0.5330\n", + "Epoch 15/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 0.8923 - accuracy: 0.6833 - val_loss: 1.5679 - val_accuracy: 0.5334\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VfCKlk3BLOpp" + }, + "source": [ + "1사이클 방식을 사용해 모델을 15에포크 동안 훈련했습니다. (큰 배치 크기 덕분에) 각 에포크는 3초만 걸렸습니다. 
이는 지금까지 훈련한 가장 빠른 모델보다 3배나 더 빠릅니다. 또한 모델 성능도 올라갔습니다(50.16%에서 53.34%). 배치 정규화 모델이 조금 더 성능이 높지만 훈련 속도가 더 느립니다." + ] } - ], - "source": [ - "n_epochs = 15\n", - "onecycle = OneCycleScheduler(len(X_train_scaled) // batch_size * n_epochs, max_rate=0.05)\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[onecycle])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "1사이클 방식을 사용해 모델을 15에포크 동안 훈련했습니다. (큰 배치 크기 덕분에) 각 에포크는 3초만 걸렸습니다. 이는 지금까지 훈련한 가장 빠른 모델보다 3배나 더 빠릅니다. 또한 모델 성능도 올라갔습니다(50.8%에서 52.8%). 배치 정규화 모델이 조금 더 성능이 높지만 훈련 속도가 더 느립니다." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "TensorFlow 2.3 on Python 3.6 (CUDA 10.1)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - }, - "nav_menu": { - "height": "360px", - "width": "416px" - }, - "toc": { - "navigate_menu": true, - "number_sections": true, - "sideBar": true, - "threshold": 6, - "toc_cell": false, - "toc_section_display": "block", - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + ] +} \ No newline at end of file