diff --git a/11_training_deep_neural_networks.ipynb b/11_training_deep_neural_networks.ipynb index 26ab2d5fc..470e5dc4f 100644 --- a/11_training_deep_neural_networks.ipynb +++ b/11_training_deep_neural_networks.ipynb @@ -1,4830 +1,5843 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**11장 – 심층 신경망 훈련하기**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "_이 노트북은 11장에 있는 모든 샘플 코드와 연습문제 해답을 가지고 있습니다._" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \n", - "
\n", - " 구글 코랩에서 실행하기\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 설정" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "먼저 몇 개의 모듈을 임포트합니다. 맷플롯립 그래프를 인라인으로 출력하도록 만들고 그림을 저장하는 함수를 준비합니다. 또한 파이썬 버전이 3.5 이상인지 확인합니다(파이썬 2.x에서도 동작하지만 곧 지원이 중단되므로 파이썬 3을 사용하는 것이 좋습니다). 사이킷런 버전이 0.20 이상인지와 텐서플로 버전이 2.0 이상인지 확인합니다." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# 파이썬 ≥3.5 필수\n", - "import sys\n", - "assert sys.version_info >= (3, 5)\n", - "\n", - "# 사이킷런 ≥0.20 필수\n", - "import sklearn\n", - "assert sklearn.__version__ >= \"0.20\"\n", - "\n", - "# 텐서플로 ≥2.0 필수\n", - "import tensorflow as tf\n", - "from tensorflow import keras\n", - "assert tf.__version__ >= \"2.0\"\n", - "\n", - "%load_ext tensorboard\n", - "\n", - "# 공통 모듈 임포트\n", - "import numpy as np\n", - "import os\n", - "\n", - "# 노트북 실행 결과를 동일하게 유지하기 위해\n", - "np.random.seed(42)\n", - "\n", - "# 깔끔한 그래프 출력을 위해\n", - "%matplotlib inline\n", - "import matplotlib as mpl\n", - "import matplotlib.pyplot as plt\n", - "mpl.rc('axes', labelsize=14)\n", - "mpl.rc('xtick', labelsize=12)\n", - "mpl.rc('ytick', labelsize=12)\n", - "\n", - "# 그림을 저장할 위치\n", - "PROJECT_ROOT_DIR = \".\"\n", - "CHAPTER_ID = \"deep\"\n", - "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", - "os.makedirs(IMAGES_PATH, exist_ok=True)\n", - "\n", - "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", - " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", - " print(\"그림 저장:\", fig_id)\n", - " if tight_layout:\n", - " plt.tight_layout()\n", - " plt.savefig(path, format=fig_extension, dpi=resolution)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 그레이디언트 소실과 폭주 문제" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "def logit(z):\n", - " return 1 / (1 + np.exp(-z))" - ] - }, - { - "cell_type": 
"code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "그림 저장: sigmoid_saturation_plot\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "z = np.linspace(-5, 5, 200)\n", - "\n", - "plt.plot([-5, 5], [0, 0], 'k-')\n", - "plt.plot([-5, 5], [1, 1], 'k--')\n", - "plt.plot([0, 0], [-0.2, 1.2], 'k-')\n", - "plt.plot([-5, 5], [-3/4, 7/4], 'g--')\n", - "plt.plot(z, logit(z), \"b-\", linewidth=2)\n", - "props = dict(facecolor='black', shrink=0.1)\n", - "plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha=\"center\")\n", - "plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha=\"center\")\n", - "plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha=\"center\")\n", - "plt.grid(True)\n", - "plt.title(\"Sigmoid activation function\", fontsize=14)\n", - "plt.axis([-5, 5, -0.2, 1.2])\n", - "\n", - "save_fig(\"sigmoid_saturation_plot\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Xavier 초기화와 He 초기화" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Constant',\n", - " 'GlorotNormal',\n", - " 'GlorotUniform',\n", - " 'HeNormal',\n", - " 'HeUniform',\n", - " 'Identity',\n", - " 'Initializer',\n", - " 'LecunNormal',\n", - " 'LecunUniform',\n", - " 'Ones',\n", - " 'Orthogonal',\n", - " 'RandomNormal',\n", - " 'RandomUniform',\n", - " 'TruncatedNormal',\n", - " 'VarianceScaling',\n", - " 'Zeros',\n", - " 'constant',\n", - " 'deserialize',\n", - " 'get',\n", - " 'glorot_normal',\n", - " 'glorot_uniform',\n", - " 'he_normal',\n", - " 'he_uniform',\n", - " 'identity',\n", - " 'lecun_normal',\n", - " 'lecun_uniform',\n", - " 'ones',\n", - " 'orthogonal',\n", - " 'random_normal',\n", - " 'random_uniform',\n", - " 'serialize',\n", - " 'truncated_normal',\n", - " 'variance_scaling',\n", - " 'zeros']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": 
"execute_result" - } - ], - "source": [ - "[name for name in dir(keras.initializers) if not name.startswith(\"_\")]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=\"he_normal\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "init = keras.initializers.VarianceScaling(scale=2., mode='fan_avg',\n", - " distribution='uniform')\n", - "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=init)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 수렴하지 않는 활성화 함수" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### LeakyReLU" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def leaky_relu(z, alpha=0.01):\n", - " return np.maximum(alpha*z, z)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "그림 저장: leaky_relu_plot\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(z, leaky_relu(z, 0.05), \"b-\", linewidth=2)\n", - "plt.plot([-5, 5], [0, 0], 'k-')\n", - "plt.plot([0, 0], [-0.5, 4.2], 'k-')\n", - "plt.grid(True)\n", - "props = dict(facecolor='black', shrink=0.1)\n", - "plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha=\"center\")\n", - "plt.title(\"Leaky ReLU activation function\", fontsize=14)\n", - "plt.axis([-5, 5, -0.5, 4.2])\n", - "\n", - "save_fig(\"leaky_relu_plot\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['deserialize',\n", - " 'elu',\n", - " 'exponential',\n", - " 'get',\n", - " 'hard_sigmoid',\n", - " 'linear',\n", - " 'relu',\n", - " 'selu',\n", - " 'serialize',\n", - " 'sigmoid',\n", - " 'softmax',\n", - " 'softplus',\n", - " 'softsign',\n", - " 'swish',\n", - " 'tanh']" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[m for m in dir(keras.activations) if not m.startswith(\"_\")]" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['LeakyReLU', 'PReLU', 'ReLU', 'ThresholdedReLU']" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[m for m in dir(keras.layers) if \"relu\" in m.lower()]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "LeakyReLU를 사용해 패션 MNIST에서 신경망을 훈련해 보죠:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n", - "X_train_full = X_train_full / 255.0\n", - "X_test = X_test / 255.0\n", - "X_valid, X_train = X_train_full[:5000], X_train_full[5000:]\n", 
- "y_valid, y_train = y_train_full[:5000], y_train_full[5000:]" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n", - " keras.layers.LeakyReLU(),\n", - " keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n", - " keras.layers.LeakyReLU(),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 1.2819 - accuracy: 0.6229 - val_loss: 0.8886 - val_accuracy: 0.7160\n", - "Epoch 2/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.7955 - accuracy: 0.7361 - val_loss: 0.7130 - val_accuracy: 0.7656\n", - "Epoch 3/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.6816 - accuracy: 0.7721 - val_loss: 0.6427 - val_accuracy: 0.7898\n", - "Epoch 4/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.6217 - accuracy: 0.7943 - val_loss: 0.5900 - val_accuracy: 0.8066\n", - "Epoch 5/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5832 - accuracy: 0.8075 - val_loss: 0.5582 - val_accuracy: 0.8202\n", - "Epoch 6/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5553 - accuracy: 0.8157 - val_loss: 0.5350 - val_accuracy: 0.8238\n", - "Epoch 7/10\n", - 
"1719/1719 [==============================] - 5s 3ms/step - loss: 0.5338 - accuracy: 0.8224 - val_loss: 0.5157 - val_accuracy: 0.8304\n", - "Epoch 8/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5172 - accuracy: 0.8273 - val_loss: 0.5079 - val_accuracy: 0.8282\n", - "Epoch 9/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5040 - accuracy: 0.8289 - val_loss: 0.4895 - val_accuracy: 0.8386\n", - "Epoch 10/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4924 - accuracy: 0.8321 - val_loss: 0.4817 - val_accuracy: 0.8396\n" - ] - } - ], - "source": [ - "history = model.fit(X_train, y_train, epochs=10,\n", - " validation_data=(X_valid, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "PReLU를 테스트해 보죠:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n", - " keras.layers.PReLU(),\n", - " keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n", - " keras.layers.PReLU(),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/10\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 1.3461 - accuracy: 0.6209 - val_loss: 0.9255 - val_accuracy: 0.7184\n", - "Epoch 2/10\n", - "1719/1719 [==============================] - 5s 3ms/step - 
loss: 0.8197 - accuracy: 0.7355 - val_loss: 0.7305 - val_accuracy: 0.7628\n", - "Epoch 3/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.6966 - accuracy: 0.7694 - val_loss: 0.6565 - val_accuracy: 0.7880\n", - "Epoch 4/10\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.6331 - accuracy: 0.7909 - val_loss: 0.6003 - val_accuracy: 0.8048\n", - "Epoch 5/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5917 - accuracy: 0.8057 - val_loss: 0.5656 - val_accuracy: 0.8184\n", - "Epoch 6/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5618 - accuracy: 0.8134 - val_loss: 0.5406 - val_accuracy: 0.8238\n", - "Epoch 7/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5390 - accuracy: 0.8206 - val_loss: 0.5196 - val_accuracy: 0.8312\n", - "Epoch 8/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5213 - accuracy: 0.8257 - val_loss: 0.5113 - val_accuracy: 0.8320\n", - "Epoch 9/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5070 - accuracy: 0.8288 - val_loss: 0.4916 - val_accuracy: 0.8380\n", - "Epoch 10/10\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4945 - accuracy: 0.8315 - val_loss: 0.4826 - val_accuracy: 0.8396\n" - ] - } - ], - "source": [ - "history = model.fit(X_train, y_train, epochs=10,\n", - " validation_data=(X_valid, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### ELU" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "def elu(z, alpha=1):\n", - " return np.where(z < 0, alpha * (np.exp(z) - 1), z)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "그림 저장: elu_plot\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(z, elu(z), \"b-\", linewidth=2)\n", - "plt.plot([-5, 5], [0, 0], 'k-')\n", - "plt.plot([-5, 5], [-1, -1], 'k--')\n", - "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n", - "plt.grid(True)\n", - "plt.title(r\"ELU activation function ($\\alpha=1$)\", fontsize=14)\n", - "plt.axis([-5, 5, -2.2, 3.2])\n", - "\n", - "save_fig(\"elu_plot\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "텐서플로에서 쉽게 ELU를 적용할 수 있습니다. 층을 만들 때 활성화 함수로 지정하면 됩니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.layers.Dense(10, activation=\"elu\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### SELU" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Günter Klambauer, Thomas Unterthiner, Andreas Mayr는 2017년 한 [훌륭한 논문](https://arxiv.org/pdf/1706.02515.pdf)에서 SELU 활성화 함수를 소개했습니다. 훈련하는 동안 완전 연결 층만 쌓아서 신경망을 만들고 SELU 활성화 함수와 LeCun 초기화를 사용한다면 자기 정규화됩니다. 각 층의 출력이 평균과\n", - "표준편차를 보존하는 경향이 있습니다. 이는 그레이디언트 소실과 폭주 문제를 막아줍니다. 그 결과로 SELU 활성화 함수는 이런 종류의 네트워크(특히 아주 깊은 네트워크)에서 다른 활성화 함수보다 뛰어난 성능을 종종 냅니다. 따라서 꼭 시도해 봐야 합니다. 하지만 SELU 활성화 함수의 자기 정규화 특징은 쉽게 깨집니다. ℓ1나 ℓ2 정규화, 드롭아웃, 맥스 노름, 스킵 연결이나 시퀀셜하지 않은 다른 토폴로지를 사용할 수 없습니다(즉 순환 신경망은 자기 정규화되지 않습니다). 하지만 실전에서 시퀀셜 CNN과 잘 동작합니다. 자기 정규화가 깨지면 SELU가 다른 활성화 함수보다 더 나은 성능을 내지 않을 것입니다." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "from scipy.special import erfc\n", - "\n", - "# alpha와 scale은 평균 0과 표준 편차 1로 자기 정규화합니다\n", - "# (논문에 있는 식 14 참조):\n", - "alpha_0_1 = -np.sqrt(2 / np.pi) / (erfc(1/np.sqrt(2)) * np.exp(1/2) - 1)\n", - "scale_0_1 = (1 - erfc(1 / np.sqrt(2)) * np.sqrt(np.e)) * np.sqrt(2 * np.pi) * (2 * erfc(np.sqrt(2))*np.e**2 + np.pi*erfc(1/np.sqrt(2))**2*np.e - 2*(2+np.pi)*erfc(1/np.sqrt(2))*np.sqrt(np.e)+np.pi+2)**(-1/2)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "def selu(z, scale=scale_0_1, alpha=alpha_0_1):\n", - " return scale * elu(z, alpha)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "그림 저장: selu_plot\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(z, selu(z), \"b-\", linewidth=2)\n", - "plt.plot([-5, 5], [0, 0], 'k-')\n", - "plt.plot([-5, 5], [-1.758, -1.758], 'k--')\n", - "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n", - "plt.grid(True)\n", - "plt.title(\"SELU activation function\", fontsize=14)\n", - "plt.axis([-5, 5, -2.2, 3.2])\n", - "\n", - "save_fig(\"selu_plot\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "기본적으로 SELU 하이퍼파라미터(`scale`과 `alpha`)는 각 뉴런의 평균 출력이 0에 가깝고 표준 편차는 1에 가깝도록 조정됩니다(입력은 평균이 0이고 표준 편차 1로 표준화되었다고 가정합니다). 이 활성화 함수를 사용하면 1,000개의 층이 있는 심층 신경망도 모든 층에 걸쳐 거의 평균이 0이고 표준 편차를 1로 유지합니다. 이를 통해 그레이디언트 폭주와 소실 문제를 피할 수 있습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Layer 0: mean -0.00, std deviation 1.00\n", - "Layer 100: mean 0.02, std deviation 0.96\n", - "Layer 200: mean 0.01, std deviation 0.90\n", - "Layer 300: mean -0.02, std deviation 0.92\n", - "Layer 400: mean 0.05, std deviation 0.89\n", - "Layer 500: mean 0.01, std deviation 0.93\n", - "Layer 600: mean 0.02, std deviation 0.92\n", - "Layer 700: mean -0.02, std deviation 0.90\n", - "Layer 800: mean 0.05, std deviation 0.83\n", - "Layer 900: mean 0.02, std deviation 1.00\n" - ] - } - ], - "source": [ - "np.random.seed(42)\n", - "Z = np.random.normal(size=(500, 100)) # 표준화된 입력\n", - "for layer in range(1000):\n", - " W = np.random.normal(size=(100, 100), scale=np.sqrt(1 / 100)) # LeCun 초기화\n", - " Z = selu(np.dot(Z, W))\n", - " means = np.mean(Z, axis=0).mean()\n", - " stds = np.std(Z, axis=0).mean()\n", - " if layer % 100 == 0:\n", - " print(\"Layer {}: mean {:.2f}, std deviation {:.2f}\".format(layer, means, stds))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "쉽게 SELU를 사용할 수 있습니다:" - ] - }, - { - "cell_type": "code", - 
"execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.layers.Dense(10, activation=\"selu\",\n", - " kernel_initializer=\"lecun_normal\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "100개의 은닉층과 SELU 활성화 함수를 사용한 패션 MNIST를 위한 신경망을 만들어 보죠:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "np.random.seed(42)\n", - "tf.random.set_seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[28, 28]))\n", - "model.add(keras.layers.Dense(300, activation=\"selu\",\n", - " kernel_initializer=\"lecun_normal\"))\n", - "for layer in range(99):\n", - " model.add(keras.layers.Dense(100, activation=\"selu\",\n", - " kernel_initializer=\"lecun_normal\"))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이제 훈련해 보죠. 
입력을 평균 0과 표준 편차 1로 바꾸어야 한다는 것을 잊지 마세요:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "pixel_means = X_train.mean(axis=0, keepdims=True)\n", - "pixel_stds = X_train.std(axis=0, keepdims=True)\n", - "X_train_scaled = (X_train - pixel_means) / pixel_stds\n", - "X_valid_scaled = (X_valid - pixel_means) / pixel_stds\n", - "X_test_scaled = (X_test - pixel_means) / pixel_stds" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 1.4254 - accuracy: 0.4457 - val_loss: 0.9036 - val_accuracy: 0.6758\n", - "Epoch 2/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 0.8673 - accuracy: 0.6903 - val_loss: 0.7675 - val_accuracy: 0.7316\n", - "Epoch 3/5\n", - "1719/1719 [==============================] - 32s 18ms/step - loss: 0.6920 - accuracy: 0.7525 - val_loss: 0.6481 - val_accuracy: 0.7694\n", - "Epoch 4/5\n", - "1719/1719 [==============================] - 32s 18ms/step - loss: 0.6801 - accuracy: 0.7533 - val_loss: 0.6137 - val_accuracy: 0.7852\n", - "Epoch 5/5\n", - "1719/1719 [==============================] - 32s 18ms/step - loss: 0.5883 - accuracy: 0.7845 - val_loss: 0.5503 - val_accuracy: 0.8036\n" - ] - } - ], - "source": [ - "history = model.fit(X_train_scaled, y_train, epochs=5,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "대신 ReLU 활성화 함수를 사용하면 어떤 일이 일어나는지 확인해 보죠:" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "np.random.seed(42)\n", - "tf.random.set_seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential()\n", - 
"model.add(keras.layers.Flatten(input_shape=[28, 28]))\n", - "model.add(keras.layers.Dense(300, activation=\"relu\", kernel_initializer=\"he_normal\"))\n", - "for layer in range(99):\n", - " model.add(keras.layers.Dense(100, activation=\"relu\", kernel_initializer=\"he_normal\"))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "1719/1719 [==============================] - 33s 19ms/step - loss: 1.8139 - accuracy: 0.2607 - val_loss: 1.4307 - val_accuracy: 0.3734\n", - "Epoch 2/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 1.1872 - accuracy: 0.4937 - val_loss: 1.0023 - val_accuracy: 0.5844\n", - "Epoch 3/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 0.9595 - accuracy: 0.6029 - val_loss: 0.8268 - val_accuracy: 0.6698\n", - "Epoch 4/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 0.9046 - accuracy: 0.6324 - val_loss: 0.8080 - val_accuracy: 0.6908\n", - "Epoch 5/5\n", - "1719/1719 [==============================] - 32s 19ms/step - loss: 0.8454 - accuracy: 0.6642 - val_loss: 0.7522 - val_accuracy: 0.7180\n" - ] - } - ], - "source": [ - "history = model.fit(X_train_scaled, y_train, epochs=5,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "좋지 않군요. 그레이디언트 폭주나 소실 문제가 발생한 것입니다." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 배치 정규화" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Dense(300, activation=\"relu\"),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Dense(100, activation=\"relu\"),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: \"sequential_4\"\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "flatten_4 (Flatten) (None, 784) 0 \n", - "_________________________________________________________________\n", - "batch_normalization (BatchNo (None, 784) 3136 \n", - "_________________________________________________________________\n", - "dense_212 (Dense) (None, 300) 235500 \n", - "_________________________________________________________________\n", - "batch_normalization_1 (Batch (None, 300) 1200 \n", - "_________________________________________________________________\n", - "dense_213 (Dense) (None, 100) 30100 \n", - "_________________________________________________________________\n", - "batch_normalization_2 (Batch (None, 100) 400 \n", - "_________________________________________________________________\n", - "dense_214 (Dense) (None, 10) 1010 \n", - "=================================================================\n", - "Total params: 271,346\n", - "Trainable params: 268,978\n", - "Non-trainable params: 2,368\n", - "_________________________________________________________________\n" - ] - } - ], - 
"source": [ - "model.summary()" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('batch_normalization/gamma:0', True),\n", - " ('batch_normalization/beta:0', True),\n", - " ('batch_normalization/moving_mean:0', False),\n", - " ('batch_normalization/moving_variance:0', False)]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bn1 = model.layers[1]\n", - "[(var.name, var.trainable) for var in bn1.variables]" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From :1: Layer.updates (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "This property should not be used in TensorFlow 2.0, as updates are applied automatically.\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bn1.updates" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.8750 - accuracy: 0.7123 - val_loss: 0.5525 - val_accuracy: 0.8228\n", - "Epoch 2/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5753 - accuracy: 0.8031 - val_loss: 0.4724 - val_accuracy: 0.8476\n", - "Epoch 3/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5189 - 
accuracy: 0.8205 - val_loss: 0.4375 - val_accuracy: 0.8546\n", - "Epoch 4/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4827 - accuracy: 0.8322 - val_loss: 0.4152 - val_accuracy: 0.8594\n", - "Epoch 5/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4565 - accuracy: 0.8408 - val_loss: 0.3997 - val_accuracy: 0.8636\n", - "Epoch 6/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4398 - accuracy: 0.8472 - val_loss: 0.3867 - val_accuracy: 0.8700\n", - "Epoch 7/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4242 - accuracy: 0.8511 - val_loss: 0.3762 - val_accuracy: 0.8706\n", - "Epoch 8/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4144 - accuracy: 0.8541 - val_loss: 0.3710 - val_accuracy: 0.8736\n", - "Epoch 9/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4024 - accuracy: 0.8581 - val_loss: 0.3630 - val_accuracy: 0.8756\n", - "Epoch 10/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.3915 - accuracy: 0.8623 - val_loss: 0.3572 - val_accuracy: 0.8754\n" - ] - } - ], - "source": [ - "history = model.fit(X_train, y_train, epochs=10,\n", - " validation_data=(X_valid, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이따금 활성화 함수전에 BN을 적용해도 잘 동작합니다(여기에는 논란의 여지가 있습니다). 또한 `BatchNormalization` 층 이전의 층은 편향을 위한 항이 필요 없습니다. `BatchNormalization` 층이 이를 무효화하기 때문입니다. 
따라서 필요 없는 파라미터이므로 `use_bias=False`를 지정하여 층을 만들 수 있습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Dense(300, use_bias=False),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Activation(\"relu\"),\n", - " keras.layers.Dense(100, use_bias=False),\n", - " keras.layers.BatchNormalization(),\n", - " keras.layers.Activation(\"relu\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [], - "source": [ - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 1.0317 - accuracy: 0.6757 - val_loss: 0.6767 - val_accuracy: 0.7816\n", - "Epoch 2/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.6790 - accuracy: 0.7792 - val_loss: 0.5566 - val_accuracy: 0.8180\n", - "Epoch 3/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5960 - accuracy: 0.8037 - val_loss: 0.5007 - val_accuracy: 0.8360\n", - "Epoch 4/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5447 - accuracy: 0.8192 - val_loss: 0.4666 - val_accuracy: 0.8448\n", - "Epoch 5/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5109 - accuracy: 0.8279 - val_loss: 0.4434 - val_accuracy: 0.8534\n", - "Epoch 6/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4898 - accuracy: 0.8336 - val_loss: 0.4263 - val_accuracy: 0.8550\n", - "Epoch 
7/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4712 - accuracy: 0.8397 - val_loss: 0.4130 - val_accuracy: 0.8572\n", - "Epoch 8/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4560 - accuracy: 0.8441 - val_loss: 0.4035 - val_accuracy: 0.8606\n", - "Epoch 9/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4441 - accuracy: 0.8473 - val_loss: 0.3943 - val_accuracy: 0.8642\n", - "Epoch 10/10\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4332 - accuracy: 0.8505 - val_loss: 0.3874 - val_accuracy: 0.8662\n" - ] - } - ], - "source": [ - "history = model.fit(X_train, y_train, epochs=10,\n", - " validation_data=(X_valid, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 그레이디언트 클리핑" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "모든 케라스 옵티마이저는 `clipnorm`이나 `clipvalue` 매개변수를 지원합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(clipvalue=1.0)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(clipnorm=1.0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 사전 훈련된 층 재사용하기" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 케라스 모델 재사용하기" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "패션 MNIST 훈련 세트를 두 개로 나누어 보죠:\n", - "* `X_train_A`: 샌달과 셔츠(클래스 5와 6)을 제외한 모든 이미지\n", - "* `X_train_B`: 샌달과 셔츠 이미지 중 처음 200개만 가진 작은 훈련 세트\n", - "\n", - "검증 세트와 테스트 세트도 이렇게 나눕니다. 하지만 이미지 개수는 제한하지 않습니다.\n", - "\n", - "A 세트(8개의 클래스를 가진 분류 문제)에서 모델을 훈련하고 이를 재사용하여 B 세트(이진 분류)를 해결해 보겠습니다. A 작업에서 B 작업으로 약간의 지식이 전달되기를 기대합니다. 왜냐하면 A 세트의 클래스(스니커즈, 앵클 부츠, 코트, 티셔츠 등)가 B 세트에 있는 클래스(샌달과 셔츠)와 조금 비슷하기 때문입니다. 
하지만 `Dense` 층을 사용하기 때문에 동일한 위치에 나타난 패턴만 재사용할 수 있습니다(반대로 합성곱 층은 훨씬 많은 정보를 전달합니다. 학습한 패턴을 이미지의 어느 위치에서나 감지할 수 있기 때문입니다. CNN 장에서 자세히 알아보겠습니다)." - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [], - "source": [ - "def split_dataset(X, y):\n", - " y_5_or_6 = (y == 5) | (y == 6) # sandals or shirts\n", - " y_A = y[~y_5_or_6]\n", - " y_A[y_A > 6] -= 2 # class indices 7, 8, 9 should be moved to 5, 6, 7\n", - " y_B = (y[y_5_or_6] == 6).astype(np.float32) # binary classification task: is it a shirt (class 6)?\n", - " return ((X[~y_5_or_6], y_A),\n", - " (X[y_5_or_6], y_B))\n", - "\n", - "(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)\n", - "(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)\n", - "(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)\n", - "X_train_B = X_train_B[:200]\n", - "y_train_B = y_train_B[:200]" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(43986, 28, 28)" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X_train_A.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(200, 28, 28)" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X_train_B.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([4, 0, 5, 7, 7, 7, 4, 4, 3, 4, 0, 1, 6, 3, 4, 3, 2, 6, 5, 3, 4, 5,\n", - " 1, 3, 4, 2, 0, 6, 7, 1], dtype=uint8)" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "y_train_A[:30]" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "data": { -
"text/plain": [ - "array([1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0.,\n", - " 0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.], dtype=float32)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "y_train_B[:30]" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "model_A = keras.models.Sequential()\n", - "model_A.add(keras.layers.Flatten(input_shape=[28, 28]))\n", - "for n_hidden in (300, 100, 50, 50, 50):\n", - " model_A.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n", - "model_A.add(keras.layers.Dense(8, activation=\"softmax\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [], - "source": [ - "model_A.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/20\n", - "1375/1375 [==============================] - 5s 3ms/step - loss: 0.5927 - accuracy: 0.8103 - val_loss: 0.3894 - val_accuracy: 0.8662\n", - "Epoch 2/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.3523 - accuracy: 0.8785 - val_loss: 0.3286 - val_accuracy: 0.8834\n", - "Epoch 3/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.3170 - accuracy: 0.8896 - val_loss: 0.3011 - val_accuracy: 0.8984\n", - "Epoch 4/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2974 - accuracy: 0.8975 - val_loss: 0.2895 - val_accuracy: 0.9018\n", - "Epoch 5/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2835 - 
accuracy: 0.9020 - val_loss: 0.2774 - val_accuracy: 0.9071\n", - "Epoch 6/20\n", - "1375/1375 [==============================] - 5s 3ms/step - loss: 0.2730 - accuracy: 0.9063 - val_loss: 0.2736 - val_accuracy: 0.9061\n", - "Epoch 7/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2642 - accuracy: 0.9092 - val_loss: 0.2717 - val_accuracy: 0.9083\n", - "Epoch 8/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2573 - accuracy: 0.9126 - val_loss: 0.2590 - val_accuracy: 0.9141\n", - "Epoch 9/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2519 - accuracy: 0.9137 - val_loss: 0.2562 - val_accuracy: 0.9145\n", - "Epoch 10/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2469 - accuracy: 0.9155 - val_loss: 0.2542 - val_accuracy: 0.9155\n", - "Epoch 11/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2423 - accuracy: 0.9178 - val_loss: 0.2494 - val_accuracy: 0.9163\n", - "Epoch 12/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2383 - accuracy: 0.9188 - val_loss: 0.2512 - val_accuracy: 0.9128\n", - "Epoch 13/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2351 - accuracy: 0.9198 - val_loss: 0.2448 - val_accuracy: 0.9158\n", - "Epoch 14/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2316 - accuracy: 0.9210 - val_loss: 0.2416 - val_accuracy: 0.9175\n", - "Epoch 15/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2288 - accuracy: 0.9213 - val_loss: 0.2451 - val_accuracy: 0.9200\n", - "Epoch 16/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2255 - accuracy: 0.9223 - val_loss: 0.2386 - val_accuracy: 0.9200\n", - "Epoch 17/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2231 - accuracy: 0.9232 - val_loss: 0.2411 - val_accuracy: 0.9178\n", - "Epoch 18/20\n", - "1375/1375 
[==============================] - 4s 3ms/step - loss: 0.2201 - accuracy: 0.9245 - val_loss: 0.2428 - val_accuracy: 0.9150\n", - "Epoch 19/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2178 - accuracy: 0.9249 - val_loss: 0.2329 - val_accuracy: 0.9205\n", - "Epoch 20/20\n", - "1375/1375 [==============================] - 4s 3ms/step - loss: 0.2156 - accuracy: 0.9261 - val_loss: 0.2331 - val_accuracy: 0.9208\n" - ] - } - ], - "source": [ - "history = model_A.fit(X_train_A, y_train_A, epochs=20,\n", - " validation_data=(X_valid_A, y_valid_A))" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [], - "source": [ - "model_A.save(\"my_model_A.h5\")" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [], - "source": [ - "model_B = keras.models.Sequential()\n", - "model_B.add(keras.layers.Flatten(input_shape=[28, 28]))\n", - "for n_hidden in (300, 100, 50, 50, 50):\n", - " model_B.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n", - "model_B.add(keras.layers.Dense(1, activation=\"sigmoid\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [], - "source": [ - "model_B.compile(loss=\"binary_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/20\n", - "7/7 [==============================] - 0s 42ms/step - loss: 0.9573 - accuracy: 0.4650 - val_loss: 0.6314 - val_accuracy: 0.6004\n", - "Epoch 2/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.5692 - accuracy: 0.7450 - val_loss: 0.4784 - val_accuracy: 0.8529\n", - "Epoch 3/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.4503 - accuracy: 0.8650 - val_loss: 0.4102 - val_accuracy: 0.8945\n", - "Epoch 4/20\n", - 
"7/7 [==============================] - 0s 16ms/step - loss: 0.3879 - accuracy: 0.8950 - val_loss: 0.3647 - val_accuracy: 0.9178\n", - "Epoch 5/20\n", - "7/7 [==============================] - 0s 17ms/step - loss: 0.3435 - accuracy: 0.9250 - val_loss: 0.3300 - val_accuracy: 0.9320\n", - "Epoch 6/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.3081 - accuracy: 0.9300 - val_loss: 0.3019 - val_accuracy: 0.9402\n", - "Epoch 7/20\n", - "7/7 [==============================] - 0s 15ms/step - loss: 0.2800 - accuracy: 0.9350 - val_loss: 0.2804 - val_accuracy: 0.9422\n", - "Epoch 8/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.2564 - accuracy: 0.9450 - val_loss: 0.2606 - val_accuracy: 0.9473\n", - "Epoch 9/20\n", - "7/7 [==============================] - 0s 17ms/step - loss: 0.2362 - accuracy: 0.9550 - val_loss: 0.2428 - val_accuracy: 0.9523\n", - "Epoch 10/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.2188 - accuracy: 0.9600 - val_loss: 0.2281 - val_accuracy: 0.9544\n", - "Epoch 11/20\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.2036 - accuracy: 0.9700 - val_loss: 0.2150 - val_accuracy: 0.9584\n", - "Epoch 12/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1898 - accuracy: 0.9700 - val_loss: 0.2036 - val_accuracy: 0.9584\n", - "Epoch 13/20\n", - "7/7 [==============================] - 0s 17ms/step - loss: 0.1773 - accuracy: 0.9750 - val_loss: 0.1931 - val_accuracy: 0.9615\n", - "Epoch 14/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1668 - accuracy: 0.9800 - val_loss: 0.1838 - val_accuracy: 0.9635\n", - "Epoch 15/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1570 - accuracy: 0.9900 - val_loss: 0.1746 - val_accuracy: 0.9686\n", - "Epoch 16/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1481 - accuracy: 0.9900 - val_loss: 0.1674 - val_accuracy: 0.9686\n", - "Epoch 17/20\n", - 
"7/7 [==============================] - 0s 16ms/step - loss: 0.1406 - accuracy: 0.9900 - val_loss: 0.1604 - val_accuracy: 0.9706\n", - "Epoch 18/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1334 - accuracy: 0.9900 - val_loss: 0.1539 - val_accuracy: 0.9706\n", - "Epoch 19/20\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.1268 - accuracy: 0.9900 - val_loss: 0.1482 - val_accuracy: 0.9716\n", - "Epoch 20/20\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1208 - accuracy: 0.9900 - val_loss: 0.1431 - val_accuracy: 0.9716\n" - ] - } - ], - "source": [ - "history = model_B.fit(X_train_B, y_train_B, epochs=20,\n", - " validation_data=(X_valid_B, y_valid_B))" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: \"sequential_5\"\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "flatten_5 (Flatten) (None, 784) 0 \n", - "_________________________________________________________________\n", - "batch_normalization_3 (Batch (None, 784) 3136 \n", - "_________________________________________________________________\n", - "dense_215 (Dense) (None, 300) 235200 \n", - "_________________________________________________________________\n", - "batch_normalization_4 (Batch (None, 300) 1200 \n", - "_________________________________________________________________\n", - "activation (Activation) (None, 300) 0 \n", - "_________________________________________________________________\n", - "dense_216 (Dense) (None, 100) 30000 \n", - "_________________________________________________________________\n", - "batch_normalization_5 (Batch (None, 100) 400 \n", - "_________________________________________________________________\n", - "activation_1 (Activation) (None, 100) 0 
\n", - "_________________________________________________________________\n", - "dense_217 (Dense) (None, 10) 1010 \n", - "=================================================================\n", - "Total params: 270,946\n", - "Trainable params: 268,578\n", - "Non-trainable params: 2,368\n", - "_________________________________________________________________\n" - ] - } - ], - "source": [ - "model.summary()" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [], - "source": [ - "model_A = keras.models.load_model(\"my_model_A.h5\")\n", - "model_B_on_A = keras.models.Sequential(model_A.layers[:-1])\n", - "model_B_on_A.add(keras.layers.Dense(1, activation=\"sigmoid\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "model_A_clone = keras.models.clone_model(model_A)\n", - "model_A_clone.set_weights(model_A.get_weights())" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [], - "source": [ - "for layer in model_B_on_A.layers[:-1]:\n", - " layer.trainable = False\n", - "\n", - "model_B_on_A.compile(loss=\"binary_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/4\n", - "7/7 [==============================] - 0s 39ms/step - loss: 0.5803 - accuracy: 0.6500 - val_loss: 0.5842 - val_accuracy: 0.6329\n", - "Epoch 2/4\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.5436 - accuracy: 0.6800 - val_loss: 0.5466 - val_accuracy: 0.6724\n", - "Epoch 3/4\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.5066 - accuracy: 0.7300 - val_loss: 0.5144 - val_accuracy: 0.7099\n", - "Epoch 4/4\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.4749 - accuracy: 0.7500 - val_loss: 
0.4855 - val_accuracy: 0.7312\n", - "Epoch 1/16\n", - "7/7 [==============================] - 0s 41ms/step - loss: 0.3964 - accuracy: 0.8100 - val_loss: 0.3461 - val_accuracy: 0.8631\n", - "Epoch 2/16\n", - "7/7 [==============================] - 0s 15ms/step - loss: 0.2799 - accuracy: 0.9350 - val_loss: 0.2603 - val_accuracy: 0.9260\n", - "Epoch 3/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.2083 - accuracy: 0.9650 - val_loss: 0.2110 - val_accuracy: 0.9544\n", - "Epoch 4/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1670 - accuracy: 0.9800 - val_loss: 0.1790 - val_accuracy: 0.9696\n", - "Epoch 5/16\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.1397 - accuracy: 0.9800 - val_loss: 0.1562 - val_accuracy: 0.9757\n", - "Epoch 6/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1198 - accuracy: 0.9950 - val_loss: 0.1394 - val_accuracy: 0.9807\n", - "Epoch 7/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.1051 - accuracy: 0.9950 - val_loss: 0.1267 - val_accuracy: 0.9838\n", - "Epoch 8/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.0938 - accuracy: 0.9950 - val_loss: 0.1164 - val_accuracy: 0.9858\n", - "Epoch 9/16\n", - "7/7 [==============================] - 0s 15ms/step - loss: 0.0848 - accuracy: 1.0000 - val_loss: 0.1067 - val_accuracy: 0.9888\n", - "Epoch 10/16\n", - "7/7 [==============================] - 0s 16ms/step - loss: 0.0763 - accuracy: 1.0000 - val_loss: 0.1001 - val_accuracy: 0.9899\n", - "Epoch 11/16\n", - "7/7 [==============================] - 0s 15ms/step - loss: 0.0705 - accuracy: 1.0000 - val_loss: 0.0941 - val_accuracy: 0.9899\n", - "Epoch 12/16\n", - "7/7 [==============================] - 0s 15ms/step - loss: 0.0650 - accuracy: 1.0000 - val_loss: 0.0889 - val_accuracy: 0.9899\n", - "Epoch 13/16\n", - "7/7 [==============================] - 0s 17ms/step - loss: 0.0603 - accuracy: 1.0000 - val_loss: 0.0840 
- val_accuracy: 0.9899\n", - "Epoch 14/16\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.0560 - accuracy: 1.0000 - val_loss: 0.0804 - val_accuracy: 0.9899\n", - "Epoch 15/16\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.0526 - accuracy: 1.0000 - val_loss: 0.0770 - val_accuracy: 0.9899\n", - "Epoch 16/16\n", - "7/7 [==============================] - 0s 18ms/step - loss: 0.0497 - accuracy: 1.0000 - val_loss: 0.0740 - val_accuracy: 0.9899\n" - ] - } - ], - "source": [ - "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=4,\n", - " validation_data=(X_valid_B, y_valid_B))\n", - "\n", - "for layer in model_B_on_A.layers[:-1]:\n", - " layer.trainable = True\n", - "\n", - "model_B_on_A.compile(loss=\"binary_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])\n", - "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=16,\n", - " validation_data=(X_valid_B, y_valid_B))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "마지막 점수는 어떤가요?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "63/63 [==============================] - 0s 2ms/step - loss: 0.1408 - accuracy: 0.9705\n" - ] - }, - { - "data": { - "text/plain": [ - "[0.1408407837152481, 0.9704999923706055]" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_B.evaluate(X_test_B, y_test_B)" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "63/63 [==============================] - 0s 2ms/step - loss: 0.0683 - accuracy: 0.9930\n" - ] - }, - { - "data": { - "text/plain": [ - "[0.0683005154132843, 0.9929999709129333]" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_B_on_A.evaluate(X_test_B, y_test_B)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "훌륭하네요! 꽤 많은 정보를 전달했습니다: 오차율이 4배나 줄었네요!" 
- ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4.066666666666663" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(100 - 96.95) / (100 - 99.25)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 고속 옵티마이저" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 모멘텀 옵티마이저" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 네스테로프 가속 경사" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9, nesterov=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## AdaGrad" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.Adagrad(lr=0.001)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## RMSProp" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.RMSprop(lr=0.001, rho=0.9)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Adam 옵티마이저" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Adamax 옵티마이저" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.Adamax(lr=0.001, beta_1=0.9, beta_2=0.999)" - ] - }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "## Nadam 옵티마이저" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.Nadam(lr=0.001, beta_1=0.9, beta_2=0.999)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 학습률 스케줄링" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 거듭제곱 스케줄링" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```lr = lr0 / (1 + steps / s)**c```\n", - "* 케라스는 `c=1`과 `s = 1 / decay`을 사용합니다" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(lr=0.01, decay=1e-4)" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4855 - accuracy: 0.8303 - val_loss: 0.4029 - val_accuracy: 0.8604\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3781 - accuracy: 0.8658 - val_loss: 0.3716 - val_accuracy: 0.8720\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3457 - accuracy: 0.8770 - val_loss: 0.3749 - val_accuracy: 0.8742\n", - "Epoch 4/25\n", - "1719/1719 
[==============================] - 5s 3ms/step - loss: 0.3250 - accuracy: 0.8830 - val_loss: 0.3501 - val_accuracy: 0.8800\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3101 - accuracy: 0.8892 - val_loss: 0.3447 - val_accuracy: 0.8794\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2966 - accuracy: 0.8935 - val_loss: 0.3412 - val_accuracy: 0.8828\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2867 - accuracy: 0.8974 - val_loss: 0.3355 - val_accuracy: 0.8864\n", - "Epoch 8/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2777 - accuracy: 0.9010 - val_loss: 0.3408 - val_accuracy: 0.8834\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2698 - accuracy: 0.9020 - val_loss: 0.3289 - val_accuracy: 0.8880\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2628 - accuracy: 0.9048 - val_loss: 0.3259 - val_accuracy: 0.8880\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2571 - accuracy: 0.9080 - val_loss: 0.3265 - val_accuracy: 0.8876\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2512 - accuracy: 0.9098 - val_loss: 0.3331 - val_accuracy: 0.8830\n", - "Epoch 13/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2461 - accuracy: 0.9127 - val_loss: 0.3253 - val_accuracy: 0.8892\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2422 - accuracy: 0.9135 - val_loss: 0.3286 - val_accuracy: 0.8900\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2374 - accuracy: 0.9152 - val_loss: 0.3241 - val_accuracy: 0.8880\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2335 - accuracy: 0.9170 - val_loss: 
0.3202 - val_accuracy: 0.8904\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2299 - accuracy: 0.9181 - val_loss: 0.3233 - val_accuracy: 0.8912\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2262 - accuracy: 0.9200 - val_loss: 0.3188 - val_accuracy: 0.8932\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2232 - accuracy: 0.9210 - val_loss: 0.3227 - val_accuracy: 0.8902\n", - "Epoch 20/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2199 - accuracy: 0.9221 - val_loss: 0.3207 - val_accuracy: 0.8912\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2168 - accuracy: 0.9236 - val_loss: 0.3206 - val_accuracy: 0.8918\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2142 - accuracy: 0.9245 - val_loss: 0.3179 - val_accuracy: 0.8942\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2116 - accuracy: 0.9250 - val_loss: 0.3193 - val_accuracy: 0.8908\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2088 - accuracy: 0.9266 - val_loss: 0.3212 - val_accuracy: 0.8886\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2068 - accuracy: 0.9266 - val_loss: 0.3211 - val_accuracy: 0.8926\n" - ] - } - ], - "source": [ - "n_epochs = 25\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "learning_rate = 0.01\n", - "decay = 1e-4\n", - "batch_size = 32\n", - "n_steps_per_epoch = len(X_train) // batch_size\n", - "epochs = np.arange(n_epochs)\n", - "lrs = learning_rate / (1 + decay * epochs * n_steps_per_epoch)\n", - "\n", - "plt.plot(epochs, lrs, \"o-\")\n", - "plt.axis([0, n_epochs - 1, 0, 0.01])\n", - "plt.xlabel(\"Epoch\")\n", - "plt.ylabel(\"Learning Rate\")\n", - "plt.title(\"Power Scheduling\", fontsize=14)\n", - "plt.grid(True)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 지수 기반 스케줄링" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```lr = lr0 * 0.1**(epoch / s)```" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [], - "source": [ - "def exponential_decay_fn(epoch):\n", - " return 0.01 * 0.1**(epoch / 20)" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [], - "source": [ - "def exponential_decay(lr0, s):\n", - " def exponential_decay_fn(epoch):\n", - " return lr0 * 0.1**(epoch / s)\n", - " return exponential_decay_fn\n", - "\n", - "exponential_decay_fn = exponential_decay(lr0=0.01, s=20)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 25" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.8717 - accuracy: 0.7511 - val_loss: 0.9296 - val_accuracy: 0.7502\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.7778 - accuracy: 0.7756 - val_loss: 0.6632 - val_accuracy: 0.8124\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.6531 - accuracy: 0.8051 - val_loss: 0.7064 - val_accuracy: 0.7834\n", - "Epoch 4/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.5903 - accuracy: 0.8228 - val_loss: 0.5971 - val_accuracy: 0.8258\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.5431 - accuracy: 0.8379 - val_loss: 0.5389 - val_accuracy: 0.8488\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.5012 - accuracy: 0.8489 - val_loss: 0.5297 - val_accuracy: 0.8562\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.4695 - accuracy: 0.8578 - val_loss: 0.5339 - val_accuracy: 0.8446\n", - "Epoch 8/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.4328 - accuracy: 0.8667 - val_loss: 0.7335 - val_accuracy: 0.8280\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.4036 - accuracy: 0.8750 - val_loss: 0.5740 - val_accuracy: 0.8618\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.3857 - accuracy: 0.8807 - val_loss: 0.4759 - val_accuracy: 0.8672\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.3601 - accuracy: 0.8864 - val_loss: 0.4779 - val_accuracy: 0.8630\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.3322 - accuracy: 0.8924 - val_loss: 0.4897 - val_accuracy: 0.8636\n", - "Epoch 13/25\n", - "1719/1719 
[==============================] - 7s 4ms/step - loss: 0.3204 - accuracy: 0.8987 - val_loss: 0.4892 - val_accuracy: 0.8690\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2968 - accuracy: 0.9039 - val_loss: 0.4638 - val_accuracy: 0.8734\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2779 - accuracy: 0.9104 - val_loss: 0.5087 - val_accuracy: 0.8758\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2632 - accuracy: 0.9150 - val_loss: 0.4718 - val_accuracy: 0.8770\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2479 - accuracy: 0.9191 - val_loss: 0.5167 - val_accuracy: 0.8774\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2369 - accuracy: 0.9243 - val_loss: 0.4961 - val_accuracy: 0.8786\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2202 - accuracy: 0.9292 - val_loss: 0.5280 - val_accuracy: 0.8834\n", - "Epoch 20/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.2113 - accuracy: 0.9312 - val_loss: 0.5491 - val_accuracy: 0.8758\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.1985 - accuracy: 0.9363 - val_loss: 0.5469 - val_accuracy: 0.8790\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.1878 - accuracy: 0.9393 - val_loss: 0.5512 - val_accuracy: 0.8822\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.1781 - accuracy: 0.9425 - val_loss: 0.5817 - val_accuracy: 0.8848\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.1709 - accuracy: 0.9450 - val_loss: 0.5742 - val_accuracy: 0.8818\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.1638 - accuracy: 0.9484 - 
val_loss: 0.6042 - val_accuracy: 0.8842\n" - ] - } - ], - "source": [ - "lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[lr_scheduler])" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(history.epoch, history.history[\"lr\"], \"o-\")\n", - "plt.axis([0, n_epochs - 1, 0, 0.011])\n", - "plt.xlabel(\"Epoch\")\n", - "plt.ylabel(\"Learning Rate\")\n", - "plt.title(\"Exponential Scheduling\", fontsize=14)\n", - "plt.grid(True)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이 스케줄 함수는 두 번째 매개변수로 현재 학습률을 받을 수 있습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [], - "source": [ - "def exponential_decay_fn(epoch, lr):\n", - " return lr * 0.1**(1 / 20)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "에포크가 아니라 반복마다 학습률을 업데이트하려면 사용자 정의 콜백 클래스를 작성해야 합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.7848 - accuracy: 0.7711 - val_loss: 0.8494 - val_accuracy: 0.7580\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.6253 - accuracy: 0.8057 - val_loss: 0.7549 - val_accuracy: 0.7640\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.5786 - accuracy: 0.8196 - val_loss: 0.6209 - val_accuracy: 0.8128\n", - "Epoch 4/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.5178 - accuracy: 0.8388 - val_loss: 0.5581 - val_accuracy: 0.8470\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.4988 - accuracy: 0.8480 - val_loss: 0.5315 - val_accuracy: 0.8428\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.4408 - accuracy: 0.8641 - val_loss: 0.4676 - val_accuracy: 0.8598\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 9s 5ms/step 
- loss: 0.4063 - accuracy: 0.8689 - val_loss: 0.6225 - val_accuracy: 0.8402\n", - "Epoch 8/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.3650 - accuracy: 0.8791 - val_loss: 0.4607 - val_accuracy: 0.8638\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.3442 - accuracy: 0.8854 - val_loss: 0.4588 - val_accuracy: 0.8570\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.3196 - accuracy: 0.8927 - val_loss: 0.4427 - val_accuracy: 0.8814\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2950 - accuracy: 0.8996 - val_loss: 0.4303 - val_accuracy: 0.8810\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2699 - accuracy: 0.9077 - val_loss: 0.4377 - val_accuracy: 0.8674\n", - "Epoch 13/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2528 - accuracy: 0.9119 - val_loss: 0.4323 - val_accuracy: 0.8862\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2340 - accuracy: 0.9187 - val_loss: 0.4424 - val_accuracy: 0.8794\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2159 - accuracy: 0.9236 - val_loss: 0.4204 - val_accuracy: 0.8878\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.2008 - accuracy: 0.9302 - val_loss: 0.4568 - val_accuracy: 0.8928\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1866 - accuracy: 0.9343 - val_loss: 0.4430 - val_accuracy: 0.8920\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1738 - accuracy: 0.9395 - val_loss: 0.4850 - val_accuracy: 0.8928\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1589 - accuracy: 0.9454 - val_loss: 0.4853 - val_accuracy: 0.8948\n", - "Epoch 
20/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1488 - accuracy: 0.9493 - val_loss: 0.4679 - val_accuracy: 0.8890\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1392 - accuracy: 0.9526 - val_loss: 0.5147 - val_accuracy: 0.8906\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1298 - accuracy: 0.9567 - val_loss: 0.5229 - val_accuracy: 0.8906\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1223 - accuracy: 0.9599 - val_loss: 0.5352 - val_accuracy: 0.8884\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1117 - accuracy: 0.9631 - val_loss: 0.5800 - val_accuracy: 0.8884\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 9s 5ms/step - loss: 0.1068 - accuracy: 0.9648 - val_loss: 0.5695 - val_accuracy: 0.8898\n" - ] - } - ], - "source": [ - "K = keras.backend\n", - "\n", - "class ExponentialDecay(keras.callbacks.Callback):\n", - " def __init__(self, s=40000):\n", - " super().__init__()\n", - " self.s = s\n", - "\n", - " def on_batch_begin(self, batch, logs=None):\n", - " # 노트: 에포크마다 `batch` 매개변수가 재설정됩니다\n", - " lr = K.get_value(self.model.optimizer.lr)\n", - " K.set_value(self.model.optimizer.lr, lr * 0.1**(1 / s))\n", - "\n", - " def on_epoch_end(self, epoch, logs=None):\n", - " logs = logs or {}\n", - " logs['lr'] = K.get_value(self.model.optimizer.lr)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "lr0 = 0.01\n", - "optimizer = keras.optimizers.Nadam(lr=lr0)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, 
metrics=[\"accuracy\"])\n", - "n_epochs = 25\n", - "\n", - "s = 20 * len(X_train) // 32 # 20 에포크 동안 스텝 횟수 (배치 크기 = 32)\n", - "exp_decay = ExponentialDecay(s)\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[exp_decay])" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [], - "source": [ - "n_steps = n_epochs * len(X_train) // 32\n", - "steps = np.arange(n_steps)\n", - "lrs = lr0 * 0.1**(steps / s)" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(steps, lrs, \"-\", linewidth=2)\n", - "plt.axis([0, n_steps - 1, 0, lr0 * 1.1])\n", - "plt.xlabel(\"Batch\")\n", - "plt.ylabel(\"Learning Rate\")\n", - "plt.title(\"Exponential Scheduling (per batch)\", fontsize=14)\n", - "plt.grid(True)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 기간별 고정 스케줄링" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [], - "source": [ - "def piecewise_constant_fn(epoch):\n", - " if epoch < 5:\n", - " return 0.01\n", - " elif epoch < 15:\n", - " return 0.005\n", - " else:\n", - " return 0.001" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": {}, - "outputs": [], - "source": [ - "def piecewise_constant(boundaries, values):\n", - " boundaries = np.array([0] + boundaries)\n", - " values = np.array(values)\n", - " def piecewise_constant_fn(epoch):\n", - " return values[np.argmax(boundaries > epoch) - 1]\n", - " return piecewise_constant_fn\n", - "\n", - "piecewise_constant_fn = piecewise_constant([5, 15], [0.01, 0.005, 0.001])" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.8830 - accuracy: 0.7497 - val_loss: 1.0209 - val_accuracy: 0.7004\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.9927 - accuracy: 0.6931 - val_loss: 0.8880 - val_accuracy: 0.7178\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.8996 - accuracy: 0.7185 - val_loss: 1.0695 - val_accuracy: 0.6856\n", - "Epoch 4/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.9428 - accuracy: 0.7076 - val_loss: 0.9724 - val_accuracy: 
0.7206\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.9195 - accuracy: 0.7078 - val_loss: 1.2058 - val_accuracy: 0.7076\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.6494 - accuracy: 0.7722 - val_loss: 0.6743 - val_accuracy: 0.7360\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.6002 - accuracy: 0.7929 - val_loss: 0.7192 - val_accuracy: 0.7718\n", - "Epoch 8/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.5850 - accuracy: 0.7999 - val_loss: 0.6331 - val_accuracy: 0.7328\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.5570 - accuracy: 0.8213 - val_loss: 0.6222 - val_accuracy: 0.7668\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.5511 - accuracy: 0.8165 - val_loss: 0.6102 - val_accuracy: 0.8204\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.5360 - accuracy: 0.8182 - val_loss: 0.6345 - val_accuracy: 0.8292\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 7s 4ms/step - loss: 0.5291 - accuracy: 0.8363 - val_loss: 0.6106 - val_accuracy: 0.8334\n", - "Epoch 13/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.5104 - accuracy: 0.8400 - val_loss: 0.6339 - val_accuracy: 0.8244\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.4812 - accuracy: 0.8557 - val_loss: 0.6163 - val_accuracy: 0.7798\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.4852 - accuracy: 0.8543 - val_loss: 0.8202 - val_accuracy: 0.8402\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.3420 - accuracy: 0.8901 - val_loss: 0.5218 - val_accuracy: 0.8708\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 8s 4ms/step - 
loss: 0.3108 - accuracy: 0.9007 - val_loss: 0.5677 - val_accuracy: 0.8652\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2922 - accuracy: 0.9061 - val_loss: 0.6053 - val_accuracy: 0.8768\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2756 - accuracy: 0.9134 - val_loss: 0.5898 - val_accuracy: 0.8774\n", - "Epoch 20/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2668 - accuracy: 0.9170 - val_loss: 0.5603 - val_accuracy: 0.8854\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2574 - accuracy: 0.9201 - val_loss: 0.5782 - val_accuracy: 0.8792\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2518 - accuracy: 0.9230 - val_loss: 0.5958 - val_accuracy: 0.8758\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2396 - accuracy: 0.9254 - val_loss: 0.6070 - val_accuracy: 0.8762\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2405 - accuracy: 0.9264 - val_loss: 0.6612 - val_accuracy: 0.8788\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 8s 4ms/step - loss: 0.2244 - accuracy: 0.9291 - val_loss: 0.6447 - val_accuracy: 0.8786\n" - ] - } - ], - "source": [ - "lr_scheduler = keras.callbacks.LearningRateScheduler(piecewise_constant_fn)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 25\n", - "history = model.fit(X_train_scaled, y_train, 
epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[lr_scheduler])" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAEeCAYAAAC30gOQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dfZycVX3//9c7t2w2CYFNSCBoFgQWjEIQb6qREkGk2vIjgl9bQYFSha+WL1puLLSiiLUYhVaoFEmtBsRarQIBsdBi3CqKityGCEnlJpKEuwQTsrm/+fz+OGfDZDKze22yM5PdeT8fj3nszHXOdc25zu7OZ851znWOIgIzM7P+NqTRBTAzs8HJAcbMzGrCAcbMzGrCAcbMzGrCAcbMzGrCAcbMzGrCAcZ2IKldUkh6Y6PLUo2kTklfaXQ5rBhJZ0rqqtGxH5V0WR/3eVrShdVeW/9wgGlCkubkABKSNkl6UtKVklpzlmeAfYGHGljM3pwMXFLLN1DyYUn3Slot6WVJD0j6pKSxtXzvsnLU7MOv6LElHSDpJklLJG2QtEzSHZKOrEW5GuBNwD83uhCDzbBGF8Aa5m7gQ8Bw4Gjga0Ar8NGI2AI818Cy9SoiXqrD23wTOAX4e+DjwAvAVODc/HxOHcrQcJKGA/8NPAG8H1gK7Ae8C9i7gUXrNxHxYqPLMChFhB9N9iB9MP6gbNu/AM/m5+1AAG8sSX8tcAewmvTh+m1gUtkxzgDmAxuA54EbStL2BGbnfVcD/1N2/GeBPyt5fU/ONyy/PiiXaf/8uhP4Skn+k4FHgHXAS/n4E0vSTwTuB9YDTwGfB0b0UEfvz+93cpX0cfnnEOBSUqtvQz7/k0ryddflKaQP6bXAb4DjS/IMB64BluVjPAN8oeQ8o/SRt7fl38GSfM4LgD8vK2Mn6Vv53wPLc91fCQzp6dgVznVaTj+ol7+rPYHr8u9yPfAY8Kc57UygCzgOeBRYA/wYOKDsGD3+noB9gLn5nBcDZ+XjXVaSJ4D3lR33aeDCPrwO4GzgP3JZnwQ+WHbMtwAP5LI+CLwn7zej0f/ju8vDl8is2zrSB90OJO0L/IT0j/xm4J3AaGCupCE5zznA9cA3gMNJ/2yP5jSRgtNk4E+AI/Px5uVjQwoIM3L+UaRLFhuA7n6gGcATEbGkQvkmAf8O3AAcBvwhqfXRnX4C8C3gK6QWyFnA+0gfvNWcBiyKiJsrJUbEyvz048BFwF8DrwduAW6WNK1sl8+TgsgRwH3Av0sandPOA94L/BlwMPCnwMKcdjIpiFxOumzZXV97kD7c/iSf09XA9ZKOq3Aem4G3kVpen8jH7+nY5V4EtgKnSKp41SP/jn8IHAP8OekLyfnAxpJsI0mXNc8C3gqMA75acowiv6c5pC8b7wRmAqeTgngtfJoUzI4AvgN8XdKrc1lHAz8AHgeOAj4JfKlG5Ri4Gh3h/Kj/g7IWDCloLAe+k1+3U9KCIX0A/ajsGHvlPG/Or5eQv3VXeL9jSd9eW8q2PwR8Mj//v8DC/PydpG+/c4BL8rabgK+V7NtJbsEAb8hlmVLl/X8CXFq2bWYuk6rs8xtgboG6XAp8umxbJ3BTWV2eU5I+OW97e359DfCjHsryNCXfrnsoy79XqKN7y/L8d1meosf+S9I3+S7Sl4HPAVNL0o8nBaHDqux/Zj7njpJtp5G+RKjI7wk4JB9jekn6FGALtWnBXFHyehip9fnB/PocUku5pSTPqbgFs93
DLZjm9UeSuiStB+4l/XP/vyp5jwL+MOfvyqOBnslpr5G0D+lD80c97D8KeLHsGK8DXpPzdAKH5BbNDNLlk878HNI3484qx3+Y1Kf0qKTvS/qopAll7/+3Ze/9b6Q+p0lVjqkq21/JkDr69wN+VpZ0D+kbfKlHSp4vyz/3yT/nkC5DLZJ0raQ/7m4Z9vDeQyX9raRHJK3I53Qy8Ooe3rf7vfehjyLiWlJdnUo6v5OAhyR9KGc5knSJ9bEeDrMhIhaWvF4GjCB9WYHef0+HkYLYr0rKtZhX6rO/bau7iNhMasl1192hwKMRsa4k/y9rVI4By538zesnpGvMm4BlEbGph7xDSJe4Ko02eh5o6eW9huR8R1dIexkgIh6X9BzwDlJQuZp0Kekrkg4D9qdKgImILZLeBfwBqeP5L4ArJB0TEQ/n9/8s6Xp6uWqdu4tIH2g7q3ya8m31GxGRriilL3gR8YCkduAEUh/FDcDDko6PiK1Vjn8hcAHpEt180rf8v2fH4FH+ew12cvRoRKwGbgNuk/Qp4C5SS+abPe74is0VykJJeYr+nnqbAj7Y8QtCxcu/vei3umtWDjDNa21E/LZg3gdInd6LqwSi1ZKWkj4c/7vK/hOBrRHxZA/v8z/AH5P6XToj4kVJy0nXtyv2v3SLdI3iXuBeSZeTOr3/lNS6eQA4tA/nC+mb879LOjkq9MNIGhcRKyUtA6azfevt7aRLbIXlD+/vAd+TNAf4BamvYRGpH2No2S5vB26PiG/m8nRfQlpJ31Q6dpHyhqTHSZcnIXVy7yvpsF5aMT3p8feU328I6ZLuz/O2V5NakaVepKQ/SdJEqvcv7azHgTMktZS0Yt7cz+8x4DkaWxHXkkYIfUfSWyQdKOmdkmZLGpPzfB74hKS/knSIpGmSLshpd5MuI82V9O58T8VbJX1WUmmrppMUyH4brwwb7QQ+SPXLY0j6A0mfkvSm/IHz/wGv4pUP+cuBUyVdLul1kg6V9D5JX+zhnL9L6tj9lqRL87GnSPojSXeQ+gYgdexeKOkD+bwvJ7XUruzh2OXlPz/vf5ikg0iXoV4m9WtB6h84WtJkSePztkXAcZLeLulQUsf4AUXfs0SlY5eXb5qkubnOXivpIEl/QeqEvyVn+xHpEtH3JZ2Qf8fHS5pZ6ZhV9Ph7ypfX7iQNZnhrHkgxhzRApdQ84C8lvVHpPp05pJFe/enfSH0//5Lr5J3A3+Q0L7KVOcBYryKi+1v6VtI/+AJS0NmQH0TEdaSO4I+QRo/dSRoJ1N26eA/pH/9fSCOkvgt0sP31805Sq7qzl23lVuXy/QD4X+Aq4HMRcVN+/7tILaN3kK7f/wq4GPhdD+ccwAdIl6D+hNQnNB+4gtTS+n7Oeg0pyHwxn/d7gVPypbmiVpNGov2K9C1+GvDuiFib0z9NCphP8Mqlor/L+f+TdLlzDWkEVl9VOna5JaRhup8mtaweIl2eu5Lcb5cv5b2b9EXiJtIgjatJfSyFFPw9nUkavjwPuJ30Qf902aEuyOXtJLUKv0Yaot1vcovzRNLf+IOkv4HLcnJ/B7MBq3v0hpmZ7QJJJ5FadPtExPJGl2d34D4YM7OdIOkMUkvpGdKIyC+T+sUcXDIHGDOznTORNOptX9LUSneQbri1zJfIzMysJtzJb2ZmNeFLZNm4cePioIMOanQxdjtr1qyhtbW194xNxvWyI9dJZYO9Xu6///7lETGhUpoDTDZx4kR+/etfN7oYu53Ozk5mzJjR6GLsdlwvO3KdVDbY60XS4mppvkRmZmY14QBjZmY14QBjZmY14QBjZmY14QBjZmY14QBjZmY14QBjZmY14QBjZmY14QBjZmY14QBjZmY14QBjZmY14QBjZmY14QBjZmY14QBjZmY14QBjZmY1UdcAI2lvSbdIWiNpsaRTq+STpFmSVuTHLEkqSZ8taaGkrZLOrLD/X0l6TtLLkr4uaWRvZXv65a1M/8I8bn1waaFzufXBpUz/wjw
OuPiOQbmfmdmuqncL5lpgIzAROA24TtLUCvnOBmYCRwCHAycC55SkPwx8DHigfEdJJwAXA8cBU4ADgc8WKdzSleu45Ob5vX4I3/rgUi65eT5LV64jBuF+Zmb9oW4rWkpqBU4BXhcRXcA9km4DPkQKCKXOAK6KiCV536uAjwBfBYiIa/P29RXe6gzgXyNiQc7zOeBbFd6jonWbtvA3t8znnt8ur5rnh/OfZd2mLQN2vy/dtZCZR06uup+ZWX+o55LJhwCbI2JRybaHgWMq5J2a00rzVWrpVDIVmFu270RJbRGxojSjpLNJrSVGTDpo2/a1G7fw4wXVv+Wv3RhVtg+M/ZauXEdnZ2fV/Up1dXUVzttMXC87cp1U1sz1Us8AMxp4uWzbKmBMlbyryvKNlqSIqPyp2fO+5PfZLsBExGxgNsDIfQ/edtzJ41r42cXHVn2D6V+Yx9KV63bYPpD2K7pG+GBfT3xnuV525DqprJnrpZ59MF3A2LJtY4HVBfKOBboKBJdq+1LlfXbQMnwoF53Q0WOei07ooGX40EG7n5lZf6hngFkEDJN0cMm2I4AFFfIuyGm95auk0r7Pl18eq2TyuBauOPn1vfZPzDxyMlec/Homj2tBA2C/ttYRAIwfPaLQfmZm/aFul8giYo2km4HLJX0YmAacBLytQvYbgfMl/RAI4ALgn7oTJY0gBUcBwyXtAWyMiK153zmSvgUsAz4FzOmtfO1jh/R4uanczCMn79QHdSP2O+JV43jHlZ1c/O7DHFzMrG7qPUz5Y0AL8ALwbeCjEbFA0tGSukryXQ/cDswHHgXuyNu6/RewjhScZufnfwgQEXcCXwR+DPwOWAx8pobntNvbf68Whg4Ri1esaXRRzKyJ1LOTn4h4iXR/S/n2n5I657tfB/DJ/Kh0nBm9vM8/AP+wK2UdTIYPHcL+e7Xw1HIHGDOrH08V0ySmtLWyeMXaRhfDzJqIA0yTaG8bxdMr1lBsIJ6Z2a5zgGkSU9paWb1+M79fu6nRRTGzJuEA0yTa20YB8LQ7+s2sThxgmsSUtlYAjyQzs7pxgGkSr9q7BQmeXu6OfjOrDweYJjFy2FD227PFl8jMrG4cYJrIAeNbedpDlc2sThxgmsiUtlHugzGzunGAaSLtba2sXLuJlWs3NrooZtYEHGCayJQ8VNl39JtZPTjANJH28Wmosjv6zaweHGCayKv3dgvGzOrHAaaJ7DF8KPvuuYdbMGZWFw4wTaa9rZWnPW2/mdWBA0yTaR8/ypfIzKwuHGCazJS2Vlas2cjL6z2rspnVlgNMk+meVfl3bsWYWY05wDSZ7lmV3dFvZrXmANNkfLOlmdWLA0yTGTViGPuMGemRZGZWcw4wTai9rdWXyMys5hxgmlD7+FGett/Mas4BpglNaWvlxdUbWLNhc6OLYmaDmANME2rPI8nc0W9mteQA04ReGUnmfhgzqx0HmCbUHWDcD2NmteQA04TG7DGc8aNHuAVjZjXlANOkpniospnVmANMk0rT9vsSmZnVTl0DjKS9Jd0iaY2kxZJOrZJPkmZJWpEfsySpJH2apPslrc0/p5WkjZT0VUnPS3pJ0u2SJtfj/AaS9rZRPPfyetZt3NLoopjZIFXvFsy1wEZgInAacJ2kqRXynQ3MBI4ADgdOBM4BkDQCmAvcBOwF3ADMzdsBPg68Ne+3H/B74J9qdD4D1pTxaajy715yK8bMaqNuAUZSK3AKcGlEdEXEPcBtwIcqZD8DuCoilkTEUuAq4MycNgMYBnw5IjZExDWAgGNz+gHAXRHxfESsB74DVApiTa1920gy98OYWW0Mq+N7HQJsjohFJdseBo6pkHdqTivNN7Uk7ZGIiJL0R/L2O4F/Ba6WtB+wktRS+s9KBZJ0Nqm1xIQJE+js7OzjKQ1cazal6pv3q/mMfPHxqvm6urqaql6Kcr3syHVSWTPXSz0DzGjg5bJtq4AxVfKuKss3OvfDlKeVH+d/gWeApcAWYD5wbqU
CRcRsYDZAR0dHzJgxo+CpDA6fuve/GDpuEjNmvL5qns7OTpqtXopwvezIdVJZM9dL4UtkkiZKulDSdZLG523TJR1Q8BBdwNiybWOB1QXyjgW6cqult+NcC4wE2oBW4GaqtGCa3ZS2Vt8LY2Y1UyjASDoKWEi63PQXvPIBfzzw+YLvtQgYJungkm1HAAsq5F2Q0yrlWwAcXjqqjNSh350+DZgTES9FxAZSB/+bu4OivaK9bZSHKptZzRRtwVwJXB0RRwIbSrbfBUwvcoCIWENqTVwuqVXSdOAk4JsVst8InC9pcu5LuQCYk9M6SZe+zstDkrsvf83LP+8DTpe0p6ThwMeAZRGxvNipNo/28a0sW7WO9Zs8VNnM+l/RAHMUaThwuWdJQ46L+hjQArwAfBv4aEQskHS0pK6SfNcDt5P6Tx4F7sjbiIiNpCHMp5M68c8CZubtABcC60l9MS8C7wHe24cyNo32tlYiYMnv3Yoxs/5XtJN/Hemek3KHkoJFIRHxEik4lG//Kanzvvt1AJ/Mj0rHeZAU9CqlrSBdyrNebJv0cvlaDtqn0lgLM7OdV7QFMxf4jKSR+XVIagdmAd+vQbmsDrrXhfG9MGZWC0UDzIXA3qRLTqOAe4Dfki5Rfao2RbNaGzdqOGP3GOaFx8ysJgpdIouIl4G3SzoWeAMpMD0QEXfXsnBWW5JoH+9Zlc2sNgoFGEmnA9+JiHm8Mlqre16wP4uIG2tUPquxKW2tPPzMykYXw8wGoaKXyL4B7Flh+5icZgPUAW2jWPL7tWzcvLXRRTGzQaZogBEQFba/mh2nbbEBZEpbK1s9VNnMaqDHS2SS5pMCSwD/I2lzSfJQYArww9oVz2qtfXwaqrx4xVoOnDC6l9xmZsX11gfzvfzzdaSbHUtvhtwIPI2HKQ9oUzxU2cxqpMcAExGfBZD0NKmTf309CmX109Y6gtEjPVTZzPpf0WHKlaaJsUFAElPaRrkFY2b9ruhsyiMkfVbSIknrJW0pfdS6kFZb7W2tbsGYWb8rOorsc+RljIGtwEWkdVdWkCawtAFsStsonnlpLZu3eKiymfWfogHm/cD/jYjrSVPlz42I84DPkNaEsQGsfXwrm7cGS1eua3RRzGwQKRpgJgK/yc+7gHH5+Z3Au/q7UFZfr0x66ctkZtZ/igaY3wH75ee/BU7Iz99KmsrfBrD2tu57YdzRb2b9p2iAuQU4Lj+/GvispKdIq0x+rQblsjqaMGYkLcOHevlkM+tXRYcpX1Ly/HuSniEtlbwoIn5Qq8JZfXQPVXYLxsz6U9EVLbcTEb8EfgkgqTUi/Mk0wLW3tfK/L6xudDHMbBApeolsB5L2kHQR8FQ/lscaZMr4UTzz0jq2bK00p6mZWd/1GGDyDZafl3SfpJ9Lmpm3nw48CXwC+Mc6lNNq7IC2VjZu2coyD1U2s37SWwvmMuBcYDFwAPAfkv4Z+FvgEqA9Iq6oaQmtLronvfQd/WbWX3oLMO8HzoyI9wF/RJqify9gakTcEBGbal1Aq4/uafs9J5mZ9ZfeAsyrgPsAIuJh0hT9syJic4972YAzccwejBw2xCPJzKzf9BZghgMbSl5vwitYDkpDhnTPquxLZGbWP4oMU75CUvenzgjgMknbBZk8L5kNcFPaWt2CMbN+01uA+QnwmpLXPwdeXZbH41oHifa2Ufxk0Yts3RoMGaJGF8fMBrjeVrScUady2G5gSlsrGzZv5bmX17PfuJZGF8fMBridvtHSBp8DxnfPquzLZGa26xxgbJsp22ZVdke/me26ugYYSXtLukXSGkmLJZ1aJZ8kzZK0Ij9mSVJJ+jRJ90tam39OK9v/DZJ+IqlL0vOSPl7rcxsM9t2zhRFDh7gFY2b9ot4tmGtJ99JMBE4DrpM0tUK+s4GZwBHA4cCJwDmQpq8B5gI3kW76vAGYm7cjaTxpIbTrgTbgIOC/andKg8fQIeJVe7ew2NP2m1k/qFuAkdQKnAJcGhFdEXEPcBv
woQrZzwCuioglEbEUuAo4M6fNIA1O+HJEbIiIawABx+b084G7IuJbOX11RDxWsxMbZNrbWt2CMbN+UWi6fknlQ5O7BbA+Il4scJhDgM0Rsahk28PAMRXyTs1ppfmmlqQ9EhGlw6MfydvvBP4AmC/p56TWyy+Bv4yI35W/iaSzSa0lJkyYQGdnZ4HTGNyGrtvAky9u5sc//jGS6Orqcr1U4HrZkeuksmaul6LrwTxND/e7SHoZ+AbwyR6mkRkNvFy2bRUwpkreVWX5Rud+mPK08uPsD7wBOB6YD3wR+DZpgbTtRMRsYDZAR0dHzJgxo0rRm8fvRj7Nfy1ewNSj3so+Y/egs7MT18uOXC87cp1U1sz1UjTAfID0Qf1V8kJjwFtI3/4vA8YBnwJWA5+pcowuYGzZtrF5n97yjgW6IiIk9XacdcAtEXEfgKTPAssl7RkRnuamF+15VuWnlq9hn7F7NLg0ZjaQFe2D+SjwVxFxRUTMy48rgAuAsyLiauA8UiCqZhEwTNLBJduOABZUyLsgp1XKtwA4vHRUGWkgQHf6I2zf2vJMA33Q7mn7zayfFA0wbyFdbir3KPCm/Pxe0uWpivKyyjcDl0tqlTQdOAn4ZoXsNwLnS5osaT9SIJuT0zqBLcB5kkZKOjdvn5d/fgN4bx7KPBy4FLjHrZdi9hu3B8OGyB39ZrbLigaYxeTO8DIfAbo7zycAL/VynI8BLcALpH6Rj0bEAklH50tf3a4HbicFtUeBO/I2ImIjaQjz6cBK4CxgZt5ORMwD/ibv8wKpo7/i/Ta2o2FDh/CqvUe5BWNmu6xoH8wFwPclvYe8PgzwRtJEmKfk128CvtvTQSLiJVJwKN/+U1LnfffrAD6ZH5WO8yBwVA/vcx1wXU9lserStP1uwZjZrikUYCLijtx38jGgI2++Dfhq9/DfiPjn2hTR6q29rZVfP/17th8JbmbWN0VbMETEM8AlNSyL7SamtI2ia8NmVqzZ2OiimNkAVjjASBoFTAP2oazvJiJu7udyWQN1jyR7erkvk5nZzit6J/87SZ3ybRWSAxjan4WyxmrfNm3/WsY3uCxmNnAVHUV2NWlU1v4RMaTs4eAyyEwe18LQIfLyyWa2S4oGmHbgcxGxrIZlsd3EiGFDmDyuhac9VNnMdkHRAPMzXhk9Zk1gStsot2DMbJcU7eT/KnBlvqt+PrCpNDEiHujvglljtbe1cutDS4kY0eiimNkAVTTAfC//nF0hzZ38g9CUtlGsXr+ZNZscYMxs5xQNMAfUtBS22+keqvz82q0NLomZDVRF7+RfXOuC2O7liRfT1HCf+8V6vv74PC46oYOZR07udb9bH1zKl+5ayLKV69hvXMug3W/pynVM/oXrxawnVQOMpJOB2yNiU35elW+0HFxufXAp/3j3KwuPLl25jktuTpNp9/Rhc+uDS7nk5vms27TF+zXRfmbVqNp8U5K2ApMi4oX8vJoYDPfCdHR0xMKFCxtdjN3C9C/MY+nKdTtsHz5UvHa/Pavu95tlq9i0Zce/J+83sPebPK6Fn118bNX9ujXzyo09Gez1Iun+iHhjpbSqLZiIGFLpuQ1+yyoEF4BNW4JxLcOr7lfpw8n7Dfz9qv09mPWm8Fxk1jz2G9dSsQUzeVwLN5z15qr7VWv5eL+Bvd9+41qq7mPWk8ItE0n7SzpV0icknV/6qGUBrf4uOqGDluHbX/VsGT6Ui07o+V5b79ec+5lVU3Syy9OArwObgRfZcc37f+j/olmjdHfobhstVXA0Uel+fRmFNBD3G8z10t3RX/T8zKqp2sm/XSbpCeA7wKURsaXmpWoAd/JXNtg7KHfWYK6X6zqfYNadj/Pwp9/FnqOq99mUG8x1sisGe7301Mlf9BLZROBrgzW4mNkrDp00BoCFz69ucElsoCsaYH4IvKWWBTGz3UNHd4B57uUGl8QGuqKjyP4bmCVpKpUnu/SNlmaDxL577sHYPYbx+HNuwdiuKRpgrs8//6Z
Cmie7NBtEJHHopLEsdICxXVToElmFVSy9oqXZINYxaQwLn1tNkUFAZtX0GmAkDZf0S0keDG/WJDomjWH1hs0Vb7w0K6rXABMRm0jT9furjFmT2DaSzJfJbBcUHUV2A/CRWhbEzHYfh+QA445+2xVFO/lbgdMkHQ/cD2y3WHtEnNffBTOzxhm7x3Amj2txC8Z2SdEAcxjwQH5+YFmaL52ZDUKH5o5+s51VdEXLd9S6IGa2e+mYNIb/WfQiGzdvZcQwr9hhfee/GjOrqGPSGDZvjW3LZ5v1VV+m63+HpNmS7pQ0r/TRh2PsLekWSWskLZZ0apV8kjRL0or8mCVJJenTJN0vaW3+Oa3CMUZIekzSkqLlM7NXHDppLOCRZLbzCgUYSWcC/wmMAWaQpuzfC3gD8Js+vN+1wEbS5JmnAdfl6WfKnQ3MBI4ADgdOBM7JZRkBzAVuymW4AZibt5e6KJfTzHbCgRNaGT5UHklmO61oC+ZC4NyI+ABpHrJLIuJI0od8ofazpFbgFNKU/10RcQ9wG/ChCtnPAK6KiCURsRS4Cjgzp80g9R19OSI2RMQ1gIBti4ZLOgD4IHBFwfMzszLDhw7hNRNGe9JL22lFR5EdCNydn28ARufnXwE6gYsLHOMQYHNELCrZ9jBwTIW8U3Naab6pJWmPxPZzWDySt9+ZX/8Tad60Hm9DlnQ2qbXEhAkT6OzsLHAazaWrq8v1UkGz1MteWs/Di4uda7PUSV81c70UDTArSJfHAJYCryN9qLcBRRfsHg2UfxVaVXLc8ryryvKNzv0w5WnbHUfSe4GhEXGLpBk9FSgiZgOzIS04NpgXBdpZg32xpJ3VLPXyGE9w752Pc+Sbp/e6+Fiz1ElfNXO9FL1E9lPgXfn5d4FrJH0D+DZpKv8iuoCxZdvGApUu8JbnHQt05VZL1ePky3BfBHzjp1k/8OJjtiuKBphzScEEUr/Gl0itl+8CHy54jEXAMEkHl2w7AlhQIe+CnFYp3wLg8NJRZaSBAAuAg4F24KeSngNuBvaV9Jyk9oLlNLPMi4/Zrih6o+VLJc+3ArP6+kYRsUbSzcDlkj4MTANOAt5WIfuNwPmSfkiaKeACUr8KpD6fLcB5kr7KK3OkzQO2Aq8qOc7bSP1Eb8Ajysz6bN8992CMFx+zndSX+2AmSrpQ0nWSxudt0/OIraI+RuqzeYHUIvpoRCyQdLSk0tFo1wO3k1bPfBS4I28jIjaShjCfDqwEzgJmRsTGiNgcEc91P4CXgK359ZY+lNPM6F58zFPG2M4p1IKRdBTwI+Ap0mitLwHLgeNJo8Mq3jBZLreEZlbY/lNeGZlG7mv5ZH5UOs6DwClzDwkAAA2qSURBVFEF3q8T2L9I2cysskMnjeXWh5YSEWx/ZdqsZ0VbMFcCV+d7XzaUbL8LmN7vpTKz3UbHpDGsXr+ZZavWN7ooNsAUDTBHke6YL/cs6a58MxukukeSPf6sO/qtb4oGmHWkaVnKHUrqTzGzQcqLj9nOKhpg5gKfkTQyv4487HcW8P0alMvMdhNefMx2Vl/mItubNNR3FHAP8FvSHfSfqk3RzGx30eGRZLYTit4H8zLwdknHku4pGQI8EBF397ynmQ0Gh04aw0+8+Jj1UdG5yACIiHmkGxoBkDQF+FJEvL+/C2Zmu4/uxceeXN61bZ0Ys97s6leRcaQp+M1sEOsOKo8/68tkVpzbumbWKy8+ZjvDAcbMeuXFx2xnOMCYWSEeSWZ91WMnv6TbetnfvX1mTeLQSWOZ+9AyVq3bxJ4tPS8+Zga9jyJbUSD9qX4qi5ntxrqnjFn0/Gre1L53g0tjA0GPASYi/rxeBTGz3VtHyZxkDjBWhPtgzKwQLz5mfeUAY2aFePEx6ysHGDMrrGPSGBY+v5q0JqBZzxxgzKywQyeN9eJjVpgDjJkV1j2SzDdcWhEOMGZWWPfiY495TjIrwAHGzArz4mPWFw4wZtYnnjLGinKAMbM+6Zg0hide7GLj5q2
NLort5hxgzKxPDi1ZfMysJw4wZtYn3YuP+TKZ9cYBxsz6pHvxMY8ks944wJhZn3jxMSvKAcbM+swjyawIBxgz67OOSWNYtmo9q9ZtanRRbDdW1wAjaW9Jt0haI2mxpFOr5JOkWZJW5McsSSpJnybpfklr889pJWkXSXpU0mpJT0m6qB7nZtZMDssd/YuedyvGqqt3C+ZaYCMwETgNuE7S1Ar5zgZmAkcAhwMnAucASBoBzAVuAvYCbgDm5u0AAk7PaX8EnCvpz2p1QmbNaNviY75MZj2oW4CR1AqcAlwaEV0RcQ9wG/ChCtnPAK6KiCURsRS4Cjgzp80grcT55YjYEBHXkILKsQAR8cWIeCAiNkfEQlIwml7DUzNrOtsWH3vWHf1WXY9LJvezQ4DNEbGoZNvDwDEV8k7NaaX5ppakPRLbL0jxSN5+Z+lB8mW1o4HrKxVI0tmk1hITJkygs7Oz6Lk0ja6uLtdLBa4X2LdlK79auITOzhWA66SaZq6XegaY0UD5151VwJgqeVeV5RudA0Z5Wk/HuYzUSvtGpQJFxGxgNkBHR0fMmDGjxxNoRp2dnbheduR6gbtXzmfuQ8s45phjkOQ6qaKZ66WefTBdwNiybWOBShdxy/OOBbpyq6XQcSSdS+qL+eOI2LAL5TazCjq8+Jj1op4BZhEwTNLBJduOABZUyLsgp1XKtwA4vHRUGWkgwLbjSDoLuBg4LiKW9EPZzayMFx+z3tQtwETEGuBm4HJJrZKmAycB36yQ/UbgfEmTJe0HXADMyWmdwBbgPEkjc0sFYB6ApNOAvweOj4gna3U+Zs3OI8msN/UepvwxoAV4Afg28NGIWCDpaEmlU7NeD9wOzAceBe7I24iIjaQhzKcDK4GzgJl5O8DfAW3AfZK68uOrtT81s+bSvfjY456TzKqoZyc/EfESKTiUb/8pqfO++3UAn8yPSsd5EDiqStoB/VJYM+uVp4yxnniqGDPbaV58zHriAGNmO82Lj1lPHGDMbKd1bBtJ5stktiMHGDPbaQeOH83wofJIMqvIAcbMdtqIYd2LjznA2I4cYMxsl3RMGuNJL60iBxgz2yXdi4+t2RS9Z7am4gBjZruke8qYpV0eqmzbc4Axs13SkVe3XLLaAca25wBjZrtkv7z4mAOMlavrVDFmNvjMfWgZGzZtZd4zW5n+hXlcdEIHM4+c3Ot+tz64lC/dtZBlK9ex37iWQbvf0pXrmPyLwVsvIyYdVHHaLnCAMbNdcOuDS7nk5vls3JJaL0tXruOSm+cD9Pgh1b3fuk1bvN8g2K8abb/ycPPq6OiIhQsXNroYu51mXo2vJ66XZPoX5rF05bodto8cNoS3HNhWdb9fPrmCDRXmL/N+A2+/Z2/4BBue/V9VyucWjJnttGUVggvAhs1beXndpqr7VfpQ834Df79yDjBmttP2G9dSsQUzeVwLt/7l9Kr7VWv5eL+BvV85jyIzs5120QkdtAwfut22luFDueiEDu/XZPtV4haMme207o7gbaOlCo5CKt2vL6OXBuJ+g71enu0hnzv5M3fyV+bO7MpcLztynVQ22OtF0v0R8cZKab5EZmZmNeEAY2ZmNeEAY2ZmNeEAY2ZmNeEAY2ZmNeEAY2ZmNeEAY2ZmNeEAY2ZmNeEAY2ZmNeEAY2ZmNeEAY2ZmNeEAY2ZmNVHXACNpb0m3SFojabGkU6vkk6RZklbkxyxJKkmfJul+SWvzz2lF9zUzs/qodwvmWmAjMBE4DbhO0tQK+c4GZgJHAIcDJwLnAEgaAcwFbgL2Am4A5ubtPe5rZmb1U7cAI6kVOAW4NCK6IuIe4DbgQxWynwFcFRFLImIpcBVwZk6bQVrH5ssRsSEirgEEHFtgXzMzq5N6Ljh2CLA5IhaVbHsYOKZC3qk5rTTf1JK0R2L7hWweydvv7GXf7Ug6m9TiAdgg6dFip9JUxgPLG12I3ZDrZUeuk8oGe71MqZZQzwAzGni5bNsqYEyVvKvK8o3OfSn
laeXHqbpvWVAiImYDswEk/braojnNzPVSmetlR66Typq5XurZB9MFjC3bNhZYXSDvWKArB4jejtPTvmZmVif1DDCLgGGSDi7ZdgSwoELeBTmtUr4FwOFlI8MOL0uvtq+ZmdVJ3QJMRKwBbgYul9QqaTpwEvDNCtlvBM6XNFnSfsAFwJyc1glsAc6TNFLSuXn7vAL79mR238+qKbheKnO97Mh1UlnT1ovqeeVI0t7A14HjgRXAxRHxb5KOBv4zIkbnfAJmAR/Ou34N+Ovuy1ySjszbXgs8BvxFRDxYZF8zM6uPugYYMzNrHp4qxszMasIBxszMaqLpA0zR+dGajaROSesldeXHwkaXqd4knSvp15I2SJpTlnacpMfzfHg/llT1ZrPBplq9SGqXFCV/M12SLm1gUesqDzr61/w5slrSQ5LeXZLedH8zTR9gKD4/WjM6NyJG50dHowvTAMuAvyMNTNlG0njSiMhLgb2BXwPfqXvpGqdivZQYV/J387k6lqvRhgHPkGYn2RP4FPDdHHib8m+mnnfy73ZK5kd7XUR0AfdI6p4f7eKGFs4aLiJuBpD0RmD/kqSTgQUR8R85/TJguaRDI+Lxuhe0znqol6aWb8W4rGTTDyQ9BRwFtNGEfzPN3oKpNj+aWzDJFZKWS/qZpBmNLsxuZLv57vIHyxP476bbYklLJH0jf3NvSpImkj5jFtCkfzPNHmD6Mj9as/lr4EBgMulGsdslvaaxRdpt9DYfXrNaDryJNPnhUaT6+FZDS9QgkoaTzv2G3EJpyr+ZZg8wfZkfralExC8jYnVeEuEG4GfAexpdrt2E/24qyMtw/DoiNkfE88C5wLskDeoP0XKShpBmKNlIqgNo0r+ZZg8wfZkfrdkFad0dK5vvLvflvQb/3ZTrvou7aT5n8kwi/0oaNHRKRGzKSU35N9M0v/hK+jg/WtOQNE7SCZL2kDRM0mnAH5LW22ka+dz3AIYCQ7vrA7gFeJ2kU3L6p0lrFA3aztpS1epF0lskdUgaIqkNuAbojIjyS0OD2XXAYcCJEbGuZHtz/s1ERFM/SEMGbwXWAL8DTm10mRr9ACYA95Ga7yuBXwDHN7pcDaiHy0jfwksfl+W0dwKPA+tIE7C2N7q8ja4X4APAU/l/6VnSxLOTGl3eOtbLlFwX60mXxLofpzXr34znIjMzs5po6ktkZmZWOw4wZmZWEw4wZmZWEw4wZmZWEw4wZmZWEw4wZmZWEw4wZoNUXpvlfY0uhzUvBxizGpA0J3/Alz9+0eiymdVLU68HY1Zjd5PWFiq1sREFMWsEt2DMamdDRDxX9ngJtl2+OlfSHXkJ3cWSPli6s6TXS7pb0jpJL+VW0Z5lec6QND8vX/y8pBvKyrC3pP/IS4I/Wf4eZrXkAGPWOJ8FbgOmkdbcuTGvEtk92+5dpLms3gy8F3gbJcsUSzoHuB74BnA4aTmFR8ve49PAXNJMvt8Bvi7p1bU7JbNXeC4ysxqQNAf4IGniw1LXRsRfSwrgaxHxkZJ97gaei4gPSvoIcCWwf0SszukzgB8DB0fEbyUtAW6KiIrLe+f3+EJEXJJfDyMtsHd2RNzUj6drVpH7YMxq5yfA2WXbVpY8v7cs7V7gj/Pzw0jTuZcuSPVzYCvwWkkvk1Yb/VEvZXik+0lEbJb0IrBPseKb7RoHGLPaWRsRv63Bcfty2WFT2evAl8atTvyHZtY4f1Dh9WP5+WPA68uWG34b6X/2sYh4AVgKHFfzUprtJLdgzGpnpKRJZdu2RMSL+fnJku4jLT71PlKweEtO+xZpEMCNkj4N7EXq0L+5pFX0eeAfJT0P3AGMAo6LiKtqdUJmfeEAY1Y77ySt7FhqKbB/fn4ZcAppaeEXgT+PiPsAImKtpBOALwO/Ig0WmAt8vPtAEXGdpI3ABcAs4CXgh7U6GbO+8igyswbII7z+T0R8r9FlMasV98GYmVlNOMCYmVlN+BKZmZnVhFswZmZWEw4wZmZWEw4wZmZ
WEw4wZmZWEw4wZmZWE/8/2PX1htKmypUAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(history.epoch, [piecewise_constant_fn(epoch) for epoch in history.epoch], \"o-\")\n", - "plt.axis([0, n_epochs - 1, 0, 0.011])\n", - "plt.xlabel(\"Epoch\")\n", - "plt.ylabel(\"Learning Rate\")\n", - "plt.title(\"Piecewise Constant Scheduling\", fontsize=14)\n", - "plt.grid(True)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 성능 기반 스케줄링" - ] - }, - { - "cell_type": "code", - "execution_count": 91, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5882 - accuracy: 0.8079 - val_loss: 0.4696 - val_accuracy: 0.8538\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4975 - accuracy: 0.8391 - val_loss: 0.5668 - val_accuracy: 0.8406\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5165 - accuracy: 0.8419 - val_loss: 0.5295 - val_accuracy: 0.8496\n", - "Epoch 4/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5144 - accuracy: 0.8454 - val_loss: 0.5411 - val_accuracy: 0.8480\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5096 - accuracy: 0.8496 - val_loss: 0.4733 - val_accuracy: 0.8490\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5276 - accuracy: 0.8515 - val_loss: 0.7935 - val_accuracy: 0.8416\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3111 - accuracy: 0.8925 - val_loss: 0.4018 - val_accuracy: 0.8694\n", - "Epoch 8/25\n", - "1719/1719 
[==============================] - 5s 3ms/step - loss: 0.2627 - accuracy: 0.9045 - val_loss: 0.4398 - val_accuracy: 0.8712\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2391 - accuracy: 0.9116 - val_loss: 0.3970 - val_accuracy: 0.8890\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2252 - accuracy: 0.9175 - val_loss: 0.4090 - val_accuracy: 0.8892\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2109 - accuracy: 0.9216 - val_loss: 0.4515 - val_accuracy: 0.8844\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2034 - accuracy: 0.9247 - val_loss: 0.4781 - val_accuracy: 0.8808\n", - "Epoch 13/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1892 - accuracy: 0.9294 - val_loss: 0.4578 - val_accuracy: 0.8904\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1850 - accuracy: 0.9307 - val_loss: 0.4853 - val_accuracy: 0.8808\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1252 - accuracy: 0.9504 - val_loss: 0.4423 - val_accuracy: 0.8902\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1088 - accuracy: 0.9579 - val_loss: 0.4663 - val_accuracy: 0.8946\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0992 - accuracy: 0.9620 - val_loss: 0.4872 - val_accuracy: 0.8932\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0938 - accuracy: 0.9641 - val_loss: 0.5198 - val_accuracy: 0.8862\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0883 - accuracy: 0.9660 - val_loss: 0.5091 - val_accuracy: 0.8922\n", - "Epoch 20/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0668 - accuracy: 0.9752 - val_loss: 
0.5149 - val_accuracy: 0.8946\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0608 - accuracy: 0.9779 - val_loss: 0.5289 - val_accuracy: 0.8946\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0575 - accuracy: 0.9786 - val_loss: 0.5383 - val_accuracy: 0.8928\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0538 - accuracy: 0.9801 - val_loss: 0.5474 - val_accuracy: 0.8928\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0506 - accuracy: 0.9818 - val_loss: 0.5690 - val_accuracy: 0.8908\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.0425 - accuracy: 0.9858 - val_loss: 0.5667 - val_accuracy: 0.8904\n" - ] - } - ], - "source": [ - "lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "optimizer = keras.optimizers.SGD(lr=0.02, momentum=0.9)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", - "n_epochs = 25\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[lr_scheduler])" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.plot(history.epoch, history.history[\"lr\"], \"bo-\")\n", - "plt.xlabel(\"Epoch\")\n", - "plt.ylabel(\"Learning Rate\", color='b')\n", - "plt.tick_params('y', colors='b')\n", - "plt.gca().set_xlim(0, n_epochs - 1)\n", - "plt.grid(True)\n", - "\n", - "ax2 = plt.gca().twinx()\n", - "ax2.plot(history.epoch, history.history[\"val_loss\"], \"r^-\")\n", - "ax2.set_ylabel('Validation Loss', color='r')\n", - "ax2.tick_params('y', colors='r')\n", - "\n", - "plt.title(\"Reduce LR on Plateau\", fontsize=14)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### tf.keras 스케줄러" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4894 - accuracy: 0.8277 - val_loss: 0.4096 - val_accuracy: 0.8592\n", - "Epoch 2/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3820 - accuracy: 0.8650 - val_loss: 0.3742 - val_accuracy: 0.8700\n", - "Epoch 3/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3487 - accuracy: 0.8767 - val_loss: 0.3736 - val_accuracy: 0.8686\n", - "Epoch 4/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3265 - accuracy: 0.8838 - val_loss: 0.3496 - val_accuracy: 0.8798\n", - "Epoch 5/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3105 - accuracy: 0.8899 - val_loss: 0.3434 - val_accuracy: 0.8800\n", - "Epoch 6/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2959 - accuracy: 0.8950 - val_loss: 0.3415 - val_accuracy: 0.8808\n", - "Epoch 7/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2855 - accuracy: 0.8987 - val_loss: 0.3354 - val_accuracy: 
0.8818\n", - "Epoch 8/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2761 - accuracy: 0.9016 - val_loss: 0.3366 - val_accuracy: 0.8810\n", - "Epoch 9/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2678 - accuracy: 0.9053 - val_loss: 0.3265 - val_accuracy: 0.8852\n", - "Epoch 10/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2608 - accuracy: 0.9069 - val_loss: 0.3240 - val_accuracy: 0.8848\n", - "Epoch 11/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2551 - accuracy: 0.9088 - val_loss: 0.3251 - val_accuracy: 0.8868\n", - "Epoch 12/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2497 - accuracy: 0.9126 - val_loss: 0.3302 - val_accuracy: 0.8810\n", - "Epoch 13/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2449 - accuracy: 0.9136 - val_loss: 0.3218 - val_accuracy: 0.8872\n", - "Epoch 14/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2415 - accuracy: 0.9147 - val_loss: 0.3222 - val_accuracy: 0.8860\n", - "Epoch 15/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2375 - accuracy: 0.9167 - val_loss: 0.3208 - val_accuracy: 0.8876\n", - "Epoch 16/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2343 - accuracy: 0.9179 - val_loss: 0.3185 - val_accuracy: 0.8882\n", - "Epoch 17/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2317 - accuracy: 0.9186 - val_loss: 0.3198 - val_accuracy: 0.8890\n", - "Epoch 18/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2291 - accuracy: 0.9199 - val_loss: 0.3169 - val_accuracy: 0.8904\n", - "Epoch 19/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2269 - accuracy: 0.9206 - val_loss: 0.3197 - val_accuracy: 0.8888\n", - "Epoch 20/25\n", - "1719/1719 [==============================] - 5s 3ms/step - 
loss: 0.2250 - accuracy: 0.9220 - val_loss: 0.3169 - val_accuracy: 0.8902\n", - "Epoch 21/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2229 - accuracy: 0.9224 - val_loss: 0.3180 - val_accuracy: 0.8904\n", - "Epoch 22/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2216 - accuracy: 0.9225 - val_loss: 0.3163 - val_accuracy: 0.8912\n", - "Epoch 23/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2201 - accuracy: 0.9233 - val_loss: 0.3171 - val_accuracy: 0.8906\n", - "Epoch 24/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2188 - accuracy: 0.9243 - val_loss: 0.3166 - val_accuracy: 0.8908\n", - "Epoch 25/25\n", - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2179 - accuracy: 0.9243 - val_loss: 0.3165 - val_accuracy: 0.8904\n" - ] - } - ], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)\n", - "learning_rate = keras.optimizers.schedules.ExponentialDecay(0.01, s, 0.1)\n", - "optimizer = keras.optimizers.SGD(learning_rate)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", - "n_epochs = 25\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "구간별 고정 스케줄링은 다음을 사용하세요:" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [], - "source": [ - "learning_rate = keras.optimizers.schedules.PiecewiseConstantDecay(\n", - " 
boundaries=[5. * n_steps_per_epoch, 15. * n_steps_per_epoch],\n", - " values=[0.01, 0.005, 0.001])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1사이클 스케줄링" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [], - "source": [ - "K = keras.backend\n", - "\n", - "class ExponentialLearningRate(keras.callbacks.Callback):\n", - " def __init__(self, factor):\n", - " self.factor = factor\n", - " self.rates = []\n", - " self.losses = []\n", - " def on_batch_end(self, batch, logs):\n", - " self.rates.append(K.get_value(self.model.optimizer.lr))\n", - " self.losses.append(logs[\"loss\"])\n", - " K.set_value(self.model.optimizer.lr, self.model.optimizer.lr * self.factor)\n", - "\n", - "def find_learning_rate(model, X, y, epochs=1, batch_size=32, min_rate=10**-5, max_rate=10):\n", - " init_weights = model.get_weights()\n", - " iterations = len(X) // batch_size * epochs\n", - " factor = np.exp(np.log(max_rate / min_rate) / iterations)\n", - " init_lr = K.get_value(model.optimizer.lr)\n", - " K.set_value(model.optimizer.lr, min_rate)\n", - " exp_lr = ExponentialLearningRate(factor)\n", - " history = model.fit(X, y, epochs=epochs, batch_size=batch_size,\n", - " callbacks=[exp_lr])\n", - " K.set_value(model.optimizer.lr, init_lr)\n", - " model.set_weights(init_weights)\n", - " return exp_lr.rates, exp_lr.losses\n", - "\n", - "def plot_lr_vs_loss(rates, losses):\n", - " plt.plot(rates, losses)\n", - " plt.gca().set_xscale('log')\n", - " plt.hlines(min(losses), min(rates), max(rates))\n", - " plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 2])\n", - " plt.xlabel(\"Learning rate\")\n", - " plt.ylabel(\"Loss\")" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " 
keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=keras.optimizers.SGD(lr=1e-3),\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "430/430 [==============================] - 2s 4ms/step - loss: nan - accuracy: 0.3859 \n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "batch_size = 128\n", - "rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)\n", - "plot_lr_vs_loss(rates, losses)" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "metadata": {}, - "outputs": [], - "source": [ - "class OneCycleScheduler(keras.callbacks.Callback):\n", - " def __init__(self, iterations, max_rate, start_rate=None,\n", - " last_iterations=None, last_rate=None):\n", - " self.iterations = iterations\n", - " self.max_rate = max_rate\n", - " self.start_rate = start_rate or max_rate / 10\n", - " self.last_iterations = last_iterations or iterations // 10 + 1\n", - " self.half_iteration = (iterations - self.last_iterations) // 2\n", - " self.last_rate = last_rate or self.start_rate / 1000\n", - " self.iteration = 0\n", - " def _interpolate(self, iter1, iter2, rate1, rate2):\n", - " return ((rate2 - rate1) * (self.iteration - iter1)\n", - " / (iter2 - iter1) + rate1)\n", - " def on_batch_begin(self, batch, logs):\n", - " if self.iteration < self.half_iteration:\n", - " rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)\n", - " elif self.iteration < 2 * self.half_iteration:\n", - " rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,\n", - " self.max_rate, self.start_rate)\n", - " else:\n", - " rate = self._interpolate(2 * self.half_iteration, self.iterations,\n", - " self.start_rate, self.last_rate)\n", - " rate = max(rate, self.last_rate)\n", - " self.iteration += 1\n", - " K.set_value(self.model.optimizer.lr, rate)" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/25\n", - "430/430 [==============================] - 2s 4ms/step - loss: 0.6572 - accuracy: 0.7740 - val_loss: 0.4872 - val_accuracy: 0.8338\n", - 
"Epoch 2/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.4581 - accuracy: 0.8397 - val_loss: 0.4274 - val_accuracy: 0.8524\n", - "Epoch 3/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.4121 - accuracy: 0.8545 - val_loss: 0.4116 - val_accuracy: 0.8588\n", - "Epoch 4/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3837 - accuracy: 0.8641 - val_loss: 0.3870 - val_accuracy: 0.8686\n", - "Epoch 5/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3639 - accuracy: 0.8717 - val_loss: 0.3765 - val_accuracy: 0.8676\n", - "Epoch 6/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3457 - accuracy: 0.8774 - val_loss: 0.3742 - val_accuracy: 0.8708\n", - "Epoch 7/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3330 - accuracy: 0.8811 - val_loss: 0.3634 - val_accuracy: 0.8704\n", - "Epoch 8/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3185 - accuracy: 0.8862 - val_loss: 0.3958 - val_accuracy: 0.8608\n", - "Epoch 9/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.3065 - accuracy: 0.8893 - val_loss: 0.3483 - val_accuracy: 0.8762\n", - "Epoch 10/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2945 - accuracy: 0.8924 - val_loss: 0.3396 - val_accuracy: 0.8812\n", - "Epoch 11/25\n", - "430/430 [==============================] - 2s 4ms/step - loss: 0.2838 - accuracy: 0.8963 - val_loss: 0.3460 - val_accuracy: 0.8796\n", - "Epoch 12/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2709 - accuracy: 0.9023 - val_loss: 0.3644 - val_accuracy: 0.8696\n", - "Epoch 13/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2536 - accuracy: 0.9081 - val_loss: 0.3350 - val_accuracy: 0.8838\n", - "Epoch 14/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2405 - accuracy: 0.9134 - val_loss: 
0.3466 - val_accuracy: 0.8812\n", - "Epoch 15/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2280 - accuracy: 0.9183 - val_loss: 0.3260 - val_accuracy: 0.8840\n", - "Epoch 16/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2160 - accuracy: 0.9234 - val_loss: 0.3292 - val_accuracy: 0.8834\n", - "Epoch 17/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.2062 - accuracy: 0.9264 - val_loss: 0.3354 - val_accuracy: 0.8862\n", - "Epoch 18/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1978 - accuracy: 0.9305 - val_loss: 0.3236 - val_accuracy: 0.8906\n", - "Epoch 19/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1892 - accuracy: 0.9337 - val_loss: 0.3233 - val_accuracy: 0.8904\n", - "Epoch 20/25\n", - "430/430 [==============================] - 2s 4ms/step - loss: 0.1821 - accuracy: 0.9369 - val_loss: 0.3221 - val_accuracy: 0.8926\n", - "Epoch 21/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1752 - accuracy: 0.9401 - val_loss: 0.3215 - val_accuracy: 0.8904\n", - "Epoch 22/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1701 - accuracy: 0.9418 - val_loss: 0.3180 - val_accuracy: 0.8956\n", - "Epoch 23/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1655 - accuracy: 0.9438 - val_loss: 0.3186 - val_accuracy: 0.8942\n", - "Epoch 24/25\n", - "430/430 [==============================] - 2s 4ms/step - loss: 0.1628 - accuracy: 0.9458 - val_loss: 0.3176 - val_accuracy: 0.8924\n", - "Epoch 25/25\n", - "430/430 [==============================] - 1s 3ms/step - loss: 0.1611 - accuracy: 0.9460 - val_loss: 0.3169 - val_accuracy: 0.8930\n" - ] - } - ], - "source": [ - "n_epochs = 25\n", - "onecycle = OneCycleScheduler(len(X_train) // batch_size * n_epochs, max_rate=0.05)\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,\n", - " 
validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[onecycle])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 규제를 사용해 과대적합 피하기" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## $\\ell_1$과 $\\ell_2$ 규제" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "metadata": {}, - "outputs": [], - "source": [ - "layer = keras.layers.Dense(100, activation=\"elu\",\n", - " kernel_initializer=\"he_normal\",\n", - " kernel_regularizer=keras.regularizers.l2(0.01))\n", - "# or l1(0.1) for ℓ1 regularization with a factor or 0.1\n", - "# or l1_l2(0.1, 0.01) for both ℓ1 and ℓ2 regularization, with factors 0.1 and 0.01 respectively" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 1.5956 - accuracy: 0.8124 - val_loss: 0.7169 - val_accuracy: 0.8340\n", - "Epoch 2/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.7197 - accuracy: 0.8274 - val_loss: 0.6850 - val_accuracy: 0.8376\n" - ] - } - ], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dense(300, activation=\"elu\",\n", - " kernel_initializer=\"he_normal\",\n", - " kernel_regularizer=keras.regularizers.l2(0.01)),\n", - " keras.layers.Dense(100, activation=\"elu\",\n", - " kernel_initializer=\"he_normal\",\n", - " kernel_regularizer=keras.regularizers.l2(0.01)),\n", - " keras.layers.Dense(10, activation=\"softmax\",\n", - " kernel_regularizer=keras.regularizers.l2(0.01))\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 2\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "code", - 
"execution_count": 103, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 1.6313 - accuracy: 0.8113 - val_loss: 0.7218 - val_accuracy: 0.8310\n", - "Epoch 2/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.7187 - accuracy: 0.8273 - val_loss: 0.6826 - val_accuracy: 0.8382\n" - ] - } - ], - "source": [ - "from functools import partial\n", - "\n", - "RegularizedDense = partial(keras.layers.Dense,\n", - " activation=\"elu\",\n", - " kernel_initializer=\"he_normal\",\n", - " kernel_regularizer=keras.regularizers.l2(0.01))\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " RegularizedDense(300),\n", - " RegularizedDense(100),\n", - " RegularizedDense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 2\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 드롭아웃" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.5838 - accuracy: 0.7998 - val_loss: 0.3730 - val_accuracy: 0.8644\n", - "Epoch 2/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4209 - accuracy: 0.8443 - val_loss: 0.3406 - val_accuracy: 0.8724\n" - ] - } - ], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.Dropout(rate=0.2),\n", - " keras.layers.Dense(300, activation=\"elu\", kernel_initializer=\"he_normal\"),\n", - " keras.layers.Dropout(rate=0.2),\n", - 
" keras.layers.Dense(100, activation=\"elu\", kernel_initializer=\"he_normal\"),\n", - " keras.layers.Dropout(rate=0.2),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 2\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 알파 드롭아웃" - ] - }, - { - "cell_type": "code", - "execution_count": 105, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 106, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.6654 - accuracy: 0.7595 - val_loss: 0.5929 - val_accuracy: 0.8406\n", - "Epoch 2/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.5605 - accuracy: 0.7933 - val_loss: 0.5605 - val_accuracy: 0.8400\n", - "Epoch 3/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.5283 - accuracy: 0.8050 - val_loss: 0.4858 - val_accuracy: 0.8596\n", - "Epoch 4/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.5072 - accuracy: 0.8125 - val_loss: 0.4629 - val_accuracy: 0.8582\n", - "Epoch 5/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4915 - accuracy: 0.8186 - val_loss: 0.4698 - val_accuracy: 0.8552\n", - "Epoch 6/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4866 - accuracy: 0.8186 - val_loss: 0.4810 - val_accuracy: 0.8612\n", - "Epoch 7/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4724 - accuracy: 0.8251 - val_loss: 0.4792 - val_accuracy: 0.8650\n", - "Epoch 8/20\n", - "1719/1719 
[==============================] - 6s 3ms/step - loss: 0.4634 - accuracy: 0.8287 - val_loss: 0.4587 - val_accuracy: 0.8628\n", - "Epoch 9/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4578 - accuracy: 0.8307 - val_loss: 0.4105 - val_accuracy: 0.8718\n", - "Epoch 10/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4531 - accuracy: 0.8333 - val_loss: 0.4714 - val_accuracy: 0.8648\n", - "Epoch 11/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4484 - accuracy: 0.8326 - val_loss: 0.4181 - val_accuracy: 0.8674\n", - "Epoch 12/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4468 - accuracy: 0.8330 - val_loss: 0.5498 - val_accuracy: 0.8500\n", - "Epoch 13/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4421 - accuracy: 0.8363 - val_loss: 0.4546 - val_accuracy: 0.8630\n", - "Epoch 14/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4314 - accuracy: 0.8396 - val_loss: 0.4554 - val_accuracy: 0.8676\n", - "Epoch 15/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4341 - accuracy: 0.8367 - val_loss: 0.4458 - val_accuracy: 0.8662\n", - "Epoch 16/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4312 - accuracy: 0.8412 - val_loss: 0.4325 - val_accuracy: 0.8750\n", - "Epoch 17/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4261 - accuracy: 0.8419 - val_loss: 0.5457 - val_accuracy: 0.8554\n", - "Epoch 18/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4258 - accuracy: 0.8417 - val_loss: 0.5050 - val_accuracy: 0.8700\n", - "Epoch 19/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4208 - accuracy: 0.8437 - val_loss: 0.4899 - val_accuracy: 0.8696\n", - "Epoch 20/20\n", - "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4234 - accuracy: 0.8414 - val_loss: 
0.4014 - val_accuracy: 0.8782\n" - ] - } - ], - "source": [ - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " keras.layers.AlphaDropout(rate=0.2),\n", - " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.AlphaDropout(rate=0.2),\n", - " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", - " keras.layers.AlphaDropout(rate=0.2),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])\n", - "n_epochs = 20\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "313/313 [==============================] - 1s 2ms/step - loss: 0.4338 - accuracy: 0.8697\n" - ] - }, - { - "data": { - "text/plain": [ - "[0.4337695240974426, 0.869700014591217]" - ] - }, - "execution_count": 107, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.evaluate(X_test_scaled, y_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1719/1719 [==============================] - 4s 2ms/step - loss: 0.3243 - accuracy: 0.8887\n" - ] - }, - { - "data": { - "text/plain": [ - "[0.32432350516319275, 0.8887272477149963]" - ] - }, - "execution_count": 108, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.evaluate(X_train_scaled, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4167 - accuracy: 0.8463\n" - ] - } - ], - "source": [ - "history = model.fit(X_train_scaled, y_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## MC 드롭아웃" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "y_probas = np.stack([model(X_test_scaled, training=True)\n", - " for sample in range(100)])\n", - "y_proba = y_probas.mean(axis=0)\n", - "y_std = y_probas.std(axis=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)" - ] - }, - "execution_count": 112, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.round(model.predict(X_test_scaled[:1]), 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[[0. , 0. , 0. , 0. , 0. , 0.43, 0. , 0.18, 0. , 0.39]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.75, 0. , 0.25]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.42, 0. , 0. , 0. , 0.58]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.26, 0. , 0.72]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.12, 0. , 0.07, 0. , 0.8 ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.3 , 0. , 0.7 ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.36, 0. , 0.13, 0. , 0.51]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.34, 0. , 0.66]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.75, 0. , 0.02, 0. , 0.23]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.04, 0. , 0.94]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. 
, 0.03, 0. , 0.95]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.64, 0. , 0. , 0. , 0.36]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.68, 0. , 0.05, 0. , 0.28]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.31, 0. , 0.04, 0. , 0.65]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.02, 0. , 0.93]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.42, 0. , 0.02, 0. , 0.57]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0. , 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.03, 0. , 0.96]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.52, 0. , 0.48]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.28, 0. , 0.12, 0. , 0.6 ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.42, 0. , 0.01, 0. , 0.56]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.96, 0. , 0.01, 0. , 0.03]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.96]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0. , 0. , 0.99]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.75, 0. , 0.11, 0. , 0.14]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.21, 0. , 0.18, 0. , 0.61]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.44, 0. , 0.08, 0. , 0.48]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.63, 0. , 0.01, 0. , 0.36]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.23, 0. , 0.55, 0. , 0.22]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.31, 0. , 0.01, 0. , 0.68]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.06, 0. , 0.01, 0. , 0.93]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.3 , 0. , 0.02, 0. , 0.68]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.91, 0. , 0.04, 0. , 0.05]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.06, 0. , 0.02, 0. , 0.93]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.33, 0. , 0.36, 0. , 0.31]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.6 , 0. , 0.03, 0. 
, 0.37]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.14, 0. , 0.08, 0. , 0.77]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.66, 0. , 0. , 0. , 0.34]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.11, 0. , 0.01, 0. , 0.88]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0. , 0. , 0.99]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.47, 0. , 0.08, 0. , 0.44]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.01, 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.12, 0. , 0.11, 0. , 0.77]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.21, 0. , 0.78]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.08, 0. , 0.92]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.2 , 0. , 0.79]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.49, 0. , 0.11, 0. , 0.41]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.69, 0. , 0.06, 0. , 0.25]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.31, 0. , 0.01, 0. , 0.68]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.01, 0. , 0.97]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.25, 0. , 0.71]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.47, 0. , 0.51]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.07, 0. , 0.72, 0. , 0.21]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.49, 0. , 0.5 ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.06, 0. , 0.02, 0. , 0.92]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.6 , 0. , 0.02, 0. , 0.38]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.08, 0. , 0.02, 0. , 0.9 ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.14, 0. , 0.02, 0. , 0.84]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.02, 0. , 0.97]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.44, 0. , 0.02, 0. , 0.54]],\n", - "\n", - " [[0. , 0. 
, 0. , 0. , 0. , 0.18, 0. , 0.08, 0. , 0.74]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.49, 0. , 0.04, 0. , 0.47]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.11, 0. , 0.08, 0. , 0.81]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.07, 0. , 0.09, 0. , 0.84]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0. , 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.04, 0. , 0.93]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.38, 0. , 0.03, 0. , 0.59]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.01, 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.1 , 0. , 0.02, 0. , 0.88]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.01, 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.49, 0. , 0.22, 0. , 0.29]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.03, 0. , 0.96]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.03, 0. , 0.96]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.3 , 0. , 0.69]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.28, 0. , 0. , 0. , 0.72]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.03, 0. , 0.94]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.09, 0. , 0.87]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.12, 0. , 0.03, 0. , 0.85]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.98, 0. , 0. , 0. , 0.02]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.17, 0. , 0. , 0. , 0.83]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.01, 0. , 0.94]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0. , 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.13, 0. , 0.83]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.93, 0. , 0.03, 0. 
, 0.04]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.86, 0. , 0.01, 0. , 0.13]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.59, 0. , 0.02, 0. , 0.38]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.01, 0. , 0.97]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.11, 0. , 0.01, 0. , 0.88]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.01, 0. , 0.98]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.78, 0. , 0.01, 0. , 0.21]],\n", - "\n", - " [[0. , 0. , 0. , 0. , 0. , 0.48, 0. , 0.01, 0. , 0.51]]],\n", - " dtype=float32)" - ] - }, - "execution_count": 113, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.round(y_probas[:, :1], 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0. , 0. , 0. , 0. , 0. , 0.23, 0. , 0.09, 0. , 0.68]],\n", - " dtype=float32)" - ] - }, - "execution_count": 114, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.round(y_proba[:1], 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 115, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0. , 0. , 0. , 0. , 0. , 0.28, 0. , 0.15, 0. 
, 0.29]],\n", - " dtype=float32)" - ] - }, - "execution_count": 115, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "y_std = y_probas.std(axis=0)\n", - "np.round(y_std[:1], 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "metadata": {}, - "outputs": [], - "source": [ - "y_pred = np.argmax(y_proba, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 117, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.8666" - ] - }, - "execution_count": 117, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy = np.sum(y_pred == y_test) / len(y_test)\n", - "accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "metadata": {}, - "outputs": [], - "source": [ - "class MCDropout(keras.layers.Dropout):\n", - " def call(self, inputs):\n", - " return super().call(inputs, training=True)\n", - "\n", - "class MCAlphaDropout(keras.layers.AlphaDropout):\n", - " def call(self, inputs):\n", - " return super().call(inputs, training=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 119, - "metadata": {}, - "outputs": [], - "source": [ - "tf.random.set_seed(42)\n", - "np.random.seed(42)" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "metadata": {}, - "outputs": [], - "source": [ - "mc_model = keras.models.Sequential([\n", - " MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n", - " for layer in model.layers\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: \"sequential_20\"\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "flatten_18 (Flatten) (None, 784) 0 \n", - 
"_________________________________________________________________\n", - "mc_alpha_dropout (MCAlphaDro (None, 784) 0 \n", - "_________________________________________________________________\n", - "dense_262 (Dense) (None, 300) 235500 \n", - "_________________________________________________________________\n", - "mc_alpha_dropout_1 (MCAlphaD (None, 300) 0 \n", - "_________________________________________________________________\n", - "dense_263 (Dense) (None, 100) 30100 \n", - "_________________________________________________________________\n", - "mc_alpha_dropout_2 (MCAlphaD (None, 100) 0 \n", - "_________________________________________________________________\n", - "dense_264 (Dense) (None, 10) 1010 \n", - "=================================================================\n", - "Total params: 266,610\n", - "Trainable params: 266,610\n", - "Non-trainable params: 0\n", - "_________________________________________________________________\n" - ] - } - ], - "source": [ - "mc_model.summary()" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)\n", - "mc_model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "metadata": {}, - "outputs": [], - "source": [ - "mc_model.set_weights(model.get_weights())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이제 MC 드롭아웃을 모델에 사용할 수 있습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 124, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0. , 0. , 0. , 0. , 0. , 0.22, 0. , 0.15, 0. 
, 0.63]],\n", - " dtype=float32)" - ] - }, - "execution_count": 124, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.round(np.mean([mc_model.predict(X_test_scaled[:1]) for sample in range(100)], axis=0), 2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 맥스 노름" - ] - }, - { - "cell_type": "code", - "execution_count": 125, - "metadata": {}, - "outputs": [], - "source": [ - "layer = keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\",\n", - " kernel_constraint=keras.constraints.max_norm(1.))" - ] - }, - { - "cell_type": "code", - "execution_count": 126, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.4749 - accuracy: 0.8337 - val_loss: 0.3665 - val_accuracy: 0.8676\n", - "Epoch 2/2\n", - "1719/1719 [==============================] - 8s 5ms/step - loss: 0.3539 - accuracy: 0.8703 - val_loss: 0.3700 - val_accuracy: 0.8672\n" - ] - } - ], - "source": [ - "MaxNormDense = partial(keras.layers.Dense,\n", - " activation=\"selu\", kernel_initializer=\"lecun_normal\",\n", - " kernel_constraint=keras.constraints.max_norm(1.))\n", - "\n", - "model = keras.models.Sequential([\n", - " keras.layers.Flatten(input_shape=[28, 28]),\n", - " MaxNormDense(300),\n", - " MaxNormDense(100),\n", - " keras.layers.Dense(10, activation=\"softmax\")\n", - "])\n", - "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", - "n_epochs = 2\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", - " validation_data=(X_valid_scaled, y_valid))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 연습문제 해답" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. to 7." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "부록 A 참조." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 8. CIFAR10에서 딥러닝" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### a.\n", - "*문제: 100개의 뉴런을 가진 은닉층 20개로 심층 신경망을 만들어보세요(너무 많은 것 같지만 이 연습문제의 핵심입니다). He 초기화와 ELU 활성화 함수를 사용하세요.*" - ] - }, - { - "cell_type": "code", - "execution_count": 127, - "metadata": {}, - "outputs": [], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100,\n", - " activation=\"elu\",\n", - " kernel_initializer=\"he_normal\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### b.\n", - "*문제: Nadam 옵티마이저와 조기 종료를 사용하여 CIFAR10 데이터셋에 이 네트워크를 훈련하세요. `keras.datasets.cifar10.load_ data()`를 사용하여 데이터를 적재할 수 있습니다. 이 데이터셋은 10개의 클래스와 32×32 크기의 컬러 이미지 60,000개로 구성됩니다(50,000개는 훈련, 10,000개는 테스트). 따라서 10개의 뉴런과 소프트맥스 활성화 함수를 사용하는 출력층이 필요합니다. 
모델 구조와 하이퍼파라미터를 바꿀 때마다 적절한 학습률을 찾아야 한다는 것을 기억하세요.*" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "모델에 출력층을 추가합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "metadata": {}, - "outputs": [], - "source": [ - "model.add(keras.layers.Dense(10, activation=\"softmax\"))" - ] + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "TensorFlow 2.3 on Python 3.6 (CUDA 10.1)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "nav_menu": { + "height": "360px", + "width": "416px" + }, + "toc": { + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 6, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + }, + "colab": { + "name": "11_training_deep_neural_networks.ipynb", + "provenance": [] + }, + "accelerator": "GPU" }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "학습률 5e-5인 Nadam 옵티마이저를 사용해 보죠. 학습률 1e-5, 3e-5, 1e-4, 3e-4, 1e-3, 3e-3, 1e-2를 테스트하고 10번의 에포크 동안 (아래 텐서보드 콜백으로) 학습 곡선을 비교해 보았습니다. 학습률 3e-5와 1e-4가 꽤 좋았기 때문에 5e-5를 시도해 보았고 조금 더 나은 결과를 냈습니다." - ] - }, - { - "cell_type": "code", - "execution_count": 129, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer = keras.optimizers.Nadam(lr=5e-5)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CIFAR10 데이터셋을 로드하죠. 조기 종료를 사용하기 때문에 검증 세트가 필요합니다. 
원본 훈련 세트에서 처음 5,000개를 검증 세트로 사용하겠습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 130, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\n", - "170500096/170498071 [==============================] - 18s 0us/step\n" - ] - } - ], - "source": [ - "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()\n", - "\n", - "X_train = X_train_full[5000:]\n", - "y_train = y_train_full[5000:]\n", - "X_valid = X_train_full[:5000]\n", - "y_valid = y_train_full[:5000]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이제 콜백을 만들고 모델을 훈련합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 131, - "metadata": {}, - "outputs": [], - "source": [ - "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", - "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_model.h5\", save_best_only=True)\n", - "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", - "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_{:03d}\".format(run_index))\n", - "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", - "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]" - ] - }, - { - "cell_type": "code", - "execution_count": 132, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ERROR: Failed to launch TensorBoard (exited with 255).\n", - "Contents of stderr:\n", - "E0809 03:11:42.951561 139734898673472 program.py:312] TensorBoard could not bind to port 6006, it was already in use\n", - "ERROR: TensorBoard could not bind to port 6006, it was already in use" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%tensorboard --logdir=./my_cifar10_logs --port=6006" - ] - }, - { - "cell_type": "code", - "execution_count": 133, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": 
[ - "Epoch 1/100\n", - " 1/1407 [..............................] - ETA: 0s - loss: 165.9660 - accuracy: 0.0625WARNING:tensorflow:From /home/work/.local/lib/python3.6/site-packages/tensorflow/python/ops/summary_ops_v2.py:1277: stop (from tensorflow.python.eager.profiler) is deprecated and will be removed after 2020-07-01.\n", - "Instructions for updating:\n", - "use `tf.profiler.experimental.stop` instead.\n", - " 2/1407 [..............................] - ETA: 1:52 - loss: 133.4792 - accuracy: 0.1250WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0164s vs `on_train_batch_end` time: 0.1398s). Check your callbacks.\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 4.0015 - accuracy: 0.1711 - val_loss: 2.0790 - val_accuracy: 0.2354\n", - "Epoch 2/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 2.0265 - accuracy: 0.2568 - val_loss: 2.0408 - val_accuracy: 0.2488\n", - "Epoch 3/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.9195 - accuracy: 0.2983 - val_loss: 1.9150 - val_accuracy: 0.2916\n", - "Epoch 4/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.8406 - accuracy: 0.3284 - val_loss: 1.9163 - val_accuracy: 0.3050\n", - "Epoch 5/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.7846 - accuracy: 0.3525 - val_loss: 1.7578 - val_accuracy: 0.3592\n", - "Epoch 6/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.7352 - accuracy: 0.3698 - val_loss: 1.7461 - val_accuracy: 0.3570\n", - "Epoch 7/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.6930 - accuracy: 0.3869 - val_loss: 1.7101 - val_accuracy: 0.3752\n", - "Epoch 8/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.6576 - accuracy: 0.4002 - val_loss: 1.6530 - val_accuracy: 0.3962\n", - "Epoch 9/100\n", - "1407/1407 
[==============================] - 22s 15ms/step - loss: 1.6233 - accuracy: 0.4169 - val_loss: 1.6236 - val_accuracy: 0.4070\n", - "Epoch 10/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.5965 - accuracy: 0.4248 - val_loss: 1.6644 - val_accuracy: 0.4026\n", - "Epoch 11/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.5763 - accuracy: 0.4328 - val_loss: 1.6739 - val_accuracy: 0.3946\n", - "Epoch 12/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.5534 - accuracy: 0.4420 - val_loss: 1.6191 - val_accuracy: 0.4186\n", - "Epoch 13/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.5332 - accuracy: 0.4468 - val_loss: 1.6050 - val_accuracy: 0.4168\n", - "Epoch 14/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.5181 - accuracy: 0.4531 - val_loss: 1.6009 - val_accuracy: 0.4312\n", - "Epoch 15/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4995 - accuracy: 0.4616 - val_loss: 1.5658 - val_accuracy: 0.4366\n", - "Epoch 16/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.4827 - accuracy: 0.4652 - val_loss: 1.5856 - val_accuracy: 0.4376\n", - "Epoch 17/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4717 - accuracy: 0.4713 - val_loss: 1.5381 - val_accuracy: 0.4478\n", - "Epoch 18/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.4565 - accuracy: 0.4756 - val_loss: 1.5496 - val_accuracy: 0.4478\n", - "Epoch 19/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.4422 - accuracy: 0.4797 - val_loss: 1.5506 - val_accuracy: 0.4478\n", - "Epoch 20/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.4327 - accuracy: 0.4816 - val_loss: 1.5574 - val_accuracy: 0.4528\n", - "Epoch 21/100\n", - "1407/1407 [==============================] - 22s 15ms/step - 
loss: 1.4213 - accuracy: 0.4875 - val_loss: 1.5716 - val_accuracy: 0.4410\n", - "Epoch 22/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4108 - accuracy: 0.4903 - val_loss: 1.5377 - val_accuracy: 0.4544\n", - "Epoch 23/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3972 - accuracy: 0.4968 - val_loss: 1.5215 - val_accuracy: 0.4526\n", - "Epoch 24/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3865 - accuracy: 0.4998 - val_loss: 1.5374 - val_accuracy: 0.4470\n", - "Epoch 25/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3781 - accuracy: 0.5056 - val_loss: 1.5694 - val_accuracy: 0.4466\n", - "Epoch 26/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3672 - accuracy: 0.5104 - val_loss: 1.5405 - val_accuracy: 0.4542\n", - "Epoch 27/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.3583 - accuracy: 0.5110 - val_loss: 1.5103 - val_accuracy: 0.4690\n", - "Epoch 28/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3481 - accuracy: 0.5161 - val_loss: 1.5282 - val_accuracy: 0.4572\n", - "Epoch 29/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3387 - accuracy: 0.5186 - val_loss: 1.5302 - val_accuracy: 0.4540\n", - "Epoch 30/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3307 - accuracy: 0.5206 - val_loss: 1.5451 - val_accuracy: 0.4666\n", - "Epoch 31/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3235 - accuracy: 0.5245 - val_loss: 1.5553 - val_accuracy: 0.4604\n", - "Epoch 32/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.3138 - accuracy: 0.5274 - val_loss: 1.5263 - val_accuracy: 0.4604\n", - "Epoch 33/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.3025 - accuracy: 0.5323 - val_loss: 1.5133 - 
val_accuracy: 0.4736\n", - "Epoch 34/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2956 - accuracy: 0.5340 - val_loss: 1.5021 - val_accuracy: 0.4748\n", - "Epoch 35/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2876 - accuracy: 0.5345 - val_loss: 1.5427 - val_accuracy: 0.4606\n", - "Epoch 36/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2794 - accuracy: 0.5408 - val_loss: 1.5662 - val_accuracy: 0.4586\n", - "Epoch 37/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2706 - accuracy: 0.5423 - val_loss: 1.5014 - val_accuracy: 0.4778\n", - "Epoch 38/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2615 - accuracy: 0.5464 - val_loss: 1.5048 - val_accuracy: 0.4736\n", - "Epoch 39/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2541 - accuracy: 0.5483 - val_loss: 1.5195 - val_accuracy: 0.4656\n", - "Epoch 40/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2448 - accuracy: 0.5542 - val_loss: 1.5167 - val_accuracy: 0.4790\n", - "Epoch 41/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.2387 - accuracy: 0.5545 - val_loss: 1.5400 - val_accuracy: 0.4660\n", - "Epoch 42/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.2335 - accuracy: 0.5566 - val_loss: 1.5325 - val_accuracy: 0.4680\n", - "Epoch 43/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2257 - accuracy: 0.5580 - val_loss: 1.5342 - val_accuracy: 0.4722\n", - "Epoch 44/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2173 - accuracy: 0.5606 - val_loss: 1.5537 - val_accuracy: 0.4580\n", - "Epoch 45/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2082 - accuracy: 0.5643 - val_loss: 1.5200 - val_accuracy: 0.4770\n", - "Epoch 46/100\n", - 
"1407/1407 [==============================] - 22s 15ms/step - loss: 1.2037 - accuracy: 0.5666 - val_loss: 1.5349 - val_accuracy: 0.4716\n", - "Epoch 47/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.1935 - accuracy: 0.5673 - val_loss: 1.5299 - val_accuracy: 0.4774\n", - "Epoch 48/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1884 - accuracy: 0.5720 - val_loss: 1.5361 - val_accuracy: 0.4724\n", - "Epoch 49/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1769 - accuracy: 0.5766 - val_loss: 1.5410 - val_accuracy: 0.4720\n", - "Epoch 50/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.1782 - accuracy: 0.5752 - val_loss: 1.5620 - val_accuracy: 0.4714\n", - "Epoch 51/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1649 - accuracy: 0.5804 - val_loss: 1.5364 - val_accuracy: 0.4710\n", - "Epoch 52/100\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1599 - accuracy: 0.5818 - val_loss: 1.5514 - val_accuracy: 0.4658\n", - "Epoch 53/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1511 - accuracy: 0.5853 - val_loss: 1.5512 - val_accuracy: 0.4798\n", - "Epoch 54/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1485 - accuracy: 0.5887 - val_loss: 1.5754 - val_accuracy: 0.4648\n", - "Epoch 55/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.1398 - accuracy: 0.5905 - val_loss: 1.5335 - val_accuracy: 0.4806\n", - "Epoch 56/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1352 - accuracy: 0.5909 - val_loss: 1.5603 - val_accuracy: 0.4732\n", - "Epoch 57/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1285 - accuracy: 0.5933 - val_loss: 1.5535 - val_accuracy: 0.4824\n" - ] - 
}, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 133, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.fit(X_train, y_train, epochs=100,\n", - " validation_data=(X_valid, y_valid),\n", - " callbacks=callbacks)" - ] - }, - { - "cell_type": "code", - "execution_count": 134, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "157/157 [==============================] - 0s 3ms/step - loss: 1.5014 - accuracy: 0.0882\n" - ] - }, - { - "data": { - "text/plain": [ - "[1.5013599395751953, 0.08820000290870667]" - ] - }, - "execution_count": 134, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model = keras.models.load_model(\"my_cifar10_model.h5\")\n", - "model.evaluate(X_valid, y_valid)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "가장 낮은 검증 손실을 내는 모델은 검증 세트에서 약 47% 정확도를 얻었습니다. 이 검증 점수에 도달하는데 39번의 에포크가 걸렸습니다. (GPU가 없는) 제 노트북에서 에포크당 약 10초 정도 걸렸습니다. 배치 정규화를 사용해 성능을 올릴 수 있는지 확인해 보죠." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### c.\n", - "*문제: 배치 정규화를 추가하고 학습 곡선을 비교해보세요. 이전보다 빠르게 수렴하나요? 더 좋은 모델이 만들어지나요? 훈련 속도에는 어떤 영향을 미치나요?*" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "다음 코드는 위의 코드와 배우 비슷합니다. 몇 가지 다른 점은 아래와 같습니다:\n", - "\n", - "* 출력층을 제외하고 모든 `Dense` 층 다음에 (활성화 함수 전에) BN 층을 추가했습니다. 처음 은닉층 전에도 BN 층을 추가했습니다.\n", - "* 학습률을 5e-4로 바꾸었습니다. 1e-5, 3e-5, 5e-5, 1e-4, 3e-4, 5e-4, 1e-3, 3e-3를 시도해 보고 20번 에포크 후에 검증 세트 성능이 가장 좋은 것을 선택했습니다.\n", - "* run_logdir를 run_bn_* 으로 이름을 바꾸고 모델 파일 이름을 my_cifar10_bn_model.h5로 변경했습니다." - ] - }, - { - "cell_type": "code", - "execution_count": 135, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/100\n", - " 2/1407 [..............................] 
- ETA: 9:29 - loss: 2.8693 - accuracy: 0.1094WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0364s vs `on_train_batch_end` time: 0.7737s). Check your callbacks.\n", - "1407/1407 [==============================] - 51s 36ms/step - loss: 1.8431 - accuracy: 0.3390 - val_loss: 1.7148 - val_accuracy: 0.3886\n", - "Epoch 2/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.6690 - accuracy: 0.4046 - val_loss: 1.6174 - val_accuracy: 0.4144\n", - "Epoch 3/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.5972 - accuracy: 0.4320 - val_loss: 1.5171 - val_accuracy: 0.4478\n", - "Epoch 4/100\n", - "1407/1407 [==============================] - 50s 35ms/step - loss: 1.5463 - accuracy: 0.4495 - val_loss: 1.4883 - val_accuracy: 0.4688\n", - "Epoch 5/100\n", - "1407/1407 [==============================] - 50s 35ms/step - loss: 1.5051 - accuracy: 0.4641 - val_loss: 1.4369 - val_accuracy: 0.4892\n", - "Epoch 6/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.4684 - accuracy: 0.4793 - val_loss: 1.4056 - val_accuracy: 0.5018\n", - "Epoch 7/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.4350 - accuracy: 0.4895 - val_loss: 1.4292 - val_accuracy: 0.4888\n", - "Epoch 8/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.4087 - accuracy: 0.5006 - val_loss: 1.4021 - val_accuracy: 0.5088\n", - "Epoch 9/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.3834 - accuracy: 0.5095 - val_loss: 1.3738 - val_accuracy: 0.5110\n", - "Epoch 10/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.3645 - accuracy: 0.5167 - val_loss: 1.3432 - val_accuracy: 0.5252\n", - "Epoch 11/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.3428 - accuracy: 0.5258 - val_loss: 1.3583 - val_accuracy: 0.5132\n", - "Epoch 12/100\n", - 
"1407/1407 [==============================] - 49s 35ms/step - loss: 1.3227 - accuracy: 0.5316 - val_loss: 1.3820 - val_accuracy: 0.5052\n", - "Epoch 13/100\n", - "1407/1407 [==============================] - 48s 34ms/step - loss: 1.3010 - accuracy: 0.5371 - val_loss: 1.3794 - val_accuracy: 0.5094\n", - "Epoch 14/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2838 - accuracy: 0.5446 - val_loss: 1.3531 - val_accuracy: 0.5260\n", - "Epoch 15/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2621 - accuracy: 0.5548 - val_loss: 1.3641 - val_accuracy: 0.5256\n", - "Epoch 16/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2535 - accuracy: 0.5572 - val_loss: 1.3720 - val_accuracy: 0.5276\n", - "Epoch 17/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2355 - accuracy: 0.5609 - val_loss: 1.3184 - val_accuracy: 0.5348\n", - "Epoch 18/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2164 - accuracy: 0.5685 - val_loss: 1.3487 - val_accuracy: 0.5296\n", - "Epoch 19/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.2037 - accuracy: 0.5770 - val_loss: 1.3278 - val_accuracy: 0.5366\n", - "Epoch 20/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1916 - accuracy: 0.5789 - val_loss: 1.3592 - val_accuracy: 0.5260\n", - "Epoch 21/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1782 - accuracy: 0.5848 - val_loss: 1.3478 - val_accuracy: 0.5302\n", - "Epoch 22/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1587 - accuracy: 0.5913 - val_loss: 1.3477 - val_accuracy: 0.5308\n", - "Epoch 23/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1481 - accuracy: 0.5933 - val_loss: 1.3285 - val_accuracy: 0.5378\n", - "Epoch 24/100\n", - "1407/1407 [==============================] - 49s 
35ms/step - loss: 1.1395 - accuracy: 0.5989 - val_loss: 1.3393 - val_accuracy: 0.5388\n", - "Epoch 25/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1285 - accuracy: 0.6044 - val_loss: 1.3436 - val_accuracy: 0.5354\n", - "Epoch 26/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.1080 - accuracy: 0.6085 - val_loss: 1.3496 - val_accuracy: 0.5258\n", - "Epoch 27/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0971 - accuracy: 0.6143 - val_loss: 1.3484 - val_accuracy: 0.5350\n", - "Epoch 28/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0978 - accuracy: 0.6121 - val_loss: 1.3698 - val_accuracy: 0.5274\n", - "Epoch 29/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0825 - accuracy: 0.6198 - val_loss: 1.3416 - val_accuracy: 0.5348\n", - "Epoch 30/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0698 - accuracy: 0.6219 - val_loss: 1.3363 - val_accuracy: 0.5366\n", - "Epoch 31/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0569 - accuracy: 0.6262 - val_loss: 1.3536 - val_accuracy: 0.5356\n", - "Epoch 32/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0489 - accuracy: 0.6306 - val_loss: 1.3822 - val_accuracy: 0.5220\n", - "Epoch 33/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0387 - accuracy: 0.6338 - val_loss: 1.3633 - val_accuracy: 0.5404\n", - "Epoch 34/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0342 - accuracy: 0.6344 - val_loss: 1.3611 - val_accuracy: 0.5364\n", - "Epoch 35/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0163 - accuracy: 0.6422 - val_loss: 1.3904 - val_accuracy: 0.5356\n", - "Epoch 36/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 1.0137 - accuracy: 0.6421 - val_loss: 
1.3795 - val_accuracy: 0.5408\n", - "Epoch 37/100\n", - "1407/1407 [==============================] - 49s 35ms/step - loss: 0.9991 - accuracy: 0.6491 - val_loss: 1.3334 - val_accuracy: 0.5444\n", - "157/157 [==============================] - 1s 5ms/step - loss: 1.3184 - accuracy: 0.1154\n" - ] - }, - { - "data": { - "text/plain": [ - "[1.3183687925338745, 0.11540000140666962]" - ] - }, - "execution_count": 135, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "model.add(keras.layers.BatchNormalization())\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100, kernel_initializer=\"he_normal\"))\n", - " model.add(keras.layers.BatchNormalization())\n", - " model.add(keras.layers.Activation(\"elu\"))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", - "\n", - "optimizer = keras.optimizers.Nadam(lr=5e-4)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])\n", - "\n", - "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", - "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_bn_model.h5\", save_best_only=True)\n", - "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", - "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_bn_{:03d}\".format(run_index))\n", - "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", - "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", - "\n", - "model.fit(X_train, y_train, epochs=100,\n", - " validation_data=(X_valid, y_valid),\n", - " callbacks=callbacks)\n", - "\n", - "model = keras.models.load_model(\"my_cifar10_bn_model.h5\")\n", - "model.evaluate(X_valid, y_valid)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* *이전보다 
빠르게 수렴하나요?* 훨씬 빠릅니다! 이전 모델은 가장 낮은 검증 손실에 도달하기 위해 39 에포크가 걸렸지만 BN을 사용한 새 모델은 18 에포크가 걸렸습니다. 이전 모델보다 두 배 이상 빠릅니다. BN 층은 훈련을 안정적으로 수행하고 더 큰 학습률을 사용할 수 있기 때문에 수렴이 빨라졌습니다.\n", - "* *BN이 더 좋은 모델을 만드나요?* 네! 최종 모델의 성능이 47%가 아니라 55% 정확도로 더 좋습니다. 이는 아주 좋은 모델이 아니지만 적어도 이전보다는 낫습니다(합성곱 신경망이 더 낫겠지만 이는 다른 주제입니다. 14장을 참고하세요).\n", - "* *BN이 훈련 속도에 영향을 미치나요?* 모델이 두 배나 빠르게 수렴했지만 각 에포크는 10초가 아니라 16초가 걸렸습니다. BN 층에서 추가된 계산 때문입니다. 따라서 전체적으로 에포크 횟수가 50% 정도 줄었지만 훈련 시간(탁상 시계 시간)은 30% 정도 줄었습니다. 결국 크게 향상되었습니다!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### d.\n", - "*문제: 배치 정규화를 SELU로 바꾸어보세요. 네트워크가 자기 정규화하기 위해 필요한 변경 사항을 적용해보세요(즉, 입력 특성 표준화, 르쿤 정규분포 초기화, 완전 연결 층만 순차적으로 쌓은 심층 신경망 등).*" - ] - }, - { - "cell_type": "code", - "execution_count": 136, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/100\n", - " 2/1407 [..............................] - ETA: 5:52 - loss: 3.0440 - accuracy: 0.1094WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0169s vs `on_train_batch_end` time: 0.4852s). 
Check your callbacks.\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.9306 - accuracy: 0.3076 - val_loss: 1.8329 - val_accuracy: 0.3406\n", - "Epoch 2/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.7066 - accuracy: 0.3947 - val_loss: 1.7136 - val_accuracy: 0.3778\n", - "Epoch 3/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.6115 - accuracy: 0.4309 - val_loss: 1.6743 - val_accuracy: 0.3992\n", - "Epoch 4/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.5459 - accuracy: 0.4578 - val_loss: 1.6328 - val_accuracy: 0.4376\n", - "Epoch 5/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4916 - accuracy: 0.4795 - val_loss: 1.6314 - val_accuracy: 0.4330\n", - "Epoch 6/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4437 - accuracy: 0.4977 - val_loss: 1.5327 - val_accuracy: 0.4724\n", - "Epoch 7/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.4014 - accuracy: 0.5092 - val_loss: 1.5317 - val_accuracy: 0.4680\n", - "Epoch 8/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.3726 - accuracy: 0.5226 - val_loss: 1.4981 - val_accuracy: 0.4874\n", - "Epoch 9/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.3349 - accuracy: 0.5344 - val_loss: 1.5136 - val_accuracy: 0.4734\n", - "Epoch 10/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.2971 - accuracy: 0.5486 - val_loss: 1.5214 - val_accuracy: 0.4786\n", - "Epoch 11/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2706 - accuracy: 0.5600 - val_loss: 1.5285 - val_accuracy: 0.4838\n", - "Epoch 12/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2470 - accuracy: 0.5695 - val_loss: 1.4795 - val_accuracy: 0.4980\n", - "Epoch 13/100\n", - "1407/1407 
[==============================] - 22s 16ms/step - loss: 1.2192 - accuracy: 0.5766 - val_loss: 1.4753 - val_accuracy: 0.4980\n", - "Epoch 14/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1908 - accuracy: 0.5918 - val_loss: 1.4862 - val_accuracy: 0.4942\n", - "Epoch 15/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1665 - accuracy: 0.5987 - val_loss: 1.5071 - val_accuracy: 0.5012\n", - "Epoch 16/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1519 - accuracy: 0.6057 - val_loss: 1.5167 - val_accuracy: 0.5024\n", - "Epoch 17/100\n", - "1407/1407 [==============================] - 21s 15ms/step - loss: 1.1274 - accuracy: 0.6107 - val_loss: 1.5477 - val_accuracy: 0.4968\n", - "Epoch 18/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1049 - accuracy: 0.6254 - val_loss: 1.5362 - val_accuracy: 0.5068\n", - "Epoch 19/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.0837 - accuracy: 0.6309 - val_loss: 1.5754 - val_accuracy: 0.5022\n", - "Epoch 20/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0680 - accuracy: 0.6372 - val_loss: 1.5238 - val_accuracy: 0.5052\n", - "Epoch 21/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0469 - accuracy: 0.6425 - val_loss: 1.5312 - val_accuracy: 0.5156\n", - "Epoch 22/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0303 - accuracy: 0.6500 - val_loss: 1.5359 - val_accuracy: 0.5084\n", - "Epoch 23/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.0084 - accuracy: 0.6583 - val_loss: 1.5369 - val_accuracy: 0.5042\n", - "Epoch 24/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9944 - accuracy: 0.6624 - val_loss: 1.6013 - val_accuracy: 0.5006\n", - "Epoch 25/100\n", - "1407/1407 [==============================] - 22s 16ms/step - 
loss: 0.9793 - accuracy: 0.6706 - val_loss: 1.5512 - val_accuracy: 0.5174\n", - "Epoch 26/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 0.9579 - accuracy: 0.6772 - val_loss: 1.6008 - val_accuracy: 0.4988\n", - "Epoch 27/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9484 - accuracy: 0.6799 - val_loss: 1.5746 - val_accuracy: 0.5110\n", - "Epoch 28/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 48.1621 - accuracy: 0.6212 - val_loss: 1.6674 - val_accuracy: 0.4208\n", - "Epoch 29/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2912 - accuracy: 0.5517 - val_loss: 1.5598 - val_accuracy: 0.4728\n", - "Epoch 30/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2315 - accuracy: 0.5735 - val_loss: 1.5738 - val_accuracy: 0.4764\n", - "Epoch 31/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1684 - accuracy: 0.5940 - val_loss: 1.5514 - val_accuracy: 0.4878\n", - "Epoch 32/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1293 - accuracy: 0.6084 - val_loss: 1.5258 - val_accuracy: 0.4978\n", - "Epoch 33/100\n", - "1407/1407 [==============================] - 22s 15ms/step - loss: 1.1000 - accuracy: 0.6188 - val_loss: 1.5369 - val_accuracy: 0.4970\n", - "157/157 [==============================] - 1s 3ms/step - loss: 1.4753 - accuracy: 0.1256\n" - ] - }, - { - "data": { - "text/plain": [ - "[1.475338339805603, 0.12559999525547028]" - ] - }, - "execution_count": 136, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100,\n", - " kernel_initializer=\"lecun_normal\",\n", - " 
activation=\"selu\"))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", - "\n", - "optimizer = keras.optimizers.Nadam(lr=7e-4)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])\n", - "\n", - "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", - "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_selu_model.h5\", save_best_only=True)\n", - "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", - "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_selu_{:03d}\".format(run_index))\n", - "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", - "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", - "\n", - "X_means = X_train.mean(axis=0)\n", - "X_stds = X_train.std(axis=0)\n", - "X_train_scaled = (X_train - X_means) / X_stds\n", - "X_valid_scaled = (X_valid - X_means) / X_stds\n", - "X_test_scaled = (X_test - X_means) / X_stds\n", - "\n", - "model.fit(X_train_scaled, y_train, epochs=100,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=callbacks)\n", - "\n", - "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n", - "model.evaluate(X_valid_scaled, y_valid)" - ] - }, - { - "cell_type": "code", - "execution_count": 137, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "157/157 [==============================] - 1s 3ms/step - loss: 1.4753 - accuracy: 0.1256\n" - ] - }, - { - "data": { - "text/plain": [ - "[1.475338339805603, 0.12559999525547028]" - ] - }, - "execution_count": 137, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n", - "model.evaluate(X_valid_scaled, y_valid)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "51.4% 정확도를 얻었습니다. 원래 모델보다 더 좋습니다. 하지만 배치 정규화를 사용한 모델만큼 좋지는 않습니다. 최고의 모델에 도달하는데 13 에포크가 걸렸습니다. 
이는 원본 모델이나 BN 모델보다 더 빠른 것입니다. 각 에포크는 원본 모델처럼 10초만 걸렸습니다. 따라서 이 모델이 지금까지 가장 빠른 모델입니다(에포크와 탁상 시계 기준으로)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### e.\n", - "*문제: 알파 드롭아웃으로 모델에 규제를 적용해보세요. 그다음 모델을 다시 훈련하지 않고 MC 드롭아웃으로 더 높은 정확도를 얻을 수 있는지 확인해보세요.*" - ] - }, - { - "cell_type": "code", - "execution_count": 138, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/100\n", - " 2/1407 [..............................] - ETA: 4:07 - loss: 2.9857 - accuracy: 0.0938WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0168s vs `on_train_batch_end` time: 0.3359s). Check your callbacks.\n", - "1407/1407 [==============================] - 23s 17ms/step - loss: 1.8896 - accuracy: 0.3275 - val_loss: 1.7313 - val_accuracy: 0.3970\n", - "Epoch 2/100\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.6589 - accuracy: 0.4157 - val_loss: 1.7183 - val_accuracy: 0.3916\n", - "Epoch 3/100\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.5727 - accuracy: 0.4479 - val_loss: 1.6073 - val_accuracy: 0.4364\n", - "Epoch 4/100\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.5085 - accuracy: 0.4734 - val_loss: 1.5741 - val_accuracy: 0.4524\n", - "Epoch 5/100\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.4525 - accuracy: 0.4946 - val_loss: 1.5663 - val_accuracy: 0.4592\n", - "Epoch 6/100\n", - "1407/1407 [==============================] - 23s 16ms/step - loss: 1.4032 - accuracy: 0.5124 - val_loss: 1.5255 - val_accuracy: 0.4644\n", - "Epoch 7/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.3581 - accuracy: 0.5255 - val_loss: 1.6598 - val_accuracy: 0.4662\n", - "Epoch 8/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.3209 - accuracy: 0.5400 - val_loss: 1.5027 - val_accuracy: 0.5002\n", - 
"Epoch 9/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2845 - accuracy: 0.5562 - val_loss: 1.5246 - val_accuracy: 0.4896\n", - "Epoch 10/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2526 - accuracy: 0.5659 - val_loss: 1.5510 - val_accuracy: 0.4956\n", - "Epoch 11/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.2160 - accuracy: 0.5808 - val_loss: 1.5559 - val_accuracy: 0.5002\n", - "Epoch 12/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1902 - accuracy: 0.5900 - val_loss: 1.5478 - val_accuracy: 0.4968\n", - "Epoch 13/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1602 - accuracy: 0.6021 - val_loss: 1.5727 - val_accuracy: 0.5124\n", - "Epoch 14/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1392 - accuracy: 0.6102 - val_loss: 1.5654 - val_accuracy: 0.4944\n", - "Epoch 15/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.1086 - accuracy: 0.6210 - val_loss: 1.5868 - val_accuracy: 0.5064\n", - "Epoch 16/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0856 - accuracy: 0.6289 - val_loss: 1.6016 - val_accuracy: 0.5042\n", - "Epoch 17/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0620 - accuracy: 0.6397 - val_loss: 1.6458 - val_accuracy: 0.4968\n", - "Epoch 18/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0511 - accuracy: 0.6405 - val_loss: 1.6276 - val_accuracy: 0.5096\n", - "Epoch 19/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0203 - accuracy: 0.6514 - val_loss: 1.7246 - val_accuracy: 0.5062\n", - "Epoch 20/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 1.0024 - accuracy: 0.6598 - val_loss: 1.6570 - val_accuracy: 0.5064\n", - "Epoch 21/100\n", - "1407/1407 
[==============================] - 22s 16ms/step - loss: 0.9845 - accuracy: 0.6662 - val_loss: 1.6697 - val_accuracy: 0.4990\n", - "Epoch 22/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9641 - accuracy: 0.6738 - val_loss: 1.7560 - val_accuracy: 0.5010\n", - "Epoch 23/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9387 - accuracy: 0.6797 - val_loss: 1.7716 - val_accuracy: 0.5008\n", - "Epoch 24/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9290 - accuracy: 0.6852 - val_loss: 1.7688 - val_accuracy: 0.5026\n", - "Epoch 25/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.9176 - accuracy: 0.6899 - val_loss: 1.8131 - val_accuracy: 0.5042\n", - "Epoch 26/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.8925 - accuracy: 0.6986 - val_loss: 1.8228 - val_accuracy: 0.4904\n", - "Epoch 27/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.8680 - accuracy: 0.7060 - val_loss: 1.8546 - val_accuracy: 0.5048\n", - "Epoch 28/100\n", - "1407/1407 [==============================] - 22s 16ms/step - loss: 0.8638 - accuracy: 0.7091 - val_loss: 1.8004 - val_accuracy: 0.4954\n", - "157/157 [==============================] - 1s 3ms/step - loss: 1.5027 - accuracy: 0.0914\n" - ] - }, - { - "data": { - "text/plain": [ - "[1.5026599168777466, 0.09139999747276306]" - ] - }, - "execution_count": 138, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100,\n", - " kernel_initializer=\"lecun_normal\",\n", - " activation=\"selu\"))\n", - "\n", - "model.add(keras.layers.AlphaDropout(rate=0.1))\n", - 
"model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", - "\n", - "optimizer = keras.optimizers.Nadam(lr=5e-4)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])\n", - "\n", - "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", - "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_alpha_dropout_model.h5\", save_best_only=True)\n", - "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", - "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_alpha_dropout_{:03d}\".format(run_index))\n", - "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", - "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", - "\n", - "X_means = X_train.mean(axis=0)\n", - "X_stds = X_train.std(axis=0)\n", - "X_train_scaled = (X_train - X_means) / X_stds\n", - "X_valid_scaled = (X_valid - X_means) / X_stds\n", - "X_test_scaled = (X_test - X_means) / X_stds\n", - "\n", - "model.fit(X_train_scaled, y_train, epochs=100,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=callbacks)\n", - "\n", - "model = keras.models.load_model(\"my_cifar10_alpha_dropout_model.h5\")\n", - "model.evaluate(X_valid_scaled, y_valid)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이 모델은 검증 세트에서 50.8% 정확도에 도달합니다. 드롭아웃이 없을 때보다(51.4%) 조금 더 나쁩니다. 하이퍼파라미터 탐색을 좀 많이 수행해 보면 더 나아 질 수 있습니다(드롭아웃 비율 5%, 10%, 20%, 40%과 학습률 1e-4, 3e-4, 5e-4, 1e-3을 시도했습니다). 하지만 이 경우에는 크지 않을 것 같습니다." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이제 MC 드롭아웃을 사용해 보죠. 
앞서 사용한 `MCAlphaDropout` 클래스를 복사해 사용하겠습니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 139, - "metadata": {}, - "outputs": [], - "source": [ - "class MCAlphaDropout(keras.layers.AlphaDropout):\n", - " def call(self, inputs):\n", - " return super().call(inputs, training=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "방금 훈련했던 모델과 (같은 가중치를 가진) 동일한 새로운 모델을 만들어 보죠. 하지만 `AlphaDropout` 층 대신 `MCAlphaDropout` 드롭아웃 층을 사용합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 140, - "metadata": {}, - "outputs": [], - "source": [ - "mc_model = keras.models.Sequential([\n", - " MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n", - " for layer in model.layers\n", - "])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "그다음 몇 가지 유틸리티 함수를 추가합니다. 첫 번째 함수는 모델을 여러 번 실행합니다(기본적으로 10번). 그다음 평균한 예측 클래스 확률을 반환합니다. 두 번째 함수는 이 평균 확률을 사용해 각 샘플의 클래스를 예측합니다:" - ] - }, - { - "cell_type": "code", - "execution_count": 141, - "metadata": {}, - "outputs": [], - "source": [ - "def mc_dropout_predict_probas(mc_model, X, n_samples=10):\n", - " Y_probas = [mc_model.predict(X) for sample in range(n_samples)]\n", - " return np.mean(Y_probas, axis=0)\n", - "\n", - "def mc_dropout_predict_classes(mc_model, X, n_samples=10):\n", - " Y_probas = mc_dropout_predict_probas(mc_model, X, n_samples)\n", - " return np.argmax(Y_probas, axis=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이제 검증 세트의 모든 샘플에 대해 예측을 만들고 정확도를 계산해 보죠:" - ] - }, - { - "cell_type": "code", - "execution_count": 142, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.497" - ] - }, - "execution_count": 142, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "y_pred = mc_dropout_predict_classes(mc_model, X_valid_scaled)\n", - "accuracy = np.mean(y_pred == 
y_valid[:, 0])\n", - "accuracy" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "이 경우에는 실제적인 정확도 향상이 없습니다(50.8%에서 50.9%).\n", - "\n", - "따라서 이 연습문에서 얻은 최상의 모델은 배치 정규화 모델입니다." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### f.\n", - "*문제: 1사이클 스케줄링으로 모델을 다시 훈련하고 훈련 속도와 모델 정확도가 향상되는지 확인해보세요.*" - ] - }, - { - "cell_type": "code", - "execution_count": 143, - "metadata": {}, - "outputs": [], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100,\n", - " kernel_initializer=\"lecun_normal\",\n", - " activation=\"selu\"))\n", - "\n", - "model.add(keras.layers.AlphaDropout(rate=0.1))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", - "\n", - "optimizer = keras.optimizers.SGD(lr=1e-3)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "352/352 [==============================] - 2s 7ms/step - loss: nan - accuracy: 0.1399\n" - ] - }, - { - "data": { - "text/plain": [ - "[1e-05, 9.999868, 2.6130447387695312, 4.006446089063372]" - ] - }, - "execution_count": 144, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "batch_size = 128\n", - "rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)\n", - "plot_lr_vs_loss(rates, losses)\n", - "plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 1.4])" - ] - }, - { - "cell_type": "code", - "execution_count": 145, - "metadata": {}, - "outputs": [], - "source": [ - "keras.backend.clear_session()\n", - "tf.random.set_seed(42)\n", - "np.random.seed(42)\n", - "\n", - "model = keras.models.Sequential()\n", - "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", - "for _ in range(20):\n", - " model.add(keras.layers.Dense(100,\n", - " kernel_initializer=\"lecun_normal\",\n", - " activation=\"selu\"))\n", - "\n", - "model.add(keras.layers.AlphaDropout(rate=0.1))\n", - "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", - "\n", - "optimizer = keras.optimizers.SGD(lr=1e-2)\n", - "model.compile(loss=\"sparse_categorical_crossentropy\",\n", - " optimizer=optimizer,\n", - " metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 146, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/15\n", - "352/352 [==============================] - 3s 9ms/step - loss: 2.0537 - accuracy: 0.2843 - val_loss: 1.7811 - val_accuracy: 0.3744\n", - "Epoch 2/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.7635 - accuracy: 0.3765 - val_loss: 1.6431 - val_accuracy: 0.4252\n", - "Epoch 3/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.6241 - accuracy: 0.4217 - val_loss: 1.6001 - val_accuracy: 0.4368\n", - "Epoch 4/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.5434 - accuracy: 0.4520 - val_loss: 1.6114 - val_accuracy: 0.4310\n", - "Epoch 5/15\n", - "352/352 [==============================] - 3s 7ms/step - 
loss: 1.4914 - accuracy: 0.4710 - val_loss: 1.5895 - val_accuracy: 0.4434\n", - "Epoch 6/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.4510 - accuracy: 0.4818 - val_loss: 1.5678 - val_accuracy: 0.4506\n", - "Epoch 7/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.4143 - accuracy: 0.4979 - val_loss: 1.6717 - val_accuracy: 0.4294\n", - "Epoch 8/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.3462 - accuracy: 0.5199 - val_loss: 1.4928 - val_accuracy: 0.4956\n", - "Epoch 9/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.2691 - accuracy: 0.5481 - val_loss: 1.5294 - val_accuracy: 0.4818\n", - "Epoch 10/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.1994 - accuracy: 0.5713 - val_loss: 1.5165 - val_accuracy: 0.4978\n", - "Epoch 11/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.1308 - accuracy: 0.5980 - val_loss: 1.5070 - val_accuracy: 0.5100\n", - "Epoch 12/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 1.0632 - accuracy: 0.6184 - val_loss: 1.4833 - val_accuracy: 0.5244\n", - "Epoch 13/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 0.9932 - accuracy: 0.6447 - val_loss: 1.5314 - val_accuracy: 0.5292\n", - "Epoch 14/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 0.9279 - accuracy: 0.6671 - val_loss: 1.5495 - val_accuracy: 0.5248\n", - "Epoch 15/15\n", - "352/352 [==============================] - 3s 7ms/step - loss: 0.8880 - accuracy: 0.6845 - val_loss: 1.5840 - val_accuracy: 0.5288\n" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "YT0En5VzLOo4" + }, + "source": [ + "**11장 – 심층 신경망 훈련하기**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fmWAAqyeLOo9" + }, + "source": [ + "_이 노트북은 11장에 있는 모든 샘플 코드와 연습문제 해답을 가지고 있습니다._" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"mYZM1mKhLOo9" + }, + "source": [ + "\n", + " \n", + "
\n", + " 구글 코랩에서 실행하기\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4mgMAdZzLOo-" + }, + "source": [ + "# 설정" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EnYYn3E9LOo-" + }, + "source": [ + "먼저 몇 개의 모듈을 임포트합니다. 맷플롯립 그래프를 인라인으로 출력하도록 만들고 그림을 저장하는 함수를 준비합니다. 또한 파이썬 버전이 3.5 이상인지 확인합니다(파이썬 2.x에서도 동작하지만 곧 지원이 중단되므로 파이썬 3을 사용하는 것이 좋습니다). 사이킷런 버전이 0.20 이상인지와 텐서플로 버전이 2.0 이상인지 확인합니다." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "M1cwJnMALOo-" + }, + "source": [ + "# 파이썬 ≥3.5 필수\n", + "import sys\n", + "assert sys.version_info >= (3, 5)\n", + "\n", + "# 사이킷런 ≥0.20 필수\n", + "import sklearn\n", + "assert sklearn.__version__ >= \"0.20\"\n", + "\n", + "# 텐서플로 ≥2.0 필수\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "assert tf.__version__ >= \"2.0\"\n", + "\n", + "%load_ext tensorboard\n", + "\n", + "# 공통 모듈 임포트\n", + "import numpy as np\n", + "import os\n", + "\n", + "# 노트북 실행 결과를 동일하게 유지하기 위해\n", + "np.random.seed(42)\n", + "\n", + "# 깔끔한 그래프 출력을 위해\n", + "%matplotlib inline\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "mpl.rc('axes', labelsize=14)\n", + "mpl.rc('xtick', labelsize=12)\n", + "mpl.rc('ytick', labelsize=12)\n", + "\n", + "# 그림을 저장할 위치\n", + "PROJECT_ROOT_DIR = \".\"\n", + "CHAPTER_ID = \"deep\"\n", + "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", + "os.makedirs(IMAGES_PATH, exist_ok=True)\n", + "\n", + "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", + " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", + " print(\"그림 저장:\", fig_id)\n", + " if tight_layout:\n", + " plt.tight_layout()\n", + " plt.savefig(path, format=fig_extension, dpi=resolution)" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uts4Q0qDLOpA" + }, + "source": [ + "# 그레이디언트 소실과 폭주 문제" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "C27geIKhLOpA" + }, + 
"source": [ + "def logit(z):\n", + " return 1 / (1 + np.exp(-z))" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "YnIN46vHLOpA", + "outputId": "58f5e1b1-9ff7-46fd-9779-c7eb28f29c1c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 315 + } + }, + "source": [ + "z = np.linspace(-5, 5, 200)\n", + "\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([-5, 5], [1, 1], 'k--')\n", + "plt.plot([0, 0], [-0.2, 1.2], 'k-')\n", + "plt.plot([-5, 5], [-3/4, 7/4], 'g--')\n", + "plt.plot(z, logit(z), \"b-\", linewidth=2)\n", + "props = dict(facecolor='black', shrink=0.1)\n", + "plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.grid(True)\n", + "plt.title(\"Sigmoid activation function\", fontsize=14)\n", + "plt.axis([-5, 5, -0.2, 1.2])\n", + "\n", + "save_fig(\"sigmoid_saturation_plot\")\n", + "plt.show()" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "그림 저장: sigmoid_saturation_plot\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z9my8UBcLOpB" + }, + "source": [ + "## Xavier 초기화와 He 초기화" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "P0IEVGXYLOpC", + "outputId": "d9bbf038-78f4-43bc-8b80-0e352849cea5", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "[name for name in dir(keras.initializers) if not name.startswith(\"_\")]" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['Constant',\n", + " 'GlorotNormal',\n", + " 'GlorotUniform',\n", + " 'HeNormal',\n", + " 'HeUniform',\n", + " 'Identity',\n", + " 'Initializer',\n", + " 'LecunNormal',\n", + " 'LecunUniform',\n", + " 'Ones',\n", + " 'Orthogonal',\n", + " 'RandomNormal',\n", + " 'RandomUniform',\n", + " 'TruncatedNormal',\n", + " 'VarianceScaling',\n", + " 'Zeros',\n", + " 'constant',\n", + " 'deserialize',\n", + " 'get',\n", + " 'glorot_normal',\n", + " 'glorot_uniform',\n", + " 'he_normal',\n", + " 'he_uniform',\n", + " 'identity',\n", + " 'lecun_normal',\n", + " 'lecun_uniform',\n", + " 'ones',\n", + " 'orthogonal',\n", + " 'random_normal',\n", + " 'random_uniform',\n", + " 'serialize',\n", + " 'truncated_normal',\n", + " 'variance_scaling',\n", + " 'zeros']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kBczZ42qLOpC", + "outputId": "72b19158-02f2-4689-b333-3cad5f5ec4bb", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=\"he_normal\")" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "iSo-rN5kLOpC", + "outputId": 
"7fb5008f-e89b-48b9-d6f9-5f7bb5f76f8d", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "init = keras.initializers.VarianceScaling(scale=2., mode='fan_avg',\n", + " distribution='uniform')\n", + "keras.layers.Dense(10, activation=\"relu\", kernel_initializer=init)" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 6 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ehWhAiMGLOpC" + }, + "source": [ + "## 수렴하지 않는 활성화 함수" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xm1OvmTKLOpD" + }, + "source": [ + "### LeakyReLU" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WPfFeM26LOpD" + }, + "source": [ + "def leaky_relu(z, alpha=0.01):\n", + " return np.maximum(alpha*z, z)" + ], + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "mmZMEcRNLOpD", + "outputId": "f26e717a-01f4-436f-d839-57520655d516", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 315 + } + }, + "source": [ + "plt.plot(z, leaky_relu(z, 0.05), \"b-\", linewidth=2)\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([0, 0], [-0.5, 4.2], 'k-')\n", + "plt.grid(True)\n", + "props = dict(facecolor='black', shrink=0.1)\n", + "plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha=\"center\")\n", + "plt.title(\"Leaky ReLU activation function\", fontsize=14)\n", + "plt.axis([-5, 5, -0.5, 4.2])\n", + "\n", + "save_fig(\"leaky_relu_plot\")\n", + "plt.show()" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "그림 저장: leaky_relu_plot\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "c34zBGD7LOpD", + "outputId": "f4b860b4-3a2d-4a12-e458-de2efaa93051", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "[m for m in dir(keras.activations) if not m.startswith(\"_\")]" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['deserialize',\n", + " 'elu',\n", + " 'exponential',\n", + " 'gelu',\n", + " 'get',\n", + " 'hard_sigmoid',\n", + " 'linear',\n", + " 'relu',\n", + " 'selu',\n", + " 'serialize',\n", + " 'sigmoid',\n", + " 'softmax',\n", + " 'softplus',\n", + " 'softsign',\n", + " 'swish',\n", + " 'tanh']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "eD78rel6LOpD", + "outputId": "777d81bf-2701-4898-96c1-e9f18627cf82", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "[m for m in dir(keras.layers) if \"relu\" in m.lower()]" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['LeakyReLU', 'PReLU', 'ReLU', 'ThresholdedReLU']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 10 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zT5BPUCELOpE" + }, + "source": [ + "LeakyReLU를 사용해 패션 MNIST에서 신경망을 훈련해 보죠:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "aW_rOsgrLOpE", + "outputId": "43769acc-8742-4108-fe80-83acde336789", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n", + "X_train_full = X_train_full / 255.0\n", + "X_test = X_test / 255.0\n", + "X_valid, X_train = X_train_full[:5000], X_train_full[5000:]\n", + "y_valid, y_train = y_train_full[:5000], y_train_full[5000:]" + ], + 
"execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz\n", + "32768/29515 [=================================] - 0s 0us/step\n", + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz\n", + "26427392/26421880 [==============================] - 0s 0us/step\n", + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz\n", + "8192/5148 [===============================================] - 0s 0us/step\n", + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz\n", + "4423680/4422102 [==============================] - 0s 0us/step\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "zvSaZ4qGLOpE" + }, + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n", + " keras.layers.LeakyReLU(),\n", + " keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n", + " keras.layers.LeakyReLU(),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])" + ], + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "8iTzAFZ5LOpE" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "scrolled": true, + "id": "JqVTPhfHLOpE", + "outputId": "9efa9999-4ff7-44b9-94b0-9e5bbd8c1a61", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train, y_train, epochs=10,\n", + " 
validation_data=(X_valid, y_valid))" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "1719/1719 [==============================] - 5s 2ms/step - loss: 1.6314 - accuracy: 0.5054 - val_loss: 0.8886 - val_accuracy: 0.7160\n", + "Epoch 2/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.8416 - accuracy: 0.7247 - val_loss: 0.7130 - val_accuracy: 0.7656\n", + "Epoch 3/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.7053 - accuracy: 0.7637 - val_loss: 0.6427 - val_accuracy: 0.7898\n", + "Epoch 4/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.6325 - accuracy: 0.7908 - val_loss: 0.5900 - val_accuracy: 0.8066\n", + "Epoch 5/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5992 - accuracy: 0.8021 - val_loss: 0.5582 - val_accuracy: 0.8198\n", + "Epoch 6/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5624 - accuracy: 0.8142 - val_loss: 0.5350 - val_accuracy: 0.8238\n", + "Epoch 7/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5379 - accuracy: 0.8217 - val_loss: 0.5156 - val_accuracy: 0.8304\n", + "Epoch 8/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5152 - accuracy: 0.8296 - val_loss: 0.5079 - val_accuracy: 0.8284\n", + "Epoch 9/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5100 - accuracy: 0.8270 - val_loss: 0.4895 - val_accuracy: 0.8388\n", + "Epoch 10/10\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.4918 - accuracy: 0.8339 - val_loss: 0.4817 - val_accuracy: 0.8398\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O4AS1dmTLOpE" + }, + "source": [ + "PReLU를 테스트해 보죠:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "eLqcJVxfLOpF" + }, + "source": [ + "tf.random.set_seed(42)\n", + 
"np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(300, kernel_initializer=\"he_normal\"),\n", + " keras.layers.PReLU(),\n", + " keras.layers.Dense(100, kernel_initializer=\"he_normal\"),\n", + " keras.layers.PReLU(),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])" + ], + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "BMKER0vWLOpF" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "eb_t9z8gLOpF", + "outputId": "20f6177f-04fa-4de4-905d-ab7e94014b71", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train, y_train, epochs=10,\n", + " validation_data=(X_valid, y_valid))" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 1.6969 - accuracy: 0.4974 - val_loss: 0.9255 - val_accuracy: 0.7186\n", + "Epoch 2/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.8706 - accuracy: 0.7246 - val_loss: 0.7305 - val_accuracy: 0.7630\n", + "Epoch 3/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.7211 - accuracy: 0.7621 - val_loss: 0.6565 - val_accuracy: 0.7882\n", + "Epoch 4/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.6448 - accuracy: 0.7880 - val_loss: 0.6003 - val_accuracy: 0.8046\n", + "Epoch 5/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.6078 - accuracy: 0.8003 - val_loss: 0.5656 - val_accuracy: 0.8184\n", + "Epoch 6/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5693 - accuracy: 0.8119 - 
val_loss: 0.5406 - val_accuracy: 0.8238\n", + "Epoch 7/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5428 - accuracy: 0.8193 - val_loss: 0.5196 - val_accuracy: 0.8314\n", + "Epoch 8/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5193 - accuracy: 0.8283 - val_loss: 0.5113 - val_accuracy: 0.8318\n", + "Epoch 9/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5129 - accuracy: 0.8273 - val_loss: 0.4917 - val_accuracy: 0.8380\n", + "Epoch 10/10\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.4941 - accuracy: 0.8313 - val_loss: 0.4826 - val_accuracy: 0.8396\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Vkr7NgOmLOpF" + }, + "source": [ + "### ELU" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6znM2r0RLOpF" + }, + "source": [ + "def elu(z, alpha=1):\n", + " return np.where(z < 0, alpha * (np.exp(z) - 1), z)" + ], + "execution_count": 18, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "WpZo3sG7LOpF", + "outputId": "8f1f0b6e-6585-4c53-d651-159fe61b98e4", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 315 + } + }, + "source": [ + "plt.plot(z, elu(z), \"b-\", linewidth=2)\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([-5, 5], [-1, -1], 'k--')\n", + "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n", + "plt.grid(True)\n", + "plt.title(r\"ELU activation function ($\\alpha=1$)\", fontsize=14)\n", + "plt.axis([-5, 5, -2.2, 3.2])\n", + "\n", + "save_fig(\"elu_plot\")\n", + "plt.show()" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "text": [ + "그림 저장: elu_plot\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CFEUCydjLOpF" + }, + "source": [ + "텐서플로에서 쉽게 ELU를 적용할 수 있습니다. 층을 만들 때 활성화 함수로 지정하면 됩니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "zFuLenUVLOpG", + "outputId": "e03fe963-ee36-47a5-faf8-243a0968f03e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.layers.Dense(10, activation=\"elu\")" + ], + "execution_count": 20, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 20 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mkd4JNYELOpG" + }, + "source": [ + "### SELU" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mzoAhWWhLOpG" + }, + "source": [ + "Günter Klambauer, Thomas Unterthiner, Andreas Mayr는 2017년 한 [훌륭한 논문](https://arxiv.org/pdf/1706.02515.pdf)에서 SELU 활성화 함수를 소개했습니다. 훈련하는 동안 완전 연결 층만 쌓아서 신경망을 만들고 SELU 활성화 함수와 LeCun 초기화를 사용한다면 자기 정규화됩니다. 각 층의 출력이 평균과\n", + "표준편차를 보존하는 경향이 있습니다. 이는 그레이디언트 소실과 폭주 문제를 막아줍니다. 그 결과로 SELU 활성화 함수는 이런 종류의 네트워크(특히 아주 깊은 네트워크)에서 다른 활성화 함수보다 뛰어난 성능을 종종 냅니다. 따라서 꼭 시도해 봐야 합니다. 하지만 SELU 활성화 함수의 자기 정규화 특징은 쉽게 깨집니다. ℓ1나 ℓ2 정규화, 드롭아웃, 맥스 노름, 스킵 연결이나 시퀀셜하지 않은 다른 토폴로지를 사용할 수 없습니다(즉 순환 신경망은 자기 정규화되지 않습니다). 하지만 실전에서 시퀀셜 CNN과 잘 동작합니다. 자기 정규화가 깨지면 SELU가 다른 활성화 함수보다 더 나은 성능을 내지 않을 것입니다." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "NYXUfjHCLOpG" + }, + "source": [ + "from scipy.special import erfc\n", + "\n", + "# alpha와 scale은 평균 0과 표준 편차 1로 자기 정규화합니다\n", + "# (논문에 있는 식 14 참조):\n", + "alpha_0_1 = -np.sqrt(2 / np.pi) / (erfc(1/np.sqrt(2)) * np.exp(1/2) - 1)\n", + "scale_0_1 = (1 - erfc(1 / np.sqrt(2)) * np.sqrt(np.e)) * np.sqrt(2 * np.pi) * (2 * erfc(np.sqrt(2))*np.e**2 + np.pi*erfc(1/np.sqrt(2))**2*np.e - 2*(2+np.pi)*erfc(1/np.sqrt(2))*np.sqrt(np.e)+np.pi+2)**(-1/2)" + ], + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "fyup_FSkLOpG" + }, + "source": [ + "def selu(z, scale=scale_0_1, alpha=alpha_0_1):\n", + " return scale * elu(z, alpha)" + ], + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "cNpfVGctLOpH", + "outputId": "d29c5855-b472-4dac-d118-465af3c5b7ae", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 315 + } + }, + "source": [ + "plt.plot(z, selu(z), \"b-\", linewidth=2)\n", + "plt.plot([-5, 5], [0, 0], 'k-')\n", + "plt.plot([-5, 5], [-1.758, -1.758], 'k--')\n", + "plt.plot([0, 0], [-2.2, 3.2], 'k-')\n", + "plt.grid(True)\n", + "plt.title(\"SELU activation function\", fontsize=14)\n", + "plt.axis([-5, 5, -2.2, 3.2])\n", + "\n", + "save_fig(\"selu_plot\")\n", + "plt.show()" + ], + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "text": [ + "그림 저장: selu_plot\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZFaak_jnLOpH" + }, + "source": [ + "기본적으로 SELU 하이퍼파라미터(`scale`과 `alpha`)는 각 뉴런의 평균 출력이 0에 가깝고 표준 편차는 1에 가깝도록 조정됩니다(입력은 평균이 0이고 표준 편차 1로 표준화되었다고 가정합니다). 이 활성화 함수를 사용하면 1,000개의 층이 있는 심층 신경망도 모든 층에 걸쳐 거의 평균이 0이고 표준 편차를 1로 유지합니다. 이를 통해 그레이디언트 폭주와 소실 문제를 피할 수 있습니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qWawJStoLOpH", + "outputId": "38cfdca8-2c4e-4f69-ce42-366f34c49ee6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "np.random.seed(42)\n", + "Z = np.random.normal(size=(500, 100)) # 표준화된 입력\n", + "for layer in range(1000):\n", + " W = np.random.normal(size=(100, 100), scale=np.sqrt(1 / 100)) # LeCun 초기화\n", + " Z = selu(np.dot(Z, W))\n", + " means = np.mean(Z, axis=0).mean()\n", + " stds = np.std(Z, axis=0).mean()\n", + " if layer % 100 == 0:\n", + " print(\"Layer {}: mean {:.2f}, std deviation {:.2f}\".format(layer, means, stds))" + ], + "execution_count": 24, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Layer 0: mean -0.00, std deviation 1.00\n", + "Layer 100: mean 0.02, std deviation 0.96\n", + "Layer 200: mean 0.01, std deviation 0.90\n", + "Layer 300: mean -0.02, std deviation 0.92\n", + "Layer 400: mean 0.05, std deviation 0.89\n", + "Layer 500: mean 0.01, std deviation 0.93\n", + "Layer 600: mean 0.02, std deviation 0.92\n", + "Layer 700: mean -0.02, std deviation 0.90\n", + "Layer 800: mean 0.05, std deviation 0.83\n", + "Layer 900: mean 0.02, std deviation 1.00\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j5dAdmg4LOpI" + }, + "source": [ + "쉽게 SELU를 사용할 수 있습니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qFGwIMj2LOpI", + "outputId": "f9d69899-51cc-4c31-ecdf-429787846e20", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.layers.Dense(10, 
activation=\"selu\",\n", + " kernel_initializer=\"lecun_normal\")" + ], + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 25 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1D-HvWzgLOpI" + }, + "source": [ + "100개의 은닉층과 SELU 활성화 함수를 사용한 패션 MNIST를 위한 신경망을 만들어 보죠:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YS-krPA1LOpI" + }, + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ], + "execution_count": 26, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "fXLkoFMELOpI" + }, + "source": [ + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[28, 28]))\n", + "model.add(keras.layers.Dense(300, activation=\"selu\",\n", + " kernel_initializer=\"lecun_normal\"))\n", + "for layer in range(99):\n", + " model.add(keras.layers.Dense(100, activation=\"selu\",\n", + " kernel_initializer=\"lecun_normal\"))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))" + ], + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "hxrqewr6LOpI" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s2oGm-VSLOpJ" + }, + "source": [ + "이제 훈련해 보죠. 
입력을 평균 0과 표준 편차 1로 바꾸어야 한다는 것을 잊지 마세요:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FBS7ewjeLOpJ" + }, + "source": [ + "pixel_means = X_train.mean(axis=0, keepdims=True)\n", + "pixel_stds = X_train.std(axis=0, keepdims=True)\n", + "X_train_scaled = (X_train - pixel_means) / pixel_stds\n", + "X_valid_scaled = (X_valid - pixel_means) / pixel_stds\n", + "X_test_scaled = (X_test - pixel_means) / pixel_stds" + ], + "execution_count": 29, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "DVONLiENLOpJ", + "outputId": "fa5c2881-0f6e-41e1-dbb4-b628f3e84538", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train_scaled, y_train, epochs=5,\n", + " validation_data=(X_valid_scaled, y_valid))" + ], + "execution_count": 30, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "1719/1719 [==============================] - 17s 8ms/step - loss: 1.5863 - accuracy: 0.3956 - val_loss: 0.9179 - val_accuracy: 0.6592\n", + "Epoch 2/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.8182 - accuracy: 0.6961 - val_loss: 0.6438 - val_accuracy: 0.7696\n", + "Epoch 3/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.6735 - accuracy: 0.7521 - val_loss: 0.6299 - val_accuracy: 0.7524\n", + "Epoch 4/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.5877 - accuracy: 0.7837 - val_loss: 0.5827 - val_accuracy: 0.7988\n", + "Epoch 5/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.5487 - accuracy: 0.8016 - val_loss: 0.5130 - val_accuracy: 0.8224\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5k-Ud-OSLOpJ" + }, + "source": [ + "대신 ReLU 활성화 함수를 사용하면 어떤 일이 일어나는지 확인해 보죠:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EFFeHw5pLOpK" + }, + "source": [ + "np.random.seed(42)\n", + "tf.random.set_seed(42)" + ], + 
"execution_count": 31, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "EIUDPcNJLOpK" + }, + "source": [ + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[28, 28]))\n", + "model.add(keras.layers.Dense(300, activation=\"relu\", kernel_initializer=\"he_normal\"))\n", + "for layer in range(99):\n", + " model.add(keras.layers.Dense(100, activation=\"relu\", kernel_initializer=\"he_normal\"))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))" + ], + "execution_count": 32, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "xZxJ2w38LOpK" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "R94X2ls5LOpL", + "outputId": "3e1ad881-0171-4bd1-9ff5-7d969926d285", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train_scaled, y_train, epochs=5,\n", + " validation_data=(X_valid_scaled, y_valid))" + ], + "execution_count": 34, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "1719/1719 [==============================] - 17s 8ms/step - loss: 2.0576 - accuracy: 0.2020 - val_loss: 1.3588 - val_accuracy: 0.3958\n", + "Epoch 2/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 1.2621 - accuracy: 0.4541 - val_loss: 0.9904 - val_accuracy: 0.5662\n", + "Epoch 3/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.9950 - accuracy: 0.5893 - val_loss: 0.8148 - val_accuracy: 0.6720\n", + "Epoch 4/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.8142 - accuracy: 0.6783 - val_loss: 0.7579 - val_accuracy: 0.7082\n", + "Epoch 5/5\n", + "1719/1719 [==============================] - 14s 8ms/step - loss: 0.8113 - accuracy: 0.6770 - val_loss: 
0.8736 - val_accuracy: 0.6720\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pBPfwJ4WLOpL" + }, + "source": [ + "좋지 않군요. 그레이디언트 폭주나 소실 문제가 발생한 것입니다." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PVu2Q063LOpL" + }, + "source": [ + "# 배치 정규화" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RLBH8PjPLOpL" + }, + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Dense(300, activation=\"relu\"),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Dense(100, activation=\"relu\"),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])" + ], + "execution_count": 35, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "d5L_RucvLOpL", + "outputId": "b4b0c837-c48a-4ac0-da2d-a946764b8d58", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model.summary()" + ], + "execution_count": 36, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model: \"sequential_4\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "flatten_4 (Flatten) (None, 784) 0 \n", + "_________________________________________________________________\n", + "batch_normalization (BatchNo (None, 784) 3136 \n", + "_________________________________________________________________\n", + "dense_212 (Dense) (None, 300) 235500 \n", + "_________________________________________________________________\n", + "batch_normalization_1 (Batch (None, 300) 1200 \n", + "_________________________________________________________________\n", + "dense_213 (Dense) (None, 100) 30100 \n", + "_________________________________________________________________\n", + "batch_normalization_2 
(Batch (None, 100) 400 \n", + "_________________________________________________________________\n", + "dense_214 (Dense) (None, 10) 1010 \n", + "=================================================================\n", + "Total params: 271,346\n", + "Trainable params: 268,978\n", + "Non-trainable params: 2,368\n", + "_________________________________________________________________\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "S4YmgIVtLOpL", + "outputId": "1830a1e3-5a00-4dab-b6c1-df4a11b7c0b4", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "bn1 = model.layers[1]\n", + "[(var.name, var.trainable) for var in bn1.variables]" + ], + "execution_count": 37, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[('batch_normalization/gamma:0', True),\n", + " ('batch_normalization/beta:0', True),\n", + " ('batch_normalization/moving_mean:0', False),\n", + " ('batch_normalization/moving_variance:0', False)]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 37 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ya2gRrxPLOpM", + "outputId": "90f2b7c4-9fd7-4aa1-b182-9508cc95ddd1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "bn1.updates" + ], + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:1402: UserWarning: `layer.updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.\n", + " warnings.warn('`layer.updates` will be removed in a future version. 
'\n" + ], + "name": "stderr" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 38 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TqJdxp-vLOpN" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 39, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "yNutyZRELOpN", + "outputId": "ef471abc-7bc4-44be-cc40-5068a8b7b2c9", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train, y_train, epochs=10,\n", + " validation_data=(X_valid, y_valid))" + ], + "execution_count": 40, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 1.2287 - accuracy: 0.5994 - val_loss: 0.5525 - val_accuracy: 0.8230\n", + "Epoch 2/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5995 - accuracy: 0.7958 - val_loss: 0.4725 - val_accuracy: 0.8470\n", + "Epoch 3/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5312 - accuracy: 0.8172 - val_loss: 0.4375 - val_accuracy: 0.8550\n", + "Epoch 4/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4885 - accuracy: 0.8294 - val_loss: 0.4152 - val_accuracy: 0.8604\n", + "Epoch 5/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4718 - accuracy: 0.8349 - val_loss: 0.3997 - val_accuracy: 0.8644\n", + "Epoch 6/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4420 - accuracy: 0.8461 - val_loss: 0.3867 - val_accuracy: 0.8690\n", + "Epoch 7/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4286 - accuracy: 0.8495 - val_loss: 0.3763 - val_accuracy: 0.8702\n", + "Epoch 8/10\n", + "1719/1719 
[==============================] - 5s 3ms/step - loss: 0.4087 - accuracy: 0.8551 - val_loss: 0.3713 - val_accuracy: 0.8740\n", + "Epoch 9/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4080 - accuracy: 0.8564 - val_loss: 0.3631 - val_accuracy: 0.8750\n", + "Epoch 10/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3903 - accuracy: 0.8616 - val_loss: 0.3571 - val_accuracy: 0.8754\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IeYDw1BwLOpN" + }, + "source": [ + "이따금 활성화 함수전에 BN을 적용해도 잘 동작합니다(여기에는 논란의 여지가 있습니다). 또한 `BatchNormalization` 층 이전의 층은 편향을 위한 항이 필요 없습니다. `BatchNormalization` 층이 이를 무효화하기 때문입니다. 따라서 필요 없는 파라미터이므로 `use_bias=False`를 지정하여 층을 만들 수 있습니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-QU5Mn0fLOpN" + }, + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Dense(300, use_bias=False),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Activation(\"relu\"),\n", + " keras.layers.Dense(100, use_bias=False),\n", + " keras.layers.BatchNormalization(),\n", + " keras.layers.Activation(\"relu\"),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])" + ], + "execution_count": 41, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "1Y-7t8DsLOpN" + }, + "source": [ + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 42, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "fhatQCD6LOpN", + "outputId": "804ad977-910d-4ed8-a241-8e914ed0ac0c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model.fit(X_train, y_train, epochs=10,\n", + " validation_data=(X_valid, y_valid))" + ], + "execution_count": 43, + "outputs": [ 
+ { + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 1.3677 - accuracy: 0.5605 - val_loss: 0.6767 - val_accuracy: 0.7812\n", + "Epoch 2/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.7136 - accuracy: 0.7702 - val_loss: 0.5566 - val_accuracy: 0.8182\n", + "Epoch 3/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.6123 - accuracy: 0.7990 - val_loss: 0.5007 - val_accuracy: 0.8362\n", + "Epoch 4/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5547 - accuracy: 0.8148 - val_loss: 0.4666 - val_accuracy: 0.8448\n", + "Epoch 5/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5254 - accuracy: 0.8232 - val_loss: 0.4433 - val_accuracy: 0.8532\n", + "Epoch 6/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4947 - accuracy: 0.8326 - val_loss: 0.4262 - val_accuracy: 0.8550\n", + "Epoch 7/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4736 - accuracy: 0.8388 - val_loss: 0.4130 - val_accuracy: 0.8564\n", + "Epoch 8/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4550 - accuracy: 0.8443 - val_loss: 0.4034 - val_accuracy: 0.8612\n", + "Epoch 9/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4495 - accuracy: 0.8439 - val_loss: 0.3943 - val_accuracy: 0.8638\n", + "Epoch 10/10\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4333 - accuracy: 0.8495 - val_loss: 0.3874 - val_accuracy: 0.8660\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CWhcWzXCLOpO" + }, + "source": [ + "## 그레이디언트 클리핑" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L73OupHpLOpO" + }, + "source": [ + "모든 케라스 옵티마이저는 `clipnorm`이나 `clipvalue` 매개변수를 지원합니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PN-aVXL-LOpO" + 
}, + "source": [ + "optimizer = keras.optimizers.SGD(clipvalue=1.0)" + ], + "execution_count": 44, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "g9Wj01r7LOpO" + }, + "source": [ + "optimizer = keras.optimizers.SGD(clipnorm=1.0)" + ], + "execution_count": 45, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rxEJ6pNyLOpO" + }, + "source": [ + "## 사전 훈련된 층 재사용하기" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u-g_PGcmLOpO" + }, + "source": [ + "### 케라스 모델 재사용하기" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rdiVjfkOLOpO" + }, + "source": [ + "패션 MNIST 훈련 세트를 두 개로 나누어 보죠:\n", + "* `X_train_A`: 샌달과 셔츠(클래스 5와 6)을 제외한 모든 이미지\n", + "* `X_train_B`: 샌달과 셔츠 이미지 중 처음 200개만 가진 작은 훈련 세트\n", + "\n", + "검증 세트와 테스트 세트도 이렇게 나눕니다. 하지만 이미지 개수는 제한하지 않습니다.\n", + "\n", + "A 세트(8개의 클래스를 가진 분류 문제)에서 모델을 훈련하고 이를 재사용하여 B 세트(이진 분류)를 해결해 보겠습니다. A 작업에서 B 작업으로 약간의 지식이 전달되기를 기대합니다. 왜냐하면 A 세트의 클래스(스니커즈, 앵클 부츠, 코트, 티셔츠 등)가 B 세트에 있는 클래스(샌달과 셔츠)와 조금 비슷하기 때문입니다. 하지만 `Dense` 층을 사용하기 때문에 동일한 위치에 나타난 패턴만 재사용할 수 있습니다(반대로 합성곱 층은 훨씬 많은 정보를 전송합니다. 학습한 패턴을 이미지의 어느 위치에서나 감지할 수 있기 때문입니다. CNN 장에서 자세히 알아 보겠습니다)." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "QKHsibRsLOpO" + }, + "source": [ + "def split_dataset(X, y):\n", + " y_5_or_6 = (y == 5) | (y == 6) # sandals or shirts\n", + " y_A = y[~y_5_or_6]\n", + " y_A[y_A > 6] -= 2 # class indices 7, 8, 9 should be moved to 5, 6, 7\n", + " y_B = (y[y_5_or_6] == 6).astype(np.float32) # binary classification task: is it a shirt (class 6)?\n", + " return ((X[~y_5_or_6], y_A),\n", + " (X[y_5_or_6], y_B))\n", + "\n", + "(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)\n", + "(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)\n", + "(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)\n", + "X_train_B = X_train_B[:200]\n", + "y_train_B = y_train_B[:200]" + ], + "execution_count": 46, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "2s04GUdELOpO", + "outputId": "e7e01e53-fa88-485a-b086-c2269d7e2c61", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "X_train_A.shape" + ], + "execution_count": 47, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(43986, 28, 28)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 47 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kcEEehIGLOpP", + "outputId": "65d1002c-3fba-49fb-cd77-bea76dddc1c6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "X_train_B.shape" + ], + "execution_count": 48, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(200, 28, 28)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "smwubTT0LOpP", + "outputId": "c801b7f2-5397-483e-e840-d3141878d4ae", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "y_train_A[:30]" + ], + "execution_count": 49, + "outputs": [ + { + "output_type": 
"execute_result", + "data": { + "text/plain": [ + "array([4, 0, 5, 7, 7, 7, 4, 4, 3, 4, 0, 1, 6, 3, 4, 3, 2, 6, 5, 3, 4, 5,\n", + " 1, 3, 4, 2, 0, 6, 7, 1], dtype=uint8)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 49 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hNJqisASLOpP", + "outputId": "9551ff38-fe97-4a93-94d2-f84b19ab4efb", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "y_train_B[:30]" + ], + "execution_count": 50, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0.,\n", + " 0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.], dtype=float32)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 50 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2qkd71BzLOpP" + }, + "source": [ + "tf.random.set_seed(42)\n", + "np.random.seed(42)" + ], + "execution_count": 51, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "_dou1YzGLOpP" + }, + "source": [ + "model_A = keras.models.Sequential()\n", + "model_A.add(keras.layers.Flatten(input_shape=[28, 28]))\n", + "for n_hidden in (300, 100, 50, 50, 50):\n", + " model_A.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n", + "model_A.add(keras.layers.Dense(8, activation=\"softmax\"))" + ], + "execution_count": 52, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "C6TIeUe-LOpQ" + }, + "source": [ + "model_A.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 53, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "4xR6ixj8LOpQ", + "outputId": "ec8f3003-4b1d-4c68-f8fc-b154cc26a402", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model_A.fit(X_train_A, y_train_A, epochs=20,\n", + " validation_data=(X_valid_A, 
y_valid_A))" + ], + "execution_count": 54, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/20\n", + "1375/1375 [==============================] - 4s 2ms/step - loss: 0.9248 - accuracy: 0.6994 - val_loss: 0.3894 - val_accuracy: 0.8665\n", + "Epoch 2/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.3651 - accuracy: 0.8748 - val_loss: 0.3288 - val_accuracy: 0.8829\n", + "Epoch 3/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.3182 - accuracy: 0.8894 - val_loss: 0.3012 - val_accuracy: 0.8996\n", + "Epoch 4/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.3049 - accuracy: 0.8955 - val_loss: 0.2895 - val_accuracy: 0.9013\n", + "Epoch 5/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2804 - accuracy: 0.9027 - val_loss: 0.2774 - val_accuracy: 0.9063\n", + "Epoch 6/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2702 - accuracy: 0.9079 - val_loss: 0.2734 - val_accuracy: 0.9066\n", + "Epoch 7/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2626 - accuracy: 0.9088 - val_loss: 0.2720 - val_accuracy: 0.9083\n", + "Epoch 8/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2609 - accuracy: 0.9119 - val_loss: 0.2591 - val_accuracy: 0.9138\n", + "Epoch 9/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2558 - accuracy: 0.9110 - val_loss: 0.2563 - val_accuracy: 0.9143\n", + "Epoch 10/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2511 - accuracy: 0.9139 - val_loss: 0.2543 - val_accuracy: 0.9158\n", + "Epoch 11/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2431 - accuracy: 0.9173 - val_loss: 0.2496 - val_accuracy: 0.9153\n", + "Epoch 12/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2422 - accuracy: 0.9170 - val_loss: 0.2512 - val_accuracy: 0.9126\n", 
+ "Epoch 13/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2360 - accuracy: 0.9179 - val_loss: 0.2446 - val_accuracy: 0.9158\n", + "Epoch 14/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2267 - accuracy: 0.9230 - val_loss: 0.2416 - val_accuracy: 0.9175\n", + "Epoch 15/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2225 - accuracy: 0.9241 - val_loss: 0.2450 - val_accuracy: 0.9188\n", + "Epoch 16/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2262 - accuracy: 0.9214 - val_loss: 0.2386 - val_accuracy: 0.9193\n", + "Epoch 17/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2191 - accuracy: 0.9252 - val_loss: 0.2405 - val_accuracy: 0.9178\n", + "Epoch 18/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2172 - accuracy: 0.9253 - val_loss: 0.2426 - val_accuracy: 0.9158\n", + "Epoch 19/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2181 - accuracy: 0.9246 - val_loss: 0.2331 - val_accuracy: 0.9213\n", + "Epoch 20/20\n", + "1375/1375 [==============================] - 3s 2ms/step - loss: 0.2113 - accuracy: 0.9271 - val_loss: 0.2332 - val_accuracy: 0.9203\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8UMBGi07LOpQ" + }, + "source": [ + "model_A.save(\"my_model_A.h5\")" + ], + "execution_count": 55, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "qb4XQRc2LOpQ" + }, + "source": [ + "model_B = keras.models.Sequential()\n", + "model_B.add(keras.layers.Flatten(input_shape=[28, 28]))\n", + "for n_hidden in (300, 100, 50, 50, 50):\n", + " model_B.add(keras.layers.Dense(n_hidden, activation=\"selu\"))\n", + "model_B.add(keras.layers.Dense(1, activation=\"sigmoid\"))" + ], + "execution_count": 56, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "bTfx9aSGLOpQ" + }, + "source": [ + 
"model_B.compile(loss=\"binary_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 57, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "dd6o7rDmLOpQ", + "outputId": "b1022791-e04c-438b-db1d-7da924db831c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model_B.fit(X_train_B, y_train_B, epochs=20,\n", + " validation_data=(X_valid_B, y_valid_B))" + ], + "execution_count": 58, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/20\n", + "7/7 [==============================] - 1s 35ms/step - loss: 1.0360 - accuracy: 0.4975 - val_loss: 0.6314 - val_accuracy: 0.6004\n", + "Epoch 2/20\n", + "7/7 [==============================] - 0s 16ms/step - loss: 0.5883 - accuracy: 0.6971 - val_loss: 0.4784 - val_accuracy: 0.8529\n", + "Epoch 3/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.4380 - accuracy: 0.8854 - val_loss: 0.4102 - val_accuracy: 0.8945\n", + "Epoch 4/20\n", + "7/7 [==============================] - 0s 13ms/step - loss: 0.4021 - accuracy: 0.8712 - val_loss: 0.3647 - val_accuracy: 0.9178\n", + "Epoch 5/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.3361 - accuracy: 0.9348 - val_loss: 0.3300 - val_accuracy: 0.9320\n", + "Epoch 6/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.3113 - accuracy: 0.9233 - val_loss: 0.3019 - val_accuracy: 0.9402\n", + "Epoch 7/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.2817 - accuracy: 0.9299 - val_loss: 0.2804 - val_accuracy: 0.9422\n", + "Epoch 8/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.2632 - accuracy: 0.9379 - val_loss: 0.2606 - val_accuracy: 0.9473\n", + "Epoch 9/20\n", + "7/7 [==============================] - 0s 16ms/step - loss: 0.2373 - accuracy: 0.9481 - val_loss: 0.2428 - val_accuracy: 0.9523\n", + "Epoch 10/20\n", + "7/7 
[==============================] - 0s 14ms/step - loss: 0.2229 - accuracy: 0.9657 - val_loss: 0.2281 - val_accuracy: 0.9544\n", + "Epoch 11/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.2155 - accuracy: 0.9590 - val_loss: 0.2150 - val_accuracy: 0.9584\n", + "Epoch 12/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1834 - accuracy: 0.9738 - val_loss: 0.2036 - val_accuracy: 0.9584\n", + "Epoch 13/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1671 - accuracy: 0.9828 - val_loss: 0.1931 - val_accuracy: 0.9615\n", + "Epoch 14/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1527 - accuracy: 0.9915 - val_loss: 0.1838 - val_accuracy: 0.9635\n", + "Epoch 15/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1595 - accuracy: 0.9904 - val_loss: 0.1746 - val_accuracy: 0.9686\n", + "Epoch 16/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1473 - accuracy: 0.9937 - val_loss: 0.1674 - val_accuracy: 0.9686\n", + "Epoch 17/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1412 - accuracy: 0.9944 - val_loss: 0.1604 - val_accuracy: 0.9706\n", + "Epoch 18/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1242 - accuracy: 0.9931 - val_loss: 0.1539 - val_accuracy: 0.9706\n", + "Epoch 19/20\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1224 - accuracy: 0.9931 - val_loss: 0.1482 - val_accuracy: 0.9716\n", + "Epoch 20/20\n", + "7/7 [==============================] - 0s 15ms/step - loss: 0.1096 - accuracy: 0.9912 - val_loss: 0.1431 - val_accuracy: 0.9716\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Bh48HMITLOpQ", + "outputId": "8996eafe-9ad5-4013-8f80-733cd9f94a78", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model.summary()" + ], + "execution_count": 59, + "outputs": [ + { + "output_type": "stream", + 
"text": [ + "Model: \"sequential_5\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "flatten_5 (Flatten) (None, 784) 0 \n", + "_________________________________________________________________\n", + "batch_normalization_3 (Batch (None, 784) 3136 \n", + "_________________________________________________________________\n", + "dense_215 (Dense) (None, 300) 235200 \n", + "_________________________________________________________________\n", + "batch_normalization_4 (Batch (None, 300) 1200 \n", + "_________________________________________________________________\n", + "activation (Activation) (None, 300) 0 \n", + "_________________________________________________________________\n", + "dense_216 (Dense) (None, 100) 30000 \n", + "_________________________________________________________________\n", + "batch_normalization_5 (Batch (None, 100) 400 \n", + "_________________________________________________________________\n", + "activation_1 (Activation) (None, 100) 0 \n", + "_________________________________________________________________\n", + "dense_217 (Dense) (None, 10) 1010 \n", + "=================================================================\n", + "Total params: 270,946\n", + "Trainable params: 268,578\n", + "Non-trainable params: 2,368\n", + "_________________________________________________________________\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xpurR4NDLOpQ" + }, + "source": [ + "model_A = keras.models.load_model(\"my_model_A.h5\")\n", + "model_B_on_A = keras.models.Sequential(model_A.layers[:-1])\n", + "model_B_on_A.add(keras.layers.Dense(1, activation=\"sigmoid\"))" + ], + "execution_count": 60, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "XPRZk6DqLOpR" + }, + "source": [ + "model_A_clone = keras.models.clone_model(model_A)\n", + 
"model_A_clone.set_weights(model_A.get_weights())" + ], + "execution_count": 61, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "kNxjby1PLOpR" + }, + "source": [ + "for layer in model_B_on_A.layers[:-1]:\n", + " layer.trainable = False\n", + "\n", + "model_B_on_A.compile(loss=\"binary_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 62, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "yvlj3S1jLOpR", + "outputId": "0a930150-7ddf-4eff-bd36-ac3bd55bb243", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=4,\n", + " validation_data=(X_valid_B, y_valid_B))\n", + "\n", + "for layer in model_B_on_A.layers[:-1]:\n", + " layer.trainable = True\n", + "\n", + "model_B_on_A.compile(loss=\"binary_crossentropy\",\n", + " optimizer=keras.optimizers.SGD(lr=1e-3),\n", + " metrics=[\"accuracy\"])\n", + "history = model_B_on_A.fit(X_train_B, y_train_B, epochs=16,\n", + " validation_data=(X_valid_B, y_valid_B))" + ], + "execution_count": 63, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/4\n", + "7/7 [==============================] - 1s 56ms/step - loss: 0.6098 - accuracy: 0.6233 - val_loss: 0.5795 - val_accuracy: 0.6389\n", + "Epoch 2/4\n", + "7/7 [==============================] - 0s 16ms/step - loss: 0.5498 - accuracy: 0.6707 - val_loss: 0.5424 - val_accuracy: 0.6815\n", + "Epoch 3/4\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.4847 - accuracy: 0.7509 - val_loss: 0.5107 - val_accuracy: 0.7110\n", + "Epoch 4/4\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.4849 - accuracy: 0.7405 - val_loss: 0.4823 - val_accuracy: 0.7343\n", + "Epoch 1/16\n", + "7/7 [==============================] - 1s 36ms/step - loss: 0.4345 - accuracy: 0.7823 - val_loss: 0.3440 - val_accuracy: 0.8671\n", + "Epoch 2/16\n", + "7/7 
[==============================] - 0s 14ms/step - loss: 0.2951 - accuracy: 0.9143 - val_loss: 0.2593 - val_accuracy: 0.9290\n", + "Epoch 3/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.2023 - accuracy: 0.9777 - val_loss: 0.2104 - val_accuracy: 0.9554\n", + "Epoch 4/16\n", + "7/7 [==============================] - 0s 15ms/step - loss: 0.1745 - accuracy: 0.9789 - val_loss: 0.1786 - val_accuracy: 0.9696\n", + "Epoch 5/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1341 - accuracy: 0.9809 - val_loss: 0.1559 - val_accuracy: 0.9767\n", + "Epoch 6/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1169 - accuracy: 0.9973 - val_loss: 0.1391 - val_accuracy: 0.9807\n", + "Epoch 7/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.1134 - accuracy: 0.9931 - val_loss: 0.1266 - val_accuracy: 0.9838\n", + "Epoch 8/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0998 - accuracy: 0.9931 - val_loss: 0.1163 - val_accuracy: 0.9858\n", + "Epoch 9/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0832 - accuracy: 1.0000 - val_loss: 0.1066 - val_accuracy: 0.9888\n", + "Epoch 10/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0774 - accuracy: 1.0000 - val_loss: 0.1000 - val_accuracy: 0.9899\n", + "Epoch 11/16\n", + "7/7 [==============================] - 0s 15ms/step - loss: 0.0689 - accuracy: 1.0000 - val_loss: 0.0940 - val_accuracy: 0.9899\n", + "Epoch 12/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0719 - accuracy: 1.0000 - val_loss: 0.0889 - val_accuracy: 0.9899\n", + "Epoch 13/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0566 - accuracy: 1.0000 - val_loss: 0.0840 - val_accuracy: 0.9899\n", + "Epoch 14/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0494 - accuracy: 1.0000 - val_loss: 0.0803 - val_accuracy: 0.9899\n", + "Epoch 15/16\n", + "7/7 
[==============================] - 0s 15ms/step - loss: 0.0545 - accuracy: 1.0000 - val_loss: 0.0770 - val_accuracy: 0.9899\n", + "Epoch 16/16\n", + "7/7 [==============================] - 0s 14ms/step - loss: 0.0472 - accuracy: 1.0000 - val_loss: 0.0740 - val_accuracy: 0.9899\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ceskFW97LOpR" + }, + "source": [ + "마지막 점수는 어떤가요?" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "fmdxUJ9FLOpR", + "outputId": "6d812b28-28b8-46e3-eebc-84aa1f6bbb7f", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model_B.evaluate(X_test_B, y_test_B)" + ], + "execution_count": 64, + "outputs": [ + { + "output_type": "stream", + "text": [ + "63/63 [==============================] - 0s 2ms/step - loss: 0.1408 - accuracy: 0.9705\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.1408407986164093, 0.9704999923706055]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 64 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "804aagCRLOpR", + "outputId": "bfa379e0-7c67-45bd-ff9c-177f0ceb1645", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model_B_on_A.evaluate(X_test_B, y_test_B)" + ], + "execution_count": 65, + "outputs": [ + { + "output_type": "stream", + "text": [ + "63/63 [==============================] - 0s 2ms/step - loss: 0.0682 - accuracy: 0.9935\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.06819812208414078, 0.9934999942779541]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 65 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eoEbJs2sLOpR" + }, + "source": [ + "훌륭하네요! 꽤 많은 정보를 전달했습니다: 오차율이 4배나 줄었네요!" 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2Oge57HRLOpS", + "outputId": "213f9022-8f47-46c8-bb2f-e3d375e8b215", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "(100 - 96.95) / (100 - 99.25)" + ], + "execution_count": 66, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "4.066666666666663" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 66 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t1Y9NJBjLOpS" + }, + "source": [ + "# 고속 옵티마이저" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ie9qzCm6LOpS" + }, + "source": [ + "## 모멘텀 옵티마이저" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-T_eBS6iLOpS" + }, + "source": [ + "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9)" + ], + "execution_count": 67, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gvoYzuDRLOpT" + }, + "source": [ + "## 네스테로프 가속 경사" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DbUw8WL2LOpT" + }, + "source": [ + "optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9, nesterov=True)" + ], + "execution_count": 68, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MzY29JPMLOpT" + }, + "source": [ + "## AdaGrad" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dR9NgMEoLOpT" + }, + "source": [ + "optimizer = keras.optimizers.Adagrad(lr=0.001)" + ], + "execution_count": 69, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RdzjwRZOLOpT" + }, + "source": [ + "## RMSProp" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TkjnInhCLOpT" + }, + "source": [ + "optimizer = keras.optimizers.RMSprop(lr=0.001, rho=0.9)" + ], + "execution_count": 70, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Aqd3XjSlLOpT" + }, + "source": [ + "## Adam 옵티마이저" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": 
"ibdJe_cILOpU" + }, + "source": [ + "optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)" + ], + "execution_count": 71, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vOuhwhjQLOpU" + }, + "source": [ + "## Adamax 옵티마이저" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_pwlFvReLOpU" + }, + "source": [ + "optimizer = keras.optimizers.Adamax(lr=0.001, beta_1=0.9, beta_2=0.999)" + ], + "execution_count": 72, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-UL-CtdMLOpU" + }, + "source": [ + "## Nadam 옵티마이저" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "L2jGWQ8vLOpU" + }, + "source": [ + "optimizer = keras.optimizers.Nadam(lr=0.001, beta_1=0.9, beta_2=0.999)" + ], + "execution_count": 73, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ow2z1pbiLOpU" + }, + "source": [ + "## 학습률 스케줄링" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DziEJzDxLOpU" + }, + "source": [ + "### 거듭제곱 스케줄링" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_QKqYIBSLOpU" + }, + "source": [ + "```lr = lr0 / (1 + steps / s)**c```\n", + "* 케라스는 `c=1`과 `s = 1 / decay`을 사용합니다" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mEhuxik_LOpU" + }, + "source": [ + "optimizer = keras.optimizers.SGD(lr=0.01, decay=1e-4)" + ], + "execution_count": 74, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "3ZRa8D0PLOpV" + }, + "source": [ + "model = keras.models.Sequential([\n", + " keras.layers.Flatten(input_shape=[28, 28]),\n", + " keras.layers.Dense(300, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.Dense(100, activation=\"selu\", kernel_initializer=\"lecun_normal\"),\n", + " keras.layers.Dense(10, activation=\"softmax\")\n", + "])\n", + "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=optimizer, metrics=[\"accuracy\"])" + ], + "execution_count": 75, + "outputs": [] 
+ }, + { + "cell_type": "code", + "metadata": { + "id": "VpPb6h-cLOpV", + "outputId": "4ef44500-2026-4c95-a8d1-e5ad2bcc4757", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "n_epochs = 25\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid))" + ], + "execution_count": 76, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/25\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.5980 - accuracy: 0.7933 - val_loss: 0.4029 - val_accuracy: 0.8604\n", + "Epoch 2/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.3830 - accuracy: 0.8637 - val_loss: 0.3716 - val_accuracy: 0.8720\n", + "Epoch 3/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.3492 - accuracy: 0.8773 - val_loss: 0.3749 - val_accuracy: 0.8732\n", + "Epoch 4/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.3276 - accuracy: 0.8812 - val_loss: 0.3499 - val_accuracy: 0.8800\n", + "Epoch 5/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.3172 - accuracy: 0.8858 - val_loss: 0.3448 - val_accuracy: 0.8788\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2923 - accuracy: 0.8938 - val_loss: 0.3410 - val_accuracy: 0.8832\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2870 - accuracy: 0.8972 - val_loss: 0.3354 - val_accuracy: 0.8870\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2721 - accuracy: 0.9032 - val_loss: 0.3405 - val_accuracy: 0.8836\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2729 - accuracy: 0.9004 - val_loss: 0.3288 - val_accuracy: 0.8888\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2584 - accuracy: 0.9071 - val_loss: 0.3258 - val_accuracy: 
0.8884\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2529 - accuracy: 0.9096 - val_loss: 0.3265 - val_accuracy: 0.8880\n", + "Epoch 12/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2484 - accuracy: 0.9100 - val_loss: 0.3331 - val_accuracy: 0.8826\n", + "Epoch 13/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2420 - accuracy: 0.9145 - val_loss: 0.3252 - val_accuracy: 0.8892\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2371 - accuracy: 0.9147 - val_loss: 0.3286 - val_accuracy: 0.8898\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2363 - accuracy: 0.9158 - val_loss: 0.3240 - val_accuracy: 0.8876\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 4s 2ms/step - loss: 0.2311 - accuracy: 0.9176 - val_loss: 0.3201 - val_accuracy: 0.8900\n", + "Epoch 17/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2234 - accuracy: 0.9207 - val_loss: 0.3233 - val_accuracy: 0.8906\n", + "Epoch 18/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2248 - accuracy: 0.9194 - val_loss: 0.3187 - val_accuracy: 0.8938\n", + "Epoch 19/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2236 - accuracy: 0.9213 - val_loss: 0.3227 - val_accuracy: 0.8904\n", + "Epoch 20/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2229 - accuracy: 0.9221 - val_loss: 0.3208 - val_accuracy: 0.8916\n", + "Epoch 21/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2194 - accuracy: 0.9233 - val_loss: 0.3204 - val_accuracy: 0.8914\n", + "Epoch 22/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2163 - accuracy: 0.9231 - val_loss: 0.3177 - val_accuracy: 0.8942\n", + "Epoch 23/25\n", + "1719/1719 [==============================] - 3s 2ms/step - 
loss: 0.2129 - accuracy: 0.9243 - val_loss: 0.3192 - val_accuracy: 0.8902\n", + "Epoch 24/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2077 - accuracy: 0.9276 - val_loss: 0.3210 - val_accuracy: 0.8894\n", + "Epoch 25/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2102 - accuracy: 0.9257 - val_loss: 0.3210 - val_accuracy: 0.8926\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "We15bby8LOpV", + "outputId": "9c836093-b473-4b24-8eab-49e38af1f30e", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 303 + } + }, + "source": [ + "learning_rate = 0.01\n", + "decay = 1e-4\n", + "batch_size = 32\n", + "n_steps_per_epoch = len(X_train) // batch_size\n", + "epochs = np.arange(n_epochs)\n", + "lrs = learning_rate / (1 + decay * epochs * n_steps_per_epoch)\n", + "\n", + "plt.plot(epochs, lrs, \"o-\")\n", + "plt.axis([0, n_epochs - 1, 0, 0.01])\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Learning Rate\")\n", + "plt.title(\"Power Scheduling\", fontsize=14)\n", + "plt.grid(True)\n", + "plt.show()" + ], + "execution_count": 77, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
def exponential_decay(lr0, s):
    """Build an epoch-based exponential learning-rate schedule.

    The returned function implements ``lr = lr0 * 0.1**(epoch / s)``: the
    learning rate starts at ``lr0`` and is divided by 10 every ``s`` epochs.

    Args:
        lr0: initial learning rate (value returned for epoch 0).
        s: number of epochs over which the rate drops by a factor of 10.

    Returns:
        A one-argument callable ``epoch -> learning rate``.
    """
    def schedule(epoch):
        # Number of factor-of-10 reductions elapsed so far (fractional).
        decades = epoch / s
        return lr0 * 0.1 ** decades

    return schedule
"output_type": "stream", + "text": [ + "Epoch 1/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 1.1106 - accuracy: 0.7344 - val_loss: 0.8633 - val_accuracy: 0.7328\n", + "Epoch 2/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.7067 - accuracy: 0.7824 - val_loss: 0.7695 - val_accuracy: 0.7576\n", + "Epoch 3/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.6652 - accuracy: 0.8040 - val_loss: 0.6658 - val_accuracy: 0.8108\n", + "Epoch 4/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5463 - accuracy: 0.8319 - val_loss: 0.5714 - val_accuracy: 0.8504\n", + "Epoch 5/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.5098 - accuracy: 0.8400 - val_loss: 0.5460 - val_accuracy: 0.8506\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.4484 - accuracy: 0.8567 - val_loss: 0.5718 - val_accuracy: 0.8620\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4328 - accuracy: 0.8703 - val_loss: 0.4861 - val_accuracy: 0.8628\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3765 - accuracy: 0.8795 - val_loss: 0.5380 - val_accuracy: 0.8580\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3632 - accuracy: 0.8844 - val_loss: 0.4584 - val_accuracy: 0.8704\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.3122 - accuracy: 0.8944 - val_loss: 0.4571 - val_accuracy: 0.8796\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2846 - accuracy: 0.9025 - val_loss: 0.4326 - val_accuracy: 0.8786\n", + "Epoch 12/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2725 - accuracy: 0.9079 - val_loss: 0.4818 - val_accuracy: 0.8758\n", + "Epoch 13/25\n", + "1719/1719 
[==============================] - 5s 3ms/step - loss: 0.2495 - accuracy: 0.9137 - val_loss: 0.4428 - val_accuracy: 0.8822\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2275 - accuracy: 0.9207 - val_loss: 0.4235 - val_accuracy: 0.8786\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2211 - accuracy: 0.9237 - val_loss: 0.4484 - val_accuracy: 0.8858\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2007 - accuracy: 0.9297 - val_loss: 0.4862 - val_accuracy: 0.8778\n", + "Epoch 17/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1796 - accuracy: 0.9362 - val_loss: 0.4971 - val_accuracy: 0.8846\n", + "Epoch 18/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1682 - accuracy: 0.9414 - val_loss: 0.5270 - val_accuracy: 0.8832\n", + "Epoch 19/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1587 - accuracy: 0.9450 - val_loss: 0.5135 - val_accuracy: 0.8902\n", + "Epoch 20/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1439 - accuracy: 0.9486 - val_loss: 0.4861 - val_accuracy: 0.8860\n", + "Epoch 21/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1377 - accuracy: 0.9510 - val_loss: 0.5442 - val_accuracy: 0.8870\n", + "Epoch 22/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1350 - accuracy: 0.9546 - val_loss: 0.5270 - val_accuracy: 0.8890\n", + "Epoch 23/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1185 - accuracy: 0.9589 - val_loss: 0.5708 - val_accuracy: 0.8870\n", + "Epoch 24/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.1078 - accuracy: 0.9637 - val_loss: 0.6175 - val_accuracy: 0.8864\n", + "Epoch 25/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.1110 - accuracy: 0.9623 - 
val_loss: 0.6164 - val_accuracy: 0.8856\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "o06AUsZjLOpW", + "outputId": "d9bf73fb-2b6d-4c02-eabd-61d2384be458", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 303 + } + }, + "source": [ + "plt.plot(history.epoch, history.history[\"lr\"], \"o-\")\n", + "plt.axis([0, n_epochs - 1, 0, 0.011])\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Learning Rate\")\n", + "plt.title(\"Exponential Scheduling\", fontsize=14)\n", + "plt.grid(True)\n", + "plt.show()" + ], + "execution_count": 82, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
class ExponentialDecay(keras.callbacks.Callback):
    """Keras callback that decays the learning rate exponentially per batch.

    At the start of every batch the optimizer's learning rate is multiplied by
    ``0.1**(1 / s)``, so after ``s`` batches it has been divided by 10.

    Args:
        s: number of training steps (batches) over which the learning rate
            drops by a factor of 10. Defaults to 40000.
    """

    def __init__(self, s=40000):
        super().__init__()
        self.s = s

    def on_batch_begin(self, batch, logs=None):
        # Note: the `batch` argument is reset at each epoch, so the decay is
        # applied incrementally to the current rate instead of being
        # recomputed from the batch index.
        lr = K.get_value(self.model.optimizer.lr)
        # Use the instance's own `s` (not a module-level global) so that the
        # constructor argument actually controls the decay speed.
        K.set_value(self.model.optimizer.lr, lr * 0.1**(1 / self.s))

    def on_epoch_end(self, epoch, logs=None):
        # Record the learning rate reached at the end of the epoch in the
        # logs dict under the "lr" key.
        logs = logs or {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)
model.fit(X_train_scaled, y_train, epochs=n_epochs,\n", + " validation_data=(X_valid_scaled, y_valid),\n", + " callbacks=[exp_decay])" + ], + "execution_count": 84, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 1.1115 - accuracy: 0.7381 - val_loss: 0.7662 - val_accuracy: 0.7760\n", + "Epoch 2/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.6374 - accuracy: 0.7985 - val_loss: 0.5625 - val_accuracy: 0.8226\n", + "Epoch 3/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.5862 - accuracy: 0.8178 - val_loss: 0.8932 - val_accuracy: 0.7694\n", + "Epoch 4/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.5377 - accuracy: 0.8325 - val_loss: 0.4869 - val_accuracy: 0.8520\n", + "Epoch 5/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.4675 - accuracy: 0.8487 - val_loss: 0.4377 - val_accuracy: 0.8692\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4113 - accuracy: 0.8651 - val_loss: 0.4454 - val_accuracy: 0.8648\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.3786 - accuracy: 0.8751 - val_loss: 0.5048 - val_accuracy: 0.8596\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.3625 - accuracy: 0.8837 - val_loss: 0.4919 - val_accuracy: 0.8526\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.3259 - accuracy: 0.8895 - val_loss: 0.4738 - val_accuracy: 0.8604\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.3111 - accuracy: 0.8963 - val_loss: 0.4203 - val_accuracy: 0.8876\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.2740 - accuracy: 0.9079 - val_loss: 0.4345 - val_accuracy: 0.8852\n", + "Epoch 12/25\n", + "1719/1719 
[==============================] - 6s 3ms/step - loss: 0.2592 - accuracy: 0.9117 - val_loss: 0.4351 - val_accuracy: 0.8820\n", + "Epoch 13/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2383 - accuracy: 0.9200 - val_loss: 0.4287 - val_accuracy: 0.8882\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.2309 - accuracy: 0.9236 - val_loss: 0.4414 - val_accuracy: 0.8838\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.2098 - accuracy: 0.9290 - val_loss: 0.4232 - val_accuracy: 0.8916\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1936 - accuracy: 0.9342 - val_loss: 0.4336 - val_accuracy: 0.8896\n", + "Epoch 17/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1782 - accuracy: 0.9392 - val_loss: 0.4625 - val_accuracy: 0.8890\n", + "Epoch 18/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1644 - accuracy: 0.9456 - val_loss: 0.4693 - val_accuracy: 0.8904\n", + "Epoch 19/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1554 - accuracy: 0.9496 - val_loss: 0.4725 - val_accuracy: 0.8982\n", + "Epoch 20/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1481 - accuracy: 0.9515 - val_loss: 0.5076 - val_accuracy: 0.8902\n", + "Epoch 21/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1368 - accuracy: 0.9555 - val_loss: 0.5094 - val_accuracy: 0.8940\n", + "Epoch 22/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1278 - accuracy: 0.9575 - val_loss: 0.5544 - val_accuracy: 0.8934\n", + "Epoch 23/25\n", + "1719/1719 [==============================] - 6s 3ms/step - loss: 0.1194 - accuracy: 0.9619 - val_loss: 0.5734 - val_accuracy: 0.8902\n", + "Epoch 24/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.1135 - accuracy: 0.9647 - 
# Theoretical learning-rate curve of the per-batch exponential decay:
# one point per training step (32 is the batch size used for training).
n_steps = (n_epochs * len(X_train)) // 32
steps = np.arange(n_steps)
# lr(step) = lr0 * 0.1 ** (step / s)
lrs = lr0 * np.power(0.1, steps / s)
def piecewise_constant(boundaries, values):
    """Build a piecewise-constant learning-rate schedule.

    Args:
        boundaries: increasing epoch numbers at which the learning rate
            changes (any sequence: list, tuple or array).
        values: learning rates, one more than ``boundaries``. ``values[i]``
            is used for epochs below ``boundaries[i]``; the last value is
            used from the final boundary onwards.

    Returns:
        A function ``epoch -> learning rate`` suitable for
        ``keras.callbacks.LearningRateScheduler``.

    Raises:
        ValueError: if ``values`` does not hold exactly one more item than
            ``boundaries``.
    """
    boundaries = np.asarray(boundaries)
    values = np.asarray(values)
    if boundaries.ndim != 1 or len(values) != len(boundaries) + 1:
        raise ValueError(
            "values must contain exactly one more item than boundaries")

    def piecewise_constant_fn(epoch):
        # Number of boundaries already reached == index of the active rate.
        # (Epochs below the first boundary, including negative ones, map to
        # values[0]; no reliance on negative-index wrap-around.)
        return values[np.searchsorted(boundaries, epoch, side="right")]
    return piecewise_constant_fn
1/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 1.2318 - accuracy: 0.7142 - val_loss: 0.9157 - val_accuracy: 0.7232\n", + "Epoch 2/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.7203 - accuracy: 0.7776 - val_loss: 0.6123 - val_accuracy: 0.8198\n", + "Epoch 3/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.8270 - accuracy: 0.7619 - val_loss: 1.4205 - val_accuracy: 0.6206\n", + "Epoch 4/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.8446 - accuracy: 0.7524 - val_loss: 0.9360 - val_accuracy: 0.7016\n", + "Epoch 5/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.8689 - accuracy: 0.7226 - val_loss: 0.8600 - val_accuracy: 0.7660\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.6233 - accuracy: 0.7984 - val_loss: 0.6819 - val_accuracy: 0.8138\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5443 - accuracy: 0.8315 - val_loss: 0.5935 - val_accuracy: 0.8408\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.5136 - accuracy: 0.8388 - val_loss: 0.7235 - val_accuracy: 0.7882\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.5320 - accuracy: 0.8341 - val_loss: 0.6815 - val_accuracy: 0.7836\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4771 - accuracy: 0.8513 - val_loss: 0.6984 - val_accuracy: 0.8196\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4934 - accuracy: 0.8493 - val_loss: 0.6040 - val_accuracy: 0.8466\n", + "Epoch 12/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4599 - accuracy: 0.8600 - val_loss: 0.6230 - val_accuracy: 0.8420\n", + "Epoch 13/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.4432 - accuracy: 
0.8611 - val_loss: 0.5845 - val_accuracy: 0.8510\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4706 - accuracy: 0.8597 - val_loss: 0.6285 - val_accuracy: 0.8418\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.4539 - accuracy: 0.8629 - val_loss: 0.6618 - val_accuracy: 0.8432\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 4s 3ms/step - loss: 0.3486 - accuracy: 0.8865 - val_loss: 0.4722 - val_accuracy: 0.8680\n", + "Epoch 17/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2977 - accuracy: 0.9014 - val_loss: 0.4846 - val_accuracy: 0.8630\n", + "Epoch 18/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2881 - accuracy: 0.9052 - val_loss: 0.5084 - val_accuracy: 0.8698\n", + "Epoch 19/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2758 - accuracy: 0.9102 - val_loss: 0.4696 - val_accuracy: 0.8700\n", + "Epoch 20/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2721 - accuracy: 0.9082 - val_loss: 0.4798 - val_accuracy: 0.8736\n", + "Epoch 21/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2666 - accuracy: 0.9129 - val_loss: 0.5165 - val_accuracy: 0.8674\n", + "Epoch 22/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2562 - accuracy: 0.9160 - val_loss: 0.5196 - val_accuracy: 0.8728\n", + "Epoch 23/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2504 - accuracy: 0.9179 - val_loss: 0.5501 - val_accuracy: 0.8680\n", + "Epoch 24/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2419 - accuracy: 0.9202 - val_loss: 0.6129 - val_accuracy: 0.8692\n", + "Epoch 25/25\n", + "1719/1719 [==============================] - 5s 3ms/step - loss: 0.2459 - accuracy: 0.9209 - val_loss: 0.5537 - val_accuracy: 0.8682\n" + ], + "name": "stdout" + } + ] + }, + 
# Evaluate the schedule function at every epoch of the last run and plot it.
plt.plot(history.epoch, list(map(piecewise_constant_fn, history.epoch)),
         marker="o", linestyle="-")
# Same axis limits as the other schedule plots.
plt.xlim(0, n_epochs - 1)
plt.ylim(0, 0.011)
plt.grid(True)
plt.title("Piecewise Constant Scheduling", fontsize=14)
plt.ylabel("Learning Rate")
plt.xlabel("Epoch")
plt.show()
# Performance scheduling: multiply the learning rate by 0.5 whenever the
# monitored metric (the callback's default) has not improved for 5 epochs.
lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(10, activation="softmax")
])
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer = keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
n_epochs = 25
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid),
                    callbacks=[lr_scheduler])
2ms/step - loss: 0.5018 - accuracy: 0.8470 - val_loss: 0.4818 - val_accuracy: 0.8566\n", + "Epoch 5/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5284 - accuracy: 0.8433 - val_loss: 0.5047 - val_accuracy: 0.8502\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5061 - accuracy: 0.8549 - val_loss: 0.5721 - val_accuracy: 0.8520\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5315 - accuracy: 0.8530 - val_loss: 0.5131 - val_accuracy: 0.8626\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.5037 - accuracy: 0.8578 - val_loss: 0.6451 - val_accuracy: 0.8064\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.4976 - accuracy: 0.8588 - val_loss: 0.5515 - val_accuracy: 0.8620\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.3127 - accuracy: 0.8938 - val_loss: 0.4293 - val_accuracy: 0.8776\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2376 - accuracy: 0.9126 - val_loss: 0.4315 - val_accuracy: 0.8856\n", + "Epoch 12/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2253 - accuracy: 0.9182 - val_loss: 0.4620 - val_accuracy: 0.8760\n", + "Epoch 13/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2032 - accuracy: 0.9240 - val_loss: 0.4370 - val_accuracy: 0.8890\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.1862 - accuracy: 0.9286 - val_loss: 0.5031 - val_accuracy: 0.8702\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.1882 - accuracy: 0.9294 - val_loss: 0.4409 - val_accuracy: 0.8862\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.1423 - accuracy: 0.9449 - val_loss: 0.4379 - val_accuracy: 0.8930\n", + 
# Left axis (blue): learning rate logged at each epoch.
plt.plot(history.epoch, history.history["lr"],
         color="b", marker="o", linestyle="-")
plt.xlabel("Epoch")
plt.ylabel("Learning Rate", color='b')
plt.tick_params(axis='y', colors='b')
plt.xlim(0, n_epochs - 1)
plt.grid(True)

# Right axis (red): validation loss, sharing the same epoch axis.
ax2 = plt.gca().twinx()
ax2.plot(history.epoch, history.history["val_loss"],
         color="r", marker="^", linestyle="-")
ax2.set_ylabel('Validation Loss', color='r')
ax2.tick_params(axis='y', colors='r')

# The title is set after the twin axis is created, as in the original cell.
plt.title("Reduce LR on Plateau", fontsize=14)
plt.show()
n_epochs = 25
s = 20 * len(X_train) // 32  # number of steps in 20 epochs (batch size = 32)

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(10, activation="softmax")
])

# Built-in schedule object: start at 0.01 and decay by a factor of 0.1
# every `s` optimizer steps. The optimizer evaluates it at each step, so no
# callback is needed.
learning_rate = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=s, decay_rate=0.1)
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid))
2ms/step - loss: 0.3176 - accuracy: 0.8866 - val_loss: 0.3430 - val_accuracy: 0.8792\n", + "Epoch 6/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2928 - accuracy: 0.8954 - val_loss: 0.3414 - val_accuracy: 0.8812\n", + "Epoch 7/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2852 - accuracy: 0.8985 - val_loss: 0.3356 - val_accuracy: 0.8816\n", + "Epoch 8/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2713 - accuracy: 0.9039 - val_loss: 0.3365 - val_accuracy: 0.8814\n", + "Epoch 9/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2713 - accuracy: 0.9044 - val_loss: 0.3266 - val_accuracy: 0.8860\n", + "Epoch 10/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2569 - accuracy: 0.9084 - val_loss: 0.3240 - val_accuracy: 0.8848\n", + "Epoch 11/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2500 - accuracy: 0.9115 - val_loss: 0.3252 - val_accuracy: 0.8866\n", + "Epoch 12/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2452 - accuracy: 0.9147 - val_loss: 0.3302 - val_accuracy: 0.8812\n", + "Epoch 13/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2408 - accuracy: 0.9155 - val_loss: 0.3219 - val_accuracy: 0.8858\n", + "Epoch 14/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2377 - accuracy: 0.9159 - val_loss: 0.3223 - val_accuracy: 0.8864\n", + "Epoch 15/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2377 - accuracy: 0.9171 - val_loss: 0.3209 - val_accuracy: 0.8878\n", + "Epoch 16/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2316 - accuracy: 0.9192 - val_loss: 0.3185 - val_accuracy: 0.8896\n", + "Epoch 17/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2264 - accuracy: 0.9213 - val_loss: 0.3198 - val_accuracy: 0.8886\n", + 
"Epoch 18/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2283 - accuracy: 0.9188 - val_loss: 0.3169 - val_accuracy: 0.8902\n", + "Epoch 19/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2283 - accuracy: 0.9203 - val_loss: 0.3198 - val_accuracy: 0.8894\n", + "Epoch 20/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2287 - accuracy: 0.9218 - val_loss: 0.3170 - val_accuracy: 0.8902\n", + "Epoch 21/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2264 - accuracy: 0.9209 - val_loss: 0.3180 - val_accuracy: 0.8904\n", + "Epoch 22/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2256 - accuracy: 0.9200 - val_loss: 0.3164 - val_accuracy: 0.8914\n", + "Epoch 23/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2222 - accuracy: 0.9233 - val_loss: 0.3171 - val_accuracy: 0.8902\n", + "Epoch 24/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2180 - accuracy: 0.9241 - val_loss: 0.3166 - val_accuracy: 0.8898\n", + "Epoch 25/25\n", + "1719/1719 [==============================] - 3s 2ms/step - loss: 0.2221 - accuracy: 0.9234 - val_loss: 0.3165 - val_accuracy: 0.8914\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e7s1E4aILOpZ" + }, + "source": [ + "구간별 고정 스케줄링은 다음을 사용하세요:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mB1XJs34LOpa" + }, + "source": [ + "learning_rate = keras.optimizers.schedules.PiecewiseConstantDecay(\n", + " boundaries=[5. * n_steps_per_epoch, 15. 
K = keras.backend

class ExponentialLearningRate(keras.callbacks.Callback):
    """Callback that grows the learning rate by a constant factor per batch.

    After every batch it records the learning rate that was used and the
    loss reported in `logs`, then multiplies the learning rate by `factor`.

    Args:
        factor: multiplier applied to the learning rate after each batch.

    Attributes set on the instance:
        rates: learning rate used for each batch, in order.
        losses: `logs["loss"]` reported at the end of each batch, in order.
    """
    def __init__(self, factor):
        super().__init__()
        self.factor = factor
        self.rates = []
        self.losses = []

    def on_batch_end(self, batch, logs):
        lr = K.get_value(self.model.optimizer.lr)
        self.rates.append(lr)
        self.losses.append(logs["loss"])
        # Multiply the fetched value rather than the variable itself, so no
        # new tensor operation is created on every batch.
        K.set_value(self.model.optimizer.lr, lr * self.factor)

def find_learning_rate(model, X, y, epochs=1, batch_size=32, min_rate=10**-5, max_rate=10):
    """Sweep the learning rate exponentially from `min_rate` to `max_rate`.

    The model is trained for `epochs` epochs while the learning rate is
    multiplied by a constant factor after every batch. The model's weights
    and learning rate are restored afterwards, even if training is
    interrupted.

    Args:
        model: compiled Keras model whose optimizer exposes `lr`.
        X, y: training inputs and targets passed to `model.fit`.
        epochs: number of epochs to train for during the sweep.
        batch_size: batch size passed to `model.fit`.
        min_rate: learning rate used for the first batch.
        max_rate: learning rate reached at the end of the sweep.

    Returns:
        `(rates, losses)`: the per-batch learning rates and losses recorded
        by the `ExponentialLearningRate` callback.
    """
    init_weights = model.get_weights()
    # Keras also runs the final, partial batch of each epoch, so the number
    # of steps per epoch is the ceiling (not the floor) of len(X) / batch_size.
    iterations = int(np.ceil(len(X) / batch_size)) * epochs
    factor = np.exp(np.log(max_rate / min_rate) / iterations)
    init_lr = K.get_value(model.optimizer.lr)
    K.set_value(model.optimizer.lr, min_rate)
    exp_lr = ExponentialLearningRate(factor)
    try:
        model.fit(X, y, epochs=epochs, batch_size=batch_size,
                  callbacks=[exp_lr])
    finally:
        # Always put the model back in its initial state.
        K.set_value(model.optimizer.lr, init_lr)
        model.set_weights(init_weights)
    return exp_lr.rates, exp_lr.losses

def plot_lr_vs_loss(rates, losses):
    """Plot loss against learning rate on a logarithmic x axis.

    A horizontal line marks the smallest loss, and the y axis is cropped to
    the range between that minimum and the midpoint of the first loss and
    the minimum.

    Args:
        rates: learning rates, as returned by `find_learning_rate`.
        losses: matching losses, as returned by `find_learning_rate`.
    """
    lowest_loss = min(losses)
    lowest_rate, highest_rate = min(rates), max(rates)
    plt.plot(rates, losses)
    plt.gca().set_xscale('log')
    plt.hlines(lowest_loss, lowest_rate, highest_rate)
    plt.axis([lowest_rate, highest_rate, lowest_loss, (losses[0] + lowest_loss) / 2])
    plt.xlabel("Learning rate")
    plt.ylabel("Loss")
# Run the learning-rate sweep for one epoch with batches of 128 samples,
# then plot loss against learning rate.
# `batch_size` is kept as a notebook-level variable because the 1cycle
# training cell below reuses it.
batch_size = 128
rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)
plot_lr_vs_loss(rates, losses)
class OneCycleScheduler(keras.callbacks.Callback):
    """Callback implementing a 1cycle learning-rate schedule, set per batch.

    The rate is interpolated linearly in three phases:
      1. from `start_rate` up to `max_rate` over the first `half_iteration`
         batches,
      2. from `max_rate` back down to `start_rate` over the next
         `half_iteration` batches,
      3. from `start_rate` down to `last_rate` until `iterations` is
         reached; in this phase the rate never goes below `last_rate`.

    Args:
        iterations: total number of training batches.
        max_rate: peak learning rate.
        start_rate: initial learning rate; when falsy (None or 0) it
            defaults to `max_rate / 10`.
        last_iterations: number of batches in the final phase; when falsy
            it defaults to `iterations // 10 + 1`.
        last_rate: final learning rate; when falsy it defaults to
            `start_rate / 1000`.
    """
    def __init__(self, iterations, max_rate, start_rate=None,
                 last_iterations=None, last_rate=None):
        super().__init__()
        self.iterations = iterations
        self.max_rate = max_rate
        self.start_rate = start_rate or max_rate / 10
        self.last_iterations = last_iterations or iterations // 10 + 1
        # Length of each of the two symmetric ramp phases.
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_rate = last_rate or self.start_rate / 1000
        # Global batch counter (the `batch` hook argument is not used).
        self.iteration = 0

    def _interpolate(self, iter1, iter2, rate1, rate2):
        """Linearly interpolate between (iter1, rate1) and (iter2, rate2)
        at the current `self.iteration`."""
        return ((rate2 - rate1) * (self.iteration - iter1)
                / (iter2 - iter1) + rate1)

    def on_batch_begin(self, batch, logs=None):
        if self.iteration < self.half_iteration:
            rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)
        elif self.iteration < 2 * self.half_iteration:
            rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,
                                     self.max_rate, self.start_rate)
        else:
            rate = self._interpolate(2 * self.half_iteration, self.iterations,
                                     self.start_rate, self.last_rate)
            # Training may run past `iterations`; clamp so the extrapolated
            # rate never drops below the final rate.
            rate = max(rate, self.last_rate)
        self.iteration += 1
        K.set_value(self.model.optimizer.lr, rate)
"execution_count": 100, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.6572 - accuracy: 0.7739 - val_loss: 0.4871 - val_accuracy: 0.8336\n", + "Epoch 2/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.4581 - accuracy: 0.8396 - val_loss: 0.4274 - val_accuracy: 0.8526\n", + "Epoch 3/25\n", + "430/430 [==============================] - 1s 2ms/step - loss: 0.4121 - accuracy: 0.8546 - val_loss: 0.4114 - val_accuracy: 0.8584\n", + "Epoch 4/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.3837 - accuracy: 0.8641 - val_loss: 0.3871 - val_accuracy: 0.8688\n", + "Epoch 5/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.3639 - accuracy: 0.8717 - val_loss: 0.3765 - val_accuracy: 0.8680\n", + "Epoch 6/25\n", + "430/430 [==============================] - 1s 2ms/step - loss: 0.3456 - accuracy: 0.8774 - val_loss: 0.3744 - val_accuracy: 0.8706\n", + "Epoch 7/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.3329 - accuracy: 0.8809 - val_loss: 0.3634 - val_accuracy: 0.8706\n", + "Epoch 8/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.3184 - accuracy: 0.8858 - val_loss: 0.3949 - val_accuracy: 0.8612\n", + "Epoch 9/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.3065 - accuracy: 0.8891 - val_loss: 0.3487 - val_accuracy: 0.8772\n", + "Epoch 10/25\n", + "430/430 [==============================] - 1s 2ms/step - loss: 0.2944 - accuracy: 0.8922 - val_loss: 0.3398 - val_accuracy: 0.8808\n", + "Epoch 11/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.2839 - accuracy: 0.8960 - val_loss: 0.3456 - val_accuracy: 0.8820\n", + "Epoch 12/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.2707 - accuracy: 0.9026 - val_loss: 0.3652 - val_accuracy: 0.8694\n", + "Epoch 13/25\n", + "430/430 
[==============================] - 1s 2ms/step - loss: 0.2537 - accuracy: 0.9081 - val_loss: 0.3364 - val_accuracy: 0.8830\n", + "Epoch 14/25\n", + "430/430 [==============================] - 1s 2ms/step - loss: 0.2403 - accuracy: 0.9137 - val_loss: 0.3464 - val_accuracy: 0.8808\n", + "Epoch 15/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.2279 - accuracy: 0.9181 - val_loss: 0.3261 - val_accuracy: 0.8848\n", + "Epoch 16/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.2160 - accuracy: 0.9232 - val_loss: 0.3297 - val_accuracy: 0.8844\n", + "Epoch 17/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.2062 - accuracy: 0.9269 - val_loss: 0.3357 - val_accuracy: 0.8862\n", + "Epoch 18/25\n", + "430/430 [==============================] - 1s 2ms/step - loss: 0.1979 - accuracy: 0.9306 - val_loss: 0.3251 - val_accuracy: 0.8894\n", + "Epoch 19/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1893 - accuracy: 0.9340 - val_loss: 0.3234 - val_accuracy: 0.8908\n", + "Epoch 20/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1821 - accuracy: 0.9365 - val_loss: 0.3227 - val_accuracy: 0.8932\n", + "Epoch 21/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1753 - accuracy: 0.9403 - val_loss: 0.3223 - val_accuracy: 0.8918\n", + "Epoch 22/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1701 - accuracy: 0.9417 - val_loss: 0.3187 - val_accuracy: 0.8946\n", + "Epoch 23/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1656 - accuracy: 0.9438 - val_loss: 0.3191 - val_accuracy: 0.8940\n", + "Epoch 24/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1628 - accuracy: 0.9457 - val_loss: 0.3181 - val_accuracy: 0.8934\n", + "Epoch 25/25\n", + "430/430 [==============================] - 1s 3ms/step - loss: 0.1611 - accuracy: 0.9461 - val_loss: 0.3174 - val_accuracy: 
# --- Cell: a single Dense layer with an ℓ2 penalty on its kernel ---
# `kernel_regularizer` attaches the penalty to this layer's weight matrix only;
# 0.01 is the regularization factor passed to l2().
layer = keras.layers.Dense(100, activation="elu",
                           kernel_initializer="he_normal",
                           kernel_regularizer=keras.regularizers.l2(0.01))
# or l1(0.1) for ℓ1 regularization with a factor of 0.1
# or l1_l2(0.1, 0.01) for both ℓ1 and ℓ2 regularization, with factors 0.1 and 0.01 respectively

# --- Cell: the same ℓ2 penalty applied to every Dense layer of a small classifier ---
# The regularizer/initializer arguments are deliberately spelled out on each layer
# here; the next cell in the notebook removes the repetition with functools.partial.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="elu",
                       kernel_initializer="he_normal",
                       kernel_regularizer=keras.regularizers.l2(0.01)),
    keras.layers.Dense(100, activation="elu",
                       kernel_initializer="he_normal",
                       kernel_regularizer=keras.regularizers.l2(0.01)),
    # The output layer is regularized too, but keeps the default initializer.
    keras.layers.Dense(10, activation="softmax",
                       kernel_regularizer=keras.regularizers.l2(0.01))
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
# Only 2 epochs: this cell demonstrates the API, it is not a tuned training run.
n_epochs = 2
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid))
# --- Cell: factor the repeated Dense arguments out with functools.partial ---
from functools import partial

# Dense-layer factory with ELU, He-normal initialization and an ℓ2(0.01) kernel
# penalty pre-bound. Keyword arguments given at call time override the bound
# ones. Note that the l2 object is created once here, so every layer built by
# this factory receives that same regularizer instance.
RegularizedDense = partial(keras.layers.Dense,
                           activation="elu",
                           kernel_initializer="he_normal",
                           kernel_regularizer=keras.regularizers.l2(0.01))

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    RegularizedDense(300),
    RegularizedDense(100),
    # Overrides only the activation; the bound initializer and regularizer still apply.
    RegularizedDense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 2
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid))

# --- Cell: dropout ---
# A Dropout(rate=0.2) layer sits in front of every Dense layer, including the
# first one (i.e. it is also applied to the flattened input).
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 2
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid))

# --- Cell: reset both RNGs so the alpha-dropout run below is repeatable ---
tf.random.set_seed(42)
np.random.seed(42)

# --- Cell: alpha dropout ---
# Same layout as the dropout model above, but with AlphaDropout paired with
# SELU activations and LeCun-normal initialization on the hidden layers.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(10, activation="softmax")
])
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
n_epochs = 20
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid))
# --- Cell: loss/accuracy of the alpha-dropout model on the test set ---
model.evaluate(X_test_scaled, y_test)

# --- Cell: same evaluation on the training set, for comparison ---
model.evaluate(X_train_scaled, y_train)

# --- Cell: one more pass over the training set ---
# No `epochs` argument is given, so Keras' default number of epochs is used,
# and no validation data is passed.
history = model.fit(X_train_scaled, y_train)

# --- Cell: MC dropout — reset both RNGs so the stochastic passes are repeatable ---
tf.random.set_seed(42)
np.random.seed(42)

# --- Cell: 100 stochastic forward passes over the whole test set ---
# Calling the model with training=True runs it in training mode, so its
# AlphaDropout layers stay active and each pass gives different probabilities.
# np.stack puts the passes on a new leading axis (length 100).
y_probas = np.stack([model(X_test_scaled, training=True)
                     for sample in range(100)])
# Mean and standard deviation across the 100 passes (axis 0), per instance and class.
y_proba = y_probas.mean(axis=0)
y_std = y_probas.std(axis=0)

# --- Cell: ordinary (deterministic) prediction for the first test instance ---
np.round(model.predict(X_test_scaled[:1]), 2)

# --- Cell: the 100 stochastic predictions for that same instance ---
# NOTE(review): this displays all 100 rows; consider slicing (e.g. the first
# few passes) if the notebook output should stay short.
np.round(y_probas[:, :1], 2)

# --- Cell: MC-dropout estimate (mean over passes) for the first instance ---
np.round(y_proba[:1], 2)

# --- Cell: spread of the estimate for the first instance ---
# NOTE(review): y_std was already computed with this exact expression in the
# sampling cell above; this reassignment is redundant but harmless.
y_std = y_probas.std(axis=0)
np.round(y_std[:1], 2)
# --- Cell: class predicted by the averaged MC-dropout probabilities ---
y_pred = np.argmax(y_proba, axis=1)

# --- Cell: fraction of test instances whose MC-dropout prediction is correct ---
accuracy = np.sum(y_pred == y_test) / len(y_test)
accuracy

# --- Cell: dropout layers that are always active ---
class MCDropout(keras.layers.Dropout):
    """Dropout layer that is always applied.

    `call` ignores the mode the enclosing model runs in and always forwards
    `training=True` to `keras.layers.Dropout.call`, so plain `predict()` calls
    on a model using this layer are stochastic.
    """

    def call(self, inputs):
        return super().call(inputs, training=True)

class MCAlphaDropout(keras.layers.AlphaDropout):
    """AlphaDropout counterpart of `MCDropout`: always called with `training=True`."""

    def call(self, inputs):
        return super().call(inputs, training=True)

# --- Cell: reset both RNGs before building and sampling the MC model ---
tf.random.set_seed(42)
np.random.seed(42)

# --- Cell: rebuild the trained model with always-on alpha dropout ---
# Each AlphaDropout layer is replaced by an MCAlphaDropout with the same rate;
# every other layer object of `model` is reused as-is (not copied).
mc_model = keras.models.Sequential([
    MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer
    for layer in model.layers
])

# --- Cell ---
mc_model.summary()

# --- Cell: compile with the same optimizer settings used to train `model` ---
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
mc_model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# --- Cell: make sure mc_model carries the trained weights ---
# NOTE(review): the Dense layers are the same objects as in `model`, so this
# is presumably a no-op here; it is kept as an explicit safeguard.
mc_model.set_weights(model.get_weights())

# --- Cell: MC-dropout estimate for the first test instance ---
# predict() is stochastic for mc_model, so average 100 calls.
np.round(np.mean([mc_model.predict(X_test_scaled[:1]) for sample in range(100)], axis=0), 2)

# --- Cell: max-norm constraint on a single Dense layer ---
# max_norm(1.) is passed as the kernel constraint of this layer.
layer = keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal",
                           kernel_constraint=keras.constraints.max_norm(1.))

# --- Cell: small classifier whose hidden layers are max-norm constrained ---
# `partial` comes from the functools import made in the ℓ2-regularization cell.
MaxNormDense = partial(keras.layers.Dense,
                       activation="selu", kernel_initializer="lecun_normal",
                       kernel_constraint=keras.constraints.max_norm(1.))

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    MaxNormDense(300),
    MaxNormDense(100),
    # The output layer is a plain Dense layer without the constraint.
    keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 2
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid))

# --- Cell: exercise 8a — 20 hidden layers of 100 ELU units, He initialization ---
# Start from a clean Keras session and fixed seeds.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

model = keras.models.Sequential()
# Inputs are declared as 32x32 images with 3 channels and flattened first.
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 activation="elu",
                                 kernel_initializer="he_normal"))
# --- Cell: exercise 8b — add the 10-way softmax output layer ---
# NOTE(review): this cell mutates `model` in place; running it twice appends a
# second output layer. Re-run the model-building cell first when re-executing.
model.add(keras.layers.Dense(10, activation="softmax"))

# --- Cell: compile with Nadam at a learning rate of 5e-5 ---
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
optimizer = keras.optimizers.Nadam(learning_rate=5e-5)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])
# --- Cell: load CIFAR10 and carve a validation set out of the training data ---
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()

# The first 5,000 training instances become the validation set (needed for
# early stopping); the remainder is used for training.
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

# --- Cell: callbacks used when fitting the CIFAR10 model ---
run_index = 1  # increment every time the model is trained
# Each run logs to its own zero-padded sub-directory under ./my_cifar10_logs.
run_logdir = os.path.join(os.curdir, "my_cifar10_logs",
                          "run_{:03d}".format(run_index))

early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
# save_best_only=True: the checkpoint file is only overwritten by a better model.
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cifar10_model.h5",
                                                      save_best_only=True)
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x_NtuiwVcQiV" + }, + "source": [ + "![스크린샷 2021-02-17 오후 11.42.41.png]()" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "GAv7c6NtLOpl", + "outputId": "94e3f040-b384-4fb9-a530-d86e2ee4e664", + "colab": { + "resources": { + "https://localhost:6006/?tensorboardColab=true": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "text/html; charset=utf-8" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/index.js": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "application/javascript; charset=utf-8" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/data/environment": { + "data": "eyJkYXRhX2xvY2F0aW9uIjogIi4vbXlfY2lmYXIxMF9sb2dzIiwgIndpbmRvd190aXRsZSI6ICIifQ==", + "ok": true, + "headers": [ + [ + "content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/data/plugins_listing": { + "data": 
"eyJzY2FsYXJzIjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAic2NhbGFycyIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJDVVNUT01fRUxFTUVOVCIsICJlbGVtZW50X25hbWUiOiAidGYtc2NhbGFyLWRhc2hib2FyZCJ9fSwgImN1c3RvbV9zY2FsYXJzIjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAiQ3VzdG9tIFNjYWxhcnMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLWN1c3RvbS1zY2FsYXItZGFzaGJvYXJkIn19LCAiaW1hZ2VzIjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAiaW1hZ2VzIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1pbWFnZS1kYXNoYm9hcmQifX0sICJhdWRpbyI6IHsiZGlzYWJsZV9yZWxvYWQiOiBmYWxzZSwgImVuYWJsZWQiOiBmYWxzZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogImF1ZGlvIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1hdWRpby1kYXNoYm9hcmQifX0sICJkZWJ1Z2dlci12MiI6IHsiZGlzYWJsZV9yZWxvYWQiOiBmYWxzZSwgImVuYWJsZWQiOiBmYWxzZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogIkRlYnVnZ2VyIFYyIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIk5HX0NPTVBPTkVOVCJ9fSwgImdyYXBocyI6IHsiZGlzYWJsZV9yZWxvYWQiOiB0cnVlLCAiZW5hYmxlZCI6IHRydWUsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJncmFwaHMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLWdyYXBoLWRhc2hib2FyZCJ9fSwgImRpc3RyaWJ1dGlvbnMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJkaXN0cmlidXRpb25zIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1kaXN0cmlidXRpb24tZGFzaGJvYXJkIn19LCAiaGlzdG9ncmFtcyI6IHsiZGlzYWJsZV9yZWxvYWQiOiBmYWxzZSwgImVuYWJsZWQiOiBmYWxzZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogImhpc3RvZ3JhbXMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLWhpc3RvZ3JhbS1kYXNoYm9
hcmQifX0sICJ0ZXh0IjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAidGV4dCIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJDVVNUT01fRUxFTUVOVCIsICJlbGVtZW50X25hbWUiOiAidGYtdGV4dC1kYXNoYm9hcmQifX0sICJwcl9jdXJ2ZXMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJQUiBDdXJ2ZXMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLXByLWN1cnZlLWRhc2hib2FyZCJ9fSwgInByb2ZpbGVfcmVkaXJlY3QiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJQcm9maWxlIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1wcm9maWxlLXJlZGlyZWN0LWRhc2hib2FyZCJ9fSwgImhwYXJhbXMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJocGFyYW1zIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1ocGFyYW1zLWRhc2hib2FyZCJ9fSwgIm1lc2giOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJtZXNoIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJtZXNoLWRhc2hib2FyZCJ9fSwgInRpbWVzZXJpZXMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJUaW1lIFNlcmllcyIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJOR19DT01QT05FTlQifX0sICJwcm9qZWN0b3IiOiB7ImRpc2FibGVfcmVsb2FkIjogdHJ1ZSwgImVuYWJsZWQiOiBmYWxzZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogInByb2plY3RvciIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJJRlJBTUUiLCAibW9kdWxlX3BhdGgiOiAiL2RhdGEvcGx1Z2luL3Byb2plY3Rvci9pbmRleC5qcyJ9fSwgIndoYXRpZiI6IHsiZGlzYWJsZV9yZWxvYWQiOiBmYWxzZSwgImVuYWJsZWQiOiBmYWxzZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogIldoYXQtSWYgVG9vbCIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJJRlJBTUUiLCAibW9kdWxlX3BhdGgiOiAiL2RhdGEvcGx1Z2luL3doYXRpZi9pbmRleC5qcyJ9fX0=", + 
"ok": true, + "headers": [ + [ + "content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/icon_bundle.svg": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "image/svg+xml; charset=utf-8" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/data/runs": { + "data": "WyJydW5fMDAxL3RyYWluIl0=", + "ok": true, + "headers": [ + [ + "content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/data/plugin/graphs/info": { + "data": "eyJydW5fMDAxL3RyYWluIjogeyJydW4iOiAicnVuXzAwMS90cmFpbiIsICJ0YWdzIjogeyJiYXRjaF8yIjogeyJ0YWciOiAiYmF0Y2hfMiIsICJjb25jZXB0dWFsX2dyYXBoIjogZmFsc2UsICJvcF9ncmFwaCI6IHRydWUsICJwcm9maWxlIjogZmFsc2V9LCAia2VyYXMiOiB7InRhZyI6ICJrZXJhcyIsICJjb25jZXB0dWFsX2dyYXBoIjogdHJ1ZSwgIm9wX2dyYXBoIjogZmFsc2UsICJwcm9maWxlIjogZmFsc2V9fSwgInJ1bl9ncmFwaCI6IHRydWV9fQ==", + "ok": true, + "headers": [ + [ + "content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/data/plugin/graphs/graph?run=run_001%2Ftrain&conceptual=false": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "text/x-protobuf; charset=utf-8" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/font-roboto/RxZJdnzeo3R5zSexge8UUZBw1xU1rKptJj_0jans920.woff2": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "font/woff2" + ] + ], + "status": 200, + "status_text": "" + }, + "https://localhost:6006/font-roboto/oMMgfZMQthOryQo9n22dcuvvDin1pK8aKteLpeZ5c0A.woff2": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "font/woff2" + ] + ], + "status": 200, + "status_text": "" + } + }, + "base_uri": "https://localhost:8080/", + "height": 822 + } + }, + "source": [ + "%tensorboard --logdir=./my_cifar10_logs --port=6006" + ], + "execution_count": 132, + "outputs": [ + { + "output_type": "display_data", + "data": { + 
"application/javascript": [ + "\n", + " (async () => {\n", + " const url = new URL(await google.colab.kernel.proxyPort(6006, {'cache': true}));\n", + " url.searchParams.set('tensorboardColab', 'true');\n", + " const iframe = document.createElement('iframe');\n", + " iframe.src = url;\n", + " iframe.setAttribute('width', '100%');\n", + " iframe.setAttribute('height', '800');\n", + " iframe.setAttribute('frameborder', 0);\n", + " document.body.appendChild(iframe);\n", + " })();\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4huFUfvOLOpl", + "outputId": "a0f6fcfc-c02b-4861-b396-d64f7281184d", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model.fit(X_train, y_train, epochs=100,\n", + " validation_data=(X_valid, y_valid),\n", + " callbacks=callbacks)" + ], + "execution_count": 133, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/100\n", + "1407/1407 [==============================] - 16s 9ms/step - loss: 9.5976 - accuracy: 0.1365 - val_loss: 2.1086 - val_accuracy: 0.2342\n", + "Epoch 2/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 2.0597 - accuracy: 0.2457 - val_loss: 2.0227 - val_accuracy: 0.2528\n", + "Epoch 3/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.9423 - accuracy: 0.2883 - val_loss: 1.9173 - val_accuracy: 0.2974\n", + "Epoch 4/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.8595 - accuracy: 0.3242 - val_loss: 1.9241 - val_accuracy: 0.3242\n", + "Epoch 5/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.7853 - accuracy: 0.3508 - val_loss: 1.7868 - val_accuracy: 0.3550\n", + "Epoch 6/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.7293 - accuracy: 0.3703 - val_loss: 1.7313 - val_accuracy: 0.3782\n", + "Epoch 7/100\n", + "1407/1407 
[==============================] - 12s 9ms/step - loss: 1.6884 - accuracy: 0.3880 - val_loss: 1.7046 - val_accuracy: 0.3728\n", + "Epoch 8/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.6473 - accuracy: 0.4009 - val_loss: 1.6535 - val_accuracy: 0.4072\n", + "Epoch 9/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.6216 - accuracy: 0.4174 - val_loss: 1.6390 - val_accuracy: 0.4072\n", + "Epoch 10/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.5892 - accuracy: 0.4274 - val_loss: 1.6882 - val_accuracy: 0.3892\n", + "Epoch 11/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.5679 - accuracy: 0.4388 - val_loss: 1.6097 - val_accuracy: 0.4182\n", + "Epoch 12/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.5494 - accuracy: 0.4435 - val_loss: 1.6076 - val_accuracy: 0.4224\n", + "Epoch 13/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.5264 - accuracy: 0.4501 - val_loss: 1.6094 - val_accuracy: 0.4174\n", + "Epoch 14/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.5058 - accuracy: 0.4584 - val_loss: 1.5780 - val_accuracy: 0.4304\n", + "Epoch 15/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.5017 - accuracy: 0.4586 - val_loss: 1.5628 - val_accuracy: 0.4468\n", + "Epoch 16/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.4832 - accuracy: 0.4655 - val_loss: 1.5483 - val_accuracy: 0.4450\n", + "Epoch 17/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.4690 - accuracy: 0.4741 - val_loss: 1.5686 - val_accuracy: 0.4398\n", + "Epoch 18/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.4530 - accuracy: 0.4814 - val_loss: 1.5176 - val_accuracy: 0.4588\n", + "Epoch 19/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.4254 - 
accuracy: 0.4887 - val_loss: 1.5407 - val_accuracy: 0.4556\n", + "Epoch 20/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.4167 - accuracy: 0.4908 - val_loss: 1.5124 - val_accuracy: 0.4598\n", + "Epoch 21/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4132 - accuracy: 0.4940 - val_loss: 1.5697 - val_accuracy: 0.4456\n", + "Epoch 22/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4092 - accuracy: 0.4909 - val_loss: 1.5215 - val_accuracy: 0.4550\n", + "Epoch 23/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3812 - accuracy: 0.5018 - val_loss: 1.5259 - val_accuracy: 0.4558\n", + "Epoch 24/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3708 - accuracy: 0.5068 - val_loss: 1.5368 - val_accuracy: 0.4616\n", + "Epoch 25/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3730 - accuracy: 0.5061 - val_loss: 1.5012 - val_accuracy: 0.4656\n", + "Epoch 26/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3413 - accuracy: 0.5160 - val_loss: 1.5261 - val_accuracy: 0.4538\n", + "Epoch 27/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3396 - accuracy: 0.5184 - val_loss: 1.5085 - val_accuracy: 0.4638\n", + "Epoch 28/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3297 - accuracy: 0.5219 - val_loss: 1.5276 - val_accuracy: 0.4604\n", + "Epoch 29/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3327 - accuracy: 0.5197 - val_loss: 1.5129 - val_accuracy: 0.4642\n", + "Epoch 30/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3217 - accuracy: 0.5230 - val_loss: 1.5549 - val_accuracy: 0.4634\n", + "Epoch 31/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.3237 - accuracy: 0.5222 - val_loss: 1.5363 - val_accuracy: 0.4582\n", + 
"Epoch 32/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2889 - accuracy: 0.5341 - val_loss: 1.4979 - val_accuracy: 0.4766\n", + "Epoch 33/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2933 - accuracy: 0.5362 - val_loss: 1.5370 - val_accuracy: 0.4622\n", + "Epoch 34/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2736 - accuracy: 0.5390 - val_loss: 1.5247 - val_accuracy: 0.4698\n", + "Epoch 35/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.2719 - accuracy: 0.5433 - val_loss: 1.4904 - val_accuracy: 0.4768\n", + "Epoch 36/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2673 - accuracy: 0.5467 - val_loss: 1.5016 - val_accuracy: 0.4780\n", + "Epoch 37/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.2518 - accuracy: 0.5478 - val_loss: 1.4951 - val_accuracy: 0.4810\n", + "Epoch 38/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2393 - accuracy: 0.5572 - val_loss: 1.4962 - val_accuracy: 0.4770\n", + "Epoch 39/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2372 - accuracy: 0.5526 - val_loss: 1.5172 - val_accuracy: 0.4732\n", + "Epoch 40/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.2280 - accuracy: 0.5606 - val_loss: 1.5011 - val_accuracy: 0.4768\n", + "Epoch 41/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2190 - accuracy: 0.5625 - val_loss: 1.5301 - val_accuracy: 0.4740\n", + "Epoch 42/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.2113 - accuracy: 0.5695 - val_loss: 1.5344 - val_accuracy: 0.4746\n", + "Epoch 43/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2086 - accuracy: 0.5617 - val_loss: 1.5503 - val_accuracy: 0.4638\n", + "Epoch 44/100\n", + "1407/1407 [==============================] - 
12s 9ms/step - loss: 1.1997 - accuracy: 0.5712 - val_loss: 1.5310 - val_accuracy: 0.4814\n", + "Epoch 45/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.1881 - accuracy: 0.5744 - val_loss: 1.4936 - val_accuracy: 0.4852\n", + "Epoch 46/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1747 - accuracy: 0.5788 - val_loss: 1.5113 - val_accuracy: 0.4778\n", + "Epoch 47/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1738 - accuracy: 0.5753 - val_loss: 1.5495 - val_accuracy: 0.4750\n", + "Epoch 48/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1679 - accuracy: 0.5796 - val_loss: 1.4970 - val_accuracy: 0.4878\n", + "Epoch 49/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1723 - accuracy: 0.5763 - val_loss: 1.5314 - val_accuracy: 0.4812\n", + "Epoch 50/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.1544 - accuracy: 0.5855 - val_loss: 1.5753 - val_accuracy: 0.4734\n", + "Epoch 51/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1567 - accuracy: 0.5833 - val_loss: 1.5835 - val_accuracy: 0.4692\n", + "Epoch 52/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1442 - accuracy: 0.5877 - val_loss: 1.5382 - val_accuracy: 0.4800\n", + "Epoch 53/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1309 - accuracy: 0.5963 - val_loss: 1.5327 - val_accuracy: 0.4848\n", + "Epoch 54/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.1219 - accuracy: 0.5970 - val_loss: 1.5581 - val_accuracy: 0.4826\n", + "Epoch 55/100\n", + "1407/1407 [==============================] - 12s 9ms/step - loss: 1.1241 - accuracy: 0.5986 - val_loss: 1.5292 - val_accuracy: 0.4886\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + 
"tags": [] + }, + "execution_count": 133 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2WziHKxHLOpl", + "outputId": "2e8c64ce-7966-43e1-e0a2-6e65ac4e24d5", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model = keras.models.load_model(\"my_cifar10_model.h5\")\n", + "model.evaluate(X_valid, y_valid)" + ], + "execution_count": 134, + "outputs": [ + { + "output_type": "stream", + "text": [ + "157/157 [==============================] - 1s 2ms/step - loss: 1.4904 - accuracy: 0.4768\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1.4903972148895264, 0.47679999470710754]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 134 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eaO_qFxHLOpl" + }, + "source": [ + "가장 낮은 검증 손실을 내는 모델은 검증 세트에서 약 47% 정확도를 얻었습니다. 이 검증 점수에 도달하는데 35번의 에포크가 걸렸습니다. (GPU가 없는) 제 노트북에서 에포크당 약 10초 정도 걸렸습니다. 배치 정규화를 사용해 성능을 올릴 수 있는지 확인해 보죠." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4zqEUxjPLOpl" + }, + "source": [ + "### c.\n", + "*문제: 배치 정규화를 추가하고 학습 곡선을 비교해보세요. 이전보다 빠르게 수렴하나요? 더 좋은 모델이 만들어지나요? 훈련 속도에는 어떤 영향을 미치나요?*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C41Agl-yLOpl" + }, + "source": [ + "다음 코드는 위의 코드와 매우 비슷합니다. 몇 가지 다른 점은 아래와 같습니다:\n", + "\n", + "* 출력층을 제외하고 모든 `Dense` 층 다음에 (활성화 함수 전에) BN 층을 추가했습니다. 처음 은닉층 전에도 BN 층을 추가했습니다.\n", + "* 학습률을 5e-4로 바꾸었습니다. 1e-5, 3e-5, 5e-5, 1e-4, 3e-4, 5e-4, 1e-3, 3e-3를 시도해 보고 20번 에포크 후에 검증 세트 성능이 가장 좋은 것을 선택했습니다.\n", + "* run_logdir를 run_bn_* 으로 이름을 바꾸고 모델 파일 이름을 my_cifar10_bn_model.h5로 변경했습니다." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "LczWnbDULOpl", + "outputId": "76d0babb-d132-4b87-ac73-e5e91002a6c1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", + "model.add(keras.layers.BatchNormalization())\n", + "for _ in range(20):\n", + " model.add(keras.layers.Dense(100, kernel_initializer=\"he_normal\"))\n", + " model.add(keras.layers.BatchNormalization())\n", + " model.add(keras.layers.Activation(\"elu\"))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", + "\n", + "optimizer = keras.optimizers.Nadam(lr=5e-4)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=optimizer,\n", + " metrics=[\"accuracy\"])\n", + "\n", + "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", + "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_bn_model.h5\", save_best_only=True)\n", + "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", + "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_bn_{:03d}\".format(run_index))\n", + "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", + "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", + "\n", + "model.fit(X_train, y_train, epochs=100,\n", + " validation_data=(X_valid, y_valid),\n", + " callbacks=callbacks)\n", + "\n", + "model = keras.models.load_model(\"my_cifar10_bn_model.h5\")\n", + "model.evaluate(X_valid, y_valid)" + ], + "execution_count": 135, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/100\n", + "1407/1407 [==============================] - 44s 26ms/step - loss: 1.9805 - accuracy: 0.2905 - val_loss: 1.6707 - val_accuracy: 0.3934\n", + "Epoch 2/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.6843 - accuracy: 0.3997 - 
val_loss: 1.5906 - val_accuracy: 0.4302\n", + "Epoch 3/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.6114 - accuracy: 0.4293 - val_loss: 1.6091 - val_accuracy: 0.4318\n", + "Epoch 4/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.5527 - accuracy: 0.4483 - val_loss: 1.5315 - val_accuracy: 0.4504\n", + "Epoch 5/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.5020 - accuracy: 0.4681 - val_loss: 1.4365 - val_accuracy: 0.4858\n", + "Epoch 6/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.4614 - accuracy: 0.4817 - val_loss: 1.4284 - val_accuracy: 0.4914\n", + "Epoch 7/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.4271 - accuracy: 0.4942 - val_loss: 1.4083 - val_accuracy: 0.4990\n", + "Epoch 8/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.4021 - accuracy: 0.5032 - val_loss: 1.3799 - val_accuracy: 0.5064\n", + "Epoch 9/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.3795 - accuracy: 0.5143 - val_loss: 1.3858 - val_accuracy: 0.5102\n", + "Epoch 10/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.3562 - accuracy: 0.5171 - val_loss: 1.3409 - val_accuracy: 0.5198\n", + "Epoch 11/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.3239 - accuracy: 0.5293 - val_loss: 1.3554 - val_accuracy: 0.5210\n", + "Epoch 12/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.3125 - accuracy: 0.5374 - val_loss: 1.3739 - val_accuracy: 0.5104\n", + "Epoch 13/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2838 - accuracy: 0.5461 - val_loss: 1.3909 - val_accuracy: 0.5128\n", + "Epoch 14/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2656 - accuracy: 0.5475 - val_loss: 1.3442 - val_accuracy: 0.5294\n", + "Epoch 
15/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2607 - accuracy: 0.5568 - val_loss: 1.3741 - val_accuracy: 0.5220\n", + "Epoch 16/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2468 - accuracy: 0.5587 - val_loss: 1.3371 - val_accuracy: 0.5312\n", + "Epoch 17/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2273 - accuracy: 0.5681 - val_loss: 1.3262 - val_accuracy: 0.5360\n", + "Epoch 18/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.2085 - accuracy: 0.5716 - val_loss: 1.3367 - val_accuracy: 0.5334\n", + "Epoch 19/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.1996 - accuracy: 0.5772 - val_loss: 1.3809 - val_accuracy: 0.5242\n", + "Epoch 20/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.1907 - accuracy: 0.5818 - val_loss: 1.3691 - val_accuracy: 0.5260\n", + "Epoch 21/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.1703 - accuracy: 0.5848 - val_loss: 1.3534 - val_accuracy: 0.5306\n", + "Epoch 22/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.1584 - accuracy: 0.5909 - val_loss: 1.3587 - val_accuracy: 0.5240\n", + "Epoch 23/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.1434 - accuracy: 0.5985 - val_loss: 1.3365 - val_accuracy: 0.5412\n", + "Epoch 24/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.1331 - accuracy: 0.6005 - val_loss: 1.3189 - val_accuracy: 0.5474\n", + "Epoch 25/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.1182 - accuracy: 0.6056 - val_loss: 1.3208 - val_accuracy: 0.5406\n", + "Epoch 26/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0999 - accuracy: 0.6107 - val_loss: 1.3565 - val_accuracy: 0.5348\n", + "Epoch 27/100\n", + "1407/1407 
[==============================] - 34s 24ms/step - loss: 1.0898 - accuracy: 0.6168 - val_loss: 1.3557 - val_accuracy: 0.5328\n", + "Epoch 28/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0888 - accuracy: 0.6197 - val_loss: 1.3448 - val_accuracy: 0.5334\n", + "Epoch 29/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0868 - accuracy: 0.6142 - val_loss: 1.3385 - val_accuracy: 0.5428\n", + "Epoch 30/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0712 - accuracy: 0.6226 - val_loss: 1.3513 - val_accuracy: 0.5376\n", + "Epoch 31/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0632 - accuracy: 0.6221 - val_loss: 1.3549 - val_accuracy: 0.5388\n", + "Epoch 32/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.0427 - accuracy: 0.6313 - val_loss: 1.3651 - val_accuracy: 0.5420\n", + "Epoch 33/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.0328 - accuracy: 0.6341 - val_loss: 1.3301 - val_accuracy: 0.5442\n", + "Epoch 34/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 1.0265 - accuracy: 0.6392 - val_loss: 1.3481 - val_accuracy: 0.5412\n", + "Epoch 35/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 1.0217 - accuracy: 0.6412 - val_loss: 1.3520 - val_accuracy: 0.5414\n", + "Epoch 36/100\n", + "1407/1407 [==============================] - 34s 25ms/step - loss: 1.0009 - accuracy: 0.6463 - val_loss: 1.3634 - val_accuracy: 0.5320\n", + "Epoch 37/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 0.9865 - accuracy: 0.6520 - val_loss: 1.3685 - val_accuracy: 0.5304\n", + "Epoch 38/100\n", + "1407/1407 [==============================] - 34s 25ms/step - loss: 0.9805 - accuracy: 0.6567 - val_loss: 1.3807 - val_accuracy: 0.5314\n", + "Epoch 39/100\n", + "1407/1407 [==============================] - 34s 24ms/step - 
loss: 0.9782 - accuracy: 0.6517 - val_loss: 1.3851 - val_accuracy: 0.5438\n", + "Epoch 40/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 0.9646 - accuracy: 0.6644 - val_loss: 1.4195 - val_accuracy: 0.5328\n", + "Epoch 41/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 0.9540 - accuracy: 0.6661 - val_loss: 1.3478 - val_accuracy: 0.5542\n", + "Epoch 42/100\n", + "1407/1407 [==============================] - 35s 25ms/step - loss: 0.9528 - accuracy: 0.6655 - val_loss: 1.3789 - val_accuracy: 0.5440\n", + "Epoch 43/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 0.9449 - accuracy: 0.6667 - val_loss: 1.3721 - val_accuracy: 0.5410\n", + "Epoch 44/100\n", + "1407/1407 [==============================] - 34s 24ms/step - loss: 0.9345 - accuracy: 0.6723 - val_loss: 1.4136 - val_accuracy: 0.5348\n", + "157/157 [==============================] - 1s 3ms/step - loss: 1.3189 - accuracy: 0.5474\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1.318870186805725, 0.5473999977111816]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 135 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wKjSeUIXLOpl" + }, + "source": [ + "* *이전보다 빠르게 수렴하나요?* 네, 더 빠릅니다! 위 출력을 보면 이전 모델은 가장 낮은 검증 손실에 도달하기 위해 35 에포크가 걸렸지만 BN을 사용한 새 모델은 24 에포크가 걸렸습니다. 또한 새 모델은 5번째 에포크에서 이미 이전 모델의 최저 검증 손실(1.4904)보다 낮은 검증 손실(1.4365)에 도달했습니다. BN 층은 훈련을 안정적으로 수행하고 더 큰 학습률을 사용할 수 있기 때문에 수렴이 빨라졌습니다.\n", + "* *BN이 더 좋은 모델을 만드나요?* 네! 최종 모델의 성능이 47%가 아니라 55% 정확도로 더 좋습니다. 이는 아주 좋은 모델이 아니지만 적어도 이전보다는 낫습니다(합성곱 신경망이 더 낫겠지만 이는 다른 주제입니다. 14장을 참고하세요).\n", + "* *BN이 훈련 속도에 영향을 미치나요?* 모델이 더 적은 에포크 만에 수렴했지만 위 출력에서 각 에포크는 약 12초가 아니라 약 35초가 걸렸습니다. BN 층에서 추가된 계산 때문입니다. 따라서 가장 좋은 모델에 도달하기까지 에포크 횟수는 30% 정도 줄었지만 훈련 시간(탁상 시계 시간)은 오히려 두 배 정도 늘었습니다. 하지만 이전 모델의 최저 검증 손실 수준에는 5 에포크(약 3분) 만에 도달했으므로 같은 성능을 기준으로 하면 여전히 크게 향상되었습니다!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oJr9kBV5LOpm" + }, + "source": [ + "### d.\n", + "*문제: 배치 정규화를 SELU로 바꾸어보세요. 
네트워크가 자기 정규화하기 위해 필요한 변경 사항을 적용해보세요(즉, 입력 특성 표준화, 르쿤 정규분포 초기화, 완전 연결 층만 순차적으로 쌓은 심층 신경망 등).*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZAfEp5gNLOpm", + "outputId": "37cf90e0-b80f-4616-f168-03a99c6c5648", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", + "for _ in range(20):\n", + " model.add(keras.layers.Dense(100,\n", + " kernel_initializer=\"lecun_normal\",\n", + " activation=\"selu\"))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", + "\n", + "optimizer = keras.optimizers.Nadam(lr=7e-4)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=optimizer,\n", + " metrics=[\"accuracy\"])\n", + "\n", + "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", + "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_selu_model.h5\", save_best_only=True)\n", + "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", + "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_selu_{:03d}\".format(run_index))\n", + "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", + "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", + "\n", + "X_means = X_train.mean(axis=0)\n", + "X_stds = X_train.std(axis=0)\n", + "X_train_scaled = (X_train - X_means) / X_stds\n", + "X_valid_scaled = (X_valid - X_means) / X_stds\n", + "X_test_scaled = (X_test - X_means) / X_stds\n", + "\n", + "model.fit(X_train_scaled, y_train, epochs=100,\n", + " validation_data=(X_valid_scaled, y_valid),\n", + " callbacks=callbacks)\n", + "\n", + "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n", + "model.evaluate(X_valid_scaled, y_valid)" + ], + "execution_count": 136, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/100\n", + "1407/1407 
[==============================] - 17s 10ms/step - loss: 2.0543 - accuracy: 0.2663 - val_loss: 1.7895 - val_accuracy: 0.3680\n", + "Epoch 2/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.7162 - accuracy: 0.3874 - val_loss: 1.8017 - val_accuracy: 0.3720\n", + "Epoch 3/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.6257 - accuracy: 0.4268 - val_loss: 1.6563 - val_accuracy: 0.4172\n", + "Epoch 4/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.5436 - accuracy: 0.4569 - val_loss: 1.6334 - val_accuracy: 0.4386\n", + "Epoch 5/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4967 - accuracy: 0.4752 - val_loss: 1.6055 - val_accuracy: 0.4424\n", + "Epoch 6/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4495 - accuracy: 0.4914 - val_loss: 1.5408 - val_accuracy: 0.4574\n", + "Epoch 7/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4016 - accuracy: 0.5107 - val_loss: 1.5620 - val_accuracy: 0.4540\n", + "Epoch 8/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3616 - accuracy: 0.5232 - val_loss: 1.5098 - val_accuracy: 0.4726\n", + "Epoch 9/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3246 - accuracy: 0.5393 - val_loss: 1.4862 - val_accuracy: 0.4698\n", + "Epoch 10/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2975 - accuracy: 0.5491 - val_loss: 1.4937 - val_accuracy: 0.4912\n", + "Epoch 11/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2568 - accuracy: 0.5690 - val_loss: 1.5241 - val_accuracy: 0.4978\n", + "Epoch 12/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2373 - accuracy: 0.5745 - val_loss: 1.5144 - val_accuracy: 0.4754\n", + "Epoch 13/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2129 - accuracy: 
0.5821 - val_loss: 1.4959 - val_accuracy: 0.5082\n", + "Epoch 14/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1765 - accuracy: 0.5976 - val_loss: 1.4949 - val_accuracy: 0.5020\n", + "Epoch 15/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1584 - accuracy: 0.6032 - val_loss: 1.5359 - val_accuracy: 0.4958\n", + "Epoch 16/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1476 - accuracy: 0.6054 - val_loss: 1.5124 - val_accuracy: 0.5024\n", + "Epoch 17/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1305 - accuracy: 0.6166 - val_loss: 1.5296 - val_accuracy: 0.5036\n", + "Epoch 18/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0998 - accuracy: 0.6253 - val_loss: 1.4755 - val_accuracy: 0.5022\n", + "Epoch 19/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0959 - accuracy: 0.6324 - val_loss: 1.6318 - val_accuracy: 0.4636\n", + "Epoch 20/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1792 - accuracy: 0.5927 - val_loss: 1.5011 - val_accuracy: 0.5054\n", + "Epoch 21/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0341 - accuracy: 0.6458 - val_loss: 1.5519 - val_accuracy: 0.5038\n", + "Epoch 22/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0028 - accuracy: 0.6588 - val_loss: 1.5129 - val_accuracy: 0.5106\n", + "Epoch 23/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0480 - accuracy: 0.6548 - val_loss: 1.5322 - val_accuracy: 0.4728\n", + "Epoch 24/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1508 - accuracy: 0.6021 - val_loss: 1.5160 - val_accuracy: 0.5052\n", + "Epoch 25/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0125 - accuracy: 0.6555 - val_loss: 1.5517 - val_accuracy: 0.5004\n", + "Epoch 
26/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0041 - accuracy: 0.6594 - val_loss: 1.5464 - val_accuracy: 0.4994\n", + "Epoch 27/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0239 - accuracy: 0.6504 - val_loss: 1.5180 - val_accuracy: 0.5124\n", + "Epoch 28/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9534 - accuracy: 0.6749 - val_loss: 1.5762 - val_accuracy: 0.5006\n", + "Epoch 29/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9281 - accuracy: 0.6852 - val_loss: 1.5844 - val_accuracy: 0.5174\n", + "Epoch 30/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9285 - accuracy: 0.6879 - val_loss: 1.5761 - val_accuracy: 0.5070\n", + "Epoch 31/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9284 - accuracy: 0.6851 - val_loss: 1.5662 - val_accuracy: 0.5022\n", + "Epoch 32/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9068 - accuracy: 0.6945 - val_loss: 1.5796 - val_accuracy: 0.5050\n", + "Epoch 33/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9320 - accuracy: 0.6830 - val_loss: 1.5989 - val_accuracy: 0.5112\n", + "Epoch 34/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 128.6344 - accuracy: 0.6796 - val_loss: 1.6077 - val_accuracy: 0.4778\n", + "Epoch 35/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1029 - accuracy: 0.6221 - val_loss: 1.5892 - val_accuracy: 0.4914\n", + "Epoch 36/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0178 - accuracy: 0.6509 - val_loss: 1.6135 - val_accuracy: 0.4984\n", + "Epoch 37/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9726 - accuracy: 0.6713 - val_loss: 1.6034 - val_accuracy: 0.5038\n", + "Epoch 38/100\n", + "1407/1407 [==============================] - 13s 
9ms/step - loss: 0.9423 - accuracy: 0.6813 - val_loss: 1.6206 - val_accuracy: 0.5006\n", + "157/157 [==============================] - 1s 2ms/step - loss: 1.4755 - accuracy: 0.5022\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1.4755374193191528, 0.5022000074386597]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 136 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "c47RjcCqLOpm", + "outputId": "f3aefa6c-a958-42fb-da55-c137c672b7ee", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "model = keras.models.load_model(\"my_cifar10_selu_model.h5\")\n", + "model.evaluate(X_valid_scaled, y_valid)" + ], + "execution_count": 137, + "outputs": [ + { + "output_type": "stream", + "text": [ + "157/157 [==============================] - 1s 3ms/step - loss: 1.4755 - accuracy: 0.5022\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1.4755374193191528, 0.5022000074386597]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 137 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tiqKhLWPLOpn" + }, + "source": [ + "51.4% 정확도를 얻었습니다. 원래 모델보다 더 좋습니다. 하지만 배치 정규화를 사용한 모델만큼 좋지는 않습니다. 최고의 모델에 도달하는 데 13 에포크가 걸렸습니다. 이는 원본 모델이나 BN 모델보다 더 빠른 것입니다. 각 에포크는 원본 모델처럼 10초만 걸렸습니다. 따라서 이 모델이 지금까지 가장 빠른 모델입니다(에포크 수와 실제 경과 시간 기준으로)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q1pbE7gjLOpn" + }, + "source": [ + "### e.\n", + "*문제: 알파 드롭아웃으로 모델에 규제를 적용해보세요.
그다음 모델을 다시 훈련하지 않고 MC 드롭아웃으로 더 높은 정확도를 얻을 수 있는지 확인해보세요.*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RnApp_3mLOpn", + "outputId": "ccdd98f7-f09c-4fb9-d3db-af4ddf65a8ab", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", + "for _ in range(20):\n", + " model.add(keras.layers.Dense(100,\n", + " kernel_initializer=\"lecun_normal\",\n", + " activation=\"selu\"))\n", + "\n", + "model.add(keras.layers.AlphaDropout(rate=0.1))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", + "\n", + "optimizer = keras.optimizers.Nadam(lr=5e-4)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=optimizer,\n", + " metrics=[\"accuracy\"])\n", + "\n", + "early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)\n", + "model_checkpoint_cb = keras.callbacks.ModelCheckpoint(\"my_cifar10_alpha_dropout_model.h5\", save_best_only=True)\n", + "run_index = 1 # 모델을 훈련할 때마다 증가시킴\n", + "run_logdir = os.path.join(os.curdir, \"my_cifar10_logs\", \"run_alpha_dropout_{:03d}\".format(run_index))\n", + "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", + "callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]\n", + "\n", + "X_means = X_train.mean(axis=0)\n", + "X_stds = X_train.std(axis=0)\n", + "X_train_scaled = (X_train - X_means) / X_stds\n", + "X_valid_scaled = (X_valid - X_means) / X_stds\n", + "X_test_scaled = (X_test - X_means) / X_stds\n", + "\n", + "model.fit(X_train_scaled, y_train, epochs=100,\n", + " validation_data=(X_valid_scaled, y_valid),\n", + " callbacks=callbacks)\n", + "\n", + "model = keras.models.load_model(\"my_cifar10_alpha_dropout_model.h5\")\n", + "model.evaluate(X_valid_scaled, y_valid)" + ], + "execution_count": 138, + "outputs": [ + { + "output_type": 
"stream", + "text": [ + "Epoch 1/100\n", + "1407/1407 [==============================] - 17s 10ms/step - loss: 2.0545 - accuracy: 0.2804 - val_loss: 1.7849 - val_accuracy: 0.3802\n", + "Epoch 2/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.6891 - accuracy: 0.4028 - val_loss: 1.6433 - val_accuracy: 0.4222\n", + "Epoch 3/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.5892 - accuracy: 0.4414 - val_loss: 1.6071 - val_accuracy: 0.4362\n", + "Epoch 4/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.5089 - accuracy: 0.4714 - val_loss: 1.5975 - val_accuracy: 0.4482\n", + "Epoch 5/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4522 - accuracy: 0.4934 - val_loss: 1.6036 - val_accuracy: 0.4602\n", + "Epoch 6/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.4014 - accuracy: 0.5114 - val_loss: 1.5332 - val_accuracy: 0.4778\n", + "Epoch 7/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3527 - accuracy: 0.5290 - val_loss: 1.5837 - val_accuracy: 0.4700\n", + "Epoch 8/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.3182 - accuracy: 0.5379 - val_loss: 1.4835 - val_accuracy: 0.5016\n", + "Epoch 9/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2835 - accuracy: 0.5536 - val_loss: 1.5400 - val_accuracy: 0.4866\n", + "Epoch 10/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2473 - accuracy: 0.5677 - val_loss: 1.5282 - val_accuracy: 0.4944\n", + "Epoch 11/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.2099 - accuracy: 0.5855 - val_loss: 1.5768 - val_accuracy: 0.5080\n", + "Epoch 12/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1898 - accuracy: 0.5905 - val_loss: 1.5192 - val_accuracy: 0.5046\n", + "Epoch 13/100\n", + "1407/1407 
[==============================] - 13s 9ms/step - loss: 1.1486 - accuracy: 0.6044 - val_loss: 1.5302 - val_accuracy: 0.5092\n", + "Epoch 14/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1219 - accuracy: 0.6141 - val_loss: 1.5072 - val_accuracy: 0.5086\n", + "Epoch 15/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.1008 - accuracy: 0.6171 - val_loss: 1.6471 - val_accuracy: 0.5088\n", + "Epoch 16/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0778 - accuracy: 0.6309 - val_loss: 1.6557 - val_accuracy: 0.5152\n", + "Epoch 17/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0515 - accuracy: 0.6434 - val_loss: 1.6130 - val_accuracy: 0.5208\n", + "Epoch 18/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0246 - accuracy: 0.6530 - val_loss: 1.6559 - val_accuracy: 0.5082\n", + "Epoch 19/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 1.0088 - accuracy: 0.6552 - val_loss: 1.7325 - val_accuracy: 0.5126\n", + "Epoch 20/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9908 - accuracy: 0.6627 - val_loss: 1.6986 - val_accuracy: 0.5134\n", + "Epoch 21/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9657 - accuracy: 0.6694 - val_loss: 1.7291 - val_accuracy: 0.5010\n", + "Epoch 22/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9548 - accuracy: 0.6728 - val_loss: 1.7533 - val_accuracy: 0.5140\n", + "Epoch 23/100\n", + "1407/1407 [==============================] - 13s 10ms/step - loss: 0.9199 - accuracy: 0.6913 - val_loss: 1.7172 - val_accuracy: 0.5042\n", + "Epoch 24/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.9132 - accuracy: 0.6906 - val_loss: 1.6688 - val_accuracy: 0.5206\n", + "Epoch 25/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.8955 - 
accuracy: 0.6978 - val_loss: 1.7418 - val_accuracy: 0.5156\n", + "Epoch 26/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.8596 - accuracy: 0.7116 - val_loss: 1.8324 - val_accuracy: 0.5188\n", + "Epoch 27/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.8607 - accuracy: 0.7123 - val_loss: 1.7325 - val_accuracy: 0.5100\n", + "Epoch 28/100\n", + "1407/1407 [==============================] - 13s 9ms/step - loss: 0.8528 - accuracy: 0.7139 - val_loss: 1.8382 - val_accuracy: 0.5002\n", + "157/157 [==============================] - 1s 2ms/step - loss: 1.4835 - accuracy: 0.5016\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1.4834871292114258, 0.5016000270843506]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 138 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AsDa8NPNLOpn" + }, + "source": [ + "이 모델은 검증 세트에서 50.8% 정확도에 도달합니다. 드롭아웃이 없을 때보다(51.4%) 조금 더 나쁩니다. 하이퍼파라미터 탐색을 좀 많이 수행해 보면 더 나아질 수 있습니다(드롭아웃 비율 5%, 10%, 20%, 40%와 학습률 1e-4, 3e-4, 5e-4, 1e-3을 시도했습니다). 하지만 이 경우에는 향상 폭이 크지 않을 것 같습니다." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bNuTrZp6LOpn" + }, + "source": [ + "이제 MC 드롭아웃을 사용해 보죠. 앞서 사용한 `MCAlphaDropout` 클래스를 복사해 사용하겠습니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xUA1Qi9lLOpn" + }, + "source": [ + "class MCAlphaDropout(keras.layers.AlphaDropout):\n", + " def call(self, inputs):\n", + " return super().call(inputs, training=True)" + ], + "execution_count": 139, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HgMCXU-lLOpn" + }, + "source": [ + "방금 훈련했던 모델과 (같은 가중치를 가진) 동일한 새로운 모델을 만들어 보죠.
하지만 `AlphaDropout` 층 대신 `MCAlphaDropout` 드롭아웃 층을 사용합니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TrqaKF9PLOpn" + }, + "source": [ + "mc_model = keras.models.Sequential([\n", + " MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer\n", + " for layer in model.layers\n", + "])" + ], + "execution_count": 140, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cqVC9dDXLOpn" + }, + "source": [ + "그다음 몇 가지 유틸리티 함수를 추가합니다. 첫 번째 함수는 모델을 여러 번 실행합니다(기본적으로 10번). 그다음 평균한 예측 클래스 확률을 반환합니다. 두 번째 함수는 이 평균 확률을 사용해 각 샘플의 클래스를 예측합니다:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_eXR7S5ILOpo" + }, + "source": [ + "def mc_dropout_predict_probas(mc_model, X, n_samples=10):\n", + " Y_probas = [mc_model.predict(X) for sample in range(n_samples)]\n", + " return np.mean(Y_probas, axis=0)\n", + "\n", + "def mc_dropout_predict_classes(mc_model, X, n_samples=10):\n", + " Y_probas = mc_dropout_predict_probas(mc_model, X, n_samples)\n", + " return np.argmax(Y_probas, axis=1)" + ], + "execution_count": 141, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Iuu0Qr59LOpo" + }, + "source": [ + "이제 검증 세트의 모든 샘플에 대해 예측을 만들고 정확도를 계산해 보죠:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5HPOAWCLLOpo", + "outputId": "91fd39e3-2205-42aa-af3f-80a7af4b1867", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "y_pred = mc_dropout_predict_classes(mc_model, X_valid_scaled)\n", + "accuracy = np.mean(y_pred == y_valid[:, 0])\n", + "accuracy" + ], + "execution_count": 142, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.5024" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 142 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-eNayl7MLOpo" + }, + "source": [ + "이 경우에는 실제적인 정확도 
향상이 없습니다(50.8%에서 50.9%).\n", + "\n", + "따라서 이 연습문제에서 얻은 최상의 모델은 배치 정규화 모델입니다." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XvfEibM4LOpo" + }, + "source": [ + "### f.\n", + "*문제: 1사이클 스케줄링으로 모델을 다시 훈련하고 훈련 속도와 모델 정확도가 향상되는지 확인해보세요.*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "93aZAECALOpo" + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", + "for _ in range(20):\n", + " model.add(keras.layers.Dense(100,\n", + " kernel_initializer=\"lecun_normal\",\n", + " activation=\"selu\"))\n", + "\n", + "model.add(keras.layers.AlphaDropout(rate=0.1))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", + "\n", + "optimizer = keras.optimizers.SGD(lr=1e-3)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=optimizer,\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 143, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "Hy3EYeBLLOpo", + "outputId": "4740e094-df47-4ef0-ac39-6c0bc5a93aa5", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 376 + } + }, + "source": [ + "batch_size = 128\n", + "rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)\n", + "plot_lr_vs_loss(rates, losses)\n", + "plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 1.4])" + ], + "execution_count": 144, + "outputs": [ + { + "output_type": "stream", + "text": [ + "352/352 [==============================] - 2s 5ms/step - loss: nan - accuracy: 0.1251\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(9.999999747378752e-06,\n", + " 9.999868392944336,\n", + " 2.6116409301757812,\n", + " 3.931788035801479)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 144 + }, + { +
"output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAERCAYAAACO6FuTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXxU5dn/8c+VnZCQQBLCGiIg+yKCirIq7lVb971aa23VPi59bK1PN9vaWqxtf7V1o+2jdV8qPloFXAAF3NgEFWSXsAphFQgEyFy/P2aCMc1AQpI5mcn3/XqdFzPn3HNy3Zkw19zLObe5OyIiIjVJCjoAERFpupQkREQkKiUJERGJSklCRESiUpIQEZGolCRERCSqlKADaEj5+fleXFwcdBgi0gg27ihnwxd76NchB7Ogo0ksc+bM2eTuBTUdS6gkUVxczOzZs4MOQ0Qawf1Tl/H71xbz7l2nk56SHHQ4CcXMSqIdU3eTiIhEpSQhIiJRKUmIiEhUShIiIhKVkoSIiESlJCEiIlEpSYiISFRKEiIiEpWShIiIRKUkISIiUSlJiIhIVEoSIiISlZKEiIhEpSQhIiJRKUmIiEhUShIiIhJVTJOEmT1hZuvN7AszW2Jm10YpZ2Z2l5mtNbPtZvaWmfWNZawiIhL7lsTdQLG7twLOAe4ys8E1lLsQuAYYAbQB3gMej1mUIiICxDhJuPsCdy+vfBrZutVQ9AhghruvcPcK4AmgT4zCFBGRiJiPSZjZA2ZWBiwC1gMTaij2DNDNzHqYWSpwFTApyvmuM7PZZja7tLS00eIWEWmOYp4k3P0GIJtwV9J4oLyGYuuBGcBiYDfh7qdbo5xvnLsPcfchBQUFjRO0iEgzFcjsJnevcPcZQCfg+hqK/Bw4BugMZAC/BKaYWWbsohQRkaCnwKZQ85jEUcCz7r7G3fe7+6NAazQuISISUzFLEmbW1swuMbMsM0s2s9OAS4HJNRSfBVxoZoVmlmRmVwKpwLJYxSsiIuFv8rHihLuWHiKcnEqAW9z9ZTMrAhYCfdx9FTAWaAvMA1oSTg7nu/u2GMYrItLsxSxJuHspMCrKsVVAVpXne4AbI5uIiAQk6DEJERFpwpQkREQkKiUJERGJSklCRESiUpIQEZGolCRERCQqJQkREYlKSUJERKJSkhARkaiUJEREJColCRERiUpJQkREolKSEBGRqJQkREQkKiUJERGJSklCRESiUpIQEZGolCRERCQqJQkREYlKSUJERKJSkhARkaiUJEREJColCRERiSqmScLMnjCz9Wb2hZktMbNrD1K2q5m9YmY7zGyTmd0Ty1hFRCT2LYm7gWJ3bwWcA9xlZoOrFzKzNOANYArQDugEPBHLQEVEJMZJwt0XuHt55dPI1q2GolcD69z9j+6+y933uPtHsYpTRETCYj4mYWYPmFkZsAhYD0yoodhQYKWZTYx0Nb1lZv2jnO86M5ttZrNLS0sbMXIRkeYn5knC3W8AsoERwHigvIZinYBLgPuADsCrwEuRbqjq5xvn7kPcfUhBQUHjBS4i0gwFMrvJ3SvcfQbhZHB9DUV2AzPcfaK77wXuBfKA3jEMU0Sk2Qt6CmwKNY9JfER4vEJERAIUsyRhZm3N7BIzyzKzZDM7DbgUmFxD8SeAoWZ2spklA7cAm4BPYxWviIjEtiXhhLuW1gBbCXch3eLuL5tZkZntNLMiAHdfDFwBPBQp+3XgnEjXk4iIxEhKrH6Qu5cCo6IcWwVkVds3nvDAtoiIBCToMQkREWnClCRERCQqJQkREYlKSUJERKJSkhARkaiUJEREJColCRERiUpJQkREolKSEBGRqJQkREQkKiUJERGJSklCRESiUpIQEZGolCRERCQqJQkREYlKSUJERKJSkhARkaiUJEREJColCRERiUpJQkREo
lKSEBGRqJQkREQkKiUJERGJKqZJwsyeMLP1ZvaFmS0xs2tr8ZrJZuZmlhKLGEVE5EuxbkncDRS7eyvgHOAuMxscrbCZXQ6kxio4ERH5qpgmCXdf4O7llU8jW7eayppZDvAL4EcxCk9ERKqJ+ZiEmT1gZmXAImA9MCFK0d8CDwKfH+J815nZbDObXVpa2rDBiog0czFPEu5+A5ANjADGA+XVy5jZEGAY8JdanG+cuw9x9yEFBQUNHa6ISLMWyOwmd69w9xlAJ+D6qsfMLAl4ALjZ3fcHEZ+IiIQFPQU2hf8ck2gFDAGeNbPPgVmR/WvMbEQsgxMRae5iNq3UzNoCJwGvALuBk4FLI1tV24EOVZ53BmYCgwENOoiIxFAsrz1wwl1LDxFuwZQAt7j7y2ZWBCwE+rj7KqoMVptZRuThBnU/iYjEVsyShLuXAqOiHFsFZEU5thKwxotMRESiCXpMQkREmjAlCRERiareScLMdNsMEZEEVackYWY3mdn5VZ7/A9htZovNrGeDRyciIoGqa0viJiLTUM1sJHARcBkwD/hDw4YmIiJBq+vspo7AZ5HHZwPPu/tzZvYxML1BIxMRkcDVtSXxBdA28vgUYHLk8T4go8ZXiIhI3KprS+J14G9mNhfoDkyM7O/Lly0MERFJEHVtSdwIvAMUABe4+5bI/qOBpxsyMBERCV6dWhLu/gXwXzXs/0WDRSQiIk1GXafA9qk61dXMTomsW32HmSU3fHgiIhKkunY3/S8wCMDMOgMvAW0Id0Pd1bChiYhI0OqaJHoBcyOPLwA+cPczgSv5z1t+i4hInKtrkkgG9kYej+HL9amXA4UNFZSIiDQNdU0SnwDXR1aIGwNMiuzvCGxqyMBERCR4dU0StwPfAd4Cnnb3jyP7zyG8epyIiCSQuk6BnWZmBUArd99a5dDDQFmDRiYiIoGr88p07l5hZrvNrB/hJUmXR1aPExGRBFPX6yRSzOz3wFZgPvAxsNXM7tG6EiIiiaeuLYl7CE91/R4wI7JvBHA34YRzW8OFJiIiQatrkrgMuMbdJ1TZt9zMSoG/oyQhIpJQ6jq7KYfwNRHVLQdy6x+OiIg0JXVNEvMJr05X3c2RYyIikkDqmiR+BFwVWdP6n5FtMXAFtehqitwMcL2ZfWFmS8zs2ijlrjKzOZFyayID43WeiSUiiSMUcgCSzQKOpHmpU5Jw92lAD+BfQFZkex44jZpbGNXdDRS7eyvCF+DdZWaDayiXCdwC5APHEb66W+MdIs1YhYeTRJKSREwdznUS64CfVN1nZgOB82vx2gVVn0a2bsCcauUerPJ0rZk9CZxY11hFJHFUtiSSkpQkYqmu3U31ZmYPmFkZsAhYz5c3CTyYkcCCmg6Y2XVmNtvMZpeWljZgpCLSlFS4k6wEEXMxTxLufgOQTfj6ivFA+cHKm9k1wBDg3ijnG+fuQ9x9SEFBQUOHKyJNREVI4xFBiHmSgPCtPdx9BtAJuD5aOTP7BuFxjDPcXXeZFWnGQu4kBfKJ1bzVakzCzF4+RJFW9fj53aL8zNOBvwFfq3K3WRFppkIhV0siALUduN5ci+OfHayAmbUFTgJeAXYDJxO+xcd/rGhnZicBTwLnurtuQS4iVLhrZlMAapUk3P1bDfCznHDX0kOEu7lKgFvc/WUzKwIWAn3cfRXwM8JXd0+wL/8oprv7GQ0Qh4jEoVDINbMpADG7QM3dS4FRUY6tInzNReVzTXcVka/Q7KZgaBhIROJCRUgX0gVBSUJE4kIo5CTrEyvm9CsXkbgQcs1uCoKShIjEhQp3TEki5pQkRCQuhLublCRiTUlCROJChaMkEQAlCRGJC6GQoxwRe0oSIhIXKtTdFAglCRGJCyHdliMQShIiEheUJIKhJCEicUHdTcFQkhCRuFDhWro0CEoSIhIXwutJBB1F86MkISJxQd1NwVCSEJG4oIHrYChJVLFs4w6ueXQWn6zdHnQoIlKNkkQwYrboUFBmrdzCzvL9n
Niz7Vf2r9++m/ysdFKTk/hgxWbK9lZw98RPWbJhJ3NKtvLqTcPp1DozoKhFpLqKkJOeoiQRawmfJH7174Ws2VrGrJ+cTEpyEo+/t5J/z1/PzJVbOKlXW648vgvfemQWABmpSfz23P786pUF3PnyQs4a0J6123Zz3ciuzFi6ifdXbOaGE7uT0yI12EqJNEOa3RSMhE4SO/bsY8G67YQc5pRsZVBRa3796qe0a5XBOQM78PL8dcxYuokueZn87rwB9OvYiuyMVEp3lPOnN5fw5qcbAHhr8Ubmr97O3ooQb3y6gQk3jSAjNTng2ok0L5rdFIyEThJzV20j5OHHry/cQHpqMnv3h/jxGb04s397RhyZz9MzV3HrKT04vlvegdfdeGI3BndpTasWKcz8bAt/m76CY49ow4VDOnHzM/P465Rl3HZaz6/8rD37Knhn2SaSkoy05CT6dchhzbYyFqz9gnY5GQzvnq9vQSL1oNlNwUjoJDHrsy0kJxlHdc7lnWWb6JjbAoCji1oDcOGQzlw4pPN/vC4lOYnhR+YDMKBTLteO6Hrg2NtLSvnr1GXsLN/PmN5tyUhN5t/z1/Hi3LXsKN8fNZYz+rXjf87sTec2GucQORwauA5GQieJBeu206Mwm2Hd8/nrlKW8u3wTHXIyaJeTcdjnHHv+AFplpPLP91by6LsrgfA97r9xVEe+MagDLVKT2b2vglkrt9KlTSaDinJ5feEGfv/aYiZ/upGnrxtKj8IssjM0riFSF0oSwUjoJLFqSxk9CrM5qnMOIYc3P93I2QM71OucqclJ3HlOXy47rojSHeVsLdtLt4Iserdv9ZVyI44sOPD4e6OyOHtgBy566D0ueOhdAM7s357fnttfg+AitaTupmDENEmY2RPAGKAl8Dlwj7v/PUrZW4HbgUzgX8D17l5e258VCjmrt+7m5N6FDOiUe2D/eYM61qMGX+pRmE2Pwuxal++Y24IHLj+acdNXUJidwePvr2TWZ1s4o187rjqhmK4FWQ0Sl0iiCml2UyBi3ZK4G/i2u5ebWS/gLTP70N3nVC1kZqcBPwZOAtYBLwK/jOyrlY07ytm7P0TnNpnkZ6Uf2D+yR8FBXtW4BnbO5f7LjgbgzP7teHjaCp6etZrH3i9h6BF5DOuex9CueQzsnMuOPfvJTEvWLCqRiArNbgpETJOEuy+o+jSydQPmVCt6FfCPyvJm9mvgSeqQJFZtKQOgKDJQ/MS3jyMjNanJNFeHFLdhSHEbSneU8/j7Jby+4HPufX0JANkZKewq30/LtBS+NayY743uRmZaQvcMihxSRcjVkghAzD95zOwB4GqgBfAhMKGGYn2Bl6o8nw8Umlmeu2+udr7rgOsAioqKAHh9wef8/rXFwJdJonK2UlNTkJ3OD07pwQ9O6cHWXXv54LPNTFm0kTYt01m1ZRf3TVnGc7PXcGRhFm1aptGlTSbnHt2JZDM6t2nBF7v3M7tkCzv27Ccl2ejfMYeiNpmYBvgkwbg7yfq7jrmYJwl3v8HM/gs4HhgN1DTOkAVUvYFS5eNs4CtJwt3HAeMAhgwZ4i/NW8vNz8w7cLxDZNprPGjdMo3T+7Xn9H7tD+ybtXIL901eyo49+1m5eRevfLSe+6YsA6CwVTpbd+1jb0XoK+fJzUzlmOI2jDwyn5E9CuiS1zKm9RBpDBWa3RSIQPow3L0CmGFmVwDXA/dVK7ITqDpdqPLxjkOde27JVrLTU+jaNouy8v2kpcT3PQyPKW7D498+7sDzddt289K8daSnJPHh6m10yMlgTO9C8rPS2L2vgo/WbGfeqm1MW1rKGws3kJocnp47skcBJ3TLI6/K+IxIPKkIaeA6CEF3dKcQHpOobgEwEHgu8nwgsKF6V1NNtpTtIz87nRevP4F9odChisedDrktuH50Tb+ysL4dcrj02CLcndVbdnP/1GVM+GQ9z89ZA0Cf9q04d1BHLj2uiKz0oN9+kdoLuZMc39/54lLMPiXMrC3h2
UqvALuBk4FLI1t1jwGPmtmThGc3/RR4tDY/Z+uuvbTOTCUpyUhPar4zg8yMorxMxl4wgN+c24+P127nnWWbmLq4lN9M+JT7piyla0EWo3oU8O1hR5CTqes1pGkLz25SSyLWYvlV0gl3LT1EeB2LEuAWd3/ZzIqAhUAfd1/l7pPM7B5gKuEB7heAX9Tmh2zZtZf29biiOhGlJCcxqKg1g4pa8/2TjmT+6m08+UEJKzeV8ZcpS3nqgxIuP64LJ3TLo1/HHFqqhSFNUEizmwIRs08Ddy8FRkU5torwYHXVfX8E/ljXn7O1bC99OrQ6dMFmbGDnXAZ2Dl9guGDddn43cRF/nryUP09eSpKFLxQ8prgNZw1oT7e2WV+5zkQkKCHNbgpEwn1l3Fq2lzYt04IOI2707ZDD498+js07y5m/ZhvzVm9n/upt/GvOGh5/vwQzOLFnW8aeP4CCbCULCU6FqyURhIRKEiGHPftCtM5UkqirvKx0TupVyEm9CgHYXraPWSu38NHa7YybtpxhY6cwvHs+143sytCueYc4m0jDC4XQFNgAJFSSqIjMZmqtQdh6y8lM5eQ+hZzcp5CzBrTn2VmreXn+Oi4Z9z7nDerI1cOKv3JPLJHGVqHZTYFIqCSxP7LCUGt1NzWoHoXZ/OysPvzwtJ786Y0lPP5+CeM/XEvb7HS6FrSka0EWJ3TLY0yvQlqkNd8ZZdK4NLspGAmVJCoqwklCYxKNIyM1mTvO7M2NJ3XnpXnrmLdqG59t2skr89fx1AeraJmWzDXDj+DaEV11C3RpUKHIF0CNScReQiWJAy0JjUk0qlYZqVw5tAtXDu0ChL/hfbBiM0/NXMVfpizj79M/4xuDOnDF0C707ZATcLSSCEIe/r+tlkTsJVSS0JhEMJKTjBO653NC93y+N2o7T7xfwosfruXpmas5uiiXq04o5sz+7UlVh7IcpgpXSyIoCfW/ds/+EC3TkslVSyIw/Trm8LvzB/DBHSfz87P6sLVsHzc/M48RY6dy/9RlbNm1N+gQJQ5V3mFHs5tiL6FaErvK93P6EW2azJoRzVlOZirXDD+Cq08o5q0lG3nknZX8/rXF3D91GVcO7cKongUc3zVPtzSXWqlsSagxGnsJlSTK94c4vpvm8DclSUl24PqLJRt28MfXlzBu+goenraCgZ1z+c6II/ha//ZKFnJQFZUD1/o7ibmEShIAJ3RrmosLSXgq7UNXDqZs737Gz13LI+98xvef+pB/913HtSO60r9jjpZrlRpVzm5SL0HsJVSSyGmRSu/2um9TU5eZlsIVQ7tw6bFFPPT2cu6fuozXFmwgPyuN74zoyuVDu+g25vIVB2Y3KUnEXEL18BW1ydQfURxJTjJuPLE7M24/iYeuGEzv9q24e+Iihv52Mn+ZvJQ9+yqCDlGaiMoxCXVLxp6+rkng2rRM4/R+7Ti9Xzvmr97Gg28t5w9vLOHhaSs4tW8h1w7vqjv7NnOVs5t0nUTsKUlIkzKwcy4PXTmY91dsZvzcNUz8+HNe/HAt5x7VkR+c2oNOrTODDlECoNlNwVGSkCZpaNc8hnbN4ydn9uGBt5fxyDsreeXj9Vx9QjG3nHwkmWn6021OQprdFBjlZWnScjJTueOM3rx122jOGdiBv01fwZl/ns5zs1azvyLx1jCXmlVodlNglCQkLnTIbcG9Fw7kqWuHkpGazI9e+Ihz/voOc0q2Bh2axIBmNwVHSULiyvHd8ph48wgevPxotpbt5fwH3+XWZ+cxp2QrHvkgkcQT0uymwKhjV+KOmXFG//aM7FHAfZOX8th74RsK9mqXzQ9P68lJvdrqwyTBVGh2U2DUkpC41TI9hTvO7M2sn57M787rz559FXz7n7O54h8fULJ5V9DhSQP6ckwi4ECaIf3KJe5lpadwybFFvPGDUfz6632Zv3o7J977Ft97fA7vr9isbqgEUNndpNlNsafuJkkYqclJXHl8Maf0acc/31vJ0zNXMWnB5/Rql
823hhXz9aM66t5QcUqzm4ITs5aEmaWb2T/MrMTMdpjZPDM7I0pZM7O7zGytmW03s7fMrG+sYpX41i4ng9tP78X7d4xh7Pn9Abj9hY8Zevdkxk5axNptuwOOUOoqpEWHAhPL7qYUYDUwCsgBfgo8Z2bFNZS9ELgGGAG0Ad4DHo9JlJIwMlKTufiYIibePIJnrhvK0CPyePjt5Yy8Zyo3PDmHZRt3Bh2i1JK6m4ITs+4md98F3Fll1ytm9hkwGFhZrfgRwAx3XwFgZk8At8YgTElAZnbgCu41W8t44v1VPPlBCW8s3MCpfdpx++m9KMrT7T6aMs1uCk5gA9dmVgj0ABbUcPgZoJuZ9TCzVOAqYFKU81xnZrPNbHZpaWnjBSwJoVPrTH58Ri+m3jaaK4cWM21JKaf/eRp/eH0x28q0tGpTdWDRIU21iblAfuWRD/4ngX+6+6IaiqwHZgCLgd2Eu59qbEm4+zh3H+LuQwoKChorZEkw+Vnp/PzsPky6dSSjexbwlynLGDF2Kn9UsmiSDlxxrZZEzMU8SZhZEuHxhb3A96MU+zlwDNAZyAB+CUwxM/UJSIPqmNuCBy4fzKRbRjCiRz73VSaLN5awffe+oMOTCM1uCk5Mk4SFL4P9B1AInO/u0f4XHgU86+5r3H2/uz8KtAb6xCZSaW56tWvFA5cPZuLNIxh+ZD73TV7K8LFTuPe1xZoN1QRodlNwYt2SeBDoDZzt7gf7nzcLuNDMCs0sycyuBFKBZbEIUpqv3u1b8eAVg3n1puEc3zWP+99axqh7pnLb8/NZsG570OE1W5rdFJyYzW4ysy7Ad4Fy4PMq99b5LjAdWAj0cfdVwFigLTAPaEk4OZzv7ttiFa80b3075DDum0NYs7WMh99ewQtz1zB+7houGtKZ60d3o0tey6BDbFY0uyk4sZwCWwIc7B3OqlJ2D3BjZBMJTKfWmfz6G/247bSe/OmNJTw9cxUvfriWW0/pwbXDjyBFNxOKCc1uCo5+5SK1kNMilTvP6cvbPzyRUT0K+N3ERXz9/nf4QPeGigmtJxEcJQmROmiXk8HDVw7mwcuPZuOOci4e9z4XPPQeby7ccGCJTWl4B2Y3qbsp5nSDP5E6qlzPYnTPtjw/ZzUPv72Cax+bTc/CbL43uivnDOyob7wNTIsOBUctCZHD1CItmW8eX8xbPxzNny4eiOPc+ux8Tv9/03hj4QZ1QzUgdTcFRy0JkXpKTU7i3EGd+PrAjkxa8Dn3vraY7zw2m74dWtGvQw4XHdOJo4ta61twPWh2U3CUJEQaSFKScWb/9pzSp5DnZ6/hX3NWM+Hj9Tw7ezUDO+XwrWFHcGb/9qSlqAFfVyHNbgqMkoRIA0tNTuKy44q47LgidpXvZ/zcNTzy7kpueXYe90xaxA9O7cm5gzRuURcV6m4KjPKySCNqmZ7ClccX8+ato3jk6mMoyE7ntufn87X7pjPh4/Xs2VcRdIhxQbObgqOWhEgMJCUZJ/Zqy+ieBbz68XrumbSYG56cS35WOt8d2ZWLj+1Mq4zUoMNssjS7KThKEiIxZGacNaADp/dtxzvLNzNu2nJ+M+FTfjdpEaf0LuS/xnSnb4ecoMNscvZXqLspKEoSIgFISU5iVI8CRvUoYN7qbUz4eD1Pz1zFpAWfc3LvQi4Y3IlRPQpokZYcdKhNwoYv9pCWkkRuC7W2Yk1JQiRgR3XO5ajOudx4Ynf++e5K/jHjM978dAPZ6Sl8bUB7hh+ZzxH5LenTvlWz7W4p2VxG59YtdKvwAChJiDQROS1SuWnMkVw/uhszP9vC+LlreXn+Op6ZtRqA7m2zuOK4Is4b3KnZjV+s3LyLYt15NxBKEiJNTGpyEsO65zOsez53n9efJRt2sGDddp6auZo7/72QuycuYlj3fIZ2bUOn1pkM65ZPTmbiJg13Z9WWMo7vlhd0KM2SkoRIE5aWkkS/jjn065jDxccU8dGabYyfu5bJizYwZdFGAFKTjdE923Leo
I6c2KstGamJNY6xaedeyvZW0KWNVi8OgpKESBwZ0CmXAZ1y+cXZffhi936Wle5g4sef89L8dbyxMDyOcUqfQs4a2J7h3QsS4uruks27AOiSr+6mIChJiMQhMyMnM5XBXdowuEsbfnxGL95ZvplX5q/jtQWfM/7DteRnpXPxMZ247LgudMxtEXTIh23Jhp0AGpMIiCXSnSqHDBnis2fPDjoMkUDt3R9i+tJSnp656kCX1Em92nL50C4M755PapytpnfZ395n3bbdTL1tdLOd3dXYzGyOuw+p6ZhaEiIJJi0liTG9CxnTu5C123bz9AereGbWKt78dCPZ6Smc2rcdXz+qA0O75jX57qh123bz3orN3DzmSCWIgChJiCSwjrktuO20ntw05kimLNrI5E83MOmTz3lh7hoy05I5vmseI3sUcGLPthTlNa2B4VDI+dn/fUJKknHeoE5Bh9NsqbtJpJnZs6+CaUtKmba0lOlLN1GyuQyA0T0L6N8xh3Y5GQzslEvPdtmBdU3tqwhx2/PzeWneOn55Tl+uOqE4kDiaC3U3icgBGanJnNq3Haf2bQeEZw+9MGcNL81fx1uLSw+US09Jom+HVgzoFL4i/Ij8luRlpdEyLYXUlCSy0hvv4+MPry/hpXnr+OFpPfnm8V0a7efIoaklISIHVISctVt3M3/NNuav3sb8Ndv4eO129uwL/UfZjrktOKa4NbmZaaSnJJGXlcbWsn20zkyle9sskpOS2LOvgg45LejUugW5makHHVeoCDmTP93AL15ewPrte7j02M7cfd6AxqyuRBysJZFQSSI7O9sHDx4cdBgiCcUx9mXmsz+9FRUpmXhyKp6UQnnLdpRntceT0wglpUBSCngIrOYuKqvYS0r5F1hoH+YhkvftInlfGbhTkdqCPTldCKW0IHXXRrI3fkTWxo9Icq23EQtvv/1280gSZrYDWFzP0+QA2+tZrqZjh9pX/XhNx/KBTbWI7WCaWv2q7o+n+tXm/ar+OFb1q2vdatofRP0a672raX9d6xdPf5vV99Wmrl3cvaDGn+juCbMBsxvgHOPqW66mY4faV/14TccSsX7VysRN/WrzfgVVv7rWranUr7Heu4aoXzz9bR6sPoc6VtPWtCdJB+PfDVCupmOH2lf9+MGO1UdTq19D1q0u56tv/Wr7fp/EfM0AAAf2SURBVAVRv7rWrab9QdSvsd67mvYnUv0a9bMl0bqbZnuUfrVEoPrFN9UvfiVy3Q4l0VoS44IOoJGpfvFN9YtfiVy3g0qoloSIiDSsRGtJiIhIA1KSEBGRqJpdkjCzYjMrNbO3IlvNc4PjnJldamalhy4ZP8ys0MzeNbO3zWyKmbUPOqaGZGbHmtl7ZjbNzJ42s4Rak9TMcsxsppntNLN+QcfTEMxsrJlNN7PHE+39qtTskkTE2+4+OrIl1AcpgJklAxcCq4OOpYFtAoa7+yjgMeDbAcfT0FYDJ7n7SGAl8PVgw2lwZcDXgH8FHUhDMLOBQEd3HwEsAi4IOKRG0VyTxLBI9v+tJeZN6i8Fngf+84Y7cczdK9y9sk7ZwIIg42lo7r7e3XdHnu4l8d6/fQn2pewE4PXI40nAsABjaTRNOkmY2ffNbLaZlZvZo9WOtTGzF81sl5mVmNlltTzteqA7MBJoC5zXsFHXXmPUL9KKuAh4thFCrrVGeu8ws6PM7APg+8DcBg671hqrfpHXdwFOpeEvxKtLDI1Wv6amHnVtDXwRebwdaBOjkGOqqd8qfB1wF3AaUH2R3vsJf9sqBI4CXjWz+e6+wMzaAc/UcL5L3P1zoBzAzMYDQ4EXGin+Q2nw+kXO9Zy7hwJuJDXKe+fu84DjzOwi4A7ge41Wg4NrlPqZWSvgceBqd9/XeOEfUmP932uKDquuwDagVaRcDrAlNuHGWH3vRxKLjfAb+GiV5y0Jv3E9qux7HPhdLc6VXeXx3cA3E6x+Ywk3gScR/nZzXwLVLa3K49OAPybYe5cCTADGB
F2vxqhflfKPAv2Crlt960o4aTwWefw/wKVB16Extibd3XQQPYD97r6kyr75QN9avHa4mc0xs+lAR+Cpxgiwng67fu5+u7uf6u6nA0vd/abGCvIw1ee9Oyoy82cqcAvw+8YIsJ7qU79LgeOAn0Vm3l3cGAHWU33qh5lNINyV9jczu7rhw2tQB62rh1u1GyKfJX0JrkeiUTX17qZosviyL7DSdsKDmQfl7hOBiY0RVAM67PpV5U3zXjP1ee9mEh5LasrqU7/HCX9Tbcrq9bfp7mc2eESN55B1dfcfxjSiAMRrS2InX/YFVmoF7AgglsaQyPVL5LqB6pdImlNdo4rXJLEESDGzI6vsG0jiTIlM5Polct1A9UskzamuUTXpJGFmKWaWASQDyWaWYWYp7r4LGA/8ysxamtkwwhceNfWm+lckcv0SuW6g+hHn9auqOdX1sAQ9cn6I2QZ3Al5tuzNyrA3wf8AuYBVwWdDxqn7No26qX/zXr7nW9XA23SpcRESiatLdTSIiEiwlCRERiUpJQkREolKSEBGRqJQkREQkKiUJERGJSklCRESiUpIQaUBmdqeZfRJ0HCINRRfTSdyJrB6W7+5nBR1LdWaWBaS7++agY4nGzBy40N0TYq1paVxqSYjUgpml1aacu+8MIkGYWVJk6VqRBqUkIQnHzPqY2atmtsPMNprZ05FlNSuPH2Nmr5vZJjP7wsxmmNnx1c7hZnajmY03s13Abyu7kszsEjNbHjn//5lZfpXXfaW7ycweNbNXzOxmM1trZlvN7BEzy6xSpqWZPWZmO81sg5ndEXnNowep49WR8mdGft5eoPeh6mZmKyMPn4/UcWWVY2dHFuTaY2afmdlvapscJXEpSUhCMbP2wDTgE+BY4GTCi8e8ZGaVf+/ZhO/kOSJSZh4wwczyqp3uF4SXE+1PeK1jgGLgYuBcwiusDQJ+c4iwRgD9IrFUvvbmKsf/AIyK7D+J8O2oR9SiuhnAz4DvAn2AklrU7ZjIv98B2lc+N7PTgCeBvxJeZe0a4ALgt7WIQxJZ0HcY1KatrhvhNZJfiXLsV8DkavtaE76z57FRXmPAeuCKKvsc+Eu1cncCe4CcKvt+AiyrVuaTarGuBpKr7Psb8GbkcRbhVsAlVY63BLZSZb3lGmK+OhLj4EP8rqLV7YJq5aYBP6u27xuEF96xoN9zbcFtaklIohkMjIx0xew0s52EP6QBugGYWVsze9jMlpjZdsIrjbUFiqqda3YN5y9x9+1Vnq+LvPZgFrp7RZTXdANSgZmVBz28jkFtZkjtJ9xSOKAOdatuMPCTar+3pwgnrHYHf6kksnhd41okmiTgVeC2Go5tiPz7T6AQuBVYCZQDk4Hq/e+7ajjHvmrPnUN32x7Oa2qjvFrygdrXrbok4JfA8zUcK61fmBLPlCQk0cwFLiL8jb/6h3Ol4cBN7v4qgJkVEu6fD8JywknkGGBFJJ5MwmMYyw/jfLWp2z7Cq7BVNRfo5e7LDuNnSgJTkpB41crMjqq2bxvhAebvAM+a2VjC34K7Ek4c/+3uOwivXXyFmX1AuDvlHsLjAjHn7jvN7H+BsWa2ifD4wU8Jf7M/nIuYalO3lcAYM3ubcGtkK+GxnFfMrAR4jnBXVj/C4zg/Oow4JEFoTELi1Qjgw2rbve6+DhgGhIBJhBetv59wt0t55LXXEB4wngM8A/wv4Q/OoNwGTAdeBqYCHxEeD9lzGOeqTd3+GziR8FjNhwDu/hrwtcj+mZHtx4SX7JRmTFdcizQxZpZOeDrr7939D0HHI82buptEAmZmg4DehL+9ZwO3R/59Nsi4REBJQqSp+AHQky+ntY509zXBhiSi7iYRETkIDVyLiEhUShIiIhKVkoSIiESlJCEiIlEpSYiISFRKEiIiEtX/B/3VwjtP7gEsAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "G8aLsTeKLOpo" + }, + "source": [ + "keras.backend.clear_session()\n", + "tf.random.set_seed(42)\n", + "np.random.seed(42)\n", + "\n", + "model = keras.models.Sequential()\n", + "model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))\n", + "for _ in range(20):\n", + " model.add(keras.layers.Dense(100,\n", + " kernel_initializer=\"lecun_normal\",\n", + " activation=\"selu\"))\n", + "\n", + "model.add(keras.layers.AlphaDropout(rate=0.1))\n", + "model.add(keras.layers.Dense(10, activation=\"softmax\"))\n", + "\n", + "optimizer = keras.optimizers.SGD(lr=1e-2)\n", + "model.compile(loss=\"sparse_categorical_crossentropy\",\n", + " optimizer=optimizer,\n", + " metrics=[\"accuracy\"])" + ], + "execution_count": 145, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "MoHWUDtBLOpp", + "outputId": "10416f81-f6d1-4e78-8382-15ca285a9a4e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "n_epochs = 15\n", + "onecycle = OneCycleScheduler(len(X_train_scaled) // batch_size * n_epochs, max_rate=0.05)\n", + "history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,\n", + " validation_data=(X_valid_scaled, y_valid),\n", + " callbacks=[onecycle])" + ], + "execution_count": 146, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/15\n", + "352/352 [==============================] - 3s 5ms/step - loss: 2.2329 - accuracy: 0.2356 - val_loss: 1.7666 - val_accuracy: 0.3756\n", + "Epoch 2/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.7975 - accuracy: 0.3638 - val_loss: 1.6788 - val_accuracy: 0.4198\n", + "Epoch 3/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.6467 - accuracy: 0.4186 - val_loss: 1.6238 - val_accuracy: 0.4298\n", + "Epoch 4/15\n", + "352/352 [==============================] - 2s 5ms/step - 
loss: 1.5460 - accuracy: 0.4497 - val_loss: 1.7127 - val_accuracy: 0.4134\n", + "Epoch 5/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.4914 - accuracy: 0.4703 - val_loss: 1.6201 - val_accuracy: 0.4420\n", + "Epoch 6/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.4389 - accuracy: 0.4839 - val_loss: 1.5667 - val_accuracy: 0.4528\n", + "Epoch 7/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.4040 - accuracy: 0.5038 - val_loss: 1.5114 - val_accuracy: 0.4684\n", + "Epoch 8/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.3507 - accuracy: 0.5187 - val_loss: 1.4993 - val_accuracy: 0.4828\n", + "Epoch 9/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.2743 - accuracy: 0.5475 - val_loss: 1.5490 - val_accuracy: 0.4810\n", + "Epoch 10/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.1955 - accuracy: 0.5722 - val_loss: 1.5555 - val_accuracy: 0.4952\n", + "Epoch 11/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.1234 - accuracy: 0.6027 - val_loss: 1.5497 - val_accuracy: 0.5010\n", + "Epoch 12/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 1.0704 - accuracy: 0.6184 - val_loss: 1.4914 - val_accuracy: 0.5132\n", + "Epoch 13/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 0.9946 - accuracy: 0.6453 - val_loss: 1.5379 - val_accuracy: 0.5220\n", + "Epoch 14/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 0.9180 - accuracy: 0.6725 - val_loss: 1.5380 - val_accuracy: 0.5330\n", + "Epoch 15/15\n", + "352/352 [==============================] - 2s 5ms/step - loss: 0.8923 - accuracy: 0.6833 - val_loss: 1.5679 - val_accuracy: 0.5334\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VfCKlk3BLOpp" + }, + "source": [ + "1사이클 방식을 사용해 모델을 15에포크 동안 훈련했습니다. (큰 배치 크기 덕분에) 각 에포크는 3초만 걸렸습니다. 
이는 지금까지 훈련한 가장 빠른 모델보다 3배나 더 빠릅니다. 또한 모델 성능도 올라갔습니다(50.8%에서 52.8%). 배치 정규화 모델이 조금 더 성능이 높지만 훈련 속도가 더 느립니다." + ] } - ], - "source": [ - "n_epochs = 15\n", - "onecycle = OneCycleScheduler(len(X_train_scaled) // batch_size * n_epochs, max_rate=0.05)\n", - "history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,\n", - " validation_data=(X_valid_scaled, y_valid),\n", - " callbacks=[onecycle])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "1사이클 방식을 사용해 모델을 15에포크 동안 훈련했습니다. (큰 배치 크기 덕분에) 각 에포크는 3초만 걸렸습니다. 이는 지금까지 훈련한 가장 빠른 모델보다 3배나 더 빠릅니다. 또한 모델 성능도 올라갔습니다(50.8%에서 52.8%). 배치 정규화 모델이 조금 더 성능이 높지만 훈련 속도가 더 느립니다." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "TensorFlow 2.3 on Python 3.6 (CUDA 10.1)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - }, - "nav_menu": { - "height": "360px", - "width": "416px" - }, - "toc": { - "navigate_menu": true, - "number_sections": true, - "sideBar": true, - "threshold": 6, - "toc_cell": false, - "toc_section_display": "block", - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + ] +} \ No newline at end of file