From 0731485df8813ab6118d5086059eaf601256f345 Mon Sep 17 00:00:00 2001
From: JWHan77 <33440610+JWHan77@users.noreply.github.com>
Date: Sun, 21 Oct 2018 22:28:19 +0900
Subject: [PATCH] Translate Statoil kernel comments into Korean (#38)

* Translate the code comments of the Statoil VGG-16 transfer-learning kernel into Korean
---
 ...arning-with-vgg-16-cnn-aug-lb-0-1712.ipynb | 39 +++++++++----------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/Korean/Statoil/transfer-learning-with-vgg-16-cnn-aug-lb-0-1712.ipynb b/Korean/Statoil/transfer-learning-with-vgg-16-cnn-aug-lb-0-1712.ipynb
index 64fef9d..1223ff3 100644
--- a/Korean/Statoil/transfer-learning-with-vgg-16-cnn-aug-lb-0-1712.ipynb
+++ b/Korean/Statoil/transfer-learning-with-vgg-16-cnn-aug-lb-0-1712.ipynb
@@ -16,7 +16,7 @@
     "import numpy as np # linear algebra\n",
     "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
     "\n",
-    "# Input data files are available in the \"../input/\" directory.\n",
+    "# 입력 데이터 파일은 \"../input/\" 디렉터리에 있습니다.\n",
     "# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory\n",
     "\n",
     "#from subprocess import check_output\n",
@@ -131,7 +131,7 @@
     "test['inc_angle']=pd.to_numeric(test['inc_angle'], errors='coerce')\n",
     "X_test_angle=test['inc_angle']\n",
     "\n",
-    "#Generate the training data\n",
+    "# training data를 생성합니다.\n",
     "X_band_1=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train[\"band_1\"]])\n",
     "X_band_2=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train[\"band_2\"]])\n",
     "X_band_3=(X_band_1+X_band_2)/2 #band_1과 band_2 각 행렬의 값을 더한 평균값으로 만듦. 그래서 band_1과 bnad_2의 크기가 같아야됨.\n",
@@ -193,36 +193,37 @@
     "                         rotation_range = 10) # 지정된 각도 범위내에서 임의로 원본이미지를 회전\n",
     "\n",
     "# 두 generators를 합치는 함수입니다\n",
-    "# We use the exact same generator with the same random seed for both the y and angle arrays\n",
+    "# y와 angle array 모두에 동일한 랜덤 시드를 가진 같은 제너레이터를 사용합니다\n",
+
     "def gen_flow_for_two_inputs(X1, X2, y):\n",
     "    genX1 = gen.flow(X1,y,  batch_size=batch_size,seed=55)\n",
     "    genX2 = gen.flow(X1,X2, batch_size=batch_size,seed=55)\n",
     "    while True:\n",
     "            X1i = genX1.next()\n",
     "            X2i = genX2.next()\n",
-    "            #Assert arrays are equal - this was for peace of mind, but slows down training\n",
+    "            #array가 동일한지 assert로 확인합니다 - 안심하려고 넣은 검사였지만 training 속도를 늦춥니다.\n",
     "            #np.testing.assert_array_equal(X1i[0],X2i[0])\n",
     "            yield [X1i[0], X2i[1]], X1i[1]\n",
     "\n",
-    "# Finally create generator\n",
+    "# 학습에 사용할 callbacks(EarlyStopping, ModelCheckpoint)를 생성합니다\n",
     "def get_callbacks(filepath, patience=2):\n",
-    "   es = EarlyStopping('val_loss', patience=10, mode=\"min\")\n",
+    "   es = EarlyStopping('val_loss', patience=10, mode=\"min\") # val_loss가 10 epoch 동안 개선되지 않으면 학습 조기 종료\n",
     "   msave = ModelCheckpoint(filepath, save_best_only=True)\n",
     "   return [es, msave]\n",
     "\n",
     "\n",
     "def getVggAngleModel():\n",
     "    input_2 = Input(shape=[1], name=\"angle\")\n",
-    "    angle_layer = Dense(1, )(input_2)\n",
-    "    base_model = VGG16(weights='imagenet', include_top=False, \n",
-    "                 input_shape=X_train.shape[1:], classes=1)\n",
+    "    angle_layer = Dense(1, )(input_2) # 출력 뉴런의 수는 1\n",
+    "    base_model = VGG16(weights='imagenet', include_top=False, # weights=\"imagenet\" : pre-training on ImageNet\n",
+    "                 input_shape=X_train.shape[1:], classes=1) \n",
     "    x = base_model.get_layer('block5_pool').output\n",
     "    \n",
     "\n",
     "    x = GlobalMaxPooling2D()(x)\n",
     "    merge_one = concatenate([x, angle_layer])\n",
     "    merge_one = Dense(512, activation='relu', name='fc2')(merge_one)\n",
-    "    merge_one = Dropout(0.3)(merge_one)\n",
+    "    merge_one = Dropout(0.3)(merge_one) # Dropout : 학습 중 입력 유닛의 30%를 무작위로 0으로 만들어 과적합을 줄임\n",
     "    merge_one = Dense(512, activation='relu', name='fc3')(merge_one)\n",
     "    merge_one = Dropout(0.3)(merge_one)\n",
     "    \n",
@@ -236,8 +237,7 @@
     "                  metrics=['accuracy'])\n",
     "    return model\n",
     "\n",
-    "\n",
-    "#Using K-fold Cross Validation with Data Augmentation.\n",
+    "# 데이터 증강(Data Augmentation)과 함께 K-fold Cross Validation을 사용합니다.\n",
     "def myAngleCV(X_train, X_angle, X_test):\n",
     "    K=3\n",
     "    folds = list(StratifiedKFold(n_splits=K, shuffle=True, random_state=16).split(X_train, target_train))\n",
@@ -255,7 +255,7 @@
     "        X_angle_cv=X_angle[train_idx]\n",
     "        X_angle_hold=X_angle[test_idx]\n",
     "\n",
-    "        #define file path and get callbacks\n",
+    "        # file path와 callbacks를 정의합니다.\n",
     "        file_path = \"%s_aug_model_weights.hdf5\"%j\n",
     "        callbacks = get_callbacks(filepath=file_path, patience=5)\n",
     "        gen_flow = gen_flow_for_two_inputs(X_train_cv, X_angle_cv, y_train_cv)\n",
@@ -269,26 +269,26 @@
     "                validation_data=([X_holdout,X_angle_hold], Y_holdout),\n",
     "                callbacks=callbacks)\n",
     "\n",
-    "        #Getting the Best Model\n",
+    "        # 최적의 모델을 가져옵니다.\n",
     "        galaxyModel.load_weights(filepath=file_path)\n",
-    "        #Getting Training Score\n",
+    "        # Training score를 가져옵니다.\n",
     "        score = galaxyModel.evaluate([X_train_cv,X_angle_cv], y_train_cv, verbose=0)\n",
     "        print('Train loss:', score[0])\n",
     "        print('Train accuracy:', score[1])\n",
-    "        #Getting Test Score\n",
+    "        # Test Score를 가져옵니다.\n",
     "        score = galaxyModel.evaluate([X_holdout,X_angle_hold], Y_holdout, verbose=0)\n",
     "        print('Test loss:', score[0])\n",
     "        print('Test accuracy:', score[1])\n",
     "\n",
-    "        #Getting validation Score.\n",
+    "        # validation Score를 가져옵니다.\n",
     "        pred_valid=galaxyModel.predict([X_holdout,X_angle_hold])\n",
     "        y_valid_pred_log[test_idx] = pred_valid.reshape(pred_valid.shape[0])\n",
     "\n",
-    "        #Getting Test Scores\n",
+    "        # Test Scores를 가져옵니다.\n",
     "        temp_test=galaxyModel.predict([X_test, X_test_angle])\n",
     "        y_test_pred_log+=temp_test.reshape(temp_test.shape[0])\n",
     "\n",
-    "        #Getting Train Scores\n",
+    "        # Train Scores를 가져옵니다.\n",
     "        temp_train=galaxyModel.predict([X_train, X_angle])\n",
     "        y_train_pred_log+=temp_train.reshape(temp_train.shape[0])\n",
     "\n",
@@ -323,7 +323,6 @@
    },
    "outputs": [],
    "source": [
-    "#Submission for each day.\n",
     "submission = pd.DataFrame()\n",
     "submission['id']=test['id']\n",
     "submission['is_iceberg']=preds\n",