From 0731485df8813ab6118d5086059eaf601256f345 Mon Sep 17 00:00:00 2001 From: JWHan77 <33440610+JWHan77@users.noreply.github.com> Date: Sun, 21 Oct 2018 22:28:19 +0900 Subject: [PATCH] Update Statoil Kernel as Korean (#38) * Update Statoil Kernel as Korean --- ...arning-with-vgg-16-cnn-aug-lb-0-1712.ipynb | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/Korean/Statoil/transfer-learning-with-vgg-16-cnn-aug-lb-0-1712.ipynb b/Korean/Statoil/transfer-learning-with-vgg-16-cnn-aug-lb-0-1712.ipynb index 64fef9d..1223ff3 100644 --- a/Korean/Statoil/transfer-learning-with-vgg-16-cnn-aug-lb-0-1712.ipynb +++ b/Korean/Statoil/transfer-learning-with-vgg-16-cnn-aug-lb-0-1712.ipynb @@ -16,7 +16,7 @@ "import numpy as np # linear algebra\n", "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", "\n", - "# Input data files are available in the \"../input/\" directory.\n", + "# \"../input/\" 경로로 데이터 파일을 input 할 수 있습니다.\n", "# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory\n", "\n", "#from subprocess import check_output\n", @@ -131,7 +131,7 @@ "test['inc_angle']=pd.to_numeric(test['inc_angle'], errors='coerce')\n", "X_test_angle=test['inc_angle']\n", "\n", - "#Generate the training data\n", + "# training data를 생성합니다.\n", "X_band_1=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train[\"band_1\"]])\n", "X_band_2=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train[\"band_2\"]])\n", "X_band_3=(X_band_1+X_band_2)/2 #band_1과 band_2 각 행렬의 값을 더한 평균값으로 만듦. 그래서 band_1과 bnad_2의 크기가 같아야됨.\n", @@ -193,36 +193,37 @@ " rotation_range = 10) # 지정된 각도 범위내에서 임의로 원본이미지를 회전\n", "\n", "# 두 generators를 합치는 함수입니다\n", - "# We use the exact same generator with the same random seed for both the y and angle arrays\n", + "# y와 angle array 모두에 동일한 랜덤 시드의 같은 제네레이터를 사용해야합니다\n", + "def gen_flow_for_two_inputs(X1, X2, y):\n", " genX1 = gen.flow(X1,y, batch_size=batch_size,seed=55)\n", " genX2 = gen.flow(X1,X2, batch_size=batch_size,seed=55)\n", " while True:\n", " X1i = genX1.next()\n", " X2i = genX2.next()\n", - " #Assert arrays are equal - this was for peace of mind, but slows down training\n", + " #array는 동일합니다 - 마음은 안정되었으나 training 속도가 느려졌습니다.\n", " #np.testing.assert_array_equal(X1i[0],X2i[0])\n", " yield [X1i[0], X2i[1]], X1i[1]\n", "\n", - "# Finally create generator\n", + "# generator를 생성합니다\n", "def get_callbacks(filepath, patience=2):\n", - " es = EarlyStopping('val_loss', patience=10, mode=\"min\")\n", + " es = EarlyStopping('val_loss', patience=10, mode=\"min\") # 더 이상 개선의 여지가 없으면 학습 조기 종료\n", " msave = ModelCheckpoint(filepath, save_best_only=True)\n", " return [es, msave]\n", "\n", "\n", "def getVggAngleModel():\n", " input_2 = Input(shape=[1], name=\"angle\")\n", - " angle_layer = Dense(1, )(input_2)\n", - " base_model = VGG16(weights='imagenet', include_top=False, \n", - " input_shape=X_train.shape[1:], classes=1)\n", + " angle_layer = Dense(1, )(input_2) # 출력 뉴런의 수는 1\n", + " base_model = VGG16(weights='imagenet', include_top=False, # weights=\"imagenet\" : pre-training on ImageNet\n", + " input_shape=X_train.shape[1:], classes=1) \n", " x = base_model.get_layer('block5_pool').output\n", " \n", "\n", " x = GlobalMaxPooling2D()(x)\n", " merge_one = concatenate([x, angle_layer])\n", " merge_one = Dense(512, activation='relu', name='fc2')(merge_one)\n", - " merge_one = Dropout(0.3)(merge_one)\n", + " merge_one = Dropout(0.3)(merge_one) # Dropout : 일부 weight만 사용\n", " merge_one = Dense(512, activation='relu', name='fc3')(merge_one)\n", " merge_one = Dropout(0.3)(merge_one)\n", " \n", @@ -236,8 +237,7 @@ " metrics=['accuracy'])\n", " return model\n", "\n", - "\n", - "#Using K-fold Cross Validation with Data Augmentation.\n", + "# 데이터 확장과 함께 K-fold Cross Validation을 사용합니다.\n", "def myAngleCV(X_train, X_angle, X_test):\n", " K=3\n", " folds = list(StratifiedKFold(n_splits=K, shuffle=True, random_state=16).split(X_train, target_train))\n", @@ -255,7 +255,7 @@ " X_angle_cv=X_angle[train_idx]\n", " X_angle_hold=X_angle[test_idx]\n", "\n", - " #define file path and get callbacks\n", + " # file path와 callbacks를 정의합니다.\n", " file_path = \"%s_aug_model_weights.hdf5\"%j\n", " callbacks = get_callbacks(filepath=file_path, patience=5)\n", " gen_flow = gen_flow_for_two_inputs(X_train_cv, X_angle_cv, y_train_cv)\n", @@ -269,26 +269,26 @@ " validation_data=([X_holdout,X_angle_hold], Y_holdout),\n", " callbacks=callbacks)\n", "\n", - " #Getting the Best Model\n", + " # 최적의 모델을 가져옵니다.\n", " galaxyModel.load_weights(filepath=file_path)\n", - " #Getting Training Score\n", + " # Training score를 가져옵니다.\n", " score = galaxyModel.evaluate([X_train_cv,X_angle_cv], y_train_cv, verbose=0)\n", " print('Train loss:', score[0])\n", " print('Train accuracy:', score[1])\n", - " #Getting Test Score\n", + " # Test Score를 가져옵니다.\n", " score = galaxyModel.evaluate([X_holdout,X_angle_hold], Y_holdout, verbose=0)\n", " print('Test loss:', score[0])\n", " print('Test accuracy:', score[1])\n", "\n", - " #Getting validation Score.\n", + " # validation Score를 가져옵니다.\n", " pred_valid=galaxyModel.predict([X_holdout,X_angle_hold])\n", " y_valid_pred_log[test_idx] = pred_valid.reshape(pred_valid.shape[0])\n", "\n", - " #Getting Test Scores\n", + " # Test Scores를 가져옵니다.\n", " temp_test=galaxyModel.predict([X_test, X_test_angle])\n", " y_test_pred_log+=temp_test.reshape(temp_test.shape[0])\n", "\n", - " #Getting Train Scores\n", + " # Train Scores를 가져옵니다.\n", " temp_train=galaxyModel.predict([X_train, X_angle])\n", " y_train_pred_log+=temp_train.reshape(temp_train.shape[0])\n", "\n", @@ -323,7 +323,6 @@ }, "outputs": [], "source": [ - "#Submission for each day.\n", "submission = pd.DataFrame()\n", "submission['id']=test['id']\n", "submission['is_iceberg']=preds\n",