Skip to content
This repository has been archived by the owner on Mar 8, 2020. It is now read-only.

Update Statoil Kernel as Korean #38

Merged
merged 12 commits into from
Oct 21, 2018
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"import numpy as np # linear algebra\n",
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
"\n",
"# Input data files are available in the \"../input/\" directory.\n",
"# \"../input/\" 경로로 데이터 파일을 input 할 수 있습니다.\n",
"# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory\n",
"\n",
"#from subprocess import check_output\n",
Expand Down Expand Up @@ -131,7 +131,7 @@
"test['inc_angle']=pd.to_numeric(test['inc_angle'], errors='coerce')\n",
"X_test_angle=test['inc_angle']\n",
"\n",
"#Generate the training data\n",
"# training data를 생성합니다.\n",
"X_band_1=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train[\"band_1\"]])\n",
"X_band_2=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train[\"band_2\"]])\n",
"X_band_3=(X_band_1+X_band_2)/2 #band_1과 band_2 각 행렬의 값을 더한 평균값으로 만듦. 그래서 band_1과 bnad_2의 크기가 같아야됨.\n",
Expand Down Expand Up @@ -193,36 +193,37 @@
" rotation_range = 10) # 지정된 각도 범위내에서 임의로 원본이미지를 회전\n",
"\n",
"# 두 generators를 합치는 함수입니다\n",
"# We use the exact same generator with the same random seed for both the y and angle arrays\n",
"# y와 angle array 모두에 동일한 랜덤 시드의 같은 제네레이터를 사용해야합니다\n",

"def gen_flow_for_two_inputs(X1, X2, y):\n",
" genX1 = gen.flow(X1,y, batch_size=batch_size,seed=55)\n",
" genX2 = gen.flow(X1,X2, batch_size=batch_size,seed=55)\n",
" while True:\n",
" X1i = genX1.next()\n",
" X2i = genX2.next()\n",
" #Assert arrays are equal - this was for peace of mind, but slows down training\n",
" #array는 동일합니다 - 마음은 안정되었으나 training 속도가 느려졌습니다.\n",
" #np.testing.assert_array_equal(X1i[0],X2i[0])\n",
" yield [X1i[0], X2i[1]], X1i[1]\n",
"\n",
"# Finally create generator\n",
"# generator를 생성합니다\n",
"def get_callbacks(filepath, patience=2):\n",
" es = EarlyStopping('val_loss', patience=10, mode=\"min\")\n",
" es = EarlyStopping('val_loss', patience=10, mode=\"min\") # 더 이상 개선의 여지가 없으면 학습 조기 종료\n",
" msave = ModelCheckpoint(filepath, save_best_only=True)\n",
" return [es, msave]\n",
"\n",
"\n",
"def getVggAngleModel():\n",
" input_2 = Input(shape=[1], name=\"angle\")\n",
" angle_layer = Dense(1, )(input_2)\n",
" base_model = VGG16(weights='imagenet', include_top=False, \n",
" input_shape=X_train.shape[1:], classes=1)\n",
" angle_layer = Dense(1, )(input_2) # 출력 뉴런의 수는 1\n",
" base_model = VGG16(weights='imagenet', include_top=False, # weights=\"imagenet\" : pre-training on ImageNet\n",
" input_shape=X_train.shape[1:], classes=1) \n",
" x = base_model.get_layer('block5_pool').output\n",
" \n",
"\n",
" x = GlobalMaxPooling2D()(x)\n",
" merge_one = concatenate([x, angle_layer])\n",
" merge_one = Dense(512, activation='relu', name='fc2')(merge_one)\n",
" merge_one = Dropout(0.3)(merge_one)\n",
" merge_one = Dropout(0.3)(merge_one) # Dropout : 일부 weight만 사용\n",
" merge_one = Dense(512, activation='relu', name='fc3')(merge_one)\n",
" merge_one = Dropout(0.3)(merge_one)\n",
" \n",
Expand All @@ -236,8 +237,7 @@
" metrics=['accuracy'])\n",
" return model\n",
"\n",
"\n",
"#Using K-fold Cross Validation with Data Augmentation.\n",
"# 데이터 확장과 함께 K-fold Cross Validation을 사용합니다.\n",
"def myAngleCV(X_train, X_angle, X_test):\n",
" K=3\n",
" folds = list(StratifiedKFold(n_splits=K, shuffle=True, random_state=16).split(X_train, target_train))\n",
Expand All @@ -255,7 +255,7 @@
" X_angle_cv=X_angle[train_idx]\n",
" X_angle_hold=X_angle[test_idx]\n",
"\n",
" #define file path and get callbacks\n",
" # file path와 callbacks를 정의합니다.\n",
" file_path = \"%s_aug_model_weights.hdf5\"%j\n",
" callbacks = get_callbacks(filepath=file_path, patience=5)\n",
" gen_flow = gen_flow_for_two_inputs(X_train_cv, X_angle_cv, y_train_cv)\n",
Expand All @@ -269,26 +269,26 @@
" validation_data=([X_holdout,X_angle_hold], Y_holdout),\n",
" callbacks=callbacks)\n",
"\n",
" #Getting the Best Model\n",
" # 최적의 모델을 가져옵니다.\n",
" galaxyModel.load_weights(filepath=file_path)\n",
" #Getting Training Score\n",
" # Training score를 가져옵니다.\n",
" score = galaxyModel.evaluate([X_train_cv,X_angle_cv], y_train_cv, verbose=0)\n",
" print('Train loss:', score[0])\n",
" print('Train accuracy:', score[1])\n",
" #Getting Test Score\n",
" # Test Score를 가져옵니다.\n",
" score = galaxyModel.evaluate([X_holdout,X_angle_hold], Y_holdout, verbose=0)\n",
" print('Test loss:', score[0])\n",
" print('Test accuracy:', score[1])\n",
"\n",
" #Getting validation Score.\n",
" # validation Score를 가져옵니다.\n",
" pred_valid=galaxyModel.predict([X_holdout,X_angle_hold])\n",
" y_valid_pred_log[test_idx] = pred_valid.reshape(pred_valid.shape[0])\n",
"\n",
" #Getting Test Scores\n",
" # Test Scores를 가져옵니다.\n",
" temp_test=galaxyModel.predict([X_test, X_test_angle])\n",
" y_test_pred_log+=temp_test.reshape(temp_test.shape[0])\n",
"\n",
" #Getting Train Scores\n",
" # Train Scores를 가져옵니다.\n",
" temp_train=galaxyModel.predict([X_train, X_angle])\n",
" y_train_pred_log+=temp_train.reshape(temp_train.shape[0])\n",
"\n",
Expand Down Expand Up @@ -323,7 +323,6 @@
},
"outputs": [],
"source": [
"#Submission for each day.\n",
"submission = pd.DataFrame()\n",
"submission['id']=test['id']\n",
"submission['is_iceberg']=preds\n",
Expand Down