Skip to content

Commit

Permalink
Notebooks for Kaggle seizure prediction competition
Browse files Browse the repository at this point in the history
  • Loading branch information
dafnevk committed Oct 10, 2016
1 parent f88f1f3 commit 2e264af
Show file tree
Hide file tree
Showing 2 changed files with 1,107 additions and 0 deletions.
233 changes: 233 additions & 0 deletions notebooks/Kaggle_preparedata.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\n",
"import numpy as np \n",
"import pandas as pd\n",
"from scipy.io import loadmat\n",
"from keras.utils.np_utils import to_categorical"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"path_to_train = '/media/sf_VBox_Shared/timeseries/KaggleEEG/train_1/'"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def mat_to_data(path, max_timesteps=None):\n",
" try :\n",
" mat = loadmat(path)\n",
" names = mat['dataStruct'].dtype.names\n",
" ndata = {n: mat['dataStruct'][n][0, 0] for n in names}\n",
" if max_timesteps is None:\n",
" return ndata['data']\n",
" else:\n",
" return ndata['data'][:max_timesteps,:]\n",
" except :\n",
" return None"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"fnames = os.listdir(path_to_train)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fnames_split = [fname.split('_') for fname in fnames if fname[-4:]=='.mat']\n",
"metadata = pd.DataFrame([{'patient':int(f[0]), 'segment':int(f[1]), 'class':int(f[2][0])} for f in fnames_split])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"nr_val = 10*6"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"metadata_0 = metadata[metadata['class']==0]\n",
"metadata_1 = metadata[metadata['class']==1]\n",
"\n",
"nr_segments_0 = metadata_0['segment']"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"np.random.seed(123)\n",
"ind_0 = metadata_0.sort_values('segment').index\n",
"ind_1 = metadata_1.sort_values('segment').index\n",
"val_ind = np.random.permutation(ind_0[-nr_val:].append( ind_1[-nr_val:]))\n",
"train_ind = np.random.permutation(ind_0[:-nr_val].append(ind_1[:-nr_val]))[:100]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def read_data(filenames, path_to_train, max_timesteps=None):\n",
" mfiles = [mat_to_data(os.path.join(path_to_train, fname), max_timesteps) for fname in filenames]\n",
" data = [m for m in mfiles if m is not None]\n",
" return np.array(data)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_val = read_data(pd.Series(fnames)[val_ind], path_to_train)\n",
"y_val = metadata.loc[val_ind]['class']\n",
"y_val_binary = to_categorical(np.array(y_val), 2)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"outdatapath = '/media/sf_VBox_Shared/timeseries/KaggleEEG/train_1/prepared/'\n",
"np.save(outdatapath+'X_val', X_val)\n",
"np.save(outdatapath+'y_val_binary', y_val_binary)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_train = read_data(pd.Series(fnames)[train_ind], path_to_train, max_timesteps=2400)\n",
"y_train = metadata.loc[train_ind]['class']\n",
"y_train_binary = to_categorical(np.array(y_train), 2)\n",
"np.save(outdatapath+'X_train', X_train)\n",
"np.save(outdatapath+'y_train_binary', y_train_binary)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(100, 24000, 16)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"np.save(outdatapath+'y_val_binary', y_val_binary)\n",
"np.save(outdatapath+'y_train_binary', y_train_binary)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Loading

0 comments on commit 2e264af

Please sign in to comment.