tvt exploration start, references update, changelog update
timothyyu committed Feb 12, 2019
1 parent b177798 commit 8b1b639
Showing 10 changed files with 35 additions and 24 deletions.
6 changes: 0 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -22,11 +22,5 @@ Repository package requirements/dependencies are defined in `requirements.txt` f

Repository of an existing attempt to replicate the above paper in PyTorch: [mlpanda/DeepLearning_Financial](https://github.com/mlpanda/DeepLearning_Financial), checked out as a `git-subrepo` for reference in the `submodules` folder.

## Additional References (APA)

> Hsieh, T.-J., Hsiao, H.-F., & Yeh, W.-C. (2011). Forecasting stock markets using wavelet transforms and recurrent neural networks: An integrated system based on artificial bee colony algorithm. *Applied Soft Computing*, *11*(2), 2510–2525. <https://doi.org/10.1016/j.asoc.2010.09.007>
>
> Mallat, S. G. (1989). A theory for multiresolution signal decomposition: The wavelet representation. *IEEE Transactions on Pattern Analysis and Machine Intelligence*, *11*(7), 674–693.
>
> Leavey, C. M., James, M. N., Summerscales, J., & Sutton, R. (2003). An introduction to wavelet transforms: A tutorial approach. *Insight*, *45*(5), 344–353. <https://doi.org/10.1784/insi.45.5.344.52875>

Binary file modified data/interim/clean_data.xlsx
Binary file not shown.
Binary file modified data/interim/clean_data_future.xlsx
Binary file not shown.
Binary file modified data/interim/clean_data_index.xlsx
Binary file not shown.
Binary file modified data/processed/clean_data.xlsx
Binary file not shown.
Binary file modified data/processed/clean_data_future.xlsx
Binary file not shown.
Binary file modified data/processed/clean_data_index.xlsx
Binary file not shown.
4 changes: 3 additions & 1 deletion docs/changelog.md
@@ -1,6 +1,6 @@
# Changelog

- creation of `data/pickled` folder for pickled data
- creation of `data/pickled` folder for pickled data (i.e. pickled dataframes)
- additional files added to `references`
- `README.md` update to reflect changes
- new functions in `utils.py`: `interval_split()`, `dict_interval_split()`, `pickle_save()`, `pickle_load()` (refactored from `1d_train_test_split_exploration.ipynb`)
@@ -10,6 +10,8 @@
- `from statsmodels.robust import mad`
- `from scipy import signal`
- wavelet transform exploration start in `1c_wavelet_draft_test_exploration.ipynb`
- start of train-validate-test split implementation in `1f_tvt_split_exploration.ipynb`
- duplicate datetime index and datetime column produced by `interval_split()` fixed in `utils.py`
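The `interval_split()`, `pickle_save()`, and `pickle_load()` helpers named above are not shown in this diff, so their exact signatures are unknown; a minimal sketch of what such helpers might look like (the quarterly grouping and all signatures here are assumptions, not the repository's actual code):

```python
import pickle

import pandas as pd

def interval_split(frame, freq="Q"):
    # Split a datetime-indexed DataFrame into one sub-frame per calendar
    # period, keyed 1..n in chronological order. Grouping on the index
    # directly (rather than copying it into a column first) is one way to
    # avoid the duplicate datetime index/column issue noted in the changelog.
    grouped = frame.groupby(frame.index.to_period(freq))
    return {i: g for i, (_, g) in enumerate(grouped, start=1)}

def pickle_save(obj, path):
    # Persist any Python object (e.g. the dict of DataFrames) to disk.
    with open(path, "wb") as f:
        pickle.dump(obj, f)

def pickle_load(path):
    # Restore an object previously written by pickle_save().
    with open(path, "rb") as f:
        return pickle.load(f)
```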

## v0.1.0
- clean_dataset.py in `wsae_lstm` folder to clean raw dataset, output stored in `data/interim` folder (refactored from `notebooks\0_initial_data_exploration.ipynb` notebook)
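The wavelet exploration noted in the changelog, together with the `mad` (statsmodels) and `scipy.signal` imports, points at the usual MAD-based soft-threshold denoising recipe. A numpy-only single-level Haar sketch using the Donoho-Johnstone universal threshold; this is an illustration of the general technique, not the repository's notebook code, which may well use PyWavelets instead:

```python
import numpy as np

def haar_denoise(x):
    # Single-level Haar DWT: pairwise sums (approximation) and differences
    # (detail), with orthonormal 1/sqrt(2) scaling.
    x = np.asarray(x, dtype=float)
    if len(x) % 2:                       # pad to even length
        x = np.append(x, x[-1])
    approx = (x[0::2] + x[1::2]) / np.sqrt(2)
    detail = (x[0::2] - x[1::2]) / np.sqrt(2)
    # Robust noise estimate from the detail band (MAD / 0.6745), then the
    # universal threshold sigma * sqrt(2 log n).
    sigma = np.median(np.abs(detail - np.median(detail))) / 0.6745
    thresh = sigma * np.sqrt(2 * np.log(len(x)))
    # Soft-threshold the detail coefficients.
    detail = np.sign(detail) * np.maximum(np.abs(detail) - thresh, 0)
    # Inverse Haar transform.
    out = np.empty_like(x)
    out[0::2] = (approx + detail) / np.sqrt(2)
    out[1::2] = (approx - detail) / np.sqrt(2)
    return out
```

A constant signal has zero detail coefficients and is reconstructed exactly; a noisy signal keeps only detail coefficients above the threshold.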
46 changes: 30 additions & 16 deletions notebooks/1f_tvt_split_exploration.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
@@ -45,7 +45,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
@@ -55,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
@@ -64,7 +64,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
@@ -73,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 43,
"metadata": {},
"outputs": [
{
@@ -289,7 +289,7 @@
"2008-07-07 -3.531151e+07 "
]
},
"execution_count": 26,
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
@@ -300,7 +300,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 44,
"metadata": {},
"outputs": [
{
Expand All @@ -309,7 +309,7 @@
"612"
]
},
"execution_count": 27,
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
@@ -320,7 +320,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 45,
"metadata": {},
"outputs": [
{
Expand All @@ -329,7 +329,7 @@
"19"
]
},
"execution_count": 28,
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
@@ -340,7 +340,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 46,
"metadata": {},
"outputs": [
{
Expand All @@ -349,7 +349,7 @@
"Timestamp('2008-07-01 00:00:00')"
]
},
"execution_count": 29,
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
@@ -361,7 +361,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 47,
"metadata": {},
"outputs": [
{
Expand All @@ -370,7 +370,7 @@
"Timestamp('2008-10-01 00:00:00')"
]
},
"execution_count": 31,
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
@@ -382,7 +382,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 48,
"metadata": {},
"outputs": [
{
Expand All @@ -391,7 +391,7 @@
"Timestamp('2009-01-01 00:00:00')"
]
},
"execution_count": 32,
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
@@ -401,6 +401,20 @@
"dict_dataframes_index['csi300 index data'][1].index[0] +monthdelta(6) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
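The notebook cells above step the first index timestamp forward by calendar months (`index[0] + monthdelta(3)` evaluating to 2008-10-01, and `+ monthdelta(6)` to 2009-01-01), which suggests a calendar-based train-validate-test split. A sketch of that idea using `pandas.DateOffset` in place of the `monthdelta` package so it is self-contained; the window lengths and the name `tvt_split` are assumptions about where the exploration is headed, not the finished implementation:

```python
import pandas as pd

def tvt_split(frame, train_months=3, val_months=3):
    # Calendar split of a datetime-indexed DataFrame: the first
    # `train_months` for training, the next `val_months` for validation,
    # and everything after that for testing.
    start = frame.index[0]
    val_start = start + pd.DateOffset(months=train_months)
    test_start = val_start + pd.DateOffset(months=val_months)
    train = frame[frame.index < val_start]
    validate = frame[(frame.index >= val_start) & (frame.index < test_start)]
    test = frame[frame.index >= test_start]
    return train, validate, test
```

Starting from 2008-07-01, this reproduces the boundary timestamps shown in the notebook output: validation begins 2008-10-01 and testing begins 2009-01-01.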
3 changes: 2 additions & 1 deletion wsae_lstm/utils.py
@@ -11,7 +11,8 @@
sys.path.append("../")

def frames_to_excel(dict_dataframes, path,key_order=None):
"""Save cleaned data to disk; Write dictionary of dataframes to separate sheets, within 1 file. Optional key_order kwarg for dataframe/sheet order."""
"""Save cleaned data to disk; Write dictionary of dataframes to separate sheets, within 1 file.
Optional key_order kwarg for dataframe/sheet order."""
# frames_to_excel() source: https://stackoverflow.com/q/51696940
if key_order is not None:
list_of_tuples = [(key, dict_dataframes[key]) for key in key_order]
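The hunk above shows only the top of `frames_to_excel()` (the docstring reflow and the `key_order` branch); the else branch and the write loop are cut off. A hedged reconstruction of the whole function, assuming it writes through `pandas.ExcelWriter`; the sheet-name truncation is an added safeguard, not something visible in the diff:

```python
import pandas as pd

def frames_to_excel(dict_dataframes, path, key_order=None):
    """Save cleaned data to disk; write a dictionary of DataFrames to
    separate sheets within one xlsx file. Optional key_order kwarg
    fixes the dataframe/sheet order."""
    if key_order is not None:
        items = [(key, dict_dataframes[key]) for key in key_order]
    else:
        items = list(dict_dataframes.items())
    with pd.ExcelWriter(path) as writer:
        for name, frame in items:
            # Excel caps sheet names at 31 characters.
            frame.to_excel(writer, sheet_name=str(name)[:31])
```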
