forked from shokru/mlfactor.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
preface.html
863 lines (816 loc) · 77.8 KB
/
preface.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
<!DOCTYPE html>
<html lang="" xml:lang="">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<title>Machine Learning for Factor Investing</title>
<meta name="description" content="Machine Learning for Factor Investing" />
<meta name="generator" content="bookdown 0.21 and GitBook 2.6.7" />
<meta property="og:title" content="Machine Learning for Factor Investing" />
<meta property="og:type" content="book" />
<meta name="twitter:card" content="summary" />
<meta name="twitter:title" content="Machine Learning for Factor Investing" />
<meta name="author" content="Guillaume Coqueret and Tony Guida" />
<meta name="date" content="2021-04-11" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black" />
<link rel="next" href="notdata.html"/>
<script src="libs/header-attrs-2.5/header-attrs.js"></script>
<script src="libs/jquery-2.2.3/jquery.min.js"></script>
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />
<link href="libs/anchor-sections-1.0/anchor-sections.css" rel="stylesheet" />
<script src="libs/anchor-sections-1.0/anchor-sections.js"></script>
<script src="libs/kePrint-0.0.1/kePrint.js"></script>
<link href="libs/lightable-0.0.1/lightable.css" rel="stylesheet" />
<style type="text/css">
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
</head>
<body>
<div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">
<div class="book-summary">
<nav role="navigation">
<ul class="summary">
<li class="chapter" data-level="" data-path="preface.html"><a href="preface.html"><i class="fa fa-check"></i>Preface</a>
<ul>
<li class="chapter" data-level="" data-path="preface.html"><a href="preface.html#what-this-book-is-not-about"><i class="fa fa-check"></i>What this book is not about</a></li>
<li class="chapter" data-level="" data-path="preface.html"><a href="preface.html#the-targeted-audience"><i class="fa fa-check"></i>The targeted audience</a></li>
<li class="chapter" data-level="" data-path="preface.html"><a href="preface.html#how-this-book-is-structured"><i class="fa fa-check"></i>How this book is structured</a></li>
<li class="chapter" data-level="" data-path="preface.html"><a href="preface.html#companion-website"><i class="fa fa-check"></i>Companion website</a></li>
<li class="chapter" data-level="" data-path="preface.html"><a href="preface.html#why-r"><i class="fa fa-check"></i>Why R?</a></li>
<li class="chapter" data-level="" data-path="preface.html"><a href="preface.html#coding-instructions"><i class="fa fa-check"></i>Coding instructions</a></li>
<li class="chapter" data-level="" data-path="preface.html"><a href="preface.html#acknowledgments"><i class="fa fa-check"></i>Acknowledgments</a></li>
<li class="chapter" data-level="" data-path="preface.html"><a href="preface.html#future-developments"><i class="fa fa-check"></i>Future developments</a></li>
</ul></li>
<li class="part"><span><b>I Introduction</b></span></li>
<li class="chapter" data-level="1" data-path="notdata.html"><a href="notdata.html"><i class="fa fa-check"></i><b>1</b> Notations and data</a>
<ul>
<li class="chapter" data-level="1.1" data-path="notdata.html"><a href="notdata.html#notations"><i class="fa fa-check"></i><b>1.1</b> Notations</a></li>
<li class="chapter" data-level="1.2" data-path="notdata.html"><a href="notdata.html#dataset"><i class="fa fa-check"></i><b>1.2</b> Dataset</a></li>
</ul></li>
<li class="chapter" data-level="2" data-path="intro.html"><a href="intro.html"><i class="fa fa-check"></i><b>2</b> Introduction</a>
<ul>
<li class="chapter" data-level="2.1" data-path="intro.html"><a href="intro.html#context"><i class="fa fa-check"></i><b>2.1</b> Context</a></li>
<li class="chapter" data-level="2.2" data-path="intro.html"><a href="intro.html#portfolio-construction-the-workflow"><i class="fa fa-check"></i><b>2.2</b> Portfolio construction: the workflow</a></li>
<li class="chapter" data-level="2.3" data-path="intro.html"><a href="intro.html#machine-learning-is-no-magic-wand"><i class="fa fa-check"></i><b>2.3</b> Machine learning is no magic wand</a></li>
</ul></li>
<li class="chapter" data-level="3" data-path="factor.html"><a href="factor.html"><i class="fa fa-check"></i><b>3</b> Factor investing and asset pricing anomalies</a>
<ul>
<li class="chapter" data-level="3.1" data-path="factor.html"><a href="factor.html#introduction"><i class="fa fa-check"></i><b>3.1</b> Introduction</a></li>
<li class="chapter" data-level="3.2" data-path="factor.html"><a href="factor.html#detecting-anomalies"><i class="fa fa-check"></i><b>3.2</b> Detecting anomalies</a>
<ul>
<li class="chapter" data-level="3.2.1" data-path="factor.html"><a href="factor.html#challenges"><i class="fa fa-check"></i><b>3.2.1</b> Challenges</a></li>
<li class="chapter" data-level="3.2.2" data-path="factor.html"><a href="factor.html#simple-portfolio-sorts"><i class="fa fa-check"></i><b>3.2.2</b> Simple portfolio sorts </a></li>
<li class="chapter" data-level="3.2.3" data-path="factor.html"><a href="factor.html#factors"><i class="fa fa-check"></i><b>3.2.3</b> Factors</a></li>
<li class="chapter" data-level="3.2.4" data-path="factor.html"><a href="factor.html#predictive-regressions-sorts-and-p-value-issues"><i class="fa fa-check"></i><b>3.2.4</b> Predictive regressions, sorts, and p-value issues</a></li>
<li class="chapter" data-level="3.2.5" data-path="factor.html"><a href="factor.html#fama-macbeth-regressions"><i class="fa fa-check"></i><b>3.2.5</b> Fama-Macbeth regressions</a></li>
<li class="chapter" data-level="3.2.6" data-path="factor.html"><a href="factor.html#factor-competition"><i class="fa fa-check"></i><b>3.2.6</b> Factor competition</a></li>
<li class="chapter" data-level="3.2.7" data-path="factor.html"><a href="factor.html#advanced-techniques"><i class="fa fa-check"></i><b>3.2.7</b> Advanced techniques</a></li>
</ul></li>
<li class="chapter" data-level="3.3" data-path="factor.html"><a href="factor.html#factors-or-characteristics"><i class="fa fa-check"></i><b>3.3</b> Factors or characteristics?</a></li>
<li class="chapter" data-level="3.4" data-path="factor.html"><a href="factor.html#hot-topics-momentum-timing-and-esg"><i class="fa fa-check"></i><b>3.4</b> Hot topics: momentum, timing and ESG</a>
<ul>
<li class="chapter" data-level="3.4.1" data-path="factor.html"><a href="factor.html#factor-momentum"><i class="fa fa-check"></i><b>3.4.1</b> Factor momentum</a></li>
<li class="chapter" data-level="3.4.2" data-path="factor.html"><a href="factor.html#factor-timing"><i class="fa fa-check"></i><b>3.4.2</b> Factor timing</a></li>
<li class="chapter" data-level="3.4.3" data-path="factor.html"><a href="factor.html#the-green-factors"><i class="fa fa-check"></i><b>3.4.3</b> The green factors</a></li>
</ul></li>
<li class="chapter" data-level="3.5" data-path="factor.html"><a href="factor.html#the-links-with-machine-learning"><i class="fa fa-check"></i><b>3.5</b> The links with machine learning</a>
<ul>
<li class="chapter" data-level="3.5.1" data-path="factor.html"><a href="factor.html#a-short-list-of-recent-references"><i class="fa fa-check"></i><b>3.5.1</b> A short list of recent references</a></li>
<li class="chapter" data-level="3.5.2" data-path="factor.html"><a href="factor.html#explicit-connections-with-asset-pricing-models"><i class="fa fa-check"></i><b>3.5.2</b> Explicit connections with asset pricing models</a></li>
</ul></li>
<li class="chapter" data-level="3.6" data-path="factor.html"><a href="factor.html#coding-exercises"><i class="fa fa-check"></i><b>3.6</b> Coding exercises</a></li>
</ul></li>
<li class="chapter" data-level="4" data-path="Data.html"><a href="Data.html"><i class="fa fa-check"></i><b>4</b> Data preprocessing</a>
<ul>
<li class="chapter" data-level="4.1" data-path="Data.html"><a href="Data.html#know-your-data"><i class="fa fa-check"></i><b>4.1</b> Know your data</a></li>
<li class="chapter" data-level="4.2" data-path="Data.html"><a href="Data.html#missing-data"><i class="fa fa-check"></i><b>4.2</b> Missing data</a></li>
<li class="chapter" data-level="4.3" data-path="Data.html"><a href="Data.html#outlier-detection"><i class="fa fa-check"></i><b>4.3</b> Outlier detection</a></li>
<li class="chapter" data-level="4.4" data-path="Data.html"><a href="Data.html#feateng"><i class="fa fa-check"></i><b>4.4</b> Feature engineering</a>
<ul>
<li class="chapter" data-level="4.4.1" data-path="Data.html"><a href="Data.html#feature-selection"><i class="fa fa-check"></i><b>4.4.1</b> Feature selection</a></li>
<li class="chapter" data-level="4.4.2" data-path="Data.html"><a href="Data.html#scaling"><i class="fa fa-check"></i><b>4.4.2</b> Scaling the predictors</a></li>
</ul></li>
<li class="chapter" data-level="4.5" data-path="Data.html"><a href="Data.html#labelling"><i class="fa fa-check"></i><b>4.5</b> Labelling</a>
<ul>
<li class="chapter" data-level="4.5.1" data-path="Data.html"><a href="Data.html#simple-labels"><i class="fa fa-check"></i><b>4.5.1</b> Simple labels</a></li>
<li class="chapter" data-level="4.5.2" data-path="Data.html"><a href="Data.html#categorical-labels"><i class="fa fa-check"></i><b>4.5.2</b> Categorical labels</a></li>
<li class="chapter" data-level="4.5.3" data-path="Data.html"><a href="Data.html#the-triple-barrier-method"><i class="fa fa-check"></i><b>4.5.3</b> The triple barrier method</a></li>
<li class="chapter" data-level="4.5.4" data-path="Data.html"><a href="Data.html#filtering-the-sample"><i class="fa fa-check"></i><b>4.5.4</b> Filtering the sample</a></li>
<li class="chapter" data-level="4.5.5" data-path="Data.html"><a href="Data.html#horizons"><i class="fa fa-check"></i><b>4.5.5</b> Return horizons</a></li>
</ul></li>
<li class="chapter" data-level="4.6" data-path="Data.html"><a href="Data.html#pers"><i class="fa fa-check"></i><b>4.6</b> Handling persistence</a></li>
<li class="chapter" data-level="4.7" data-path="Data.html"><a href="Data.html#extensions"><i class="fa fa-check"></i><b>4.7</b> Extensions</a>
<ul>
<li class="chapter" data-level="4.7.1" data-path="Data.html"><a href="Data.html#transforming-features"><i class="fa fa-check"></i><b>4.7.1</b> Transforming features</a></li>
<li class="chapter" data-level="4.7.2" data-path="Data.html"><a href="Data.html#macrovar"><i class="fa fa-check"></i><b>4.7.2</b> Macro-economic variables</a></li>
<li class="chapter" data-level="4.7.3" data-path="Data.html"><a href="Data.html#active-learning"><i class="fa fa-check"></i><b>4.7.3</b> Active learning</a></li>
</ul></li>
<li class="chapter" data-level="4.8" data-path="Data.html"><a href="Data.html#additional-code-and-results"><i class="fa fa-check"></i><b>4.8</b> Additional code and results</a>
<ul>
<li class="chapter" data-level="4.8.1" data-path="Data.html"><a href="Data.html#impact-of-rescaling-graphical-representation"><i class="fa fa-check"></i><b>4.8.1</b> Impact of rescaling: graphical representation</a></li>
<li class="chapter" data-level="4.8.2" data-path="Data.html"><a href="Data.html#impact-of-rescaling-toy-example"><i class="fa fa-check"></i><b>4.8.2</b> Impact of rescaling: toy example</a></li>
</ul></li>
<li class="chapter" data-level="4.9" data-path="Data.html"><a href="Data.html#coding-exercises-1"><i class="fa fa-check"></i><b>4.9</b> Coding exercises</a></li>
</ul></li>
<li class="part"><span><b>II Common supervised algorithms</b></span></li>
<li class="chapter" data-level="5" data-path="lasso.html"><a href="lasso.html"><i class="fa fa-check"></i><b>5</b> Penalized regressions and sparse hedging for minimum variance portfolios</a>
<ul>
<li class="chapter" data-level="5.1" data-path="lasso.html"><a href="lasso.html#penalized-regressions"><i class="fa fa-check"></i><b>5.1</b> Penalized regressions</a>
<ul>
<li class="chapter" data-level="5.1.1" data-path="lasso.html"><a href="lasso.html#penreg"><i class="fa fa-check"></i><b>5.1.1</b> Simple regressions</a></li>
<li class="chapter" data-level="5.1.2" data-path="lasso.html"><a href="lasso.html#forms-of-penalizations"><i class="fa fa-check"></i><b>5.1.2</b> Forms of penalizations</a></li>
<li class="chapter" data-level="5.1.3" data-path="lasso.html"><a href="lasso.html#illustrations"><i class="fa fa-check"></i><b>5.1.3</b> Illustrations</a></li>
</ul></li>
<li class="chapter" data-level="5.2" data-path="lasso.html"><a href="lasso.html#sparse-hedging-for-minimum-variance-portfolios"><i class="fa fa-check"></i><b>5.2</b> Sparse hedging for minimum variance portfolios</a>
<ul>
<li class="chapter" data-level="5.2.1" data-path="lasso.html"><a href="lasso.html#presentation-and-derivations"><i class="fa fa-check"></i><b>5.2.1</b> Presentation and derivations</a></li>
<li class="chapter" data-level="5.2.2" data-path="lasso.html"><a href="lasso.html#sparseex"><i class="fa fa-check"></i><b>5.2.2</b> Example</a></li>
</ul></li>
<li class="chapter" data-level="5.3" data-path="lasso.html"><a href="lasso.html#predictive-regressions"><i class="fa fa-check"></i><b>5.3</b> Predictive regressions</a>
<ul>
<li class="chapter" data-level="5.3.1" data-path="lasso.html"><a href="lasso.html#literature-review-and-principle"><i class="fa fa-check"></i><b>5.3.1</b> Literature review and principle</a></li>
<li class="chapter" data-level="5.3.2" data-path="lasso.html"><a href="lasso.html#code-and-results"><i class="fa fa-check"></i><b>5.3.2</b> Code and results</a></li>
</ul></li>
<li class="chapter" data-level="5.4" data-path="lasso.html"><a href="lasso.html#coding-exercise"><i class="fa fa-check"></i><b>5.4</b> Coding exercise</a></li>
</ul></li>
<li class="chapter" data-level="6" data-path="trees.html"><a href="trees.html"><i class="fa fa-check"></i><b>6</b> Tree-based methods</a>
<ul>
<li class="chapter" data-level="6.1" data-path="trees.html"><a href="trees.html#simple-trees"><i class="fa fa-check"></i><b>6.1</b> Simple trees</a>
<ul>
<li class="chapter" data-level="6.1.1" data-path="trees.html"><a href="trees.html#principle"><i class="fa fa-check"></i><b>6.1.1</b> Principle</a></li>
<li class="chapter" data-level="6.1.2" data-path="trees.html"><a href="trees.html#treeclass"><i class="fa fa-check"></i><b>6.1.2</b> Further details on classification</a></li>
<li class="chapter" data-level="6.1.3" data-path="trees.html"><a href="trees.html#pruning-criteria"><i class="fa fa-check"></i><b>6.1.3</b> Pruning criteria</a></li>
<li class="chapter" data-level="6.1.4" data-path="trees.html"><a href="trees.html#code-and-interpretation"><i class="fa fa-check"></i><b>6.1.4</b> Code and interpretation</a></li>
</ul></li>
<li class="chapter" data-level="6.2" data-path="trees.html"><a href="trees.html#random-forests"><i class="fa fa-check"></i><b>6.2</b> Random forests</a>
<ul>
<li class="chapter" data-level="6.2.1" data-path="trees.html"><a href="trees.html#principle-1"><i class="fa fa-check"></i><b>6.2.1</b> Principle</a></li>
<li class="chapter" data-level="6.2.2" data-path="trees.html"><a href="trees.html#code-and-results-1"><i class="fa fa-check"></i><b>6.2.2</b> Code and results</a></li>
</ul></li>
<li class="chapter" data-level="6.3" data-path="trees.html"><a href="trees.html#adaboost"><i class="fa fa-check"></i><b>6.3</b> Boosted trees: Adaboost</a>
<ul>
<li class="chapter" data-level="6.3.1" data-path="trees.html"><a href="trees.html#methodology"><i class="fa fa-check"></i><b>6.3.1</b> Methodology</a></li>
<li class="chapter" data-level="6.3.2" data-path="trees.html"><a href="trees.html#illustration"><i class="fa fa-check"></i><b>6.3.2</b> Illustration</a></li>
</ul></li>
<li class="chapter" data-level="6.4" data-path="trees.html"><a href="trees.html#boosted-trees-extreme-gradient-boosting"><i class="fa fa-check"></i><b>6.4</b> Boosted trees: extreme gradient boosting</a>
<ul>
<li class="chapter" data-level="6.4.1" data-path="trees.html"><a href="trees.html#managing-loss"><i class="fa fa-check"></i><b>6.4.1</b> Managing loss</a></li>
<li class="chapter" data-level="6.4.2" data-path="trees.html"><a href="trees.html#penalization"><i class="fa fa-check"></i><b>6.4.2</b> Penalization</a></li>
<li class="chapter" data-level="6.4.3" data-path="trees.html"><a href="trees.html#aggregation"><i class="fa fa-check"></i><b>6.4.3</b> Aggregation</a></li>
<li class="chapter" data-level="6.4.4" data-path="trees.html"><a href="trees.html#tree-structure"><i class="fa fa-check"></i><b>6.4.4</b> Tree structure</a></li>
<li class="chapter" data-level="6.4.5" data-path="trees.html"><a href="trees.html#boostext"><i class="fa fa-check"></i><b>6.4.5</b> Extensions</a></li>
<li class="chapter" data-level="6.4.6" data-path="trees.html"><a href="trees.html#boostcode"><i class="fa fa-check"></i><b>6.4.6</b> Code and results</a></li>
<li class="chapter" data-level="6.4.7" data-path="trees.html"><a href="trees.html#instweight"><i class="fa fa-check"></i><b>6.4.7</b> Instance weighting</a></li>
</ul></li>
<li class="chapter" data-level="6.5" data-path="trees.html"><a href="trees.html#discussion"><i class="fa fa-check"></i><b>6.5</b> Discussion</a></li>
<li class="chapter" data-level="6.6" data-path="trees.html"><a href="trees.html#coding-exercises-2"><i class="fa fa-check"></i><b>6.6</b> Coding exercises</a></li>
</ul></li>
<li class="chapter" data-level="7" data-path="NN.html"><a href="NN.html"><i class="fa fa-check"></i><b>7</b> Neural networks</a>
<ul>
<li class="chapter" data-level="7.1" data-path="NN.html"><a href="NN.html#the-original-perceptron"><i class="fa fa-check"></i><b>7.1</b> The original perceptron</a></li>
<li class="chapter" data-level="7.2" data-path="NN.html"><a href="NN.html#multilayer-perceptron"><i class="fa fa-check"></i><b>7.2</b> Multilayer perceptron</a>
<ul>
<li class="chapter" data-level="7.2.1" data-path="NN.html"><a href="NN.html#introduction-and-notations"><i class="fa fa-check"></i><b>7.2.1</b> Introduction and notations</a></li>
<li class="chapter" data-level="7.2.2" data-path="NN.html"><a href="NN.html#universal-approximation"><i class="fa fa-check"></i><b>7.2.2</b> Universal approximation</a></li>
<li class="chapter" data-level="7.2.3" data-path="NN.html"><a href="NN.html#backprop"><i class="fa fa-check"></i><b>7.2.3</b> Learning via back-propagation</a></li>
<li class="chapter" data-level="7.2.4" data-path="NN.html"><a href="NN.html#NNclass"><i class="fa fa-check"></i><b>7.2.4</b> Further details on classification</a></li>
</ul></li>
<li class="chapter" data-level="7.3" data-path="NN.html"><a href="NN.html#howdeep"><i class="fa fa-check"></i><b>7.3</b> How deep we should go and other practical issues</a>
<ul>
<li class="chapter" data-level="7.3.1" data-path="NN.html"><a href="NN.html#architectural-choices"><i class="fa fa-check"></i><b>7.3.1</b> Architectural choices</a></li>
<li class="chapter" data-level="7.3.2" data-path="NN.html"><a href="NN.html#frequency-of-weight-updates-and-learning-duration"><i class="fa fa-check"></i><b>7.3.2</b> Frequency of weight updates and learning duration</a></li>
<li class="chapter" data-level="7.3.3" data-path="NN.html"><a href="NN.html#penalizations-and-dropout"><i class="fa fa-check"></i><b>7.3.3</b> Penalizations and dropout</a></li>
</ul></li>
<li class="chapter" data-level="7.4" data-path="NN.html"><a href="NN.html#code-samples-and-comments-for-vanilla-mlp"><i class="fa fa-check"></i><b>7.4</b> Code samples and comments for vanilla MLP</a>
<ul>
<li class="chapter" data-level="7.4.1" data-path="NN.html"><a href="NN.html#regression-example"><i class="fa fa-check"></i><b>7.4.1</b> Regression example</a></li>
<li class="chapter" data-level="7.4.2" data-path="NN.html"><a href="NN.html#classification-example"><i class="fa fa-check"></i><b>7.4.2</b> Classification example</a></li>
<li class="chapter" data-level="7.4.3" data-path="NN.html"><a href="NN.html#custloss"><i class="fa fa-check"></i><b>7.4.3</b> Custom losses</a></li>
</ul></li>
<li class="chapter" data-level="7.5" data-path="NN.html"><a href="NN.html#RNN"><i class="fa fa-check"></i><b>7.5</b> Recurrent networks</a>
<ul>
<li class="chapter" data-level="7.5.1" data-path="NN.html"><a href="NN.html#presentation"><i class="fa fa-check"></i><b>7.5.1</b> Presentation</a></li>
<li class="chapter" data-level="7.5.2" data-path="NN.html"><a href="NN.html#code-and-results-2"><i class="fa fa-check"></i><b>7.5.2</b> Code and results</a></li>
</ul></li>
<li class="chapter" data-level="7.6" data-path="NN.html"><a href="NN.html#tabular-networks-tabnets"><i class="fa fa-check"></i><b>7.6</b> Tabular networks (TabNets)</a>
<ul>
<li class="chapter" data-level="7.6.1" data-path="NN.html"><a href="NN.html#the-zoo-of-layers"><i class="fa fa-check"></i><b>7.6.1</b> The zoo of layers</a></li>
<li class="chapter" data-level="7.6.2" data-path="NN.html"><a href="NN.html#sparsemax-activation"><i class="fa fa-check"></i><b>7.6.2</b> Sparsemax activation</a></li>
<li class="chapter" data-level="7.6.3" data-path="NN.html"><a href="NN.html#feature-selection-1"><i class="fa fa-check"></i><b>7.6.3</b> Feature selection</a></li>
<li class="chapter" data-level="7.6.4" data-path="NN.html"><a href="NN.html#the-full-architecture"><i class="fa fa-check"></i><b>7.6.4</b> The full architecture</a></li>
<li class="chapter" data-level="7.6.5" data-path="NN.html"><a href="NN.html#code-and-results-3"><i class="fa fa-check"></i><b>7.6.5</b> Code and results</a></li>
</ul></li>
<li class="chapter" data-level="7.7" data-path="NN.html"><a href="NN.html#other-common-architectures"><i class="fa fa-check"></i><b>7.7</b> Other common architectures</a>
<ul>
<li class="chapter" data-level="7.7.1" data-path="NN.html"><a href="NN.html#generative-aversarial-networks"><i class="fa fa-check"></i><b>7.7.1</b> Generative adversarial networks</a></li>
<li class="chapter" data-level="7.7.2" data-path="NN.html"><a href="NN.html#autoencoders"><i class="fa fa-check"></i><b>7.7.2</b> Autoencoders</a></li>
<li class="chapter" data-level="7.7.3" data-path="NN.html"><a href="NN.html#CNN"><i class="fa fa-check"></i><b>7.7.3</b> A word on convolutional networks</a></li>
</ul></li>
<li class="chapter" data-level="7.8" data-path="NN.html"><a href="NN.html#coding-exercises-3"><i class="fa fa-check"></i><b>7.8</b> Coding exercises</a></li>
</ul></li>
<li class="chapter" data-level="8" data-path="svm.html"><a href="svm.html"><i class="fa fa-check"></i><b>8</b> Support vector machines</a>
<ul>
<li class="chapter" data-level="8.1" data-path="svm.html"><a href="svm.html#svm-for-classification"><i class="fa fa-check"></i><b>8.1</b> SVM for classification</a></li>
<li class="chapter" data-level="8.2" data-path="svm.html"><a href="svm.html#svm-for-regression"><i class="fa fa-check"></i><b>8.2</b> SVM for regression</a></li>
<li class="chapter" data-level="8.3" data-path="svm.html"><a href="svm.html#practice"><i class="fa fa-check"></i><b>8.3</b> Practice</a></li>
<li class="chapter" data-level="8.4" data-path="svm.html"><a href="svm.html#coding-exercises-4"><i class="fa fa-check"></i><b>8.4</b> Coding exercises</a></li>
</ul></li>
<li class="chapter" data-level="9" data-path="bayes.html"><a href="bayes.html"><i class="fa fa-check"></i><b>9</b> Bayesian methods</a>
<ul>
<li class="chapter" data-level="9.1" data-path="bayes.html"><a href="bayes.html#the-bayesian-framework"><i class="fa fa-check"></i><b>9.1</b> The Bayesian framework</a></li>
<li class="chapter" data-level="9.2" data-path="bayes.html"><a href="bayes.html#bayesian-sampling"><i class="fa fa-check"></i><b>9.2</b> Bayesian sampling</a>
<ul>
<li class="chapter" data-level="9.2.1" data-path="bayes.html"><a href="bayes.html#gibbs-sampling"><i class="fa fa-check"></i><b>9.2.1</b> Gibbs sampling</a></li>
<li class="chapter" data-level="9.2.2" data-path="bayes.html"><a href="bayes.html#metropolis-hastings-sampling"><i class="fa fa-check"></i><b>9.2.2</b> Metropolis-Hastings sampling</a></li>
</ul></li>
<li class="chapter" data-level="9.3" data-path="bayes.html"><a href="bayes.html#bayesian-linear-regression"><i class="fa fa-check"></i><b>9.3</b> Bayesian linear regression</a></li>
<li class="chapter" data-level="9.4" data-path="bayes.html"><a href="bayes.html#naive-bayes-classifier"><i class="fa fa-check"></i><b>9.4</b> Naive Bayes classifier</a></li>
<li class="chapter" data-level="9.5" data-path="bayes.html"><a href="bayes.html#BART"><i class="fa fa-check"></i><b>9.5</b> Bayesian additive trees</a>
<ul>
<li class="chapter" data-level="9.5.1" data-path="bayes.html"><a href="bayes.html#general-formulation"><i class="fa fa-check"></i><b>9.5.1</b> General formulation</a></li>
<li class="chapter" data-level="9.5.2" data-path="bayes.html"><a href="bayes.html#priors"><i class="fa fa-check"></i><b>9.5.2</b> Priors</a></li>
<li class="chapter" data-level="9.5.3" data-path="bayes.html"><a href="bayes.html#sampling-and-predictions"><i class="fa fa-check"></i><b>9.5.3</b> Sampling and predictions</a></li>
<li class="chapter" data-level="9.5.4" data-path="bayes.html"><a href="bayes.html#code"><i class="fa fa-check"></i><b>9.5.4</b> Code</a></li>
</ul></li>
</ul></li>
<li class="part"><span><b>III From predictions to portfolios</b></span></li>
<li class="chapter" data-level="10" data-path="valtune.html"><a href="valtune.html"><i class="fa fa-check"></i><b>10</b> Validating and tuning</a>
<ul>
<li class="chapter" data-level="10.1" data-path="valtune.html"><a href="valtune.html#mlmetrics"><i class="fa fa-check"></i><b>10.1</b> Learning metrics</a>
<ul>
<li class="chapter" data-level="10.1.1" data-path="valtune.html"><a href="valtune.html#regression-analysis"><i class="fa fa-check"></i><b>10.1.1</b> Regression analysis</a></li>
<li class="chapter" data-level="10.1.2" data-path="valtune.html"><a href="valtune.html#classification-analysis"><i class="fa fa-check"></i><b>10.1.2</b> Classification analysis</a></li>
</ul></li>
<li class="chapter" data-level="10.2" data-path="valtune.html"><a href="valtune.html#validation"><i class="fa fa-check"></i><b>10.2</b> Validation</a>
<ul>
<li class="chapter" data-level="10.2.1" data-path="valtune.html"><a href="valtune.html#the-variance-bias-tradeoff-theory"><i class="fa fa-check"></i><b>10.2.1</b> The variance-bias tradeoff: theory</a></li>
<li class="chapter" data-level="10.2.2" data-path="valtune.html"><a href="valtune.html#the-variance-bias-tradeoff-illustration"><i class="fa fa-check"></i><b>10.2.2</b> The variance-bias tradeoff: illustration</a></li>
<li class="chapter" data-level="10.2.3" data-path="valtune.html"><a href="valtune.html#the-risk-of-overfitting-principle"><i class="fa fa-check"></i><b>10.2.3</b> The risk of overfitting: principle</a></li>
<li class="chapter" data-level="10.2.4" data-path="valtune.html"><a href="valtune.html#the-risk-of-overfitting-some-solutions"><i class="fa fa-check"></i><b>10.2.4</b> The risk of overfitting: some solutions</a></li>
</ul></li>
<li class="chapter" data-level="10.3" data-path="valtune.html"><a href="valtune.html#the-search-for-good-hyperparameters"><i class="fa fa-check"></i><b>10.3</b> The search for good hyperparameters</a>
<ul>
<li class="chapter" data-level="10.3.1" data-path="valtune.html"><a href="valtune.html#methods"><i class="fa fa-check"></i><b>10.3.1</b> Methods</a></li>
<li class="chapter" data-level="10.3.2" data-path="valtune.html"><a href="valtune.html#example-grid-search"><i class="fa fa-check"></i><b>10.3.2</b> Example: grid search</a></li>
<li class="chapter" data-level="10.3.3" data-path="valtune.html"><a href="valtune.html#example-bayesian-optimization"><i class="fa fa-check"></i><b>10.3.3</b> Example: Bayesian optimization</a></li>
</ul></li>
<li class="chapter" data-level="10.4" data-path="valtune.html"><a href="valtune.html#short-discussion-on-validation-in-backtests"><i class="fa fa-check"></i><b>10.4</b> Short discussion on validation in backtests</a></li>
</ul></li>
<li class="chapter" data-level="11" data-path="ensemble.html"><a href="ensemble.html"><i class="fa fa-check"></i><b>11</b> Ensemble models</a>
<ul>
<li class="chapter" data-level="11.1" data-path="ensemble.html"><a href="ensemble.html#linear-ensembles"><i class="fa fa-check"></i><b>11.1</b> Linear ensembles</a>
<ul>
<li class="chapter" data-level="11.1.1" data-path="ensemble.html"><a href="ensemble.html#principles"><i class="fa fa-check"></i><b>11.1.1</b> Principles</a></li>
<li class="chapter" data-level="11.1.2" data-path="ensemble.html"><a href="ensemble.html#example"><i class="fa fa-check"></i><b>11.1.2</b> Example</a></li>
</ul></li>
<li class="chapter" data-level="11.2" data-path="ensemble.html"><a href="ensemble.html#stacked-ensembles"><i class="fa fa-check"></i><b>11.2</b> Stacked ensembles</a>
<ul>
<li class="chapter" data-level="11.2.1" data-path="ensemble.html"><a href="ensemble.html#two-stage-training"><i class="fa fa-check"></i><b>11.2.1</b> Two-stage training</a></li>
<li class="chapter" data-level="11.2.2" data-path="ensemble.html"><a href="ensemble.html#code-and-results-4"><i class="fa fa-check"></i><b>11.2.2</b> Code and results</a></li>
</ul></li>
<li class="chapter" data-level="11.3" data-path="ensemble.html"><a href="ensemble.html#extensions-1"><i class="fa fa-check"></i><b>11.3</b> Extensions</a>
<ul>
<li class="chapter" data-level="11.3.1" data-path="ensemble.html"><a href="ensemble.html#exogenous-variables"><i class="fa fa-check"></i><b>11.3.1</b> Exogenous variables</a></li>
<li class="chapter" data-level="11.3.2" data-path="ensemble.html"><a href="ensemble.html#shrinking-inter-model-correlations"><i class="fa fa-check"></i><b>11.3.2</b> Shrinking inter-model correlations</a></li>
</ul></li>
<li class="chapter" data-level="11.4" data-path="ensemble.html"><a href="ensemble.html#exercise"><i class="fa fa-check"></i><b>11.4</b> Exercise</a></li>
</ul></li>
<li class="chapter" data-level="12" data-path="backtest.html"><a href="backtest.html"><i class="fa fa-check"></i><b>12</b> Portfolio backtesting</a>
<ul>
<li class="chapter" data-level="12.1" data-path="backtest.html"><a href="backtest.html#protocol"><i class="fa fa-check"></i><b>12.1</b> Setting the protocol</a></li>
<li class="chapter" data-level="12.2" data-path="backtest.html"><a href="backtest.html#turning-signals-into-portfolio-weights"><i class="fa fa-check"></i><b>12.2</b> Turning signals into portfolio weights</a></li>
<li class="chapter" data-level="12.3" data-path="backtest.html"><a href="backtest.html#perfmet"><i class="fa fa-check"></i><b>12.3</b> Performance metrics</a>
<ul>
<li class="chapter" data-level="12.3.1" data-path="backtest.html"><a href="backtest.html#discussion-1"><i class="fa fa-check"></i><b>12.3.1</b> Discussion</a></li>
<li class="chapter" data-level="12.3.2" data-path="backtest.html"><a href="backtest.html#pure-performance-and-risk-indicators"><i class="fa fa-check"></i><b>12.3.2</b> Pure performance and risk indicators</a></li>
<li class="chapter" data-level="12.3.3" data-path="backtest.html"><a href="backtest.html#factor-based-evaluation"><i class="fa fa-check"></i><b>12.3.3</b> Factor-based evaluation</a></li>
<li class="chapter" data-level="12.3.4" data-path="backtest.html"><a href="backtest.html#risk-adjusted-measures"><i class="fa fa-check"></i><b>12.3.4</b> Risk-adjusted measures</a></li>
<li class="chapter" data-level="12.3.5" data-path="backtest.html"><a href="backtest.html#transaction-costs-and-turnover"><i class="fa fa-check"></i><b>12.3.5</b> Transaction costs and turnover</a></li>
</ul></li>
<li class="chapter" data-level="12.4" data-path="backtest.html"><a href="backtest.html#common-errors-and-issues"><i class="fa fa-check"></i><b>12.4</b> Common errors and issues</a>
<ul>
<li class="chapter" data-level="12.4.1" data-path="backtest.html"><a href="backtest.html#forward-looking-data"><i class="fa fa-check"></i><b>12.4.1</b> Forward looking data</a></li>
<li class="chapter" data-level="12.4.2" data-path="backtest.html"><a href="backtest.html#backov"><i class="fa fa-check"></i><b>12.4.2</b> Backtest overfitting</a></li>
<li class="chapter" data-level="12.4.3" data-path="backtest.html"><a href="backtest.html#simple-safeguards"><i class="fa fa-check"></i><b>12.4.3</b> Simple safeguards</a></li>
</ul></li>
<li class="chapter" data-level="12.5" data-path="backtest.html"><a href="backtest.html#implication-of-non-stationarity-forecasting-is-hard"><i class="fa fa-check"></i><b>12.5</b> Implication of non-stationarity: forecasting is hard</a>
<ul>
<li class="chapter" data-level="12.5.1" data-path="backtest.html"><a href="backtest.html#general-comments"><i class="fa fa-check"></i><b>12.5.1</b> General comments</a></li>
<li class="chapter" data-level="12.5.2" data-path="backtest.html"><a href="backtest.html#the-no-free-lunch-theorem"><i class="fa fa-check"></i><b>12.5.2</b> The no free lunch theorem</a></li>
</ul></li>
<li class="chapter" data-level="12.6" data-path="backtest.html"><a href="backtest.html#first-example-a-complete-backtest"><i class="fa fa-check"></i><b>12.6</b> First example: a complete backtest</a></li>
<li class="chapter" data-level="12.7" data-path="backtest.html"><a href="backtest.html#second-example-backtest-overfitting"><i class="fa fa-check"></i><b>12.7</b> Second example: backtest overfitting</a></li>
<li class="chapter" data-level="12.8" data-path="backtest.html"><a href="backtest.html#coding-exercises-5"><i class="fa fa-check"></i><b>12.8</b> Coding exercises</a></li>
</ul></li>
<li class="part"><span><b>IV Further important topics</b></span></li>
<li class="chapter" data-level="13" data-path="interp.html"><a href="interp.html"><i class="fa fa-check"></i><b>13</b> Interpretability</a>
<ul>
<li class="chapter" data-level="13.1" data-path="interp.html"><a href="interp.html#global-interpretations"><i class="fa fa-check"></i><b>13.1</b> Global interpretations</a>
<ul>
<li class="chapter" data-level="13.1.1" data-path="interp.html"><a href="interp.html#surr"><i class="fa fa-check"></i><b>13.1.1</b> Simple models as surrogates</a></li>
<li class="chapter" data-level="13.1.2" data-path="interp.html"><a href="interp.html#variable-importance"><i class="fa fa-check"></i><b>13.1.2</b> Variable importance (tree-based)</a></li>
<li class="chapter" data-level="13.1.3" data-path="interp.html"><a href="interp.html#variable-importance-agnostic"><i class="fa fa-check"></i><b>13.1.3</b> Variable importance (agnostic)</a></li>
<li class="chapter" data-level="13.1.4" data-path="interp.html"><a href="interp.html#partial-dependence-plot"><i class="fa fa-check"></i><b>13.1.4</b> Partial dependence plot</a></li>
</ul></li>
<li class="chapter" data-level="13.2" data-path="interp.html"><a href="interp.html#local-interpretations"><i class="fa fa-check"></i><b>13.2</b> Local interpretations</a>
<ul>
<li class="chapter" data-level="13.2.1" data-path="interp.html"><a href="interp.html#lime"><i class="fa fa-check"></i><b>13.2.1</b> LIME</a></li>
<li class="chapter" data-level="13.2.2" data-path="interp.html"><a href="interp.html#shapley-values"><i class="fa fa-check"></i><b>13.2.2</b> Shapley values</a></li>
<li class="chapter" data-level="13.2.3" data-path="interp.html"><a href="interp.html#breakdown"><i class="fa fa-check"></i><b>13.2.3</b> Breakdown</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="14" data-path="causality.html"><a href="causality.html"><i class="fa fa-check"></i><b>14</b> Two key concepts: causality and non-stationarity</a>
<ul>
<li class="chapter" data-level="14.1" data-path="causality.html"><a href="causality.html#causality-1"><i class="fa fa-check"></i><b>14.1</b> Causality</a>
<ul>
<li class="chapter" data-level="14.1.1" data-path="causality.html"><a href="causality.html#granger"><i class="fa fa-check"></i><b>14.1.1</b> Granger causality</a></li>
<li class="chapter" data-level="14.1.2" data-path="causality.html"><a href="causality.html#causal-additive-models"><i class="fa fa-check"></i><b>14.1.2</b> Causal additive models</a></li>
<li class="chapter" data-level="14.1.3" data-path="causality.html"><a href="causality.html#structural-time-series-models"><i class="fa fa-check"></i><b>14.1.3</b> Structural time series models</a></li>
</ul></li>
<li class="chapter" data-level="14.2" data-path="causality.html"><a href="causality.html#nonstat"><i class="fa fa-check"></i><b>14.2</b> Dealing with changing environments</a>
<ul>
<li class="chapter" data-level="14.2.1" data-path="causality.html"><a href="causality.html#non-stationarity-yet-another-illustration"><i class="fa fa-check"></i><b>14.2.1</b> Non-stationarity: yet another illustration</a></li>
<li class="chapter" data-level="14.2.2" data-path="causality.html"><a href="causality.html#online-learning"><i class="fa fa-check"></i><b>14.2.2</b> Online learning</a></li>
<li class="chapter" data-level="14.2.3" data-path="causality.html"><a href="causality.html#homogeneous-transfer-learning"><i class="fa fa-check"></i><b>14.2.3</b> Homogeneous transfer learning</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="15" data-path="unsup.html"><a href="unsup.html"><i class="fa fa-check"></i><b>15</b> Unsupervised learning</a>
<ul>
<li class="chapter" data-level="15.1" data-path="unsup.html"><a href="unsup.html#corpred"><i class="fa fa-check"></i><b>15.1</b> The problem with correlated predictors</a></li>
<li class="chapter" data-level="15.2" data-path="unsup.html"><a href="unsup.html#principal-component-analysis-and-autoencoders"><i class="fa fa-check"></i><b>15.2</b> Principal component analysis and autoencoders</a>
<ul>
<li class="chapter" data-level="15.2.1" data-path="unsup.html"><a href="unsup.html#a-bit-of-algebra"><i class="fa fa-check"></i><b>15.2.1</b> A bit of algebra</a></li>
<li class="chapter" data-level="15.2.2" data-path="unsup.html"><a href="unsup.html#pca"><i class="fa fa-check"></i><b>15.2.2</b> PCA</a></li>
<li class="chapter" data-level="15.2.3" data-path="unsup.html"><a href="unsup.html#ae"><i class="fa fa-check"></i><b>15.2.3</b> Autoencoders</a></li>
<li class="chapter" data-level="15.2.4" data-path="unsup.html"><a href="unsup.html#application"><i class="fa fa-check"></i><b>15.2.4</b> Application</a></li>
</ul></li>
<li class="chapter" data-level="15.3" data-path="unsup.html"><a href="unsup.html#clustering-via-k-means"><i class="fa fa-check"></i><b>15.3</b> Clustering via k-means</a></li>
<li class="chapter" data-level="15.4" data-path="unsup.html"><a href="unsup.html#nearest-neighbors"><i class="fa fa-check"></i><b>15.4</b> Nearest neighbors</a></li>
<li class="chapter" data-level="15.5" data-path="unsup.html"><a href="unsup.html#coding-exercise-1"><i class="fa fa-check"></i><b>15.5</b> Coding exercise</a></li>
</ul></li>
<li class="chapter" data-level="16" data-path="RL.html"><a href="RL.html"><i class="fa fa-check"></i><b>16</b> Reinforcement learning</a>
<ul>
<li class="chapter" data-level="16.1" data-path="RL.html"><a href="RL.html#theoretical-layout"><i class="fa fa-check"></i><b>16.1</b> Theoretical layout</a>
<ul>
<li class="chapter" data-level="16.1.1" data-path="RL.html"><a href="RL.html#general-framework"><i class="fa fa-check"></i><b>16.1.1</b> General framework</a></li>
<li class="chapter" data-level="16.1.2" data-path="RL.html"><a href="RL.html#q-learning"><i class="fa fa-check"></i><b>16.1.2</b> Q-learning</a></li>
<li class="chapter" data-level="16.1.3" data-path="RL.html"><a href="RL.html#sarsa"><i class="fa fa-check"></i><b>16.1.3</b> SARSA</a></li>
</ul></li>
<li class="chapter" data-level="16.2" data-path="RL.html"><a href="RL.html#the-curse-of-dimensionality"><i class="fa fa-check"></i><b>16.2</b> The curse of dimensionality</a></li>
<li class="chapter" data-level="16.3" data-path="RL.html"><a href="RL.html#policy-gradient"><i class="fa fa-check"></i><b>16.3</b> Policy gradient</a>
<ul>
<li class="chapter" data-level="16.3.1" data-path="RL.html"><a href="RL.html#principle-2"><i class="fa fa-check"></i><b>16.3.1</b> Principle</a></li>
<li class="chapter" data-level="16.3.2" data-path="RL.html"><a href="RL.html#extensions-2"><i class="fa fa-check"></i><b>16.3.2</b> Extensions</a></li>
</ul></li>
<li class="chapter" data-level="16.4" data-path="RL.html"><a href="RL.html#simple-examples"><i class="fa fa-check"></i><b>16.4</b> Simple examples</a>
<ul>
<li class="chapter" data-level="16.4.1" data-path="RL.html"><a href="RL.html#q-learning-with-simulations"><i class="fa fa-check"></i><b>16.4.1</b> Q-learning with simulations</a></li>
<li class="chapter" data-level="16.4.2" data-path="RL.html"><a href="RL.html#RLemp2"><i class="fa fa-check"></i><b>16.4.2</b> Q-learning with market data</a></li>
</ul></li>
<li class="chapter" data-level="16.5" data-path="RL.html"><a href="RL.html#concluding-remarks"><i class="fa fa-check"></i><b>16.5</b> Concluding remarks</a></li>
<li class="chapter" data-level="16.6" data-path="RL.html"><a href="RL.html#exercises"><i class="fa fa-check"></i><b>16.6</b> Exercises</a></li>
</ul></li>
<li class="part"><span><b>V Appendix</b></span></li>
<li class="chapter" data-level="17" data-path="data-description.html"><a href="data-description.html"><i class="fa fa-check"></i><b>17</b> Data description</a></li>
<li class="chapter" data-level="18" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html"><i class="fa fa-check"></i><b>18</b> Solutions to exercises</a>
<ul>
<li class="chapter" data-level="18.1" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#chapter-3"><i class="fa fa-check"></i><b>18.1</b> Chapter 3</a></li>
<li class="chapter" data-level="18.2" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#chapter-4"><i class="fa fa-check"></i><b>18.2</b> Chapter 4</a></li>
<li class="chapter" data-level="18.3" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#chapter-5"><i class="fa fa-check"></i><b>18.3</b> Chapter 5</a></li>
<li class="chapter" data-level="18.4" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#chapter-6"><i class="fa fa-check"></i><b>18.4</b> Chapter 6</a></li>
<li class="chapter" data-level="18.5" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#chapter-7-the-autoencoder-model-universal-approximation"><i class="fa fa-check"></i><b>18.5</b> Chapter 7: the autoencoder model & universal approximation</a></li>
<li class="chapter" data-level="18.6" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#chapter-8"><i class="fa fa-check"></i><b>18.6</b> Chapter 8</a></li>
<li class="chapter" data-level="18.7" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#chapter-11-ensemble-neural-network"><i class="fa fa-check"></i><b>18.7</b> Chapter 11: ensemble neural network</a></li>
<li class="chapter" data-level="18.8" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#chapter-12"><i class="fa fa-check"></i><b>18.8</b> Chapter 12</a>
<ul>
<li class="chapter" data-level="18.8.1" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#ew-portfolios-with-the-tidyverse"><i class="fa fa-check"></i><b>18.8.1</b> EW portfolios with the tidyverse</a></li>
<li class="chapter" data-level="18.8.2" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#advanced-weighting-function"><i class="fa fa-check"></i><b>18.8.2</b> Advanced weighting function</a></li>
<li class="chapter" data-level="18.8.3" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#functional-programming-in-the-backtest"><i class="fa fa-check"></i><b>18.8.3</b> Functional programming in the backtest</a></li>
</ul></li>
<li class="chapter" data-level="18.9" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#chapter-15"><i class="fa fa-check"></i><b>18.9</b> Chapter 15</a></li>
<li class="chapter" data-level="18.10" data-path="solutions-to-exercises.html"><a href="solutions-to-exercises.html#chapter-16"><i class="fa fa-check"></i><b>18.10</b> Chapter 16</a></li>
</ul></li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Machine Learning for Factor Investing</a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<section class="normal" id="section-">
<div id="header">
<h1 class="title">Machine Learning for Factor Investing</h1>
<p class="author"><em>Guillaume Coqueret and Tony Guida</em></p>
<p class="date"><em>2021-04-11</em></p>
</div>
<div id="preface" class="section level1 unnumbered">
<h1>Preface</h1>
<p>This book is intended to cover some advanced modelling techniques applied to equity <strong>investment strategies</strong> that are built on <strong>firm characteristics</strong>. The content is threefold. First, we try to simply explain the ideas behind most mainstream machine learning algorithms that are used in equity asset allocation. Second, we mention a wide range of academic references for the readers who wish to push a little further. Finally, we provide hands-on <strong>R</strong> code samples that show how to apply the concepts and tools on a realistic dataset which we share to encourage <strong>reproducibility</strong>.</p>
<div id="what-this-book-is-not-about" class="section level2 unnumbered">
<h2>What this book is not about</h2>
<p>This book deals with machine learning (ML) tools and their applications in factor investing. Factor investing is a subfield of a large discipline that encompasses asset allocation, quantitative trading and wealth management. Its premise is that differences in the returns of firms can be explained by the characteristics of these firms. Thus, it departs from traditional analyses which rely on price and volume data only, like classical portfolio theory à la <span class="citation"><a href="solutions-to-exercises.html#ref-markowitz1952portfolio" role="doc-biblioref">Markowitz</a> (<a href="solutions-to-exercises.html#ref-markowitz1952portfolio" role="doc-biblioref">1952</a>)</span>, or high frequency trading. For a general and broad treatment of Machine Learning in Finance, we refer to <span class="citation"><a href="solutions-to-exercises.html#ref-dixon2020machine" role="doc-biblioref">Matthew F. Dixon, Halperin, and Bilokon</a> (<a href="solutions-to-exercises.html#ref-dixon2020machine" role="doc-biblioref">2020</a>)</span>.</p>
<p>The topics we discuss are related to other themes that will not be covered in the monograph. These themes include:</p>
<ul>
<li>Applications of ML in <strong>other financial fields</strong>, such as <strong>fraud detection</strong> or <strong>credit scoring</strong>. We refer to <span class="citation"><a href="solutions-to-exercises.html#ref-ngai2011application" role="doc-biblioref">Ngai et al.</a> (<a href="solutions-to-exercises.html#ref-ngai2011application" role="doc-biblioref">2011</a>)</span> and <span class="citation"><a href="solutions-to-exercises.html#ref-baesens2015fraud" role="doc-biblioref">Baesens, Van Vlasselaer, and Verbeke</a> (<a href="solutions-to-exercises.html#ref-baesens2015fraud" role="doc-biblioref">2015</a>)</span> for general purpose fraud detection, to <span class="citation"><a href="solutions-to-exercises.html#ref-bhattacharyya2011data" role="doc-biblioref">Bhattacharyya et al.</a> (<a href="solutions-to-exercises.html#ref-bhattacharyya2011data" role="doc-biblioref">2011</a>)</span> for a focus on credit cards and to <span class="citation"><a href="solutions-to-exercises.html#ref-ravisankar2011detection" role="doc-biblioref">Ravisankar et al.</a> (<a href="solutions-to-exercises.html#ref-ravisankar2011detection" role="doc-biblioref">2011</a>)</span> and <span class="citation"><a href="solutions-to-exercises.html#ref-abbasi2012metafraud" role="doc-biblioref">Abbasi et al.</a> (<a href="solutions-to-exercises.html#ref-abbasi2012metafraud" role="doc-biblioref">2012</a>)</span> for studies on fraudulent financial reporting. On the topic of credit scoring, <span class="citation"><a href="solutions-to-exercises.html#ref-wang2011comparative" role="doc-biblioref">G. Wang et al.</a> (<a href="solutions-to-exercises.html#ref-wang2011comparative" role="doc-biblioref">2011</a>)</span> and <span class="citation"><a href="solutions-to-exercises.html#ref-brown2012experimental" role="doc-biblioref">Brown and Mues</a> (<a href="solutions-to-exercises.html#ref-brown2012experimental" role="doc-biblioref">2012</a>)</span> provide overviews of methods and some empirical results. 
Also, we do not cover ML algorithms for data sampled at higher (daily or intraday) frequencies (microstructure models, limit order book). The chapter from <span class="citation"><a href="solutions-to-exercises.html#ref-kearns2013machine" role="doc-biblioref">Kearns and Nevmyvaka</a> (<a href="solutions-to-exercises.html#ref-kearns2013machine" role="doc-biblioref">2013</a>)</span> and the recent paper by <span class="citation"><a href="solutions-to-exercises.html#ref-sirignano2019universal" role="doc-biblioref">Sirignano and Cont</a> (<a href="solutions-to-exercises.html#ref-sirignano2019universal" role="doc-biblioref">2019</a>)</span> are good introductions on this topic.<br />
</li>
<li><strong>Use cases of alternative datasets</strong> that show how to leverage textual data from social media, satellite imagery, or credit card logs to predict sales, earning reports, and, ultimately, future returns. The literature on this topic is still emerging (see, e.g., <span class="citation"><a href="solutions-to-exercises.html#ref-blank2019using" role="doc-biblioref">Blank, Davis, and Greene</a> (<a href="solutions-to-exercises.html#ref-blank2019using" role="doc-biblioref">2019</a>)</span>, <span class="citation"><a href="solutions-to-exercises.html#ref-jha2019implementing" role="doc-biblioref">Jha</a> (<a href="solutions-to-exercises.html#ref-jha2019implementing" role="doc-biblioref">2019</a>)</span> and <span class="citation"><a href="solutions-to-exercises.html#ref-ke2019predicting" role="doc-biblioref">Z. T. Ke, Kelly, and Xiu</a> (<a href="solutions-to-exercises.html#ref-ke2019predicting" role="doc-biblioref">2019</a>)</span>) but will likely blossom in the near future.<br />
</li>
<li><strong>Technical details</strong> of machine learning tools. While we do provide some insights on specificities of some approaches (those we believe are important), the purpose of the book is not to serve as a reference manual on statistical learning. We refer to <span class="citation"><a href="solutions-to-exercises.html#ref-friedman2009elements" role="doc-biblioref">Hastie, Tibshirani, and Friedman</a> (<a href="solutions-to-exercises.html#ref-friedman2009elements" role="doc-biblioref">2009</a>)</span>, <span class="citation"><a href="solutions-to-exercises.html#ref-cornuejols2011apprentissage" role="doc-biblioref">Cornuejols, Miclet, and Barra</a> (<a href="solutions-to-exercises.html#ref-cornuejols2011apprentissage" role="doc-biblioref">2018</a>)</span> (written in French), <span class="citation"><a href="solutions-to-exercises.html#ref-james2013introduction" role="doc-biblioref">James et al.</a> (<a href="solutions-to-exercises.html#ref-james2013introduction" role="doc-biblioref">2013</a>)</span> (coded in R!) and <span class="citation"><a href="solutions-to-exercises.html#ref-mohri2018foundations" role="doc-biblioref">Mohri, Rostamizadeh, and Talwalkar</a> (<a href="solutions-to-exercises.html#ref-mohri2018foundations" role="doc-biblioref">2018</a>)</span> for a general treatment on the subject.<a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a> Moreover, <span class="citation"><a href="solutions-to-exercises.html#ref-du2013neural" role="doc-biblioref">K.-L. 
Du and Swamy</a> (<a href="solutions-to-exercises.html#ref-du2013neural" role="doc-biblioref">2013</a>)</span> and <span class="citation"><a href="solutions-to-exercises.html#ref-goodfellow2016deep" role="doc-biblioref">Goodfellow et al.</a> (<a href="solutions-to-exercises.html#ref-goodfellow2016deep" role="doc-biblioref">2016</a>)</span> are solid monographs on neural networks particularly and <span class="citation"><a href="solutions-to-exercises.html#ref-sutton2018reinforcement" role="doc-biblioref">Sutton and Barto</a> (<a href="solutions-to-exercises.html#ref-sutton2018reinforcement" role="doc-biblioref">2018</a>)</span> provide a self-contained and comprehensive tour in reinforcement learning.<br />
</li>
<li>Finally, the book does not cover methods of <strong>natural language processing</strong> (NLP) that can be used to evaluate sentiment which can in turn be translated into investment decisions. This topic has nonetheless been trending lately and we refer to <span class="citation"><a href="solutions-to-exercises.html#ref-loughran2016textual" role="doc-biblioref">Loughran and McDonald</a> (<a href="solutions-to-exercises.html#ref-loughran2016textual" role="doc-biblioref">2016</a>)</span>, <span class="citation"><a href="solutions-to-exercises.html#ref-cong2019analyzing" role="doc-biblioref">Cong, Liang, and Zhang</a> (<a href="solutions-to-exercises.html#ref-cong2019analyzing" role="doc-biblioref">2019a</a>)</span>, <span class="citation"><a href="solutions-to-exercises.html#ref-cong2019textual" role="doc-biblioref">Cong, Liang, and Zhang</a> (<a href="solutions-to-exercises.html#ref-cong2019textual" role="doc-biblioref">2019b</a>)</span> and <span class="citation"><a href="solutions-to-exercises.html#ref-gentzkow2019text" role="doc-biblioref">Gentzkow, Kelly, and Taddy</a> (<a href="solutions-to-exercises.html#ref-gentzkow2019text" role="doc-biblioref">2019</a>)</span> for recent advances on the matter.</li>
</ul>
</div>
<div id="the-targeted-audience" class="section level2 unnumbered">
<h2>The targeted audience</h2>
<p>Who should read this book? This book is intended for two types of audiences. First, <strong>postgraduate students</strong> who wish to pursue their studies in quantitative finance with a view towards investment and asset management. The second target group is <strong>professionals from the money management industry</strong> who either seek to pivot towards allocation methods that are based on machine learning or are simply interested in these new tools and want to upgrade their set of competences. To a lesser extent, the book can serve <strong>scholars or researchers</strong> who need a manual with a broad spectrum of references both on recent asset pricing issues and on machine learning algorithms applied to money management. While the book covers mostly common methods, it also shows how to implement more exotic models, like causal graphs (Chapter <a href="causality.html#causality">14</a>), Bayesian additive trees (Chapter <a href="bayes.html#bayes">9</a>), and hybrid autoencoders (Chapter <a href="NN.html#NN">7</a>).</p>
<p>The book assumes basic knowledge in <strong>algebra</strong> (matrix manipulation), <strong>analysis</strong> (function differentiation, gradients), <strong>optimization</strong> (first and second order conditions, dual forms), and <strong>statistics</strong> (distributions, moments, tests, simple estimation methods like maximum likelihood). A minimal <strong>financial culture</strong> is also required: simple notions like stocks and accounting quantities (e.g., book value) will not be defined in this book. Lastly, all examples and illustrations are coded in R. A minimal culture of the language is sufficient to understand the code snippets which rely heavily on the most common functions of the tidyverse (<span class="citation"><a href="solutions-to-exercises.html#ref-wickham2019welcome" role="doc-biblioref">Wickham et al.</a> (<a href="solutions-to-exercises.html#ref-wickham2019welcome" role="doc-biblioref">2019</a>)</span>, www.tidyverse.org), and piping (<span class="citation"><a href="solutions-to-exercises.html#ref-bache2014magrittr" role="doc-biblioref">Bache and Wickham</a> (<a href="solutions-to-exercises.html#ref-bache2014magrittr" role="doc-biblioref">2014</a>)</span>, <span class="citation"><a href="solutions-to-exercises.html#ref-mailund2019pipelines" role="doc-biblioref">Mailund</a> (<a href="solutions-to-exercises.html#ref-mailund2019pipelines" role="doc-biblioref">2019</a>)</span>).</p>
</div>
<div id="how-this-book-is-structured" class="section level2 unnumbered">
<h2>How this book is structured</h2>
<p>The book is divided into four parts.</p>
<p>Part I gathers preparatory material and starts with notations and data presentation (Chapter <a href="notdata.html#notdata">1</a>), followed by introductory remarks (Chapter <a href="intro.html#intro">2</a>). Chapter <a href="factor.html#factor">3</a> outlines the economic foundations (theoretical and empirical) of factor investing and briefly sums up the dedicated recent literature. Chapter <a href="Data.html#Data">4</a> deals with data preparation. It rapidly recalls the basic tips and warns about some major issues.</p>
<p>Part II of the book is dedicated to predictive algorithms in supervised learning. Those are the most common tools that are used to forecast financial quantities (returns, volatilities, Sharpe ratios, etc.). They range from penalized regressions (Chapter <a href="lasso.html#lasso">5</a>), to tree methods (Chapter <a href="trees.html#trees">6</a>), encompassing neural networks (Chapter <a href="NN.html#NN">7</a>), support vector machines (Chapter <a href="svm.html#svm">8</a>) and Bayesian approaches (Chapter <a href="bayes.html#bayes">9</a>).</p>
<p>The next portion of the book bridges the gap between these tools and their applications in finance. Chapter <a href="valtune.html#valtune">10</a> details how to assess and improve the ML engines defined beforehand. Chapter <a href="ensemble.html#ensemble">11</a> explains how models can be combined and often why that may not be a good idea. Finally, one of the most important chapters (Chapter <a href="backtest.html#backtest">12</a>) reviews the critical steps of portfolio backtesting and mentions the frequent mistakes that are often encountered at this stage.</p>
<p>The end of the book covers a range of advanced topics connected to machine learning more specifically. The first one is <strong>interpretability</strong>. ML models are often considered to be black boxes and this raises trust issues: how and why should one trust ML-based predictions? Chapter <a href="interp.html#interp">13</a> is intended to present methods that help understand what is happening under the hood. Chapter <a href="causality.html#causality">14</a> is focused on <strong>causality</strong>, which is both a much more powerful concept than correlation and also at the heart of many recent discussions in Artificial Intelligence (AI). Most ML tools rely on correlation-like patterns and it is important to underline the benefits of techniques related to causality. Finally, Chapters <a href="unsup.html#unsup">15</a> and <a href="RL.html#RL">16</a> are dedicated to non-supervised methods. The latter can be useful, but their financial applications should be wisely and cautiously motivated. <!-- Lastly, the final chapter (\@ref(NLP)) introduces standard approaches for the treatment of textual data. --></p>
</div>
<div id="companion-website" class="section level2 unnumbered">
<h2>Companion website</h2>
<p>This book is entirely available at <a href="http://www.mlfactor.com" class="uri">http://www.mlfactor.com</a>. It is important that not only the content of the book be accessible, but also the data and code that are used throughout the chapters. They can be found at <a href="https://github.com/shokru/mlfactor.github.io/tree/master/material" class="uri">https://github.com/shokru/mlfactor.github.io/tree/master/material</a>. The online version of the book will be updated beyond the publication of the printed version.</p>
</div>
<div id="why-r" class="section level2 unnumbered">
<h2>Why R?</h2>
<p>The supremacy of Python as <em>the</em> dominant ML programming language is a widespread belief. This is because almost all applications of deep learning (which is as of 2020 one of the most fashionable branches of ML) are coded in Python via Tensorflow or Pytorch.
The fact is that <strong>R</strong> has a <strong>lot</strong> to offer as well. First of all, let us not forget that one of the most influential textbooks in ML (<span class="citation"><a href="solutions-to-exercises.html#ref-friedman2009elements" role="doc-biblioref">Hastie, Tibshirani, and Friedman</a> (<a href="solutions-to-exercises.html#ref-friedman2009elements" role="doc-biblioref">2009</a>)</span>) is written by statisticians who code in R. Moreover, many statistics-orientated algorithms (e.g., BARTs in Section <a href="bayes.html#BART">9.5</a>) are primarily coded in R and not always in Python. The R offering in Bayesian packages in general (<a href="https://cran.r-project.org/web/views/Bayesian.html" class="uri">https://cran.r-project.org/web/views/Bayesian.html</a>) and in Bayesian learning in particular is probably unmatched.</p>
<p>There are currently several ML frameworks available in R.</p>
<ul>
<li><strong>caret</strong>: <a href="https://topepo.github.io/caret/index.html" class="uri">https://topepo.github.io/caret/index.html</a>, a compilation of more than 200 ML models;<br />
</li>
<li><strong>tidymodels</strong>: <a href="https://github.com/tidymodels" class="uri">https://github.com/tidymodels</a>, a recent collection of packages for ML workflow (developed by Max Kuhn at RStudio, which is a token of high quality material!);<br />
</li>
<li><strong>rtemis</strong>: <a href="https://rtemis.netlify.com" class="uri">https://rtemis.netlify.com</a>, a general purpose package for ML and visualization;<br />
</li>
<li><strong>mlr3</strong>: <a href="https://mlr3.mlr-org.com/index.html" class="uri">https://mlr3.mlr-org.com/index.html</a>, also a simple framework for ML models;<br />
</li>
<li><strong>h2o</strong>: <a href="https://github.com/h2oai/h2o-3/tree/master/h2o-r" class="uri">https://github.com/h2oai/h2o-3/tree/master/h2o-r</a>, a large set of tools provided by h2o (coded in Java);<br />
</li>
<li><strong>Open ML</strong>: <a href="https://github.com/openml/openml-r" class="uri">https://github.com/openml/openml-r</a>, the R version of the OpenML (www.openml.org) community.</li>
</ul>
<p>Moreover, via the <em>reticulate</em> package, it is possible (but not always easy) to benefit from Python tools as well. The most prominent example is the adaptation of the <em>tensorflow</em> and <em>keras</em> libraries to R. Thus, some very advanced Python material is readily available to R users. This is also true for other resources, like Stanford’s CoreNLP library (in Java) which was adapted to R in the package <em>coreNLP</em> (which we will not use in this book).</p>
</div>
<div id="coding-instructions" class="section level2 unnumbered">
<h2>Coding instructions</h2>
<p>One of the purposes of the book is to propose a large-scale tutorial of ML applications in financial predictions and portfolio selection. Thus, one keyword is <strong>REPRODUCIBILITY</strong>! In order to duplicate our results (up to possible randomness in some learning algorithms), you will need running versions of R and RStudio on your computer. The best books to learn R are also often freely available online. A short list can be found here <a href="https://rstudio.com/resources/books/" class="uri">https://rstudio.com/resources/books/</a>. The monograph <em>R for Data Science</em> is probably the most crucial.</p>
<p>In terms of coding requirements, we rely heavily on the <strong>tidyverse</strong>, which is a collection of <strong>packages</strong> (or libraries). The three packages we use most are <strong>dplyr</strong> which implements simple data manipulations (filter, select, arrange), <strong>tidyr</strong> which formats data in a tidy fashion, and <strong>ggplot</strong>, for graphical outputs.</p>
<p>A list of the packages we use can be found in Table <a href="preface.html#tab:packages">0.1</a> below. Packages with a star <span class="math inline">\(*\)</span> need to be installed via <em>bioconductor</em>.<a href="#fn2" class="footnote-ref" id="fnref2"><sup>2</sup></a> Packages with a plus <span class="math inline">\(^+\)</span> need to be installed <strong>manually</strong>.<a href="#fn3" class="footnote-ref" id="fnref3"><sup>3</sup></a></p>
<table>
<caption><span id="tab:packages">TABLE 0.1: </span> List of all packages used in the book.</caption>
<thead>
<tr class="header">
<th align="left"><em>Package</em></th>
<th align="left">Purpose</th>
<th align="center">Chapter(s)</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="left"><em>BART</em></td>
<td align="left">Bayesian additive trees</td>
<td align="center">10</td>
</tr>
<tr class="even">
<td align="left"><em>broom</em></td>
<td align="left">Tidy regression output</td>
<td align="center">5</td>
</tr>
<tr class="odd">
<td align="left"><em>CAM</em><span class="math inline">\(^+\)</span></td>
<td align="left">Causal Additive Models</td>
<td align="center">15</td>
</tr>
<tr class="even">
<td align="left"><em>caTools</em></td>
<td align="left">AUC curves</td>
<td align="center">11</td>
</tr>
<tr class="odd">
<td align="left"><em>CausalImpact</em></td>
<td align="left">Causal inference with structural time series</td>
<td align="center">15</td>
</tr>
<tr class="even">
<td align="left"><em>cowplot</em></td>
<td align="left">Stacking plots</td>
<td align="center">4 & 13</td>
</tr>
<tr class="odd">
<td align="left"><em>breakDown</em></td>
<td align="left">Breakdown interpretability</td>
<td align="center">14</td>
</tr>
<tr class="even">
<td align="left"><em>dummies</em></td>
<td align="left">One-hot encoding</td>
<td align="center">8</td>
</tr>
<tr class="odd">
<td align="left"><em>e1071</em></td>
<td align="left">Support Vector Machines</td>
<td align="center">9</td>
</tr>
<tr class="even">
<td align="left"><em>factoextra</em></td>
<td align="left">PCA visualization</td>
<td align="center">16</td>
</tr>
<tr class="odd">
<td align="left"><em>fastAdaboost</em></td>
<td align="left">Boosted trees</td>
<td align="center">7</td>
</tr>
<tr class="even">
<td align="left"><em>forecast</em></td>
<td align="left">Autocorrelation function</td>
<td align="center">4</td>
</tr>
<tr class="odd">
<td align="left"><em>FNN</em></td>
<td align="left">Nearest Neighbors detection</td>
<td align="center">16</td>
</tr>
<tr class="even">
<td align="left"><em>ggpubr</em></td>
<td align="left">Combining plots</td>
<td align="center">11</td>
</tr>
<tr class="odd">
<td align="left"><em>glmnet</em></td>
<td align="left">Penalized regressions</td>
<td align="center">6</td>
</tr>
<tr class="even">
<td align="left"><em>iml</em></td>
<td align="left">Interpretability tools</td>
<td align="center">14</td>
</tr>
<tr class="odd">
<td align="left"><em>keras</em></td>
<td align="left">Neural networks</td>
<td align="center">8</td>
</tr>
<tr class="even">
<td align="left"><em>lime</em></td>
<td align="left">Interpretability</td>
<td align="center">14</td>
</tr>
<tr class="odd">
<td align="left"><em>lmtest</em></td>
<td align="left">Granger causality</td>
<td align="center">15</td>
</tr>
<tr class="even">
<td align="left"><em>lubridate</em></td>
<td align="left">Handling dates</td>
<td align="center">All (or many)</td>
</tr>
<tr class="odd">
<td align="left"><em>naivebayes</em></td>
<td align="left">Naive Bayes classifier</td>
<td align="center">10</td>
</tr>
<tr class="even">
<td align="left"><em>pcalg</em></td>
<td align="left">Causal graphs</td>
<td align="center">15</td>
</tr>
<tr class="odd">
<td align="left"><em>quadprog</em></td>
<td align="left">Quadratic programming</td>
<td align="center">12</td>
</tr>
<tr class="even">
<td align="left"><em>quantmod</em></td>
<td align="left">Data extraction</td>
<td align="center">4, 12</td>
</tr>
<tr class="odd">
<td align="left"><em>randomForest</em></td>
<td align="left">Random forests</td>
<td align="center">7</td>
</tr>
<tr class="even">
<td align="left"><em>rBayesianOptimization</em></td>
<td align="left">Bayesian hyperparameter tuning</td>
<td align="center">11</td>
</tr>
<tr class="odd">
<td align="left"><em>ReinforcementLearning</em></td>
<td align="left">Reinforcement Learning</td>
<td align="center">17</td>
</tr>
<tr class="even">
<td align="left"><em>Rgraphviz</em><span class="math inline">\(^*\)</span></td>
<td align="left">Causal graphs</td>
<td align="center">15</td>
</tr>
<tr class="odd">
<td align="left"><em>rpart</em> and <em>rpart.plot</em></td>
<td align="left">Simple decision trees</td>
<td align="center">7</td>
</tr>
<tr class="even">
<td align="left"><em>spBayes</em></td>
<td align="left">Bayesian linear regression</td>
<td align="center">10</td>
</tr>
<tr class="odd">
<td align="left"><em>tidyverse</em></td>
<td align="left">Environment for data science, data wrangling</td>
<td align="center">All</td>
</tr>
<tr class="even">
<td align="left"><em>xgboost</em></td>
<td align="left">Boosted trees</td>
<td align="center">7</td>
</tr>
<tr class="odd">
<td align="left"><em>xtable</em></td>
<td align="left">Table formatting</td>
<td align="center">4</td>
</tr>
</tbody>
</table>
<p>Of all of these packages (or collections thereof), the <strong>tidyverse</strong> and <strong>lubridate</strong> are compulsory in almost all sections of the book. To install a new package in R, just type</p>
<p>install.packages(“name_of_the_package”)</p>
<p>in the console. Sometimes, because of function name conflicts (especially with the select() function), we use the syntax package::function() to make sure the function call is from the right source. The exact version of the packages used to compile the book is listed in the “<em>renv.lock</em>” file available on the book’s GitHub web page <a href="https://github.com/shokru/mlfactor.github.io" class="uri">https://github.com/shokru/mlfactor.github.io</a>. One minor comment is the following: while the functions <em>gather()</em> and <em>spread()</em> from the <em>tidyr</em> package have been superseded by <em>pivot_longer()</em> and <em>pivot_wider()</em>, we still use them because of their much more compact syntax.</p>
<p>As much as we could, we created short <strong>code chunks</strong> and commented each line whenever we felt it was useful. Comments are displayed at the end of a row and preceded with a single hashtag #.</p>
<p>The book is constructed as a very big notebook, thus results are often presented below code chunks. They can be graphs or tables. Sometimes, they are simple numbers and are preceded with two hashtags ##. The example below illustrates this formatting.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="preface.html#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="dv">1</span><span class="sc">+</span><span class="dv">2</span> <span class="co"># Example</span></span></code></pre></div>
<pre><code>## [1] 3</code></pre>
<p></p>
<p>The book can be viewed as a very big tutorial. Therefore, most of the chunks depend on previously defined variables. When replicating parts of the code (via online code), please make sure that <strong>the environment includes all relevant variables</strong>. One best practice is to always start by running all code chunks from Chapter <a href="notdata.html#notdata">1</a>. For the exercises, we often resort to variables created in the corresponding chapters.</p>
</div>
<div id="acknowledgments" class="section level2 unnumbered">
<h2>Acknowledgments</h2>
<p>The core of the book was prepared for a series of lectures given by one of the authors to students of master’s degrees in finance at EMLYON Business School and at the Imperial College Business School in the Spring of 2019. We are grateful to those students who asked fruitful questions and thereby contributed to improving the content of the book.</p>
<p>We are grateful to Bertrand Tavin and Gautier Marti for their thorough screening of the book. We also thank Eric André, Aurélie Brossard, Alban Cousin, Frédérique Girod, Philippe Huber, Jean-Michel Maeso and Javier Nogales for friendly reviews; Christophe Dervieux for his help with bookdown; Mislav Sagovac and Vu Tran for their early feedback; John Kimmel for making this happen and Jonathan Regenstein for his availability, no matter the topic. Lastly, we are grateful for the anonymous reviews collected by John.</p>
</div>
<div id="future-developments" class="section level2 unnumbered">
<h2>Future developments</h2>
<p>Machine learning and factor investing are two immense research domains and the overlap between the two is also quite substantial and developing at a fast pace. The content of this book will always constitute a solid background, but it is naturally destined to obsolescence. Moreover, by construction, some subtopics and many references will have escaped our scrutiny. Our intent is to progressively improve the content of the book and update it with the latest ongoing research. We will be grateful to any comment that helps correct or update the monograph. Thank you for sending your feedback directly (via pull requests) on the book’s website which is hosted at <a href="https://github.com/shokru/mlfactor.github.io" class="uri">https://github.com/shokru/mlfactor.github.io</a>.</p>
</div>
</div>
<div class="footnotes">
<hr />
<ol start="1">
<li id="fn1"><p>For a list of online resources, we recommend the curated page <a href="https://github.com/josephmisiti/awesome-machine-learning/blob/master/books.md" class="uri">https://github.com/josephmisiti/awesome-machine-learning/blob/master/books.md</a>.<a href="preface.html#fnref1" class="footnote-back">↩︎</a></p></li>
<li id="fn2"><p>One example: <a href="https://www.bioconductor.org/packages/release/bioc/html/Rgraphviz.html" class="uri">https://www.bioconductor.org/packages/release/bioc/html/Rgraphviz.html</a><a href="preface.html#fnref2" class="footnote-back">↩︎</a></p></li>
<li id="fn3"><p>By copy-pasting the content of the package in the library folder. To get the address of the folder, execute the command <em>.libPaths()</em> in the R console.<a href="preface.html#fnref3" class="footnote-back">↩︎</a></p></li>
</ol>
</div>
</section>
</div>
</div>
</div>
<a href="notdata.html" class="navigation navigation-next navigation-unique" aria-label="Next page"><i class="fa fa-angle-right"></i></a>
</div>
</div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
<script>
gitbook.require(["gitbook"], function(gitbook) {
  // Runtime options for the gitbook reader UI (sharing buttons, font
  // settings, table of contents and toolbar behavior, search).
  var settings = {
    "sharing": {
      "github": false,
      "facebook": false,
      "twitter": true,
      "linkedin": true,
      "weibo": false,
      "instapaper": false,
      "vk": false,
      "all": ["facebook", "twitter", "linkedin", "weibo", "instapaper"]
    },
    "fontsettings": {
      "theme": "white",
      "family": "sans",
      "size": 2
    },
    "edit": null,
    "history": {
      "link": null,
      "text": null
    },
    "view": {
      "link": null,
      "text": null
    },
    "download": null,
    "toc": {
      "collapse": "section",
      "scroll_highlight": true
    },
    "toolbar": {
      "position": "fixed",
      "download": false
    },
    "search": true,
    "info": true
  };
  // Boot the reader with the configuration above.
  gitbook.start(settings);
});
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
  // Resolve the MathJax URL. The generator substitutes the configured
  // value here; "" or "true" means "use the default RStudio CDN build".
  var url = "true";
  if (url === "" || url === "true") {
    url = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML";
  }
  // Outside file:// contexts, strip the scheme so the request is
  // protocol-relative and follows the page's own protocol.
  if (location.protocol !== "file:" && /^https?:/.test(url)) {
    url = url.replace(/^https?:/, '');
  }
  // Inject the loader tag into <head> to fetch MathJax asynchronously.
  var tag = document.createElement("script");
  tag.type = "text/javascript";
  tag.src = url;
  document.getElementsByTagName("head")[0].appendChild(tag);
})();
</script>
</body>
</html>